xref: /titanic_53/usr/src/cmd/awk_xpg4/awk1.c (revision 88f3d7297a011d0fa0768a7b2082305a822487b2)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*
23*88f3d729Sakaplan  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate /*
287c478bd9Sstevel@tonic-gate  * awk -- mainline, yylex, etc.
297c478bd9Sstevel@tonic-gate  *
307c478bd9Sstevel@tonic-gate  * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
317c478bd9Sstevel@tonic-gate  *
327c478bd9Sstevel@tonic-gate  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
337c478bd9Sstevel@tonic-gate  */
347c478bd9Sstevel@tonic-gate 
357c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
367c478bd9Sstevel@tonic-gate 
377c478bd9Sstevel@tonic-gate #include "awk.h"
387c478bd9Sstevel@tonic-gate #include "y.tab.h"
397c478bd9Sstevel@tonic-gate #include <stdarg.h>
407c478bd9Sstevel@tonic-gate #include <unistd.h>
417c478bd9Sstevel@tonic-gate #include <locale.h>
427c478bd9Sstevel@tonic-gate 
437c478bd9Sstevel@tonic-gate static char	*progfiles[NPFILE];	/* Programmes files for yylex */
447c478bd9Sstevel@tonic-gate static char	**progfilep = &progfiles[0]; /* Pointer to last file */
457c478bd9Sstevel@tonic-gate static wchar_t	*progptr;		/* In-memory programme */
467c478bd9Sstevel@tonic-gate static int	proglen;		/* Length of progptr */
477c478bd9Sstevel@tonic-gate static wchar_t	context[NCONTEXT];	/* Circular buffer of context */
487c478bd9Sstevel@tonic-gate static wchar_t	*conptr = &context[0];	/* context ptr */
497c478bd9Sstevel@tonic-gate static FILE	*progfp;		/* Stdio stream for programme */
507c478bd9Sstevel@tonic-gate static char	*filename;
517c478bd9Sstevel@tonic-gate #ifdef	DEBUG
527c478bd9Sstevel@tonic-gate static int	dflag;
537c478bd9Sstevel@tonic-gate #endif
547c478bd9Sstevel@tonic-gate 
557c478bd9Sstevel@tonic-gate #define AWK_EXEC_MAGIC	"<MKS AWKC>"
567c478bd9Sstevel@tonic-gate #define LEN_EXEC_MAGIC	10
577c478bd9Sstevel@tonic-gate 
587c478bd9Sstevel@tonic-gate static char	unbal[] = "unbalanced E char";
597c478bd9Sstevel@tonic-gate 
607c478bd9Sstevel@tonic-gate static void	awkarginit(int c, char **av);
617c478bd9Sstevel@tonic-gate static int	lexid(wint_t c);
627c478bd9Sstevel@tonic-gate static int	lexnumber(wint_t c);
637c478bd9Sstevel@tonic-gate static int	lexstring(wint_t endc);
647c478bd9Sstevel@tonic-gate static int	lexregexp(register wint_t endc);
657c478bd9Sstevel@tonic-gate 
667c478bd9Sstevel@tonic-gate static void	awkvarinit(void);
677c478bd9Sstevel@tonic-gate static wint_t	lexgetc(void);
687c478bd9Sstevel@tonic-gate static void	lexungetc(wint_t c);
697c478bd9Sstevel@tonic-gate static size_t	lexescape(wint_t endc, int regx, int cmd_line_operand);
707c478bd9Sstevel@tonic-gate static void	awkierr(int perr, char *fmt, va_list ap);
717c478bd9Sstevel@tonic-gate static int	usage(void);
727c478bd9Sstevel@tonic-gate void		strescape(wchar_t *str);
737c478bd9Sstevel@tonic-gate static const char      *toprint(wint_t);
747c478bd9Sstevel@tonic-gate char *_cmdname;
757c478bd9Sstevel@tonic-gate static wchar_t *mbconvert(char *str);
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate 
787c478bd9Sstevel@tonic-gate /*
797c478bd9Sstevel@tonic-gate  * mainline for awk
807c478bd9Sstevel@tonic-gate  */
817c478bd9Sstevel@tonic-gate int
827c478bd9Sstevel@tonic-gate main(int argc, char *argv[])
837c478bd9Sstevel@tonic-gate {
847c478bd9Sstevel@tonic-gate 	register wchar_t *ap;
857c478bd9Sstevel@tonic-gate 	register char *cmd;
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate 	cmd = argv[0];
887c478bd9Sstevel@tonic-gate 	_cmdname = cmd;
897c478bd9Sstevel@tonic-gate 
907c478bd9Sstevel@tonic-gate 	linebuf = emalloc(NLINE * sizeof(wchar_t));
917c478bd9Sstevel@tonic-gate 
927c478bd9Sstevel@tonic-gate 	/*l
937c478bd9Sstevel@tonic-gate 	 * At this point only messaging should be internationalized.
947c478bd9Sstevel@tonic-gate 	 * numbers are still scanned as in the Posix locale.
957c478bd9Sstevel@tonic-gate 	 */
967c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_ALL,"");
977c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_NUMERIC,"C");
987c478bd9Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
997c478bd9Sstevel@tonic-gate #define	TEXT_DOMAIN	"SYS_TEST"
1007c478bd9Sstevel@tonic-gate #endif
1017c478bd9Sstevel@tonic-gate 	(void) textdomain(TEXT_DOMAIN);
1027c478bd9Sstevel@tonic-gate 
1037c478bd9Sstevel@tonic-gate 	awkvarinit();
1047c478bd9Sstevel@tonic-gate 	/*running = 1;*/
1057c478bd9Sstevel@tonic-gate 	while (argc>1 && *argv[1]=='-') {
1067c478bd9Sstevel@tonic-gate 		void *save_ptr = NULL;
1077c478bd9Sstevel@tonic-gate 		ap = mbstowcsdup(&argv[1][1]);
1087c478bd9Sstevel@tonic-gate 		if (ap == NULL)
1097c478bd9Sstevel@tonic-gate 			break;
1107c478bd9Sstevel@tonic-gate 		if (*ap == '\0') {
1117c478bd9Sstevel@tonic-gate 			free(ap);
1127c478bd9Sstevel@tonic-gate 			break;
1137c478bd9Sstevel@tonic-gate 		}
1147c478bd9Sstevel@tonic-gate 		save_ptr = (void *) ap;
1157c478bd9Sstevel@tonic-gate 		++argv;
1167c478bd9Sstevel@tonic-gate 		--argc;
1177c478bd9Sstevel@tonic-gate 		if (*ap=='-' && ap[1]=='\0')
1187c478bd9Sstevel@tonic-gate 			break;
1197c478bd9Sstevel@tonic-gate 		for ( ; *ap != '\0'; ++ap) {
1207c478bd9Sstevel@tonic-gate 			switch (*ap) {
1217c478bd9Sstevel@tonic-gate #ifdef DEBUG
1227c478bd9Sstevel@tonic-gate 			case 'd':
1237c478bd9Sstevel@tonic-gate 				dflag = 1;
1247c478bd9Sstevel@tonic-gate 				continue;
1257c478bd9Sstevel@tonic-gate 
1267c478bd9Sstevel@tonic-gate #endif
1277c478bd9Sstevel@tonic-gate 			case 'f':
1287c478bd9Sstevel@tonic-gate 				if (argc < 2) {
1297c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
1307c478bd9Sstevel@tonic-gate 				gettext("Missing script file\n"));
1317c478bd9Sstevel@tonic-gate 					return (1);
1327c478bd9Sstevel@tonic-gate 				}
1337c478bd9Sstevel@tonic-gate 				*progfilep++ = argv[1];
1347c478bd9Sstevel@tonic-gate 				--argc;
1357c478bd9Sstevel@tonic-gate 				++argv;
1367c478bd9Sstevel@tonic-gate 				continue;
1377c478bd9Sstevel@tonic-gate 
1387c478bd9Sstevel@tonic-gate 			case 'F':
1397c478bd9Sstevel@tonic-gate 				if (ap[1] == '\0') {
1407c478bd9Sstevel@tonic-gate 					if (argc < 2) {
1417c478bd9Sstevel@tonic-gate 						(void) fprintf(stderr,
1427c478bd9Sstevel@tonic-gate 				gettext("Missing field separator\n"));
1437c478bd9Sstevel@tonic-gate 						return (1);
1447c478bd9Sstevel@tonic-gate 					}
1457c478bd9Sstevel@tonic-gate 					ap = mbstowcsdup(argv[1]);
1467c478bd9Sstevel@tonic-gate 					--argc;
1477c478bd9Sstevel@tonic-gate 					++argv;
1487c478bd9Sstevel@tonic-gate 				} else
1497c478bd9Sstevel@tonic-gate 					++ap;
1507c478bd9Sstevel@tonic-gate 				strescape(ap);
1517c478bd9Sstevel@tonic-gate 				strassign(varFS, linebuf, FALLOC,
1527c478bd9Sstevel@tonic-gate 					wcslen(linebuf));
1537c478bd9Sstevel@tonic-gate 				break;
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate 			case 'v': {
1567c478bd9Sstevel@tonic-gate 				register wchar_t *vp;
1577c478bd9Sstevel@tonic-gate 				register wchar_t *arg;
1587c478bd9Sstevel@tonic-gate 
1597c478bd9Sstevel@tonic-gate 				if (argc < 2) {
1607c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
1617c478bd9Sstevel@tonic-gate 		gettext("Missing variable assignment\n"));
1627c478bd9Sstevel@tonic-gate 					return (1);
1637c478bd9Sstevel@tonic-gate 				}
1647c478bd9Sstevel@tonic-gate 				arg = mbconvert(argv[1]);
1657c478bd9Sstevel@tonic-gate 				if ((vp = wcschr(arg, '=')) != NULL) {
1667c478bd9Sstevel@tonic-gate 					*vp = '\0';
1677c478bd9Sstevel@tonic-gate 					strescape(vp+1);
1687c478bd9Sstevel@tonic-gate 					strassign(vlook(arg), linebuf,
1697c478bd9Sstevel@tonic-gate 					    FALLOC|FSENSE, wcslen(linebuf));
1707c478bd9Sstevel@tonic-gate 					*vp = '=';
1717c478bd9Sstevel@tonic-gate 				}
1727c478bd9Sstevel@tonic-gate 				--argc;
1737c478bd9Sstevel@tonic-gate 				++argv;
1747c478bd9Sstevel@tonic-gate 				continue;
1757c478bd9Sstevel@tonic-gate 			}
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate 			default:
1787c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr,
1797c478bd9Sstevel@tonic-gate 				gettext("Unknown option \"-%S\"\n"), ap);
1807c478bd9Sstevel@tonic-gate 				return (usage());
1817c478bd9Sstevel@tonic-gate 			}
1827c478bd9Sstevel@tonic-gate 			break;
1837c478bd9Sstevel@tonic-gate 		}
1847c478bd9Sstevel@tonic-gate 		if (save_ptr)
1857c478bd9Sstevel@tonic-gate 			free(save_ptr);
1867c478bd9Sstevel@tonic-gate 	}
1877c478bd9Sstevel@tonic-gate 	if (progfilep == &progfiles[0]) {
1887c478bd9Sstevel@tonic-gate 		if (argc < 2)
1897c478bd9Sstevel@tonic-gate 			return (usage());
1907c478bd9Sstevel@tonic-gate 		filename = "[command line]";	/* BUG: NEEDS TRANSLATION */
1917c478bd9Sstevel@tonic-gate 		progptr = mbstowcsdup(argv[1]);
1927c478bd9Sstevel@tonic-gate 		proglen = wcslen(progptr);
1937c478bd9Sstevel@tonic-gate 		--argc;
1947c478bd9Sstevel@tonic-gate 		++argv;
1957c478bd9Sstevel@tonic-gate 	}
1967c478bd9Sstevel@tonic-gate 
1977c478bd9Sstevel@tonic-gate 	argv[0] = cmd;
1987c478bd9Sstevel@tonic-gate 
1997c478bd9Sstevel@tonic-gate 	awkarginit(argc, argv);
2007c478bd9Sstevel@tonic-gate 
2017c478bd9Sstevel@tonic-gate 	/*running = 0;*/
2027c478bd9Sstevel@tonic-gate 	(void)yyparse();
2037c478bd9Sstevel@tonic-gate 
2047c478bd9Sstevel@tonic-gate 	lineno = 0;
2057c478bd9Sstevel@tonic-gate 	/*
2067c478bd9Sstevel@tonic-gate 	 * Ok, done parsing, so now activate the rest of the nls stuff, set
2077c478bd9Sstevel@tonic-gate 	 * the radix character.
2087c478bd9Sstevel@tonic-gate 	 */
2097c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_ALL,"");
2107c478bd9Sstevel@tonic-gate 	radixpoint = *localeconv()->decimal_point;
2117c478bd9Sstevel@tonic-gate 	awk();
2127c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
2137c478bd9Sstevel@tonic-gate 	return (0);
2147c478bd9Sstevel@tonic-gate }
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate /*
2177c478bd9Sstevel@tonic-gate  * Do initial setup of buffers, etc.
2187c478bd9Sstevel@tonic-gate  * This must be called before most processing
2197c478bd9Sstevel@tonic-gate  * and especially before lexical analysis.
2207c478bd9Sstevel@tonic-gate  * Variables initialised here will be overruled by command
2217c478bd9Sstevel@tonic-gate  * line parameter initialisation.
2227c478bd9Sstevel@tonic-gate  */
2237c478bd9Sstevel@tonic-gate static void
2247c478bd9Sstevel@tonic-gate awkvarinit()
2257c478bd9Sstevel@tonic-gate {
2267c478bd9Sstevel@tonic-gate 	register NODE *np;
2277c478bd9Sstevel@tonic-gate 
2287c478bd9Sstevel@tonic-gate 	(void) setvbuf(stderr, NULL, _IONBF, 0);
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate 	if ((NIOSTREAM = sysconf(_SC_OPEN_MAX) - 4) <= 0) {
2317c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
2327c478bd9Sstevel@tonic-gate 	gettext("not enough available file descriptors"));
2337c478bd9Sstevel@tonic-gate 		exit(1);
2347c478bd9Sstevel@tonic-gate 	}
2357c478bd9Sstevel@tonic-gate 	ofiles = (OFILE *) emalloc(sizeof(OFILE)*NIOSTREAM);
2367c478bd9Sstevel@tonic-gate #ifdef A_ZERO_POINTERS
2377c478bd9Sstevel@tonic-gate 	(void) memset((wchar_t *) ofiles, 0, sizeof(OFILE) * NIOSTREAM);
2387c478bd9Sstevel@tonic-gate #else
2397c478bd9Sstevel@tonic-gate 	{
2407c478bd9Sstevel@tonic-gate 	    /* initialize file descriptor table */
2417c478bd9Sstevel@tonic-gate 	    OFILE *fp;
2427c478bd9Sstevel@tonic-gate 	    for (fp = ofiles; fp < &ofiles[NIOSTREAM]; fp += 1) {
2437c478bd9Sstevel@tonic-gate 		fp->f_fp = FNULL;
2447c478bd9Sstevel@tonic-gate 		fp->f_mode = 0;
2457c478bd9Sstevel@tonic-gate 		fp->f_name = (char *)0;
2467c478bd9Sstevel@tonic-gate 	    }
2477c478bd9Sstevel@tonic-gate 	}
2487c478bd9Sstevel@tonic-gate #endif
2497c478bd9Sstevel@tonic-gate 	constant = intnode((INT)0);
2507c478bd9Sstevel@tonic-gate 
2517c478bd9Sstevel@tonic-gate 	const0 = intnode((INT)0);
2527c478bd9Sstevel@tonic-gate 	const1 = intnode((INT)1);
2537c478bd9Sstevel@tonic-gate 	constundef = emptynode(CONSTANT, 0);
2547c478bd9Sstevel@tonic-gate 	constundef->n_flags = FSTRING|FVINT;
2557c478bd9Sstevel@tonic-gate 	constundef->n_string = _null;
2567c478bd9Sstevel@tonic-gate 	constundef->n_strlen = 0;
2577c478bd9Sstevel@tonic-gate 	inc_oper = emptynode(ADD, 0);
2587c478bd9Sstevel@tonic-gate 	inc_oper->n_right = const1;
2597c478bd9Sstevel@tonic-gate 	asn_oper = emptynode(ADD, 0);
2607c478bd9Sstevel@tonic-gate 	field0 = node(FIELD, const0, NNULL);
2617c478bd9Sstevel@tonic-gate 
2627c478bd9Sstevel@tonic-gate 	{
2637c478bd9Sstevel@tonic-gate 		register RESFUNC near*rp;
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate 		for (rp = &resfuncs[0]; rp->rf_name != (LOCCHARP)NULL; ++rp) {
2667c478bd9Sstevel@tonic-gate 			np = finstall(rp->rf_name, rp->rf_func, rp->rf_type);
2677c478bd9Sstevel@tonic-gate 		}
2687c478bd9Sstevel@tonic-gate 	}
2697c478bd9Sstevel@tonic-gate 	{
2707c478bd9Sstevel@tonic-gate 		register RESERVED near*rp;
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate 		for (rp = &reserved[0]; rp->r_name != (LOCCHARP)NULL; ++rp) {
2737c478bd9Sstevel@tonic-gate 			switch (rp->r_type) {
2747c478bd9Sstevel@tonic-gate 			case SVAR:
2757c478bd9Sstevel@tonic-gate 			case VAR:
2767c478bd9Sstevel@tonic-gate 				running = 1;
2777c478bd9Sstevel@tonic-gate 				np = vlook(rp->r_name);
2787c478bd9Sstevel@tonic-gate 				if (rp->r_type == SVAR)
2797c478bd9Sstevel@tonic-gate 					np->n_flags |= FSPECIAL;
2807c478bd9Sstevel@tonic-gate 				if (rp->r_svalue != NULL)
2817c478bd9Sstevel@tonic-gate 					strassign(np, rp->r_svalue, FSTATIC,
2827c478bd9Sstevel@tonic-gate 					    (size_t)rp->r_ivalue);
2837c478bd9Sstevel@tonic-gate 				else {
2847c478bd9Sstevel@tonic-gate 					constant->n_int = rp->r_ivalue;
2857c478bd9Sstevel@tonic-gate 					(void)assign(np, constant);
2867c478bd9Sstevel@tonic-gate 				}
2877c478bd9Sstevel@tonic-gate 				running = 0;
2887c478bd9Sstevel@tonic-gate 				break;
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate 			case KEYWORD:
2917c478bd9Sstevel@tonic-gate 				kinstall(rp->r_name, (int)rp->r_ivalue);
2927c478bd9Sstevel@tonic-gate 				break;
2937c478bd9Sstevel@tonic-gate 			}
2947c478bd9Sstevel@tonic-gate 		}
2957c478bd9Sstevel@tonic-gate 	}
2967c478bd9Sstevel@tonic-gate 
2977c478bd9Sstevel@tonic-gate 	varNR = vlook(s_NR);
2987c478bd9Sstevel@tonic-gate 	varFNR = vlook(s_FNR);
2997c478bd9Sstevel@tonic-gate 	varNF = vlook(s_NF);
3007c478bd9Sstevel@tonic-gate 	varOFMT = vlook(s_OFMT);
3017c478bd9Sstevel@tonic-gate 	varCONVFMT = vlook(s_CONVFMT);
3027c478bd9Sstevel@tonic-gate 	varOFS = vlook(s_OFS);
3037c478bd9Sstevel@tonic-gate 	varORS = vlook(s_ORS);
3047c478bd9Sstevel@tonic-gate 	varRS = vlook(s_RS);
3057c478bd9Sstevel@tonic-gate 	varFS = vlook(s_FS);
3067c478bd9Sstevel@tonic-gate 	varARGC = vlook(s_ARGC);
3077c478bd9Sstevel@tonic-gate 	varSUBSEP = vlook(s_SUBSEP);
3087c478bd9Sstevel@tonic-gate 	varENVIRON = vlook(s_ENVIRON);
3097c478bd9Sstevel@tonic-gate 	varFILENAME = vlook(s_FILENAME);
3107c478bd9Sstevel@tonic-gate 	varSYMTAB = vlook(s_SYMTAB);
3117c478bd9Sstevel@tonic-gate 	incNR = node(ASG, varNR, node(ADD, varNR, const1));
3127c478bd9Sstevel@tonic-gate 	incFNR = node(ASG, varFNR, node(ADD, varFNR, const1));
3137c478bd9Sstevel@tonic-gate 	clrFNR = node(ASG, varFNR, const0);
3147c478bd9Sstevel@tonic-gate }
3157c478bd9Sstevel@tonic-gate 
3167c478bd9Sstevel@tonic-gate /*
3177c478bd9Sstevel@tonic-gate  * Initialise awk ARGC, ARGV variables.
3187c478bd9Sstevel@tonic-gate  */
3197c478bd9Sstevel@tonic-gate static void
3207c478bd9Sstevel@tonic-gate awkarginit(int ac, char **av)
3217c478bd9Sstevel@tonic-gate {
3227c478bd9Sstevel@tonic-gate 	register int i;
3237c478bd9Sstevel@tonic-gate 	register wchar_t *cp;
3247c478bd9Sstevel@tonic-gate 
3257c478bd9Sstevel@tonic-gate 	ARGVsubi = node(INDEX, vlook(s_ARGV), constant);
3267c478bd9Sstevel@tonic-gate 	running = 1;
3277c478bd9Sstevel@tonic-gate 	constant->n_int = ac;
3287c478bd9Sstevel@tonic-gate 	(void)assign(varARGC, constant);
3297c478bd9Sstevel@tonic-gate 	for (i = 0; i < ac; ++i) {
3307c478bd9Sstevel@tonic-gate 		cp = mbstowcsdup(av[i]);
3317c478bd9Sstevel@tonic-gate 		constant->n_int = i;
3327c478bd9Sstevel@tonic-gate 		strassign(exprreduce(ARGVsubi), cp,
3337c478bd9Sstevel@tonic-gate 		    FSTATIC|FSENSE, wcslen(cp));
3347c478bd9Sstevel@tonic-gate 	}
3357c478bd9Sstevel@tonic-gate 	running = 0;
3367c478bd9Sstevel@tonic-gate }
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate /*
3397c478bd9Sstevel@tonic-gate  * Clean up when done parsing a function.
3407c478bd9Sstevel@tonic-gate  * All formal parameters, because of a deal (funparm) in
3417c478bd9Sstevel@tonic-gate  * yylex, get put into the symbol table in front of any
3427c478bd9Sstevel@tonic-gate  * global variable of the same name.  When the entire
3437c478bd9Sstevel@tonic-gate  * function is parsed, remove these formal dummy nodes
3447c478bd9Sstevel@tonic-gate  * from the symbol table but retain the nodes because
3457c478bd9Sstevel@tonic-gate  * the generated tree points at them.
3467c478bd9Sstevel@tonic-gate  */
3477c478bd9Sstevel@tonic-gate void
3487c478bd9Sstevel@tonic-gate uexit(NODE *np)
3497c478bd9Sstevel@tonic-gate {
3507c478bd9Sstevel@tonic-gate 	register NODE *formal;
3517c478bd9Sstevel@tonic-gate 
3527c478bd9Sstevel@tonic-gate 	while ((formal = getlist(&np)) != NNULL)
3537c478bd9Sstevel@tonic-gate 		delsymtab(formal, 0);
3547c478bd9Sstevel@tonic-gate }
3557c478bd9Sstevel@tonic-gate 
3567c478bd9Sstevel@tonic-gate /*
3577c478bd9Sstevel@tonic-gate  * The lexical analyzer.
3587c478bd9Sstevel@tonic-gate  */
3597c478bd9Sstevel@tonic-gate int
3607c478bd9Sstevel@tonic-gate yylex()
3617c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3627c478bd9Sstevel@tonic-gate {
3637c478bd9Sstevel@tonic-gate 	register int l;
3647c478bd9Sstevel@tonic-gate 
3657c478bd9Sstevel@tonic-gate 	l = yyhex();
3667c478bd9Sstevel@tonic-gate 	if (dflag)
3677c478bd9Sstevel@tonic-gate 		(void) printf("%d\n", l);
3687c478bd9Sstevel@tonic-gate 	return (l);
3697c478bd9Sstevel@tonic-gate }
3707c478bd9Sstevel@tonic-gate yyhex()
3717c478bd9Sstevel@tonic-gate #endif
3727c478bd9Sstevel@tonic-gate {
3737c478bd9Sstevel@tonic-gate 	register wint_t c, c1;
3747c478bd9Sstevel@tonic-gate 	int i;
3757c478bd9Sstevel@tonic-gate 	static int savetoken = 0;
376*88f3d729Sakaplan 	static int wasfield;
3777c478bd9Sstevel@tonic-gate 	static int isfuncdef;
3787c478bd9Sstevel@tonic-gate 	static int nbrace, nparen, nbracket;
3797c478bd9Sstevel@tonic-gate 	static struct ctosymstruct {
3807c478bd9Sstevel@tonic-gate 		wint_t c, sym;
3817c478bd9Sstevel@tonic-gate 	} ctosym[] = {
3827c478bd9Sstevel@tonic-gate 		{ '|', BAR },		{ '^', CARAT },
3837c478bd9Sstevel@tonic-gate 	  	{ '~', TILDE },		{ '<', LANGLE },
3847c478bd9Sstevel@tonic-gate 	  	{ '>', RANGLE },	{ '+', PLUSC },
3857c478bd9Sstevel@tonic-gate 	  	{ '-', HYPHEN },	{ '*', STAR },
3867c478bd9Sstevel@tonic-gate 	  	{ '/', SLASH },		{ '%', PERCENT },
3877c478bd9Sstevel@tonic-gate 	  	{ '!', EXCLAMATION },	{ '$', DOLLAR },
3887c478bd9Sstevel@tonic-gate 	  	{ '[', LSQUARE },	{ ']', RSQUARE },
3897c478bd9Sstevel@tonic-gate 		{ '(', LPAREN },	{ ')', RPAREN },
3907c478bd9Sstevel@tonic-gate 		{ ';', SEMI },		{ '{', LBRACE },
3917c478bd9Sstevel@tonic-gate 		{ '}', RBRACE },	{   0, 0 }
3927c478bd9Sstevel@tonic-gate 	};
3937c478bd9Sstevel@tonic-gate 
3947c478bd9Sstevel@tonic-gate 	if (savetoken) {
3957c478bd9Sstevel@tonic-gate 		c = savetoken;
3967c478bd9Sstevel@tonic-gate 		savetoken = 0;
3977c478bd9Sstevel@tonic-gate 	} else if (redelim != '\0') {
3987c478bd9Sstevel@tonic-gate 		c = redelim;
3997c478bd9Sstevel@tonic-gate 		redelim = 0;
4007c478bd9Sstevel@tonic-gate 		catterm = 0;
4017c478bd9Sstevel@tonic-gate 		savetoken = c;
4027c478bd9Sstevel@tonic-gate 		return (lexlast = lexregexp(c));
4037c478bd9Sstevel@tonic-gate 	} else while ((c = lexgetc()) != WEOF) {
4047c478bd9Sstevel@tonic-gate 		if (iswalpha(c) || c=='_') {
4057c478bd9Sstevel@tonic-gate 			c = lexid(c);
4067c478bd9Sstevel@tonic-gate 		} else if (iswdigit(c) || c=='.') {
4077c478bd9Sstevel@tonic-gate 			c = lexnumber(c);
4087c478bd9Sstevel@tonic-gate 		} else if (isWblank(c)) {
4097c478bd9Sstevel@tonic-gate 			continue;
4107c478bd9Sstevel@tonic-gate 		} else switch (c) {
4117c478bd9Sstevel@tonic-gate #if DOS || OS2
4127c478bd9Sstevel@tonic-gate 		case 032:		/* ^Z */
4137c478bd9Sstevel@tonic-gate 			continue;
4147c478bd9Sstevel@tonic-gate #endif
4157c478bd9Sstevel@tonic-gate 
4167c478bd9Sstevel@tonic-gate 		case '"':
4177c478bd9Sstevel@tonic-gate 			c = lexstring(c);
4187c478bd9Sstevel@tonic-gate 			break;
4197c478bd9Sstevel@tonic-gate 
4207c478bd9Sstevel@tonic-gate 		case '#':
4217c478bd9Sstevel@tonic-gate 			while ((c = lexgetc())!='\n' && c!=WEOF)
4227c478bd9Sstevel@tonic-gate 				;
4237c478bd9Sstevel@tonic-gate 			lexungetc(c);
4247c478bd9Sstevel@tonic-gate 			continue;
4257c478bd9Sstevel@tonic-gate 
4267c478bd9Sstevel@tonic-gate 		case '+':
4277c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '+')
4287c478bd9Sstevel@tonic-gate 				c = INC;
4297c478bd9Sstevel@tonic-gate 			else if (c1 == '=')
4307c478bd9Sstevel@tonic-gate 				c = AADD;
4317c478bd9Sstevel@tonic-gate 			else
4327c478bd9Sstevel@tonic-gate 				lexungetc(c1);
4337c478bd9Sstevel@tonic-gate 			break;
4347c478bd9Sstevel@tonic-gate 
4357c478bd9Sstevel@tonic-gate 		case '-':
4367c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '-')
4377c478bd9Sstevel@tonic-gate 				c = DEC;
4387c478bd9Sstevel@tonic-gate 			else if (c1 == '=')
4397c478bd9Sstevel@tonic-gate 				c = ASUB;
4407c478bd9Sstevel@tonic-gate 			else
4417c478bd9Sstevel@tonic-gate 				lexungetc(c1);
4427c478bd9Sstevel@tonic-gate 			break;
4437c478bd9Sstevel@tonic-gate 
4447c478bd9Sstevel@tonic-gate 		case '*':
4457c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
4467c478bd9Sstevel@tonic-gate 				c = AMUL;
4477c478bd9Sstevel@tonic-gate 			else if (c1 == '*') {
4487c478bd9Sstevel@tonic-gate 				if ((c1 = lexgetc()) == '=')
4497c478bd9Sstevel@tonic-gate 					c = AEXP;
4507c478bd9Sstevel@tonic-gate 				else {
4517c478bd9Sstevel@tonic-gate 					c = EXP;
4527c478bd9Sstevel@tonic-gate 					lexungetc(c1);
4537c478bd9Sstevel@tonic-gate 				}
4547c478bd9Sstevel@tonic-gate 			} else
4557c478bd9Sstevel@tonic-gate 				lexungetc(c1);
4567c478bd9Sstevel@tonic-gate 			break;
4577c478bd9Sstevel@tonic-gate 
4587c478bd9Sstevel@tonic-gate 		case '^':
4597c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=') {
4607c478bd9Sstevel@tonic-gate 				c = AEXP;
4617c478bd9Sstevel@tonic-gate 			} else {
4627c478bd9Sstevel@tonic-gate 				c = EXP;
4637c478bd9Sstevel@tonic-gate 				lexungetc(c1);
4647c478bd9Sstevel@tonic-gate 			}
4657c478bd9Sstevel@tonic-gate 			break;
4667c478bd9Sstevel@tonic-gate 
4677c478bd9Sstevel@tonic-gate 		case '/':
4687c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '='
4697c478bd9Sstevel@tonic-gate 			 && lexlast!=RE && lexlast!=NRE
4707c478bd9Sstevel@tonic-gate 			 && lexlast!=';' && lexlast!='\n'
4717c478bd9Sstevel@tonic-gate 			 && lexlast!=',' && lexlast!='(')
4727c478bd9Sstevel@tonic-gate 				c = ADIV;
4737c478bd9Sstevel@tonic-gate 			else
4747c478bd9Sstevel@tonic-gate 				lexungetc(c1);
4757c478bd9Sstevel@tonic-gate 			break;
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate 		case '%':
4787c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
4797c478bd9Sstevel@tonic-gate 				c = AREM;
4807c478bd9Sstevel@tonic-gate 			else
4817c478bd9Sstevel@tonic-gate 				lexungetc(c1);
4827c478bd9Sstevel@tonic-gate 			break;
4837c478bd9Sstevel@tonic-gate 
4847c478bd9Sstevel@tonic-gate 		case '&':
4857c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '&')
4867c478bd9Sstevel@tonic-gate 				c = AND;
4877c478bd9Sstevel@tonic-gate 			else
4887c478bd9Sstevel@tonic-gate 				lexungetc(c1);
4897c478bd9Sstevel@tonic-gate 			break;
4907c478bd9Sstevel@tonic-gate 
4917c478bd9Sstevel@tonic-gate 		case '|':
4927c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '|')
4937c478bd9Sstevel@tonic-gate 				c = OR;
4947c478bd9Sstevel@tonic-gate 			else {
4957c478bd9Sstevel@tonic-gate 				lexungetc(c1);
4967c478bd9Sstevel@tonic-gate 				if (inprint)
4977c478bd9Sstevel@tonic-gate 					c = PIPE;
4987c478bd9Sstevel@tonic-gate 			}
4997c478bd9Sstevel@tonic-gate 			break;
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 		case '>':
5027c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
5037c478bd9Sstevel@tonic-gate 				c = GE;
5047c478bd9Sstevel@tonic-gate 			else if (c1 == '>')
5057c478bd9Sstevel@tonic-gate 				c = APPEND;
5067c478bd9Sstevel@tonic-gate 			else {
5077c478bd9Sstevel@tonic-gate 				lexungetc(c1);
5087c478bd9Sstevel@tonic-gate 				if (nparen==0 && inprint)
5097c478bd9Sstevel@tonic-gate 					c = WRITE;
5107c478bd9Sstevel@tonic-gate 			}
5117c478bd9Sstevel@tonic-gate 			break;
5127c478bd9Sstevel@tonic-gate 
5137c478bd9Sstevel@tonic-gate 		case '<':
5147c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
5157c478bd9Sstevel@tonic-gate 				c = LE;
5167c478bd9Sstevel@tonic-gate 			else
5177c478bd9Sstevel@tonic-gate 				lexungetc(c1);
5187c478bd9Sstevel@tonic-gate 			break;
5197c478bd9Sstevel@tonic-gate 
5207c478bd9Sstevel@tonic-gate 		case '!':
5217c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
5227c478bd9Sstevel@tonic-gate 				c = NE;
5237c478bd9Sstevel@tonic-gate 			else if (c1 == '~')
5247c478bd9Sstevel@tonic-gate 				c = NRE;
5257c478bd9Sstevel@tonic-gate 			else
5267c478bd9Sstevel@tonic-gate 				lexungetc(c1);
5277c478bd9Sstevel@tonic-gate 			break;
5287c478bd9Sstevel@tonic-gate 
5297c478bd9Sstevel@tonic-gate 		case '=':
5307c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
5317c478bd9Sstevel@tonic-gate 				c = EQ;
5327c478bd9Sstevel@tonic-gate 			else {
5337c478bd9Sstevel@tonic-gate 				lexungetc(c1);
5347c478bd9Sstevel@tonic-gate 				c = ASG;
5357c478bd9Sstevel@tonic-gate 			}
5367c478bd9Sstevel@tonic-gate 			break;
5377c478bd9Sstevel@tonic-gate 
5387c478bd9Sstevel@tonic-gate 		case '\n':
5397c478bd9Sstevel@tonic-gate 			switch (lexlast) {
5407c478bd9Sstevel@tonic-gate 			case ')':
5417c478bd9Sstevel@tonic-gate 				if (catterm || inprint) {
5427c478bd9Sstevel@tonic-gate 					c = ';';
5437c478bd9Sstevel@tonic-gate 					break;
5447c478bd9Sstevel@tonic-gate 				}
5457c478bd9Sstevel@tonic-gate 			case AND:
5467c478bd9Sstevel@tonic-gate 			case OR:
5477c478bd9Sstevel@tonic-gate 			case COMMA:
5487c478bd9Sstevel@tonic-gate 			case '{':
5497c478bd9Sstevel@tonic-gate 			case ELSE:
5507c478bd9Sstevel@tonic-gate 			case ';':
5517c478bd9Sstevel@tonic-gate 			case DO:
5527c478bd9Sstevel@tonic-gate 				continue;
5537c478bd9Sstevel@tonic-gate 
5547c478bd9Sstevel@tonic-gate 			case '}':
5557c478bd9Sstevel@tonic-gate 				if (nbrace != 0)
5567c478bd9Sstevel@tonic-gate 					continue;
5577c478bd9Sstevel@tonic-gate 
5587c478bd9Sstevel@tonic-gate 			default:
5597c478bd9Sstevel@tonic-gate 				c = ';';
5607c478bd9Sstevel@tonic-gate 				break;
5617c478bd9Sstevel@tonic-gate 			}
5627c478bd9Sstevel@tonic-gate 			break;
5637c478bd9Sstevel@tonic-gate 
5647c478bd9Sstevel@tonic-gate 		case ELSE:
5657c478bd9Sstevel@tonic-gate 			if (lexlast != ';') {
5667c478bd9Sstevel@tonic-gate 				savetoken = ELSE;
5677c478bd9Sstevel@tonic-gate 				c = ';';
5687c478bd9Sstevel@tonic-gate 			}
5697c478bd9Sstevel@tonic-gate 			break;
5707c478bd9Sstevel@tonic-gate 
5717c478bd9Sstevel@tonic-gate 		case '(':
5727c478bd9Sstevel@tonic-gate 			++nparen;
5737c478bd9Sstevel@tonic-gate 			break;
5747c478bd9Sstevel@tonic-gate 
5757c478bd9Sstevel@tonic-gate 		case ')':
5767c478bd9Sstevel@tonic-gate 			if (--nparen < 0)
5777c478bd9Sstevel@tonic-gate 				awkerr(unbal, "()");
5787c478bd9Sstevel@tonic-gate 			break;
5797c478bd9Sstevel@tonic-gate 
5807c478bd9Sstevel@tonic-gate 		case '{':
5817c478bd9Sstevel@tonic-gate 			nbrace++;
5827c478bd9Sstevel@tonic-gate 			break;
5837c478bd9Sstevel@tonic-gate 
5847c478bd9Sstevel@tonic-gate 		case '}':
5857c478bd9Sstevel@tonic-gate 			if (--nbrace < 0) {
5867c478bd9Sstevel@tonic-gate 				char brk[3];
5877c478bd9Sstevel@tonic-gate 
5887c478bd9Sstevel@tonic-gate 				brk[0] = '{';
5897c478bd9Sstevel@tonic-gate 				brk[1] = '}';
5907c478bd9Sstevel@tonic-gate 				brk[2] = '\0';
5917c478bd9Sstevel@tonic-gate 				awkerr(unbal, brk);
5927c478bd9Sstevel@tonic-gate 			}
5937c478bd9Sstevel@tonic-gate 			if (lexlast != ';') {
5947c478bd9Sstevel@tonic-gate 				savetoken = c;
5957c478bd9Sstevel@tonic-gate 				c = ';';
5967c478bd9Sstevel@tonic-gate 			}
5977c478bd9Sstevel@tonic-gate 			break;
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate 		case '[':
6007c478bd9Sstevel@tonic-gate 			++nbracket;
6017c478bd9Sstevel@tonic-gate 			break;
6027c478bd9Sstevel@tonic-gate 
6037c478bd9Sstevel@tonic-gate 		case ']':
6047c478bd9Sstevel@tonic-gate 			if (--nbracket < 0) {
6057c478bd9Sstevel@tonic-gate 				char brk[3];
6067c478bd9Sstevel@tonic-gate 
6077c478bd9Sstevel@tonic-gate 				brk[0] = '[';
6087c478bd9Sstevel@tonic-gate 				brk[1] = ']';
6097c478bd9Sstevel@tonic-gate 				brk[2] = '\0';
6107c478bd9Sstevel@tonic-gate 				awkerr(unbal, brk);
6117c478bd9Sstevel@tonic-gate 			}
6127c478bd9Sstevel@tonic-gate 			break;
6137c478bd9Sstevel@tonic-gate 
6147c478bd9Sstevel@tonic-gate 		case '\\':
6157c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '\n')
6167c478bd9Sstevel@tonic-gate 				continue;
6177c478bd9Sstevel@tonic-gate 			lexungetc(c1);
6187c478bd9Sstevel@tonic-gate 			break;
6197c478bd9Sstevel@tonic-gate 
6207c478bd9Sstevel@tonic-gate 		case ',':
6217c478bd9Sstevel@tonic-gate 			c = COMMA;
6227c478bd9Sstevel@tonic-gate 			break;
6237c478bd9Sstevel@tonic-gate 
6247c478bd9Sstevel@tonic-gate 		case '?':
6257c478bd9Sstevel@tonic-gate 			c = QUEST;
6267c478bd9Sstevel@tonic-gate 			break;
6277c478bd9Sstevel@tonic-gate 
6287c478bd9Sstevel@tonic-gate 		case ':':
6297c478bd9Sstevel@tonic-gate 			c = COLON;
6307c478bd9Sstevel@tonic-gate 			break;
6317c478bd9Sstevel@tonic-gate 
6327c478bd9Sstevel@tonic-gate 		default:
6337c478bd9Sstevel@tonic-gate 			if (!iswprint(c))
6347c478bd9Sstevel@tonic-gate 				awkerr(
6357c478bd9Sstevel@tonic-gate 				   gettext("invalid character \"%s\""),
6367c478bd9Sstevel@tonic-gate 				   toprint(c));
6377c478bd9Sstevel@tonic-gate 			break;
6387c478bd9Sstevel@tonic-gate 		}
6397c478bd9Sstevel@tonic-gate 		break;
6407c478bd9Sstevel@tonic-gate 	}
6417c478bd9Sstevel@tonic-gate 
6427c478bd9Sstevel@tonic-gate 	switch (c) {
6437c478bd9Sstevel@tonic-gate 	case ']':
6447c478bd9Sstevel@tonic-gate 		++catterm;
6457c478bd9Sstevel@tonic-gate 		break;
6467c478bd9Sstevel@tonic-gate 
6477c478bd9Sstevel@tonic-gate 	case VAR:
6487c478bd9Sstevel@tonic-gate 		if (catterm) {
6497c478bd9Sstevel@tonic-gate 			savetoken = c;
6507c478bd9Sstevel@tonic-gate 			c = CONCAT;
6517c478bd9Sstevel@tonic-gate 			catterm = 0;
6527c478bd9Sstevel@tonic-gate 		} else if (!isfuncdef) {
6537c478bd9Sstevel@tonic-gate 			if ((c1=lexgetc()) != '(')
6547c478bd9Sstevel@tonic-gate 				++catterm;
6557c478bd9Sstevel@tonic-gate 			lexungetc(c1);
6567c478bd9Sstevel@tonic-gate 		}
6577c478bd9Sstevel@tonic-gate 		isfuncdef = 0;
6587c478bd9Sstevel@tonic-gate 		break;
6597c478bd9Sstevel@tonic-gate 
6607c478bd9Sstevel@tonic-gate 	case PARM:
6617c478bd9Sstevel@tonic-gate 	case CONSTANT:
6627c478bd9Sstevel@tonic-gate 		if (catterm) {
6637c478bd9Sstevel@tonic-gate 			savetoken = c;
6647c478bd9Sstevel@tonic-gate 			c = CONCAT;
6657c478bd9Sstevel@tonic-gate 			catterm = 0;
6667c478bd9Sstevel@tonic-gate 		} else {
6677c478bd9Sstevel@tonic-gate 			if (lexlast == '$')
6687c478bd9Sstevel@tonic-gate 				wasfield = 2;
6697c478bd9Sstevel@tonic-gate 			++catterm;
6707c478bd9Sstevel@tonic-gate 		}
6717c478bd9Sstevel@tonic-gate 		break;
6727c478bd9Sstevel@tonic-gate 
6737c478bd9Sstevel@tonic-gate 	case INC:
6747c478bd9Sstevel@tonic-gate 	case DEC:
6757c478bd9Sstevel@tonic-gate 		if (!catterm || lexlast!=CONSTANT || wasfield)
6767c478bd9Sstevel@tonic-gate 			break;
6777c478bd9Sstevel@tonic-gate 
6787c478bd9Sstevel@tonic-gate 	case UFUNC:
6797c478bd9Sstevel@tonic-gate 	case FUNC:
6807c478bd9Sstevel@tonic-gate 	case GETLINE:
6817c478bd9Sstevel@tonic-gate 	case '!':
6827c478bd9Sstevel@tonic-gate 	case '$':
6837c478bd9Sstevel@tonic-gate 	case '(':
6847c478bd9Sstevel@tonic-gate 		if (catterm) {
6857c478bd9Sstevel@tonic-gate 			savetoken = c;
6867c478bd9Sstevel@tonic-gate 			c = CONCAT;
6877c478bd9Sstevel@tonic-gate 			catterm = 0;
6887c478bd9Sstevel@tonic-gate 		}
6897c478bd9Sstevel@tonic-gate 		break;
6907c478bd9Sstevel@tonic-gate 
6917c478bd9Sstevel@tonic-gate 	/*{*/case '}':
6927c478bd9Sstevel@tonic-gate 		if (nbrace == 0)
6937c478bd9Sstevel@tonic-gate 			savetoken = ';';
6947c478bd9Sstevel@tonic-gate 	case ';':
6957c478bd9Sstevel@tonic-gate 		inprint = 0;
6967c478bd9Sstevel@tonic-gate 	default:
6977c478bd9Sstevel@tonic-gate 		if (c == DEFFUNC)
6987c478bd9Sstevel@tonic-gate 			isfuncdef = 1;
6997c478bd9Sstevel@tonic-gate 		catterm = 0;
7007c478bd9Sstevel@tonic-gate 	}
7017c478bd9Sstevel@tonic-gate 	lexlast = c;
7027c478bd9Sstevel@tonic-gate 	if (wasfield)
7037c478bd9Sstevel@tonic-gate 		wasfield--;
7047c478bd9Sstevel@tonic-gate 	/*
7057c478bd9Sstevel@tonic-gate 	 * Map character constants to symbolic names.
7067c478bd9Sstevel@tonic-gate 	 */
7077c478bd9Sstevel@tonic-gate 	for (i = 0; ctosym[i].c != 0; i++)
7087c478bd9Sstevel@tonic-gate 		if (c == ctosym[i].c) {
7097c478bd9Sstevel@tonic-gate 			c = ctosym[i].sym;
7107c478bd9Sstevel@tonic-gate 			break;
7117c478bd9Sstevel@tonic-gate 		}
7127c478bd9Sstevel@tonic-gate 	return ((int)c);
7137c478bd9Sstevel@tonic-gate }
7147c478bd9Sstevel@tonic-gate 
7157c478bd9Sstevel@tonic-gate /*
7167c478bd9Sstevel@tonic-gate  * Read a number for the lexical analyzer.
7177c478bd9Sstevel@tonic-gate  * Input is the first character of the number.
7187c478bd9Sstevel@tonic-gate  * Return value is the lexical type.
7197c478bd9Sstevel@tonic-gate  */
7207c478bd9Sstevel@tonic-gate static int
7217c478bd9Sstevel@tonic-gate lexnumber(wint_t c)
7227c478bd9Sstevel@tonic-gate {
7237c478bd9Sstevel@tonic-gate 	register wchar_t *cp;
7247c478bd9Sstevel@tonic-gate 	register int dotfound = 0;
7257c478bd9Sstevel@tonic-gate 	register int efound = 0;
7267c478bd9Sstevel@tonic-gate 	INT number;
7277c478bd9Sstevel@tonic-gate 
7287c478bd9Sstevel@tonic-gate 	cp = linebuf;
7297c478bd9Sstevel@tonic-gate 	do {
7307c478bd9Sstevel@tonic-gate 		if (iswdigit(c))
7317c478bd9Sstevel@tonic-gate 			;
7327c478bd9Sstevel@tonic-gate 		else if (c == '.') {
7337c478bd9Sstevel@tonic-gate 			if (dotfound++)
7347c478bd9Sstevel@tonic-gate 				break;
7357c478bd9Sstevel@tonic-gate 		} else if (c=='e' || c=='E') {
7367c478bd9Sstevel@tonic-gate 			if ((c = lexgetc())!='-'  &&  c!='+') {
7377c478bd9Sstevel@tonic-gate 				lexungetc(c);
7387c478bd9Sstevel@tonic-gate 				c = 'e';
7397c478bd9Sstevel@tonic-gate 			} else
7407c478bd9Sstevel@tonic-gate 				*cp++ = 'e';
7417c478bd9Sstevel@tonic-gate 			if (efound++)
7427c478bd9Sstevel@tonic-gate 				break;
7437c478bd9Sstevel@tonic-gate 		} else
7447c478bd9Sstevel@tonic-gate 			break;
7457c478bd9Sstevel@tonic-gate 		*cp++ = c;
7467c478bd9Sstevel@tonic-gate 	} while ((c = lexgetc()) != WEOF);
7477c478bd9Sstevel@tonic-gate 	*cp = '\0';
7487c478bd9Sstevel@tonic-gate 	if (dotfound && cp==linebuf+1)
7497c478bd9Sstevel@tonic-gate 		return (DOT);
7507c478bd9Sstevel@tonic-gate 	lexungetc(c);
7517c478bd9Sstevel@tonic-gate 	errno = 0;
7527c478bd9Sstevel@tonic-gate 	if (!dotfound
7537c478bd9Sstevel@tonic-gate 	 && !efound
7547c478bd9Sstevel@tonic-gate 	 && ((number=wcstol(linebuf, (wchar_t **)0, 10)), errno!=ERANGE))
7557c478bd9Sstevel@tonic-gate 		yylval.node = intnode(number);
7567c478bd9Sstevel@tonic-gate 	else
7577c478bd9Sstevel@tonic-gate 		yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0));
7587c478bd9Sstevel@tonic-gate 	return (CONSTANT);
7597c478bd9Sstevel@tonic-gate }
7607c478bd9Sstevel@tonic-gate 
7617c478bd9Sstevel@tonic-gate /*
7627c478bd9Sstevel@tonic-gate  * Read an identifier.
7637c478bd9Sstevel@tonic-gate  * Input is first character of identifier.
7647c478bd9Sstevel@tonic-gate  * Return VAR.
7657c478bd9Sstevel@tonic-gate  */
7667c478bd9Sstevel@tonic-gate static int
7677c478bd9Sstevel@tonic-gate lexid(wint_t c)
7687c478bd9Sstevel@tonic-gate {
7697c478bd9Sstevel@tonic-gate 	register wchar_t *cp;
7707c478bd9Sstevel@tonic-gate 	register size_t i;
7717c478bd9Sstevel@tonic-gate 	register NODE *np;
7727c478bd9Sstevel@tonic-gate 
7737c478bd9Sstevel@tonic-gate 	cp = linebuf;
7747c478bd9Sstevel@tonic-gate 	do {
7757c478bd9Sstevel@tonic-gate 		*cp++ = c;
7767c478bd9Sstevel@tonic-gate 		c = lexgetc();
7777c478bd9Sstevel@tonic-gate 	} while (iswalpha(c) || iswdigit(c) || c=='_');
7787c478bd9Sstevel@tonic-gate 	*cp = '\0';
7797c478bd9Sstevel@tonic-gate 	lexungetc(c);
7807c478bd9Sstevel@tonic-gate 	yylval.node = np = vlook(linebuf);
7817c478bd9Sstevel@tonic-gate 
7827c478bd9Sstevel@tonic-gate 	switch(np->n_type) {
7837c478bd9Sstevel@tonic-gate 	case KEYWORD:
7847c478bd9Sstevel@tonic-gate 		switch (np->n_keywtype) {
7857c478bd9Sstevel@tonic-gate 		case PRINT:
7867c478bd9Sstevel@tonic-gate 		case PRINTF:
7877c478bd9Sstevel@tonic-gate 			++inprint;
7887c478bd9Sstevel@tonic-gate 		default:
7897c478bd9Sstevel@tonic-gate 			return ((int)np->n_keywtype);
7907c478bd9Sstevel@tonic-gate 		}
7917c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
7927c478bd9Sstevel@tonic-gate 
7937c478bd9Sstevel@tonic-gate 	case ARRAY:
7947c478bd9Sstevel@tonic-gate 	case VAR:
7957c478bd9Sstevel@tonic-gate 		/*
7967c478bd9Sstevel@tonic-gate 		 * If reading the argument list, create a dummy node
7977c478bd9Sstevel@tonic-gate 		 * for the duration of that function. These variables
7987c478bd9Sstevel@tonic-gate 		 * can be removed from the symbol table at function end
7997c478bd9Sstevel@tonic-gate 		 * but they must still exist because the execution tree
8007c478bd9Sstevel@tonic-gate 		 * knows about them.
8017c478bd9Sstevel@tonic-gate 		 */
8027c478bd9Sstevel@tonic-gate 		if (funparm) {
8037c478bd9Sstevel@tonic-gate do_funparm:
8047c478bd9Sstevel@tonic-gate 			np = emptynode(PARM, i=(cp-linebuf));
8057c478bd9Sstevel@tonic-gate 			np->n_flags = FSTRING;
8067c478bd9Sstevel@tonic-gate 			np->n_string = _null;
8077c478bd9Sstevel@tonic-gate 			np->n_strlen = 0;
8087c478bd9Sstevel@tonic-gate 			(void) memcpy(np->n_name, linebuf,
8097c478bd9Sstevel@tonic-gate 				(i+1) * sizeof(wchar_t));
8107c478bd9Sstevel@tonic-gate 			addsymtab(np);
8117c478bd9Sstevel@tonic-gate 			yylval.node = np;
8127c478bd9Sstevel@tonic-gate 		} else if (np == varNF || (np == varFS &&
8137c478bd9Sstevel@tonic-gate 			(!doing_begin || begin_getline))) {
8147c478bd9Sstevel@tonic-gate 			/*
8157c478bd9Sstevel@tonic-gate 			 * If the user program references NF or sets
8167c478bd9Sstevel@tonic-gate 			 * FS either outside of a begin block or
8177c478bd9Sstevel@tonic-gate 			 * in a begin block after a getline then the
8187c478bd9Sstevel@tonic-gate 			 * input line will be split immediately upon read
8197c478bd9Sstevel@tonic-gate 			 * rather than when a field is first referenced.
8207c478bd9Sstevel@tonic-gate 			 */
8217c478bd9Sstevel@tonic-gate 			needsplit = 1;
8227c478bd9Sstevel@tonic-gate 		} else if (np == varENVIRON)
8237c478bd9Sstevel@tonic-gate 			needenviron = 1;
8247c478bd9Sstevel@tonic-gate 	case PARM:
8257c478bd9Sstevel@tonic-gate 		return (VAR);
8267c478bd9Sstevel@tonic-gate 
8277c478bd9Sstevel@tonic-gate 	case UFUNC:
8287c478bd9Sstevel@tonic-gate 		/*
8297c478bd9Sstevel@tonic-gate 		 * It is ok to redefine functions as parameters
8307c478bd9Sstevel@tonic-gate 		 */
8317c478bd9Sstevel@tonic-gate 		if (funparm) goto do_funparm;
8327c478bd9Sstevel@tonic-gate 	case FUNC:
8337c478bd9Sstevel@tonic-gate 	case GETLINE:
8347c478bd9Sstevel@tonic-gate 		/*
8357c478bd9Sstevel@tonic-gate 		 * When a getline is encountered, clear the 'doing_begin' flag.
8367c478bd9Sstevel@tonic-gate 		 * This will force the 'needsplit' flag to be set, even inside
8377c478bd9Sstevel@tonic-gate 		 * a begin block, if FS is altered. (See VAR case above)
8387c478bd9Sstevel@tonic-gate 		 */
8397c478bd9Sstevel@tonic-gate 		if (doing_begin)
8407c478bd9Sstevel@tonic-gate 			begin_getline = 1;
8417c478bd9Sstevel@tonic-gate 		return (np->n_type);
8427c478bd9Sstevel@tonic-gate 	}
8437c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
844*88f3d729Sakaplan 	return (0);
8457c478bd9Sstevel@tonic-gate }
8467c478bd9Sstevel@tonic-gate 
8477c478bd9Sstevel@tonic-gate /*
8487c478bd9Sstevel@tonic-gate  * Read a string for the lexical analyzer.
8497c478bd9Sstevel@tonic-gate  * `endc' terminates the string.
8507c478bd9Sstevel@tonic-gate  */
8517c478bd9Sstevel@tonic-gate static int
8527c478bd9Sstevel@tonic-gate lexstring(wint_t endc)
8537c478bd9Sstevel@tonic-gate {
8547c478bd9Sstevel@tonic-gate 	register size_t length = lexescape(endc, 0, 0);
8557c478bd9Sstevel@tonic-gate 
8567c478bd9Sstevel@tonic-gate 	yylval.node = stringnode(linebuf, FALLOC, length);
8577c478bd9Sstevel@tonic-gate 	return (CONSTANT);
8587c478bd9Sstevel@tonic-gate }
8597c478bd9Sstevel@tonic-gate 
8607c478bd9Sstevel@tonic-gate /*
8617c478bd9Sstevel@tonic-gate  * Read a regular expression.
8627c478bd9Sstevel@tonic-gate  */
8637c478bd9Sstevel@tonic-gate static int
8647c478bd9Sstevel@tonic-gate lexregexp(wint_t endc)
8657c478bd9Sstevel@tonic-gate {
8667c478bd9Sstevel@tonic-gate 	(void) lexescape(endc, 1, 0);
8677c478bd9Sstevel@tonic-gate 	yylval.node = renode(linebuf);
8687c478bd9Sstevel@tonic-gate 	return (URE);
8697c478bd9Sstevel@tonic-gate }
8707c478bd9Sstevel@tonic-gate 
8717c478bd9Sstevel@tonic-gate /*
8727c478bd9Sstevel@tonic-gate  * Process a string, converting the escape characters as required by
8737c478bd9Sstevel@tonic-gate  * 1003.2. The processed string ends up in the global linebuf[]. This
8747c478bd9Sstevel@tonic-gate  * routine also changes the value of 'progfd' - the program file
8757c478bd9Sstevel@tonic-gate  * descriptor, so it should be used with some care. It is presently used to
8767c478bd9Sstevel@tonic-gate  * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()).
8777c478bd9Sstevel@tonic-gate  */
8787c478bd9Sstevel@tonic-gate void
8797c478bd9Sstevel@tonic-gate strescape(wchar_t *str)
8807c478bd9Sstevel@tonic-gate {
8817c478bd9Sstevel@tonic-gate 	progptr = str;
8827c478bd9Sstevel@tonic-gate 	proglen = wcslen(str) + 1;	/* Include \0 */
8837c478bd9Sstevel@tonic-gate 	(void) lexescape('\0', 0, 1);
8847c478bd9Sstevel@tonic-gate 	progptr = NULL;
8857c478bd9Sstevel@tonic-gate }
8867c478bd9Sstevel@tonic-gate 
8877c478bd9Sstevel@tonic-gate /*
8887c478bd9Sstevel@tonic-gate  * Read a string or regular expression, terminated by ``endc'',
8897c478bd9Sstevel@tonic-gate  * for lexical analyzer, processing escape sequences.
8907c478bd9Sstevel@tonic-gate  * Return string length.
8917c478bd9Sstevel@tonic-gate  */
8927c478bd9Sstevel@tonic-gate static size_t
8937c478bd9Sstevel@tonic-gate lexescape(wint_t endc, int regx, int cmd_line_operand)
8947c478bd9Sstevel@tonic-gate {
8957c478bd9Sstevel@tonic-gate 	static char nlre[256];
8967c478bd9Sstevel@tonic-gate 	static char nlstr[256];
8977c478bd9Sstevel@tonic-gate 	static char eofre[256];
8987c478bd9Sstevel@tonic-gate 	static char eofstr[256];
8997c478bd9Sstevel@tonic-gate 	int first_time = 1;
9007c478bd9Sstevel@tonic-gate 	wint_t c;
9017c478bd9Sstevel@tonic-gate 	wchar_t *cp;
9027c478bd9Sstevel@tonic-gate 	int n, max;
9037c478bd9Sstevel@tonic-gate 
9047c478bd9Sstevel@tonic-gate 	if (first_time == 1) {
9057c478bd9Sstevel@tonic-gate 		(void) strcpy(nlre, gettext("Newline in regular expression\n"));
9067c478bd9Sstevel@tonic-gate 		(void) strcpy(nlstr, gettext("Newline in string\n"));
9077c478bd9Sstevel@tonic-gate 		(void) strcpy(eofre, gettext("EOF in regular expression\n"));
9087c478bd9Sstevel@tonic-gate 		(void) strcpy(eofstr, gettext("EOF in string\n"));
9097c478bd9Sstevel@tonic-gate 		first_time = 0;
9107c478bd9Sstevel@tonic-gate         }
9117c478bd9Sstevel@tonic-gate 
9127c478bd9Sstevel@tonic-gate 	cp = linebuf;
9137c478bd9Sstevel@tonic-gate 	while ((c = lexgetc()) != endc) {
9147c478bd9Sstevel@tonic-gate 		if (c == '\n')
9157c478bd9Sstevel@tonic-gate 			awkerr(regx ? nlre : nlstr);
9167c478bd9Sstevel@tonic-gate 		if (c == '\\') {
9177c478bd9Sstevel@tonic-gate 			switch (c = lexgetc(), c) {
9187c478bd9Sstevel@tonic-gate 			case '\\':
9197c478bd9Sstevel@tonic-gate 				if (regx)
9207c478bd9Sstevel@tonic-gate 					*cp++ = '\\';
9217c478bd9Sstevel@tonic-gate 				break;
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate 			case '/':
9247c478bd9Sstevel@tonic-gate 				c = '/';
9257c478bd9Sstevel@tonic-gate 				break;
9267c478bd9Sstevel@tonic-gate 
9277c478bd9Sstevel@tonic-gate 			case 'n':
9287c478bd9Sstevel@tonic-gate 				c = '\n';
9297c478bd9Sstevel@tonic-gate 				break;
9307c478bd9Sstevel@tonic-gate 
9317c478bd9Sstevel@tonic-gate 			case 'b':
9327c478bd9Sstevel@tonic-gate 				c = '\b';
9337c478bd9Sstevel@tonic-gate 				break;
9347c478bd9Sstevel@tonic-gate 
9357c478bd9Sstevel@tonic-gate 			case 't':
9367c478bd9Sstevel@tonic-gate 				c = '\t';
9377c478bd9Sstevel@tonic-gate 				break;
9387c478bd9Sstevel@tonic-gate 
9397c478bd9Sstevel@tonic-gate 			case 'r':
9407c478bd9Sstevel@tonic-gate 				c = '\r';
9417c478bd9Sstevel@tonic-gate 				break;
9427c478bd9Sstevel@tonic-gate 
9437c478bd9Sstevel@tonic-gate 			case 'f':
9447c478bd9Sstevel@tonic-gate 				c = '\f';
9457c478bd9Sstevel@tonic-gate 				break;
9467c478bd9Sstevel@tonic-gate 
9477c478bd9Sstevel@tonic-gate 			case 'v':
9487c478bd9Sstevel@tonic-gate 				c = '\v';
9497c478bd9Sstevel@tonic-gate 				break;
9507c478bd9Sstevel@tonic-gate 
9517c478bd9Sstevel@tonic-gate 			case 'a':
9527c478bd9Sstevel@tonic-gate 				c = (char) 0x07;
9537c478bd9Sstevel@tonic-gate 				break;
9547c478bd9Sstevel@tonic-gate 
9557c478bd9Sstevel@tonic-gate 			case 'x':
9567c478bd9Sstevel@tonic-gate 				n = 0;
9577c478bd9Sstevel@tonic-gate 				while (iswxdigit(c = lexgetc())) {
9587c478bd9Sstevel@tonic-gate 					if (iswdigit(c))
9597c478bd9Sstevel@tonic-gate 						c -= '0';
9607c478bd9Sstevel@tonic-gate 					else if (iswupper(c))
9617c478bd9Sstevel@tonic-gate 						c -= 'A'-10;
9627c478bd9Sstevel@tonic-gate 					else
9637c478bd9Sstevel@tonic-gate 						c -= 'a'-10;
9647c478bd9Sstevel@tonic-gate 					n = (n<<4) + c;
9657c478bd9Sstevel@tonic-gate 				}
9667c478bd9Sstevel@tonic-gate 				lexungetc(c);
9677c478bd9Sstevel@tonic-gate 				c = n;
9687c478bd9Sstevel@tonic-gate 				break;
9697c478bd9Sstevel@tonic-gate 
9707c478bd9Sstevel@tonic-gate 			case '0':
9717c478bd9Sstevel@tonic-gate 			case '1':
9727c478bd9Sstevel@tonic-gate 			case '2':
9737c478bd9Sstevel@tonic-gate 			case '3':
9747c478bd9Sstevel@tonic-gate 			case '4':
9757c478bd9Sstevel@tonic-gate 			case '5':
9767c478bd9Sstevel@tonic-gate 			case '6':
9777c478bd9Sstevel@tonic-gate 			case '7':
9787c478bd9Sstevel@tonic-gate #if 0
9797c478bd9Sstevel@tonic-gate /*
9807c478bd9Sstevel@tonic-gate  * Posix.2 draft 10 disallows the use of back-referencing - it explicitly
9817c478bd9Sstevel@tonic-gate  * requires processing of the octal escapes both in strings and
9827c478bd9Sstevel@tonic-gate  * regular expressions. The following code is disabled instead of
9837c478bd9Sstevel@tonic-gate  * removed as back-referencing may be reintroduced in a future draft
9847c478bd9Sstevel@tonic-gate  * of the standard.
9857c478bd9Sstevel@tonic-gate  */
9867c478bd9Sstevel@tonic-gate 				/*
9877c478bd9Sstevel@tonic-gate 				 * For regular expressions, we disallow
9887c478bd9Sstevel@tonic-gate 				 * \ooo to mean octal character, in favour
9897c478bd9Sstevel@tonic-gate 				 * of back referencing.
9907c478bd9Sstevel@tonic-gate 				 */
9917c478bd9Sstevel@tonic-gate 				if (regx) {
9927c478bd9Sstevel@tonic-gate 					*cp++ = '\\';
9937c478bd9Sstevel@tonic-gate 					break;
9947c478bd9Sstevel@tonic-gate 				}
9957c478bd9Sstevel@tonic-gate #endif
9967c478bd9Sstevel@tonic-gate 				max = 3;
9977c478bd9Sstevel@tonic-gate 				n = 0;
9987c478bd9Sstevel@tonic-gate 				do {
9997c478bd9Sstevel@tonic-gate 					n = (n<<3) + c-'0';
10007c478bd9Sstevel@tonic-gate 					if ((c = lexgetc())>'7' || c<'0')
10017c478bd9Sstevel@tonic-gate 						break;
10027c478bd9Sstevel@tonic-gate 				} while (--max);
10037c478bd9Sstevel@tonic-gate 				lexungetc(c);
10047c478bd9Sstevel@tonic-gate 				/*
10057c478bd9Sstevel@tonic-gate 				 * an octal escape sequence must have at least
10067c478bd9Sstevel@tonic-gate 				 * 2 digits after the backslash, otherwise
10077c478bd9Sstevel@tonic-gate 				 * it gets passed straight thru for possible
10087c478bd9Sstevel@tonic-gate 				 * use in backreferencing.
10097c478bd9Sstevel@tonic-gate 				 */
10107c478bd9Sstevel@tonic-gate 				if (max == 3) {
10117c478bd9Sstevel@tonic-gate 					*cp++ = '\\';
10127c478bd9Sstevel@tonic-gate 					n += '0';
10137c478bd9Sstevel@tonic-gate 				}
10147c478bd9Sstevel@tonic-gate 				c = n;
10157c478bd9Sstevel@tonic-gate 				break;
10167c478bd9Sstevel@tonic-gate 
10177c478bd9Sstevel@tonic-gate 			case '\n':
10187c478bd9Sstevel@tonic-gate 				continue;
10197c478bd9Sstevel@tonic-gate 
10207c478bd9Sstevel@tonic-gate 			default:
10217c478bd9Sstevel@tonic-gate 				if (c != endc || cmd_line_operand) {
10227c478bd9Sstevel@tonic-gate 					*cp++ = '\\';
10237c478bd9Sstevel@tonic-gate 					if (c == endc)
10247c478bd9Sstevel@tonic-gate 						lexungetc(c);
10257c478bd9Sstevel@tonic-gate 				}
10267c478bd9Sstevel@tonic-gate 			}
10277c478bd9Sstevel@tonic-gate 		}
10287c478bd9Sstevel@tonic-gate 		if (c == WEOF)
10297c478bd9Sstevel@tonic-gate 			awkerr(regx ? eofre : eofstr);
10307c478bd9Sstevel@tonic-gate 		*cp++ = c;
10317c478bd9Sstevel@tonic-gate 	}
10327c478bd9Sstevel@tonic-gate 	*cp = '\0';
10337c478bd9Sstevel@tonic-gate 	return (cp - linebuf);
10347c478bd9Sstevel@tonic-gate }
10357c478bd9Sstevel@tonic-gate 
10367c478bd9Sstevel@tonic-gate /*
10377c478bd9Sstevel@tonic-gate  * Build a regular expression NODE.
10387c478bd9Sstevel@tonic-gate  * Argument is the string holding the expression.
10397c478bd9Sstevel@tonic-gate  */
10407c478bd9Sstevel@tonic-gate NODE *
10417c478bd9Sstevel@tonic-gate renode(wchar_t *s)
10427c478bd9Sstevel@tonic-gate {
10437c478bd9Sstevel@tonic-gate 	register NODE *np;
10447c478bd9Sstevel@tonic-gate 	int n;
10457c478bd9Sstevel@tonic-gate 
10467c478bd9Sstevel@tonic-gate 	np = emptynode(RE, 0);
10477c478bd9Sstevel@tonic-gate 	np->n_left = np->n_right = NNULL;
10487c478bd9Sstevel@tonic-gate 	np->n_regexp = (REGEXP)emalloc(sizeof(regex_t));
10497c478bd9Sstevel@tonic-gate 	if ((n = REGWCOMP(np->n_regexp, s, REG_EXTENDED)) != REG_OK) {
10507c478bd9Sstevel@tonic-gate 		int m;
10517c478bd9Sstevel@tonic-gate 		char *p;
10527c478bd9Sstevel@tonic-gate 
10537c478bd9Sstevel@tonic-gate 		m = regerror(n, np->n_regexp, NULL, 0);
10547c478bd9Sstevel@tonic-gate 		p = (char *)emalloc(m);
10557c478bd9Sstevel@tonic-gate 		regerror(n, np->n_regexp, p, m);
10567c478bd9Sstevel@tonic-gate 		awkerr("/%S/: %s", s, p);
10577c478bd9Sstevel@tonic-gate 	}
10587c478bd9Sstevel@tonic-gate 	return (np);
10597c478bd9Sstevel@tonic-gate }
10607c478bd9Sstevel@tonic-gate /*
10617c478bd9Sstevel@tonic-gate  * Get a character for the lexical analyser routine.
10627c478bd9Sstevel@tonic-gate  */
10637c478bd9Sstevel@tonic-gate static wint_t
10647c478bd9Sstevel@tonic-gate lexgetc()
10657c478bd9Sstevel@tonic-gate {
10667c478bd9Sstevel@tonic-gate 	register wint_t c;
10677c478bd9Sstevel@tonic-gate 	static char **files = &progfiles[0];
10687c478bd9Sstevel@tonic-gate 
10697c478bd9Sstevel@tonic-gate 	if (progfp!=FNULL && (c = fgetwc(progfp))!=WEOF)
10707c478bd9Sstevel@tonic-gate 		;
10717c478bd9Sstevel@tonic-gate 	else {
10727c478bd9Sstevel@tonic-gate 		if (progptr != NULL) {
10737c478bd9Sstevel@tonic-gate 			if (proglen-- <= 0)
10747c478bd9Sstevel@tonic-gate 				c = WEOF;
10757c478bd9Sstevel@tonic-gate 			else
10767c478bd9Sstevel@tonic-gate 				c = *progptr++;
10777c478bd9Sstevel@tonic-gate 		} else {
10787c478bd9Sstevel@tonic-gate 			if (progfp != FNULL)
10797c478bd9Sstevel@tonic-gate 				if (progfp != stdin)
10807c478bd9Sstevel@tonic-gate 					(void)fclose(progfp);
10817c478bd9Sstevel@tonic-gate 				else
10827c478bd9Sstevel@tonic-gate 					clearerr(progfp);
10837c478bd9Sstevel@tonic-gate 				progfp = FNULL;
10847c478bd9Sstevel@tonic-gate 			if (files < progfilep) {
10857c478bd9Sstevel@tonic-gate 				filename = *files++;
10867c478bd9Sstevel@tonic-gate 				lineno = 1;
10877c478bd9Sstevel@tonic-gate 				if (filename[0]=='-' && filename[1]=='\0')
10887c478bd9Sstevel@tonic-gate 					progfp = stdin;
10897c478bd9Sstevel@tonic-gate 				else if ((progfp=fopen(filename, r)) == FNULL) {
10907c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
10917c478bd9Sstevel@tonic-gate 				gettext("script file \"%s\""), filename);
10927c478bd9Sstevel@tonic-gate 					exit(1);
10937c478bd9Sstevel@tonic-gate 				}
10947c478bd9Sstevel@tonic-gate 				c = fgetwc(progfp);
10957c478bd9Sstevel@tonic-gate 			}
10967c478bd9Sstevel@tonic-gate 		}
10977c478bd9Sstevel@tonic-gate 	}
10987c478bd9Sstevel@tonic-gate 	if (c == '\n')
10997c478bd9Sstevel@tonic-gate 		++lineno;
11007c478bd9Sstevel@tonic-gate 	if (conptr >= &context[NCONTEXT])
11017c478bd9Sstevel@tonic-gate 		conptr = &context[0];
11027c478bd9Sstevel@tonic-gate 	if (c != WEOF)
11037c478bd9Sstevel@tonic-gate 		*conptr++ = c;
11047c478bd9Sstevel@tonic-gate 	return (c);
11057c478bd9Sstevel@tonic-gate }
11067c478bd9Sstevel@tonic-gate 
11077c478bd9Sstevel@tonic-gate /*
11087c478bd9Sstevel@tonic-gate  * Return a character for lexical analyser.
11097c478bd9Sstevel@tonic-gate  * Only one returned character is (not enforced) legitimite.
11107c478bd9Sstevel@tonic-gate  */
11117c478bd9Sstevel@tonic-gate static void
11127c478bd9Sstevel@tonic-gate lexungetc(wint_t c)
11137c478bd9Sstevel@tonic-gate {
11147c478bd9Sstevel@tonic-gate 	if (c == '\n')
11157c478bd9Sstevel@tonic-gate 		--lineno;
11167c478bd9Sstevel@tonic-gate 	if (c != WEOF) {
11177c478bd9Sstevel@tonic-gate 		if (conptr == &context[0])
11187c478bd9Sstevel@tonic-gate 			conptr = &context[NCONTEXT];
11197c478bd9Sstevel@tonic-gate 		*--conptr = '\0';
11207c478bd9Sstevel@tonic-gate 	}
11217c478bd9Sstevel@tonic-gate 	if (progfp != FNULL) {
11227c478bd9Sstevel@tonic-gate 		(void)ungetwc(c, progfp);
11237c478bd9Sstevel@tonic-gate 		return;
11247c478bd9Sstevel@tonic-gate 	}
11257c478bd9Sstevel@tonic-gate 	if (c == WEOF)
11267c478bd9Sstevel@tonic-gate 		return;
11277c478bd9Sstevel@tonic-gate 	*--progptr = c;
11287c478bd9Sstevel@tonic-gate 	proglen++;
11297c478bd9Sstevel@tonic-gate }
11307c478bd9Sstevel@tonic-gate 
11317c478bd9Sstevel@tonic-gate /*
11327c478bd9Sstevel@tonic-gate  * Syntax errors during parsing.
11337c478bd9Sstevel@tonic-gate  */
11347c478bd9Sstevel@tonic-gate void
11357c478bd9Sstevel@tonic-gate yyerror(char *s, ...)
11367c478bd9Sstevel@tonic-gate {
11377c478bd9Sstevel@tonic-gate 	if (lexlast==FUNC || lexlast==GETLINE || lexlast==KEYWORD)
11387c478bd9Sstevel@tonic-gate 		if (lexlast == KEYWORD)
11397c478bd9Sstevel@tonic-gate 			awkerr(gettext("inadmissible use of reserved keyword"));
11407c478bd9Sstevel@tonic-gate 		else
11417c478bd9Sstevel@tonic-gate 			awkerr(gettext("attempt to redefine builtin function"));
11427c478bd9Sstevel@tonic-gate 	awkerr(s);
11437c478bd9Sstevel@tonic-gate }
11447c478bd9Sstevel@tonic-gate 
/*
 * Error routine for all awk errors.
 * Takes a printf-style format and arguments and hands them to
 * awkierr() with the errno indication turned off.
 */
/* ARGSUSED */
void
awkerr(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(0, fmt, ap);
	va_end(ap);
}
11587c478bd9Sstevel@tonic-gate 
/*
 * Error routine like "awkerr" except that the message is followed by
 * an errno-specific indication (awkierr() is called with perr == 1).
 */
/* ARGSUSED */
void
awkperr(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(1, fmt, ap);
	va_end(ap);
}
11737c478bd9Sstevel@tonic-gate 
11747c478bd9Sstevel@tonic-gate /*
11757c478bd9Sstevel@tonic-gate  * Common internal routine for awkerr, awkperr
11767c478bd9Sstevel@tonic-gate  */
11777c478bd9Sstevel@tonic-gate static void
11787c478bd9Sstevel@tonic-gate awkierr(int perr, char *fmt, va_list ap)
11797c478bd9Sstevel@tonic-gate {
11807c478bd9Sstevel@tonic-gate 	static char sep1[] = "\n>>>\t";
11817c478bd9Sstevel@tonic-gate 	static char sep2[] = "\t<<<";
11827c478bd9Sstevel@tonic-gate 	int saveerr = errno;
11837c478bd9Sstevel@tonic-gate 
11847c478bd9Sstevel@tonic-gate 	(void) fprintf(stderr, "%s: ", _cmdname);
11857c478bd9Sstevel@tonic-gate 	if (running) {
11867c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("line %u ("),
11877c478bd9Sstevel@tonic-gate 		    curnode==NNULL ? 0 : curnode->n_lineno);
11887c478bd9Sstevel@tonic-gate 		if (phase == 0)
11897c478bd9Sstevel@tonic-gate 		      (void) fprintf(stderr, "NR=%lld): ", (INT)exprint(varNR));
11907c478bd9Sstevel@tonic-gate 		else
11917c478bd9Sstevel@tonic-gate 		      (void) fprintf(stderr, "%s): ",
11927c478bd9Sstevel@tonic-gate 			    phase==BEGIN ? s_BEGIN : s_END);
11937c478bd9Sstevel@tonic-gate 	} else if (lineno != 0) {
11947c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("file \"%s\": "), filename);
11957c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("line %u: "), lineno);
11967c478bd9Sstevel@tonic-gate 	}
11977c478bd9Sstevel@tonic-gate 	(void) vfprintf(stderr, gettext(fmt), ap);
11987c478bd9Sstevel@tonic-gate 	if (perr == 1)
11997c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, ": %s", strerror(saveerr));
12007c478bd9Sstevel@tonic-gate 	if (perr != 2 && !running) {
12017c478bd9Sstevel@tonic-gate 		register wchar_t *cp;
12027c478bd9Sstevel@tonic-gate 		register int n;
12037c478bd9Sstevel@tonic-gate 		register int c;
12047c478bd9Sstevel@tonic-gate 
12057c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("  Context is:%s"), sep1);
12067c478bd9Sstevel@tonic-gate 		cp = conptr;
12077c478bd9Sstevel@tonic-gate 		n = NCONTEXT;
12087c478bd9Sstevel@tonic-gate 		do {
12097c478bd9Sstevel@tonic-gate 			if (cp >= &context[NCONTEXT])
12107c478bd9Sstevel@tonic-gate 				cp = &context[0];
12117c478bd9Sstevel@tonic-gate 			if ((c = *cp++) != '\0')
12127c478bd9Sstevel@tonic-gate 				(void)fputs(c=='\n' ? sep1 : toprint(c),
12137c478bd9Sstevel@tonic-gate 					stderr);
12147c478bd9Sstevel@tonic-gate 		} while (--n != 0);
12157c478bd9Sstevel@tonic-gate 		(void)fputs(sep2, stderr);
12167c478bd9Sstevel@tonic-gate 	}
12177c478bd9Sstevel@tonic-gate 	(void) fprintf(stderr, "\n");
12187c478bd9Sstevel@tonic-gate 	exit(1);
12197c478bd9Sstevel@tonic-gate }
12207c478bd9Sstevel@tonic-gate 
12217c478bd9Sstevel@tonic-gate wchar_t *
12227c478bd9Sstevel@tonic-gate emalloc(unsigned n)
12237c478bd9Sstevel@tonic-gate {
12247c478bd9Sstevel@tonic-gate 	wchar_t *cp;
12257c478bd9Sstevel@tonic-gate 
12267c478bd9Sstevel@tonic-gate 	if ((cp = malloc(n)) == NULL)
12277c478bd9Sstevel@tonic-gate 		awkerr(nomem);
12287c478bd9Sstevel@tonic-gate 	return cp;
12297c478bd9Sstevel@tonic-gate }
12307c478bd9Sstevel@tonic-gate 
12317c478bd9Sstevel@tonic-gate wchar_t *
12327c478bd9Sstevel@tonic-gate erealloc(wchar_t *p, unsigned n)
12337c478bd9Sstevel@tonic-gate {
12347c478bd9Sstevel@tonic-gate 	wchar_t *cp;
12357c478bd9Sstevel@tonic-gate 
12367c478bd9Sstevel@tonic-gate 	if ((cp = realloc(p, n)) == NULL)
12377c478bd9Sstevel@tonic-gate 		awkerr(nomem);
12387c478bd9Sstevel@tonic-gate 	return cp;
12397c478bd9Sstevel@tonic-gate }
12407c478bd9Sstevel@tonic-gate 
12417c478bd9Sstevel@tonic-gate 
/*
 * usage message for awk
 * Prints the synopsis on stderr and returns 2.
 */
static int
usage()
{
	char *text;

	text = gettext(
"Usage:	awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n"
"	awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] [file ...]\n");
	(void) fprintf(stderr, text);
	return (2);
}
12537c478bd9Sstevel@tonic-gate 
12547c478bd9Sstevel@tonic-gate 
/*
 * Convert a multibyte string to a wide string.
 * The result is owned by this routine: the buffer handed out by the
 * previous call is freed on the next call.
 */
static wchar_t *
mbconvert(char *str)
{
	static wchar_t *last = NULL;

	free(last);
	last = mbstowcsdup(str);
	return (last);
}
12647c478bd9Sstevel@tonic-gate 
/*
 * Convert a wide string to a multibyte string.
 * The result is owned by this routine: the buffer handed out by the
 * previous call is freed on the next call.
 */
char *
mbunconvert(wchar_t *str)
{
	static char *last = NULL;

	free(last);
	last = wcstombsdup(str);
	return (last);
}
12747c478bd9Sstevel@tonic-gate 
12757c478bd9Sstevel@tonic-gate /*
12767c478bd9Sstevel@tonic-gate  * Solaris port - following functions are typical MKS functions written
12777c478bd9Sstevel@tonic-gate  * to work for Solaris.
12787c478bd9Sstevel@tonic-gate  */
12797c478bd9Sstevel@tonic-gate 
/*
 * Return a freshly malloc()ed wide-character copy of the multibyte
 * string `s'.  The caller owns the result and must free() it.
 * Returns NULL if memory cannot be allocated or `s' contains an
 * invalid multibyte sequence; errno from the failed conversion is
 * preserved.
 */
wchar_t *
mbstowcsdup(char *s)
{
	size_t n;
	wchar_t *w;

	/* A wide string never has more elements than the byte string. */
	n = strlen(s) + 1;
	if ((w = (wchar_t *)malloc(n * sizeof (wchar_t))) == NULL)
		return (NULL);

	if (mbstowcs(w, s, n) == (size_t)-1) {
		int saverr = errno;

		/* Do not leak the buffer on a failed conversion. */
		free(w);
		errno = saverr;
		return (NULL);
	}
	return (w);
}
12967c478bd9Sstevel@tonic-gate 
/*
 * Return a freshly malloc()ed multibyte copy of the wide string `w'.
 * The caller owns the result and must free() it.
 * Returns NULL if memory cannot be allocated or `w' contains a
 * character that cannot be converted; errno from the failed conversion
 * is preserved.
 */
char *
wcstombsdup(wchar_t *w)
{
	size_t n;
	char *mb;
	char *shrunk;

	/* Fetch memory for worst case string length */
	n = (wcslen(w) + 1) * MB_CUR_MAX;
	if ((mb = (char *)malloc(n)) == NULL)
		return (NULL);

	/* Convert the string */
	if ((n = wcstombs(mb, w, n)) == (size_t)-1) {
		int saverr = errno;

		free(mb);
		errno = saverr;
		return (NULL);
	}

	/*
	 * Shrink the string down.  If realloc() fails the original
	 * buffer is still valid, so hand that back rather than leak it.
	 */
	if ((shrunk = (char *)realloc(mb, n + 1)) == NULL)
		return (mb);
	return (shrunk);
}
13257c478bd9Sstevel@tonic-gate 
/*
 * The upe_ctrls[] table contains the printable 'control-sequences' for the
 * character values 0..31 and 127.  The first entry is for value 127, thus the
 * entries for the remaining character values are from 1..32, i.e. the
 * sequence for character value v (0 <= v < 32) is upe_ctrls[v + 1].
 */
static const char *const upe_ctrls[] =
{
	"^?",
	"^@",  "^A",  "^B",  "^C",  "^D",  "^E",  "^F",  "^G",
	"^H",  "^I",  "^J",  "^K",  "^L",  "^M",  "^N",  "^O",
	"^P",  "^Q",  "^R",  "^S",  "^T",  "^U",  "^V",  "^W",
	"^X",  "^Y",  "^Z",  "^[",  "^\\", "^]",  "^^",  "^_"
};


/*
 * Return a printable string corresponding to the given character value.  If
 * the character is printable, simply return it as the string.  If it is in
 * the range specified by table 5-101 in the UPE, return the corresponding
 * string.  Otherwise, return an octal escape sequence, one "\ooo" group per
 * byte of the character's multibyte form.
 *
 * The result points either into upe_ctrls[] or into one of two static
 * buffers that are overwritten by the next call; the caller must not free
 * it.
 */
static const char *
toprint(wchar_t c)
{
	int n, len;
	unsigned char *ptr;
	static char mbch[MB_LEN_MAX+1];
	static char buf[5 * MB_LEN_MAX + 1];

	if ((n = wctomb(mbch, c)) == -1) {
		/*
		 * Should never happen.  The cast makes the argument match
		 * the %x conversion whatever the width of wchar_t is.
		 */
		(void) snprintf(buf, sizeof (buf), "\\%x", (unsigned int)c);
		return (buf);
	}
	mbch[n] = '\0';

	if (iswprint(c))
		return (mbch);

	if (c == 127)
		return (upe_ctrls[0]);

	/* The lower bound matters where wchar_t is a signed type. */
	if (c >= 0 && c < 32) {
		/* Print as in Table 5-101 in the UPE */
		return (upe_ctrls[c + 1]);
	}

	/* Print as an octal escape sequence */
	len = 0;
	for (ptr = (unsigned char *)mbch; 0 < n; --n, ++ptr) {
		len += snprintf(buf + len, sizeof (buf) - (size_t)len,
		    "\\%03o", *ptr);
	}
	return (buf);
}
13767c478bd9Sstevel@tonic-gate 
/*
 * Translate a byte offset into a wide-character index.
 *
 * Walks `astring' summing the multibyte length (per wctomb()) of each wide
 * character until at least `off' bytes have been accounted for, and returns
 * the number of wide characters consumed.  A character that cannot be
 * converted is counted as one byte.  The walk never goes past the
 * terminating null of `astring', so an `off' larger than the multibyte
 * length of the string yields the length of the string in wide characters.
 */
static int
wcoff(const wchar_t *astring, const int off)
{
	const wchar_t *s = astring;
	int c = 0;
	char mb[MB_LEN_MAX];

	/*
	 * wctomb() reports a length of 1, not 0, for the null wide
	 * character, so the end of the string has to be tested explicitly.
	 */
	while (c < off && *s != L'\0') {
		int n;

		if ((n = wctomb(mb, *s)) == 0)
			break;
		if (n == -1)
			n = 1;
		c += n;
		s++;
	}

	return ((int)(s - astring));
}
13967c478bd9Sstevel@tonic-gate 
/*
 * Wide-character front end to regcomp().
 *
 * The pattern is converted to a temporary multibyte string, compiled into
 * `r' with the given flags, and the temporary string is released.  Returns
 * REG_ESPACE if the conversion fails, otherwise the regcomp() result.
 */
int
int_regwcomp(register regex_t *r, const wchar_t *pattern, int uflags)
{
	int status;
	char *narrow = wcstombsdup((wchar_t *) pattern);

	if (narrow == NULL)
		return (REG_ESPACE);

	status = regcomp(r, narrow, uflags);
	free(narrow);

	return (status);
}
14127c478bd9Sstevel@tonic-gate 
14137c478bd9Sstevel@tonic-gate int
14147c478bd9Sstevel@tonic-gate int_regwexec(const regex_t *r,	/* compiled RE */
14157c478bd9Sstevel@tonic-gate 	const wchar_t *astring,	/* subject string */
14167c478bd9Sstevel@tonic-gate 	size_t nsub,		/* number of subexpressions */
14177c478bd9Sstevel@tonic-gate 	int_regwmatch_t *sub,	/* subexpression pointers */
14187c478bd9Sstevel@tonic-gate 	int flags)
14197c478bd9Sstevel@tonic-gate {
14207c478bd9Sstevel@tonic-gate 	char *mbs;
14217c478bd9Sstevel@tonic-gate 	regmatch_t *mbsub = NULL;
14227c478bd9Sstevel@tonic-gate 	register int i;
14237c478bd9Sstevel@tonic-gate 
14247c478bd9Sstevel@tonic-gate 	if ((mbs = wcstombsdup((wchar_t *) astring)) == NULL)
14257c478bd9Sstevel@tonic-gate 		return (REG_ESPACE);
14267c478bd9Sstevel@tonic-gate 
14277c478bd9Sstevel@tonic-gate 	if (nsub > 0 && sub) {
14287c478bd9Sstevel@tonic-gate 		if ((mbsub = malloc(nsub * sizeof (regmatch_t))) == NULL)
14297c478bd9Sstevel@tonic-gate 			return (REG_ESPACE);
14307c478bd9Sstevel@tonic-gate 	}
14317c478bd9Sstevel@tonic-gate 
14327c478bd9Sstevel@tonic-gate 	i = regexec(r, mbs, nsub, mbsub, flags);
14337c478bd9Sstevel@tonic-gate 
14347c478bd9Sstevel@tonic-gate 	/* Now, adjust the pointers/counts in sub */
14357c478bd9Sstevel@tonic-gate 	if (i == REG_OK && nsub > 0 && mbsub) {
14367c478bd9Sstevel@tonic-gate 		register int j, k;
14377c478bd9Sstevel@tonic-gate 
14387c478bd9Sstevel@tonic-gate 		for (j = 0; j < nsub; j++) {
14397c478bd9Sstevel@tonic-gate 			regmatch_t *ms = &mbsub[j];
14407c478bd9Sstevel@tonic-gate 			int_regwmatch_t *ws = &sub[j];
14417c478bd9Sstevel@tonic-gate 
14427c478bd9Sstevel@tonic-gate 			if ((k = ms->rm_so) >= 0) {
14437c478bd9Sstevel@tonic-gate 				ws->rm_so = wcoff(astring, k);
14447c478bd9Sstevel@tonic-gate 				ws->rm_sp = astring + ws->rm_so;
14457c478bd9Sstevel@tonic-gate 			}
14467c478bd9Sstevel@tonic-gate 			if ((k = ms->rm_eo) >= 0) {
14477c478bd9Sstevel@tonic-gate 				ws->rm_eo = wcoff(astring, k);
14487c478bd9Sstevel@tonic-gate 				ws->rm_ep = astring + ws->rm_eo;
14497c478bd9Sstevel@tonic-gate 			}
14507c478bd9Sstevel@tonic-gate 		}
14517c478bd9Sstevel@tonic-gate 	}
14527c478bd9Sstevel@tonic-gate 
14537c478bd9Sstevel@tonic-gate 	free(mbs);
14547c478bd9Sstevel@tonic-gate 	if (mbsub)
14557c478bd9Sstevel@tonic-gate 		free(mbsub);
14567c478bd9Sstevel@tonic-gate 	return (i);
14577c478bd9Sstevel@tonic-gate }
14587c478bd9Sstevel@tonic-gate 
14597c478bd9Sstevel@tonic-gate int
14607c478bd9Sstevel@tonic-gate int_regwdosuba(register regex_t *rp,	/* compiled RE: Pattern */
14617c478bd9Sstevel@tonic-gate 	const wchar_t *rpl,		/* replacement string: /rpl/ */
14627c478bd9Sstevel@tonic-gate 	const wchar_t *src,		/* source string */
14637c478bd9Sstevel@tonic-gate 	wchar_t **dstp,			/* destination string */
14647c478bd9Sstevel@tonic-gate 	int len,			/* destination length */
14657c478bd9Sstevel@tonic-gate 	int *globp)	/* IN: occurence, 0 for all; OUT: substitutions */
14667c478bd9Sstevel@tonic-gate {
14677c478bd9Sstevel@tonic-gate 	wchar_t *dst, *odst;
14687c478bd9Sstevel@tonic-gate 	register const wchar_t *ip, *xp;
14697c478bd9Sstevel@tonic-gate 	register wchar_t *op;
14707c478bd9Sstevel@tonic-gate 	register int i;
14717c478bd9Sstevel@tonic-gate 	register wchar_t c;
14727c478bd9Sstevel@tonic-gate 	int glob, iglob = *globp, oglob = 0;
14737c478bd9Sstevel@tonic-gate #define	NSUB	10
14747c478bd9Sstevel@tonic-gate 	int_regwmatch_t rm[NSUB], *rmp;
14757c478bd9Sstevel@tonic-gate 	int flags;
14767c478bd9Sstevel@tonic-gate 	wchar_t *end;
14777c478bd9Sstevel@tonic-gate 	int regerr;
14787c478bd9Sstevel@tonic-gate 
14797c478bd9Sstevel@tonic-gate /* handle overflow of dst. we need "i" more bytes */
14807c478bd9Sstevel@tonic-gate #ifdef OVERFLOW
14817c478bd9Sstevel@tonic-gate #undef OVERFLOW
14827c478bd9Sstevel@tonic-gate #define	OVERFLOW(i) if (1) { \
14837c478bd9Sstevel@tonic-gate 		int pos = op - dst; \
14847c478bd9Sstevel@tonic-gate 		dst = (wchar_t *) realloc(odst = dst, \
14857c478bd9Sstevel@tonic-gate 			(len += len + i) * sizeof (wchar_t)); \
14867c478bd9Sstevel@tonic-gate 		if (dst == NULL) \
14877c478bd9Sstevel@tonic-gate 			goto nospace; \
14887c478bd9Sstevel@tonic-gate 		op = dst + pos; \
14897c478bd9Sstevel@tonic-gate 		end = dst + len; \
14907c478bd9Sstevel@tonic-gate 	} else
14917c478bd9Sstevel@tonic-gate #endif
14927c478bd9Sstevel@tonic-gate 
14937c478bd9Sstevel@tonic-gate 	*dstp = dst = (wchar_t *) malloc(len * sizeof (wchar_t));
14947c478bd9Sstevel@tonic-gate 	if (dst == NULL)
14957c478bd9Sstevel@tonic-gate 		return (REG_ESPACE);
14967c478bd9Sstevel@tonic-gate 
14977c478bd9Sstevel@tonic-gate 	if (rp == NULL || rpl == NULL || src == NULL || dst ==  NULL)
14987c478bd9Sstevel@tonic-gate 		return (REG_EFATAL);
14997c478bd9Sstevel@tonic-gate 
15007c478bd9Sstevel@tonic-gate 	glob = 0;	/* match count */
15017c478bd9Sstevel@tonic-gate 	ip = src;	/* source position */
15027c478bd9Sstevel@tonic-gate 	op = dst;	/* destination position */
15037c478bd9Sstevel@tonic-gate 	end = dst + len;
15047c478bd9Sstevel@tonic-gate 
15057c478bd9Sstevel@tonic-gate 	flags = 0;
15067c478bd9Sstevel@tonic-gate 	while ((regerr = int_regwexec(rp, ip, NSUB, rm, flags)) == REG_OK) {
15077c478bd9Sstevel@tonic-gate 		/* Copy text preceding match */
15087c478bd9Sstevel@tonic-gate 		if (op + (i = rm[0].rm_sp - ip) >= end)
15097c478bd9Sstevel@tonic-gate 			OVERFLOW(i);
15107c478bd9Sstevel@tonic-gate 		while (i--)
15117c478bd9Sstevel@tonic-gate 			*op++ = *ip++;
15127c478bd9Sstevel@tonic-gate 
15137c478bd9Sstevel@tonic-gate 		if (iglob == 0 || ++glob == iglob) {
15147c478bd9Sstevel@tonic-gate 			oglob++;
15157c478bd9Sstevel@tonic-gate 			xp = rpl;		/* do substitute */
15167c478bd9Sstevel@tonic-gate 		} else
15177c478bd9Sstevel@tonic-gate 			xp = L"&";		/* preserve text */
15187c478bd9Sstevel@tonic-gate 
15197c478bd9Sstevel@tonic-gate 		/* Perform replacement of matched substing */
15207c478bd9Sstevel@tonic-gate 		while ((c = *xp++) != '\0') {
15217c478bd9Sstevel@tonic-gate 			rmp = NULL;
15227c478bd9Sstevel@tonic-gate 			if (c == '&')
15237c478bd9Sstevel@tonic-gate 				rmp = &rm[0];
15247c478bd9Sstevel@tonic-gate 			else if (c == '\\') {
15257c478bd9Sstevel@tonic-gate 				if ('0' <= *xp && *xp <= '9')
15267c478bd9Sstevel@tonic-gate 					rmp = &rm[*xp++ - '0'];
15277c478bd9Sstevel@tonic-gate 				else if (*xp != '\0')
15287c478bd9Sstevel@tonic-gate 					c = *xp++;
15297c478bd9Sstevel@tonic-gate 			}
15307c478bd9Sstevel@tonic-gate 
15317c478bd9Sstevel@tonic-gate 			if (rmp ==  NULL) {	/* Ordinary character. */
15327c478bd9Sstevel@tonic-gate 				*op++ = c;
15337c478bd9Sstevel@tonic-gate 				if (op >= end)
15347c478bd9Sstevel@tonic-gate 					OVERFLOW(1);
15357c478bd9Sstevel@tonic-gate 			} else if (rmp->rm_sp != NULL && rmp->rm_ep != NULL) {
15367c478bd9Sstevel@tonic-gate 				ip = rmp->rm_sp;
15377c478bd9Sstevel@tonic-gate 				if (op + (i = rmp->rm_ep - rmp->rm_sp) >= end)
15387c478bd9Sstevel@tonic-gate 					OVERFLOW(i);
15397c478bd9Sstevel@tonic-gate 				while (i--)
15407c478bd9Sstevel@tonic-gate 					*op++ = *ip++;
15417c478bd9Sstevel@tonic-gate 			}
15427c478bd9Sstevel@tonic-gate 		}
15437c478bd9Sstevel@tonic-gate 
15447c478bd9Sstevel@tonic-gate 		ip = rm[0].rm_ep;
15457c478bd9Sstevel@tonic-gate 		if (*ip == '\0')	/* If at end break */
15467c478bd9Sstevel@tonic-gate 			break;
15477c478bd9Sstevel@tonic-gate 		else if (rm[0].rm_sp == rm[0].rm_ep) {
15487c478bd9Sstevel@tonic-gate 			/* If empty match copy next char */
15497c478bd9Sstevel@tonic-gate 			*op++ = *ip++;
15507c478bd9Sstevel@tonic-gate 			if (op >= end)
15517c478bd9Sstevel@tonic-gate 				OVERFLOW(1);
15527c478bd9Sstevel@tonic-gate 		}
15537c478bd9Sstevel@tonic-gate 		flags = REG_NOTBOL;
15547c478bd9Sstevel@tonic-gate 	}
15557c478bd9Sstevel@tonic-gate 
15567c478bd9Sstevel@tonic-gate 	if (regerr != REG_OK && regerr != REG_NOMATCH)
15577c478bd9Sstevel@tonic-gate 		return (regerr);
15587c478bd9Sstevel@tonic-gate 
15597c478bd9Sstevel@tonic-gate 	/* Copy rest of text */
15607c478bd9Sstevel@tonic-gate 	if (op + (i =  wcslen(ip)) >= end)
15617c478bd9Sstevel@tonic-gate 		OVERFLOW(i);
15627c478bd9Sstevel@tonic-gate 	while (i--)
15637c478bd9Sstevel@tonic-gate 	    *op++ = *ip++;
15647c478bd9Sstevel@tonic-gate 	*op++ = '\0';
15657c478bd9Sstevel@tonic-gate 
15667c478bd9Sstevel@tonic-gate 	if ((*dstp = dst = (wchar_t *) realloc(odst = dst,
15677c478bd9Sstevel@tonic-gate 			sizeof (wchar_t) * (size_t)(op - dst))) == NULL) {
15687c478bd9Sstevel@tonic-gate nospace:
15697c478bd9Sstevel@tonic-gate 		free(odst);
15707c478bd9Sstevel@tonic-gate 		return (REG_ESPACE);
15717c478bd9Sstevel@tonic-gate 	}
15727c478bd9Sstevel@tonic-gate 
15737c478bd9Sstevel@tonic-gate 	*globp = oglob;
15747c478bd9Sstevel@tonic-gate 
15757c478bd9Sstevel@tonic-gate 	return ((oglob == 0) ? REG_NOMATCH : REG_OK);
15767c478bd9Sstevel@tonic-gate }
1577