xref: /titanic_53/usr/src/cmd/awk_xpg4/awk1.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate /*
28*7c478bd9Sstevel@tonic-gate  * awk -- mainline, yylex, etc.
29*7c478bd9Sstevel@tonic-gate  *
30*7c478bd9Sstevel@tonic-gate  * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
31*7c478bd9Sstevel@tonic-gate  *
32*7c478bd9Sstevel@tonic-gate  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
33*7c478bd9Sstevel@tonic-gate  */
34*7c478bd9Sstevel@tonic-gate 
35*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
36*7c478bd9Sstevel@tonic-gate 
37*7c478bd9Sstevel@tonic-gate #include "awk.h"
38*7c478bd9Sstevel@tonic-gate #include "y.tab.h"
39*7c478bd9Sstevel@tonic-gate #include <stdarg.h>
40*7c478bd9Sstevel@tonic-gate #include <unistd.h>
41*7c478bd9Sstevel@tonic-gate #include <locale.h>
42*7c478bd9Sstevel@tonic-gate 
43*7c478bd9Sstevel@tonic-gate static char	*progfiles[NPFILE];	/* Programmes files for yylex */
44*7c478bd9Sstevel@tonic-gate static char	**progfilep = &progfiles[0]; /* Pointer to last file */
45*7c478bd9Sstevel@tonic-gate static wchar_t	*progptr;		/* In-memory programme */
46*7c478bd9Sstevel@tonic-gate static int	proglen;		/* Length of progptr */
47*7c478bd9Sstevel@tonic-gate static wchar_t	context[NCONTEXT];	/* Circular buffer of context */
48*7c478bd9Sstevel@tonic-gate static wchar_t	*conptr = &context[0];	/* context ptr */
49*7c478bd9Sstevel@tonic-gate static FILE	*progfp;		/* Stdio stream for programme */
50*7c478bd9Sstevel@tonic-gate static char	*filename;
51*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
52*7c478bd9Sstevel@tonic-gate static int	dflag;
53*7c478bd9Sstevel@tonic-gate #endif
54*7c478bd9Sstevel@tonic-gate 
55*7c478bd9Sstevel@tonic-gate #define AWK_EXEC_MAGIC	"<MKS AWKC>"
56*7c478bd9Sstevel@tonic-gate #define LEN_EXEC_MAGIC	10
57*7c478bd9Sstevel@tonic-gate 
58*7c478bd9Sstevel@tonic-gate static char	unbal[] = "unbalanced E char";
59*7c478bd9Sstevel@tonic-gate 
60*7c478bd9Sstevel@tonic-gate static void	awkarginit(int c, char **av);
61*7c478bd9Sstevel@tonic-gate static int	lexid(wint_t c);
62*7c478bd9Sstevel@tonic-gate static int	lexnumber(wint_t c);
63*7c478bd9Sstevel@tonic-gate static int	lexstring(wint_t endc);
64*7c478bd9Sstevel@tonic-gate static int	lexregexp(register wint_t endc);
65*7c478bd9Sstevel@tonic-gate 
66*7c478bd9Sstevel@tonic-gate static void	awkvarinit(void);
67*7c478bd9Sstevel@tonic-gate static wint_t	lexgetc(void);
68*7c478bd9Sstevel@tonic-gate static void	lexungetc(wint_t c);
69*7c478bd9Sstevel@tonic-gate static size_t	lexescape(wint_t endc, int regx, int cmd_line_operand);
70*7c478bd9Sstevel@tonic-gate static void	awkierr(int perr, char *fmt, va_list ap);
71*7c478bd9Sstevel@tonic-gate static int	usage(void);
72*7c478bd9Sstevel@tonic-gate void		strescape(wchar_t *str);
73*7c478bd9Sstevel@tonic-gate static const char      *toprint(wint_t);
74*7c478bd9Sstevel@tonic-gate char *_cmdname;
75*7c478bd9Sstevel@tonic-gate static wchar_t *mbconvert(char *str);
76*7c478bd9Sstevel@tonic-gate 
77*7c478bd9Sstevel@tonic-gate 
78*7c478bd9Sstevel@tonic-gate /*
79*7c478bd9Sstevel@tonic-gate  * mainline for awk
80*7c478bd9Sstevel@tonic-gate  */
81*7c478bd9Sstevel@tonic-gate int
82*7c478bd9Sstevel@tonic-gate main(int argc, char *argv[])
83*7c478bd9Sstevel@tonic-gate {
84*7c478bd9Sstevel@tonic-gate 	register wchar_t *ap;
85*7c478bd9Sstevel@tonic-gate 	register char *cmd;
86*7c478bd9Sstevel@tonic-gate 
87*7c478bd9Sstevel@tonic-gate 	cmd = argv[0];
88*7c478bd9Sstevel@tonic-gate 	_cmdname = cmd;
89*7c478bd9Sstevel@tonic-gate 
90*7c478bd9Sstevel@tonic-gate 	linebuf = emalloc(NLINE * sizeof(wchar_t));
91*7c478bd9Sstevel@tonic-gate 
92*7c478bd9Sstevel@tonic-gate 	/*l
93*7c478bd9Sstevel@tonic-gate 	 * At this point only messaging should be internationalized.
94*7c478bd9Sstevel@tonic-gate 	 * numbers are still scanned as in the Posix locale.
95*7c478bd9Sstevel@tonic-gate 	 */
96*7c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_ALL,"");
97*7c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_NUMERIC,"C");
98*7c478bd9Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)
99*7c478bd9Sstevel@tonic-gate #define	TEXT_DOMAIN	"SYS_TEST"
100*7c478bd9Sstevel@tonic-gate #endif
101*7c478bd9Sstevel@tonic-gate 	(void) textdomain(TEXT_DOMAIN);
102*7c478bd9Sstevel@tonic-gate 
103*7c478bd9Sstevel@tonic-gate 	awkvarinit();
104*7c478bd9Sstevel@tonic-gate 	/*running = 1;*/
105*7c478bd9Sstevel@tonic-gate 	while (argc>1 && *argv[1]=='-') {
106*7c478bd9Sstevel@tonic-gate 		void *save_ptr = NULL;
107*7c478bd9Sstevel@tonic-gate 		ap = mbstowcsdup(&argv[1][1]);
108*7c478bd9Sstevel@tonic-gate 		if (ap == NULL)
109*7c478bd9Sstevel@tonic-gate 			break;
110*7c478bd9Sstevel@tonic-gate 		if (*ap == '\0') {
111*7c478bd9Sstevel@tonic-gate 			free(ap);
112*7c478bd9Sstevel@tonic-gate 			break;
113*7c478bd9Sstevel@tonic-gate 		}
114*7c478bd9Sstevel@tonic-gate 		save_ptr = (void *) ap;
115*7c478bd9Sstevel@tonic-gate 		++argv;
116*7c478bd9Sstevel@tonic-gate 		--argc;
117*7c478bd9Sstevel@tonic-gate 		if (*ap=='-' && ap[1]=='\0')
118*7c478bd9Sstevel@tonic-gate 			break;
119*7c478bd9Sstevel@tonic-gate 		for ( ; *ap != '\0'; ++ap) {
120*7c478bd9Sstevel@tonic-gate 			switch (*ap) {
121*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
122*7c478bd9Sstevel@tonic-gate 			case 'd':
123*7c478bd9Sstevel@tonic-gate 				dflag = 1;
124*7c478bd9Sstevel@tonic-gate 				continue;
125*7c478bd9Sstevel@tonic-gate 
126*7c478bd9Sstevel@tonic-gate #endif
127*7c478bd9Sstevel@tonic-gate 			case 'f':
128*7c478bd9Sstevel@tonic-gate 				if (argc < 2) {
129*7c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
130*7c478bd9Sstevel@tonic-gate 				gettext("Missing script file\n"));
131*7c478bd9Sstevel@tonic-gate 					return (1);
132*7c478bd9Sstevel@tonic-gate 				}
133*7c478bd9Sstevel@tonic-gate 				*progfilep++ = argv[1];
134*7c478bd9Sstevel@tonic-gate 				--argc;
135*7c478bd9Sstevel@tonic-gate 				++argv;
136*7c478bd9Sstevel@tonic-gate 				continue;
137*7c478bd9Sstevel@tonic-gate 
138*7c478bd9Sstevel@tonic-gate 			case 'F':
139*7c478bd9Sstevel@tonic-gate 				if (ap[1] == '\0') {
140*7c478bd9Sstevel@tonic-gate 					if (argc < 2) {
141*7c478bd9Sstevel@tonic-gate 						(void) fprintf(stderr,
142*7c478bd9Sstevel@tonic-gate 				gettext("Missing field separator\n"));
143*7c478bd9Sstevel@tonic-gate 						return (1);
144*7c478bd9Sstevel@tonic-gate 					}
145*7c478bd9Sstevel@tonic-gate 					ap = mbstowcsdup(argv[1]);
146*7c478bd9Sstevel@tonic-gate 					--argc;
147*7c478bd9Sstevel@tonic-gate 					++argv;
148*7c478bd9Sstevel@tonic-gate 				} else
149*7c478bd9Sstevel@tonic-gate 					++ap;
150*7c478bd9Sstevel@tonic-gate 				strescape(ap);
151*7c478bd9Sstevel@tonic-gate 				strassign(varFS, linebuf, FALLOC,
152*7c478bd9Sstevel@tonic-gate 					wcslen(linebuf));
153*7c478bd9Sstevel@tonic-gate 				break;
154*7c478bd9Sstevel@tonic-gate 
155*7c478bd9Sstevel@tonic-gate 			case 'v': {
156*7c478bd9Sstevel@tonic-gate 				register wchar_t *vp;
157*7c478bd9Sstevel@tonic-gate 				register wchar_t *arg;
158*7c478bd9Sstevel@tonic-gate 
159*7c478bd9Sstevel@tonic-gate 				if (argc < 2) {
160*7c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
161*7c478bd9Sstevel@tonic-gate 		gettext("Missing variable assignment\n"));
162*7c478bd9Sstevel@tonic-gate 					return (1);
163*7c478bd9Sstevel@tonic-gate 				}
164*7c478bd9Sstevel@tonic-gate 				arg = mbconvert(argv[1]);
165*7c478bd9Sstevel@tonic-gate 				if ((vp = wcschr(arg, '=')) != NULL) {
166*7c478bd9Sstevel@tonic-gate 					*vp = '\0';
167*7c478bd9Sstevel@tonic-gate 					strescape(vp+1);
168*7c478bd9Sstevel@tonic-gate 					strassign(vlook(arg), linebuf,
169*7c478bd9Sstevel@tonic-gate 					    FALLOC|FSENSE, wcslen(linebuf));
170*7c478bd9Sstevel@tonic-gate 					*vp = '=';
171*7c478bd9Sstevel@tonic-gate 				}
172*7c478bd9Sstevel@tonic-gate 				--argc;
173*7c478bd9Sstevel@tonic-gate 				++argv;
174*7c478bd9Sstevel@tonic-gate 				continue;
175*7c478bd9Sstevel@tonic-gate 			}
176*7c478bd9Sstevel@tonic-gate 
177*7c478bd9Sstevel@tonic-gate 			default:
178*7c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr,
179*7c478bd9Sstevel@tonic-gate 				gettext("Unknown option \"-%S\"\n"), ap);
180*7c478bd9Sstevel@tonic-gate 				return (usage());
181*7c478bd9Sstevel@tonic-gate 			}
182*7c478bd9Sstevel@tonic-gate 			break;
183*7c478bd9Sstevel@tonic-gate 		}
184*7c478bd9Sstevel@tonic-gate 		if (save_ptr)
185*7c478bd9Sstevel@tonic-gate 			free(save_ptr);
186*7c478bd9Sstevel@tonic-gate 	}
187*7c478bd9Sstevel@tonic-gate 	if (progfilep == &progfiles[0]) {
188*7c478bd9Sstevel@tonic-gate 		if (argc < 2)
189*7c478bd9Sstevel@tonic-gate 			return (usage());
190*7c478bd9Sstevel@tonic-gate 		filename = "[command line]";	/* BUG: NEEDS TRANSLATION */
191*7c478bd9Sstevel@tonic-gate 		progptr = mbstowcsdup(argv[1]);
192*7c478bd9Sstevel@tonic-gate 		proglen = wcslen(progptr);
193*7c478bd9Sstevel@tonic-gate 		--argc;
194*7c478bd9Sstevel@tonic-gate 		++argv;
195*7c478bd9Sstevel@tonic-gate 	}
196*7c478bd9Sstevel@tonic-gate 
197*7c478bd9Sstevel@tonic-gate 	argv[0] = cmd;
198*7c478bd9Sstevel@tonic-gate 
199*7c478bd9Sstevel@tonic-gate 	awkarginit(argc, argv);
200*7c478bd9Sstevel@tonic-gate 
201*7c478bd9Sstevel@tonic-gate 	/*running = 0;*/
202*7c478bd9Sstevel@tonic-gate 	(void)yyparse();
203*7c478bd9Sstevel@tonic-gate 
204*7c478bd9Sstevel@tonic-gate 	lineno = 0;
205*7c478bd9Sstevel@tonic-gate 	/*
206*7c478bd9Sstevel@tonic-gate 	 * Ok, done parsing, so now activate the rest of the nls stuff, set
207*7c478bd9Sstevel@tonic-gate 	 * the radix character.
208*7c478bd9Sstevel@tonic-gate 	 */
209*7c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_ALL,"");
210*7c478bd9Sstevel@tonic-gate 	radixpoint = *localeconv()->decimal_point;
211*7c478bd9Sstevel@tonic-gate 	awk();
212*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
213*7c478bd9Sstevel@tonic-gate 	return (0);
214*7c478bd9Sstevel@tonic-gate }
215*7c478bd9Sstevel@tonic-gate 
216*7c478bd9Sstevel@tonic-gate /*
217*7c478bd9Sstevel@tonic-gate  * Do initial setup of buffers, etc.
218*7c478bd9Sstevel@tonic-gate  * This must be called before most processing
219*7c478bd9Sstevel@tonic-gate  * and especially before lexical analysis.
220*7c478bd9Sstevel@tonic-gate  * Variables initialised here will be overruled by command
221*7c478bd9Sstevel@tonic-gate  * line parameter initialisation.
222*7c478bd9Sstevel@tonic-gate  */
223*7c478bd9Sstevel@tonic-gate static void
224*7c478bd9Sstevel@tonic-gate awkvarinit()
225*7c478bd9Sstevel@tonic-gate {
226*7c478bd9Sstevel@tonic-gate 	register NODE *np;
227*7c478bd9Sstevel@tonic-gate 
228*7c478bd9Sstevel@tonic-gate 	(void) setvbuf(stderr, NULL, _IONBF, 0);
229*7c478bd9Sstevel@tonic-gate 
230*7c478bd9Sstevel@tonic-gate 	if ((NIOSTREAM = sysconf(_SC_OPEN_MAX) - 4) <= 0) {
231*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
232*7c478bd9Sstevel@tonic-gate 	gettext("not enough available file descriptors"));
233*7c478bd9Sstevel@tonic-gate 		exit(1);
234*7c478bd9Sstevel@tonic-gate 	}
235*7c478bd9Sstevel@tonic-gate 	ofiles = (OFILE *) emalloc(sizeof(OFILE)*NIOSTREAM);
236*7c478bd9Sstevel@tonic-gate #ifdef A_ZERO_POINTERS
237*7c478bd9Sstevel@tonic-gate 	(void) memset((wchar_t *) ofiles, 0, sizeof(OFILE) * NIOSTREAM);
238*7c478bd9Sstevel@tonic-gate #else
239*7c478bd9Sstevel@tonic-gate 	{
240*7c478bd9Sstevel@tonic-gate 	    /* initialize file descriptor table */
241*7c478bd9Sstevel@tonic-gate 	    OFILE *fp;
242*7c478bd9Sstevel@tonic-gate 	    for (fp = ofiles; fp < &ofiles[NIOSTREAM]; fp += 1) {
243*7c478bd9Sstevel@tonic-gate 		fp->f_fp = FNULL;
244*7c478bd9Sstevel@tonic-gate 		fp->f_mode = 0;
245*7c478bd9Sstevel@tonic-gate 		fp->f_name = (char *)0;
246*7c478bd9Sstevel@tonic-gate 	    }
247*7c478bd9Sstevel@tonic-gate 	}
248*7c478bd9Sstevel@tonic-gate #endif
249*7c478bd9Sstevel@tonic-gate 	constant = intnode((INT)0);
250*7c478bd9Sstevel@tonic-gate 
251*7c478bd9Sstevel@tonic-gate 	const0 = intnode((INT)0);
252*7c478bd9Sstevel@tonic-gate 	const1 = intnode((INT)1);
253*7c478bd9Sstevel@tonic-gate 	constundef = emptynode(CONSTANT, 0);
254*7c478bd9Sstevel@tonic-gate 	constundef->n_flags = FSTRING|FVINT;
255*7c478bd9Sstevel@tonic-gate 	constundef->n_string = _null;
256*7c478bd9Sstevel@tonic-gate 	constundef->n_strlen = 0;
257*7c478bd9Sstevel@tonic-gate 	inc_oper = emptynode(ADD, 0);
258*7c478bd9Sstevel@tonic-gate 	inc_oper->n_right = const1;
259*7c478bd9Sstevel@tonic-gate 	asn_oper = emptynode(ADD, 0);
260*7c478bd9Sstevel@tonic-gate 	field0 = node(FIELD, const0, NNULL);
261*7c478bd9Sstevel@tonic-gate 
262*7c478bd9Sstevel@tonic-gate 	{
263*7c478bd9Sstevel@tonic-gate 		register RESFUNC near*rp;
264*7c478bd9Sstevel@tonic-gate 
265*7c478bd9Sstevel@tonic-gate 		for (rp = &resfuncs[0]; rp->rf_name != (LOCCHARP)NULL; ++rp) {
266*7c478bd9Sstevel@tonic-gate 			np = finstall(rp->rf_name, rp->rf_func, rp->rf_type);
267*7c478bd9Sstevel@tonic-gate 		}
268*7c478bd9Sstevel@tonic-gate 	}
269*7c478bd9Sstevel@tonic-gate 	{
270*7c478bd9Sstevel@tonic-gate 		register RESERVED near*rp;
271*7c478bd9Sstevel@tonic-gate 
272*7c478bd9Sstevel@tonic-gate 		for (rp = &reserved[0]; rp->r_name != (LOCCHARP)NULL; ++rp) {
273*7c478bd9Sstevel@tonic-gate 			switch (rp->r_type) {
274*7c478bd9Sstevel@tonic-gate 			case SVAR:
275*7c478bd9Sstevel@tonic-gate 			case VAR:
276*7c478bd9Sstevel@tonic-gate 				running = 1;
277*7c478bd9Sstevel@tonic-gate 				np = vlook(rp->r_name);
278*7c478bd9Sstevel@tonic-gate 				if (rp->r_type == SVAR)
279*7c478bd9Sstevel@tonic-gate 					np->n_flags |= FSPECIAL;
280*7c478bd9Sstevel@tonic-gate 				if (rp->r_svalue != NULL)
281*7c478bd9Sstevel@tonic-gate 					strassign(np, rp->r_svalue, FSTATIC,
282*7c478bd9Sstevel@tonic-gate 					    (size_t)rp->r_ivalue);
283*7c478bd9Sstevel@tonic-gate 				else {
284*7c478bd9Sstevel@tonic-gate 					constant->n_int = rp->r_ivalue;
285*7c478bd9Sstevel@tonic-gate 					(void)assign(np, constant);
286*7c478bd9Sstevel@tonic-gate 				}
287*7c478bd9Sstevel@tonic-gate 				running = 0;
288*7c478bd9Sstevel@tonic-gate 				break;
289*7c478bd9Sstevel@tonic-gate 
290*7c478bd9Sstevel@tonic-gate 			case KEYWORD:
291*7c478bd9Sstevel@tonic-gate 				kinstall(rp->r_name, (int)rp->r_ivalue);
292*7c478bd9Sstevel@tonic-gate 				break;
293*7c478bd9Sstevel@tonic-gate 			}
294*7c478bd9Sstevel@tonic-gate 		}
295*7c478bd9Sstevel@tonic-gate 	}
296*7c478bd9Sstevel@tonic-gate 
297*7c478bd9Sstevel@tonic-gate 	varNR = vlook(s_NR);
298*7c478bd9Sstevel@tonic-gate 	varFNR = vlook(s_FNR);
299*7c478bd9Sstevel@tonic-gate 	varNF = vlook(s_NF);
300*7c478bd9Sstevel@tonic-gate 	varOFMT = vlook(s_OFMT);
301*7c478bd9Sstevel@tonic-gate 	varCONVFMT = vlook(s_CONVFMT);
302*7c478bd9Sstevel@tonic-gate 	varOFS = vlook(s_OFS);
303*7c478bd9Sstevel@tonic-gate 	varORS = vlook(s_ORS);
304*7c478bd9Sstevel@tonic-gate 	varRS = vlook(s_RS);
305*7c478bd9Sstevel@tonic-gate 	varFS = vlook(s_FS);
306*7c478bd9Sstevel@tonic-gate 	varARGC = vlook(s_ARGC);
307*7c478bd9Sstevel@tonic-gate 	varSUBSEP = vlook(s_SUBSEP);
308*7c478bd9Sstevel@tonic-gate 	varENVIRON = vlook(s_ENVIRON);
309*7c478bd9Sstevel@tonic-gate 	varFILENAME = vlook(s_FILENAME);
310*7c478bd9Sstevel@tonic-gate 	varSYMTAB = vlook(s_SYMTAB);
311*7c478bd9Sstevel@tonic-gate 	incNR = node(ASG, varNR, node(ADD, varNR, const1));
312*7c478bd9Sstevel@tonic-gate 	incFNR = node(ASG, varFNR, node(ADD, varFNR, const1));
313*7c478bd9Sstevel@tonic-gate 	clrFNR = node(ASG, varFNR, const0);
314*7c478bd9Sstevel@tonic-gate }
315*7c478bd9Sstevel@tonic-gate 
316*7c478bd9Sstevel@tonic-gate /*
317*7c478bd9Sstevel@tonic-gate  * Initialise awk ARGC, ARGV variables.
318*7c478bd9Sstevel@tonic-gate  */
319*7c478bd9Sstevel@tonic-gate static void
320*7c478bd9Sstevel@tonic-gate awkarginit(int ac, char **av)
321*7c478bd9Sstevel@tonic-gate {
322*7c478bd9Sstevel@tonic-gate 	register int i;
323*7c478bd9Sstevel@tonic-gate 	register wchar_t *cp;
324*7c478bd9Sstevel@tonic-gate 
325*7c478bd9Sstevel@tonic-gate 	ARGVsubi = node(INDEX, vlook(s_ARGV), constant);
326*7c478bd9Sstevel@tonic-gate 	running = 1;
327*7c478bd9Sstevel@tonic-gate 	constant->n_int = ac;
328*7c478bd9Sstevel@tonic-gate 	(void)assign(varARGC, constant);
329*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < ac; ++i) {
330*7c478bd9Sstevel@tonic-gate 		cp = mbstowcsdup(av[i]);
331*7c478bd9Sstevel@tonic-gate 		constant->n_int = i;
332*7c478bd9Sstevel@tonic-gate 		strassign(exprreduce(ARGVsubi), cp,
333*7c478bd9Sstevel@tonic-gate 		    FSTATIC|FSENSE, wcslen(cp));
334*7c478bd9Sstevel@tonic-gate 	}
335*7c478bd9Sstevel@tonic-gate 	running = 0;
336*7c478bd9Sstevel@tonic-gate }
337*7c478bd9Sstevel@tonic-gate 
338*7c478bd9Sstevel@tonic-gate /*
339*7c478bd9Sstevel@tonic-gate  * Clean up when done parsing a function.
340*7c478bd9Sstevel@tonic-gate  * All formal parameters, because of a deal (funparm) in
341*7c478bd9Sstevel@tonic-gate  * yylex, get put into the symbol table in front of any
342*7c478bd9Sstevel@tonic-gate  * global variable of the same name.  When the entire
343*7c478bd9Sstevel@tonic-gate  * function is parsed, remove these formal dummy nodes
344*7c478bd9Sstevel@tonic-gate  * from the symbol table but retain the nodes because
345*7c478bd9Sstevel@tonic-gate  * the generated tree points at them.
346*7c478bd9Sstevel@tonic-gate  */
347*7c478bd9Sstevel@tonic-gate void
348*7c478bd9Sstevel@tonic-gate uexit(NODE *np)
349*7c478bd9Sstevel@tonic-gate {
350*7c478bd9Sstevel@tonic-gate 	register NODE *formal;
351*7c478bd9Sstevel@tonic-gate 
352*7c478bd9Sstevel@tonic-gate 	while ((formal = getlist(&np)) != NNULL)
353*7c478bd9Sstevel@tonic-gate 		delsymtab(formal, 0);
354*7c478bd9Sstevel@tonic-gate }
355*7c478bd9Sstevel@tonic-gate 
356*7c478bd9Sstevel@tonic-gate /*
357*7c478bd9Sstevel@tonic-gate  * The lexical analyzer.
358*7c478bd9Sstevel@tonic-gate  */
359*7c478bd9Sstevel@tonic-gate int
360*7c478bd9Sstevel@tonic-gate yylex()
361*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
362*7c478bd9Sstevel@tonic-gate {
363*7c478bd9Sstevel@tonic-gate 	register int l;
364*7c478bd9Sstevel@tonic-gate 
365*7c478bd9Sstevel@tonic-gate 	l = yyhex();
366*7c478bd9Sstevel@tonic-gate 	if (dflag)
367*7c478bd9Sstevel@tonic-gate 		(void) printf("%d\n", l);
368*7c478bd9Sstevel@tonic-gate 	return (l);
369*7c478bd9Sstevel@tonic-gate }
370*7c478bd9Sstevel@tonic-gate yyhex()
371*7c478bd9Sstevel@tonic-gate #endif
372*7c478bd9Sstevel@tonic-gate {
373*7c478bd9Sstevel@tonic-gate 	register wint_t c, c1;
374*7c478bd9Sstevel@tonic-gate 	int i;
375*7c478bd9Sstevel@tonic-gate 	static int savetoken = 0;
376*7c478bd9Sstevel@tonic-gate 	static wasfield;
377*7c478bd9Sstevel@tonic-gate 	static int isfuncdef;
378*7c478bd9Sstevel@tonic-gate 	static int nbrace, nparen, nbracket;
379*7c478bd9Sstevel@tonic-gate 	static struct ctosymstruct {
380*7c478bd9Sstevel@tonic-gate 		wint_t c, sym;
381*7c478bd9Sstevel@tonic-gate 	} ctosym[] = {
382*7c478bd9Sstevel@tonic-gate 		{ '|', BAR },		{ '^', CARAT },
383*7c478bd9Sstevel@tonic-gate 	  	{ '~', TILDE },		{ '<', LANGLE },
384*7c478bd9Sstevel@tonic-gate 	  	{ '>', RANGLE },	{ '+', PLUSC },
385*7c478bd9Sstevel@tonic-gate 	  	{ '-', HYPHEN },	{ '*', STAR },
386*7c478bd9Sstevel@tonic-gate 	  	{ '/', SLASH },		{ '%', PERCENT },
387*7c478bd9Sstevel@tonic-gate 	  	{ '!', EXCLAMATION },	{ '$', DOLLAR },
388*7c478bd9Sstevel@tonic-gate 	  	{ '[', LSQUARE },	{ ']', RSQUARE },
389*7c478bd9Sstevel@tonic-gate 		{ '(', LPAREN },	{ ')', RPAREN },
390*7c478bd9Sstevel@tonic-gate 		{ ';', SEMI },		{ '{', LBRACE },
391*7c478bd9Sstevel@tonic-gate 		{ '}', RBRACE },	{   0, 0 }
392*7c478bd9Sstevel@tonic-gate 	};
393*7c478bd9Sstevel@tonic-gate 
394*7c478bd9Sstevel@tonic-gate 	if (savetoken) {
395*7c478bd9Sstevel@tonic-gate 		c = savetoken;
396*7c478bd9Sstevel@tonic-gate 		savetoken = 0;
397*7c478bd9Sstevel@tonic-gate 	} else if (redelim != '\0') {
398*7c478bd9Sstevel@tonic-gate 		c = redelim;
399*7c478bd9Sstevel@tonic-gate 		redelim = 0;
400*7c478bd9Sstevel@tonic-gate 		catterm = 0;
401*7c478bd9Sstevel@tonic-gate 		savetoken = c;
402*7c478bd9Sstevel@tonic-gate 		return (lexlast = lexregexp(c));
403*7c478bd9Sstevel@tonic-gate 	} else while ((c = lexgetc()) != WEOF) {
404*7c478bd9Sstevel@tonic-gate 		if (iswalpha(c) || c=='_') {
405*7c478bd9Sstevel@tonic-gate 			c = lexid(c);
406*7c478bd9Sstevel@tonic-gate 		} else if (iswdigit(c) || c=='.') {
407*7c478bd9Sstevel@tonic-gate 			c = lexnumber(c);
408*7c478bd9Sstevel@tonic-gate 		} else if (isWblank(c)) {
409*7c478bd9Sstevel@tonic-gate 			continue;
410*7c478bd9Sstevel@tonic-gate 		} else switch (c) {
411*7c478bd9Sstevel@tonic-gate #if DOS || OS2
412*7c478bd9Sstevel@tonic-gate 		case 032:		/* ^Z */
413*7c478bd9Sstevel@tonic-gate 			continue;
414*7c478bd9Sstevel@tonic-gate #endif
415*7c478bd9Sstevel@tonic-gate 
416*7c478bd9Sstevel@tonic-gate 		case '"':
417*7c478bd9Sstevel@tonic-gate 			c = lexstring(c);
418*7c478bd9Sstevel@tonic-gate 			break;
419*7c478bd9Sstevel@tonic-gate 
420*7c478bd9Sstevel@tonic-gate 		case '#':
421*7c478bd9Sstevel@tonic-gate 			while ((c = lexgetc())!='\n' && c!=WEOF)
422*7c478bd9Sstevel@tonic-gate 				;
423*7c478bd9Sstevel@tonic-gate 			lexungetc(c);
424*7c478bd9Sstevel@tonic-gate 			continue;
425*7c478bd9Sstevel@tonic-gate 
426*7c478bd9Sstevel@tonic-gate 		case '+':
427*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '+')
428*7c478bd9Sstevel@tonic-gate 				c = INC;
429*7c478bd9Sstevel@tonic-gate 			else if (c1 == '=')
430*7c478bd9Sstevel@tonic-gate 				c = AADD;
431*7c478bd9Sstevel@tonic-gate 			else
432*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
433*7c478bd9Sstevel@tonic-gate 			break;
434*7c478bd9Sstevel@tonic-gate 
435*7c478bd9Sstevel@tonic-gate 		case '-':
436*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '-')
437*7c478bd9Sstevel@tonic-gate 				c = DEC;
438*7c478bd9Sstevel@tonic-gate 			else if (c1 == '=')
439*7c478bd9Sstevel@tonic-gate 				c = ASUB;
440*7c478bd9Sstevel@tonic-gate 			else
441*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
442*7c478bd9Sstevel@tonic-gate 			break;
443*7c478bd9Sstevel@tonic-gate 
444*7c478bd9Sstevel@tonic-gate 		case '*':
445*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
446*7c478bd9Sstevel@tonic-gate 				c = AMUL;
447*7c478bd9Sstevel@tonic-gate 			else if (c1 == '*') {
448*7c478bd9Sstevel@tonic-gate 				if ((c1 = lexgetc()) == '=')
449*7c478bd9Sstevel@tonic-gate 					c = AEXP;
450*7c478bd9Sstevel@tonic-gate 				else {
451*7c478bd9Sstevel@tonic-gate 					c = EXP;
452*7c478bd9Sstevel@tonic-gate 					lexungetc(c1);
453*7c478bd9Sstevel@tonic-gate 				}
454*7c478bd9Sstevel@tonic-gate 			} else
455*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
456*7c478bd9Sstevel@tonic-gate 			break;
457*7c478bd9Sstevel@tonic-gate 
458*7c478bd9Sstevel@tonic-gate 		case '^':
459*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=') {
460*7c478bd9Sstevel@tonic-gate 				c = AEXP;
461*7c478bd9Sstevel@tonic-gate 			} else {
462*7c478bd9Sstevel@tonic-gate 				c = EXP;
463*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
464*7c478bd9Sstevel@tonic-gate 			}
465*7c478bd9Sstevel@tonic-gate 			break;
466*7c478bd9Sstevel@tonic-gate 
467*7c478bd9Sstevel@tonic-gate 		case '/':
468*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '='
469*7c478bd9Sstevel@tonic-gate 			 && lexlast!=RE && lexlast!=NRE
470*7c478bd9Sstevel@tonic-gate 			 && lexlast!=';' && lexlast!='\n'
471*7c478bd9Sstevel@tonic-gate 			 && lexlast!=',' && lexlast!='(')
472*7c478bd9Sstevel@tonic-gate 				c = ADIV;
473*7c478bd9Sstevel@tonic-gate 			else
474*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
475*7c478bd9Sstevel@tonic-gate 			break;
476*7c478bd9Sstevel@tonic-gate 
477*7c478bd9Sstevel@tonic-gate 		case '%':
478*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
479*7c478bd9Sstevel@tonic-gate 				c = AREM;
480*7c478bd9Sstevel@tonic-gate 			else
481*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
482*7c478bd9Sstevel@tonic-gate 			break;
483*7c478bd9Sstevel@tonic-gate 
484*7c478bd9Sstevel@tonic-gate 		case '&':
485*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '&')
486*7c478bd9Sstevel@tonic-gate 				c = AND;
487*7c478bd9Sstevel@tonic-gate 			else
488*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
489*7c478bd9Sstevel@tonic-gate 			break;
490*7c478bd9Sstevel@tonic-gate 
491*7c478bd9Sstevel@tonic-gate 		case '|':
492*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '|')
493*7c478bd9Sstevel@tonic-gate 				c = OR;
494*7c478bd9Sstevel@tonic-gate 			else {
495*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
496*7c478bd9Sstevel@tonic-gate 				if (inprint)
497*7c478bd9Sstevel@tonic-gate 					c = PIPE;
498*7c478bd9Sstevel@tonic-gate 			}
499*7c478bd9Sstevel@tonic-gate 			break;
500*7c478bd9Sstevel@tonic-gate 
501*7c478bd9Sstevel@tonic-gate 		case '>':
502*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
503*7c478bd9Sstevel@tonic-gate 				c = GE;
504*7c478bd9Sstevel@tonic-gate 			else if (c1 == '>')
505*7c478bd9Sstevel@tonic-gate 				c = APPEND;
506*7c478bd9Sstevel@tonic-gate 			else {
507*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
508*7c478bd9Sstevel@tonic-gate 				if (nparen==0 && inprint)
509*7c478bd9Sstevel@tonic-gate 					c = WRITE;
510*7c478bd9Sstevel@tonic-gate 			}
511*7c478bd9Sstevel@tonic-gate 			break;
512*7c478bd9Sstevel@tonic-gate 
513*7c478bd9Sstevel@tonic-gate 		case '<':
514*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
515*7c478bd9Sstevel@tonic-gate 				c = LE;
516*7c478bd9Sstevel@tonic-gate 			else
517*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
518*7c478bd9Sstevel@tonic-gate 			break;
519*7c478bd9Sstevel@tonic-gate 
520*7c478bd9Sstevel@tonic-gate 		case '!':
521*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
522*7c478bd9Sstevel@tonic-gate 				c = NE;
523*7c478bd9Sstevel@tonic-gate 			else if (c1 == '~')
524*7c478bd9Sstevel@tonic-gate 				c = NRE;
525*7c478bd9Sstevel@tonic-gate 			else
526*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
527*7c478bd9Sstevel@tonic-gate 			break;
528*7c478bd9Sstevel@tonic-gate 
529*7c478bd9Sstevel@tonic-gate 		case '=':
530*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '=')
531*7c478bd9Sstevel@tonic-gate 				c = EQ;
532*7c478bd9Sstevel@tonic-gate 			else {
533*7c478bd9Sstevel@tonic-gate 				lexungetc(c1);
534*7c478bd9Sstevel@tonic-gate 				c = ASG;
535*7c478bd9Sstevel@tonic-gate 			}
536*7c478bd9Sstevel@tonic-gate 			break;
537*7c478bd9Sstevel@tonic-gate 
538*7c478bd9Sstevel@tonic-gate 		case '\n':
539*7c478bd9Sstevel@tonic-gate 			switch (lexlast) {
540*7c478bd9Sstevel@tonic-gate 			case ')':
541*7c478bd9Sstevel@tonic-gate 				if (catterm || inprint) {
542*7c478bd9Sstevel@tonic-gate 					c = ';';
543*7c478bd9Sstevel@tonic-gate 					break;
544*7c478bd9Sstevel@tonic-gate 				}
545*7c478bd9Sstevel@tonic-gate 			case AND:
546*7c478bd9Sstevel@tonic-gate 			case OR:
547*7c478bd9Sstevel@tonic-gate 			case COMMA:
548*7c478bd9Sstevel@tonic-gate 			case '{':
549*7c478bd9Sstevel@tonic-gate 			case ELSE:
550*7c478bd9Sstevel@tonic-gate 			case ';':
551*7c478bd9Sstevel@tonic-gate 			case DO:
552*7c478bd9Sstevel@tonic-gate 				continue;
553*7c478bd9Sstevel@tonic-gate 
554*7c478bd9Sstevel@tonic-gate 			case '}':
555*7c478bd9Sstevel@tonic-gate 				if (nbrace != 0)
556*7c478bd9Sstevel@tonic-gate 					continue;
557*7c478bd9Sstevel@tonic-gate 
558*7c478bd9Sstevel@tonic-gate 			default:
559*7c478bd9Sstevel@tonic-gate 				c = ';';
560*7c478bd9Sstevel@tonic-gate 				break;
561*7c478bd9Sstevel@tonic-gate 			}
562*7c478bd9Sstevel@tonic-gate 			break;
563*7c478bd9Sstevel@tonic-gate 
564*7c478bd9Sstevel@tonic-gate 		case ELSE:
565*7c478bd9Sstevel@tonic-gate 			if (lexlast != ';') {
566*7c478bd9Sstevel@tonic-gate 				savetoken = ELSE;
567*7c478bd9Sstevel@tonic-gate 				c = ';';
568*7c478bd9Sstevel@tonic-gate 			}
569*7c478bd9Sstevel@tonic-gate 			break;
570*7c478bd9Sstevel@tonic-gate 
571*7c478bd9Sstevel@tonic-gate 		case '(':
572*7c478bd9Sstevel@tonic-gate 			++nparen;
573*7c478bd9Sstevel@tonic-gate 			break;
574*7c478bd9Sstevel@tonic-gate 
575*7c478bd9Sstevel@tonic-gate 		case ')':
576*7c478bd9Sstevel@tonic-gate 			if (--nparen < 0)
577*7c478bd9Sstevel@tonic-gate 				awkerr(unbal, "()");
578*7c478bd9Sstevel@tonic-gate 			break;
579*7c478bd9Sstevel@tonic-gate 
580*7c478bd9Sstevel@tonic-gate 		case '{':
581*7c478bd9Sstevel@tonic-gate 			nbrace++;
582*7c478bd9Sstevel@tonic-gate 			break;
583*7c478bd9Sstevel@tonic-gate 
584*7c478bd9Sstevel@tonic-gate 		case '}':
585*7c478bd9Sstevel@tonic-gate 			if (--nbrace < 0) {
586*7c478bd9Sstevel@tonic-gate 				char brk[3];
587*7c478bd9Sstevel@tonic-gate 
588*7c478bd9Sstevel@tonic-gate 				brk[0] = '{';
589*7c478bd9Sstevel@tonic-gate 				brk[1] = '}';
590*7c478bd9Sstevel@tonic-gate 				brk[2] = '\0';
591*7c478bd9Sstevel@tonic-gate 				awkerr(unbal, brk);
592*7c478bd9Sstevel@tonic-gate 			}
593*7c478bd9Sstevel@tonic-gate 			if (lexlast != ';') {
594*7c478bd9Sstevel@tonic-gate 				savetoken = c;
595*7c478bd9Sstevel@tonic-gate 				c = ';';
596*7c478bd9Sstevel@tonic-gate 			}
597*7c478bd9Sstevel@tonic-gate 			break;
598*7c478bd9Sstevel@tonic-gate 
599*7c478bd9Sstevel@tonic-gate 		case '[':
600*7c478bd9Sstevel@tonic-gate 			++nbracket;
601*7c478bd9Sstevel@tonic-gate 			break;
602*7c478bd9Sstevel@tonic-gate 
603*7c478bd9Sstevel@tonic-gate 		case ']':
604*7c478bd9Sstevel@tonic-gate 			if (--nbracket < 0) {
605*7c478bd9Sstevel@tonic-gate 				char brk[3];
606*7c478bd9Sstevel@tonic-gate 
607*7c478bd9Sstevel@tonic-gate 				brk[0] = '[';
608*7c478bd9Sstevel@tonic-gate 				brk[1] = ']';
609*7c478bd9Sstevel@tonic-gate 				brk[2] = '\0';
610*7c478bd9Sstevel@tonic-gate 				awkerr(unbal, brk);
611*7c478bd9Sstevel@tonic-gate 			}
612*7c478bd9Sstevel@tonic-gate 			break;
613*7c478bd9Sstevel@tonic-gate 
614*7c478bd9Sstevel@tonic-gate 		case '\\':
615*7c478bd9Sstevel@tonic-gate 			if ((c1 = lexgetc()) == '\n')
616*7c478bd9Sstevel@tonic-gate 				continue;
617*7c478bd9Sstevel@tonic-gate 			lexungetc(c1);
618*7c478bd9Sstevel@tonic-gate 			break;
619*7c478bd9Sstevel@tonic-gate 
620*7c478bd9Sstevel@tonic-gate 		case ',':
621*7c478bd9Sstevel@tonic-gate 			c = COMMA;
622*7c478bd9Sstevel@tonic-gate 			break;
623*7c478bd9Sstevel@tonic-gate 
624*7c478bd9Sstevel@tonic-gate 		case '?':
625*7c478bd9Sstevel@tonic-gate 			c = QUEST;
626*7c478bd9Sstevel@tonic-gate 			break;
627*7c478bd9Sstevel@tonic-gate 
628*7c478bd9Sstevel@tonic-gate 		case ':':
629*7c478bd9Sstevel@tonic-gate 			c = COLON;
630*7c478bd9Sstevel@tonic-gate 			break;
631*7c478bd9Sstevel@tonic-gate 
632*7c478bd9Sstevel@tonic-gate 		default:
633*7c478bd9Sstevel@tonic-gate 			if (!iswprint(c))
634*7c478bd9Sstevel@tonic-gate 				awkerr(
635*7c478bd9Sstevel@tonic-gate 				   gettext("invalid character \"%s\""),
636*7c478bd9Sstevel@tonic-gate 				   toprint(c));
637*7c478bd9Sstevel@tonic-gate 			break;
638*7c478bd9Sstevel@tonic-gate 		}
639*7c478bd9Sstevel@tonic-gate 		break;
640*7c478bd9Sstevel@tonic-gate 	}
641*7c478bd9Sstevel@tonic-gate 
642*7c478bd9Sstevel@tonic-gate 	switch (c) {
643*7c478bd9Sstevel@tonic-gate 	case ']':
644*7c478bd9Sstevel@tonic-gate 		++catterm;
645*7c478bd9Sstevel@tonic-gate 		break;
646*7c478bd9Sstevel@tonic-gate 
647*7c478bd9Sstevel@tonic-gate 	case VAR:
648*7c478bd9Sstevel@tonic-gate 		if (catterm) {
649*7c478bd9Sstevel@tonic-gate 			savetoken = c;
650*7c478bd9Sstevel@tonic-gate 			c = CONCAT;
651*7c478bd9Sstevel@tonic-gate 			catterm = 0;
652*7c478bd9Sstevel@tonic-gate 		} else if (!isfuncdef) {
653*7c478bd9Sstevel@tonic-gate 			if ((c1=lexgetc()) != '(')
654*7c478bd9Sstevel@tonic-gate 				++catterm;
655*7c478bd9Sstevel@tonic-gate 			lexungetc(c1);
656*7c478bd9Sstevel@tonic-gate 		}
657*7c478bd9Sstevel@tonic-gate 		isfuncdef = 0;
658*7c478bd9Sstevel@tonic-gate 		break;
659*7c478bd9Sstevel@tonic-gate 
660*7c478bd9Sstevel@tonic-gate 	case PARM:
661*7c478bd9Sstevel@tonic-gate 	case CONSTANT:
662*7c478bd9Sstevel@tonic-gate 		if (catterm) {
663*7c478bd9Sstevel@tonic-gate 			savetoken = c;
664*7c478bd9Sstevel@tonic-gate 			c = CONCAT;
665*7c478bd9Sstevel@tonic-gate 			catterm = 0;
666*7c478bd9Sstevel@tonic-gate 		} else {
667*7c478bd9Sstevel@tonic-gate 			if (lexlast == '$')
668*7c478bd9Sstevel@tonic-gate 				wasfield = 2;
669*7c478bd9Sstevel@tonic-gate 			++catterm;
670*7c478bd9Sstevel@tonic-gate 		}
671*7c478bd9Sstevel@tonic-gate 		break;
672*7c478bd9Sstevel@tonic-gate 
673*7c478bd9Sstevel@tonic-gate 	case INC:
674*7c478bd9Sstevel@tonic-gate 	case DEC:
675*7c478bd9Sstevel@tonic-gate 		if (!catterm || lexlast!=CONSTANT || wasfield)
676*7c478bd9Sstevel@tonic-gate 			break;
677*7c478bd9Sstevel@tonic-gate 
678*7c478bd9Sstevel@tonic-gate 	case UFUNC:
679*7c478bd9Sstevel@tonic-gate 	case FUNC:
680*7c478bd9Sstevel@tonic-gate 	case GETLINE:
681*7c478bd9Sstevel@tonic-gate 	case '!':
682*7c478bd9Sstevel@tonic-gate 	case '$':
683*7c478bd9Sstevel@tonic-gate 	case '(':
684*7c478bd9Sstevel@tonic-gate 		if (catterm) {
685*7c478bd9Sstevel@tonic-gate 			savetoken = c;
686*7c478bd9Sstevel@tonic-gate 			c = CONCAT;
687*7c478bd9Sstevel@tonic-gate 			catterm = 0;
688*7c478bd9Sstevel@tonic-gate 		}
689*7c478bd9Sstevel@tonic-gate 		break;
690*7c478bd9Sstevel@tonic-gate 
691*7c478bd9Sstevel@tonic-gate 	/*{*/case '}':
692*7c478bd9Sstevel@tonic-gate 		if (nbrace == 0)
693*7c478bd9Sstevel@tonic-gate 			savetoken = ';';
694*7c478bd9Sstevel@tonic-gate 	case ';':
695*7c478bd9Sstevel@tonic-gate 		inprint = 0;
696*7c478bd9Sstevel@tonic-gate 	default:
697*7c478bd9Sstevel@tonic-gate 		if (c == DEFFUNC)
698*7c478bd9Sstevel@tonic-gate 			isfuncdef = 1;
699*7c478bd9Sstevel@tonic-gate 		catterm = 0;
700*7c478bd9Sstevel@tonic-gate 	}
701*7c478bd9Sstevel@tonic-gate 	lexlast = c;
702*7c478bd9Sstevel@tonic-gate 	if (wasfield)
703*7c478bd9Sstevel@tonic-gate 		wasfield--;
704*7c478bd9Sstevel@tonic-gate 	/*
705*7c478bd9Sstevel@tonic-gate 	 * Map character constants to symbolic names.
706*7c478bd9Sstevel@tonic-gate 	 */
707*7c478bd9Sstevel@tonic-gate 	for (i = 0; ctosym[i].c != 0; i++)
708*7c478bd9Sstevel@tonic-gate 		if (c == ctosym[i].c) {
709*7c478bd9Sstevel@tonic-gate 			c = ctosym[i].sym;
710*7c478bd9Sstevel@tonic-gate 			break;
711*7c478bd9Sstevel@tonic-gate 		}
712*7c478bd9Sstevel@tonic-gate 	return ((int)c);
713*7c478bd9Sstevel@tonic-gate }
714*7c478bd9Sstevel@tonic-gate 
715*7c478bd9Sstevel@tonic-gate /*
716*7c478bd9Sstevel@tonic-gate  * Read a number for the lexical analyzer.
717*7c478bd9Sstevel@tonic-gate  * Input is the first character of the number.
718*7c478bd9Sstevel@tonic-gate  * Return value is the lexical type.
719*7c478bd9Sstevel@tonic-gate  */
720*7c478bd9Sstevel@tonic-gate static int
721*7c478bd9Sstevel@tonic-gate lexnumber(wint_t c)
722*7c478bd9Sstevel@tonic-gate {
723*7c478bd9Sstevel@tonic-gate 	register wchar_t *cp;
724*7c478bd9Sstevel@tonic-gate 	register int dotfound = 0;
725*7c478bd9Sstevel@tonic-gate 	register int efound = 0;
726*7c478bd9Sstevel@tonic-gate 	INT number;
727*7c478bd9Sstevel@tonic-gate 
728*7c478bd9Sstevel@tonic-gate 	cp = linebuf;
729*7c478bd9Sstevel@tonic-gate 	do {
730*7c478bd9Sstevel@tonic-gate 		if (iswdigit(c))
731*7c478bd9Sstevel@tonic-gate 			;
732*7c478bd9Sstevel@tonic-gate 		else if (c == '.') {
733*7c478bd9Sstevel@tonic-gate 			if (dotfound++)
734*7c478bd9Sstevel@tonic-gate 				break;
735*7c478bd9Sstevel@tonic-gate 		} else if (c=='e' || c=='E') {
736*7c478bd9Sstevel@tonic-gate 			if ((c = lexgetc())!='-'  &&  c!='+') {
737*7c478bd9Sstevel@tonic-gate 				lexungetc(c);
738*7c478bd9Sstevel@tonic-gate 				c = 'e';
739*7c478bd9Sstevel@tonic-gate 			} else
740*7c478bd9Sstevel@tonic-gate 				*cp++ = 'e';
741*7c478bd9Sstevel@tonic-gate 			if (efound++)
742*7c478bd9Sstevel@tonic-gate 				break;
743*7c478bd9Sstevel@tonic-gate 		} else
744*7c478bd9Sstevel@tonic-gate 			break;
745*7c478bd9Sstevel@tonic-gate 		*cp++ = c;
746*7c478bd9Sstevel@tonic-gate 	} while ((c = lexgetc()) != WEOF);
747*7c478bd9Sstevel@tonic-gate 	*cp = '\0';
748*7c478bd9Sstevel@tonic-gate 	if (dotfound && cp==linebuf+1)
749*7c478bd9Sstevel@tonic-gate 		return (DOT);
750*7c478bd9Sstevel@tonic-gate 	lexungetc(c);
751*7c478bd9Sstevel@tonic-gate 	errno = 0;
752*7c478bd9Sstevel@tonic-gate 	if (!dotfound
753*7c478bd9Sstevel@tonic-gate 	 && !efound
754*7c478bd9Sstevel@tonic-gate 	 && ((number=wcstol(linebuf, (wchar_t **)0, 10)), errno!=ERANGE))
755*7c478bd9Sstevel@tonic-gate 		yylval.node = intnode(number);
756*7c478bd9Sstevel@tonic-gate 	else
757*7c478bd9Sstevel@tonic-gate 		yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0));
758*7c478bd9Sstevel@tonic-gate 	return (CONSTANT);
759*7c478bd9Sstevel@tonic-gate }
760*7c478bd9Sstevel@tonic-gate 
761*7c478bd9Sstevel@tonic-gate /*
762*7c478bd9Sstevel@tonic-gate  * Read an identifier.
763*7c478bd9Sstevel@tonic-gate  * Input is first character of identifier.
764*7c478bd9Sstevel@tonic-gate  * Return VAR.
765*7c478bd9Sstevel@tonic-gate  */
766*7c478bd9Sstevel@tonic-gate static int
767*7c478bd9Sstevel@tonic-gate lexid(wint_t c)
768*7c478bd9Sstevel@tonic-gate {
769*7c478bd9Sstevel@tonic-gate 	register wchar_t *cp;
770*7c478bd9Sstevel@tonic-gate 	register size_t i;
771*7c478bd9Sstevel@tonic-gate 	register NODE *np;
772*7c478bd9Sstevel@tonic-gate 
773*7c478bd9Sstevel@tonic-gate 	cp = linebuf;
774*7c478bd9Sstevel@tonic-gate 	do {
775*7c478bd9Sstevel@tonic-gate 		*cp++ = c;
776*7c478bd9Sstevel@tonic-gate 		c = lexgetc();
777*7c478bd9Sstevel@tonic-gate 	} while (iswalpha(c) || iswdigit(c) || c=='_');
778*7c478bd9Sstevel@tonic-gate 	*cp = '\0';
779*7c478bd9Sstevel@tonic-gate 	lexungetc(c);
780*7c478bd9Sstevel@tonic-gate 	yylval.node = np = vlook(linebuf);
781*7c478bd9Sstevel@tonic-gate 
782*7c478bd9Sstevel@tonic-gate 	switch(np->n_type) {
783*7c478bd9Sstevel@tonic-gate 	case KEYWORD:
784*7c478bd9Sstevel@tonic-gate 		switch (np->n_keywtype) {
785*7c478bd9Sstevel@tonic-gate 		case PRINT:
786*7c478bd9Sstevel@tonic-gate 		case PRINTF:
787*7c478bd9Sstevel@tonic-gate 			++inprint;
788*7c478bd9Sstevel@tonic-gate 		default:
789*7c478bd9Sstevel@tonic-gate 			return ((int)np->n_keywtype);
790*7c478bd9Sstevel@tonic-gate 		}
791*7c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
792*7c478bd9Sstevel@tonic-gate 
793*7c478bd9Sstevel@tonic-gate 	case ARRAY:
794*7c478bd9Sstevel@tonic-gate 	case VAR:
795*7c478bd9Sstevel@tonic-gate 		/*
796*7c478bd9Sstevel@tonic-gate 		 * If reading the argument list, create a dummy node
797*7c478bd9Sstevel@tonic-gate 		 * for the duration of that function. These variables
798*7c478bd9Sstevel@tonic-gate 		 * can be removed from the symbol table at function end
799*7c478bd9Sstevel@tonic-gate 		 * but they must still exist because the execution tree
800*7c478bd9Sstevel@tonic-gate 		 * knows about them.
801*7c478bd9Sstevel@tonic-gate 		 */
802*7c478bd9Sstevel@tonic-gate 		if (funparm) {
803*7c478bd9Sstevel@tonic-gate do_funparm:
804*7c478bd9Sstevel@tonic-gate 			np = emptynode(PARM, i=(cp-linebuf));
805*7c478bd9Sstevel@tonic-gate 			np->n_flags = FSTRING;
806*7c478bd9Sstevel@tonic-gate 			np->n_string = _null;
807*7c478bd9Sstevel@tonic-gate 			np->n_strlen = 0;
808*7c478bd9Sstevel@tonic-gate 			(void) memcpy(np->n_name, linebuf,
809*7c478bd9Sstevel@tonic-gate 				(i+1) * sizeof(wchar_t));
810*7c478bd9Sstevel@tonic-gate 			addsymtab(np);
811*7c478bd9Sstevel@tonic-gate 			yylval.node = np;
812*7c478bd9Sstevel@tonic-gate 		} else if (np == varNF || (np == varFS &&
813*7c478bd9Sstevel@tonic-gate 			(!doing_begin || begin_getline))) {
814*7c478bd9Sstevel@tonic-gate 			/*
815*7c478bd9Sstevel@tonic-gate 			 * If the user program references NF or sets
816*7c478bd9Sstevel@tonic-gate 			 * FS either outside of a begin block or
817*7c478bd9Sstevel@tonic-gate 			 * in a begin block after a getline then the
818*7c478bd9Sstevel@tonic-gate 			 * input line will be split immediately upon read
819*7c478bd9Sstevel@tonic-gate 			 * rather than when a field is first referenced.
820*7c478bd9Sstevel@tonic-gate 			 */
821*7c478bd9Sstevel@tonic-gate 			needsplit = 1;
822*7c478bd9Sstevel@tonic-gate 		} else if (np == varENVIRON)
823*7c478bd9Sstevel@tonic-gate 			needenviron = 1;
824*7c478bd9Sstevel@tonic-gate 	case PARM:
825*7c478bd9Sstevel@tonic-gate 		return (VAR);
826*7c478bd9Sstevel@tonic-gate 
827*7c478bd9Sstevel@tonic-gate 	case UFUNC:
828*7c478bd9Sstevel@tonic-gate 		/*
829*7c478bd9Sstevel@tonic-gate 		 * It is ok to redefine functions as parameters
830*7c478bd9Sstevel@tonic-gate 		 */
831*7c478bd9Sstevel@tonic-gate 		if (funparm) goto do_funparm;
832*7c478bd9Sstevel@tonic-gate 	case FUNC:
833*7c478bd9Sstevel@tonic-gate 	case GETLINE:
834*7c478bd9Sstevel@tonic-gate 		/*
835*7c478bd9Sstevel@tonic-gate 		 * When a getline is encountered, clear the 'doing_begin' flag.
836*7c478bd9Sstevel@tonic-gate 		 * This will force the 'needsplit' flag to be set, even inside
837*7c478bd9Sstevel@tonic-gate 		 * a begin block, if FS is altered. (See VAR case above)
838*7c478bd9Sstevel@tonic-gate 		 */
839*7c478bd9Sstevel@tonic-gate 		if (doing_begin)
840*7c478bd9Sstevel@tonic-gate 			begin_getline = 1;
841*7c478bd9Sstevel@tonic-gate 		return (np->n_type);
842*7c478bd9Sstevel@tonic-gate 	}
843*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
844*7c478bd9Sstevel@tonic-gate }
845*7c478bd9Sstevel@tonic-gate 
846*7c478bd9Sstevel@tonic-gate /*
847*7c478bd9Sstevel@tonic-gate  * Read a string for the lexical analyzer.
848*7c478bd9Sstevel@tonic-gate  * `endc' terminates the string.
849*7c478bd9Sstevel@tonic-gate  */
850*7c478bd9Sstevel@tonic-gate static int
851*7c478bd9Sstevel@tonic-gate lexstring(wint_t endc)
852*7c478bd9Sstevel@tonic-gate {
853*7c478bd9Sstevel@tonic-gate 	register size_t length = lexescape(endc, 0, 0);
854*7c478bd9Sstevel@tonic-gate 
855*7c478bd9Sstevel@tonic-gate 	yylval.node = stringnode(linebuf, FALLOC, length);
856*7c478bd9Sstevel@tonic-gate 	return (CONSTANT);
857*7c478bd9Sstevel@tonic-gate }
858*7c478bd9Sstevel@tonic-gate 
859*7c478bd9Sstevel@tonic-gate /*
860*7c478bd9Sstevel@tonic-gate  * Read a regular expression.
861*7c478bd9Sstevel@tonic-gate  */
862*7c478bd9Sstevel@tonic-gate static int
863*7c478bd9Sstevel@tonic-gate lexregexp(wint_t endc)
864*7c478bd9Sstevel@tonic-gate {
865*7c478bd9Sstevel@tonic-gate 	(void) lexescape(endc, 1, 0);
866*7c478bd9Sstevel@tonic-gate 	yylval.node = renode(linebuf);
867*7c478bd9Sstevel@tonic-gate 	return (URE);
868*7c478bd9Sstevel@tonic-gate }
869*7c478bd9Sstevel@tonic-gate 
870*7c478bd9Sstevel@tonic-gate /*
871*7c478bd9Sstevel@tonic-gate  * Process a string, converting the escape characters as required by
872*7c478bd9Sstevel@tonic-gate  * 1003.2. The processed string ends up in the global linebuf[]. This
873*7c478bd9Sstevel@tonic-gate  * routine also changes the value of 'progfd' - the program file
874*7c478bd9Sstevel@tonic-gate  * descriptor, so it should be used with some care. It is presently used to
875*7c478bd9Sstevel@tonic-gate  * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()).
876*7c478bd9Sstevel@tonic-gate  */
877*7c478bd9Sstevel@tonic-gate void
878*7c478bd9Sstevel@tonic-gate strescape(wchar_t *str)
879*7c478bd9Sstevel@tonic-gate {
880*7c478bd9Sstevel@tonic-gate 	progptr = str;
881*7c478bd9Sstevel@tonic-gate 	proglen = wcslen(str) + 1;	/* Include \0 */
882*7c478bd9Sstevel@tonic-gate 	(void) lexescape('\0', 0, 1);
883*7c478bd9Sstevel@tonic-gate 	progptr = NULL;
884*7c478bd9Sstevel@tonic-gate }
885*7c478bd9Sstevel@tonic-gate 
886*7c478bd9Sstevel@tonic-gate /*
887*7c478bd9Sstevel@tonic-gate  * Read a string or regular expression, terminated by ``endc'',
888*7c478bd9Sstevel@tonic-gate  * for lexical analyzer, processing escape sequences.
889*7c478bd9Sstevel@tonic-gate  * Return string length.
890*7c478bd9Sstevel@tonic-gate  */
891*7c478bd9Sstevel@tonic-gate static size_t
892*7c478bd9Sstevel@tonic-gate lexescape(wint_t endc, int regx, int cmd_line_operand)
893*7c478bd9Sstevel@tonic-gate {
894*7c478bd9Sstevel@tonic-gate 	static char nlre[256];
895*7c478bd9Sstevel@tonic-gate 	static char nlstr[256];
896*7c478bd9Sstevel@tonic-gate 	static char eofre[256];
897*7c478bd9Sstevel@tonic-gate 	static char eofstr[256];
898*7c478bd9Sstevel@tonic-gate 	int first_time = 1;
899*7c478bd9Sstevel@tonic-gate 	wint_t c;
900*7c478bd9Sstevel@tonic-gate 	wchar_t *cp;
901*7c478bd9Sstevel@tonic-gate 	int n, max;
902*7c478bd9Sstevel@tonic-gate 
903*7c478bd9Sstevel@tonic-gate 	if (first_time == 1) {
904*7c478bd9Sstevel@tonic-gate 		(void) strcpy(nlre, gettext("Newline in regular expression\n"));
905*7c478bd9Sstevel@tonic-gate 		(void) strcpy(nlstr, gettext("Newline in string\n"));
906*7c478bd9Sstevel@tonic-gate 		(void) strcpy(eofre, gettext("EOF in regular expression\n"));
907*7c478bd9Sstevel@tonic-gate 		(void) strcpy(eofstr, gettext("EOF in string\n"));
908*7c478bd9Sstevel@tonic-gate 		first_time = 0;
909*7c478bd9Sstevel@tonic-gate         }
910*7c478bd9Sstevel@tonic-gate 
911*7c478bd9Sstevel@tonic-gate 	cp = linebuf;
912*7c478bd9Sstevel@tonic-gate 	while ((c = lexgetc()) != endc) {
913*7c478bd9Sstevel@tonic-gate 		if (c == '\n')
914*7c478bd9Sstevel@tonic-gate 			awkerr(regx ? nlre : nlstr);
915*7c478bd9Sstevel@tonic-gate 		if (c == '\\') {
916*7c478bd9Sstevel@tonic-gate 			switch (c = lexgetc(), c) {
917*7c478bd9Sstevel@tonic-gate 			case '\\':
918*7c478bd9Sstevel@tonic-gate 				if (regx)
919*7c478bd9Sstevel@tonic-gate 					*cp++ = '\\';
920*7c478bd9Sstevel@tonic-gate 				break;
921*7c478bd9Sstevel@tonic-gate 
922*7c478bd9Sstevel@tonic-gate 			case '/':
923*7c478bd9Sstevel@tonic-gate 				c = '/';
924*7c478bd9Sstevel@tonic-gate 				break;
925*7c478bd9Sstevel@tonic-gate 
926*7c478bd9Sstevel@tonic-gate 			case 'n':
927*7c478bd9Sstevel@tonic-gate 				c = '\n';
928*7c478bd9Sstevel@tonic-gate 				break;
929*7c478bd9Sstevel@tonic-gate 
930*7c478bd9Sstevel@tonic-gate 			case 'b':
931*7c478bd9Sstevel@tonic-gate 				c = '\b';
932*7c478bd9Sstevel@tonic-gate 				break;
933*7c478bd9Sstevel@tonic-gate 
934*7c478bd9Sstevel@tonic-gate 			case 't':
935*7c478bd9Sstevel@tonic-gate 				c = '\t';
936*7c478bd9Sstevel@tonic-gate 				break;
937*7c478bd9Sstevel@tonic-gate 
938*7c478bd9Sstevel@tonic-gate 			case 'r':
939*7c478bd9Sstevel@tonic-gate 				c = '\r';
940*7c478bd9Sstevel@tonic-gate 				break;
941*7c478bd9Sstevel@tonic-gate 
942*7c478bd9Sstevel@tonic-gate 			case 'f':
943*7c478bd9Sstevel@tonic-gate 				c = '\f';
944*7c478bd9Sstevel@tonic-gate 				break;
945*7c478bd9Sstevel@tonic-gate 
946*7c478bd9Sstevel@tonic-gate 			case 'v':
947*7c478bd9Sstevel@tonic-gate 				c = '\v';
948*7c478bd9Sstevel@tonic-gate 				break;
949*7c478bd9Sstevel@tonic-gate 
950*7c478bd9Sstevel@tonic-gate 			case 'a':
951*7c478bd9Sstevel@tonic-gate 				c = (char) 0x07;
952*7c478bd9Sstevel@tonic-gate 				break;
953*7c478bd9Sstevel@tonic-gate 
954*7c478bd9Sstevel@tonic-gate 			case 'x':
955*7c478bd9Sstevel@tonic-gate 				n = 0;
956*7c478bd9Sstevel@tonic-gate 				while (iswxdigit(c = lexgetc())) {
957*7c478bd9Sstevel@tonic-gate 					if (iswdigit(c))
958*7c478bd9Sstevel@tonic-gate 						c -= '0';
959*7c478bd9Sstevel@tonic-gate 					else if (iswupper(c))
960*7c478bd9Sstevel@tonic-gate 						c -= 'A'-10;
961*7c478bd9Sstevel@tonic-gate 					else
962*7c478bd9Sstevel@tonic-gate 						c -= 'a'-10;
963*7c478bd9Sstevel@tonic-gate 					n = (n<<4) + c;
964*7c478bd9Sstevel@tonic-gate 				}
965*7c478bd9Sstevel@tonic-gate 				lexungetc(c);
966*7c478bd9Sstevel@tonic-gate 				c = n;
967*7c478bd9Sstevel@tonic-gate 				break;
968*7c478bd9Sstevel@tonic-gate 
969*7c478bd9Sstevel@tonic-gate 			case '0':
970*7c478bd9Sstevel@tonic-gate 			case '1':
971*7c478bd9Sstevel@tonic-gate 			case '2':
972*7c478bd9Sstevel@tonic-gate 			case '3':
973*7c478bd9Sstevel@tonic-gate 			case '4':
974*7c478bd9Sstevel@tonic-gate 			case '5':
975*7c478bd9Sstevel@tonic-gate 			case '6':
976*7c478bd9Sstevel@tonic-gate 			case '7':
977*7c478bd9Sstevel@tonic-gate #if 0
978*7c478bd9Sstevel@tonic-gate /*
979*7c478bd9Sstevel@tonic-gate  * Posix.2 draft 10 disallows the use of back-referencing - it explicitly
980*7c478bd9Sstevel@tonic-gate  * requires processing of the octal escapes both in strings and
981*7c478bd9Sstevel@tonic-gate  * regular expressions. The following code is disabled instead of
982*7c478bd9Sstevel@tonic-gate  * removed as back-referencing may be reintroduced in a future draft
983*7c478bd9Sstevel@tonic-gate  * of the standard.
984*7c478bd9Sstevel@tonic-gate  */
985*7c478bd9Sstevel@tonic-gate 				/*
986*7c478bd9Sstevel@tonic-gate 				 * For regular expressions, we disallow
987*7c478bd9Sstevel@tonic-gate 				 * \ooo to mean octal character, in favour
988*7c478bd9Sstevel@tonic-gate 				 * of back referencing.
989*7c478bd9Sstevel@tonic-gate 				 */
990*7c478bd9Sstevel@tonic-gate 				if (regx) {
991*7c478bd9Sstevel@tonic-gate 					*cp++ = '\\';
992*7c478bd9Sstevel@tonic-gate 					break;
993*7c478bd9Sstevel@tonic-gate 				}
994*7c478bd9Sstevel@tonic-gate #endif
995*7c478bd9Sstevel@tonic-gate 				max = 3;
996*7c478bd9Sstevel@tonic-gate 				n = 0;
997*7c478bd9Sstevel@tonic-gate 				do {
998*7c478bd9Sstevel@tonic-gate 					n = (n<<3) + c-'0';
999*7c478bd9Sstevel@tonic-gate 					if ((c = lexgetc())>'7' || c<'0')
1000*7c478bd9Sstevel@tonic-gate 						break;
1001*7c478bd9Sstevel@tonic-gate 				} while (--max);
1002*7c478bd9Sstevel@tonic-gate 				lexungetc(c);
1003*7c478bd9Sstevel@tonic-gate 				/*
1004*7c478bd9Sstevel@tonic-gate 				 * an octal escape sequence must have at least
1005*7c478bd9Sstevel@tonic-gate 				 * 2 digits after the backslash, otherwise
1006*7c478bd9Sstevel@tonic-gate 				 * it gets passed straight thru for possible
1007*7c478bd9Sstevel@tonic-gate 				 * use in backreferencing.
1008*7c478bd9Sstevel@tonic-gate 				 */
1009*7c478bd9Sstevel@tonic-gate 				if (max == 3) {
1010*7c478bd9Sstevel@tonic-gate 					*cp++ = '\\';
1011*7c478bd9Sstevel@tonic-gate 					n += '0';
1012*7c478bd9Sstevel@tonic-gate 				}
1013*7c478bd9Sstevel@tonic-gate 				c = n;
1014*7c478bd9Sstevel@tonic-gate 				break;
1015*7c478bd9Sstevel@tonic-gate 
1016*7c478bd9Sstevel@tonic-gate 			case '\n':
1017*7c478bd9Sstevel@tonic-gate 				continue;
1018*7c478bd9Sstevel@tonic-gate 
1019*7c478bd9Sstevel@tonic-gate 			default:
1020*7c478bd9Sstevel@tonic-gate 				if (c != endc || cmd_line_operand) {
1021*7c478bd9Sstevel@tonic-gate 					*cp++ = '\\';
1022*7c478bd9Sstevel@tonic-gate 					if (c == endc)
1023*7c478bd9Sstevel@tonic-gate 						lexungetc(c);
1024*7c478bd9Sstevel@tonic-gate 				}
1025*7c478bd9Sstevel@tonic-gate 			}
1026*7c478bd9Sstevel@tonic-gate 		}
1027*7c478bd9Sstevel@tonic-gate 		if (c == WEOF)
1028*7c478bd9Sstevel@tonic-gate 			awkerr(regx ? eofre : eofstr);
1029*7c478bd9Sstevel@tonic-gate 		*cp++ = c;
1030*7c478bd9Sstevel@tonic-gate 	}
1031*7c478bd9Sstevel@tonic-gate 	*cp = '\0';
1032*7c478bd9Sstevel@tonic-gate 	return (cp - linebuf);
1033*7c478bd9Sstevel@tonic-gate }
1034*7c478bd9Sstevel@tonic-gate 
1035*7c478bd9Sstevel@tonic-gate /*
1036*7c478bd9Sstevel@tonic-gate  * Build a regular expression NODE.
1037*7c478bd9Sstevel@tonic-gate  * Argument is the string holding the expression.
1038*7c478bd9Sstevel@tonic-gate  */
1039*7c478bd9Sstevel@tonic-gate NODE *
1040*7c478bd9Sstevel@tonic-gate renode(wchar_t *s)
1041*7c478bd9Sstevel@tonic-gate {
1042*7c478bd9Sstevel@tonic-gate 	register NODE *np;
1043*7c478bd9Sstevel@tonic-gate 	int n;
1044*7c478bd9Sstevel@tonic-gate 
1045*7c478bd9Sstevel@tonic-gate 	np = emptynode(RE, 0);
1046*7c478bd9Sstevel@tonic-gate 	np->n_left = np->n_right = NNULL;
1047*7c478bd9Sstevel@tonic-gate 	np->n_regexp = (REGEXP)emalloc(sizeof(regex_t));
1048*7c478bd9Sstevel@tonic-gate 	if ((n = REGWCOMP(np->n_regexp, s, REG_EXTENDED)) != REG_OK) {
1049*7c478bd9Sstevel@tonic-gate 		int m;
1050*7c478bd9Sstevel@tonic-gate 		char *p;
1051*7c478bd9Sstevel@tonic-gate 
1052*7c478bd9Sstevel@tonic-gate 		m = regerror(n, np->n_regexp, NULL, 0);
1053*7c478bd9Sstevel@tonic-gate 		p = (char *)emalloc(m);
1054*7c478bd9Sstevel@tonic-gate 		regerror(n, np->n_regexp, p, m);
1055*7c478bd9Sstevel@tonic-gate 		awkerr("/%S/: %s", s, p);
1056*7c478bd9Sstevel@tonic-gate 	}
1057*7c478bd9Sstevel@tonic-gate 	return (np);
1058*7c478bd9Sstevel@tonic-gate }
1059*7c478bd9Sstevel@tonic-gate /*
1060*7c478bd9Sstevel@tonic-gate  * Get a character for the lexical analyser routine.
1061*7c478bd9Sstevel@tonic-gate  */
1062*7c478bd9Sstevel@tonic-gate static wint_t
1063*7c478bd9Sstevel@tonic-gate lexgetc()
1064*7c478bd9Sstevel@tonic-gate {
1065*7c478bd9Sstevel@tonic-gate 	register wint_t c;
1066*7c478bd9Sstevel@tonic-gate 	static char **files = &progfiles[0];
1067*7c478bd9Sstevel@tonic-gate 
1068*7c478bd9Sstevel@tonic-gate 	if (progfp!=FNULL && (c = fgetwc(progfp))!=WEOF)
1069*7c478bd9Sstevel@tonic-gate 		;
1070*7c478bd9Sstevel@tonic-gate 	else {
1071*7c478bd9Sstevel@tonic-gate 		if (progptr != NULL) {
1072*7c478bd9Sstevel@tonic-gate 			if (proglen-- <= 0)
1073*7c478bd9Sstevel@tonic-gate 				c = WEOF;
1074*7c478bd9Sstevel@tonic-gate 			else
1075*7c478bd9Sstevel@tonic-gate 				c = *progptr++;
1076*7c478bd9Sstevel@tonic-gate 		} else {
1077*7c478bd9Sstevel@tonic-gate 			if (progfp != FNULL)
1078*7c478bd9Sstevel@tonic-gate 				if (progfp != stdin)
1079*7c478bd9Sstevel@tonic-gate 					(void)fclose(progfp);
1080*7c478bd9Sstevel@tonic-gate 				else
1081*7c478bd9Sstevel@tonic-gate 					clearerr(progfp);
1082*7c478bd9Sstevel@tonic-gate 				progfp = FNULL;
1083*7c478bd9Sstevel@tonic-gate 			if (files < progfilep) {
1084*7c478bd9Sstevel@tonic-gate 				filename = *files++;
1085*7c478bd9Sstevel@tonic-gate 				lineno = 1;
1086*7c478bd9Sstevel@tonic-gate 				if (filename[0]=='-' && filename[1]=='\0')
1087*7c478bd9Sstevel@tonic-gate 					progfp = stdin;
1088*7c478bd9Sstevel@tonic-gate 				else if ((progfp=fopen(filename, r)) == FNULL) {
1089*7c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
1090*7c478bd9Sstevel@tonic-gate 				gettext("script file \"%s\""), filename);
1091*7c478bd9Sstevel@tonic-gate 					exit(1);
1092*7c478bd9Sstevel@tonic-gate 				}
1093*7c478bd9Sstevel@tonic-gate 				c = fgetwc(progfp);
1094*7c478bd9Sstevel@tonic-gate 			}
1095*7c478bd9Sstevel@tonic-gate 		}
1096*7c478bd9Sstevel@tonic-gate 	}
1097*7c478bd9Sstevel@tonic-gate 	if (c == '\n')
1098*7c478bd9Sstevel@tonic-gate 		++lineno;
1099*7c478bd9Sstevel@tonic-gate 	if (conptr >= &context[NCONTEXT])
1100*7c478bd9Sstevel@tonic-gate 		conptr = &context[0];
1101*7c478bd9Sstevel@tonic-gate 	if (c != WEOF)
1102*7c478bd9Sstevel@tonic-gate 		*conptr++ = c;
1103*7c478bd9Sstevel@tonic-gate 	return (c);
1104*7c478bd9Sstevel@tonic-gate }
1105*7c478bd9Sstevel@tonic-gate 
1106*7c478bd9Sstevel@tonic-gate /*
1107*7c478bd9Sstevel@tonic-gate  * Return a character for lexical analyser.
1108*7c478bd9Sstevel@tonic-gate  * Only one returned character is (not enforced) legitimite.
1109*7c478bd9Sstevel@tonic-gate  */
1110*7c478bd9Sstevel@tonic-gate static void
1111*7c478bd9Sstevel@tonic-gate lexungetc(wint_t c)
1112*7c478bd9Sstevel@tonic-gate {
1113*7c478bd9Sstevel@tonic-gate 	if (c == '\n')
1114*7c478bd9Sstevel@tonic-gate 		--lineno;
1115*7c478bd9Sstevel@tonic-gate 	if (c != WEOF) {
1116*7c478bd9Sstevel@tonic-gate 		if (conptr == &context[0])
1117*7c478bd9Sstevel@tonic-gate 			conptr = &context[NCONTEXT];
1118*7c478bd9Sstevel@tonic-gate 		*--conptr = '\0';
1119*7c478bd9Sstevel@tonic-gate 	}
1120*7c478bd9Sstevel@tonic-gate 	if (progfp != FNULL) {
1121*7c478bd9Sstevel@tonic-gate 		(void)ungetwc(c, progfp);
1122*7c478bd9Sstevel@tonic-gate 		return;
1123*7c478bd9Sstevel@tonic-gate 	}
1124*7c478bd9Sstevel@tonic-gate 	if (c == WEOF)
1125*7c478bd9Sstevel@tonic-gate 		return;
1126*7c478bd9Sstevel@tonic-gate 	*--progptr = c;
1127*7c478bd9Sstevel@tonic-gate 	proglen++;
1128*7c478bd9Sstevel@tonic-gate }
1129*7c478bd9Sstevel@tonic-gate 
1130*7c478bd9Sstevel@tonic-gate /*
1131*7c478bd9Sstevel@tonic-gate  * Syntax errors during parsing.
1132*7c478bd9Sstevel@tonic-gate  */
1133*7c478bd9Sstevel@tonic-gate void
1134*7c478bd9Sstevel@tonic-gate yyerror(char *s, ...)
1135*7c478bd9Sstevel@tonic-gate {
1136*7c478bd9Sstevel@tonic-gate 	if (lexlast==FUNC || lexlast==GETLINE || lexlast==KEYWORD)
1137*7c478bd9Sstevel@tonic-gate 		if (lexlast == KEYWORD)
1138*7c478bd9Sstevel@tonic-gate 			awkerr(gettext("inadmissible use of reserved keyword"));
1139*7c478bd9Sstevel@tonic-gate 		else
1140*7c478bd9Sstevel@tonic-gate 			awkerr(gettext("attempt to redefine builtin function"));
1141*7c478bd9Sstevel@tonic-gate 	awkerr(s);
1142*7c478bd9Sstevel@tonic-gate }
1143*7c478bd9Sstevel@tonic-gate 
/*
 * Error routine for all awk errors.
 * Takes a printf-style format and arguments and hands them to awkierr()
 * with no errno text requested.
 */
/* ARGSUSED */
void
awkerr(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(0, fmt, ap);
	va_end(ap);
}
1157*7c478bd9Sstevel@tonic-gate 
/*
 * Error routine like "awkerr", except that the message printed is
 * followed by an errno-specific indication (awkierr() is asked to
 * append the strerror() text).
 */
/* ARGSUSED */
void
awkperr(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(1, fmt, ap);
	va_end(ap);
}
1172*7c478bd9Sstevel@tonic-gate 
1173*7c478bd9Sstevel@tonic-gate /*
1174*7c478bd9Sstevel@tonic-gate  * Common internal routine for awkerr, awkperr
1175*7c478bd9Sstevel@tonic-gate  */
1176*7c478bd9Sstevel@tonic-gate static void
1177*7c478bd9Sstevel@tonic-gate awkierr(int perr, char *fmt, va_list ap)
1178*7c478bd9Sstevel@tonic-gate {
1179*7c478bd9Sstevel@tonic-gate 	static char sep1[] = "\n>>>\t";
1180*7c478bd9Sstevel@tonic-gate 	static char sep2[] = "\t<<<";
1181*7c478bd9Sstevel@tonic-gate 	int saveerr = errno;
1182*7c478bd9Sstevel@tonic-gate 
1183*7c478bd9Sstevel@tonic-gate 	(void) fprintf(stderr, "%s: ", _cmdname);
1184*7c478bd9Sstevel@tonic-gate 	if (running) {
1185*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("line %u ("),
1186*7c478bd9Sstevel@tonic-gate 		    curnode==NNULL ? 0 : curnode->n_lineno);
1187*7c478bd9Sstevel@tonic-gate 		if (phase == 0)
1188*7c478bd9Sstevel@tonic-gate 		      (void) fprintf(stderr, "NR=%lld): ", (INT)exprint(varNR));
1189*7c478bd9Sstevel@tonic-gate 		else
1190*7c478bd9Sstevel@tonic-gate 		      (void) fprintf(stderr, "%s): ",
1191*7c478bd9Sstevel@tonic-gate 			    phase==BEGIN ? s_BEGIN : s_END);
1192*7c478bd9Sstevel@tonic-gate 	} else if (lineno != 0) {
1193*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("file \"%s\": "), filename);
1194*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("line %u: "), lineno);
1195*7c478bd9Sstevel@tonic-gate 	}
1196*7c478bd9Sstevel@tonic-gate 	(void) vfprintf(stderr, gettext(fmt), ap);
1197*7c478bd9Sstevel@tonic-gate 	if (perr == 1)
1198*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, ": %s", strerror(saveerr));
1199*7c478bd9Sstevel@tonic-gate 	if (perr != 2 && !running) {
1200*7c478bd9Sstevel@tonic-gate 		register wchar_t *cp;
1201*7c478bd9Sstevel@tonic-gate 		register int n;
1202*7c478bd9Sstevel@tonic-gate 		register int c;
1203*7c478bd9Sstevel@tonic-gate 
1204*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("  Context is:%s"), sep1);
1205*7c478bd9Sstevel@tonic-gate 		cp = conptr;
1206*7c478bd9Sstevel@tonic-gate 		n = NCONTEXT;
1207*7c478bd9Sstevel@tonic-gate 		do {
1208*7c478bd9Sstevel@tonic-gate 			if (cp >= &context[NCONTEXT])
1209*7c478bd9Sstevel@tonic-gate 				cp = &context[0];
1210*7c478bd9Sstevel@tonic-gate 			if ((c = *cp++) != '\0')
1211*7c478bd9Sstevel@tonic-gate 				(void)fputs(c=='\n' ? sep1 : toprint(c),
1212*7c478bd9Sstevel@tonic-gate 					stderr);
1213*7c478bd9Sstevel@tonic-gate 		} while (--n != 0);
1214*7c478bd9Sstevel@tonic-gate 		(void)fputs(sep2, stderr);
1215*7c478bd9Sstevel@tonic-gate 	}
1216*7c478bd9Sstevel@tonic-gate 	(void) fprintf(stderr, "\n");
1217*7c478bd9Sstevel@tonic-gate 	exit(1);
1218*7c478bd9Sstevel@tonic-gate }
1219*7c478bd9Sstevel@tonic-gate 
1220*7c478bd9Sstevel@tonic-gate wchar_t *
1221*7c478bd9Sstevel@tonic-gate emalloc(unsigned n)
1222*7c478bd9Sstevel@tonic-gate {
1223*7c478bd9Sstevel@tonic-gate 	wchar_t *cp;
1224*7c478bd9Sstevel@tonic-gate 
1225*7c478bd9Sstevel@tonic-gate 	if ((cp = malloc(n)) == NULL)
1226*7c478bd9Sstevel@tonic-gate 		awkerr(nomem);
1227*7c478bd9Sstevel@tonic-gate 	return cp;
1228*7c478bd9Sstevel@tonic-gate }
1229*7c478bd9Sstevel@tonic-gate 
1230*7c478bd9Sstevel@tonic-gate wchar_t *
1231*7c478bd9Sstevel@tonic-gate erealloc(wchar_t *p, unsigned n)
1232*7c478bd9Sstevel@tonic-gate {
1233*7c478bd9Sstevel@tonic-gate 	wchar_t *cp;
1234*7c478bd9Sstevel@tonic-gate 
1235*7c478bd9Sstevel@tonic-gate 	if ((cp = realloc(p, n)) == NULL)
1236*7c478bd9Sstevel@tonic-gate 		awkerr(nomem);
1237*7c478bd9Sstevel@tonic-gate 	return cp;
1238*7c478bd9Sstevel@tonic-gate }
1239*7c478bd9Sstevel@tonic-gate 
1240*7c478bd9Sstevel@tonic-gate 
/*
 * Print the usage message for awk on stderr.
 * Returns 2.
 */
static int
usage()
{
	char *msg;

	msg = gettext(
"Usage:	awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n"
"	awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] [file ...]\n");
	(void) fprintf(stderr, msg);
	return (2);
}
1252*7c478bd9Sstevel@tonic-gate 
1253*7c478bd9Sstevel@tonic-gate 
/*
 * Convert the multibyte string `str' to a wide string.
 * The result lives in a single buffer owned by this function: each call
 * frees the buffer returned by the previous call, so callers must not
 * free the result and must not use it after calling mbconvert() again.
 * The return value is whatever mbstowcsdup() returns.
 */
static wchar_t *
mbconvert(char *str)
{
	static wchar_t *last = 0;

	/* free() ignores a null pointer, so no guard is needed. */
	free(last);
	last = mbstowcsdup(str);
	return (last);
}
1263*7c478bd9Sstevel@tonic-gate 
/*
 * Convert the wide string `str' to a multibyte string.
 * The result lives in a single buffer owned by this function: each call
 * frees the buffer returned by the previous call, so callers must not
 * free the result and must not use it after calling mbunconvert() again.
 * The return value is whatever wcstombsdup() returns.
 */
char *
mbunconvert(wchar_t *str)
{
	static char *last = 0;

	/* free() ignores a null pointer, so no guard is needed. */
	free(last);
	last = wcstombsdup(str);
	return (last);
}
1273*7c478bd9Sstevel@tonic-gate 
1274*7c478bd9Sstevel@tonic-gate /*
1275*7c478bd9Sstevel@tonic-gate  * Solaris port - following functions are typical MKS functions written
1276*7c478bd9Sstevel@tonic-gate  * to work for Solaris.
1277*7c478bd9Sstevel@tonic-gate  */
1278*7c478bd9Sstevel@tonic-gate 
/*
 * Duplicate the multibyte string `s' as a freshly malloc()ed
 * wide-character string.
 *
 * Returns the new string, or NULL if memory could not be obtained or
 * mbstowcs() rejected the input.  On a conversion failure the scratch
 * buffer is released (it used to be leaked) and errno is left as set by
 * mbstowcs().  The caller owns the result and releases it with free().
 */
wchar_t *
mbstowcsdup(char *s)
{
	size_t n;
	wchar_t *w;

	/* A wide string never needs more elements than the source has bytes */
	n = strlen(s) + 1;
	if (n > (size_t)-1 / sizeof (wchar_t))
		return (NULL);
	if ((w = malloc(n * sizeof (wchar_t))) == NULL)
		return (NULL);

	if (mbstowcs(w, s, n) == (size_t)-1) {
		int saverr = errno;

		free(w);
		errno = saverr;
		return (NULL);
	}
	return (w);
}
1295*7c478bd9Sstevel@tonic-gate 
/*
 * Duplicate the wide-character string `w' as a freshly malloc()ed
 * multibyte string.
 *
 * Returns the new string, or NULL if memory could not be obtained or
 * wcstombs() could not convert a character (errno is preserved from
 * wcstombs() in that case).  The caller owns the result and releases it
 * with free().
 */
char *
wcstombsdup(wchar_t *w)
{
	size_t n, used;
	char *mb, *shrunk;

	/* Fetch memory for worst case string length */
	n = wcslen(w) + 1;
	if (n > (size_t)-1 / MB_CUR_MAX) {
		errno = ENOMEM;
		return (NULL);
	}
	n *= MB_CUR_MAX;
	if ((mb = malloc(n)) == NULL)
		return (NULL);

	/* Convert the string */
	if ((used = wcstombs(mb, w, n)) == (size_t)-1) {
		int saverr = errno;

		free(mb);
		errno = saverr;
		return (NULL);
	}

	/*
	 * Shrink the string down.  If the shrink fails the original buffer
	 * is still valid, so hand that back rather than leaking it.
	 */
	if (used < n) {
		shrunk = realloc(mb, used + 1);
		if (shrunk != NULL)
			mb = shrunk;
	}
	return (mb);
}
1324*7c478bd9Sstevel@tonic-gate 
/*
 * The upe_ctrls[] table contains the printable 'control-sequences' for the
 * character values 0..31 and 127.  Entry 0 is for value 127; the entry for
 * a character value v in 0..31 is at index v + 1.
 */
static const char *const upe_ctrls[] =
{
	"^?",
	"^@",  "^A",  "^B",  "^C",  "^D",  "^E",  "^F",  "^G",
	"^H",  "^I",  "^J",  "^K",  "^L",  "^M",  "^N",  "^O",
	"^P",  "^Q",  "^R",  "^S",  "^T",  "^U",  "^V",  "^W",
	"^X",  "^Y",  "^Z",  "^[",  "^\\", "^]",  "^^",  "^_"
};


/*
 * Return a printable string corresponding to the given character value.
 *
 *  - If the character cannot be converted by wctomb(), return "\" followed
 *    by its value in hexadecimal.
 *  - If the character is printable, return its multibyte form.
 *  - If it is 127 or in 0..31, return the matching upe_ctrls[] entry
 *    (Table 5-101 in the UPE).
 *  - Otherwise return one octal escape ("\ooo") per byte of its multibyte
 *    form.
 *
 * The result points either into upe_ctrls[] or into a static buffer that
 * is overwritten by the next call.
 */
static const char *
toprint(wchar_t c)
{
	int n, len;
	unsigned char *ptr;
	static char mbch[MB_LEN_MAX+1];
	static char buf[5 * MB_LEN_MAX + 1];

	if ((n = wctomb(mbch, c)) == -1) {
		/* Should never happen */
		/* cast: %x takes unsigned int, wchar_t may be a wider type */
		(void) snprintf(buf, sizeof (buf), "\\%x", (unsigned int)c);
		return (buf);
	}
	mbch[n] = '\0';

	if (iswprint(c))
		return (mbch);
	if (c == 127)
		return (upe_ctrls[0]);
	/* c >= 0: a negative value must not index before the table */
	if (c >= 0 && c < 32) {
		/* Print as in Table 5-101 in the UPE */
		return (upe_ctrls[c + 1]);
	}

	/* Print as an octal escape sequence, four characters per byte */
	len = 0;
	for (ptr = (unsigned char *)mbch; 0 < n; --n, ++ptr) {
		len += snprintf(buf + len, sizeof (buf) - (size_t)len,
		    "\\%03o", *ptr);
	}
	return (buf);
}
1375*7c478bd9Sstevel@tonic-gate 
/*
 * Translate a byte offset into the multibyte form of `astring' into the
 * corresponding wide-character index.
 *
 * Characters are consumed from the start of `astring', adding up the
 * number of bytes wctomb() produces for each, until at least `off' bytes
 * are covered.  A character wctomb() cannot convert is counted as one
 * byte.  The walk also stops at the terminating null, so an offset beyond
 * the end of the string yields the string's length instead of reading
 * past the terminator.
 */
static int
wcoff(const wchar_t *astring, const int off)
{
	const wchar_t *s = astring;
	int c = 0;
	char mb[MB_LEN_MAX];

	while (c < off && *s != L'\0') {
		int n = wctomb(mb, *s);

		if (n == 0)
			break;
		if (n == -1)
			n = 1;
		c += n;
		s++;
	}

	return ((int)(s - astring));
}
1395*7c478bd9Sstevel@tonic-gate 
/*
 * Compile the wide-character regular expression `pattern' into `r'.
 *
 * The pattern is converted to a multibyte string with wcstombsdup() and
 * passed to regcomp() together with `uflags'.  Returns REG_ESPACE if the
 * conversion yields NULL, otherwise whatever regcomp() returns.
 */
int
int_regwcomp(register regex_t *r, const wchar_t *pattern, int uflags)
{
	int status = REG_ESPACE;
	char *mbpat = wcstombsdup((wchar_t *)pattern);

	if (mbpat != NULL) {
		status = regcomp(r, mbpat, uflags);
		free(mbpat);
	}
	return (status);
}
1411*7c478bd9Sstevel@tonic-gate 
1412*7c478bd9Sstevel@tonic-gate int
1413*7c478bd9Sstevel@tonic-gate int_regwexec(const regex_t *r,	/* compiled RE */
1414*7c478bd9Sstevel@tonic-gate 	const wchar_t *astring,	/* subject string */
1415*7c478bd9Sstevel@tonic-gate 	size_t nsub,		/* number of subexpressions */
1416*7c478bd9Sstevel@tonic-gate 	int_regwmatch_t *sub,	/* subexpression pointers */
1417*7c478bd9Sstevel@tonic-gate 	int flags)
1418*7c478bd9Sstevel@tonic-gate {
1419*7c478bd9Sstevel@tonic-gate 	char *mbs;
1420*7c478bd9Sstevel@tonic-gate 	regmatch_t *mbsub = NULL;
1421*7c478bd9Sstevel@tonic-gate 	register int i;
1422*7c478bd9Sstevel@tonic-gate 
1423*7c478bd9Sstevel@tonic-gate 	if ((mbs = wcstombsdup((wchar_t *) astring)) == NULL)
1424*7c478bd9Sstevel@tonic-gate 		return (REG_ESPACE);
1425*7c478bd9Sstevel@tonic-gate 
1426*7c478bd9Sstevel@tonic-gate 	if (nsub > 0 && sub) {
1427*7c478bd9Sstevel@tonic-gate 		if ((mbsub = malloc(nsub * sizeof (regmatch_t))) == NULL)
1428*7c478bd9Sstevel@tonic-gate 			return (REG_ESPACE);
1429*7c478bd9Sstevel@tonic-gate 	}
1430*7c478bd9Sstevel@tonic-gate 
1431*7c478bd9Sstevel@tonic-gate 	i = regexec(r, mbs, nsub, mbsub, flags);
1432*7c478bd9Sstevel@tonic-gate 
1433*7c478bd9Sstevel@tonic-gate 	/* Now, adjust the pointers/counts in sub */
1434*7c478bd9Sstevel@tonic-gate 	if (i == REG_OK && nsub > 0 && mbsub) {
1435*7c478bd9Sstevel@tonic-gate 		register int j, k;
1436*7c478bd9Sstevel@tonic-gate 
1437*7c478bd9Sstevel@tonic-gate 		for (j = 0; j < nsub; j++) {
1438*7c478bd9Sstevel@tonic-gate 			regmatch_t *ms = &mbsub[j];
1439*7c478bd9Sstevel@tonic-gate 			int_regwmatch_t *ws = &sub[j];
1440*7c478bd9Sstevel@tonic-gate 
1441*7c478bd9Sstevel@tonic-gate 			if ((k = ms->rm_so) >= 0) {
1442*7c478bd9Sstevel@tonic-gate 				ws->rm_so = wcoff(astring, k);
1443*7c478bd9Sstevel@tonic-gate 				ws->rm_sp = astring + ws->rm_so;
1444*7c478bd9Sstevel@tonic-gate 			}
1445*7c478bd9Sstevel@tonic-gate 			if ((k = ms->rm_eo) >= 0) {
1446*7c478bd9Sstevel@tonic-gate 				ws->rm_eo = wcoff(astring, k);
1447*7c478bd9Sstevel@tonic-gate 				ws->rm_ep = astring + ws->rm_eo;
1448*7c478bd9Sstevel@tonic-gate 			}
1449*7c478bd9Sstevel@tonic-gate 		}
1450*7c478bd9Sstevel@tonic-gate 	}
1451*7c478bd9Sstevel@tonic-gate 
1452*7c478bd9Sstevel@tonic-gate 	free(mbs);
1453*7c478bd9Sstevel@tonic-gate 	if (mbsub)
1454*7c478bd9Sstevel@tonic-gate 		free(mbsub);
1455*7c478bd9Sstevel@tonic-gate 	return (i);
1456*7c478bd9Sstevel@tonic-gate }
1457*7c478bd9Sstevel@tonic-gate 
1458*7c478bd9Sstevel@tonic-gate int
1459*7c478bd9Sstevel@tonic-gate int_regwdosuba(register regex_t *rp,	/* compiled RE: Pattern */
1460*7c478bd9Sstevel@tonic-gate 	const wchar_t *rpl,		/* replacement string: /rpl/ */
1461*7c478bd9Sstevel@tonic-gate 	const wchar_t *src,		/* source string */
1462*7c478bd9Sstevel@tonic-gate 	wchar_t **dstp,			/* destination string */
1463*7c478bd9Sstevel@tonic-gate 	int len,			/* destination length */
1464*7c478bd9Sstevel@tonic-gate 	int *globp)	/* IN: occurence, 0 for all; OUT: substitutions */
1465*7c478bd9Sstevel@tonic-gate {
1466*7c478bd9Sstevel@tonic-gate 	wchar_t *dst, *odst;
1467*7c478bd9Sstevel@tonic-gate 	register const wchar_t *ip, *xp;
1468*7c478bd9Sstevel@tonic-gate 	register wchar_t *op;
1469*7c478bd9Sstevel@tonic-gate 	register int i;
1470*7c478bd9Sstevel@tonic-gate 	register wchar_t c;
1471*7c478bd9Sstevel@tonic-gate 	int glob, iglob = *globp, oglob = 0;
1472*7c478bd9Sstevel@tonic-gate #define	NSUB	10
1473*7c478bd9Sstevel@tonic-gate 	int_regwmatch_t rm[NSUB], *rmp;
1474*7c478bd9Sstevel@tonic-gate 	int flags;
1475*7c478bd9Sstevel@tonic-gate 	wchar_t *end;
1476*7c478bd9Sstevel@tonic-gate 	int regerr;
1477*7c478bd9Sstevel@tonic-gate 
1478*7c478bd9Sstevel@tonic-gate /* handle overflow of dst. we need "i" more bytes */
1479*7c478bd9Sstevel@tonic-gate #ifdef OVERFLOW
1480*7c478bd9Sstevel@tonic-gate #undef OVERFLOW
1481*7c478bd9Sstevel@tonic-gate #define	OVERFLOW(i) if (1) { \
1482*7c478bd9Sstevel@tonic-gate 		int pos = op - dst; \
1483*7c478bd9Sstevel@tonic-gate 		dst = (wchar_t *) realloc(odst = dst, \
1484*7c478bd9Sstevel@tonic-gate 			(len += len + i) * sizeof (wchar_t)); \
1485*7c478bd9Sstevel@tonic-gate 		if (dst == NULL) \
1486*7c478bd9Sstevel@tonic-gate 			goto nospace; \
1487*7c478bd9Sstevel@tonic-gate 		op = dst + pos; \
1488*7c478bd9Sstevel@tonic-gate 		end = dst + len; \
1489*7c478bd9Sstevel@tonic-gate 	} else
1490*7c478bd9Sstevel@tonic-gate #endif
1491*7c478bd9Sstevel@tonic-gate 
1492*7c478bd9Sstevel@tonic-gate 	*dstp = dst = (wchar_t *) malloc(len * sizeof (wchar_t));
1493*7c478bd9Sstevel@tonic-gate 	if (dst == NULL)
1494*7c478bd9Sstevel@tonic-gate 		return (REG_ESPACE);
1495*7c478bd9Sstevel@tonic-gate 
1496*7c478bd9Sstevel@tonic-gate 	if (rp == NULL || rpl == NULL || src == NULL || dst ==  NULL)
1497*7c478bd9Sstevel@tonic-gate 		return (REG_EFATAL);
1498*7c478bd9Sstevel@tonic-gate 
1499*7c478bd9Sstevel@tonic-gate 	glob = 0;	/* match count */
1500*7c478bd9Sstevel@tonic-gate 	ip = src;	/* source position */
1501*7c478bd9Sstevel@tonic-gate 	op = dst;	/* destination position */
1502*7c478bd9Sstevel@tonic-gate 	end = dst + len;
1503*7c478bd9Sstevel@tonic-gate 
1504*7c478bd9Sstevel@tonic-gate 	flags = 0;
1505*7c478bd9Sstevel@tonic-gate 	while ((regerr = int_regwexec(rp, ip, NSUB, rm, flags)) == REG_OK) {
1506*7c478bd9Sstevel@tonic-gate 		/* Copy text preceding match */
1507*7c478bd9Sstevel@tonic-gate 		if (op + (i = rm[0].rm_sp - ip) >= end)
1508*7c478bd9Sstevel@tonic-gate 			OVERFLOW(i);
1509*7c478bd9Sstevel@tonic-gate 		while (i--)
1510*7c478bd9Sstevel@tonic-gate 			*op++ = *ip++;
1511*7c478bd9Sstevel@tonic-gate 
1512*7c478bd9Sstevel@tonic-gate 		if (iglob == 0 || ++glob == iglob) {
1513*7c478bd9Sstevel@tonic-gate 			oglob++;
1514*7c478bd9Sstevel@tonic-gate 			xp = rpl;		/* do substitute */
1515*7c478bd9Sstevel@tonic-gate 		} else
1516*7c478bd9Sstevel@tonic-gate 			xp = L"&";		/* preserve text */
1517*7c478bd9Sstevel@tonic-gate 
1518*7c478bd9Sstevel@tonic-gate 		/* Perform replacement of matched substing */
1519*7c478bd9Sstevel@tonic-gate 		while ((c = *xp++) != '\0') {
1520*7c478bd9Sstevel@tonic-gate 			rmp = NULL;
1521*7c478bd9Sstevel@tonic-gate 			if (c == '&')
1522*7c478bd9Sstevel@tonic-gate 				rmp = &rm[0];
1523*7c478bd9Sstevel@tonic-gate 			else if (c == '\\') {
1524*7c478bd9Sstevel@tonic-gate 				if ('0' <= *xp && *xp <= '9')
1525*7c478bd9Sstevel@tonic-gate 					rmp = &rm[*xp++ - '0'];
1526*7c478bd9Sstevel@tonic-gate 				else if (*xp != '\0')
1527*7c478bd9Sstevel@tonic-gate 					c = *xp++;
1528*7c478bd9Sstevel@tonic-gate 			}
1529*7c478bd9Sstevel@tonic-gate 
1530*7c478bd9Sstevel@tonic-gate 			if (rmp ==  NULL) {	/* Ordinary character. */
1531*7c478bd9Sstevel@tonic-gate 				*op++ = c;
1532*7c478bd9Sstevel@tonic-gate 				if (op >= end)
1533*7c478bd9Sstevel@tonic-gate 					OVERFLOW(1);
1534*7c478bd9Sstevel@tonic-gate 			} else if (rmp->rm_sp != NULL && rmp->rm_ep != NULL) {
1535*7c478bd9Sstevel@tonic-gate 				ip = rmp->rm_sp;
1536*7c478bd9Sstevel@tonic-gate 				if (op + (i = rmp->rm_ep - rmp->rm_sp) >= end)
1537*7c478bd9Sstevel@tonic-gate 					OVERFLOW(i);
1538*7c478bd9Sstevel@tonic-gate 				while (i--)
1539*7c478bd9Sstevel@tonic-gate 					*op++ = *ip++;
1540*7c478bd9Sstevel@tonic-gate 			}
1541*7c478bd9Sstevel@tonic-gate 		}
1542*7c478bd9Sstevel@tonic-gate 
1543*7c478bd9Sstevel@tonic-gate 		ip = rm[0].rm_ep;
1544*7c478bd9Sstevel@tonic-gate 		if (*ip == '\0')	/* If at end break */
1545*7c478bd9Sstevel@tonic-gate 			break;
1546*7c478bd9Sstevel@tonic-gate 		else if (rm[0].rm_sp == rm[0].rm_ep) {
1547*7c478bd9Sstevel@tonic-gate 			/* If empty match copy next char */
1548*7c478bd9Sstevel@tonic-gate 			*op++ = *ip++;
1549*7c478bd9Sstevel@tonic-gate 			if (op >= end)
1550*7c478bd9Sstevel@tonic-gate 				OVERFLOW(1);
1551*7c478bd9Sstevel@tonic-gate 		}
1552*7c478bd9Sstevel@tonic-gate 		flags = REG_NOTBOL;
1553*7c478bd9Sstevel@tonic-gate 	}
1554*7c478bd9Sstevel@tonic-gate 
1555*7c478bd9Sstevel@tonic-gate 	if (regerr != REG_OK && regerr != REG_NOMATCH)
1556*7c478bd9Sstevel@tonic-gate 		return (regerr);
1557*7c478bd9Sstevel@tonic-gate 
1558*7c478bd9Sstevel@tonic-gate 	/* Copy rest of text */
1559*7c478bd9Sstevel@tonic-gate 	if (op + (i =  wcslen(ip)) >= end)
1560*7c478bd9Sstevel@tonic-gate 		OVERFLOW(i);
1561*7c478bd9Sstevel@tonic-gate 	while (i--)
1562*7c478bd9Sstevel@tonic-gate 	    *op++ = *ip++;
1563*7c478bd9Sstevel@tonic-gate 	*op++ = '\0';
1564*7c478bd9Sstevel@tonic-gate 
1565*7c478bd9Sstevel@tonic-gate 	if ((*dstp = dst = (wchar_t *) realloc(odst = dst,
1566*7c478bd9Sstevel@tonic-gate 			sizeof (wchar_t) * (size_t)(op - dst))) == NULL) {
1567*7c478bd9Sstevel@tonic-gate nospace:
1568*7c478bd9Sstevel@tonic-gate 		free(odst);
1569*7c478bd9Sstevel@tonic-gate 		return (REG_ESPACE);
1570*7c478bd9Sstevel@tonic-gate 	}
1571*7c478bd9Sstevel@tonic-gate 
1572*7c478bd9Sstevel@tonic-gate 	*globp = oglob;
1573*7c478bd9Sstevel@tonic-gate 
1574*7c478bd9Sstevel@tonic-gate 	return ((oglob == 0) ? REG_NOMATCH : REG_OK);
1575*7c478bd9Sstevel@tonic-gate }
1576