xref: /titanic_41/usr/src/cmd/awk_xpg4/awk3.c (revision 79777a7dd0179283917bda2ba98999c382d31c2c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * awk -- executor
23  *
24  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  *
27  * Copyright 1985, 1994 by Mortice Kern Systems Inc.  All rights reserved.
28  *
29  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
30  */
31 
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 
34 #include "awk.h"
35 #include "y.tab.h"
36 
37 static int	dohash(wchar_t *name);
38 static NODE	*arithmetic(NODE *np);
39 static NODE	*comparison(NODE *np);
40 static int	type_of(NODE *np);
41 static NODE	*lfield(INT fieldno, NODE *value);
42 static NODE	*rfield(INT fieldno);
43 static NODE	*userfunc(NODE *np);
44 static wchar_t	*lltoa(long long l);
45 static NODE	*exprconcat(NODE *np, int len);
46 static int	s_if(NODE *np);
47 static int	s_while(NODE *np);
48 static int	s_for(NODE *np);
49 static int	s_forin(NODE *np);
50 static void	setrefield(NODE *value);
51 static void	freetemps(void);
52 static int	action(NODE *np);
53 static wchar_t	*makeindex(NODE *np, wchar_t *array, int tag);
54 static int	exprtest(NODE *np);
55 
56 #define	regmatch(rp, s) REGWEXEC(rp, s, 0, (REGWMATCH_T*)NULL, 0)
57 
58 /*
59  * This code allows for integers to be stored in longs (type INT) and
60  * only promoted to double precision floating point numbers (type REAL)
61  * when overflow occurs during +, -, or * operations.  This is very
62  * non-portable if you desire such a speed optimisation.  You may wish
63  * to put something here for your system.  This "something" would likely
64  * include either an assembler "jump on overflow" instruction or a
65  * method to get traps on overflows from the hardware.
66  *
67  * This portable method works for ones and twos complement integer
68  * representations (which is, realistically) almost all machines.
69  */
70 #if	__TURBOC__
71 #define	addoverflow()	asm	jo	overflow
72 #define	suboverflow()	asm	jo	overflow
73 #else
74 /*
75  * These are portable to two's complement integer machines
76  */
77 #define	addoverflow()	if ((i1^i2) >= 0 && (iresult^i1) < 0) goto overflow
78 #define	suboverflow()	if ((i1^i2) < 0 && (iresult^i2) >= 0) goto overflow
79 #endif
80 #define	muloverflow()	if (((short)i1 != i1 || (short)i2 != i2) &&	\
81 			    ((i2 != 0 && iresult/i2 != i1) ||		\
82 			    (i1 == LONG_MIN && i2 == -1)))	  goto overflow
83 
84 static char	notarray[] = "scalar \"%s\" cannot be used as array";
85 static char	badarray[] = "array \"%s\" cannot be used as a scalar";
86 static char	varnotfunc[] = "variable \"%s\" cannot be used as a function";
87 static char	tmfld[] = "Too many fields (LIMIT: %d)";
88 static char	toolong[] = "Record too long (LIMIT: %d bytes)";
89 static char	divzero[] =  "division (/ or %%) by zero";
90 static char	toodeep[] = "too deeply nested for in loop (LIMIT: %d)";
91 
92 static wchar_t	numbuf[NUMSIZE];	/* Used to convert INTs to strings */
93 static wchar_t	*fields[NFIELD];	/* Cache of pointers into fieldbuf */
94 static wchar_t	*fieldbuf;		/* '\0' separated copy of linebuf */
95 static NODE	nodes[NSNODE];		/* Cache of quick access nodes */
96 static NODE	*fnodep = &nodes[0];
97 #define	NINDEXBUF	50
98 static wchar_t	indexbuf[NINDEXBUF];	/* Used for simple array indices */
99 static int	concflag;		/* In CONCAT operation (no frees) */
100 static NODE	*retval;		/* Last return value of a function */
101 
102 /*
103  * The following stack is used to store the next pointers for all nested
104  * for-in loops. This needs to be global so that delete can check to see
105  * if it is deleting the next node to be used by a loop.
106  */
107 #define	NFORINLOOP	10
108 static NODE*	forindex[NFORINLOOP];
109 static NODE**	next_forin = forindex;
110 
111 /*
112  * Assign a string directly to a NODE without creating an intermediate
113  * NODE.  This can handle either FALLOC, FSTATIC, FNOALLOC or FSENSE for
114  * "flags" argument.  Also the NODE "np" must be reduced to an lvalue
115  * (PARM nodes are not acceptable).
116  */
117 void
strassign(NODE * np,STRING string,int flags,size_t length)118 strassign(NODE *np, STRING string, int flags, size_t length)
119 {
120 	if (np->n_type == FUNC)
121 		awkerr(gettext("attempt to redefine builtin function"));
122 	else if (np->n_type == GETLINE || np->n_type == KEYWORD)
123 		awkerr(gettext("inadmissible use of reserved keyword"));
124 	if (np->n_flags & FSPECIAL) {
125 		(void) nassign(np, stringnode(string, flags, length));
126 		return;
127 	}
128 	if (isastring(np->n_flags))
129 		free((wchar_t *)np->n_string);
130 	np->n_strlen = length++;
131 	if (flags & FALLOC) {
132 		length *= sizeof (wchar_t);
133 		np->n_string = (STRING) emalloc(length);
134 		(void) memcpy((void *)np->n_string, string, length);
135 	} else {
136 		np->n_string = string;
137 		if (flags & FNOALLOC) {
138 			flags &= ~FNOALLOC;
139 			flags |= FALLOC;
140 		}
141 	}
142 	np->n_flags &= FSAVE;
143 	if (flags & FSENSE) {
144 		flags &= ~FSENSE;
145 		flags |= type_of(np);
146 	} else
147 		flags |= FSTRING;
148 	np->n_flags |= flags;
149 }
150 
151 /*
152  * Assign to a variable node.
153  * LHS must be a VAR type and RHS must be reduced by now.
154  * To speed certain operations up, check for
155  * certain things here and do special assignments.
156  */
157 NODE *
nassign(NODE * np,NODE * value)158 nassign(NODE *np, NODE *value)
159 {
160 	register wchar_t *cp;
161 	register int len;
162 
163 	/* short circuit assignment of a node to itself */
164 	if (np == value)
165 		return (np);
166 	if (np->n_flags & FSPECIAL) {
167 		if (np == varRS || np == varFS) {
168 			if (isastring(np->n_flags))
169 				free((void *)np->n_string);
170 			len = sizeof (wchar_t) * ((np->n_strlen =
171 				wcslen(cp = exprstring(value)))+1);
172 			np->n_string = emalloc(len);
173 			(void) memcpy((wchar_t *)np->n_string, cp, len);
174 			np->n_flags = FALLOC|FSTRING|FSPECIAL;
175 			if (np == varRS) {
176 				if (np->n_string[0] == '\n')
177 					awkrecord = defrecord;
178 				else if (np->n_string[0] == '\0')
179 					awkrecord = multirecord;
180 				else
181 					awkrecord = charrecord;
182 			} else if (np == varFS) {
183 				if (resep != (REGEXP)NULL) {
184 					REGWFREE(resep);
185 					resep = (REGEXP)NULL;
186 				}
187 				if (wcslen((wchar_t *)np->n_string) > 1)
188 					setrefield(np);
189 				else if (np->n_string[0] == ' ')
190 					awkfield = whitefield;
191 				else
192 					awkfield = blackfield;
193 			}
194 			return (np);
195 		}
196 	}
197 	if (isastring(np->n_flags))
198 		free((wchar_t *)np->n_string);
199 	if (isstring(value->n_flags)) {
200 		np->n_strlen = value->n_strlen;
201 		if (value->n_flags&FALLOC || value->n_string != _null) {
202 			len = (np->n_strlen+1) * sizeof (wchar_t);
203 			np->n_string = emalloc(len);
204 			(void) memcpy(np->n_string, value->n_string, len);
205 			np->n_flags &= FSAVE;
206 			np->n_flags |= value->n_flags & ~FSAVE;
207 			np->n_flags |= FALLOC;
208 			return (np);
209 		} else
210 			np->n_string = value->n_string;
211 	} else if (value->n_flags & FINT)
212 		np->n_int = value->n_int;
213 	else
214 		np->n_real = value->n_real;
215 	np->n_flags &= FSAVE;
216 	np->n_flags |= value->n_flags & ~FSAVE;
217 	return (np);
218 }
219 
220 /*
221  * Set regular expression FS value.
222  */
223 static void
setrefield(NODE * np)224 setrefield(NODE *np)
225 {
226 	static REGEXP re;
227 	int n;
228 
229 	if ((n = REGWCOMP(&re, np->n_string)) != REG_OK) {
230 		REGWERROR(n, &re, (char *)linebuf, sizeof (linebuf));
231 		awkerr(gettext("syntax error \"%s\" in /%s/\n"),
232 			(char *)linebuf, np->n_string);
233 	}
234 	resep = re;
235 	awkfield = refield;
236 }
237 
238 /*
239  * Assign to an l-value node.
240  */
241 NODE *
assign(NODE * left,NODE * right)242 assign(NODE *left, NODE *right)
243 {
244 	if (isleaf(right->n_flags)) {
245 		if (right->n_type == PARM)
246 			right = right->n_next;
247 	} else
248 		right = exprreduce(right);
249 top:
250 	switch (left->n_type) {
251 	case INDEX:
252 		left = exprreduce(left);
253 	/*FALLTHRU*/
254 	case VAR:
255 		return (nassign(left, right));
256 
257 	case PARM:
258 		/*
259 		 * If it's a parameter then link to the actual value node and
260 		 * do the checks again.
261 		 */
262 		left = left->n_next;
263 		goto top;
264 
265 	case FIELD:
266 		return (lfield(exprint(left->n_left), right));
267 
268 	case CALLUFUNC:
269 	case UFUNC:
270 		awkerr(gettext("cannot assign to function \"%s\""),
271 		    left->n_name);
272 
273 	default:
274 		awkerr(gettext("lvalue required in assignment"));
275 	}
276 	/* NOTREACHED */
277 	return (0);
278 }
279 
280 /*
281  * Compiled tree non-terminal node.
282  */
283 NODE *
node(int type,NODE * left,NODE * right)284 node(int type, NODE *left, NODE *right)
285 {
286 	register NODE *np;
287 
288 	np = emptynode(type, 0);
289 	np->n_left = left;
290 	np->n_right = right;
291 	np->n_lineno = lineno;
292 	return (np);
293 }
294 
295 /*
296  * Create an integer node.
297  */
298 NODE *
intnode(INT i)299 intnode(INT i)
300 {
301 	register NODE *np;
302 
303 	np = emptynode(CONSTANT, 0);
304 	np->n_flags = FINT|FVINT;
305 	np->n_int = i;
306 	return (np);
307 }
308 
309 /*
310  * Create a real number node.
311  */
312 NODE *
realnode(REAL real)313 realnode(REAL real)
314 {
315 	register NODE *np;
316 
317 	np = emptynode(CONSTANT, 0);
318 	np->n_flags = FREAL|FVREAL;
319 	np->n_real = real;
320 	return (np);
321 }
322 
323 /*
324  * Make a node for a string.
325  */
326 NODE *
stringnode(STRING s,int how,size_t length)327 stringnode(STRING s, int how, size_t length)
328 {
329 	register NODE *np;
330 
331 	np = emptynode(CONSTANT, 0);
332 	np->n_strlen = length;
333 	if (how & FALLOC) {
334 		np->n_string = emalloc(length = (length+1) * sizeof (wchar_t));
335 		(void) memcpy(np->n_string, s, length);
336 	} else {
337 		np->n_string = s;
338 		if (how & FNOALLOC) {
339 			how &= ~FNOALLOC;
340 			how |= FALLOC;
341 		}
342 	}
343 	if (how & FSENSE) {
344 		np->n_flags = type_of(np);
345 		how &= ~FSENSE;
346 	} else
347 		np->n_flags = FSTRING;
348 	np->n_flags |= how;
349 	return (np);
350 }
351 
352 /*
353  * Save a copy of a string.
354  */
355 STRING
strsave(wchar_t * old)356 strsave(wchar_t *old)
357 {
358 	STRING new;
359 	register size_t len;
360 
361 	new = (STRING)emalloc(len = (wcslen(old)+1) * sizeof (wchar_t));
362 	(void) memcpy(new, old, len);
363 	return (new);
364 }
365 
366 /*
367  * Allocate an empty node of given type.
368  * String space for the node is given by `length'.
369  */
370 NODE *
emptynode(int type,size_t length)371 emptynode(int type, size_t length)
372 {
373 	register NODE *np;
374 
375 	if (length == 0 && running && fnodep < &nodes[NSNODE]) {
376 		np = fnodep++;
377 	} else {
378 		np = (NODE *)emalloc(sizeof (NODE) +
379 		    (length * sizeof (wchar_t)));
380 		if (running && type != VAR && type != ARRAY) {
381 			np->n_next = freelist;
382 			freelist = np;
383 		}
384 	}
385 	np->n_flags = FNONTOK;
386 	np->n_type = type;
387 	np->n_alink = NNULL;
388 
389 	return (np);
390 }
391 
392 /*
393  * Free a node.
394  */
395 void
freenode(NODE * np)396 freenode(NODE *np)
397 {
398 	if (isastring(np->n_flags))
399 		free((wchar_t *)np->n_string);
400 	else if (np->n_type == RE) {
401 		REGWFREE(np->n_regexp);
402 	}
403 	free((wchar_t *)np);
404 }
405 
406 /*
407  * Install a keyword of given `type'.
408  */
409 void
kinstall(LOCCHARP name,int type)410 kinstall(LOCCHARP name, int type)
411 {
412 	register NODE *np;
413 	register size_t l;
414 
415 	l = wcslen(name);
416 	np = emptynode(KEYWORD, l);
417 	np->n_keywtype = type;
418 	(void) memcpy(np->n_name, name, (l+1) * sizeof (wchar_t));
419 	addsymtab(np);
420 }
421 
422 /*
423  * Install built-in function.
424  */
425 NODE *
finstall(LOCCHARP name,FUNCTION func,int type)426 finstall(LOCCHARP name, FUNCTION func, int type)
427 {
428 	register NODE *np;
429 	register size_t l;
430 
431 	l = wcslen(name);
432 	np = emptynode(type, l);
433 	np->n_function = func;
434 	(void) memcpy(np->n_name, name, (l+1) * sizeof (wchar_t));
435 	addsymtab(np);
436 	return (np);
437 }
438 
439 /*
440  * Lookup an identifier.
441  * nocreate contains the following flag values:
442  *	1 if no creation of a new NODE,
443  *	0 if ok to create new NODE
444  */
445 NODE *
vlookup(wchar_t * name,int nocreate)446 vlookup(wchar_t *name, int nocreate)
447 {
448 	register ushort_t hash;
449 	register NODE *np;
450 
451 	np = symtab[hashbuck(hash = dohash((wchar_t *)name))];
452 	while (np != NNULL) {
453 		if (np->n_hash == hash && wcscmp(name, np->n_name) == 0)
454 			return (np);
455 		np = np->n_next;
456 	}
457 	if (nocreate) {
458 		np = NNULL;
459 	} else {
460 		np = emptynode(VAR, hash = wcslen(name));
461 		np->n_flags = FSTRING|FVINT;
462 		np->n_strlen = 0;
463 		np->n_string = _null;
464 		(void) memcpy(np->n_name, name,
465 			(hash+1) * sizeof (wchar_t));
466 		addsymtab(np);
467 	}
468 	return (np);
469 }
470 
471 /*
472  * Add a symbol to the table.
473  */
474 void
addsymtab(NODE * np)475 addsymtab(NODE *np)
476 {
477 	register NODE **spp;
478 
479 	np->n_hash = dohash((wchar_t *)np->n_name);
480 	spp = &symtab[hashbuck(np->n_hash)];
481 	np->n_next = *spp;
482 	*spp = np;
483 }
484 
485 /*
486  * Delete the given node from the symbol table.
487  * If fflag is non-zero, also free the node space.
488  * This routine must also check the stack of forin loop pointers. If
489  * we are deleting the next item to be used, then the pointer must be
490  * advanced.
491  */
492 void
delsymtab(NODE * np,int fflag)493 delsymtab(NODE *np, int fflag)
494 {
495 	register NODE *rnp;
496 	register NODE *prevp;
497 	register NODE **sptr;
498 	register ushort_t h;
499 
500 
501 
502 
503 
504 	h = hashbuck(np->n_hash);
505 	prevp = NNULL;
506 	for (rnp = symtab[h]; rnp != NNULL; rnp = rnp->n_next) {
507 		if (rnp == np) {
508 			/*
509 			 * check all of the for-in loop pointers
510 			 * to see if any need to be advanced because
511 			 * this element is being deleted.
512 			 */
513 			if (next_forin != forindex) {
514 				sptr = next_forin;
515 				do {
516 					if (*--sptr == rnp) {
517 						*sptr = rnp->n_next;
518 						break;
519 					}
520 				} while (sptr != forindex);
521 			}
522 			if (prevp == NNULL)
523 				symtab[h] = rnp->n_next; else
524 				prevp->n_next = rnp->n_next;
525 			if (fflag)
526 				freenode(rnp);
527 			break;
528 		}
529 		prevp = rnp;
530 	}
531 }
532 
533 /*
534  * Hashing function.
535  */
536 static int
dohash(wchar_t * name)537 dohash(wchar_t *name)
538 {
539 	register int hash = 0;
540 
541 	while (*name != '\0')
542 		hash += *name++;
543 	return (hash);
544 }
545 
546 /*
547  * Top level executor for an awk programme.
548  * This will be passed: pattern, action or a list of these.
549  * The former function to evaluate a pattern has been
550  * subsumed into this function for speed.
551  * Patterns are:
552  *	BEGIN,
553  *	END,
554  *	other expressions (including regular expressions)
555  */
556 void
execute(NODE * wp)557 execute(NODE *wp)
558 {
559 	register NODE *np;
560 	register int type;
561 	register NODE *tnp;
562 
563 	curnode = wp;
564 	if (phase != 0) {
565 		linebuf[0] = '\0';
566 		lbuflen = 0;
567 	}
568 	while (wp != NNULL) {
569 		if (wp->n_type == COMMA) {
570 			np = wp->n_left;
571 			wp = wp->n_right;
572 		} else {
573 			np = wp;
574 			wp = NNULL;
575 		}
576 		if (np->n_type != PACT)
577 			awkerr(interr, "PACT");
578 		/*
579 		 * Save the parent node and evaluate the pattern.
580 		 * If it evaluates to false (0) just continue
581 		 * to the next pattern/action (PACT) pair.
582 		 */
583 		tnp = np;
584 		np = np->n_left;
585 		if (np == NNULL) {
586 			if (phase != 0)
587 				continue;
588 		} else if (phase != 0) {
589 			if (np->n_type != phase)
590 				continue;
591 		} else if ((type = np->n_type) == BEGIN || type == END) {
592 			continue;
593 		} else if (type == COMMA) {
594 			/*
595 			 * The grammar only allows expressions
596 			 * to be separated by the ',' operator
597 			 * for range patterns.
598 			 */
599 			if (np->n_flags & FMATCH) {
600 				if (exprint(np->n_right) != 0)
601 					np->n_flags &= ~FMATCH;
602 			} else if (exprint(np->n_left) != 0) {
603 				if (exprint(np->n_right) == 0)
604 					np->n_flags |= FMATCH;
605 			} else
606 				continue;
607 		} else if (exprint(np) == 0)
608 			continue;
609 		np = tnp;
610 		if (action(np->n_right)) {
611 			loopexit = 0;
612 			break;
613 		}
614 	}
615 	if (freelist != NNULL)
616 		freetemps();
617 }
618 
619 /*
620  * Free all temporary nodes.
621  */
622 static void
freetemps()623 freetemps()
624 {
625 	register NODE *np, *nnp;
626 
627 	if (concflag)
628 		return;
629 	for (np = &nodes[0]; np < fnodep; np++) {
630 		if (isastring(np->n_flags)) {
631 			free((wchar_t *)np->n_string);
632 		} else if (np->n_type == RE) {
633 			REGWFREE(np->n_regexp);
634 		}
635 	}
636 	fnodep = &nodes[0];
637 	for (np = freelist; np != NNULL; np = nnp) {
638 		nnp = np->n_next;
639 		freenode(np);
640 	}
641 	freelist = NNULL;
642 }
643 
644 /*
645  * Do the given action.
646  * Actions are statements or expressions.
647  */
648 static int
action(NODE * wp)649 action(NODE *wp)
650 {
651 	register NODE *np;
652 	register int act = 0;
653 	register NODE *l;
654 
655 	while (wp != NNULL) {
656 		if (wp->n_type == COMMA) {
657 			np = wp->n_left;
658 			wp = wp->n_right;
659 		} else {
660 			np = wp;
661 			wp = NNULL;
662 		}
663 		if (freelist != NNULL)
664 			freetemps();
665 		curnode = np;
666 		/*
667 		 * Don't change order of these cases without
668 		 * changing order in awk.y declarations.
669 		 * The order is optimised.
670 		 */
671 		switch (np->n_type) {
672 		case ASG:
673 			(void) assign(np->n_left, np->n_right);
674 			continue;
675 
676 		case PRINT:
677 			s_print(np);
678 			continue;
679 
680 		case PRINTF:
681 			s_prf(np);
682 			continue;
683 
684 		case EXIT:
685 			if (np->n_left != NNULL)
686 				act = (int)exprint(np->n_left); else
687 				act = 0;
688 			doend(act);
689 			/* NOTREACHED */
690 
691 		case RETURN:
692 			if (slevel == 0)
693 				awkerr(gettext("return outside of a function"));
694 			np = np->n_left != NNULL
695 			    ? exprreduce(np->n_left)
696 			    : const0;
697 			retval = emptynode(CONSTANT, 0);
698 			retval->n_flags = FINT;
699 			(void) nassign(retval, np);
700 			return (RETURN);
701 
702 		case NEXT:
703 			loopexit = NEXT;
704 		/*FALLTHRU*/
705 		case BREAK:
706 		case CONTINUE:
707 			return (np->n_type);
708 
709 		case DELETE:
710 			if ((l = np->n_left)->n_type == PARM) {
711 				l = l->n_next;
712 				if (!(l->n_flags & FLARRAY))
713 					l = l->n_alink;
714 			}
715 			switch (l->n_type) {
716 			case ARRAY:
717 				delarray(l);
718 				break;
719 
720 			case INDEX:
721 				if ((np = l->n_left)->n_type == PARM) {
722 					np = np->n_next;
723 					if (!(np->n_flags & FLARRAY))
724 						np = np->n_alink;
725 				}
726 				/*
727 				 * get pointer to the node for this array
728 				 * element using the hash key.
729 				 */
730 				l = exprreduce(l);
731 				/*
732 				 * now search linearly from the beginning of
733 				 * the list to find the element before the
734 				 * one being deleted. This must be done
735 				 * because arrays are singley-linked.
736 				 */
737 				while (np != NNULL) {
738 					if (np->n_alink == l) {
739 						np->n_alink = l->n_alink;
740 						break;
741 					}
742 					np = np->n_alink;
743 				}
744 				delsymtab(l, 1);
745 				break;
746 
747 			case VAR:
748 				if (isstring(l->n_flags) &&
749 				    l->n_string == _null)
750 					break;
751 			default:
752 				awkerr(gettext(
753 				    "may delete only array element or array"));
754 				break;
755 			}
756 			continue;
757 
758 		case WHILE:
759 		case DO:
760 			if ((act = s_while(np)) != 0)
761 				break;
762 			continue;
763 
764 		case FOR:
765 			if ((act = s_for(np)) != 0)
766 				break;
767 			continue;
768 
769 		case FORIN:
770 			if ((act = s_forin(np)) != 0)
771 				break;
772 			continue;
773 
774 		case IF:
775 			if ((act = s_if(np)) != 0)
776 				break;
777 			continue;
778 
779 		default:
780 			(void) exprreduce(np);
781 			if (loopexit != 0) {
782 				act = loopexit;
783 				break;
784 			}
785 			continue;
786 		}
787 		return (act);
788 	}
789 	return (0);
790 }
791 
792 /*
793  * Delete an entire array
794  */
795 void
delarray(NODE * np)796 delarray(NODE *np)
797 {
798 	register NODE *nnp;
799 
800 	nnp = np->n_alink;
801 	np->n_alink = NNULL;
802 	while (nnp != NNULL) {
803 		np = nnp->n_alink;
804 		delsymtab(nnp, 1);
805 		nnp = np;
806 	}
807 }
808 
809 /*
810  * Return the INT value of an expression.
811  */
812 INT
exprint(NODE * np)813 exprint(NODE *np)
814 {
815 	if (isleaf(np->n_flags)) {
816 		if (np->n_type == PARM)
817 			np = np->n_next;
818 		goto leaf;
819 	}
820 	np = exprreduce(np);
821 	switch (np->n_type) {
822 	case CONSTANT:
823 	case VAR:
824 	leaf:
825 		if (np->n_flags & FINT)
826 			return (np->n_int);
827 		if (np->n_flags & FREAL)
828 			return ((INT)np->n_real);
829 		return ((INT)wcstoll(np->n_string, NULL, 10));
830 
831 	default:
832 		awkerr(interr, "exprint");
833 	}
834 	/* NOTREACHED */
835 	return (0);
836 }
837 
838 /*
839  * Return a real number from an expression tree.
840  */
841 REAL
exprreal(NODE * np)842 exprreal(NODE *np)
843 {
844 	if (loopexit)
845 		return ((REAL)loopexit);
846 	if (isleaf(np->n_flags)) {
847 		if (np->n_type == PARM)
848 			np = np->n_next;
849 		goto leaf;
850 	}
851 	np = exprreduce(np);
852 	switch (np->n_type) {
853 	case CONSTANT:
854 	case VAR:
855 	leaf:
856 		if (np->n_flags & FREAL)
857 			return (np->n_real);
858 		if (np->n_flags & FINT)
859 			return ((REAL)np->n_int);
860 		return ((REAL)wcstod((wchar_t *)np->n_string, (wchar_t **)0));
861 
862 	default:
863 		awkerr(interr, "exprreal");
864 	}
865 	/* NOTREACHED */
866 	return ((REAL)0);
867 }
868 
869 /*
870  * Return a string from an expression tree.
871  */
872 STRING
exprstring(NODE * np)873 exprstring(NODE *np)
874 {
875 	if (isleaf(np->n_flags)) {
876 		if (np->n_type == PARM)
877 			np = np->n_next;
878 		goto leaf;
879 	}
880 	np = exprreduce(np);
881 	switch (np->n_type) {
882 	case CONSTANT:
883 	case VAR:
884 	leaf:
885 		if (isstring(np->n_flags))
886 			return (np->n_string);
887 		if (np->n_flags & FINT)
888 			return (STRING)lltoa((long long)np->n_int);
889 		{
890 			char *tmp;
891 			(void) wsprintf(numbuf,
892 		(const char *) (tmp = wcstombsdup(exprstring(varCONVFMT))),
893 				(double)np->n_real);
894 			if (tmp != NULL)
895 				free(tmp);
896 		}
897 		return ((STRING)numbuf);
898 
899 	default:
900 		awkerr(interr, "exprstring");
901 	}
902 	/* NOTREACHED */
903 	return (0);
904 }
905 
906 /*
907  * Convert number to string.
908  */
909 static wchar_t *
lltoa(long long l)910 lltoa(long long l)
911 {
912 	register wchar_t *p = &numbuf[NUMSIZE];
913 	register int s;
914 	register int neg;
915 	static wchar_t zero[] = M_MB_L("0");
916 
917 	if (l == 0)
918 		return (zero);
919 	*--p = '\0';
920 	if (l < 0)
921 		neg = 1, l = -l; else
922 		neg = 0;
923 	if ((s = (short)l) == l) {
924 		while (s != 0) {
925 			*--p = s%10 + '0';
926 			s /= 10;
927 		}
928 	} else {
929 		while (l != 0) {
930 			*--p = l%10 + '0';
931 			l /= 10;
932 		}
933 	}
934 	if (neg)
935 		*--p = '-';
936 	return (wcscpy(numbuf, p));
937 }
938 
939 /*
940  * Return pointer to node with concatenation of operands of CONCAT node.
941  * In the interest of speed, a left recursive tree of CONCAT nodes
942  * is handled with a single malloc.  The accumulated lengths of the
943  * right operands are passed down recursive invocations of this
944  * routine, which allocates a large enough string when the left
945  * operand is not a CONCAT node.
946  */
947 static NODE *
exprconcat(NODE * np,int len)948 exprconcat(NODE *np, int len)
949 {
950 	/* we KNOW (np->n_type==CONCAT) */
951 	register NODE *lnp = np->n_left;
952 	register NODE *rnp = np->n_right;
953 	register STRING	rsp;
954 	int rlen;
955 	size_t llen;
956 	wchar_t *cp;
957 	wchar_t rnumbuf[NUMSIZE];
958 
959 	if (isleaf(rnp->n_flags) && rnp->n_type == PARM)
960 		rnp = rnp->n_next;
961 	if (isstring(rnp->n_flags)) {
962 		rsp = rnp->n_string;
963 		rlen = rnp->n_strlen;
964 	} else
965 		rlen = wcslen((wchar_t *)(rsp = exprstring(rnp)));
966 	if (rsp == numbuf) {	/* static, so save a copy */
967 		(void) memcpy(rnumbuf, (wchar_t *)rsp,
968 			(rlen+1) * sizeof (wchar_t));
969 		rsp = rnumbuf;
970 	}
971 	len += rlen;
972 	if (lnp->n_type == CONCAT) {
973 		lnp = exprconcat(lnp, len);
974 		cp = lnp->n_string;
975 		llen = lnp->n_strlen;
976 	} else {
977 		register STRING	lsp;
978 
979 		if (isleaf(lnp->n_flags) && lnp->n_type == PARM)
980 			lnp = lnp->n_next;
981 		if (isstring(lnp->n_flags)) {
982 			lsp = lnp->n_string;
983 			llen = lnp->n_strlen;
984 		} else
985 			llen = wcslen((wchar_t *)(lsp = exprstring(lnp)));
986 		cp = emalloc((llen+len+1) * sizeof (wchar_t));
987 		(void) memcpy(cp, (wchar_t *)lsp, llen * sizeof (wchar_t));
988 		lnp = stringnode(cp, FNOALLOC, llen);
989 	}
990 	(void) memcpy(cp+llen, (wchar_t *)rsp, (rlen+1) * sizeof (wchar_t));
991 	lnp->n_strlen += rlen;
992 	return (lnp);
993 }
994 
995 /*
996  * Reduce an expression to a terminal node.
997  */
998 NODE *
exprreduce(NODE * np)999 exprreduce(NODE *np)
1000 {
1001 	register wchar_t *cp;
1002 	NODE *tnp;
1003 	register int temp;
1004 	register int t;
1005 	register int  tag;
1006 	register wchar_t *fname;
1007 	register wchar_t *aname;
1008 
1009 	/*
1010 	 * a var or constant is a leaf-node (no further reduction required)
1011 	 * so return immediately.
1012 	 */
1013 	if ((t = np->n_type) == VAR || t == CONSTANT)
1014 		return (np);
1015 	/*
1016 	 * If it's a parameter then it is probably a leaf node but it
1017 	 * might be an array so we check.. If it is an array, then signal
1018 	 * an error as an array by itself cannot be used in this context.
1019 	 */
1020 	if (t == PARM)
1021 		if ((np = np->n_next)->n_type == ARRAY)
1022 			awkerr(badarray, np->n_name);
1023 		else
1024 			return (np);
1025 	/*
1026 	 * All the rest are non-leaf nodes.
1027 	 */
1028 	curnode = np;
1029 	switch (t) {
1030 	case CALLUFUNC:
1031 		return (userfunc(np));
1032 
1033 	case FIELD:
1034 		return (rfield(exprint(np->n_left)));
1035 
1036 	case IN:
1037 	case INDEX:
1038 		tag = 0;
1039 		temp = np->n_type;
1040 		tnp = np->n_left;
1041 		np = np->n_right;
1042 		/* initially formal var name and array key name are the same */
1043 		fname = aname = tnp->n_name;
1044 		if (tnp->n_type == PARM) {
1045 			tnp = tnp->n_next;
1046 			tag = tnp->n_scope;
1047 			if (!(tnp->n_flags & FLARRAY)) {
1048 				tnp = tnp->n_alink;
1049 			}
1050 			aname = tnp->n_name;
1051 		}
1052 		if (tnp->n_type != ARRAY) {
1053 			if (!isstring(tnp->n_flags) || tnp->n_string != _null)
1054 				awkerr(notarray, fname);
1055 			else {
1056 				/* promotion to array */
1057 				promote(tnp);
1058 				if (tnp->n_alink != NNULL) {
1059 					tag = tnp->n_scope;
1060 					if (!(tnp->n_flags & FLARRAY))
1061 						tnp = tnp->n_alink;
1062 					aname = tnp->n_name;
1063 				} else {
1064 					tag = 0;
1065 					if (tnp->n_flags & FLARRAY)
1066 						tag = tnp->n_scope;
1067 				}
1068 			}
1069 		}
1070 		if (tnp == varSYMTAB) {
1071 			if (np == NNULL || np->n_type == COMMA)
1072 				awkerr(gettext(
1073 				    "SYMTAB must have exactly one index"));
1074 			np = vlook(exprstring(np));
1075 			return (np);
1076 		}
1077 		cp = makeindex(np, aname, tag);
1078 		if (temp == INDEX) {
1079 			np = vlook(cp);
1080 			if (!(np->n_flags & FINARRAY)) {
1081 				np->n_alink = tnp->n_alink;
1082 				tnp->n_alink = np;
1083 				np->n_flags |= FINARRAY;
1084 			}
1085 		} else
1086 			np = vlookup(cp, 1) == NNULL ? const0 : const1;
1087 		if (cp != indexbuf)
1088 			free(cp);
1089 		return (np);
1090 
1091 	case CONCAT:
1092 		++concflag;
1093 		np = exprconcat(np, 0);
1094 		--concflag;
1095 		return (np);
1096 
1097 	case NOT:
1098 		return (intnode(exprtest(np->n_left) == 0 ? (INT)1 : (INT)0));
1099 
1100 	case AND:
1101 		return ((exprtest(np->n_left) != 0 &&
1102 		    exprtest(np->n_right) != 0) ? const1 : const0);
1103 
1104 	case OR:
1105 		return ((exprtest(np->n_left) != 0 ||
1106 		    exprtest(np->n_right) != 0) ? const1 : const0);
1107 
1108 	case EXP:
1109 		{
1110 			double f1, f2;
1111 
1112 			/*
1113 			 * evaluate expressions in proper order before
1114 			 * calling pow().
1115 			 * Can't guarantee that compiler will do this
1116 			 * correctly for us if we put them inline.
1117 			 */
1118 			f1 = (double)exprreal(np->n_left);
1119 			f2 = (double)exprreal(np->n_right);
1120 			return (realnode((REAL)pow(f1, f2)));
1121 		}
1122 
1123 	case QUEST:
1124 		if (np->n_right->n_type != COLON)
1125 			awkerr(interr, "?:");
1126 		if (exprtest(np->n_left))
1127 			np = np->n_right->n_left; else
1128 			np = np->n_right->n_right;
1129 		return (exprreduce(np));
1130 
1131 	case EQ:
1132 	case NE:
1133 	case GE:
1134 	case LE:
1135 	case GT:
1136 	case LT:
1137 		return (comparison(np));
1138 
1139 	case ADD:
1140 	case SUB:
1141 	case MUL:
1142 	case DIV:
1143 	case REM:
1144 		return (arithmetic(np));
1145 
1146 	case DEC:
1147 		inc_oper->n_type = SUB;
1148 		goto do_inc_op;
1149 	case INC:
1150 		inc_oper->n_type = ADD;
1151 do_inc_op:
1152 		if ((np = np->n_left)->n_type == INDEX)
1153 			np = exprreduce(np);
1154 		if (np->n_flags & FREAL)
1155 			tnp = realnode(np->n_real);
1156 		else
1157 			tnp = intnode(exprint(np));
1158 		inc_oper->n_left = np;
1159 		(void) assign(np, inc_oper);
1160 		return (tnp);
1161 
1162 	case PRE_DEC:
1163 		inc_oper->n_type = SUB;
1164 		goto do_pinc_op;
1165 	case PRE_INC:
1166 		inc_oper->n_type = ADD;
1167 do_pinc_op:
1168 		if ((np = np->n_left)->n_type == INDEX)
1169 			np = exprreduce(np);
1170 		inc_oper->n_left = np;
1171 		return (assign(np, inc_oper));
1172 
1173 	case AADD:
1174 		asn_oper->n_type = ADD;
1175 		goto do_asn_op;
1176 	case ASUB:
1177 		asn_oper->n_type = SUB;
1178 		goto do_asn_op;
1179 	case AMUL:
1180 		asn_oper->n_type = MUL;
1181 		goto do_asn_op;
1182 	case ADIV:
1183 		asn_oper->n_type = DIV;
1184 		goto do_asn_op;
1185 	case AREM:
1186 		asn_oper->n_type = REM;
1187 		goto do_asn_op;
1188 	case AEXP:
1189 		asn_oper->n_type = EXP;
1190 do_asn_op:
1191 		asn_oper->n_right = np->n_right;
1192 		if ((np = np->n_left)->n_type == INDEX)
1193 			np = exprreduce(np);
1194 		asn_oper->n_left = np;
1195 		return (assign(np, asn_oper));
1196 
1197 
1198 	case GETLINE:
1199 		return (f_getline(np));
1200 
1201 	case CALLFUNC:
1202 		return ((*np->n_left->n_function)(np->n_right));
1203 
1204 	case RE:
1205 		if (regmatch(np->n_regexp, linebuf) == REG_OK)
1206 			return (const1);
1207 		return (const0);
1208 
1209 	case TILDE:
1210 		cp = exprstring(np->n_left);
1211 		if (regmatch(getregexp(np->n_right), cp) == REG_OK)
1212 			return (const1);
1213 		return (const0);
1214 
1215 	case NRE:
1216 		cp = exprstring(np->n_left);
1217 		if (regmatch(getregexp(np->n_right), cp) != REG_OK)
1218 			return (const1);
1219 		return (const0);
1220 
1221 	case ASG:
1222 		return (assign(np->n_left, np->n_right));
1223 
1224 	case ARRAY:
1225 		awkerr(badarray, np->n_name);
1226 
1227 	/*FALLTHRU*/
1228 	case UFUNC:
1229 		awkerr(varnotfunc, np->n_name);
1230 
1231 	/*FALLTHRU*/
1232 	default:
1233 		awkerr(gettext("panic: exprreduce(%d)"), t);
1234 		/* NOTREACHED */
1235 	}
1236 	return (0);
1237 }
1238 
1239 /*
1240  * Do arithmetic operators.
1241  */
1242 static NODE *
arithmetic(NODE * np)1243 arithmetic(NODE *np)
1244 {
1245 	register NODE *left, *right;
1246 	int type;
1247 	register INT i1, i2;
1248 	register INT iresult;
1249 	register REAL r1, r2;
1250 
1251 	left = exprreduce(np->n_left);
1252 	if (isreal(left->n_flags) ||
1253 	    (isstring(left->n_flags) && (type_of(left)&FVREAL))) {
1254 		type = FREAL;
1255 		r1 = exprreal(left);
1256 		r2 = exprreal(np->n_right);
1257 	} else {
1258 		i1 = exprint(left);
1259 		right = exprreduce(np->n_right);
1260 		if (isreal(right->n_flags) ||
1261 		    (isstring(right->n_flags) && (type_of(right)&FVREAL))) {
1262 
1263 			type = FREAL;
1264 			r1 = i1;
1265 			r2 = exprreal(right);
1266 		} else {
1267 			type = FINT;
1268 			i2 = exprint(right);
1269 		}
1270 	}
1271 reswitch:
1272 	switch (np->n_type) {
1273 	case ADD:
1274 		if (type == FINT) {
1275 			iresult = i1 + i2;
1276 			addoverflow();
1277 		} else
1278 			r1 += r2;
1279 		break;
1280 
1281 	/*
1282 	 * Strategically placed between ADD and SUB
1283 	 * so "jo" branches will reach on 80*86
1284 	 */
1285 	overflow:
1286 		r1 = i1;
1287 		r2 = i2;
1288 		type = FREAL;
1289 		goto reswitch;
1290 
1291 	case SUB:
1292 		if (type == FINT) {
1293 			iresult = i1 - i2;
1294 			suboverflow();
1295 		} else
1296 			r1 -= r2;
1297 		break;
1298 
1299 	case MUL:
1300 		if (type == FINT) {
1301 			iresult = i1 * i2;
1302 			muloverflow();
1303 		} else
1304 			r1 *= r2;
1305 		break;
1306 
1307 	case DIV:
1308 		if (type == FINT) {
1309 			r1 = i1;
1310 			r2 = i2;
1311 			type = FREAL;
1312 		}
1313 		if (r2 == 0.0)
1314 			awkerr(divzero);
1315 		r1 /= r2;
1316 		break;
1317 
1318 	case REM:
1319 		if (type == FINT) {
1320 			if (i2 == 0)
1321 				awkerr(divzero);
1322 			iresult = i1 % i2;
1323 		} else {
1324 			double fmod(double x, double y);
1325 
1326 			errno = 0;
1327 			r1 = fmod(r1, r2);
1328 			if (errno == EDOM)
1329 				awkerr(divzero);
1330 		}
1331 		break;
1332 	}
1333 	return (type == FINT ? intnode(iresult) : realnode(r1));
1334 }
1335 
1336 /*
1337  * Do comparison operators.
1338  */
1339 static NODE *
comparison(NODE * np)1340 comparison(NODE *np)
1341 {
1342 	register NODE *left, *right;
1343 	register int cmp;
1344 	int tl, tr;
1345 	register REAL r1, r2;
1346 	register INT i1, i2;
1347 
1348 	left = np->n_left;
1349 	if (isleaf(left->n_flags)) {
1350 		if (left->n_type == PARM)
1351 			left = left->n_next;
1352 	} else
1353 		left = exprreduce(left);
1354 	tl = left->n_flags;
1355 	right = np->n_right;
1356 	if (isleaf(right->n_flags)) {
1357 		if (right->n_type == PARM)
1358 			right = right->n_next;
1359 	} else {
1360 		++concflag;
1361 		right = exprreduce(right);
1362 		--concflag;
1363 	}
1364 	tr = right->n_flags;
1365 	/*
1366 	 * Posix mandates semantics for the comparison operators that
1367 	 * are incompatible with traditional AWK behaviour. If the following
1368 	 * define is true then awk will use the traditional behaviour.
1369 	 * if it's false, then AWK will use the POSIX-mandated behaviour.
1370 	 */
1371 #define	TRADITIONAL 0
1372 #if TRADITIONAL
1373 	if (!isnumber(tl) || !isnumber(tr)) {
1374 		cmp = wcscoll((wchar_t *)exprstring(left),
1375 		    (wchar_t *)exprstring(right));
1376 	} else if (isreal(tl) || isreal(tr)) {
1377 		r1 = exprreal(left);
1378 		r2 = exprreal(right);
1379 		if (r1 < r2)
1380 			cmp = -1;
1381 		else if (r1 > r2)
1382 			cmp = 1;
1383 		else
1384 			cmp = 0;
1385 	} else {
1386 		i1 = exprint(left);
1387 		i2 = exprint(right);
1388 		if (i1 < i2)
1389 			cmp = -1;
1390 		else if (i1 > i2)
1391 			cmp = 1;
1392 		else
1393 			cmp = 0;
1394 	}
1395 #else
1396 	if (!isnumber(tl) && !isnumber(tr)) {
1397 do_strcmp:
1398 		cmp = wcscoll((wchar_t *)exprstring(left),
1399 		    (wchar_t *)exprstring(right));
1400 	} else {
1401 		if (isstring(tl))
1402 			tl = type_of(left);
1403 		if (isstring(tr))
1404 			tr = type_of(right);
1405 		if (!isnumber(tl) || !isnumber(tr))
1406 			goto do_strcmp;
1407 		if (isreal(tl) || isreal(tr)) {
1408 			r1 = exprreal(left);
1409 			r2 = exprreal(right);
1410 			if (r1 < r2)
1411 				cmp = -1;
1412 			else if (r1 > r2)
1413 				cmp = 1;
1414 			else
1415 				cmp = 0;
1416 		} else {
1417 			i1 = exprint(left);
1418 			i2 = exprint(right);
1419 			if (i1 < i2)
1420 				cmp = -1;
1421 			else if (i1 > i2)
1422 				cmp = 1;
1423 			else
1424 				cmp = 0;
1425 		}
1426 	}
1427 #endif
1428 	switch (np->n_type) {
1429 	case EQ:
1430 		return (cmp == 0 ? const1 : const0);
1431 
1432 	case  NE:
1433 		return (cmp != 0 ? const1 : const0);
1434 
1435 	case GE:
1436 		return (cmp >= 0 ? const1 : const0);
1437 
1438 	case LE:
1439 		return (cmp <= 0 ? const1 : const0);
1440 
1441 	case GT:
1442 		return (cmp > 0 ? const1 : const0);
1443 
1444 	case LT:
1445 		return (cmp < 0 ? const1 : const0);
1446 
1447 	default:
1448 		awkerr(interr, "comparison");
1449 	}
1450 	/* NOTREACHED */
1451 	return (0);
1452 }
1453 
1454 /*
1455  * Return the type of a constant that is a string.
1456  * The node must be a FSTRING type and the return value
1457  * will possibly have FVINT or FVREAL or'ed in.
1458  */
1459 static int
type_of(NODE * np)1460 type_of(NODE *np)
1461 {
1462 	wchar_t *cp;
1463 	int somedigits = 0;
1464 	int seene = 0;
1465 	int seenradix = 0;
1466 	int seensign = 0;
1467 	int digitsaftere = 0;
1468 
1469 	cp = (wchar_t *)np->n_string;
1470 	if (*cp == '\0')
1471 		return (FSTRING|FVINT);
1472 	while (iswspace(*cp))
1473 		cp++;
1474 	if (*cp == '-' || *cp == '+')
1475 		cp++;
1476 	while (*cp != '\0') {
1477 		switch (*cp) {
1478 		case '0':
1479 		case '1':
1480 		case '2':
1481 		case '3':
1482 		case '4':
1483 		case '5':
1484 		case '6':
1485 		case '7':
1486 		case '8':
1487 		case '9':
1488 			if (seene)
1489 				digitsaftere = 1;
1490 			somedigits++;
1491 			break;
1492 
1493 		case 'E':
1494 		case 'e':
1495 			if (seene || !somedigits)
1496 				return (FSTRING);
1497 			seene = 1;
1498 			break;
1499 
1500 		case '+':
1501 		case '-':
1502 			if (seensign || !seene || digitsaftere)
1503 				return (FSTRING);
1504 			seensign = 1;
1505 			break;
1506 
1507 		default:
1508 			if (*cp == radixpoint) {
1509 				if (seenradix || seene || (!somedigits &&
1510 				    !iswdigit(*++cp)))
1511 					return (FSTRING);
1512 			} else
1513 				return (FSTRING);
1514 			seenradix = 1;
1515 		}
1516 		cp++;
1517 	}
1518 	if (somedigits == 0)
1519 		return (FSTRING);
1520 	if (somedigits >= MAXDIGINT || seenradix || seene) {
1521 		if (seensign && !digitsaftere)
1522 			return (FSTRING);
1523 		else
1524 			return (FSTRING|FVREAL);
1525 	} else
1526 		return (FSTRING|FVINT);
1527 }
1528 
1529 /*
1530  * Return a field rvalue.
1531  */
1532 static NODE *
rfield(INT fieldno)1533 rfield(INT fieldno)
1534 {
1535 	register wchar_t *cp;
1536 
1537 	if (fieldno == 0)
1538 		return (stringnode(linebuf, FSTATIC|FSENSE, lbuflen));
1539 	if (!splitdone)
1540 		fieldsplit();
1541 	if (fieldno > nfield || fieldno < 0)
1542 		return (stringnode(_null, FSTATIC, 0));
1543 	cp = fields[fieldno-1];
1544 	return (stringnode(cp, FSTATIC|FSENSE, wcslen(cp)));
1545 }
1546 
1547 /*
1548  * Split linebuf into fields.  Done only once
1549  * per input record (maximum).
1550  */
1551 void
fieldsplit()1552 fieldsplit()
1553 {
1554 	register wchar_t *ip, *op;
1555 	register int n;
1556 	wchar_t *ep;
1557 
1558 	if (fieldbuf == NULL)
1559 		fieldbuf = emalloc(NLINE * sizeof (wchar_t));
1560 	fcount = 0;
1561 	ep = linebuf;
1562 	op = fieldbuf;
1563 	while ((ip = (*awkfield)(&ep)) != NULL) {
1564 		fields[fcount++] = op;
1565 		if (fcount > NFIELD)
1566 			awkerr(tmfld, NFIELD);
1567 		n = ep-ip;
1568 		(void) memcpy(op, ip, n * sizeof (wchar_t));
1569 		op += n;
1570 		*op++ = '\0';
1571 	}
1572 	if (varNF->n_flags & FINT)
1573 		varNF->n_int = fcount;
1574 	else {
1575 		constant->n_int = fcount;
1576 		(void) nassign(varNF, constant);
1577 	}
1578 	nfield = fcount;
1579 	splitdone++;
1580 }
1581 
1582 /*
1583  * Assign to a field as an lvalue.
1584  * Return the unevaluated node as one doesn't always need it
1585  * evaluated in an assignment.
1586  */
1587 static NODE *
lfield(INT fieldno,NODE * np)1588 lfield(INT fieldno, NODE *np)
1589 {
1590 	register wchar_t *cp;
1591 	register wchar_t *op;
1592 	register wchar_t *sep;
1593 	register int i;
1594 	register wchar_t *newval;
1595 	register int seplen;
1596 	register int newlen;
1597 
1598 	newlen = wcslen(newval = (wchar_t *)exprstring(np));
1599 	if (fieldno == 0) {
1600 		splitdone = 0;
1601 		(void) memcpy(linebuf, newval, (newlen+1) * sizeof (wchar_t));
1602 		lbuflen = newlen;
1603 		fieldsplit();
1604 	} else {
1605 		seplen = wcslen(sep = (wchar_t *)exprstring(varOFS));
1606 		if (!splitdone)
1607 			fieldsplit();
1608 		if (--fieldno < nfield &&
1609 		    (newlen <= wcslen(fields[fieldno]))) {
1610 			(void) memcpy(fields[fieldno], newval,
1611 				(newlen+1) * sizeof (wchar_t));
1612 		} else {
1613 			register wchar_t *buf;
1614 
1615 			buf = fieldbuf;
1616 			fieldbuf = emalloc(NLINE * sizeof (wchar_t));
1617 			if (fieldno >= nfield) {
1618 				if (fieldno >= NFIELD)
1619 					awkerr(tmfld, NFIELD);
1620 				while (nfield < fieldno)
1621 					fields[nfield++] = _null;
1622 				++nfield;
1623 			}
1624 			fields[fieldno] = newval;
1625 			op = fieldbuf;
1626 			for (i = 0; i < nfield; i++) {
1627 				newlen = wcslen(cp = fields[i])+1;
1628 				fields[i] = op;
1629 				if (op+newlen >= fieldbuf+NLINE)
1630 					awkerr(toolong, NLINE);
1631 				(void) memcpy(op, cp,
1632 				    newlen * sizeof (wchar_t));
1633 				op += newlen;
1634 			}
1635 			free(buf);
1636 		}
1637 		/*
1638 		 * Reconstruct $0
1639 		 */
1640 		op = linebuf;
1641 		i = 0;
1642 		while (i < nfield) {
1643 			newlen = wcslen(cp = fields[i++]);
1644 			(void) memcpy(op, cp, newlen * sizeof (wchar_t));
1645 			op += newlen;
1646 			if (i < nfield) {
1647 				(void) memcpy(op, sep,
1648 					seplen * sizeof (wchar_t));
1649 				op += seplen;
1650 			}
1651 			if (op >= &linebuf[NLINE])
1652 				awkerr(toolong, NLINE);
1653 		}
1654 		*op = '\0';
1655 		lbuflen = op-linebuf;
1656 		if (varNF->n_flags & FINT)
1657 			varNF->n_int = nfield;
1658 		else {
1659 			constant->n_int = nfield;
1660 			(void) nassign(varNF, constant);
1661 		}
1662 	}
1663 	return (np);
1664 }
1665 
1666 /*
1667  * Do a user function.
1668  * Each formal parameter must:
1669  *	have the actual parameter assigned to it (call by value),
1670  *	have a pointer to an array put into it (call by reference),
1671  *	and be made undefined (extra formal parameters)
1672  */
1673 static NODE *
userfunc(NODE * np)1674 userfunc(NODE *np)
1675 {
1676 	register NODE *temp;
1677 	NODE *fnp;
1678 
1679 	if ((fnp = np->n_left) == NNULL)
1680 		awkerr(gettext("impossible function call"));
1681 	if (fnp->n_type != UFUNC)
1682 		awkerr(varnotfunc, fnp->n_name);
1683 
1684 #ifndef M_STKCHK
1685 	if (slevel >= NRECUR)
1686 		awkerr(gettext("function \"%S\" nesting level > %u"),
1687 		    fnp->n_name, NRECUR);
1688 #else
1689 	if (!M_STKCHK)
1690 		awkerr(gettext("function \"%s\" nesting level too deep"),
1691 		    fnp->n_name);
1692 #endif
1693 
1694 	fnp = fnp->n_ufunc;
1695 	{
1696 		register NODE *formal;
1697 		register NODE *actual;
1698 		NODE *formlist, *actlist, *templist, *temptail;
1699 
1700 		templist = temptail = NNULL;
1701 		actlist = np->n_right;
1702 		formlist = fnp->n_left;
1703 		/*
1704 		 * pass through formal list, setting up a list
1705 		 * (on templist) containing temps for the values
1706 		 * of the actuals.
1707 		 * If the actual list runs out before the formal
1708 		 * list, assign 'constundef' as the value
1709 		 */
1710 		while ((formal = getlist(&formlist)) != NNULL) {
1711 			register NODE *array;
1712 			register int t;
1713 			register size_t len;
1714 			register int scope_tag;
1715 
1716 			actual = getlist(&actlist);
1717 			if (actual == NNULL) {
1718 				actual = constundef;
1719 				scope_tag = slevel+1;
1720 			} else
1721 				scope_tag = 0;
1722 			array = actual;
1723 			switch (actual->n_type) {
1724 			case ARRAY:
1725 				t = ARRAY;
1726 				scope_tag = 0;
1727 				break;
1728 
1729 			case PARM:
1730 				array = actual = actual->n_next;
1731 				t = actual->n_type;
1732 				scope_tag = actual->n_scope;
1733 				if (!(actual->n_flags & FLARRAY))
1734 					array = actual->n_alink;
1735 				break;
1736 
1737 			default:
1738 				t = VAR;
1739 				break;
1740 			}
1741 			temp = emptynode(t, len = wcslen(formal->n_name));
1742 			(void) memcpy(temp->n_name, formal->n_name,
1743 			    (len+1) * sizeof (wchar_t));
1744 			temp->n_flags = FSTRING|FVINT;
1745 			temp->n_string = _null;
1746 			temp->n_strlen = 0;
1747 			if (t == VAR)
1748 				(void) assign(temp, actual);
1749 			if (t != ARRAY)
1750 				temp->n_flags |= FLARRAY;
1751 			temp->n_scope = scope_tag;
1752 			/*
1753 			 * link to actual parameter in case of promotion to
1754 			 * array
1755 			 */
1756 			if (actual != constundef)
1757 				temp->n_alink = actual;
1758 			/*
1759 			 * Build the templist
1760 			 */
1761 			if (templist != NNULL) {
1762 				temptail->n_next = temp;
1763 				temptail = temp;
1764 			} else
1765 				templist = temptail = temp;
1766 			temp->n_next = NNULL;
1767 			if (actual->n_type == CONSTANT)
1768 				temp->n_alink = temp;
1769 			else
1770 				temp->n_alink = array;
1771 		}
1772 		/*
1773 		 * Bind results of the evaluation of actuals to formals.
1774 		 */
1775 		formlist = fnp->n_left;
1776 		while (templist != NNULL) {
1777 			temp = templist;
1778 			templist = temp->n_next;
1779 			formal = getlist(&formlist);
1780 			temp->n_next = formal->n_next;
1781 			formal->n_next = temp;
1782 
1783 
1784 
1785 
1786 
1787 
1788 
1789 
1790 		}
1791 	}
1792 	{
1793 		register NODE *savenode = curnode;
1794 
1795 		++slevel;
1796 		if (action(fnp->n_right) == RETURN)
1797 			np = retval; else
1798 			np = const0;
1799 		curnode = savenode;
1800 	}
1801 	{
1802 		register NODE *formal;
1803 		NODE *formlist;
1804 
1805 		formlist = fnp->n_left;
1806 		while ((formal = getlist(&formlist)) != NNULL) {
1807 			temp = formal->n_next;
1808 			formal->n_next = temp->n_next;
1809 			/* if node is a local array, free the elements */
1810 			if (temp->n_type == ARRAY && (temp->n_scope == slevel))
1811 				delarray(temp);
1812 			freenode(temp);
1813 		}
1814 	}
1815 	--slevel;
1816 	return (np);
1817 }
1818 
1819 /*
1820  * Get the regular expression from an expression tree.
1821  */
1822 REGEXP
getregexp(NODE * np)1823 getregexp(NODE *np)
1824 {
1825 	if (np->n_type == RE)
1826 		return (np->n_regexp);
1827 	np = renode((wchar_t *)exprstring(np));
1828 	return (np->n_regexp);
1829 }
1830 
1831 /*
1832  * Get the next element from a list.
1833  */
1834 NODE *
getlist(NODE ** npp)1835 getlist(NODE **npp)
1836 {
1837 	register NODE *np;
1838 
1839 	if ((np = *npp) == NNULL)
1840 		return (np);
1841 	if (np->n_type == COMMA) {
1842 		*npp = np->n_right;
1843 		return (np->n_left);
1844 	} else {
1845 		*npp = NNULL;
1846 		return (np);
1847 	}
1848 }
1849 
1850 /*
1851  * if statement.
1852  */
1853 static int
s_if(NODE * np)1854 s_if(NODE *np)
1855 {
1856 	register NODE *xp;
1857 	register int test;
1858 
1859 	test = exprtest(np->n_left);
1860 	xp = np->n_right;
1861 	if (xp->n_type != ELSE)
1862 		awkerr(interr, "if/else");
1863 	if (test)
1864 		xp = xp->n_left;
1865 	else
1866 		xp = xp->n_right;
1867 	return (action(xp));
1868 }
1869 
1870 /*
1871  * while and do{}while statements.
1872  */
1873 static int
s_while(NODE * np)1874 s_while(NODE *np)
1875 {
1876 	register int act = 0;
1877 
1878 	if (np->n_type == DO)
1879 		goto dowhile;
1880 	for (;;) {
1881 		if (exprtest(np->n_left) == 0)
1882 			break;
1883 	dowhile:
1884 		if ((act = action(np->n_right)) != 0) {
1885 			switch (act) {
1886 			case BREAK:
1887 				act = 0;
1888 				break;
1889 
1890 			case CONTINUE:
1891 				act = 0;
1892 				continue;
1893 			}
1894 			break;
1895 		}
1896 	}
1897 	return (act);
1898 }
1899 
1900 /*
1901  * for statement.
1902  */
1903 static int
s_for(NODE * np)1904 s_for(NODE *np)
1905 {
1906 	register NODE *testnp, *incnp, *initnp;
1907 	register int act = 0;
1908 	NODE *listp;
1909 
1910 	listp = np->n_left;
1911 	initnp = getlist(&listp);
1912 	testnp = getlist(&listp);
1913 	incnp = getlist(&listp);
1914 	if (initnp != NNULL)
1915 		(void) exprreduce(initnp);
1916 	for (;;) {
1917 		if (exprtest(testnp) == 0)
1918 			break;
1919 		if ((act = action(np->n_right)) != 0) {
1920 			switch (act) {
1921 			case BREAK:
1922 				act = 0;
1923 				break;
1924 
1925 			case CONTINUE:
1926 				act = 0;
1927 				goto clabel;
1928 			}
1929 			break;
1930 		}
1931 	clabel:
1932 		if (incnp != NNULL)
1933 			(void) exprreduce(incnp);
1934 	}
1935 	return (act);
1936 }
1937 
1938 /*
1939  * for variable in array statement.
1940  */
1941 static int
s_forin(NODE * np)1942 s_forin(NODE *np)
1943 {
1944 	register NODE *left;
1945 	register int act = 0;
1946 	register NODE *var;
1947 	register NODE **nnp;
1948 	register NODE *statement;
1949 	register int issymtab = 0;
1950 	wchar_t *index;
1951 	register int alen;
1952 	int nbuck;
1953 
1954 	left = np->n_left;
1955 	statement = np->n_right;
1956 	if (left->n_type != IN)
1957 		awkerr(interr, "for (var in array)");
1958 	if ((var = left->n_left)->n_type == PARM)
1959 		var = var->n_next;
1960 	np = left->n_right;
1961 	if (np->n_type == PARM) {
1962 		np = np->n_next;
1963 		if (!(np->n_flags & FLARRAY))
1964 			np = np->n_alink;
1965 	}
1966 	if (np == varSYMTAB) {
1967 		issymtab++;
1968 		np = NNULL;
1969 		nbuck = 0;
1970 	} else {
1971 		/*
1972 		 * At this point if the node is not actually an array
1973 		 * check to see if it has already been established as
1974 		 * a scalar. If it is a scalar then flag an error. If
1975 		 * not then promote the object to an array type.
1976 		 */
1977 		if (np->n_type != ARRAY) {
1978 			if (!isstring(np->n_flags) || np->n_string != _null)
1979 				awkerr(notarray, np->n_name);
1980 			else {
1981 				/* promotion to array */
1982 				promote(np);
1983 				if (np->n_alink != NNULL)
1984 					if (!(np->n_flags & FLARRAY))
1985 						np = np->n_alink;
1986 			}
1987 		}
1988 		/*
1989 		 * Set up a pointer to the first node in the array list.
1990 		 * Save this pointer on the delete stack. This information
1991 		 * is used by the delete function to advance any pointers
1992 		 * that might be pointing at a node which has been deleted.
1993 		 * See the delsymtab() function for more information. Note
1994 		 * that if the a_link field is nil, then just return 0 since
1995 		 * this array has no elements yet.
1996 		 */
1997 		if ((*(nnp = next_forin) = np->n_alink) == 0)
1998 			return (0);
1999 		if (++next_forin > &forindex[NFORINLOOP])
2000 			awkerr(toodeep, NFORINLOOP);
2001 		/*
2002 		 * array elements have names of the form
2003 		 *	<name>]<index> (global arrays)
2004 		 * or
2005 		 *	<name>[<scope>]<index> (local arrays)
2006 		 * We need to know the offset of the index portion of the
2007 		 * name string in order to place it in the index variable so
2008 		 * we look for the ']'. This is calculated here and then
2009 		 * used below.
2010 		 */
2011 		for (alen = 0; (*nnp)->n_name[alen++] != ']'; )
2012 			if ((*nnp)->n_name[alen] == '\0')
2013 				awkerr(interr, "for: invalid array");
2014 	}
2015 	for (;;) {
2016 		if (issymtab) {
2017 			if ((left = symwalk(&nbuck, &np)) == NNULL)
2018 				break;
2019 			index = left->n_name;
2020 		} else {
2021 			if ((np = *nnp) == NNULL)
2022 				break;
2023 			index = np->n_name+alen;
2024 			*nnp = np->n_alink;
2025 		}
2026 		strassign(var, index, FSTATIC, wcslen(index));
2027 		if ((act = action(statement)) != 0) {
2028 			switch (act) {
2029 			case BREAK:
2030 				act = 0;
2031 				break;
2032 
2033 			case CONTINUE:
2034 				act = 0;
2035 				continue;
2036 			}
2037 			break;
2038 		}
2039 	}
2040 	next_forin--;
2041 	return (act);
2042 }
2043 
2044 /*
2045  * Walk the symbol table using the same algorithm as arraynode.
2046  */
2047 NODE *
symwalk(int * buckp,NODE ** npp)2048 symwalk(int *buckp, NODE **npp)
2049 {
2050 	register NODE *np;
2051 
2052 	np = *npp;
2053 	for (;;) {
2054 		while (np == NNULL) {
2055 			if (*buckp >= NBUCKET)
2056 				return (*npp = NNULL);
2057 			np = symtab[(*buckp)++];
2058 		}
2059 		if (np->n_type == VAR &&
2060 		    (!isstring(np->n_flags) || np->n_string != _null)) {
2061 			*npp = np->n_next;
2062 			return (np);
2063 		}
2064 		np = np->n_next;
2065 	}
2066 	/* NOTREACHED */
2067 }
2068 
2069 /*
2070  * Test the result of an expression.
2071  */
2072 static int
exprtest(NODE * np)2073 exprtest(NODE *np)
2074 {
2075 	register int t;
2076 
2077 	if (np == NNULL)
2078 		return (1);
2079 	if (freelist != NNULL)
2080 		freetemps();
2081 	np = exprreduce(np);
2082 	if (isint(t = np->n_flags)) {
2083 		if (isstring(t))
2084 			return (exprint(np) != 0);
2085 		return (np->n_int != 0);
2086 	}
2087 	if (isreal(t)) {
2088 		REAL rval;
2089 
2090 		rval = isstring(t) ? exprreal(np) : np->n_real;
2091 		return (rval != 0.0);
2092 	}
2093 	return (*(wchar_t *)exprstring(np) != '\0');
2094 }
2095 
2096 /*
2097  * Return malloc'ed space that holds the given name "[" scope "]" index ...
2098  * concatenated string.
2099  * The node (np) is the list of indices and 'array' is the array name.
2100  */
2101 static wchar_t *
makeindex(NODE * np,wchar_t * array,int tag)2102 makeindex(NODE *np, wchar_t *array, int tag)
2103 {
2104 	static wchar_t tags[sizeof (int)];
2105 	static wchar_t tag_chars[] = M_MB_L("0123456789ABCDEF");
2106 	register wchar_t *cp;
2107 	register NODE *index;
2108 	register uint_t n;
2109 	register int len;
2110 	register wchar_t *indstr;
2111 	register wchar_t *sep;
2112 	register int seplen;
2113 	register int taglen;
2114 
2115 
2116 	/*
2117 	 * calculate and create the tag string
2118 	 */
2119 	for (taglen = 0; tag; tag >>= 4)
2120 		tags[taglen++] = tag_chars[tag & 0xf];
2121 	/*
2122 	 * Special (normal) case: only one index.
2123 	 */
2124 	if (np->n_type != COMMA) {
2125 		wchar_t *ocp;
2126 		size_t i;
2127 
2128 		if (isleaf(np->n_flags) && np->n_type == PARM)
2129 			np = np->n_next;
2130 		if (isstring(np->n_flags)) {
2131 			indstr = np->n_string;
2132 			len = np->n_strlen;
2133 		} else {
2134 			indstr = exprstring(np);
2135 			len = wcslen(indstr);
2136 		}
2137 		i = (n = wcslen(array)) + len + 3 + taglen;
2138 		if (i < NINDEXBUF)
2139 			ocp = indexbuf;
2140 		else
2141 			ocp = emalloc(i * sizeof (wchar_t));
2142 		(void) memcpy(ocp, array, n * sizeof (wchar_t));
2143 		cp = ocp+n;
2144 		if (taglen) {
2145 			*cp++ = '[';
2146 			while (taglen)
2147 				*cp++ = tags[--taglen];
2148 		}
2149 		*cp++ = ']';
2150 		(void) memcpy(cp, indstr, (len+1) * sizeof (wchar_t));
2151 
2152 		return (ocp);
2153 	}
2154 	n = 0;
2155 	seplen = wcslen(sep = (wchar_t *)exprstring(varSUBSEP));
2156 	while ((index = getlist(&np)) != NNULL) {
2157 		indstr = exprstring(index);
2158 		len = wcslen(indstr);
2159 		if (n == 0) {
2160 			cp = emalloc(sizeof (wchar_t) * ((n = wcslen(array)) +
2161 				len + 3 + taglen));
2162 			(void) memcpy(cp, array, n * sizeof (wchar_t));
2163 			if (taglen) {
2164 				cp[n++] = '[';
2165 				while (taglen)
2166 					cp[n++] = tags[--taglen];
2167 			}
2168 			cp[n++] = ']';
2169 		} else {
2170 			cp = erealloc(cp, (n+len+seplen+1) * sizeof (wchar_t));
2171 			(void) memcpy(cp+n, sep, seplen * sizeof (wchar_t));
2172 			n += seplen;
2173 		}
2174 		(void) memcpy(cp+n, indstr, (len+1) * sizeof (wchar_t));
2175 		n += len;
2176 	}
2177 	return (cp);
2178 }
2179 
2180 
2181 /*
2182  * Promote a node to an array. In the simplest case, just set the
2183  * node type field to ARRAY. The more complicated case involves walking
2184  * a list of variables that haven't been determined yet as scalar or array.
2185  * This routine plays with the pointers to avoid recursion.
2186  */
2187 void
promote(NODE * n)2188 promote(NODE *n)
2189 {
2190 	register NODE *prev = NNULL;
2191 	register NODE *next;
2192 
2193 	/*
2194 	 * walk down the variable chain, reversing the pointers and
2195 	 * setting each node to type array.
2196 	 */
2197 	while ((n->n_flags & FLARRAY) && (n->n_alink != n)) {
2198 		n->n_type = ARRAY;
2199 		next = n->n_alink;
2200 		n->n_alink = prev;
2201 		prev = n;
2202 		n = next;
2203 	}
2204 
2205 	/*
2206 	 * If the final entity on the chain is a local variable, then
2207 	 * reset it's alink field to NNULL - normally it points back
2208 	 * to itself - this is used in other parts of the code to
2209 	 * reduce the number of conditionals when handling locals.
2210 	 */
2211 	n->n_type = ARRAY;
2212 	if (n->n_flags & FLARRAY)
2213 		n->n_alink = NNULL;
2214 
2215 	/*
2216 	 * Now walk back up the list setting the alink to point to
2217 	 * the last entry in the chain and clear the 'local array'
2218 	 * flag.
2219 	 */
2220 	while (prev != NNULL) {
2221 		prev->n_flags &= ~FLARRAY;
2222 		next = prev->n_alink;
2223 		prev->n_alink = n;
2224 		prev = next;
2225 	}
2226 }
2227