xref: /freebsd/contrib/one-true-awk/run.c (revision 9f44a47fd07924afc035991af15d84e6585dea4f)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define DEBUG
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <wchar.h>
30 #include <wctype.h>
31 #include <fcntl.h>
32 #include <setjmp.h>
33 #include <limits.h>
34 #include <math.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <time.h>
38 #include <sys/types.h>
39 #include <sys/wait.h>
40 #include "awk.h"
41 #include "awkgram.tab.h"
42 
43 static void stdinit(void);
44 static void flush_all(void);
45 
46 #if 1
47 #define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
48 #else
49 void tempfree(Cell *p) {
50 	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
51 		WARNING("bad csub %d in Cell %d %s",
52 			p->csub, p->ctype, p->sval);
53 	}
54 	if (istemp(p))
55 		tfree(p);
56 }
57 #endif
58 
59 /* do we really need these? */
60 /* #ifdef _NFILE */
61 /* #ifndef FOPEN_MAX */
62 /* #define FOPEN_MAX _NFILE */
63 /* #endif */
64 /* #endif */
65 /*  */
66 /* #ifndef	FOPEN_MAX */
67 /* #define	FOPEN_MAX	40 */	/* max number of open files */
68 /* #endif */
69 /*  */
70 /* #ifndef RAND_MAX */
71 /* #define RAND_MAX	32767 */	/* all that ansi guarantees */
72 /* #endif */
73 
jmp_buf env;	/* set by setjmp() in program(); jump() longjmps here on EXIT */
extern	int	pairstack[];
extern	Awkfloat	srand_seed;

Node	*winner = NULL;	/* root of parse tree */
Cell	*tmps;		/* free temporary cells for execution */

/*
 * Statically allocated cells returned by the interpreter routines: the two
 * boolean results, followed by one OJUMP cell per kind of jump.  Each is
 * published through a global pointer declared right after it.
 */
static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
Cell	*True	= &truecell;
static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*False	= &falsecell;
static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jbreak	= &breakcell;
static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jcont	= &contcell;
static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnext	= &nextcell;
static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnextfile	= &nextfilecell;
static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jexit	= &exitcell;
static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jret	= &retcell;
/* template copied over every cell that gettemp() hands out */
static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };

Node	*curnode = NULL;	/* the node being executed, for debugging */
100 
101 /* buffer memory management */
/*
 * adjbuf: make sure the buffer *pbuf (capacity *psiz) can hold at least
 * minlen bytes, growing it with realloc() when it cannot.  Nothing happens
 * when the buffer is already large enough.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 disables rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none;
 *          it is re-based onto the new buffer after a successful grow
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * return   0 for failure (reachable only when whatrtn is NULL),
 *          !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	if (minlen > *psiz) {
		char *tbuf;
		int rminlen = quantum ? minlen % quantum : 0;
		int boff = pbptr ? *pbptr - *pbuf : 0;
		/* round up to next multiple of quantum */
		if (rminlen) {
			int pad = quantum - rminlen;

			/* rounding must not overflow int; treat as allocation failure */
			if (minlen > INT_MAX - pad) {
				if (whatrtn)
					FATAL("out of memory in %s", whatrtn);
				return 0;
			}
			minlen += pad;
		}
		tbuf = (char *) realloc(*pbuf, minlen);
		/* whatrtn may be NULL by contract, so guard it before handing it to %s */
		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
		if (tbuf == NULL) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;
		}
		*pbuf = tbuf;
		*psiz = minlen;
		if (pbptr)
			*pbptr = tbuf + boff;
	}
	return 1;
}
135 
/*
 * run: entry point for interpreting a parsed program.  Calls stdinit(),
 * executes the tree rooted at a, then calls closeall().
 */
void run(Node *a)	/* execution of parse tree starts here */
{

	stdinit();
	execute(a);
	closeall();
}
143 
144 Cell *execute(Node *u)	/* execute a node of the parse tree */
145 {
146 	Cell *(*proc)(Node **, int);
147 	Cell *x;
148 	Node *a;
149 
150 	if (u == NULL)
151 		return(True);
152 	for (a = u; ; a = a->nnext) {
153 		curnode = a;
154 		if (isvalue(a)) {
155 			x = (Cell *) (a->narg[0]);
156 			if (isfld(x) && !donefld)
157 				fldbld();
158 			else if (isrec(x) && !donerec)
159 				recbld();
160 			return(x);
161 		}
162 		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
163 			FATAL("illegal statement");
164 		proc = proctab[a->nobj-FIRSTTOKEN];
165 		x = (*proc)(a->narg, a->nobj);
166 		if (isfld(x) && !donefld)
167 			fldbld();
168 		else if (isrec(x) && !donerec)
169 			recbld();
170 		if (isexpr(a))
171 			return(x);
172 		if (isjump(x))
173 			return(x);
174 		if (a->nnext == NULL)
175 			return(x);
176 		tempfree(x);
177 	}
178 }
179 
180 
/*
 * program: run the three parts of an awk program in order.
 *
 * env is armed with setjmp() twice.  jump() performs longjmp(env, 1) for
 * EXIT, so an exit during BEGIN or the main loop lands at "ex" (the END
 * section still runs), and an exit during END lands at "ex1".
 * Always returns True.
 */
Cell *program(Node **a, int n)	/* execute an awk program */
{				/* a[0] = BEGIN, a[1] = body, a[2] = END */
	Cell *x;

	if (setjmp(env) != 0)
		goto ex;
	if (a[0]) {		/* BEGIN */
		x = execute(a[0]);
		if (isexit(x))
			return(True);
		if (isjump(x))
			FATAL("illegal break, continue, next or nextfile from BEGIN");
		tempfree(x);
	}
	/* input is read only when there is a body or an END section */
	if (a[1] || a[2])
		while (getrec(&record, &recsize, true) > 0) {
			x = execute(a[1]);
			if (isexit(x))
				break;
			tempfree(x);
		}
  ex:
	if (setjmp(env) != 0)	/* handles exit within END */
		goto ex1;
	if (a[2]) {		/* END */
		x = execute(a[2]);
		if (isbreak(x) || isnext(x) || iscont(x))
			FATAL("illegal break, continue, next or nextfile from END");
		tempfree(x);
	}
  ex1:
	return(True);
}
214 
struct Frame {	/* stack frame for awk function calls */
	int nargs;	/* number of arguments in this call */
	Cell *fcncell;	/* pointer to Cell for function */
	Cell **args;	/* pointer to array of arguments after execute */
	Cell *retval;	/* return value */
};

#define	NARGS	50	/* max args in a call */

/* call() allocates the frame array on first use and grows it 100 frames at a time */
struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
int	nframe = 0;		/* number of frames allocated */
struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
/*
 * call: invoke a user-defined awk function.
 *
 * a[0] evaluates to the function's Cell and a[1] is the list of actual
 * argument expressions.  Arguments that are arrays are passed by
 * reference; everything else is passed as a copycell() copy.  Parameters
 * the caller did not supply are filled with fresh empty CCOPY temporaries.
 * A new Frame is pushed, the body (stored in fcn->sval) is executed, the
 * argument cells are released, and the frame's retval cell is returned.
 *
 * When the body's result is an exit or next jump, that jump cell is
 * returned instead and the frame pointer is left where it is.
 */
Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
{
	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
	int i, ncall, ndef;
	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
	Node *x;
	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
	Cell *y, *z, *fcn;
	char *s;

	fcn = execute(a[0]);	/* the function itself */
	s = fcn->nval;
	if (!isfcn(fcn))
		FATAL("calling undefined function %s", s);
	/* first call ever: allocate the initial block of frames */
	if (frame == NULL) {
		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames calling %s", s);
	}
	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
		ncall++;
	ndef = (int) fcn->fval;			/* args in defn */
	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
	if (ncall > ndef)
		WARNING("function %s called with %d args, uses only %d",
			s, ncall, ndef);
	/* keeps every index used below inside args[] and oargs[] */
	if (ncall + ndef > NARGS)
		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
		y = execute(x);
		oargs[i] = y;	/* remember the caller's cell for array write-back below */
		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
		if (isfcn(y))
			FATAL("can't use function %s as argument in %s", y->nval, s);
		if (isarr(y))
			args[i] = y;	/* arrays by ref */
		else
			args[i] = copycell(y);
		tempfree(y);
	}
	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
		args[i] = gettemp();
		*args[i] = newcopycell;
	}
	frp++;	/* now ok to up frame */
	if (frp >= frame + nframe) {
		int dfp = frp - frame;	/* old index */
		frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames in %s", s);
		frp = frame + dfp;	/* re-base: realloc may have moved the array */
	}
	frp->fcncell = fcn;
	frp->args = args;
	frp->nargs = ndef;	/* number defined with (excess are locals) */
	frp->retval = gettemp();

	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
	y = execute((Node *)(fcn->sval));	/* execute body */
	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));

	/* release the parameter cells */
	for (i = 0; i < ndef; i++) {
		Cell *t = frp->args[i];
		if (isarr(t)) {
			/* a CCOPY cell that is now an array was turned into one by the body */
			if (t->csub == CCOPY) {
				if (i >= ncall) {
					/* it was a local: discard its symbol table */
					freesymtab(t);
					t->csub = CTEMP;
					tempfree(t);
				} else {
					/* it was a caller argument: pass the array back to the caller's cell */
					oargs[i]->tval = t->tval;
					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
					oargs[i]->sval = t->sval;
					tempfree(t);
				}
			}
		} else if (t != y) {	/* kludge to prevent freeing twice */
			t->csub = CTEMP;
			tempfree(t);
		} else if (t == y && t->csub == CCOPY) {
			/* the body's result is this parameter cell: free it once, here */
			t->csub = CTEMP;
			tempfree(t);
			freed = 1;
		}
	}
	tempfree(fcn);
	if (isexit(y) || isnext(y))
		return y;
	if (freed == 0) {
		tempfree(y);	/* don't free twice! */
	}
	z = frp->retval;			/* return value */
	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
	frp--;
	return(z);
}
326 
327 Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
328 {
329 	Cell *y;
330 
331 	/* copy is not constant or field */
332 
333 	y = gettemp();
334 	y->tval = x->tval & ~(CON|FLD|REC);
335 	y->csub = CCOPY;	/* prevents freeing until call is over */
336 	y->nval = x->nval;	/* BUG? */
337 	if (isstr(x) /* || x->ctype == OCELL */) {
338 		y->sval = tostring(x->sval);
339 		y->tval &= ~DONTFREE;
340 	} else
341 		y->tval |= DONTFREE;
342 	y->fval = x->fval;
343 	return y;
344 }
345 
346 Cell *arg(Node **a, int n)	/* nth argument of a function */
347 {
348 
349 	n = ptoi(a[0]);	/* argument number, counting from 0 */
350 	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
351 	if (n+1 > frp->nargs)
352 		FATAL("argument #%d of function %s was not supplied",
353 			n+1, frp->fcncell->nval);
354 	return frp->args[n];
355 }
356 
357 Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
358 {
359 	Cell *y;
360 
361 	switch (n) {
362 	case EXIT:
363 		if (a[0] != NULL) {
364 			y = execute(a[0]);
365 			errorflag = (int) getfval(y);
366 			tempfree(y);
367 		}
368 		longjmp(env, 1);
369 	case RETURN:
370 		if (a[0] != NULL) {
371 			y = execute(a[0]);
372 			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
373 				setsval(frp->retval, getsval(y));
374 				frp->retval->fval = getfval(y);
375 				frp->retval->tval |= NUM;
376 			}
377 			else if (y->tval & STR)
378 				setsval(frp->retval, getsval(y));
379 			else if (y->tval & NUM)
380 				setfval(frp->retval, getfval(y));
381 			else		/* can't happen */
382 				FATAL("bad type variable %d", y->tval);
383 			tempfree(y);
384 		}
385 		return(jret);
386 	case NEXT:
387 		return(jnext);
388 	case NEXTFILE:
389 		nextfile();
390 		return(jnextfile);
391 	case BREAK:
392 		return(jbreak);
393 	case CONTINUE:
394 		return(jcont);
395 	default:	/* can't happen */
396 		FATAL("illegal jump type %d", n);
397 	}
398 	return 0;	/* not reached */
399 }
400 
401 Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
402 {		/* a[0] is variable, a[1] is operator, a[2] is filename */
403 	Cell *r, *x;
404 	extern Cell **fldtab;
405 	FILE *fp;
406 	char *buf;
407 	int bufsize = recsize;
408 	int mode;
409 	bool newflag;
410 	double result;
411 
412 	if ((buf = (char *) malloc(bufsize)) == NULL)
413 		FATAL("out of memory in getline");
414 
415 	fflush(stdout);	/* in case someone is waiting for a prompt */
416 	r = gettemp();
417 	if (a[1] != NULL) {		/* getline < file */
418 		x = execute(a[2]);		/* filename */
419 		mode = ptoi(a[1]);
420 		if (mode == '|')		/* input pipe */
421 			mode = LE;	/* arbitrary flag */
422 		fp = openfile(mode, getsval(x), &newflag);
423 		tempfree(x);
424 		if (fp == NULL)
425 			n = -1;
426 		else
427 			n = readrec(&buf, &bufsize, fp, newflag);
428 		if (n <= 0) {
429 			;
430 		} else if (a[0] != NULL) {	/* getline var <file */
431 			x = execute(a[0]);
432 			setsval(x, buf);
433 			if (is_number(x->sval, & result)) {
434 				x->fval = result;
435 				x->tval |= NUM;
436 			}
437 			tempfree(x);
438 		} else {			/* getline <file */
439 			setsval(fldtab[0], buf);
440 			if (is_number(fldtab[0]->sval, & result)) {
441 				fldtab[0]->fval = result;
442 				fldtab[0]->tval |= NUM;
443 			}
444 		}
445 	} else {			/* bare getline; use current input */
446 		if (a[0] == NULL)	/* getline */
447 			n = getrec(&record, &recsize, true);
448 		else {			/* getline var */
449 			n = getrec(&buf, &bufsize, false);
450 			x = execute(a[0]);
451 			setsval(x, buf);
452 			if (is_number(x->sval, & result)) {
453 				x->fval = result;
454 				x->tval |= NUM;
455 			}
456 			tempfree(x);
457 		}
458 	}
459 	setfval(r, (Awkfloat) n);
460 	free(buf);
461 	return r;
462 }
463 
/*
 * getnf: make sure the fields have been split (fldbld() when donefld is
 * not set), then return a[0] reinterpreted as a Cell pointer.
 * NOTE(review): a[0] is presumably the NF cell stored in the parse
 * node; confirm against the grammar.
 */
Cell *getnf(Node **a, int n)	/* get NF */
{
	if (!donefld)
		fldbld();
	return (Cell *) a[0];
}
470 
471 static char *
472 makearraystring(Node *p, const char *func)
473 {
474 	char *buf;
475 	int bufsz = recsize;
476 	size_t blen;
477 
478 	if ((buf = (char *) malloc(bufsz)) == NULL) {
479 		FATAL("%s: out of memory", func);
480 	}
481 
482 	blen = 0;
483 	buf[blen] = '\0';
484 
485 	for (; p; p = p->nnext) {
486 		Cell *x = execute(p);	/* expr */
487 		char *s = getsval(x);
488 		size_t seplen = strlen(getsval(subseploc));
489 		size_t nsub = p->nnext ? seplen : 0;
490 		size_t slen = strlen(s);
491 		size_t tlen = blen + slen + nsub;
492 
493 		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
494 			FATAL("%s: out of memory %s[%s...]",
495 			    func, x->nval, buf);
496 		}
497 		memcpy(buf + blen, s, slen);
498 		if (nsub) {
499 			memcpy(buf + blen + slen, *SUBSEP, nsub);
500 		}
501 		buf[tlen] = '\0';
502 		blen = tlen;
503 		tempfree(x);
504 	}
505 	return buf;
506 }
507 
508 Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
509 {
510 	Cell *x, *z;
511 	char *buf;
512 
513 	x = execute(a[0]);	/* Cell* for symbol table */
514 	buf = makearraystring(a[1], __func__);
515 	if (!isarr(x)) {
516 		DPRINTF("making %s into an array\n", NN(x->nval));
517 		if (freeable(x))
518 			xfree(x->sval);
519 		x->tval &= ~(STR|NUM|DONTFREE);
520 		x->tval |= ARR;
521 		x->sval = (char *) makesymtab(NSYMTAB);
522 	}
523 	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
524 	z->ctype = OCELL;
525 	z->csub = CVAR;
526 	tempfree(x);
527 	free(buf);
528 	return(z);
529 }
530 
531 Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
532 {
533 	Cell *x;
534 
535 	x = execute(a[0]);	/* Cell* for symbol table */
536 	if (x == symtabloc) {
537 		FATAL("cannot delete SYMTAB or its elements");
538 	}
539 	if (!isarr(x))
540 		return True;
541 	if (a[1] == NULL) {	/* delete the elements, not the table */
542 		freesymtab(x);
543 		x->tval &= ~STR;
544 		x->tval |= ARR;
545 		x->sval = (char *) makesymtab(NSYMTAB);
546 	} else {
547 		char *buf = makearraystring(a[1], __func__);
548 		freeelem(x, buf);
549 		free(buf);
550 	}
551 	tempfree(x);
552 	return True;
553 }
554 
555 Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
556 {
557 	Cell *ap, *k;
558 	char *buf;
559 
560 	ap = execute(a[1]);	/* array name */
561 	if (!isarr(ap)) {
562 		DPRINTF("making %s into an array\n", ap->nval);
563 		if (freeable(ap))
564 			xfree(ap->sval);
565 		ap->tval &= ~(STR|NUM|DONTFREE);
566 		ap->tval |= ARR;
567 		ap->sval = (char *) makesymtab(NSYMTAB);
568 	}
569 	buf = makearraystring(a[0], __func__);
570 	k = lookup(buf, (Array *) ap->sval);
571 	tempfree(ap);
572 	free(buf);
573 	if (k == NULL)
574 		return(False);
575 	else
576 		return(True);
577 }
578 
579 
580 Cell *matchop(Node **a, int n)	/* ~ and match() */
581 {
582 	Cell *x, *y;
583 	char *s, *t;
584 	int i;
585 	fa *pfa;
586 	int (*mf)(fa *, const char *) = match, mode = 0;
587 
588 	if (n == MATCHFCN) {
589 		mf = pmatch;
590 		mode = 1;
591 	}
592 	x = execute(a[1]);	/* a[1] = target text */
593 	s = getsval(x);
594 	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
595 		i = (*mf)((fa *) a[2], s);
596 	else {
597 		y = execute(a[2]);	/* a[2] = regular expr */
598 		t = getsval(y);
599 		pfa = makedfa(t, mode);
600 		i = (*mf)(pfa, s);
601 		tempfree(y);
602 	}
603 	tempfree(x);
604 	if (n == MATCHFCN) {
605 		int start = patbeg - s + 1;
606 		if (patlen < 0)
607 			start = 0;
608 		setfval(rstartloc, (Awkfloat) start);
609 		setfval(rlengthloc, (Awkfloat) patlen);
610 		x = gettemp();
611 		x->tval = NUM;
612 		x->fval = start;
613 		return x;
614 	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
615 		return(True);
616 	else
617 		return(False);
618 }
619 
620 
621 Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
622 {
623 	Cell *x, *y;
624 	int i;
625 
626 	x = execute(a[0]);
627 	i = istrue(x);
628 	tempfree(x);
629 	switch (n) {
630 	case BOR:
631 		if (i) return(True);
632 		y = execute(a[1]);
633 		i = istrue(y);
634 		tempfree(y);
635 		if (i) return(True);
636 		else return(False);
637 	case AND:
638 		if ( !i ) return(False);
639 		y = execute(a[1]);
640 		i = istrue(y);
641 		tempfree(y);
642 		if (i) return(True);
643 		else return(False);
644 	case NOT:
645 		if (i) return(False);
646 		else return(True);
647 	default:	/* can't happen */
648 		FATAL("unknown boolean operator %d", n);
649 	}
650 	return 0;	/*NOTREACHED*/
651 }
652 
653 Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
654 {
655 	int i;
656 	Cell *x, *y;
657 	Awkfloat j;
658 
659 	x = execute(a[0]);
660 	y = execute(a[1]);
661 	if (x->tval&NUM && y->tval&NUM) {
662 		j = x->fval - y->fval;
663 		i = j<0? -1: (j>0? 1: 0);
664 	} else {
665 		i = strcmp(getsval(x), getsval(y));
666 	}
667 	tempfree(x);
668 	tempfree(y);
669 	switch (n) {
670 	case LT:	if (i<0) return(True);
671 			else return(False);
672 	case LE:	if (i<=0) return(True);
673 			else return(False);
674 	case NE:	if (i!=0) return(True);
675 			else return(False);
676 	case EQ:	if (i == 0) return(True);
677 			else return(False);
678 	case GE:	if (i>=0) return(True);
679 			else return(False);
680 	case GT:	if (i>0) return(True);
681 			else return(False);
682 	default:	/* can't happen */
683 		FATAL("unknown relational operator %d", n);
684 	}
685 	return 0;	/*NOTREACHED*/
686 }
687 
688 void tfree(Cell *a)	/* free a tempcell */
689 {
690 	if (freeable(a)) {
691 		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
692 		xfree(a->sval);
693 	}
694 	if (a == tmps)
695 		FATAL("tempcell list is curdled");
696 	a->cnext = tmps;
697 	tmps = a;
698 }
699 
700 Cell *gettemp(void)	/* get a tempcell */
701 {	int i;
702 	Cell *x;
703 
704 	if (!tmps) {
705 		tmps = (Cell *) calloc(100, sizeof(*tmps));
706 		if (!tmps)
707 			FATAL("out of space for temporaries");
708 		for (i = 1; i < 100; i++)
709 			tmps[i-1].cnext = &tmps[i];
710 		tmps[i-1].cnext = NULL;
711 	}
712 	x = tmps;
713 	tmps = x->cnext;
714 	*x = tempcell;
715 	return(x);
716 }
717 
718 Cell *indirect(Node **a, int n)	/* $( a[0] ) */
719 {
720 	Awkfloat val;
721 	Cell *x;
722 	int m;
723 	char *s;
724 
725 	x = execute(a[0]);
726 	val = getfval(x);	/* freebsd: defend against super large field numbers */
727 	if ((Awkfloat)INT_MAX < val)
728 		FATAL("trying to access out of range field %s", x->nval);
729 	m = (int) val;
730 	if (m == 0 && !is_number(s = getsval(x), NULL))	/* suspicion! */
731 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
732 		/* BUG: can x->nval ever be null??? */
733 	tempfree(x);
734 	x = fieldadr(m);
735 	x->ctype = OCELL;	/* BUG?  why are these needed? */
736 	x->csub = CFLD;
737 	return(x);
738 }
739 
/*
 * substr: implement substr(s, m [, n]).
 *
 *   a[0]  the string, a[1] the 1-based start m, a[2] the optional length n
 *
 * With k = strlen(s) + 1, m is clamped to 1..k and n to 0..k-m; a missing
 * n defaults to the rest of the string.  An empty source string yields an
 * empty result immediately.  The result is a new temporary cell.
 */
Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
{
	int k, m, n;
	char *s;
	int temp;
	Cell *x, *y, *z = NULL;

	x = execute(a[0]);
	y = execute(a[1]);
	if (a[2] != NULL)
		z = execute(a[2]);
	s = getsval(x);
	k = strlen(s) + 1;	/* one past the last valid start position */
	if (k <= 1) {
		/* empty source string: result is empty whatever m and n are */
		tempfree(x);
		tempfree(y);
		if (a[2] != NULL) {
			tempfree(z);
		}
		x = gettemp();
		setsval(x, "");
		return(x);
	}
	m = (int) getfval(y);
	if (m <= 0)
		m = 1;
	else if (m > k)
		m = k;
	tempfree(y);
	if (a[2] != NULL) {
		n = (int) getfval(z);
		tempfree(z);
	} else
		n = k - 1;
	if (n < 0)
		n = 0;
	else if (n > k - m)
		n = k - m;
	DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
	y = gettemp();
	/*
	 * Cut the source string in place: plant a NUL just past the wanted
	 * span, copy from the start position, then put the saved byte back.
	 */
	temp = s[n+m-1];	/* with thanks to John Linderman */
	s[n+m-1] = '\0';
	setsval(y, s + m - 1);
	s[n+m-1] = temp;
	tempfree(x);
	return(y);
}
787 
788 Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
789 {
790 	Cell *x, *y, *z;
791 	char *s1, *s2, *p1, *p2, *q;
792 	Awkfloat v = 0.0;
793 
794 	x = execute(a[0]);
795 	s1 = getsval(x);
796 	y = execute(a[1]);
797 	s2 = getsval(y);
798 
799 	z = gettemp();
800 	for (p1 = s1; *p1 != '\0'; p1++) {
801 		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
802 			continue;
803 		if (*p2 == '\0') {
804 			v = (Awkfloat) (p1 - s1 + 1);	/* origin 1 */
805 			break;
806 		}
807 	}
808 	tempfree(x);
809 	tempfree(y);
810 	setfval(z, v);
811 	return(z);
812 }
813 
814 #define	MAXNUMSIZE	50
815 
816 int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
817 {
818 	char *fmt;
819 	char *p, *t;
820 	const char *os;
821 	Cell *x;
822 	int flag = 0, n;
823 	int fmtwd; /* format width */
824 	int fmtsz = recsize;
825 	char *buf = *pbuf;
826 	int bufsize = *pbufsize;
827 #define FMTSZ(a)   (fmtsz - ((a) - fmt))
828 #define BUFSZ(a)   (bufsize - ((a) - buf))
829 
830 	static bool first = true;
831 	static bool have_a_format = false;
832 
833 	if (first) {
834 		char xbuf[100];
835 
836 		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
837 		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
838 		first = false;
839 	}
840 
841 	os = s;
842 	p = buf;
843 	if ((fmt = (char *) malloc(fmtsz)) == NULL)
844 		FATAL("out of memory in format()");
845 	while (*s) {
846 		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
847 		if (*s != '%') {
848 			*p++ = *s++;
849 			continue;
850 		}
851 		if (*(s+1) == '%') {
852 			*p++ = '%';
853 			s += 2;
854 			continue;
855 		}
856 		/* have to be real careful in case this is a huge number, eg, %100000d */
857 		fmtwd = atoi(s+1);
858 		if (fmtwd < 0)
859 			fmtwd = -fmtwd;
860 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
861 		for (t = fmt; (*t++ = *s) != '\0'; s++) {
862 			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
863 				FATAL("format item %.30s... ran format() out of memory", os);
864 			/* Ignore size specifiers */
865 			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
866 				t--;
867 				continue;
868 			}
869 			if (isalpha((uschar)*s))
870 				break;
871 			if (*s == '$') {
872 				FATAL("'$' not permitted in awk formats");
873 			}
874 			if (*s == '*') {
875 				if (a == NULL) {
876 					FATAL("not enough args in printf(%s)", os);
877 				}
878 				x = execute(a);
879 				a = a->nnext;
880 				snprintf(t - 1, FMTSZ(t - 1),
881 				    "%d", fmtwd=(int) getfval(x));
882 				if (fmtwd < 0)
883 					fmtwd = -fmtwd;
884 				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
885 				t = fmt + strlen(fmt);
886 				tempfree(x);
887 			}
888 		}
889 		*t = '\0';
890 		if (fmtwd < 0)
891 			fmtwd = -fmtwd;
892 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
893 		switch (*s) {
894 		case 'a': case 'A':
895 			if (have_a_format)
896 				flag = *s;
897 			else
898 				flag = 'f';
899 			break;
900 		case 'f': case 'e': case 'g': case 'E': case 'G':
901 			flag = 'f';
902 			break;
903 		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
904 			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
905 			*(t-1) = 'j';
906 			*t = *s;
907 			*++t = '\0';
908 			break;
909 		case 's':
910 			flag = 's';
911 			break;
912 		case 'c':
913 			flag = 'c';
914 			break;
915 		case '\0':
916 			FATAL("missing printf conversion specifier");
917 			break;
918 		default:
919 			WARNING("weird printf conversion %s", fmt);
920 			flag = '?';
921 			break;
922 		}
923 		if (a == NULL)
924 			FATAL("not enough args in printf(%s)", os);
925 		x = execute(a);
926 		a = a->nnext;
927 		n = MAXNUMSIZE;
928 		if (fmtwd > n)
929 			n = fmtwd;
930 		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
931 		switch (flag) {
932 		case '?':	snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
933 			t = getsval(x);
934 			n = strlen(t);
935 			if (fmtwd > n)
936 				n = fmtwd;
937 			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
938 			p += strlen(p);
939 			snprintf(p, BUFSZ(p), "%s", t);
940 			break;
941 		case 'a':
942 		case 'A':
943 		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
944 		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
945 		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
946 		case 's':
947 			t = getsval(x);
948 			n = strlen(t);
949 			if (fmtwd > n)
950 				n = fmtwd;
951 			if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
952 				FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
953 			snprintf(p, BUFSZ(p), fmt, t);
954 			break;
955 		case 'c':
956 			if (isnum(x)) {
957 				if ((int)getfval(x))
958 					snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
959 				else {
960 					*p++ = '\0'; /* explicit null byte */
961 					*p = '\0';   /* next output will start here */
962 				}
963 			} else
964 				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
965 			break;
966 		default:
967 			FATAL("can't happen: bad conversion %c in format()", flag);
968 		}
969 		tempfree(x);
970 		p += strlen(p);
971 		s++;
972 	}
973 	*p = '\0';
974 	free(fmt);
975 	for ( ; a; a = a->nnext)		/* evaluate any remaining args */
976 		execute(a);
977 	*pbuf = buf;
978 	*pbufsize = bufsize;
979 	return p - buf;
980 }
981 
982 Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
983 {
984 	Cell *x;
985 	Node *y;
986 	char *buf;
987 	int bufsz=3*recsize;
988 
989 	if ((buf = (char *) malloc(bufsz)) == NULL)
990 		FATAL("out of memory in awksprintf");
991 	y = a[0]->nnext;
992 	x = execute(a[0]);
993 	if (format(&buf, &bufsz, getsval(x), y) == -1)
994 		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
995 	tempfree(x);
996 	x = gettemp();
997 	x->sval = buf;
998 	x->tval = STR;
999 	return(x);
1000 }
1001 
1002 Cell *awkprintf(Node **a, int n)		/* printf */
1003 {	/* a[0] is list of args, starting with format string */
1004 	/* a[1] is redirection operator, a[2] is redirection file */
1005 	FILE *fp;
1006 	Cell *x;
1007 	Node *y;
1008 	char *buf;
1009 	int len;
1010 	int bufsz=3*recsize;
1011 
1012 	if ((buf = (char *) malloc(bufsz)) == NULL)
1013 		FATAL("out of memory in awkprintf");
1014 	y = a[0]->nnext;
1015 	x = execute(a[0]);
1016 	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1017 		FATAL("printf string %.30s... too long.  can't happen.", buf);
1018 	tempfree(x);
1019 	if (a[1] == NULL) {
1020 		/* fputs(buf, stdout); */
1021 		fwrite(buf, len, 1, stdout);
1022 		if (ferror(stdout))
1023 			FATAL("write error on stdout");
1024 	} else {
1025 		fp = redirect(ptoi(a[1]), a[2]);
1026 		/* fputs(buf, fp); */
1027 		fwrite(buf, len, 1, fp);
1028 		fflush(fp);
1029 		if (ferror(fp))
1030 			FATAL("write error on %s", filename(fp));
1031 	}
1032 	free(buf);
1033 	return(True);
1034 }
1035 
1036 Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1037 {
1038 	Awkfloat i, j = 0;
1039 	double v;
1040 	Cell *x, *y, *z;
1041 
1042 	x = execute(a[0]);
1043 	i = getfval(x);
1044 	tempfree(x);
1045 	if (n != UMINUS && n != UPLUS) {
1046 		y = execute(a[1]);
1047 		j = getfval(y);
1048 		tempfree(y);
1049 	}
1050 	z = gettemp();
1051 	switch (n) {
1052 	case ADD:
1053 		i += j;
1054 		break;
1055 	case MINUS:
1056 		i -= j;
1057 		break;
1058 	case MULT:
1059 		i *= j;
1060 		break;
1061 	case DIVIDE:
1062 		if (j == 0)
1063 			FATAL("division by zero");
1064 		i /= j;
1065 		break;
1066 	case MOD:
1067 		if (j == 0)
1068 			FATAL("division by zero in mod");
1069 		modf(i/j, &v);
1070 		i = i - j * v;
1071 		break;
1072 	case UMINUS:
1073 		i = -i;
1074 		break;
1075 	case UPLUS: /* handled by getfval(), above */
1076 		break;
1077 	case POWER:
1078 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1079 			i = ipow(i, (int) j);
1080                else {
1081 			errno = 0;
1082 			i = errcheck(pow(i, j), "pow");
1083                }
1084 		break;
1085 	default:	/* can't happen */
1086 		FATAL("illegal arithmetic operator %d", n);
1087 	}
1088 	setfval(z, i);
1089 	return(z);
1090 }
1091 
/*
 * ipow: raise x to the integer power n by recursive squaring.
 * Any n <= 0 yields 1.
 */
double ipow(double x, int n)	/* x**n.  ought to be done by pow, but isn't always */
{
	double half;

	if (n < 1)
		return 1;

	half = ipow(x, n / 2);
	if (n % 2 != 0)
		return x * half * half;	/* odd exponent: one extra factor of x */
	return half * half;
}
1104 
1105 Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1106 {
1107 	Cell *x, *z;
1108 	int k;
1109 	Awkfloat xf;
1110 
1111 	x = execute(a[0]);
1112 	xf = getfval(x);
1113 	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1114 	if (n == PREINCR || n == PREDECR) {
1115 		setfval(x, xf + k);
1116 		return(x);
1117 	}
1118 	z = gettemp();
1119 	setfval(z, xf);
1120 	setfval(x, xf + k);
1121 	tempfree(x);
1122 	return(z);
1123 }
1124 
/*
 * assign: implement = and the compound assignment operators.
 *
 * The right-hand side a[1] is evaluated before the left-hand side a[0].
 * Plain assignment copies the string value, the numeric value, or both,
 * depending on which of STR/NUM the source carries.  Compound operators
 * work on the numeric values and store the result with setfval().
 * Division and modulus by zero are fatal.  The assigned-to cell is
 * returned.
 */
Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
{		/* this is subtle; don't muck with it. */
	Cell *x, *y;
	Awkfloat xf, yf;
	double v;

	y = execute(a[1]);
	x = execute(a[0]);
	if (n == ASSIGN) {	/* ordinary assignment */
		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
			;	/* self-assignment: leave alone unless it's a field or NF */
		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
			/* source is both string and number: carry over both views */
			setsval(x, getsval(y));
			x->fval = getfval(y);
			x->tval |= NUM;
		}
		else if (isstr(y))
			setsval(x, getsval(y));
		else if (isnum(y))
			setfval(x, getfval(y));
		else
			funnyvar(y, "read value of");
		tempfree(y);
		return(x);
	}
	xf = getfval(x);
	yf = getfval(y);
	switch (n) {
	case ADDEQ:
		xf += yf;
		break;
	case SUBEQ:
		xf -= yf;
		break;
	case MULTEQ:
		xf *= yf;
		break;
	case DIVEQ:
		if (yf == 0)
			FATAL("division by zero in /=");
		xf /= yf;
		break;
	case MODEQ:
		if (yf == 0)
			FATAL("division by zero in %%=");
		/* same remainder rule as arith(): subtract the truncated quotient */
		modf(xf/yf, &v);
		xf = xf - yf * v;
		break;
	case POWEQ:
		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
			xf = ipow(xf, (int) yf);
               else {
			errno = 0;
			xf = errcheck(pow(xf, yf), "pow");
               }
		break;
	default:
		FATAL("illegal assignment operator %d", n);
		break;
	}
	tempfree(y);
	setfval(x, xf);
	return(x);
}
1189 
1190 Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1191 {
1192 	Cell *x, *y, *z;
1193 	int n1, n2;
1194 	char *s = NULL;
1195 	int ssz = 0;
1196 
1197 	x = execute(a[0]);
1198 	n1 = strlen(getsval(x));
1199 	adjbuf(&s, &ssz, n1, recsize, 0, "cat1");
1200 	memcpy(s, x->sval, n1);
1201 
1202 	y = execute(a[1]);
1203 	n2 = strlen(getsval(y));
1204 	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1205 	memcpy(s + n1, y->sval, n2);
1206 	s[n1 + n2] = '\0';
1207 
1208 	tempfree(x);
1209 	tempfree(y);
1210 
1211 	z = gettemp();
1212 	z->sval = s;
1213 	z->tval = STR;
1214 
1215 	return(z);
1216 }
1217 
1218 Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1219 {
1220 	Cell *x;
1221 
1222 	if (a[0] == NULL)
1223 		x = execute(a[1]);
1224 	else {
1225 		x = execute(a[0]);
1226 		if (istrue(x)) {
1227 			tempfree(x);
1228 			x = execute(a[1]);
1229 		}
1230 	}
1231 	return x;
1232 }
1233 
1234 Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1235 {
1236 	Cell *x;
1237 	int pair;
1238 
1239 	pair = ptoi(a[3]);
1240 	if (pairstack[pair] == 0) {
1241 		x = execute(a[0]);
1242 		if (istrue(x))
1243 			pairstack[pair] = 1;
1244 		tempfree(x);
1245 	}
1246 	if (pairstack[pair] == 1) {
1247 		x = execute(a[1]);
1248 		if (istrue(x))
1249 			pairstack[pair] = 0;
1250 		tempfree(x);
1251 		x = execute(a[2]);
1252 		return(x);
1253 	}
1254 	return(False);
1255 }
1256 
1257 Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
1258 {
1259 	Cell *x = NULL, *y, *ap;
1260 	const char *s, *origs, *t;
1261 	const char *fs = NULL;
1262 	char *origfs = NULL;
1263 	int sep;
1264 	char temp, num[50];
1265 	int n, tempstat, arg3type;
1266 	double result;
1267 
1268 	y = execute(a[0]);	/* source string */
1269 	origs = s = strdup(getsval(y));
1270 	arg3type = ptoi(a[3]);
1271 	if (a[2] == NULL)		/* fs string */
1272 		fs = getsval(fsloc);
1273 	else if (arg3type == STRING) {	/* split(str,arr,"string") */
1274 		x = execute(a[2]);
1275 		fs = origfs = strdup(getsval(x));
1276 		tempfree(x);
1277 	} else if (arg3type == REGEXPR)
1278 		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
1279 	else
1280 		FATAL("illegal type of split");
1281 	sep = *fs;
1282 	ap = execute(a[1]);	/* array name */
1283 	freesymtab(ap);
1284 	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1285 	ap->tval &= ~STR;
1286 	ap->tval |= ARR;
1287 	ap->sval = (char *) makesymtab(NSYMTAB);
1288 
1289 	n = 0;
1290         if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1291 		/* split(s, a, //); have to arrange that it looks like empty sep */
1292 		arg3type = 0;
1293 		fs = "";
1294 		sep = 0;
1295 	}
1296 	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
1297 		fa *pfa;
1298 		if (arg3type == REGEXPR) {	/* it's ready already */
1299 			pfa = (fa *) a[2];
1300 		} else {
1301 			pfa = makedfa(fs, 1);
1302 		}
1303 		if (nematch(pfa,s)) {
1304 			tempstat = pfa->initstat;
1305 			pfa->initstat = 2;
1306 			do {
1307 				n++;
1308 				snprintf(num, sizeof(num), "%d", n);
1309 				temp = *patbeg;
1310 				setptr(patbeg, '\0');
1311 				if (is_number(s, & result))
1312 					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1313 				else
1314 					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1315 				setptr(patbeg, temp);
1316 				s = patbeg + patlen;
1317 				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1318 					n++;
1319 					snprintf(num, sizeof(num), "%d", n);
1320 					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1321 					pfa->initstat = tempstat;
1322 					goto spdone;
1323 				}
1324 			} while (nematch(pfa,s));
1325 			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1326 							/* cf gsub and refldbld */
1327 		}
1328 		n++;
1329 		snprintf(num, sizeof(num), "%d", n);
1330 		if (is_number(s, & result))
1331 			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1332 		else
1333 			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1334   spdone:
1335 		pfa = NULL;
1336 	} else if (sep == ' ') {
1337 		for (n = 0; ; ) {
1338 #define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
1339 			while (ISWS(*s))
1340 				s++;
1341 			if (*s == '\0')
1342 				break;
1343 			n++;
1344 			t = s;
1345 			do
1346 				s++;
1347 			while (*s != '\0' && !ISWS(*s));
1348 			temp = *s;
1349 			setptr(s, '\0');
1350 			snprintf(num, sizeof(num), "%d", n);
1351 			if (is_number(t, & result))
1352 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1353 			else
1354 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1355 			setptr(s, temp);
1356 			if (*s != '\0')
1357 				s++;
1358 		}
1359 	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1360 		for (n = 0; *s != '\0'; s++) {
1361 			char buf[2];
1362 			n++;
1363 			snprintf(num, sizeof(num), "%d", n);
1364 			buf[0] = *s;
1365 			buf[1] = '\0';
1366 			if (isdigit((uschar)buf[0]))
1367 				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1368 			else
1369 				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1370 		}
1371 	} else if (*s != '\0') {
1372 		for (;;) {
1373 			n++;
1374 			t = s;
1375 			while (*s != sep && *s != '\n' && *s != '\0')
1376 				s++;
1377 			temp = *s;
1378 			setptr(s, '\0');
1379 			snprintf(num, sizeof(num), "%d", n);
1380 			if (is_number(t, & result))
1381 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1382 			else
1383 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1384 			setptr(s, temp);
1385 			if (*s++ == '\0')
1386 				break;
1387 		}
1388 	}
1389 	tempfree(ap);
1390 	tempfree(y);
1391 	xfree(origs);
1392 	xfree(origfs);
1393 	x = gettemp();
1394 	x->tval = NUM;
1395 	x->fval = n;
1396 	return(x);
1397 }
1398 
1399 Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1400 {
1401 	Cell *x;
1402 
1403 	x = execute(a[0]);
1404 	if (istrue(x)) {
1405 		tempfree(x);
1406 		x = execute(a[1]);
1407 	} else {
1408 		tempfree(x);
1409 		x = execute(a[2]);
1410 	}
1411 	return(x);
1412 }
1413 
1414 Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1415 {
1416 	Cell *x;
1417 
1418 	x = execute(a[0]);
1419 	if (istrue(x)) {
1420 		tempfree(x);
1421 		x = execute(a[1]);
1422 	} else if (a[2] != NULL) {
1423 		tempfree(x);
1424 		x = execute(a[2]);
1425 	}
1426 	return(x);
1427 }
1428 
1429 Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1430 {
1431 	Cell *x;
1432 
1433 	for (;;) {
1434 		x = execute(a[0]);
1435 		if (!istrue(x))
1436 			return(x);
1437 		tempfree(x);
1438 		x = execute(a[1]);
1439 		if (isbreak(x)) {
1440 			x = True;
1441 			return(x);
1442 		}
1443 		if (isnext(x) || isexit(x) || isret(x))
1444 			return(x);
1445 		tempfree(x);
1446 	}
1447 }
1448 
1449 Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1450 {
1451 	Cell *x;
1452 
1453 	for (;;) {
1454 		x = execute(a[0]);
1455 		if (isbreak(x))
1456 			return True;
1457 		if (isnext(x) || isexit(x) || isret(x))
1458 			return(x);
1459 		tempfree(x);
1460 		x = execute(a[1]);
1461 		if (!istrue(x))
1462 			return(x);
1463 		tempfree(x);
1464 	}
1465 }
1466 
1467 Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1468 {
1469 	Cell *x;
1470 
1471 	x = execute(a[0]);
1472 	tempfree(x);
1473 	for (;;) {
1474 		if (a[1]!=NULL) {
1475 			x = execute(a[1]);
1476 			if (!istrue(x)) return(x);
1477 			else tempfree(x);
1478 		}
1479 		x = execute(a[3]);
1480 		if (isbreak(x))		/* turn off break */
1481 			return True;
1482 		if (isnext(x) || isexit(x) || isret(x))
1483 			return(x);
1484 		tempfree(x);
1485 		x = execute(a[2]);
1486 		tempfree(x);
1487 	}
1488 }
1489 
1490 Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1491 {
1492 	Cell *x, *vp, *arrayp, *cp, *ncp;
1493 	Array *tp;
1494 	int i;
1495 
1496 	vp = execute(a[0]);
1497 	arrayp = execute(a[1]);
1498 	if (!isarr(arrayp)) {
1499 		return True;
1500 	}
1501 	tp = (Array *) arrayp->sval;
1502 	tempfree(arrayp);
1503 	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1504 		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1505 			setsval(vp, cp->nval);
1506 			ncp = cp->cnext;
1507 			x = execute(a[2]);
1508 			if (isbreak(x)) {
1509 				tempfree(vp);
1510 				return True;
1511 			}
1512 			if (isnext(x) || isexit(x) || isret(x)) {
1513 				tempfree(vp);
1514 				return(x);
1515 			}
1516 			tempfree(x);
1517 		}
1518 	}
1519 	return True;
1520 }
1521 
1522 static char *nawk_convert(const char *s, int (*fun_c)(int),
1523     wint_t (*fun_wc)(wint_t))
1524 {
1525 	char *buf      = NULL;
1526 	char *pbuf     = NULL;
1527 	const char *ps = NULL;
1528 	size_t n       = 0;
1529 	wchar_t wc;
1530 	size_t sz = MB_CUR_MAX;
1531 
1532 	if (sz == 1) {
1533 		buf = tostring(s);
1534 
1535 		for (pbuf = buf; *pbuf; pbuf++)
1536 			*pbuf = fun_c((uschar)*pbuf);
1537 
1538 		return buf;
1539 	} else {
1540 		/* upper/lower character may be shorter/longer */
1541 		buf = tostringN(s, strlen(s) * sz + 1);
1542 
1543 		(void) mbtowc(NULL, NULL, 0);	/* reset internal state */
1544 		/*
1545 		 * Reset internal state here too.
1546 		 * Assign result to avoid a compiler warning. (Casting to void
1547 		 * doesn't work.)
1548 		 * Increment said variable to avoid a different warning.
1549 		 */
1550 		int unused = wctomb(NULL, L'\0');
1551 		unused++;
1552 
1553 		ps   = s;
1554 		pbuf = buf;
1555 		while (n = mbtowc(&wc, ps, sz),
1556 		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
1557 		{
1558 			ps += n;
1559 
1560 			n = wctomb(pbuf, fun_wc(wc));
1561 			if (n == (size_t)-1)
1562 				FATAL("illegal wide character %s", s);
1563 
1564 			pbuf += n;
1565 		}
1566 
1567 		*pbuf = '\0';
1568 
1569 		if (n)
1570 			FATAL("illegal byte sequence %s", s);
1571 
1572 		return buf;
1573 	}
1574 }
1575 
1576 #ifdef __DJGPP__
/* Fallback: only code points below 256 are mapped; others pass through. */
static wint_t towupper(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? (wint_t) toupper(wc & 0xFF) : wc;
}
1584 
/* Fallback: only code points below 256 are mapped; others pass through. */
static wint_t towlower(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? (wint_t) tolower(wc & 0xFF) : wc;
}
1592 #endif
1593 
/* Upper-case copy of s; the string returned by nawk_convert() is passed on. */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
1598 
/* Lower-case copy of s; the string returned by nawk_convert() is passed on. */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
1603 
1604 Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
1605 {
1606 	Cell *x, *y;
1607 	Awkfloat u;
1608 	int t, sz;
1609 	Awkfloat tmp;
1610 	char *buf, *fmt;
1611 	Node *nextarg;
1612 	FILE *fp;
1613 	int status = 0;
1614 	time_t tv;
1615 	struct tm *tm;
1616 
1617 	t = ptoi(a[0]);
1618 	x = execute(a[1]);
1619 	nextarg = a[1]->nnext;
1620 	switch (t) {
1621 	case FLENGTH:
1622 		if (isarr(x))
1623 			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
1624 		else
1625 			u = strlen(getsval(x));
1626 		break;
1627 	case FLOG:
1628 		errno = 0;
1629 		u = errcheck(log(getfval(x)), "log");
1630 		break;
1631 	case FINT:
1632 		modf(getfval(x), &u); break;
1633 	case FEXP:
1634 		errno = 0;
1635 		u = errcheck(exp(getfval(x)), "exp");
1636 		break;
1637 	case FSQRT:
1638 		errno = 0;
1639 		u = errcheck(sqrt(getfval(x)), "sqrt");
1640 		break;
1641 	case FSIN:
1642 		u = sin(getfval(x)); break;
1643 	case FCOS:
1644 		u = cos(getfval(x)); break;
1645 	case FATAN:
1646 		if (nextarg == NULL) {
1647 			WARNING("atan2 requires two arguments; returning 1.0");
1648 			u = 1.0;
1649 		} else {
1650 			y = execute(a[1]->nnext);
1651 			u = atan2(getfval(x), getfval(y));
1652 			tempfree(y);
1653 			nextarg = nextarg->nnext;
1654 		}
1655 		break;
1656 	case FCOMPL:
1657 		u = ~((int)getfval(x));
1658 		break;
1659 	case FAND:
1660 		if (nextarg == 0) {
1661 			WARNING("and requires two arguments; returning 0");
1662 			u = 0;
1663 			break;
1664 		}
1665 		y = execute(a[1]->nnext);
1666 		u = ((int)getfval(x)) & ((int)getfval(y));
1667 		tempfree(y);
1668 		nextarg = nextarg->nnext;
1669 		break;
1670 	case FFOR:
1671 		if (nextarg == 0) {
1672 			WARNING("or requires two arguments; returning 0");
1673 			u = 0;
1674 			break;
1675 		}
1676 		y = execute(a[1]->nnext);
1677 		u = ((int)getfval(x)) | ((int)getfval(y));
1678 		tempfree(y);
1679 		nextarg = nextarg->nnext;
1680 		break;
1681 	case FXOR:
1682 		if (nextarg == 0) {
1683 			WARNING("xor requires two arguments; returning 0");
1684 			u = 0;
1685 			break;
1686 		}
1687 		y = execute(a[1]->nnext);
1688 		u = ((int)getfval(x)) ^ ((int)getfval(y));
1689 		tempfree(y);
1690 		nextarg = nextarg->nnext;
1691 		break;
1692 	case FLSHIFT:
1693 		if (nextarg == 0) {
1694 			WARNING("lshift requires two arguments; returning 0");
1695 			u = 0;
1696 			break;
1697 		}
1698 		y = execute(a[1]->nnext);
1699 		u = ((int)getfval(x)) << ((int)getfval(y));
1700 		tempfree(y);
1701 		nextarg = nextarg->nnext;
1702 		break;
1703 	case FRSHIFT:
1704 		if (nextarg == 0) {
1705 			WARNING("rshift requires two arguments; returning 0");
1706 			u = 0;
1707 			break;
1708 		}
1709 		y = execute(a[1]->nnext);
1710 		u = ((int)getfval(x)) >> ((int)getfval(y));
1711 		tempfree(y);
1712 		nextarg = nextarg->nnext;
1713 		break;
1714 	case FSYSTEM:
1715 		fflush(stdout);		/* in case something is buffered already */
1716 		status = system(getsval(x));
1717 		u = status;
1718 		if (status != -1) {
1719 			if (WIFEXITED(status)) {
1720 				u = WEXITSTATUS(status);
1721 			} else if (WIFSIGNALED(status)) {
1722 				u = WTERMSIG(status) + 256;
1723 #ifdef WCOREDUMP
1724 				if (WCOREDUMP(status))
1725 					u += 256;
1726 #endif
1727 			} else	/* something else?!? */
1728 				u = 0;
1729 		}
1730 		break;
1731 	case FRAND:
1732 		/* random() returns numbers in [0..2^31-1]
1733 		 * in order to get a number in [0, 1), divide it by 2^31
1734 		 */
1735 		u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
1736 		break;
1737 	case FSRAND:
1738 		if (isrec(x))	/* no argument provided */
1739 			u = time((time_t *)0);
1740 		else
1741 			u = getfval(x);
1742 		tmp = u;
1743 		srandom((unsigned long) u);
1744 		u = srand_seed;
1745 		srand_seed = tmp;
1746 		break;
1747 	case FTOUPPER:
1748 	case FTOLOWER:
1749 		if (t == FTOUPPER)
1750 			buf = nawk_toupper(getsval(x));
1751 		else
1752 			buf = nawk_tolower(getsval(x));
1753 		tempfree(x);
1754 		x = gettemp();
1755 		setsval(x, buf);
1756 		free(buf);
1757 		return x;
1758 	case FFLUSH:
1759 		if (isrec(x) || strlen(getsval(x)) == 0) {
1760 			flush_all();	/* fflush() or fflush("") -> all */
1761 			u = 0;
1762 		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1763 			u = EOF;
1764 		else
1765 			u = fflush(fp);
1766 		break;
1767 	case FSYSTIME:
1768 		u = time((time_t *) 0);
1769 		break;
1770 	case FSTRFTIME:
1771 		/* strftime([format [,timestamp]]) */
1772 		if (nextarg) {
1773 			y = execute(nextarg);
1774 			nextarg = nextarg->nnext;
1775 			tv = (time_t) getfval(y);
1776 			tempfree(y);
1777 		} else
1778 			tv = time((time_t *) 0);
1779 		tm = localtime(&tv);
1780 		if (tm == NULL)
1781 			FATAL("bad time %ld", (long)tv);
1782 
1783 		if (isrec(x)) {
1784 			/* format argument not provided, use default */
1785 			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1786 		} else
1787 			fmt = tostring(getsval(x));
1788 
1789 		sz = 32;
1790 		buf = NULL;
1791 		do {
1792 			if ((buf = realloc(buf, (sz *= 2))) == NULL)
1793 				FATAL("out of memory in strftime");
1794 		} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
1795 
1796 		y = gettemp();
1797 		setsval(y, buf);
1798 		free(fmt);
1799 		free(buf);
1800 
1801 		return y;
1802 	default:	/* can't happen */
1803 		FATAL("illegal function type %d", t);
1804 		break;
1805 	}
1806 	tempfree(x);
1807 	x = gettemp();
1808 	setfval(x, u);
1809 	if (nextarg != NULL) {
1810 		WARNING("warning: function has too many arguments");
1811 		for ( ; nextarg; nextarg = nextarg->nnext)
1812 			execute(nextarg);
1813 	}
1814 	return(x);
1815 }
1816 
1817 Cell *printstat(Node **a, int n)	/* print a[0] */
1818 {
1819 	Node *x;
1820 	Cell *y;
1821 	FILE *fp;
1822 
1823 	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
1824 		fp = stdout;
1825 	else
1826 		fp = redirect(ptoi(a[1]), a[2]);
1827 	for (x = a[0]; x != NULL; x = x->nnext) {
1828 		y = execute(x);
1829 		fputs(getpssval(y), fp);
1830 		tempfree(y);
1831 		if (x->nnext == NULL)
1832 			fputs(getsval(orsloc), fp);
1833 		else
1834 			fputs(getsval(ofsloc), fp);
1835 	}
1836 	if (a[1] != NULL)
1837 		fflush(fp);
1838 	if (ferror(fp))
1839 		FATAL("write error on %s", filename(fp));
1840 	return(True);
1841 }
1842 
1843 Cell *nullproc(Node **a, int n)
1844 {
1845 	return 0;
1846 }
1847 
1848 
1849 FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
1850 {
1851 	FILE *fp;
1852 	Cell *x;
1853 	char *fname;
1854 
1855 	x = execute(b);
1856 	fname = getsval(x);
1857 	fp = openfile(a, fname, NULL);
1858 	if (fp == NULL)
1859 		FATAL("can't open file %s", fname);
1860 	tempfree(x);
1861 	return fp;
1862 }
1863 
/* One entry per stream tracked in the open-file table. */
struct files {
	FILE	*fp;		/* the stream; NULL marks a free slot */
	const char	*fname;	/* name the stream was opened under */
	int	mode;		/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table itself */
size_t nfiles;		/* number of slots in the table */
1871 
1872 static void stdinit(void)	/* in case stdin, etc., are not constants */
1873 {
1874 	nfiles = FOPEN_MAX;
1875 	files = (struct files *) calloc(nfiles, sizeof(*files));
1876 	if (files == NULL)
1877 		FATAL("can't allocate file memory for %zu files", nfiles);
1878         files[0].fp = stdin;
1879 	files[0].fname = "/dev/stdin";
1880 	files[0].mode = LT;
1881         files[1].fp = stdout;
1882 	files[1].fname = "/dev/stdout";
1883 	files[1].mode = GT;
1884         files[2].fp = stderr;
1885 	files[2].fname = "/dev/stderr";
1886 	files[2].mode = GT;
1887 }
1888 
1889 FILE *openfile(int a, const char *us, bool *pnewflag)
1890 {
1891 	const char *s = us;
1892 	size_t i;
1893 	int m;
1894 	FILE *fp = NULL;
1895 
1896 	if (*s == '\0')
1897 		FATAL("null file name in print or getline");
1898 	for (i = 0; i < nfiles; i++)
1899 		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1900 		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1901 		     a == FFLUSH)) {
1902 			if (pnewflag)
1903 				*pnewflag = false;
1904 			return files[i].fp;
1905 		}
1906 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
1907 		return NULL;
1908 
1909 	for (i = 0; i < nfiles; i++)
1910 		if (files[i].fp == NULL)
1911 			break;
1912 	if (i >= nfiles) {
1913 		struct files *nf;
1914 		size_t nnf = nfiles + FOPEN_MAX;
1915 		nf = (struct files *) realloc(files, nnf * sizeof(*nf));
1916 		if (nf == NULL)
1917 			FATAL("cannot grow files for %s and %zu files", s, nnf);
1918 		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1919 		nfiles = nnf;
1920 		files = nf;
1921 	}
1922 	fflush(stdout);	/* force a semblance of order */
1923 	m = a;
1924 	if (a == GT) {
1925 		fp = fopen(s, "w");
1926 	} else if (a == APPEND) {
1927 		fp = fopen(s, "a");
1928 		m = GT;	/* so can mix > and >> */
1929 	} else if (a == '|') {	/* output pipe */
1930 		fp = popen(s, "w");
1931 	} else if (a == LE) {	/* input pipe */
1932 		fp = popen(s, "r");
1933 	} else if (a == LT) {	/* getline <file */
1934 		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
1935 	} else	/* can't happen */
1936 		FATAL("illegal redirection %d", a);
1937 	if (fp != NULL) {
1938 		files[i].fname = tostring(s);
1939 		files[i].fp = fp;
1940 		files[i].mode = m;
1941 		if (pnewflag)
1942 			*pnewflag = true;
1943 		if (fp != stdin && fp != stdout && fp != stderr)
1944 			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
1945 	}
1946 	return fp;
1947 }
1948 
1949 const char *filename(FILE *fp)
1950 {
1951 	size_t i;
1952 
1953 	for (i = 0; i < nfiles; i++)
1954 		if (fp == files[i].fp)
1955 			return files[i].fname;
1956 	return "???";
1957 }
1958 
1959  Cell *closefile(Node **a, int n)
1960  {
1961  	Cell *x;
1962 	size_t i;
1963 	bool stat;
1964 
1965  	x = execute(a[0]);
1966  	getsval(x);
1967 	stat = true;
1968  	for (i = 0; i < nfiles; i++) {
1969 		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
1970 			continue;
1971 		if (ferror(files[i].fp))
1972 			FATAL("i/o error occurred on %s", files[i].fname);
1973 		if (files[i].fp == stdin || files[i].fp == stdout ||
1974 		    files[i].fp == stderr)
1975 			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
1976 		else if (files[i].mode == '|' || files[i].mode == LE)
1977 			stat = pclose(files[i].fp) == -1;
1978 		else
1979 			stat = fclose(files[i].fp) == EOF;
1980 		if (stat)
1981 			FATAL("i/o error occurred closing %s", files[i].fname);
1982 		if (i > 2)	/* don't do /dev/std... */
1983 			xfree(files[i].fname);
1984 		files[i].fname = NULL;	/* watch out for ref thru this */
1985 		files[i].fp = NULL;
1986 		break;
1987  	}
1988  	tempfree(x);
1989  	x = gettemp();
1990 	setfval(x, (Awkfloat) (stat ? -1 : 0));
1991  	return(x);
1992  }
1993 
1994 void closeall(void)
1995 {
1996 	size_t i;
1997 	bool stat = false;
1998 
1999 	for (i = 0; i < nfiles; i++) {
2000 		if (! files[i].fp)
2001 			continue;
2002 		if (ferror(files[i].fp))
2003 			FATAL( "i/o error occurred on %s", files[i].fname );
2004 		if (files[i].fp == stdin)
2005 			continue;
2006 		if (files[i].mode == '|' || files[i].mode == LE)
2007 			stat = pclose(files[i].fp) == -1;
2008 		else if (files[i].fp == stdout || files[i].fp == stderr)
2009 			stat = fflush(files[i].fp) == EOF;
2010 		else
2011 			stat = fclose(files[i].fp) == EOF;
2012 		if (stat)
2013 			FATAL( "i/o error occurred while closing %s", files[i].fname );
2014 	}
2015 }
2016 
2017 static void flush_all(void)
2018 {
2019 	size_t i;
2020 
2021 	for (i = 0; i < nfiles; i++)
2022 		if (files[i].fp)
2023 			fflush(files[i].fp);
2024 }
2025 
2026 void backsub(char **pb_ptr, const char **sptr_ptr);
2027 
/*
 * sub: replace the first match of a regular expression in a target.
 *
 * a[3] is the target, a[2] the replacement text, and a[1] the regular
 * expression: already compiled when a[0] is NULL, otherwise an expression
 * whose string value is compiled here.  On a match the target is rewritten
 * through setsval() and True is returned; otherwise the target is left
 * alone and False is returned.
 */
Cell *sub(Node **a, int nnn)	/* substitute command */
{
	const char *sptr, *q;
	Cell *x, *y, *result;
	char *t, *buf, *pb;
	fa *pfa;
	int bufsz = recsize;

	if ((buf = (char *) malloc(bufsz)) == NULL)
		FATAL("out of memory in sub");
	x = execute(a[3]);	/* target string */
	t = getsval(x);
	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	result = False;
	if (pmatch(pfa, t)) {
		/* copy the part of the target that precedes the match */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
		pb = buf;
		while (sptr < patbeg)
			*pb++ = *sptr++;
		/* expand the replacement: '\' goes to backsub(), '&' is the matched text */
		sptr = getsval(y);
		while (*sptr != '\0') {
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
			if (*sptr == '\\') {
				backsub(&pb, &sptr);
			} else if (*sptr == '&') {
				sptr++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
				for (q = patbeg; q < patbeg+patlen; )
					*pb++ = *q++;
			} else
				*pb++ = *sptr++;
		}
		*pb = '\0';
		if (pb > buf + bufsz)
			FATAL("sub result1 %.30s too big; can't happen", buf);
		/* append whatever follows the match, including its terminating NUL */
		sptr = patbeg + patlen;
		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
			adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
			while ((*pb++ = *sptr++) != '\0')
				continue;
		}
		if (pb > buf + bufsz)
			FATAL("sub result2 %.30s too big; can't happen", buf);
		setsval(x, buf);	/* BUG: should be able to avoid copy */
		result = True;
	}
	tempfree(x);
	tempfree(y);
	free(buf);
	return result;
}
2087 
/*
 * gsub: replace matches of a regular expression throughout a target.
 *
 * Arguments are laid out as for sub(): a[3] is the target, a[2] the
 * replacement, a[1] the regular expression (already compiled when a[0] is
 * NULL).  When at least one match exists the target is rewritten through
 * setsval().  Returns a temporary NUM cell holding the number of
 * replacements made.
 */
Cell *gsub(Node **a, int nnn)	/* global substitute */
{
	Cell *x, *y;
	char *rptr, *pb;
	const char *q, *t, *sptr;
	char *buf;
	fa *pfa;
	int mflag, tempstat, num;
	int bufsz = recsize;

	if ((buf = (char *) malloc(bufsz)) == NULL)
		FATAL("out of memory in gsub");
	mflag = 0;	/* if mflag == 0, can replace empty string */
	num = 0;
	x = execute(a[3]);	/* target string */
	t = getsval(x);
	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	if (pmatch(pfa, t)) {
		/* initstat is overridden for the scan and restored at "done" */
		tempstat = pfa->initstat;
		pfa->initstat = 2;
		pb = buf;
		rptr = getsval(y);
		do {
			if (patlen == 0 && *patbeg != '\0') {	/* matched empty string */
				if (mflag == 0) {	/* can replace empty */
					num++;
					/* expand the replacement: '\' via backsub(), '&' is the match */
					sptr = rptr;
					while (*sptr != '\0') {
						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
						if (*sptr == '\\') {
							backsub(&pb, &sptr);
						} else if (*sptr == '&') {
							sptr++;
							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
							for (q = patbeg; q < patbeg+patlen; )
								*pb++ = *q++;
						} else
							*pb++ = *sptr++;
					}
				}
				if (*t == '\0')	/* at end */
					goto done;
				/* copy one target character so the scan moves forward */
				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
				*pb++ = *t++;
				if (pb > buf + bufsz)	/* BUG: not sure of this test */
					FATAL("gsub result0 %.30s too big; can't happen", buf);
				mflag = 0;
			}
			else {	/* matched nonempty string */
				num++;
				/* copy the text between the previous position and this match */
				sptr = t;
				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
				while (sptr < patbeg)
					*pb++ = *sptr++;
				/* then the expanded replacement */
				sptr = rptr;
				while (*sptr != '\0') {
					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
					if (*sptr == '\\') {
						backsub(&pb, &sptr);
					} else if (*sptr == '&') {
						sptr++;
						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
						for (q = patbeg; q < patbeg+patlen; )
							*pb++ = *q++;
					} else
						*pb++ = *sptr++;
				}
				/* resume scanning just past the match */
				t = patbeg + patlen;
				if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
					goto done;
				if (pb > buf + bufsz)
					FATAL("gsub result1 %.30s too big; can't happen", buf);
				mflag = 1;
			}
		} while (pmatch(pfa,t));
		/* no further match: copy the remainder, including its NUL */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
		while ((*pb++ = *sptr++) != '\0')
			continue;
	done:	if (pb < buf + bufsz)
			*pb = '\0';
		else if (*(pb-1) != '\0')
			FATAL("gsub result2 %.30s truncated; can't happen", buf);
		setsval(x, buf);	/* BUG: should be able to avoid copy + free */
		pfa->initstat = tempstat;
	}
	tempfree(x);
	tempfree(y);
	x = gettemp();
	x->tval = NUM;
	x->fval = num;
	free(buf);
	return(x);
}
2189 
/*
 * gensub: substitution that returns its result instead of modifying the
 * source.
 *
 * a[4] is the source string, a[2] the replacement, a[3] selects which
 * match to replace (a value starting with 'g' or 'G' means all of them,
 * otherwise a 1-based match number), and a[1] is the regular expression
 * (already compiled when a[0] is 0).  The result cell starts as a copy of
 * the source and is overwritten with the edited text when the pattern
 * matches.  A replacement containing a backslash followed by a digit is
 * FATAL.
 */
Cell *gensub(Node **a, int nnn)	/* global selective substitute */
	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
{
	Cell *x, *y, *res, *h;
	char *rptr;
	const char *sptr;
	char *buf, *pb;
	const char *t, *q;
	fa *pfa;
	int mflag, tempstat, num, whichm;
	int bufsz = recsize;

	if ((buf = malloc(bufsz)) == NULL)
		FATAL("out of memory in gensub");
	mflag = 0;	/* if mflag == 0, can replace empty string */
	num = 0;
	x = execute(a[4]);	/* source string */
	t = getsval(x);
	res = copycell(x);	/* target string - initially copy of source */
	res->csub = CTEMP;	/* result values are temporary */
	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	h = execute(a[3]);	/* which matches should be replaced */
	sptr = getsval(h);
	if (sptr[0] == 'g' || sptr[0] == 'G')
		whichm = -1;	/* -1 means every match */
	else {
		/*
		 * The specified number is index of replacement, starting
		 * from 1. GNU awk treats index lower than 0 same as
		 * 1, we do same for compatibility.
		 */
		whichm = (int) getfval(h) - 1;
		if (whichm < 0)
			whichm = 0;
	}
	tempfree(h);

	if (pmatch(pfa, t)) {
		char *sl;

		/* initstat is overridden for the scan and restored at "done" */
		tempstat = pfa->initstat;
		pfa->initstat = 2;
		pb = buf;
		rptr = getsval(y);
		/*
		 * XXX if there are any backreferences in subst string,
		 * complain now.
		 */
		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
			if (strchr("0123456789", sl[1])) {
				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
			}
		}

		do {
			/* a specific match was requested and this is not it */
			if (whichm >= 0 && whichm != num) {
				num++;
				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");

				/* copy the part of string up to and including
				 * match to output buffer */
				while (t < patbeg + patlen)
					*pb++ = *t++;
				continue;
			}

			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
				if (mflag == 0) {	/* can replace empty */
					num++;
					/* expand the replacement: '\' via backsub(), '&' is the match */
					sptr = rptr;
					while (*sptr != 0) {
						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
						if (*sptr == '\\') {
							backsub(&pb, &sptr);
						} else if (*sptr == '&') {
							sptr++;
							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
							for (q = patbeg; q < patbeg+patlen; )
								*pb++ = *q++;
						} else
							*pb++ = *sptr++;
					}
				}
				if (*t == 0)	/* at end */
					goto done;
				/* copy one source character so the scan moves forward */
				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
				*pb++ = *t++;
				if (pb > buf + bufsz)	/* BUG: not sure of this test */
					FATAL("gensub result0 %.30s too big; can't happen", buf);
				mflag = 0;
			}
			else {	/* matched nonempty string */
				num++;
				/* copy the text between the previous position and this match */
				sptr = t;
				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
				while (sptr < patbeg)
					*pb++ = *sptr++;
				/* then the expanded replacement */
				sptr = rptr;
				while (*sptr != 0) {
					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
					if (*sptr == '\\') {
						backsub(&pb, &sptr);
					} else if (*sptr == '&') {
						sptr++;
						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
						for (q = patbeg; q < patbeg+patlen; )
							*pb++ = *q++;
					} else
						*pb++ = *sptr++;
				}
				/* resume scanning just past the match */
				t = patbeg + patlen;
				if (patlen == 0 || *t == 0 || *(t-1) == 0)
					goto done;
				if (pb > buf + bufsz)
					FATAL("gensub result1 %.30s too big; can't happen", buf);
				mflag = 1;
			}
		} while (pmatch(pfa,t));
		/* no further match: copy the remainder, including its NUL */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
		while ((*pb++ = *sptr++) != 0)
			;
	done:	if (pb > buf + bufsz)
			FATAL("gensub result2 %.30s too big; can't happen", buf);
		*pb = '\0';
		setsval(res, buf);
		pfa->initstat = tempstat;
	}
	tempfree(x);
	tempfree(y);
	free(buf);
	return(res);
}
2330 
/*
 * backsub: copy one backslash sequence of a replacement string to the
 * output, advancing both the output pointer *pb_ptr and the input pointer
 * *sptr_ptr.  On entry (*sptr_ptr)[0] is a backslash.  Whether the
 * POSIXLY_CORRECT environment variable is set is looked up once, on the
 * first call.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
{
	static bool posix_checked = false;
	static bool posix_mode = false;
	const char *in = *sptr_ptr;
	char *out = *pb_ptr;

	if (!posix_checked) {
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);
		posix_checked = true;
	}

	if (in[1] == '&') {			/* \& -> literal & */
		*out++ = in[1];
		in += 2;
	} else if (in[1] != '\\') {		/* lone \ -> literal \ */
		*out++ = *in++;
	} else if (in[2] == '\\' && in[3] == '&') {	/* \\\& -> \& */
		*out++ = '\\';
		*out++ = '&';
		in += 4;
	} else if (in[2] == '&') {		/* \\& -> \ + matched */
		*out++ = '\\';
		in += 2;	/* the & is left for the caller to expand */
	} else if (posix_mode) {		/* \\x -> \x */
		*out++ = in[1];
		in += 2;
	} else {				/* \\x -> \\x */
		*out++ = in[0];
		*out++ = in[1];
		in += 2;
	}

	*pb_ptr = out;
	*sptr_ptr = in;
}
2367