xref: /freebsd/contrib/one-true-awk/run.c (revision 5eb61f6c6549f134a4f3bed4c164345d4f616bad)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #include <sys/cdefs.h>
26 __FBSDID("$FreeBSD$");
27 
28 #define DEBUG
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <errno.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <fcntl.h>
35 #include <setjmp.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <string.h>
39 #include <stdlib.h>
40 #include <time.h>
41 #include <sys/types.h>
42 #include <sys/wait.h>
43 #include "awk.h"
44 #include "awkgram.tab.h"
45 
46 static void stdinit(void);
47 static void flush_all(void);
48 
/*
 * tempfree(x): give the cell x back to the temporary free list, but only
 * when istemp() reports that it is a temporary.  The macro form is the one
 * that gets compiled; the function form in the #else arm is a diagnostic
 * variant that additionally warns about an OCELL whose csub lies outside
 * the range CUNK..CFREE.  Note that the macro mentions its argument twice.
 */
#if 1
#define tempfree(x)	\
	do {	\
		if (istemp(x))	\
			tfree(x);	\
	} while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	int csub_out_of_range = (p->csub < CUNK || p->csub > CFREE);

	if (p->ctype == OCELL && csub_out_of_range) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
61 
62 /* do we really need these? */
63 /* #ifdef _NFILE */
64 /* #ifndef FOPEN_MAX */
65 /* #define FOPEN_MAX _NFILE */
66 /* #endif */
67 /* #endif */
68 /*  */
69 /* #ifndef	FOPEN_MAX */
70 /* #define	FOPEN_MAX	40 */	/* max number of open files */
71 /* #endif */
72 /*  */
73 /* #ifndef RAND_MAX */
74 /* #define RAND_MAX	32767 */	/* all that ansi guarantees */
75 /* #endif */
76 
77 jmp_buf env;
78 extern	int	pairstack[];
79 extern	Awkfloat	srand_seed;
80 
81 Node	*winner = NULL;	/* root of parse tree */
82 Cell	*tmps;		/* free temporary cells for execution */
83 
84 static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
85 Cell	*True	= &truecell;
86 static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
87 Cell	*False	= &falsecell;
88 static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
89 Cell	*jbreak	= &breakcell;
90 static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
91 Cell	*jcont	= &contcell;
92 static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
93 Cell	*jnext	= &nextcell;
94 static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
95 Cell	*jnextfile	= &nextfilecell;
96 static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
97 Cell	*jexit	= &exitcell;
98 static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
99 Cell	*jret	= &retcell;
100 static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
101 
102 Node	*curnode = NULL;	/* the node being executed, for debugging */
103 
104 /* buffer memory management */
/*
 * adjbuf: make sure a malloc'd buffer holds at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 means "do not round")
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal error,
 *          or 0 if the caller wants to handle the failure itself
 *
 * When the buffer is already big enough nothing is touched.  Otherwise the
 * request is rounded up to a multiple of quantum, the buffer is realloc'd,
 * and *pbuf, *psiz and (if given) *pbptr are updated; *pbptr keeps its
 * offset from the start of the buffer.
 *
 * return   0 for realloc failure, !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	if (minlen > *psiz) {
		char *tbuf;
		int rminlen = quantum ? minlen % quantum : 0;
		int boff = pbptr ? *pbptr - *pbuf : 0;
		/* round up to next multiple of quantum */
		if (rminlen)
			minlen += quantum - rminlen;
		tbuf = (char *) realloc(*pbuf, minlen);
		/* whatrtn is optional, so never hand a null pointer to %s */
		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
		if (tbuf == NULL) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;
		}
		*pbuf = tbuf;
		*psiz = minlen;
		if (pbptr)
			*pbptr = tbuf + boff;
	}
	return 1;
}
138 
139 void run(Node *a)	/* execution of parse tree starts here */
140 {
141 
142 	stdinit();
143 	execute(a);
144 	closeall();
145 }
146 
147 Cell *execute(Node *u)	/* execute a node of the parse tree */
148 {
149 	Cell *(*proc)(Node **, int);
150 	Cell *x;
151 	Node *a;
152 
153 	if (u == NULL)
154 		return(True);
155 	for (a = u; ; a = a->nnext) {
156 		curnode = a;
157 		if (isvalue(a)) {
158 			x = (Cell *) (a->narg[0]);
159 			if (isfld(x) && !donefld)
160 				fldbld();
161 			else if (isrec(x) && !donerec)
162 				recbld();
163 			return(x);
164 		}
165 		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
166 			FATAL("illegal statement");
167 		proc = proctab[a->nobj-FIRSTTOKEN];
168 		x = (*proc)(a->narg, a->nobj);
169 		if (isfld(x) && !donefld)
170 			fldbld();
171 		else if (isrec(x) && !donerec)
172 			recbld();
173 		if (isexpr(a))
174 			return(x);
175 		if (isjump(x))
176 			return(x);
177 		if (a->nnext == NULL)
178 			return(x);
179 		tempfree(x);
180 	}
181 }
182 
183 
184 Cell *program(Node **a, int n)	/* execute an awk program */
185 {				/* a[0] = BEGIN, a[1] = body, a[2] = END */
186 	Cell *x;
187 
188 	if (setjmp(env) != 0)
189 		goto ex;
190 	if (a[0]) {		/* BEGIN */
191 		x = execute(a[0]);
192 		if (isexit(x))
193 			return(True);
194 		if (isjump(x))
195 			FATAL("illegal break, continue, next or nextfile from BEGIN");
196 		tempfree(x);
197 	}
198 	if (a[1] || a[2])
199 		while (getrec(&record, &recsize, true) > 0) {
200 			x = execute(a[1]);
201 			if (isexit(x))
202 				break;
203 			tempfree(x);
204 		}
205   ex:
206 	if (setjmp(env) != 0)	/* handles exit within END */
207 		goto ex1;
208 	if (a[2]) {		/* END */
209 		x = execute(a[2]);
210 		if (isbreak(x) || isnext(x) || iscont(x))
211 			FATAL("illegal break, continue, next or nextfile from END");
212 		tempfree(x);
213 	}
214   ex1:
215 	return(True);
216 }
217 
218 struct Frame {	/* stack frame for awk function calls */
219 	int nargs;	/* number of arguments in this call */
220 	Cell *fcncell;	/* pointer to Cell for function */
221 	Cell **args;	/* pointer to array of arguments after execute */
222 	Cell *retval;	/* return value */
223 };
224 
225 #define	NARGS	50	/* max args in a call */
226 
227 struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
228 int	nframe = 0;		/* number of frames allocated */
229 struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
230 
231 Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
232 {
233 	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
234 	int i, ncall, ndef;
235 	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
236 	Node *x;
237 	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
238 	Cell *y, *z, *fcn;
239 	char *s;
240 
241 	fcn = execute(a[0]);	/* the function itself */
242 	s = fcn->nval;
243 	if (!isfcn(fcn))
244 		FATAL("calling undefined function %s", s);
245 	if (frame == NULL) {
246 		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
247 		if (frame == NULL)
248 			FATAL("out of space for stack frames calling %s", s);
249 	}
250 	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
251 		ncall++;
252 	ndef = (int) fcn->fval;			/* args in defn */
253 	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
254 	if (ncall > ndef)
255 		WARNING("function %s called with %d args, uses only %d",
256 			s, ncall, ndef);
257 	if (ncall + ndef > NARGS)
258 		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
259 	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
260 		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
261 		y = execute(x);
262 		oargs[i] = y;
263 		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
264 			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
265 		if (isfcn(y))
266 			FATAL("can't use function %s as argument in %s", y->nval, s);
267 		if (isarr(y))
268 			args[i] = y;	/* arrays by ref */
269 		else
270 			args[i] = copycell(y);
271 		tempfree(y);
272 	}
273 	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
274 		args[i] = gettemp();
275 		*args[i] = newcopycell;
276 	}
277 	frp++;	/* now ok to up frame */
278 	if (frp >= frame + nframe) {
279 		int dfp = frp - frame;	/* old index */
280 		frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
281 		if (frame == NULL)
282 			FATAL("out of space for stack frames in %s", s);
283 		frp = frame + dfp;
284 	}
285 	frp->fcncell = fcn;
286 	frp->args = args;
287 	frp->nargs = ndef;	/* number defined with (excess are locals) */
288 	frp->retval = gettemp();
289 
290 	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
291 	y = execute((Node *)(fcn->sval));	/* execute body */
292 	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
293 
294 	for (i = 0; i < ndef; i++) {
295 		Cell *t = frp->args[i];
296 		if (isarr(t)) {
297 			if (t->csub == CCOPY) {
298 				if (i >= ncall) {
299 					freesymtab(t);
300 					t->csub = CTEMP;
301 					tempfree(t);
302 				} else {
303 					oargs[i]->tval = t->tval;
304 					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
305 					oargs[i]->sval = t->sval;
306 					tempfree(t);
307 				}
308 			}
309 		} else if (t != y) {	/* kludge to prevent freeing twice */
310 			t->csub = CTEMP;
311 			tempfree(t);
312 		} else if (t == y && t->csub == CCOPY) {
313 			t->csub = CTEMP;
314 			tempfree(t);
315 			freed = 1;
316 		}
317 	}
318 	tempfree(fcn);
319 	if (isexit(y) || isnext(y))
320 		return y;
321 	if (freed == 0) {
322 		tempfree(y);	/* don't free twice! */
323 	}
324 	z = frp->retval;			/* return value */
325 	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
326 	frp--;
327 	return(z);
328 }
329 
330 Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
331 {
332 	Cell *y;
333 
334 	/* copy is not constant or field */
335 
336 	y = gettemp();
337 	y->tval = x->tval & ~(CON|FLD|REC);
338 	y->csub = CCOPY;	/* prevents freeing until call is over */
339 	y->nval = x->nval;	/* BUG? */
340 	if (isstr(x) /* || x->ctype == OCELL */) {
341 		y->sval = tostring(x->sval);
342 		y->tval &= ~DONTFREE;
343 	} else
344 		y->tval |= DONTFREE;
345 	y->fval = x->fval;
346 	return y;
347 }
348 
349 Cell *arg(Node **a, int n)	/* nth argument of a function */
350 {
351 
352 	n = ptoi(a[0]);	/* argument number, counting from 0 */
353 	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
354 	if (n+1 > frp->nargs)
355 		FATAL("argument #%d of function %s was not supplied",
356 			n+1, frp->fcncell->nval);
357 	return frp->args[n];
358 }
359 
360 Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
361 {
362 	Cell *y;
363 
364 	switch (n) {
365 	case EXIT:
366 		if (a[0] != NULL) {
367 			y = execute(a[0]);
368 			errorflag = (int) getfval(y);
369 			tempfree(y);
370 		}
371 		longjmp(env, 1);
372 	case RETURN:
373 		if (a[0] != NULL) {
374 			y = execute(a[0]);
375 			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
376 				setsval(frp->retval, getsval(y));
377 				frp->retval->fval = getfval(y);
378 				frp->retval->tval |= NUM;
379 			}
380 			else if (y->tval & STR)
381 				setsval(frp->retval, getsval(y));
382 			else if (y->tval & NUM)
383 				setfval(frp->retval, getfval(y));
384 			else		/* can't happen */
385 				FATAL("bad type variable %d", y->tval);
386 			tempfree(y);
387 		}
388 		return(jret);
389 	case NEXT:
390 		return(jnext);
391 	case NEXTFILE:
392 		nextfile();
393 		return(jnextfile);
394 	case BREAK:
395 		return(jbreak);
396 	case CONTINUE:
397 		return(jcont);
398 	default:	/* can't happen */
399 		FATAL("illegal jump type %d", n);
400 	}
401 	return 0;	/* not reached */
402 }
403 
404 Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
405 {		/* a[0] is variable, a[1] is operator, a[2] is filename */
406 	Cell *r, *x;
407 	extern Cell **fldtab;
408 	FILE *fp;
409 	char *buf;
410 	int bufsize = recsize;
411 	int mode;
412 	bool newflag;
413 	double result;
414 
415 	if ((buf = (char *) malloc(bufsize)) == NULL)
416 		FATAL("out of memory in getline");
417 
418 	fflush(stdout);	/* in case someone is waiting for a prompt */
419 	r = gettemp();
420 	if (a[1] != NULL) {		/* getline < file */
421 		x = execute(a[2]);		/* filename */
422 		mode = ptoi(a[1]);
423 		if (mode == '|')		/* input pipe */
424 			mode = LE;	/* arbitrary flag */
425 		fp = openfile(mode, getsval(x), &newflag);
426 		tempfree(x);
427 		if (fp == NULL)
428 			n = -1;
429 		else
430 			n = readrec(&buf, &bufsize, fp, newflag);
431 		if (n <= 0) {
432 			;
433 		} else if (a[0] != NULL) {	/* getline var <file */
434 			x = execute(a[0]);
435 			setsval(x, buf);
436 			if (is_number(x->sval, & result)) {
437 				x->fval = result;
438 				x->tval |= NUM;
439 			}
440 			tempfree(x);
441 		} else {			/* getline <file */
442 			setsval(fldtab[0], buf);
443 			if (is_number(fldtab[0]->sval, & result)) {
444 				fldtab[0]->fval = result;
445 				fldtab[0]->tval |= NUM;
446 			}
447 		}
448 	} else {			/* bare getline; use current input */
449 		if (a[0] == NULL)	/* getline */
450 			n = getrec(&record, &recsize, true);
451 		else {			/* getline var */
452 			n = getrec(&buf, &bufsize, false);
453 			x = execute(a[0]);
454 			setsval(x, buf);
455 			if (is_number(x->sval, & result)) {
456 				x->fval = result;
457 				x->tval |= NUM;
458 			}
459 			tempfree(x);
460 		}
461 	}
462 	setfval(r, (Awkfloat) n);
463 	free(buf);
464 	return r;
465 }
466 
467 Cell *getnf(Node **a, int n)	/* get NF */
468 {
469 	if (!donefld)
470 		fldbld();
471 	return (Cell *) a[0];
472 }
473 
474 static char *
475 makearraystring(Node *p, const char *func)
476 {
477 	char *buf;
478 	int bufsz = recsize;
479 	size_t blen;
480 
481 	if ((buf = (char *) malloc(bufsz)) == NULL) {
482 		FATAL("%s: out of memory", func);
483 	}
484 
485 	blen = 0;
486 	buf[blen] = '\0';
487 
488 	for (; p; p = p->nnext) {
489 		Cell *x = execute(p);	/* expr */
490 		char *s = getsval(x);
491 		size_t seplen = strlen(getsval(subseploc));
492 		size_t nsub = p->nnext ? seplen : 0;
493 		size_t slen = strlen(s);
494 		size_t tlen = blen + slen + nsub;
495 
496 		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
497 			FATAL("%s: out of memory %s[%s...]",
498 			    func, x->nval, buf);
499 		}
500 		memcpy(buf + blen, s, slen);
501 		if (nsub) {
502 			memcpy(buf + blen + slen, *SUBSEP, nsub);
503 		}
504 		buf[tlen] = '\0';
505 		blen = tlen;
506 		tempfree(x);
507 	}
508 	return buf;
509 }
510 
511 Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
512 {
513 	Cell *x, *z;
514 	char *buf;
515 
516 	x = execute(a[0]);	/* Cell* for symbol table */
517 	buf = makearraystring(a[1], __func__);
518 	if (!isarr(x)) {
519 		DPRINTF("making %s into an array\n", NN(x->nval));
520 		if (freeable(x))
521 			xfree(x->sval);
522 		x->tval &= ~(STR|NUM|DONTFREE);
523 		x->tval |= ARR;
524 		x->sval = (char *) makesymtab(NSYMTAB);
525 	}
526 	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
527 	z->ctype = OCELL;
528 	z->csub = CVAR;
529 	tempfree(x);
530 	free(buf);
531 	return(z);
532 }
533 
534 Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
535 {
536 	Cell *x;
537 
538 	x = execute(a[0]);	/* Cell* for symbol table */
539 	if (x == symtabloc) {
540 		FATAL("cannot delete SYMTAB or its elements");
541 	}
542 	if (!isarr(x))
543 		return True;
544 	if (a[1] == NULL) {	/* delete the elements, not the table */
545 		freesymtab(x);
546 		x->tval &= ~STR;
547 		x->tval |= ARR;
548 		x->sval = (char *) makesymtab(NSYMTAB);
549 	} else {
550 		char *buf = makearraystring(a[1], __func__);
551 		freeelem(x, buf);
552 		free(buf);
553 	}
554 	tempfree(x);
555 	return True;
556 }
557 
558 Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
559 {
560 	Cell *ap, *k;
561 	char *buf;
562 
563 	ap = execute(a[1]);	/* array name */
564 	if (!isarr(ap)) {
565 		DPRINTF("making %s into an array\n", ap->nval);
566 		if (freeable(ap))
567 			xfree(ap->sval);
568 		ap->tval &= ~(STR|NUM|DONTFREE);
569 		ap->tval |= ARR;
570 		ap->sval = (char *) makesymtab(NSYMTAB);
571 	}
572 	buf = makearraystring(a[0], __func__);
573 	k = lookup(buf, (Array *) ap->sval);
574 	tempfree(ap);
575 	free(buf);
576 	if (k == NULL)
577 		return(False);
578 	else
579 		return(True);
580 }
581 
582 
583 Cell *matchop(Node **a, int n)	/* ~ and match() */
584 {
585 	Cell *x, *y;
586 	char *s, *t;
587 	int i;
588 	fa *pfa;
589 	int (*mf)(fa *, const char *) = match, mode = 0;
590 
591 	if (n == MATCHFCN) {
592 		mf = pmatch;
593 		mode = 1;
594 	}
595 	x = execute(a[1]);	/* a[1] = target text */
596 	s = getsval(x);
597 	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
598 		i = (*mf)((fa *) a[2], s);
599 	else {
600 		y = execute(a[2]);	/* a[2] = regular expr */
601 		t = getsval(y);
602 		pfa = makedfa(t, mode);
603 		i = (*mf)(pfa, s);
604 		tempfree(y);
605 	}
606 	tempfree(x);
607 	if (n == MATCHFCN) {
608 		int start = patbeg - s + 1;
609 		if (patlen < 0)
610 			start = 0;
611 		setfval(rstartloc, (Awkfloat) start);
612 		setfval(rlengthloc, (Awkfloat) patlen);
613 		x = gettemp();
614 		x->tval = NUM;
615 		x->fval = start;
616 		return x;
617 	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
618 		return(True);
619 	else
620 		return(False);
621 }
622 
623 
624 Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
625 {
626 	Cell *x, *y;
627 	int i;
628 
629 	x = execute(a[0]);
630 	i = istrue(x);
631 	tempfree(x);
632 	switch (n) {
633 	case BOR:
634 		if (i) return(True);
635 		y = execute(a[1]);
636 		i = istrue(y);
637 		tempfree(y);
638 		if (i) return(True);
639 		else return(False);
640 	case AND:
641 		if ( !i ) return(False);
642 		y = execute(a[1]);
643 		i = istrue(y);
644 		tempfree(y);
645 		if (i) return(True);
646 		else return(False);
647 	case NOT:
648 		if (i) return(False);
649 		else return(True);
650 	default:	/* can't happen */
651 		FATAL("unknown boolean operator %d", n);
652 	}
653 	return 0;	/*NOTREACHED*/
654 }
655 
656 Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
657 {
658 	int i;
659 	Cell *x, *y;
660 	Awkfloat j;
661 
662 	x = execute(a[0]);
663 	y = execute(a[1]);
664 	if (x->tval&NUM && y->tval&NUM) {
665 		j = x->fval - y->fval;
666 		i = j<0? -1: (j>0? 1: 0);
667 	} else {
668 		i = strcoll(getsval(x), getsval(y));
669 	}
670 	tempfree(x);
671 	tempfree(y);
672 	switch (n) {
673 	case LT:	if (i<0) return(True);
674 			else return(False);
675 	case LE:	if (i<=0) return(True);
676 			else return(False);
677 	case NE:	if (i!=0) return(True);
678 			else return(False);
679 	case EQ:	if (i == 0) return(True);
680 			else return(False);
681 	case GE:	if (i>=0) return(True);
682 			else return(False);
683 	case GT:	if (i>0) return(True);
684 			else return(False);
685 	default:	/* can't happen */
686 		FATAL("unknown relational operator %d", n);
687 	}
688 	return 0;	/*NOTREACHED*/
689 }
690 
691 void tfree(Cell *a)	/* free a tempcell */
692 {
693 	if (freeable(a)) {
694 		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
695 		xfree(a->sval);
696 	}
697 	if (a == tmps)
698 		FATAL("tempcell list is curdled");
699 	a->cnext = tmps;
700 	tmps = a;
701 }
702 
703 Cell *gettemp(void)	/* get a tempcell */
704 {	int i;
705 	Cell *x;
706 
707 	if (!tmps) {
708 		tmps = (Cell *) calloc(100, sizeof(*tmps));
709 		if (!tmps)
710 			FATAL("out of space for temporaries");
711 		for (i = 1; i < 100; i++)
712 			tmps[i-1].cnext = &tmps[i];
713 		tmps[i-1].cnext = NULL;
714 	}
715 	x = tmps;
716 	tmps = x->cnext;
717 	*x = tempcell;
718 	return(x);
719 }
720 
721 Cell *indirect(Node **a, int n)	/* $( a[0] ) */
722 {
723 	Awkfloat val;
724 	Cell *x;
725 	int m;
726 	char *s;
727 
728 	x = execute(a[0]);
729 	val = getfval(x);	/* freebsd: defend against super large field numbers */
730 	if ((Awkfloat)INT_MAX < val)
731 		FATAL("trying to access out of range field %s", x->nval);
732 	m = (int) val;
733 	if (m == 0 && !is_number(s = getsval(x), NULL))	/* suspicion! */
734 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
735 		/* BUG: can x->nval ever be null??? */
736 	tempfree(x);
737 	x = fieldadr(m);
738 	x->ctype = OCELL;	/* BUG?  why are these needed? */
739 	x->csub = CFLD;
740 	return(x);
741 }
742 
743 Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
744 {
745 	int k, m, n;
746 	char *s;
747 	int temp;
748 	Cell *x, *y, *z = NULL;
749 
750 	x = execute(a[0]);
751 	y = execute(a[1]);
752 	if (a[2] != NULL)
753 		z = execute(a[2]);
754 	s = getsval(x);
755 	k = strlen(s) + 1;
756 	if (k <= 1) {
757 		tempfree(x);
758 		tempfree(y);
759 		if (a[2] != NULL) {
760 			tempfree(z);
761 		}
762 		x = gettemp();
763 		setsval(x, "");
764 		return(x);
765 	}
766 	m = (int) getfval(y);
767 	if (m <= 0)
768 		m = 1;
769 	else if (m > k)
770 		m = k;
771 	tempfree(y);
772 	if (a[2] != NULL) {
773 		n = (int) getfval(z);
774 		tempfree(z);
775 	} else
776 		n = k - 1;
777 	if (n < 0)
778 		n = 0;
779 	else if (n > k - m)
780 		n = k - m;
781 	DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
782 	y = gettemp();
783 	temp = s[n+m-1];	/* with thanks to John Linderman */
784 	s[n+m-1] = '\0';
785 	setsval(y, s + m - 1);
786 	s[n+m-1] = temp;
787 	tempfree(x);
788 	return(y);
789 }
790 
791 Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
792 {
793 	Cell *x, *y, *z;
794 	char *s1, *s2, *p1, *p2, *q;
795 	Awkfloat v = 0.0;
796 
797 	x = execute(a[0]);
798 	s1 = getsval(x);
799 	y = execute(a[1]);
800 	s2 = getsval(y);
801 
802 	z = gettemp();
803 	for (p1 = s1; *p1 != '\0'; p1++) {
804 		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
805 			continue;
806 		if (*p2 == '\0') {
807 			v = (Awkfloat) (p1 - s1 + 1);	/* origin 1 */
808 			break;
809 		}
810 	}
811 	tempfree(x);
812 	tempfree(y);
813 	setfval(z, v);
814 	return(z);
815 }
816 
817 #define	MAXNUMSIZE	50
818 
819 int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
820 {
821 	char *fmt;
822 	char *p, *t;
823 	const char *os;
824 	Cell *x;
825 	int flag = 0, n;
826 	int fmtwd; /* format width */
827 	int fmtsz = recsize;
828 	char *buf = *pbuf;
829 	int bufsize = *pbufsize;
830 #define FMTSZ(a)   (fmtsz - ((a) - fmt))
831 #define BUFSZ(a)   (bufsize - ((a) - buf))
832 
833 	static bool first = true;
834 	static bool have_a_format = false;
835 
836 	if (first) {
837 		char xbuf[100];
838 
839 		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
840 		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
841 		first = false;
842 	}
843 
844 	os = s;
845 	p = buf;
846 	if ((fmt = (char *) malloc(fmtsz)) == NULL)
847 		FATAL("out of memory in format()");
848 	while (*s) {
849 		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
850 		if (*s != '%') {
851 			*p++ = *s++;
852 			continue;
853 		}
854 		if (*(s+1) == '%') {
855 			*p++ = '%';
856 			s += 2;
857 			continue;
858 		}
859 		/* have to be real careful in case this is a huge number, eg, %100000d */
860 		fmtwd = atoi(s+1);
861 		if (fmtwd < 0)
862 			fmtwd = -fmtwd;
863 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
864 		for (t = fmt; (*t++ = *s) != '\0'; s++) {
865 			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
866 				FATAL("format item %.30s... ran format() out of memory", os);
867 			/* Ignore size specifiers */
868 			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
869 				t--;
870 				continue;
871 			}
872 			if (isalpha((uschar)*s))
873 				break;
874 			if (*s == '$') {
875 				FATAL("'$' not permitted in awk formats");
876 			}
877 			if (*s == '*') {
878 				if (a == NULL) {
879 					FATAL("not enough args in printf(%s)", os);
880 				}
881 				x = execute(a);
882 				a = a->nnext;
883 				snprintf(t - 1, FMTSZ(t - 1),
884 				    "%d", fmtwd=(int) getfval(x));
885 				if (fmtwd < 0)
886 					fmtwd = -fmtwd;
887 				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
888 				t = fmt + strlen(fmt);
889 				tempfree(x);
890 			}
891 		}
892 		*t = '\0';
893 		if (fmtwd < 0)
894 			fmtwd = -fmtwd;
895 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
896 		switch (*s) {
897 		case 'a': case 'A':
898 			if (have_a_format)
899 				flag = *s;
900 			else
901 				flag = 'f';
902 			break;
903 		case 'f': case 'e': case 'g': case 'E': case 'G':
904 			flag = 'f';
905 			break;
906 		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
907 			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
908 			*(t-1) = 'j';
909 			*t = *s;
910 			*++t = '\0';
911 			break;
912 		case 's':
913 			flag = 's';
914 			break;
915 		case 'c':
916 			flag = 'c';
917 			break;
918 		default:
919 			WARNING("weird printf conversion %s", fmt);
920 			flag = '?';
921 			break;
922 		}
923 		if (a == NULL)
924 			FATAL("not enough args in printf(%s)", os);
925 		x = execute(a);
926 		a = a->nnext;
927 		n = MAXNUMSIZE;
928 		if (fmtwd > n)
929 			n = fmtwd;
930 		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
931 		switch (flag) {
932 		case '?':	snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
933 			t = getsval(x);
934 			n = strlen(t);
935 			if (fmtwd > n)
936 				n = fmtwd;
937 			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
938 			p += strlen(p);
939 			snprintf(p, BUFSZ(p), "%s", t);
940 			break;
941 		case 'a':
942 		case 'A':
943 		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
944 		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
945 		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
946 		case 's':
947 			t = getsval(x);
948 			n = strlen(t);
949 			if (fmtwd > n)
950 				n = fmtwd;
951 			if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
952 				FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
953 			snprintf(p, BUFSZ(p), fmt, t);
954 			break;
955 		case 'c':
956 			if (isnum(x)) {
957 				if ((int)getfval(x))
958 					snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
959 				else {
960 					*p++ = '\0'; /* explicit null byte */
961 					*p = '\0';   /* next output will start here */
962 				}
963 			} else
964 				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
965 			break;
966 		default:
967 			FATAL("can't happen: bad conversion %c in format()", flag);
968 		}
969 		tempfree(x);
970 		p += strlen(p);
971 		s++;
972 	}
973 	*p = '\0';
974 	free(fmt);
975 	for ( ; a; a = a->nnext)		/* evaluate any remaining args */
976 		execute(a);
977 	*pbuf = buf;
978 	*pbufsize = bufsize;
979 	return p - buf;
980 }
981 
982 Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
983 {
984 	Cell *x;
985 	Node *y;
986 	char *buf;
987 	int bufsz=3*recsize;
988 
989 	if ((buf = (char *) malloc(bufsz)) == NULL)
990 		FATAL("out of memory in awksprintf");
991 	y = a[0]->nnext;
992 	x = execute(a[0]);
993 	if (format(&buf, &bufsz, getsval(x), y) == -1)
994 		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
995 	tempfree(x);
996 	x = gettemp();
997 	x->sval = buf;
998 	x->tval = STR;
999 	return(x);
1000 }
1001 
1002 Cell *awkprintf(Node **a, int n)		/* printf */
1003 {	/* a[0] is list of args, starting with format string */
1004 	/* a[1] is redirection operator, a[2] is redirection file */
1005 	FILE *fp;
1006 	Cell *x;
1007 	Node *y;
1008 	char *buf;
1009 	int len;
1010 	int bufsz=3*recsize;
1011 
1012 	if ((buf = (char *) malloc(bufsz)) == NULL)
1013 		FATAL("out of memory in awkprintf");
1014 	y = a[0]->nnext;
1015 	x = execute(a[0]);
1016 	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1017 		FATAL("printf string %.30s... too long.  can't happen.", buf);
1018 	tempfree(x);
1019 	if (a[1] == NULL) {
1020 		/* fputs(buf, stdout); */
1021 		fwrite(buf, len, 1, stdout);
1022 		if (ferror(stdout))
1023 			FATAL("write error on stdout");
1024 	} else {
1025 		fp = redirect(ptoi(a[1]), a[2]);
1026 		/* fputs(buf, fp); */
1027 		fwrite(buf, len, 1, fp);
1028 		fflush(fp);
1029 		if (ferror(fp))
1030 			FATAL("write error on %s", filename(fp));
1031 	}
1032 	free(buf);
1033 	return(True);
1034 }
1035 
1036 Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1037 {
1038 	Awkfloat i, j = 0;
1039 	double v;
1040 	Cell *x, *y, *z;
1041 
1042 	x = execute(a[0]);
1043 	i = getfval(x);
1044 	tempfree(x);
1045 	if (n != UMINUS && n != UPLUS) {
1046 		y = execute(a[1]);
1047 		j = getfval(y);
1048 		tempfree(y);
1049 	}
1050 	z = gettemp();
1051 	switch (n) {
1052 	case ADD:
1053 		i += j;
1054 		break;
1055 	case MINUS:
1056 		i -= j;
1057 		break;
1058 	case MULT:
1059 		i *= j;
1060 		break;
1061 	case DIVIDE:
1062 		if (j == 0)
1063 			FATAL("division by zero");
1064 		i /= j;
1065 		break;
1066 	case MOD:
1067 		if (j == 0)
1068 			FATAL("division by zero in mod");
1069 		modf(i/j, &v);
1070 		i = i - j * v;
1071 		break;
1072 	case UMINUS:
1073 		i = -i;
1074 		break;
1075 	case UPLUS: /* handled by getfval(), above */
1076 		break;
1077 	case POWER:
1078 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1079 			i = ipow(i, (int) j);
1080                else {
1081 			errno = 0;
1082 			i = errcheck(pow(i, j), "pow");
1083                }
1084 		break;
1085 	default:	/* can't happen */
1086 		FATAL("illegal arithmetic operator %d", n);
1087 	}
1088 	setfval(z, i);
1089 	return(z);
1090 }
1091 
/*
 * ipow: x**n for an integer exponent, by repeated squaring.
 * ought to be done by pow, but isn't always.
 * Any n <= 0 yields 1.  The bits of n are consumed from the most
 * significant one downwards, so the multiplications happen in the same
 * order as in the halving recursion: square, then multiply by x on a 1 bit.
 */
double ipow(double x, int n)
{
	double acc = 1;
	int bit;

	if (n <= 0)
		return 1;

	/* find the highest power of two that does not exceed n */
	for (bit = 1; bit <= n / 2; bit *= 2)
		continue;

	for ( ; bit > 0; bit /= 2) {
		if (n & bit)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1104 
1105 Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1106 {
1107 	Cell *x, *z;
1108 	int k;
1109 	Awkfloat xf;
1110 
1111 	x = execute(a[0]);
1112 	xf = getfval(x);
1113 	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1114 	if (n == PREINCR || n == PREDECR) {
1115 		setfval(x, xf + k);
1116 		return(x);
1117 	}
1118 	z = gettemp();
1119 	setfval(z, xf);
1120 	setfval(x, xf + k);
1121 	tempfree(x);
1122 	return(z);
1123 }
1124 
1125 Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
1126 {		/* this is subtle; don't muck with it. */
1127 	Cell *x, *y;
1128 	Awkfloat xf, yf;
1129 	double v;
1130 
1131 	y = execute(a[1]);
1132 	x = execute(a[0]);
1133 	if (n == ASSIGN) {	/* ordinary assignment */
1134 		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1135 			;	/* self-assignment: leave alone unless it's a field or NF */
1136 		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1137 			setsval(x, getsval(y));
1138 			x->fval = getfval(y);
1139 			x->tval |= NUM;
1140 		}
1141 		else if (isstr(y))
1142 			setsval(x, getsval(y));
1143 		else if (isnum(y))
1144 			setfval(x, getfval(y));
1145 		else
1146 			funnyvar(y, "read value of");
1147 		tempfree(y);
1148 		return(x);
1149 	}
1150 	xf = getfval(x);
1151 	yf = getfval(y);
1152 	switch (n) {
1153 	case ADDEQ:
1154 		xf += yf;
1155 		break;
1156 	case SUBEQ:
1157 		xf -= yf;
1158 		break;
1159 	case MULTEQ:
1160 		xf *= yf;
1161 		break;
1162 	case DIVEQ:
1163 		if (yf == 0)
1164 			FATAL("division by zero in /=");
1165 		xf /= yf;
1166 		break;
1167 	case MODEQ:
1168 		if (yf == 0)
1169 			FATAL("division by zero in %%=");
1170 		modf(xf/yf, &v);
1171 		xf = xf - yf * v;
1172 		break;
1173 	case POWEQ:
1174 		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
1175 			xf = ipow(xf, (int) yf);
1176                else {
1177 			errno = 0;
1178 			xf = errcheck(pow(xf, yf), "pow");
1179                }
1180 		break;
1181 	default:
1182 		FATAL("illegal assignment operator %d", n);
1183 		break;
1184 	}
1185 	tempfree(y);
1186 	setfval(x, xf);
1187 	return(x);
1188 }
1189 
1190 Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1191 {
1192 	Cell *x, *y, *z;
1193 	int n1, n2;
1194 	char *s = NULL;
1195 	int ssz = 0;
1196 
1197 	x = execute(a[0]);
1198 	n1 = strlen(getsval(x));
1199 	adjbuf(&s, &ssz, n1, recsize, 0, "cat1");
1200 	memcpy(s, x->sval, n1);
1201 
1202 	y = execute(a[1]);
1203 	n2 = strlen(getsval(y));
1204 	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1205 	memcpy(s + n1, y->sval, n2);
1206 	s[n1 + n2] = '\0';
1207 
1208 	tempfree(x);
1209 	tempfree(y);
1210 
1211 	z = gettemp();
1212 	z->sval = s;
1213 	z->tval = STR;
1214 
1215 	return(z);
1216 }
1217 
1218 Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1219 {
1220 	Cell *x;
1221 
1222 	if (a[0] == NULL)
1223 		x = execute(a[1]);
1224 	else {
1225 		x = execute(a[0]);
1226 		if (istrue(x)) {
1227 			tempfree(x);
1228 			x = execute(a[1]);
1229 		}
1230 	}
1231 	return x;
1232 }
1233 
1234 Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1235 {
1236 	Cell *x;
1237 	int pair;
1238 
1239 	pair = ptoi(a[3]);
1240 	if (pairstack[pair] == 0) {
1241 		x = execute(a[0]);
1242 		if (istrue(x))
1243 			pairstack[pair] = 1;
1244 		tempfree(x);
1245 	}
1246 	if (pairstack[pair] == 1) {
1247 		x = execute(a[1]);
1248 		if (istrue(x))
1249 			pairstack[pair] = 0;
1250 		tempfree(x);
1251 		x = execute(a[2]);
1252 		return(x);
1253 	}
1254 	return(False);
1255 }
1256 
1257 Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
1258 {
1259 	Cell *x = NULL, *y, *ap;
1260 	const char *s, *origs, *t;
1261 	const char *fs = NULL;
1262 	char *origfs = NULL;
1263 	int sep;
1264 	char temp, num[50];
1265 	int n, tempstat, arg3type;
1266 	double result;
1267 
1268 	y = execute(a[0]);	/* source string */
1269 	origs = s = strdup(getsval(y));
1270 	arg3type = ptoi(a[3]);
1271 	if (a[2] == NULL)		/* fs string */
1272 		fs = getsval(fsloc);
1273 	else if (arg3type == STRING) {	/* split(str,arr,"string") */
1274 		x = execute(a[2]);
1275 		fs = origfs = strdup(getsval(x));
1276 		tempfree(x);
1277 	} else if (arg3type == REGEXPR)
1278 		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
1279 	else
1280 		FATAL("illegal type of split");
1281 	sep = *fs;
1282 	ap = execute(a[1]);	/* array name */
1283 	freesymtab(ap);
1284 	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1285 	ap->tval &= ~STR;
1286 	ap->tval |= ARR;
1287 	ap->sval = (char *) makesymtab(NSYMTAB);
1288 
1289 	n = 0;
1290         if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1291 		/* split(s, a, //); have to arrange that it looks like empty sep */
1292 		arg3type = 0;
1293 		fs = "";
1294 		sep = 0;
1295 	}
1296 	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
1297 		fa *pfa;
1298 		if (arg3type == REGEXPR) {	/* it's ready already */
1299 			pfa = (fa *) a[2];
1300 		} else {
1301 			pfa = makedfa(fs, 1);
1302 		}
1303 		if (nematch(pfa,s)) {
1304 			tempstat = pfa->initstat;
1305 			pfa->initstat = 2;
1306 			do {
1307 				n++;
1308 				snprintf(num, sizeof(num), "%d", n);
1309 				temp = *patbeg;
1310 				setptr(patbeg, '\0');
1311 				if (is_number(s, & result))
1312 					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1313 				else
1314 					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1315 				setptr(patbeg, temp);
1316 				s = patbeg + patlen;
1317 				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1318 					n++;
1319 					snprintf(num, sizeof(num), "%d", n);
1320 					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1321 					pfa->initstat = tempstat;
1322 					goto spdone;
1323 				}
1324 			} while (nematch(pfa,s));
1325 			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1326 							/* cf gsub and refldbld */
1327 		}
1328 		n++;
1329 		snprintf(num, sizeof(num), "%d", n);
1330 		if (is_number(s, & result))
1331 			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1332 		else
1333 			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1334   spdone:
1335 		pfa = NULL;
1336 	} else if (sep == ' ') {
1337 		for (n = 0; ; ) {
1338 #define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
1339 			while (ISWS(*s))
1340 				s++;
1341 			if (*s == '\0')
1342 				break;
1343 			n++;
1344 			t = s;
1345 			do
1346 				s++;
1347 			while (*s != '\0' && !ISWS(*s));
1348 			temp = *s;
1349 			setptr(s, '\0');
1350 			snprintf(num, sizeof(num), "%d", n);
1351 			if (is_number(t, & result))
1352 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1353 			else
1354 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1355 			setptr(s, temp);
1356 			if (*s != '\0')
1357 				s++;
1358 		}
1359 	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1360 		for (n = 0; *s != '\0'; s++) {
1361 			char buf[2];
1362 			n++;
1363 			snprintf(num, sizeof(num), "%d", n);
1364 			buf[0] = *s;
1365 			buf[1] = '\0';
1366 			if (isdigit((uschar)buf[0]))
1367 				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1368 			else
1369 				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1370 		}
1371 	} else if (*s != '\0') {
1372 		for (;;) {
1373 			n++;
1374 			t = s;
1375 			while (*s != sep && *s != '\n' && *s != '\0')
1376 				s++;
1377 			temp = *s;
1378 			setptr(s, '\0');
1379 			snprintf(num, sizeof(num), "%d", n);
1380 			if (is_number(t, & result))
1381 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1382 			else
1383 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1384 			setptr(s, temp);
1385 			if (*s++ == '\0')
1386 				break;
1387 		}
1388 	}
1389 	tempfree(ap);
1390 	tempfree(y);
1391 	xfree(origs);
1392 	xfree(origfs);
1393 	x = gettemp();
1394 	x->tval = NUM;
1395 	x->fval = n;
1396 	return(x);
1397 }
1398 
1399 Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1400 {
1401 	Cell *x;
1402 
1403 	x = execute(a[0]);
1404 	if (istrue(x)) {
1405 		tempfree(x);
1406 		x = execute(a[1]);
1407 	} else {
1408 		tempfree(x);
1409 		x = execute(a[2]);
1410 	}
1411 	return(x);
1412 }
1413 
1414 Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1415 {
1416 	Cell *x;
1417 
1418 	x = execute(a[0]);
1419 	if (istrue(x)) {
1420 		tempfree(x);
1421 		x = execute(a[1]);
1422 	} else if (a[2] != NULL) {
1423 		tempfree(x);
1424 		x = execute(a[2]);
1425 	}
1426 	return(x);
1427 }
1428 
1429 Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1430 {
1431 	Cell *x;
1432 
1433 	for (;;) {
1434 		x = execute(a[0]);
1435 		if (!istrue(x))
1436 			return(x);
1437 		tempfree(x);
1438 		x = execute(a[1]);
1439 		if (isbreak(x)) {
1440 			x = True;
1441 			return(x);
1442 		}
1443 		if (isnext(x) || isexit(x) || isret(x))
1444 			return(x);
1445 		tempfree(x);
1446 	}
1447 }
1448 
1449 Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1450 {
1451 	Cell *x;
1452 
1453 	for (;;) {
1454 		x = execute(a[0]);
1455 		if (isbreak(x))
1456 			return True;
1457 		if (isnext(x) || isexit(x) || isret(x))
1458 			return(x);
1459 		tempfree(x);
1460 		x = execute(a[1]);
1461 		if (!istrue(x))
1462 			return(x);
1463 		tempfree(x);
1464 	}
1465 }
1466 
1467 Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1468 {
1469 	Cell *x;
1470 
1471 	x = execute(a[0]);
1472 	tempfree(x);
1473 	for (;;) {
1474 		if (a[1]!=NULL) {
1475 			x = execute(a[1]);
1476 			if (!istrue(x)) return(x);
1477 			else tempfree(x);
1478 		}
1479 		x = execute(a[3]);
1480 		if (isbreak(x))		/* turn off break */
1481 			return True;
1482 		if (isnext(x) || isexit(x) || isret(x))
1483 			return(x);
1484 		tempfree(x);
1485 		x = execute(a[2]);
1486 		tempfree(x);
1487 	}
1488 }
1489 
1490 Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1491 {
1492 	Cell *x, *vp, *arrayp, *cp, *ncp;
1493 	Array *tp;
1494 	int i;
1495 
1496 	vp = execute(a[0]);
1497 	arrayp = execute(a[1]);
1498 	if (!isarr(arrayp)) {
1499 		return True;
1500 	}
1501 	tp = (Array *) arrayp->sval;
1502 	tempfree(arrayp);
1503 	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1504 		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1505 			setsval(vp, cp->nval);
1506 			ncp = cp->cnext;
1507 			x = execute(a[2]);
1508 			if (isbreak(x)) {
1509 				tempfree(vp);
1510 				return True;
1511 			}
1512 			if (isnext(x) || isexit(x) || isret(x)) {
1513 				tempfree(vp);
1514 				return(x);
1515 			}
1516 			tempfree(x);
1517 		}
1518 	}
1519 	return True;
1520 }
1521 
1522 static char *nawk_convert(const char *s, int (*fun_c)(int),
1523     wint_t (*fun_wc)(wint_t))
1524 {
1525 	char *buf      = NULL;
1526 	char *pbuf     = NULL;
1527 	const char *ps = NULL;
1528 	size_t n       = 0;
1529 	wchar_t wc;
1530 	size_t sz = MB_CUR_MAX;
1531 
1532 	if (sz == 1) {
1533 		buf = tostring(s);
1534 
1535 		for (pbuf = buf; *pbuf; pbuf++)
1536 			*pbuf = fun_c((uschar)*pbuf);
1537 
1538 		return buf;
1539 	} else {
1540 		/* upper/lower character may be shorter/longer */
1541 		buf = tostringN(s, strlen(s) * sz + 1);
1542 
1543 		(void) mbtowc(NULL, NULL, 0);	/* reset internal state */
1544 		/*
1545 		 * Reset internal state here too.
1546 		 * Assign result to avoid a compiler warning. (Casting to void
1547 		 * doesn't work.)
1548 		 * Increment said variable to avoid a different warning.
1549 		 */
1550 		int unused = wctomb(NULL, L'\0');
1551 		unused++;
1552 
1553 		ps   = s;
1554 		pbuf = buf;
1555 		while (n = mbtowc(&wc, ps, sz),
1556 		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
1557 		{
1558 			ps += n;
1559 
1560 			n = wctomb(pbuf, fun_wc(wc));
1561 			if (n == (size_t)-1)
1562 				FATAL("illegal wide character %s", s);
1563 
1564 			pbuf += n;
1565 		}
1566 
1567 		*pbuf = '\0';
1568 
1569 		if (n)
1570 			FATAL("illegal byte sequence %s", s);
1571 
1572 		return buf;
1573 	}
1574 }
1575 
#ifdef __DJGPP__
/*
 * Local towupper()/towlower() for DJGPP builds (presumably because that
 * C library does not supply them -- TODO confirm).  Values 0..255 go
 * through the byte-oriented routine; anything else is returned as is.
 */
static wint_t towupper(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? toupper(wc & 0xFF) : wc;
}

static wint_t towlower(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? tolower(wc & 0xFF) : wc;
}
#endif
1593 
/* nawk_toupper: newly allocated upper-case copy of s (see nawk_convert) */
static char *nawk_toupper(const char *s)
{
	char *upper;

	upper = nawk_convert(s, toupper, towupper);
	return upper;
}
1598 
/* nawk_tolower: newly allocated lower-case copy of s (see nawk_convert) */
static char *nawk_tolower(const char *s)
{
	char *lower;

	lower = nawk_convert(s, tolower, towlower);
	return lower;
}
1603 
1604 Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
1605 {
1606 	Cell *x, *y;
1607 	Awkfloat u;
1608 	int t, sz;
1609 	Awkfloat tmp;
1610 	char *buf, *fmt;
1611 	Node *nextarg;
1612 	FILE *fp;
1613 	int status = 0;
1614 	time_t tv;
1615 	struct tm *tm;
1616 
1617 	t = ptoi(a[0]);
1618 	x = execute(a[1]);
1619 	nextarg = a[1]->nnext;
1620 	switch (t) {
1621 	case FLENGTH:
1622 		if (isarr(x))
1623 			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
1624 		else
1625 			u = strlen(getsval(x));
1626 		break;
1627 	case FLOG:
1628 		errno = 0;
1629 		u = errcheck(log(getfval(x)), "log");
1630 		break;
1631 	case FINT:
1632 		modf(getfval(x), &u); break;
1633 	case FEXP:
1634 		errno = 0;
1635 		u = errcheck(exp(getfval(x)), "exp");
1636 		break;
1637 	case FSQRT:
1638 		errno = 0;
1639 		u = errcheck(sqrt(getfval(x)), "sqrt");
1640 		break;
1641 	case FSIN:
1642 		u = sin(getfval(x)); break;
1643 	case FCOS:
1644 		u = cos(getfval(x)); break;
1645 	case FATAN:
1646 		if (nextarg == NULL) {
1647 			WARNING("atan2 requires two arguments; returning 1.0");
1648 			u = 1.0;
1649 		} else {
1650 			y = execute(a[1]->nnext);
1651 			u = atan2(getfval(x), getfval(y));
1652 			tempfree(y);
1653 			nextarg = nextarg->nnext;
1654 		}
1655 		break;
1656 	case FCOMPL:
1657 		u = ~((int)getfval(x));
1658 		break;
1659 	case FAND:
1660 		if (nextarg == 0) {
1661 			WARNING("and requires two arguments; returning 0");
1662 			u = 0;
1663 			break;
1664 		}
1665 		y = execute(a[1]->nnext);
1666 		u = ((int)getfval(x)) & ((int)getfval(y));
1667 		tempfree(y);
1668 		nextarg = nextarg->nnext;
1669 		break;
1670 	case FFOR:
1671 		if (nextarg == 0) {
1672 			WARNING("or requires two arguments; returning 0");
1673 			u = 0;
1674 			break;
1675 		}
1676 		y = execute(a[1]->nnext);
1677 		u = ((int)getfval(x)) | ((int)getfval(y));
1678 		tempfree(y);
1679 		nextarg = nextarg->nnext;
1680 		break;
1681 	case FXOR:
1682 		if (nextarg == 0) {
1683 			WARNING("xor requires two arguments; returning 0");
1684 			u = 0;
1685 			break;
1686 		}
1687 		y = execute(a[1]->nnext);
1688 		u = ((int)getfval(x)) ^ ((int)getfval(y));
1689 		tempfree(y);
1690 		nextarg = nextarg->nnext;
1691 		break;
1692 	case FLSHIFT:
1693 		if (nextarg == 0) {
1694 			WARNING("lshift requires two arguments; returning 0");
1695 			u = 0;
1696 			break;
1697 		}
1698 		y = execute(a[1]->nnext);
1699 		u = ((int)getfval(x)) << ((int)getfval(y));
1700 		tempfree(y);
1701 		nextarg = nextarg->nnext;
1702 		break;
1703 	case FRSHIFT:
1704 		if (nextarg == 0) {
1705 			WARNING("rshift requires two arguments; returning 0");
1706 			u = 0;
1707 			break;
1708 		}
1709 		y = execute(a[1]->nnext);
1710 		u = ((int)getfval(x)) >> ((int)getfval(y));
1711 		tempfree(y);
1712 		nextarg = nextarg->nnext;
1713 		break;
1714 	case FSYSTEM:
1715 		fflush(stdout);		/* in case something is buffered already */
1716 		status = system(getsval(x));
1717 		u = status;
1718 		if (status != -1) {
1719 			if (WIFEXITED(status)) {
1720 				u = WEXITSTATUS(status);
1721 			} else if (WIFSIGNALED(status)) {
1722 				u = WTERMSIG(status) + 256;
1723 #ifdef WCOREDUMP
1724 				if (WCOREDUMP(status))
1725 					u += 256;
1726 #endif
1727 			} else	/* something else?!? */
1728 				u = 0;
1729 		}
1730 		break;
1731 	case FRAND:
1732 		/* random() returns numbers in [0..2^31-1]
1733 		 * in order to get a number in [0, 1), divide it by 2^31
1734 		 */
1735 		u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
1736 		break;
1737 	case FSRAND:
1738 		if (isrec(x))	/* no argument provided */
1739 			u = time((time_t *)0);
1740 		else
1741 			u = getfval(x);
1742 		tmp = u;
1743 		srandom((unsigned long) u);
1744 		u = srand_seed;
1745 		srand_seed = tmp;
1746 		break;
1747 	case FTOUPPER:
1748 	case FTOLOWER:
1749 		if (t == FTOUPPER)
1750 			buf = nawk_toupper(getsval(x));
1751 		else
1752 			buf = nawk_tolower(getsval(x));
1753 		tempfree(x);
1754 		x = gettemp();
1755 		setsval(x, buf);
1756 		free(buf);
1757 		return x;
1758 	case FFLUSH:
1759 		if (isrec(x) || strlen(getsval(x)) == 0) {
1760 			flush_all();	/* fflush() or fflush("") -> all */
1761 			u = 0;
1762 		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1763 			u = EOF;
1764 		else
1765 			u = fflush(fp);
1766 		break;
1767 	case FSYSTIME:
1768 		u = time((time_t *) 0);
1769 		break;
1770 	case FSTRFTIME:
1771 		/* strftime([format [,timestamp]]) */
1772 		if (nextarg) {
1773 			y = execute(nextarg);
1774 			nextarg = nextarg->nnext;
1775 			tv = (time_t) getfval(y);
1776 			tempfree(y);
1777 		} else
1778 			tv = time((time_t *) 0);
1779 		tm = localtime(&tv);
1780 		if (tm == NULL)
1781 			FATAL("bad time %ld", (long)tv);
1782 
1783 		if (isrec(x)) {
1784 			/* format argument not provided, use default */
1785 			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1786 		} else
1787 			fmt = tostring(getsval(x));
1788 
1789 		sz = 32;
1790 		buf = NULL;
1791 		do {
1792 			if ((buf = realloc(buf, (sz *= 2))) == NULL)
1793 				FATAL("out of memory in strftime");
1794 		} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
1795 
1796 		y = gettemp();
1797 		setsval(y, buf);
1798 		free(fmt);
1799 		free(buf);
1800 
1801 		return y;
1802 	default:	/* can't happen */
1803 		FATAL("illegal function type %d", t);
1804 		break;
1805 	}
1806 	tempfree(x);
1807 	x = gettemp();
1808 	setfval(x, u);
1809 	if (nextarg != NULL) {
1810 		WARNING("warning: function has too many arguments");
1811 		for ( ; nextarg; nextarg = nextarg->nnext)
1812 			execute(nextarg);
1813 	}
1814 	return(x);
1815 }
1816 
1817 Cell *printstat(Node **a, int n)	/* print a[0] */
1818 {
1819 	Node *x;
1820 	Cell *y;
1821 	FILE *fp;
1822 
1823 	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
1824 		fp = stdout;
1825 	else
1826 		fp = redirect(ptoi(a[1]), a[2]);
1827 	for (x = a[0]; x != NULL; x = x->nnext) {
1828 		y = execute(x);
1829 		fputs(getpssval(y), fp);
1830 		tempfree(y);
1831 		if (x->nnext == NULL)
1832 			fputs(getsval(orsloc), fp);
1833 		else
1834 			fputs(getsval(ofsloc), fp);
1835 	}
1836 	if (a[1] != NULL)
1837 		fflush(fp);
1838 	if (ferror(fp))
1839 		FATAL("write error on %s", filename(fp));
1840 	return(True);
1841 }
1842 
1843 Cell *nullproc(Node **a, int n)
1844 {
1845 	return 0;
1846 }
1847 
1848 
1849 FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
1850 {
1851 	FILE *fp;
1852 	Cell *x;
1853 	char *fname;
1854 
1855 	x = execute(b);
1856 	fname = getsval(x);
1857 	fp = openfile(a, fname, NULL);
1858 	if (fp == NULL)
1859 		FATAL("can't open file %s", fname);
1860 	tempfree(x);
1861 	return fp;
1862 }
1863 
/* one entry per stream tracked in the open-file table */
struct files {
	FILE		*fp;
	const char	*fname;
	int		mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table; it holds nfiles entries */
size_t nfiles;
1871 
1872 static void stdinit(void)	/* in case stdin, etc., are not constants */
1873 {
1874 	nfiles = FOPEN_MAX;
1875 	files = (struct files *) calloc(nfiles, sizeof(*files));
1876 	if (files == NULL)
1877 		FATAL("can't allocate file memory for %zu files", nfiles);
1878         files[0].fp = stdin;
1879 	files[0].fname = "/dev/stdin";
1880 	files[0].mode = LT;
1881         files[1].fp = stdout;
1882 	files[1].fname = "/dev/stdout";
1883 	files[1].mode = GT;
1884         files[2].fp = stderr;
1885 	files[2].fname = "/dev/stderr";
1886 	files[2].mode = GT;
1887 }
1888 
1889 FILE *openfile(int a, const char *us, bool *pnewflag)
1890 {
1891 	const char *s = us;
1892 	size_t i;
1893 	int m;
1894 	FILE *fp = NULL;
1895 
1896 	if (*s == '\0')
1897 		FATAL("null file name in print or getline");
1898 	for (i = 0; i < nfiles; i++)
1899 		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1900 		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1901 		     a == FFLUSH)) {
1902 			if (pnewflag)
1903 				*pnewflag = false;
1904 			return files[i].fp;
1905 		}
1906 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
1907 		return NULL;
1908 
1909 	for (i = 0; i < nfiles; i++)
1910 		if (files[i].fp == NULL)
1911 			break;
1912 	if (i >= nfiles) {
1913 		struct files *nf;
1914 		size_t nnf = nfiles + FOPEN_MAX;
1915 		nf = (struct files *) realloc(files, nnf * sizeof(*nf));
1916 		if (nf == NULL)
1917 			FATAL("cannot grow files for %s and %zu files", s, nnf);
1918 		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1919 		nfiles = nnf;
1920 		files = nf;
1921 	}
1922 	fflush(stdout);	/* force a semblance of order */
1923 	m = a;
1924 	if (a == GT) {
1925 		fp = fopen(s, "w");
1926 	} else if (a == APPEND) {
1927 		fp = fopen(s, "a");
1928 		m = GT;	/* so can mix > and >> */
1929 	} else if (a == '|') {	/* output pipe */
1930 		fp = popen(s, "w");
1931 	} else if (a == LE) {	/* input pipe */
1932 		fp = popen(s, "r");
1933 	} else if (a == LT) {	/* getline <file */
1934 		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
1935 	} else	/* can't happen */
1936 		FATAL("illegal redirection %d", a);
1937 	if (fp != NULL) {
1938 		files[i].fname = tostring(s);
1939 		files[i].fp = fp;
1940 		files[i].mode = m;
1941 		if (pnewflag)
1942 			*pnewflag = true;
1943 		if (fp != stdin && fp != stdout && fp != stderr)
1944 			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
1945 	}
1946 	return fp;
1947 }
1948 
1949 const char *filename(FILE *fp)
1950 {
1951 	size_t i;
1952 
1953 	for (i = 0; i < nfiles; i++)
1954 		if (fp == files[i].fp)
1955 			return files[i].fname;
1956 	return "???";
1957 }
1958 
1959  Cell *closefile(Node **a, int n)
1960  {
1961  	Cell *x;
1962 	size_t i;
1963 	bool stat;
1964 
1965  	x = execute(a[0]);
1966  	getsval(x);
1967 	stat = true;
1968  	for (i = 0; i < nfiles; i++) {
1969 		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
1970 			continue;
1971 		if (ferror(files[i].fp))
1972 			FATAL("i/o error occurred on %s", files[i].fname);
1973 		if (files[i].fp == stdin || files[i].fp == stdout ||
1974 		    files[i].fp == stderr)
1975 			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
1976 		else if (files[i].mode == '|' || files[i].mode == LE)
1977 			stat = pclose(files[i].fp) == -1;
1978 		else
1979 			stat = fclose(files[i].fp) == EOF;
1980 		if (stat)
1981 			FATAL("i/o error occurred closing %s", files[i].fname);
1982 		if (i > 2)	/* don't do /dev/std... */
1983 			xfree(files[i].fname);
1984 		files[i].fname = NULL;	/* watch out for ref thru this */
1985 		files[i].fp = NULL;
1986 		break;
1987  	}
1988  	tempfree(x);
1989  	x = gettemp();
1990 	setfval(x, (Awkfloat) (stat ? -1 : 0));
1991  	return(x);
1992  }
1993 
1994 void closeall(void)
1995 {
1996 	size_t i;
1997 	bool stat = false;
1998 
1999 	for (i = 0; i < nfiles; i++) {
2000 		if (! files[i].fp)
2001 			continue;
2002 		if (ferror(files[i].fp))
2003 			FATAL( "i/o error occurred on %s", files[i].fname );
2004 		if (files[i].fp == stdin)
2005 			continue;
2006 		if (files[i].mode == '|' || files[i].mode == LE)
2007 			stat = pclose(files[i].fp) == -1;
2008 		else if (files[i].fp == stdout || files[i].fp == stderr)
2009 			stat = fflush(files[i].fp) == EOF;
2010 		else
2011 			stat = fclose(files[i].fp) == EOF;
2012 		if (stat)
2013 			FATAL( "i/o error occurred while closing %s", files[i].fname );
2014 	}
2015 }
2016 
2017 static void flush_all(void)
2018 {
2019 	size_t i;
2020 
2021 	for (i = 0; i < nfiles; i++)
2022 		if (files[i].fp)
2023 			fflush(files[i].fp);
2024 }
2025 
2026 void backsub(char **pb_ptr, const char **sptr_ptr);
2027 
2028 Cell *sub(Node **a, int nnn)	/* substitute command */
2029 {
2030 	const char *sptr, *q;
2031 	Cell *x, *y, *result;
2032 	char *t, *buf, *pb;
2033 	fa *pfa;
2034 	int bufsz = recsize;
2035 
2036 	if ((buf = (char *) malloc(bufsz)) == NULL)
2037 		FATAL("out of memory in sub");
2038 	x = execute(a[3]);	/* target string */
2039 	t = getsval(x);
2040 	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
2041 		pfa = (fa *) a[1];	/* regular expression */
2042 	else {
2043 		y = execute(a[1]);
2044 		pfa = makedfa(getsval(y), 1);
2045 		tempfree(y);
2046 	}
2047 	y = execute(a[2]);	/* replacement string */
2048 	result = False;
2049 	if (pmatch(pfa, t)) {
2050 		sptr = t;
2051 		adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
2052 		pb = buf;
2053 		while (sptr < patbeg)
2054 			*pb++ = *sptr++;
2055 		sptr = getsval(y);
2056 		while (*sptr != '\0') {
2057 			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
2058 			if (*sptr == '\\') {
2059 				backsub(&pb, &sptr);
2060 			} else if (*sptr == '&') {
2061 				sptr++;
2062 				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
2063 				for (q = patbeg; q < patbeg+patlen; )
2064 					*pb++ = *q++;
2065 			} else
2066 				*pb++ = *sptr++;
2067 		}
2068 		*pb = '\0';
2069 		if (pb > buf + bufsz)
2070 			FATAL("sub result1 %.30s too big; can't happen", buf);
2071 		sptr = patbeg + patlen;
2072 		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
2073 			adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
2074 			while ((*pb++ = *sptr++) != '\0')
2075 				continue;
2076 		}
2077 		if (pb > buf + bufsz)
2078 			FATAL("sub result2 %.30s too big; can't happen", buf);
2079 		setsval(x, buf);	/* BUG: should be able to avoid copy */
2080 		result = True;
2081 	}
2082 	tempfree(x);
2083 	tempfree(y);
2084 	free(buf);
2085 	return result;
2086 }
2087 
2088 Cell *gsub(Node **a, int nnn)	/* global substitute */
2089 {
2090 	Cell *x, *y;
2091 	char *rptr, *pb;
2092 	const char *q, *t, *sptr;
2093 	char *buf;
2094 	fa *pfa;
2095 	int mflag, tempstat, num;
2096 	int bufsz = recsize;
2097 
2098 	if ((buf = (char *) malloc(bufsz)) == NULL)
2099 		FATAL("out of memory in gsub");
2100 	mflag = 0;	/* if mflag == 0, can replace empty string */
2101 	num = 0;
2102 	x = execute(a[3]);	/* target string */
2103 	t = getsval(x);
2104 	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
2105 		pfa = (fa *) a[1];	/* regular expression */
2106 	else {
2107 		y = execute(a[1]);
2108 		pfa = makedfa(getsval(y), 1);
2109 		tempfree(y);
2110 	}
2111 	y = execute(a[2]);	/* replacement string */
2112 	if (pmatch(pfa, t)) {
2113 		tempstat = pfa->initstat;
2114 		pfa->initstat = 2;
2115 		pb = buf;
2116 		rptr = getsval(y);
2117 		do {
2118 			if (patlen == 0 && *patbeg != '\0') {	/* matched empty string */
2119 				if (mflag == 0) {	/* can replace empty */
2120 					num++;
2121 					sptr = rptr;
2122 					while (*sptr != '\0') {
2123 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2124 						if (*sptr == '\\') {
2125 							backsub(&pb, &sptr);
2126 						} else if (*sptr == '&') {
2127 							sptr++;
2128 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2129 							for (q = patbeg; q < patbeg+patlen; )
2130 								*pb++ = *q++;
2131 						} else
2132 							*pb++ = *sptr++;
2133 					}
2134 				}
2135 				if (*t == '\0')	/* at end */
2136 					goto done;
2137 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
2138 				*pb++ = *t++;
2139 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2140 					FATAL("gsub result0 %.30s too big; can't happen", buf);
2141 				mflag = 0;
2142 			}
2143 			else {	/* matched nonempty string */
2144 				num++;
2145 				sptr = t;
2146 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
2147 				while (sptr < patbeg)
2148 					*pb++ = *sptr++;
2149 				sptr = rptr;
2150 				while (*sptr != '\0') {
2151 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2152 					if (*sptr == '\\') {
2153 						backsub(&pb, &sptr);
2154 					} else if (*sptr == '&') {
2155 						sptr++;
2156 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2157 						for (q = patbeg; q < patbeg+patlen; )
2158 							*pb++ = *q++;
2159 					} else
2160 						*pb++ = *sptr++;
2161 				}
2162 				t = patbeg + patlen;
2163 				if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
2164 					goto done;
2165 				if (pb > buf + bufsz)
2166 					FATAL("gsub result1 %.30s too big; can't happen", buf);
2167 				mflag = 1;
2168 			}
2169 		} while (pmatch(pfa,t));
2170 		sptr = t;
2171 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
2172 		while ((*pb++ = *sptr++) != '\0')
2173 			continue;
2174 	done:	if (pb < buf + bufsz)
2175 			*pb = '\0';
2176 		else if (*(pb-1) != '\0')
2177 			FATAL("gsub result2 %.30s truncated; can't happen", buf);
2178 		setsval(x, buf);	/* BUG: should be able to avoid copy + free */
2179 		pfa->initstat = tempstat;
2180 	}
2181 	tempfree(x);
2182 	tempfree(y);
2183 	x = gettemp();
2184 	x->tval = NUM;
2185 	x->fval = num;
2186 	free(buf);
2187 	return(x);
2188 }
2189 
2190 Cell *gensub(Node **a, int nnn)	/* global selective substitute */
2191 	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
2192 {
2193 	Cell *x, *y, *res, *h;
2194 	char *rptr;
2195 	const char *sptr;
2196 	char *buf, *pb;
2197 	const char *t, *q;
2198 	fa *pfa;
2199 	int mflag, tempstat, num, whichm;
2200 	int bufsz = recsize;
2201 
2202 	if ((buf = malloc(bufsz)) == NULL)
2203 		FATAL("out of memory in gensub");
2204 	mflag = 0;	/* if mflag == 0, can replace empty string */
2205 	num = 0;
2206 	x = execute(a[4]);	/* source string */
2207 	t = getsval(x);
2208 	res = copycell(x);	/* target string - initially copy of source */
2209 	res->csub = CTEMP;	/* result values are temporary */
2210 	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
2211 		pfa = (fa *) a[1];	/* regular expression */
2212 	else {
2213 		y = execute(a[1]);
2214 		pfa = makedfa(getsval(y), 1);
2215 		tempfree(y);
2216 	}
2217 	y = execute(a[2]);	/* replacement string */
2218 	h = execute(a[3]);	/* which matches should be replaced */
2219 	sptr = getsval(h);
2220 	if (sptr[0] == 'g' || sptr[0] == 'G')
2221 		whichm = -1;
2222 	else {
2223 		/*
2224 		 * The specified number is index of replacement, starting
2225 		 * from 1. GNU awk treats index lower than 0 same as
2226 		 * 1, we do same for compatibility.
2227 		 */
2228 		whichm = (int) getfval(h) - 1;
2229 		if (whichm < 0)
2230 			whichm = 0;
2231 	}
2232 	tempfree(h);
2233 
2234 	if (pmatch(pfa, t)) {
2235 		char *sl;
2236 
2237 		tempstat = pfa->initstat;
2238 		pfa->initstat = 2;
2239 		pb = buf;
2240 		rptr = getsval(y);
2241 		/*
2242 		 * XXX if there are any backreferences in subst string,
2243 		 * complain now.
2244 		 */
2245 		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2246 			if (strchr("0123456789", sl[1])) {
2247 				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2248 			}
2249 		}
2250 
2251 		do {
2252 			if (whichm >= 0 && whichm != num) {
2253 				num++;
2254 				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2255 
2256 				/* copy the part of string up to and including
2257 				 * match to output buffer */
2258 				while (t < patbeg + patlen)
2259 					*pb++ = *t++;
2260 				continue;
2261 			}
2262 
2263 			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
2264 				if (mflag == 0) {	/* can replace empty */
2265 					num++;
2266 					sptr = rptr;
2267 					while (*sptr != 0) {
2268 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2269 						if (*sptr == '\\') {
2270 							backsub(&pb, &sptr);
2271 						} else if (*sptr == '&') {
2272 							sptr++;
2273 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2274 							for (q = patbeg; q < patbeg+patlen; )
2275 								*pb++ = *q++;
2276 						} else
2277 							*pb++ = *sptr++;
2278 					}
2279 				}
2280 				if (*t == 0)	/* at end */
2281 					goto done;
2282 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2283 				*pb++ = *t++;
2284 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2285 					FATAL("gensub result0 %.30s too big; can't happen", buf);
2286 				mflag = 0;
2287 			}
2288 			else {	/* matched nonempty string */
2289 				num++;
2290 				sptr = t;
2291 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2292 				while (sptr < patbeg)
2293 					*pb++ = *sptr++;
2294 				sptr = rptr;
2295 				while (*sptr != 0) {
2296 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2297 					if (*sptr == '\\') {
2298 						backsub(&pb, &sptr);
2299 					} else if (*sptr == '&') {
2300 						sptr++;
2301 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2302 						for (q = patbeg; q < patbeg+patlen; )
2303 							*pb++ = *q++;
2304 					} else
2305 						*pb++ = *sptr++;
2306 				}
2307 				t = patbeg + patlen;
2308 				if (patlen == 0 || *t == 0 || *(t-1) == 0)
2309 					goto done;
2310 				if (pb > buf + bufsz)
2311 					FATAL("gensub result1 %.30s too big; can't happen", buf);
2312 				mflag = 1;
2313 			}
2314 		} while (pmatch(pfa,t));
2315 		sptr = t;
2316 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2317 		while ((*pb++ = *sptr++) != 0)
2318 			;
2319 	done:	if (pb > buf + bufsz)
2320 			FATAL("gensub result2 %.30s too big; can't happen", buf);
2321 		*pb = '\0';
2322 		setsval(res, buf);
2323 		pfa->initstat = tempstat;
2324 	}
2325 	tempfree(x);
2326 	tempfree(y);
2327 	free(buf);
2328 	return(res);
2329 }
2330 
/*
 * backsub: handle \\& variations in a replacement string.
 * On entry (*sptr_ptr)[0] is a backslash.  Copies the appropriate bytes to
 * *pb_ptr and advances both pointers past what was consumed/produced.
 * Whether POSIXLY_CORRECT is set in the environment is looked up once, on
 * the first call, and selects how "\\x" is treated.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
{						/* sptr[0] == '\\' */
	static bool checked = false;
	static bool posix_mode = false;
	char *out = *pb_ptr;
	const char *in = *sptr_ptr;

	if (!checked) {
		checked = true;
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);
	}

	switch (in[1]) {
	case '\\':
		if (in[2] == '\\' && in[3] == '&') {	/* \\\& -> \& */
			*out++ = '\\';
			*out++ = '&';
			in += 4;
		} else if (in[2] == '&') {	/* \\& -> \ + matched */
			*out++ = '\\';
			in += 2;		/* the & is left for the caller */
		} else if (posix_mode) {	/* \\x -> \x */
			in++;
			*out++ = *in++;
		} else {			/* \\x -> \\x */
			*out++ = *in++;
			*out++ = *in++;
		}
		break;
	case '&':				/* literal & */
		in++;
		*out++ = *in++;
		break;
	default:				/* literal \ */
		*out++ = *in++;
		break;
	}

	*pb_ptr = out;
	*sptr_ptr = in;
}
2367