xref: /freebsd/contrib/one-true-awk/lib.c (revision 17853db4b0dc36ed32af039cd803f13b692913da)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define DEBUG
26 #include <stdio.h>
27 #include <string.h>
28 #include <strings.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <stdlib.h>
32 #include <stdarg.h>
33 #include <limits.h>
34 #include <math.h>
35 #include "awk.h"
36 
37 extern int u8_nextlen(const char *s);
38 
39 char	EMPTY[] = { '\0' };
40 FILE	*infile	= NULL;
41 bool	innew;		/* true = infile has not been read by readrec */
42 char	*file	= EMPTY;
43 char	*record;
44 int	recsize	= RECSIZE;
45 char	*fields;
46 int	fieldssize = RECSIZE;
47 
48 Cell	**fldtab;	/* pointers to Cells */
49 static size_t	len_inputFS = 0;
50 static char	*inputFS = NULL; /* FS at time of input, for field splitting */
51 
52 #define	MAXFLD	2
53 int	nfields	= MAXFLD;	/* last allocated slot for $i */
54 
55 bool	donefld;	/* true = implies rec broken into fields */
56 bool	donerec;	/* true = record is valid (no flds have changed) */
57 
58 int	lastfld	= 0;	/* last used field */
59 int	argno	= 1;	/* current input argument number */
60 extern	Awkfloat *ARGC;
61 
62 static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
63 static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
64 
recinit(unsigned int n)65 void recinit(unsigned int n)
66 {
67 	if ( (record = (char *) malloc(n)) == NULL
68 	  || (fields = (char *) malloc(n+1)) == NULL
69 	  || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
70 	  || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
71 		FATAL("out of space for $0 and fields");
72 	*record = '\0';
73 	*fldtab[0] = dollar0;
74 	fldtab[0]->sval = record;
75 	fldtab[0]->nval = tostring("0");
76 	makefields(1, nfields);
77 }
78 
makefields(int n1,int n2)79 void makefields(int n1, int n2)		/* create $n1..$n2 inclusive */
80 {
81 	char temp[50];
82 	int i;
83 
84 	for (i = n1; i <= n2; i++) {
85 		fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
86 		if (fldtab[i] == NULL)
87 			FATAL("out of space in makefields %d", i);
88 		*fldtab[i] = dollar1;
89 		snprintf(temp, sizeof(temp), "%d", i);
90 		fldtab[i]->nval = tostring(temp);
91 	}
92 }
93 
initgetrec(void)94 void initgetrec(void)
95 {
96 	int i;
97 	char *p;
98 
99 	for (i = 1; i < *ARGC; i++) {
100 		p = getargv(i); /* find 1st real filename */
101 		if (p == NULL || *p == '\0') {  /* deleted or zapped */
102 			argno++;
103 			continue;
104 		}
105 		if (!isclvar(p)) {
106 			setsval(lookup("FILENAME", symtab), p);
107 			return;
108 		}
109 		setclvar(p);	/* a commandline assignment before filename */
110 		argno++;
111 	}
112 	infile = stdin;		/* no filenames, so use stdin */
113 	innew = true;
114 }
115 
116 /*
117  * POSIX specifies that fields are supposed to be evaluated as if they were
118  * split using the value of FS at the time that the record's value ($0) was
119  * read.
120  *
121  * Since field-splitting is done lazily, we save the current value of FS
122  * whenever a new record is read in (implicitly or via getline), or when
123  * a new value is assigned to $0.
124  */
savefs(void)125 void savefs(void)
126 {
127 	size_t len;
128 	if ((len = strlen(getsval(fsloc))) < len_inputFS) {
129 		strcpy(inputFS, *FS);	/* for subsequent field splitting */
130 		return;
131 	}
132 
133 	len_inputFS = len + 1;
134 	inputFS = (char *) realloc(inputFS, len_inputFS);
135 	if (inputFS == NULL)
136 		FATAL("field separator %.10s... is too long", *FS);
137 	memcpy(inputFS, *FS, len_inputFS);
138 }
139 
140 static bool firsttime = true;
141 
getrec(char ** pbuf,int * pbufsize,bool isrecord)142 int getrec(char **pbuf, int *pbufsize, bool isrecord)	/* get next input record */
143 {			/* note: cares whether buf == record */
144 	int c;
145 	char *buf = *pbuf;
146 	uschar saveb0;
147 	int bufsize = *pbufsize, savebufsize = bufsize;
148 
149 	if (firsttime) {
150 		firsttime = false;
151 		initgetrec();
152 	}
153 	DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
154 		*RS, *FS, *ARGC, *FILENAME);
155 	saveb0 = buf[0];
156 	buf[0] = 0;
157 	while (argno < *ARGC || infile == stdin) {
158 		DPRINTF("argno=%d, file=|%s|\n", argno, file);
159 		if (infile == NULL) {	/* have to open a new file */
160 			file = getargv(argno);
161 			if (file == NULL || *file == '\0') {	/* deleted or zapped */
162 				argno++;
163 				continue;
164 			}
165 			if (isclvar(file)) {	/* a var=value arg */
166 				setclvar(file);
167 				argno++;
168 				continue;
169 			}
170 			*FILENAME = file;
171 			DPRINTF("opening file %s\n", file);
172 			if (*file == '-' && *(file+1) == '\0')
173 				infile = stdin;
174 			else if ((infile = fopen(file, "r")) == NULL)
175 				FATAL("can't open file %s", file);
176 			innew = true;
177 			setfval(fnrloc, 0.0);
178 		}
179 		c = readrec(&buf, &bufsize, infile, innew);
180 		if (innew)
181 			innew = false;
182 		if (c != 0 || buf[0] != '\0') {	/* normal record */
183 			if (isrecord) {
184 				double result;
185 
186 				if (freeable(fldtab[0]))
187 					xfree(fldtab[0]->sval);
188 				fldtab[0]->sval = buf;	/* buf == record */
189 				fldtab[0]->tval = REC | STR | DONTFREE;
190 				if (is_number(fldtab[0]->sval, & result)) {
191 					fldtab[0]->fval = result;
192 					fldtab[0]->tval |= NUM;
193 				}
194 				donefld = false;
195 				donerec = true;
196 				savefs();
197 			}
198 			setfval(nrloc, nrloc->fval+1);
199 			setfval(fnrloc, fnrloc->fval+1);
200 			*pbuf = buf;
201 			*pbufsize = bufsize;
202 			return 1;
203 		}
204 		/* EOF arrived on this file; set up next */
205 		if (infile != stdin)
206 			fclose(infile);
207 		infile = NULL;
208 		argno++;
209 	}
210 	buf[0] = saveb0;
211 	*pbuf = buf;
212 	*pbufsize = savebufsize;
213 	return 0;	/* true end of file */
214 }
215 
nextfile(void)216 void nextfile(void)
217 {
218 	if (infile != NULL && infile != stdin)
219 		fclose(infile);
220 	infile = NULL;
221 	argno++;
222 }
223 
224 extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);
225 
readrec(char ** pbuf,int * pbufsize,FILE * inf,bool newflag)226 int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag)	/* read one record into buf */
227 {
228 	int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h
229 	char *rr = *pbuf, *buf = *pbuf;
230 	int bufsize = *pbufsize;
231 	char *rs = getsval(rsloc);
232 
233 	if (CSV) {
234 		c = readcsvrec(pbuf, pbufsize, inf, newflag);
235 		isrec = (c == EOF && rr == buf) ? false : true;
236 	} else if (*rs && rs[1]) {
237 		bool found;
238 
239 		memset(buf, 0, bufsize);
240 		fa *pfa = makedfa(rs, 1);
241 		if (newflag)
242 			found = fnematch(pfa, inf, &buf, &bufsize, recsize);
243 		else {
244 			int tempstat = pfa->initstat;
245 			pfa->initstat = 2;
246 			found = fnematch(pfa, inf, &buf, &bufsize, recsize);
247 			pfa->initstat = tempstat;
248 		}
249 		if (found)
250 			setptr(patbeg, '\0');
251 		isrec = (found == 0 && *buf == '\0') ? false : true;
252 
253 	} else {
254 		if ((sep = *rs) == 0) {
255 			sep = '\n';
256 			while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
257 				;
258 			if (c != EOF)
259 				ungetc(c, inf);
260 		}
261 		for (rr = buf; ; ) {
262 			for (; (c=getc(inf)) != sep && c != EOF; ) {
263 				if (rr-buf+1 > bufsize)
264 					if (!adjbuf(&buf, &bufsize, 1+rr-buf,
265 					    recsize, &rr, "readrec 1"))
266 						FATAL("input record `%.30s...' too long", buf);
267 				*rr++ = c;
268 			}
269 			if (*rs == sep || c == EOF)
270 				break;
271 			if ((c = getc(inf)) == '\n' || c == EOF)	/* 2 in a row */
272 				break;
273 			if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
274 			    "readrec 2"))
275 				FATAL("input record `%.30s...' too long", buf);
276 			*rr++ = '\n';
277 			*rr++ = c;
278 		}
279 		if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
280 			FATAL("input record `%.30s...' too long", buf);
281 		*rr = 0;
282 		isrec = (c == EOF && rr == buf) ? false : true;
283 	}
284 	*pbuf = buf;
285 	*pbufsize = bufsize;
286 	DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
287 	return isrec;
288 }
289 
290 
291 /*******************
292  * loose ends here:
293  *   \r\n should become \n
294  *   what about bare \r?  Excel uses that for embedded newlines
295  *   can't have "" in unquoted fields, according to RFC 4180
296 */
297 
298 
readcsvrec(char ** pbuf,int * pbufsize,FILE * inf,bool newflag)299 int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
300 {			/* so read a complete record that might be multiple lines */
301 	int sep, c;
302 	char *rr = *pbuf, *buf = *pbuf;
303 	int bufsize = *pbufsize;
304 	bool in_quote = false;
305 
306 	sep = '\n'; /* the only separator; have to skip over \n embedded in "..." */
307 	rr = buf;
308 	while ((c = getc(inf)) != EOF) {
309 		if (c == sep) {
310 			if (! in_quote)
311 				break;
312 			if (rr > buf && rr[-1] == '\r')	// remove \r if was \r\n
313 				rr--;
314 		}
315 
316 		if (rr-buf+1 > bufsize)
317 			if (!adjbuf(&buf, &bufsize, 1+rr-buf,
318 			    recsize, &rr, "readcsvrec 1"))
319 				FATAL("input record `%.30s...' too long", buf);
320 		*rr++ = c;
321 		if (c == '"')
322 			in_quote = ! in_quote;
323  	}
324 	if (c == '\n' && rr > buf && rr[-1] == '\r') 	// remove \r if was \r\n
325 		rr--;
326 
327 	if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
328 		FATAL("input record `%.30s...' too long", buf);
329 	*rr = 0;
330 	*pbuf = buf;
331 	*pbufsize = bufsize;
332 	DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
333 	return c;
334 }
335 
getargv(int n)336 char *getargv(int n)	/* get ARGV[n] */
337 {
338 	Array *ap;
339 	Cell *x;
340 	char *s, temp[50];
341 	extern Cell *ARGVcell;
342 
343 	ap = (Array *)ARGVcell->sval;
344 	snprintf(temp, sizeof(temp), "%d", n);
345 	if (lookup(temp, ap) == NULL)
346 		return NULL;
347 	x = setsymtab(temp, "", 0.0, STR, ap);
348 	s = getsval(x);
349 	DPRINTF("getargv(%d) returns |%s|\n", n, s);
350 	return s;
351 }
352 
setclvar(char * s)353 void setclvar(char *s)	/* set var=value from s */
354 {
355 	char *e, *p;
356 	Cell *q;
357 	double result;
358 
359 /* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
360 /* I don't understand why it was changed. */
361 
362 	for (p=s; *p != '='; p++)
363 		;
364 	e = p;
365 	*p++ = 0;
366 	p = qstring(p, '\0');
367 	q = setsymtab(s, p, 0.0, STR, symtab);
368 	setsval(q, p);
369 	if (is_number(q->sval, & result)) {
370 		q->fval = result;
371 		q->tval |= NUM;
372 	}
373 	DPRINTF("command line set %s to |%s|\n", s, p);
374 	free(p);
375 	*e = '=';
376 }
377 
378 
fldbld(void)379 void fldbld(void)	/* create fields from current record */
380 {
381 	/* this relies on having fields[] the same length as $0 */
382 	/* the fields are all stored in this one array with \0's */
383 	/* possibly with a final trailing \0 not associated with any field */
384 	char *r, *fr, sep;
385 	Cell *p;
386 	int i, j, n;
387 
388 	if (donefld)
389 		return;
390 	if (!isstr(fldtab[0]))
391 		getsval(fldtab[0]);
392 	r = fldtab[0]->sval;
393 	n = strlen(r);
394 	if (n > fieldssize) {
395 		xfree(fields);
396 		if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
397 			FATAL("out of space for fields in fldbld %d", n);
398 		fieldssize = n;
399 	}
400 	fr = fields;
401 	i = 0;	/* number of fields accumulated here */
402 	if (inputFS == NULL)	/* make sure we have a copy of FS */
403 		savefs();
404 	if (!CSV && strlen(inputFS) > 1) {	/* it's a regular expression */
405 		i = refldbld(r, inputFS);
406 	} else if (!CSV && (sep = *inputFS) == ' ') {	/* default whitespace */
407 		for (i = 0; ; ) {
408 			while (*r == ' ' || *r == '\t' || *r == '\n')
409 				r++;
410 			if (*r == 0)
411 				break;
412 			i++;
413 			if (i > nfields)
414 				growfldtab(i);
415 			if (freeable(fldtab[i]))
416 				xfree(fldtab[i]->sval);
417 			fldtab[i]->sval = fr;
418 			fldtab[i]->tval = FLD | STR | DONTFREE;
419 			do
420 				*fr++ = *r++;
421 			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
422 			*fr++ = 0;
423 		}
424 		*fr = 0;
425 	} else if (CSV) {	/* CSV processing.  no error handling */
426 		if (*r != 0) {
427 			for (;;) {
428 				i++;
429 				if (i > nfields)
430 					growfldtab(i);
431 				if (freeable(fldtab[i]))
432 					xfree(fldtab[i]->sval);
433 				fldtab[i]->sval = fr;
434 				fldtab[i]->tval = FLD | STR | DONTFREE;
435 				if (*r == '"' ) { /* start of "..." */
436 					for (r++ ; *r != '\0'; ) {
437 						if (*r == '"' && r[1] != '\0' && r[1] == '"') {
438 							r += 2; /* doubled quote */
439 							*fr++ = '"';
440 						} else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {
441 							r++; /* skip over closing quote */
442 							break;
443 						} else {
444 							*fr++ = *r++;
445 						}
446 					}
447 					*fr++ = 0;
448 				} else {	/* unquoted field */
449 					while (*r != ',' && *r != '\0')
450 						*fr++ = *r++;
451 					*fr++ = 0;
452 				}
453 				if (*r++ == 0)
454 					break;
455 
456 			}
457 		}
458 		*fr = 0;
459 	} else if ((sep = *inputFS) == 0) {	/* new: FS="" => 1 char/field */
460 		for (i = 0; *r != '\0'; ) {
461 			char buf[10];
462 			i++;
463 			if (i > nfields)
464 				growfldtab(i);
465 			if (freeable(fldtab[i]))
466 				xfree(fldtab[i]->sval);
467 			n = u8_nextlen(r);
468 			for (j = 0; j < n; j++)
469 				buf[j] = *r++;
470 			buf[j] = '\0';
471 			fldtab[i]->sval = tostring(buf);
472 			fldtab[i]->tval = FLD | STR;
473 		}
474 		*fr = 0;
475 	} else if (*r != 0) {	/* if 0, it's a null field */
476 		/* subtle case: if length(FS) == 1 && length(RS > 0)
477 		 * \n is NOT a field separator (cf awk book 61,84).
478 		 * this variable is tested in the inner while loop.
479 		 */
480 		int rtest = '\n';  /* normal case */
481 		if (strlen(*RS) > 0)
482 			rtest = '\0';
483 		for (;;) {
484 			i++;
485 			if (i > nfields)
486 				growfldtab(i);
487 			if (freeable(fldtab[i]))
488 				xfree(fldtab[i]->sval);
489 			fldtab[i]->sval = fr;
490 			fldtab[i]->tval = FLD | STR | DONTFREE;
491 			while (*r != sep && *r != rtest && *r != '\0')	/* \n is always a separator */
492 				*fr++ = *r++;
493 			*fr++ = 0;
494 			if (*r++ == 0)
495 				break;
496 		}
497 		*fr = 0;
498 	}
499 	if (i > nfields)
500 		FATAL("record `%.30s...' has too many fields; can't happen", r);
501 	cleanfld(i+1, lastfld);	/* clean out junk from previous record */
502 	lastfld = i;
503 	donefld = true;
504 	for (j = 1; j <= lastfld; j++) {
505 		double result;
506 
507 		p = fldtab[j];
508 		if(is_number(p->sval, & result)) {
509 			p->fval = result;
510 			p->tval |= NUM;
511 		}
512 	}
513 	setfval(nfloc, (Awkfloat) lastfld);
514 	donerec = true; /* restore */
515 	if (dbg) {
516 		for (j = 0; j <= lastfld; j++) {
517 			p = fldtab[j];
518 			printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
519 		}
520 	}
521 }
522 
cleanfld(int n1,int n2)523 void cleanfld(int n1, int n2)	/* clean out fields n1 .. n2 inclusive */
524 {				/* nvals remain intact */
525 	Cell *p;
526 	int i;
527 
528 	for (i = n1; i <= n2; i++) {
529 		p = fldtab[i];
530 		if (freeable(p))
531 			xfree(p->sval);
532 		p->sval = EMPTY,
533 		p->tval = FLD | STR | DONTFREE;
534 	}
535 }
536 
newfld(int n)537 void newfld(int n)	/* add field n after end of existing lastfld */
538 {
539 	if (n > nfields)
540 		growfldtab(n);
541 	cleanfld(lastfld+1, n);
542 	lastfld = n;
543 	setfval(nfloc, (Awkfloat) n);
544 }
545 
setlastfld(int n)546 void setlastfld(int n)	/* set lastfld cleaning fldtab cells if necessary */
547 {
548 	if (n < 0)
549 		FATAL("cannot set NF to a negative value");
550 	if (n > nfields)
551 		growfldtab(n);
552 
553 	if (lastfld < n)
554 	    cleanfld(lastfld+1, n);
555 	else
556 	    cleanfld(n+1, lastfld);
557 
558 	lastfld = n;
559 }
560 
fieldadr(int n)561 Cell *fieldadr(int n)	/* get nth field */
562 {
563 	if (n < 0)
564 		FATAL("trying to access out of range field %d", n);
565 	if (n > nfields)	/* fields after NF are empty */
566 		growfldtab(n);	/* but does not increase NF */
567 	return(fldtab[n]);
568 }
569 
growfldtab(int n)570 void growfldtab(int n)	/* make new fields up to at least $n */
571 {
572 	int nf = 2 * nfields;
573 	size_t s;
574 
575 	if (n > nf)
576 		nf = n;
577 	s = (nf+1) * (sizeof (struct Cell *));  /* freebsd: how much do we need? */
578 	if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
579 		fldtab = (Cell **) realloc(fldtab, s);
580 	else					/* overflow sizeof int */
581 		xfree(fldtab);	/* make it null */
582 	if (fldtab == NULL)
583 		FATAL("out of space creating %d fields", nf);
584 	makefields(nfields+1, nf);
585 	nfields = nf;
586 }
587 
refldbld(const char * rec,const char * fs)588 int refldbld(const char *rec, const char *fs)	/* build fields from reg expr in FS */
589 {
590 	/* this relies on having fields[] the same length as $0 */
591 	/* the fields are all stored in this one array with \0's */
592 	char *fr;
593 	int i, tempstat, n;
594 	fa *pfa;
595 
596 	n = strlen(rec);
597 	if (n > fieldssize) {
598 		xfree(fields);
599 		if ((fields = (char *) malloc(n+1)) == NULL)
600 			FATAL("out of space for fields in refldbld %d", n);
601 		fieldssize = n;
602 	}
603 	fr = fields;
604 	*fr = '\0';
605 	if (*rec == '\0')
606 		return 0;
607 	pfa = makedfa(fs, 1);
608 	DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
609 	tempstat = pfa->initstat;
610 	for (i = 1; ; i++) {
611 		if (i > nfields)
612 			growfldtab(i);
613 		if (freeable(fldtab[i]))
614 			xfree(fldtab[i]->sval);
615 		fldtab[i]->tval = FLD | STR | DONTFREE;
616 		fldtab[i]->sval = fr;
617 		DPRINTF("refldbld: i=%d\n", i);
618 		if (nematch(pfa, rec)) {
619 			pfa->initstat = 2;	/* horrible coupling to b.c */
620 			DPRINTF("match %s (%d chars)\n", patbeg, patlen);
621 			strncpy(fr, rec, patbeg-rec);
622 			fr += patbeg - rec + 1;
623 			*(fr-1) = '\0';
624 			rec = patbeg + patlen;
625 		} else {
626 			DPRINTF("no match %s\n", rec);
627 			strcpy(fr, rec);
628 			pfa->initstat = tempstat;
629 			break;
630 		}
631 	}
632 	return i;
633 }
634 
recbld(void)635 void recbld(void)	/* create $0 from $1..$NF if necessary */
636 {
637 	int i;
638 	char *r, *p;
639 	char *sep = getsval(ofsloc);
640 
641 	if (donerec)
642 		return;
643 	r = record;
644 	for (i = 1; i <= *NF; i++) {
645 		p = getsval(fldtab[i]);
646 		if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
647 			FATAL("created $0 `%.30s...' too long", record);
648 		while ((*r = *p++) != 0)
649 			r++;
650 		if (i < *NF) {
651 			if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
652 				FATAL("created $0 `%.30s...' too long", record);
653 			for (p = sep; (*r = *p++) != 0; )
654 				r++;
655 		}
656 	}
657 	if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
658 		FATAL("built giant record `%.30s...'", record);
659 	*r = '\0';
660 	DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
661 
662 	if (freeable(fldtab[0]))
663 		xfree(fldtab[0]->sval);
664 	fldtab[0]->tval = REC | STR | DONTFREE;
665 	fldtab[0]->sval = record;
666 
667 	DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
668 	DPRINTF("recbld = |%s|\n", record);
669 	donerec = true;
670 }
671 
int	errorflag	= 0;	/* set to 2 by SYNTAX() */

/* yyerror: report the message s as a syntax error */
void yyerror(const char *s)
{
	SYNTAX("%s", s);
}
678 
SYNTAX(const char * fmt,...)679 void SYNTAX(const char *fmt, ...)
680 {
681 	extern char *cmdname, *curfname;
682 	static int been_here = 0;
683 	va_list varg;
684 
685 	if (been_here++ > 2)
686 		return;
687 	fprintf(stderr, "%s: ", cmdname);
688 	va_start(varg, fmt);
689 	vfprintf(stderr, fmt, varg);
690 	va_end(varg);
691 	fprintf(stderr, " at source line %d", lineno);
692 	if (curfname != NULL)
693 		fprintf(stderr, " in function %s", curfname);
694 	if (compile_time == COMPILING && cursource() != NULL)
695 		fprintf(stderr, " source file %s", cursource());
696 	fprintf(stderr, "\n");
697 	errorflag = 2;
698 	eprint();
699 }
700 
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck: read the rest of the input, counting brackets, then
 * report any of {} [] () that are out of balance.  Only the first call
 * does anything.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int c;

	if (beenhere++)
		return;
	for (;;) {
		c = input();
		if (c == EOF || c == '\0')
			break;
		bclass(c);
	}
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
716 
/*
 * bcheck2: report on stderr an imbalance of n in one kind of bracket.
 * A positive n means closers are missing, a negative n means there are
 * extra ones; zero prints nothing.  c2 is the closing character; c1 is
 * not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else if (n < 0) {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
728 
FATAL(const char * fmt,...)729 void FATAL(const char *fmt, ...)
730 {
731 	extern char *cmdname;
732 	va_list varg;
733 
734 	fflush(stdout);
735 	fprintf(stderr, "%s: ", cmdname);
736 	va_start(varg, fmt);
737 	vfprintf(stderr, fmt, varg);
738 	va_end(varg);
739 	error();
740 	if (dbg > 1)		/* core dump if serious debugging on */
741 		abort();
742 	exit(2);
743 }
744 
/*
 * WARNING: print a printf-style message on stderr, followed by the
 * location details from error(), and return.
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);		/* pending output goes out before the message */
	fprintf(stderr, "%s: ", cmdname);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	error();
}
757 
error()758 void error()
759 {
760 	extern Node *curnode;
761 
762 	fprintf(stderr, "\n");
763 	if (compile_time != ERROR_PRINTING) {
764 		if (NR && *NR > 0) {
765 			fprintf(stderr, " input record number %d", (int) (*FNR));
766 			if (strcmp(*FILENAME, "-") != 0)
767 				fprintf(stderr, ", file %s", *FILENAME);
768 			fprintf(stderr, "\n");
769 		}
770 		if (curnode)
771 			fprintf(stderr, " source line number %d", curnode->lineno);
772 		else if (lineno)
773 			fprintf(stderr, " source line number %d", lineno);
774 		if (compile_time == COMPILING && cursource() != NULL)
775 			fprintf(stderr, " source file %s", cursource());
776 		fprintf(stderr, "\n");
777 		eprint();
778 	}
779 }
780 
eprint(void)781 void eprint(void)	/* try to print context around error */
782 {
783 	char *p, *q;
784 	int c;
785 	static int been_here = 0;
786 	extern char ebuf[], *ep;
787 
788 	if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
789 		return;
790 	if (ebuf == ep)
791 		return;
792 	p = ep - 1;
793 	if (p > ebuf && *p == '\n')
794 		p--;
795 	for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
796 		;
797 	while (*p == '\n')
798 		p++;
799 	fprintf(stderr, " context is\n\t");
800 	for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
801 		;
802 	for ( ; p < q; p++)
803 		if (*p)
804 			putc(*p, stderr);
805 	fprintf(stderr, " >>> ");
806 	for ( ; p < ep; p++)
807 		if (*p)
808 			putc(*p, stderr);
809 	fprintf(stderr, " <<< ");
810 	if (*ep)
811 		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
812 			putc(c, stderr);
813 			bclass(c);
814 		}
815 	putc('\n', stderr);
816 	ep = ebuf;
817 }
818 
bclass(int c)819 void bclass(int c)
820 {
821 	switch (c) {
822 	case '{': bracecnt++; break;
823 	case '}': bracecnt--; break;
824 	case '[': brackcnt++; break;
825 	case ']': brackcnt--; break;
826 	case '(': parencnt++; break;
827 	case ')': parencnt--; break;
828 	}
829 }
830 
/*
 * errcheck: look at errno after a math function has produced x.  On
 * EDOM or ERANGE a warning naming s is printed, errno is cleared and 1
 * is returned; otherwise x is returned unchanged.
 */
double errcheck(double x, const char *s)
{
	switch (errno) {
	case EDOM:
		errno = 0;
		WARNING("%s argument out of domain", s);
		return 1;
	case ERANGE:
		errno = 0;
		WARNING("%s result out of range", s);
		return 1;
	default:
		return x;
	}
}
845 
/*
 * isclvar: is s of the form var=something?
 *
 * var must start with a letter or underscore and continue with
 * letters, digits or underscores.  Returns 1 if so, 0 otherwise.
 */
int isclvar(const char *s)	/* is s of form var=something ? */
{
	const char *os = s;

	/*
	 * The <ctype.h> functions take an unsigned char value (or EOF);
	 * passing a negative char is undefined, so convert first.
	 */
	if (!isalpha((unsigned char) *s) && *s != '_')
		return 0;
	for ( ; *s; s++)
		if (!(isalnum((unsigned char) *s) || *s == '_'))
			break;
	return *s == '=' && s > os;
}
857 
858 /* strtod is supposed to be a proper test of what's a valid number */
859 /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
860 /* wrong: violates 4.10.1.4 of ansi C standard */
861 
862 /* well, not quite. As of C99, hex floating point is allowed. so this is
863  * a bit of a mess. We work around the mess by checking for a hexadecimal
864  * value and disallowing it. Similarly, we now follow gawk and allow only
865  * +nan, -nan, +inf, and -inf for NaN and infinity values.
866  */
867 
868 /*
869  * This routine now has a more complicated interface, the main point
870  * being to avoid the double conversion of a string to double, and
871  * also to convey out, if requested, the information that the numeric
872  * value was a leading string or is all of the string. The latter bit
873  * is used in getfval().
874  */
875 
/*
 * is_valid_number: does s, after leading white space, start with a number?
 *
 *   s                  the string to examine
 *   trailing_stuff_ok  accept text after the number
 *   no_trailing        if not NULL, set to true when only white space
 *                      follows the number, false otherwise
 *   result             if not NULL, receives the converted value
 *
 * Returns true when a number was found and either nothing but white
 * space follows it or trailing_stuff_ok is set.  Hexadecimal input,
 * out-of-range values, and nan/inf without an explicit sign are
 * rejected.
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
			bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *digits;
	bool is_nan = false;
	bool is_inf = false;

	if (no_trailing)
		*no_trailing = false;

	/* <ctype.h> functions need an unsigned char value, not a plain char */
	while (isspace((unsigned char) *s))
		s++;

	/* no hex floating point, sorry -- with or without a sign */
	digits = s;
	if (*digits == '+' || *digits == '-')
		digits++;
	if (digits[0] == '0' && tolower((unsigned char) digits[1]) == 'x')
		return false;

	/* allow +nan, -nan, +inf, -inf, any other letter, no */
	if (s[0] == '+' || s[0] == '-') {
		is_nan = (strncasecmp(s+1, "nan", 3) == 0);
		is_inf = (strncasecmp(s+1, "inf", 3) == 0);
		if ((is_nan || is_inf)
		    && (isspace((unsigned char) s[4]) || s[4] == '\0'))
			goto convert;
		else if (! isdigit((unsigned char) s[1]) && s[1] != '.')
			return false;
	}
	else if (! isdigit((unsigned char) s[0]) && s[0] != '.')
		return false;

convert:
	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || errno == ERANGE)
		return false;

	/* make sure "-nan" comes out with its sign bit set */
	if (isnan(r) && s[0] == '-' && signbit(r) == 0)
		r = -r;

	if (result != NULL)
		*result = r;

	/*
	 * check for trailing stuff
	 */
	while (isspace((unsigned char) *ep))
		ep++;

	if (no_trailing != NULL)
		*no_trailing = (*ep == '\0');

	/* return true if found the end, or trailing stuff is allowed */
	return *ep == '\0' || trailing_stuff_ok;
}
934