xref: /freebsd/contrib/one-true-awk/tran.c (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define	DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "ytab.h"
33 
34 #define	FULLTAB	2	/* rehash when table gets this x full */
35 #define	GROWTAB 4	/* grow table by this factor */
36 
37 Array	*symtab;	/* main symbol table */
38 
39 char	**FS;		/* initial field sep */
40 char	**RS;		/* initial record sep */
41 char	**OFS;		/* output field sep */
42 char	**ORS;		/* output record sep */
43 char	**OFMT;		/* output format for numbers */
44 char	**CONVFMT;	/* format for conversions in getsval */
45 Awkfloat *NF;		/* number of fields in current record */
46 Awkfloat *NR;		/* number of current record */
47 Awkfloat *FNR;		/* number of current record in current file */
48 char	**FILENAME;	/* current filename argument */
49 Awkfloat *ARGC;		/* number of arguments from command line */
50 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
51 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
52 Awkfloat *RLENGTH;	/* length of same */
53 
54 Cell	*fsloc;		/* FS */
55 Cell	*nrloc;		/* NR */
56 Cell	*nfloc;		/* NF */
57 Cell	*fnrloc;	/* FNR */
58 Cell	*ofsloc;	/* OFS */
59 Cell	*orsloc;	/* ORS */
60 Cell	*rsloc;		/* RS */
61 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
62 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
63 Cell	*rstartloc;	/* RSTART */
64 Cell	*rlengthloc;	/* RLENGTH */
65 Cell	*subseploc;	/* SUBSEP */
66 Cell	*symtabloc;	/* SYMTAB */
67 
68 Cell	*nullloc;	/* a guaranteed empty cell */
69 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
70 Cell	*literal0;
71 
72 extern Cell **fldtab;
73 
74 static void
75 setfree(Cell *vp)
76 {
77 	if (&vp->sval == FS || &vp->sval == RS ||
78 	    &vp->sval == OFS || &vp->sval == ORS ||
79 	    &vp->sval == OFMT || &vp->sval == CONVFMT ||
80 	    &vp->sval == FILENAME || &vp->sval == SUBSEP)
81 		vp->tval |= DONTFREE;
82 	else
83 		vp->tval &= ~DONTFREE;
84 }
85 
86 void syminit(void)	/* initialize symbol table with builtin vars */
87 {
88 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
89 	/* this is used for if(x)... tests: */
90 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
91 	nullnode = celltonode(nullloc, CCON);
92 
93 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
94 	FS = &fsloc->sval;
95 	rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
96 	RS = &rsloc->sval;
97 	ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
98 	OFS = &ofsloc->sval;
99 	orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
100 	ORS = &orsloc->sval;
101 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
102 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
103 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
104 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
105 	NF = &nfloc->fval;
106 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
107 	NR = &nrloc->fval;
108 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
109 	FNR = &fnrloc->fval;
110 	subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
111 	SUBSEP = &subseploc->sval;
112 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
113 	RSTART = &rstartloc->fval;
114 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
115 	RLENGTH = &rlengthloc->fval;
116 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
117 	symtabloc->sval = (char *) symtab;
118 }
119 
120 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
121 {
122 	Cell *cp;
123 	int i;
124 	char temp[50];
125 
126 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
127 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
128 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
129 	cp->sval = (char *) ARGVtab;
130 	for (i = 0; i < ac; i++) {
131 		sprintf(temp, "%d", i);
132 		if (is_number(*av))
133 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
134 		else
135 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
136 		av++;
137 	}
138 }
139 
140 void envinit(char **envp)	/* set up ENVIRON variable */
141 {
142 	Cell *cp;
143 	char *p;
144 
145 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
146 	ENVtab = makesymtab(NSYMTAB);
147 	cp->sval = (char *) ENVtab;
148 	for ( ; *envp; envp++) {
149 		if ((p = strchr(*envp, '=')) == NULL)
150 			continue;
151 		if( p == *envp ) /* no left hand side name in env string */
152 			continue;
153 		*p++ = 0;	/* split into two strings at = */
154 		if (is_number(p))
155 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
156 		else
157 			setsymtab(*envp, p, 0.0, STR, ENVtab);
158 		p[-1] = '=';	/* restore in case env is passed down to a shell */
159 	}
160 }
161 
162 Array *makesymtab(int n)	/* make a new symbol table */
163 {
164 	Array *ap;
165 	Cell **tp;
166 
167 	ap = (Array *) malloc(sizeof(Array));
168 	tp = (Cell **) calloc(n, sizeof(Cell *));
169 	if (ap == NULL || tp == NULL)
170 		FATAL("out of space in makesymtab");
171 	ap->nelem = 0;
172 	ap->size = n;
173 	ap->tab = tp;
174 	return(ap);
175 }
176 
177 void freesymtab(Cell *ap)	/* free a symbol table */
178 {
179 	Cell *cp, *temp;
180 	Array *tp;
181 	int i;
182 
183 	if (!isarr(ap))
184 		return;
185 	tp = (Array *) ap->sval;
186 	if (tp == NULL)
187 		return;
188 	for (i = 0; i < tp->size; i++) {
189 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
190 			xfree(cp->nval);
191 			if (freeable(cp))
192 				xfree(cp->sval);
193 			temp = cp->cnext;	/* avoids freeing then using */
194 			free(cp);
195 			tp->nelem--;
196 		}
197 		tp->tab[i] = NULL;
198 	}
199 	if (tp->nelem != 0)
200 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
201 	free(tp->tab);
202 	free(tp);
203 }
204 
205 void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
206 {
207 	Array *tp;
208 	Cell *p, *prev = NULL;
209 	int h;
210 
211 	tp = (Array *) ap->sval;
212 	h = hash(s, tp->size);
213 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
214 		if (strcmp(s, p->nval) == 0) {
215 			if (prev == NULL)	/* 1st one */
216 				tp->tab[h] = p->cnext;
217 			else			/* middle somewhere */
218 				prev->cnext = p->cnext;
219 			if (freeable(p))
220 				xfree(p->sval);
221 			free(p->nval);
222 			free(p);
223 			tp->nelem--;
224 			return;
225 		}
226 }
227 
228 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
229 {
230 	int h;
231 	Cell *p;
232 
233 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
234 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
235 			(void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
236 		return(p);
237 	}
238 	p = (Cell *) malloc(sizeof(Cell));
239 	if (p == NULL)
240 		FATAL("out of space for symbol table at %s", n);
241 	p->nval = tostring(n);
242 	p->sval = s ? tostring(s) : tostring("");
243 	p->fval = f;
244 	p->tval = t;
245 	p->csub = CUNK;
246 	p->ctype = OCELL;
247 	tp->nelem++;
248 	if (tp->nelem > FULLTAB * tp->size)
249 		rehash(tp);
250 	h = hash(n, tp->size);
251 	p->cnext = tp->tab[h];
252 	tp->tab[h] = p;
253 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
254 		(void*)p, p->nval, p->sval, p->fval, p->tval) );
255 	return(p);
256 }
257 
/*
 * hash: map string s to a bucket index in [0, n).
 * The hash is a base-31 polynomial over the characters of s, accumulated
 * in unsigned arithmetic and reduced modulo n.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *p = s;

	while (*p != '\0') {
		acc = 31 * acc + *p;
		p++;
	}
	return acc % n;
}
266 
267 void rehash(Array *tp)	/* rehash items in small table into big one */
268 {
269 	int i, nh, nsz;
270 	Cell *cp, *op, **np;
271 
272 	nsz = GROWTAB * tp->size;
273 	np = (Cell **) calloc(nsz, sizeof(Cell *));
274 	if (np == NULL)		/* can't do it, but can keep running. */
275 		return;		/* someone else will run out later. */
276 	for (i = 0; i < tp->size; i++) {
277 		for (cp = tp->tab[i]; cp; cp = op) {
278 			op = cp->cnext;
279 			nh = hash(cp->nval, nsz);
280 			cp->cnext = np[nh];
281 			np[nh] = cp;
282 		}
283 	}
284 	free(tp->tab);
285 	tp->tab = np;
286 	tp->size = nsz;
287 }
288 
289 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
290 {
291 	Cell *p;
292 	int h;
293 
294 	h = hash(s, tp->size);
295 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
296 		if (strcmp(s, p->nval) == 0)
297 			return(p);	/* found it */
298 	return(NULL);			/* not found */
299 }
300 
301 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
302 {
303 	int fldno;
304 
305 	f += 0.0;		/* normalise negative zero to positive zero */
306 	if ((vp->tval & (NUM | STR)) == 0)
307 		funnyvar(vp, "assign to");
308 	if (isfld(vp)) {
309 		donerec = 0;	/* mark $0 invalid */
310 		fldno = atoi(vp->nval);
311 		if (fldno > *NF)
312 			newfld(fldno);
313 		   dprintf( ("setting field %d to %g\n", fldno, f) );
314 	} else if (&vp->fval == NF) {
315 		donerec = 0;	/* mark $0 invalid */
316 		setlastfld(f);
317 		dprintf( ("setting NF to %g\n", f) );
318 	} else if (isrec(vp)) {
319 		donefld = 0;	/* mark $1... invalid */
320 		donerec = 1;
321 	} else if (vp == ofsloc) {
322 		if (donerec == 0)
323 			recbld();
324 	}
325 	if (freeable(vp))
326 		xfree(vp->sval); /* free any previous string */
327 	vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */
328 	vp->fmt = NULL;
329 	vp->tval |= NUM;	/* mark number ok */
330 	if (f == -0)  /* who would have thought this possible? */
331 		f = 0;
332 	   dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) );
333 	return vp->fval = f;
334 }
335 
336 void funnyvar(Cell *vp, const char *rw)
337 {
338 	if (isarr(vp))
339 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
340 	if (vp->tval & FCN)
341 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
342 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
343 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
344 }
345 
346 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
347 {
348 	char *t;
349 	int fldno;
350 	Awkfloat f;
351 
352 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
353 		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
354 	if ((vp->tval & (NUM | STR)) == 0)
355 		funnyvar(vp, "assign to");
356 	if (isfld(vp)) {
357 		donerec = 0;	/* mark $0 invalid */
358 		fldno = atoi(vp->nval);
359 		if (fldno > *NF)
360 			newfld(fldno);
361 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, (void *) s) );
362 	} else if (isrec(vp)) {
363 		donefld = 0;	/* mark $1... invalid */
364 		donerec = 1;
365 	} else if (vp == ofsloc) {
366 		if (donerec == 0)
367 			recbld();
368 	}
369 	t = s ? tostring(s) : tostring("");	/* in case it's self-assign */
370 	if (freeable(vp))
371 		xfree(vp->sval);
372 	vp->tval &= ~(NUM|CONVC|CONVO);
373 	vp->tval |= STR;
374 	vp->fmt = NULL;
375 	setfree(vp);
376 	   dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
377 		(void*)vp, NN(vp->nval), t, (void *) t, vp->tval, donerec, donefld) );
378 	vp->sval = t;
379 	if (&vp->fval == NF) {
380 		donerec = 0;	/* mark $0 invalid */
381 		f = getfval(vp);
382 		setlastfld(f);
383 		dprintf( ("setting NF to %g\n", f) );
384 	}
385 
386 	return(vp->sval);
387 }
388 
389 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
390 {
391 	if ((vp->tval & (NUM | STR)) == 0)
392 		funnyvar(vp, "read value of");
393 	if (isfld(vp) && donefld == 0)
394 		fldbld();
395 	else if (isrec(vp) && donerec == 0)
396 		recbld();
397 	if (!isnum(vp)) {	/* not a number */
398 		vp->fval = atof(vp->sval);	/* best guess */
399 		if (is_number(vp->sval) && !(vp->tval&CON))
400 			vp->tval |= NUM;	/* make NUM only sparingly */
401 	}
402 	   dprintf( ("getfval %p: %s = %g, t=%o\n",
403 		(void*)vp, NN(vp->nval), vp->fval, vp->tval) );
404 	return(vp->fval);
405 }
406 
407 static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
408 {
409 	char s[256];
410 	double dtemp;
411 
412 	if ((vp->tval & (NUM | STR)) == 0)
413 		funnyvar(vp, "read value of");
414 	if (isfld(vp) && donefld == 0)
415 		fldbld();
416 	else if (isrec(vp) && donerec == 0)
417 		recbld();
418 
419 	/*
420 	 * ADR: This is complicated and more fragile than is desirable.
421 	 * Retrieving a string value for a number associates the string
422 	 * value with the scalar.  Previously, the string value was
423 	 * sticky, meaning if converted via OFMT that became the value
424 	 * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT
425 	 * changed after a string value was retrieved, the original value
426 	 * was maintained and used.  Also not per POSIX.
427 	 *
428 	 * We work around this design by adding two additional flags,
429 	 * CONVC and CONVO, indicating how the string value was
430 	 * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy
431 	 * of the pointer to the xFMT format string used for the
432 	 * conversion.  This pointer is only read, **never** dereferenced.
433 	 * The next time we do a conversion, if it's coming from the same
434 	 * xFMT as last time, and the pointer value is different, we
435 	 * know that the xFMT format string changed, and we need to
436 	 * redo the conversion. If it's the same, we don't have to.
437 	 *
438 	 * There are also several cases where we don't do a conversion,
439 	 * such as for a field (see the checks below).
440 	 */
441 
442 	/* Don't duplicate the code for actually updating the value */
443 #define update_str_val(vp) \
444 	{ \
445 		if (freeable(vp)) \
446 			xfree(vp->sval); \
447 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */ \
448 			snprintf(s, sizeof (s), "%.30g", vp->fval); \
449 		else \
450 			snprintf(s, sizeof (s), *fmt, vp->fval); \
451 		vp->sval = tostring(s); \
452 		vp->tval &= ~DONTFREE; \
453 		vp->tval |= STR; \
454 	}
455 
456 	if (isstr(vp) == 0) {
457 		update_str_val(vp);
458 		if (fmt == OFMT) {
459 			vp->tval &= ~CONVC;
460 			vp->tval |= CONVO;
461 		} else {
462 			/* CONVFMT */
463 			vp->tval &= ~CONVO;
464 			vp->tval |= CONVC;
465 		}
466 		vp->fmt = *fmt;
467 	} else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) {
468 		goto done;
469 	} else if (isstr(vp)) {
470 		if (fmt == OFMT) {
471 			if ((vp->tval & CONVC) != 0
472 			    || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) {
473 				update_str_val(vp);
474 				vp->tval &= ~CONVC;
475 				vp->tval |= CONVO;
476 				vp->fmt = *fmt;
477 			}
478 		} else {
479 			/* CONVFMT */
480 			if ((vp->tval & CONVO) != 0
481 			    || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) {
482 				update_str_val(vp);
483 				vp->tval &= ~CONVO;
484 				vp->tval |= CONVC;
485 				vp->fmt = *fmt;
486 			}
487 		}
488 	}
489 done:
490 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
491 		(void*)vp, NN(vp->nval), vp->sval, (void *) vp->sval, vp->tval) );
492 	return(vp->sval);
493 }
494 
495 char *getsval(Cell *vp)       /* get string val of a Cell */
496 {
497       return get_str_val(vp, CONVFMT);
498 }
499 
500 char *getpssval(Cell *vp)     /* get string val of a Cell for print */
501 {
502       return get_str_val(vp, OFMT);
503 }
504 
505 
/*
 * tostring: return a freshly malloc'd copy of string s.
 * Calls FATAL when the allocation fails.
 */
char *tostring(const char *s)
{
	size_t nbytes = strlen(s) + 1;		/* text plus terminator */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(copy, s, nbytes);
	return copy;
}
516 
517 char *qstring(const char *is, int delim)	/* collect string up to next delim */
518 {
519 	const char *os = is;
520 	int c, n;
521 	uschar *s = (uschar *) is;
522 	uschar *buf, *bp;
523 
524 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
525 		FATAL( "out of space in qstring(%s)", s);
526 	for (bp = buf; (c = *s) != delim; s++) {
527 		if (c == '\n')
528 			SYNTAX( "newline in string %.20s...", os );
529 		else if (c != '\\')
530 			*bp++ = c;
531 		else {	/* \something */
532 			c = *++s;
533 			if (c == 0) {	/* \ at end */
534 				*bp++ = '\\';
535 				break;	/* for loop */
536 			}
537 			switch (c) {
538 			case '\\':	*bp++ = '\\'; break;
539 			case 'n':	*bp++ = '\n'; break;
540 			case 't':	*bp++ = '\t'; break;
541 			case 'b':	*bp++ = '\b'; break;
542 			case 'f':	*bp++ = '\f'; break;
543 			case 'r':	*bp++ = '\r'; break;
544 			default:
545 				if (!isdigit(c)) {
546 					*bp++ = c;
547 					break;
548 				}
549 				n = c - '0';
550 				if (isdigit(s[1])) {
551 					n = 8 * n + *++s - '0';
552 					if (isdigit(s[1]))
553 						n = 8 * n + *++s - '0';
554 				}
555 				*bp++ = n;
556 				break;
557 			}
558 		}
559 	}
560 	*bp++ = 0;
561 	return (char *) buf;
562 }
563 
564 const char *flags2str(int flags)
565 {
566 	static const struct ftab {
567 		const char *name;
568 		int value;
569 	} flagtab[] = {
570 		{ "NUM", NUM },
571 		{ "STR", STR },
572 		{ "DONTFREE", DONTFREE },
573 		{ "CON", CON },
574 		{ "ARR", ARR },
575 		{ "FCN", FCN },
576 		{ "FLD", FLD },
577 		{ "REC", REC },
578 		{ "CONVC", CONVC },
579 		{ "CONVO", CONVO },
580 		{ NULL, 0 }
581 	};
582 	static char buf[100];
583 	int i;
584 	char *cp = buf;
585 
586 	for (i = 0; flagtab[i].name != NULL; i++) {
587 		if ((flags & flagtab[i].value) != 0) {
588 			if (cp > buf)
589 				*cp++ = '|';
590 			strcpy(cp, flagtab[i].name);
591 			cp += strlen(cp);
592 		}
593 	}
594 
595 	return buf;
596 }
597