xref: /freebsd/contrib/one-true-awk/tran.c (revision 2b743a9e9ddc6736208dc8ca1ce06ce64ad20a19)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define	DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "ytab.h"
33 
34 #define	FULLTAB	2	/* rehash when table gets this x full */
35 #define	GROWTAB 4	/* grow table by this factor */
36 
37 Array	*symtab;	/* main symbol table */
38 
39 char	**FS;		/* initial field sep */
40 char	**RS;		/* initial record sep */
41 char	**OFS;		/* output field sep */
42 char	**ORS;		/* output record sep */
43 char	**OFMT;		/* output format for numbers */
44 char	**CONVFMT;	/* format for conversions in getsval */
45 Awkfloat *NF;		/* number of fields in current record */
46 Awkfloat *NR;		/* number of current record */
47 Awkfloat *FNR;		/* number of current record in current file */
48 char	**FILENAME;	/* current filename argument */
49 Awkfloat *ARGC;		/* number of arguments from command line */
50 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
51 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
52 Awkfloat *RLENGTH;	/* length of same */
53 
54 Cell	*fsloc;		/* FS */
55 Cell	*nrloc;		/* NR */
56 Cell	*nfloc;		/* NF */
57 Cell	*fnrloc;	/* FNR */
58 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
59 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
60 Cell	*rstartloc;	/* RSTART */
61 Cell	*rlengthloc;	/* RLENGTH */
62 Cell	*symtabloc;	/* SYMTAB */
63 
64 Cell	*nullloc;	/* a guaranteed empty cell */
65 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
66 Cell	*literal0;
67 
68 extern Cell **fldtab;
69 
70 void syminit(void)	/* initialize symbol table with builtin vars */
71 {
72 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
73 	/* this is used for if(x)... tests: */
74 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
75 	nullnode = celltonode(nullloc, CCON);
76 
77 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
78 	FS = &fsloc->sval;
79 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
81 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
82 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
83 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
85 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
86 	NF = &nfloc->fval;
87 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
88 	NR = &nrloc->fval;
89 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
90 	FNR = &fnrloc->fval;
91 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
92 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
93 	RSTART = &rstartloc->fval;
94 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
95 	RLENGTH = &rlengthloc->fval;
96 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
97 	symtabloc->sval = (char *) symtab;
98 }
99 
100 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
101 {
102 	Cell *cp;
103 	int i;
104 	char temp[50];
105 
106 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
107 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
108 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
109 	cp->sval = (char *) ARGVtab;
110 	for (i = 0; i < ac; i++) {
111 		sprintf(temp, "%d", i);
112 		if (is_number(*av))
113 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
114 		else
115 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
116 		av++;
117 	}
118 }
119 
120 void envinit(char **envp)	/* set up ENVIRON variable */
121 {
122 	Cell *cp;
123 	char *p;
124 
125 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
126 	ENVtab = makesymtab(NSYMTAB);
127 	cp->sval = (char *) ENVtab;
128 	for ( ; *envp; envp++) {
129 		if ((p = strchr(*envp, '=')) == NULL)
130 			continue;
131 		if( p == *envp ) /* no left hand side name in env string */
132 			continue;
133 		*p++ = 0;	/* split into two strings at = */
134 		if (is_number(p))
135 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
136 		else
137 			setsymtab(*envp, p, 0.0, STR, ENVtab);
138 		p[-1] = '=';	/* restore in case env is passed down to a shell */
139 	}
140 }
141 
142 Array *makesymtab(int n)	/* make a new symbol table */
143 {
144 	Array *ap;
145 	Cell **tp;
146 
147 	ap = (Array *) malloc(sizeof(Array));
148 	tp = (Cell **) calloc(n, sizeof(Cell *));
149 	if (ap == NULL || tp == NULL)
150 		FATAL("out of space in makesymtab");
151 	ap->nelem = 0;
152 	ap->size = n;
153 	ap->tab = tp;
154 	return(ap);
155 }
156 
157 void freesymtab(Cell *ap)	/* free a symbol table */
158 {
159 	Cell *cp, *temp;
160 	Array *tp;
161 	int i;
162 
163 	if (!isarr(ap))
164 		return;
165 	tp = (Array *) ap->sval;
166 	if (tp == NULL)
167 		return;
168 	for (i = 0; i < tp->size; i++) {
169 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
170 			xfree(cp->nval);
171 			if (freeable(cp))
172 				xfree(cp->sval);
173 			temp = cp->cnext;	/* avoids freeing then using */
174 			free(cp);
175 			tp->nelem--;
176 		}
177 		tp->tab[i] = 0;
178 	}
179 	if (tp->nelem != 0)
180 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
181 	free(tp->tab);
182 	free(tp);
183 }
184 
185 void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
186 {
187 	Array *tp;
188 	Cell *p, *prev = NULL;
189 	int h;
190 
191 	tp = (Array *) ap->sval;
192 	h = hash(s, tp->size);
193 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
194 		if (strcmp(s, p->nval) == 0) {
195 			if (prev == NULL)	/* 1st one */
196 				tp->tab[h] = p->cnext;
197 			else			/* middle somewhere */
198 				prev->cnext = p->cnext;
199 			if (freeable(p))
200 				xfree(p->sval);
201 			free(p->nval);
202 			free(p);
203 			tp->nelem--;
204 			return;
205 		}
206 }
207 
208 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
209 {
210 	int h;
211 	Cell *p;
212 
213 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
214 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
215 			p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
216 		return(p);
217 	}
218 	p = (Cell *) malloc(sizeof(Cell));
219 	if (p == NULL)
220 		FATAL("out of space for symbol table at %s", n);
221 	p->nval = tostring(n);
222 	p->sval = s ? tostring(s) : tostring("");
223 	p->fval = f;
224 	p->tval = t;
225 	p->csub = CUNK;
226 	p->ctype = OCELL;
227 	tp->nelem++;
228 	if (tp->nelem > FULLTAB * tp->size)
229 		rehash(tp);
230 	h = hash(n, tp->size);
231 	p->cnext = tp->tab[h];
232 	tp->tab[h] = p;
233 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
234 		p, p->nval, p->sval, p->fval, p->tval) );
235 	return(p);
236 }
237 
238 int hash(const char *s, int n)	/* form hash value for string s */
239 {
240 	unsigned hashval;
241 
242 	for (hashval = 0; *s != '\0'; s++)
243 		hashval = (*s + 31 * hashval);
244 	return hashval % n;
245 }
246 
247 void rehash(Array *tp)	/* rehash items in small table into big one */
248 {
249 	int i, nh, nsz;
250 	Cell *cp, *op, **np;
251 
252 	nsz = GROWTAB * tp->size;
253 	np = (Cell **) calloc(nsz, sizeof(Cell *));
254 	if (np == NULL)		/* can't do it, but can keep running. */
255 		return;		/* someone else will run out later. */
256 	for (i = 0; i < tp->size; i++) {
257 		for (cp = tp->tab[i]; cp; cp = op) {
258 			op = cp->cnext;
259 			nh = hash(cp->nval, nsz);
260 			cp->cnext = np[nh];
261 			np[nh] = cp;
262 		}
263 	}
264 	free(tp->tab);
265 	tp->tab = np;
266 	tp->size = nsz;
267 }
268 
269 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
270 {
271 	Cell *p;
272 	int h;
273 
274 	h = hash(s, tp->size);
275 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
276 		if (strcmp(s, p->nval) == 0)
277 			return(p);	/* found it */
278 	return(NULL);			/* not found */
279 }
280 
281 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
282 {
283 	int fldno;
284 
285 	if ((vp->tval & (NUM | STR)) == 0)
286 		funnyvar(vp, "assign to");
287 	if (isfld(vp)) {
288 		donerec = 0;	/* mark $0 invalid */
289 		fldno = atoi(vp->nval);
290 		if (fldno > *NF)
291 			newfld(fldno);
292 		   dprintf( ("setting field %d to %g\n", fldno, f) );
293 	} else if (isrec(vp)) {
294 		donefld = 0;	/* mark $1... invalid */
295 		donerec = 1;
296 	}
297 	if (freeable(vp))
298 		xfree(vp->sval); /* free any previous string */
299 	vp->tval &= ~STR;	/* mark string invalid */
300 	vp->tval |= NUM;	/* mark number ok */
301 	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
302 	return vp->fval = f;
303 }
304 
305 void funnyvar(Cell *vp, const char *rw)
306 {
307 	if (isarr(vp))
308 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
309 	if (vp->tval & FCN)
310 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
311 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
312 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
313 }
314 
315 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
316 {
317 	char *t;
318 	int fldno;
319 
320 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
321 		vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
322 	if ((vp->tval & (NUM | STR)) == 0)
323 		funnyvar(vp, "assign to");
324 	if (isfld(vp)) {
325 		donerec = 0;	/* mark $0 invalid */
326 		fldno = atoi(vp->nval);
327 		if (fldno > *NF)
328 			newfld(fldno);
329 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
330 	} else if (isrec(vp)) {
331 		donefld = 0;	/* mark $1... invalid */
332 		donerec = 1;
333 	}
334 	t = tostring(s);	/* in case it's self-assign */
335 	vp->tval &= ~NUM;
336 	vp->tval |= STR;
337 	if (freeable(vp))
338 		xfree(vp->sval);
339 	vp->tval &= ~DONTFREE;
340 	   dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
341 		vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
342 	return(vp->sval = t);
343 }
344 
345 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
346 {
347 	if ((vp->tval & (NUM | STR)) == 0)
348 		funnyvar(vp, "read value of");
349 	if (isfld(vp) && donefld == 0)
350 		fldbld();
351 	else if (isrec(vp) && donerec == 0)
352 		recbld();
353 	if (!isnum(vp)) {	/* not a number */
354 		vp->fval = atof(vp->sval);	/* best guess */
355 		if (is_number(vp->sval) && !(vp->tval&CON))
356 			vp->tval |= NUM;	/* make NUM only sparingly */
357 	}
358 	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
359 	return(vp->fval);
360 }
361 
362 static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
363 {
364 	char s[100];	/* BUG: unchecked */
365 	double dtemp;
366 
367 	if ((vp->tval & (NUM | STR)) == 0)
368 		funnyvar(vp, "read value of");
369 	if (isfld(vp) && donefld == 0)
370 		fldbld();
371 	else if (isrec(vp) && donerec == 0)
372 		recbld();
373 	if (isstr(vp) == 0) {
374 		if (freeable(vp))
375 			xfree(vp->sval);
376 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
377 			sprintf(s, "%.30g", vp->fval);
378 		else
379 			sprintf(s, *fmt, vp->fval);
380 		vp->sval = tostring(s);
381 		vp->tval &= ~DONTFREE;
382 		vp->tval |= STR;
383 	}
384 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
385 	return(vp->sval);
386 }
387 
388 char *getsval(Cell *vp)       /* get string val of a Cell */
389 {
390       return get_str_val(vp, CONVFMT);
391 }
392 
393 char *getpssval(Cell *vp)     /* get string val of a Cell for print */
394 {
395       return get_str_val(vp, OFMT);
396 }
397 
398 
399 char *tostring(const char *s)	/* make a copy of string s */
400 {
401 	char *p;
402 
403 	p = (char *) malloc(strlen(s)+1);
404 	if (p == NULL)
405 		FATAL("out of space in tostring on %s", s);
406 	strcpy(p, s);
407 	return(p);
408 }
409 
410 char *qstring(const char *is, int delim)	/* collect string up to next delim */
411 {
412 	const char *os = is;
413 	int c, n;
414 	uschar *s = (uschar *) is;
415 	uschar *buf, *bp;
416 
417 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
418 		FATAL( "out of space in qstring(%s)", s);
419 	for (bp = buf; (c = *s) != delim; s++) {
420 		if (c == '\n')
421 			SYNTAX( "newline in string %.20s...", os );
422 		else if (c != '\\')
423 			*bp++ = c;
424 		else {	/* \something */
425 			c = *++s;
426 			if (c == 0) {	/* \ at end */
427 				*bp++ = '\\';
428 				break;	/* for loop */
429 			}
430 			switch (c) {
431 			case '\\':	*bp++ = '\\'; break;
432 			case 'n':	*bp++ = '\n'; break;
433 			case 't':	*bp++ = '\t'; break;
434 			case 'b':	*bp++ = '\b'; break;
435 			case 'f':	*bp++ = '\f'; break;
436 			case 'r':	*bp++ = '\r'; break;
437 			default:
438 				if (!isdigit(c)) {
439 					*bp++ = c;
440 					break;
441 				}
442 				n = c - '0';
443 				if (isdigit(s[1])) {
444 					n = 8 * n + *++s - '0';
445 					if (isdigit(s[1]))
446 						n = 8 * n + *++s - '0';
447 				}
448 				*bp++ = n;
449 				break;
450 			}
451 		}
452 	}
453 	*bp++ = 0;
454 	return (char *) buf;
455 }
456