xref: /freebsd/contrib/one-true-awk/tran.c (revision 9a14aa017b21c292740c00ee098195cd46642730)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define	DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "ytab.h"
33 
34 #define	FULLTAB	2	/* rehash when table gets this x full */
35 #define	GROWTAB 4	/* grow table by this factor */
36 
37 Array	*symtab;	/* main symbol table */
38 
39 char	**FS;		/* initial field sep */
40 char	**RS;		/* initial record sep */
41 char	**OFS;		/* output field sep */
42 char	**ORS;		/* output record sep */
43 char	**OFMT;		/* output format for numbers */
44 char	**CONVFMT;	/* format for conversions in getsval */
45 Awkfloat *NF;		/* number of fields in current record */
46 Awkfloat *NR;		/* number of current record */
47 Awkfloat *FNR;		/* number of current record in current file */
48 char	**FILENAME;	/* current filename argument */
49 Awkfloat *ARGC;		/* number of arguments from command line */
50 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
51 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
52 Awkfloat *RLENGTH;	/* length of same */
53 
54 Cell	*fsloc;		/* FS */
55 Cell	*nrloc;		/* NR */
56 Cell	*nfloc;		/* NF */
57 Cell	*fnrloc;	/* FNR */
58 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
59 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
60 Cell	*rstartloc;	/* RSTART */
61 Cell	*rlengthloc;	/* RLENGTH */
62 Cell	*symtabloc;	/* SYMTAB */
63 
64 Cell	*nullloc;	/* a guaranteed empty cell */
65 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
66 Cell	*literal0;
67 
68 extern Cell **fldtab;
69 
70 void syminit(void)	/* initialize symbol table with builtin vars */
71 {
72 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
73 	/* this is used for if(x)... tests: */
74 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
75 	nullnode = celltonode(nullloc, CCON);
76 
77 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
78 	FS = &fsloc->sval;
79 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
81 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
82 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
83 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
85 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
86 	NF = &nfloc->fval;
87 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
88 	NR = &nrloc->fval;
89 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
90 	FNR = &fnrloc->fval;
91 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
92 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
93 	RSTART = &rstartloc->fval;
94 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
95 	RLENGTH = &rlengthloc->fval;
96 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
97 	symtabloc->sval = (char *) symtab;
98 }
99 
100 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
101 {
102 	Cell *cp;
103 	int i;
104 	char temp[50];
105 
106 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
107 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
108 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
109 	cp->sval = (char *) ARGVtab;
110 	for (i = 0; i < ac; i++) {
111 		sprintf(temp, "%d", i);
112 		if (is_number(*av))
113 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
114 		else
115 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
116 		av++;
117 	}
118 }
119 
120 void envinit(char **envp)	/* set up ENVIRON variable */
121 {
122 	Cell *cp;
123 	char *p;
124 
125 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
126 	ENVtab = makesymtab(NSYMTAB);
127 	cp->sval = (char *) ENVtab;
128 	for ( ; *envp; envp++) {
129 		if ((p = strchr(*envp, '=')) == NULL)
130 			continue;
131 		if( p == *envp ) /* no left hand side name in env string */
132 			continue;
133 		*p++ = 0;	/* split into two strings at = */
134 		if (is_number(p))
135 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
136 		else
137 			setsymtab(*envp, p, 0.0, STR, ENVtab);
138 		p[-1] = '=';	/* restore in case env is passed down to a shell */
139 	}
140 }
141 
142 Array *makesymtab(int n)	/* make a new symbol table */
143 {
144 	Array *ap;
145 	Cell **tp;
146 
147 	ap = (Array *) malloc(sizeof(Array));
148 	tp = (Cell **) calloc(n, sizeof(Cell *));
149 	if (ap == NULL || tp == NULL)
150 		FATAL("out of space in makesymtab");
151 	ap->nelem = 0;
152 	ap->size = n;
153 	ap->tab = tp;
154 	return(ap);
155 }
156 
157 void freesymtab(Cell *ap)	/* free a symbol table */
158 {
159 	Cell *cp, *temp;
160 	Array *tp;
161 	int i;
162 
163 	if (!isarr(ap))
164 		return;
165 	tp = (Array *) ap->sval;
166 	if (tp == NULL)
167 		return;
168 	for (i = 0; i < tp->size; i++) {
169 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
170 			xfree(cp->nval);
171 			if (freeable(cp))
172 				xfree(cp->sval);
173 			temp = cp->cnext;	/* avoids freeing then using */
174 			free(cp);
175 			tp->nelem--;
176 		}
177 		tp->tab[i] = 0;
178 	}
179 	if (tp->nelem != 0)
180 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
181 	free(tp->tab);
182 	free(tp);
183 }
184 
185 void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
186 {
187 	Array *tp;
188 	Cell *p, *prev = NULL;
189 	int h;
190 
191 	tp = (Array *) ap->sval;
192 	h = hash(s, tp->size);
193 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
194 		if (strcmp(s, p->nval) == 0) {
195 			if (prev == NULL)	/* 1st one */
196 				tp->tab[h] = p->cnext;
197 			else			/* middle somewhere */
198 				prev->cnext = p->cnext;
199 			if (freeable(p))
200 				xfree(p->sval);
201 			free(p->nval);
202 			free(p);
203 			tp->nelem--;
204 			return;
205 		}
206 }
207 
208 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
209 {
210 	int h;
211 	Cell *p;
212 
213 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
214 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
215 			(void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
216 		return(p);
217 	}
218 	p = (Cell *) malloc(sizeof(Cell));
219 	if (p == NULL)
220 		FATAL("out of space for symbol table at %s", n);
221 	p->nval = tostring(n);
222 	p->sval = s ? tostring(s) : tostring("");
223 	p->fval = f;
224 	p->tval = t;
225 	p->csub = CUNK;
226 	p->ctype = OCELL;
227 	tp->nelem++;
228 	if (tp->nelem > FULLTAB * tp->size)
229 		rehash(tp);
230 	h = hash(n, tp->size);
231 	p->cnext = tp->tab[h];
232 	tp->tab[h] = p;
233 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
234 		(void*)p, p->nval, p->sval, p->fval, p->tval) );
235 	return(p);
236 }
237 
238 int hash(const char *s, int n)	/* form hash value for string s */
239 {
240 	unsigned hashval;
241 
242 	for (hashval = 0; *s != '\0'; s++)
243 		hashval = (*s + 31 * hashval);
244 	return hashval % n;
245 }
246 
247 void rehash(Array *tp)	/* rehash items in small table into big one */
248 {
249 	int i, nh, nsz;
250 	Cell *cp, *op, **np;
251 
252 	nsz = GROWTAB * tp->size;
253 	np = (Cell **) calloc(nsz, sizeof(Cell *));
254 	if (np == NULL)		/* can't do it, but can keep running. */
255 		return;		/* someone else will run out later. */
256 	for (i = 0; i < tp->size; i++) {
257 		for (cp = tp->tab[i]; cp; cp = op) {
258 			op = cp->cnext;
259 			nh = hash(cp->nval, nsz);
260 			cp->cnext = np[nh];
261 			np[nh] = cp;
262 		}
263 	}
264 	free(tp->tab);
265 	tp->tab = np;
266 	tp->size = nsz;
267 }
268 
269 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
270 {
271 	Cell *p;
272 	int h;
273 
274 	h = hash(s, tp->size);
275 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
276 		if (strcmp(s, p->nval) == 0)
277 			return(p);	/* found it */
278 	return(NULL);			/* not found */
279 }
280 
281 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
282 {
283 	int fldno;
284 
285 	if ((vp->tval & (NUM | STR)) == 0)
286 		funnyvar(vp, "assign to");
287 	if (isfld(vp)) {
288 		donerec = 0;	/* mark $0 invalid */
289 		fldno = atoi(vp->nval);
290 		if (fldno > *NF)
291 			newfld(fldno);
292 		   dprintf( ("setting field %d to %g\n", fldno, f) );
293 	} else if (isrec(vp)) {
294 		donefld = 0;	/* mark $1... invalid */
295 		donerec = 1;
296 	}
297 	if (freeable(vp))
298 		xfree(vp->sval); /* free any previous string */
299 	vp->tval &= ~STR;	/* mark string invalid */
300 	vp->tval |= NUM;	/* mark number ok */
301 	   dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) );
302 	return vp->fval = f;
303 }
304 
305 void funnyvar(Cell *vp, const char *rw)
306 {
307 	if (isarr(vp))
308 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
309 	if (vp->tval & FCN)
310 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
311 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
312 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
313 }
314 
315 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
316 {
317 	char *t;
318 	int fldno;
319 
320 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
321 		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
322 	if ((vp->tval & (NUM | STR)) == 0)
323 		funnyvar(vp, "assign to");
324 	if (isfld(vp)) {
325 		donerec = 0;	/* mark $0 invalid */
326 		fldno = atoi(vp->nval);
327 		if (fldno > *NF)
328 			newfld(fldno);
329 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
330 	} else if (isrec(vp)) {
331 		donefld = 0;	/* mark $1... invalid */
332 		donerec = 1;
333 	}
334 	t = tostring(s);	/* in case it's self-assign */
335 	if (freeable(vp))
336 		xfree(vp->sval);
337 	vp->tval &= ~NUM;
338 	vp->tval |= STR;
339 	vp->tval &= ~DONTFREE;
340 	   dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
341 		(void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
342 	return(vp->sval = t);
343 }
344 
345 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
346 {
347 	if ((vp->tval & (NUM | STR)) == 0)
348 		funnyvar(vp, "read value of");
349 	if (isfld(vp) && donefld == 0)
350 		fldbld();
351 	else if (isrec(vp) && donerec == 0)
352 		recbld();
353 	if (!isnum(vp)) {	/* not a number */
354 		vp->fval = atof(vp->sval);	/* best guess */
355 		if (is_number(vp->sval) && !(vp->tval&CON))
356 			vp->tval |= NUM;	/* make NUM only sparingly */
357 	}
358 	   dprintf( ("getfval %p: %s = %g, t=%o\n",
359 		(void*)vp, NN(vp->nval), vp->fval, vp->tval) );
360 	return(vp->fval);
361 }
362 
363 static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
364 {
365 	char s[100];	/* BUG: unchecked */
366 	double dtemp;
367 
368 	if ((vp->tval & (NUM | STR)) == 0)
369 		funnyvar(vp, "read value of");
370 	if (isfld(vp) && donefld == 0)
371 		fldbld();
372 	else if (isrec(vp) && donerec == 0)
373 		recbld();
374 	if (isstr(vp) == 0) {
375 		if (freeable(vp))
376 			xfree(vp->sval);
377 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
378 			sprintf(s, "%.30g", vp->fval);
379 		else
380 			sprintf(s, *fmt, vp->fval);
381 		vp->sval = tostring(s);
382 		vp->tval &= ~DONTFREE;
383 		vp->tval |= STR;
384 	}
385 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
386 		(void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
387 	return(vp->sval);
388 }
389 
390 char *getsval(Cell *vp)       /* get string val of a Cell */
391 {
392       return get_str_val(vp, CONVFMT);
393 }
394 
395 char *getpssval(Cell *vp)     /* get string val of a Cell for print */
396 {
397       return get_str_val(vp, OFMT);
398 }
399 
400 
401 char *tostring(const char *s)	/* make a copy of string s */
402 {
403 	char *p;
404 
405 	p = (char *) malloc(strlen(s)+1);
406 	if (p == NULL)
407 		FATAL("out of space in tostring on %s", s);
408 	strcpy(p, s);
409 	return(p);
410 }
411 
412 char *qstring(const char *is, int delim)	/* collect string up to next delim */
413 {
414 	const char *os = is;
415 	int c, n;
416 	uschar *s = (uschar *) is;
417 	uschar *buf, *bp;
418 
419 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
420 		FATAL( "out of space in qstring(%s)", s);
421 	for (bp = buf; (c = *s) != delim; s++) {
422 		if (c == '\n')
423 			SYNTAX( "newline in string %.20s...", os );
424 		else if (c != '\\')
425 			*bp++ = c;
426 		else {	/* \something */
427 			c = *++s;
428 			if (c == 0) {	/* \ at end */
429 				*bp++ = '\\';
430 				break;	/* for loop */
431 			}
432 			switch (c) {
433 			case '\\':	*bp++ = '\\'; break;
434 			case 'n':	*bp++ = '\n'; break;
435 			case 't':	*bp++ = '\t'; break;
436 			case 'b':	*bp++ = '\b'; break;
437 			case 'f':	*bp++ = '\f'; break;
438 			case 'r':	*bp++ = '\r'; break;
439 			default:
440 				if (!isdigit(c)) {
441 					*bp++ = c;
442 					break;
443 				}
444 				n = c - '0';
445 				if (isdigit(s[1])) {
446 					n = 8 * n + *++s - '0';
447 					if (isdigit(s[1]))
448 						n = 8 * n + *++s - '0';
449 				}
450 				*bp++ = n;
451 				break;
452 			}
453 		}
454 	}
455 	*bp++ = 0;
456 	return (char *) buf;
457 }
458