xref: /freebsd/contrib/one-true-awk/tran.c (revision 2a4a1db342263067035ce69a4017c645da63455d)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define	DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "ytab.h"
33 
34 #define	FULLTAB	2	/* rehash when table gets this x full */
35 #define	GROWTAB 4	/* grow table by this factor */
36 
37 Array	*symtab;	/* main symbol table */
38 
39 char	**FS;		/* initial field sep */
40 char	**RS;		/* initial record sep */
41 char	**OFS;		/* output field sep */
42 char	**ORS;		/* output record sep */
43 char	**OFMT;		/* output format for numbers */
44 char	**CONVFMT;	/* format for conversions in getsval */
45 Awkfloat *NF;		/* number of fields in current record */
46 Awkfloat *NR;		/* number of current record */
47 Awkfloat *FNR;		/* number of current record in current file */
48 char	**FILENAME;	/* current filename argument */
49 Awkfloat *ARGC;		/* number of arguments from command line */
50 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
51 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
52 Awkfloat *RLENGTH;	/* length of same */
53 
54 Cell	*nrloc;		/* NR */
55 Cell	*nfloc;		/* NF */
56 Cell	*fnrloc;	/* FNR */
57 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
58 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
59 Cell	*rstartloc;	/* RSTART */
60 Cell	*rlengthloc;	/* RLENGTH */
61 Cell	*symtabloc;	/* SYMTAB */
62 
63 Cell	*nullloc;	/* a guaranteed empty cell */
64 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
65 Cell	*literal0;
66 
67 extern Cell **fldtab;
68 
69 void syminit(void)	/* initialize symbol table with builtin vars */
70 {
71 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
72 	/* this is used for if(x)... tests: */
73 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
74 	nullnode = celltonode(nullloc, CCON);
75 
76 	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
77 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
78 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
79 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
81 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
82 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
83 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
84 	NF = &nfloc->fval;
85 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
86 	NR = &nrloc->fval;
87 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
88 	FNR = &fnrloc->fval;
89 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
90 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
91 	RSTART = &rstartloc->fval;
92 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
93 	RLENGTH = &rlengthloc->fval;
94 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
95 	symtabloc->sval = (char *) symtab;
96 }
97 
98 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
99 {
100 	Cell *cp;
101 	int i;
102 	char temp[50];
103 
104 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
105 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
106 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
107 	cp->sval = (char *) ARGVtab;
108 	for (i = 0; i < ac; i++) {
109 		sprintf(temp, "%d", i);
110 		if (is_number(*av))
111 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
112 		else
113 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
114 		av++;
115 	}
116 }
117 
118 void envinit(char **envp)	/* set up ENVIRON variable */
119 {
120 	Cell *cp;
121 	char *p;
122 
123 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
124 	ENVtab = makesymtab(NSYMTAB);
125 	cp->sval = (char *) ENVtab;
126 	for ( ; *envp; envp++) {
127 		if ((p = strchr(*envp, '=')) == NULL)
128 			continue;
129 		if( p == *envp ) /* no left hand side name in env string */
130 			continue;
131 		*p++ = 0;	/* split into two strings at = */
132 		if (is_number(p))
133 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
134 		else
135 			setsymtab(*envp, p, 0.0, STR, ENVtab);
136 		p[-1] = '=';	/* restore in case env is passed down to a shell */
137 	}
138 }
139 
140 Array *makesymtab(int n)	/* make a new symbol table */
141 {
142 	Array *ap;
143 	Cell **tp;
144 
145 	ap = (Array *) malloc(sizeof(Array));
146 	tp = (Cell **) calloc(n, sizeof(Cell *));
147 	if (ap == NULL || tp == NULL)
148 		FATAL("out of space in makesymtab");
149 	ap->nelem = 0;
150 	ap->size = n;
151 	ap->tab = tp;
152 	return(ap);
153 }
154 
155 void freesymtab(Cell *ap)	/* free a symbol table */
156 {
157 	Cell *cp, *temp;
158 	Array *tp;
159 	int i;
160 
161 	if (!isarr(ap))
162 		return;
163 	tp = (Array *) ap->sval;
164 	if (tp == NULL)
165 		return;
166 	for (i = 0; i < tp->size; i++) {
167 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
168 			xfree(cp->nval);
169 			if (freeable(cp))
170 				xfree(cp->sval);
171 			temp = cp->cnext;	/* avoids freeing then using */
172 			free(cp);
173 			tp->nelem--;
174 		}
175 		tp->tab[i] = 0;
176 	}
177 	if (tp->nelem != 0)
178 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
179 	free(tp->tab);
180 	free(tp);
181 }
182 
183 void freeelem(Cell *ap, char *s)	/* free elem s from ap (i.e., ap["s"] */
184 {
185 	Array *tp;
186 	Cell *p, *prev = NULL;
187 	int h;
188 
189 	tp = (Array *) ap->sval;
190 	h = hash(s, tp->size);
191 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
192 		if (strcmp(s, p->nval) == 0) {
193 			if (prev == NULL)	/* 1st one */
194 				tp->tab[h] = p->cnext;
195 			else			/* middle somewhere */
196 				prev->cnext = p->cnext;
197 			if (freeable(p))
198 				xfree(p->sval);
199 			free(p->nval);
200 			free(p);
201 			tp->nelem--;
202 			return;
203 		}
204 }
205 
206 Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
207 {
208 	int h;
209 	Cell *p;
210 
211 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
212 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
213 			p, p->nval, p->sval, p->fval, p->tval) );
214 		return(p);
215 	}
216 	p = (Cell *) malloc(sizeof(Cell));
217 	if (p == NULL)
218 		FATAL("out of space for symbol table at %s", n);
219 	p->nval = tostring(n);
220 	p->sval = s ? tostring(s) : tostring("");
221 	p->fval = f;
222 	p->tval = t;
223 	p->csub = CUNK;
224 	p->ctype = OCELL;
225 	tp->nelem++;
226 	if (tp->nelem > FULLTAB * tp->size)
227 		rehash(tp);
228 	h = hash(n, tp->size);
229 	p->cnext = tp->tab[h];
230 	tp->tab[h] = p;
231 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
232 		p, p->nval, p->sval, p->fval, p->tval) );
233 	return(p);
234 }
235 
/*
 * hash - hash string s into the range [0, n).
 * Each character is added to 31 times the running value, in unsigned
 * arithmetic, and the final value is reduced modulo n.
 */
int hash(char *s, int n)
{
	unsigned acc = 0;

	while (*s != '\0') {
		acc = *s + 31 * acc;
		s++;
	}
	return acc % n;
}
244 
245 void rehash(Array *tp)	/* rehash items in small table into big one */
246 {
247 	int i, nh, nsz;
248 	Cell *cp, *op, **np;
249 
250 	nsz = GROWTAB * tp->size;
251 	np = (Cell **) calloc(nsz, sizeof(Cell *));
252 	if (np == NULL)		/* can't do it, but can keep running. */
253 		return;		/* someone else will run out later. */
254 	for (i = 0; i < tp->size; i++) {
255 		for (cp = tp->tab[i]; cp; cp = op) {
256 			op = cp->cnext;
257 			nh = hash(cp->nval, nsz);
258 			cp->cnext = np[nh];
259 			np[nh] = cp;
260 		}
261 	}
262 	free(tp->tab);
263 	tp->tab = np;
264 	tp->size = nsz;
265 }
266 
267 Cell *lookup(char *s, Array *tp)	/* look for s in tp */
268 {
269 	Cell *p;
270 	int h;
271 
272 	h = hash(s, tp->size);
273 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
274 		if (strcmp(s, p->nval) == 0)
275 			return(p);	/* found it */
276 	return(NULL);			/* not found */
277 }
278 
279 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
280 {
281 	int fldno;
282 
283 	if ((vp->tval & (NUM | STR)) == 0)
284 		funnyvar(vp, "assign to");
285 	if (isfld(vp)) {
286 		donerec = 0;	/* mark $0 invalid */
287 		fldno = atoi(vp->nval);
288 		if (fldno > *NF)
289 			newfld(fldno);
290 		   dprintf( ("setting field %d to %g\n", fldno, f) );
291 	} else if (isrec(vp)) {
292 		donefld = 0;	/* mark $1... invalid */
293 		donerec = 1;
294 	}
295 	if (freeable(vp))
296 		xfree(vp->sval); /* free any previous string */
297 	vp->tval &= ~STR;	/* mark string invalid */
298 	vp->tval |= NUM;	/* mark number ok */
299 	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
300 	return vp->fval = f;
301 }
302 
303 void funnyvar(Cell *vp, char *rw)
304 {
305 	if (isarr(vp))
306 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
307 	if (vp->tval & FCN)
308 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
309 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
310 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
311 }
312 
313 char *setsval(Cell *vp, char *s)	/* set string val of a Cell */
314 {
315 	char *t;
316 	int fldno;
317 
318 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
319 	if ((vp->tval & (NUM | STR)) == 0)
320 		funnyvar(vp, "assign to");
321 	if (isfld(vp)) {
322 		donerec = 0;	/* mark $0 invalid */
323 		fldno = atoi(vp->nval);
324 		if (fldno > *NF)
325 			newfld(fldno);
326 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
327 	} else if (isrec(vp)) {
328 		donefld = 0;	/* mark $1... invalid */
329 		donerec = 1;
330 	}
331 	t = tostring(s);	/* in case it's self-assign */
332 	vp->tval &= ~NUM;
333 	vp->tval |= STR;
334 	if (freeable(vp))
335 		xfree(vp->sval);
336 	vp->tval &= ~DONTFREE;
337 	   dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
338 	return(vp->sval = t);
339 }
340 
341 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
342 {
343 	if ((vp->tval & (NUM | STR)) == 0)
344 		funnyvar(vp, "read value of");
345 	if (isfld(vp) && donefld == 0)
346 		fldbld();
347 	else if (isrec(vp) && donerec == 0)
348 		recbld();
349 	if (!isnum(vp)) {	/* not a number */
350 		vp->fval = atof(vp->sval);	/* best guess */
351 		if (is_number(vp->sval) && !(vp->tval&CON))
352 			vp->tval |= NUM;	/* make NUM only sparingly */
353 	}
354 	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
355 	return(vp->fval);
356 }
357 
358 char *getsval(Cell *vp)	/* get string val of a Cell */
359 {
360 	char s[100];	/* BUG: unchecked */
361 	double dtemp;
362 
363 	if ((vp->tval & (NUM | STR)) == 0)
364 		funnyvar(vp, "read value of");
365 	if (isfld(vp) && donefld == 0)
366 		fldbld();
367 	else if (isrec(vp) && donerec == 0)
368 		recbld();
369 	if (isstr(vp) == 0) {
370 		if (freeable(vp))
371 			xfree(vp->sval);
372 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
373 			sprintf(s, "%.30g", vp->fval);
374 		else
375 			sprintf(s, *CONVFMT, vp->fval);
376 		vp->sval = tostring(s);
377 		vp->tval &= ~DONTFREE;
378 		vp->tval |= STR;
379 	}
380 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
381 	return(vp->sval);
382 }
383 
/*
 * tostring - return a freshly malloc'ed copy of string s.
 * Calls FATAL if the allocation fails.
 */
char *tostring(char *s)
{
	size_t nbytes = strlen(s) + 1;	/* include the terminating NUL */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(copy, s, nbytes);
	return copy;
}
394 
395 char *qstring(char *is, int delim)	/* collect string up to next delim */
396 {
397 	char *os = is;
398 	int c, n;
399 	uschar *s = (uschar *) is;
400 	uschar *buf, *bp;
401 
402 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
403 		FATAL( "out of space in qstring(%s)", s);
404 	for (bp = buf; (c = *s) != delim; s++) {
405 		if (c == '\n')
406 			SYNTAX( "newline in string %.20s...", os );
407 		else if (c != '\\')
408 			*bp++ = c;
409 		else {	/* \something */
410 			c = *++s;
411 			if (c == 0) {	/* \ at end */
412 				*bp++ = '\\';
413 				break;	/* for loop */
414 			}
415 			switch (c) {
416 			case '\\':	*bp++ = '\\'; break;
417 			case 'n':	*bp++ = '\n'; break;
418 			case 't':	*bp++ = '\t'; break;
419 			case 'b':	*bp++ = '\b'; break;
420 			case 'f':	*bp++ = '\f'; break;
421 			case 'r':	*bp++ = '\r'; break;
422 			default:
423 				if (!isdigit(c)) {
424 					*bp++ = c;
425 					break;
426 				}
427 				n = c - '0';
428 				if (isdigit(s[1])) {
429 					n = 8 * n + *++s - '0';
430 					if (isdigit(s[1]))
431 						n = 8 * n + *++s - '0';
432 				}
433 				*bp++ = n;
434 				break;
435 			}
436 		}
437 	}
438 	*bp++ = 0;
439 	return (char *) buf;
440 }
441