xref: /freebsd/contrib/one-true-awk/tran.c (revision 4436b51dff5736e74da464946049ea6899a88938)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define	DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "ytab.h"
33 
34 #define	FULLTAB	2	/* rehash when table gets this x full */
35 #define	GROWTAB 4	/* grow table by this factor */
36 
37 Array	*symtab;	/* main symbol table */
38 
39 char	**FS;		/* initial field sep */
40 char	**RS;		/* initial record sep */
41 char	**OFS;		/* output field sep */
42 char	**ORS;		/* output record sep */
43 char	**OFMT;		/* output format for numbers */
44 char	**CONVFMT;	/* format for conversions in getsval */
45 Awkfloat *NF;		/* number of fields in current record */
46 Awkfloat *NR;		/* number of current record */
47 Awkfloat *FNR;		/* number of current record in current file */
48 char	**FILENAME;	/* current filename argument */
49 Awkfloat *ARGC;		/* number of arguments from command line */
50 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
51 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
52 Awkfloat *RLENGTH;	/* length of same */
53 
54 Cell	*fsloc;		/* FS */
55 Cell	*nrloc;		/* NR */
56 Cell	*nfloc;		/* NF */
57 Cell	*fnrloc;	/* FNR */
58 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
59 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
60 Cell	*rstartloc;	/* RSTART */
61 Cell	*rlengthloc;	/* RLENGTH */
62 Cell	*symtabloc;	/* SYMTAB */
63 
64 Cell	*nullloc;	/* a guaranteed empty cell */
65 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
66 Cell	*literal0;
67 
68 extern Cell **fldtab;
69 
70 void syminit(void)	/* initialize symbol table with builtin vars */
71 {
72 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
73 	/* this is used for if(x)... tests: */
74 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
75 	nullnode = celltonode(nullloc, CCON);
76 
77 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
78 	FS = &fsloc->sval;
79 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
81 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
82 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
83 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
85 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
86 	NF = &nfloc->fval;
87 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
88 	NR = &nrloc->fval;
89 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
90 	FNR = &fnrloc->fval;
91 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
92 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
93 	RSTART = &rstartloc->fval;
94 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
95 	RLENGTH = &rlengthloc->fval;
96 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
97 	symtabloc->sval = (char *) symtab;
98 }
99 
100 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
101 {
102 	Cell *cp;
103 	int i;
104 	char temp[50];
105 
106 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
107 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
108 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
109 	cp->sval = (char *) ARGVtab;
110 	for (i = 0; i < ac; i++) {
111 		sprintf(temp, "%d", i);
112 		if (is_number(*av))
113 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
114 		else
115 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
116 		av++;
117 	}
118 }
119 
120 void envinit(char **envp)	/* set up ENVIRON variable */
121 {
122 	Cell *cp;
123 	char *p;
124 
125 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
126 	ENVtab = makesymtab(NSYMTAB);
127 	cp->sval = (char *) ENVtab;
128 	for ( ; *envp; envp++) {
129 		if ((p = strchr(*envp, '=')) == NULL)
130 			continue;
131 		if( p == *envp ) /* no left hand side name in env string */
132 			continue;
133 		*p++ = 0;	/* split into two strings at = */
134 		if (is_number(p))
135 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
136 		else
137 			setsymtab(*envp, p, 0.0, STR, ENVtab);
138 		p[-1] = '=';	/* restore in case env is passed down to a shell */
139 	}
140 }
141 
142 Array *makesymtab(int n)	/* make a new symbol table */
143 {
144 	Array *ap;
145 	Cell **tp;
146 
147 	ap = (Array *) malloc(sizeof(Array));
148 	tp = (Cell **) calloc(n, sizeof(Cell *));
149 	if (ap == NULL || tp == NULL)
150 		FATAL("out of space in makesymtab");
151 	ap->nelem = 0;
152 	ap->size = n;
153 	ap->tab = tp;
154 	return(ap);
155 }
156 
157 void freesymtab(Cell *ap)	/* free a symbol table */
158 {
159 	Cell *cp, *temp;
160 	Array *tp;
161 	int i;
162 
163 	if (!isarr(ap))
164 		return;
165 	tp = (Array *) ap->sval;
166 	if (tp == NULL)
167 		return;
168 	for (i = 0; i < tp->size; i++) {
169 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
170 			xfree(cp->nval);
171 			if (freeable(cp))
172 				xfree(cp->sval);
173 			temp = cp->cnext;	/* avoids freeing then using */
174 			free(cp);
175 			tp->nelem--;
176 		}
177 		tp->tab[i] = 0;
178 	}
179 	if (tp->nelem != 0)
180 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
181 	free(tp->tab);
182 	free(tp);
183 }
184 
185 void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
186 {
187 	Array *tp;
188 	Cell *p, *prev = NULL;
189 	int h;
190 
191 	tp = (Array *) ap->sval;
192 	h = hash(s, tp->size);
193 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
194 		if (strcmp(s, p->nval) == 0) {
195 			if (prev == NULL)	/* 1st one */
196 				tp->tab[h] = p->cnext;
197 			else			/* middle somewhere */
198 				prev->cnext = p->cnext;
199 			if (freeable(p))
200 				xfree(p->sval);
201 			free(p->nval);
202 			free(p);
203 			tp->nelem--;
204 			return;
205 		}
206 }
207 
208 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
209 {
210 	int h;
211 	Cell *p;
212 
213 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
214 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
215 			(void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
216 		return(p);
217 	}
218 	p = (Cell *) malloc(sizeof(Cell));
219 	if (p == NULL)
220 		FATAL("out of space for symbol table at %s", n);
221 	p->nval = tostring(n);
222 	p->sval = s ? tostring(s) : tostring("");
223 	p->fval = f;
224 	p->tval = t;
225 	p->csub = CUNK;
226 	p->ctype = OCELL;
227 	tp->nelem++;
228 	if (tp->nelem > FULLTAB * tp->size)
229 		rehash(tp);
230 	h = hash(n, tp->size);
231 	p->cnext = tp->tab[h];
232 	tp->tab[h] = p;
233 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
234 		(void*)p, p->nval, p->sval, p->fval, p->tval) );
235 	return(p);
236 }
237 
238 int hash(const char *s, int n)	/* form hash value for string s */
239 {
240 	unsigned hashval;
241 
242 	for (hashval = 0; *s != '\0'; s++)
243 		hashval = (*s + 31 * hashval);
244 	return hashval % n;
245 }
246 
247 void rehash(Array *tp)	/* rehash items in small table into big one */
248 {
249 	int i, nh, nsz;
250 	Cell *cp, *op, **np;
251 
252 	nsz = GROWTAB * tp->size;
253 	np = (Cell **) calloc(nsz, sizeof(Cell *));
254 	if (np == NULL)		/* can't do it, but can keep running. */
255 		return;		/* someone else will run out later. */
256 	for (i = 0; i < tp->size; i++) {
257 		for (cp = tp->tab[i]; cp; cp = op) {
258 			op = cp->cnext;
259 			nh = hash(cp->nval, nsz);
260 			cp->cnext = np[nh];
261 			np[nh] = cp;
262 		}
263 	}
264 	free(tp->tab);
265 	tp->tab = np;
266 	tp->size = nsz;
267 }
268 
269 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
270 {
271 	Cell *p;
272 	int h;
273 
274 	h = hash(s, tp->size);
275 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
276 		if (strcmp(s, p->nval) == 0)
277 			return(p);	/* found it */
278 	return(NULL);			/* not found */
279 }
280 
281 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
282 {
283 	int fldno;
284 
285 	if ((vp->tval & (NUM | STR)) == 0)
286 		funnyvar(vp, "assign to");
287 	if (isfld(vp)) {
288 		donerec = 0;	/* mark $0 invalid */
289 		fldno = atoi(vp->nval);
290 		if (fldno > *NF)
291 			newfld(fldno);
292 		   dprintf( ("setting field %d to %g\n", fldno, f) );
293 	} else if (isrec(vp)) {
294 		donefld = 0;	/* mark $1... invalid */
295 		donerec = 1;
296 	}
297 	if (freeable(vp))
298 		xfree(vp->sval); /* free any previous string */
299 	vp->tval &= ~STR;	/* mark string invalid */
300 	vp->tval |= NUM;	/* mark number ok */
301 	if (f == -0)  /* who would have thought this possible? */
302 		f = 0;
303 	   dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) );
304 	return vp->fval = f;
305 }
306 
307 void funnyvar(Cell *vp, const char *rw)
308 {
309 	if (isarr(vp))
310 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
311 	if (vp->tval & FCN)
312 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
313 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
314 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
315 }
316 
317 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
318 {
319 	char *t;
320 	int fldno;
321 
322 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
323 		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
324 	if ((vp->tval & (NUM | STR)) == 0)
325 		funnyvar(vp, "assign to");
326 	if (isfld(vp)) {
327 		donerec = 0;	/* mark $0 invalid */
328 		fldno = atoi(vp->nval);
329 		if (fldno > *NF)
330 			newfld(fldno);
331 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
332 	} else if (isrec(vp)) {
333 		donefld = 0;	/* mark $1... invalid */
334 		donerec = 1;
335 	}
336 	t = tostring(s);	/* in case it's self-assign */
337 	if (freeable(vp))
338 		xfree(vp->sval);
339 	vp->tval &= ~NUM;
340 	vp->tval |= STR;
341 	vp->tval &= ~DONTFREE;
342 	   dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
343 		(void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
344 	return(vp->sval = t);
345 }
346 
347 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
348 {
349 	if ((vp->tval & (NUM | STR)) == 0)
350 		funnyvar(vp, "read value of");
351 	if (isfld(vp) && donefld == 0)
352 		fldbld();
353 	else if (isrec(vp) && donerec == 0)
354 		recbld();
355 	if (!isnum(vp)) {	/* not a number */
356 		vp->fval = atof(vp->sval);	/* best guess */
357 		if (is_number(vp->sval) && !(vp->tval&CON))
358 			vp->tval |= NUM;	/* make NUM only sparingly */
359 	}
360 	   dprintf( ("getfval %p: %s = %g, t=%o\n",
361 		(void*)vp, NN(vp->nval), vp->fval, vp->tval) );
362 	return(vp->fval);
363 }
364 
365 static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
366 {
367 	char s[100];	/* BUG: unchecked */
368 	double dtemp;
369 
370 	if ((vp->tval & (NUM | STR)) == 0)
371 		funnyvar(vp, "read value of");
372 	if (isfld(vp) && donefld == 0)
373 		fldbld();
374 	else if (isrec(vp) && donerec == 0)
375 		recbld();
376 	if (isstr(vp) == 0) {
377 		if (freeable(vp))
378 			xfree(vp->sval);
379 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
380 			sprintf(s, "%.30g", vp->fval);
381 		else
382 			sprintf(s, *fmt, vp->fval);
383 		vp->sval = tostring(s);
384 		vp->tval &= ~DONTFREE;
385 		vp->tval |= STR;
386 	}
387 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
388 		(void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
389 	return(vp->sval);
390 }
391 
392 char *getsval(Cell *vp)       /* get string val of a Cell */
393 {
394       return get_str_val(vp, CONVFMT);
395 }
396 
397 char *getpssval(Cell *vp)     /* get string val of a Cell for print */
398 {
399       return get_str_val(vp, OFMT);
400 }
401 
402 
403 char *tostring(const char *s)	/* make a copy of string s */
404 {
405 	char *p;
406 
407 	p = (char *) malloc(strlen(s)+1);
408 	if (p == NULL)
409 		FATAL("out of space in tostring on %s", s);
410 	strcpy(p, s);
411 	return(p);
412 }
413 
414 char *qstring(const char *is, int delim)	/* collect string up to next delim */
415 {
416 	const char *os = is;
417 	int c, n;
418 	uschar *s = (uschar *) is;
419 	uschar *buf, *bp;
420 
421 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
422 		FATAL( "out of space in qstring(%s)", s);
423 	for (bp = buf; (c = *s) != delim; s++) {
424 		if (c == '\n')
425 			SYNTAX( "newline in string %.20s...", os );
426 		else if (c != '\\')
427 			*bp++ = c;
428 		else {	/* \something */
429 			c = *++s;
430 			if (c == 0) {	/* \ at end */
431 				*bp++ = '\\';
432 				break;	/* for loop */
433 			}
434 			switch (c) {
435 			case '\\':	*bp++ = '\\'; break;
436 			case 'n':	*bp++ = '\n'; break;
437 			case 't':	*bp++ = '\t'; break;
438 			case 'b':	*bp++ = '\b'; break;
439 			case 'f':	*bp++ = '\f'; break;
440 			case 'r':	*bp++ = '\r'; break;
441 			default:
442 				if (!isdigit(c)) {
443 					*bp++ = c;
444 					break;
445 				}
446 				n = c - '0';
447 				if (isdigit(s[1])) {
448 					n = 8 * n + *++s - '0';
449 					if (isdigit(s[1]))
450 						n = 8 * n + *++s - '0';
451 				}
452 				*bp++ = n;
453 				break;
454 			}
455 		}
456 	}
457 	*bp++ = 0;
458 	return (char *) buf;
459 }
460