xref: /freebsd/contrib/one-true-awk/tran.c (revision 6780ab54325a71e7e70112b11657973edde8655e)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define	DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "ytab.h"
33 
34 #define	FULLTAB	2	/* rehash when table gets this x full */
35 #define	GROWTAB 4	/* grow table by this factor */
36 
37 Array	*symtab;	/* main symbol table */
38 
39 char	**FS;		/* initial field sep */
40 char	**RS;		/* initial record sep */
41 char	**OFS;		/* output field sep */
42 char	**ORS;		/* output record sep */
43 char	**OFMT;		/* output format for numbers */
44 char	**CONVFMT;	/* format for conversions in getsval */
45 Awkfloat *NF;		/* number of fields in current record */
46 Awkfloat *NR;		/* number of current record */
47 Awkfloat *FNR;		/* number of current record in current file */
48 char	**FILENAME;	/* current filename argument */
49 Awkfloat *ARGC;		/* number of arguments from command line */
50 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
51 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
52 Awkfloat *RLENGTH;	/* length of same */
53 
54 Cell	*nrloc;		/* NR */
55 Cell	*nfloc;		/* NF */
56 Cell	*fnrloc;	/* FNR */
57 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
58 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
59 Cell	*rstartloc;	/* RSTART */
60 Cell	*rlengthloc;	/* RLENGTH */
61 Cell	*symtabloc;	/* SYMTAB */
62 
63 Cell	*nullloc;	/* a guaranteed empty cell */
64 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
65 Cell	*literal0;
66 
67 extern Cell **fldtab;
68 
69 void syminit(void)	/* initialize symbol table with builtin vars */
70 {
71 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
72 	/* this is used for if(x)... tests: */
73 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
74 	nullnode = celltonode(nullloc, CCON);
75 
76 	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
77 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
78 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
79 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
81 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
82 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
83 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
84 	NF = &nfloc->fval;
85 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
86 	NR = &nrloc->fval;
87 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
88 	FNR = &fnrloc->fval;
89 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
90 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
91 	RSTART = &rstartloc->fval;
92 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
93 	RLENGTH = &rlengthloc->fval;
94 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
95 	symtabloc->sval = (char *) symtab;
96 }
97 
98 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
99 {
100 	Cell *cp;
101 	int i;
102 	char temp[50];
103 
104 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
105 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
106 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
107 	cp->sval = (char *) ARGVtab;
108 	for (i = 0; i < ac; i++) {
109 		sprintf(temp, "%d", i);
110 		if (is_number(*av))
111 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
112 		else
113 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
114 		av++;
115 	}
116 }
117 
118 void envinit(char **envp)	/* set up ENVIRON variable */
119 {
120 	Cell *cp;
121 	char *p;
122 
123 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
124 	ENVtab = makesymtab(NSYMTAB);
125 	cp->sval = (char *) ENVtab;
126 	for ( ; *envp; envp++) {
127 		if ((p = strchr(*envp, '=')) == NULL)
128 			continue;
129 		if( p == *envp ) /* no left hand side name in env string */
130 			continue;
131 		*p++ = 0;	/* split into two strings at = */
132 		if (is_number(p))
133 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
134 		else
135 			setsymtab(*envp, p, 0.0, STR, ENVtab);
136 		p[-1] = '=';	/* restore in case env is passed down to a shell */
137 	}
138 }
139 
140 Array *makesymtab(int n)	/* make a new symbol table */
141 {
142 	Array *ap;
143 	Cell **tp;
144 
145 	ap = (Array *) malloc(sizeof(Array));
146 	tp = (Cell **) calloc(n, sizeof(Cell *));
147 	if (ap == NULL || tp == NULL)
148 		FATAL("out of space in makesymtab");
149 	ap->nelem = 0;
150 	ap->size = n;
151 	ap->tab = tp;
152 	return(ap);
153 }
154 
155 void freesymtab(Cell *ap)	/* free a symbol table */
156 {
157 	Cell *cp, *temp;
158 	Array *tp;
159 	int i;
160 
161 	if (!isarr(ap))
162 		return;
163 	tp = (Array *) ap->sval;
164 	if (tp == NULL)
165 		return;
166 	for (i = 0; i < tp->size; i++) {
167 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
168 			xfree(cp->nval);
169 			if (freeable(cp))
170 				xfree(cp->sval);
171 			temp = cp->cnext;	/* avoids freeing then using */
172 			free(cp);
173 			tp->nelem--;
174 		}
175 		tp->tab[i] = 0;
176 	}
177 	if (tp->nelem != 0)
178 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
179 	free(tp->tab);
180 	free(tp);
181 }
182 
183 void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
184 {
185 	Array *tp;
186 	Cell *p, *prev = NULL;
187 	int h;
188 
189 	tp = (Array *) ap->sval;
190 	h = hash(s, tp->size);
191 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
192 		if (strcmp(s, p->nval) == 0) {
193 			if (prev == NULL)	/* 1st one */
194 				tp->tab[h] = p->cnext;
195 			else			/* middle somewhere */
196 				prev->cnext = p->cnext;
197 			if (freeable(p))
198 				xfree(p->sval);
199 			free(p->nval);
200 			free(p);
201 			tp->nelem--;
202 			return;
203 		}
204 }
205 
206 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
207 {
208 	int h;
209 	Cell *p;
210 
211 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
212 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
213 			p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
214 		return(p);
215 	}
216 	p = (Cell *) malloc(sizeof(Cell));
217 	if (p == NULL)
218 		FATAL("out of space for symbol table at %s", n);
219 	p->nval = tostring(n);
220 	p->sval = s ? tostring(s) : tostring("");
221 	p->fval = f;
222 	p->tval = t;
223 	p->csub = CUNK;
224 	p->ctype = OCELL;
225 	tp->nelem++;
226 	if (tp->nelem > FULLTAB * tp->size)
227 		rehash(tp);
228 	h = hash(n, tp->size);
229 	p->cnext = tp->tab[h];
230 	tp->tab[h] = p;
231 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
232 		p, p->nval, p->sval, p->fval, p->tval) );
233 	return(p);
234 }
235 
236 int hash(const char *s, int n)	/* form hash value for string s */
237 {
238 	unsigned hashval;
239 
240 	for (hashval = 0; *s != '\0'; s++)
241 		hashval = (*s + 31 * hashval);
242 	return hashval % n;
243 }
244 
245 void rehash(Array *tp)	/* rehash items in small table into big one */
246 {
247 	int i, nh, nsz;
248 	Cell *cp, *op, **np;
249 
250 	nsz = GROWTAB * tp->size;
251 	np = (Cell **) calloc(nsz, sizeof(Cell *));
252 	if (np == NULL)		/* can't do it, but can keep running. */
253 		return;		/* someone else will run out later. */
254 	for (i = 0; i < tp->size; i++) {
255 		for (cp = tp->tab[i]; cp; cp = op) {
256 			op = cp->cnext;
257 			nh = hash(cp->nval, nsz);
258 			cp->cnext = np[nh];
259 			np[nh] = cp;
260 		}
261 	}
262 	free(tp->tab);
263 	tp->tab = np;
264 	tp->size = nsz;
265 }
266 
267 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
268 {
269 	Cell *p;
270 	int h;
271 
272 	h = hash(s, tp->size);
273 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
274 		if (strcmp(s, p->nval) == 0)
275 			return(p);	/* found it */
276 	return(NULL);			/* not found */
277 }
278 
279 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
280 {
281 	int fldno;
282 
283 	if ((vp->tval & (NUM | STR)) == 0)
284 		funnyvar(vp, "assign to");
285 	if (isfld(vp)) {
286 		donerec = 0;	/* mark $0 invalid */
287 		fldno = atoi(vp->nval);
288 		if (fldno > *NF)
289 			newfld(fldno);
290 		   dprintf( ("setting field %d to %g\n", fldno, f) );
291 	} else if (isrec(vp)) {
292 		donefld = 0;	/* mark $1... invalid */
293 		donerec = 1;
294 	}
295 	if (freeable(vp))
296 		xfree(vp->sval); /* free any previous string */
297 	vp->tval &= ~STR;	/* mark string invalid */
298 	vp->tval |= NUM;	/* mark number ok */
299 	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
300 	return vp->fval = f;
301 }
302 
303 void funnyvar(Cell *vp, const char *rw)
304 {
305 	if (isarr(vp))
306 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
307 	if (vp->tval & FCN)
308 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
309 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
310 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
311 }
312 
313 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
314 {
315 	char *t;
316 	int fldno;
317 
318 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, NN(vp->nval), s, vp->tval) );
319 	if ((vp->tval & (NUM | STR)) == 0)
320 		funnyvar(vp, "assign to");
321 	if (isfld(vp)) {
322 		donerec = 0;	/* mark $0 invalid */
323 		fldno = atoi(vp->nval);
324 		if (fldno > *NF)
325 			newfld(fldno);
326 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
327 	} else if (isrec(vp)) {
328 		donefld = 0;	/* mark $1... invalid */
329 		donerec = 1;
330 	}
331 	t = tostring(s);	/* in case it's self-assign */
332 	vp->tval &= ~NUM;
333 	vp->tval |= STR;
334 	if (freeable(vp))
335 		xfree(vp->sval);
336 	vp->tval &= ~DONTFREE;
337 	   dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), t,t, vp->tval) );
338 	return(vp->sval = t);
339 }
340 
341 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
342 {
343 	if ((vp->tval & (NUM | STR)) == 0)
344 		funnyvar(vp, "read value of");
345 	if (isfld(vp) && donefld == 0)
346 		fldbld();
347 	else if (isrec(vp) && donerec == 0)
348 		recbld();
349 	if (!isnum(vp)) {	/* not a number */
350 		vp->fval = atof(vp->sval);	/* best guess */
351 		if (is_number(vp->sval) && !(vp->tval&CON))
352 			vp->tval |= NUM;	/* make NUM only sparingly */
353 	}
354 	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
355 	return(vp->fval);
356 }
357 
358  static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
359 
360 {
361 	char s[100];	/* BUG: unchecked */
362 	double dtemp;
363 
364 	if ((vp->tval & (NUM | STR)) == 0)
365 		funnyvar(vp, "read value of");
366 	if (isfld(vp) && donefld == 0)
367 		fldbld();
368 	else if (isrec(vp) && donerec == 0)
369 		recbld();
370 	if (isstr(vp) == 0) {
371 		if (freeable(vp))
372 			xfree(vp->sval);
373 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
374 			sprintf(s, "%.30g", vp->fval);
375 		else
376 			sprintf(s, *fmt, vp->fval);
377 		vp->sval = tostring(s);
378 		vp->tval &= ~DONTFREE;
379 		vp->tval |= STR;
380 	}
381 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
382 	return(vp->sval);
383 }
384 
385 char *getsval(Cell *vp)       /* get string val of a Cell */
386 {
387       return get_str_val(vp, CONVFMT);
388 }
389 
390 char *getpssval(Cell *vp)     /* get string val of a Cell for print */
391 {
392       return get_str_val(vp, OFMT);
393 }
394 
395 
396 char *tostring(const char *s)	/* make a copy of string s */
397 {
398 	char *p;
399 
400 	p = (char *) malloc(strlen(s)+1);
401 	if (p == NULL)
402 		FATAL("out of space in tostring on %s", s);
403 	strcpy(p, s);
404 	return(p);
405 }
406 
407 char *qstring(const char *is, int delim)	/* collect string up to next delim */
408 {
409 	const char *os = is;
410 	int c, n;
411 	uschar *s = (uschar *) is;
412 	uschar *buf, *bp;
413 
414 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
415 		FATAL( "out of space in qstring(%s)", s);
416 	for (bp = buf; (c = *s) != delim; s++) {
417 		if (c == '\n')
418 			SYNTAX( "newline in string %.20s...", os );
419 		else if (c != '\\')
420 			*bp++ = c;
421 		else {	/* \something */
422 			c = *++s;
423 			if (c == 0) {	/* \ at end */
424 				*bp++ = '\\';
425 				break;	/* for loop */
426 			}
427 			switch (c) {
428 			case '\\':	*bp++ = '\\'; break;
429 			case 'n':	*bp++ = '\n'; break;
430 			case 't':	*bp++ = '\t'; break;
431 			case 'b':	*bp++ = '\b'; break;
432 			case 'f':	*bp++ = '\f'; break;
433 			case 'r':	*bp++ = '\r'; break;
434 			default:
435 				if (!isdigit(c)) {
436 					*bp++ = c;
437 					break;
438 				}
439 				n = c - '0';
440 				if (isdigit(s[1])) {
441 					n = 8 * n + *++s - '0';
442 					if (isdigit(s[1]))
443 						n = 8 * n + *++s - '0';
444 				}
445 				*bp++ = n;
446 				break;
447 			}
448 		}
449 	}
450 	*bp++ = 0;
451 	return (char *) buf;
452 }
453