xref: /illumos-gate/usr/src/cmd/awk/tran.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #define	DEBUG
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <ctype.h>
36 #include <string.h>
37 #include "awk.h"
38 #include "y.tab.h"
39 
/*
 * Hash table tuning: setsymtab() calls rehash() once the element count
 * exceeds FULLTAB times the number of buckets, and rehash() multiplies
 * the bucket count by GROWTAB.
 */
#define	FULLTAB	2	/* rehash when nelem > FULLTAB * size */
#define	GROWTAB 4	/* grow table by this factor */

Array	*symtab;	/* main symbol table */

/*
 * Pointers into the cells of the built-in variables.  They are filled in
 * by syminit() (ARGC by arginit()) and point at the sval or fval member
 * of the corresponding symbol table cell.
 */
uchar	**FS;		/* input field separator; initially " " */
uchar	**RS;		/* input record separator; initially "\n" */
uchar	**OFS;		/* output field separator; initially " " */
uchar	**ORS;		/* output record separator; initially "\n" */
uchar	**OFMT;		/* output format for numbers; initially "%.6g" */
Awkfloat *NF;		/* number of fields in current record */
Awkfloat *NR;		/* number of current record */
Awkfloat *FNR;		/* number of current record in current file */
uchar	**FILENAME;	/* current filename argument; initially "-" */
Awkfloat *ARGC;		/* number of arguments from command line */
uchar	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH;	/* length of same */

Cell	*recloc;	/* location of record ($0) */
Cell	*nrloc;		/* NR */
Cell	*nfloc;		/* NF */
Cell	*fnrloc;	/* FNR */
Array	*ARGVtab;	/* symbol table containing ARGV[...] */
Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
Cell	*rstartloc;	/* RSTART */
Cell	*rlengthloc;	/* RLENGTH */
Cell	*symtabloc;	/* SYMTAB; its sval is set to point at symtab */

Cell	*nullloc;	/* the "$zero&null" constant cell (0 and "") */
Node	*nullnode;	/* zero&null, converted into a node for comparisons */

/* Local helper: enlarge a symbol table and redistribute its cells. */
static	void	rehash(Array *);
73 
74 void
75 syminit(void)
76 {
77 	init_buf(&record, &record_size, LINE_INCR);
78 
79 	/* initialize $0 */
80 	recloc = getfld(0);
81 	recloc->nval = (uchar *)"$0";
82 	recloc->sval = record;
83 	recloc->tval = REC|STR|DONTFREE;
84 
85 	symtab = makesymtab(NSYMTAB);
86 	(void) setsymtab((uchar *)"0", (uchar *)"0", 0.0,
87 	    NUM|STR|CON|DONTFREE, symtab);
88 	/* this is used for if(x)... tests: */
89 	nullloc = setsymtab((uchar *)"$zero&null", (uchar *)"", 0.0,
90 	    NUM|STR|CON|DONTFREE, symtab);
91 	nullnode = valtonode(nullloc, CCON);
92 	FS = &setsymtab((uchar *)"FS", (uchar *)" ", 0.0,
93 	    STR|DONTFREE, symtab)->sval;
94 	RS = &setsymtab((uchar *)"RS", (uchar *)"\n", 0.0,
95 	    STR|DONTFREE, symtab)->sval;
96 	OFS = &setsymtab((uchar *)"OFS", (uchar *)" ", 0.0,
97 	    STR|DONTFREE, symtab)->sval;
98 	ORS = &setsymtab((uchar *)"ORS", (uchar *)"\n", 0.0,
99 	    STR|DONTFREE, symtab)->sval;
100 	OFMT = &setsymtab((uchar *)"OFMT", (uchar *)"%.6g", 0.0,
101 	    STR|DONTFREE, symtab)->sval;
102 	FILENAME = &setsymtab((uchar *)"FILENAME", (uchar *)"-", 0.0,
103 	    STR|DONTFREE, symtab)->sval;
104 	nfloc = setsymtab((uchar *)"NF", (uchar *)"", 0.0, NUM, symtab);
105 	NF = &nfloc->fval;
106 	nrloc = setsymtab((uchar *)"NR", (uchar *)"", 0.0, NUM, symtab);
107 	NR = &nrloc->fval;
108 	fnrloc = setsymtab((uchar *)"FNR", (uchar *)"", 0.0, NUM, symtab);
109 	FNR = &fnrloc->fval;
110 	SUBSEP = &setsymtab((uchar *)"SUBSEP", (uchar *)"\034", 0.0,
111 	    STR|DONTFREE, symtab)->sval;
112 	rstartloc = setsymtab((uchar *)"RSTART", (uchar *)"", 0.0,
113 	    NUM, symtab);
114 	RSTART = &rstartloc->fval;
115 	rlengthloc = setsymtab((uchar *)"RLENGTH", (uchar *)"", 0.0,
116 	    NUM, symtab);
117 	RLENGTH = &rlengthloc->fval;
118 	symtabloc = setsymtab((uchar *)"SYMTAB", (uchar *)"", 0.0, ARR, symtab);
119 	symtabloc->sval = (uchar *)symtab;
120 }
121 
122 void
123 arginit(int ac, uchar *av[])
124 {
125 	Cell *cp;
126 	int i;
127 	uchar temp[11];
128 
129 	/* first make FILENAME first real argument */
130 	for (i = 1; i < ac; i++) {
131 		if (!isclvar(av[i])) {
132 			(void) setsval(lookup((uchar *)"FILENAME", symtab),
133 			    av[i]);
134 			break;
135 		}
136 	}
137 	ARGC = &setsymtab((uchar *)"ARGC", (uchar *)"", (Awkfloat)ac,
138 	    NUM, symtab)->fval;
139 	cp = setsymtab((uchar *)"ARGV", (uchar *)"", 0.0, ARR, symtab);
140 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
141 	cp->sval = (uchar *) ARGVtab;
142 	for (i = 0; i < ac; i++) {
143 		(void) sprintf((char *)temp, "%d", i);
144 		if (is_number(*av)) {
145 			(void) setsymtab(temp, *av, atof((const char *)*av),
146 			    STR|NUM, ARGVtab);
147 		} else {
148 			(void) setsymtab(temp, *av, 0.0, STR, ARGVtab);
149 		}
150 		av++;
151 	}
152 }
153 
154 void
155 envinit(uchar *envp[])
156 {
157 	Cell *cp;
158 	uchar *p;
159 
160 	cp = setsymtab((uchar *)"ENVIRON", (uchar *)"", 0.0, ARR, symtab);
161 	ENVtab = makesymtab(NSYMTAB);
162 	cp->sval = (uchar *) ENVtab;
163 	for (; *envp; envp++) {
164 		if ((p = (uchar *)strchr((char *)*envp, '=')) == NULL)
165 			continue;
166 		*p++ = 0;	/* split into two strings at = */
167 		if (is_number(p)) {
168 			(void) setsymtab(*envp, p, atof((const char *)p),
169 			    STR|NUM, ENVtab);
170 		} else {
171 			(void) setsymtab(*envp, p, 0.0, STR, ENVtab);
172 		}
173 		/* restore in case env is passed down to a shell */
174 		p[-1] = '=';
175 	}
176 }
177 
178 Array *
179 makesymtab(int n)
180 {
181 	Array *ap;
182 	Cell **tp;
183 
184 	ap = (Array *)malloc(sizeof (Array));
185 	tp = (Cell **)calloc(n, sizeof (Cell *));
186 	if (ap == NULL || tp == NULL)
187 		ERROR "out of space in makesymtab" FATAL;
188 	ap->nelem = 0;
189 	ap->size = n;
190 	ap->tab = tp;
191 	return (ap);
192 }
193 
194 void
195 freesymtab(Cell *ap)	/* free symbol table */
196 {
197 	Cell *cp, *next;
198 	Array *tp;
199 	int i;
200 
201 	if (!isarr(ap))
202 		return;
203 	/*LINTED align*/
204 	tp = (Array *)ap->sval;
205 	if (tp == NULL)
206 		return;
207 	for (i = 0; i < tp->size; i++) {
208 		for (cp = tp->tab[i]; cp != NULL; cp = next) {
209 			next = cp->cnext;
210 			xfree(cp->nval);
211 			if (freeable(cp))
212 				xfree(cp->sval);
213 			free(cp);
214 		}
215 	}
216 	free(tp->tab);
217 	free(tp);
218 }
219 
220 void
221 freeelem(Cell *ap, uchar *s)		/* free elem s from ap (i.e., ap["s"] */
222 {
223 	Array *tp;
224 	Cell *p, *prev = NULL;
225 	int h;
226 
227 	/*LINTED align*/
228 	tp = (Array *)ap->sval;
229 	h = hash(s, tp->size);
230 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
231 		if (strcmp((char *)s, (char *)p->nval) == 0) {
232 			if (prev == NULL)	/* 1st one */
233 				tp->tab[h] = p->cnext;
234 			else			/* middle somewhere */
235 				prev->cnext = p->cnext;
236 			if (freeable(p))
237 				xfree(p->sval);
238 			free(p->nval);
239 			free(p);
240 			tp->nelem--;
241 			return;
242 		}
243 }
244 
245 Cell *
246 setsymtab(uchar *n, uchar *s, Awkfloat f, unsigned int t, Array *tp)
247 {
248 	register int h;
249 	register Cell *p;
250 
251 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
252 		dprintf(("setsymtab found %p: n=%s", (void *)p, p->nval));
253 		dprintf((" s=\"%s\" f=%g t=%p\n",
254 		    p->sval, p->fval, (void *)p->tval));
255 		return (p);
256 	}
257 	p = (Cell *)malloc(sizeof (Cell));
258 	if (p == NULL)
259 		ERROR "symbol table overflow at %s", n FATAL;
260 	p->nval = tostring(n);
261 	p->sval = s ? tostring(s) : tostring((uchar *)"");
262 	p->fval = f;
263 	p->tval = t;
264 	p->csub = 0;
265 
266 	tp->nelem++;
267 	if (tp->nelem > FULLTAB * tp->size)
268 		rehash(tp);
269 	h = hash(n, tp->size);
270 	p->cnext = tp->tab[h];
271 	tp->tab[h] = p;
272 	dprintf(("setsymtab set %p: n=%s", (void *)p, p->nval));
273 	dprintf((" s=\"%s\" f=%g t=%p\n", p->sval, p->fval, (void *)p->tval));
274 	return (p);
275 }
276 
277 int
278 hash(uchar *s, int n)	/* form hash value for string s */
279 {
280 	register unsigned hashval;
281 
282 	for (hashval = 0; *s != '\0'; s++)
283 		hashval = (*s + 31 * hashval);
284 	return (hashval % n);
285 }
286 
287 static void
288 rehash(Array *tp)	/* rehash items in small table into big one */
289 {
290 	int i, nh, nsz;
291 	Cell *cp, *op, **np;
292 
293 	nsz = GROWTAB * tp->size;
294 	np = (Cell **)calloc(nsz, sizeof (Cell *));
295 	if (np == NULL)
296 		ERROR "out of space in rehash" FATAL;
297 	for (i = 0; i < tp->size; i++) {
298 		for (cp = tp->tab[i]; cp; cp = op) {
299 			op = cp->cnext;
300 			nh = hash(cp->nval, nsz);
301 			cp->cnext = np[nh];
302 			np[nh] = cp;
303 		}
304 	}
305 	free(tp->tab);
306 	tp->tab = np;
307 	tp->size = nsz;
308 }
309 
310 Cell *
311 lookup(uchar *s, Array *tp)	/* look for s in tp */
312 {
313 	register Cell *p;
314 	int h;
315 
316 	h = hash(s, tp->size);
317 	for (p = tp->tab[h]; p != NULL; p = p->cnext) {
318 		if (strcmp((char *)s, (char *)p->nval) == 0)
319 			return (p);	/* found it */
320 	}
321 	return (NULL);			/* not found */
322 }
323 
324 Awkfloat
325 setfval(Cell *vp, Awkfloat f)
326 {
327 	int	i;
328 
329 	if ((vp->tval & (NUM | STR)) == 0)
330 		funnyvar(vp, "assign to");
331 	if (vp->tval & FLD) {
332 		donerec = 0;	/* mark $0 invalid */
333 		i = fldidx(vp);
334 		if (i > *NF)
335 			newfld(i);
336 		dprintf(("setting field %d to %g\n", i, f));
337 	} else if (vp->tval & REC) {
338 		donefld = 0;	/* mark $1... invalid */
339 		donerec = 1;
340 	}
341 	vp->tval &= ~STR;	/* mark string invalid */
342 	vp->tval |= NUM;	/* mark number ok */
343 	dprintf(("setfval %p: %s = %g, t=%p\n", (void *)vp,
344 	    vp->nval ? vp->nval : (unsigned char *)"NULL",
345 	    f, (void *)vp->tval));
346 	return (vp->fval = f);
347 }
348 
349 void
350 funnyvar(Cell *vp, char *rw)
351 {
352 	if (vp->tval & ARR)
353 		ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
354 	if (vp->tval & FCN)
355 		ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
356 	ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o",
357 	    vp, vp->nval, vp->sval, vp->fval, vp->tval CONT;
358 }
359 
360 uchar *
361 setsval(Cell *vp, uchar *s)
362 {
363 	int	i;
364 
365 	if ((vp->tval & (NUM | STR)) == 0)
366 		funnyvar(vp, "assign to");
367 	if (vp->tval & FLD) {
368 		donerec = 0;	/* mark $0 invalid */
369 		i = fldidx(vp);
370 		if (i > *NF)
371 			newfld(i);
372 		dprintf(("setting field %d to %s\n", i, s));
373 	} else if (vp->tval & REC) {
374 		donefld = 0;	/* mark $1... invalid */
375 		donerec = 1;
376 	}
377 	vp->tval &= ~NUM;
378 	vp->tval |= STR;
379 	if (freeable(vp))
380 		xfree(vp->sval);
381 	vp->tval &= ~DONTFREE;
382 	dprintf(("setsval %p: %s = \"%s\", t=%p\n",
383 	    (void *)vp,
384 	    vp->nval ? (char *)vp->nval : "",
385 	    s,
386 	    (void *)(vp->tval ? (char *)vp->tval : "")));
387 	return (vp->sval = tostring(s));
388 }
389 
390 Awkfloat
391 r_getfval(Cell *vp)
392 {
393 	if ((vp->tval & (NUM | STR)) == 0)
394 		funnyvar(vp, "read value of");
395 	if ((vp->tval & FLD) && donefld == 0)
396 		fldbld();
397 	else if ((vp->tval & REC) && donerec == 0)
398 		recbld();
399 	if (!isnum(vp)) {	/* not a number */
400 		vp->fval = atof((const char *)vp->sval);	/* best guess */
401 		if (is_number(vp->sval) && !(vp->tval&CON))
402 			vp->tval |= NUM;	/* make NUM only sparingly */
403 	}
404 	dprintf(("getfval %p: %s = %g, t=%p\n",
405 	    (void *)vp, vp->nval, vp->fval, (void *)vp->tval));
406 	return (vp->fval);
407 }
408 
409 uchar *
410 r_getsval(Cell *vp)
411 {
412 	uchar s[256];
413 
414 	if ((vp->tval & (NUM | STR)) == 0)
415 		funnyvar(vp, "read value of");
416 	if ((vp->tval & FLD) && donefld == 0)
417 		fldbld();
418 	else if ((vp->tval & REC) && donerec == 0)
419 		recbld();
420 	if ((vp->tval & STR) == 0) {
421 		if (!(vp->tval&DONTFREE))
422 			xfree(vp->sval);
423 		if ((long long)vp->fval == vp->fval) {
424 			(void) snprintf((char *)s, sizeof (s),
425 			    "%.20g", vp->fval);
426 		} else {
427 			/*LINTED*/
428 			(void) snprintf((char *)s, sizeof (s),
429 			    (char *)*OFMT, vp->fval);
430 		}
431 		vp->sval = tostring(s);
432 		vp->tval &= ~DONTFREE;
433 		vp->tval |= STR;
434 	}
435 	dprintf(("getsval %p: %s = \"%s\", t=%p\n",
436 	    (void *)vp,
437 	    vp->nval ? (char *)vp->nval : "",
438 	    vp->sval ? (char *)vp->sval : "",
439 	    (void *)vp->tval));
440 	return (vp->sval);
441 }
442 
443 uchar *
444 tostring(uchar *s)
445 {
446 	register uchar *p;
447 
448 	p = (uchar *)malloc(strlen((char *)s)+1);
449 	if (p == NULL)
450 		ERROR "out of space in tostring on %s", s FATAL;
451 	(void) strcpy((char *)p, (char *)s);
452 	return (p);
453 }
454 
455 uchar *
456 qstring(uchar *s, int delim)	/* collect string up to delim */
457 {
458 	uchar *cbuf, *ret;
459 	int c, n;
460 	size_t	cbufsz, cnt;
461 
462 	init_buf(&cbuf, &cbufsz, LINE_INCR);
463 
464 	for (cnt = 0; (c = *s) != delim; s++) {
465 		if (c == '\n') {
466 			ERROR "newline in string %.10s...", cbuf SYNTAX;
467 		} else if (c != '\\') {
468 			expand_buf(&cbuf, &cbufsz, cnt);
469 			cbuf[cnt++] = c;
470 		} else {	/* \something */
471 			expand_buf(&cbuf, &cbufsz, cnt);
472 			switch (c = *++s) {
473 			case '\\':	cbuf[cnt++] = '\\'; break;
474 			case 'n':	cbuf[cnt++] = '\n'; break;
475 			case 't':	cbuf[cnt++] = '\t'; break;
476 			case 'b':	cbuf[cnt++] = '\b'; break;
477 			case 'f':	cbuf[cnt++] = '\f'; break;
478 			case 'r':	cbuf[cnt++] = '\r'; break;
479 			default:
480 				if (!isdigit(c)) {
481 					cbuf[cnt++] = c;
482 					break;
483 				}
484 				n = c - '0';
485 				if (isdigit(s[1])) {
486 					n = 8 * n + *++s - '0';
487 					if (isdigit(s[1]))
488 						n = 8 * n + *++s - '0';
489 				}
490 				cbuf[cnt++] = n;
491 				break;
492 			}
493 		}
494 	}
495 	cbuf[cnt] = '\0';
496 	ret = tostring(cbuf);
497 	free(cbuf);
498 	return (ret);
499 }
500