xref: /titanic_51/usr/src/cmd/awk/tran.c (revision 1a7c1b724419d3cb5fa6eea75123c6b2060ba31b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 2.13	*/
32 
33 
34 #define	DEBUG
35 #include <stdio.h>
36 #include <ctype.h>
37 #include <string.h>
38 #include "awk.h"
39 #include "y.tab.h"
40 
41 #define	FULLTAB	2	/* rehash when table gets this x full */
42 #define	GROWTAB 4	/* grow table by this factor */
43 
44 Array	*symtab;	/* main symbol table; created in syminit() */
45 
46 uchar	**FS;		/* initial field sep */
47 uchar	**RS;		/* initial record sep */
48 uchar	**OFS;		/* output field sep */
49 uchar	**ORS;		/* output record sep */
50 uchar	**OFMT;		/* output format for numbers */
51 Awkfloat *NF;		/* number of fields in current record */
52 Awkfloat *NR;		/* number of current record */
53 Awkfloat *FNR;		/* number of current record in current file */
54 uchar	**FILENAME;	/* current filename argument */
55 Awkfloat *ARGC;		/* number of arguments from command line */
56 uchar	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
57 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
58 Awkfloat *RLENGTH;	/* length of same */
59 
60 Cell	*recloc;	/* location of record */
61 Cell	*nrloc;		/* NR */
62 Cell	*nfloc;		/* NF */
63 Cell	*fnrloc;	/* FNR */
64 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
65 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
66 Cell	*rstartloc;	/* RSTART */
67 Cell	*rlengthloc;	/* RLENGTH */
68 Cell	*symtabloc;	/* SYMTAB */
69 
70 Cell	*nullloc;	/* the "$zero&null" constant cell; set in syminit() */
71 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
72 
73 extern Node *valtonode();	/* defined outside this file */
74 extern Cell fldtab[];		/* field cells; fldtab[0] is $0 */
75 extern uchar recdata[];		/* buffer used as the string value of $0 */
76 
77 syminit()
78 {
79 	int i;
80 
81 	fldtab[0].ctype = OCELL;
82 	fldtab[0].csub = CFLD;
83 	fldtab[0].nval = (uchar*) "$0";
84 	fldtab[0].sval = recdata;
85 	fldtab[0].fval = 0.0;
86 	fldtab[0].tval = REC|STR|DONTFREE;
87 
88 	for (i = 1; i < MAXFLD; i++) {
89 		fldtab[i].ctype = OCELL;
90 		fldtab[i].csub = CFLD;
91 		fldtab[i].nval = NULL;
92 		fldtab[i].sval = (uchar*) "";
93 		fldtab[i].fval = 0.0;
94 		fldtab[i].tval = FLD|STR|DONTFREE;
95 	}
96 	symtab = makesymtab(NSYMTAB);
97 	setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
98 	/* this is used for if(x)... tests: */
99 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
100 	nullnode = valtonode(nullloc, CCON);
101 	/* recloc = setsymtab("$0", record, 0.0, REC|STR|DONTFREE, symtab); */
102 	recloc = &fldtab[0];
103 	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
104 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
105 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
106 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
107 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
108 	FILENAME = &setsymtab("FILENAME", "-", 0.0, STR|DONTFREE, symtab)->sval;
109 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
110 	NF = &nfloc->fval;
111 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
112 	NR = &nrloc->fval;
113 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
114 	FNR = &fnrloc->fval;
115 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
116 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
117 	RSTART = &rstartloc->fval;
118 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
119 	RLENGTH = &rlengthloc->fval;
120 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
121 	symtabloc->sval = (uchar *) symtab;
122 }
123 
124 arginit(ac, av)
125 	int ac;
126 	uchar *av[];
127 {
128 	Cell *cp;
129 	Array *makesymtab();
130 	int i;
131 	uchar temp[5];
132 
133 	for (i = 1; i < ac; i++)	/* first make FILENAME first real argument */
134 		if (!isclvar(av[i])) {
135 			setsval(lookup("FILENAME", symtab), av[i]);
136 			break;
137 		}
138 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
139 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
140 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
141 	cp->sval = (uchar *) ARGVtab;
142 	for (i = 0; i < ac; i++) {
143 		sprintf((char *)temp, "%d", i);
144 		if (isnumber(*av))
145 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
146 		else
147 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
148 		av++;
149 	}
150 }
151 
152 envinit(envp)
153 	uchar *envp[];
154 {
155 	Cell *cp;
156 	Array *makesymtab();
157 	uchar *p;
158 
159 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
160 	ENVtab = makesymtab(NSYMTAB);
161 	cp->sval = (uchar *) ENVtab;
162 	for ( ; *envp; envp++) {
163 		if ((p = (uchar *) strchr((char *) *envp, '=')) == NULL)	/* index() on bsd */
164 			continue;
165 		*p++ = 0;	/* split into two strings at = */
166 		if (isnumber(p))
167 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
168 		else
169 			setsymtab(*envp, p, 0.0, STR, ENVtab);
170 		p[-1] = '=';	/* restore in case env is passed down to a shell */
171 	}
172 }
173 
174 Array *makesymtab(n)
175 	int n;
176 {
177 	Array *ap;
178 	Cell **tp;
179 
180 	ap = (Array *) malloc(sizeof(Array));
181 	tp = (Cell **) calloc(n, sizeof(Cell *));
182 	if (ap == NULL || tp == NULL)
183 		ERROR "out of space in makesymtab" FATAL;
184 	ap->nelem = 0;
185 	ap->size = n;
186 	ap->tab = tp;
187 	return(ap);
188 }
189 
190 freesymtab(ap)	/* free symbol table */
191 	Cell *ap;
192 {
193 	Cell *cp, *next;
194 	Array *tp;
195 	int i;
196 
197 	if (!isarr(ap))
198 		return;
199 	tp = (Array *) ap->sval;
200 	if (tp == NULL)
201 		return;
202 	for (i = 0; i < tp->size; i++) {
203 		for (cp = tp->tab[i]; cp != NULL; cp = next) {
204 			next = cp->cnext;
205 			xfree(cp->nval);
206 			if (freeable(cp))
207 				xfree(cp->sval);
208 			free(cp);
209 		}
210 	}
211 	free(tp->tab);
212 	free(tp);
213 }
214 
215 freeelem(ap, s)		/* free elem s from ap (i.e., ap["s"] */
216 	Cell *ap;
217 	uchar *s;
218 {
219 	Array *tp;
220 	Cell *p, *prev = NULL;
221 	int h;
222 
223 	tp = (Array *) ap->sval;
224 	h = hash(s, tp->size);
225 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
226 		if (strcmp((char *) s, (char *) p->nval) == 0) {
227 			if (prev == NULL)	/* 1st one */
228 				tp->tab[h] = p->cnext;
229 			else			/* middle somewhere */
230 				prev->cnext = p->cnext;
231 			if (freeable(p))
232 				xfree(p->sval);
233 			free(p->nval);
234 			free(p);
235 			tp->nelem--;
236 			return;
237 		}
238 }
239 
240 Cell *setsymtab(n, s, f, t, tp)
241 	uchar *n, *s;
242 	Awkfloat f;
243 	unsigned t;
244 	Array *tp;
245 {
246 	register int h;
247 	register Cell *p;
248 	Cell *lookup();
249 
250 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
251 		dprintf( ("setsymtab found %o: n=%s", p, p->nval) );
252 		dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval, p->fval, p->tval) );
253 		return(p);
254 	}
255 	p = (Cell *) malloc(sizeof(Cell));
256 	if (p == NULL)
257 		ERROR "symbol table overflow at %s", n FATAL;
258 	p->nval = tostring(n);
259 	p->sval = s ? tostring(s) : tostring("");
260 	p->fval = f;
261 	p->tval = t;
262 	p->csub = 0;
263 
264 	tp->nelem++;
265 	if (tp->nelem > FULLTAB * tp->size)
266 		rehash(tp);
267 	h = hash(n, tp->size);
268 	p->cnext = tp->tab[h];
269 	tp->tab[h] = p;
270 	dprintf( ("setsymtab set %o: n=%s", p, p->nval) );
271 	dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval, p->fval, p->tval) );
272 	return(p);
273 }
274 
275 hash(s, n)	/* form hash value for string s */
276 	register uchar *s;
277 	int n;
278 {
279 	register unsigned hashval;
280 
281 	for (hashval = 0; *s != '\0'; s++)
282 		hashval = (*s + 31 * hashval);
283 	return hashval % n;
284 }
285 
286 rehash(tp)	/* rehash items in small table into big one */
287 	Array *tp;
288 {
289 	int i, nh, nsz;
290 	Cell *cp, *op, **np;
291 
292 	nsz = GROWTAB * tp->size;
293 	np = (Cell **) calloc(nsz, sizeof(Cell *));
294 	if (np == NULL)
295 		ERROR "out of space in rehash" FATAL;
296 	for (i = 0; i < tp->size; i++) {
297 		for (cp = tp->tab[i]; cp; cp = op) {
298 			op = cp->cnext;
299 			nh = hash(cp->nval, nsz);
300 			cp->cnext = np[nh];
301 			np[nh] = cp;
302 		}
303 	}
304 	free(tp->tab);
305 	tp->tab = np;
306 	tp->size = nsz;
307 }
308 
309 Cell *lookup(s, tp)	/* look for s in tp */
310 	register uchar *s;
311 	Array *tp;
312 {
313 	register Cell *p, *prev = NULL;
314 	int h;
315 
316 	h = hash(s, tp->size);
317 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
318 		if (strcmp((char *) s, (char *) p->nval) == 0)
319 			return(p);	/* found it */
320 	return(NULL);			/* not found */
321 }
322 
323 Awkfloat setfval(vp, f)
324 	register Cell *vp;
325 	Awkfloat f;
326 {
327 	if ((vp->tval & (NUM | STR)) == 0)
328 		funnyvar(vp, "assign to");
329 	if (vp->tval & FLD) {
330 		donerec = 0;	/* mark $0 invalid */
331 		if (vp-fldtab > *NF)
332 			newfld(vp-fldtab);
333 		dprintf( ("setting field %d to %g\n", vp-fldtab, f) );
334 	} else if (vp->tval & REC) {
335 		donefld = 0;	/* mark $1... invalid */
336 		donerec = 1;
337 	}
338 	vp->tval &= ~STR;	/* mark string invalid */
339 	vp->tval |= NUM;	/* mark number ok */
340 	dprintf( ("setfval %p: %s = %g, t=%o\n", vp,
341 		vp->nval ? vp->nval : (unsigned char *)"NULL",
342 		f, vp->tval) );
343 	return vp->fval = f;
344 }
345 
346 funnyvar(vp, rw)
347 	Cell *vp;
348 	char *rw;
349 {
350 	if (vp->tval & ARR)
351 		ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
352 	if (vp->tval & FCN)
353 		ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
354 	ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o",
355 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
356 }
357 
358 uchar *setsval(vp, s)
359 register Cell *vp;
360 uchar *s;
361 {
362 	if ((vp->tval & (NUM | STR)) == 0)
363 		funnyvar(vp, "assign to");
364 	if (vp->tval & FLD) {
365 		donerec = 0;	/* mark $0 invalid */
366 		if (vp-fldtab > *NF)
367 			newfld(vp-fldtab);
368 		dprintf( ("setting field %d to %s\n", vp-fldtab, s) );
369 	} else if (vp->tval & REC) {
370 		donefld = 0;	/* mark $1... invalid */
371 		donerec = 1;
372 	}
373 	vp->tval &= ~NUM;
374 	vp->tval |= STR;
375 	if (freeable(vp))
376 		xfree(vp->sval);
377 	vp->tval &= ~DONTFREE;
378 	dprintf( ("setsval %o: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
379 	return(vp->sval = tostring(s));
380 }
381 
382 Awkfloat r_getfval(vp)
383 register Cell *vp;
384 {
385 	/* if (vp->tval & ARR)
386 		ERROR "illegal reference to array %s", vp->nval FATAL;
387 		return 0.0; */
388 	if ((vp->tval & (NUM | STR)) == 0)
389 		funnyvar(vp, "read value of");
390 	if ((vp->tval & FLD) && donefld == 0)
391 		fldbld();
392 	else if ((vp->tval & REC) && donerec == 0)
393 		recbld();
394 	if (!isnum(vp)) {	/* not a number */
395 		vp->fval = atof(vp->sval);	/* best guess */
396 		if (isnumber(vp->sval) && !(vp->tval&CON))
397 			vp->tval |= NUM;	/* make NUM only sparingly */
398 	}
399 	dprintf( ("getfval %o: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
400 	return(vp->fval);
401 }
402 
403 uchar *r_getsval(vp)
404 register Cell *vp;
405 {
406 	uchar s[100];
407 
408 	/* if (vp->tval & ARR)
409 		ERROR "illegal reference to array %s", vp->nval FATAL;
410 		return ""; */
411 	if ((vp->tval & (NUM | STR)) == 0)
412 		funnyvar(vp, "read value of");
413 	if ((vp->tval & FLD) && donefld == 0)
414 		fldbld();
415 	else if ((vp->tval & REC) && donerec == 0)
416 		recbld();
417 	if ((vp->tval & STR) == 0) {
418 		if (!(vp->tval&DONTFREE))
419 			xfree(vp->sval);
420 		if ((long long)vp->fval == vp->fval)
421 			sprintf((char *)s, "%.20g", vp->fval);
422 		else
423 			sprintf((char *)s, (char *)*OFMT, vp->fval);
424 		vp->sval = tostring(s);
425 		vp->tval &= ~DONTFREE;
426 		vp->tval |= STR;
427 	}
428 	dprintf( ("getsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, vp->sval, vp->tval) );
429 	return(vp->sval);
430 }
431 
432 uchar *tostring(s)
433 register uchar *s;
434 {
435 	register uchar *p;
436 
437 	p = (uchar *) malloc(strlen((char *) s)+1);
438 	if (p == NULL)
439 		ERROR "out of space in tostring on %s", s FATAL;
440 	strcpy((char *) p, (char *) s);
441 	return(p);
442 }
443 
444 uchar *qstring(s, delim)	/* collect string up to delim */
445 	uchar *s;
446 	int delim;
447 {
448 	uchar *q;
449 	int c, n;
450 
451 	for (q = cbuf; (c = *s) != delim; s++) {
452 		if (q >= cbuf + RECSIZE - 1)
453 			ERROR "string %.10s... too long", cbuf SYNTAX;
454 		else if (c == '\n')
455 			ERROR "newline in string %.10s...", cbuf SYNTAX;
456 		else if (c != '\\')
457 			*q++ = c;
458 		else	/* \something */
459 			switch (c = *++s) {
460 			case '\\':	*q++ = '\\'; break;
461 			case 'n':	*q++ = '\n'; break;
462 			case 't':	*q++ = '\t'; break;
463 			case 'b':	*q++ = '\b'; break;
464 			case 'f':	*q++ = '\f'; break;
465 			case 'r':	*q++ = '\r'; break;
466 			default:
467 				if (!isdigit(c)) {
468 					*q++ = c;
469 					break;
470 				}
471 				n = c - '0';
472 				if (isdigit(s[1])) {
473 					n = 8 * n + *++s - '0';
474 					if (isdigit(s[1]))
475 						n = 8 * n + *++s - '0';
476 				}
477 				*q++ = n;
478 				break;
479 			}
480 	}
481 	*q = '\0';
482 	return cbuf;
483 }
484