1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 #define DEBUG
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include "awk.h"
#include "y.tab.h"
39
/*
 * Hash table tuning: setsymtab() calls rehash() once nelem exceeds
 * FULLTAB * size, and rehash() multiplies the bucket count by GROWTAB.
 */
#define FULLTAB 2 /* rehash when table gets this x full */
#define GROWTAB 4 /* grow table by this factor */

Array *symtab; /* main symbol table */

/*
 * Pointers into the value slots of the built-in variables' cells.
 * They are filled in by syminit() (ARGC by arginit()).
 */
uchar **FS; /* initial field sep */
uchar **RS; /* initial record sep */
uchar **OFS; /* output field sep */
uchar **ORS; /* output record sep */
uchar **OFMT; /* output format for numbers */
Awkfloat *NF; /* number of fields in current record */
Awkfloat *NR; /* number of current record */
Awkfloat *FNR; /* number of current record in current file */
uchar **FILENAME; /* current filename argument */
Awkfloat *ARGC; /* number of arguments from command line */
uchar **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH; /* length of same */

/* Cells of selected built-ins, kept so they can be reached directly. */
Cell *recloc; /* location of record */
Cell *nrloc; /* NR */
Cell *nfloc; /* NF */
Cell *fnrloc; /* FNR */
Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */
Cell *rlengthloc; /* RLENGTH */
Cell *symtabloc; /* SYMTAB */

Cell *nullloc; /* the "$zero&null" constant cell: number 0, string "" */
Node *nullnode; /* zero&null, converted into a node for comparisons */

/* Defined below; only used by setsymtab() in this part of the file. */
static void rehash(Array *);
73
74 void
syminit(void)75 syminit(void)
76 {
77 init_buf(&record, &record_size, LINE_INCR);
78
79 /* initialize $0 */
80 recloc = getfld(0);
81 recloc->nval = (uchar *)"$0";
82 recloc->sval = record;
83 recloc->tval = REC|STR|DONTFREE;
84
85 symtab = makesymtab(NSYMTAB);
86 (void) setsymtab((uchar *)"0", (uchar *)"0", 0.0,
87 NUM|STR|CON|DONTFREE, symtab);
88 /* this is used for if(x)... tests: */
89 nullloc = setsymtab((uchar *)"$zero&null", (uchar *)"", 0.0,
90 NUM|STR|CON|DONTFREE, symtab);
91 nullnode = valtonode(nullloc, CCON);
92 FS = &setsymtab((uchar *)"FS", (uchar *)" ", 0.0,
93 STR|DONTFREE, symtab)->sval;
94 RS = &setsymtab((uchar *)"RS", (uchar *)"\n", 0.0,
95 STR|DONTFREE, symtab)->sval;
96 OFS = &setsymtab((uchar *)"OFS", (uchar *)" ", 0.0,
97 STR|DONTFREE, symtab)->sval;
98 ORS = &setsymtab((uchar *)"ORS", (uchar *)"\n", 0.0,
99 STR|DONTFREE, symtab)->sval;
100 OFMT = &setsymtab((uchar *)"OFMT", (uchar *)"%.6g", 0.0,
101 STR|DONTFREE, symtab)->sval;
102 FILENAME = &setsymtab((uchar *)"FILENAME", (uchar *)"-", 0.0,
103 STR|DONTFREE, symtab)->sval;
104 nfloc = setsymtab((uchar *)"NF", (uchar *)"", 0.0, NUM, symtab);
105 NF = &nfloc->fval;
106 nrloc = setsymtab((uchar *)"NR", (uchar *)"", 0.0, NUM, symtab);
107 NR = &nrloc->fval;
108 fnrloc = setsymtab((uchar *)"FNR", (uchar *)"", 0.0, NUM, symtab);
109 FNR = &fnrloc->fval;
110 SUBSEP = &setsymtab((uchar *)"SUBSEP", (uchar *)"\034", 0.0,
111 STR|DONTFREE, symtab)->sval;
112 rstartloc = setsymtab((uchar *)"RSTART", (uchar *)"", 0.0,
113 NUM, symtab);
114 RSTART = &rstartloc->fval;
115 rlengthloc = setsymtab((uchar *)"RLENGTH", (uchar *)"", 0.0,
116 NUM, symtab);
117 RLENGTH = &rlengthloc->fval;
118 symtabloc = setsymtab((uchar *)"SYMTAB", (uchar *)"", 0.0, ARR, symtab);
119 symtabloc->sval = (uchar *)symtab;
120 }
121
122 void
arginit(int ac,uchar * av[])123 arginit(int ac, uchar *av[])
124 {
125 Cell *cp;
126 int i;
127 uchar temp[11];
128
129 /* first make FILENAME first real argument */
130 for (i = 1; i < ac; i++) {
131 if (!isclvar(av[i])) {
132 (void) setsval(lookup((uchar *)"FILENAME", symtab),
133 av[i]);
134 break;
135 }
136 }
137 ARGC = &setsymtab((uchar *)"ARGC", (uchar *)"", (Awkfloat)ac,
138 NUM, symtab)->fval;
139 cp = setsymtab((uchar *)"ARGV", (uchar *)"", 0.0, ARR, symtab);
140 ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
141 cp->sval = (uchar *) ARGVtab;
142 for (i = 0; i < ac; i++) {
143 (void) sprintf((char *)temp, "%d", i);
144 if (is_number(*av)) {
145 (void) setsymtab(temp, *av, atof((const char *)*av),
146 STR|NUM, ARGVtab);
147 } else {
148 (void) setsymtab(temp, *av, 0.0, STR, ARGVtab);
149 }
150 av++;
151 }
152 }
153
154 void
envinit(uchar * envp[])155 envinit(uchar *envp[])
156 {
157 Cell *cp;
158 uchar *p;
159
160 cp = setsymtab((uchar *)"ENVIRON", (uchar *)"", 0.0, ARR, symtab);
161 ENVtab = makesymtab(NSYMTAB);
162 cp->sval = (uchar *) ENVtab;
163 for (; *envp; envp++) {
164 if ((p = (uchar *)strchr((char *)*envp, '=')) == NULL)
165 continue;
166 *p++ = 0; /* split into two strings at = */
167 if (is_number(p)) {
168 (void) setsymtab(*envp, p, atof((const char *)p),
169 STR|NUM, ENVtab);
170 } else {
171 (void) setsymtab(*envp, p, 0.0, STR, ENVtab);
172 }
173 /* restore in case env is passed down to a shell */
174 p[-1] = '=';
175 }
176 }
177
178 Array *
makesymtab(int n)179 makesymtab(int n)
180 {
181 Array *ap;
182 Cell **tp;
183
184 ap = (Array *)malloc(sizeof (Array));
185 tp = (Cell **)calloc(n, sizeof (Cell *));
186 if (ap == NULL || tp == NULL)
187 ERROR "out of space in makesymtab" FATAL;
188 ap->nelem = 0;
189 ap->size = n;
190 ap->tab = tp;
191 return (ap);
192 }
193
194 void
freesymtab(Cell * ap)195 freesymtab(Cell *ap) /* free symbol table */
196 {
197 Cell *cp, *next;
198 Array *tp;
199 int i;
200
201 if (!isarr(ap))
202 return;
203 /*LINTED align*/
204 tp = (Array *)ap->sval;
205 if (tp == NULL)
206 return;
207 for (i = 0; i < tp->size; i++) {
208 for (cp = tp->tab[i]; cp != NULL; cp = next) {
209 next = cp->cnext;
210 xfree(cp->nval);
211 if (freeable(cp))
212 xfree(cp->sval);
213 free(cp);
214 }
215 }
216 free(tp->tab);
217 free(tp);
218 }
219
220 void
freeelem(Cell * ap,uchar * s)221 freeelem(Cell *ap, uchar *s) /* free elem s from ap (i.e., ap["s"] */
222 {
223 Array *tp;
224 Cell *p, *prev = NULL;
225 int h;
226
227 /*LINTED align*/
228 tp = (Array *)ap->sval;
229 h = hash(s, tp->size);
230 for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
231 if (strcmp((char *)s, (char *)p->nval) == 0) {
232 if (prev == NULL) /* 1st one */
233 tp->tab[h] = p->cnext;
234 else /* middle somewhere */
235 prev->cnext = p->cnext;
236 if (freeable(p))
237 xfree(p->sval);
238 free(p->nval);
239 free(p);
240 tp->nelem--;
241 return;
242 }
243 }
244
245 Cell *
setsymtab(uchar * n,uchar * s,Awkfloat f,unsigned int t,Array * tp)246 setsymtab(uchar *n, uchar *s, Awkfloat f, unsigned int t, Array *tp)
247 {
248 register int h;
249 register Cell *p;
250
251 if (n != NULL && (p = lookup(n, tp)) != NULL) {
252 dprintf(("setsymtab found %p: n=%s", (void *)p, p->nval));
253 dprintf((" s=\"%s\" f=%g t=%p\n",
254 p->sval, p->fval, (void *)p->tval));
255 return (p);
256 }
257 p = (Cell *)malloc(sizeof (Cell));
258 if (p == NULL)
259 ERROR "symbol table overflow at %s", n FATAL;
260 p->nval = tostring(n);
261 p->sval = s ? tostring(s) : tostring((uchar *)"");
262 p->fval = f;
263 p->tval = t;
264 p->csub = 0;
265
266 tp->nelem++;
267 if (tp->nelem > FULLTAB * tp->size)
268 rehash(tp);
269 h = hash(n, tp->size);
270 p->cnext = tp->tab[h];
271 tp->tab[h] = p;
272 dprintf(("setsymtab set %p: n=%s", (void *)p, p->nval));
273 dprintf((" s=\"%s\" f=%g t=%p\n", p->sval, p->fval, (void *)p->tval));
274 return (p);
275 }
276
/*
 * hash - form the hash value for string s in a table of n buckets.
 *
 * Folds the bytes of s into an unsigned accumulator (multiplier 31,
 * wrapping on overflow) and reduces the result modulo n.
 */
int
hash(uchar *s, int n)
{
	unsigned acc = 0;
	const uchar *cp = s;

	while (*cp != '\0') {
		acc = 31 * acc + *cp;
		cp++;
	}
	return (acc % n);
}
286
287 static void
rehash(Array * tp)288 rehash(Array *tp) /* rehash items in small table into big one */
289 {
290 int i, nh, nsz;
291 Cell *cp, *op, **np;
292
293 nsz = GROWTAB * tp->size;
294 np = (Cell **)calloc(nsz, sizeof (Cell *));
295 if (np == NULL)
296 ERROR "out of space in rehash" FATAL;
297 for (i = 0; i < tp->size; i++) {
298 for (cp = tp->tab[i]; cp; cp = op) {
299 op = cp->cnext;
300 nh = hash(cp->nval, nsz);
301 cp->cnext = np[nh];
302 np[nh] = cp;
303 }
304 }
305 free(tp->tab);
306 tp->tab = np;
307 tp->size = nsz;
308 }
309
310 Cell *
lookup(uchar * s,Array * tp)311 lookup(uchar *s, Array *tp) /* look for s in tp */
312 {
313 register Cell *p;
314 int h;
315
316 h = hash(s, tp->size);
317 for (p = tp->tab[h]; p != NULL; p = p->cnext) {
318 if (strcmp((char *)s, (char *)p->nval) == 0)
319 return (p); /* found it */
320 }
321 return (NULL); /* not found */
322 }
323
324 Awkfloat
setfval(Cell * vp,Awkfloat f)325 setfval(Cell *vp, Awkfloat f)
326 {
327 int i;
328
329 if ((vp->tval & (NUM | STR)) == 0)
330 funnyvar(vp, "assign to");
331 if (vp->tval & FLD) {
332 donerec = 0; /* mark $0 invalid */
333 i = fldidx(vp);
334 if (i > *NF)
335 newfld(i);
336 dprintf(("setting field %d to %g\n", i, f));
337 } else if (vp->tval & REC) {
338 donefld = 0; /* mark $1... invalid */
339 donerec = 1;
340 }
341 vp->tval &= ~STR; /* mark string invalid */
342 vp->tval |= NUM; /* mark number ok */
343 dprintf(("setfval %p: %s = %g, t=%p\n", (void *)vp,
344 vp->nval ? vp->nval : (unsigned char *)"NULL",
345 f, (void *)vp->tval));
346 return (vp->fval = f);
347 }
348
349 void
funnyvar(Cell * vp,char * rw)350 funnyvar(Cell *vp, char *rw)
351 {
352 if (vp->tval & ARR)
353 ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
354 if (vp->tval & FCN)
355 ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
356 ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o",
357 vp, vp->nval, vp->sval, vp->fval, vp->tval CONT;
358 }
359
360 uchar *
setsval(Cell * vp,uchar * s)361 setsval(Cell *vp, uchar *s)
362 {
363 int i;
364
365 if ((vp->tval & (NUM | STR)) == 0)
366 funnyvar(vp, "assign to");
367 if (vp->tval & FLD) {
368 donerec = 0; /* mark $0 invalid */
369 i = fldidx(vp);
370 if (i > *NF)
371 newfld(i);
372 dprintf(("setting field %d to %s\n", i, s));
373 } else if (vp->tval & REC) {
374 donefld = 0; /* mark $1... invalid */
375 donerec = 1;
376 }
377 vp->tval &= ~NUM;
378 vp->tval |= STR;
379 if (freeable(vp))
380 xfree(vp->sval);
381 vp->tval &= ~DONTFREE;
382 dprintf(("setsval %p: %s = \"%s\", t=%p\n",
383 (void *)vp,
384 vp->nval ? (char *)vp->nval : "",
385 s,
386 (void *)(vp->tval ? (char *)vp->tval : "")));
387 return (vp->sval = tostring(s));
388 }
389
390 Awkfloat
r_getfval(Cell * vp)391 r_getfval(Cell *vp)
392 {
393 if ((vp->tval & (NUM | STR)) == 0)
394 funnyvar(vp, "read value of");
395 if ((vp->tval & FLD) && donefld == 0)
396 fldbld();
397 else if ((vp->tval & REC) && donerec == 0)
398 recbld();
399 if (!isnum(vp)) { /* not a number */
400 vp->fval = atof((const char *)vp->sval); /* best guess */
401 if (is_number(vp->sval) && !(vp->tval&CON))
402 vp->tval |= NUM; /* make NUM only sparingly */
403 }
404 dprintf(("getfval %p: %s = %g, t=%p\n",
405 (void *)vp, vp->nval, vp->fval, (void *)vp->tval));
406 return (vp->fval);
407 }
408
409 uchar *
r_getsval(Cell * vp)410 r_getsval(Cell *vp)
411 {
412 uchar s[256];
413
414 if ((vp->tval & (NUM | STR)) == 0)
415 funnyvar(vp, "read value of");
416 if ((vp->tval & FLD) && donefld == 0)
417 fldbld();
418 else if ((vp->tval & REC) && donerec == 0)
419 recbld();
420 if ((vp->tval & STR) == 0) {
421 if (!(vp->tval&DONTFREE))
422 xfree(vp->sval);
423 if ((long long)vp->fval == vp->fval) {
424 (void) snprintf((char *)s, sizeof (s),
425 "%.20g", vp->fval);
426 } else {
427 /*LINTED*/
428 (void) snprintf((char *)s, sizeof (s),
429 (char *)*OFMT, vp->fval);
430 }
431 vp->sval = tostring(s);
432 vp->tval &= ~DONTFREE;
433 vp->tval |= STR;
434 }
435 dprintf(("getsval %p: %s = \"%s\", t=%p\n",
436 (void *)vp,
437 vp->nval ? (char *)vp->nval : "",
438 vp->sval ? (char *)vp->sval : "",
439 (void *)vp->tval));
440 return (vp->sval);
441 }
442
443 uchar *
tostring(uchar * s)444 tostring(uchar *s)
445 {
446 register uchar *p;
447
448 p = (uchar *)malloc(strlen((char *)s)+1);
449 if (p == NULL)
450 ERROR "out of space in tostring on %s", s FATAL;
451 (void) strcpy((char *)p, (char *)s);
452 return (p);
453 }
454
455 uchar *
qstring(uchar * s,int delim)456 qstring(uchar *s, int delim) /* collect string up to delim */
457 {
458 uchar *cbuf, *ret;
459 int c, n;
460 size_t cbufsz, cnt;
461
462 init_buf(&cbuf, &cbufsz, LINE_INCR);
463
464 for (cnt = 0; (c = *s) != delim; s++) {
465 if (c == '\n') {
466 ERROR "newline in string %.10s...", cbuf SYNTAX;
467 } else if (c != '\\') {
468 expand_buf(&cbuf, &cbufsz, cnt);
469 cbuf[cnt++] = c;
470 } else { /* \something */
471 expand_buf(&cbuf, &cbufsz, cnt);
472 switch (c = *++s) {
473 case '\\': cbuf[cnt++] = '\\'; break;
474 case 'n': cbuf[cnt++] = '\n'; break;
475 case 't': cbuf[cnt++] = '\t'; break;
476 case 'b': cbuf[cnt++] = '\b'; break;
477 case 'f': cbuf[cnt++] = '\f'; break;
478 case 'r': cbuf[cnt++] = '\r'; break;
479 default:
480 if (!isdigit(c)) {
481 cbuf[cnt++] = c;
482 break;
483 }
484 n = c - '0';
485 if (isdigit(s[1])) {
486 n = 8 * n + *++s - '0';
487 if (isdigit(s[1]))
488 n = 8 * n + *++s - '0';
489 }
490 cbuf[cnt++] = n;
491 break;
492 }
493 }
494 }
495 cbuf[cnt] = '\0';
496 ret = tostring(cbuf);
497 free(cbuf);
498 return (ret);
499 }
500