1 /*
2 * Copyright (C) Lucent Technologies 1997
3 * All Rights Reserved
4 *
5 * Permission to use, copy, modify, and distribute this software and
6 * its documentation for any purpose and without fee is hereby
7 * granted, provided that the above copyright notice appear in all
8 * copies and that both that the copyright notice and this
9 * permission notice and warranty disclaimer appear in supporting
10 * documentation, and that the name Lucent Technologies or any of
11 * its entities not be used in advertising or publicity pertaining
12 * to distribution of the software without specific, written prior
13 * permission.
14 *
15 * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 * THIS SOFTWARE.
23 */
24
25 /*
26 * CDDL HEADER START
27 *
28 * The contents of this file are subject to the terms of the
29 * Common Development and Distribution License (the "License").
30 * You may not use this file except in compliance with the License.
31 *
32 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
33 * or http://www.opensolaris.org/os/licensing.
34 * See the License for the specific language governing permissions
35 * and limitations under the License.
36 *
37 * When distributing Covered Code, include this CDDL HEADER in each
38 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
39 * If applicable, add the following below this CDDL HEADER, with the
40 * fields enclosed by brackets "[]" replaced with your own identifying
41 * information: Portions Copyright [yyyy] [name of copyright owner]
42 *
43 * CDDL HEADER END
44 */
45
46 /*
47 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
48 * Use is subject to license terms.
49 */
50
51 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
52 /* All Rights Reserved */
53
54 #define DEBUG
55 #include <stdio.h>
56 #include <math.h>
57 #include <ctype.h>
58 #include <string.h>
59 #include <stdlib.h>
60 #include "awk.h"
61 #include "y.tab.h"
62
63 #define FULLTAB 2 /* rehash when table gets this x full */
64 #define GROWTAB 4 /* grow table by this factor */
65
66 Array *symtab; /* main symbol table */
67
68 char **FS; /* initial field sep */
69 char **RS; /* initial record sep */
70 char **OFS; /* output field sep */
71 char **ORS; /* output record sep */
72 char **OFMT; /* output format for numbers */
73 char **CONVFMT; /* format for conversions in getsval */
74 Awkfloat *NF; /* number of fields in current record */
75 Awkfloat *NR; /* number of current record */
76 Awkfloat *FNR; /* number of current record in current file */
77 char **FILENAME; /* current filename argument */
78 Awkfloat *ARGC; /* number of arguments from command line */
79 char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
80 Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
81 Awkfloat *RLENGTH; /* length of same */
82
83 Cell *recloc; /* location of record */
84 Cell *fsloc; /* FS */
85 Cell *nrloc; /* NR */
86 Cell *nfloc; /* NF */
87 Cell *fnrloc; /* FNR */
88 Cell *ofsloc; /* OFS */
89 Cell *orsloc; /* ORS */
90 Cell *rsloc; /* RS */
91 Cell *rtloc; /* RT */
92 Array *ARGVtab; /* symbol table containing ARGV[...] */
93 Array *ENVtab; /* symbol table containing ENVIRON[...] */
94 Cell *rstartloc; /* RSTART */
95 Cell *rlengthloc; /* RLENGTH */
96 Cell *subseploc; /* SUBSEP */
97 Cell *symtabloc; /* SYMTAB */
98
99 Cell *nullloc; /* a guaranteed empty cell */
100 Node *nullnode; /* zero&null, converted into a node for comparisons */
101 Cell *literal0;
102
103 static void rehash(Array *);
104
105 static void
setfree(Cell * vp)106 setfree(Cell *vp)
107 {
108 if (&vp->sval == FS || &vp->sval == RS ||
109 &vp->sval == OFS || &vp->sval == ORS ||
110 &vp->sval == OFMT || &vp->sval == CONVFMT ||
111 &vp->sval == FILENAME || &vp->sval == SUBSEP)
112 vp->tval |= DONTFREE;
113 else
114 vp->tval &= ~DONTFREE;
115 }
116
117 void
syminit(void)118 syminit(void) /* initialize symbol table with builtin vars */
119 {
120 /* initialize $0 */
121 recloc = fieldadr(0);
122 recloc->nval = "$0";
123 recloc->sval = record;
124 recloc->tval = REC|STR|DONTFREE;
125
126 literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
127 /* this is used for if(x)... tests: */
128 nullloc = setsymtab("$zero&null", "", 0.0,
129 NUM|STR|CON|DONTFREE, symtab);
130 nullnode = celltonode(nullloc, CCON);
131
132 fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
133 FS = &fsloc->sval;
134 rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab);
135 RS = &rsloc->sval;
136 rtloc = setsymtab("RT", "", 0.0, STR|DONTFREE, symtab);
137 ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab);
138 OFS = &ofsloc->sval;
139 orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab);
140 ORS = &orsloc->sval;
141 OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
142 CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0,
143 STR|DONTFREE, symtab)->sval;
144 FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
145 nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
146 NF = &nfloc->fval;
147 nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
148 NR = &nrloc->fval;
149 fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
150 FNR = &fnrloc->fval;
151 subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab);
152 SUBSEP = &subseploc->sval;
153 rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
154 RSTART = &rstartloc->fval;
155 rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
156 RLENGTH = &rlengthloc->fval;
157 symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
158 symtabloc->sval = (char *)symtab;
159 }
160
161 void
arginit(int ac,char ** av)162 arginit(int ac, char **av) /* set up ARGV and ARGC */
163 {
164 Cell *cp;
165 int i;
166 char temp[50];
167
168 ARGC = &setsymtab("ARGC", "", (Awkfloat)ac, NUM, symtab)->fval;
169 cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
170 ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
171 cp->sval = (char *)ARGVtab;
172 for (i = 0; i < ac; i++) {
173 (void) sprintf(temp, "%d", i);
174 if (is_number(*av)) {
175 (void) setsymtab(temp, *av, atof(*av),
176 STR|NUM, ARGVtab);
177 } else {
178 (void) setsymtab(temp, *av, 0.0, STR, ARGVtab);
179 }
180 av++;
181 }
182 }
183
184 void
envinit(char ** envp)185 envinit(char **envp) /* set up ENVIRON variable */
186 {
187 Cell *cp;
188 char *p;
189
190 cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
191 ENVtab = makesymtab(NSYMTAB);
192 cp->sval = (char *)ENVtab;
193 for (; *envp; envp++) {
194 if ((p = strchr(*envp, '=')) == NULL)
195 continue;
196 if (p == *envp) /* no left hand side name in env string */
197 continue;
198 *p++ = 0; /* split into two strings at = */
199 if (is_number(p)) {
200 (void) setsymtab(*envp, p, atof(p),
201 STR|NUM, ENVtab);
202 } else {
203 (void) setsymtab(*envp, p, 0.0, STR, ENVtab);
204 }
205 /* restore in case env is passed down to a shell */
206 p[-1] = '=';
207 }
208 }
209
210 Array *
makesymtab(int n)211 makesymtab(int n) /* make a new symbol table */
212 {
213 Array *ap;
214 Cell **tp;
215
216 ap = (Array *)malloc(sizeof (Array));
217 tp = (Cell **)calloc(n, sizeof (Cell *));
218 if (ap == NULL || tp == NULL)
219 FATAL("out of space in makesymtab");
220 ap->nelem = 0;
221 ap->size = n;
222 ap->tab = tp;
223 return (ap);
224 }
225
226 void
freesymtab(Cell * ap)227 freesymtab(Cell *ap) /* free a symbol table */
228 {
229 Cell *cp, *temp;
230 Array *tp;
231 int i;
232
233 if (!isarr(ap))
234 return;
235 /*LINTED align*/
236 tp = (Array *)ap->sval;
237 if (tp == NULL)
238 return;
239 for (i = 0; i < tp->size; i++) {
240 for (cp = tp->tab[i]; cp != NULL; cp = temp) {
241 xfree(cp->nval);
242 if (freeable(cp))
243 xfree(cp->sval);
244 temp = cp->cnext; /* avoids freeing then using */
245 free(cp);
246 tp->nelem--;
247 }
248 tp->tab[i] = 0;
249 }
250 if (tp->nelem != 0) {
251 WARNING("can't happen: inconsistent element count freeing %s",
252 ap->nval);
253 }
254 free(tp->tab);
255 free(tp);
256 }
257
258 void
freeelem(Cell * ap,const char * s)259 freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */
260 {
261 Array *tp;
262 Cell *p, *prev = NULL;
263 int h;
264
265 /*LINTED align*/
266 tp = (Array *)ap->sval;
267 h = hash(s, tp->size);
268 for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
269 if (strcmp(s, p->nval) == 0) {
270 if (prev == NULL) /* 1st one */
271 tp->tab[h] = p->cnext;
272 else /* middle somewhere */
273 prev->cnext = p->cnext;
274 if (freeable(p))
275 xfree(p->sval);
276 free(p->nval);
277 free(p);
278 tp->nelem--;
279 return;
280 }
281 }
282
283 Cell *
setsymtab(const char * n,const char * s,Awkfloat f,unsigned int t,Array * tp)284 setsymtab(const char *n, const char *s, Awkfloat f, unsigned int t, Array *tp)
285 {
286 int h;
287 Cell *p;
288
289 if (n != NULL && (p = lookup(n, tp)) != NULL) {
290 dprintf(("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
291 (void *)p, NN(p->nval), NN(p->sval), p->fval, p->tval));
292 return (p);
293 }
294 p = (Cell *)malloc(sizeof (Cell));
295 if (p == NULL)
296 FATAL("out of space for symbol table at %s", n);
297 p->nval = tostring(n);
298 p->sval = s ? tostring(s) : tostring("");
299 p->fval = f;
300 p->tval = t;
301 p->csub = CUNK;
302 p->ctype = OCELL;
303 tp->nelem++;
304 if (tp->nelem > FULLTAB * tp->size)
305 rehash(tp);
306 h = hash(n, tp->size);
307 p->cnext = tp->tab[h];
308 tp->tab[h] = p;
309 dprintf(("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
310 (void *)p, p->nval, p->sval, p->fval, p->tval));
311 return (p);
312 }
313
/*
 * Hash string s into a bucket index in the range [0, n).
 * The running value is multiplied by 31 and the next character added,
 * using unsigned arithmetic, before the final reduction modulo n.
 */
int
hash(const char *s, int n)
{
	unsigned int acc = 0;

	while (*s != '\0') {
		acc = 31 * acc + *s;
		s++;
	}
	return (acc % n);
}
323
324 static void
rehash(Array * tp)325 rehash(Array *tp) /* rehash items in small table into big one */
326 {
327 int i, nh, nsz;
328 Cell *cp, *op, **np;
329
330 nsz = GROWTAB * tp->size;
331 np = (Cell **)calloc(nsz, sizeof (Cell *));
332 if (np == NULL) /* can't do it, but can keep running. */
333 return; /* someone else will run out later. */
334 for (i = 0; i < tp->size; i++) {
335 for (cp = tp->tab[i]; cp != NULL; cp = op) {
336 op = cp->cnext;
337 nh = hash(cp->nval, nsz);
338 cp->cnext = np[nh];
339 np[nh] = cp;
340 }
341 }
342 free(tp->tab);
343 tp->tab = np;
344 tp->size = nsz;
345 }
346
347 Cell *
lookup(const char * s,Array * tp)348 lookup(const char *s, Array *tp) /* look for s in tp */
349 {
350 Cell *p;
351 int h;
352
353 h = hash(s, tp->size);
354 for (p = tp->tab[h]; p != NULL; p = p->cnext) {
355 if (strcmp(s, p->nval) == 0)
356 return (p); /* found it */
357 }
358 return (NULL); /* not found */
359 }
360
361 Awkfloat
setfval(Cell * vp,Awkfloat f)362 setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
363 {
364 int fldno;
365
366 f += 0.0; /* normalise negative zero to positive zero */
367 if ((vp->tval & (NUM | STR)) == 0)
368 funnyvar(vp, "assign to");
369 if (isfld(vp)) {
370 donerec = 0; /* mark $0 invalid */
371 fldno = atoi(vp->nval);
372 if (fldno > *NF)
373 newfld(fldno);
374 dprintf(("setting field %d to %g\n", fldno, f));
375 } else if (&vp->fval == NF) {
376 donerec = 0; /* mark $0 invalid */
377 setlastfld((int)f);
378 dprintf(("setting NF to %g\n", f));
379 } else if (isrec(vp)) {
380 donefld = 0; /* mark $1... invalid */
381 donerec = 1;
382 savefs();
383 } else if (vp == ofsloc) {
384 if (donerec == 0)
385 recbld();
386 }
387 if (freeable(vp))
388 xfree(vp->sval); /* free any previous string */
389 vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */
390 vp->fmt = NULL;
391 vp->tval |= NUM; /* mark number ok */
392 if (f == -0) /* who would have thought this possible? */
393 f = 0;
394 dprintf(("setfval %p: %s = %g, t=%o\n", (void *)vp,
395 NN(vp->nval), f, vp->tval));
396 return (vp->fval = f);
397 }
398
399 void
funnyvar(Cell * vp,const char * rw)400 funnyvar(Cell *vp, const char *rw)
401 {
402 if (isarr(vp))
403 FATAL("can't %s %s; it's an array name.", rw, vp->nval);
404 if (isfcn(vp))
405 FATAL("can't %s %s; it's a function.", rw, vp->nval);
406 WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
407 vp, vp->nval, vp->sval, vp->fval, vp->tval);
408 }
409
410 char *
setsval(Cell * vp,const char * s)411 setsval(Cell *vp, const char *s) /* set string val of a Cell */
412 {
413 char *t;
414 int fldno;
415 Awkfloat f;
416
417 dprintf(("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
418 (void *)vp, NN(vp->nval), s, vp->tval, donerec, donefld));
419 if ((vp->tval & (NUM | STR)) == 0)
420 funnyvar(vp, "assign to");
421 if (isfld(vp)) {
422 donerec = 0; /* mark $0 invalid */
423 fldno = atoi(vp->nval);
424 if (fldno > *NF)
425 newfld(fldno);
426 dprintf(("setting field %d to %s (%p)\n", fldno, s, (void *)s));
427 } else if (isrec(vp)) {
428 donefld = 0; /* mark $1... invalid */
429 donerec = 1;
430 savefs();
431 } else if (vp == ofsloc) {
432 if (donerec == 0)
433 recbld();
434 }
435 t = s ? tostring(s) : tostring(""); /* in case it's self-assign */
436 if (freeable(vp))
437 xfree(vp->sval);
438 vp->tval &= ~(NUM|CONVC|CONVO);
439 vp->tval |= STR;
440 vp->fmt = NULL;
441 setfree(vp);
442 dprintf(("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
443 (void *)vp, NN(vp->nval), t, (void *)t,
444 vp->tval, donerec, donefld));
445 vp->sval = t;
446 if (&vp->fval == NF) {
447 donerec = 0; /* mark $0 invalid */
448 f = getfval(vp);
449 setlastfld((int)f);
450 dprintf(("setting NF to %g\n", f));
451 }
452
453 return (vp->sval);
454 }
455
456 Awkfloat
getfval(Cell * vp)457 getfval(Cell *vp) /* get float val of a Cell */
458 {
459 if ((vp->tval & (NUM | STR)) == 0)
460 funnyvar(vp, "read value of");
461 if (isfld(vp) && donefld == 0)
462 fldbld();
463 else if (isrec(vp) && donerec == 0)
464 recbld();
465 if (!isnum(vp)) { /* not a number */
466 vp->fval = atof(vp->sval); /* best guess */
467 if (is_number(vp->sval) && !(vp->tval&CON))
468 vp->tval |= NUM; /* make NUM only sparingly */
469 }
470 dprintf(("getfval %p: %s = %g, t=%o\n",
471 (void *)vp, NN(vp->nval), vp->fval, vp->tval));
472 return (vp->fval);
473 }
474
475 static char *
get_str_val(Cell * vp,char ** fmt)476 get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */
477 {
478 char s[256];
479 double dtemp;
480
481 if ((vp->tval & (NUM | STR)) == 0)
482 funnyvar(vp, "read value of");
483 if (isfld(vp) && donefld == 0)
484 fldbld();
485 else if (isrec(vp) && donerec == 0)
486 recbld();
487
488 /*
489 * ADR: This is complicated and more fragile than is desirable.
490 * Retrieving a string value for a number associates the string
491 * value with the scalar. Previously, the string value was
492 * sticky, meaning if converted via OFMT that became the value
493 * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT
494 * changed after a string value was retrieved, the original value
495 * was maintained and used. Also not per POSIX.
496 *
497 * We work around this design by adding two additional flags,
498 * CONVC and CONVO, indicating how the string value was
499 * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy
500 * of the pointer to the xFMT format string used for the
501 * conversion. This pointer is only read, **never** dereferenced.
502 * The next time we do a conversion, if it's coming from the same
503 * xFMT as last time, and the pointer value is different, we
504 * know that the xFMT format string changed, and we need to
505 * redo the conversion. If it's the same, we don't have to.
506 *
507 * There are also several cases where we don't do a conversion,
508 * such as for a field (see the checks below).
509 */
510
511 /* Don't duplicate the code for actually updating the value */
512 #define update_str_val(vp) \
513 { \
514 if (freeable(vp)) \
515 xfree(vp->sval); \
516 if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \
517 (void) snprintf(s, sizeof (s), "%.30g", vp->fval); \
518 else \
519 (void) snprintf(s, sizeof (s), *fmt, vp->fval); \
520 vp->sval = tostring(s); \
521 vp->tval &= ~DONTFREE; \
522 vp->tval |= STR; \
523 }
524
525 if (isstr(vp) == 0) {
526 /*LINTED*/
527 update_str_val(vp);
528 if (fmt == OFMT) {
529 vp->tval &= ~CONVC;
530 vp->tval |= CONVO;
531 } else {
532 /* CONVFMT */
533 vp->tval &= ~CONVO;
534 vp->tval |= CONVC;
535 }
536 vp->fmt = *fmt;
537 } else if ((vp->tval & DONTFREE) != 0 || !isnum(vp) || isfld(vp)) {
538 goto done;
539 } else if (isstr(vp)) {
540 if (fmt == OFMT) {
541 if ((vp->tval & CONVC) != 0 ||
542 ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) {
543 /*LINTED*/
544 update_str_val(vp);
545 vp->tval &= ~CONVC;
546 vp->tval |= CONVO;
547 vp->fmt = *fmt;
548 }
549 } else {
550 /* CONVFMT */
551 if ((vp->tval & CONVO) != 0 ||
552 ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) {
553 /*LINTED*/
554 update_str_val(vp);
555 vp->tval &= ~CONVO;
556 vp->tval |= CONVC;
557 vp->fmt = *fmt;
558 }
559 }
560 }
561 done:
562 dprintf(("getsval %p: %s = \"%s (%p)\", t=%o\n",
563 (void *)vp, NN(vp->nval), vp->sval, (void *)vp->sval, vp->tval));
564 return (vp->sval);
565 }
566
567 char *
getsval(Cell * vp)568 getsval(Cell *vp) /* get string val of a Cell */
569 {
570 return (get_str_val(vp, CONVFMT));
571 }
572
573 char *
getpssval(Cell * vp)574 getpssval(Cell *vp) /* get string val of a Cell for print */
575 {
576 return (get_str_val(vp, OFMT));
577 }
578
579
/*
 * Return a freshly allocated copy of string s.
 * Calls FATAL if the copy cannot be allocated.
 */
char *
tostring(const char *s)
{
	char *copy;

	if ((copy = strdup(s)) == NULL)
		FATAL("out of space in tostring on %s", s);
	return (copy);
}
588
589 char *
qstring(const char * is,int delim)590 qstring(const char *is, int delim) /* collect string up to next delim */
591 {
592 const char *os = is;
593 int c, n;
594 uschar *s = (uschar *)is;
595 uschar *buf, *bp;
596
597 if ((buf = (uschar *)malloc(strlen(is)+3)) == NULL)
598 FATAL("out of space in qstring(%s)", s);
599 for (bp = buf; (c = *s) != delim; s++) {
600 if (c == '\n') {
601 SYNTAX("newline in string %.20s...", os);
602 } else if (c != '\\')
603 *bp++ = c;
604 else { /* \something */
605 c = *++s;
606 if (c == 0) { /* \ at end */
607 *bp++ = '\\';
608 break; /* for loop */
609 }
610 switch (c) {
611 case '\\': *bp++ = '\\'; break;
612 case 'n': *bp++ = '\n'; break;
613 case 't': *bp++ = '\t'; break;
614 case 'b': *bp++ = '\b'; break;
615 case 'f': *bp++ = '\f'; break;
616 case 'r': *bp++ = '\r'; break;
617 default:
618 if (!isdigit(c)) {
619 *bp++ = c;
620 break;
621 }
622 n = c - '0';
623 if (isdigit(s[1])) {
624 n = 8 * n + *++s - '0';
625 if (isdigit(s[1]))
626 n = 8 * n + *++s - '0';
627 }
628 *bp++ = n;
629 break;
630 }
631 }
632 }
633 *bp++ = 0;
634 return ((char *)buf);
635 }
636