1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 #define DEBUG 26 #include <stdio.h> 27 #include <math.h> 28 #include <ctype.h> 29 #include <string.h> 30 #include <stdlib.h> 31 #include "awk.h" 32 33 #define FULLTAB 2 /* rehash when table gets this x full */ 34 #define GROWTAB 4 /* grow table by this factor */ 35 36 Array *symtab; /* main symbol table */ 37 38 char **FS; /* initial field sep */ 39 char **RS; /* initial record sep */ 40 char **OFS; /* output field sep */ 41 char **ORS; /* output record sep */ 42 char **OFMT; /* output format for numbers */ 43 char **CONVFMT; /* format for conversions in getsval */ 44 Awkfloat *NF; /* number of fields in current record */ 45 Awkfloat *NR; /* number of current record */ 46 Awkfloat *FNR; /* number of current record in current file */ 47 char **FILENAME; /* current filename argument */ 48 Awkfloat *ARGC; /* number of arguments from command line */ 49 char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ 50 Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */ 51 Awkfloat *RLENGTH; /* length of same */ 52 53 Cell *fsloc; /* FS */ 54 Cell *nrloc; /* NR */ 55 Cell *nfloc; /* NF */ 56 Cell *fnrloc; /* FNR */ 57 Cell *ofsloc; /* OFS */ 58 Cell *orsloc; /* ORS */ 59 Cell *rsloc; /* RS */ 60 Cell *ARGVcell; /* cell with symbol table containing ARGV[...] */ 61 Cell *rstartloc; /* RSTART */ 62 Cell *rlengthloc; /* RLENGTH */ 63 Cell *subseploc; /* SUBSEP */ 64 Cell *symtabloc; /* SYMTAB */ 65 66 Cell *nullloc; /* a guaranteed empty cell */ 67 Node *nullnode; /* zero&null, converted into a node for comparisons */ 68 Cell *literal0; 69 70 extern Cell **fldtab; 71 72 void syminit(void) /* initialize symbol table with builtin vars */ 73 { 74 literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); 75 /* this is used for if(x)... tests: */ 76 nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab); 77 nullnode = celltonode(nullloc, CCON); 78 79 fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); 80 FS = &fsloc->sval; 81 rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab); 82 RS = &rsloc->sval; 83 ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab); 84 OFS = &ofsloc->sval; 85 orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab); 86 ORS = &orsloc->sval; 87 OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; 88 CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; 89 FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; 90 nfloc = setsymtab("NF", "", 0.0, NUM, symtab); 91 NF = &nfloc->fval; 92 nrloc = setsymtab("NR", "", 0.0, NUM, symtab); 93 NR = &nrloc->fval; 94 fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); 95 FNR = &fnrloc->fval; 96 subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab); 97 SUBSEP = &subseploc->sval; 98 rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); 99 RSTART = &rstartloc->fval; 100 rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); 101 RLENGTH = &rlengthloc->fval; 102 symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); 103 free(symtabloc->sval); 104 symtabloc->sval = (char *) symtab; 105 } 106 107 void arginit(int ac, char **av) /* set up ARGV and ARGC */ 108 { 109 Array *ap; 110 Cell *cp; 111 int i; 112 char temp[50]; 113 114 ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; 115 cp = setsymtab("ARGV", "", 0.0, ARR, symtab); 116 ap = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ 117 free(cp->sval); 118 cp->sval = (char *) ap; 119 for (i = 0; i < ac; i++) { 120 double result; 121 122 sprintf(temp, "%d", i); 123 if (is_number(*av, & result)) 124 setsymtab(temp, *av, result, STR|NUM, ap); 125 else 126 setsymtab(temp, *av, 0.0, STR, ap); 127 av++; 128 } 129 ARGVcell = cp; 130 } 131 132 void envinit(char **envp) /* set up ENVIRON variable */ 133 { 134 Array *ap; 135 Cell *cp; 136 char *p; 137 138 cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); 139 ap = makesymtab(NSYMTAB); 140 free(cp->sval); 141 cp->sval = (char *) ap; 142 for ( ; *envp; envp++) { 143 double result; 144 145 if ((p = strchr(*envp, '=')) == NULL) 146 continue; 147 if( p == *envp ) /* no left hand side name in env string */ 148 continue; 149 *p++ = 0; /* split into two strings at = */ 150 if (is_number(p, & result)) 151 setsymtab(*envp, p, result, STR|NUM, ap); 152 else 153 setsymtab(*envp, p, 0.0, STR, ap); 154 p[-1] = '='; /* restore in case env is passed down to a shell */ 155 } 156 } 157 158 Array *makesymtab(int n) /* make a new symbol table */ 159 { 160 Array *ap; 161 Cell **tp; 162 163 ap = (Array *) malloc(sizeof(*ap)); 164 tp = (Cell **) calloc(n, sizeof(*tp)); 165 if (ap == NULL || tp == NULL) 166 FATAL("out of space in makesymtab"); 167 ap->nelem = 0; 168 ap->size = n; 169 ap->tab = tp; 170 return(ap); 171 } 172 173 void freesymtab(Cell *ap) /* free a symbol table */ 174 { 175 Cell *cp, *temp; 176 Array *tp; 177 int i; 178 179 if (!isarr(ap)) 180 return; 181 tp = (Array *) ap->sval; 182 if (tp == NULL) 183 return; 184 for (i = 0; i < tp->size; i++) { 185 for (cp = tp->tab[i]; cp != NULL; cp = temp) { 186 xfree(cp->nval); 187 if (freeable(cp)) 188 xfree(cp->sval); 189 temp = cp->cnext; /* avoids freeing then using */ 190 free(cp); 191 tp->nelem--; 192 } 193 tp->tab[i] = NULL; 194 } 195 if (tp->nelem != 0) 196 WARNING("can't happen: inconsistent element count freeing %s", ap->nval); 197 free(tp->tab); 198 free(tp); 199 } 200 201 void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */ 202 { 203 Array *tp; 204 Cell *p, *prev = NULL; 205 int h; 206 207 tp = (Array *) ap->sval; 208 h = hash(s, tp->size); 209 for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) 210 if (strcmp(s, p->nval) == 0) { 211 if (prev == NULL) /* 1st one */ 212 tp->tab[h] = p->cnext; 213 else /* middle somewhere */ 214 prev->cnext = p->cnext; 215 if (freeable(p)) 216 xfree(p->sval); 217 free(p->nval); 218 free(p); 219 tp->nelem--; 220 return; 221 } 222 } 223 224 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp) 225 { 226 int h; 227 Cell *p; 228 229 if (n != NULL && (p = lookup(n, tp)) != NULL) { 230 DPRINTF("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", 231 (void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval); 232 return(p); 233 } 234 p = (Cell *) malloc(sizeof(*p)); 235 if (p == NULL) 236 FATAL("out of space for symbol table at %s", n); 237 p->nval = tostring(n); 238 p->sval = s ? tostring(s) : tostring(""); 239 p->fval = f; 240 p->tval = t; 241 p->csub = CUNK; 242 p->ctype = OCELL; 243 tp->nelem++; 244 if (tp->nelem > FULLTAB * tp->size) 245 rehash(tp); 246 h = hash(n, tp->size); 247 p->cnext = tp->tab[h]; 248 tp->tab[h] = p; 249 DPRINTF("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", 250 (void*)p, p->nval, p->sval, p->fval, p->tval); 251 return(p); 252 } 253 254 int hash(const char *s, int n) /* form hash value for string s */ 255 { 256 unsigned hashval; 257 258 for (hashval = 0; *s != '\0'; s++) 259 hashval = (*s + 31 * hashval); 260 return hashval % n; 261 } 262 263 void rehash(Array *tp) /* rehash items in small table into big one */ 264 { 265 int i, nh, nsz; 266 Cell *cp, *op, **np; 267 268 nsz = GROWTAB * tp->size; 269 np = (Cell **) calloc(nsz, sizeof(*np)); 270 if (np == NULL) /* can't do it, but can keep running. */ 271 return; /* someone else will run out later. */ 272 for (i = 0; i < tp->size; i++) { 273 for (cp = tp->tab[i]; cp; cp = op) { 274 op = cp->cnext; 275 nh = hash(cp->nval, nsz); 276 cp->cnext = np[nh]; 277 np[nh] = cp; 278 } 279 } 280 free(tp->tab); 281 tp->tab = np; 282 tp->size = nsz; 283 } 284 285 Cell *lookup(const char *s, Array *tp) /* look for s in tp */ 286 { 287 Cell *p; 288 int h; 289 290 h = hash(s, tp->size); 291 for (p = tp->tab[h]; p != NULL; p = p->cnext) 292 if (strcmp(s, p->nval) == 0) 293 return(p); /* found it */ 294 return(NULL); /* not found */ 295 } 296 297 Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ 298 { 299 int fldno; 300 301 f += 0.0; /* normalise negative zero to positive zero */ 302 if ((vp->tval & (NUM | STR)) == 0) 303 funnyvar(vp, "assign to"); 304 if (isfld(vp)) { 305 donerec = false; /* mark $0 invalid */ 306 fldno = atoi(vp->nval); 307 if (fldno > *NF) 308 newfld(fldno); 309 DPRINTF("setting field %d to %g\n", fldno, f); 310 } else if (&vp->fval == NF) { 311 donerec = false; /* mark $0 invalid */ 312 setlastfld(f); 313 DPRINTF("setfval: setting NF to %g\n", f); 314 } else if (isrec(vp)) { 315 donefld = false; /* mark $1... invalid */ 316 donerec = true; 317 savefs(); 318 } else if (vp == ofsloc) { 319 if (!donerec) 320 recbld(); 321 } 322 if (freeable(vp)) 323 xfree(vp->sval); /* free any previous string */ 324 vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */ 325 vp->fmt = NULL; 326 vp->tval |= NUM; /* mark number ok */ 327 if (f == -0) /* who would have thought this possible? */ 328 f = 0; 329 DPRINTF("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval); 330 return vp->fval = f; 331 } 332 333 void funnyvar(Cell *vp, const char *rw) 334 { 335 if (isarr(vp)) 336 FATAL("can't %s %s; it's an array name.", rw, vp->nval); 337 if (vp->tval & FCN) 338 FATAL("can't %s %s; it's a function.", rw, vp->nval); 339 WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", 340 (void *)vp, vp->nval, vp->sval, vp->fval, vp->tval); 341 } 342 343 char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ 344 { 345 char *t; 346 int fldno; 347 Awkfloat f; 348 349 DPRINTF("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", 350 (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld); 351 if ((vp->tval & (NUM | STR)) == 0) 352 funnyvar(vp, "assign to"); 353 if (CSV && (vp == rsloc)) 354 WARNING("danger: don't set RS when --csv is in effect"); 355 if (CSV && (vp == fsloc)) 356 WARNING("danger: don't set FS when --csv is in effect"); 357 if (isfld(vp)) { 358 donerec = false; /* mark $0 invalid */ 359 fldno = atoi(vp->nval); 360 if (fldno > *NF) 361 newfld(fldno); 362 DPRINTF("setting field %d to %s (%p)\n", fldno, s, (const void*)s); 363 } else if (isrec(vp)) { 364 donefld = false; /* mark $1... invalid */ 365 donerec = true; 366 savefs(); 367 } else if (vp == ofsloc) { 368 if (!donerec) 369 recbld(); 370 } 371 t = s ? tostring(s) : tostring(""); /* in case it's self-assign */ 372 if (freeable(vp)) 373 xfree(vp->sval); 374 vp->tval &= ~(NUM|DONTFREE|CONVC|CONVO); 375 vp->tval |= STR; 376 vp->fmt = NULL; 377 DPRINTF("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", 378 (void*)vp, NN(vp->nval), t, (void*)t, vp->tval, donerec, donefld); 379 vp->sval = t; 380 if (&vp->fval == NF) { 381 donerec = false; /* mark $0 invalid */ 382 f = getfval(vp); 383 setlastfld(f); 384 DPRINTF("setsval: setting NF to %g\n", f); 385 } 386 387 return(vp->sval); 388 } 389 390 Awkfloat getfval(Cell *vp) /* get float val of a Cell */ 391 { 392 if ((vp->tval & (NUM | STR)) == 0) 393 funnyvar(vp, "read value of"); 394 if (isfld(vp) && !donefld) 395 fldbld(); 396 else if (isrec(vp) && !donerec) 397 recbld(); 398 if (!isnum(vp)) { /* not a number */ 399 double fval; 400 bool no_trailing; 401 402 if (is_valid_number(vp->sval, true, & no_trailing, & fval)) { 403 vp->fval = fval; 404 if (no_trailing && !(vp->tval&CON)) 405 vp->tval |= NUM; /* make NUM only sparingly */ 406 } else 407 vp->fval = 0.0; 408 } 409 DPRINTF("getfval %p: %s = %g, t=%o\n", 410 (void*)vp, NN(vp->nval), vp->fval, vp->tval); 411 return(vp->fval); 412 } 413 414 static const char *get_inf_nan(double d) 415 { 416 if (isinf(d)) { 417 return (d < 0 ? "-inf" : "+inf"); 418 } else if (isnan(d)) { 419 return (signbit(d) != 0 ? "-nan" : "+nan"); 420 } else 421 return NULL; 422 } 423 424 static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ 425 { 426 char s[256]; 427 double dtemp; 428 const char *p; 429 430 if ((vp->tval & (NUM | STR)) == 0) 431 funnyvar(vp, "read value of"); 432 if (isfld(vp) && ! donefld) 433 fldbld(); 434 else if (isrec(vp) && ! donerec) 435 recbld(); 436 437 /* 438 * ADR: This is complicated and more fragile than is desirable. 439 * Retrieving a string value for a number associates the string 440 * value with the scalar. Previously, the string value was 441 * sticky, meaning if converted via OFMT that became the value 442 * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT 443 * changed after a string value was retrieved, the original value 444 * was maintained and used. Also not per POSIX. 445 * 446 * We work around this design by adding two additional flags, 447 * CONVC and CONVO, indicating how the string value was 448 * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy 449 * of the pointer to the xFMT format string used for the 450 * conversion. This pointer is only read, **never** dereferenced. 451 * The next time we do a conversion, if it's coming from the same 452 * xFMT as last time, and the pointer value is different, we 453 * know that the xFMT format string changed, and we need to 454 * redo the conversion. If it's the same, we don't have to. 455 * 456 * There are also several cases where we don't do a conversion, 457 * such as for a field (see the checks below). 458 */ 459 460 /* Don't duplicate the code for actually updating the value */ 461 #define update_str_val(vp) \ 462 { \ 463 if (freeable(vp)) \ 464 xfree(vp->sval); \ 465 if ((p = get_inf_nan(vp->fval)) != NULL) \ 466 strcpy(s, p); \ 467 else if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ 468 snprintf(s, sizeof (s), "%.30g", vp->fval); \ 469 else \ 470 snprintf(s, sizeof (s), *fmt, vp->fval); \ 471 vp->sval = tostring(s); \ 472 vp->tval &= ~DONTFREE; \ 473 vp->tval |= STR; \ 474 } 475 476 if (isstr(vp) == 0) { 477 update_str_val(vp); 478 if (fmt == OFMT) { 479 vp->tval &= ~CONVC; 480 vp->tval |= CONVO; 481 } else { 482 /* CONVFMT */ 483 vp->tval &= ~CONVO; 484 vp->tval |= CONVC; 485 } 486 vp->fmt = *fmt; 487 } else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) { 488 goto done; 489 } else if (isstr(vp)) { 490 if (fmt == OFMT) { 491 if ((vp->tval & CONVC) != 0 492 || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) { 493 update_str_val(vp); 494 vp->tval &= ~CONVC; 495 vp->tval |= CONVO; 496 vp->fmt = *fmt; 497 } 498 } else { 499 /* CONVFMT */ 500 if ((vp->tval & CONVO) != 0 501 || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) { 502 update_str_val(vp); 503 vp->tval &= ~CONVO; 504 vp->tval |= CONVC; 505 vp->fmt = *fmt; 506 } 507 } 508 } 509 done: 510 DPRINTF("getsval %p: %s = \"%s (%p)\", t=%o\n", 511 (void*)vp, NN(vp->nval), vp->sval, (void*)vp->sval, vp->tval); 512 return(vp->sval); 513 } 514 515 char *getsval(Cell *vp) /* get string val of a Cell */ 516 { 517 return get_str_val(vp, CONVFMT); 518 } 519 520 char *getpssval(Cell *vp) /* get string val of a Cell for print */ 521 { 522 return get_str_val(vp, OFMT); 523 } 524 525 526 char *tostring(const char *s) /* make a copy of string s */ 527 { 528 char *p = strdup(s); 529 if (p == NULL) 530 FATAL("out of space in tostring on %s", s); 531 return(p); 532 } 533 534 char *tostringN(const char *s, size_t n) /* make a copy of string s */ 535 { 536 char *p; 537 538 p = (char *) malloc(n); 539 if (p == NULL) 540 FATAL("out of space in tostring on %s", s); 541 strcpy(p, s); 542 return(p); 543 } 544 545 Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */ 546 { 547 Cell *c; 548 char *p; 549 char *sa = getsval(a); 550 char *sb = getsval(b); 551 size_t l = strlen(sa) + strlen(sb) + 1; 552 p = (char *) malloc(l); 553 if (p == NULL) 554 FATAL("out of space concatenating %s and %s", sa, sb); 555 snprintf(p, l, "%s%s", sa, sb); 556 557 l++; // add room for ' ' 558 char *newbuf = (char *) malloc(l); 559 if (newbuf == NULL) 560 FATAL("out of space concatenating %s and %s", sa, sb); 561 // See string() in lex.c; a string "xx" is stored in the symbol 562 // table as "xx ". 563 snprintf(newbuf, l, "%s ", p); 564 c = setsymtab(newbuf, p, 0.0, CON|STR|DONTFREE, symtab); 565 free(p); 566 free(newbuf); 567 return c; 568 } 569 570 char *qstring(const char *is, int delim) /* collect string up to next delim */ 571 { 572 int c, n; 573 const uschar *s = (const uschar *) is; 574 uschar *buf, *bp; 575 576 if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL) 577 FATAL( "out of space in qstring(%s)", s); 578 for (bp = buf; (c = *s) != delim; s++) { 579 if (c == '\n') 580 SYNTAX( "newline in string %.20s...", is ); 581 else if (c != '\\') 582 *bp++ = c; 583 else { /* \something */ 584 c = *++s; 585 if (c == 0) { /* \ at end */ 586 *bp++ = '\\'; 587 break; /* for loop */ 588 } 589 switch (c) { 590 case '\\': *bp++ = '\\'; break; 591 case 'n': *bp++ = '\n'; break; 592 case 't': *bp++ = '\t'; break; 593 case 'b': *bp++ = '\b'; break; 594 case 'f': *bp++ = '\f'; break; 595 case 'r': *bp++ = '\r'; break; 596 case 'v': *bp++ = '\v'; break; 597 case 'a': *bp++ = '\a'; break; 598 default: 599 if (!isdigit(c)) { 600 *bp++ = c; 601 break; 602 } 603 n = c - '0'; 604 if (isdigit(s[1])) { 605 n = 8 * n + *++s - '0'; 606 if (isdigit(s[1])) 607 n = 8 * n + *++s - '0'; 608 } 609 *bp++ = n; 610 break; 611 } 612 } 613 } 614 *bp++ = 0; 615 return (char *) buf; 616 } 617 618 const char *flags2str(int flags) 619 { 620 static const struct ftab { 621 const char *name; 622 int value; 623 } flagtab[] = { 624 { "NUM", NUM }, 625 { "STR", STR }, 626 { "DONTFREE", DONTFREE }, 627 { "CON", CON }, 628 { "ARR", ARR }, 629 { "FCN", FCN }, 630 { "FLD", FLD }, 631 { "REC", REC }, 632 { "CONVC", CONVC }, 633 { "CONVO", CONVO }, 634 { NULL, 0 } 635 }; 636 static char buf[100]; 637 int i; 638 char *cp = buf; 639 640 for (i = 0; flagtab[i].name != NULL; i++) { 641 if ((flags & flagtab[i].value) != 0) { 642 if (cp > buf) 643 *cp++ = '|'; 644 strcpy(cp, flagtab[i].name); 645 cp += strlen(cp); 646 } 647 } 648 649 return buf; 650 } 651