1 /* 2 * Copyright (C) Lucent Technologies 1997 3 * All Rights Reserved 4 * 5 * Permission to use, copy, modify, and distribute this software and 6 * its documentation for any purpose and without fee is hereby 7 * granted, provided that the above copyright notice appear in all 8 * copies and that both that the copyright notice and this 9 * permission notice and warranty disclaimer appear in supporting 10 * documentation, and that the name Lucent Technologies or any of 11 * its entities not be used in advertising or publicity pertaining 12 * to distribution of the software without specific, written prior 13 * permission. 14 * 15 * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 * THIS SOFTWARE. 23 */ 24 25 /* 26 * CDDL HEADER START 27 * 28 * The contents of this file are subject to the terms of the 29 * Common Development and Distribution License (the "License"). 30 * You may not use this file except in compliance with the License. 31 * 32 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 33 * or http://www.opensolaris.org/os/licensing. 34 * See the License for the specific language governing permissions 35 * and limitations under the License. 36 * 37 * When distributing Covered Code, include this CDDL HEADER in each 38 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 39 * If applicable, add the following below this CDDL HEADER, with the 40 * fields enclosed by brackets "[]" replaced with your own identifying 41 * information: Portions Copyright [yyyy] [name of copyright owner] 42 * 43 * CDDL HEADER END 44 */ 45 46 /* 47 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 48 * Use is subject to license terms. 49 */ 50 51 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 52 /* All Rights Reserved */ 53 54 #define DEBUG 55 #include <stdio.h> 56 #include <math.h> 57 #include <ctype.h> 58 #include <string.h> 59 #include <stdlib.h> 60 #include "awk.h" 61 #include "y.tab.h" 62 63 #define FULLTAB 2 /* rehash when table gets this x full */ 64 #define GROWTAB 4 /* grow table by this factor */ 65 66 Array *symtab; /* main symbol table */ 67 68 char **FS; /* initial field sep */ 69 char **RS; /* initial record sep */ 70 char **OFS; /* output field sep */ 71 char **ORS; /* output record sep */ 72 char **OFMT; /* output format for numbers */ 73 char **CONVFMT; /* format for conversions in getsval */ 74 Awkfloat *NF; /* number of fields in current record */ 75 Awkfloat *NR; /* number of current record */ 76 Awkfloat *FNR; /* number of current record in current file */ 77 char **FILENAME; /* current filename argument */ 78 Awkfloat *ARGC; /* number of arguments from command line */ 79 char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ 80 Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */ 81 Awkfloat *RLENGTH; /* length of same */ 82 83 Cell *recloc; /* location of record */ 84 Cell *fsloc; /* FS */ 85 Cell *nrloc; /* NR */ 86 Cell *nfloc; /* NF */ 87 Cell *fnrloc; /* FNR */ 88 Cell *ofsloc; /* OFS */ 89 Cell *orsloc; /* ORS */ 90 Cell *rsloc; /* RS */ 91 Cell *rtloc; /* RT */ 92 Array *ARGVtab; /* symbol table containing ARGV[...] */ 93 Array *ENVtab; /* symbol table containing ENVIRON[...] */ 94 Cell *rstartloc; /* RSTART */ 95 Cell *rlengthloc; /* RLENGTH */ 96 Cell *subseploc; /* SUBSEP */ 97 Cell *symtabloc; /* SYMTAB */ 98 99 Cell *nullloc; /* a guaranteed empty cell */ 100 Node *nullnode; /* zero&null, converted into a node for comparisons */ 101 Cell *literal0; 102 103 static void rehash(Array *); 104 105 static void 106 setfree(Cell *vp) 107 { 108 if (&vp->sval == FS || &vp->sval == RS || 109 &vp->sval == OFS || &vp->sval == ORS || 110 &vp->sval == OFMT || &vp->sval == CONVFMT || 111 &vp->sval == FILENAME || &vp->sval == SUBSEP) 112 vp->tval |= DONTFREE; 113 else 114 vp->tval &= ~DONTFREE; 115 } 116 117 void 118 syminit(void) /* initialize symbol table with builtin vars */ 119 { 120 /* initialize $0 */ 121 recloc = fieldadr(0); 122 recloc->nval = "$0"; 123 recloc->sval = record; 124 recloc->tval = REC|STR|DONTFREE; 125 126 literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); 127 /* this is used for if(x)... tests: */ 128 nullloc = setsymtab("$zero&null", "", 0.0, 129 NUM|STR|CON|DONTFREE, symtab); 130 nullnode = celltonode(nullloc, CCON); 131 132 fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); 133 FS = &fsloc->sval; 134 rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab); 135 RS = &rsloc->sval; 136 rtloc = setsymtab("RT", "", 0.0, STR|DONTFREE, symtab); 137 ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab); 138 OFS = &ofsloc->sval; 139 orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab); 140 ORS = &orsloc->sval; 141 OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; 142 CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, 143 STR|DONTFREE, symtab)->sval; 144 FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; 145 nfloc = setsymtab("NF", "", 0.0, NUM, symtab); 146 NF = &nfloc->fval; 147 nrloc = setsymtab("NR", "", 0.0, NUM, symtab); 148 NR = &nrloc->fval; 149 fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); 150 FNR = &fnrloc->fval; 151 subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab); 152 SUBSEP = &subseploc->sval; 153 rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); 154 RSTART = &rstartloc->fval; 155 rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); 156 RLENGTH = &rlengthloc->fval; 157 symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); 158 symtabloc->sval = (char *)symtab; 159 } 160 161 void 162 arginit(int ac, char **av) /* set up ARGV and ARGC */ 163 { 164 Cell *cp; 165 int i; 166 char temp[50]; 167 168 ARGC = &setsymtab("ARGC", "", (Awkfloat)ac, NUM, symtab)->fval; 169 cp = setsymtab("ARGV", "", 0.0, ARR, symtab); 170 ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ 171 cp->sval = (char *)ARGVtab; 172 for (i = 0; i < ac; i++) { 173 (void) sprintf(temp, "%d", i); 174 if (is_number(*av)) { 175 (void) setsymtab(temp, *av, atof(*av), 176 STR|NUM, ARGVtab); 177 } else { 178 (void) setsymtab(temp, *av, 0.0, STR, ARGVtab); 179 } 180 av++; 181 } 182 } 183 184 void 185 envinit(char **envp) /* set up ENVIRON variable */ 186 { 187 Cell *cp; 188 char *p; 189 190 cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); 191 ENVtab = makesymtab(NSYMTAB); 192 cp->sval = (char *)ENVtab; 193 for (; *envp; envp++) { 194 if ((p = strchr(*envp, '=')) == NULL) 195 continue; 196 if (p == *envp) /* no left hand side name in env string */ 197 continue; 198 *p++ = 0; /* split into two strings at = */ 199 if (is_number(p)) { 200 (void) setsymtab(*envp, p, atof(p), 201 STR|NUM, ENVtab); 202 } else { 203 (void) setsymtab(*envp, p, 0.0, STR, ENVtab); 204 } 205 /* restore in case env is passed down to a shell */ 206 p[-1] = '='; 207 } 208 } 209 210 Array * 211 makesymtab(int n) /* make a new symbol table */ 212 { 213 Array *ap; 214 Cell **tp; 215 216 ap = (Array *)malloc(sizeof (Array)); 217 tp = (Cell **)calloc(n, sizeof (Cell *)); 218 if (ap == NULL || tp == NULL) 219 FATAL("out of space in makesymtab"); 220 ap->nelem = 0; 221 ap->size = n; 222 ap->tab = tp; 223 return (ap); 224 } 225 226 void 227 freesymtab(Cell *ap) /* free a symbol table */ 228 { 229 Cell *cp, *temp; 230 Array *tp; 231 int i; 232 233 if (!isarr(ap)) 234 return; 235 /*LINTED align*/ 236 tp = (Array *)ap->sval; 237 if (tp == NULL) 238 return; 239 for (i = 0; i < tp->size; i++) { 240 for (cp = tp->tab[i]; cp != NULL; cp = temp) { 241 xfree(cp->nval); 242 if (freeable(cp)) 243 xfree(cp->sval); 244 temp = cp->cnext; /* avoids freeing then using */ 245 free(cp); 246 tp->nelem--; 247 } 248 tp->tab[i] = 0; 249 } 250 if (tp->nelem != 0) { 251 WARNING("can't happen: inconsistent element count freeing %s", 252 ap->nval); 253 } 254 free(tp->tab); 255 free(tp); 256 } 257 258 void 259 freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */ 260 { 261 Array *tp; 262 Cell *p, *prev = NULL; 263 int h; 264 265 /*LINTED align*/ 266 tp = (Array *)ap->sval; 267 h = hash(s, tp->size); 268 for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) 269 if (strcmp(s, p->nval) == 0) { 270 if (prev == NULL) /* 1st one */ 271 tp->tab[h] = p->cnext; 272 else /* middle somewhere */ 273 prev->cnext = p->cnext; 274 if (freeable(p)) 275 xfree(p->sval); 276 free(p->nval); 277 free(p); 278 tp->nelem--; 279 return; 280 } 281 } 282 283 Cell * 284 setsymtab(const char *n, const char *s, Awkfloat f, unsigned int t, Array *tp) 285 { 286 int h; 287 Cell *p; 288 289 if (n != NULL && (p = lookup(n, tp)) != NULL) { 290 dprintf(("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", 291 (void *)p, NN(p->nval), NN(p->sval), p->fval, p->tval)); 292 return (p); 293 } 294 p = (Cell *)malloc(sizeof (Cell)); 295 if (p == NULL) 296 FATAL("out of space for symbol table at %s", n); 297 p->nval = tostring(n); 298 p->sval = s ? tostring(s) : tostring(""); 299 p->fval = f; 300 p->tval = t; 301 p->csub = CUNK; 302 p->ctype = OCELL; 303 tp->nelem++; 304 if (tp->nelem > FULLTAB * tp->size) 305 rehash(tp); 306 h = hash(n, tp->size); 307 p->cnext = tp->tab[h]; 308 tp->tab[h] = p; 309 dprintf(("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", 310 (void *)p, p->nval, p->sval, p->fval, p->tval)); 311 return (p); 312 } 313 314 int 315 hash(const char *s, int n) /* form hash value for string s */ 316 { 317 unsigned int hashval; 318 319 for (hashval = 0; *s != '\0'; s++) 320 hashval = (*s + 31 * hashval); 321 return (hashval % n); 322 } 323 324 static void 325 rehash(Array *tp) /* rehash items in small table into big one */ 326 { 327 int i, nh, nsz; 328 Cell *cp, *op, **np; 329 330 nsz = GROWTAB * tp->size; 331 np = (Cell **)calloc(nsz, sizeof (Cell *)); 332 if (np == NULL) /* can't do it, but can keep running. */ 333 return; /* someone else will run out later. */ 334 for (i = 0; i < tp->size; i++) { 335 for (cp = tp->tab[i]; cp != NULL; cp = op) { 336 op = cp->cnext; 337 nh = hash(cp->nval, nsz); 338 cp->cnext = np[nh]; 339 np[nh] = cp; 340 } 341 } 342 free(tp->tab); 343 tp->tab = np; 344 tp->size = nsz; 345 } 346 347 Cell * 348 lookup(const char *s, Array *tp) /* look for s in tp */ 349 { 350 Cell *p; 351 int h; 352 353 h = hash(s, tp->size); 354 for (p = tp->tab[h]; p != NULL; p = p->cnext) { 355 if (strcmp(s, p->nval) == 0) 356 return (p); /* found it */ 357 } 358 return (NULL); /* not found */ 359 } 360 361 Awkfloat 362 setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ 363 { 364 int fldno; 365 366 f += 0.0; /* normalise negative zero to positive zero */ 367 if ((vp->tval & (NUM | STR)) == 0) 368 funnyvar(vp, "assign to"); 369 if (isfld(vp)) { 370 donerec = 0; /* mark $0 invalid */ 371 fldno = atoi(vp->nval); 372 if (fldno > *NF) 373 newfld(fldno); 374 dprintf(("setting field %d to %g\n", fldno, f)); 375 } else if (&vp->fval == NF) { 376 donerec = 0; /* mark $0 invalid */ 377 setlastfld((int)f); 378 dprintf(("setting NF to %g\n", f)); 379 } else if (isrec(vp)) { 380 donefld = 0; /* mark $1... invalid */ 381 donerec = 1; 382 savefs(); 383 } else if (vp == ofsloc) { 384 if (donerec == 0) 385 recbld(); 386 } 387 if (freeable(vp)) 388 xfree(vp->sval); /* free any previous string */ 389 vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */ 390 vp->fmt = NULL; 391 vp->tval |= NUM; /* mark number ok */ 392 if (f == -0) /* who would have thought this possible? */ 393 f = 0; 394 dprintf(("setfval %p: %s = %g, t=%o\n", (void *)vp, 395 NN(vp->nval), f, vp->tval)); 396 return (vp->fval = f); 397 } 398 399 void 400 funnyvar(Cell *vp, const char *rw) 401 { 402 if (isarr(vp)) 403 FATAL("can't %s %s; it's an array name.", rw, vp->nval); 404 if (isfcn(vp)) 405 FATAL("can't %s %s; it's a function.", rw, vp->nval); 406 WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", 407 vp, vp->nval, vp->sval, vp->fval, vp->tval); 408 } 409 410 char * 411 setsval(Cell *vp, const char *s) /* set string val of a Cell */ 412 { 413 char *t; 414 int fldno; 415 Awkfloat f; 416 417 dprintf(("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", 418 (void *)vp, NN(vp->nval), s, vp->tval, donerec, donefld)); 419 if ((vp->tval & (NUM | STR)) == 0) 420 funnyvar(vp, "assign to"); 421 if (isfld(vp)) { 422 donerec = 0; /* mark $0 invalid */ 423 fldno = atoi(vp->nval); 424 if (fldno > *NF) 425 newfld(fldno); 426 dprintf(("setting field %d to %s (%p)\n", fldno, s, (void *)s)); 427 } else if (isrec(vp)) { 428 donefld = 0; /* mark $1... invalid */ 429 donerec = 1; 430 savefs(); 431 } else if (vp == ofsloc) { 432 if (donerec == 0) 433 recbld(); 434 } 435 t = s ? tostring(s) : tostring(""); /* in case it's self-assign */ 436 if (freeable(vp)) 437 xfree(vp->sval); 438 vp->tval &= ~(NUM|CONVC|CONVO); 439 vp->tval |= STR; 440 vp->fmt = NULL; 441 setfree(vp); 442 dprintf(("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", 443 (void *)vp, NN(vp->nval), t, (void *)t, 444 vp->tval, donerec, donefld)); 445 vp->sval = t; 446 if (&vp->fval == NF) { 447 donerec = 0; /* mark $0 invalid */ 448 f = getfval(vp); 449 setlastfld((int)f); 450 dprintf(("setting NF to %g\n", f)); 451 } 452 453 return (vp->sval); 454 } 455 456 Awkfloat 457 getfval(Cell *vp) /* get float val of a Cell */ 458 { 459 if ((vp->tval & (NUM | STR)) == 0) 460 funnyvar(vp, "read value of"); 461 if (isfld(vp) && donefld == 0) 462 fldbld(); 463 else if (isrec(vp) && donerec == 0) 464 recbld(); 465 if (!isnum(vp)) { /* not a number */ 466 vp->fval = atof(vp->sval); /* best guess */ 467 if (is_number(vp->sval) && !(vp->tval&CON)) 468 vp->tval |= NUM; /* make NUM only sparingly */ 469 } 470 dprintf(("getfval %p: %s = %g, t=%o\n", 471 (void *)vp, NN(vp->nval), vp->fval, vp->tval)); 472 return (vp->fval); 473 } 474 475 static char * 476 get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ 477 { 478 char s[256]; 479 double dtemp; 480 481 if ((vp->tval & (NUM | STR)) == 0) 482 funnyvar(vp, "read value of"); 483 if (isfld(vp) && donefld == 0) 484 fldbld(); 485 else if (isrec(vp) && donerec == 0) 486 recbld(); 487 488 /* 489 * ADR: This is complicated and more fragile than is desirable. 490 * Retrieving a string value for a number associates the string 491 * value with the scalar. Previously, the string value was 492 * sticky, meaning if converted via OFMT that became the value 493 * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT 494 * changed after a string value was retrieved, the original value 495 * was maintained and used. Also not per POSIX. 496 * 497 * We work around this design by adding two additional flags, 498 * CONVC and CONVO, indicating how the string value was 499 * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy 500 * of the pointer to the xFMT format string used for the 501 * conversion. This pointer is only read, **never** dereferenced. 502 * The next time we do a conversion, if it's coming from the same 503 * xFMT as last time, and the pointer value is different, we 504 * know that the xFMT format string changed, and we need to 505 * redo the conversion. If it's the same, we don't have to. 506 * 507 * There are also several cases where we don't do a conversion, 508 * such as for a field (see the checks below). 509 */ 510 511 /* Don't duplicate the code for actually updating the value */ 512 #define update_str_val(vp) \ 513 { \ 514 if (freeable(vp)) \ 515 xfree(vp->sval); \ 516 if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ 517 (void) snprintf(s, sizeof (s), "%.30g", vp->fval); \ 518 else \ 519 (void) snprintf(s, sizeof (s), *fmt, vp->fval); \ 520 vp->sval = tostring(s); \ 521 vp->tval &= ~DONTFREE; \ 522 vp->tval |= STR; \ 523 } 524 525 if (isstr(vp) == 0) { 526 /*LINTED*/ 527 update_str_val(vp); 528 if (fmt == OFMT) { 529 vp->tval &= ~CONVC; 530 vp->tval |= CONVO; 531 } else { 532 /* CONVFMT */ 533 vp->tval &= ~CONVO; 534 vp->tval |= CONVC; 535 } 536 vp->fmt = *fmt; 537 } else if ((vp->tval & DONTFREE) != 0 || !isnum(vp) || isfld(vp)) { 538 goto done; 539 } else if (isstr(vp)) { 540 if (fmt == OFMT) { 541 if ((vp->tval & CONVC) != 0 || 542 ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) { 543 /*LINTED*/ 544 update_str_val(vp); 545 vp->tval &= ~CONVC; 546 vp->tval |= CONVO; 547 vp->fmt = *fmt; 548 } 549 } else { 550 /* CONVFMT */ 551 if ((vp->tval & CONVO) != 0 || 552 ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) { 553 /*LINTED*/ 554 update_str_val(vp); 555 vp->tval &= ~CONVO; 556 vp->tval |= CONVC; 557 vp->fmt = *fmt; 558 } 559 } 560 } 561 done: 562 dprintf(("getsval %p: %s = \"%s (%p)\", t=%o\n", 563 (void *)vp, NN(vp->nval), vp->sval, (void *)vp->sval, vp->tval)); 564 return (vp->sval); 565 } 566 567 char * 568 getsval(Cell *vp) /* get string val of a Cell */ 569 { 570 return (get_str_val(vp, CONVFMT)); 571 } 572 573 char * 574 getpssval(Cell *vp) /* get string val of a Cell for print */ 575 { 576 return (get_str_val(vp, OFMT)); 577 } 578 579 580 char * 581 tostring(const char *s) /* make a copy of string s */ 582 { 583 char *p = strdup(s); 584 if (p == NULL) 585 FATAL("out of space in tostring on %s", s); 586 return (p); 587 } 588 589 char * 590 qstring(const char *is, int delim) /* collect string up to next delim */ 591 { 592 const char *os = is; 593 int c, n; 594 uschar *s = (uschar *)is; 595 uschar *buf, *bp; 596 597 if ((buf = (uschar *)malloc(strlen(is)+3)) == NULL) 598 FATAL("out of space in qstring(%s)", s); 599 for (bp = buf; (c = *s) != delim; s++) { 600 if (c == '\n') { 601 SYNTAX("newline in string %.20s...", os); 602 } else if (c != '\\') 603 *bp++ = c; 604 else { /* \something */ 605 c = *++s; 606 if (c == 0) { /* \ at end */ 607 *bp++ = '\\'; 608 break; /* for loop */ 609 } 610 switch (c) { 611 case '\\': *bp++ = '\\'; break; 612 case 'n': *bp++ = '\n'; break; 613 case 't': *bp++ = '\t'; break; 614 case 'b': *bp++ = '\b'; break; 615 case 'f': *bp++ = '\f'; break; 616 case 'r': *bp++ = '\r'; break; 617 default: 618 if (!isdigit(c)) { 619 *bp++ = c; 620 break; 621 } 622 n = c - '0'; 623 if (isdigit(s[1])) { 624 n = 8 * n + *++s - '0'; 625 if (isdigit(s[1])) 626 n = 8 * n + *++s - '0'; 627 } 628 *bp++ = n; 629 break; 630 } 631 } 632 } 633 *bp++ = 0; 634 return ((char *)buf); 635 } 636