1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 #define DEBUG 26 #include <stdio.h> 27 #include <ctype.h> 28 #include <errno.h> 29 #include <wchar.h> 30 #include <wctype.h> 31 #include <fcntl.h> 32 #include <setjmp.h> 33 #include <limits.h> 34 #include <math.h> 35 #include <string.h> 36 #include <stdlib.h> 37 #include <time.h> 38 #include <sys/types.h> 39 #include <sys/wait.h> 40 #include "awk.h" 41 #include "awkgram.tab.h" 42 43 static void stdinit(void); 44 static void flush_all(void); 45 46 #if 1 47 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 48 #else 49 void tempfree(Cell *p) { 50 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 51 WARNING("bad csub %d in Cell %d %s", 52 p->csub, p->ctype, p->sval); 53 } 54 if (istemp(p)) 55 tfree(p); 56 } 57 #endif 58 59 /* do we really need these? */ 60 /* #ifdef _NFILE */ 61 /* #ifndef FOPEN_MAX */ 62 /* #define FOPEN_MAX _NFILE */ 63 /* #endif */ 64 /* #endif */ 65 /* */ 66 /* #ifndef FOPEN_MAX */ 67 /* #define FOPEN_MAX 40 */ /* max number of open files */ 68 /* #endif */ 69 /* */ 70 /* #ifndef RAND_MAX */ 71 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 72 /* #endif */ 73 74 jmp_buf env; 75 extern int pairstack[]; 76 extern Awkfloat srand_seed; 77 78 Node *winner = NULL; /* root of parse tree */ 79 Cell *tmps; /* free temporary cells for execution */ 80 81 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 82 Cell *True = &truecell; 83 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 84 Cell *False = &falsecell; 85 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 86 Cell *jbreak = &breakcell; 87 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 88 Cell *jcont = &contcell; 89 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 90 Cell *jnext = &nextcell; 91 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 92 Cell *jnextfile = &nextfilecell; 93 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 94 Cell *jexit = &exitcell; 95 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 96 Cell *jret = &retcell; 97 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 98 99 Node *curnode = NULL; /* the node being executed, for debugging */ 100 101 /* buffer memory management */ 102 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 103 const char *whatrtn) 104 /* pbuf: address of pointer to buffer being managed 105 * psiz: address of buffer size variable 106 * minlen: minimum length of buffer needed 107 * quantum: buffer size quantum 108 * pbptr: address of movable pointer into buffer, or 0 if none 109 * whatrtn: name of the calling routine if failure should cause fatal error 110 * 111 * return 0 for realloc failure, !=0 for success 112 */ 113 { 114 if (minlen > *psiz) { 115 char *tbuf; 116 int rminlen = quantum ? minlen % quantum : 0; 117 int boff = pbptr ? *pbptr - *pbuf : 0; 118 /* round up to next multiple of quantum */ 119 if (rminlen) 120 minlen += quantum - rminlen; 121 tbuf = (char *) realloc(*pbuf, minlen); 122 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 123 if (tbuf == NULL) { 124 if (whatrtn) 125 FATAL("out of memory in %s", whatrtn); 126 return 0; 127 } 128 *pbuf = tbuf; 129 *psiz = minlen; 130 if (pbptr) 131 *pbptr = tbuf + boff; 132 } 133 return 1; 134 } 135 136 void run(Node *a) /* execution of parse tree starts here */ 137 { 138 139 stdinit(); 140 execute(a); 141 closeall(); 142 } 143 144 Cell *execute(Node *u) /* execute a node of the parse tree */ 145 { 146 Cell *(*proc)(Node **, int); 147 Cell *x; 148 Node *a; 149 150 if (u == NULL) 151 return(True); 152 for (a = u; ; a = a->nnext) { 153 curnode = a; 154 if (isvalue(a)) { 155 x = (Cell *) (a->narg[0]); 156 if (isfld(x) && !donefld) 157 fldbld(); 158 else if (isrec(x) && !donerec) 159 recbld(); 160 return(x); 161 } 162 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 163 FATAL("illegal statement"); 164 proc = proctab[a->nobj-FIRSTTOKEN]; 165 x = (*proc)(a->narg, a->nobj); 166 if (isfld(x) && !donefld) 167 fldbld(); 168 else if (isrec(x) && !donerec) 169 recbld(); 170 if (isexpr(a)) 171 return(x); 172 if (isjump(x)) 173 return(x); 174 if (a->nnext == NULL) 175 return(x); 176 tempfree(x); 177 } 178 } 179 180 181 Cell *program(Node **a, int n) /* execute an awk program */ 182 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 183 Cell *x; 184 185 if (setjmp(env) != 0) 186 goto ex; 187 if (a[0]) { /* BEGIN */ 188 x = execute(a[0]); 189 if (isexit(x)) 190 return(True); 191 if (isjump(x)) 192 FATAL("illegal break, continue, next or nextfile from BEGIN"); 193 tempfree(x); 194 } 195 if (a[1] || a[2]) 196 while (getrec(&record, &recsize, true) > 0) { 197 x = execute(a[1]); 198 if (isexit(x)) 199 break; 200 tempfree(x); 201 } 202 ex: 203 if (setjmp(env) != 0) /* handles exit within END */ 204 goto ex1; 205 if (a[2]) { /* END */ 206 x = execute(a[2]); 207 if (isbreak(x) || isnext(x) || iscont(x)) 208 FATAL("illegal break, continue, next or nextfile from END"); 209 tempfree(x); 210 } 211 ex1: 212 return(True); 213 } 214 215 struct Frame { /* stack frame for awk function calls */ 216 int nargs; /* number of arguments in this call */ 217 Cell *fcncell; /* pointer to Cell for function */ 218 Cell **args; /* pointer to array of arguments after execute */ 219 Cell *retval; /* return value */ 220 }; 221 222 #define NARGS 50 /* max args in a call */ 223 224 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 225 int nframe = 0; /* number of frames allocated */ 226 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 227 228 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 229 { 230 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 231 int i, ncall, ndef; 232 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 233 Node *x; 234 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 235 Cell *y, *z, *fcn; 236 char *s; 237 238 fcn = execute(a[0]); /* the function itself */ 239 s = fcn->nval; 240 if (!isfcn(fcn)) 241 FATAL("calling undefined function %s", s); 242 if (frame == NULL) { 243 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 244 if (frame == NULL) 245 FATAL("out of space for stack frames calling %s", s); 246 } 247 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 248 ncall++; 249 ndef = (int) fcn->fval; /* args in defn */ 250 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 251 if (ncall > ndef) 252 WARNING("function %s called with %d args, uses only %d", 253 s, ncall, ndef); 254 if (ncall + ndef > NARGS) 255 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 256 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 257 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 258 y = execute(x); 259 oargs[i] = y; 260 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 261 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 262 if (isfcn(y)) 263 FATAL("can't use function %s as argument in %s", y->nval, s); 264 if (isarr(y)) 265 args[i] = y; /* arrays by ref */ 266 else 267 args[i] = copycell(y); 268 tempfree(y); 269 } 270 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 271 args[i] = gettemp(); 272 *args[i] = newcopycell; 273 } 274 frp++; /* now ok to up frame */ 275 if (frp >= frame + nframe) { 276 int dfp = frp - frame; /* old index */ 277 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); 278 if (frame == NULL) 279 FATAL("out of space for stack frames in %s", s); 280 frp = frame + dfp; 281 } 282 frp->fcncell = fcn; 283 frp->args = args; 284 frp->nargs = ndef; /* number defined with (excess are locals) */ 285 frp->retval = gettemp(); 286 287 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 288 y = execute((Node *)(fcn->sval)); /* execute body */ 289 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 290 291 for (i = 0; i < ndef; i++) { 292 Cell *t = frp->args[i]; 293 if (isarr(t)) { 294 if (t->csub == CCOPY) { 295 if (i >= ncall) { 296 freesymtab(t); 297 t->csub = CTEMP; 298 tempfree(t); 299 } else { 300 oargs[i]->tval = t->tval; 301 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 302 oargs[i]->sval = t->sval; 303 tempfree(t); 304 } 305 } 306 } else if (t != y) { /* kludge to prevent freeing twice */ 307 t->csub = CTEMP; 308 tempfree(t); 309 } else if (t == y && t->csub == CCOPY) { 310 t->csub = CTEMP; 311 tempfree(t); 312 freed = 1; 313 } 314 } 315 tempfree(fcn); 316 if (isexit(y) || isnext(y)) 317 return y; 318 if (freed == 0) { 319 tempfree(y); /* don't free twice! */ 320 } 321 z = frp->retval; /* return value */ 322 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 323 frp--; 324 return(z); 325 } 326 327 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 328 { 329 Cell *y; 330 331 /* copy is not constant or field */ 332 333 y = gettemp(); 334 y->tval = x->tval & ~(CON|FLD|REC); 335 y->csub = CCOPY; /* prevents freeing until call is over */ 336 y->nval = x->nval; /* BUG? */ 337 if (isstr(x) /* || x->ctype == OCELL */) { 338 y->sval = tostring(x->sval); 339 y->tval &= ~DONTFREE; 340 } else 341 y->tval |= DONTFREE; 342 y->fval = x->fval; 343 return y; 344 } 345 346 Cell *arg(Node **a, int n) /* nth argument of a function */ 347 { 348 349 n = ptoi(a[0]); /* argument number, counting from 0 */ 350 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 351 if (n+1 > frp->nargs) 352 FATAL("argument #%d of function %s was not supplied", 353 n+1, frp->fcncell->nval); 354 return frp->args[n]; 355 } 356 357 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 358 { 359 Cell *y; 360 361 switch (n) { 362 case EXIT: 363 if (a[0] != NULL) { 364 y = execute(a[0]); 365 errorflag = (int) getfval(y); 366 tempfree(y); 367 } 368 longjmp(env, 1); 369 case RETURN: 370 if (a[0] != NULL) { 371 y = execute(a[0]); 372 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 373 setsval(frp->retval, getsval(y)); 374 frp->retval->fval = getfval(y); 375 frp->retval->tval |= NUM; 376 } 377 else if (y->tval & STR) 378 setsval(frp->retval, getsval(y)); 379 else if (y->tval & NUM) 380 setfval(frp->retval, getfval(y)); 381 else /* can't happen */ 382 FATAL("bad type variable %d", y->tval); 383 tempfree(y); 384 } 385 return(jret); 386 case NEXT: 387 return(jnext); 388 case NEXTFILE: 389 nextfile(); 390 return(jnextfile); 391 case BREAK: 392 return(jbreak); 393 case CONTINUE: 394 return(jcont); 395 default: /* can't happen */ 396 FATAL("illegal jump type %d", n); 397 } 398 return 0; /* not reached */ 399 } 400 401 Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 402 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 403 Cell *r, *x; 404 extern Cell **fldtab; 405 FILE *fp; 406 char *buf; 407 int bufsize = recsize; 408 int mode; 409 bool newflag; 410 double result; 411 412 if ((buf = (char *) malloc(bufsize)) == NULL) 413 FATAL("out of memory in getline"); 414 415 fflush(stdout); /* in case someone is waiting for a prompt */ 416 r = gettemp(); 417 if (a[1] != NULL) { /* getline < file */ 418 x = execute(a[2]); /* filename */ 419 mode = ptoi(a[1]); 420 if (mode == '|') /* input pipe */ 421 mode = LE; /* arbitrary flag */ 422 fp = openfile(mode, getsval(x), &newflag); 423 tempfree(x); 424 if (fp == NULL) 425 n = -1; 426 else 427 n = readrec(&buf, &bufsize, fp, newflag); 428 if (n <= 0) { 429 ; 430 } else if (a[0] != NULL) { /* getline var <file */ 431 x = execute(a[0]); 432 setsval(x, buf); 433 if (is_number(x->sval, & result)) { 434 x->fval = result; 435 x->tval |= NUM; 436 } 437 tempfree(x); 438 } else { /* getline <file */ 439 setsval(fldtab[0], buf); 440 if (is_number(fldtab[0]->sval, & result)) { 441 fldtab[0]->fval = result; 442 fldtab[0]->tval |= NUM; 443 } 444 } 445 } else { /* bare getline; use current input */ 446 if (a[0] == NULL) /* getline */ 447 n = getrec(&record, &recsize, true); 448 else { /* getline var */ 449 n = getrec(&buf, &bufsize, false); 450 x = execute(a[0]); 451 setsval(x, buf); 452 if (is_number(x->sval, & result)) { 453 x->fval = result; 454 x->tval |= NUM; 455 } 456 tempfree(x); 457 } 458 } 459 setfval(r, (Awkfloat) n); 460 free(buf); 461 return r; 462 } 463 464 Cell *getnf(Node **a, int n) /* get NF */ 465 { 466 if (!donefld) 467 fldbld(); 468 return (Cell *) a[0]; 469 } 470 471 static char * 472 makearraystring(Node *p, const char *func) 473 { 474 char *buf; 475 int bufsz = recsize; 476 size_t blen; 477 478 if ((buf = (char *) malloc(bufsz)) == NULL) { 479 FATAL("%s: out of memory", func); 480 } 481 482 blen = 0; 483 buf[blen] = '\0'; 484 485 for (; p; p = p->nnext) { 486 Cell *x = execute(p); /* expr */ 487 char *s = getsval(x); 488 size_t seplen = strlen(getsval(subseploc)); 489 size_t nsub = p->nnext ? seplen : 0; 490 size_t slen = strlen(s); 491 size_t tlen = blen + slen + nsub; 492 493 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 494 FATAL("%s: out of memory %s[%s...]", 495 func, x->nval, buf); 496 } 497 memcpy(buf + blen, s, slen); 498 if (nsub) { 499 memcpy(buf + blen + slen, *SUBSEP, nsub); 500 } 501 buf[tlen] = '\0'; 502 blen = tlen; 503 tempfree(x); 504 } 505 return buf; 506 } 507 508 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 509 { 510 Cell *x, *z; 511 char *buf; 512 513 x = execute(a[0]); /* Cell* for symbol table */ 514 buf = makearraystring(a[1], __func__); 515 if (!isarr(x)) { 516 DPRINTF("making %s into an array\n", NN(x->nval)); 517 if (freeable(x)) 518 xfree(x->sval); 519 x->tval &= ~(STR|NUM|DONTFREE); 520 x->tval |= ARR; 521 x->sval = (char *) makesymtab(NSYMTAB); 522 } 523 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 524 z->ctype = OCELL; 525 z->csub = CVAR; 526 tempfree(x); 527 free(buf); 528 return(z); 529 } 530 531 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 532 { 533 Cell *x; 534 535 x = execute(a[0]); /* Cell* for symbol table */ 536 if (x == symtabloc) { 537 FATAL("cannot delete SYMTAB or its elements"); 538 } 539 if (!isarr(x)) 540 return True; 541 if (a[1] == NULL) { /* delete the elements, not the table */ 542 freesymtab(x); 543 x->tval &= ~STR; 544 x->tval |= ARR; 545 x->sval = (char *) makesymtab(NSYMTAB); 546 } else { 547 char *buf = makearraystring(a[1], __func__); 548 freeelem(x, buf); 549 free(buf); 550 } 551 tempfree(x); 552 return True; 553 } 554 555 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 556 { 557 Cell *ap, *k; 558 char *buf; 559 560 ap = execute(a[1]); /* array name */ 561 if (!isarr(ap)) { 562 DPRINTF("making %s into an array\n", ap->nval); 563 if (freeable(ap)) 564 xfree(ap->sval); 565 ap->tval &= ~(STR|NUM|DONTFREE); 566 ap->tval |= ARR; 567 ap->sval = (char *) makesymtab(NSYMTAB); 568 } 569 buf = makearraystring(a[0], __func__); 570 k = lookup(buf, (Array *) ap->sval); 571 tempfree(ap); 572 free(buf); 573 if (k == NULL) 574 return(False); 575 else 576 return(True); 577 } 578 579 580 Cell *matchop(Node **a, int n) /* ~ and match() */ 581 { 582 Cell *x, *y; 583 char *s, *t; 584 int i; 585 fa *pfa; 586 int (*mf)(fa *, const char *) = match, mode = 0; 587 588 if (n == MATCHFCN) { 589 mf = pmatch; 590 mode = 1; 591 } 592 x = execute(a[1]); /* a[1] = target text */ 593 s = getsval(x); 594 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 595 i = (*mf)((fa *) a[2], s); 596 else { 597 y = execute(a[2]); /* a[2] = regular expr */ 598 t = getsval(y); 599 pfa = makedfa(t, mode); 600 i = (*mf)(pfa, s); 601 tempfree(y); 602 } 603 tempfree(x); 604 if (n == MATCHFCN) { 605 int start = patbeg - s + 1; 606 if (patlen < 0) 607 start = 0; 608 setfval(rstartloc, (Awkfloat) start); 609 setfval(rlengthloc, (Awkfloat) patlen); 610 x = gettemp(); 611 x->tval = NUM; 612 x->fval = start; 613 return x; 614 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 615 return(True); 616 else 617 return(False); 618 } 619 620 621 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 622 { 623 Cell *x, *y; 624 int i; 625 626 x = execute(a[0]); 627 i = istrue(x); 628 tempfree(x); 629 switch (n) { 630 case BOR: 631 if (i) return(True); 632 y = execute(a[1]); 633 i = istrue(y); 634 tempfree(y); 635 if (i) return(True); 636 else return(False); 637 case AND: 638 if ( !i ) return(False); 639 y = execute(a[1]); 640 i = istrue(y); 641 tempfree(y); 642 if (i) return(True); 643 else return(False); 644 case NOT: 645 if (i) return(False); 646 else return(True); 647 default: /* can't happen */ 648 FATAL("unknown boolean operator %d", n); 649 } 650 return 0; /*NOTREACHED*/ 651 } 652 653 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 654 { 655 int i; 656 Cell *x, *y; 657 Awkfloat j; 658 659 x = execute(a[0]); 660 y = execute(a[1]); 661 if (x->tval&NUM && y->tval&NUM) { 662 j = x->fval - y->fval; 663 i = j<0? -1: (j>0? 1: 0); 664 } else { 665 i = strcmp(getsval(x), getsval(y)); 666 } 667 tempfree(x); 668 tempfree(y); 669 switch (n) { 670 case LT: if (i<0) return(True); 671 else return(False); 672 case LE: if (i<=0) return(True); 673 else return(False); 674 case NE: if (i!=0) return(True); 675 else return(False); 676 case EQ: if (i == 0) return(True); 677 else return(False); 678 case GE: if (i>=0) return(True); 679 else return(False); 680 case GT: if (i>0) return(True); 681 else return(False); 682 default: /* can't happen */ 683 FATAL("unknown relational operator %d", n); 684 } 685 return 0; /*NOTREACHED*/ 686 } 687 688 void tfree(Cell *a) /* free a tempcell */ 689 { 690 if (freeable(a)) { 691 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 692 xfree(a->sval); 693 } 694 if (a == tmps) 695 FATAL("tempcell list is curdled"); 696 a->cnext = tmps; 697 tmps = a; 698 } 699 700 Cell *gettemp(void) /* get a tempcell */ 701 { int i; 702 Cell *x; 703 704 if (!tmps) { 705 tmps = (Cell *) calloc(100, sizeof(*tmps)); 706 if (!tmps) 707 FATAL("out of space for temporaries"); 708 for (i = 1; i < 100; i++) 709 tmps[i-1].cnext = &tmps[i]; 710 tmps[i-1].cnext = NULL; 711 } 712 x = tmps; 713 tmps = x->cnext; 714 *x = tempcell; 715 return(x); 716 } 717 718 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 719 { 720 Awkfloat val; 721 Cell *x; 722 int m; 723 char *s; 724 725 x = execute(a[0]); 726 val = getfval(x); /* freebsd: defend against super large field numbers */ 727 if ((Awkfloat)INT_MAX < val) 728 FATAL("trying to access out of range field %s", x->nval); 729 m = (int) val; 730 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 731 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 732 /* BUG: can x->nval ever be null??? */ 733 tempfree(x); 734 x = fieldadr(m); 735 x->ctype = OCELL; /* BUG? why are these needed? */ 736 x->csub = CFLD; 737 return(x); 738 } 739 740 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 741 { 742 int k, m, n; 743 char *s; 744 int temp; 745 Cell *x, *y, *z = NULL; 746 747 x = execute(a[0]); 748 y = execute(a[1]); 749 if (a[2] != NULL) 750 z = execute(a[2]); 751 s = getsval(x); 752 k = strlen(s) + 1; 753 if (k <= 1) { 754 tempfree(x); 755 tempfree(y); 756 if (a[2] != NULL) { 757 tempfree(z); 758 } 759 x = gettemp(); 760 setsval(x, ""); 761 return(x); 762 } 763 m = (int) getfval(y); 764 if (m <= 0) 765 m = 1; 766 else if (m > k) 767 m = k; 768 tempfree(y); 769 if (a[2] != NULL) { 770 n = (int) getfval(z); 771 tempfree(z); 772 } else 773 n = k - 1; 774 if (n < 0) 775 n = 0; 776 else if (n > k - m) 777 n = k - m; 778 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 779 y = gettemp(); 780 temp = s[n+m-1]; /* with thanks to John Linderman */ 781 s[n+m-1] = '\0'; 782 setsval(y, s + m - 1); 783 s[n+m-1] = temp; 784 tempfree(x); 785 return(y); 786 } 787 788 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 789 { 790 Cell *x, *y, *z; 791 char *s1, *s2, *p1, *p2, *q; 792 Awkfloat v = 0.0; 793 794 x = execute(a[0]); 795 s1 = getsval(x); 796 y = execute(a[1]); 797 s2 = getsval(y); 798 799 z = gettemp(); 800 for (p1 = s1; *p1 != '\0'; p1++) { 801 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 802 continue; 803 if (*p2 == '\0') { 804 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 805 break; 806 } 807 } 808 tempfree(x); 809 tempfree(y); 810 setfval(z, v); 811 return(z); 812 } 813 814 #define MAXNUMSIZE 50 815 816 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 817 { 818 char *fmt; 819 char *p, *t; 820 const char *os; 821 Cell *x; 822 int flag = 0, n; 823 int fmtwd; /* format width */ 824 int fmtsz = recsize; 825 char *buf = *pbuf; 826 int bufsize = *pbufsize; 827 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 828 #define BUFSZ(a) (bufsize - ((a) - buf)) 829 830 static bool first = true; 831 static bool have_a_format = false; 832 833 if (first) { 834 char xbuf[100]; 835 836 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 837 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 838 first = false; 839 } 840 841 os = s; 842 p = buf; 843 if ((fmt = (char *) malloc(fmtsz)) == NULL) 844 FATAL("out of memory in format()"); 845 while (*s) { 846 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 847 if (*s != '%') { 848 *p++ = *s++; 849 continue; 850 } 851 if (*(s+1) == '%') { 852 *p++ = '%'; 853 s += 2; 854 continue; 855 } 856 /* have to be real careful in case this is a huge number, eg, %100000d */ 857 fmtwd = atoi(s+1); 858 if (fmtwd < 0) 859 fmtwd = -fmtwd; 860 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 861 for (t = fmt; (*t++ = *s) != '\0'; s++) { 862 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 863 FATAL("format item %.30s... ran format() out of memory", os); 864 /* Ignore size specifiers */ 865 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 866 t--; 867 continue; 868 } 869 if (isalpha((uschar)*s)) 870 break; 871 if (*s == '$') { 872 FATAL("'$' not permitted in awk formats"); 873 } 874 if (*s == '*') { 875 if (a == NULL) { 876 FATAL("not enough args in printf(%s)", os); 877 } 878 x = execute(a); 879 a = a->nnext; 880 snprintf(t - 1, FMTSZ(t - 1), 881 "%d", fmtwd=(int) getfval(x)); 882 if (fmtwd < 0) 883 fmtwd = -fmtwd; 884 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 885 t = fmt + strlen(fmt); 886 tempfree(x); 887 } 888 } 889 *t = '\0'; 890 if (fmtwd < 0) 891 fmtwd = -fmtwd; 892 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 893 switch (*s) { 894 case 'a': case 'A': 895 if (have_a_format) 896 flag = *s; 897 else 898 flag = 'f'; 899 break; 900 case 'f': case 'e': case 'g': case 'E': case 'G': 901 flag = 'f'; 902 break; 903 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 904 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 905 *(t-1) = 'j'; 906 *t = *s; 907 *++t = '\0'; 908 break; 909 case 's': 910 flag = 's'; 911 break; 912 case 'c': 913 flag = 'c'; 914 break; 915 case '\0': 916 FATAL("missing printf conversion specifier"); 917 break; 918 default: 919 WARNING("weird printf conversion %s", fmt); 920 flag = '?'; 921 break; 922 } 923 if (a == NULL) 924 FATAL("not enough args in printf(%s)", os); 925 x = execute(a); 926 a = a->nnext; 927 n = MAXNUMSIZE; 928 if (fmtwd > n) 929 n = fmtwd; 930 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 931 switch (flag) { 932 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 933 t = getsval(x); 934 n = strlen(t); 935 if (fmtwd > n) 936 n = fmtwd; 937 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 938 p += strlen(p); 939 snprintf(p, BUFSZ(p), "%s", t); 940 break; 941 case 'a': 942 case 'A': 943 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 944 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 945 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 946 case 's': 947 t = getsval(x); 948 n = strlen(t); 949 if (fmtwd > n) 950 n = fmtwd; 951 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 952 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); 953 snprintf(p, BUFSZ(p), fmt, t); 954 break; 955 case 'c': 956 if (isnum(x)) { 957 if ((int)getfval(x)) 958 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 959 else { 960 *p++ = '\0'; /* explicit null byte */ 961 *p = '\0'; /* next output will start here */ 962 } 963 } else 964 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 965 break; 966 default: 967 FATAL("can't happen: bad conversion %c in format()", flag); 968 } 969 tempfree(x); 970 p += strlen(p); 971 s++; 972 } 973 *p = '\0'; 974 free(fmt); 975 for ( ; a; a = a->nnext) /* evaluate any remaining args */ 976 execute(a); 977 *pbuf = buf; 978 *pbufsize = bufsize; 979 return p - buf; 980 } 981 982 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 983 { 984 Cell *x; 985 Node *y; 986 char *buf; 987 int bufsz=3*recsize; 988 989 if ((buf = (char *) malloc(bufsz)) == NULL) 990 FATAL("out of memory in awksprintf"); 991 y = a[0]->nnext; 992 x = execute(a[0]); 993 if (format(&buf, &bufsz, getsval(x), y) == -1) 994 FATAL("sprintf string %.30s... too long. can't happen.", buf); 995 tempfree(x); 996 x = gettemp(); 997 x->sval = buf; 998 x->tval = STR; 999 return(x); 1000 } 1001 1002 Cell *awkprintf(Node **a, int n) /* printf */ 1003 { /* a[0] is list of args, starting with format string */ 1004 /* a[1] is redirection operator, a[2] is redirection file */ 1005 FILE *fp; 1006 Cell *x; 1007 Node *y; 1008 char *buf; 1009 int len; 1010 int bufsz=3*recsize; 1011 1012 if ((buf = (char *) malloc(bufsz)) == NULL) 1013 FATAL("out of memory in awkprintf"); 1014 y = a[0]->nnext; 1015 x = execute(a[0]); 1016 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1017 FATAL("printf string %.30s... too long. can't happen.", buf); 1018 tempfree(x); 1019 if (a[1] == NULL) { 1020 /* fputs(buf, stdout); */ 1021 fwrite(buf, len, 1, stdout); 1022 if (ferror(stdout)) 1023 FATAL("write error on stdout"); 1024 } else { 1025 fp = redirect(ptoi(a[1]), a[2]); 1026 /* fputs(buf, fp); */ 1027 fwrite(buf, len, 1, fp); 1028 fflush(fp); 1029 if (ferror(fp)) 1030 FATAL("write error on %s", filename(fp)); 1031 } 1032 free(buf); 1033 return(True); 1034 } 1035 1036 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1037 { 1038 Awkfloat i, j = 0; 1039 double v; 1040 Cell *x, *y, *z; 1041 1042 x = execute(a[0]); 1043 i = getfval(x); 1044 tempfree(x); 1045 if (n != UMINUS && n != UPLUS) { 1046 y = execute(a[1]); 1047 j = getfval(y); 1048 tempfree(y); 1049 } 1050 z = gettemp(); 1051 switch (n) { 1052 case ADD: 1053 i += j; 1054 break; 1055 case MINUS: 1056 i -= j; 1057 break; 1058 case MULT: 1059 i *= j; 1060 break; 1061 case DIVIDE: 1062 if (j == 0) 1063 FATAL("division by zero"); 1064 i /= j; 1065 break; 1066 case MOD: 1067 if (j == 0) 1068 FATAL("division by zero in mod"); 1069 modf(i/j, &v); 1070 i = i - j * v; 1071 break; 1072 case UMINUS: 1073 i = -i; 1074 break; 1075 case UPLUS: /* handled by getfval(), above */ 1076 break; 1077 case POWER: 1078 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1079 i = ipow(i, (int) j); 1080 else { 1081 errno = 0; 1082 i = errcheck(pow(i, j), "pow"); 1083 } 1084 break; 1085 default: /* can't happen */ 1086 FATAL("illegal arithmetic operator %d", n); 1087 } 1088 setfval(z, i); 1089 return(z); 1090 } 1091 1092 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1093 { 1094 double v; 1095 1096 if (n <= 0) 1097 return 1; 1098 v = ipow(x, n/2); 1099 if (n % 2 == 0) 1100 return v * v; 1101 else 1102 return x * v * v; 1103 } 1104 1105 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 1106 { 1107 Cell *x, *z; 1108 int k; 1109 Awkfloat xf; 1110 1111 x = execute(a[0]); 1112 xf = getfval(x); 1113 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1114 if (n == PREINCR || n == PREDECR) { 1115 setfval(x, xf + k); 1116 return(x); 1117 } 1118 z = gettemp(); 1119 setfval(z, xf); 1120 setfval(x, xf + k); 1121 tempfree(x); 1122 return(z); 1123 } 1124 1125 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1126 { /* this is subtle; don't muck with it. */ 1127 Cell *x, *y; 1128 Awkfloat xf, yf; 1129 double v; 1130 1131 y = execute(a[1]); 1132 x = execute(a[0]); 1133 if (n == ASSIGN) { /* ordinary assignment */ 1134 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1135 ; /* self-assignment: leave alone unless it's a field or NF */ 1136 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1137 setsval(x, getsval(y)); 1138 x->fval = getfval(y); 1139 x->tval |= NUM; 1140 } 1141 else if (isstr(y)) 1142 setsval(x, getsval(y)); 1143 else if (isnum(y)) 1144 setfval(x, getfval(y)); 1145 else 1146 funnyvar(y, "read value of"); 1147 tempfree(y); 1148 return(x); 1149 } 1150 xf = getfval(x); 1151 yf = getfval(y); 1152 switch (n) { 1153 case ADDEQ: 1154 xf += yf; 1155 break; 1156 case SUBEQ: 1157 xf -= yf; 1158 break; 1159 case MULTEQ: 1160 xf *= yf; 1161 break; 1162 case DIVEQ: 1163 if (yf == 0) 1164 FATAL("division by zero in /="); 1165 xf /= yf; 1166 break; 1167 case MODEQ: 1168 if (yf == 0) 1169 FATAL("division by zero in %%="); 1170 modf(xf/yf, &v); 1171 xf = xf - yf * v; 1172 break; 1173 case POWEQ: 1174 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1175 xf = ipow(xf, (int) yf); 1176 else { 1177 errno = 0; 1178 xf = errcheck(pow(xf, yf), "pow"); 1179 } 1180 break; 1181 default: 1182 FATAL("illegal assignment operator %d", n); 1183 break; 1184 } 1185 tempfree(y); 1186 setfval(x, xf); 1187 return(x); 1188 } 1189 1190 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1191 { 1192 Cell *x, *y, *z; 1193 int n1, n2; 1194 char *s = NULL; 1195 int ssz = 0; 1196 1197 x = execute(a[0]); 1198 n1 = strlen(getsval(x)); 1199 adjbuf(&s, &ssz, n1, recsize, 0, "cat1"); 1200 memcpy(s, x->sval, n1); 1201 1202 y = execute(a[1]); 1203 n2 = strlen(getsval(y)); 1204 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1205 memcpy(s + n1, y->sval, n2); 1206 s[n1 + n2] = '\0'; 1207 1208 tempfree(x); 1209 tempfree(y); 1210 1211 z = gettemp(); 1212 z->sval = s; 1213 z->tval = STR; 1214 1215 return(z); 1216 } 1217 1218 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1219 { 1220 Cell *x; 1221 1222 if (a[0] == NULL) 1223 x = execute(a[1]); 1224 else { 1225 x = execute(a[0]); 1226 if (istrue(x)) { 1227 tempfree(x); 1228 x = execute(a[1]); 1229 } 1230 } 1231 return x; 1232 } 1233 1234 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1235 { 1236 Cell *x; 1237 int pair; 1238 1239 pair = ptoi(a[3]); 1240 if (pairstack[pair] == 0) { 1241 x = execute(a[0]); 1242 if (istrue(x)) 1243 pairstack[pair] = 1; 1244 tempfree(x); 1245 } 1246 if (pairstack[pair] == 1) { 1247 x = execute(a[1]); 1248 if (istrue(x)) 1249 pairstack[pair] = 0; 1250 tempfree(x); 1251 x = execute(a[2]); 1252 return(x); 1253 } 1254 return(False); 1255 } 1256 1257 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1258 { 1259 Cell *x = NULL, *y, *ap; 1260 const char *s, *origs, *t; 1261 const char *fs = NULL; 1262 char *origfs = NULL; 1263 int sep; 1264 char temp, num[50]; 1265 int n, tempstat, arg3type; 1266 double result; 1267 1268 y = execute(a[0]); /* source string */ 1269 origs = s = strdup(getsval(y)); 1270 arg3type = ptoi(a[3]); 1271 if (a[2] == NULL) /* fs string */ 1272 fs = getsval(fsloc); 1273 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1274 x = execute(a[2]); 1275 fs = origfs = strdup(getsval(x)); 1276 tempfree(x); 1277 } else if (arg3type == REGEXPR) 1278 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1279 else 1280 FATAL("illegal type of split"); 1281 sep = *fs; 1282 ap = execute(a[1]); /* array name */ 1283 freesymtab(ap); 1284 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1285 ap->tval &= ~STR; 1286 ap->tval |= ARR; 1287 ap->sval = (char *) makesymtab(NSYMTAB); 1288 1289 n = 0; 1290 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1291 /* split(s, a, //); have to arrange that it looks like empty sep */ 1292 arg3type = 0; 1293 fs = ""; 1294 sep = 0; 1295 } 1296 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1297 fa *pfa; 1298 if (arg3type == REGEXPR) { /* it's ready already */ 1299 pfa = (fa *) a[2]; 1300 } else { 1301 pfa = makedfa(fs, 1); 1302 } 1303 if (nematch(pfa,s)) { 1304 tempstat = pfa->initstat; 1305 pfa->initstat = 2; 1306 do { 1307 n++; 1308 snprintf(num, sizeof(num), "%d", n); 1309 temp = *patbeg; 1310 setptr(patbeg, '\0'); 1311 if (is_number(s, & result)) 1312 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1313 else 1314 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1315 setptr(patbeg, temp); 1316 s = patbeg + patlen; 1317 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1318 n++; 1319 snprintf(num, sizeof(num), "%d", n); 1320 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1321 pfa->initstat = tempstat; 1322 goto spdone; 1323 } 1324 } while (nematch(pfa,s)); 1325 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1326 /* cf gsub and refldbld */ 1327 } 1328 n++; 1329 snprintf(num, sizeof(num), "%d", n); 1330 if (is_number(s, & result)) 1331 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1332 else 1333 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1334 spdone: 1335 pfa = NULL; 1336 } else if (sep == ' ') { 1337 for (n = 0; ; ) { 1338 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1339 while (ISWS(*s)) 1340 s++; 1341 if (*s == '\0') 1342 break; 1343 n++; 1344 t = s; 1345 do 1346 s++; 1347 while (*s != '\0' && !ISWS(*s)); 1348 temp = *s; 1349 setptr(s, '\0'); 1350 snprintf(num, sizeof(num), "%d", n); 1351 if (is_number(t, & result)) 1352 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1353 else 1354 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1355 setptr(s, temp); 1356 if (*s != '\0') 1357 s++; 1358 } 1359 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1360 for (n = 0; *s != '\0'; s++) { 1361 char buf[2]; 1362 n++; 1363 snprintf(num, sizeof(num), "%d", n); 1364 buf[0] = *s; 1365 buf[1] = '\0'; 1366 if (isdigit((uschar)buf[0])) 1367 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1368 else 1369 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1370 } 1371 } else if (*s != '\0') { 1372 for (;;) { 1373 n++; 1374 t = s; 1375 while (*s != sep && *s != '\n' && *s != '\0') 1376 s++; 1377 temp = *s; 1378 setptr(s, '\0'); 1379 snprintf(num, sizeof(num), "%d", n); 1380 if (is_number(t, & result)) 1381 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1382 else 1383 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1384 setptr(s, temp); 1385 if (*s++ == '\0') 1386 break; 1387 } 1388 } 1389 tempfree(ap); 1390 tempfree(y); 1391 xfree(origs); 1392 xfree(origfs); 1393 x = gettemp(); 1394 x->tval = NUM; 1395 x->fval = n; 1396 return(x); 1397 } 1398 1399 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1400 { 1401 Cell *x; 1402 1403 x = execute(a[0]); 1404 if (istrue(x)) { 1405 tempfree(x); 1406 x = execute(a[1]); 1407 } else { 1408 tempfree(x); 1409 x = execute(a[2]); 1410 } 1411 return(x); 1412 } 1413 1414 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1415 { 1416 Cell *x; 1417 1418 x = execute(a[0]); 1419 if (istrue(x)) { 1420 tempfree(x); 1421 x = execute(a[1]); 1422 } else if (a[2] != NULL) { 1423 tempfree(x); 1424 x = execute(a[2]); 1425 } 1426 return(x); 1427 } 1428 1429 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1430 { 1431 Cell *x; 1432 1433 for (;;) { 1434 x = execute(a[0]); 1435 if (!istrue(x)) 1436 return(x); 1437 tempfree(x); 1438 x = execute(a[1]); 1439 if (isbreak(x)) { 1440 x = True; 1441 return(x); 1442 } 1443 if (isnext(x) || isexit(x) || isret(x)) 1444 return(x); 1445 tempfree(x); 1446 } 1447 } 1448 1449 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1450 { 1451 Cell *x; 1452 1453 for (;;) { 1454 x = execute(a[0]); 1455 if (isbreak(x)) 1456 return True; 1457 if (isnext(x) || isexit(x) || isret(x)) 1458 return(x); 1459 tempfree(x); 1460 x = execute(a[1]); 1461 if (!istrue(x)) 1462 return(x); 1463 tempfree(x); 1464 } 1465 } 1466 1467 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1468 { 1469 Cell *x; 1470 1471 x = execute(a[0]); 1472 tempfree(x); 1473 for (;;) { 1474 if (a[1]!=NULL) { 1475 x = execute(a[1]); 1476 if (!istrue(x)) return(x); 1477 else tempfree(x); 1478 } 1479 x = execute(a[3]); 1480 if (isbreak(x)) /* turn off break */ 1481 return True; 1482 if (isnext(x) || isexit(x) || isret(x)) 1483 return(x); 1484 tempfree(x); 1485 x = execute(a[2]); 1486 tempfree(x); 1487 } 1488 } 1489 1490 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1491 { 1492 Cell *x, *vp, *arrayp, *cp, *ncp; 1493 Array *tp; 1494 int i; 1495 1496 vp = execute(a[0]); 1497 arrayp = execute(a[1]); 1498 if (!isarr(arrayp)) { 1499 return True; 1500 } 1501 tp = (Array *) arrayp->sval; 1502 tempfree(arrayp); 1503 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1504 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1505 setsval(vp, cp->nval); 1506 ncp = cp->cnext; 1507 x = execute(a[2]); 1508 if (isbreak(x)) { 1509 tempfree(vp); 1510 return True; 1511 } 1512 if (isnext(x) || isexit(x) || isret(x)) { 1513 tempfree(vp); 1514 return(x); 1515 } 1516 tempfree(x); 1517 } 1518 } 1519 return True; 1520 } 1521 1522 static char *nawk_convert(const char *s, int (*fun_c)(int), 1523 wint_t (*fun_wc)(wint_t)) 1524 { 1525 char *buf = NULL; 1526 char *pbuf = NULL; 1527 const char *ps = NULL; 1528 size_t n = 0; 1529 wchar_t wc; 1530 size_t sz = MB_CUR_MAX; 1531 1532 if (sz == 1) { 1533 buf = tostring(s); 1534 1535 for (pbuf = buf; *pbuf; pbuf++) 1536 *pbuf = fun_c((uschar)*pbuf); 1537 1538 return buf; 1539 } else { 1540 /* upper/lower character may be shorter/longer */ 1541 buf = tostringN(s, strlen(s) * sz + 1); 1542 1543 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1544 /* 1545 * Reset internal state here too. 1546 * Assign result to avoid a compiler warning. (Casting to void 1547 * doesn't work.) 1548 * Increment said variable to avoid a different warning. 1549 */ 1550 int unused = wctomb(NULL, L'\0'); 1551 unused++; 1552 1553 ps = s; 1554 pbuf = buf; 1555 while (n = mbtowc(&wc, ps, sz), 1556 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1557 { 1558 ps += n; 1559 1560 n = wctomb(pbuf, fun_wc(wc)); 1561 if (n == (size_t)-1) 1562 FATAL("illegal wide character %s", s); 1563 1564 pbuf += n; 1565 } 1566 1567 *pbuf = '\0'; 1568 1569 if (n) 1570 FATAL("illegal byte sequence %s", s); 1571 1572 return buf; 1573 } 1574 } 1575 1576 #ifdef __DJGPP__ 1577 static wint_t towupper(wint_t wc) 1578 { 1579 if (wc >= 0 && wc < 256) 1580 return toupper(wc & 0xFF); 1581 1582 return wc; 1583 } 1584 1585 static wint_t towlower(wint_t wc) 1586 { 1587 if (wc >= 0 && wc < 256) 1588 return tolower(wc & 0xFF); 1589 1590 return wc; 1591 } 1592 #endif 1593 1594 static char *nawk_toupper(const char *s) 1595 { 1596 return nawk_convert(s, toupper, towupper); 1597 } 1598 1599 static char *nawk_tolower(const char *s) 1600 { 1601 return nawk_convert(s, tolower, towlower); 1602 } 1603 1604 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 1605 { 1606 Cell *x, *y; 1607 Awkfloat u; 1608 int t, sz; 1609 Awkfloat tmp; 1610 char *buf, *fmt; 1611 Node *nextarg; 1612 FILE *fp; 1613 int status = 0; 1614 time_t tv; 1615 struct tm *tm; 1616 1617 t = ptoi(a[0]); 1618 x = execute(a[1]); 1619 nextarg = a[1]->nnext; 1620 switch (t) { 1621 case FLENGTH: 1622 if (isarr(x)) 1623 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 1624 else 1625 u = strlen(getsval(x)); 1626 break; 1627 case FLOG: 1628 errno = 0; 1629 u = errcheck(log(getfval(x)), "log"); 1630 break; 1631 case FINT: 1632 modf(getfval(x), &u); break; 1633 case FEXP: 1634 errno = 0; 1635 u = errcheck(exp(getfval(x)), "exp"); 1636 break; 1637 case FSQRT: 1638 errno = 0; 1639 u = errcheck(sqrt(getfval(x)), "sqrt"); 1640 break; 1641 case FSIN: 1642 u = sin(getfval(x)); break; 1643 case FCOS: 1644 u = cos(getfval(x)); break; 1645 case FATAN: 1646 if (nextarg == NULL) { 1647 WARNING("atan2 requires two arguments; returning 1.0"); 1648 u = 1.0; 1649 } else { 1650 y = execute(a[1]->nnext); 1651 u = atan2(getfval(x), getfval(y)); 1652 tempfree(y); 1653 nextarg = nextarg->nnext; 1654 } 1655 break; 1656 case FCOMPL: 1657 u = ~((int)getfval(x)); 1658 break; 1659 case FAND: 1660 if (nextarg == 0) { 1661 WARNING("and requires two arguments; returning 0"); 1662 u = 0; 1663 break; 1664 } 1665 y = execute(a[1]->nnext); 1666 u = ((int)getfval(x)) & ((int)getfval(y)); 1667 tempfree(y); 1668 nextarg = nextarg->nnext; 1669 break; 1670 case FFOR: 1671 if (nextarg == 0) { 1672 WARNING("or requires two arguments; returning 0"); 1673 u = 0; 1674 break; 1675 } 1676 y = execute(a[1]->nnext); 1677 u = ((int)getfval(x)) | ((int)getfval(y)); 1678 tempfree(y); 1679 nextarg = nextarg->nnext; 1680 break; 1681 case FXOR: 1682 if (nextarg == 0) { 1683 WARNING("xor requires two arguments; returning 0"); 1684 u = 0; 1685 break; 1686 } 1687 y = execute(a[1]->nnext); 1688 u = ((int)getfval(x)) ^ ((int)getfval(y)); 1689 tempfree(y); 1690 nextarg = nextarg->nnext; 1691 break; 1692 case FLSHIFT: 1693 if (nextarg == 0) { 1694 WARNING("lshift requires two arguments; returning 0"); 1695 u = 0; 1696 break; 1697 } 1698 y = execute(a[1]->nnext); 1699 u = ((int)getfval(x)) << ((int)getfval(y)); 1700 tempfree(y); 1701 nextarg = nextarg->nnext; 1702 break; 1703 case FRSHIFT: 1704 if (nextarg == 0) { 1705 WARNING("rshift requires two arguments; returning 0"); 1706 u = 0; 1707 break; 1708 } 1709 y = execute(a[1]->nnext); 1710 u = ((int)getfval(x)) >> ((int)getfval(y)); 1711 tempfree(y); 1712 nextarg = nextarg->nnext; 1713 break; 1714 case FSYSTEM: 1715 fflush(stdout); /* in case something is buffered already */ 1716 status = system(getsval(x)); 1717 u = status; 1718 if (status != -1) { 1719 if (WIFEXITED(status)) { 1720 u = WEXITSTATUS(status); 1721 } else if (WIFSIGNALED(status)) { 1722 u = WTERMSIG(status) + 256; 1723 #ifdef WCOREDUMP 1724 if (WCOREDUMP(status)) 1725 u += 256; 1726 #endif 1727 } else /* something else?!? */ 1728 u = 0; 1729 } 1730 break; 1731 case FRAND: 1732 /* random() returns numbers in [0..2^31-1] 1733 * in order to get a number in [0, 1), divide it by 2^31 1734 */ 1735 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1736 break; 1737 case FSRAND: 1738 if (isrec(x)) /* no argument provided */ 1739 u = time((time_t *)0); 1740 else 1741 u = getfval(x); 1742 tmp = u; 1743 srandom((unsigned long) u); 1744 u = srand_seed; 1745 srand_seed = tmp; 1746 break; 1747 case FTOUPPER: 1748 case FTOLOWER: 1749 if (t == FTOUPPER) 1750 buf = nawk_toupper(getsval(x)); 1751 else 1752 buf = nawk_tolower(getsval(x)); 1753 tempfree(x); 1754 x = gettemp(); 1755 setsval(x, buf); 1756 free(buf); 1757 return x; 1758 case FFLUSH: 1759 if (isrec(x) || strlen(getsval(x)) == 0) { 1760 flush_all(); /* fflush() or fflush("") -> all */ 1761 u = 0; 1762 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1763 u = EOF; 1764 else 1765 u = fflush(fp); 1766 break; 1767 case FSYSTIME: 1768 u = time((time_t *) 0); 1769 break; 1770 case FSTRFTIME: 1771 /* strftime([format [,timestamp]]) */ 1772 if (nextarg) { 1773 y = execute(nextarg); 1774 nextarg = nextarg->nnext; 1775 tv = (time_t) getfval(y); 1776 tempfree(y); 1777 } else 1778 tv = time((time_t *) 0); 1779 tm = localtime(&tv); 1780 if (tm == NULL) 1781 FATAL("bad time %ld", (long)tv); 1782 1783 if (isrec(x)) { 1784 /* format argument not provided, use default */ 1785 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 1786 } else 1787 fmt = tostring(getsval(x)); 1788 1789 sz = 32; 1790 buf = NULL; 1791 do { 1792 if ((buf = realloc(buf, (sz *= 2))) == NULL) 1793 FATAL("out of memory in strftime"); 1794 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 1795 1796 y = gettemp(); 1797 setsval(y, buf); 1798 free(fmt); 1799 free(buf); 1800 1801 return y; 1802 default: /* can't happen */ 1803 FATAL("illegal function type %d", t); 1804 break; 1805 } 1806 tempfree(x); 1807 x = gettemp(); 1808 setfval(x, u); 1809 if (nextarg != NULL) { 1810 WARNING("warning: function has too many arguments"); 1811 for ( ; nextarg; nextarg = nextarg->nnext) 1812 execute(nextarg); 1813 } 1814 return(x); 1815 } 1816 1817 Cell *printstat(Node **a, int n) /* print a[0] */ 1818 { 1819 Node *x; 1820 Cell *y; 1821 FILE *fp; 1822 1823 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1824 fp = stdout; 1825 else 1826 fp = redirect(ptoi(a[1]), a[2]); 1827 for (x = a[0]; x != NULL; x = x->nnext) { 1828 y = execute(x); 1829 fputs(getpssval(y), fp); 1830 tempfree(y); 1831 if (x->nnext == NULL) 1832 fputs(getsval(orsloc), fp); 1833 else 1834 fputs(getsval(ofsloc), fp); 1835 } 1836 if (a[1] != NULL) 1837 fflush(fp); 1838 if (ferror(fp)) 1839 FATAL("write error on %s", filename(fp)); 1840 return(True); 1841 } 1842 1843 Cell *nullproc(Node **a, int n) 1844 { 1845 return 0; 1846 } 1847 1848 1849 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1850 { 1851 FILE *fp; 1852 Cell *x; 1853 char *fname; 1854 1855 x = execute(b); 1856 fname = getsval(x); 1857 fp = openfile(a, fname, NULL); 1858 if (fp == NULL) 1859 FATAL("can't open file %s", fname); 1860 tempfree(x); 1861 return fp; 1862 } 1863 1864 struct files { 1865 FILE *fp; 1866 const char *fname; 1867 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1868 } *files; 1869 1870 size_t nfiles; 1871 1872 static void stdinit(void) /* in case stdin, etc., are not constants */ 1873 { 1874 nfiles = FOPEN_MAX; 1875 files = (struct files *) calloc(nfiles, sizeof(*files)); 1876 if (files == NULL) 1877 FATAL("can't allocate file memory for %zu files", nfiles); 1878 files[0].fp = stdin; 1879 files[0].fname = "/dev/stdin"; 1880 files[0].mode = LT; 1881 files[1].fp = stdout; 1882 files[1].fname = "/dev/stdout"; 1883 files[1].mode = GT; 1884 files[2].fp = stderr; 1885 files[2].fname = "/dev/stderr"; 1886 files[2].mode = GT; 1887 } 1888 1889 FILE *openfile(int a, const char *us, bool *pnewflag) 1890 { 1891 const char *s = us; 1892 size_t i; 1893 int m; 1894 FILE *fp = NULL; 1895 1896 if (*s == '\0') 1897 FATAL("null file name in print or getline"); 1898 for (i = 0; i < nfiles; i++) 1899 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1900 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1901 a == FFLUSH)) { 1902 if (pnewflag) 1903 *pnewflag = false; 1904 return files[i].fp; 1905 } 1906 if (a == FFLUSH) /* didn't find it, so don't create it! */ 1907 return NULL; 1908 1909 for (i = 0; i < nfiles; i++) 1910 if (files[i].fp == NULL) 1911 break; 1912 if (i >= nfiles) { 1913 struct files *nf; 1914 size_t nnf = nfiles + FOPEN_MAX; 1915 nf = (struct files *) realloc(files, nnf * sizeof(*nf)); 1916 if (nf == NULL) 1917 FATAL("cannot grow files for %s and %zu files", s, nnf); 1918 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1919 nfiles = nnf; 1920 files = nf; 1921 } 1922 fflush(stdout); /* force a semblance of order */ 1923 m = a; 1924 if (a == GT) { 1925 fp = fopen(s, "w"); 1926 } else if (a == APPEND) { 1927 fp = fopen(s, "a"); 1928 m = GT; /* so can mix > and >> */ 1929 } else if (a == '|') { /* output pipe */ 1930 fp = popen(s, "w"); 1931 } else if (a == LE) { /* input pipe */ 1932 fp = popen(s, "r"); 1933 } else if (a == LT) { /* getline <file */ 1934 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1935 } else /* can't happen */ 1936 FATAL("illegal redirection %d", a); 1937 if (fp != NULL) { 1938 files[i].fname = tostring(s); 1939 files[i].fp = fp; 1940 files[i].mode = m; 1941 if (pnewflag) 1942 *pnewflag = true; 1943 if (fp != stdin && fp != stdout && fp != stderr) 1944 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1945 } 1946 return fp; 1947 } 1948 1949 const char *filename(FILE *fp) 1950 { 1951 size_t i; 1952 1953 for (i = 0; i < nfiles; i++) 1954 if (fp == files[i].fp) 1955 return files[i].fname; 1956 return "???"; 1957 } 1958 1959 Cell *closefile(Node **a, int n) 1960 { 1961 Cell *x; 1962 size_t i; 1963 bool stat; 1964 1965 x = execute(a[0]); 1966 getsval(x); 1967 stat = true; 1968 for (i = 0; i < nfiles; i++) { 1969 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 1970 continue; 1971 if (ferror(files[i].fp)) 1972 FATAL("i/o error occurred on %s", files[i].fname); 1973 if (files[i].fp == stdin || files[i].fp == stdout || 1974 files[i].fp == stderr) 1975 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 1976 else if (files[i].mode == '|' || files[i].mode == LE) 1977 stat = pclose(files[i].fp) == -1; 1978 else 1979 stat = fclose(files[i].fp) == EOF; 1980 if (stat) 1981 FATAL("i/o error occurred closing %s", files[i].fname); 1982 if (i > 2) /* don't do /dev/std... */ 1983 xfree(files[i].fname); 1984 files[i].fname = NULL; /* watch out for ref thru this */ 1985 files[i].fp = NULL; 1986 break; 1987 } 1988 tempfree(x); 1989 x = gettemp(); 1990 setfval(x, (Awkfloat) (stat ? -1 : 0)); 1991 return(x); 1992 } 1993 1994 void closeall(void) 1995 { 1996 size_t i; 1997 bool stat = false; 1998 1999 for (i = 0; i < nfiles; i++) { 2000 if (! files[i].fp) 2001 continue; 2002 if (ferror(files[i].fp)) 2003 FATAL( "i/o error occurred on %s", files[i].fname ); 2004 if (files[i].fp == stdin) 2005 continue; 2006 if (files[i].mode == '|' || files[i].mode == LE) 2007 stat = pclose(files[i].fp) == -1; 2008 else if (files[i].fp == stdout || files[i].fp == stderr) 2009 stat = fflush(files[i].fp) == EOF; 2010 else 2011 stat = fclose(files[i].fp) == EOF; 2012 if (stat) 2013 FATAL( "i/o error occurred while closing %s", files[i].fname ); 2014 } 2015 } 2016 2017 static void flush_all(void) 2018 { 2019 size_t i; 2020 2021 for (i = 0; i < nfiles; i++) 2022 if (files[i].fp) 2023 fflush(files[i].fp); 2024 } 2025 2026 void backsub(char **pb_ptr, const char **sptr_ptr); 2027 2028 Cell *sub(Node **a, int nnn) /* substitute command */ 2029 { 2030 const char *sptr, *q; 2031 Cell *x, *y, *result; 2032 char *t, *buf, *pb; 2033 fa *pfa; 2034 int bufsz = recsize; 2035 2036 if ((buf = (char *) malloc(bufsz)) == NULL) 2037 FATAL("out of memory in sub"); 2038 x = execute(a[3]); /* target string */ 2039 t = getsval(x); 2040 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2041 pfa = (fa *) a[1]; /* regular expression */ 2042 else { 2043 y = execute(a[1]); 2044 pfa = makedfa(getsval(y), 1); 2045 tempfree(y); 2046 } 2047 y = execute(a[2]); /* replacement string */ 2048 result = False; 2049 if (pmatch(pfa, t)) { 2050 sptr = t; 2051 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 2052 pb = buf; 2053 while (sptr < patbeg) 2054 *pb++ = *sptr++; 2055 sptr = getsval(y); 2056 while (*sptr != '\0') { 2057 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2058 if (*sptr == '\\') { 2059 backsub(&pb, &sptr); 2060 } else if (*sptr == '&') { 2061 sptr++; 2062 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2063 for (q = patbeg; q < patbeg+patlen; ) 2064 *pb++ = *q++; 2065 } else 2066 *pb++ = *sptr++; 2067 } 2068 *pb = '\0'; 2069 if (pb > buf + bufsz) 2070 FATAL("sub result1 %.30s too big; can't happen", buf); 2071 sptr = patbeg + patlen; 2072 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2073 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2074 while ((*pb++ = *sptr++) != '\0') 2075 continue; 2076 } 2077 if (pb > buf + bufsz) 2078 FATAL("sub result2 %.30s too big; can't happen", buf); 2079 setsval(x, buf); /* BUG: should be able to avoid copy */ 2080 result = True; 2081 } 2082 tempfree(x); 2083 tempfree(y); 2084 free(buf); 2085 return result; 2086 } 2087 2088 Cell *gsub(Node **a, int nnn) /* global substitute */ 2089 { 2090 Cell *x, *y; 2091 char *rptr, *pb; 2092 const char *q, *t, *sptr; 2093 char *buf; 2094 fa *pfa; 2095 int mflag, tempstat, num; 2096 int bufsz = recsize; 2097 2098 if ((buf = (char *) malloc(bufsz)) == NULL) 2099 FATAL("out of memory in gsub"); 2100 mflag = 0; /* if mflag == 0, can replace empty string */ 2101 num = 0; 2102 x = execute(a[3]); /* target string */ 2103 t = getsval(x); 2104 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2105 pfa = (fa *) a[1]; /* regular expression */ 2106 else { 2107 y = execute(a[1]); 2108 pfa = makedfa(getsval(y), 1); 2109 tempfree(y); 2110 } 2111 y = execute(a[2]); /* replacement string */ 2112 if (pmatch(pfa, t)) { 2113 tempstat = pfa->initstat; 2114 pfa->initstat = 2; 2115 pb = buf; 2116 rptr = getsval(y); 2117 do { 2118 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2119 if (mflag == 0) { /* can replace empty */ 2120 num++; 2121 sptr = rptr; 2122 while (*sptr != '\0') { 2123 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2124 if (*sptr == '\\') { 2125 backsub(&pb, &sptr); 2126 } else if (*sptr == '&') { 2127 sptr++; 2128 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2129 for (q = patbeg; q < patbeg+patlen; ) 2130 *pb++ = *q++; 2131 } else 2132 *pb++ = *sptr++; 2133 } 2134 } 2135 if (*t == '\0') /* at end */ 2136 goto done; 2137 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2138 *pb++ = *t++; 2139 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2140 FATAL("gsub result0 %.30s too big; can't happen", buf); 2141 mflag = 0; 2142 } 2143 else { /* matched nonempty string */ 2144 num++; 2145 sptr = t; 2146 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2147 while (sptr < patbeg) 2148 *pb++ = *sptr++; 2149 sptr = rptr; 2150 while (*sptr != '\0') { 2151 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2152 if (*sptr == '\\') { 2153 backsub(&pb, &sptr); 2154 } else if (*sptr == '&') { 2155 sptr++; 2156 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2157 for (q = patbeg; q < patbeg+patlen; ) 2158 *pb++ = *q++; 2159 } else 2160 *pb++ = *sptr++; 2161 } 2162 t = patbeg + patlen; 2163 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2164 goto done; 2165 if (pb > buf + bufsz) 2166 FATAL("gsub result1 %.30s too big; can't happen", buf); 2167 mflag = 1; 2168 } 2169 } while (pmatch(pfa,t)); 2170 sptr = t; 2171 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2172 while ((*pb++ = *sptr++) != '\0') 2173 continue; 2174 done: if (pb < buf + bufsz) 2175 *pb = '\0'; 2176 else if (*(pb-1) != '\0') 2177 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2178 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2179 pfa->initstat = tempstat; 2180 } 2181 tempfree(x); 2182 tempfree(y); 2183 x = gettemp(); 2184 x->tval = NUM; 2185 x->fval = num; 2186 free(buf); 2187 return(x); 2188 } 2189 2190 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2191 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2192 { 2193 Cell *x, *y, *res, *h; 2194 char *rptr; 2195 const char *sptr; 2196 char *buf, *pb; 2197 const char *t, *q; 2198 fa *pfa; 2199 int mflag, tempstat, num, whichm; 2200 int bufsz = recsize; 2201 2202 if ((buf = malloc(bufsz)) == NULL) 2203 FATAL("out of memory in gensub"); 2204 mflag = 0; /* if mflag == 0, can replace empty string */ 2205 num = 0; 2206 x = execute(a[4]); /* source string */ 2207 t = getsval(x); 2208 res = copycell(x); /* target string - initially copy of source */ 2209 res->csub = CTEMP; /* result values are temporary */ 2210 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2211 pfa = (fa *) a[1]; /* regular expression */ 2212 else { 2213 y = execute(a[1]); 2214 pfa = makedfa(getsval(y), 1); 2215 tempfree(y); 2216 } 2217 y = execute(a[2]); /* replacement string */ 2218 h = execute(a[3]); /* which matches should be replaced */ 2219 sptr = getsval(h); 2220 if (sptr[0] == 'g' || sptr[0] == 'G') 2221 whichm = -1; 2222 else { 2223 /* 2224 * The specified number is index of replacement, starting 2225 * from 1. GNU awk treats index lower than 0 same as 2226 * 1, we do same for compatibility. 2227 */ 2228 whichm = (int) getfval(h) - 1; 2229 if (whichm < 0) 2230 whichm = 0; 2231 } 2232 tempfree(h); 2233 2234 if (pmatch(pfa, t)) { 2235 char *sl; 2236 2237 tempstat = pfa->initstat; 2238 pfa->initstat = 2; 2239 pb = buf; 2240 rptr = getsval(y); 2241 /* 2242 * XXX if there are any backreferences in subst string, 2243 * complain now. 2244 */ 2245 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2246 if (strchr("0123456789", sl[1])) { 2247 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2248 } 2249 } 2250 2251 do { 2252 if (whichm >= 0 && whichm != num) { 2253 num++; 2254 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2255 2256 /* copy the part of string up to and including 2257 * match to output buffer */ 2258 while (t < patbeg + patlen) 2259 *pb++ = *t++; 2260 continue; 2261 } 2262 2263 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2264 if (mflag == 0) { /* can replace empty */ 2265 num++; 2266 sptr = rptr; 2267 while (*sptr != 0) { 2268 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2269 if (*sptr == '\\') { 2270 backsub(&pb, &sptr); 2271 } else if (*sptr == '&') { 2272 sptr++; 2273 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2274 for (q = patbeg; q < patbeg+patlen; ) 2275 *pb++ = *q++; 2276 } else 2277 *pb++ = *sptr++; 2278 } 2279 } 2280 if (*t == 0) /* at end */ 2281 goto done; 2282 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2283 *pb++ = *t++; 2284 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2285 FATAL("gensub result0 %.30s too big; can't happen", buf); 2286 mflag = 0; 2287 } 2288 else { /* matched nonempty string */ 2289 num++; 2290 sptr = t; 2291 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2292 while (sptr < patbeg) 2293 *pb++ = *sptr++; 2294 sptr = rptr; 2295 while (*sptr != 0) { 2296 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2297 if (*sptr == '\\') { 2298 backsub(&pb, &sptr); 2299 } else if (*sptr == '&') { 2300 sptr++; 2301 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2302 for (q = patbeg; q < patbeg+patlen; ) 2303 *pb++ = *q++; 2304 } else 2305 *pb++ = *sptr++; 2306 } 2307 t = patbeg + patlen; 2308 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2309 goto done; 2310 if (pb > buf + bufsz) 2311 FATAL("gensub result1 %.30s too big; can't happen", buf); 2312 mflag = 1; 2313 } 2314 } while (pmatch(pfa,t)); 2315 sptr = t; 2316 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2317 while ((*pb++ = *sptr++) != 0) 2318 ; 2319 done: if (pb > buf + bufsz) 2320 FATAL("gensub result2 %.30s too big; can't happen", buf); 2321 *pb = '\0'; 2322 setsval(res, buf); 2323 pfa->initstat = tempstat; 2324 } 2325 tempfree(x); 2326 tempfree(y); 2327 free(buf); 2328 return(res); 2329 } 2330 2331 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2332 { /* sptr[0] == '\\' */ 2333 char *pb = *pb_ptr; 2334 const char *sptr = *sptr_ptr; 2335 static bool first = true; 2336 static bool do_posix = false; 2337 2338 if (first) { 2339 first = false; 2340 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2341 } 2342 2343 if (sptr[1] == '\\') { 2344 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2345 *pb++ = '\\'; 2346 *pb++ = '&'; 2347 sptr += 4; 2348 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2349 *pb++ = '\\'; 2350 sptr += 2; 2351 } else if (do_posix) { /* \\x -> \x */ 2352 sptr++; 2353 *pb++ = *sptr++; 2354 } else { /* \\x -> \\x */ 2355 *pb++ = *sptr++; 2356 *pb++ = *sptr++; 2357 } 2358 } else if (sptr[1] == '&') { /* literal & */ 2359 sptr++; 2360 *pb++ = *sptr++; 2361 } else /* literal \ */ 2362 *pb++ = *sptr++; 2363 2364 *pb_ptr = pb; 2365 *sptr_ptr = sptr; 2366 } 2367