1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 #define DEBUG 26 #include <stdio.h> 27 #include <ctype.h> 28 #include <errno.h> 29 #include <wchar.h> 30 #include <wctype.h> 31 #include <fcntl.h> 32 #include <setjmp.h> 33 #include <limits.h> 34 #include <math.h> 35 #include <string.h> 36 #include <stdlib.h> 37 #include <time.h> 38 #include <sys/types.h> 39 #include <sys/wait.h> 40 #include "awk.h" 41 #include "awkgram.tab.h" 42 43 static void stdinit(void); 44 static void flush_all(void); 45 46 #if 1 47 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 48 #else 49 void tempfree(Cell *p) { 50 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 51 WARNING("bad csub %d in Cell %d %s", 52 p->csub, p->ctype, p->sval); 53 } 54 if (istemp(p)) 55 tfree(p); 56 } 57 #endif 58 59 /* do we really need these? */ 60 /* #ifdef _NFILE */ 61 /* #ifndef FOPEN_MAX */ 62 /* #define FOPEN_MAX _NFILE */ 63 /* #endif */ 64 /* #endif */ 65 /* */ 66 /* #ifndef FOPEN_MAX */ 67 /* #define FOPEN_MAX 40 */ /* max number of open files */ 68 /* #endif */ 69 /* */ 70 /* #ifndef RAND_MAX */ 71 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 72 /* #endif */ 73 74 jmp_buf env; 75 extern int pairstack[]; 76 extern Awkfloat srand_seed; 77 78 Node *winner = NULL; /* root of parse tree */ 79 Cell *tmps; /* free temporary cells for execution */ 80 81 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 82 Cell *True = &truecell; 83 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 84 Cell *False = &falsecell; 85 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 86 Cell *jbreak = &breakcell; 87 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 88 Cell *jcont = &contcell; 89 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 90 Cell *jnext = &nextcell; 91 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 92 Cell *jnextfile = &nextfilecell; 93 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 94 Cell *jexit = &exitcell; 95 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 96 Cell *jret = &retcell; 97 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 98 99 Node *curnode = NULL; /* the node being executed, for debugging */ 100 101 /* buffer memory management */ 102 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 103 const char *whatrtn) 104 /* pbuf: address of pointer to buffer being managed 105 * psiz: address of buffer size variable 106 * minlen: minimum length of buffer needed 107 * quantum: buffer size quantum 108 * pbptr: address of movable pointer into buffer, or 0 if none 109 * whatrtn: name of the calling routine if failure should cause fatal error 110 * 111 * return 0 for realloc failure, !=0 for success 112 */ 113 { 114 if (minlen > *psiz) { 115 char *tbuf; 116 int rminlen = quantum ? minlen % quantum : 0; 117 int boff = pbptr ? *pbptr - *pbuf : 0; 118 /* round up to next multiple of quantum */ 119 if (rminlen) 120 minlen += quantum - rminlen; 121 tbuf = (char *) realloc(*pbuf, minlen); 122 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 123 if (tbuf == NULL) { 124 if (whatrtn) 125 FATAL("out of memory in %s", whatrtn); 126 return 0; 127 } 128 *pbuf = tbuf; 129 *psiz = minlen; 130 if (pbptr) 131 *pbptr = tbuf + boff; 132 } 133 return 1; 134 } 135 136 void run(Node *a) /* execution of parse tree starts here */ 137 { 138 139 stdinit(); 140 execute(a); 141 closeall(); 142 } 143 144 Cell *execute(Node *u) /* execute a node of the parse tree */ 145 { 146 Cell *(*proc)(Node **, int); 147 Cell *x; 148 Node *a; 149 150 if (u == NULL) 151 return(True); 152 for (a = u; ; a = a->nnext) { 153 curnode = a; 154 if (isvalue(a)) { 155 x = (Cell *) (a->narg[0]); 156 if (isfld(x) && !donefld) 157 fldbld(); 158 else if (isrec(x) && !donerec) 159 recbld(); 160 return(x); 161 } 162 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 163 FATAL("illegal statement"); 164 proc = proctab[a->nobj-FIRSTTOKEN]; 165 x = (*proc)(a->narg, a->nobj); 166 if (isfld(x) && !donefld) 167 fldbld(); 168 else if (isrec(x) && !donerec) 169 recbld(); 170 if (isexpr(a)) 171 return(x); 172 if (isjump(x)) 173 return(x); 174 if (a->nnext == NULL) 175 return(x); 176 tempfree(x); 177 } 178 } 179 180 181 Cell *program(Node **a, int n) /* execute an awk program */ 182 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 183 Cell *x; 184 185 if (setjmp(env) != 0) 186 goto ex; 187 if (a[0]) { /* BEGIN */ 188 x = execute(a[0]); 189 if (isexit(x)) 190 return(True); 191 if (isjump(x)) 192 FATAL("illegal break, continue, next or nextfile from BEGIN"); 193 tempfree(x); 194 } 195 if (a[1] || a[2]) 196 while (getrec(&record, &recsize, true) > 0) { 197 x = execute(a[1]); 198 if (isexit(x)) 199 break; 200 tempfree(x); 201 } 202 ex: 203 if (setjmp(env) != 0) /* handles exit within END */ 204 goto ex1; 205 if (a[2]) { /* END */ 206 x = execute(a[2]); 207 if (isbreak(x) || isnext(x) || iscont(x)) 208 FATAL("illegal break, continue, next or nextfile from END"); 209 tempfree(x); 210 } 211 ex1: 212 return(True); 213 } 214 215 struct Frame { /* stack frame for awk function calls */ 216 int nargs; /* number of arguments in this call */ 217 Cell *fcncell; /* pointer to Cell for function */ 218 Cell **args; /* pointer to array of arguments after execute */ 219 Cell *retval; /* return value */ 220 }; 221 222 #define NARGS 50 /* max args in a call */ 223 224 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 225 int nframe = 0; /* number of frames allocated */ 226 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 227 228 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 229 { 230 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 231 int i, ncall, ndef; 232 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 233 Node *x; 234 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 235 Cell *y, *z, *fcn; 236 char *s; 237 238 fcn = execute(a[0]); /* the function itself */ 239 s = fcn->nval; 240 if (!isfcn(fcn)) 241 FATAL("calling undefined function %s", s); 242 if (frame == NULL) { 243 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 244 if (frame == NULL) 245 FATAL("out of space for stack frames calling %s", s); 246 } 247 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 248 ncall++; 249 ndef = (int) fcn->fval; /* args in defn */ 250 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 251 if (ncall > ndef) 252 WARNING("function %s called with %d args, uses only %d", 253 s, ncall, ndef); 254 if (ncall + ndef > NARGS) 255 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 256 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 257 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 258 y = execute(x); 259 oargs[i] = y; 260 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 261 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 262 if (isfcn(y)) 263 FATAL("can't use function %s as argument in %s", y->nval, s); 264 if (isarr(y)) 265 args[i] = y; /* arrays by ref */ 266 else 267 args[i] = copycell(y); 268 tempfree(y); 269 } 270 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 271 args[i] = gettemp(); 272 *args[i] = newcopycell; 273 } 274 frp++; /* now ok to up frame */ 275 if (frp >= frame + nframe) { 276 int dfp = frp - frame; /* old index */ 277 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); 278 if (frame == NULL) 279 FATAL("out of space for stack frames in %s", s); 280 frp = frame + dfp; 281 } 282 frp->fcncell = fcn; 283 frp->args = args; 284 frp->nargs = ndef; /* number defined with (excess are locals) */ 285 frp->retval = gettemp(); 286 287 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 288 y = execute((Node *)(fcn->sval)); /* execute body */ 289 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 290 291 for (i = 0; i < ndef; i++) { 292 Cell *t = frp->args[i]; 293 if (isarr(t)) { 294 if (t->csub == CCOPY) { 295 if (i >= ncall) { 296 freesymtab(t); 297 t->csub = CTEMP; 298 tempfree(t); 299 } else { 300 oargs[i]->tval = t->tval; 301 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 302 oargs[i]->sval = t->sval; 303 tempfree(t); 304 } 305 } 306 } else if (t != y) { /* kludge to prevent freeing twice */ 307 t->csub = CTEMP; 308 tempfree(t); 309 } else if (t == y && t->csub == CCOPY) { 310 t->csub = CTEMP; 311 tempfree(t); 312 freed = 1; 313 } 314 } 315 tempfree(fcn); 316 if (isexit(y) || isnext(y)) 317 return y; 318 if (freed == 0) { 319 tempfree(y); /* don't free twice! */ 320 } 321 z = frp->retval; /* return value */ 322 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 323 frp--; 324 return(z); 325 } 326 327 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 328 { 329 Cell *y; 330 331 /* copy is not constant or field */ 332 333 y = gettemp(); 334 y->tval = x->tval & ~(CON|FLD|REC); 335 y->csub = CCOPY; /* prevents freeing until call is over */ 336 y->nval = x->nval; /* BUG? */ 337 if (isstr(x) /* || x->ctype == OCELL */) { 338 y->sval = tostring(x->sval); 339 y->tval &= ~DONTFREE; 340 } else 341 y->tval |= DONTFREE; 342 y->fval = x->fval; 343 return y; 344 } 345 346 Cell *arg(Node **a, int n) /* nth argument of a function */ 347 { 348 349 n = ptoi(a[0]); /* argument number, counting from 0 */ 350 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 351 if (n+1 > frp->nargs) 352 FATAL("argument #%d of function %s was not supplied", 353 n+1, frp->fcncell->nval); 354 return frp->args[n]; 355 } 356 357 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 358 { 359 Cell *y; 360 361 switch (n) { 362 case EXIT: 363 if (a[0] != NULL) { 364 y = execute(a[0]); 365 errorflag = (int) getfval(y); 366 tempfree(y); 367 } 368 longjmp(env, 1); 369 case RETURN: 370 if (a[0] != NULL) { 371 y = execute(a[0]); 372 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 373 setsval(frp->retval, getsval(y)); 374 frp->retval->fval = getfval(y); 375 frp->retval->tval |= NUM; 376 } 377 else if (y->tval & STR) 378 setsval(frp->retval, getsval(y)); 379 else if (y->tval & NUM) 380 setfval(frp->retval, getfval(y)); 381 else /* can't happen */ 382 FATAL("bad type variable %d", y->tval); 383 tempfree(y); 384 } 385 return(jret); 386 case NEXT: 387 return(jnext); 388 case NEXTFILE: 389 nextfile(); 390 return(jnextfile); 391 case BREAK: 392 return(jbreak); 393 case CONTINUE: 394 return(jcont); 395 default: /* can't happen */ 396 FATAL("illegal jump type %d", n); 397 } 398 return 0; /* not reached */ 399 } 400 401 Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 402 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 403 Cell *r, *x; 404 extern Cell **fldtab; 405 FILE *fp; 406 char *buf; 407 int bufsize = recsize; 408 int mode; 409 bool newflag; 410 double result; 411 412 if ((buf = (char *) malloc(bufsize)) == NULL) 413 FATAL("out of memory in getline"); 414 415 fflush(stdout); /* in case someone is waiting for a prompt */ 416 r = gettemp(); 417 if (a[1] != NULL) { /* getline < file */ 418 x = execute(a[2]); /* filename */ 419 mode = ptoi(a[1]); 420 if (mode == '|') /* input pipe */ 421 mode = LE; /* arbitrary flag */ 422 fp = openfile(mode, getsval(x), &newflag); 423 tempfree(x); 424 if (fp == NULL) 425 n = -1; 426 else 427 n = readrec(&buf, &bufsize, fp, newflag); 428 if (n <= 0) { 429 ; 430 } else if (a[0] != NULL) { /* getline var <file */ 431 x = execute(a[0]); 432 setsval(x, buf); 433 if (is_number(x->sval, & result)) { 434 x->fval = result; 435 x->tval |= NUM; 436 } 437 tempfree(x); 438 } else { /* getline <file */ 439 setsval(fldtab[0], buf); 440 if (is_number(fldtab[0]->sval, & result)) { 441 fldtab[0]->fval = result; 442 fldtab[0]->tval |= NUM; 443 } 444 } 445 } else { /* bare getline; use current input */ 446 if (a[0] == NULL) /* getline */ 447 n = getrec(&record, &recsize, true); 448 else { /* getline var */ 449 n = getrec(&buf, &bufsize, false); 450 x = execute(a[0]); 451 setsval(x, buf); 452 if (is_number(x->sval, & result)) { 453 x->fval = result; 454 x->tval |= NUM; 455 } 456 tempfree(x); 457 } 458 } 459 setfval(r, (Awkfloat) n); 460 free(buf); 461 return r; 462 } 463 464 Cell *getnf(Node **a, int n) /* get NF */ 465 { 466 if (!donefld) 467 fldbld(); 468 return (Cell *) a[0]; 469 } 470 471 static char * 472 makearraystring(Node *p, const char *func) 473 { 474 char *buf; 475 int bufsz = recsize; 476 size_t blen; 477 478 if ((buf = (char *) malloc(bufsz)) == NULL) { 479 FATAL("%s: out of memory", func); 480 } 481 482 blen = 0; 483 buf[blen] = '\0'; 484 485 for (; p; p = p->nnext) { 486 Cell *x = execute(p); /* expr */ 487 char *s = getsval(x); 488 size_t seplen = strlen(getsval(subseploc)); 489 size_t nsub = p->nnext ? seplen : 0; 490 size_t slen = strlen(s); 491 size_t tlen = blen + slen + nsub; 492 493 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 494 FATAL("%s: out of memory %s[%s...]", 495 func, x->nval, buf); 496 } 497 memcpy(buf + blen, s, slen); 498 if (nsub) { 499 memcpy(buf + blen + slen, *SUBSEP, nsub); 500 } 501 buf[tlen] = '\0'; 502 blen = tlen; 503 tempfree(x); 504 } 505 return buf; 506 } 507 508 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 509 { 510 Cell *x, *z; 511 char *buf; 512 513 x = execute(a[0]); /* Cell* for symbol table */ 514 buf = makearraystring(a[1], __func__); 515 if (!isarr(x)) { 516 DPRINTF("making %s into an array\n", NN(x->nval)); 517 if (freeable(x)) 518 xfree(x->sval); 519 x->tval &= ~(STR|NUM|DONTFREE); 520 x->tval |= ARR; 521 x->sval = (char *) makesymtab(NSYMTAB); 522 } 523 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 524 z->ctype = OCELL; 525 z->csub = CVAR; 526 tempfree(x); 527 free(buf); 528 return(z); 529 } 530 531 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 532 { 533 Cell *x; 534 535 x = execute(a[0]); /* Cell* for symbol table */ 536 if (x == symtabloc) { 537 FATAL("cannot delete SYMTAB or its elements"); 538 } 539 if (!isarr(x)) 540 return True; 541 if (a[1] == NULL) { /* delete the elements, not the table */ 542 freesymtab(x); 543 x->tval &= ~STR; 544 x->tval |= ARR; 545 x->sval = (char *) makesymtab(NSYMTAB); 546 } else { 547 char *buf = makearraystring(a[1], __func__); 548 freeelem(x, buf); 549 free(buf); 550 } 551 tempfree(x); 552 return True; 553 } 554 555 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 556 { 557 Cell *ap, *k; 558 char *buf; 559 560 ap = execute(a[1]); /* array name */ 561 if (!isarr(ap)) { 562 DPRINTF("making %s into an array\n", ap->nval); 563 if (freeable(ap)) 564 xfree(ap->sval); 565 ap->tval &= ~(STR|NUM|DONTFREE); 566 ap->tval |= ARR; 567 ap->sval = (char *) makesymtab(NSYMTAB); 568 } 569 buf = makearraystring(a[0], __func__); 570 k = lookup(buf, (Array *) ap->sval); 571 tempfree(ap); 572 free(buf); 573 if (k == NULL) 574 return(False); 575 else 576 return(True); 577 } 578 579 580 Cell *matchop(Node **a, int n) /* ~ and match() */ 581 { 582 Cell *x, *y; 583 char *s, *t; 584 int i; 585 fa *pfa; 586 int (*mf)(fa *, const char *) = match, mode = 0; 587 588 if (n == MATCHFCN) { 589 mf = pmatch; 590 mode = 1; 591 } 592 x = execute(a[1]); /* a[1] = target text */ 593 s = getsval(x); 594 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 595 i = (*mf)((fa *) a[2], s); 596 else { 597 y = execute(a[2]); /* a[2] = regular expr */ 598 t = getsval(y); 599 pfa = makedfa(t, mode); 600 i = (*mf)(pfa, s); 601 tempfree(y); 602 } 603 tempfree(x); 604 if (n == MATCHFCN) { 605 int start = patbeg - s + 1; 606 if (patlen < 0) 607 start = 0; 608 setfval(rstartloc, (Awkfloat) start); 609 setfval(rlengthloc, (Awkfloat) patlen); 610 x = gettemp(); 611 x->tval = NUM; 612 x->fval = start; 613 return x; 614 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 615 return(True); 616 else 617 return(False); 618 } 619 620 621 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 622 { 623 Cell *x, *y; 624 int i; 625 626 x = execute(a[0]); 627 i = istrue(x); 628 tempfree(x); 629 switch (n) { 630 case BOR: 631 if (i) return(True); 632 y = execute(a[1]); 633 i = istrue(y); 634 tempfree(y); 635 if (i) return(True); 636 else return(False); 637 case AND: 638 if ( !i ) return(False); 639 y = execute(a[1]); 640 i = istrue(y); 641 tempfree(y); 642 if (i) return(True); 643 else return(False); 644 case NOT: 645 if (i) return(False); 646 else return(True); 647 default: /* can't happen */ 648 FATAL("unknown boolean operator %d", n); 649 } 650 return 0; /*NOTREACHED*/ 651 } 652 653 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 654 { 655 int i; 656 Cell *x, *y; 657 Awkfloat j; 658 659 x = execute(a[0]); 660 y = execute(a[1]); 661 if (x->tval&NUM && y->tval&NUM) { 662 j = x->fval - y->fval; 663 i = j<0? -1: (j>0? 1: 0); 664 } else { 665 i = strcmp(getsval(x), getsval(y)); 666 } 667 tempfree(x); 668 tempfree(y); 669 switch (n) { 670 case LT: if (i<0) return(True); 671 else return(False); 672 case LE: if (i<=0) return(True); 673 else return(False); 674 case NE: if (i!=0) return(True); 675 else return(False); 676 case EQ: if (i == 0) return(True); 677 else return(False); 678 case GE: if (i>=0) return(True); 679 else return(False); 680 case GT: if (i>0) return(True); 681 else return(False); 682 default: /* can't happen */ 683 FATAL("unknown relational operator %d", n); 684 } 685 return 0; /*NOTREACHED*/ 686 } 687 688 void tfree(Cell *a) /* free a tempcell */ 689 { 690 if (freeable(a)) { 691 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 692 xfree(a->sval); 693 } 694 if (a == tmps) 695 FATAL("tempcell list is curdled"); 696 a->cnext = tmps; 697 tmps = a; 698 } 699 700 Cell *gettemp(void) /* get a tempcell */ 701 { int i; 702 Cell *x; 703 704 if (!tmps) { 705 tmps = (Cell *) calloc(100, sizeof(*tmps)); 706 if (!tmps) 707 FATAL("out of space for temporaries"); 708 for (i = 1; i < 100; i++) 709 tmps[i-1].cnext = &tmps[i]; 710 tmps[i-1].cnext = NULL; 711 } 712 x = tmps; 713 tmps = x->cnext; 714 *x = tempcell; 715 return(x); 716 } 717 718 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 719 { 720 Awkfloat val; 721 Cell *x; 722 int m; 723 char *s; 724 725 x = execute(a[0]); 726 val = getfval(x); /* freebsd: defend against super large field numbers */ 727 if ((Awkfloat)INT_MAX < val) 728 FATAL("trying to access out of range field %s", x->nval); 729 m = (int) val; 730 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 731 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 732 /* BUG: can x->nval ever be null??? */ 733 tempfree(x); 734 x = fieldadr(m); 735 x->ctype = OCELL; /* BUG? why are these needed? */ 736 x->csub = CFLD; 737 return(x); 738 } 739 740 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 741 { 742 int k, m, n; 743 char *s; 744 int temp; 745 Cell *x, *y, *z = NULL; 746 747 x = execute(a[0]); 748 y = execute(a[1]); 749 if (a[2] != NULL) 750 z = execute(a[2]); 751 s = getsval(x); 752 k = strlen(s) + 1; 753 if (k <= 1) { 754 tempfree(x); 755 tempfree(y); 756 if (a[2] != NULL) { 757 tempfree(z); 758 } 759 x = gettemp(); 760 setsval(x, ""); 761 return(x); 762 } 763 m = (int) getfval(y); 764 if (m <= 0) 765 m = 1; 766 else if (m > k) 767 m = k; 768 tempfree(y); 769 if (a[2] != NULL) { 770 n = (int) getfval(z); 771 tempfree(z); 772 } else 773 n = k - 1; 774 if (n < 0) 775 n = 0; 776 else if (n > k - m) 777 n = k - m; 778 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 779 y = gettemp(); 780 temp = s[n+m-1]; /* with thanks to John Linderman */ 781 s[n+m-1] = '\0'; 782 setsval(y, s + m - 1); 783 s[n+m-1] = temp; 784 tempfree(x); 785 return(y); 786 } 787 788 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 789 { 790 Cell *x, *y, *z; 791 char *s1, *s2, *p1, *p2, *q; 792 Awkfloat v = 0.0; 793 794 x = execute(a[0]); 795 s1 = getsval(x); 796 y = execute(a[1]); 797 s2 = getsval(y); 798 799 z = gettemp(); 800 for (p1 = s1; *p1 != '\0'; p1++) { 801 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 802 continue; 803 if (*p2 == '\0') { 804 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 805 break; 806 } 807 } 808 tempfree(x); 809 tempfree(y); 810 setfval(z, v); 811 return(z); 812 } 813 814 #define MAXNUMSIZE 50 815 816 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 817 { 818 char *fmt; 819 char *p, *t; 820 const char *os; 821 Cell *x; 822 int flag = 0, n; 823 int fmtwd; /* format width */ 824 int fmtsz = recsize; 825 char *buf = *pbuf; 826 int bufsize = *pbufsize; 827 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 828 #define BUFSZ(a) (bufsize - ((a) - buf)) 829 830 static bool first = true; 831 static bool have_a_format = false; 832 833 if (first) { 834 char xbuf[100]; 835 836 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 837 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 838 first = false; 839 } 840 841 os = s; 842 p = buf; 843 if ((fmt = (char *) malloc(fmtsz)) == NULL) 844 FATAL("out of memory in format()"); 845 while (*s) { 846 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 847 if (*s != '%') { 848 *p++ = *s++; 849 continue; 850 } 851 if (*(s+1) == '%') { 852 *p++ = '%'; 853 s += 2; 854 continue; 855 } 856 /* have to be real careful in case this is a huge number, eg, %100000d */ 857 fmtwd = atoi(s+1); 858 if (fmtwd < 0) 859 fmtwd = -fmtwd; 860 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 861 for (t = fmt; (*t++ = *s) != '\0'; s++) { 862 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 863 FATAL("format item %.30s... ran format() out of memory", os); 864 /* Ignore size specifiers */ 865 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 866 t--; 867 continue; 868 } 869 if (isalpha((uschar)*s)) 870 break; 871 if (*s == '$') { 872 FATAL("'$' not permitted in awk formats"); 873 } 874 if (*s == '*') { 875 if (a == NULL) { 876 FATAL("not enough args in printf(%s)", os); 877 } 878 x = execute(a); 879 a = a->nnext; 880 snprintf(t - 1, FMTSZ(t - 1), 881 "%d", fmtwd=(int) getfval(x)); 882 if (fmtwd < 0) 883 fmtwd = -fmtwd; 884 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 885 t = fmt + strlen(fmt); 886 tempfree(x); 887 } 888 } 889 *t = '\0'; 890 if (fmtwd < 0) 891 fmtwd = -fmtwd; 892 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 893 switch (*s) { 894 case 'a': case 'A': 895 if (have_a_format) 896 flag = *s; 897 else 898 flag = 'f'; 899 break; 900 case 'f': case 'e': case 'g': case 'E': case 'G': 901 flag = 'f'; 902 break; 903 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 904 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 905 *(t-1) = 'j'; 906 *t = *s; 907 *++t = '\0'; 908 break; 909 case 's': 910 flag = 's'; 911 break; 912 case 'c': 913 flag = 'c'; 914 break; 915 default: 916 WARNING("weird printf conversion %s", fmt); 917 flag = '?'; 918 break; 919 } 920 if (a == NULL) 921 FATAL("not enough args in printf(%s)", os); 922 x = execute(a); 923 a = a->nnext; 924 n = MAXNUMSIZE; 925 if (fmtwd > n) 926 n = fmtwd; 927 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 928 switch (flag) { 929 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 930 t = getsval(x); 931 n = strlen(t); 932 if (fmtwd > n) 933 n = fmtwd; 934 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 935 p += strlen(p); 936 snprintf(p, BUFSZ(p), "%s", t); 937 break; 938 case 'a': 939 case 'A': 940 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 941 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 942 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 943 case 's': 944 t = getsval(x); 945 n = strlen(t); 946 if (fmtwd > n) 947 n = fmtwd; 948 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 949 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); 950 snprintf(p, BUFSZ(p), fmt, t); 951 break; 952 case 'c': 953 if (isnum(x)) { 954 if ((int)getfval(x)) 955 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 956 else { 957 *p++ = '\0'; /* explicit null byte */ 958 *p = '\0'; /* next output will start here */ 959 } 960 } else 961 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 962 break; 963 default: 964 FATAL("can't happen: bad conversion %c in format()", flag); 965 } 966 tempfree(x); 967 p += strlen(p); 968 s++; 969 } 970 *p = '\0'; 971 free(fmt); 972 for ( ; a; a = a->nnext) /* evaluate any remaining args */ 973 execute(a); 974 *pbuf = buf; 975 *pbufsize = bufsize; 976 return p - buf; 977 } 978 979 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 980 { 981 Cell *x; 982 Node *y; 983 char *buf; 984 int bufsz=3*recsize; 985 986 if ((buf = (char *) malloc(bufsz)) == NULL) 987 FATAL("out of memory in awksprintf"); 988 y = a[0]->nnext; 989 x = execute(a[0]); 990 if (format(&buf, &bufsz, getsval(x), y) == -1) 991 FATAL("sprintf string %.30s... too long. can't happen.", buf); 992 tempfree(x); 993 x = gettemp(); 994 x->sval = buf; 995 x->tval = STR; 996 return(x); 997 } 998 999 Cell *awkprintf(Node **a, int n) /* printf */ 1000 { /* a[0] is list of args, starting with format string */ 1001 /* a[1] is redirection operator, a[2] is redirection file */ 1002 FILE *fp; 1003 Cell *x; 1004 Node *y; 1005 char *buf; 1006 int len; 1007 int bufsz=3*recsize; 1008 1009 if ((buf = (char *) malloc(bufsz)) == NULL) 1010 FATAL("out of memory in awkprintf"); 1011 y = a[0]->nnext; 1012 x = execute(a[0]); 1013 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1014 FATAL("printf string %.30s... too long. can't happen.", buf); 1015 tempfree(x); 1016 if (a[1] == NULL) { 1017 /* fputs(buf, stdout); */ 1018 fwrite(buf, len, 1, stdout); 1019 if (ferror(stdout)) 1020 FATAL("write error on stdout"); 1021 } else { 1022 fp = redirect(ptoi(a[1]), a[2]); 1023 /* fputs(buf, fp); */ 1024 fwrite(buf, len, 1, fp); 1025 fflush(fp); 1026 if (ferror(fp)) 1027 FATAL("write error on %s", filename(fp)); 1028 } 1029 free(buf); 1030 return(True); 1031 } 1032 1033 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1034 { 1035 Awkfloat i, j = 0; 1036 double v; 1037 Cell *x, *y, *z; 1038 1039 x = execute(a[0]); 1040 i = getfval(x); 1041 tempfree(x); 1042 if (n != UMINUS && n != UPLUS) { 1043 y = execute(a[1]); 1044 j = getfval(y); 1045 tempfree(y); 1046 } 1047 z = gettemp(); 1048 switch (n) { 1049 case ADD: 1050 i += j; 1051 break; 1052 case MINUS: 1053 i -= j; 1054 break; 1055 case MULT: 1056 i *= j; 1057 break; 1058 case DIVIDE: 1059 if (j == 0) 1060 FATAL("division by zero"); 1061 i /= j; 1062 break; 1063 case MOD: 1064 if (j == 0) 1065 FATAL("division by zero in mod"); 1066 modf(i/j, &v); 1067 i = i - j * v; 1068 break; 1069 case UMINUS: 1070 i = -i; 1071 break; 1072 case UPLUS: /* handled by getfval(), above */ 1073 break; 1074 case POWER: 1075 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1076 i = ipow(i, (int) j); 1077 else { 1078 errno = 0; 1079 i = errcheck(pow(i, j), "pow"); 1080 } 1081 break; 1082 default: /* can't happen */ 1083 FATAL("illegal arithmetic operator %d", n); 1084 } 1085 setfval(z, i); 1086 return(z); 1087 } 1088 1089 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1090 { 1091 double v; 1092 1093 if (n <= 0) 1094 return 1; 1095 v = ipow(x, n/2); 1096 if (n % 2 == 0) 1097 return v * v; 1098 else 1099 return x * v * v; 1100 } 1101 1102 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 1103 { 1104 Cell *x, *z; 1105 int k; 1106 Awkfloat xf; 1107 1108 x = execute(a[0]); 1109 xf = getfval(x); 1110 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1111 if (n == PREINCR || n == PREDECR) { 1112 setfval(x, xf + k); 1113 return(x); 1114 } 1115 z = gettemp(); 1116 setfval(z, xf); 1117 setfval(x, xf + k); 1118 tempfree(x); 1119 return(z); 1120 } 1121 1122 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1123 { /* this is subtle; don't muck with it. */ 1124 Cell *x, *y; 1125 Awkfloat xf, yf; 1126 double v; 1127 1128 y = execute(a[1]); 1129 x = execute(a[0]); 1130 if (n == ASSIGN) { /* ordinary assignment */ 1131 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1132 ; /* self-assignment: leave alone unless it's a field or NF */ 1133 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1134 setsval(x, getsval(y)); 1135 x->fval = getfval(y); 1136 x->tval |= NUM; 1137 } 1138 else if (isstr(y)) 1139 setsval(x, getsval(y)); 1140 else if (isnum(y)) 1141 setfval(x, getfval(y)); 1142 else 1143 funnyvar(y, "read value of"); 1144 tempfree(y); 1145 return(x); 1146 } 1147 xf = getfval(x); 1148 yf = getfval(y); 1149 switch (n) { 1150 case ADDEQ: 1151 xf += yf; 1152 break; 1153 case SUBEQ: 1154 xf -= yf; 1155 break; 1156 case MULTEQ: 1157 xf *= yf; 1158 break; 1159 case DIVEQ: 1160 if (yf == 0) 1161 FATAL("division by zero in /="); 1162 xf /= yf; 1163 break; 1164 case MODEQ: 1165 if (yf == 0) 1166 FATAL("division by zero in %%="); 1167 modf(xf/yf, &v); 1168 xf = xf - yf * v; 1169 break; 1170 case POWEQ: 1171 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1172 xf = ipow(xf, (int) yf); 1173 else { 1174 errno = 0; 1175 xf = errcheck(pow(xf, yf), "pow"); 1176 } 1177 break; 1178 default: 1179 FATAL("illegal assignment operator %d", n); 1180 break; 1181 } 1182 tempfree(y); 1183 setfval(x, xf); 1184 return(x); 1185 } 1186 1187 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1188 { 1189 Cell *x, *y, *z; 1190 int n1, n2; 1191 char *s = NULL; 1192 int ssz = 0; 1193 1194 x = execute(a[0]); 1195 n1 = strlen(getsval(x)); 1196 adjbuf(&s, &ssz, n1, recsize, 0, "cat1"); 1197 memcpy(s, x->sval, n1); 1198 1199 y = execute(a[1]); 1200 n2 = strlen(getsval(y)); 1201 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1202 memcpy(s + n1, y->sval, n2); 1203 s[n1 + n2] = '\0'; 1204 1205 tempfree(x); 1206 tempfree(y); 1207 1208 z = gettemp(); 1209 z->sval = s; 1210 z->tval = STR; 1211 1212 return(z); 1213 } 1214 1215 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1216 { 1217 Cell *x; 1218 1219 if (a[0] == NULL) 1220 x = execute(a[1]); 1221 else { 1222 x = execute(a[0]); 1223 if (istrue(x)) { 1224 tempfree(x); 1225 x = execute(a[1]); 1226 } 1227 } 1228 return x; 1229 } 1230 1231 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1232 { 1233 Cell *x; 1234 int pair; 1235 1236 pair = ptoi(a[3]); 1237 if (pairstack[pair] == 0) { 1238 x = execute(a[0]); 1239 if (istrue(x)) 1240 pairstack[pair] = 1; 1241 tempfree(x); 1242 } 1243 if (pairstack[pair] == 1) { 1244 x = execute(a[1]); 1245 if (istrue(x)) 1246 pairstack[pair] = 0; 1247 tempfree(x); 1248 x = execute(a[2]); 1249 return(x); 1250 } 1251 return(False); 1252 } 1253 1254 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1255 { 1256 Cell *x = NULL, *y, *ap; 1257 const char *s, *origs, *t; 1258 const char *fs = NULL; 1259 char *origfs = NULL; 1260 int sep; 1261 char temp, num[50]; 1262 int n, tempstat, arg3type; 1263 double result; 1264 1265 y = execute(a[0]); /* source string */ 1266 origs = s = strdup(getsval(y)); 1267 arg3type = ptoi(a[3]); 1268 if (a[2] == NULL) /* fs string */ 1269 fs = getsval(fsloc); 1270 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1271 x = execute(a[2]); 1272 fs = origfs = strdup(getsval(x)); 1273 tempfree(x); 1274 } else if (arg3type == REGEXPR) 1275 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1276 else 1277 FATAL("illegal type of split"); 1278 sep = *fs; 1279 ap = execute(a[1]); /* array name */ 1280 freesymtab(ap); 1281 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1282 ap->tval &= ~STR; 1283 ap->tval |= ARR; 1284 ap->sval = (char *) makesymtab(NSYMTAB); 1285 1286 n = 0; 1287 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1288 /* split(s, a, //); have to arrange that it looks like empty sep */ 1289 arg3type = 0; 1290 fs = ""; 1291 sep = 0; 1292 } 1293 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1294 fa *pfa; 1295 if (arg3type == REGEXPR) { /* it's ready already */ 1296 pfa = (fa *) a[2]; 1297 } else { 1298 pfa = makedfa(fs, 1); 1299 } 1300 if (nematch(pfa,s)) { 1301 tempstat = pfa->initstat; 1302 pfa->initstat = 2; 1303 do { 1304 n++; 1305 snprintf(num, sizeof(num), "%d", n); 1306 temp = *patbeg; 1307 setptr(patbeg, '\0'); 1308 if (is_number(s, & result)) 1309 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1310 else 1311 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1312 setptr(patbeg, temp); 1313 s = patbeg + patlen; 1314 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1315 n++; 1316 snprintf(num, sizeof(num), "%d", n); 1317 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1318 pfa->initstat = tempstat; 1319 goto spdone; 1320 } 1321 } while (nematch(pfa,s)); 1322 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1323 /* cf gsub and refldbld */ 1324 } 1325 n++; 1326 snprintf(num, sizeof(num), "%d", n); 1327 if (is_number(s, & result)) 1328 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1329 else 1330 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1331 spdone: 1332 pfa = NULL; 1333 } else if (sep == ' ') { 1334 for (n = 0; ; ) { 1335 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1336 while (ISWS(*s)) 1337 s++; 1338 if (*s == '\0') 1339 break; 1340 n++; 1341 t = s; 1342 do 1343 s++; 1344 while (*s != '\0' && !ISWS(*s)); 1345 temp = *s; 1346 setptr(s, '\0'); 1347 snprintf(num, sizeof(num), "%d", n); 1348 if (is_number(t, & result)) 1349 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1350 else 1351 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1352 setptr(s, temp); 1353 if (*s != '\0') 1354 s++; 1355 } 1356 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1357 for (n = 0; *s != '\0'; s++) { 1358 char buf[2]; 1359 n++; 1360 snprintf(num, sizeof(num), "%d", n); 1361 buf[0] = *s; 1362 buf[1] = '\0'; 1363 if (isdigit((uschar)buf[0])) 1364 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1365 else 1366 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1367 } 1368 } else if (*s != '\0') { 1369 for (;;) { 1370 n++; 1371 t = s; 1372 while (*s != sep && *s != '\n' && *s != '\0') 1373 s++; 1374 temp = *s; 1375 setptr(s, '\0'); 1376 snprintf(num, sizeof(num), "%d", n); 1377 if (is_number(t, & result)) 1378 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1379 else 1380 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1381 setptr(s, temp); 1382 if (*s++ == '\0') 1383 break; 1384 } 1385 } 1386 tempfree(ap); 1387 tempfree(y); 1388 xfree(origs); 1389 xfree(origfs); 1390 x = gettemp(); 1391 x->tval = NUM; 1392 x->fval = n; 1393 return(x); 1394 } 1395 1396 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1397 { 1398 Cell *x; 1399 1400 x = execute(a[0]); 1401 if (istrue(x)) { 1402 tempfree(x); 1403 x = execute(a[1]); 1404 } else { 1405 tempfree(x); 1406 x = execute(a[2]); 1407 } 1408 return(x); 1409 } 1410 1411 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1412 { 1413 Cell *x; 1414 1415 x = execute(a[0]); 1416 if (istrue(x)) { 1417 tempfree(x); 1418 x = execute(a[1]); 1419 } else if (a[2] != NULL) { 1420 tempfree(x); 1421 x = execute(a[2]); 1422 } 1423 return(x); 1424 } 1425 1426 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1427 { 1428 Cell *x; 1429 1430 for (;;) { 1431 x = execute(a[0]); 1432 if (!istrue(x)) 1433 return(x); 1434 tempfree(x); 1435 x = execute(a[1]); 1436 if (isbreak(x)) { 1437 x = True; 1438 return(x); 1439 } 1440 if (isnext(x) || isexit(x) || isret(x)) 1441 return(x); 1442 tempfree(x); 1443 } 1444 } 1445 1446 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1447 { 1448 Cell *x; 1449 1450 for (;;) { 1451 x = execute(a[0]); 1452 if (isbreak(x)) 1453 return True; 1454 if (isnext(x) || isexit(x) || isret(x)) 1455 return(x); 1456 tempfree(x); 1457 x = execute(a[1]); 1458 if (!istrue(x)) 1459 return(x); 1460 tempfree(x); 1461 } 1462 } 1463 1464 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1465 { 1466 Cell *x; 1467 1468 x = execute(a[0]); 1469 tempfree(x); 1470 for (;;) { 1471 if (a[1]!=NULL) { 1472 x = execute(a[1]); 1473 if (!istrue(x)) return(x); 1474 else tempfree(x); 1475 } 1476 x = execute(a[3]); 1477 if (isbreak(x)) /* turn off break */ 1478 return True; 1479 if (isnext(x) || isexit(x) || isret(x)) 1480 return(x); 1481 tempfree(x); 1482 x = execute(a[2]); 1483 tempfree(x); 1484 } 1485 } 1486 1487 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1488 { 1489 Cell *x, *vp, *arrayp, *cp, *ncp; 1490 Array *tp; 1491 int i; 1492 1493 vp = execute(a[0]); 1494 arrayp = execute(a[1]); 1495 if (!isarr(arrayp)) { 1496 return True; 1497 } 1498 tp = (Array *) arrayp->sval; 1499 tempfree(arrayp); 1500 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1501 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1502 setsval(vp, cp->nval); 1503 ncp = cp->cnext; 1504 x = execute(a[2]); 1505 if (isbreak(x)) { 1506 tempfree(vp); 1507 return True; 1508 } 1509 if (isnext(x) || isexit(x) || isret(x)) { 1510 tempfree(vp); 1511 return(x); 1512 } 1513 tempfree(x); 1514 } 1515 } 1516 return True; 1517 } 1518 1519 static char *nawk_convert(const char *s, int (*fun_c)(int), 1520 wint_t (*fun_wc)(wint_t)) 1521 { 1522 char *buf = NULL; 1523 char *pbuf = NULL; 1524 const char *ps = NULL; 1525 size_t n = 0; 1526 wchar_t wc; 1527 size_t sz = MB_CUR_MAX; 1528 1529 if (sz == 1) { 1530 buf = tostring(s); 1531 1532 for (pbuf = buf; *pbuf; pbuf++) 1533 *pbuf = fun_c((uschar)*pbuf); 1534 1535 return buf; 1536 } else { 1537 /* upper/lower character may be shorter/longer */ 1538 buf = tostringN(s, strlen(s) * sz + 1); 1539 1540 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1541 /* 1542 * Reset internal state here too. 1543 * Assign result to avoid a compiler warning. (Casting to void 1544 * doesn't work.) 1545 * Increment said variable to avoid a different warning. 1546 */ 1547 int unused = wctomb(NULL, L'\0'); 1548 unused++; 1549 1550 ps = s; 1551 pbuf = buf; 1552 while (n = mbtowc(&wc, ps, sz), 1553 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1554 { 1555 ps += n; 1556 1557 n = wctomb(pbuf, fun_wc(wc)); 1558 if (n == (size_t)-1) 1559 FATAL("illegal wide character %s", s); 1560 1561 pbuf += n; 1562 } 1563 1564 *pbuf = '\0'; 1565 1566 if (n) 1567 FATAL("illegal byte sequence %s", s); 1568 1569 return buf; 1570 } 1571 } 1572 1573 #ifdef __DJGPP__ 1574 static wint_t towupper(wint_t wc) 1575 { 1576 if (wc >= 0 && wc < 256) 1577 return toupper(wc & 0xFF); 1578 1579 return wc; 1580 } 1581 1582 static wint_t towlower(wint_t wc) 1583 { 1584 if (wc >= 0 && wc < 256) 1585 return tolower(wc & 0xFF); 1586 1587 return wc; 1588 } 1589 #endif 1590 1591 static char *nawk_toupper(const char *s) 1592 { 1593 return nawk_convert(s, toupper, towupper); 1594 } 1595 1596 static char *nawk_tolower(const char *s) 1597 { 1598 return nawk_convert(s, tolower, towlower); 1599 } 1600 1601 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 1602 { 1603 Cell *x, *y; 1604 Awkfloat u; 1605 int t, sz; 1606 Awkfloat tmp; 1607 char *buf, *fmt; 1608 Node *nextarg; 1609 FILE *fp; 1610 int status = 0; 1611 time_t tv; 1612 struct tm *tm; 1613 1614 t = ptoi(a[0]); 1615 x = execute(a[1]); 1616 nextarg = a[1]->nnext; 1617 switch (t) { 1618 case FLENGTH: 1619 if (isarr(x)) 1620 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 1621 else 1622 u = strlen(getsval(x)); 1623 break; 1624 case FLOG: 1625 errno = 0; 1626 u = errcheck(log(getfval(x)), "log"); 1627 break; 1628 case FINT: 1629 modf(getfval(x), &u); break; 1630 case FEXP: 1631 errno = 0; 1632 u = errcheck(exp(getfval(x)), "exp"); 1633 break; 1634 case FSQRT: 1635 errno = 0; 1636 u = errcheck(sqrt(getfval(x)), "sqrt"); 1637 break; 1638 case FSIN: 1639 u = sin(getfval(x)); break; 1640 case FCOS: 1641 u = cos(getfval(x)); break; 1642 case FATAN: 1643 if (nextarg == NULL) { 1644 WARNING("atan2 requires two arguments; returning 1.0"); 1645 u = 1.0; 1646 } else { 1647 y = execute(a[1]->nnext); 1648 u = atan2(getfval(x), getfval(y)); 1649 tempfree(y); 1650 nextarg = nextarg->nnext; 1651 } 1652 break; 1653 case FCOMPL: 1654 u = ~((int)getfval(x)); 1655 break; 1656 case FAND: 1657 if (nextarg == 0) { 1658 WARNING("and requires two arguments; returning 0"); 1659 u = 0; 1660 break; 1661 } 1662 y = execute(a[1]->nnext); 1663 u = ((int)getfval(x)) & ((int)getfval(y)); 1664 tempfree(y); 1665 nextarg = nextarg->nnext; 1666 break; 1667 case FFOR: 1668 if (nextarg == 0) { 1669 WARNING("or requires two arguments; returning 0"); 1670 u = 0; 1671 break; 1672 } 1673 y = execute(a[1]->nnext); 1674 u = ((int)getfval(x)) | ((int)getfval(y)); 1675 tempfree(y); 1676 nextarg = nextarg->nnext; 1677 break; 1678 case FXOR: 1679 if (nextarg == 0) { 1680 WARNING("xor requires two arguments; returning 0"); 1681 u = 0; 1682 break; 1683 } 1684 y = execute(a[1]->nnext); 1685 u = ((int)getfval(x)) ^ ((int)getfval(y)); 1686 tempfree(y); 1687 nextarg = nextarg->nnext; 1688 break; 1689 case FLSHIFT: 1690 if (nextarg == 0) { 1691 WARNING("lshift requires two arguments; returning 0"); 1692 u = 0; 1693 break; 1694 } 1695 y = execute(a[1]->nnext); 1696 u = ((int)getfval(x)) << ((int)getfval(y)); 1697 tempfree(y); 1698 nextarg = nextarg->nnext; 1699 break; 1700 case FRSHIFT: 1701 if (nextarg == 0) { 1702 WARNING("rshift requires two arguments; returning 0"); 1703 u = 0; 1704 break; 1705 } 1706 y = execute(a[1]->nnext); 1707 u = ((int)getfval(x)) >> ((int)getfval(y)); 1708 tempfree(y); 1709 nextarg = nextarg->nnext; 1710 break; 1711 case FSYSTEM: 1712 fflush(stdout); /* in case something is buffered already */ 1713 status = system(getsval(x)); 1714 u = status; 1715 if (status != -1) { 1716 if (WIFEXITED(status)) { 1717 u = WEXITSTATUS(status); 1718 } else if (WIFSIGNALED(status)) { 1719 u = WTERMSIG(status) + 256; 1720 #ifdef WCOREDUMP 1721 if (WCOREDUMP(status)) 1722 u += 256; 1723 #endif 1724 } else /* something else?!? */ 1725 u = 0; 1726 } 1727 break; 1728 case FRAND: 1729 /* random() returns numbers in [0..2^31-1] 1730 * in order to get a number in [0, 1), divide it by 2^31 1731 */ 1732 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1733 break; 1734 case FSRAND: 1735 if (isrec(x)) /* no argument provided */ 1736 u = time((time_t *)0); 1737 else 1738 u = getfval(x); 1739 tmp = u; 1740 srandom((unsigned long) u); 1741 u = srand_seed; 1742 srand_seed = tmp; 1743 break; 1744 case FTOUPPER: 1745 case FTOLOWER: 1746 if (t == FTOUPPER) 1747 buf = nawk_toupper(getsval(x)); 1748 else 1749 buf = nawk_tolower(getsval(x)); 1750 tempfree(x); 1751 x = gettemp(); 1752 setsval(x, buf); 1753 free(buf); 1754 return x; 1755 case FFLUSH: 1756 if (isrec(x) || strlen(getsval(x)) == 0) { 1757 flush_all(); /* fflush() or fflush("") -> all */ 1758 u = 0; 1759 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1760 u = EOF; 1761 else 1762 u = fflush(fp); 1763 break; 1764 case FSYSTIME: 1765 u = time((time_t *) 0); 1766 break; 1767 case FSTRFTIME: 1768 /* strftime([format [,timestamp]]) */ 1769 if (nextarg) { 1770 y = execute(nextarg); 1771 nextarg = nextarg->nnext; 1772 tv = (time_t) getfval(y); 1773 tempfree(y); 1774 } else 1775 tv = time((time_t *) 0); 1776 tm = localtime(&tv); 1777 if (tm == NULL) 1778 FATAL("bad time %ld", (long)tv); 1779 1780 if (isrec(x)) { 1781 /* format argument not provided, use default */ 1782 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 1783 } else 1784 fmt = tostring(getsval(x)); 1785 1786 sz = 32; 1787 buf = NULL; 1788 do { 1789 if ((buf = realloc(buf, (sz *= 2))) == NULL) 1790 FATAL("out of memory in strftime"); 1791 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 1792 1793 y = gettemp(); 1794 setsval(y, buf); 1795 free(fmt); 1796 free(buf); 1797 1798 return y; 1799 default: /* can't happen */ 1800 FATAL("illegal function type %d", t); 1801 break; 1802 } 1803 tempfree(x); 1804 x = gettemp(); 1805 setfval(x, u); 1806 if (nextarg != NULL) { 1807 WARNING("warning: function has too many arguments"); 1808 for ( ; nextarg; nextarg = nextarg->nnext) 1809 execute(nextarg); 1810 } 1811 return(x); 1812 } 1813 1814 Cell *printstat(Node **a, int n) /* print a[0] */ 1815 { 1816 Node *x; 1817 Cell *y; 1818 FILE *fp; 1819 1820 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1821 fp = stdout; 1822 else 1823 fp = redirect(ptoi(a[1]), a[2]); 1824 for (x = a[0]; x != NULL; x = x->nnext) { 1825 y = execute(x); 1826 fputs(getpssval(y), fp); 1827 tempfree(y); 1828 if (x->nnext == NULL) 1829 fputs(getsval(orsloc), fp); 1830 else 1831 fputs(getsval(ofsloc), fp); 1832 } 1833 if (a[1] != NULL) 1834 fflush(fp); 1835 if (ferror(fp)) 1836 FATAL("write error on %s", filename(fp)); 1837 return(True); 1838 } 1839 1840 Cell *nullproc(Node **a, int n) 1841 { 1842 return 0; 1843 } 1844 1845 1846 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1847 { 1848 FILE *fp; 1849 Cell *x; 1850 char *fname; 1851 1852 x = execute(b); 1853 fname = getsval(x); 1854 fp = openfile(a, fname, NULL); 1855 if (fp == NULL) 1856 FATAL("can't open file %s", fname); 1857 tempfree(x); 1858 return fp; 1859 } 1860 1861 struct files { 1862 FILE *fp; 1863 const char *fname; 1864 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1865 } *files; 1866 1867 size_t nfiles; 1868 1869 static void stdinit(void) /* in case stdin, etc., are not constants */ 1870 { 1871 nfiles = FOPEN_MAX; 1872 files = (struct files *) calloc(nfiles, sizeof(*files)); 1873 if (files == NULL) 1874 FATAL("can't allocate file memory for %zu files", nfiles); 1875 files[0].fp = stdin; 1876 files[0].fname = "/dev/stdin"; 1877 files[0].mode = LT; 1878 files[1].fp = stdout; 1879 files[1].fname = "/dev/stdout"; 1880 files[1].mode = GT; 1881 files[2].fp = stderr; 1882 files[2].fname = "/dev/stderr"; 1883 files[2].mode = GT; 1884 } 1885 1886 FILE *openfile(int a, const char *us, bool *pnewflag) 1887 { 1888 const char *s = us; 1889 size_t i; 1890 int m; 1891 FILE *fp = NULL; 1892 1893 if (*s == '\0') 1894 FATAL("null file name in print or getline"); 1895 for (i = 0; i < nfiles; i++) 1896 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1897 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1898 a == FFLUSH)) { 1899 if (pnewflag) 1900 *pnewflag = false; 1901 return files[i].fp; 1902 } 1903 if (a == FFLUSH) /* didn't find it, so don't create it! */ 1904 return NULL; 1905 1906 for (i = 0; i < nfiles; i++) 1907 if (files[i].fp == NULL) 1908 break; 1909 if (i >= nfiles) { 1910 struct files *nf; 1911 size_t nnf = nfiles + FOPEN_MAX; 1912 nf = (struct files *) realloc(files, nnf * sizeof(*nf)); 1913 if (nf == NULL) 1914 FATAL("cannot grow files for %s and %zu files", s, nnf); 1915 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1916 nfiles = nnf; 1917 files = nf; 1918 } 1919 fflush(stdout); /* force a semblance of order */ 1920 m = a; 1921 if (a == GT) { 1922 fp = fopen(s, "w"); 1923 } else if (a == APPEND) { 1924 fp = fopen(s, "a"); 1925 m = GT; /* so can mix > and >> */ 1926 } else if (a == '|') { /* output pipe */ 1927 fp = popen(s, "w"); 1928 } else if (a == LE) { /* input pipe */ 1929 fp = popen(s, "r"); 1930 } else if (a == LT) { /* getline <file */ 1931 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1932 } else /* can't happen */ 1933 FATAL("illegal redirection %d", a); 1934 if (fp != NULL) { 1935 files[i].fname = tostring(s); 1936 files[i].fp = fp; 1937 files[i].mode = m; 1938 if (pnewflag) 1939 *pnewflag = true; 1940 if (fp != stdin && fp != stdout && fp != stderr) 1941 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1942 } 1943 return fp; 1944 } 1945 1946 const char *filename(FILE *fp) 1947 { 1948 size_t i; 1949 1950 for (i = 0; i < nfiles; i++) 1951 if (fp == files[i].fp) 1952 return files[i].fname; 1953 return "???"; 1954 } 1955 1956 Cell *closefile(Node **a, int n) 1957 { 1958 Cell *x; 1959 size_t i; 1960 bool stat; 1961 1962 x = execute(a[0]); 1963 getsval(x); 1964 stat = true; 1965 for (i = 0; i < nfiles; i++) { 1966 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 1967 continue; 1968 if (ferror(files[i].fp)) 1969 FATAL("i/o error occurred on %s", files[i].fname); 1970 if (files[i].fp == stdin || files[i].fp == stdout || 1971 files[i].fp == stderr) 1972 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 1973 else if (files[i].mode == '|' || files[i].mode == LE) 1974 stat = pclose(files[i].fp) == -1; 1975 else 1976 stat = fclose(files[i].fp) == EOF; 1977 if (stat) 1978 FATAL("i/o error occurred closing %s", files[i].fname); 1979 if (i > 2) /* don't do /dev/std... */ 1980 xfree(files[i].fname); 1981 files[i].fname = NULL; /* watch out for ref thru this */ 1982 files[i].fp = NULL; 1983 break; 1984 } 1985 tempfree(x); 1986 x = gettemp(); 1987 setfval(x, (Awkfloat) (stat ? -1 : 0)); 1988 return(x); 1989 } 1990 1991 void closeall(void) 1992 { 1993 size_t i; 1994 bool stat = false; 1995 1996 for (i = 0; i < nfiles; i++) { 1997 if (! files[i].fp) 1998 continue; 1999 if (ferror(files[i].fp)) 2000 FATAL( "i/o error occurred on %s", files[i].fname ); 2001 if (files[i].fp == stdin) 2002 continue; 2003 if (files[i].mode == '|' || files[i].mode == LE) 2004 stat = pclose(files[i].fp) == -1; 2005 else if (files[i].fp == stdout || files[i].fp == stderr) 2006 stat = fflush(files[i].fp) == EOF; 2007 else 2008 stat = fclose(files[i].fp) == EOF; 2009 if (stat) 2010 FATAL( "i/o error occurred while closing %s", files[i].fname ); 2011 } 2012 } 2013 2014 static void flush_all(void) 2015 { 2016 size_t i; 2017 2018 for (i = 0; i < nfiles; i++) 2019 if (files[i].fp) 2020 fflush(files[i].fp); 2021 } 2022 2023 void backsub(char **pb_ptr, const char **sptr_ptr); 2024 2025 Cell *sub(Node **a, int nnn) /* substitute command */ 2026 { 2027 const char *sptr, *q; 2028 Cell *x, *y, *result; 2029 char *t, *buf, *pb; 2030 fa *pfa; 2031 int bufsz = recsize; 2032 2033 if ((buf = (char *) malloc(bufsz)) == NULL) 2034 FATAL("out of memory in sub"); 2035 x = execute(a[3]); /* target string */ 2036 t = getsval(x); 2037 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2038 pfa = (fa *) a[1]; /* regular expression */ 2039 else { 2040 y = execute(a[1]); 2041 pfa = makedfa(getsval(y), 1); 2042 tempfree(y); 2043 } 2044 y = execute(a[2]); /* replacement string */ 2045 result = False; 2046 if (pmatch(pfa, t)) { 2047 sptr = t; 2048 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 2049 pb = buf; 2050 while (sptr < patbeg) 2051 *pb++ = *sptr++; 2052 sptr = getsval(y); 2053 while (*sptr != '\0') { 2054 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2055 if (*sptr == '\\') { 2056 backsub(&pb, &sptr); 2057 } else if (*sptr == '&') { 2058 sptr++; 2059 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2060 for (q = patbeg; q < patbeg+patlen; ) 2061 *pb++ = *q++; 2062 } else 2063 *pb++ = *sptr++; 2064 } 2065 *pb = '\0'; 2066 if (pb > buf + bufsz) 2067 FATAL("sub result1 %.30s too big; can't happen", buf); 2068 sptr = patbeg + patlen; 2069 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2070 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2071 while ((*pb++ = *sptr++) != '\0') 2072 continue; 2073 } 2074 if (pb > buf + bufsz) 2075 FATAL("sub result2 %.30s too big; can't happen", buf); 2076 setsval(x, buf); /* BUG: should be able to avoid copy */ 2077 result = True; 2078 } 2079 tempfree(x); 2080 tempfree(y); 2081 free(buf); 2082 return result; 2083 } 2084 2085 Cell *gsub(Node **a, int nnn) /* global substitute */ 2086 { 2087 Cell *x, *y; 2088 char *rptr, *pb; 2089 const char *q, *t, *sptr; 2090 char *buf; 2091 fa *pfa; 2092 int mflag, tempstat, num; 2093 int bufsz = recsize; 2094 2095 if ((buf = (char *) malloc(bufsz)) == NULL) 2096 FATAL("out of memory in gsub"); 2097 mflag = 0; /* if mflag == 0, can replace empty string */ 2098 num = 0; 2099 x = execute(a[3]); /* target string */ 2100 t = getsval(x); 2101 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2102 pfa = (fa *) a[1]; /* regular expression */ 2103 else { 2104 y = execute(a[1]); 2105 pfa = makedfa(getsval(y), 1); 2106 tempfree(y); 2107 } 2108 y = execute(a[2]); /* replacement string */ 2109 if (pmatch(pfa, t)) { 2110 tempstat = pfa->initstat; 2111 pfa->initstat = 2; 2112 pb = buf; 2113 rptr = getsval(y); 2114 do { 2115 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2116 if (mflag == 0) { /* can replace empty */ 2117 num++; 2118 sptr = rptr; 2119 while (*sptr != '\0') { 2120 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2121 if (*sptr == '\\') { 2122 backsub(&pb, &sptr); 2123 } else if (*sptr == '&') { 2124 sptr++; 2125 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2126 for (q = patbeg; q < patbeg+patlen; ) 2127 *pb++ = *q++; 2128 } else 2129 *pb++ = *sptr++; 2130 } 2131 } 2132 if (*t == '\0') /* at end */ 2133 goto done; 2134 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2135 *pb++ = *t++; 2136 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2137 FATAL("gsub result0 %.30s too big; can't happen", buf); 2138 mflag = 0; 2139 } 2140 else { /* matched nonempty string */ 2141 num++; 2142 sptr = t; 2143 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2144 while (sptr < patbeg) 2145 *pb++ = *sptr++; 2146 sptr = rptr; 2147 while (*sptr != '\0') { 2148 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2149 if (*sptr == '\\') { 2150 backsub(&pb, &sptr); 2151 } else if (*sptr == '&') { 2152 sptr++; 2153 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2154 for (q = patbeg; q < patbeg+patlen; ) 2155 *pb++ = *q++; 2156 } else 2157 *pb++ = *sptr++; 2158 } 2159 t = patbeg + patlen; 2160 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2161 goto done; 2162 if (pb > buf + bufsz) 2163 FATAL("gsub result1 %.30s too big; can't happen", buf); 2164 mflag = 1; 2165 } 2166 } while (pmatch(pfa,t)); 2167 sptr = t; 2168 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2169 while ((*pb++ = *sptr++) != '\0') 2170 continue; 2171 done: if (pb < buf + bufsz) 2172 *pb = '\0'; 2173 else if (*(pb-1) != '\0') 2174 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2175 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2176 pfa->initstat = tempstat; 2177 } 2178 tempfree(x); 2179 tempfree(y); 2180 x = gettemp(); 2181 x->tval = NUM; 2182 x->fval = num; 2183 free(buf); 2184 return(x); 2185 } 2186 2187 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2188 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2189 { 2190 Cell *x, *y, *res, *h; 2191 char *rptr; 2192 const char *sptr; 2193 char *buf, *pb; 2194 const char *t, *q; 2195 fa *pfa; 2196 int mflag, tempstat, num, whichm; 2197 int bufsz = recsize; 2198 2199 if ((buf = malloc(bufsz)) == NULL) 2200 FATAL("out of memory in gensub"); 2201 mflag = 0; /* if mflag == 0, can replace empty string */ 2202 num = 0; 2203 x = execute(a[4]); /* source string */ 2204 t = getsval(x); 2205 res = copycell(x); /* target string - initially copy of source */ 2206 res->csub = CTEMP; /* result values are temporary */ 2207 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2208 pfa = (fa *) a[1]; /* regular expression */ 2209 else { 2210 y = execute(a[1]); 2211 pfa = makedfa(getsval(y), 1); 2212 tempfree(y); 2213 } 2214 y = execute(a[2]); /* replacement string */ 2215 h = execute(a[3]); /* which matches should be replaced */ 2216 sptr = getsval(h); 2217 if (sptr[0] == 'g' || sptr[0] == 'G') 2218 whichm = -1; 2219 else { 2220 /* 2221 * The specified number is index of replacement, starting 2222 * from 1. GNU awk treats index lower than 0 same as 2223 * 1, we do same for compatibility. 2224 */ 2225 whichm = (int) getfval(h) - 1; 2226 if (whichm < 0) 2227 whichm = 0; 2228 } 2229 tempfree(h); 2230 2231 if (pmatch(pfa, t)) { 2232 char *sl; 2233 2234 tempstat = pfa->initstat; 2235 pfa->initstat = 2; 2236 pb = buf; 2237 rptr = getsval(y); 2238 /* 2239 * XXX if there are any backreferences in subst string, 2240 * complain now. 2241 */ 2242 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2243 if (strchr("0123456789", sl[1])) { 2244 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2245 } 2246 } 2247 2248 do { 2249 if (whichm >= 0 && whichm != num) { 2250 num++; 2251 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2252 2253 /* copy the part of string up to and including 2254 * match to output buffer */ 2255 while (t < patbeg + patlen) 2256 *pb++ = *t++; 2257 continue; 2258 } 2259 2260 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2261 if (mflag == 0) { /* can replace empty */ 2262 num++; 2263 sptr = rptr; 2264 while (*sptr != 0) { 2265 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2266 if (*sptr == '\\') { 2267 backsub(&pb, &sptr); 2268 } else if (*sptr == '&') { 2269 sptr++; 2270 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2271 for (q = patbeg; q < patbeg+patlen; ) 2272 *pb++ = *q++; 2273 } else 2274 *pb++ = *sptr++; 2275 } 2276 } 2277 if (*t == 0) /* at end */ 2278 goto done; 2279 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2280 *pb++ = *t++; 2281 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2282 FATAL("gensub result0 %.30s too big; can't happen", buf); 2283 mflag = 0; 2284 } 2285 else { /* matched nonempty string */ 2286 num++; 2287 sptr = t; 2288 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2289 while (sptr < patbeg) 2290 *pb++ = *sptr++; 2291 sptr = rptr; 2292 while (*sptr != 0) { 2293 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2294 if (*sptr == '\\') { 2295 backsub(&pb, &sptr); 2296 } else if (*sptr == '&') { 2297 sptr++; 2298 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2299 for (q = patbeg; q < patbeg+patlen; ) 2300 *pb++ = *q++; 2301 } else 2302 *pb++ = *sptr++; 2303 } 2304 t = patbeg + patlen; 2305 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2306 goto done; 2307 if (pb > buf + bufsz) 2308 FATAL("gensub result1 %.30s too big; can't happen", buf); 2309 mflag = 1; 2310 } 2311 } while (pmatch(pfa,t)); 2312 sptr = t; 2313 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2314 while ((*pb++ = *sptr++) != 0) 2315 ; 2316 done: if (pb > buf + bufsz) 2317 FATAL("gensub result2 %.30s too big; can't happen", buf); 2318 *pb = '\0'; 2319 setsval(res, buf); 2320 pfa->initstat = tempstat; 2321 } 2322 tempfree(x); 2323 tempfree(y); 2324 free(buf); 2325 return(res); 2326 } 2327 2328 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2329 { /* sptr[0] == '\\' */ 2330 char *pb = *pb_ptr; 2331 const char *sptr = *sptr_ptr; 2332 static bool first = true; 2333 static bool do_posix = false; 2334 2335 if (first) { 2336 first = false; 2337 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2338 } 2339 2340 if (sptr[1] == '\\') { 2341 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2342 *pb++ = '\\'; 2343 *pb++ = '&'; 2344 sptr += 4; 2345 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2346 *pb++ = '\\'; 2347 sptr += 2; 2348 } else if (do_posix) { /* \\x -> \x */ 2349 sptr++; 2350 *pb++ = *sptr++; 2351 } else { /* \\x -> \\x */ 2352 *pb++ = *sptr++; 2353 *pb++ = *sptr++; 2354 } 2355 } else if (sptr[1] == '&') { /* literal & */ 2356 sptr++; 2357 *pb++ = *sptr++; 2358 } else /* literal \ */ 2359 *pb++ = *sptr++; 2360 2361 *pb_ptr = pb; 2362 *sptr_ptr = sptr; 2363 } 2364