1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 #include <sys/cdefs.h> 26 __FBSDID("$FreeBSD$"); 27 28 #define DEBUG 29 #include <stdio.h> 30 #include <ctype.h> 31 #include <errno.h> 32 #include <wchar.h> 33 #include <wctype.h> 34 #include <fcntl.h> 35 #include <setjmp.h> 36 #include <limits.h> 37 #include <math.h> 38 #include <string.h> 39 #include <stdlib.h> 40 #include <time.h> 41 #include <sys/types.h> 42 #include <sys/wait.h> 43 #include "awk.h" 44 #include "awkgram.tab.h" 45 46 static void stdinit(void); 47 static void flush_all(void); 48 49 #if 1 50 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 51 #else 52 void tempfree(Cell *p) { 53 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 54 WARNING("bad csub %d in Cell %d %s", 55 p->csub, p->ctype, p->sval); 56 } 57 if (istemp(p)) 58 tfree(p); 59 } 60 #endif 61 62 /* do we really need these? */ 63 /* #ifdef _NFILE */ 64 /* #ifndef FOPEN_MAX */ 65 /* #define FOPEN_MAX _NFILE */ 66 /* #endif */ 67 /* #endif */ 68 /* */ 69 /* #ifndef FOPEN_MAX */ 70 /* #define FOPEN_MAX 40 */ /* max number of open files */ 71 /* #endif */ 72 /* */ 73 /* #ifndef RAND_MAX */ 74 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 75 /* #endif */ 76 77 jmp_buf env; 78 extern int pairstack[]; 79 extern Awkfloat srand_seed; 80 81 Node *winner = NULL; /* root of parse tree */ 82 Cell *tmps; /* free temporary cells for execution */ 83 84 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 85 Cell *True = &truecell; 86 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 87 Cell *False = &falsecell; 88 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 89 Cell *jbreak = &breakcell; 90 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 91 Cell *jcont = &contcell; 92 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 93 Cell *jnext = &nextcell; 94 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 95 Cell *jnextfile = &nextfilecell; 96 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 97 Cell *jexit = &exitcell; 98 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 99 Cell *jret = &retcell; 100 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 101 102 Node *curnode = NULL; /* the node being executed, for debugging */ 103 104 /* buffer memory management */ 105 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 106 const char *whatrtn) 107 /* pbuf: address of pointer to buffer being managed 108 * psiz: address of buffer size variable 109 * minlen: minimum length of buffer needed 110 * quantum: buffer size quantum 111 * pbptr: address of movable pointer into buffer, or 0 if none 112 * whatrtn: name of the calling routine if failure should cause fatal error 113 * 114 * return 0 for realloc failure, !=0 for success 115 */ 116 { 117 if (minlen > *psiz) { 118 char *tbuf; 119 int rminlen = quantum ? minlen % quantum : 0; 120 int boff = pbptr ? *pbptr - *pbuf : 0; 121 /* round up to next multiple of quantum */ 122 if (rminlen) 123 minlen += quantum - rminlen; 124 tbuf = (char *) realloc(*pbuf, minlen); 125 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 126 if (tbuf == NULL) { 127 if (whatrtn) 128 FATAL("out of memory in %s", whatrtn); 129 return 0; 130 } 131 *pbuf = tbuf; 132 *psiz = minlen; 133 if (pbptr) 134 *pbptr = tbuf + boff; 135 } 136 return 1; 137 } 138 139 void run(Node *a) /* execution of parse tree starts here */ 140 { 141 142 stdinit(); 143 execute(a); 144 closeall(); 145 } 146 147 Cell *execute(Node *u) /* execute a node of the parse tree */ 148 { 149 Cell *(*proc)(Node **, int); 150 Cell *x; 151 Node *a; 152 153 if (u == NULL) 154 return(True); 155 for (a = u; ; a = a->nnext) { 156 curnode = a; 157 if (isvalue(a)) { 158 x = (Cell *) (a->narg[0]); 159 if (isfld(x) && !donefld) 160 fldbld(); 161 else if (isrec(x) && !donerec) 162 recbld(); 163 return(x); 164 } 165 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 166 FATAL("illegal statement"); 167 proc = proctab[a->nobj-FIRSTTOKEN]; 168 x = (*proc)(a->narg, a->nobj); 169 if (isfld(x) && !donefld) 170 fldbld(); 171 else if (isrec(x) && !donerec) 172 recbld(); 173 if (isexpr(a)) 174 return(x); 175 if (isjump(x)) 176 return(x); 177 if (a->nnext == NULL) 178 return(x); 179 tempfree(x); 180 } 181 } 182 183 184 Cell *program(Node **a, int n) /* execute an awk program */ 185 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 186 Cell *x; 187 188 if (setjmp(env) != 0) 189 goto ex; 190 if (a[0]) { /* BEGIN */ 191 x = execute(a[0]); 192 if (isexit(x)) 193 return(True); 194 if (isjump(x)) 195 FATAL("illegal break, continue, next or nextfile from BEGIN"); 196 tempfree(x); 197 } 198 if (a[1] || a[2]) 199 while (getrec(&record, &recsize, true) > 0) { 200 x = execute(a[1]); 201 if (isexit(x)) 202 break; 203 tempfree(x); 204 } 205 ex: 206 if (setjmp(env) != 0) /* handles exit within END */ 207 goto ex1; 208 if (a[2]) { /* END */ 209 x = execute(a[2]); 210 if (isbreak(x) || isnext(x) || iscont(x)) 211 FATAL("illegal break, continue, next or nextfile from END"); 212 tempfree(x); 213 } 214 ex1: 215 return(True); 216 } 217 218 struct Frame { /* stack frame for awk function calls */ 219 int nargs; /* number of arguments in this call */ 220 Cell *fcncell; /* pointer to Cell for function */ 221 Cell **args; /* pointer to array of arguments after execute */ 222 Cell *retval; /* return value */ 223 }; 224 225 #define NARGS 50 /* max args in a call */ 226 227 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 228 int nframe = 0; /* number of frames allocated */ 229 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 230 231 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 232 { 233 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 234 int i, ncall, ndef; 235 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 236 Node *x; 237 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 238 Cell *y, *z, *fcn; 239 char *s; 240 241 fcn = execute(a[0]); /* the function itself */ 242 s = fcn->nval; 243 if (!isfcn(fcn)) 244 FATAL("calling undefined function %s", s); 245 if (frame == NULL) { 246 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 247 if (frame == NULL) 248 FATAL("out of space for stack frames calling %s", s); 249 } 250 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 251 ncall++; 252 ndef = (int) fcn->fval; /* args in defn */ 253 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 254 if (ncall > ndef) 255 WARNING("function %s called with %d args, uses only %d", 256 s, ncall, ndef); 257 if (ncall + ndef > NARGS) 258 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 259 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 260 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 261 y = execute(x); 262 oargs[i] = y; 263 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 264 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 265 if (isfcn(y)) 266 FATAL("can't use function %s as argument in %s", y->nval, s); 267 if (isarr(y)) 268 args[i] = y; /* arrays by ref */ 269 else 270 args[i] = copycell(y); 271 tempfree(y); 272 } 273 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 274 args[i] = gettemp(); 275 *args[i] = newcopycell; 276 } 277 frp++; /* now ok to up frame */ 278 if (frp >= frame + nframe) { 279 int dfp = frp - frame; /* old index */ 280 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); 281 if (frame == NULL) 282 FATAL("out of space for stack frames in %s", s); 283 frp = frame + dfp; 284 } 285 frp->fcncell = fcn; 286 frp->args = args; 287 frp->nargs = ndef; /* number defined with (excess are locals) */ 288 frp->retval = gettemp(); 289 290 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 291 y = execute((Node *)(fcn->sval)); /* execute body */ 292 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 293 294 for (i = 0; i < ndef; i++) { 295 Cell *t = frp->args[i]; 296 if (isarr(t)) { 297 if (t->csub == CCOPY) { 298 if (i >= ncall) { 299 freesymtab(t); 300 t->csub = CTEMP; 301 tempfree(t); 302 } else { 303 oargs[i]->tval = t->tval; 304 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 305 oargs[i]->sval = t->sval; 306 tempfree(t); 307 } 308 } 309 } else if (t != y) { /* kludge to prevent freeing twice */ 310 t->csub = CTEMP; 311 tempfree(t); 312 } else if (t == y && t->csub == CCOPY) { 313 t->csub = CTEMP; 314 tempfree(t); 315 freed = 1; 316 } 317 } 318 tempfree(fcn); 319 if (isexit(y) || isnext(y)) 320 return y; 321 if (freed == 0) { 322 tempfree(y); /* don't free twice! */ 323 } 324 z = frp->retval; /* return value */ 325 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 326 frp--; 327 return(z); 328 } 329 330 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 331 { 332 Cell *y; 333 334 /* copy is not constant or field */ 335 336 y = gettemp(); 337 y->tval = x->tval & ~(CON|FLD|REC); 338 y->csub = CCOPY; /* prevents freeing until call is over */ 339 y->nval = x->nval; /* BUG? */ 340 if (isstr(x) /* || x->ctype == OCELL */) { 341 y->sval = tostring(x->sval); 342 y->tval &= ~DONTFREE; 343 } else 344 y->tval |= DONTFREE; 345 y->fval = x->fval; 346 return y; 347 } 348 349 Cell *arg(Node **a, int n) /* nth argument of a function */ 350 { 351 352 n = ptoi(a[0]); /* argument number, counting from 0 */ 353 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 354 if (n+1 > frp->nargs) 355 FATAL("argument #%d of function %s was not supplied", 356 n+1, frp->fcncell->nval); 357 return frp->args[n]; 358 } 359 360 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 361 { 362 Cell *y; 363 364 switch (n) { 365 case EXIT: 366 if (a[0] != NULL) { 367 y = execute(a[0]); 368 errorflag = (int) getfval(y); 369 tempfree(y); 370 } 371 longjmp(env, 1); 372 case RETURN: 373 if (a[0] != NULL) { 374 y = execute(a[0]); 375 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 376 setsval(frp->retval, getsval(y)); 377 frp->retval->fval = getfval(y); 378 frp->retval->tval |= NUM; 379 } 380 else if (y->tval & STR) 381 setsval(frp->retval, getsval(y)); 382 else if (y->tval & NUM) 383 setfval(frp->retval, getfval(y)); 384 else /* can't happen */ 385 FATAL("bad type variable %d", y->tval); 386 tempfree(y); 387 } 388 return(jret); 389 case NEXT: 390 return(jnext); 391 case NEXTFILE: 392 nextfile(); 393 return(jnextfile); 394 case BREAK: 395 return(jbreak); 396 case CONTINUE: 397 return(jcont); 398 default: /* can't happen */ 399 FATAL("illegal jump type %d", n); 400 } 401 return 0; /* not reached */ 402 } 403 404 Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 405 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 406 Cell *r, *x; 407 extern Cell **fldtab; 408 FILE *fp; 409 char *buf; 410 int bufsize = recsize; 411 int mode; 412 bool newflag; 413 double result; 414 415 if ((buf = (char *) malloc(bufsize)) == NULL) 416 FATAL("out of memory in getline"); 417 418 fflush(stdout); /* in case someone is waiting for a prompt */ 419 r = gettemp(); 420 if (a[1] != NULL) { /* getline < file */ 421 x = execute(a[2]); /* filename */ 422 mode = ptoi(a[1]); 423 if (mode == '|') /* input pipe */ 424 mode = LE; /* arbitrary flag */ 425 fp = openfile(mode, getsval(x), &newflag); 426 tempfree(x); 427 if (fp == NULL) 428 n = -1; 429 else 430 n = readrec(&buf, &bufsize, fp, newflag); 431 if (n <= 0) { 432 ; 433 } else if (a[0] != NULL) { /* getline var <file */ 434 x = execute(a[0]); 435 setsval(x, buf); 436 if (is_number(x->sval, & result)) { 437 x->fval = result; 438 x->tval |= NUM; 439 } 440 tempfree(x); 441 } else { /* getline <file */ 442 setsval(fldtab[0], buf); 443 if (is_number(fldtab[0]->sval, & result)) { 444 fldtab[0]->fval = result; 445 fldtab[0]->tval |= NUM; 446 } 447 } 448 } else { /* bare getline; use current input */ 449 if (a[0] == NULL) /* getline */ 450 n = getrec(&record, &recsize, true); 451 else { /* getline var */ 452 n = getrec(&buf, &bufsize, false); 453 x = execute(a[0]); 454 setsval(x, buf); 455 if (is_number(x->sval, & result)) { 456 x->fval = result; 457 x->tval |= NUM; 458 } 459 tempfree(x); 460 } 461 } 462 setfval(r, (Awkfloat) n); 463 free(buf); 464 return r; 465 } 466 467 Cell *getnf(Node **a, int n) /* get NF */ 468 { 469 if (!donefld) 470 fldbld(); 471 return (Cell *) a[0]; 472 } 473 474 static char * 475 makearraystring(Node *p, const char *func) 476 { 477 char *buf; 478 int bufsz = recsize; 479 size_t blen; 480 481 if ((buf = (char *) malloc(bufsz)) == NULL) { 482 FATAL("%s: out of memory", func); 483 } 484 485 blen = 0; 486 buf[blen] = '\0'; 487 488 for (; p; p = p->nnext) { 489 Cell *x = execute(p); /* expr */ 490 char *s = getsval(x); 491 size_t seplen = strlen(getsval(subseploc)); 492 size_t nsub = p->nnext ? seplen : 0; 493 size_t slen = strlen(s); 494 size_t tlen = blen + slen + nsub; 495 496 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 497 FATAL("%s: out of memory %s[%s...]", 498 func, x->nval, buf); 499 } 500 memcpy(buf + blen, s, slen); 501 if (nsub) { 502 memcpy(buf + blen + slen, *SUBSEP, nsub); 503 } 504 buf[tlen] = '\0'; 505 blen = tlen; 506 tempfree(x); 507 } 508 return buf; 509 } 510 511 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 512 { 513 Cell *x, *z; 514 char *buf; 515 516 x = execute(a[0]); /* Cell* for symbol table */ 517 buf = makearraystring(a[1], __func__); 518 if (!isarr(x)) { 519 DPRINTF("making %s into an array\n", NN(x->nval)); 520 if (freeable(x)) 521 xfree(x->sval); 522 x->tval &= ~(STR|NUM|DONTFREE); 523 x->tval |= ARR; 524 x->sval = (char *) makesymtab(NSYMTAB); 525 } 526 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 527 z->ctype = OCELL; 528 z->csub = CVAR; 529 tempfree(x); 530 free(buf); 531 return(z); 532 } 533 534 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 535 { 536 Cell *x; 537 538 x = execute(a[0]); /* Cell* for symbol table */ 539 if (x == symtabloc) { 540 FATAL("cannot delete SYMTAB or its elements"); 541 } 542 if (!isarr(x)) 543 return True; 544 if (a[1] == NULL) { /* delete the elements, not the table */ 545 freesymtab(x); 546 x->tval &= ~STR; 547 x->tval |= ARR; 548 x->sval = (char *) makesymtab(NSYMTAB); 549 } else { 550 char *buf = makearraystring(a[1], __func__); 551 freeelem(x, buf); 552 free(buf); 553 } 554 tempfree(x); 555 return True; 556 } 557 558 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 559 { 560 Cell *ap, *k; 561 char *buf; 562 563 ap = execute(a[1]); /* array name */ 564 if (!isarr(ap)) { 565 DPRINTF("making %s into an array\n", ap->nval); 566 if (freeable(ap)) 567 xfree(ap->sval); 568 ap->tval &= ~(STR|NUM|DONTFREE); 569 ap->tval |= ARR; 570 ap->sval = (char *) makesymtab(NSYMTAB); 571 } 572 buf = makearraystring(a[0], __func__); 573 k = lookup(buf, (Array *) ap->sval); 574 tempfree(ap); 575 free(buf); 576 if (k == NULL) 577 return(False); 578 else 579 return(True); 580 } 581 582 583 Cell *matchop(Node **a, int n) /* ~ and match() */ 584 { 585 Cell *x, *y; 586 char *s, *t; 587 int i; 588 fa *pfa; 589 int (*mf)(fa *, const char *) = match, mode = 0; 590 591 if (n == MATCHFCN) { 592 mf = pmatch; 593 mode = 1; 594 } 595 x = execute(a[1]); /* a[1] = target text */ 596 s = getsval(x); 597 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 598 i = (*mf)((fa *) a[2], s); 599 else { 600 y = execute(a[2]); /* a[2] = regular expr */ 601 t = getsval(y); 602 pfa = makedfa(t, mode); 603 i = (*mf)(pfa, s); 604 tempfree(y); 605 } 606 tempfree(x); 607 if (n == MATCHFCN) { 608 int start = patbeg - s + 1; 609 if (patlen < 0) 610 start = 0; 611 setfval(rstartloc, (Awkfloat) start); 612 setfval(rlengthloc, (Awkfloat) patlen); 613 x = gettemp(); 614 x->tval = NUM; 615 x->fval = start; 616 return x; 617 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 618 return(True); 619 else 620 return(False); 621 } 622 623 624 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 625 { 626 Cell *x, *y; 627 int i; 628 629 x = execute(a[0]); 630 i = istrue(x); 631 tempfree(x); 632 switch (n) { 633 case BOR: 634 if (i) return(True); 635 y = execute(a[1]); 636 i = istrue(y); 637 tempfree(y); 638 if (i) return(True); 639 else return(False); 640 case AND: 641 if ( !i ) return(False); 642 y = execute(a[1]); 643 i = istrue(y); 644 tempfree(y); 645 if (i) return(True); 646 else return(False); 647 case NOT: 648 if (i) return(False); 649 else return(True); 650 default: /* can't happen */ 651 FATAL("unknown boolean operator %d", n); 652 } 653 return 0; /*NOTREACHED*/ 654 } 655 656 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 657 { 658 int i; 659 Cell *x, *y; 660 Awkfloat j; 661 662 x = execute(a[0]); 663 y = execute(a[1]); 664 if (x->tval&NUM && y->tval&NUM) { 665 j = x->fval - y->fval; 666 i = j<0? -1: (j>0? 1: 0); 667 } else { 668 i = strcoll(getsval(x), getsval(y)); 669 } 670 tempfree(x); 671 tempfree(y); 672 switch (n) { 673 case LT: if (i<0) return(True); 674 else return(False); 675 case LE: if (i<=0) return(True); 676 else return(False); 677 case NE: if (i!=0) return(True); 678 else return(False); 679 case EQ: if (i == 0) return(True); 680 else return(False); 681 case GE: if (i>=0) return(True); 682 else return(False); 683 case GT: if (i>0) return(True); 684 else return(False); 685 default: /* can't happen */ 686 FATAL("unknown relational operator %d", n); 687 } 688 return 0; /*NOTREACHED*/ 689 } 690 691 void tfree(Cell *a) /* free a tempcell */ 692 { 693 if (freeable(a)) { 694 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 695 xfree(a->sval); 696 } 697 if (a == tmps) 698 FATAL("tempcell list is curdled"); 699 a->cnext = tmps; 700 tmps = a; 701 } 702 703 Cell *gettemp(void) /* get a tempcell */ 704 { int i; 705 Cell *x; 706 707 if (!tmps) { 708 tmps = (Cell *) calloc(100, sizeof(*tmps)); 709 if (!tmps) 710 FATAL("out of space for temporaries"); 711 for (i = 1; i < 100; i++) 712 tmps[i-1].cnext = &tmps[i]; 713 tmps[i-1].cnext = NULL; 714 } 715 x = tmps; 716 tmps = x->cnext; 717 *x = tempcell; 718 return(x); 719 } 720 721 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 722 { 723 Awkfloat val; 724 Cell *x; 725 int m; 726 char *s; 727 728 x = execute(a[0]); 729 val = getfval(x); /* freebsd: defend against super large field numbers */ 730 if ((Awkfloat)INT_MAX < val) 731 FATAL("trying to access out of range field %s", x->nval); 732 m = (int) val; 733 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 734 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 735 /* BUG: can x->nval ever be null??? */ 736 tempfree(x); 737 x = fieldadr(m); 738 x->ctype = OCELL; /* BUG? why are these needed? */ 739 x->csub = CFLD; 740 return(x); 741 } 742 743 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 744 { 745 int k, m, n; 746 char *s; 747 int temp; 748 Cell *x, *y, *z = NULL; 749 750 x = execute(a[0]); 751 y = execute(a[1]); 752 if (a[2] != NULL) 753 z = execute(a[2]); 754 s = getsval(x); 755 k = strlen(s) + 1; 756 if (k <= 1) { 757 tempfree(x); 758 tempfree(y); 759 if (a[2] != NULL) { 760 tempfree(z); 761 } 762 x = gettemp(); 763 setsval(x, ""); 764 return(x); 765 } 766 m = (int) getfval(y); 767 if (m <= 0) 768 m = 1; 769 else if (m > k) 770 m = k; 771 tempfree(y); 772 if (a[2] != NULL) { 773 n = (int) getfval(z); 774 tempfree(z); 775 } else 776 n = k - 1; 777 if (n < 0) 778 n = 0; 779 else if (n > k - m) 780 n = k - m; 781 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 782 y = gettemp(); 783 temp = s[n+m-1]; /* with thanks to John Linderman */ 784 s[n+m-1] = '\0'; 785 setsval(y, s + m - 1); 786 s[n+m-1] = temp; 787 tempfree(x); 788 return(y); 789 } 790 791 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 792 { 793 Cell *x, *y, *z; 794 char *s1, *s2, *p1, *p2, *q; 795 Awkfloat v = 0.0; 796 797 x = execute(a[0]); 798 s1 = getsval(x); 799 y = execute(a[1]); 800 s2 = getsval(y); 801 802 z = gettemp(); 803 for (p1 = s1; *p1 != '\0'; p1++) { 804 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 805 continue; 806 if (*p2 == '\0') { 807 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 808 break; 809 } 810 } 811 tempfree(x); 812 tempfree(y); 813 setfval(z, v); 814 return(z); 815 } 816 817 #define MAXNUMSIZE 50 818 819 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 820 { 821 char *fmt; 822 char *p, *t; 823 const char *os; 824 Cell *x; 825 int flag = 0, n; 826 int fmtwd; /* format width */ 827 int fmtsz = recsize; 828 char *buf = *pbuf; 829 int bufsize = *pbufsize; 830 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 831 #define BUFSZ(a) (bufsize - ((a) - buf)) 832 833 static bool first = true; 834 static bool have_a_format = false; 835 836 if (first) { 837 char xbuf[100]; 838 839 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 840 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 841 first = false; 842 } 843 844 os = s; 845 p = buf; 846 if ((fmt = (char *) malloc(fmtsz)) == NULL) 847 FATAL("out of memory in format()"); 848 while (*s) { 849 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 850 if (*s != '%') { 851 *p++ = *s++; 852 continue; 853 } 854 if (*(s+1) == '%') { 855 *p++ = '%'; 856 s += 2; 857 continue; 858 } 859 /* have to be real careful in case this is a huge number, eg, %100000d */ 860 fmtwd = atoi(s+1); 861 if (fmtwd < 0) 862 fmtwd = -fmtwd; 863 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 864 for (t = fmt; (*t++ = *s) != '\0'; s++) { 865 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 866 FATAL("format item %.30s... ran format() out of memory", os); 867 /* Ignore size specifiers */ 868 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 869 t--; 870 continue; 871 } 872 if (isalpha((uschar)*s)) 873 break; 874 if (*s == '$') { 875 FATAL("'$' not permitted in awk formats"); 876 } 877 if (*s == '*') { 878 if (a == NULL) { 879 FATAL("not enough args in printf(%s)", os); 880 } 881 x = execute(a); 882 a = a->nnext; 883 snprintf(t - 1, FMTSZ(t - 1), 884 "%d", fmtwd=(int) getfval(x)); 885 if (fmtwd < 0) 886 fmtwd = -fmtwd; 887 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 888 t = fmt + strlen(fmt); 889 tempfree(x); 890 } 891 } 892 *t = '\0'; 893 if (fmtwd < 0) 894 fmtwd = -fmtwd; 895 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 896 switch (*s) { 897 case 'a': case 'A': 898 if (have_a_format) 899 flag = *s; 900 else 901 flag = 'f'; 902 break; 903 case 'f': case 'e': case 'g': case 'E': case 'G': 904 flag = 'f'; 905 break; 906 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 907 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 908 *(t-1) = 'j'; 909 *t = *s; 910 *++t = '\0'; 911 break; 912 case 's': 913 flag = 's'; 914 break; 915 case 'c': 916 flag = 'c'; 917 break; 918 default: 919 WARNING("weird printf conversion %s", fmt); 920 flag = '?'; 921 break; 922 } 923 if (a == NULL) 924 FATAL("not enough args in printf(%s)", os); 925 x = execute(a); 926 a = a->nnext; 927 n = MAXNUMSIZE; 928 if (fmtwd > n) 929 n = fmtwd; 930 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 931 switch (flag) { 932 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 933 t = getsval(x); 934 n = strlen(t); 935 if (fmtwd > n) 936 n = fmtwd; 937 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 938 p += strlen(p); 939 snprintf(p, BUFSZ(p), "%s", t); 940 break; 941 case 'a': 942 case 'A': 943 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 944 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 945 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 946 case 's': 947 t = getsval(x); 948 n = strlen(t); 949 if (fmtwd > n) 950 n = fmtwd; 951 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 952 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); 953 snprintf(p, BUFSZ(p), fmt, t); 954 break; 955 case 'c': 956 if (isnum(x)) { 957 if ((int)getfval(x)) 958 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 959 else { 960 *p++ = '\0'; /* explicit null byte */ 961 *p = '\0'; /* next output will start here */ 962 } 963 } else 964 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 965 break; 966 default: 967 FATAL("can't happen: bad conversion %c in format()", flag); 968 } 969 tempfree(x); 970 p += strlen(p); 971 s++; 972 } 973 *p = '\0'; 974 free(fmt); 975 for ( ; a; a = a->nnext) /* evaluate any remaining args */ 976 execute(a); 977 *pbuf = buf; 978 *pbufsize = bufsize; 979 return p - buf; 980 } 981 982 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 983 { 984 Cell *x; 985 Node *y; 986 char *buf; 987 int bufsz=3*recsize; 988 989 if ((buf = (char *) malloc(bufsz)) == NULL) 990 FATAL("out of memory in awksprintf"); 991 y = a[0]->nnext; 992 x = execute(a[0]); 993 if (format(&buf, &bufsz, getsval(x), y) == -1) 994 FATAL("sprintf string %.30s... too long. can't happen.", buf); 995 tempfree(x); 996 x = gettemp(); 997 x->sval = buf; 998 x->tval = STR; 999 return(x); 1000 } 1001 1002 Cell *awkprintf(Node **a, int n) /* printf */ 1003 { /* a[0] is list of args, starting with format string */ 1004 /* a[1] is redirection operator, a[2] is redirection file */ 1005 FILE *fp; 1006 Cell *x; 1007 Node *y; 1008 char *buf; 1009 int len; 1010 int bufsz=3*recsize; 1011 1012 if ((buf = (char *) malloc(bufsz)) == NULL) 1013 FATAL("out of memory in awkprintf"); 1014 y = a[0]->nnext; 1015 x = execute(a[0]); 1016 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1017 FATAL("printf string %.30s... too long. can't happen.", buf); 1018 tempfree(x); 1019 if (a[1] == NULL) { 1020 /* fputs(buf, stdout); */ 1021 fwrite(buf, len, 1, stdout); 1022 if (ferror(stdout)) 1023 FATAL("write error on stdout"); 1024 } else { 1025 fp = redirect(ptoi(a[1]), a[2]); 1026 /* fputs(buf, fp); */ 1027 fwrite(buf, len, 1, fp); 1028 fflush(fp); 1029 if (ferror(fp)) 1030 FATAL("write error on %s", filename(fp)); 1031 } 1032 free(buf); 1033 return(True); 1034 } 1035 1036 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1037 { 1038 Awkfloat i, j = 0; 1039 double v; 1040 Cell *x, *y, *z; 1041 1042 x = execute(a[0]); 1043 i = getfval(x); 1044 tempfree(x); 1045 if (n != UMINUS && n != UPLUS) { 1046 y = execute(a[1]); 1047 j = getfval(y); 1048 tempfree(y); 1049 } 1050 z = gettemp(); 1051 switch (n) { 1052 case ADD: 1053 i += j; 1054 break; 1055 case MINUS: 1056 i -= j; 1057 break; 1058 case MULT: 1059 i *= j; 1060 break; 1061 case DIVIDE: 1062 if (j == 0) 1063 FATAL("division by zero"); 1064 i /= j; 1065 break; 1066 case MOD: 1067 if (j == 0) 1068 FATAL("division by zero in mod"); 1069 modf(i/j, &v); 1070 i = i - j * v; 1071 break; 1072 case UMINUS: 1073 i = -i; 1074 break; 1075 case UPLUS: /* handled by getfval(), above */ 1076 break; 1077 case POWER: 1078 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1079 i = ipow(i, (int) j); 1080 else { 1081 errno = 0; 1082 i = errcheck(pow(i, j), "pow"); 1083 } 1084 break; 1085 default: /* can't happen */ 1086 FATAL("illegal arithmetic operator %d", n); 1087 } 1088 setfval(z, i); 1089 return(z); 1090 } 1091 1092 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1093 { 1094 double v; 1095 1096 if (n <= 0) 1097 return 1; 1098 v = ipow(x, n/2); 1099 if (n % 2 == 0) 1100 return v * v; 1101 else 1102 return x * v * v; 1103 } 1104 1105 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 1106 { 1107 Cell *x, *z; 1108 int k; 1109 Awkfloat xf; 1110 1111 x = execute(a[0]); 1112 xf = getfval(x); 1113 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1114 if (n == PREINCR || n == PREDECR) { 1115 setfval(x, xf + k); 1116 return(x); 1117 } 1118 z = gettemp(); 1119 setfval(z, xf); 1120 setfval(x, xf + k); 1121 tempfree(x); 1122 return(z); 1123 } 1124 1125 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1126 { /* this is subtle; don't muck with it. */ 1127 Cell *x, *y; 1128 Awkfloat xf, yf; 1129 double v; 1130 1131 y = execute(a[1]); 1132 x = execute(a[0]); 1133 if (n == ASSIGN) { /* ordinary assignment */ 1134 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1135 ; /* self-assignment: leave alone unless it's a field or NF */ 1136 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1137 setsval(x, getsval(y)); 1138 x->fval = getfval(y); 1139 x->tval |= NUM; 1140 } 1141 else if (isstr(y)) 1142 setsval(x, getsval(y)); 1143 else if (isnum(y)) 1144 setfval(x, getfval(y)); 1145 else 1146 funnyvar(y, "read value of"); 1147 tempfree(y); 1148 return(x); 1149 } 1150 xf = getfval(x); 1151 yf = getfval(y); 1152 switch (n) { 1153 case ADDEQ: 1154 xf += yf; 1155 break; 1156 case SUBEQ: 1157 xf -= yf; 1158 break; 1159 case MULTEQ: 1160 xf *= yf; 1161 break; 1162 case DIVEQ: 1163 if (yf == 0) 1164 FATAL("division by zero in /="); 1165 xf /= yf; 1166 break; 1167 case MODEQ: 1168 if (yf == 0) 1169 FATAL("division by zero in %%="); 1170 modf(xf/yf, &v); 1171 xf = xf - yf * v; 1172 break; 1173 case POWEQ: 1174 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1175 xf = ipow(xf, (int) yf); 1176 else { 1177 errno = 0; 1178 xf = errcheck(pow(xf, yf), "pow"); 1179 } 1180 break; 1181 default: 1182 FATAL("illegal assignment operator %d", n); 1183 break; 1184 } 1185 tempfree(y); 1186 setfval(x, xf); 1187 return(x); 1188 } 1189 1190 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1191 { 1192 Cell *x, *y, *z; 1193 int n1, n2; 1194 char *s = NULL; 1195 int ssz = 0; 1196 1197 x = execute(a[0]); 1198 n1 = strlen(getsval(x)); 1199 adjbuf(&s, &ssz, n1, recsize, 0, "cat1"); 1200 memcpy(s, x->sval, n1); 1201 1202 y = execute(a[1]); 1203 n2 = strlen(getsval(y)); 1204 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1205 memcpy(s + n1, y->sval, n2); 1206 s[n1 + n2] = '\0'; 1207 1208 tempfree(x); 1209 tempfree(y); 1210 1211 z = gettemp(); 1212 z->sval = s; 1213 z->tval = STR; 1214 1215 return(z); 1216 } 1217 1218 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1219 { 1220 Cell *x; 1221 1222 if (a[0] == NULL) 1223 x = execute(a[1]); 1224 else { 1225 x = execute(a[0]); 1226 if (istrue(x)) { 1227 tempfree(x); 1228 x = execute(a[1]); 1229 } 1230 } 1231 return x; 1232 } 1233 1234 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1235 { 1236 Cell *x; 1237 int pair; 1238 1239 pair = ptoi(a[3]); 1240 if (pairstack[pair] == 0) { 1241 x = execute(a[0]); 1242 if (istrue(x)) 1243 pairstack[pair] = 1; 1244 tempfree(x); 1245 } 1246 if (pairstack[pair] == 1) { 1247 x = execute(a[1]); 1248 if (istrue(x)) 1249 pairstack[pair] = 0; 1250 tempfree(x); 1251 x = execute(a[2]); 1252 return(x); 1253 } 1254 return(False); 1255 } 1256 1257 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1258 { 1259 Cell *x = NULL, *y, *ap; 1260 const char *s, *origs, *t; 1261 const char *fs = NULL; 1262 char *origfs = NULL; 1263 int sep; 1264 char temp, num[50]; 1265 int n, tempstat, arg3type; 1266 double result; 1267 1268 y = execute(a[0]); /* source string */ 1269 origs = s = strdup(getsval(y)); 1270 arg3type = ptoi(a[3]); 1271 if (a[2] == NULL) /* fs string */ 1272 fs = getsval(fsloc); 1273 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1274 x = execute(a[2]); 1275 fs = origfs = strdup(getsval(x)); 1276 tempfree(x); 1277 } else if (arg3type == REGEXPR) 1278 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1279 else 1280 FATAL("illegal type of split"); 1281 sep = *fs; 1282 ap = execute(a[1]); /* array name */ 1283 freesymtab(ap); 1284 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1285 ap->tval &= ~STR; 1286 ap->tval |= ARR; 1287 ap->sval = (char *) makesymtab(NSYMTAB); 1288 1289 n = 0; 1290 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1291 /* split(s, a, //); have to arrange that it looks like empty sep */ 1292 arg3type = 0; 1293 fs = ""; 1294 sep = 0; 1295 } 1296 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1297 fa *pfa; 1298 if (arg3type == REGEXPR) { /* it's ready already */ 1299 pfa = (fa *) a[2]; 1300 } else { 1301 pfa = makedfa(fs, 1); 1302 } 1303 if (nematch(pfa,s)) { 1304 tempstat = pfa->initstat; 1305 pfa->initstat = 2; 1306 do { 1307 n++; 1308 snprintf(num, sizeof(num), "%d", n); 1309 temp = *patbeg; 1310 setptr(patbeg, '\0'); 1311 if (is_number(s, & result)) 1312 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1313 else 1314 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1315 setptr(patbeg, temp); 1316 s = patbeg + patlen; 1317 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1318 n++; 1319 snprintf(num, sizeof(num), "%d", n); 1320 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1321 pfa->initstat = tempstat; 1322 goto spdone; 1323 } 1324 } while (nematch(pfa,s)); 1325 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1326 /* cf gsub and refldbld */ 1327 } 1328 n++; 1329 snprintf(num, sizeof(num), "%d", n); 1330 if (is_number(s, & result)) 1331 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1332 else 1333 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1334 spdone: 1335 pfa = NULL; 1336 } else if (sep == ' ') { 1337 for (n = 0; ; ) { 1338 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1339 while (ISWS(*s)) 1340 s++; 1341 if (*s == '\0') 1342 break; 1343 n++; 1344 t = s; 1345 do 1346 s++; 1347 while (*s != '\0' && !ISWS(*s)); 1348 temp = *s; 1349 setptr(s, '\0'); 1350 snprintf(num, sizeof(num), "%d", n); 1351 if (is_number(t, & result)) 1352 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1353 else 1354 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1355 setptr(s, temp); 1356 if (*s != '\0') 1357 s++; 1358 } 1359 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1360 for (n = 0; *s != '\0'; s++) { 1361 char buf[2]; 1362 n++; 1363 snprintf(num, sizeof(num), "%d", n); 1364 buf[0] = *s; 1365 buf[1] = '\0'; 1366 if (isdigit((uschar)buf[0])) 1367 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1368 else 1369 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1370 } 1371 } else if (*s != '\0') { 1372 for (;;) { 1373 n++; 1374 t = s; 1375 while (*s != sep && *s != '\n' && *s != '\0') 1376 s++; 1377 temp = *s; 1378 setptr(s, '\0'); 1379 snprintf(num, sizeof(num), "%d", n); 1380 if (is_number(t, & result)) 1381 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1382 else 1383 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1384 setptr(s, temp); 1385 if (*s++ == '\0') 1386 break; 1387 } 1388 } 1389 tempfree(ap); 1390 tempfree(y); 1391 xfree(origs); 1392 xfree(origfs); 1393 x = gettemp(); 1394 x->tval = NUM; 1395 x->fval = n; 1396 return(x); 1397 } 1398 1399 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1400 { 1401 Cell *x; 1402 1403 x = execute(a[0]); 1404 if (istrue(x)) { 1405 tempfree(x); 1406 x = execute(a[1]); 1407 } else { 1408 tempfree(x); 1409 x = execute(a[2]); 1410 } 1411 return(x); 1412 } 1413 1414 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1415 { 1416 Cell *x; 1417 1418 x = execute(a[0]); 1419 if (istrue(x)) { 1420 tempfree(x); 1421 x = execute(a[1]); 1422 } else if (a[2] != NULL) { 1423 tempfree(x); 1424 x = execute(a[2]); 1425 } 1426 return(x); 1427 } 1428 1429 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1430 { 1431 Cell *x; 1432 1433 for (;;) { 1434 x = execute(a[0]); 1435 if (!istrue(x)) 1436 return(x); 1437 tempfree(x); 1438 x = execute(a[1]); 1439 if (isbreak(x)) { 1440 x = True; 1441 return(x); 1442 } 1443 if (isnext(x) || isexit(x) || isret(x)) 1444 return(x); 1445 tempfree(x); 1446 } 1447 } 1448 1449 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1450 { 1451 Cell *x; 1452 1453 for (;;) { 1454 x = execute(a[0]); 1455 if (isbreak(x)) 1456 return True; 1457 if (isnext(x) || isexit(x) || isret(x)) 1458 return(x); 1459 tempfree(x); 1460 x = execute(a[1]); 1461 if (!istrue(x)) 1462 return(x); 1463 tempfree(x); 1464 } 1465 } 1466 1467 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1468 { 1469 Cell *x; 1470 1471 x = execute(a[0]); 1472 tempfree(x); 1473 for (;;) { 1474 if (a[1]!=NULL) { 1475 x = execute(a[1]); 1476 if (!istrue(x)) return(x); 1477 else tempfree(x); 1478 } 1479 x = execute(a[3]); 1480 if (isbreak(x)) /* turn off break */ 1481 return True; 1482 if (isnext(x) || isexit(x) || isret(x)) 1483 return(x); 1484 tempfree(x); 1485 x = execute(a[2]); 1486 tempfree(x); 1487 } 1488 } 1489 1490 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1491 { 1492 Cell *x, *vp, *arrayp, *cp, *ncp; 1493 Array *tp; 1494 int i; 1495 1496 vp = execute(a[0]); 1497 arrayp = execute(a[1]); 1498 if (!isarr(arrayp)) { 1499 return True; 1500 } 1501 tp = (Array *) arrayp->sval; 1502 tempfree(arrayp); 1503 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1504 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1505 setsval(vp, cp->nval); 1506 ncp = cp->cnext; 1507 x = execute(a[2]); 1508 if (isbreak(x)) { 1509 tempfree(vp); 1510 return True; 1511 } 1512 if (isnext(x) || isexit(x) || isret(x)) { 1513 tempfree(vp); 1514 return(x); 1515 } 1516 tempfree(x); 1517 } 1518 } 1519 return True; 1520 } 1521 1522 static char *nawk_convert(const char *s, int (*fun_c)(int), 1523 wint_t (*fun_wc)(wint_t)) 1524 { 1525 char *buf = NULL; 1526 char *pbuf = NULL; 1527 const char *ps = NULL; 1528 size_t n = 0; 1529 wchar_t wc; 1530 size_t sz = MB_CUR_MAX; 1531 1532 if (sz == 1) { 1533 buf = tostring(s); 1534 1535 for (pbuf = buf; *pbuf; pbuf++) 1536 *pbuf = fun_c((uschar)*pbuf); 1537 1538 return buf; 1539 } else { 1540 /* upper/lower character may be shorter/longer */ 1541 buf = tostringN(s, strlen(s) * sz + 1); 1542 1543 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1544 /* 1545 * Reset internal state here too. 1546 * Assign result to avoid a compiler warning. (Casting to void 1547 * doesn't work.) 1548 * Increment said variable to avoid a different warning. 1549 */ 1550 int unused = wctomb(NULL, L'\0'); 1551 unused++; 1552 1553 ps = s; 1554 pbuf = buf; 1555 while (n = mbtowc(&wc, ps, sz), 1556 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1557 { 1558 ps += n; 1559 1560 n = wctomb(pbuf, fun_wc(wc)); 1561 if (n == (size_t)-1) 1562 FATAL("illegal wide character %s", s); 1563 1564 pbuf += n; 1565 } 1566 1567 *pbuf = '\0'; 1568 1569 if (n) 1570 FATAL("illegal byte sequence %s", s); 1571 1572 return buf; 1573 } 1574 } 1575 1576 #ifdef __DJGPP__ 1577 static wint_t towupper(wint_t wc) 1578 { 1579 if (wc >= 0 && wc < 256) 1580 return toupper(wc & 0xFF); 1581 1582 return wc; 1583 } 1584 1585 static wint_t towlower(wint_t wc) 1586 { 1587 if (wc >= 0 && wc < 256) 1588 return tolower(wc & 0xFF); 1589 1590 return wc; 1591 } 1592 #endif 1593 1594 static char *nawk_toupper(const char *s) 1595 { 1596 return nawk_convert(s, toupper, towupper); 1597 } 1598 1599 static char *nawk_tolower(const char *s) 1600 { 1601 return nawk_convert(s, tolower, towlower); 1602 } 1603 1604 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 1605 { 1606 Cell *x, *y; 1607 Awkfloat u; 1608 int t, sz; 1609 Awkfloat tmp; 1610 char *buf, *fmt; 1611 Node *nextarg; 1612 FILE *fp; 1613 int status = 0; 1614 time_t tv; 1615 struct tm *tm; 1616 1617 t = ptoi(a[0]); 1618 x = execute(a[1]); 1619 nextarg = a[1]->nnext; 1620 switch (t) { 1621 case FLENGTH: 1622 if (isarr(x)) 1623 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 1624 else 1625 u = strlen(getsval(x)); 1626 break; 1627 case FLOG: 1628 errno = 0; 1629 u = errcheck(log(getfval(x)), "log"); 1630 break; 1631 case FINT: 1632 modf(getfval(x), &u); break; 1633 case FEXP: 1634 errno = 0; 1635 u = errcheck(exp(getfval(x)), "exp"); 1636 break; 1637 case FSQRT: 1638 errno = 0; 1639 u = errcheck(sqrt(getfval(x)), "sqrt"); 1640 break; 1641 case FSIN: 1642 u = sin(getfval(x)); break; 1643 case FCOS: 1644 u = cos(getfval(x)); break; 1645 case FATAN: 1646 if (nextarg == NULL) { 1647 WARNING("atan2 requires two arguments; returning 1.0"); 1648 u = 1.0; 1649 } else { 1650 y = execute(a[1]->nnext); 1651 u = atan2(getfval(x), getfval(y)); 1652 tempfree(y); 1653 nextarg = nextarg->nnext; 1654 } 1655 break; 1656 case FCOMPL: 1657 u = ~((int)getfval(x)); 1658 break; 1659 case FAND: 1660 if (nextarg == 0) { 1661 WARNING("and requires two arguments; returning 0"); 1662 u = 0; 1663 break; 1664 } 1665 y = execute(a[1]->nnext); 1666 u = ((int)getfval(x)) & ((int)getfval(y)); 1667 tempfree(y); 1668 nextarg = nextarg->nnext; 1669 break; 1670 case FFOR: 1671 if (nextarg == 0) { 1672 WARNING("or requires two arguments; returning 0"); 1673 u = 0; 1674 break; 1675 } 1676 y = execute(a[1]->nnext); 1677 u = ((int)getfval(x)) | ((int)getfval(y)); 1678 tempfree(y); 1679 nextarg = nextarg->nnext; 1680 break; 1681 case FXOR: 1682 if (nextarg == 0) { 1683 WARNING("xor requires two arguments; returning 0"); 1684 u = 0; 1685 break; 1686 } 1687 y = execute(a[1]->nnext); 1688 u = ((int)getfval(x)) ^ ((int)getfval(y)); 1689 tempfree(y); 1690 nextarg = nextarg->nnext; 1691 break; 1692 case FLSHIFT: 1693 if (nextarg == 0) { 1694 WARNING("lshift requires two arguments; returning 0"); 1695 u = 0; 1696 break; 1697 } 1698 y = execute(a[1]->nnext); 1699 u = ((int)getfval(x)) << ((int)getfval(y)); 1700 tempfree(y); 1701 nextarg = nextarg->nnext; 1702 break; 1703 case FRSHIFT: 1704 if (nextarg == 0) { 1705 WARNING("rshift requires two arguments; returning 0"); 1706 u = 0; 1707 break; 1708 } 1709 y = execute(a[1]->nnext); 1710 u = ((int)getfval(x)) >> ((int)getfval(y)); 1711 tempfree(y); 1712 nextarg = nextarg->nnext; 1713 break; 1714 case FSYSTEM: 1715 fflush(stdout); /* in case something is buffered already */ 1716 status = system(getsval(x)); 1717 u = status; 1718 if (status != -1) { 1719 if (WIFEXITED(status)) { 1720 u = WEXITSTATUS(status); 1721 } else if (WIFSIGNALED(status)) { 1722 u = WTERMSIG(status) + 256; 1723 #ifdef WCOREDUMP 1724 if (WCOREDUMP(status)) 1725 u += 256; 1726 #endif 1727 } else /* something else?!? */ 1728 u = 0; 1729 } 1730 break; 1731 case FRAND: 1732 /* random() returns numbers in [0..2^31-1] 1733 * in order to get a number in [0, 1), divide it by 2^31 1734 */ 1735 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1736 break; 1737 case FSRAND: 1738 if (isrec(x)) /* no argument provided */ 1739 u = time((time_t *)0); 1740 else 1741 u = getfval(x); 1742 tmp = u; 1743 srandom((unsigned long) u); 1744 u = srand_seed; 1745 srand_seed = tmp; 1746 break; 1747 case FTOUPPER: 1748 case FTOLOWER: 1749 if (t == FTOUPPER) 1750 buf = nawk_toupper(getsval(x)); 1751 else 1752 buf = nawk_tolower(getsval(x)); 1753 tempfree(x); 1754 x = gettemp(); 1755 setsval(x, buf); 1756 free(buf); 1757 return x; 1758 case FFLUSH: 1759 if (isrec(x) || strlen(getsval(x)) == 0) { 1760 flush_all(); /* fflush() or fflush("") -> all */ 1761 u = 0; 1762 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1763 u = EOF; 1764 else 1765 u = fflush(fp); 1766 break; 1767 case FSYSTIME: 1768 u = time((time_t *) 0); 1769 break; 1770 case FSTRFTIME: 1771 /* strftime([format [,timestamp]]) */ 1772 if (nextarg) { 1773 y = execute(nextarg); 1774 nextarg = nextarg->nnext; 1775 tv = (time_t) getfval(y); 1776 tempfree(y); 1777 } else 1778 tv = time((time_t *) 0); 1779 tm = localtime(&tv); 1780 if (tm == NULL) 1781 FATAL("bad time %ld", (long)tv); 1782 1783 if (isrec(x)) { 1784 /* format argument not provided, use default */ 1785 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 1786 } else 1787 fmt = tostring(getsval(x)); 1788 1789 sz = 32; 1790 buf = NULL; 1791 do { 1792 if ((buf = realloc(buf, (sz *= 2))) == NULL) 1793 FATAL("out of memory in strftime"); 1794 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 1795 1796 y = gettemp(); 1797 setsval(y, buf); 1798 free(fmt); 1799 free(buf); 1800 1801 return y; 1802 default: /* can't happen */ 1803 FATAL("illegal function type %d", t); 1804 break; 1805 } 1806 tempfree(x); 1807 x = gettemp(); 1808 setfval(x, u); 1809 if (nextarg != NULL) { 1810 WARNING("warning: function has too many arguments"); 1811 for ( ; nextarg; nextarg = nextarg->nnext) 1812 execute(nextarg); 1813 } 1814 return(x); 1815 } 1816 1817 Cell *printstat(Node **a, int n) /* print a[0] */ 1818 { 1819 Node *x; 1820 Cell *y; 1821 FILE *fp; 1822 1823 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1824 fp = stdout; 1825 else 1826 fp = redirect(ptoi(a[1]), a[2]); 1827 for (x = a[0]; x != NULL; x = x->nnext) { 1828 y = execute(x); 1829 fputs(getpssval(y), fp); 1830 tempfree(y); 1831 if (x->nnext == NULL) 1832 fputs(getsval(orsloc), fp); 1833 else 1834 fputs(getsval(ofsloc), fp); 1835 } 1836 if (a[1] != NULL) 1837 fflush(fp); 1838 if (ferror(fp)) 1839 FATAL("write error on %s", filename(fp)); 1840 return(True); 1841 } 1842 1843 Cell *nullproc(Node **a, int n) 1844 { 1845 return 0; 1846 } 1847 1848 1849 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1850 { 1851 FILE *fp; 1852 Cell *x; 1853 char *fname; 1854 1855 x = execute(b); 1856 fname = getsval(x); 1857 fp = openfile(a, fname, NULL); 1858 if (fp == NULL) 1859 FATAL("can't open file %s", fname); 1860 tempfree(x); 1861 return fp; 1862 } 1863 1864 struct files { 1865 FILE *fp; 1866 const char *fname; 1867 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1868 } *files; 1869 1870 size_t nfiles; 1871 1872 static void stdinit(void) /* in case stdin, etc., are not constants */ 1873 { 1874 nfiles = FOPEN_MAX; 1875 files = (struct files *) calloc(nfiles, sizeof(*files)); 1876 if (files == NULL) 1877 FATAL("can't allocate file memory for %zu files", nfiles); 1878 files[0].fp = stdin; 1879 files[0].fname = "/dev/stdin"; 1880 files[0].mode = LT; 1881 files[1].fp = stdout; 1882 files[1].fname = "/dev/stdout"; 1883 files[1].mode = GT; 1884 files[2].fp = stderr; 1885 files[2].fname = "/dev/stderr"; 1886 files[2].mode = GT; 1887 } 1888 1889 FILE *openfile(int a, const char *us, bool *pnewflag) 1890 { 1891 const char *s = us; 1892 size_t i; 1893 int m; 1894 FILE *fp = NULL; 1895 1896 if (*s == '\0') 1897 FATAL("null file name in print or getline"); 1898 for (i = 0; i < nfiles; i++) 1899 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1900 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1901 a == FFLUSH)) { 1902 if (pnewflag) 1903 *pnewflag = false; 1904 return files[i].fp; 1905 } 1906 if (a == FFLUSH) /* didn't find it, so don't create it! */ 1907 return NULL; 1908 1909 for (i = 0; i < nfiles; i++) 1910 if (files[i].fp == NULL) 1911 break; 1912 if (i >= nfiles) { 1913 struct files *nf; 1914 size_t nnf = nfiles + FOPEN_MAX; 1915 nf = (struct files *) realloc(files, nnf * sizeof(*nf)); 1916 if (nf == NULL) 1917 FATAL("cannot grow files for %s and %zu files", s, nnf); 1918 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1919 nfiles = nnf; 1920 files = nf; 1921 } 1922 fflush(stdout); /* force a semblance of order */ 1923 m = a; 1924 if (a == GT) { 1925 fp = fopen(s, "w"); 1926 } else if (a == APPEND) { 1927 fp = fopen(s, "a"); 1928 m = GT; /* so can mix > and >> */ 1929 } else if (a == '|') { /* output pipe */ 1930 fp = popen(s, "w"); 1931 } else if (a == LE) { /* input pipe */ 1932 fp = popen(s, "r"); 1933 } else if (a == LT) { /* getline <file */ 1934 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1935 } else /* can't happen */ 1936 FATAL("illegal redirection %d", a); 1937 if (fp != NULL) { 1938 files[i].fname = tostring(s); 1939 files[i].fp = fp; 1940 files[i].mode = m; 1941 if (pnewflag) 1942 *pnewflag = true; 1943 if (fp != stdin && fp != stdout && fp != stderr) 1944 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1945 } 1946 return fp; 1947 } 1948 1949 const char *filename(FILE *fp) 1950 { 1951 size_t i; 1952 1953 for (i = 0; i < nfiles; i++) 1954 if (fp == files[i].fp) 1955 return files[i].fname; 1956 return "???"; 1957 } 1958 1959 Cell *closefile(Node **a, int n) 1960 { 1961 Cell *x; 1962 size_t i; 1963 bool stat; 1964 1965 x = execute(a[0]); 1966 getsval(x); 1967 stat = true; 1968 for (i = 0; i < nfiles; i++) { 1969 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 1970 continue; 1971 if (ferror(files[i].fp)) 1972 FATAL("i/o error occurred on %s", files[i].fname); 1973 if (files[i].fp == stdin || files[i].fp == stdout || 1974 files[i].fp == stderr) 1975 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 1976 else if (files[i].mode == '|' || files[i].mode == LE) 1977 stat = pclose(files[i].fp) == -1; 1978 else 1979 stat = fclose(files[i].fp) == EOF; 1980 if (stat) 1981 FATAL("i/o error occurred closing %s", files[i].fname); 1982 if (i > 2) /* don't do /dev/std... */ 1983 xfree(files[i].fname); 1984 files[i].fname = NULL; /* watch out for ref thru this */ 1985 files[i].fp = NULL; 1986 break; 1987 } 1988 tempfree(x); 1989 x = gettemp(); 1990 setfval(x, (Awkfloat) (stat ? -1 : 0)); 1991 return(x); 1992 } 1993 1994 void closeall(void) 1995 { 1996 size_t i; 1997 bool stat = false; 1998 1999 for (i = 0; i < nfiles; i++) { 2000 if (! files[i].fp) 2001 continue; 2002 if (ferror(files[i].fp)) 2003 FATAL( "i/o error occurred on %s", files[i].fname ); 2004 if (files[i].fp == stdin) 2005 continue; 2006 if (files[i].mode == '|' || files[i].mode == LE) 2007 stat = pclose(files[i].fp) == -1; 2008 else if (files[i].fp == stdout || files[i].fp == stderr) 2009 stat = fflush(files[i].fp) == EOF; 2010 else 2011 stat = fclose(files[i].fp) == EOF; 2012 if (stat) 2013 FATAL( "i/o error occurred while closing %s", files[i].fname ); 2014 } 2015 } 2016 2017 static void flush_all(void) 2018 { 2019 size_t i; 2020 2021 for (i = 0; i < nfiles; i++) 2022 if (files[i].fp) 2023 fflush(files[i].fp); 2024 } 2025 2026 void backsub(char **pb_ptr, const char **sptr_ptr); 2027 2028 Cell *sub(Node **a, int nnn) /* substitute command */ 2029 { 2030 const char *sptr, *q; 2031 Cell *x, *y, *result; 2032 char *t, *buf, *pb; 2033 fa *pfa; 2034 int bufsz = recsize; 2035 2036 if ((buf = (char *) malloc(bufsz)) == NULL) 2037 FATAL("out of memory in sub"); 2038 x = execute(a[3]); /* target string */ 2039 t = getsval(x); 2040 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2041 pfa = (fa *) a[1]; /* regular expression */ 2042 else { 2043 y = execute(a[1]); 2044 pfa = makedfa(getsval(y), 1); 2045 tempfree(y); 2046 } 2047 y = execute(a[2]); /* replacement string */ 2048 result = False; 2049 if (pmatch(pfa, t)) { 2050 sptr = t; 2051 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 2052 pb = buf; 2053 while (sptr < patbeg) 2054 *pb++ = *sptr++; 2055 sptr = getsval(y); 2056 while (*sptr != '\0') { 2057 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2058 if (*sptr == '\\') { 2059 backsub(&pb, &sptr); 2060 } else if (*sptr == '&') { 2061 sptr++; 2062 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2063 for (q = patbeg; q < patbeg+patlen; ) 2064 *pb++ = *q++; 2065 } else 2066 *pb++ = *sptr++; 2067 } 2068 *pb = '\0'; 2069 if (pb > buf + bufsz) 2070 FATAL("sub result1 %.30s too big; can't happen", buf); 2071 sptr = patbeg + patlen; 2072 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2073 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2074 while ((*pb++ = *sptr++) != '\0') 2075 continue; 2076 } 2077 if (pb > buf + bufsz) 2078 FATAL("sub result2 %.30s too big; can't happen", buf); 2079 setsval(x, buf); /* BUG: should be able to avoid copy */ 2080 result = True; 2081 } 2082 tempfree(x); 2083 tempfree(y); 2084 free(buf); 2085 return result; 2086 } 2087 2088 Cell *gsub(Node **a, int nnn) /* global substitute */ 2089 { 2090 Cell *x, *y; 2091 char *rptr, *pb; 2092 const char *q, *t, *sptr; 2093 char *buf; 2094 fa *pfa; 2095 int mflag, tempstat, num; 2096 int bufsz = recsize; 2097 2098 if ((buf = (char *) malloc(bufsz)) == NULL) 2099 FATAL("out of memory in gsub"); 2100 mflag = 0; /* if mflag == 0, can replace empty string */ 2101 num = 0; 2102 x = execute(a[3]); /* target string */ 2103 t = getsval(x); 2104 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2105 pfa = (fa *) a[1]; /* regular expression */ 2106 else { 2107 y = execute(a[1]); 2108 pfa = makedfa(getsval(y), 1); 2109 tempfree(y); 2110 } 2111 y = execute(a[2]); /* replacement string */ 2112 if (pmatch(pfa, t)) { 2113 tempstat = pfa->initstat; 2114 pfa->initstat = 2; 2115 pb = buf; 2116 rptr = getsval(y); 2117 do { 2118 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2119 if (mflag == 0) { /* can replace empty */ 2120 num++; 2121 sptr = rptr; 2122 while (*sptr != '\0') { 2123 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2124 if (*sptr == '\\') { 2125 backsub(&pb, &sptr); 2126 } else if (*sptr == '&') { 2127 sptr++; 2128 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2129 for (q = patbeg; q < patbeg+patlen; ) 2130 *pb++ = *q++; 2131 } else 2132 *pb++ = *sptr++; 2133 } 2134 } 2135 if (*t == '\0') /* at end */ 2136 goto done; 2137 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2138 *pb++ = *t++; 2139 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2140 FATAL("gsub result0 %.30s too big; can't happen", buf); 2141 mflag = 0; 2142 } 2143 else { /* matched nonempty string */ 2144 num++; 2145 sptr = t; 2146 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2147 while (sptr < patbeg) 2148 *pb++ = *sptr++; 2149 sptr = rptr; 2150 while (*sptr != '\0') { 2151 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2152 if (*sptr == '\\') { 2153 backsub(&pb, &sptr); 2154 } else if (*sptr == '&') { 2155 sptr++; 2156 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2157 for (q = patbeg; q < patbeg+patlen; ) 2158 *pb++ = *q++; 2159 } else 2160 *pb++ = *sptr++; 2161 } 2162 t = patbeg + patlen; 2163 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2164 goto done; 2165 if (pb > buf + bufsz) 2166 FATAL("gsub result1 %.30s too big; can't happen", buf); 2167 mflag = 1; 2168 } 2169 } while (pmatch(pfa,t)); 2170 sptr = t; 2171 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2172 while ((*pb++ = *sptr++) != '\0') 2173 continue; 2174 done: if (pb < buf + bufsz) 2175 *pb = '\0'; 2176 else if (*(pb-1) != '\0') 2177 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2178 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2179 pfa->initstat = tempstat; 2180 } 2181 tempfree(x); 2182 tempfree(y); 2183 x = gettemp(); 2184 x->tval = NUM; 2185 x->fval = num; 2186 free(buf); 2187 return(x); 2188 } 2189 2190 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2191 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2192 { 2193 Cell *x, *y, *res, *h; 2194 char *rptr; 2195 const char *sptr; 2196 char *buf, *pb; 2197 const char *t, *q; 2198 fa *pfa; 2199 int mflag, tempstat, num, whichm; 2200 int bufsz = recsize; 2201 2202 if ((buf = malloc(bufsz)) == NULL) 2203 FATAL("out of memory in gensub"); 2204 mflag = 0; /* if mflag == 0, can replace empty string */ 2205 num = 0; 2206 x = execute(a[4]); /* source string */ 2207 t = getsval(x); 2208 res = copycell(x); /* target string - initially copy of source */ 2209 res->csub = CTEMP; /* result values are temporary */ 2210 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2211 pfa = (fa *) a[1]; /* regular expression */ 2212 else { 2213 y = execute(a[1]); 2214 pfa = makedfa(getsval(y), 1); 2215 tempfree(y); 2216 } 2217 y = execute(a[2]); /* replacement string */ 2218 h = execute(a[3]); /* which matches should be replaced */ 2219 sptr = getsval(h); 2220 if (sptr[0] == 'g' || sptr[0] == 'G') 2221 whichm = -1; 2222 else { 2223 /* 2224 * The specified number is index of replacement, starting 2225 * from 1. GNU awk treats index lower than 0 same as 2226 * 1, we do same for compatibility. 2227 */ 2228 whichm = (int) getfval(h) - 1; 2229 if (whichm < 0) 2230 whichm = 0; 2231 } 2232 tempfree(h); 2233 2234 if (pmatch(pfa, t)) { 2235 char *sl; 2236 2237 tempstat = pfa->initstat; 2238 pfa->initstat = 2; 2239 pb = buf; 2240 rptr = getsval(y); 2241 /* 2242 * XXX if there are any backreferences in subst string, 2243 * complain now. 2244 */ 2245 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2246 if (strchr("0123456789", sl[1])) { 2247 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2248 } 2249 } 2250 2251 do { 2252 if (whichm >= 0 && whichm != num) { 2253 num++; 2254 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2255 2256 /* copy the part of string up to and including 2257 * match to output buffer */ 2258 while (t < patbeg + patlen) 2259 *pb++ = *t++; 2260 continue; 2261 } 2262 2263 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2264 if (mflag == 0) { /* can replace empty */ 2265 num++; 2266 sptr = rptr; 2267 while (*sptr != 0) { 2268 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2269 if (*sptr == '\\') { 2270 backsub(&pb, &sptr); 2271 } else if (*sptr == '&') { 2272 sptr++; 2273 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2274 for (q = patbeg; q < patbeg+patlen; ) 2275 *pb++ = *q++; 2276 } else 2277 *pb++ = *sptr++; 2278 } 2279 } 2280 if (*t == 0) /* at end */ 2281 goto done; 2282 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2283 *pb++ = *t++; 2284 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2285 FATAL("gensub result0 %.30s too big; can't happen", buf); 2286 mflag = 0; 2287 } 2288 else { /* matched nonempty string */ 2289 num++; 2290 sptr = t; 2291 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2292 while (sptr < patbeg) 2293 *pb++ = *sptr++; 2294 sptr = rptr; 2295 while (*sptr != 0) { 2296 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2297 if (*sptr == '\\') { 2298 backsub(&pb, &sptr); 2299 } else if (*sptr == '&') { 2300 sptr++; 2301 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2302 for (q = patbeg; q < patbeg+patlen; ) 2303 *pb++ = *q++; 2304 } else 2305 *pb++ = *sptr++; 2306 } 2307 t = patbeg + patlen; 2308 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2309 goto done; 2310 if (pb > buf + bufsz) 2311 FATAL("gensub result1 %.30s too big; can't happen", buf); 2312 mflag = 1; 2313 } 2314 } while (pmatch(pfa,t)); 2315 sptr = t; 2316 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2317 while ((*pb++ = *sptr++) != 0) 2318 ; 2319 done: if (pb > buf + bufsz) 2320 FATAL("gensub result2 %.30s too big; can't happen", buf); 2321 *pb = '\0'; 2322 setsval(res, buf); 2323 pfa->initstat = tempstat; 2324 } 2325 tempfree(x); 2326 tempfree(y); 2327 free(buf); 2328 return(res); 2329 } 2330 2331 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2332 { /* sptr[0] == '\\' */ 2333 char *pb = *pb_ptr; 2334 const char *sptr = *sptr_ptr; 2335 static bool first = true; 2336 static bool do_posix = false; 2337 2338 if (first) { 2339 first = false; 2340 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2341 } 2342 2343 if (sptr[1] == '\\') { 2344 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2345 *pb++ = '\\'; 2346 *pb++ = '&'; 2347 sptr += 4; 2348 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2349 *pb++ = '\\'; 2350 sptr += 2; 2351 } else if (do_posix) { /* \\x -> \x */ 2352 sptr++; 2353 *pb++ = *sptr++; 2354 } else { /* \\x -> \\x */ 2355 *pb++ = *sptr++; 2356 *pb++ = *sptr++; 2357 } 2358 } else if (sptr[1] == '&') { /* literal & */ 2359 sptr++; 2360 *pb++ = *sptr++; 2361 } else /* literal \ */ 2362 *pb++ = *sptr++; 2363 2364 *pb_ptr = pb; 2365 *sptr_ptr = sptr; 2366 } 2367