1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 
23 ****************************************************************/ 24 25 #define DEBUG 26 #include <stdio.h> 27 #include <ctype.h> 28 #include <errno.h> 29 #include <wctype.h> 30 #include <fcntl.h> 31 #include <setjmp.h> 32 #include <limits.h> 33 #include <math.h> 34 #include <string.h> 35 #include <stdlib.h> 36 #include <time.h> 37 #include <sys/types.h> 38 #include <sys/wait.h> 39 #include "awk.h" 40 #include "awkgram.tab.h" 41 42 43 static void stdinit(void); 44 static void flush_all(void); 45 static char *wide_char_to_byte_str(int rune, size_t *outlen); 46 47 #if 1 48 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 49 #else 50 void tempfree(Cell *p) { 51 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 52 WARNING("bad csub %d in Cell %d %s", 53 p->csub, p->ctype, p->sval); 54 } 55 if (istemp(p)) 56 tfree(p); 57 } 58 #endif 59 60 /* do we really need these? */ 61 /* #ifdef _NFILE */ 62 /* #ifndef FOPEN_MAX */ 63 /* #define FOPEN_MAX _NFILE */ 64 /* #endif */ 65 /* #endif */ 66 /* */ 67 /* #ifndef FOPEN_MAX */ 68 /* #define FOPEN_MAX 40 */ /* max number of open files */ 69 /* #endif */ 70 /* */ 71 /* #ifndef RAND_MAX */ 72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 73 /* #endif */ 74 75 jmp_buf env; 76 extern int pairstack[]; 77 extern Awkfloat srand_seed; 78 79 Node *winner = NULL; /* root of parse tree */ 80 Cell *tmps; /* free temporary cells for execution */ 81 82 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 83 Cell *True = &truecell; 84 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 85 Cell *False = &falsecell; 86 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 87 Cell *jbreak = &breakcell; 88 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 89 Cell *jcont = &contcell; 90 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 91 Cell *jnext = &nextcell; 92 static Cell nextfilecell ={ OJUMP, 
JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 93 Cell *jnextfile = &nextfilecell; 94 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 95 Cell *jexit = &exitcell; 96 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 97 Cell *jret = &retcell; 98 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 99 100 Node *curnode = NULL; /* the node being executed, for debugging */ 101 102 /* buffer memory management */ 103 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 104 const char *whatrtn) 105 /* pbuf: address of pointer to buffer being managed 106 * psiz: address of buffer size variable 107 * minlen: minimum length of buffer needed 108 * quantum: buffer size quantum 109 * pbptr: address of movable pointer into buffer, or 0 if none 110 * whatrtn: name of the calling routine if failure should cause fatal error 111 * 112 * return 0 for realloc failure, !=0 for success 113 */ 114 { 115 if (minlen > *psiz) { 116 char *tbuf; 117 int rminlen = quantum ? minlen % quantum : 0; 118 int boff = pbptr ? 
*pbptr - *pbuf : 0; 119 /* round up to next multiple of quantum */ 120 if (rminlen) 121 minlen += quantum - rminlen; 122 tbuf = (char *) realloc(*pbuf, minlen); 123 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 124 if (tbuf == NULL) { 125 if (whatrtn) 126 FATAL("out of memory in %s", whatrtn); 127 return 0; 128 } 129 *pbuf = tbuf; 130 *psiz = minlen; 131 if (pbptr) 132 *pbptr = tbuf + boff; 133 } 134 return 1; 135 } 136 137 void run(Node *a) /* execution of parse tree starts here */ 138 { 139 140 stdinit(); 141 execute(a); 142 closeall(); 143 } 144 145 Cell *execute(Node *u) /* execute a node of the parse tree */ 146 { 147 Cell *(*proc)(Node **, int); 148 Cell *x; 149 Node *a; 150 151 if (u == NULL) 152 return(True); 153 for (a = u; ; a = a->nnext) { 154 curnode = a; 155 if (isvalue(a)) { 156 x = (Cell *) (a->narg[0]); 157 if (isfld(x) && !donefld) 158 fldbld(); 159 else if (isrec(x) && !donerec) 160 recbld(); 161 return(x); 162 } 163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 164 FATAL("illegal statement"); 165 proc = proctab[a->nobj-FIRSTTOKEN]; 166 x = (*proc)(a->narg, a->nobj); 167 if (isfld(x) && !donefld) 168 fldbld(); 169 else if (isrec(x) && !donerec) 170 recbld(); 171 if (isexpr(a)) 172 return(x); 173 if (isjump(x)) 174 return(x); 175 if (a->nnext == NULL) 176 return(x); 177 tempfree(x); 178 } 179 } 180 181 182 Cell *program(Node **a, int n) /* execute an awk program */ 183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 184 Cell *x; 185 186 if (setjmp(env) != 0) 187 goto ex; 188 if (a[0]) { /* BEGIN */ 189 x = execute(a[0]); 190 if (isexit(x)) 191 return(True); 192 if (isjump(x)) 193 FATAL("illegal break, continue, next or nextfile from BEGIN"); 194 tempfree(x); 195 } 196 if (a[1] || a[2]) 197 while (getrec(&record, &recsize, true) > 0) { 198 x = execute(a[1]); 199 if (isexit(x)) 200 break; 201 tempfree(x); 202 } 203 ex: 204 if (setjmp(env) != 0) /* handles exit within END */ 
205 goto ex1; 206 if (a[2]) { /* END */ 207 x = execute(a[2]); 208 if (isbreak(x) || isnext(x) || iscont(x)) 209 FATAL("illegal break, continue, next or nextfile from END"); 210 tempfree(x); 211 } 212 ex1: 213 return(True); 214 } 215 216 struct Frame { /* stack frame for awk function calls */ 217 int nargs; /* number of arguments in this call */ 218 Cell *fcncell; /* pointer to Cell for function */ 219 Cell **args; /* pointer to array of arguments after execute */ 220 Cell *retval; /* return value */ 221 }; 222 223 #define NARGS 50 /* max args in a call */ 224 225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 226 int nframe = 0; /* number of frames allocated */ 227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 228 229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 230 { 231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 232 int i, ncall, ndef; 233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 234 Node *x; 235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 236 Cell *y, *z, *fcn; 237 char *s; 238 239 fcn = execute(a[0]); /* the function itself */ 240 s = fcn->nval; 241 if (!isfcn(fcn)) 242 FATAL("calling undefined function %s", s); 243 if (frame == NULL) { 244 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 245 if (frame == NULL) 246 FATAL("out of space for stack frames calling %s", s); 247 } 248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 249 ncall++; 250 ndef = (int) fcn->fval; /* args in defn */ 251 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 252 if (ncall > ndef) 253 WARNING("function %s called with %d args, uses only %d", 254 s, ncall, ndef); 255 if (ncall + ndef > NARGS) 256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 257 for (i = 0, x = a[1]; x != NULL; i++, x 
= x->nnext) { /* get call args */ 258 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 259 y = execute(x); 260 oargs[i] = y; 261 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 263 if (isfcn(y)) 264 FATAL("can't use function %s as argument in %s", y->nval, s); 265 if (isarr(y)) 266 args[i] = y; /* arrays by ref */ 267 else 268 args[i] = copycell(y); 269 tempfree(y); 270 } 271 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 272 args[i] = gettemp(); 273 *args[i] = newcopycell; 274 } 275 frp++; /* now ok to up frame */ 276 if (frp >= frame + nframe) { 277 int dfp = frp - frame; /* old index */ 278 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); 279 if (frame == NULL) 280 FATAL("out of space for stack frames in %s", s); 281 frp = frame + dfp; 282 } 283 frp->fcncell = fcn; 284 frp->args = args; 285 frp->nargs = ndef; /* number defined with (excess are locals) */ 286 frp->retval = gettemp(); 287 288 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 289 y = execute((Node *)(fcn->sval)); /* execute body */ 290 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 291 292 for (i = 0; i < ndef; i++) { 293 Cell *t = frp->args[i]; 294 if (isarr(t)) { 295 if (t->csub == CCOPY) { 296 if (i >= ncall) { 297 freesymtab(t); 298 t->csub = CTEMP; 299 tempfree(t); 300 } else { 301 oargs[i]->tval = t->tval; 302 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 303 oargs[i]->sval = t->sval; 304 tempfree(t); 305 } 306 } 307 } else if (t != y) { /* kludge to prevent freeing twice */ 308 t->csub = CTEMP; 309 tempfree(t); 310 } else if (t == y && t->csub == CCOPY) { 311 t->csub = CTEMP; 312 tempfree(t); 313 freed = 1; 314 } 315 } 316 tempfree(fcn); 317 if (isexit(y) || isnext(y)) 318 return y; 319 if (freed == 0) { 320 tempfree(y); /* don't free twice! 
*/ 321 } 322 z = frp->retval; /* return value */ 323 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 324 frp--; 325 return(z); 326 } 327 328 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 329 { 330 Cell *y; 331 332 /* copy is not constant or field */ 333 334 y = gettemp(); 335 y->tval = x->tval & ~(CON|FLD|REC); 336 y->csub = CCOPY; /* prevents freeing until call is over */ 337 y->nval = x->nval; /* BUG? */ 338 if (isstr(x) /* || x->ctype == OCELL */) { 339 y->sval = tostring(x->sval); 340 y->tval &= ~DONTFREE; 341 } else 342 y->tval |= DONTFREE; 343 y->fval = x->fval; 344 return y; 345 } 346 347 Cell *arg(Node **a, int n) /* nth argument of a function */ 348 { 349 350 n = ptoi(a[0]); /* argument number, counting from 0 */ 351 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 352 if (n+1 > frp->nargs) 353 FATAL("argument #%d of function %s was not supplied", 354 n+1, frp->fcncell->nval); 355 return frp->args[n]; 356 } 357 358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 359 { 360 Cell *y; 361 362 switch (n) { 363 case EXIT: 364 if (a[0] != NULL) { 365 y = execute(a[0]); 366 errorflag = (int) getfval(y); 367 tempfree(y); 368 } 369 longjmp(env, 1); 370 case RETURN: 371 if (a[0] != NULL) { 372 y = execute(a[0]); 373 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 374 setsval(frp->retval, getsval(y)); 375 frp->retval->fval = getfval(y); 376 frp->retval->tval |= NUM; 377 } 378 else if (y->tval & STR) 379 setsval(frp->retval, getsval(y)); 380 else if (y->tval & NUM) 381 setfval(frp->retval, getfval(y)); 382 else /* can't happen */ 383 FATAL("bad type variable %d", y->tval); 384 tempfree(y); 385 } 386 return(jret); 387 case NEXT: 388 return(jnext); 389 case NEXTFILE: 390 nextfile(); 391 return(jnextfile); 392 case BREAK: 393 return(jbreak); 394 case CONTINUE: 395 return(jcont); 396 default: /* can't happen */ 397 FATAL("illegal jump type %d", n); 398 } 399 return 0; /* not reached */ 400 } 401 402 Cell 
*awkgetline(Node **a, int n) /* get next line from specific input */ 403 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 404 Cell *r, *x; 405 extern Cell **fldtab; 406 FILE *fp; 407 char *buf; 408 int bufsize = recsize; 409 int mode; 410 bool newflag; 411 double result; 412 413 if ((buf = (char *) malloc(bufsize)) == NULL) 414 FATAL("out of memory in getline"); 415 416 fflush(stdout); /* in case someone is waiting for a prompt */ 417 r = gettemp(); 418 if (a[1] != NULL) { /* getline < file */ 419 x = execute(a[2]); /* filename */ 420 mode = ptoi(a[1]); 421 if (mode == '|') /* input pipe */ 422 mode = LE; /* arbitrary flag */ 423 fp = openfile(mode, getsval(x), &newflag); 424 tempfree(x); 425 if (fp == NULL) 426 n = -1; 427 else 428 n = readrec(&buf, &bufsize, fp, newflag); 429 if (n <= 0) { 430 ; 431 } else if (a[0] != NULL) { /* getline var <file */ 432 x = execute(a[0]); 433 setsval(x, buf); 434 if (is_number(x->sval, & result)) { 435 x->fval = result; 436 x->tval |= NUM; 437 } 438 tempfree(x); 439 } else { /* getline <file */ 440 setsval(fldtab[0], buf); 441 if (is_number(fldtab[0]->sval, & result)) { 442 fldtab[0]->fval = result; 443 fldtab[0]->tval |= NUM; 444 } 445 } 446 } else { /* bare getline; use current input */ 447 if (a[0] == NULL) /* getline */ 448 n = getrec(&record, &recsize, true); 449 else { /* getline var */ 450 n = getrec(&buf, &bufsize, false); 451 if (n > 0) { 452 x = execute(a[0]); 453 setsval(x, buf); 454 if (is_number(x->sval, & result)) { 455 x->fval = result; 456 x->tval |= NUM; 457 } 458 tempfree(x); 459 } 460 } 461 } 462 setfval(r, (Awkfloat) n); 463 free(buf); 464 return r; 465 } 466 467 Cell *getnf(Node **a, int n) /* get NF */ 468 { 469 if (!donefld) 470 fldbld(); 471 return (Cell *) a[0]; 472 } 473 474 static char * 475 makearraystring(Node *p, const char *func) 476 { 477 char *buf; 478 int bufsz = recsize; 479 size_t blen; 480 481 if ((buf = (char *) malloc(bufsz)) == NULL) { 482 FATAL("%s: out of memory", func); 483 } 
484 485 blen = 0; 486 buf[blen] = '\0'; 487 488 for (; p; p = p->nnext) { 489 Cell *x = execute(p); /* expr */ 490 char *s = getsval(x); 491 size_t seplen = strlen(getsval(subseploc)); 492 size_t nsub = p->nnext ? seplen : 0; 493 size_t slen = strlen(s); 494 size_t tlen = blen + slen + nsub; 495 496 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 497 FATAL("%s: out of memory %s[%s...]", 498 func, x->nval, buf); 499 } 500 memcpy(buf + blen, s, slen); 501 if (nsub) { 502 memcpy(buf + blen + slen, *SUBSEP, nsub); 503 } 504 buf[tlen] = '\0'; 505 blen = tlen; 506 tempfree(x); 507 } 508 return buf; 509 } 510 511 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 512 { 513 Cell *x, *z; 514 char *buf; 515 516 x = execute(a[0]); /* Cell* for symbol table */ 517 buf = makearraystring(a[1], __func__); 518 if (!isarr(x)) { 519 DPRINTF("making %s into an array\n", NN(x->nval)); 520 if (freeable(x)) 521 xfree(x->sval); 522 x->tval &= ~(STR|NUM|DONTFREE); 523 x->tval |= ARR; 524 x->sval = (char *) makesymtab(NSYMTAB); 525 } 526 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 527 z->ctype = OCELL; 528 z->csub = CVAR; 529 tempfree(x); 530 free(buf); 531 return(z); 532 } 533 534 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 535 { 536 Cell *x; 537 538 x = execute(a[0]); /* Cell* for symbol table */ 539 if (x == symtabloc) { 540 FATAL("cannot delete SYMTAB or its elements"); 541 } 542 if (!isarr(x)) 543 return True; 544 if (a[1] == NULL) { /* delete the elements, not the table */ 545 freesymtab(x); 546 x->tval &= ~STR; 547 x->tval |= ARR; 548 x->sval = (char *) makesymtab(NSYMTAB); 549 } else { 550 char *buf = makearraystring(a[1], __func__); 551 freeelem(x, buf); 552 free(buf); 553 } 554 tempfree(x); 555 return True; 556 } 557 558 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 559 { 560 Cell *ap, *k; 561 char *buf; 562 563 ap = execute(a[1]); /* array name */ 564 if (!isarr(ap)) 
{ 565 DPRINTF("making %s into an array\n", ap->nval); 566 if (freeable(ap)) 567 xfree(ap->sval); 568 ap->tval &= ~(STR|NUM|DONTFREE); 569 ap->tval |= ARR; 570 ap->sval = (char *) makesymtab(NSYMTAB); 571 } 572 buf = makearraystring(a[0], __func__); 573 k = lookup(buf, (Array *) ap->sval); 574 tempfree(ap); 575 free(buf); 576 if (k == NULL) 577 return(False); 578 else 579 return(True); 580 } 581 582 583 /* ======== utf-8 code ========== */ 584 585 /* 586 * Awk strings can contain ascii, random 8-bit items (eg Latin-1), 587 * or utf-8. u8_isutf tests whether a string starts with a valid 588 * utf-8 sequence, and returns 0 if not (e.g., high bit set). 589 * u8_nextlen returns length of next valid sequence, which is 590 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. 591 * u8_strlen returns length of string in valid utf-8 sequences 592 * and/or high-bit bytes. Conversion functions go between byte 593 * number and character number. 594 * 595 * In theory, this behaves the same as before for non-utf8 bytes. 596 * 597 * Limited checking! This is a potential security hole. 598 */ 599 600 /* is s the beginning of a valid utf-8 string? */ 601 /* return length 1..4 if yes, 0 if no */ 602 int u8_isutf(const char *s) 603 { 604 int n, ret; 605 unsigned char c; 606 607 c = s[0]; 608 if (c < 128 || awk_mb_cur_max == 1) 609 return 1; /* what if it's 0? */ 610 611 n = strlen(s); 612 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 613 ret = 2; /* 110xxxxx 10xxxxxx */ 614 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 615 && (s[2] & 0xC0) == 0x80) { 616 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ 617 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 618 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 619 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 620 } else { 621 ret = 0; 622 } 623 return ret; 624 } 625 626 /* Convert (prefix of) utf8 string to utf-32 rune. 
*/ 627 /* Sets *rune to the value, returns the length. */ 628 /* No error checking: watch out. */ 629 int u8_rune(int *rune, const char *s) 630 { 631 int n, ret; 632 unsigned char c; 633 634 c = s[0]; 635 if (c < 128 || awk_mb_cur_max == 1) { 636 *rune = c; 637 return 1; 638 } 639 640 n = strlen(s); 641 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 642 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ 643 ret = 2; 644 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 645 && (s[2] & 0xC0) == 0x80) { 646 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); 647 /* 1110xxxx 10xxxxxx 10xxxxxx */ 648 ret = 3; 649 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 650 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 651 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); 652 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 653 ret = 4; 654 } else { 655 *rune = c; 656 ret = 1; 657 } 658 return ret; /* returns one byte if sequence doesn't look like utf */ 659 } 660 661 /* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */ 662 int u8_nextlen(const char *s) 663 { 664 int len; 665 666 len = u8_isutf(s); 667 if (len == 0) 668 len = 1; 669 return len; 670 } 671 672 /* return number of utf characters or single non-utf bytes */ 673 int u8_strlen(const char *s) 674 { 675 int i, len, n, totlen; 676 unsigned char c; 677 678 n = strlen(s); 679 totlen = 0; 680 for (i = 0; i < n; i += len) { 681 c = s[i]; 682 if (c < 128 || awk_mb_cur_max == 1) { 683 len = 1; 684 } else { 685 len = u8_nextlen(&s[i]); 686 } 687 totlen++; 688 if (i > n) 689 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i); 690 } 691 return totlen; 692 } 693 694 /* convert utf-8 char number in a string to its byte offset */ 695 int u8_char2byte(const char *s, int charnum) 696 { 697 int n; 698 int bytenum = 0; 699 700 while (charnum > 0) { 701 n = u8_nextlen(s); 702 s += n; 703 
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	Runeerror = 0xFFFD,	/* U+FFFD REPLACEMENT CHARACTER, emitted for invalid runes */
	Runemax = 0x10FFFF,	/* largest value that may be encoded */

	Bit1 = 7,
	Bitx = 6,
	Bit2 = 5,
	Bit3 = 4,
	Bit4 = 3,
	Bit5 = 2,

	T1 = ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx = ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2 = ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3 = ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4 = ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5 = ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	Rune1 = (1<<(Bit1+0*Bitx))-1,	/* 0000 0000 0000 0000 0111 1111 */
	Rune2 = (1<<(Bit2+1*Bitx))-1,	/* 0000 0000 0000 0111 1111 1111 */
	Rune3 = (1<<(Bit3+2*Bitx))-1,	/* 0000 0000 1111 1111 1111 1111 */
	Rune4 = (1<<(Bit4+3*Bitx))-1,	/* 0011 1111 1111 1111 1111 1111 */

	Maskx = (1<<Bitx)-1,		/* 0011 1111 */
	Testx = Maskx ^ 0xFF,		/* 1100 0000 */

};

/*
 * runetochar: encode the code point c as utf-8 into str.
 *
 * str must have room for up to 4 bytes; no terminating NUL is written.
 * Returns the number of bytes stored (1..4).
 *
 * Values that cannot be encoded as utf-8 -- negative numbers, anything
 * above Runemax, and the surrogate halves U+D800..U+DFFF -- are replaced
 * by Runeerror, which encodes as the three bytes EF BF BD.
 */
int runetochar(char *str, int c)
{
	enum { SurrogateMin = 0xD800, SurrogateMax = 0xDFFF };

	/* substitute the error rune first so every path below emits valid utf-8 */
	if (c < 0 || c > Runemax || (c >= SurrogateMin && c <= SurrogateMax))
		c = Runeerror;

	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = T2 | (c >> 1*Bitx);
		str[1] = Tx | (c & Maskx);
		return 2;
	}

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c <= Rune3) {
		str[0] = T3 | (c >> 2*Bitx);
		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
		str[2] = Tx | (c & Maskx);
		return 3;
	}

	/* four character sequence 010000-10FFFF => T4 Tx Tx Tx */
	str[0] = T4 | (c >> 3*Bitx);
	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
	str[3] = Tx | (c & Maskx);
	return 4;
}
tempfree(y); 871 if (i) return(True); 872 else return(False); 873 case NOT: 874 if (i) return(False); 875 else return(True); 876 default: /* can't happen */ 877 FATAL("unknown boolean operator %d", n); 878 } 879 return 0; /*NOTREACHED*/ 880 } 881 882 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 883 { 884 int i; 885 Cell *x, *y; 886 Awkfloat j; 887 bool x_is_nan, y_is_nan; 888 889 x = execute(a[0]); 890 y = execute(a[1]); 891 x_is_nan = isnan(x->fval); 892 y_is_nan = isnan(y->fval); 893 if (x->tval&NUM && y->tval&NUM) { 894 if ((x_is_nan || y_is_nan) && n != NE) 895 return(False); 896 j = x->fval - y->fval; 897 i = j<0? -1: (j>0? 1: 0); 898 } else { 899 i = strcmp(getsval(x), getsval(y)); 900 } 901 tempfree(x); 902 tempfree(y); 903 switch (n) { 904 case LT: if (i<0) return(True); 905 else return(False); 906 case LE: if (i<=0) return(True); 907 else return(False); 908 case NE: if (x_is_nan && y_is_nan) return(True); 909 else if (i!=0) return(True); 910 else return(False); 911 case EQ: if (i == 0) return(True); 912 else return(False); 913 case GE: if (i>=0) return(True); 914 else return(False); 915 case GT: if (i>0) return(True); 916 else return(False); 917 default: /* can't happen */ 918 FATAL("unknown relational operator %d", n); 919 } 920 return 0; /*NOTREACHED*/ 921 } 922 923 void tfree(Cell *a) /* free a tempcell */ 924 { 925 if (freeable(a)) { 926 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 927 xfree(a->sval); 928 } 929 if (a == tmps) 930 FATAL("tempcell list is curdled"); 931 a->cnext = tmps; 932 tmps = a; 933 } 934 935 Cell *gettemp(void) /* get a tempcell */ 936 { int i; 937 Cell *x; 938 939 if (!tmps) { 940 tmps = (Cell *) calloc(100, sizeof(*tmps)); 941 if (!tmps) 942 FATAL("out of space for temporaries"); 943 for (i = 1; i < 100; i++) 944 tmps[i-1].cnext = &tmps[i]; 945 tmps[i-1].cnext = NULL; 946 } 947 x = tmps; 948 tmps = x->cnext; 949 *x = tempcell; 950 return(x); 951 } 952 953 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 
954 { 955 Awkfloat val; 956 Cell *x; 957 int m; 958 char *s; 959 960 x = execute(a[0]); 961 val = getfval(x); /* freebsd: defend against super large field numbers */ 962 if ((Awkfloat)INT_MAX < val) 963 FATAL("trying to access out of range field %s", x->nval); 964 m = (int) val; 965 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 966 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 967 /* BUG: can x->nval ever be null??? */ 968 tempfree(x); 969 x = fieldadr(m); 970 x->ctype = OCELL; /* BUG? why are these needed? */ 971 x->csub = CFLD; 972 return(x); 973 } 974 975 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 976 { 977 int k, m, n; 978 int mb, nb; 979 char *s; 980 int temp; 981 Cell *x, *y, *z = NULL; 982 983 x = execute(a[0]); 984 y = execute(a[1]); 985 if (a[2] != NULL) 986 z = execute(a[2]); 987 s = getsval(x); 988 k = u8_strlen(s) + 1; 989 if (k <= 1) { 990 tempfree(x); 991 tempfree(y); 992 if (a[2] != NULL) { 993 tempfree(z); 994 } 995 x = gettemp(); 996 setsval(x, ""); 997 return(x); 998 } 999 m = (int) getfval(y); 1000 if (m <= 0) 1001 m = 1; 1002 else if (m > k) 1003 m = k; 1004 tempfree(y); 1005 if (a[2] != NULL) { 1006 n = (int) getfval(z); 1007 tempfree(z); 1008 } else 1009 n = k - 1; 1010 if (n < 0) 1011 n = 0; 1012 else if (n > k - m) 1013 n = k - m; 1014 /* m is start, n is length from there */ 1015 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 1016 y = gettemp(); 1017 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ 1018 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ 1019 1020 temp = s[nb]; /* with thanks to John Linderman */ 1021 s[nb] = '\0'; 1022 setsval(y, s + mb); 1023 s[nb] = temp; 1024 tempfree(x); 1025 return(y); 1026 } 1027 1028 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 1029 { 1030 Cell *x, *y, *z; 1031 char *s1, *s2, *p1, *p2, *q; 1032 Awkfloat v = 0.0; 1033 1034 x = execute(a[0]); 1035 s1 = getsval(x); 1036 y = execute(a[1]); 1037 s2 = getsval(y); 1038 
1039 z = gettemp(); 1040 for (p1 = s1; *p1 != '\0'; p1++) { 1041 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 1042 continue; 1043 if (*p2 == '\0') { 1044 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ 1045 1046 /* should be a function: used in match() as well */ 1047 int i, len; 1048 v = 0; 1049 for (i = 0; i < p1-s1+1; i += len) { 1050 len = u8_nextlen(s1+i); 1051 v++; 1052 } 1053 break; 1054 } 1055 } 1056 tempfree(x); 1057 tempfree(y); 1058 setfval(z, v); 1059 return(z); 1060 } 1061 1062 int has_utf8(char *s) /* return 1 if s contains any utf-8 (2 bytes or more) character */ 1063 { 1064 int n; 1065 1066 for (n = 0; *s != 0; s += n) { 1067 n = u8_nextlen(s); 1068 if (n > 1) 1069 return 1; 1070 } 1071 return 0; 1072 } 1073 1074 #define MAXNUMSIZE 50 1075 1076 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 1077 { 1078 char *fmt; 1079 char *p, *t; 1080 const char *os; 1081 Cell *x; 1082 int flag = 0, n; 1083 int fmtwd; /* format width */ 1084 int fmtsz = recsize; 1085 char *buf = *pbuf; 1086 int bufsize = *pbufsize; 1087 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 1088 #define BUFSZ(a) (bufsize - ((a) - buf)) 1089 1090 static bool first = true; 1091 static bool have_a_format = false; 1092 1093 if (first) { 1094 char xbuf[100]; 1095 1096 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 1097 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 1098 first = false; 1099 } 1100 1101 os = s; 1102 p = buf; 1103 if ((fmt = (char *) malloc(fmtsz)) == NULL) 1104 FATAL("out of memory in format()"); 1105 while (*s) { 1106 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 1107 if (*s != '%') { 1108 *p++ = *s++; 1109 continue; 1110 } 1111 if (*(s+1) == '%') { 1112 *p++ = '%'; 1113 s += 2; 1114 continue; 1115 } 1116 fmtwd = atoi(s+1); 1117 if (fmtwd < 0) 1118 fmtwd = -fmtwd; 1119 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 1120 for (t = fmt; (*t++ = *s) != '\0'; s++) { 1121 if (!adjbuf(&fmt, &fmtsz, 
MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 1122 FATAL("format item %.30s... ran format() out of memory", os); 1123 /* Ignore size specifiers */ 1124 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 1125 t--; 1126 continue; 1127 } 1128 if (isalpha((uschar)*s)) 1129 break; 1130 if (*s == '$') { 1131 FATAL("'$' not permitted in awk formats"); 1132 } 1133 if (*s == '*') { 1134 if (a == NULL) { 1135 FATAL("not enough args in printf(%s)", os); 1136 } 1137 x = execute(a); 1138 a = a->nnext; 1139 snprintf(t - 1, FMTSZ(t - 1), 1140 "%d", fmtwd=(int) getfval(x)); 1141 if (fmtwd < 0) 1142 fmtwd = -fmtwd; 1143 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 1144 t = fmt + strlen(fmt); 1145 tempfree(x); 1146 } 1147 } 1148 *t = '\0'; 1149 if (fmtwd < 0) 1150 fmtwd = -fmtwd; 1151 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 1152 switch (*s) { 1153 case 'a': case 'A': 1154 if (have_a_format) 1155 flag = *s; 1156 else 1157 flag = 'f'; 1158 break; 1159 case 'f': case 'e': case 'g': case 'E': case 'G': 1160 flag = 'f'; 1161 break; 1162 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 1163 flag = (*s == 'd' || *s == 'i') ? 
'd' : 'u'; 1164 *(t-1) = 'j'; 1165 *t = *s; 1166 *++t = '\0'; 1167 break; 1168 case 's': 1169 flag = 's'; 1170 break; 1171 case 'c': 1172 flag = 'c'; 1173 break; 1174 case '\0': 1175 FATAL("missing printf conversion specifier"); 1176 break; 1177 default: 1178 WARNING("weird printf conversion %s", fmt); 1179 flag = '?'; 1180 break; 1181 } 1182 if (a == NULL) 1183 FATAL("not enough args in printf(%s)", os); 1184 x = execute(a); 1185 a = a->nnext; 1186 n = MAXNUMSIZE; 1187 if (fmtwd > n) 1188 n = fmtwd; 1189 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 1190 switch (flag) { 1191 case '?': 1192 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 1193 t = getsval(x); 1194 n = strlen(t); 1195 if (fmtwd > n) 1196 n = fmtwd; 1197 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 1198 p += strlen(p); 1199 snprintf(p, BUFSZ(p), "%s", t); 1200 break; 1201 case 'a': 1202 case 'A': 1203 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 1204 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 1205 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 1206 1207 case 's': { 1208 t = getsval(x); 1209 n = strlen(t); 1210 /* if simple format or no utf-8 in the string, sprintf works */ 1211 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { 1212 if (fmtwd > n) 1213 n = fmtwd; 1214 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 1215 FATAL("huge string/format (%d chars) in printf %.30s..." 
\ 1216 " ran format() out of memory", n, t); 1217 snprintf(p, BUFSZ(p), fmt, t); 1218 break; 1219 } 1220 1221 /* get here if string has utf-8 chars and fmt is not plain %s */ 1222 /* "%-w.ps", where -, w and .p are all optional */ 1223 /* '0' before the w is a flag character */ 1224 /* fmt points at % */ 1225 int ljust = 0, wid = 0, prec = n, pad = 0; 1226 char *f = fmt+1; 1227 if (f[0] == '-') { 1228 ljust = 1; 1229 f++; 1230 } 1231 // flags '0' and '+' are recognized but skipped 1232 if (f[0] == '0') { 1233 f++; 1234 if (f[0] == '+') 1235 f++; 1236 } 1237 if (f[0] == '+') { 1238 f++; 1239 if (f[0] == '0') 1240 f++; 1241 } 1242 if (isdigit(f[0])) { /* there is a wid */ 1243 wid = strtol(f, &f, 10); 1244 } 1245 if (f[0] == '.') { /* there is a .prec */ 1246 prec = strtol(++f, &f, 10); 1247 } 1248 if (prec > u8_strlen(t)) 1249 prec = u8_strlen(t); 1250 pad = wid>prec ? wid - prec : 0; // has to be >= 0 1251 int i, k, n; 1252 1253 if (ljust) { // print prec chars from t, then pad blanks 1254 n = u8_char2byte(t, prec); 1255 for (k = 0; k < n; k++) { 1256 //putchar(t[k]); 1257 *p++ = t[k]; 1258 } 1259 for (i = 0; i < pad; i++) { 1260 //printf(" "); 1261 *p++ = ' '; 1262 } 1263 } else { // print pad blanks, then prec chars from t 1264 for (i = 0; i < pad; i++) { 1265 //printf(" "); 1266 *p++ = ' '; 1267 } 1268 n = u8_char2byte(t, prec); 1269 for (k = 0; k < n; k++) { 1270 //putchar(t[k]); 1271 *p++ = t[k]; 1272 } 1273 } 1274 *p = 0; 1275 break; 1276 } 1277 1278 case 'c': { 1279 /* 1280 * If a numeric value is given, awk should just turn 1281 * it into a character and print it: 1282 * BEGIN { printf("%c\n", 65) } 1283 * prints "A". 1284 * 1285 * But what if the numeric value is > 128 and 1286 * represents a valid Unicode code point?!? We do 1287 * our best to convert it back into UTF-8. If we 1288 * can't, we output the encoding of the Unicode 1289 * "invalid character", 0xFFFD. 
1290 */ 1291 if (isnum(x)) { 1292 int charval = (int) getfval(x); 1293 1294 if (charval != 0) { 1295 if (charval < 128 || awk_mb_cur_max == 1) 1296 snprintf(p, BUFSZ(p), fmt, charval); 1297 else { 1298 // possible unicode character 1299 size_t count; 1300 char *bs = wide_char_to_byte_str(charval, &count); 1301 1302 if (bs == NULL) { // invalid character 1303 // use unicode invalid character, 0xFFFD 1304 bs = "\357\277\275"; 1305 count = 3; 1306 } 1307 t = bs; 1308 n = count; 1309 goto format_percent_c; 1310 } 1311 } else { 1312 *p++ = '\0'; /* explicit null byte */ 1313 *p = '\0'; /* next output will start here */ 1314 } 1315 break; 1316 } 1317 t = getsval(x); 1318 n = u8_nextlen(t); 1319 format_percent_c: 1320 if (n < 2) { /* not utf8 */ 1321 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 1322 break; 1323 } 1324 1325 // utf8 character, almost same song and dance as for %s 1326 int ljust = 0, wid = 0, prec = n, pad = 0; 1327 char *f = fmt+1; 1328 if (f[0] == '-') { 1329 ljust = 1; 1330 f++; 1331 } 1332 // flags '0' and '+' are recognized but skipped 1333 if (f[0] == '0') { 1334 f++; 1335 if (f[0] == '+') 1336 f++; 1337 } 1338 if (f[0] == '+') { 1339 f++; 1340 if (f[0] == '0') 1341 f++; 1342 } 1343 if (isdigit(f[0])) { /* there is a wid */ 1344 wid = strtol(f, &f, 10); 1345 } 1346 if (f[0] == '.') { /* there is a .prec */ 1347 prec = strtol(++f, &f, 10); 1348 } 1349 if (prec > 1) // %c --> only one character 1350 prec = 1; 1351 pad = wid>prec ? 
wid - prec : 0; // has to be >= 0 1352 int i; 1353 1354 if (ljust) { // print one char from t, then pad blanks 1355 for (i = 0; i < n; i++) 1356 *p++ = t[i]; 1357 for (i = 0; i < pad; i++) { 1358 //printf(" "); 1359 *p++ = ' '; 1360 } 1361 } else { // print pad blanks, then prec chars from t 1362 for (i = 0; i < pad; i++) { 1363 //printf(" "); 1364 *p++ = ' '; 1365 } 1366 for (i = 0; i < n; i++) 1367 *p++ = t[i]; 1368 } 1369 *p = 0; 1370 break; 1371 } 1372 default: 1373 FATAL("can't happen: bad conversion %c in format()", flag); 1374 } 1375 1376 tempfree(x); 1377 p += strlen(p); 1378 s++; 1379 } 1380 *p = '\0'; 1381 free(fmt); 1382 for ( ; a; a = a->nnext) { /* evaluate any remaining args */ 1383 x = execute(a); 1384 tempfree(x); 1385 } 1386 *pbuf = buf; 1387 *pbufsize = bufsize; 1388 return p - buf; 1389 } 1390 1391 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 1392 { 1393 Cell *x; 1394 Node *y; 1395 char *buf; 1396 int bufsz=3*recsize; 1397 1398 if ((buf = (char *) malloc(bufsz)) == NULL) 1399 FATAL("out of memory in awksprintf"); 1400 y = a[0]->nnext; 1401 x = execute(a[0]); 1402 if (format(&buf, &bufsz, getsval(x), y) == -1) 1403 FATAL("sprintf string %.30s... too long. can't happen.", buf); 1404 tempfree(x); 1405 x = gettemp(); 1406 x->sval = buf; 1407 x->tval = STR; 1408 return(x); 1409 } 1410 1411 Cell *awkprintf(Node **a, int n) /* printf */ 1412 { /* a[0] is list of args, starting with format string */ 1413 /* a[1] is redirection operator, a[2] is redirection file */ 1414 FILE *fp; 1415 Cell *x; 1416 Node *y; 1417 char *buf; 1418 int len; 1419 int bufsz=3*recsize; 1420 1421 if ((buf = (char *) malloc(bufsz)) == NULL) 1422 FATAL("out of memory in awkprintf"); 1423 y = a[0]->nnext; 1424 x = execute(a[0]); 1425 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1426 FATAL("printf string %.30s... too long. 
can't happen.", buf); 1427 tempfree(x); 1428 if (a[1] == NULL) { 1429 /* fputs(buf, stdout); */ 1430 fwrite(buf, len, 1, stdout); 1431 if (ferror(stdout)) 1432 FATAL("write error on stdout"); 1433 } else { 1434 fp = redirect(ptoi(a[1]), a[2]); 1435 /* fputs(buf, fp); */ 1436 fwrite(buf, len, 1, fp); 1437 fflush(fp); 1438 if (ferror(fp)) 1439 FATAL("write error on %s", filename(fp)); 1440 } 1441 free(buf); 1442 return(True); 1443 } 1444 1445 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1446 { 1447 Awkfloat i, j = 0; 1448 double v; 1449 Cell *x, *y, *z; 1450 1451 x = execute(a[0]); 1452 i = getfval(x); 1453 tempfree(x); 1454 if (n != UMINUS && n != UPLUS) { 1455 y = execute(a[1]); 1456 j = getfval(y); 1457 tempfree(y); 1458 } 1459 z = gettemp(); 1460 switch (n) { 1461 case ADD: 1462 i += j; 1463 break; 1464 case MINUS: 1465 i -= j; 1466 break; 1467 case MULT: 1468 i *= j; 1469 break; 1470 case DIVIDE: 1471 if (j == 0) 1472 FATAL("division by zero"); 1473 i /= j; 1474 break; 1475 case MOD: 1476 if (j == 0) 1477 FATAL("division by zero in mod"); 1478 modf(i/j, &v); 1479 i = i - j * v; 1480 break; 1481 case UMINUS: 1482 i = -i; 1483 break; 1484 case UPLUS: /* handled by getfval(), above */ 1485 break; 1486 case POWER: 1487 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1488 i = ipow(i, (int) j); 1489 else { 1490 errno = 0; 1491 i = errcheck(pow(i, j), "pow"); 1492 } 1493 break; 1494 default: /* can't happen */ 1495 FATAL("illegal arithmetic operator %d", n); 1496 } 1497 setfval(z, i); 1498 return(z); 1499 } 1500 1501 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1502 { 1503 double v; 1504 1505 if (n <= 0) 1506 return 1; 1507 v = ipow(x, n/2); 1508 if (n % 2 == 0) 1509 return v * v; 1510 else 1511 return x * v * v; 1512 } 1513 1514 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. 
*/ 1515 { 1516 Cell *x, *z; 1517 int k; 1518 Awkfloat xf; 1519 1520 x = execute(a[0]); 1521 xf = getfval(x); 1522 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1523 if (n == PREINCR || n == PREDECR) { 1524 setfval(x, xf + k); 1525 return(x); 1526 } 1527 z = gettemp(); 1528 setfval(z, xf); 1529 setfval(x, xf + k); 1530 tempfree(x); 1531 return(z); 1532 } 1533 1534 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1535 { /* this is subtle; don't muck with it. */ 1536 Cell *x, *y; 1537 Awkfloat xf, yf; 1538 double v; 1539 1540 y = execute(a[1]); 1541 x = execute(a[0]); 1542 if (n == ASSIGN) { /* ordinary assignment */ 1543 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1544 ; /* self-assignment: leave alone unless it's a field or NF */ 1545 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1546 setsval(x, getsval(y)); 1547 x->fval = getfval(y); 1548 x->tval |= NUM; 1549 } 1550 else if (isstr(y)) 1551 setsval(x, getsval(y)); 1552 else if (isnum(y)) 1553 setfval(x, getfval(y)); 1554 else 1555 funnyvar(y, "read value of"); 1556 tempfree(y); 1557 return(x); 1558 } 1559 xf = getfval(x); 1560 yf = getfval(y); 1561 switch (n) { 1562 case ADDEQ: 1563 xf += yf; 1564 break; 1565 case SUBEQ: 1566 xf -= yf; 1567 break; 1568 case MULTEQ: 1569 xf *= yf; 1570 break; 1571 case DIVEQ: 1572 if (yf == 0) 1573 FATAL("division by zero in /="); 1574 xf /= yf; 1575 break; 1576 case MODEQ: 1577 if (yf == 0) 1578 FATAL("division by zero in %%="); 1579 modf(xf/yf, &v); 1580 xf = xf - yf * v; 1581 break; 1582 case POWEQ: 1583 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1584 xf = ipow(xf, (int) yf); 1585 else { 1586 errno = 0; 1587 xf = errcheck(pow(xf, yf), "pow"); 1588 } 1589 break; 1590 default: 1591 FATAL("illegal assignment operator %d", n); 1592 break; 1593 } 1594 tempfree(y); 1595 setfval(x, xf); 1596 return(x); 1597 } 1598 1599 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1600 { 1601 Cell *x, *y, *z; 1602 int n1, n2; 1603 char *s = NULL; 1604 int 
ssz = 0; 1605 1606 x = execute(a[0]); 1607 n1 = strlen(getsval(x)); 1608 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); 1609 memcpy(s, x->sval, n1); 1610 1611 tempfree(x); 1612 1613 y = execute(a[1]); 1614 n2 = strlen(getsval(y)); 1615 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1616 memcpy(s + n1, y->sval, n2); 1617 s[n1 + n2] = '\0'; 1618 1619 tempfree(y); 1620 1621 z = gettemp(); 1622 z->sval = s; 1623 z->tval = STR; 1624 1625 return(z); 1626 } 1627 1628 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1629 { 1630 Cell *x; 1631 1632 if (a[0] == NULL) 1633 x = execute(a[1]); 1634 else { 1635 x = execute(a[0]); 1636 if (istrue(x)) { 1637 tempfree(x); 1638 x = execute(a[1]); 1639 } 1640 } 1641 return x; 1642 } 1643 1644 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1645 { 1646 Cell *x; 1647 int pair; 1648 1649 pair = ptoi(a[3]); 1650 if (pairstack[pair] == 0) { 1651 x = execute(a[0]); 1652 if (istrue(x)) 1653 pairstack[pair] = 1; 1654 tempfree(x); 1655 } 1656 if (pairstack[pair] == 1) { 1657 x = execute(a[1]); 1658 if (istrue(x)) 1659 pairstack[pair] = 0; 1660 tempfree(x); 1661 x = execute(a[2]); 1662 return(x); 1663 } 1664 return(False); 1665 } 1666 1667 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1668 { 1669 Cell *x = NULL, *y, *ap; 1670 const char *s, *origs, *t; 1671 const char *fs = NULL; 1672 char *origfs = NULL; 1673 int sep; 1674 char temp, num[50]; 1675 int n, tempstat, arg3type; 1676 int j; 1677 double result; 1678 1679 y = execute(a[0]); /* source string */ 1680 origs = s = strdup(getsval(y)); 1681 tempfree(y); 1682 arg3type = ptoi(a[3]); 1683 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */ 1684 fs = getsval(fsloc); 1685 } else if (arg3type == STRING) { /* split(str,arr,"string") */ 1686 x = execute(a[2]); 1687 fs = origfs = strdup(getsval(x)); 1688 tempfree(x); 1689 } else if (arg3type == REGEXPR) { 1690 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1691 } else { 1692 
FATAL("illegal type of split"); 1693 } 1694 sep = *fs; 1695 ap = execute(a[1]); /* array name */ 1696 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ 1697 freesymtab(ap); 1698 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1699 ap->tval &= ~STR; 1700 ap->tval |= ARR; 1701 ap->sval = (char *) makesymtab(NSYMTAB); 1702 1703 n = 0; 1704 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1705 /* split(s, a, //); have to arrange that it looks like empty sep */ 1706 arg3type = 0; 1707 fs = ""; 1708 sep = 0; 1709 } 1710 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1711 fa *pfa; 1712 if (arg3type == REGEXPR) { /* it's ready already */ 1713 pfa = (fa *) a[2]; 1714 } else { 1715 pfa = makedfa(fs, 1); 1716 } 1717 if (nematch(pfa,s)) { 1718 tempstat = pfa->initstat; 1719 pfa->initstat = 2; 1720 do { 1721 n++; 1722 snprintf(num, sizeof(num), "%d", n); 1723 temp = *patbeg; 1724 setptr(patbeg, '\0'); 1725 if (is_number(s, & result)) 1726 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1727 else 1728 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1729 setptr(patbeg, temp); 1730 s = patbeg + patlen; 1731 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1732 n++; 1733 snprintf(num, sizeof(num), "%d", n); 1734 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1735 pfa->initstat = tempstat; 1736 goto spdone; 1737 } 1738 } while (nematch(pfa,s)); 1739 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1740 /* cf gsub and refldbld */ 1741 } 1742 n++; 1743 snprintf(num, sizeof(num), "%d", n); 1744 if (is_number(s, & result)) 1745 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1746 else 1747 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1748 spdone: 1749 pfa = NULL; 1750 1751 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */ 1752 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ 1753 for (;;) { 1754 char *fr 
= newt; 1755 n++; 1756 if (*s == '"' ) { /* start of "..." */ 1757 for (s++ ; *s != '\0'; ) { 1758 if (*s == '"' && s[1] != '\0' && s[1] == '"') { 1759 s += 2; /* doubled quote */ 1760 *fr++ = '"'; 1761 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { 1762 s++; /* skip over closing quote */ 1763 break; 1764 } else { 1765 *fr++ = *s++; 1766 } 1767 } 1768 *fr++ = 0; 1769 } else { /* unquoted field */ 1770 while (*s != ',' && *s != '\0') 1771 *fr++ = *s++; 1772 *fr++ = 0; 1773 } 1774 snprintf(num, sizeof(num), "%d", n); 1775 if (is_number(newt, &result)) 1776 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval); 1777 else 1778 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval); 1779 if (*s++ == '\0') 1780 break; 1781 } 1782 free(newt); 1783 1784 } else if (!CSV && sep == ' ') { /* usual case: split on white space */ 1785 for (n = 0; ; ) { 1786 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1787 while (ISWS(*s)) 1788 s++; 1789 if (*s == '\0') 1790 break; 1791 n++; 1792 t = s; 1793 do 1794 s++; 1795 while (*s != '\0' && !ISWS(*s)); 1796 temp = *s; 1797 setptr(s, '\0'); 1798 snprintf(num, sizeof(num), "%d", n); 1799 if (is_number(t, & result)) 1800 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1801 else 1802 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1803 setptr(s, temp); 1804 if (*s != '\0') 1805 s++; 1806 } 1807 1808 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1809 for (n = 0; *s != '\0'; s += u8_nextlen(s)) { 1810 char buf[10]; 1811 n++; 1812 snprintf(num, sizeof(num), "%d", n); 1813 1814 for (j = 0; j < u8_nextlen(s); j++) { 1815 buf[j] = s[j]; 1816 } 1817 buf[j] = '\0'; 1818 1819 if (isdigit((uschar)buf[0])) 1820 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1821 else 1822 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1823 } 1824 1825 } else if (*s != '\0') { /* some random single character */ 1826 for (;;) { 1827 n++; 1828 t = s; 1829 while (*s != sep && *s != '\n' && *s != '\0') 1830 
s++; 1831 temp = *s; 1832 setptr(s, '\0'); 1833 snprintf(num, sizeof(num), "%d", n); 1834 if (is_number(t, & result)) 1835 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1836 else 1837 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1838 setptr(s, temp); 1839 if (*s++ == '\0') 1840 break; 1841 } 1842 } 1843 tempfree(ap); 1844 xfree(origs); 1845 xfree(origfs); 1846 x = gettemp(); 1847 x->tval = NUM; 1848 x->fval = n; 1849 return(x); 1850 } 1851 1852 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1853 { 1854 Cell *x; 1855 1856 x = execute(a[0]); 1857 if (istrue(x)) { 1858 tempfree(x); 1859 x = execute(a[1]); 1860 } else { 1861 tempfree(x); 1862 x = execute(a[2]); 1863 } 1864 return(x); 1865 } 1866 1867 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1868 { 1869 Cell *x; 1870 1871 x = execute(a[0]); 1872 if (istrue(x)) { 1873 tempfree(x); 1874 x = execute(a[1]); 1875 } else if (a[2] != NULL) { 1876 tempfree(x); 1877 x = execute(a[2]); 1878 } 1879 return(x); 1880 } 1881 1882 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1883 { 1884 Cell *x; 1885 1886 for (;;) { 1887 x = execute(a[0]); 1888 if (!istrue(x)) 1889 return(x); 1890 tempfree(x); 1891 x = execute(a[1]); 1892 if (isbreak(x)) { 1893 x = True; 1894 return(x); 1895 } 1896 if (isnext(x) || isexit(x) || isret(x)) 1897 return(x); 1898 tempfree(x); 1899 } 1900 } 1901 1902 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1903 { 1904 Cell *x; 1905 1906 for (;;) { 1907 x = execute(a[0]); 1908 if (isbreak(x)) 1909 return True; 1910 if (isnext(x) || isexit(x) || isret(x)) 1911 return(x); 1912 tempfree(x); 1913 x = execute(a[1]); 1914 if (!istrue(x)) 1915 return(x); 1916 tempfree(x); 1917 } 1918 } 1919 1920 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1921 { 1922 Cell *x; 1923 1924 x = execute(a[0]); 1925 tempfree(x); 1926 for (;;) { 1927 if (a[1]!=NULL) { 1928 x = execute(a[1]); 1929 if (!istrue(x)) return(x); 1930 else tempfree(x); 1931 } 1932 x = 
execute(a[3]); 1933 if (isbreak(x)) /* turn off break */ 1934 return True; 1935 if (isnext(x) || isexit(x) || isret(x)) 1936 return(x); 1937 tempfree(x); 1938 x = execute(a[2]); 1939 tempfree(x); 1940 } 1941 } 1942 1943 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1944 { 1945 Cell *x, *vp, *arrayp, *cp, *ncp; 1946 Array *tp; 1947 int i; 1948 1949 vp = execute(a[0]); 1950 arrayp = execute(a[1]); 1951 if (!isarr(arrayp)) { 1952 return True; 1953 } 1954 tp = (Array *) arrayp->sval; 1955 tempfree(arrayp); 1956 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1957 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1958 setsval(vp, cp->nval); 1959 ncp = cp->cnext; 1960 x = execute(a[2]); 1961 if (isbreak(x)) { 1962 tempfree(vp); 1963 return True; 1964 } 1965 if (isnext(x) || isexit(x) || isret(x)) { 1966 tempfree(vp); 1967 return(x); 1968 } 1969 tempfree(x); 1970 } 1971 } 1972 return True; 1973 } 1974 1975 static char *nawk_convert(const char *s, int (*fun_c)(int), 1976 wint_t (*fun_wc)(wint_t)) 1977 { 1978 char *buf = NULL; 1979 char *pbuf = NULL; 1980 const char *ps = NULL; 1981 size_t n = 0; 1982 wchar_t wc; 1983 const size_t sz = awk_mb_cur_max; 1984 int unused; 1985 1986 if (sz == 1) { 1987 buf = tostring(s); 1988 1989 for (pbuf = buf; *pbuf; pbuf++) 1990 *pbuf = fun_c((uschar)*pbuf); 1991 1992 return buf; 1993 } else { 1994 /* upper/lower character may be shorter/longer */ 1995 buf = tostringN(s, strlen(s) * sz + 1); 1996 1997 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1998 /* 1999 * Reset internal state here too. 2000 * Assign result to avoid a compiler warning. (Casting to void 2001 * doesn't work.) 2002 * Increment said variable to avoid a different warning. 
2003 */ 2004 unused = wctomb(NULL, L'\0'); 2005 unused++; 2006 2007 ps = s; 2008 pbuf = buf; 2009 while (n = mbtowc(&wc, ps, sz), 2010 n > 0 && n != (size_t)-1 && n != (size_t)-2) 2011 { 2012 ps += n; 2013 2014 n = wctomb(pbuf, fun_wc(wc)); 2015 if (n == (size_t)-1) 2016 FATAL("illegal wide character %s", s); 2017 2018 pbuf += n; 2019 } 2020 2021 *pbuf = '\0'; 2022 2023 if (n) 2024 FATAL("illegal byte sequence %s", s); 2025 2026 return buf; 2027 } 2028 } 2029 2030 #ifdef __DJGPP__ 2031 static wint_t towupper(wint_t wc) 2032 { 2033 if (wc >= 0 && wc < 256) 2034 return toupper(wc & 0xFF); 2035 2036 return wc; 2037 } 2038 2039 static wint_t towlower(wint_t wc) 2040 { 2041 if (wc >= 0 && wc < 256) 2042 return tolower(wc & 0xFF); 2043 2044 return wc; 2045 } 2046 #endif 2047 2048 static char *nawk_toupper(const char *s) 2049 { 2050 return nawk_convert(s, toupper, towupper); 2051 } 2052 2053 static char *nawk_tolower(const char *s) 2054 { 2055 return nawk_convert(s, tolower, towlower); 2056 } 2057 2058 2059 2060 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 2061 { 2062 Cell *x, *y; 2063 Awkfloat u; 2064 int t, sz; 2065 Awkfloat tmp; 2066 char *buf, *fmt; 2067 Node *nextarg; 2068 FILE *fp; 2069 int status = 0; 2070 time_t tv; 2071 struct tm *tm; 2072 2073 t = ptoi(a[0]); 2074 x = execute(a[1]); 2075 nextarg = a[1]->nnext; 2076 switch (t) { 2077 case FLENGTH: 2078 if (isarr(x)) 2079 u = ((Array *) x->sval)->nelem; /* GROT. 
should be function*/ 2080 else 2081 u = u8_strlen(getsval(x)); 2082 break; 2083 case FLOG: 2084 errno = 0; 2085 u = errcheck(log(getfval(x)), "log"); 2086 break; 2087 case FINT: 2088 modf(getfval(x), &u); break; 2089 case FEXP: 2090 errno = 0; 2091 u = errcheck(exp(getfval(x)), "exp"); 2092 break; 2093 case FSQRT: 2094 errno = 0; 2095 u = errcheck(sqrt(getfval(x)), "sqrt"); 2096 break; 2097 case FSIN: 2098 u = sin(getfval(x)); break; 2099 case FCOS: 2100 u = cos(getfval(x)); break; 2101 case FATAN: 2102 if (nextarg == NULL) { 2103 WARNING("atan2 requires two arguments; returning 1.0"); 2104 u = 1.0; 2105 } else { 2106 y = execute(a[1]->nnext); 2107 u = atan2(getfval(x), getfval(y)); 2108 tempfree(y); 2109 nextarg = nextarg->nnext; 2110 } 2111 break; 2112 case FCOMPL: 2113 u = ~((int)getfval(x)); 2114 break; 2115 case FAND: 2116 if (nextarg == 0) { 2117 WARNING("and requires two arguments; returning 0"); 2118 u = 0; 2119 break; 2120 } 2121 y = execute(a[1]->nnext); 2122 u = ((int)getfval(x)) & ((int)getfval(y)); 2123 tempfree(y); 2124 nextarg = nextarg->nnext; 2125 break; 2126 case FFOR: 2127 if (nextarg == 0) { 2128 WARNING("or requires two arguments; returning 0"); 2129 u = 0; 2130 break; 2131 } 2132 y = execute(a[1]->nnext); 2133 u = ((int)getfval(x)) | ((int)getfval(y)); 2134 tempfree(y); 2135 nextarg = nextarg->nnext; 2136 break; 2137 case FXOR: 2138 if (nextarg == 0) { 2139 WARNING("xor requires two arguments; returning 0"); 2140 u = 0; 2141 break; 2142 } 2143 y = execute(a[1]->nnext); 2144 u = ((int)getfval(x)) ^ ((int)getfval(y)); 2145 tempfree(y); 2146 nextarg = nextarg->nnext; 2147 break; 2148 case FLSHIFT: 2149 if (nextarg == 0) { 2150 WARNING("lshift requires two arguments; returning 0"); 2151 u = 0; 2152 break; 2153 } 2154 y = execute(a[1]->nnext); 2155 u = ((int)getfval(x)) << ((int)getfval(y)); 2156 tempfree(y); 2157 nextarg = nextarg->nnext; 2158 break; 2159 case FRSHIFT: 2160 if (nextarg == 0) { 2161 WARNING("rshift requires two arguments; returning 
0"); 2162 u = 0; 2163 break; 2164 } 2165 y = execute(a[1]->nnext); 2166 u = ((int)getfval(x)) >> ((int)getfval(y)); 2167 tempfree(y); 2168 nextarg = nextarg->nnext; 2169 break; 2170 case FSYSTEM: 2171 fflush(stdout); /* in case something is buffered already */ 2172 status = system(getsval(x)); 2173 u = status; 2174 if (status != -1) { 2175 if (WIFEXITED(status)) { 2176 u = WEXITSTATUS(status); 2177 } else if (WIFSIGNALED(status)) { 2178 u = WTERMSIG(status) + 256; 2179 #ifdef WCOREDUMP 2180 if (WCOREDUMP(status)) 2181 u += 256; 2182 #endif 2183 } else /* something else?!? */ 2184 u = 0; 2185 } 2186 break; 2187 case FRAND: 2188 /* random() returns numbers in [0..2^31-1] 2189 * in order to get a number in [0, 1), divide it by 2^31 2190 */ 2191 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 2192 break; 2193 case FSRAND: 2194 if (isrec(x)) /* no argument provided */ 2195 u = time((time_t *)0); 2196 else 2197 u = getfval(x); 2198 tmp = u; 2199 srandom((unsigned long) u); 2200 u = srand_seed; 2201 srand_seed = tmp; 2202 break; 2203 case FTOUPPER: 2204 case FTOLOWER: 2205 if (t == FTOUPPER) 2206 buf = nawk_toupper(getsval(x)); 2207 else 2208 buf = nawk_tolower(getsval(x)); 2209 tempfree(x); 2210 x = gettemp(); 2211 setsval(x, buf); 2212 free(buf); 2213 return x; 2214 case FFLUSH: 2215 if (isrec(x) || strlen(getsval(x)) == 0) { 2216 flush_all(); /* fflush() or fflush("") -> all */ 2217 u = 0; 2218 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 2219 u = EOF; 2220 else 2221 u = fflush(fp); 2222 break; 2223 case FSYSTIME: 2224 u = time((time_t *) 0); 2225 break; 2226 case FSTRFTIME: 2227 /* strftime([format [,timestamp]]) */ 2228 if (nextarg) { 2229 y = execute(nextarg); 2230 nextarg = nextarg->nnext; 2231 tv = (time_t) getfval(y); 2232 tempfree(y); 2233 } else 2234 tv = time((time_t *) 0); 2235 tm = localtime(&tv); 2236 if (tm == NULL) 2237 FATAL("bad time %ld", (long)tv); 2238 2239 if (isrec(x)) { 2240 /* format argument not provided, use default */ 2241 
fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 2242 } else 2243 fmt = tostring(getsval(x)); 2244 2245 sz = 32; 2246 buf = NULL; 2247 do { 2248 if ((buf = realloc(buf, (sz *= 2))) == NULL) 2249 FATAL("out of memory in strftime"); 2250 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 2251 2252 y = gettemp(); 2253 setsval(y, buf); 2254 free(fmt); 2255 free(buf); 2256 2257 return y; 2258 default: /* can't happen */ 2259 FATAL("illegal function type %d", t); 2260 break; 2261 } 2262 tempfree(x); 2263 x = gettemp(); 2264 setfval(x, u); 2265 if (nextarg != NULL) { 2266 WARNING("warning: function has too many arguments"); 2267 for ( ; nextarg; nextarg = nextarg->nnext) { 2268 y = execute(nextarg); 2269 tempfree(y); 2270 } 2271 } 2272 return(x); 2273 } 2274 2275 Cell *printstat(Node **a, int n) /* print a[0] */ 2276 { 2277 Node *x; 2278 Cell *y; 2279 FILE *fp; 2280 2281 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 2282 fp = stdout; 2283 else 2284 fp = redirect(ptoi(a[1]), a[2]); 2285 for (x = a[0]; x != NULL; x = x->nnext) { 2286 y = execute(x); 2287 fputs(getpssval(y), fp); 2288 tempfree(y); 2289 if (x->nnext == NULL) 2290 fputs(getsval(orsloc), fp); 2291 else 2292 fputs(getsval(ofsloc), fp); 2293 } 2294 if (a[1] != NULL) 2295 fflush(fp); 2296 if (ferror(fp)) 2297 FATAL("write error on %s", filename(fp)); 2298 return(True); 2299 } 2300 2301 Cell *nullproc(Node **a, int n) 2302 { 2303 return 0; 2304 } 2305 2306 2307 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 2308 { 2309 FILE *fp; 2310 Cell *x; 2311 char *fname; 2312 2313 x = execute(b); 2314 fname = getsval(x); 2315 fp = openfile(a, fname, NULL); 2316 if (fp == NULL) 2317 FATAL("can't open file %s", fname); 2318 tempfree(x); 2319 return fp; 2320 } 2321 2322 struct files { 2323 FILE *fp; 2324 const char *fname; 2325 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 2326 } *files; 2327 2328 size_t nfiles; 2329 2330 static void stdinit(void) /* in case stdin, etc., are not constants 
*/ 2331 { 2332 nfiles = FOPEN_MAX; 2333 files = (struct files *) calloc(nfiles, sizeof(*files)); 2334 if (files == NULL) 2335 FATAL("can't allocate file memory for %zu files", nfiles); 2336 files[0].fp = stdin; 2337 files[0].fname = tostring("/dev/stdin"); 2338 files[0].mode = LT; 2339 files[1].fp = stdout; 2340 files[1].fname = tostring("/dev/stdout"); 2341 files[1].mode = GT; 2342 files[2].fp = stderr; 2343 files[2].fname = tostring("/dev/stderr"); 2344 files[2].mode = GT; 2345 } 2346 2347 FILE *openfile(int a, const char *us, bool *pnewflag) 2348 { 2349 const char *s = us; 2350 size_t i; 2351 int m; 2352 FILE *fp = NULL; 2353 2354 if (*s == '\0') 2355 FATAL("null file name in print or getline"); 2356 for (i = 0; i < nfiles; i++) 2357 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 2358 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 2359 a == FFLUSH)) { 2360 if (pnewflag) 2361 *pnewflag = false; 2362 return files[i].fp; 2363 } 2364 if (a == FFLUSH) /* didn't find it, so don't create it! */ 2365 return NULL; 2366 2367 for (i = 0; i < nfiles; i++) 2368 if (files[i].fp == NULL) 2369 break; 2370 if (i >= nfiles) { 2371 struct files *nf; 2372 size_t nnf = nfiles + FOPEN_MAX; 2373 nf = (struct files *) realloc(files, nnf * sizeof(*nf)); 2374 if (nf == NULL) 2375 FATAL("cannot grow files for %s and %zu files", s, nnf); 2376 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 2377 nfiles = nnf; 2378 files = nf; 2379 } 2380 fflush(stdout); /* force a semblance of order */ 2381 m = a; 2382 if (a == GT) { 2383 fp = fopen(s, "w"); 2384 } else if (a == APPEND) { 2385 fp = fopen(s, "a"); 2386 m = GT; /* so can mix > and >> */ 2387 } else if (a == '|') { /* output pipe */ 2388 fp = popen(s, "w"); 2389 } else if (a == LE) { /* input pipe */ 2390 fp = popen(s, "r"); 2391 } else if (a == LT) { /* getline <file */ 2392 fp = strcmp(s, "-") == 0 ? 
stdin : fopen(s, "r"); /* "-" is stdin */ 2393 } else /* can't happen */ 2394 FATAL("illegal redirection %d", a); 2395 if (fp != NULL) { 2396 files[i].fname = tostring(s); 2397 files[i].fp = fp; 2398 files[i].mode = m; 2399 if (pnewflag) 2400 *pnewflag = true; 2401 if (fp != stdin && fp != stdout && fp != stderr) 2402 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 2403 } 2404 return fp; 2405 } 2406 2407 const char *filename(FILE *fp) 2408 { 2409 size_t i; 2410 2411 for (i = 0; i < nfiles; i++) 2412 if (fp == files[i].fp) 2413 return files[i].fname; 2414 return "???"; 2415 } 2416 2417 Cell *closefile(Node **a, int n) 2418 { 2419 Cell *x; 2420 size_t i; 2421 bool stat; 2422 2423 x = execute(a[0]); 2424 getsval(x); 2425 stat = true; 2426 for (i = 0; i < nfiles; i++) { 2427 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 2428 continue; 2429 if (files[i].mode == GT || files[i].mode == '|') 2430 fflush(files[i].fp); 2431 if (ferror(files[i].fp)) { 2432 if ((files[i].mode == GT && files[i].fp != stderr) 2433 || files[i].mode == '|') 2434 FATAL("write error on %s", files[i].fname); 2435 else 2436 WARNING("i/o error occurred on %s", files[i].fname); 2437 } 2438 if (files[i].fp == stdin || files[i].fp == stdout || 2439 files[i].fp == stderr) 2440 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2441 else if (files[i].mode == '|' || files[i].mode == LE) 2442 stat = pclose(files[i].fp) == -1; 2443 else 2444 stat = fclose(files[i].fp) == EOF; 2445 if (stat) 2446 WARNING("i/o error occurred closing %s", files[i].fname); 2447 xfree(files[i].fname); 2448 files[i].fname = NULL; /* watch out for ref thru this */ 2449 files[i].fp = NULL; 2450 break; 2451 } 2452 tempfree(x); 2453 x = gettemp(); 2454 setfval(x, (Awkfloat) (stat ? -1 : 0)); 2455 return(x); 2456 } 2457 2458 void closeall(void) 2459 { 2460 size_t i; 2461 bool stat = false; 2462 2463 for (i = 0; i < nfiles; i++) { 2464 if (! 
/*
 * sub - implement sub(regex, replacement, target): replace one match.
 *
 * a[3] is the target, a[2] the replacement string.  When a[0] is NULL,
 * a[1] is an already-compiled regular expression; otherwise a[1] is an
 * expression whose string value is compiled with makedfa().
 *
 * On a match the new string is assembled in a heap buffer (grown with
 * adjbuf as needed) and assigned back to the target with setsval().
 * Returns True if a substitution was made, False otherwise.
 *
 * patbeg and patlen are defined outside this function; presumably
 * pmatch() sets them to the start and length of the match - confirm
 * against its definition.
 */
Cell *sub(Node **a, int nnn)	/* substitute command */
{
	const char *sptr, *q;
	Cell *x, *y, *result;
	char *t, *buf, *pb;
	fa *pfa;
	int bufsz = recsize;

	if ((buf = (char *) malloc(bufsz)) == NULL)
		FATAL("out of memory in sub");
	x = execute(a[3]);	/* target string */
	t = getsval(x);
	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	result = False;
	if (pmatch(pfa, t)) {
		/* 1. copy the part of the target that precedes the match */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
		pb = buf;
		while (sptr < patbeg)
			*pb++ = *sptr++;
		/* 2. append the replacement, expanding '\' escapes and '&' */
		sptr = getsval(y);
		while (*sptr != '\0') {
			/* keep a few bytes of headroom; pb is re-based if buf moves */
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
			if (*sptr == '\\') {
				backsub(&pb, &sptr);
			} else if (*sptr == '&') {
				/* '&' stands for the matched text */
				sptr++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
				for (q = patbeg; q < patbeg+patlen; )
					*pb++ = *q++;
			} else
				*pb++ = *sptr++;
		}
		*pb = '\0';
		if (pb > buf + bufsz)
			FATAL("sub result1 %.30s too big; can't happen", buf);
		/* 3. append whatever follows the match, including its NUL */
		sptr = patbeg + patlen;
		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
			adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
			while ((*pb++ = *sptr++) != '\0')
				continue;
		}
		if (pb > buf + bufsz)
			FATAL("sub result2 %.30s too big; can't happen", buf);
		setsval(x, buf);	/* BUG: should be able to avoid copy */
		result = True;
	}
	tempfree(x);
	tempfree(y);
	free(buf);
	return result;
}
else 2603 *pb++ = *sptr++; 2604 } 2605 } 2606 if (*t == '\0') /* at end */ 2607 goto done; 2608 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2609 charlen = u8_nextlen(t); 2610 while (charlen-- > 0) 2611 *pb++ = *t++; 2612 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2613 FATAL("gsub result0 %.30s too big; can't happen", buf); 2614 mflag = 0; 2615 } 2616 else { /* matched nonempty string */ 2617 num++; 2618 sptr = t; 2619 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2620 while (sptr < patbeg) 2621 *pb++ = *sptr++; 2622 sptr = rptr; 2623 while (*sptr != '\0') { 2624 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2625 if (*sptr == '\\') { 2626 backsub(&pb, &sptr); 2627 } else if (*sptr == '&') { 2628 sptr++; 2629 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2630 for (q = patbeg; q < patbeg+patlen; ) 2631 *pb++ = *q++; 2632 } else 2633 *pb++ = *sptr++; 2634 } 2635 t = patbeg + patlen; 2636 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2637 goto done; 2638 if (pb > buf + bufsz) 2639 FATAL("gsub result1 %.30s too big; can't happen", buf); 2640 mflag = 1; 2641 } 2642 } while (pmatch(pfa,t)); 2643 sptr = t; 2644 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2645 while ((*pb++ = *sptr++) != '\0') 2646 continue; 2647 done: if (pb < buf + bufsz) 2648 *pb = '\0'; 2649 else if (*(pb-1) != '\0') 2650 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2651 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2652 pfa->initstat = tempstat; 2653 } 2654 tempfree(x); 2655 tempfree(y); 2656 x = gettemp(); 2657 x->tval = NUM; 2658 x->fval = num; 2659 free(buf); 2660 return(x); 2661 } 2662 2663 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2664 /* XXX incomplete - doesn't support backreferences \0 ... 
\9 */ 2665 { 2666 Cell *x, *y, *res, *h; 2667 char *rptr; 2668 const char *sptr; 2669 char *buf, *pb; 2670 const char *t, *q; 2671 fa *pfa; 2672 int mflag, tempstat, num, whichm; 2673 int bufsz = recsize; 2674 2675 if ((buf = malloc(bufsz)) == NULL) 2676 FATAL("out of memory in gensub"); 2677 mflag = 0; /* if mflag == 0, can replace empty string */ 2678 num = 0; 2679 x = execute(a[4]); /* source string */ 2680 t = getsval(x); 2681 res = copycell(x); /* target string - initially copy of source */ 2682 res->csub = CTEMP; /* result values are temporary */ 2683 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2684 pfa = (fa *) a[1]; /* regular expression */ 2685 else { 2686 y = execute(a[1]); 2687 pfa = makedfa(getsval(y), 1); 2688 tempfree(y); 2689 } 2690 y = execute(a[2]); /* replacement string */ 2691 h = execute(a[3]); /* which matches should be replaced */ 2692 sptr = getsval(h); 2693 if (sptr[0] == 'g' || sptr[0] == 'G') 2694 whichm = -1; 2695 else { 2696 /* 2697 * The specified number is index of replacement, starting 2698 * from 1. GNU awk treats index lower than 0 same as 2699 * 1, we do same for compatibility. 2700 */ 2701 whichm = (int) getfval(h) - 1; 2702 if (whichm < 0) 2703 whichm = 0; 2704 } 2705 tempfree(h); 2706 2707 if (pmatch(pfa, t)) { 2708 char *sl; 2709 2710 tempstat = pfa->initstat; 2711 pfa->initstat = 2; 2712 pb = buf; 2713 rptr = getsval(y); 2714 /* 2715 * XXX if there are any backreferences in subst string, 2716 * complain now. 
2717 */ 2718 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2719 if (strchr("0123456789", sl[1])) { 2720 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2721 } 2722 } 2723 2724 do { 2725 if (whichm >= 0 && whichm != num) { 2726 num++; 2727 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2728 2729 /* copy the part of string up to and including 2730 * match to output buffer */ 2731 while (t < patbeg + patlen) 2732 *pb++ = *t++; 2733 continue; 2734 } 2735 2736 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2737 if (mflag == 0) { /* can replace empty */ 2738 num++; 2739 sptr = rptr; 2740 while (*sptr != 0) { 2741 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2742 if (*sptr == '\\') { 2743 backsub(&pb, &sptr); 2744 } else if (*sptr == '&') { 2745 sptr++; 2746 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2747 for (q = patbeg; q < patbeg+patlen; ) 2748 *pb++ = *q++; 2749 } else 2750 *pb++ = *sptr++; 2751 } 2752 } 2753 if (*t == 0) /* at end */ 2754 goto done; 2755 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2756 *pb++ = *t++; 2757 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2758 FATAL("gensub result0 %.30s too big; can't happen", buf); 2759 mflag = 0; 2760 } 2761 else { /* matched nonempty string */ 2762 num++; 2763 sptr = t; 2764 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2765 while (sptr < patbeg) 2766 *pb++ = *sptr++; 2767 sptr = rptr; 2768 while (*sptr != 0) { 2769 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2770 if (*sptr == '\\') { 2771 backsub(&pb, &sptr); 2772 } else if (*sptr == '&') { 2773 sptr++; 2774 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2775 for (q = patbeg; q < patbeg+patlen; ) 2776 *pb++ = *q++; 2777 } else 2778 *pb++ = *sptr++; 2779 } 2780 t = patbeg + patlen; 2781 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2782 goto done; 2783 if (pb > buf + bufsz) 2784 
FATAL("gensub result1 %.30s too big; can't happen", buf); 2785 mflag = 1; 2786 } 2787 } while (pmatch(pfa,t)); 2788 sptr = t; 2789 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2790 while ((*pb++ = *sptr++) != 0) 2791 ; 2792 done: if (pb > buf + bufsz) 2793 FATAL("gensub result2 %.30s too big; can't happen", buf); 2794 *pb = '\0'; 2795 setsval(res, buf); 2796 pfa->initstat = tempstat; 2797 } 2798 tempfree(x); 2799 tempfree(y); 2800 free(buf); 2801 return(res); 2802 } 2803 2804 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2805 { /* sptr[0] == '\\' */ 2806 char *pb = *pb_ptr; 2807 const char *sptr = *sptr_ptr; 2808 static bool first = true; 2809 static bool do_posix = false; 2810 2811 if (first) { 2812 first = false; 2813 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2814 } 2815 2816 if (sptr[1] == '\\') { 2817 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2818 *pb++ = '\\'; 2819 *pb++ = '&'; 2820 sptr += 4; 2821 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2822 *pb++ = '\\'; 2823 sptr += 2; 2824 } else if (do_posix) { /* \\x -> \x */ 2825 sptr++; 2826 *pb++ = *sptr++; 2827 } else { /* \\x -> \\x */ 2828 *pb++ = *sptr++; 2829 *pb++ = *sptr++; 2830 } 2831 } else if (sptr[1] == '&') { /* literal & */ 2832 sptr++; 2833 *pb++ = *sptr++; 2834 } else /* literal \ */ 2835 *pb++ = *sptr++; 2836 2837 *pb_ptr = pb; 2838 *sptr_ptr = sptr; 2839 } 2840 2841 static char *wide_char_to_byte_str(int rune, size_t *outlen) 2842 { 2843 static char buf[5]; 2844 int len; 2845 2846 if (rune < 0 || rune > 0x10FFFF) 2847 return NULL; 2848 2849 memset(buf, 0, sizeof(buf)); 2850 2851 len = 0; 2852 if (rune <= 0x0000007F) { 2853 buf[len++] = rune; 2854 } else if (rune <= 0x000007FF) { 2855 // 110xxxxx 10xxxxxx 2856 buf[len++] = 0xC0 | (rune >> 6); 2857 buf[len++] = 0x80 | (rune & 0x3F); 2858 } else if (rune <= 0x0000FFFF) { 2859 // 1110xxxx 10xxxxxx 10xxxxxx 2860 buf[len++] = 0xE0 | (rune >> 12); 2861 buf[len++] = 0x80 | 
((rune >> 6) & 0x3F); 2862 buf[len++] = 0x80 | (rune & 0x3F); 2863 2864 } else { 2865 // 0x00010000 - 0x10FFFF 2866 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 2867 buf[len++] = 0xF0 | (rune >> 18); 2868 buf[len++] = 0x80 | ((rune >> 12) & 0x3F); 2869 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2870 buf[len++] = 0x80 | (rune & 0x3F); 2871 } 2872 2873 *outlen = len; 2874 buf[len++] = '\0'; 2875 2876 return buf; 2877 } 2878