1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 
23 ****************************************************************/ 24 25 #define DEBUG 26 #include <stdio.h> 27 #include <ctype.h> 28 #include <errno.h> 29 #include <wctype.h> 30 #include <fcntl.h> 31 #include <setjmp.h> 32 #include <limits.h> 33 #include <math.h> 34 #include <string.h> 35 #include <stdlib.h> 36 #include <time.h> 37 #include <sys/types.h> 38 #include <sys/stat.h> 39 #include <sys/wait.h> 40 #include "awk.h" 41 #include "awkgram.tab.h" 42 43 44 static void stdinit(void); 45 static void flush_all(void); 46 static char *wide_char_to_byte_str(int rune, size_t *outlen); 47 48 #if 1 49 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 50 #else 51 void tempfree(Cell *p) { 52 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 53 WARNING("bad csub %d in Cell %d %s", 54 p->csub, p->ctype, p->sval); 55 } 56 if (istemp(p)) 57 tfree(p); 58 } 59 #endif 60 61 /* do we really need these? */ 62 /* #ifdef _NFILE */ 63 /* #ifndef FOPEN_MAX */ 64 /* #define FOPEN_MAX _NFILE */ 65 /* #endif */ 66 /* #endif */ 67 /* */ 68 /* #ifndef FOPEN_MAX */ 69 /* #define FOPEN_MAX 40 */ /* max number of open files */ 70 /* #endif */ 71 /* */ 72 /* #ifndef RAND_MAX */ 73 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 74 /* #endif */ 75 76 jmp_buf env; 77 extern int pairstack[]; 78 extern Awkfloat srand_seed; 79 80 Node *winner = NULL; /* root of parse tree */ 81 Cell *tmps; /* free temporary cells for execution */ 82 83 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 84 Cell *True = &truecell; 85 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 86 Cell *False = &falsecell; 87 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 88 Cell *jbreak = &breakcell; 89 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 90 Cell *jcont = &contcell; 91 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 92 Cell *jnext = &nextcell; 93 static Cell 
nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 94 Cell *jnextfile = &nextfilecell; 95 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 96 Cell *jexit = &exitcell; 97 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 98 Cell *jret = &retcell; 99 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 100 101 Node *curnode = NULL; /* the node being executed, for debugging */ 102 103 /* buffer memory management */ 104 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 105 const char *whatrtn) 106 /* pbuf: address of pointer to buffer being managed 107 * psiz: address of buffer size variable 108 * minlen: minimum length of buffer needed 109 * quantum: buffer size quantum 110 * pbptr: address of movable pointer into buffer, or 0 if none 111 * whatrtn: name of the calling routine if failure should cause fatal error 112 * 113 * return 0 for realloc failure, !=0 for success 114 */ 115 { 116 if (minlen > *psiz) { 117 char *tbuf; 118 int rminlen = quantum ? minlen % quantum : 0; 119 int boff = pbptr ? 
*pbptr - *pbuf : 0; 120 /* round up to next multiple of quantum */ 121 if (rminlen) 122 minlen += quantum - rminlen; 123 tbuf = (char *) realloc(*pbuf, minlen); 124 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 125 if (tbuf == NULL) { 126 if (whatrtn) 127 FATAL("out of memory in %s", whatrtn); 128 return 0; 129 } 130 *pbuf = tbuf; 131 *psiz = minlen; 132 if (pbptr) 133 *pbptr = tbuf + boff; 134 } 135 return 1; 136 } 137 138 void run(Node *a) /* execution of parse tree starts here */ 139 { 140 141 stdinit(); 142 execute(a); 143 closeall(); 144 } 145 146 Cell *execute(Node *u) /* execute a node of the parse tree */ 147 { 148 Cell *(*proc)(Node **, int); 149 Cell *x; 150 Node *a; 151 152 if (u == NULL) 153 return(True); 154 for (a = u; ; a = a->nnext) { 155 curnode = a; 156 if (isvalue(a)) { 157 x = (Cell *) (a->narg[0]); 158 if (isfld(x) && !donefld) 159 fldbld(); 160 else if (isrec(x) && !donerec) 161 recbld(); 162 return(x); 163 } 164 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 165 FATAL("illegal statement"); 166 proc = proctab[a->nobj-FIRSTTOKEN]; 167 x = (*proc)(a->narg, a->nobj); 168 if (isfld(x) && !donefld) 169 fldbld(); 170 else if (isrec(x) && !donerec) 171 recbld(); 172 if (isexpr(a)) 173 return(x); 174 if (isjump(x)) 175 return(x); 176 if (a->nnext == NULL) 177 return(x); 178 tempfree(x); 179 } 180 } 181 182 183 Cell *program(Node **a, int n) /* execute an awk program */ 184 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 185 Cell *x; 186 187 if (setjmp(env) != 0) 188 goto ex; 189 if (a[0]) { /* BEGIN */ 190 x = execute(a[0]); 191 if (isexit(x)) 192 return(True); 193 if (isjump(x)) 194 FATAL("illegal break, continue, next or nextfile from BEGIN"); 195 tempfree(x); 196 } 197 if (a[1] || a[2]) 198 while (getrec(&record, &recsize, true) > 0) { 199 x = execute(a[1]); 200 if (isexit(x)) 201 break; 202 tempfree(x); 203 } 204 ex: 205 if (setjmp(env) != 0) /* handles exit within END */ 
206 goto ex1; 207 if (a[2]) { /* END */ 208 x = execute(a[2]); 209 if (isbreak(x) || isnext(x) || iscont(x)) 210 FATAL("illegal break, continue, next or nextfile from END"); 211 tempfree(x); 212 } 213 ex1: 214 return(True); 215 } 216 217 struct Frame { /* stack frame for awk function calls */ 218 int nargs; /* number of arguments in this call */ 219 Cell *fcncell; /* pointer to Cell for function */ 220 Cell **args; /* pointer to array of arguments after execute */ 221 Cell *retval; /* return value */ 222 }; 223 224 #define NARGS 50 /* max args in a call */ 225 226 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 227 int nframe = 0; /* number of frames allocated */ 228 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 229 230 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 231 { 232 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 233 int i, ncall, ndef; 234 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 235 Node *x; 236 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 237 Cell *y, *z, *fcn; 238 char *s; 239 240 fcn = execute(a[0]); /* the function itself */ 241 s = fcn->nval; 242 if (!isfcn(fcn)) 243 FATAL("calling undefined function %s", s); 244 if (frame == NULL) { 245 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 246 if (frame == NULL) 247 FATAL("out of space for stack frames calling %s", s); 248 } 249 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 250 ncall++; 251 ndef = (int) fcn->fval; /* args in defn */ 252 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 253 if (ncall > ndef) 254 WARNING("function %s called with %d args, uses only %d", 255 s, ncall, ndef); 256 if (ncall + ndef > NARGS) 257 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 258 for (i = 0, x = a[1]; x != NULL; i++, x 
= x->nnext) { /* get call args */ 259 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 260 y = execute(x); 261 oargs[i] = y; 262 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 263 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 264 if (isfcn(y)) 265 FATAL("can't use function %s as argument in %s", y->nval, s); 266 if (isarr(y)) 267 args[i] = y; /* arrays by ref */ 268 else 269 args[i] = copycell(y); 270 tempfree(y); 271 } 272 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 273 args[i] = gettemp(); 274 *args[i] = newcopycell; 275 } 276 frp++; /* now ok to up frame */ 277 if (frp >= frame + nframe) { 278 int dfp = frp - frame; /* old index */ 279 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); 280 if (frame == NULL) 281 FATAL("out of space for stack frames in %s", s); 282 frp = frame + dfp; 283 } 284 frp->fcncell = fcn; 285 frp->args = args; 286 frp->nargs = ndef; /* number defined with (excess are locals) */ 287 frp->retval = gettemp(); 288 289 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 290 y = execute((Node *)(fcn->sval)); /* execute body */ 291 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 292 293 for (i = 0; i < ndef; i++) { 294 Cell *t = frp->args[i]; 295 if (isarr(t)) { 296 if (t->csub == CCOPY) { 297 if (i >= ncall) { 298 freesymtab(t); 299 t->csub = CTEMP; 300 tempfree(t); 301 } else { 302 oargs[i]->tval = t->tval; 303 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 304 oargs[i]->sval = t->sval; 305 tempfree(t); 306 } 307 } 308 } else if (t != y) { /* kludge to prevent freeing twice */ 309 t->csub = CTEMP; 310 tempfree(t); 311 } else if (t == y && t->csub == CCOPY) { 312 t->csub = CTEMP; 313 tempfree(t); 314 freed = 1; 315 } 316 } 317 tempfree(fcn); 318 if (isexit(y) || isnext(y)) 319 return y; 320 if (freed == 0) { 321 tempfree(y); /* don't free twice! 
*/ 322 } 323 z = frp->retval; /* return value */ 324 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 325 frp--; 326 return(z); 327 } 328 329 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 330 { 331 Cell *y; 332 333 /* copy is not constant or field */ 334 335 y = gettemp(); 336 y->tval = x->tval & ~(CON|FLD|REC); 337 y->csub = CCOPY; /* prevents freeing until call is over */ 338 y->nval = x->nval; /* BUG? */ 339 if (isstr(x) /* || x->ctype == OCELL */) { 340 y->sval = tostring(x->sval); 341 y->tval &= ~DONTFREE; 342 } else 343 y->tval |= DONTFREE; 344 y->fval = x->fval; 345 return y; 346 } 347 348 Cell *arg(Node **a, int n) /* nth argument of a function */ 349 { 350 351 n = ptoi(a[0]); /* argument number, counting from 0 */ 352 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 353 if (n+1 > frp->nargs) 354 FATAL("argument #%d of function %s was not supplied", 355 n+1, frp->fcncell->nval); 356 return frp->args[n]; 357 } 358 359 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 360 { 361 Cell *y; 362 363 switch (n) { 364 case EXIT: 365 if (a[0] != NULL) { 366 y = execute(a[0]); 367 errorflag = (int) getfval(y); 368 tempfree(y); 369 } 370 longjmp(env, 1); 371 case RETURN: 372 if (a[0] != NULL) { 373 y = execute(a[0]); 374 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 375 setsval(frp->retval, getsval(y)); 376 frp->retval->fval = getfval(y); 377 frp->retval->tval |= NUM; 378 } 379 else if (y->tval & STR) 380 setsval(frp->retval, getsval(y)); 381 else if (y->tval & NUM) 382 setfval(frp->retval, getfval(y)); 383 else /* can't happen */ 384 FATAL("bad type variable %d", y->tval); 385 tempfree(y); 386 } 387 return(jret); 388 case NEXT: 389 return(jnext); 390 case NEXTFILE: 391 nextfile(); 392 return(jnextfile); 393 case BREAK: 394 return(jbreak); 395 case CONTINUE: 396 return(jcont); 397 default: /* can't happen */ 398 FATAL("illegal jump type %d", n); 399 } 400 return 0; /* not reached */ 401 } 402 403 Cell 
*awkgetline(Node **a, int n) /* get next line from specific input */ 404 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 405 Cell *r, *x; 406 extern Cell **fldtab; 407 FILE *fp; 408 char *buf; 409 int bufsize = recsize; 410 int mode; 411 bool newflag; 412 double result; 413 414 if ((buf = (char *) malloc(bufsize)) == NULL) 415 FATAL("out of memory in getline"); 416 417 fflush(stdout); /* in case someone is waiting for a prompt */ 418 r = gettemp(); 419 if (a[1] != NULL) { /* getline < file */ 420 x = execute(a[2]); /* filename */ 421 mode = ptoi(a[1]); 422 if (mode == '|') /* input pipe */ 423 mode = LE; /* arbitrary flag */ 424 fp = openfile(mode, getsval(x), &newflag); 425 tempfree(x); 426 if (fp == NULL) 427 n = -1; 428 else 429 n = readrec(&buf, &bufsize, fp, newflag); 430 if (n <= 0) { 431 ; 432 } else if (a[0] != NULL) { /* getline var <file */ 433 x = execute(a[0]); 434 setsval(x, buf); 435 if (is_number(x->sval, & result)) { 436 x->fval = result; 437 x->tval |= NUM; 438 } 439 tempfree(x); 440 } else { /* getline <file */ 441 setsval(fldtab[0], buf); 442 if (is_number(fldtab[0]->sval, & result)) { 443 fldtab[0]->fval = result; 444 fldtab[0]->tval |= NUM; 445 } 446 } 447 } else { /* bare getline; use current input */ 448 if (a[0] == NULL) /* getline */ 449 n = getrec(&record, &recsize, true); 450 else { /* getline var */ 451 n = getrec(&buf, &bufsize, false); 452 if (n > 0) { 453 x = execute(a[0]); 454 setsval(x, buf); 455 if (is_number(x->sval, & result)) { 456 x->fval = result; 457 x->tval |= NUM; 458 } 459 tempfree(x); 460 } 461 } 462 } 463 setfval(r, (Awkfloat) n); 464 free(buf); 465 return r; 466 } 467 468 Cell *getnf(Node **a, int n) /* get NF */ 469 { 470 if (!donefld) 471 fldbld(); 472 return (Cell *) a[0]; 473 } 474 475 static char * 476 makearraystring(Node *p, const char *func) 477 { 478 char *buf; 479 int bufsz = recsize; 480 size_t blen; 481 482 if ((buf = (char *) malloc(bufsz)) == NULL) { 483 FATAL("%s: out of memory", func); 484 } 
485 486 blen = 0; 487 buf[blen] = '\0'; 488 489 for (; p; p = p->nnext) { 490 Cell *x = execute(p); /* expr */ 491 char *s = getsval(x); 492 size_t seplen = strlen(getsval(subseploc)); 493 size_t nsub = p->nnext ? seplen : 0; 494 size_t slen = strlen(s); 495 size_t tlen = blen + slen + nsub; 496 497 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 498 FATAL("%s: out of memory %s[%s...]", 499 func, x->nval, buf); 500 } 501 memcpy(buf + blen, s, slen); 502 if (nsub) { 503 memcpy(buf + blen + slen, *SUBSEP, nsub); 504 } 505 buf[tlen] = '\0'; 506 blen = tlen; 507 tempfree(x); 508 } 509 return buf; 510 } 511 512 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 513 { 514 Cell *x, *z; 515 char *buf; 516 517 x = execute(a[0]); /* Cell* for symbol table */ 518 buf = makearraystring(a[1], __func__); 519 if (!isarr(x)) { 520 DPRINTF("making %s into an array\n", NN(x->nval)); 521 if (freeable(x)) 522 xfree(x->sval); 523 x->tval &= ~(STR|NUM|DONTFREE); 524 x->tval |= ARR; 525 x->sval = (char *) makesymtab(NSYMTAB); 526 } 527 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 528 z->ctype = OCELL; 529 z->csub = CVAR; 530 tempfree(x); 531 free(buf); 532 return(z); 533 } 534 535 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 536 { 537 Cell *x; 538 539 x = execute(a[0]); /* Cell* for symbol table */ 540 if (x == symtabloc) { 541 FATAL("cannot delete SYMTAB or its elements"); 542 } 543 if (!isarr(x)) 544 return True; 545 if (a[1] == NULL) { /* delete the elements, not the table */ 546 freesymtab(x); 547 x->tval &= ~STR; 548 x->tval |= ARR; 549 x->sval = (char *) makesymtab(NSYMTAB); 550 } else { 551 char *buf = makearraystring(a[1], __func__); 552 freeelem(x, buf); 553 free(buf); 554 } 555 tempfree(x); 556 return True; 557 } 558 559 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 560 { 561 Cell *ap, *k; 562 char *buf; 563 564 ap = execute(a[1]); /* array name */ 565 if (!isarr(ap)) 
{ 566 DPRINTF("making %s into an array\n", ap->nval); 567 if (freeable(ap)) 568 xfree(ap->sval); 569 ap->tval &= ~(STR|NUM|DONTFREE); 570 ap->tval |= ARR; 571 ap->sval = (char *) makesymtab(NSYMTAB); 572 } 573 buf = makearraystring(a[0], __func__); 574 k = lookup(buf, (Array *) ap->sval); 575 tempfree(ap); 576 free(buf); 577 if (k == NULL) 578 return(False); 579 else 580 return(True); 581 } 582 583 584 /* ======== utf-8 code ========== */ 585 586 /* 587 * Awk strings can contain ascii, random 8-bit items (eg Latin-1), 588 * or utf-8. u8_isutf tests whether a string starts with a valid 589 * utf-8 sequence, and returns 0 if not (e.g., high bit set). 590 * u8_nextlen returns length of next valid sequence, which is 591 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. 592 * u8_strlen returns length of string in valid utf-8 sequences 593 * and/or high-bit bytes. Conversion functions go between byte 594 * number and character number. 595 * 596 * In theory, this behaves the same as before for non-utf8 bytes. 597 * 598 * Limited checking! This is a potential security hole. 599 */ 600 601 /* is s the beginning of a valid utf-8 string? */ 602 /* return length 1..4 if yes, 0 if no */ 603 int u8_isutf(const char *s) 604 { 605 int n, ret; 606 unsigned char c; 607 608 c = s[0]; 609 if (c < 128 || awk_mb_cur_max == 1) 610 return 1; /* what if it's 0? */ 611 612 n = strlen(s); 613 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 614 ret = 2; /* 110xxxxx 10xxxxxx */ 615 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 616 && (s[2] & 0xC0) == 0x80) { 617 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ 618 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 619 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 620 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 621 } else { 622 ret = 0; 623 } 624 return ret; 625 } 626 627 /* Convert (prefix of) utf8 string to utf-32 rune. 
*/ 628 /* Sets *rune to the value, returns the length. */ 629 /* No error checking: watch out. */ 630 int u8_rune(int *rune, const char *s) 631 { 632 int n, ret; 633 unsigned char c; 634 635 c = s[0]; 636 if (c < 128 || awk_mb_cur_max == 1) { 637 *rune = c; 638 return 1; 639 } 640 641 n = strlen(s); 642 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 643 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ 644 ret = 2; 645 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 646 && (s[2] & 0xC0) == 0x80) { 647 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); 648 /* 1110xxxx 10xxxxxx 10xxxxxx */ 649 ret = 3; 650 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 651 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 652 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); 653 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 654 ret = 4; 655 } else { 656 *rune = c; 657 ret = 1; 658 } 659 return ret; /* returns one byte if sequence doesn't look like utf */ 660 } 661 662 /* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */ 663 int u8_nextlen(const char *s) 664 { 665 int len; 666 667 len = u8_isutf(s); 668 if (len == 0) 669 len = 1; 670 return len; 671 } 672 673 /* return number of utf characters or single non-utf bytes */ 674 int u8_strlen(const char *s) 675 { 676 int i, len, n, totlen; 677 unsigned char c; 678 679 n = strlen(s); 680 totlen = 0; 681 for (i = 0; i < n; i += len) { 682 c = s[i]; 683 if (c < 128 || awk_mb_cur_max == 1) { 684 len = 1; 685 } else { 686 len = u8_nextlen(&s[i]); 687 } 688 totlen++; 689 if (i > n) 690 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i); 691 } 692 return totlen; 693 } 694 695 /* convert utf-8 char number in a string to its byte offset */ 696 int u8_char2byte(const char *s, int charnum) 697 { 698 int n; 699 int bytenum = 0; 700 701 while (charnum > 0) { 702 n = u8_nextlen(s); 703 s += n; 704 
bytenum += n; 705 charnum--; 706 } 707 return bytenum; 708 } 709 710 /* convert byte offset in s to utf-8 char number that starts there */ 711 int u8_byte2char(const char *s, int bytenum) 712 { 713 int i, len, b; 714 int charnum = 0; /* BUG: what origin? */ 715 /* should be 0 to match start==0 which means no match */ 716 717 b = strlen(s); 718 if (bytenum > b) { 719 return -1; /* ??? */ 720 } 721 for (i = 0; i <= bytenum; i += len) { 722 len = u8_nextlen(s+i); 723 charnum++; 724 } 725 return charnum; 726 } 727 728 /* runetochar() adapted from rune.c in the Plan 9 distribution */ 729 730 enum 731 { 732 Runeerror = 128, /* from somewhere else */ 733 Runemax = 0x10FFFF, 734 735 Bit1 = 7, 736 Bitx = 6, 737 Bit2 = 5, 738 Bit3 = 4, 739 Bit4 = 3, 740 Bit5 = 2, 741 742 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 743 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 744 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 745 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 746 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 747 T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ 748 749 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ 750 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ 751 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ 752 Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ 753 754 Maskx = (1<<Bitx)-1, /* 0011 1111 */ 755 Testx = Maskx ^ 0xFF, /* 1100 0000 */ 756 757 }; 758 759 int runetochar(char *str, int c) 760 { 761 /* one character sequence 00000-0007F => 00-7F */ 762 if (c <= Rune1) { 763 str[0] = c; 764 return 1; 765 } 766 767 /* two character sequence 00080-007FF => T2 Tx */ 768 if (c <= Rune2) { 769 str[0] = T2 | (c >> 1*Bitx); 770 str[1] = Tx | (c & Maskx); 771 return 2; 772 } 773 774 /* three character sequence 00800-0FFFF => T3 Tx Tx */ 775 if (c > Runemax) 776 c = Runeerror; 777 if (c <= Rune3) { 778 str[0] = T3 | (c >> 2*Bitx); 779 str[1] = Tx | ((c >> 1*Bitx) & Maskx); 780 str[2] = Tx | 
(c & Maskx); 781 return 3; 782 } 783 784 /* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */ 785 str[0] = T4 | (c >> 3*Bitx); 786 str[1] = Tx | ((c >> 2*Bitx) & Maskx); 787 str[2] = Tx | ((c >> 1*Bitx) & Maskx); 788 str[3] = Tx | (c & Maskx); 789 return 4; 790 } 791 792 793 /* ========== end of utf8 code =========== */ 794 795 796 797 Cell *matchop(Node **a, int n) /* ~ and match() */ 798 { 799 Cell *x, *y, *z; 800 char *s, *t; 801 int i; 802 int cstart, cpatlen, len; 803 fa *pfa; 804 int (*mf)(fa *, const char *) = match, mode = 0; 805 806 if (n == MATCHFCN) { 807 mf = pmatch; 808 mode = 1; 809 } 810 x = execute(a[1]); /* a[1] = target text */ 811 s = getsval(x); 812 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 813 i = (*mf)((fa *) a[2], s); 814 else { 815 y = execute(a[2]); /* a[2] = regular expr */ 816 t = getsval(y); 817 pfa = makedfa(t, mode); 818 i = (*mf)(pfa, s); 819 tempfree(y); 820 } 821 z = x; 822 if (n == MATCHFCN) { 823 int start = patbeg - s + 1; /* origin 1 */ 824 if (patlen < 0) { 825 start = 0; /* not found */ 826 } else { 827 cstart = u8_byte2char(s, start-1); 828 cpatlen = 0; 829 for (i = 0; i < patlen; i += len) { 830 len = u8_nextlen(patbeg+i); 831 cpatlen++; 832 } 833 834 start = cstart; 835 patlen = cpatlen; 836 } 837 838 setfval(rstartloc, (Awkfloat) start); 839 setfval(rlengthloc, (Awkfloat) patlen); 840 x = gettemp(); 841 x->tval = NUM; 842 x->fval = start; 843 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 844 x = True; 845 else 846 x = False; 847 848 tempfree(z); 849 return x; 850 } 851 852 853 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 854 { 855 Cell *x, *y; 856 int i; 857 858 x = execute(a[0]); 859 i = istrue(x); 860 tempfree(x); 861 switch (n) { 862 case BOR: 863 if (i) return(True); 864 y = execute(a[1]); 865 i = istrue(y); 866 tempfree(y); 867 if (i) return(True); 868 else return(False); 869 case AND: 870 if ( !i ) return(False); 871 y = execute(a[1]); 872 i = 
istrue(y); 873 tempfree(y); 874 if (i) return(True); 875 else return(False); 876 case NOT: 877 if (i) return(False); 878 else return(True); 879 default: /* can't happen */ 880 FATAL("unknown boolean operator %d", n); 881 } 882 return 0; /*NOTREACHED*/ 883 } 884 885 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 886 { 887 int i; 888 Cell *x, *y; 889 Awkfloat j; 890 bool x_is_nan, y_is_nan; 891 892 x = execute(a[0]); 893 y = execute(a[1]); 894 x_is_nan = isnan(x->fval); 895 y_is_nan = isnan(y->fval); 896 if (x->tval&NUM && y->tval&NUM) { 897 if ((x_is_nan || y_is_nan) && n != NE) 898 return(False); 899 j = x->fval - y->fval; 900 i = j<0? -1: (j>0? 1: 0); 901 } else { 902 i = strcmp(getsval(x), getsval(y)); 903 } 904 tempfree(x); 905 tempfree(y); 906 switch (n) { 907 case LT: if (i<0) return(True); 908 else return(False); 909 case LE: if (i<=0) return(True); 910 else return(False); 911 case NE: if (x_is_nan && y_is_nan) return(True); 912 else if (i!=0) return(True); 913 else return(False); 914 case EQ: if (i == 0) return(True); 915 else return(False); 916 case GE: if (i>=0) return(True); 917 else return(False); 918 case GT: if (i>0) return(True); 919 else return(False); 920 default: /* can't happen */ 921 FATAL("unknown relational operator %d", n); 922 } 923 return 0; /*NOTREACHED*/ 924 } 925 926 void tfree(Cell *a) /* free a tempcell */ 927 { 928 if (freeable(a)) { 929 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 930 xfree(a->sval); 931 } 932 if (a == tmps) 933 FATAL("tempcell list is curdled"); 934 a->cnext = tmps; 935 tmps = a; 936 } 937 938 Cell *gettemp(void) /* get a tempcell */ 939 { int i; 940 Cell *x; 941 942 if (!tmps) { 943 tmps = (Cell *) calloc(100, sizeof(*tmps)); 944 if (!tmps) 945 FATAL("out of space for temporaries"); 946 for (i = 1; i < 100; i++) 947 tmps[i-1].cnext = &tmps[i]; 948 tmps[i-1].cnext = NULL; 949 } 950 x = tmps; 951 tmps = x->cnext; 952 *x = tempcell; 953 return(x); 954 } 955 956 Cell *indirect(Node **a, int n) 
/* $( a[0] ) */ 957 { 958 Awkfloat val; 959 Cell *x; 960 int m; 961 962 x = execute(a[0]); 963 val = getfval(x); /* freebsd: defend against super large field numbers */ 964 if ((Awkfloat)INT_MAX < val) 965 FATAL("trying to access out of range field %s", x->nval); 966 m = (int) val; 967 tempfree(x); 968 x = fieldadr(m); 969 x->ctype = OCELL; /* BUG? why are these needed? */ 970 x->csub = CFLD; 971 return(x); 972 } 973 974 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 975 { 976 int k, m, n; 977 int mb, nb; 978 char *s; 979 int temp; 980 Cell *x, *y, *z = NULL; 981 982 x = execute(a[0]); 983 y = execute(a[1]); 984 if (a[2] != NULL) 985 z = execute(a[2]); 986 s = getsval(x); 987 k = u8_strlen(s) + 1; 988 if (k <= 1) { 989 tempfree(x); 990 tempfree(y); 991 if (a[2] != NULL) { 992 tempfree(z); 993 } 994 x = gettemp(); 995 setsval(x, ""); 996 return(x); 997 } 998 m = (int) getfval(y); 999 if (m <= 0) 1000 m = 1; 1001 else if (m > k) 1002 m = k; 1003 tempfree(y); 1004 if (a[2] != NULL) { 1005 n = (int) getfval(z); 1006 tempfree(z); 1007 } else 1008 n = k - 1; 1009 if (n < 0) 1010 n = 0; 1011 else if (n > k - m) 1012 n = k - m; 1013 /* m is start, n is length from there */ 1014 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 1015 y = gettemp(); 1016 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ 1017 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ 1018 1019 temp = s[nb]; /* with thanks to John Linderman */ 1020 s[nb] = '\0'; 1021 setsval(y, s + mb); 1022 s[nb] = temp; 1023 tempfree(x); 1024 return(y); 1025 } 1026 1027 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 1028 { 1029 Cell *x, *y, *z; 1030 char *s1, *s2, *p1, *p2, *q; 1031 Awkfloat v = 0.0; 1032 1033 x = execute(a[0]); 1034 s1 = getsval(x); 1035 y = execute(a[1]); 1036 s2 = getsval(y); 1037 1038 z = gettemp(); 1039 for (p1 = s1; *p1 != '\0'; p1++) { 1040 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 1041 continue; 1042 if (*p2 == '\0') { 1043 
/* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ 1044 1045 /* should be a function: used in match() as well */ 1046 int i, len; 1047 v = 0; 1048 for (i = 0; i < p1-s1+1; i += len) { 1049 len = u8_nextlen(s1+i); 1050 v++; 1051 } 1052 break; 1053 } 1054 } 1055 tempfree(x); 1056 tempfree(y); 1057 setfval(z, v); 1058 return(z); 1059 } 1060 1061 int has_utf8(char *s) /* return 1 if s contains any utf-8 (2 bytes or more) character */ 1062 { 1063 int n; 1064 1065 for (n = 0; *s != 0; s += n) { 1066 n = u8_nextlen(s); 1067 if (n > 1) 1068 return 1; 1069 } 1070 return 0; 1071 } 1072 1073 #define MAXNUMSIZE 50 1074 1075 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 1076 { 1077 char *fmt; 1078 char *p, *t; 1079 const char *os; 1080 Cell *x; 1081 int flag = 0, n; 1082 int fmtwd; /* format width */ 1083 int fmtsz = recsize; 1084 char *buf = *pbuf; 1085 int bufsize = *pbufsize; 1086 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 1087 #define BUFSZ(a) (bufsize - ((a) - buf)) 1088 1089 static bool first = true; 1090 static bool have_a_format = false; 1091 1092 if (first) { 1093 char xbuf[100]; 1094 1095 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 1096 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 1097 first = false; 1098 } 1099 1100 os = s; 1101 p = buf; 1102 if ((fmt = (char *) malloc(fmtsz)) == NULL) 1103 FATAL("out of memory in format()"); 1104 while (*s) { 1105 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 1106 if (*s != '%') { 1107 *p++ = *s++; 1108 continue; 1109 } 1110 if (*(s+1) == '%') { 1111 *p++ = '%'; 1112 s += 2; 1113 continue; 1114 } 1115 fmtwd = atoi(s+1); 1116 if (fmtwd < 0) 1117 fmtwd = -fmtwd; 1118 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 1119 for (t = fmt; (*t++ = *s) != '\0'; s++) { 1120 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 1121 FATAL("format item %.30s... 
ran format() out of memory", os); 1122 /* Ignore size specifiers */ 1123 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 1124 t--; 1125 continue; 1126 } 1127 if (isalpha((uschar)*s)) 1128 break; 1129 if (*s == '$') { 1130 FATAL("'$' not permitted in awk formats"); 1131 } 1132 if (*s == '*') { 1133 if (a == NULL) { 1134 FATAL("not enough args in printf(%s)", os); 1135 } 1136 x = execute(a); 1137 a = a->nnext; 1138 snprintf(t - 1, FMTSZ(t - 1), 1139 "%d", fmtwd=(int) getfval(x)); 1140 if (fmtwd < 0) 1141 fmtwd = -fmtwd; 1142 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 1143 t = fmt + strlen(fmt); 1144 tempfree(x); 1145 } 1146 } 1147 *t = '\0'; 1148 if (fmtwd < 0) 1149 fmtwd = -fmtwd; 1150 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 1151 switch (*s) { 1152 case 'a': case 'A': 1153 if (have_a_format) 1154 flag = *s; 1155 else 1156 flag = 'f'; 1157 break; 1158 case 'f': case 'e': case 'g': case 'E': case 'G': 1159 flag = 'f'; 1160 break; 1161 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 1162 flag = (*s == 'd' || *s == 'i') ? 
'd' : 'u'; 1163 *(t-1) = 'j'; 1164 *t = *s; 1165 *++t = '\0'; 1166 break; 1167 case 's': 1168 flag = 's'; 1169 break; 1170 case 'c': 1171 flag = 'c'; 1172 break; 1173 default: 1174 WARNING("weird printf conversion %s", fmt); 1175 flag = '?'; 1176 break; 1177 } 1178 if (a == NULL) 1179 FATAL("not enough args in printf(%s)", os); 1180 x = execute(a); 1181 a = a->nnext; 1182 n = MAXNUMSIZE; 1183 if (fmtwd > n) 1184 n = fmtwd; 1185 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 1186 switch (flag) { 1187 case '?': 1188 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 1189 t = getsval(x); 1190 n = strlen(t); 1191 if (fmtwd > n) 1192 n = fmtwd; 1193 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 1194 p += strlen(p); 1195 snprintf(p, BUFSZ(p), "%s", t); 1196 break; 1197 case 'a': 1198 case 'A': 1199 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 1200 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 1201 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 1202 1203 case 's': { 1204 t = getsval(x); 1205 n = strlen(t); 1206 /* if simple format or no utf-8 in the string, sprintf works */ 1207 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { 1208 if (fmtwd > n) 1209 n = fmtwd; 1210 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 1211 FATAL("huge string/format (%d chars) in printf %.30s..." 
\ 1212 " ran format() out of memory", n, t); 1213 snprintf(p, BUFSZ(p), fmt, t); 1214 break; 1215 } 1216 1217 /* get here if string has utf-8 chars and fmt is not plain %s */ 1218 /* "%-w.ps", where -, w and .p are all optional */ 1219 /* '0' before the w is a flag character */ 1220 /* fmt points at % */ 1221 int ljust = 0, wid = 0, prec = n, pad = 0; 1222 char *f = fmt+1; 1223 if (f[0] == '-') { 1224 ljust = 1; 1225 f++; 1226 } 1227 // flags '0' and '+' are recognized but skipped 1228 if (f[0] == '0') { 1229 f++; 1230 if (f[0] == '+') 1231 f++; 1232 } 1233 if (f[0] == '+') { 1234 f++; 1235 if (f[0] == '0') 1236 f++; 1237 } 1238 if (isdigit(f[0])) { /* there is a wid */ 1239 wid = strtol(f, &f, 10); 1240 } 1241 if (f[0] == '.') { /* there is a .prec */ 1242 prec = strtol(++f, &f, 10); 1243 } 1244 if (prec > u8_strlen(t)) 1245 prec = u8_strlen(t); 1246 pad = wid>prec ? wid - prec : 0; // has to be >= 0 1247 int i, k, n; 1248 1249 if (ljust) { // print prec chars from t, then pad blanks 1250 n = u8_char2byte(t, prec); 1251 for (k = 0; k < n; k++) { 1252 //putchar(t[k]); 1253 *p++ = t[k]; 1254 } 1255 for (i = 0; i < pad; i++) { 1256 //printf(" "); 1257 *p++ = ' '; 1258 } 1259 } else { // print pad blanks, then prec chars from t 1260 for (i = 0; i < pad; i++) { 1261 //printf(" "); 1262 *p++ = ' '; 1263 } 1264 n = u8_char2byte(t, prec); 1265 for (k = 0; k < n; k++) { 1266 //putchar(t[k]); 1267 *p++ = t[k]; 1268 } 1269 } 1270 *p = 0; 1271 break; 1272 } 1273 1274 case 'c': { 1275 /* 1276 * If a numeric value is given, awk should just turn 1277 * it into a character and print it: 1278 * BEGIN { printf("%c\n", 65) } 1279 * prints "A". 1280 * 1281 * But what if the numeric value is > 128 and 1282 * represents a valid Unicode code point?!? We do 1283 * our best to convert it back into UTF-8. If we 1284 * can't, we output the encoding of the Unicode 1285 * "invalid character", 0xFFFD. 
1286 */ 1287 if (isnum(x)) { 1288 int charval = (int) getfval(x); 1289 1290 if (charval != 0) { 1291 if (charval < 128 || awk_mb_cur_max == 1) 1292 snprintf(p, BUFSZ(p), fmt, charval); 1293 else { 1294 // possible unicode character 1295 size_t count; 1296 char *bs = wide_char_to_byte_str(charval, &count); 1297 1298 if (bs == NULL) { // invalid character 1299 // use unicode invalid character, 0xFFFD 1300 static char invalid_char[] = "\357\277\275"; 1301 bs = invalid_char; 1302 count = 3; 1303 } 1304 t = bs; 1305 n = count; 1306 goto format_percent_c; 1307 } 1308 } else { 1309 *p++ = '\0'; /* explicit null byte */ 1310 *p = '\0'; /* next output will start here */ 1311 } 1312 break; 1313 } 1314 t = getsval(x); 1315 n = u8_nextlen(t); 1316 format_percent_c: 1317 if (n < 2) { /* not utf8 */ 1318 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 1319 break; 1320 } 1321 1322 // utf8 character, almost same song and dance as for %s 1323 int ljust = 0, wid = 0, prec = n, pad = 0; 1324 char *f = fmt+1; 1325 if (f[0] == '-') { 1326 ljust = 1; 1327 f++; 1328 } 1329 // flags '0' and '+' are recognized but skipped 1330 if (f[0] == '0') { 1331 f++; 1332 if (f[0] == '+') 1333 f++; 1334 } 1335 if (f[0] == '+') { 1336 f++; 1337 if (f[0] == '0') 1338 f++; 1339 } 1340 if (isdigit(f[0])) { /* there is a wid */ 1341 wid = strtol(f, &f, 10); 1342 } 1343 if (f[0] == '.') { /* there is a .prec */ 1344 prec = strtol(++f, &f, 10); 1345 } 1346 if (prec > 1) // %c --> only one character 1347 prec = 1; 1348 pad = wid>prec ? 
wid - prec : 0; // has to be >= 0 1349 int i; 1350 1351 if (ljust) { // print one char from t, then pad blanks 1352 for (i = 0; i < n; i++) 1353 *p++ = t[i]; 1354 for (i = 0; i < pad; i++) { 1355 //printf(" "); 1356 *p++ = ' '; 1357 } 1358 } else { // print pad blanks, then prec chars from t 1359 for (i = 0; i < pad; i++) { 1360 //printf(" "); 1361 *p++ = ' '; 1362 } 1363 for (i = 0; i < n; i++) 1364 *p++ = t[i]; 1365 } 1366 *p = 0; 1367 break; 1368 } 1369 default: 1370 FATAL("can't happen: bad conversion %c in format()", flag); 1371 } 1372 1373 tempfree(x); 1374 p += strlen(p); 1375 s++; 1376 } 1377 *p = '\0'; 1378 free(fmt); 1379 for ( ; a; a = a->nnext) { /* evaluate any remaining args */ 1380 x = execute(a); 1381 tempfree(x); 1382 } 1383 *pbuf = buf; 1384 *pbufsize = bufsize; 1385 return p - buf; 1386 } 1387 1388 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 1389 { 1390 Cell *x; 1391 Node *y; 1392 char *buf; 1393 int bufsz=3*recsize; 1394 1395 if ((buf = (char *) malloc(bufsz)) == NULL) 1396 FATAL("out of memory in awksprintf"); 1397 y = a[0]->nnext; 1398 x = execute(a[0]); 1399 if (format(&buf, &bufsz, getsval(x), y) == -1) 1400 FATAL("sprintf string %.30s... too long. can't happen.", buf); 1401 tempfree(x); 1402 x = gettemp(); 1403 x->sval = buf; 1404 x->tval = STR; 1405 return(x); 1406 } 1407 1408 Cell *awkprintf(Node **a, int n) /* printf */ 1409 { /* a[0] is list of args, starting with format string */ 1410 /* a[1] is redirection operator, a[2] is redirection file */ 1411 FILE *fp; 1412 Cell *x; 1413 Node *y; 1414 char *buf; 1415 int len; 1416 int bufsz=3*recsize; 1417 1418 if ((buf = (char *) malloc(bufsz)) == NULL) 1419 FATAL("out of memory in awkprintf"); 1420 y = a[0]->nnext; 1421 x = execute(a[0]); 1422 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1423 FATAL("printf string %.30s... too long. 
can't happen.", buf); 1424 tempfree(x); 1425 if (a[1] == NULL) { 1426 /* fputs(buf, stdout); */ 1427 fwrite(buf, len, 1, stdout); 1428 if (ferror(stdout)) 1429 FATAL("write error on stdout"); 1430 } else { 1431 fp = redirect(ptoi(a[1]), a[2]); 1432 /* fputs(buf, fp); */ 1433 fwrite(buf, len, 1, fp); 1434 fflush(fp); 1435 if (ferror(fp)) 1436 FATAL("write error on %s", filename(fp)); 1437 } 1438 free(buf); 1439 return(True); 1440 } 1441 1442 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1443 { 1444 Awkfloat i, j = 0; 1445 double v; 1446 Cell *x, *y, *z; 1447 1448 x = execute(a[0]); 1449 i = getfval(x); 1450 tempfree(x); 1451 if (n != UMINUS && n != UPLUS) { 1452 y = execute(a[1]); 1453 j = getfval(y); 1454 tempfree(y); 1455 } 1456 z = gettemp(); 1457 switch (n) { 1458 case ADD: 1459 i += j; 1460 break; 1461 case MINUS: 1462 i -= j; 1463 break; 1464 case MULT: 1465 i *= j; 1466 break; 1467 case DIVIDE: 1468 if (j == 0) 1469 FATAL("division by zero"); 1470 i /= j; 1471 break; 1472 case MOD: 1473 if (j == 0) 1474 FATAL("division by zero in mod"); 1475 modf(i/j, &v); 1476 i = i - j * v; 1477 break; 1478 case UMINUS: 1479 i = -i; 1480 break; 1481 case UPLUS: /* handled by getfval(), above */ 1482 break; 1483 case POWER: 1484 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1485 i = ipow(i, (int) j); 1486 else { 1487 errno = 0; 1488 i = errcheck(pow(i, j), "pow"); 1489 } 1490 break; 1491 default: /* can't happen */ 1492 FATAL("illegal arithmetic operator %d", n); 1493 } 1494 setfval(z, i); 1495 return(z); 1496 } 1497 1498 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1499 { 1500 double v; 1501 1502 if (n <= 0) 1503 return 1; 1504 v = ipow(x, n/2); 1505 if (n % 2 == 0) 1506 return v * v; 1507 else 1508 return x * v * v; 1509 } 1510 1511 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. 
*/ 1512 { 1513 Cell *x, *z; 1514 int k; 1515 Awkfloat xf; 1516 1517 x = execute(a[0]); 1518 xf = getfval(x); 1519 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1520 if (n == PREINCR || n == PREDECR) { 1521 setfval(x, xf + k); 1522 return(x); 1523 } 1524 z = gettemp(); 1525 setfval(z, xf); 1526 setfval(x, xf + k); 1527 tempfree(x); 1528 return(z); 1529 } 1530 1531 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1532 { /* this is subtle; don't muck with it. */ 1533 Cell *x, *y; 1534 Awkfloat xf, yf; 1535 double v; 1536 1537 y = execute(a[1]); 1538 x = execute(a[0]); 1539 if (n == ASSIGN) { /* ordinary assignment */ 1540 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1541 ; /* self-assignment: leave alone unless it's a field or NF */ 1542 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1543 yf = getfval(y); 1544 setsval(x, getsval(y)); 1545 x->fval = yf; 1546 x->tval |= NUM; 1547 } 1548 else if (isstr(y)) 1549 setsval(x, getsval(y)); 1550 else if (isnum(y)) 1551 setfval(x, getfval(y)); 1552 else 1553 funnyvar(y, "read value of"); 1554 tempfree(y); 1555 return(x); 1556 } 1557 xf = getfval(x); 1558 yf = getfval(y); 1559 switch (n) { 1560 case ADDEQ: 1561 xf += yf; 1562 break; 1563 case SUBEQ: 1564 xf -= yf; 1565 break; 1566 case MULTEQ: 1567 xf *= yf; 1568 break; 1569 case DIVEQ: 1570 if (yf == 0) 1571 FATAL("division by zero in /="); 1572 xf /= yf; 1573 break; 1574 case MODEQ: 1575 if (yf == 0) 1576 FATAL("division by zero in %%="); 1577 modf(xf/yf, &v); 1578 xf = xf - yf * v; 1579 break; 1580 case POWEQ: 1581 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1582 xf = ipow(xf, (int) yf); 1583 else { 1584 errno = 0; 1585 xf = errcheck(pow(xf, yf), "pow"); 1586 } 1587 break; 1588 default: 1589 FATAL("illegal assignment operator %d", n); 1590 break; 1591 } 1592 tempfree(y); 1593 setfval(x, xf); 1594 return(x); 1595 } 1596 1597 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1598 { 1599 Cell *x, *y, *z; 1600 int n1, n2; 1601 char *s = 
NULL; 1602 int ssz = 0; 1603 1604 x = execute(a[0]); 1605 n1 = strlen(getsval(x)); 1606 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); 1607 memcpy(s, x->sval, n1); 1608 1609 tempfree(x); 1610 1611 y = execute(a[1]); 1612 n2 = strlen(getsval(y)); 1613 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1614 memcpy(s + n1, y->sval, n2); 1615 s[n1 + n2] = '\0'; 1616 1617 tempfree(y); 1618 1619 z = gettemp(); 1620 z->sval = s; 1621 z->tval = STR; 1622 1623 return(z); 1624 } 1625 1626 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1627 { 1628 Cell *x; 1629 1630 if (a[0] == NULL) 1631 x = execute(a[1]); 1632 else { 1633 x = execute(a[0]); 1634 if (istrue(x)) { 1635 tempfree(x); 1636 x = execute(a[1]); 1637 } 1638 } 1639 return x; 1640 } 1641 1642 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1643 { 1644 Cell *x; 1645 int pair; 1646 1647 pair = ptoi(a[3]); 1648 if (pairstack[pair] == 0) { 1649 x = execute(a[0]); 1650 if (istrue(x)) 1651 pairstack[pair] = 1; 1652 tempfree(x); 1653 } 1654 if (pairstack[pair] == 1) { 1655 x = execute(a[1]); 1656 if (istrue(x)) 1657 pairstack[pair] = 0; 1658 tempfree(x); 1659 x = execute(a[2]); 1660 return(x); 1661 } 1662 return(False); 1663 } 1664 1665 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1666 { 1667 Cell *x = NULL, *y, *ap; 1668 const char *s, *origs, *t; 1669 const char *fs = NULL; 1670 char *origfs = NULL; 1671 int sep; 1672 char temp, num[50]; 1673 int n, tempstat, arg3type; 1674 int j; 1675 double result; 1676 1677 y = execute(a[0]); /* source string */ 1678 origs = s = strdup(getsval(y)); 1679 tempfree(y); 1680 arg3type = ptoi(a[3]); 1681 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */ 1682 fs = getsval(fsloc); 1683 } else if (arg3type == STRING) { /* split(str,arr,"string") */ 1684 x = execute(a[2]); 1685 fs = origfs = strdup(getsval(x)); 1686 tempfree(x); 1687 } else if (arg3type == REGEXPR) { 1688 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1689 } 
else { 1690 FATAL("illegal type of split"); 1691 } 1692 sep = *fs; 1693 ap = execute(a[1]); /* array name */ 1694 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ 1695 freesymtab(ap); 1696 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1697 ap->tval &= ~STR; 1698 ap->tval |= ARR; 1699 ap->sval = (char *) makesymtab(NSYMTAB); 1700 1701 n = 0; 1702 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1703 /* split(s, a, //); have to arrange that it looks like empty sep */ 1704 arg3type = 0; 1705 fs = ""; 1706 sep = 0; 1707 } 1708 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1709 fa *pfa; 1710 if (arg3type == REGEXPR) { /* it's ready already */ 1711 pfa = (fa *) a[2]; 1712 } else { 1713 pfa = makedfa(fs, 1); 1714 } 1715 if (nematch(pfa,s)) { 1716 tempstat = pfa->initstat; 1717 pfa->initstat = 2; 1718 do { 1719 n++; 1720 snprintf(num, sizeof(num), "%d", n); 1721 temp = *patbeg; 1722 setptr(patbeg, '\0'); 1723 if (is_number(s, & result)) 1724 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1725 else 1726 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1727 setptr(patbeg, temp); 1728 s = patbeg + patlen; 1729 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1730 n++; 1731 snprintf(num, sizeof(num), "%d", n); 1732 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1733 pfa->initstat = tempstat; 1734 goto spdone; 1735 } 1736 } while (nematch(pfa,s)); 1737 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1738 /* cf gsub and refldbld */ 1739 } 1740 n++; 1741 snprintf(num, sizeof(num), "%d", n); 1742 if (is_number(s, & result)) 1743 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1744 else 1745 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1746 spdone: 1747 pfa = NULL; 1748 1749 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */ 1750 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ 1751 for (;;) { 
1752 char *fr = newt; 1753 n++; 1754 if (*s == '"' ) { /* start of "..." */ 1755 for (s++ ; *s != '\0'; ) { 1756 if (*s == '"' && s[1] != '\0' && s[1] == '"') { 1757 s += 2; /* doubled quote */ 1758 *fr++ = '"'; 1759 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { 1760 s++; /* skip over closing quote */ 1761 break; 1762 } else { 1763 *fr++ = *s++; 1764 } 1765 } 1766 *fr++ = 0; 1767 } else { /* unquoted field */ 1768 while (*s != ',' && *s != '\0') 1769 *fr++ = *s++; 1770 *fr++ = 0; 1771 } 1772 snprintf(num, sizeof(num), "%d", n); 1773 if (is_number(newt, &result)) 1774 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval); 1775 else 1776 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval); 1777 if (*s++ == '\0') 1778 break; 1779 } 1780 free(newt); 1781 1782 } else if (!CSV && sep == ' ') { /* usual case: split on white space */ 1783 for (n = 0; ; ) { 1784 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1785 while (ISWS(*s)) 1786 s++; 1787 if (*s == '\0') 1788 break; 1789 n++; 1790 t = s; 1791 do 1792 s++; 1793 while (*s != '\0' && !ISWS(*s)); 1794 temp = *s; 1795 setptr(s, '\0'); 1796 snprintf(num, sizeof(num), "%d", n); 1797 if (is_number(t, & result)) 1798 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1799 else 1800 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1801 setptr(s, temp); 1802 if (*s != '\0') 1803 s++; 1804 } 1805 1806 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1807 for (n = 0; *s != '\0'; s += u8_nextlen(s)) { 1808 char buf[10]; 1809 n++; 1810 snprintf(num, sizeof(num), "%d", n); 1811 1812 for (j = 0; j < u8_nextlen(s); j++) { 1813 buf[j] = s[j]; 1814 } 1815 buf[j] = '\0'; 1816 1817 if (isdigit((uschar)buf[0])) 1818 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1819 else 1820 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1821 } 1822 1823 } else if (*s != '\0') { /* some random single character */ 1824 for (;;) { 1825 n++; 1826 t = s; 1827 while (*s != sep && *s != '\0') 1828 
s++; 1829 temp = *s; 1830 setptr(s, '\0'); 1831 snprintf(num, sizeof(num), "%d", n); 1832 if (is_number(t, & result)) 1833 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1834 else 1835 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1836 setptr(s, temp); 1837 if (*s++ == '\0') 1838 break; 1839 } 1840 } 1841 tempfree(ap); 1842 xfree(origs); 1843 xfree(origfs); 1844 x = gettemp(); 1845 x->tval = NUM; 1846 x->fval = n; 1847 return(x); 1848 } 1849 1850 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1851 { 1852 Cell *x; 1853 1854 x = execute(a[0]); 1855 if (istrue(x)) { 1856 tempfree(x); 1857 x = execute(a[1]); 1858 } else { 1859 tempfree(x); 1860 x = execute(a[2]); 1861 } 1862 return(x); 1863 } 1864 1865 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1866 { 1867 Cell *x; 1868 1869 x = execute(a[0]); 1870 if (istrue(x)) { 1871 tempfree(x); 1872 x = execute(a[1]); 1873 } else if (a[2] != NULL) { 1874 tempfree(x); 1875 x = execute(a[2]); 1876 } 1877 return(x); 1878 } 1879 1880 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1881 { 1882 Cell *x; 1883 1884 for (;;) { 1885 x = execute(a[0]); 1886 if (!istrue(x)) 1887 return(x); 1888 tempfree(x); 1889 x = execute(a[1]); 1890 if (isbreak(x)) { 1891 x = True; 1892 return(x); 1893 } 1894 if (isnext(x) || isexit(x) || isret(x)) 1895 return(x); 1896 tempfree(x); 1897 } 1898 } 1899 1900 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1901 { 1902 Cell *x; 1903 1904 for (;;) { 1905 x = execute(a[0]); 1906 if (isbreak(x)) 1907 return True; 1908 if (isnext(x) || isexit(x) || isret(x)) 1909 return(x); 1910 tempfree(x); 1911 x = execute(a[1]); 1912 if (!istrue(x)) 1913 return(x); 1914 tempfree(x); 1915 } 1916 } 1917 1918 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1919 { 1920 Cell *x; 1921 1922 x = execute(a[0]); 1923 tempfree(x); 1924 for (;;) { 1925 if (a[1]!=NULL) { 1926 x = execute(a[1]); 1927 if (!istrue(x)) return(x); 1928 else tempfree(x); 1929 } 1930 x = 
execute(a[3]); 1931 if (isbreak(x)) /* turn off break */ 1932 return True; 1933 if (isnext(x) || isexit(x) || isret(x)) 1934 return(x); 1935 tempfree(x); 1936 x = execute(a[2]); 1937 tempfree(x); 1938 } 1939 } 1940 1941 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1942 { 1943 Cell *x, *vp, *arrayp, *cp, *ncp; 1944 Array *tp; 1945 int i; 1946 1947 vp = execute(a[0]); 1948 arrayp = execute(a[1]); 1949 if (!isarr(arrayp)) { 1950 return True; 1951 } 1952 tp = (Array *) arrayp->sval; 1953 tempfree(arrayp); 1954 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1955 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1956 setsval(vp, cp->nval); 1957 ncp = cp->cnext; 1958 x = execute(a[2]); 1959 if (isbreak(x)) { 1960 tempfree(vp); 1961 return True; 1962 } 1963 if (isnext(x) || isexit(x) || isret(x)) { 1964 tempfree(vp); 1965 return(x); 1966 } 1967 tempfree(x); 1968 } 1969 } 1970 return True; 1971 } 1972 1973 static char *nawk_convert(const char *s, int (*fun_c)(int), 1974 wint_t (*fun_wc)(wint_t)) 1975 { 1976 char *buf = NULL; 1977 char *pbuf = NULL; 1978 const char *ps = NULL; 1979 size_t n = 0; 1980 wchar_t wc; 1981 const size_t sz = awk_mb_cur_max; 1982 int unused; 1983 1984 if (sz == 1) { 1985 buf = tostring(s); 1986 1987 for (pbuf = buf; *pbuf; pbuf++) 1988 *pbuf = fun_c((uschar)*pbuf); 1989 1990 return buf; 1991 } else { 1992 /* upper/lower character may be shorter/longer */ 1993 buf = tostringN(s, strlen(s) * sz + 1); 1994 1995 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1996 /* 1997 * Reset internal state here too. 1998 * Assign result to avoid a compiler warning. (Casting to void 1999 * doesn't work.) 2000 * Increment said variable to avoid a different warning. 
#ifdef __DJGPP__
/* Replacement towupper for DJGPP builds: converts codes 0..255 via toupper(). */
static wint_t towupper(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? toupper(wc & 0xFF) : wc;
}

/* Replacement towlower for DJGPP builds: converts codes 0..255 via tolower(). */
static wint_t towlower(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? tolower(wc & 0xFF) : wc;
}
#endif

/* Return a newly allocated upper-case copy of s. */
static char *nawk_toupper(const char *s)
{
	char *converted = nawk_convert(s, toupper, towupper);

	return converted;
}

/* Return a newly allocated lower-case copy of s. */
static char *nawk_tolower(const char *s)
{
	char *converted = nawk_convert(s, tolower, towlower);

	return converted;
}
should be function*/ 2079 else 2080 u = u8_strlen(getsval(x)); 2081 break; 2082 case FLOG: 2083 errno = 0; 2084 u = errcheck(log(getfval(x)), "log"); 2085 break; 2086 case FINT: 2087 modf(getfval(x), &u); break; 2088 case FEXP: 2089 errno = 0; 2090 u = errcheck(exp(getfval(x)), "exp"); 2091 break; 2092 case FSQRT: 2093 errno = 0; 2094 u = errcheck(sqrt(getfval(x)), "sqrt"); 2095 break; 2096 case FSIN: 2097 u = sin(getfval(x)); break; 2098 case FCOS: 2099 u = cos(getfval(x)); break; 2100 case FATAN: 2101 if (nextarg == NULL) { 2102 WARNING("atan2 requires two arguments; returning 1.0"); 2103 u = 1.0; 2104 } else { 2105 y = execute(a[1]->nnext); 2106 u = atan2(getfval(x), getfval(y)); 2107 tempfree(y); 2108 nextarg = nextarg->nnext; 2109 } 2110 break; 2111 case FCOMPL: 2112 u = ~((int)getfval(x)); 2113 break; 2114 case FAND: 2115 if (nextarg == 0) { 2116 WARNING("and requires two arguments; returning 0"); 2117 u = 0; 2118 break; 2119 } 2120 y = execute(a[1]->nnext); 2121 u = ((int)getfval(x)) & ((int)getfval(y)); 2122 tempfree(y); 2123 nextarg = nextarg->nnext; 2124 break; 2125 case FFOR: 2126 if (nextarg == 0) { 2127 WARNING("or requires two arguments; returning 0"); 2128 u = 0; 2129 break; 2130 } 2131 y = execute(a[1]->nnext); 2132 u = ((int)getfval(x)) | ((int)getfval(y)); 2133 tempfree(y); 2134 nextarg = nextarg->nnext; 2135 break; 2136 case FXOR: 2137 if (nextarg == 0) { 2138 WARNING("xor requires two arguments; returning 0"); 2139 u = 0; 2140 break; 2141 } 2142 y = execute(a[1]->nnext); 2143 u = ((int)getfval(x)) ^ ((int)getfval(y)); 2144 tempfree(y); 2145 nextarg = nextarg->nnext; 2146 break; 2147 case FLSHIFT: 2148 if (nextarg == 0) { 2149 WARNING("lshift requires two arguments; returning 0"); 2150 u = 0; 2151 break; 2152 } 2153 y = execute(a[1]->nnext); 2154 u = ((int)getfval(x)) << ((int)getfval(y)); 2155 tempfree(y); 2156 nextarg = nextarg->nnext; 2157 break; 2158 case FRSHIFT: 2159 if (nextarg == 0) { 2160 WARNING("rshift requires two arguments; returning 
0"); 2161 u = 0; 2162 break; 2163 } 2164 y = execute(a[1]->nnext); 2165 u = ((int)getfval(x)) >> ((int)getfval(y)); 2166 tempfree(y); 2167 nextarg = nextarg->nnext; 2168 break; 2169 case FSYSTEM: 2170 fflush(stdout); /* in case something is buffered already */ 2171 estatus = status = system(getsval(x)); 2172 if (status != -1) { 2173 if (WIFEXITED(status)) { 2174 estatus = WEXITSTATUS(status); 2175 } else if (WIFSIGNALED(status)) { 2176 estatus = WTERMSIG(status) + 256; 2177 #ifdef WCOREDUMP 2178 if (WCOREDUMP(status)) 2179 estatus += 256; 2180 #endif 2181 } else /* something else?!? */ 2182 estatus = 0; 2183 } 2184 /* else estatus was set to -1 */ 2185 u = estatus; 2186 break; 2187 case FRAND: 2188 /* random() returns numbers in [0..2^31-1] 2189 * in order to get a number in [0, 1), divide it by 2^31 2190 */ 2191 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 2192 break; 2193 case FSRAND: 2194 if (isrec(x)) /* no argument provided */ 2195 u = time((time_t *)0); 2196 else 2197 u = getfval(x); 2198 tmp = u; 2199 srandom((unsigned long) u); 2200 u = srand_seed; 2201 srand_seed = tmp; 2202 break; 2203 case FTOUPPER: 2204 case FTOLOWER: 2205 if (t == FTOUPPER) 2206 buf = nawk_toupper(getsval(x)); 2207 else 2208 buf = nawk_tolower(getsval(x)); 2209 tempfree(x); 2210 x = gettemp(); 2211 setsval(x, buf); 2212 free(buf); 2213 return x; 2214 case FFLUSH: 2215 if (isrec(x) || strlen(getsval(x)) == 0) { 2216 flush_all(); /* fflush() or fflush("") -> all */ 2217 u = 0; 2218 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 2219 u = EOF; 2220 else 2221 u = fflush(fp); 2222 break; 2223 case FMKTIME: 2224 memset(&tmbuf, 0, sizeof(tmbuf)); 2225 tm = &tmbuf; 2226 t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 2227 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 2228 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 2229 switch (t) { 2230 case 6: 2231 tm->tm_isdst = -1; /* let mktime figure it out */ 2232 /* FALLTHROUGH */ 2233 case 7: 2234 tm->tm_year -= 1900; 2235 
tm->tm_mon--; 2236 u = mktime(tm); 2237 break; 2238 default: 2239 u = -1; 2240 break; 2241 } 2242 break; 2243 case FSYSTIME: 2244 u = time((time_t *) 0); 2245 break; 2246 case FSTRFTIME: 2247 /* strftime([format [,timestamp]]) */ 2248 if (nextarg) { 2249 y = execute(nextarg); 2250 nextarg = nextarg->nnext; 2251 tv = (time_t) getfval(y); 2252 tempfree(y); 2253 } else 2254 tv = time((time_t *) 0); 2255 tm = localtime(&tv); 2256 if (tm == NULL) 2257 FATAL("bad time %ld", (long)tv); 2258 2259 if (isrec(x)) { 2260 /* format argument not provided, use default */ 2261 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 2262 } else 2263 fmt = tostring(getsval(x)); 2264 2265 sz = 32; 2266 buf = NULL; 2267 do { 2268 if ((buf = realloc(buf, (sz *= 2))) == NULL) 2269 FATAL("out of memory in strftime"); 2270 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 2271 2272 y = gettemp(); 2273 setsval(y, buf); 2274 free(fmt); 2275 free(buf); 2276 2277 return y; 2278 default: /* can't happen */ 2279 FATAL("illegal function type %d", t); 2280 break; 2281 } 2282 tempfree(x); 2283 x = gettemp(); 2284 setfval(x, u); 2285 if (nextarg != NULL) { 2286 WARNING("warning: function has too many arguments"); 2287 for ( ; nextarg; nextarg = nextarg->nnext) { 2288 y = execute(nextarg); 2289 tempfree(y); 2290 } 2291 } 2292 return(x); 2293 } 2294 2295 Cell *printstat(Node **a, int n) /* print a[0] */ 2296 { 2297 Node *x; 2298 Cell *y; 2299 FILE *fp; 2300 2301 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 2302 fp = stdout; 2303 else 2304 fp = redirect(ptoi(a[1]), a[2]); 2305 for (x = a[0]; x != NULL; x = x->nnext) { 2306 y = execute(x); 2307 fputs(getpssval(y), fp); 2308 tempfree(y); 2309 if (x->nnext == NULL) 2310 fputs(getsval(orsloc), fp); 2311 else 2312 fputs(getsval(ofsloc), fp); 2313 } 2314 if (a[1] != NULL) 2315 fflush(fp); 2316 if (ferror(fp)) 2317 FATAL("write error on %s", filename(fp)); 2318 return(True); 2319 } 2320 2321 Cell *nullproc(Node **a, int n) 2322 { 2323 
return 0; 2324 } 2325 2326 2327 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 2328 { 2329 FILE *fp; 2330 Cell *x; 2331 char *fname; 2332 2333 x = execute(b); 2334 fname = getsval(x); 2335 fp = openfile(a, fname, NULL); 2336 if (fp == NULL) 2337 FATAL("can't open file %s", fname); 2338 tempfree(x); 2339 return fp; 2340 } 2341 2342 struct files { 2343 FILE *fp; 2344 const char *fname; 2345 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 2346 } *files; 2347 2348 size_t nfiles; 2349 2350 static void stdinit(void) /* in case stdin, etc., are not constants */ 2351 { 2352 nfiles = FOPEN_MAX; 2353 files = (struct files *) calloc(nfiles, sizeof(*files)); 2354 if (files == NULL) 2355 FATAL("can't allocate file memory for %zu files", nfiles); 2356 files[0].fp = stdin; 2357 files[0].fname = tostring("/dev/stdin"); 2358 files[0].mode = LT; 2359 files[1].fp = stdout; 2360 files[1].fname = tostring("/dev/stdout"); 2361 files[1].mode = GT; 2362 files[2].fp = stderr; 2363 files[2].fname = tostring("/dev/stderr"); 2364 files[2].mode = GT; 2365 } 2366 2367 FILE *openfile(int a, const char *us, bool *pnewflag) 2368 { 2369 const char *s = us; 2370 size_t i; 2371 int m; 2372 FILE *fp = NULL; 2373 struct stat sbuf; 2374 2375 if (*s == '\0') 2376 FATAL("null file name in print or getline"); 2377 2378 for (i = 0; i < nfiles; i++) 2379 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 2380 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 2381 a == FFLUSH)) { 2382 if (pnewflag) 2383 *pnewflag = false; 2384 return files[i].fp; 2385 } 2386 if (a == FFLUSH) /* didn't find it, so don't create it! 
*/ 2387 return NULL; 2388 for (i = 0; i < nfiles; i++) 2389 if (files[i].fp == NULL) 2390 break; 2391 if (i >= nfiles) { 2392 struct files *nf; 2393 size_t nnf = nfiles + FOPEN_MAX; 2394 nf = (struct files *) realloc(files, nnf * sizeof(*nf)); 2395 if (nf == NULL) 2396 FATAL("cannot grow files for %s and %zu files", s, nnf); 2397 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 2398 nfiles = nnf; 2399 files = nf; 2400 } 2401 2402 fflush(stdout); /* force a semblance of order */ 2403 2404 /* don't try to read or write a directory */ 2405 if (a == LT || a == GT || a == APPEND) 2406 if (stat(s, &sbuf) == 0 && S_ISDIR(sbuf.st_mode)) 2407 return NULL; 2408 2409 m = a; 2410 if (a == GT) { 2411 fp = fopen(s, "w"); 2412 } else if (a == APPEND) { 2413 fp = fopen(s, "a"); 2414 m = GT; /* so can mix > and >> */ 2415 } else if (a == '|') { /* output pipe */ 2416 fp = popen(s, "w"); 2417 } else if (a == LE) { /* input pipe */ 2418 fp = popen(s, "r"); 2419 } else if (a == LT) { /* getline <file */ 2420 fp = strcmp(s, "-") == 0 ? 
stdin : fopen(s, "r"); /* "-" is stdin */ 2421 } else /* can't happen */ 2422 FATAL("illegal redirection %d", a); 2423 if (fp != NULL) { 2424 files[i].fname = tostring(s); 2425 files[i].fp = fp; 2426 files[i].mode = m; 2427 if (pnewflag) 2428 *pnewflag = true; 2429 if (fp != stdin && fp != stdout && fp != stderr) 2430 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 2431 } 2432 return fp; 2433 } 2434 2435 const char *filename(FILE *fp) 2436 { 2437 size_t i; 2438 2439 for (i = 0; i < nfiles; i++) 2440 if (fp == files[i].fp) 2441 return files[i].fname; 2442 return "???"; 2443 } 2444 2445 Cell *closefile(Node **a, int n) 2446 { 2447 Cell *x; 2448 size_t i; 2449 bool stat; 2450 2451 x = execute(a[0]); 2452 getsval(x); 2453 stat = true; 2454 for (i = 0; i < nfiles; i++) { 2455 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 2456 continue; 2457 if (files[i].mode == GT || files[i].mode == '|') 2458 fflush(files[i].fp); 2459 if (ferror(files[i].fp)) { 2460 if ((files[i].mode == GT && files[i].fp != stderr) 2461 || files[i].mode == '|') 2462 FATAL("write error on %s", files[i].fname); 2463 else 2464 WARNING("i/o error occurred on %s", files[i].fname); 2465 } 2466 if (files[i].fp == stdin || files[i].fp == stdout || 2467 files[i].fp == stderr) 2468 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2469 else if (files[i].mode == '|' || files[i].mode == LE) 2470 stat = pclose(files[i].fp) == -1; 2471 else 2472 stat = fclose(files[i].fp) == EOF; 2473 if (stat) 2474 WARNING("i/o error occurred closing %s", files[i].fname); 2475 xfree(files[i].fname); 2476 files[i].fname = NULL; /* watch out for ref thru this */ 2477 files[i].fp = NULL; 2478 break; 2479 } 2480 tempfree(x); 2481 x = gettemp(); 2482 setfval(x, (Awkfloat) (stat ? -1 : 0)); 2483 return(x); 2484 } 2485 2486 void closeall(void) 2487 { 2488 size_t i; 2489 bool stat = false; 2490 2491 for (i = 0; i < nfiles; i++) { 2492 if (! 
files[i].fp) 2493 continue; 2494 if (files[i].mode == GT || files[i].mode == '|') 2495 fflush(files[i].fp); 2496 if (ferror(files[i].fp)) { 2497 if ((files[i].mode == GT && files[i].fp != stderr) 2498 || files[i].mode == '|') 2499 FATAL("write error on %s", files[i].fname); 2500 else 2501 WARNING("i/o error occurred on %s", files[i].fname); 2502 } 2503 if (files[i].fp == stdin || files[i].fp == stdout || 2504 files[i].fp == stderr) 2505 continue; 2506 if (files[i].mode == '|' || files[i].mode == LE) 2507 stat = pclose(files[i].fp) == -1; 2508 else 2509 stat = fclose(files[i].fp) == EOF; 2510 if (stat) 2511 WARNING("i/o error occurred while closing %s", files[i].fname); 2512 } 2513 } 2514 2515 static void flush_all(void) 2516 { 2517 size_t i; 2518 2519 for (i = 0; i < nfiles; i++) 2520 if (files[i].fp) 2521 fflush(files[i].fp); 2522 } 2523 2524 void backsub(char **pb_ptr, const char **sptr_ptr); 2525 2526 Cell *dosub(Node **a, int subop) /* sub and gsub */ 2527 { 2528 fa *pfa; 2529 int tempstat = 0; 2530 char *repl; 2531 Cell *x; 2532 2533 char *buf = NULL; 2534 char *pb = NULL; 2535 int bufsz = recsize; 2536 2537 const char *r, *s; 2538 const char *start; 2539 const char *noempty = NULL; /* empty match disallowed here */ 2540 size_t m = 0; /* match count */ 2541 size_t whichm = 0; /* which match to select, 0 = global */ 2542 int mtype; /* match type */ 2543 2544 if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */ 2545 pfa = (fa *) a[1]; 2546 } else { 2547 x = execute(a[1]); 2548 pfa = makedfa(getsval(x), 1); 2549 tempfree(x); 2550 } 2551 2552 x = execute(a[2]); /* replacement string */ 2553 repl = tostring(getsval(x)); 2554 tempfree(x); 2555 2556 switch (subop) { 2557 case SUB: 2558 whichm = 1; 2559 x = execute(a[3]); /* source string */ 2560 break; 2561 case GSUB: 2562 whichm = 0; 2563 x = execute(a[3]); /* source string */ 2564 break; 2565 default: 2566 FATAL("dosub: unrecognized subop: %d", subop); 2567 } 2568 2569 start = getsval(x); 2570 while 
(pmatch(pfa, start)) { 2571 if (buf == NULL) { 2572 if ((pb = buf = (char *) malloc(bufsz)) == NULL) 2573 FATAL("out of memory in dosub"); 2574 tempstat = pfa->initstat; 2575 pfa->initstat = 2; 2576 } 2577 2578 /* match types */ 2579 #define MT_IGNORE 0 /* unselected or invalid */ 2580 #define MT_INSERT 1 /* selected, empty */ 2581 #define MT_REPLACE 2 /* selected, not empty */ 2582 2583 /* an empty match just after replacement is invalid */ 2584 2585 if (patbeg == noempty && patlen == 0) { 2586 mtype = MT_IGNORE; /* invalid, not counted */ 2587 } else if (whichm == ++m || whichm == 0) { 2588 mtype = patlen ? MT_REPLACE : MT_INSERT; 2589 } else { 2590 mtype = MT_IGNORE; /* unselected, but counted */ 2591 } 2592 2593 /* leading text: */ 2594 if (patbeg > start) { 2595 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start), 2596 recsize, &pb, "dosub"); 2597 s = start; 2598 while (s < patbeg) 2599 *pb++ = *s++; 2600 } 2601 2602 if (mtype == MT_IGNORE) 2603 goto matching_text; /* skip replacement text */ 2604 2605 r = repl; 2606 while (*r != 0) { 2607 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub"); 2608 if (*r == '\\') { 2609 backsub(&pb, &r); 2610 } else if (*r == '&') { 2611 r++; 2612 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, 2613 &pb, "dosub"); 2614 for (s = patbeg; s < patbeg+patlen; ) 2615 *pb++ = *s++; 2616 } else { 2617 *pb++ = *r++; 2618 } 2619 } 2620 2621 matching_text: 2622 if (mtype == MT_REPLACE || *patbeg == '\0') 2623 goto next_search; /* skip matching text */ 2624 2625 if (patlen == 0) 2626 patlen = u8_nextlen(patbeg); 2627 adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub"); 2628 s = patbeg; 2629 while (s < patbeg + patlen) 2630 *pb++ = *s++; 2631 2632 next_search: 2633 start = patbeg + patlen; 2634 if (m == whichm || *patbeg == '\0') 2635 break; 2636 if (mtype == MT_REPLACE) 2637 noempty = start; 2638 2639 #undef MT_IGNORE 2640 #undef MT_INSERT 2641 #undef MT_REPLACE 2642 } 2643 2644 xfree(repl); 2645 2646 if (buf != NULL) { 2647 
pfa->initstat = tempstat; 2648 2649 /* trailing text */ 2650 adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub"); 2651 while ((*pb++ = *start++) != '\0') 2652 ; 2653 2654 setsval(x, buf); 2655 free(buf); 2656 } 2657 2658 tempfree(x); 2659 x = gettemp(); 2660 x->tval = NUM; 2661 x->fval = m; 2662 return x; 2663 } 2664 2665 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2666 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2667 { 2668 Cell *x, *y, *res, *h; 2669 char *rptr; 2670 const char *sptr; 2671 char *buf, *pb; 2672 const char *t, *q; 2673 fa *pfa; 2674 int mflag, tempstat, num, whichm; 2675 int bufsz = recsize; 2676 2677 if ((buf = malloc(bufsz)) == NULL) 2678 FATAL("out of memory in gensub"); 2679 mflag = 0; /* if mflag == 0, can replace empty string */ 2680 num = 0; 2681 x = execute(a[4]); /* source string */ 2682 t = getsval(x); 2683 res = copycell(x); /* target string - initially copy of source */ 2684 res->csub = CTEMP; /* result values are temporary */ 2685 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2686 pfa = (fa *) a[1]; /* regular expression */ 2687 else { 2688 y = execute(a[1]); 2689 pfa = makedfa(getsval(y), 1); 2690 tempfree(y); 2691 } 2692 y = execute(a[2]); /* replacement string */ 2693 h = execute(a[3]); /* which matches should be replaced */ 2694 sptr = getsval(h); 2695 if (sptr[0] == 'g' || sptr[0] == 'G') 2696 whichm = -1; 2697 else { 2698 /* 2699 * The specified number is index of replacement, starting 2700 * from 1. GNU awk treats index lower than 0 same as 2701 * 1, we do same for compatibility. 2702 */ 2703 whichm = (int) getfval(h) - 1; 2704 if (whichm < 0) 2705 whichm = 0; 2706 } 2707 tempfree(h); 2708 2709 if (pmatch(pfa, t)) { 2710 char *sl; 2711 2712 tempstat = pfa->initstat; 2713 pfa->initstat = 2; 2714 pb = buf; 2715 rptr = getsval(y); 2716 /* 2717 * XXX if there are any backreferences in subst string, 2718 * complain now. 
2719 */ 2720 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2721 if (strchr("0123456789", sl[1])) { 2722 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2723 } 2724 } 2725 2726 do { 2727 if (whichm >= 0 && whichm != num) { 2728 num++; 2729 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2730 2731 /* copy the part of string up to and including 2732 * match to output buffer */ 2733 while (t < patbeg + patlen) 2734 *pb++ = *t++; 2735 continue; 2736 } 2737 2738 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2739 if (mflag == 0) { /* can replace empty */ 2740 num++; 2741 sptr = rptr; 2742 while (*sptr != 0) { 2743 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2744 if (*sptr == '\\') { 2745 backsub(&pb, &sptr); 2746 } else if (*sptr == '&') { 2747 sptr++; 2748 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2749 for (q = patbeg; q < patbeg+patlen; ) 2750 *pb++ = *q++; 2751 } else 2752 *pb++ = *sptr++; 2753 } 2754 } 2755 if (*t == 0) /* at end */ 2756 goto done; 2757 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2758 *pb++ = *t++; 2759 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2760 FATAL("gensub result0 %.30s too big; can't happen", buf); 2761 mflag = 0; 2762 } 2763 else { /* matched nonempty string */ 2764 num++; 2765 sptr = t; 2766 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2767 while (sptr < patbeg) 2768 *pb++ = *sptr++; 2769 sptr = rptr; 2770 while (*sptr != 0) { 2771 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2772 if (*sptr == '\\') { 2773 backsub(&pb, &sptr); 2774 } else if (*sptr == '&') { 2775 sptr++; 2776 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2777 for (q = patbeg; q < patbeg+patlen; ) 2778 *pb++ = *q++; 2779 } else 2780 *pb++ = *sptr++; 2781 } 2782 t = patbeg + patlen; 2783 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2784 goto done; 2785 if (pb > buf + bufsz) 2786 
FATAL("gensub result1 %.30s too big; can't happen", buf); 2787 mflag = 1; 2788 } 2789 } while (pmatch(pfa,t)); 2790 sptr = t; 2791 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2792 while ((*pb++ = *sptr++) != 0) 2793 ; 2794 done: if (pb > buf + bufsz) 2795 FATAL("gensub result2 %.30s too big; can't happen", buf); 2796 *pb = '\0'; 2797 setsval(res, buf); 2798 pfa->initstat = tempstat; 2799 } 2800 tempfree(x); 2801 tempfree(y); 2802 free(buf); 2803 return(res); 2804 } 2805 2806 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2807 { /* sptr[0] == '\\' */ 2808 char *pb = *pb_ptr; 2809 const char *sptr = *sptr_ptr; 2810 static bool first = true; 2811 static bool do_posix = false; 2812 2813 if (first) { 2814 first = false; 2815 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2816 } 2817 2818 if (sptr[1] == '\\') { 2819 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2820 *pb++ = '\\'; 2821 *pb++ = '&'; 2822 sptr += 4; 2823 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2824 *pb++ = '\\'; 2825 sptr += 2; 2826 } else if (do_posix) { /* \\x -> \x */ 2827 sptr++; 2828 *pb++ = *sptr++; 2829 } else { /* \\x -> \\x */ 2830 *pb++ = *sptr++; 2831 *pb++ = *sptr++; 2832 } 2833 } else if (sptr[1] == '&') { /* literal & */ 2834 sptr++; 2835 *pb++ = *sptr++; 2836 } else /* literal \ */ 2837 *pb++ = *sptr++; 2838 2839 *pb_ptr = pb; 2840 *sptr_ptr = sptr; 2841 } 2842 2843 static char *wide_char_to_byte_str(int rune, size_t *outlen) 2844 { 2845 static char buf[5]; 2846 int len; 2847 2848 if (rune < 0 || rune > 0x10FFFF) 2849 return NULL; 2850 2851 memset(buf, 0, sizeof(buf)); 2852 2853 len = 0; 2854 if (rune <= 0x0000007F) { 2855 buf[len++] = rune; 2856 } else if (rune <= 0x000007FF) { 2857 // 110xxxxx 10xxxxxx 2858 buf[len++] = 0xC0 | (rune >> 6); 2859 buf[len++] = 0x80 | (rune & 0x3F); 2860 } else if (rune <= 0x0000FFFF) { 2861 // 1110xxxx 10xxxxxx 10xxxxxx 2862 buf[len++] = 0xE0 | (rune >> 12); 2863 buf[len++] = 0x80 | 
((rune >> 6) & 0x3F); 2864 buf[len++] = 0x80 | (rune & 0x3F); 2865 2866 } else { 2867 // 0x00010000 - 0x10FFFF 2868 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 2869 buf[len++] = 0xF0 | (rune >> 18); 2870 buf[len++] = 0x80 | ((rune >> 12) & 0x3F); 2871 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2872 buf[len++] = 0x80 | (rune & 0x3F); 2873 } 2874 2875 *outlen = len; 2876 buf[len++] = '\0'; 2877 2878 return buf; 2879 } 2880