1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 #define DEBUG 26 #include <stdio.h> 27 #include <ctype.h> 28 #include <errno.h> 29 #include <wctype.h> 30 #include <fcntl.h> 31 #include <setjmp.h> 32 #include <limits.h> 33 #include <math.h> 34 #include <string.h> 35 #include <stdlib.h> 36 #include <time.h> 37 #include <sys/types.h> 38 #include <sys/wait.h> 39 #include "awk.h" 40 #include "awkgram.tab.h" 41 42 43 static void stdinit(void); 44 static void flush_all(void); 45 static char *wide_char_to_byte_str(int rune, size_t *outlen); 46 47 #if 1 48 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 49 #else 50 void tempfree(Cell *p) { 51 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 52 WARNING("bad csub %d in Cell %d %s", 53 p->csub, p->ctype, p->sval); 54 } 55 if (istemp(p)) 56 tfree(p); 57 } 58 #endif 59 60 /* do we really need these? */ 61 /* #ifdef _NFILE */ 62 /* #ifndef FOPEN_MAX */ 63 /* #define FOPEN_MAX _NFILE */ 64 /* #endif */ 65 /* #endif */ 66 /* */ 67 /* #ifndef FOPEN_MAX */ 68 /* #define FOPEN_MAX 40 */ /* max number of open files */ 69 /* #endif */ 70 /* */ 71 /* #ifndef RAND_MAX */ 72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 73 /* #endif */ 74 75 jmp_buf env; 76 extern int pairstack[]; 77 extern Awkfloat srand_seed; 78 79 Node *winner = NULL; /* root of parse tree */ 80 Cell *tmps; /* free temporary cells for execution */ 81 82 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 83 Cell *True = &truecell; 84 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 85 Cell *False = &falsecell; 86 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 87 Cell *jbreak = &breakcell; 88 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 89 Cell *jcont = &contcell; 90 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 91 Cell *jnext = &nextcell; 92 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 93 Cell *jnextfile = &nextfilecell; 94 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 95 Cell *jexit = &exitcell; 96 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 97 Cell *jret = &retcell; 98 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 99 100 Node *curnode = NULL; /* the node being executed, for debugging */ 101 102 /* buffer memory management */ 103 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 104 const char *whatrtn) 105 /* pbuf: address of pointer to buffer being managed 106 * psiz: address of buffer size variable 107 * minlen: minimum length of buffer needed 108 * quantum: buffer size quantum 109 * pbptr: address of movable pointer into buffer, or 0 if none 110 * whatrtn: name of the calling routine if failure should cause fatal error 111 * 112 * return 0 for realloc failure, !=0 for success 113 */ 114 { 115 if (minlen > *psiz) { 116 char *tbuf; 117 int rminlen = quantum ? minlen % quantum : 0; 118 int boff = pbptr ? *pbptr - *pbuf : 0; 119 /* round up to next multiple of quantum */ 120 if (rminlen) 121 minlen += quantum - rminlen; 122 tbuf = (char *) realloc(*pbuf, minlen); 123 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 124 if (tbuf == NULL) { 125 if (whatrtn) 126 FATAL("out of memory in %s", whatrtn); 127 return 0; 128 } 129 *pbuf = tbuf; 130 *psiz = minlen; 131 if (pbptr) 132 *pbptr = tbuf + boff; 133 } 134 return 1; 135 } 136 137 void run(Node *a) /* execution of parse tree starts here */ 138 { 139 140 stdinit(); 141 execute(a); 142 closeall(); 143 } 144 145 Cell *execute(Node *u) /* execute a node of the parse tree */ 146 { 147 Cell *(*proc)(Node **, int); 148 Cell *x; 149 Node *a; 150 151 if (u == NULL) 152 return(True); 153 for (a = u; ; a = a->nnext) { 154 curnode = a; 155 if (isvalue(a)) { 156 x = (Cell *) (a->narg[0]); 157 if (isfld(x) && !donefld) 158 fldbld(); 159 else if (isrec(x) && !donerec) 160 recbld(); 161 return(x); 162 } 163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 164 FATAL("illegal statement"); 165 proc = proctab[a->nobj-FIRSTTOKEN]; 166 x = (*proc)(a->narg, a->nobj); 167 if (isfld(x) && !donefld) 168 fldbld(); 169 else if (isrec(x) && !donerec) 170 recbld(); 171 if (isexpr(a)) 172 return(x); 173 if (isjump(x)) 174 return(x); 175 if (a->nnext == NULL) 176 return(x); 177 tempfree(x); 178 } 179 } 180 181 182 Cell *program(Node **a, int n) /* execute an awk program */ 183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 184 Cell *x; 185 186 if (setjmp(env) != 0) 187 goto ex; 188 if (a[0]) { /* BEGIN */ 189 x = execute(a[0]); 190 if (isexit(x)) 191 return(True); 192 if (isjump(x)) 193 FATAL("illegal break, continue, next or nextfile from BEGIN"); 194 tempfree(x); 195 } 196 if (a[1] || a[2]) 197 while (getrec(&record, &recsize, true) > 0) { 198 x = execute(a[1]); 199 if (isexit(x)) 200 break; 201 tempfree(x); 202 } 203 ex: 204 if (setjmp(env) != 0) /* handles exit within END */ 205 goto ex1; 206 if (a[2]) { /* END */ 207 x = execute(a[2]); 208 if (isbreak(x) || isnext(x) || iscont(x)) 209 FATAL("illegal break, continue, next or nextfile from END"); 210 tempfree(x); 211 } 212 ex1: 213 return(True); 214 } 215 216 struct Frame { /* stack frame for awk function calls */ 217 int nargs; /* number of arguments in this call */ 218 Cell *fcncell; /* pointer to Cell for function */ 219 Cell **args; /* pointer to array of arguments after execute */ 220 Cell *retval; /* return value */ 221 }; 222 223 #define NARGS 50 /* max args in a call */ 224 225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 226 int nframe = 0; /* number of frames allocated */ 227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 228 229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 230 { 231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 232 int i, ncall, ndef; 233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 234 Node *x; 235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 236 Cell *y, *z, *fcn; 237 char *s; 238 239 fcn = execute(a[0]); /* the function itself */ 240 s = fcn->nval; 241 if (!isfcn(fcn)) 242 FATAL("calling undefined function %s", s); 243 if (frame == NULL) { 244 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 245 if (frame == NULL) 246 FATAL("out of space for stack frames calling %s", s); 247 } 248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 249 ncall++; 250 ndef = (int) fcn->fval; /* args in defn */ 251 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 252 if (ncall > ndef) 253 WARNING("function %s called with %d args, uses only %d", 254 s, ncall, ndef); 255 if (ncall + ndef > NARGS) 256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 257 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 258 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 259 y = execute(x); 260 oargs[i] = y; 261 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 263 if (isfcn(y)) 264 FATAL("can't use function %s as argument in %s", y->nval, s); 265 if (isarr(y)) 266 args[i] = y; /* arrays by ref */ 267 else 268 args[i] = copycell(y); 269 tempfree(y); 270 } 271 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 272 args[i] = gettemp(); 273 *args[i] = newcopycell; 274 } 275 frp++; /* now ok to up frame */ 276 if (frp >= frame + nframe) { 277 int dfp = frp - frame; /* old index */ 278 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); 279 if (frame == NULL) 280 FATAL("out of space for stack frames in %s", s); 281 frp = frame + dfp; 282 } 283 frp->fcncell = fcn; 284 frp->args = args; 285 frp->nargs = ndef; /* number defined with (excess are locals) */ 286 frp->retval = gettemp(); 287 288 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 289 y = execute((Node *)(fcn->sval)); /* execute body */ 290 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 291 292 for (i = 0; i < ndef; i++) { 293 Cell *t = frp->args[i]; 294 if (isarr(t)) { 295 if (t->csub == CCOPY) { 296 if (i >= ncall) { 297 freesymtab(t); 298 t->csub = CTEMP; 299 tempfree(t); 300 } else { 301 oargs[i]->tval = t->tval; 302 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 303 oargs[i]->sval = t->sval; 304 tempfree(t); 305 } 306 } 307 } else if (t != y) { /* kludge to prevent freeing twice */ 308 t->csub = CTEMP; 309 tempfree(t); 310 } else if (t == y && t->csub == CCOPY) { 311 t->csub = CTEMP; 312 tempfree(t); 313 freed = 1; 314 } 315 } 316 tempfree(fcn); 317 if (isexit(y) || isnext(y)) 318 return y; 319 if (freed == 0) { 320 tempfree(y); /* don't free twice! */ 321 } 322 z = frp->retval; /* return value */ 323 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 324 frp--; 325 return(z); 326 } 327 328 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 329 { 330 Cell *y; 331 332 /* copy is not constant or field */ 333 334 y = gettemp(); 335 y->tval = x->tval & ~(CON|FLD|REC); 336 y->csub = CCOPY; /* prevents freeing until call is over */ 337 y->nval = x->nval; /* BUG? */ 338 if (isstr(x) /* || x->ctype == OCELL */) { 339 y->sval = tostring(x->sval); 340 y->tval &= ~DONTFREE; 341 } else 342 y->tval |= DONTFREE; 343 y->fval = x->fval; 344 return y; 345 } 346 347 Cell *arg(Node **a, int n) /* nth argument of a function */ 348 { 349 350 n = ptoi(a[0]); /* argument number, counting from 0 */ 351 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 352 if (n+1 > frp->nargs) 353 FATAL("argument #%d of function %s was not supplied", 354 n+1, frp->fcncell->nval); 355 return frp->args[n]; 356 } 357 358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 359 { 360 Cell *y; 361 362 switch (n) { 363 case EXIT: 364 if (a[0] != NULL) { 365 y = execute(a[0]); 366 errorflag = (int) getfval(y); 367 tempfree(y); 368 } 369 longjmp(env, 1); 370 case RETURN: 371 if (a[0] != NULL) { 372 y = execute(a[0]); 373 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 374 setsval(frp->retval, getsval(y)); 375 frp->retval->fval = getfval(y); 376 frp->retval->tval |= NUM; 377 } 378 else if (y->tval & STR) 379 setsval(frp->retval, getsval(y)); 380 else if (y->tval & NUM) 381 setfval(frp->retval, getfval(y)); 382 else /* can't happen */ 383 FATAL("bad type variable %d", y->tval); 384 tempfree(y); 385 } 386 return(jret); 387 case NEXT: 388 return(jnext); 389 case NEXTFILE: 390 nextfile(); 391 return(jnextfile); 392 case BREAK: 393 return(jbreak); 394 case CONTINUE: 395 return(jcont); 396 default: /* can't happen */ 397 FATAL("illegal jump type %d", n); 398 } 399 return 0; /* not reached */ 400 } 401 402 Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 403 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 404 Cell *r, *x; 405 extern Cell **fldtab; 406 FILE *fp; 407 char *buf; 408 int bufsize = recsize; 409 int mode; 410 bool newflag; 411 double result; 412 413 if ((buf = (char *) malloc(bufsize)) == NULL) 414 FATAL("out of memory in getline"); 415 416 fflush(stdout); /* in case someone is waiting for a prompt */ 417 r = gettemp(); 418 if (a[1] != NULL) { /* getline < file */ 419 x = execute(a[2]); /* filename */ 420 mode = ptoi(a[1]); 421 if (mode == '|') /* input pipe */ 422 mode = LE; /* arbitrary flag */ 423 fp = openfile(mode, getsval(x), &newflag); 424 tempfree(x); 425 if (fp == NULL) 426 n = -1; 427 else 428 n = readrec(&buf, &bufsize, fp, newflag); 429 if (n <= 0) { 430 ; 431 } else if (a[0] != NULL) { /* getline var <file */ 432 x = execute(a[0]); 433 setsval(x, buf); 434 if (is_number(x->sval, & result)) { 435 x->fval = result; 436 x->tval |= NUM; 437 } 438 tempfree(x); 439 } else { /* getline <file */ 440 setsval(fldtab[0], buf); 441 if (is_number(fldtab[0]->sval, & result)) { 442 fldtab[0]->fval = result; 443 fldtab[0]->tval |= NUM; 444 } 445 } 446 } else { /* bare getline; use current input */ 447 if (a[0] == NULL) /* getline */ 448 n = getrec(&record, &recsize, true); 449 else { /* getline var */ 450 n = getrec(&buf, &bufsize, false); 451 if (n > 0) { 452 x = execute(a[0]); 453 setsval(x, buf); 454 if (is_number(x->sval, & result)) { 455 x->fval = result; 456 x->tval |= NUM; 457 } 458 tempfree(x); 459 } 460 } 461 } 462 setfval(r, (Awkfloat) n); 463 free(buf); 464 return r; 465 } 466 467 Cell *getnf(Node **a, int n) /* get NF */ 468 { 469 if (!donefld) 470 fldbld(); 471 return (Cell *) a[0]; 472 } 473 474 static char * 475 makearraystring(Node *p, const char *func) 476 { 477 char *buf; 478 int bufsz = recsize; 479 size_t blen; 480 481 if ((buf = (char *) malloc(bufsz)) == NULL) { 482 FATAL("%s: out of memory", func); 483 } 484 485 blen = 0; 486 buf[blen] = '\0'; 487 488 for (; p; p = p->nnext) { 489 Cell *x = execute(p); /* expr */ 490 char *s = getsval(x); 491 size_t seplen = strlen(getsval(subseploc)); 492 size_t nsub = p->nnext ? seplen : 0; 493 size_t slen = strlen(s); 494 size_t tlen = blen + slen + nsub; 495 496 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 497 FATAL("%s: out of memory %s[%s...]", 498 func, x->nval, buf); 499 } 500 memcpy(buf + blen, s, slen); 501 if (nsub) { 502 memcpy(buf + blen + slen, *SUBSEP, nsub); 503 } 504 buf[tlen] = '\0'; 505 blen = tlen; 506 tempfree(x); 507 } 508 return buf; 509 } 510 511 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 512 { 513 Cell *x, *z; 514 char *buf; 515 516 x = execute(a[0]); /* Cell* for symbol table */ 517 buf = makearraystring(a[1], __func__); 518 if (!isarr(x)) { 519 DPRINTF("making %s into an array\n", NN(x->nval)); 520 if (freeable(x)) 521 xfree(x->sval); 522 x->tval &= ~(STR|NUM|DONTFREE); 523 x->tval |= ARR; 524 x->sval = (char *) makesymtab(NSYMTAB); 525 } 526 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 527 z->ctype = OCELL; 528 z->csub = CVAR; 529 tempfree(x); 530 free(buf); 531 return(z); 532 } 533 534 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 535 { 536 Cell *x; 537 538 x = execute(a[0]); /* Cell* for symbol table */ 539 if (x == symtabloc) { 540 FATAL("cannot delete SYMTAB or its elements"); 541 } 542 if (!isarr(x)) 543 return True; 544 if (a[1] == NULL) { /* delete the elements, not the table */ 545 freesymtab(x); 546 x->tval &= ~STR; 547 x->tval |= ARR; 548 x->sval = (char *) makesymtab(NSYMTAB); 549 } else { 550 char *buf = makearraystring(a[1], __func__); 551 freeelem(x, buf); 552 free(buf); 553 } 554 tempfree(x); 555 return True; 556 } 557 558 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 559 { 560 Cell *ap, *k; 561 char *buf; 562 563 ap = execute(a[1]); /* array name */ 564 if (!isarr(ap)) { 565 DPRINTF("making %s into an array\n", ap->nval); 566 if (freeable(ap)) 567 xfree(ap->sval); 568 ap->tval &= ~(STR|NUM|DONTFREE); 569 ap->tval |= ARR; 570 ap->sval = (char *) makesymtab(NSYMTAB); 571 } 572 buf = makearraystring(a[0], __func__); 573 k = lookup(buf, (Array *) ap->sval); 574 tempfree(ap); 575 free(buf); 576 if (k == NULL) 577 return(False); 578 else 579 return(True); 580 } 581 582 583 /* ======== utf-8 code ========== */ 584 585 /* 586 * Awk strings can contain ascii, random 8-bit items (eg Latin-1), 587 * or utf-8. u8_isutf tests whether a string starts with a valid 588 * utf-8 sequence, and returns 0 if not (e.g., high bit set). 589 * u8_nextlen returns length of next valid sequence, which is 590 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. 591 * u8_strlen returns length of string in valid utf-8 sequences 592 * and/or high-bit bytes. Conversion functions go between byte 593 * number and character number. 594 * 595 * In theory, this behaves the same as before for non-utf8 bytes. 596 * 597 * Limited checking! This is a potential security hole. 598 */ 599 600 /* is s the beginning of a valid utf-8 string? */ 601 /* return length 1..4 if yes, 0 if no */ 602 int u8_isutf(const char *s) 603 { 604 int n, ret; 605 unsigned char c; 606 607 c = s[0]; 608 if (c < 128 || awk_mb_cur_max == 1) 609 return 1; /* what if it's 0? */ 610 611 n = strlen(s); 612 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 613 ret = 2; /* 110xxxxx 10xxxxxx */ 614 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 615 && (s[2] & 0xC0) == 0x80) { 616 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ 617 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 618 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 619 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 620 } else { 621 ret = 0; 622 } 623 return ret; 624 } 625 626 /* Convert (prefix of) utf8 string to utf-32 rune. */ 627 /* Sets *rune to the value, returns the length. */ 628 /* No error checking: watch out. */ 629 int u8_rune(int *rune, const char *s) 630 { 631 int n, ret; 632 unsigned char c; 633 634 c = s[0]; 635 if (c < 128 || awk_mb_cur_max == 1) { 636 *rune = c; 637 return 1; 638 } 639 640 n = strlen(s); 641 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 642 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ 643 ret = 2; 644 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 645 && (s[2] & 0xC0) == 0x80) { 646 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); 647 /* 1110xxxx 10xxxxxx 10xxxxxx */ 648 ret = 3; 649 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 650 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 651 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); 652 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 653 ret = 4; 654 } else { 655 *rune = c; 656 ret = 1; 657 } 658 return ret; /* returns one byte if sequence doesn't look like utf */ 659 } 660 661 /* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */ 662 int u8_nextlen(const char *s) 663 { 664 int len; 665 666 len = u8_isutf(s); 667 if (len == 0) 668 len = 1; 669 return len; 670 } 671 672 /* return number of utf characters or single non-utf bytes */ 673 int u8_strlen(const char *s) 674 { 675 int i, len, n, totlen; 676 unsigned char c; 677 678 n = strlen(s); 679 totlen = 0; 680 for (i = 0; i < n; i += len) { 681 c = s[i]; 682 if (c < 128 || awk_mb_cur_max == 1) { 683 len = 1; 684 } else { 685 len = u8_nextlen(&s[i]); 686 } 687 totlen++; 688 if (i > n) 689 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i); 690 } 691 return totlen; 692 } 693 694 /* convert utf-8 char number in a string to its byte offset */ 695 int u8_char2byte(const char *s, int charnum) 696 { 697 int n; 698 int bytenum = 0; 699 700 while (charnum > 0) { 701 n = u8_nextlen(s); 702 s += n; 703 bytenum += n; 704 charnum--; 705 } 706 return bytenum; 707 } 708 709 /* convert byte offset in s to utf-8 char number that starts there */ 710 int u8_byte2char(const char *s, int bytenum) 711 { 712 int i, len, b; 713 int charnum = 0; /* BUG: what origin? */ 714 /* should be 0 to match start==0 which means no match */ 715 716 b = strlen(s); 717 if (bytenum > b) { 718 return -1; /* ??? */ 719 } 720 for (i = 0; i <= bytenum; i += len) { 721 len = u8_nextlen(s+i); 722 charnum++; 723 } 724 return charnum; 725 } 726 727 /* runetochar() adapted from rune.c in the Plan 9 distribution */ 728 729 enum 730 { 731 Runeerror = 128, /* from somewhere else */ 732 Runemax = 0x10FFFF, 733 734 Bit1 = 7, 735 Bitx = 6, 736 Bit2 = 5, 737 Bit3 = 4, 738 Bit4 = 3, 739 Bit5 = 2, 740 741 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 742 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 743 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 744 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 745 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 746 T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ 747 748 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ 749 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ 750 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ 751 Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ 752 753 Maskx = (1<<Bitx)-1, /* 0011 1111 */ 754 Testx = Maskx ^ 0xFF, /* 1100 0000 */ 755 756 }; 757 758 int runetochar(char *str, int c) 759 { 760 /* one character sequence 00000-0007F => 00-7F */ 761 if (c <= Rune1) { 762 str[0] = c; 763 return 1; 764 } 765 766 /* two character sequence 00080-007FF => T2 Tx */ 767 if (c <= Rune2) { 768 str[0] = T2 | (c >> 1*Bitx); 769 str[1] = Tx | (c & Maskx); 770 return 2; 771 } 772 773 /* three character sequence 00800-0FFFF => T3 Tx Tx */ 774 if (c > Runemax) 775 c = Runeerror; 776 if (c <= Rune3) { 777 str[0] = T3 | (c >> 2*Bitx); 778 str[1] = Tx | ((c >> 1*Bitx) & Maskx); 779 str[2] = Tx | (c & Maskx); 780 return 3; 781 } 782 783 /* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */ 784 str[0] = T4 | (c >> 3*Bitx); 785 str[1] = Tx | ((c >> 2*Bitx) & Maskx); 786 str[2] = Tx | ((c >> 1*Bitx) & Maskx); 787 str[3] = Tx | (c & Maskx); 788 return 4; 789 } 790 791 792 /* ========== end of utf8 code =========== */ 793 794 795 796 Cell *matchop(Node **a, int n) /* ~ and match() */ 797 { 798 Cell *x, *y, *z; 799 char *s, *t; 800 int i; 801 int cstart, cpatlen, len; 802 fa *pfa; 803 int (*mf)(fa *, const char *) = match, mode = 0; 804 805 if (n == MATCHFCN) { 806 mf = pmatch; 807 mode = 1; 808 } 809 x = execute(a[1]); /* a[1] = target text */ 810 s = getsval(x); 811 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 812 i = (*mf)((fa *) a[2], s); 813 else { 814 y = execute(a[2]); /* a[2] = regular expr */ 815 t = getsval(y); 816 pfa = makedfa(t, mode); 817 i = (*mf)(pfa, s); 818 tempfree(y); 819 } 820 z = x; 821 if (n == MATCHFCN) { 822 int start = patbeg - s + 1; /* origin 1 */ 823 if (patlen < 0) { 824 start = 0; /* not found */ 825 } else { 826 cstart = u8_byte2char(s, start-1); 827 cpatlen = 0; 828 for (i = 0; i < patlen; i += len) { 829 len = u8_nextlen(patbeg+i); 830 cpatlen++; 831 } 832 833 start = cstart; 834 patlen = cpatlen; 835 } 836 837 setfval(rstartloc, (Awkfloat) start); 838 setfval(rlengthloc, (Awkfloat) patlen); 839 x = gettemp(); 840 x->tval = NUM; 841 x->fval = start; 842 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 843 x = True; 844 else 845 x = False; 846 847 tempfree(z); 848 return x; 849 } 850 851 852 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 853 { 854 Cell *x, *y; 855 int i; 856 857 x = execute(a[0]); 858 i = istrue(x); 859 tempfree(x); 860 switch (n) { 861 case BOR: 862 if (i) return(True); 863 y = execute(a[1]); 864 i = istrue(y); 865 tempfree(y); 866 if (i) return(True); 867 else return(False); 868 case AND: 869 if ( !i ) return(False); 870 y = execute(a[1]); 871 i = istrue(y); 872 tempfree(y); 873 if (i) return(True); 874 else return(False); 875 case NOT: 876 if (i) return(False); 877 else return(True); 878 default: /* can't happen */ 879 FATAL("unknown boolean operator %d", n); 880 } 881 return 0; /*NOTREACHED*/ 882 } 883 884 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 885 { 886 int i; 887 Cell *x, *y; 888 Awkfloat j; 889 bool x_is_nan, y_is_nan; 890 891 x = execute(a[0]); 892 y = execute(a[1]); 893 x_is_nan = isnan(x->fval); 894 y_is_nan = isnan(y->fval); 895 if (x->tval&NUM && y->tval&NUM) { 896 if ((x_is_nan || y_is_nan) && n != NE) 897 return(False); 898 j = x->fval - y->fval; 899 i = j<0? -1: (j>0? 1: 0); 900 } else { 901 i = strcmp(getsval(x), getsval(y)); 902 } 903 tempfree(x); 904 tempfree(y); 905 switch (n) { 906 case LT: if (i<0) return(True); 907 else return(False); 908 case LE: if (i<=0) return(True); 909 else return(False); 910 case NE: if (x_is_nan && y_is_nan) return(True); 911 else if (i!=0) return(True); 912 else return(False); 913 case EQ: if (i == 0) return(True); 914 else return(False); 915 case GE: if (i>=0) return(True); 916 else return(False); 917 case GT: if (i>0) return(True); 918 else return(False); 919 default: /* can't happen */ 920 FATAL("unknown relational operator %d", n); 921 } 922 return 0; /*NOTREACHED*/ 923 } 924 925 void tfree(Cell *a) /* free a tempcell */ 926 { 927 if (freeable(a)) { 928 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 929 xfree(a->sval); 930 } 931 if (a == tmps) 932 FATAL("tempcell list is curdled"); 933 a->cnext = tmps; 934 tmps = a; 935 } 936 937 Cell *gettemp(void) /* get a tempcell */ 938 { int i; 939 Cell *x; 940 941 if (!tmps) { 942 tmps = (Cell *) calloc(100, sizeof(*tmps)); 943 if (!tmps) 944 FATAL("out of space for temporaries"); 945 for (i = 1; i < 100; i++) 946 tmps[i-1].cnext = &tmps[i]; 947 tmps[i-1].cnext = NULL; 948 } 949 x = tmps; 950 tmps = x->cnext; 951 *x = tempcell; 952 return(x); 953 } 954 955 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 956 { 957 Awkfloat val; 958 Cell *x; 959 int m; 960 char *s; 961 962 x = execute(a[0]); 963 val = getfval(x); /* freebsd: defend against super large field numbers */ 964 if ((Awkfloat)INT_MAX < val) 965 FATAL("trying to access out of range field %s", x->nval); 966 m = (int) val; 967 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 968 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 969 /* BUG: can x->nval ever be null??? */ 970 tempfree(x); 971 x = fieldadr(m); 972 x->ctype = OCELL; /* BUG? why are these needed? */ 973 x->csub = CFLD; 974 return(x); 975 } 976 977 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 978 { 979 int k, m, n; 980 int mb, nb; 981 char *s; 982 int temp; 983 Cell *x, *y, *z = NULL; 984 985 x = execute(a[0]); 986 y = execute(a[1]); 987 if (a[2] != NULL) 988 z = execute(a[2]); 989 s = getsval(x); 990 k = u8_strlen(s) + 1; 991 if (k <= 1) { 992 tempfree(x); 993 tempfree(y); 994 if (a[2] != NULL) { 995 tempfree(z); 996 } 997 x = gettemp(); 998 setsval(x, ""); 999 return(x); 1000 } 1001 m = (int) getfval(y); 1002 if (m <= 0) 1003 m = 1; 1004 else if (m > k) 1005 m = k; 1006 tempfree(y); 1007 if (a[2] != NULL) { 1008 n = (int) getfval(z); 1009 tempfree(z); 1010 } else 1011 n = k - 1; 1012 if (n < 0) 1013 n = 0; 1014 else if (n > k - m) 1015 n = k - m; 1016 /* m is start, n is length from there */ 1017 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 1018 y = gettemp(); 1019 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ 1020 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ 1021 1022 temp = s[nb]; /* with thanks to John Linderman */ 1023 s[nb] = '\0'; 1024 setsval(y, s + mb); 1025 s[nb] = temp; 1026 tempfree(x); 1027 return(y); 1028 } 1029 1030 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 1031 { 1032 Cell *x, *y, *z; 1033 char *s1, *s2, *p1, *p2, *q; 1034 Awkfloat v = 0.0; 1035 1036 x = execute(a[0]); 1037 s1 = getsval(x); 1038 y = execute(a[1]); 1039 s2 = getsval(y); 1040 1041 z = gettemp(); 1042 for (p1 = s1; *p1 != '\0'; p1++) { 1043 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 1044 continue; 1045 if (*p2 == '\0') { 1046 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ 1047 1048 /* should be a function: used in match() as well */ 1049 int i, len; 1050 v = 0; 1051 for (i = 0; i < p1-s1+1; i += len) { 1052 len = u8_nextlen(s1+i); 1053 v++; 1054 } 1055 break; 1056 } 1057 } 1058 tempfree(x); 1059 tempfree(y); 1060 setfval(z, v); 1061 return(z); 1062 } 1063 1064 int has_utf8(char *s) /* return 1 if s contains any utf-8 (2 bytes or more) character */ 1065 { 1066 int n; 1067 1068 for (n = 0; *s != 0; s += n) { 1069 n = u8_nextlen(s); 1070 if (n > 1) 1071 return 1; 1072 } 1073 return 0; 1074 } 1075 1076 #define MAXNUMSIZE 50 1077 1078 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 1079 { 1080 char *fmt; 1081 char *p, *t; 1082 const char *os; 1083 Cell *x; 1084 int flag = 0, n; 1085 int fmtwd; /* format width */ 1086 int fmtsz = recsize; 1087 char *buf = *pbuf; 1088 int bufsize = *pbufsize; 1089 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 1090 #define BUFSZ(a) (bufsize - ((a) - buf)) 1091 1092 static bool first = true; 1093 static bool have_a_format = false; 1094 1095 if (first) { 1096 char xbuf[100]; 1097 1098 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 1099 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 1100 first = false; 1101 } 1102 1103 os = s; 1104 p = buf; 1105 if ((fmt = (char *) malloc(fmtsz)) == NULL) 1106 FATAL("out of memory in format()"); 1107 while (*s) { 1108 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 1109 if (*s != '%') { 1110 *p++ = *s++; 1111 continue; 1112 } 1113 if (*(s+1) == '%') { 1114 *p++ = '%'; 1115 s += 2; 1116 continue; 1117 } 1118 fmtwd = atoi(s+1); 1119 if (fmtwd < 0) 1120 fmtwd = -fmtwd; 1121 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 1122 for (t = fmt; (*t++ = *s) != '\0'; s++) { 1123 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 1124 FATAL("format item %.30s... ran format() out of memory", os); 1125 /* Ignore size specifiers */ 1126 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 1127 t--; 1128 continue; 1129 } 1130 if (isalpha((uschar)*s)) 1131 break; 1132 if (*s == '$') { 1133 FATAL("'$' not permitted in awk formats"); 1134 } 1135 if (*s == '*') { 1136 if (a == NULL) { 1137 FATAL("not enough args in printf(%s)", os); 1138 } 1139 x = execute(a); 1140 a = a->nnext; 1141 snprintf(t - 1, FMTSZ(t - 1), 1142 "%d", fmtwd=(int) getfval(x)); 1143 if (fmtwd < 0) 1144 fmtwd = -fmtwd; 1145 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 1146 t = fmt + strlen(fmt); 1147 tempfree(x); 1148 } 1149 } 1150 *t = '\0'; 1151 if (fmtwd < 0) 1152 fmtwd = -fmtwd; 1153 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 1154 switch (*s) { 1155 case 'a': case 'A': 1156 if (have_a_format) 1157 flag = *s; 1158 else 1159 flag = 'f'; 1160 break; 1161 case 'f': case 'e': case 'g': case 'E': case 'G': 1162 flag = 'f'; 1163 break; 1164 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 1165 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 1166 *(t-1) = 'j'; 1167 *t = *s; 1168 *++t = '\0'; 1169 break; 1170 case 's': 1171 flag = 's'; 1172 break; 1173 case 'c': 1174 flag = 'c'; 1175 break; 1176 default: 1177 WARNING("weird printf conversion %s", fmt); 1178 flag = '?'; 1179 break; 1180 } 1181 if (a == NULL) 1182 FATAL("not enough args in printf(%s)", os); 1183 x = execute(a); 1184 a = a->nnext; 1185 n = MAXNUMSIZE; 1186 if (fmtwd > n) 1187 n = fmtwd; 1188 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 1189 switch (flag) { 1190 case '?': 1191 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 1192 t = getsval(x); 1193 n = strlen(t); 1194 if (fmtwd > n) 1195 n = fmtwd; 1196 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 1197 p += strlen(p); 1198 snprintf(p, BUFSZ(p), "%s", t); 1199 break; 1200 case 'a': 1201 case 'A': 1202 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 1203 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 1204 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 1205 1206 case 's': { 1207 t = getsval(x); 1208 n = strlen(t); 1209 /* if simple format or no utf-8 in the string, sprintf works */ 1210 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { 1211 if (fmtwd > n) 1212 n = fmtwd; 1213 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 1214 FATAL("huge string/format (%d chars) in printf %.30s..." \ 1215 " ran format() out of memory", n, t); 1216 snprintf(p, BUFSZ(p), fmt, t); 1217 break; 1218 } 1219 1220 /* get here if string has utf-8 chars and fmt is not plain %s */ 1221 /* "%-w.ps", where -, w and .p are all optional */ 1222 /* '0' before the w is a flag character */ 1223 /* fmt points at % */ 1224 int ljust = 0, wid = 0, prec = n, pad = 0; 1225 char *f = fmt+1; 1226 if (f[0] == '-') { 1227 ljust = 1; 1228 f++; 1229 } 1230 // flags '0' and '+' are recognized but skipped 1231 if (f[0] == '0') { 1232 f++; 1233 if (f[0] == '+') 1234 f++; 1235 } 1236 if (f[0] == '+') { 1237 f++; 1238 if (f[0] == '0') 1239 f++; 1240 } 1241 if (isdigit(f[0])) { /* there is a wid */ 1242 wid = strtol(f, &f, 10); 1243 } 1244 if (f[0] == '.') { /* there is a .prec */ 1245 prec = strtol(++f, &f, 10); 1246 } 1247 if (prec > u8_strlen(t)) 1248 prec = u8_strlen(t); 1249 pad = wid>prec ? wid - prec : 0; // has to be >= 0 1250 int i, k, n; 1251 1252 if (ljust) { // print prec chars from t, then pad blanks 1253 n = u8_char2byte(t, prec); 1254 for (k = 0; k < n; k++) { 1255 //putchar(t[k]); 1256 *p++ = t[k]; 1257 } 1258 for (i = 0; i < pad; i++) { 1259 //printf(" "); 1260 *p++ = ' '; 1261 } 1262 } else { // print pad blanks, then prec chars from t 1263 for (i = 0; i < pad; i++) { 1264 //printf(" "); 1265 *p++ = ' '; 1266 } 1267 n = u8_char2byte(t, prec); 1268 for (k = 0; k < n; k++) { 1269 //putchar(t[k]); 1270 *p++ = t[k]; 1271 } 1272 } 1273 *p = 0; 1274 break; 1275 } 1276 1277 case 'c': { 1278 /* 1279 * If a numeric value is given, awk should just turn 1280 * it into a character and print it: 1281 * BEGIN { printf("%c\n", 65) } 1282 * prints "A". 1283 * 1284 * But what if the numeric value is > 128 and 1285 * represents a valid Unicode code point?!? We do 1286 * our best to convert it back into UTF-8. If we 1287 * can't, we output the encoding of the Unicode 1288 * "invalid character", 0xFFFD. 1289 */ 1290 if (isnum(x)) { 1291 int charval = (int) getfval(x); 1292 1293 if (charval != 0) { 1294 if (charval < 128 || awk_mb_cur_max == 1) 1295 snprintf(p, BUFSZ(p), fmt, charval); 1296 else { 1297 // possible unicode character 1298 size_t count; 1299 char *bs = wide_char_to_byte_str(charval, &count); 1300 1301 if (bs == NULL) { // invalid character 1302 // use unicode invalid character, 0xFFFD 1303 static char invalid_char[] = "\357\277\275"; 1304 bs = invalid_char; 1305 count = 3; 1306 } 1307 t = bs; 1308 n = count; 1309 goto format_percent_c; 1310 } 1311 } else { 1312 *p++ = '\0'; /* explicit null byte */ 1313 *p = '\0'; /* next output will start here */ 1314 } 1315 break; 1316 } 1317 t = getsval(x); 1318 n = u8_nextlen(t); 1319 format_percent_c: 1320 if (n < 2) { /* not utf8 */ 1321 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 1322 break; 1323 } 1324 1325 // utf8 character, almost same song and dance as for %s 1326 int ljust = 0, wid = 0, prec = n, pad = 0; 1327 char *f = fmt+1; 1328 if (f[0] == '-') { 1329 ljust = 1; 1330 f++; 1331 } 1332 // flags '0' and '+' are recognized but skipped 1333 if (f[0] == '0') { 1334 f++; 1335 if (f[0] == '+') 1336 f++; 1337 } 1338 if (f[0] == '+') { 1339 f++; 1340 if (f[0] == '0') 1341 f++; 1342 } 1343 if (isdigit(f[0])) { /* there is a wid */ 1344 wid = strtol(f, &f, 10); 1345 } 1346 if (f[0] == '.') { /* there is a .prec */ 1347 prec = strtol(++f, &f, 10); 1348 } 1349 if (prec > 1) // %c --> only one character 1350 prec = 1; 1351 pad = wid>prec ? wid - prec : 0; // has to be >= 0 1352 int i; 1353 1354 if (ljust) { // print one char from t, then pad blanks 1355 for (i = 0; i < n; i++) 1356 *p++ = t[i]; 1357 for (i = 0; i < pad; i++) { 1358 //printf(" "); 1359 *p++ = ' '; 1360 } 1361 } else { // print pad blanks, then prec chars from t 1362 for (i = 0; i < pad; i++) { 1363 //printf(" "); 1364 *p++ = ' '; 1365 } 1366 for (i = 0; i < n; i++) 1367 *p++ = t[i]; 1368 } 1369 *p = 0; 1370 break; 1371 } 1372 default: 1373 FATAL("can't happen: bad conversion %c in format()", flag); 1374 } 1375 1376 tempfree(x); 1377 p += strlen(p); 1378 s++; 1379 } 1380 *p = '\0'; 1381 free(fmt); 1382 for ( ; a; a = a->nnext) { /* evaluate any remaining args */ 1383 x = execute(a); 1384 tempfree(x); 1385 } 1386 *pbuf = buf; 1387 *pbufsize = bufsize; 1388 return p - buf; 1389 } 1390 1391 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 1392 { 1393 Cell *x; 1394 Node *y; 1395 char *buf; 1396 int bufsz=3*recsize; 1397 1398 if ((buf = (char *) malloc(bufsz)) == NULL) 1399 FATAL("out of memory in awksprintf"); 1400 y = a[0]->nnext; 1401 x = execute(a[0]); 1402 if (format(&buf, &bufsz, getsval(x), y) == -1) 1403 FATAL("sprintf string %.30s... too long. can't happen.", buf); 1404 tempfree(x); 1405 x = gettemp(); 1406 x->sval = buf; 1407 x->tval = STR; 1408 return(x); 1409 } 1410 1411 Cell *awkprintf(Node **a, int n) /* printf */ 1412 { /* a[0] is list of args, starting with format string */ 1413 /* a[1] is redirection operator, a[2] is redirection file */ 1414 FILE *fp; 1415 Cell *x; 1416 Node *y; 1417 char *buf; 1418 int len; 1419 int bufsz=3*recsize; 1420 1421 if ((buf = (char *) malloc(bufsz)) == NULL) 1422 FATAL("out of memory in awkprintf"); 1423 y = a[0]->nnext; 1424 x = execute(a[0]); 1425 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1426 FATAL("printf string %.30s... too long. can't happen.", buf); 1427 tempfree(x); 1428 if (a[1] == NULL) { 1429 /* fputs(buf, stdout); */ 1430 fwrite(buf, len, 1, stdout); 1431 if (ferror(stdout)) 1432 FATAL("write error on stdout"); 1433 } else { 1434 fp = redirect(ptoi(a[1]), a[2]); 1435 /* fputs(buf, fp); */ 1436 fwrite(buf, len, 1, fp); 1437 fflush(fp); 1438 if (ferror(fp)) 1439 FATAL("write error on %s", filename(fp)); 1440 } 1441 free(buf); 1442 return(True); 1443 } 1444 1445 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1446 { 1447 Awkfloat i, j = 0; 1448 double v; 1449 Cell *x, *y, *z; 1450 1451 x = execute(a[0]); 1452 i = getfval(x); 1453 tempfree(x); 1454 if (n != UMINUS && n != UPLUS) { 1455 y = execute(a[1]); 1456 j = getfval(y); 1457 tempfree(y); 1458 } 1459 z = gettemp(); 1460 switch (n) { 1461 case ADD: 1462 i += j; 1463 break; 1464 case MINUS: 1465 i -= j; 1466 break; 1467 case MULT: 1468 i *= j; 1469 break; 1470 case DIVIDE: 1471 if (j == 0) 1472 FATAL("division by zero"); 1473 i /= j; 1474 break; 1475 case MOD: 1476 if (j == 0) 1477 FATAL("division by zero in mod"); 1478 modf(i/j, &v); 1479 i = i - j * v; 1480 break; 1481 case UMINUS: 1482 i = -i; 1483 break; 1484 case UPLUS: /* handled by getfval(), above */ 1485 break; 1486 case POWER: 1487 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1488 i = ipow(i, (int) j); 1489 else { 1490 errno = 0; 1491 i = errcheck(pow(i, j), "pow"); 1492 } 1493 break; 1494 default: /* can't happen */ 1495 FATAL("illegal arithmetic operator %d", n); 1496 } 1497 setfval(z, i); 1498 return(z); 1499 } 1500 1501 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1502 { 1503 double v; 1504 1505 if (n <= 0) 1506 return 1; 1507 v = ipow(x, n/2); 1508 if (n % 2 == 0) 1509 return v * v; 1510 else 1511 return x * v * v; 1512 } 1513 1514 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 1515 { 1516 Cell *x, *z; 1517 int k; 1518 Awkfloat xf; 1519 1520 x = execute(a[0]); 1521 xf = getfval(x); 1522 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1523 if (n == PREINCR || n == PREDECR) { 1524 setfval(x, xf + k); 1525 return(x); 1526 } 1527 z = gettemp(); 1528 setfval(z, xf); 1529 setfval(x, xf + k); 1530 tempfree(x); 1531 return(z); 1532 } 1533 1534 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1535 { /* this is subtle; don't muck with it. */ 1536 Cell *x, *y; 1537 Awkfloat xf, yf; 1538 double v; 1539 1540 y = execute(a[1]); 1541 x = execute(a[0]); 1542 if (n == ASSIGN) { /* ordinary assignment */ 1543 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1544 ; /* self-assignment: leave alone unless it's a field or NF */ 1545 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1546 yf = getfval(y); 1547 setsval(x, getsval(y)); 1548 x->fval = yf; 1549 x->tval |= NUM; 1550 } 1551 else if (isstr(y)) 1552 setsval(x, getsval(y)); 1553 else if (isnum(y)) 1554 setfval(x, getfval(y)); 1555 else 1556 funnyvar(y, "read value of"); 1557 tempfree(y); 1558 return(x); 1559 } 1560 xf = getfval(x); 1561 yf = getfval(y); 1562 switch (n) { 1563 case ADDEQ: 1564 xf += yf; 1565 break; 1566 case SUBEQ: 1567 xf -= yf; 1568 break; 1569 case MULTEQ: 1570 xf *= yf; 1571 break; 1572 case DIVEQ: 1573 if (yf == 0) 1574 FATAL("division by zero in /="); 1575 xf /= yf; 1576 break; 1577 case MODEQ: 1578 if (yf == 0) 1579 FATAL("division by zero in %%="); 1580 modf(xf/yf, &v); 1581 xf = xf - yf * v; 1582 break; 1583 case POWEQ: 1584 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1585 xf = ipow(xf, (int) yf); 1586 else { 1587 errno = 0; 1588 xf = errcheck(pow(xf, yf), "pow"); 1589 } 1590 break; 1591 default: 1592 FATAL("illegal assignment operator %d", n); 1593 break; 1594 } 1595 tempfree(y); 1596 setfval(x, xf); 1597 return(x); 1598 } 1599 1600 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1601 { 1602 Cell *x, *y, *z; 1603 int n1, n2; 1604 char *s = NULL; 1605 int ssz = 0; 1606 1607 x = execute(a[0]); 1608 n1 = strlen(getsval(x)); 1609 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); 1610 memcpy(s, x->sval, n1); 1611 1612 tempfree(x); 1613 1614 y = execute(a[1]); 1615 n2 = strlen(getsval(y)); 1616 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1617 memcpy(s + n1, y->sval, n2); 1618 s[n1 + n2] = '\0'; 1619 1620 tempfree(y); 1621 1622 z = gettemp(); 1623 z->sval = s; 1624 z->tval = STR; 1625 1626 return(z); 1627 } 1628 1629 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1630 { 1631 Cell *x; 1632 1633 if (a[0] == NULL) 1634 x = execute(a[1]); 1635 else { 1636 x = execute(a[0]); 1637 if (istrue(x)) { 1638 tempfree(x); 1639 x = execute(a[1]); 1640 } 1641 } 1642 return x; 1643 } 1644 1645 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1646 { 1647 Cell *x; 1648 int pair; 1649 1650 pair = ptoi(a[3]); 1651 if (pairstack[pair] == 0) { 1652 x = execute(a[0]); 1653 if (istrue(x)) 1654 pairstack[pair] = 1; 1655 tempfree(x); 1656 } 1657 if (pairstack[pair] == 1) { 1658 x = execute(a[1]); 1659 if (istrue(x)) 1660 pairstack[pair] = 0; 1661 tempfree(x); 1662 x = execute(a[2]); 1663 return(x); 1664 } 1665 return(False); 1666 } 1667 1668 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1669 { 1670 Cell *x = NULL, *y, *ap; 1671 const char *s, *origs, *t; 1672 const char *fs = NULL; 1673 char *origfs = NULL; 1674 int sep; 1675 char temp, num[50]; 1676 int n, tempstat, arg3type; 1677 int j; 1678 double result; 1679 1680 y = execute(a[0]); /* source string */ 1681 origs = s = strdup(getsval(y)); 1682 tempfree(y); 1683 arg3type = ptoi(a[3]); 1684 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */ 1685 fs = getsval(fsloc); 1686 } else if (arg3type == STRING) { /* split(str,arr,"string") */ 1687 x = execute(a[2]); 1688 fs = origfs = strdup(getsval(x)); 1689 tempfree(x); 1690 } else if (arg3type == REGEXPR) { 1691 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1692 } else { 1693 FATAL("illegal type of split"); 1694 } 1695 sep = *fs; 1696 ap = execute(a[1]); /* array name */ 1697 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ 1698 freesymtab(ap); 1699 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1700 ap->tval &= ~STR; 1701 ap->tval |= ARR; 1702 ap->sval = (char *) makesymtab(NSYMTAB); 1703 1704 n = 0; 1705 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1706 /* split(s, a, //); have to arrange that it looks like empty sep */ 1707 arg3type = 0; 1708 fs = ""; 1709 sep = 0; 1710 } 1711 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1712 fa *pfa; 1713 if (arg3type == REGEXPR) { /* it's ready already */ 1714 pfa = (fa *) a[2]; 1715 } else { 1716 pfa = makedfa(fs, 1); 1717 } 1718 if (nematch(pfa,s)) { 1719 tempstat = pfa->initstat; 1720 pfa->initstat = 2; 1721 do { 1722 n++; 1723 snprintf(num, sizeof(num), "%d", n); 1724 temp = *patbeg; 1725 setptr(patbeg, '\0'); 1726 if (is_number(s, & result)) 1727 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1728 else 1729 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1730 setptr(patbeg, temp); 1731 s = patbeg + patlen; 1732 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1733 n++; 1734 snprintf(num, sizeof(num), "%d", n); 1735 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1736 pfa->initstat = tempstat; 1737 goto spdone; 1738 } 1739 } while (nematch(pfa,s)); 1740 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1741 /* cf gsub and refldbld */ 1742 } 1743 n++; 1744 snprintf(num, sizeof(num), "%d", n); 1745 if (is_number(s, & result)) 1746 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1747 else 1748 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1749 spdone: 1750 pfa = NULL; 1751 1752 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */ 1753 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ 1754 for (;;) { 1755 char *fr = newt; 1756 n++; 1757 if (*s == '"' ) { /* start of "..." */ 1758 for (s++ ; *s != '\0'; ) { 1759 if (*s == '"' && s[1] != '\0' && s[1] == '"') { 1760 s += 2; /* doubled quote */ 1761 *fr++ = '"'; 1762 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { 1763 s++; /* skip over closing quote */ 1764 break; 1765 } else { 1766 *fr++ = *s++; 1767 } 1768 } 1769 *fr++ = 0; 1770 } else { /* unquoted field */ 1771 while (*s != ',' && *s != '\0') 1772 *fr++ = *s++; 1773 *fr++ = 0; 1774 } 1775 snprintf(num, sizeof(num), "%d", n); 1776 if (is_number(newt, &result)) 1777 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval); 1778 else 1779 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval); 1780 if (*s++ == '\0') 1781 break; 1782 } 1783 free(newt); 1784 1785 } else if (!CSV && sep == ' ') { /* usual case: split on white space */ 1786 for (n = 0; ; ) { 1787 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1788 while (ISWS(*s)) 1789 s++; 1790 if (*s == '\0') 1791 break; 1792 n++; 1793 t = s; 1794 do 1795 s++; 1796 while (*s != '\0' && !ISWS(*s)); 1797 temp = *s; 1798 setptr(s, '\0'); 1799 snprintf(num, sizeof(num), "%d", n); 1800 if (is_number(t, & result)) 1801 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1802 else 1803 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1804 setptr(s, temp); 1805 if (*s != '\0') 1806 s++; 1807 } 1808 1809 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1810 for (n = 0; *s != '\0'; s += u8_nextlen(s)) { 1811 char buf[10]; 1812 n++; 1813 snprintf(num, sizeof(num), "%d", n); 1814 1815 for (j = 0; j < u8_nextlen(s); j++) { 1816 buf[j] = s[j]; 1817 } 1818 buf[j] = '\0'; 1819 1820 if (isdigit((uschar)buf[0])) 1821 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1822 else 1823 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1824 } 1825 1826 } else if (*s != '\0') { /* some random single character */ 1827 for (;;) { 1828 n++; 1829 t = s; 1830 while (*s != sep && *s != '\0') 1831 s++; 1832 temp = *s; 1833 setptr(s, '\0'); 1834 snprintf(num, sizeof(num), "%d", n); 1835 if (is_number(t, & result)) 1836 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1837 else 1838 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1839 setptr(s, temp); 1840 if (*s++ == '\0') 1841 break; 1842 } 1843 } 1844 tempfree(ap); 1845 xfree(origs); 1846 xfree(origfs); 1847 x = gettemp(); 1848 x->tval = NUM; 1849 x->fval = n; 1850 return(x); 1851 } 1852 1853 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1854 { 1855 Cell *x; 1856 1857 x = execute(a[0]); 1858 if (istrue(x)) { 1859 tempfree(x); 1860 x = execute(a[1]); 1861 } else { 1862 tempfree(x); 1863 x = execute(a[2]); 1864 } 1865 return(x); 1866 } 1867 1868 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1869 { 1870 Cell *x; 1871 1872 x = execute(a[0]); 1873 if (istrue(x)) { 1874 tempfree(x); 1875 x = execute(a[1]); 1876 } else if (a[2] != NULL) { 1877 tempfree(x); 1878 x = execute(a[2]); 1879 } 1880 return(x); 1881 } 1882 1883 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1884 { 1885 Cell *x; 1886 1887 for (;;) { 1888 x = execute(a[0]); 1889 if (!istrue(x)) 1890 return(x); 1891 tempfree(x); 1892 x = execute(a[1]); 1893 if (isbreak(x)) { 1894 x = True; 1895 return(x); 1896 } 1897 if (isnext(x) || isexit(x) || isret(x)) 1898 return(x); 1899 tempfree(x); 1900 } 1901 } 1902 1903 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1904 { 1905 Cell *x; 1906 1907 for (;;) { 1908 x = execute(a[0]); 1909 if (isbreak(x)) 1910 return True; 1911 if (isnext(x) || isexit(x) || isret(x)) 1912 return(x); 1913 tempfree(x); 1914 x = execute(a[1]); 1915 if (!istrue(x)) 1916 return(x); 1917 tempfree(x); 1918 } 1919 } 1920 1921 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1922 { 1923 Cell *x; 1924 1925 x = execute(a[0]); 1926 tempfree(x); 1927 for (;;) { 1928 if (a[1]!=NULL) { 1929 x = execute(a[1]); 1930 if (!istrue(x)) return(x); 1931 else tempfree(x); 1932 } 1933 x = execute(a[3]); 1934 if (isbreak(x)) /* turn off break */ 1935 return True; 1936 if (isnext(x) || isexit(x) || isret(x)) 1937 return(x); 1938 tempfree(x); 1939 x = execute(a[2]); 1940 tempfree(x); 1941 } 1942 } 1943 1944 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1945 { 1946 Cell *x, *vp, *arrayp, *cp, *ncp; 1947 Array *tp; 1948 int i; 1949 1950 vp = execute(a[0]); 1951 arrayp = execute(a[1]); 1952 if (!isarr(arrayp)) { 1953 return True; 1954 } 1955 tp = (Array *) arrayp->sval; 1956 tempfree(arrayp); 1957 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1958 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1959 setsval(vp, cp->nval); 1960 ncp = cp->cnext; 1961 x = execute(a[2]); 1962 if (isbreak(x)) { 1963 tempfree(vp); 1964 return True; 1965 } 1966 if (isnext(x) || isexit(x) || isret(x)) { 1967 tempfree(vp); 1968 return(x); 1969 } 1970 tempfree(x); 1971 } 1972 } 1973 return True; 1974 } 1975 1976 static char *nawk_convert(const char *s, int (*fun_c)(int), 1977 wint_t (*fun_wc)(wint_t)) 1978 { 1979 char *buf = NULL; 1980 char *pbuf = NULL; 1981 const char *ps = NULL; 1982 size_t n = 0; 1983 wchar_t wc; 1984 const size_t sz = awk_mb_cur_max; 1985 int unused; 1986 1987 if (sz == 1) { 1988 buf = tostring(s); 1989 1990 for (pbuf = buf; *pbuf; pbuf++) 1991 *pbuf = fun_c((uschar)*pbuf); 1992 1993 return buf; 1994 } else { 1995 /* upper/lower character may be shorter/longer */ 1996 buf = tostringN(s, strlen(s) * sz + 1); 1997 1998 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1999 /* 2000 * Reset internal state here too. 2001 * Assign result to avoid a compiler warning. (Casting to void 2002 * doesn't work.) 2003 * Increment said variable to avoid a different warning. 2004 */ 2005 unused = wctomb(NULL, L'\0'); 2006 unused++; 2007 2008 ps = s; 2009 pbuf = buf; 2010 while (n = mbtowc(&wc, ps, sz), 2011 n > 0 && n != (size_t)-1 && n != (size_t)-2) 2012 { 2013 ps += n; 2014 2015 n = wctomb(pbuf, fun_wc(wc)); 2016 if (n == (size_t)-1) 2017 FATAL("illegal wide character %s", s); 2018 2019 pbuf += n; 2020 } 2021 2022 *pbuf = '\0'; 2023 2024 if (n) 2025 FATAL("illegal byte sequence %s", s); 2026 2027 return buf; 2028 } 2029 } 2030 2031 #ifdef __DJGPP__ 2032 static wint_t towupper(wint_t wc) 2033 { 2034 if (wc >= 0 && wc < 256) 2035 return toupper(wc & 0xFF); 2036 2037 return wc; 2038 } 2039 2040 static wint_t towlower(wint_t wc) 2041 { 2042 if (wc >= 0 && wc < 256) 2043 return tolower(wc & 0xFF); 2044 2045 return wc; 2046 } 2047 #endif 2048 2049 static char *nawk_toupper(const char *s) 2050 { 2051 return nawk_convert(s, toupper, towupper); 2052 } 2053 2054 static char *nawk_tolower(const char *s) 2055 { 2056 return nawk_convert(s, tolower, towlower); 2057 } 2058 2059 2060 2061 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 2062 { 2063 Cell *x, *y; 2064 Awkfloat u = 0; 2065 int t, sz; 2066 Awkfloat tmp; 2067 char *buf, *fmt; 2068 Node *nextarg; 2069 FILE *fp; 2070 int status = 0; 2071 time_t tv; 2072 struct tm *tm, tmbuf; 2073 int estatus = 0; 2074 2075 t = ptoi(a[0]); 2076 x = execute(a[1]); 2077 nextarg = a[1]->nnext; 2078 switch (t) { 2079 case FLENGTH: 2080 if (isarr(x)) 2081 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 2082 else 2083 u = u8_strlen(getsval(x)); 2084 break; 2085 case FLOG: 2086 errno = 0; 2087 u = errcheck(log(getfval(x)), "log"); 2088 break; 2089 case FINT: 2090 modf(getfval(x), &u); break; 2091 case FEXP: 2092 errno = 0; 2093 u = errcheck(exp(getfval(x)), "exp"); 2094 break; 2095 case FSQRT: 2096 errno = 0; 2097 u = errcheck(sqrt(getfval(x)), "sqrt"); 2098 break; 2099 case FSIN: 2100 u = sin(getfval(x)); break; 2101 case FCOS: 2102 u = cos(getfval(x)); break; 2103 case FATAN: 2104 if (nextarg == NULL) { 2105 WARNING("atan2 requires two arguments; returning 1.0"); 2106 u = 1.0; 2107 } else { 2108 y = execute(a[1]->nnext); 2109 u = atan2(getfval(x), getfval(y)); 2110 tempfree(y); 2111 nextarg = nextarg->nnext; 2112 } 2113 break; 2114 case FCOMPL: 2115 u = ~((int)getfval(x)); 2116 break; 2117 case FAND: 2118 if (nextarg == 0) { 2119 WARNING("and requires two arguments; returning 0"); 2120 u = 0; 2121 break; 2122 } 2123 y = execute(a[1]->nnext); 2124 u = ((int)getfval(x)) & ((int)getfval(y)); 2125 tempfree(y); 2126 nextarg = nextarg->nnext; 2127 break; 2128 case FFOR: 2129 if (nextarg == 0) { 2130 WARNING("or requires two arguments; returning 0"); 2131 u = 0; 2132 break; 2133 } 2134 y = execute(a[1]->nnext); 2135 u = ((int)getfval(x)) | ((int)getfval(y)); 2136 tempfree(y); 2137 nextarg = nextarg->nnext; 2138 break; 2139 case FXOR: 2140 if (nextarg == 0) { 2141 WARNING("xor requires two arguments; returning 0"); 2142 u = 0; 2143 break; 2144 } 2145 y = execute(a[1]->nnext); 2146 u = ((int)getfval(x)) ^ ((int)getfval(y)); 2147 tempfree(y); 2148 nextarg = nextarg->nnext; 2149 break; 2150 case FLSHIFT: 2151 if (nextarg == 0) { 2152 WARNING("lshift requires two arguments; returning 0"); 2153 u = 0; 2154 break; 2155 } 2156 y = execute(a[1]->nnext); 2157 u = ((int)getfval(x)) << ((int)getfval(y)); 2158 tempfree(y); 2159 nextarg = nextarg->nnext; 2160 break; 2161 case FRSHIFT: 2162 if (nextarg == 0) { 2163 WARNING("rshift requires two arguments; returning 0"); 2164 u = 0; 2165 break; 2166 } 2167 y = execute(a[1]->nnext); 2168 u = ((int)getfval(x)) >> ((int)getfval(y)); 2169 tempfree(y); 2170 nextarg = nextarg->nnext; 2171 break; 2172 case FSYSTEM: 2173 fflush(stdout); /* in case something is buffered already */ 2174 estatus = status = system(getsval(x)); 2175 if (status != -1) { 2176 if (WIFEXITED(status)) { 2177 estatus = WEXITSTATUS(status); 2178 } else if (WIFSIGNALED(status)) { 2179 estatus = WTERMSIG(status) + 256; 2180 #ifdef WCOREDUMP 2181 if (WCOREDUMP(status)) 2182 estatus += 256; 2183 #endif 2184 } else /* something else?!? */ 2185 estatus = 0; 2186 } 2187 /* else estatus was set to -1 */ 2188 u = estatus; 2189 break; 2190 case FRAND: 2191 /* random() returns numbers in [0..2^31-1] 2192 * in order to get a number in [0, 1), divide it by 2^31 2193 */ 2194 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 2195 break; 2196 case FSRAND: 2197 if (isrec(x)) /* no argument provided */ 2198 u = time((time_t *)0); 2199 else 2200 u = getfval(x); 2201 tmp = u; 2202 srandom((unsigned long) u); 2203 u = srand_seed; 2204 srand_seed = tmp; 2205 break; 2206 case FTOUPPER: 2207 case FTOLOWER: 2208 if (t == FTOUPPER) 2209 buf = nawk_toupper(getsval(x)); 2210 else 2211 buf = nawk_tolower(getsval(x)); 2212 tempfree(x); 2213 x = gettemp(); 2214 setsval(x, buf); 2215 free(buf); 2216 return x; 2217 case FFLUSH: 2218 if (isrec(x) || strlen(getsval(x)) == 0) { 2219 flush_all(); /* fflush() or fflush("") -> all */ 2220 u = 0; 2221 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 2222 u = EOF; 2223 else 2224 u = fflush(fp); 2225 break; 2226 case FMKTIME: 2227 memset(&tmbuf, 0, sizeof(tmbuf)); 2228 tm = &tmbuf; 2229 t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 2230 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 2231 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 2232 switch (t) { 2233 case 6: 2234 tm->tm_isdst = -1; /* let mktime figure it out */ 2235 /* FALLTHROUGH */ 2236 case 7: 2237 tm->tm_year -= 1900; 2238 tm->tm_mon--; 2239 u = mktime(tm); 2240 break; 2241 default: 2242 u = -1; 2243 break; 2244 } 2245 break; 2246 case FSYSTIME: 2247 u = time((time_t *) 0); 2248 break; 2249 case FSTRFTIME: 2250 /* strftime([format [,timestamp]]) */ 2251 if (nextarg) { 2252 y = execute(nextarg); 2253 nextarg = nextarg->nnext; 2254 tv = (time_t) getfval(y); 2255 tempfree(y); 2256 } else 2257 tv = time((time_t *) 0); 2258 tm = localtime(&tv); 2259 if (tm == NULL) 2260 FATAL("bad time %ld", (long)tv); 2261 2262 if (isrec(x)) { 2263 /* format argument not provided, use default */ 2264 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 2265 } else 2266 fmt = tostring(getsval(x)); 2267 2268 sz = 32; 2269 buf = NULL; 2270 do { 2271 if ((buf = realloc(buf, (sz *= 2))) == NULL) 2272 FATAL("out of memory in strftime"); 2273 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 2274 2275 y = gettemp(); 2276 setsval(y, buf); 2277 free(fmt); 2278 free(buf); 2279 2280 return y; 2281 default: /* can't happen */ 2282 FATAL("illegal function type %d", t); 2283 break; 2284 } 2285 tempfree(x); 2286 x = gettemp(); 2287 setfval(x, u); 2288 if (nextarg != NULL) { 2289 WARNING("warning: function has too many arguments"); 2290 for ( ; nextarg; nextarg = nextarg->nnext) { 2291 y = execute(nextarg); 2292 tempfree(y); 2293 } 2294 } 2295 return(x); 2296 } 2297 2298 Cell *printstat(Node **a, int n) /* print a[0] */ 2299 { 2300 Node *x; 2301 Cell *y; 2302 FILE *fp; 2303 2304 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 2305 fp = stdout; 2306 else 2307 fp = redirect(ptoi(a[1]), a[2]); 2308 for (x = a[0]; x != NULL; x = x->nnext) { 2309 y = execute(x); 2310 fputs(getpssval(y), fp); 2311 tempfree(y); 2312 if (x->nnext == NULL) 2313 fputs(getsval(orsloc), fp); 2314 else 2315 fputs(getsval(ofsloc), fp); 2316 } 2317 if (a[1] != NULL) 2318 fflush(fp); 2319 if (ferror(fp)) 2320 FATAL("write error on %s", filename(fp)); 2321 return(True); 2322 } 2323 2324 Cell *nullproc(Node **a, int n) 2325 { 2326 return 0; 2327 } 2328 2329 2330 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 2331 { 2332 FILE *fp; 2333 Cell *x; 2334 char *fname; 2335 2336 x = execute(b); 2337 fname = getsval(x); 2338 fp = openfile(a, fname, NULL); 2339 if (fp == NULL) 2340 FATAL("can't open file %s", fname); 2341 tempfree(x); 2342 return fp; 2343 } 2344 2345 struct files { 2346 FILE *fp; 2347 const char *fname; 2348 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 2349 } *files; 2350 2351 size_t nfiles; 2352 2353 static void stdinit(void) /* in case stdin, etc., are not constants */ 2354 { 2355 nfiles = FOPEN_MAX; 2356 files = (struct files *) calloc(nfiles, sizeof(*files)); 2357 if (files == NULL) 2358 FATAL("can't allocate file memory for %zu files", nfiles); 2359 files[0].fp = stdin; 2360 files[0].fname = tostring("/dev/stdin"); 2361 files[0].mode = LT; 2362 files[1].fp = stdout; 2363 files[1].fname = tostring("/dev/stdout"); 2364 files[1].mode = GT; 2365 files[2].fp = stderr; 2366 files[2].fname = tostring("/dev/stderr"); 2367 files[2].mode = GT; 2368 } 2369 2370 FILE *openfile(int a, const char *us, bool *pnewflag) 2371 { 2372 const char *s = us; 2373 size_t i; 2374 int m; 2375 FILE *fp = NULL; 2376 2377 if (*s == '\0') 2378 FATAL("null file name in print or getline"); 2379 for (i = 0; i < nfiles; i++) 2380 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 2381 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 2382 a == FFLUSH)) { 2383 if (pnewflag) 2384 *pnewflag = false; 2385 return files[i].fp; 2386 } 2387 if (a == FFLUSH) /* didn't find it, so don't create it! */ 2388 return NULL; 2389 2390 for (i = 0; i < nfiles; i++) 2391 if (files[i].fp == NULL) 2392 break; 2393 if (i >= nfiles) { 2394 struct files *nf; 2395 size_t nnf = nfiles + FOPEN_MAX; 2396 nf = (struct files *) realloc(files, nnf * sizeof(*nf)); 2397 if (nf == NULL) 2398 FATAL("cannot grow files for %s and %zu files", s, nnf); 2399 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 2400 nfiles = nnf; 2401 files = nf; 2402 } 2403 fflush(stdout); /* force a semblance of order */ 2404 m = a; 2405 if (a == GT) { 2406 fp = fopen(s, "w"); 2407 } else if (a == APPEND) { 2408 fp = fopen(s, "a"); 2409 m = GT; /* so can mix > and >> */ 2410 } else if (a == '|') { /* output pipe */ 2411 fp = popen(s, "w"); 2412 } else if (a == LE) { /* input pipe */ 2413 fp = popen(s, "r"); 2414 } else if (a == LT) { /* getline <file */ 2415 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 2416 } else /* can't happen */ 2417 FATAL("illegal redirection %d", a); 2418 if (fp != NULL) { 2419 files[i].fname = tostring(s); 2420 files[i].fp = fp; 2421 files[i].mode = m; 2422 if (pnewflag) 2423 *pnewflag = true; 2424 if (fp != stdin && fp != stdout && fp != stderr) 2425 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 2426 } 2427 return fp; 2428 } 2429 2430 const char *filename(FILE *fp) 2431 { 2432 size_t i; 2433 2434 for (i = 0; i < nfiles; i++) 2435 if (fp == files[i].fp) 2436 return files[i].fname; 2437 return "???"; 2438 } 2439 2440 Cell *closefile(Node **a, int n) 2441 { 2442 Cell *x; 2443 size_t i; 2444 bool stat; 2445 2446 x = execute(a[0]); 2447 getsval(x); 2448 stat = true; 2449 for (i = 0; i < nfiles; i++) { 2450 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 2451 continue; 2452 if (files[i].mode == GT || files[i].mode == '|') 2453 fflush(files[i].fp); 2454 if (ferror(files[i].fp)) { 2455 if ((files[i].mode == GT && files[i].fp != stderr) 2456 || files[i].mode == '|') 2457 FATAL("write error on %s", files[i].fname); 2458 else 2459 WARNING("i/o error occurred on %s", files[i].fname); 2460 } 2461 if (files[i].fp == stdin || files[i].fp == stdout || 2462 files[i].fp == stderr) 2463 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2464 else if (files[i].mode == '|' || files[i].mode == LE) 2465 stat = pclose(files[i].fp) == -1; 2466 else 2467 stat = fclose(files[i].fp) == EOF; 2468 if (stat) 2469 WARNING("i/o error occurred closing %s", files[i].fname); 2470 xfree(files[i].fname); 2471 files[i].fname = NULL; /* watch out for ref thru this */ 2472 files[i].fp = NULL; 2473 break; 2474 } 2475 tempfree(x); 2476 x = gettemp(); 2477 setfval(x, (Awkfloat) (stat ? -1 : 0)); 2478 return(x); 2479 } 2480 2481 void closeall(void) 2482 { 2483 size_t i; 2484 bool stat = false; 2485 2486 for (i = 0; i < nfiles; i++) { 2487 if (! files[i].fp) 2488 continue; 2489 if (files[i].mode == GT || files[i].mode == '|') 2490 fflush(files[i].fp); 2491 if (ferror(files[i].fp)) { 2492 if ((files[i].mode == GT && files[i].fp != stderr) 2493 || files[i].mode == '|') 2494 FATAL("write error on %s", files[i].fname); 2495 else 2496 WARNING("i/o error occurred on %s", files[i].fname); 2497 } 2498 if (files[i].fp == stdin || files[i].fp == stdout || 2499 files[i].fp == stderr) 2500 continue; 2501 if (files[i].mode == '|' || files[i].mode == LE) 2502 stat = pclose(files[i].fp) == -1; 2503 else 2504 stat = fclose(files[i].fp) == EOF; 2505 if (stat) 2506 WARNING("i/o error occurred while closing %s", files[i].fname); 2507 } 2508 } 2509 2510 static void flush_all(void) 2511 { 2512 size_t i; 2513 2514 for (i = 0; i < nfiles; i++) 2515 if (files[i].fp) 2516 fflush(files[i].fp); 2517 } 2518 2519 void backsub(char **pb_ptr, const char **sptr_ptr); 2520 2521 Cell *dosub(Node **a, int subop) /* sub and gsub */ 2522 { 2523 fa *pfa; 2524 int tempstat = 0; 2525 char *repl; 2526 Cell *x; 2527 2528 char *buf = NULL; 2529 char *pb = NULL; 2530 int bufsz = recsize; 2531 2532 const char *r, *s; 2533 const char *start; 2534 const char *noempty = NULL; /* empty match disallowed here */ 2535 size_t m = 0; /* match count */ 2536 size_t whichm = 0; /* which match to select, 0 = global */ 2537 int mtype; /* match type */ 2538 2539 if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */ 2540 pfa = (fa *) a[1]; 2541 } else { 2542 x = execute(a[1]); 2543 pfa = makedfa(getsval(x), 1); 2544 tempfree(x); 2545 } 2546 2547 x = execute(a[2]); /* replacement string */ 2548 repl = tostring(getsval(x)); 2549 tempfree(x); 2550 2551 switch (subop) { 2552 case SUB: 2553 whichm = 1; 2554 x = execute(a[3]); /* source string */ 2555 break; 2556 case GSUB: 2557 whichm = 0; 2558 x = execute(a[3]); /* source string */ 2559 break; 2560 default: 2561 FATAL("dosub: unrecognized subop: %d", subop); 2562 } 2563 2564 start = getsval(x); 2565 while (pmatch(pfa, start)) { 2566 if (buf == NULL) { 2567 if ((pb = buf = (char *) malloc(bufsz)) == NULL) 2568 FATAL("out of memory in dosub"); 2569 tempstat = pfa->initstat; 2570 pfa->initstat = 2; 2571 } 2572 2573 /* match types */ 2574 #define MT_IGNORE 0 /* unselected or invalid */ 2575 #define MT_INSERT 1 /* selected, empty */ 2576 #define MT_REPLACE 2 /* selected, not empty */ 2577 2578 /* an empty match just after replacement is invalid */ 2579 2580 if (patbeg == noempty && patlen == 0) { 2581 mtype = MT_IGNORE; /* invalid, not counted */ 2582 } else if (whichm == ++m || whichm == 0) { 2583 mtype = patlen ? MT_REPLACE : MT_INSERT; 2584 } else { 2585 mtype = MT_IGNORE; /* unselected, but counted */ 2586 } 2587 2588 /* leading text: */ 2589 if (patbeg > start) { 2590 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start), 2591 recsize, &pb, "dosub"); 2592 s = start; 2593 while (s < patbeg) 2594 *pb++ = *s++; 2595 } 2596 2597 if (mtype == MT_IGNORE) 2598 goto matching_text; /* skip replacement text */ 2599 2600 r = repl; 2601 while (*r != 0) { 2602 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub"); 2603 if (*r == '\\') { 2604 backsub(&pb, &r); 2605 } else if (*r == '&') { 2606 r++; 2607 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, 2608 &pb, "dosub"); 2609 for (s = patbeg; s < patbeg+patlen; ) 2610 *pb++ = *s++; 2611 } else { 2612 *pb++ = *r++; 2613 } 2614 } 2615 2616 matching_text: 2617 if (mtype == MT_REPLACE || *patbeg == '\0') 2618 goto next_search; /* skip matching text */ 2619 2620 if (patlen == 0) 2621 patlen = u8_nextlen(patbeg); 2622 adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub"); 2623 s = patbeg; 2624 while (s < patbeg + patlen) 2625 *pb++ = *s++; 2626 2627 next_search: 2628 start = patbeg + patlen; 2629 if (m == whichm || *patbeg == '\0') 2630 break; 2631 if (mtype == MT_REPLACE) 2632 noempty = start; 2633 2634 #undef MT_IGNORE 2635 #undef MT_INSERT 2636 #undef MT_REPLACE 2637 } 2638 2639 xfree(repl); 2640 2641 if (buf != NULL) { 2642 pfa->initstat = tempstat; 2643 2644 /* trailing text */ 2645 adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub"); 2646 while ((*pb++ = *start++) != '\0') 2647 ; 2648 2649 setsval(x, buf); 2650 free(buf); 2651 } 2652 2653 tempfree(x); 2654 x = gettemp(); 2655 x->tval = NUM; 2656 x->fval = m; 2657 return x; 2658 } 2659 2660 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2661 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2662 { 2663 Cell *x, *y, *res, *h; 2664 char *rptr; 2665 const char *sptr; 2666 char *buf, *pb; 2667 const char *t, *q; 2668 fa *pfa; 2669 int mflag, tempstat, num, whichm; 2670 int bufsz = recsize; 2671 2672 if ((buf = malloc(bufsz)) == NULL) 2673 FATAL("out of memory in gensub"); 2674 mflag = 0; /* if mflag == 0, can replace empty string */ 2675 num = 0; 2676 x = execute(a[4]); /* source string */ 2677 t = getsval(x); 2678 res = copycell(x); /* target string - initially copy of source */ 2679 res->csub = CTEMP; /* result values are temporary */ 2680 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2681 pfa = (fa *) a[1]; /* regular expression */ 2682 else { 2683 y = execute(a[1]); 2684 pfa = makedfa(getsval(y), 1); 2685 tempfree(y); 2686 } 2687 y = execute(a[2]); /* replacement string */ 2688 h = execute(a[3]); /* which matches should be replaced */ 2689 sptr = getsval(h); 2690 if (sptr[0] == 'g' || sptr[0] == 'G') 2691 whichm = -1; 2692 else { 2693 /* 2694 * The specified number is index of replacement, starting 2695 * from 1. GNU awk treats index lower than 0 same as 2696 * 1, we do same for compatibility. 2697 */ 2698 whichm = (int) getfval(h) - 1; 2699 if (whichm < 0) 2700 whichm = 0; 2701 } 2702 tempfree(h); 2703 2704 if (pmatch(pfa, t)) { 2705 char *sl; 2706 2707 tempstat = pfa->initstat; 2708 pfa->initstat = 2; 2709 pb = buf; 2710 rptr = getsval(y); 2711 /* 2712 * XXX if there are any backreferences in subst string, 2713 * complain now. 2714 */ 2715 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2716 if (strchr("0123456789", sl[1])) { 2717 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2718 } 2719 } 2720 2721 do { 2722 if (whichm >= 0 && whichm != num) { 2723 num++; 2724 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2725 2726 /* copy the part of string up to and including 2727 * match to output buffer */ 2728 while (t < patbeg + patlen) 2729 *pb++ = *t++; 2730 continue; 2731 } 2732 2733 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2734 if (mflag == 0) { /* can replace empty */ 2735 num++; 2736 sptr = rptr; 2737 while (*sptr != 0) { 2738 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2739 if (*sptr == '\\') { 2740 backsub(&pb, &sptr); 2741 } else if (*sptr == '&') { 2742 sptr++; 2743 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2744 for (q = patbeg; q < patbeg+patlen; ) 2745 *pb++ = *q++; 2746 } else 2747 *pb++ = *sptr++; 2748 } 2749 } 2750 if (*t == 0) /* at end */ 2751 goto done; 2752 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2753 *pb++ = *t++; 2754 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2755 FATAL("gensub result0 %.30s too big; can't happen", buf); 2756 mflag = 0; 2757 } 2758 else { /* matched nonempty string */ 2759 num++; 2760 sptr = t; 2761 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2762 while (sptr < patbeg) 2763 *pb++ = *sptr++; 2764 sptr = rptr; 2765 while (*sptr != 0) { 2766 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2767 if (*sptr == '\\') { 2768 backsub(&pb, &sptr); 2769 } else if (*sptr == '&') { 2770 sptr++; 2771 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2772 for (q = patbeg; q < patbeg+patlen; ) 2773 *pb++ = *q++; 2774 } else 2775 *pb++ = *sptr++; 2776 } 2777 t = patbeg + patlen; 2778 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2779 goto done; 2780 if (pb > buf + bufsz) 2781 FATAL("gensub result1 %.30s too big; can't happen", buf); 2782 mflag = 1; 2783 } 2784 } while (pmatch(pfa,t)); 2785 sptr = t; 2786 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2787 while ((*pb++ = *sptr++) != 0) 2788 ; 2789 done: if (pb > buf + bufsz) 2790 FATAL("gensub result2 %.30s too big; can't happen", buf); 2791 *pb = '\0'; 2792 setsval(res, buf); 2793 pfa->initstat = tempstat; 2794 } 2795 tempfree(x); 2796 tempfree(y); 2797 free(buf); 2798 return(res); 2799 } 2800 2801 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2802 { /* sptr[0] == '\\' */ 2803 char *pb = *pb_ptr; 2804 const char *sptr = *sptr_ptr; 2805 static bool first = true; 2806 static bool do_posix = false; 2807 2808 if (first) { 2809 first = false; 2810 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2811 } 2812 2813 if (sptr[1] == '\\') { 2814 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2815 *pb++ = '\\'; 2816 *pb++ = '&'; 2817 sptr += 4; 2818 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2819 *pb++ = '\\'; 2820 sptr += 2; 2821 } else if (do_posix) { /* \\x -> \x */ 2822 sptr++; 2823 *pb++ = *sptr++; 2824 } else { /* \\x -> \\x */ 2825 *pb++ = *sptr++; 2826 *pb++ = *sptr++; 2827 } 2828 } else if (sptr[1] == '&') { /* literal & */ 2829 sptr++; 2830 *pb++ = *sptr++; 2831 } else /* literal \ */ 2832 *pb++ = *sptr++; 2833 2834 *pb_ptr = pb; 2835 *sptr_ptr = sptr; 2836 } 2837 2838 static char *wide_char_to_byte_str(int rune, size_t *outlen) 2839 { 2840 static char buf[5]; 2841 int len; 2842 2843 if (rune < 0 || rune > 0x10FFFF) 2844 return NULL; 2845 2846 memset(buf, 0, sizeof(buf)); 2847 2848 len = 0; 2849 if (rune <= 0x0000007F) { 2850 buf[len++] = rune; 2851 } else if (rune <= 0x000007FF) { 2852 // 110xxxxx 10xxxxxx 2853 buf[len++] = 0xC0 | (rune >> 6); 2854 buf[len++] = 0x80 | (rune & 0x3F); 2855 } else if (rune <= 0x0000FFFF) { 2856 // 1110xxxx 10xxxxxx 10xxxxxx 2857 buf[len++] = 0xE0 | (rune >> 12); 2858 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2859 buf[len++] = 0x80 | (rune & 0x3F); 2860 2861 } else { 2862 // 0x00010000 - 0x10FFFF 2863 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 2864 buf[len++] = 0xF0 | (rune >> 18); 2865 buf[len++] = 0x80 | ((rune >> 12) & 0x3F); 2866 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2867 buf[len++] = 0x80 | (rune & 0x3F); 2868 } 2869 2870 *outlen = len; 2871 buf[len++] = '\0'; 2872 2873 return buf; 2874 } 2875