1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 
23 ****************************************************************/ 24 25 #define DEBUG 26 #include <stdio.h> 27 #include <ctype.h> 28 #include <errno.h> 29 #include <wctype.h> 30 #include <fcntl.h> 31 #include <setjmp.h> 32 #include <limits.h> 33 #include <math.h> 34 #include <string.h> 35 #include <stdlib.h> 36 #include <time.h> 37 #include <sys/types.h> 38 #include <sys/wait.h> 39 #include "awk.h" 40 #include "awkgram.tab.h" 41 42 43 static void stdinit(void); 44 static void flush_all(void); 45 static char *wide_char_to_byte_str(int rune, size_t *outlen); 46 47 #if 1 48 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 49 #else 50 void tempfree(Cell *p) { 51 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 52 WARNING("bad csub %d in Cell %d %s", 53 p->csub, p->ctype, p->sval); 54 } 55 if (istemp(p)) 56 tfree(p); 57 } 58 #endif 59 60 /* do we really need these? */ 61 /* #ifdef _NFILE */ 62 /* #ifndef FOPEN_MAX */ 63 /* #define FOPEN_MAX _NFILE */ 64 /* #endif */ 65 /* #endif */ 66 /* */ 67 /* #ifndef FOPEN_MAX */ 68 /* #define FOPEN_MAX 40 */ /* max number of open files */ 69 /* #endif */ 70 /* */ 71 /* #ifndef RAND_MAX */ 72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 73 /* #endif */ 74 75 jmp_buf env; 76 extern int pairstack[]; 77 extern Awkfloat srand_seed; 78 79 Node *winner = NULL; /* root of parse tree */ 80 Cell *tmps; /* free temporary cells for execution */ 81 82 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 83 Cell *True = &truecell; 84 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 85 Cell *False = &falsecell; 86 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 87 Cell *jbreak = &breakcell; 88 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 89 Cell *jcont = &contcell; 90 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 91 Cell *jnext = &nextcell; 92 static Cell nextfilecell ={ OJUMP, 
JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 93 Cell *jnextfile = &nextfilecell; 94 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 95 Cell *jexit = &exitcell; 96 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 97 Cell *jret = &retcell; 98 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 99 100 Node *curnode = NULL; /* the node being executed, for debugging */ 101 102 /* buffer memory management */ 103 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 104 const char *whatrtn) 105 /* pbuf: address of pointer to buffer being managed 106 * psiz: address of buffer size variable 107 * minlen: minimum length of buffer needed 108 * quantum: buffer size quantum 109 * pbptr: address of movable pointer into buffer, or 0 if none 110 * whatrtn: name of the calling routine if failure should cause fatal error 111 * 112 * return 0 for realloc failure, !=0 for success 113 */ 114 { 115 if (minlen > *psiz) { 116 char *tbuf; 117 int rminlen = quantum ? minlen % quantum : 0; 118 int boff = pbptr ? 
*pbptr - *pbuf : 0; 119 /* round up to next multiple of quantum */ 120 if (rminlen) 121 minlen += quantum - rminlen; 122 tbuf = (char *) realloc(*pbuf, minlen); 123 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 124 if (tbuf == NULL) { 125 if (whatrtn) 126 FATAL("out of memory in %s", whatrtn); 127 return 0; 128 } 129 *pbuf = tbuf; 130 *psiz = minlen; 131 if (pbptr) 132 *pbptr = tbuf + boff; 133 } 134 return 1; 135 } 136 137 void run(Node *a) /* execution of parse tree starts here */ 138 { 139 140 stdinit(); 141 execute(a); 142 closeall(); 143 } 144 145 Cell *execute(Node *u) /* execute a node of the parse tree */ 146 { 147 Cell *(*proc)(Node **, int); 148 Cell *x; 149 Node *a; 150 151 if (u == NULL) 152 return(True); 153 for (a = u; ; a = a->nnext) { 154 curnode = a; 155 if (isvalue(a)) { 156 x = (Cell *) (a->narg[0]); 157 if (isfld(x) && !donefld) 158 fldbld(); 159 else if (isrec(x) && !donerec) 160 recbld(); 161 return(x); 162 } 163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 164 FATAL("illegal statement"); 165 proc = proctab[a->nobj-FIRSTTOKEN]; 166 x = (*proc)(a->narg, a->nobj); 167 if (isfld(x) && !donefld) 168 fldbld(); 169 else if (isrec(x) && !donerec) 170 recbld(); 171 if (isexpr(a)) 172 return(x); 173 if (isjump(x)) 174 return(x); 175 if (a->nnext == NULL) 176 return(x); 177 tempfree(x); 178 } 179 } 180 181 182 Cell *program(Node **a, int n) /* execute an awk program */ 183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 184 Cell *x; 185 186 if (setjmp(env) != 0) 187 goto ex; 188 if (a[0]) { /* BEGIN */ 189 x = execute(a[0]); 190 if (isexit(x)) 191 return(True); 192 if (isjump(x)) 193 FATAL("illegal break, continue, next or nextfile from BEGIN"); 194 tempfree(x); 195 } 196 if (a[1] || a[2]) 197 while (getrec(&record, &recsize, true) > 0) { 198 x = execute(a[1]); 199 if (isexit(x)) 200 break; 201 tempfree(x); 202 } 203 ex: 204 if (setjmp(env) != 0) /* handles exit within END */ 
205 goto ex1; 206 if (a[2]) { /* END */ 207 x = execute(a[2]); 208 if (isbreak(x) || isnext(x) || iscont(x)) 209 FATAL("illegal break, continue, next or nextfile from END"); 210 tempfree(x); 211 } 212 ex1: 213 return(True); 214 } 215 216 struct Frame { /* stack frame for awk function calls */ 217 int nargs; /* number of arguments in this call */ 218 Cell *fcncell; /* pointer to Cell for function */ 219 Cell **args; /* pointer to array of arguments after execute */ 220 Cell *retval; /* return value */ 221 }; 222 223 #define NARGS 50 /* max args in a call */ 224 225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 226 int nframe = 0; /* number of frames allocated */ 227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 228 229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 230 { 231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 232 int i, ncall, ndef; 233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 234 Node *x; 235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 236 Cell *y, *z, *fcn; 237 char *s; 238 239 fcn = execute(a[0]); /* the function itself */ 240 s = fcn->nval; 241 if (!isfcn(fcn)) 242 FATAL("calling undefined function %s", s); 243 if (frame == NULL) { 244 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 245 if (frame == NULL) 246 FATAL("out of space for stack frames calling %s", s); 247 } 248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 249 ncall++; 250 ndef = (int) fcn->fval; /* args in defn */ 251 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 252 if (ncall > ndef) 253 WARNING("function %s called with %d args, uses only %d", 254 s, ncall, ndef); 255 if (ncall + ndef > NARGS) 256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 257 for (i = 0, x = a[1]; x != NULL; i++, x 
= x->nnext) { /* get call args */ 258 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 259 y = execute(x); 260 oargs[i] = y; 261 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 263 if (isfcn(y)) 264 FATAL("can't use function %s as argument in %s", y->nval, s); 265 if (isarr(y)) 266 args[i] = y; /* arrays by ref */ 267 else 268 args[i] = copycell(y); 269 tempfree(y); 270 } 271 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 272 args[i] = gettemp(); 273 *args[i] = newcopycell; 274 } 275 frp++; /* now ok to up frame */ 276 if (frp >= frame + nframe) { 277 int dfp = frp - frame; /* old index */ 278 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); 279 if (frame == NULL) 280 FATAL("out of space for stack frames in %s", s); 281 frp = frame + dfp; 282 } 283 frp->fcncell = fcn; 284 frp->args = args; 285 frp->nargs = ndef; /* number defined with (excess are locals) */ 286 frp->retval = gettemp(); 287 288 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 289 y = execute((Node *)(fcn->sval)); /* execute body */ 290 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 291 292 for (i = 0; i < ndef; i++) { 293 Cell *t = frp->args[i]; 294 if (isarr(t)) { 295 if (t->csub == CCOPY) { 296 if (i >= ncall) { 297 freesymtab(t); 298 t->csub = CTEMP; 299 tempfree(t); 300 } else { 301 oargs[i]->tval = t->tval; 302 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 303 oargs[i]->sval = t->sval; 304 tempfree(t); 305 } 306 } 307 } else if (t != y) { /* kludge to prevent freeing twice */ 308 t->csub = CTEMP; 309 tempfree(t); 310 } else if (t == y && t->csub == CCOPY) { 311 t->csub = CTEMP; 312 tempfree(t); 313 freed = 1; 314 } 315 } 316 tempfree(fcn); 317 if (isexit(y) || isnext(y)) 318 return y; 319 if (freed == 0) { 320 tempfree(y); /* don't free twice! 
*/ 321 } 322 z = frp->retval; /* return value */ 323 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 324 frp--; 325 return(z); 326 } 327 328 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 329 { 330 Cell *y; 331 332 /* copy is not constant or field */ 333 334 y = gettemp(); 335 y->tval = x->tval & ~(CON|FLD|REC); 336 y->csub = CCOPY; /* prevents freeing until call is over */ 337 y->nval = x->nval; /* BUG? */ 338 if (isstr(x) /* || x->ctype == OCELL */) { 339 y->sval = tostring(x->sval); 340 y->tval &= ~DONTFREE; 341 } else 342 y->tval |= DONTFREE; 343 y->fval = x->fval; 344 return y; 345 } 346 347 Cell *arg(Node **a, int n) /* nth argument of a function */ 348 { 349 350 n = ptoi(a[0]); /* argument number, counting from 0 */ 351 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 352 if (n+1 > frp->nargs) 353 FATAL("argument #%d of function %s was not supplied", 354 n+1, frp->fcncell->nval); 355 return frp->args[n]; 356 } 357 358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 359 { 360 Cell *y; 361 362 switch (n) { 363 case EXIT: 364 if (a[0] != NULL) { 365 y = execute(a[0]); 366 errorflag = (int) getfval(y); 367 tempfree(y); 368 } 369 longjmp(env, 1); 370 case RETURN: 371 if (a[0] != NULL) { 372 y = execute(a[0]); 373 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 374 setsval(frp->retval, getsval(y)); 375 frp->retval->fval = getfval(y); 376 frp->retval->tval |= NUM; 377 } 378 else if (y->tval & STR) 379 setsval(frp->retval, getsval(y)); 380 else if (y->tval & NUM) 381 setfval(frp->retval, getfval(y)); 382 else /* can't happen */ 383 FATAL("bad type variable %d", y->tval); 384 tempfree(y); 385 } 386 return(jret); 387 case NEXT: 388 return(jnext); 389 case NEXTFILE: 390 nextfile(); 391 return(jnextfile); 392 case BREAK: 393 return(jbreak); 394 case CONTINUE: 395 return(jcont); 396 default: /* can't happen */ 397 FATAL("illegal jump type %d", n); 398 } 399 return 0; /* not reached */ 400 } 401 402 Cell 
*awkgetline(Node **a, int n) /* get next line from specific input */ 403 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 404 Cell *r, *x; 405 extern Cell **fldtab; 406 FILE *fp; 407 char *buf; 408 int bufsize = recsize; 409 int mode; 410 bool newflag; 411 double result; 412 413 if ((buf = (char *) malloc(bufsize)) == NULL) 414 FATAL("out of memory in getline"); 415 416 fflush(stdout); /* in case someone is waiting for a prompt */ 417 r = gettemp(); 418 if (a[1] != NULL) { /* getline < file */ 419 x = execute(a[2]); /* filename */ 420 mode = ptoi(a[1]); 421 if (mode == '|') /* input pipe */ 422 mode = LE; /* arbitrary flag */ 423 fp = openfile(mode, getsval(x), &newflag); 424 tempfree(x); 425 if (fp == NULL) 426 n = -1; 427 else 428 n = readrec(&buf, &bufsize, fp, newflag); 429 if (n <= 0) { 430 ; 431 } else if (a[0] != NULL) { /* getline var <file */ 432 x = execute(a[0]); 433 setsval(x, buf); 434 if (is_number(x->sval, & result)) { 435 x->fval = result; 436 x->tval |= NUM; 437 } 438 tempfree(x); 439 } else { /* getline <file */ 440 setsval(fldtab[0], buf); 441 if (is_number(fldtab[0]->sval, & result)) { 442 fldtab[0]->fval = result; 443 fldtab[0]->tval |= NUM; 444 } 445 } 446 } else { /* bare getline; use current input */ 447 if (a[0] == NULL) /* getline */ 448 n = getrec(&record, &recsize, true); 449 else { /* getline var */ 450 n = getrec(&buf, &bufsize, false); 451 if (n > 0) { 452 x = execute(a[0]); 453 setsval(x, buf); 454 if (is_number(x->sval, & result)) { 455 x->fval = result; 456 x->tval |= NUM; 457 } 458 tempfree(x); 459 } 460 } 461 } 462 setfval(r, (Awkfloat) n); 463 free(buf); 464 return r; 465 } 466 467 Cell *getnf(Node **a, int n) /* get NF */ 468 { 469 if (!donefld) 470 fldbld(); 471 return (Cell *) a[0]; 472 } 473 474 static char * 475 makearraystring(Node *p, const char *func) 476 { 477 char *buf; 478 int bufsz = recsize; 479 size_t blen; 480 481 if ((buf = (char *) malloc(bufsz)) == NULL) { 482 FATAL("%s: out of memory", func); 483 } 
484 485 blen = 0; 486 buf[blen] = '\0'; 487 488 for (; p; p = p->nnext) { 489 Cell *x = execute(p); /* expr */ 490 char *s = getsval(x); 491 size_t seplen = strlen(getsval(subseploc)); 492 size_t nsub = p->nnext ? seplen : 0; 493 size_t slen = strlen(s); 494 size_t tlen = blen + slen + nsub; 495 496 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 497 FATAL("%s: out of memory %s[%s...]", 498 func, x->nval, buf); 499 } 500 memcpy(buf + blen, s, slen); 501 if (nsub) { 502 memcpy(buf + blen + slen, *SUBSEP, nsub); 503 } 504 buf[tlen] = '\0'; 505 blen = tlen; 506 tempfree(x); 507 } 508 return buf; 509 } 510 511 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 512 { 513 Cell *x, *z; 514 char *buf; 515 516 x = execute(a[0]); /* Cell* for symbol table */ 517 buf = makearraystring(a[1], __func__); 518 if (!isarr(x)) { 519 DPRINTF("making %s into an array\n", NN(x->nval)); 520 if (freeable(x)) 521 xfree(x->sval); 522 x->tval &= ~(STR|NUM|DONTFREE); 523 x->tval |= ARR; 524 x->sval = (char *) makesymtab(NSYMTAB); 525 } 526 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 527 z->ctype = OCELL; 528 z->csub = CVAR; 529 tempfree(x); 530 free(buf); 531 return(z); 532 } 533 534 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 535 { 536 Cell *x; 537 538 x = execute(a[0]); /* Cell* for symbol table */ 539 if (x == symtabloc) { 540 FATAL("cannot delete SYMTAB or its elements"); 541 } 542 if (!isarr(x)) 543 return True; 544 if (a[1] == NULL) { /* delete the elements, not the table */ 545 freesymtab(x); 546 x->tval &= ~STR; 547 x->tval |= ARR; 548 x->sval = (char *) makesymtab(NSYMTAB); 549 } else { 550 char *buf = makearraystring(a[1], __func__); 551 freeelem(x, buf); 552 free(buf); 553 } 554 tempfree(x); 555 return True; 556 } 557 558 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 559 { 560 Cell *ap, *k; 561 char *buf; 562 563 ap = execute(a[1]); /* array name */ 564 if (!isarr(ap)) 
{ 565 DPRINTF("making %s into an array\n", ap->nval); 566 if (freeable(ap)) 567 xfree(ap->sval); 568 ap->tval &= ~(STR|NUM|DONTFREE); 569 ap->tval |= ARR; 570 ap->sval = (char *) makesymtab(NSYMTAB); 571 } 572 buf = makearraystring(a[0], __func__); 573 k = lookup(buf, (Array *) ap->sval); 574 tempfree(ap); 575 free(buf); 576 if (k == NULL) 577 return(False); 578 else 579 return(True); 580 } 581 582 583 /* ======== utf-8 code ========== */ 584 585 /* 586 * Awk strings can contain ascii, random 8-bit items (eg Latin-1), 587 * or utf-8. u8_isutf tests whether a string starts with a valid 588 * utf-8 sequence, and returns 0 if not (e.g., high bit set). 589 * u8_nextlen returns length of next valid sequence, which is 590 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. 591 * u8_strlen returns length of string in valid utf-8 sequences 592 * and/or high-bit bytes. Conversion functions go between byte 593 * number and character number. 594 * 595 * In theory, this behaves the same as before for non-utf8 bytes. 596 * 597 * Limited checking! This is a potential security hole. 598 */ 599 600 /* is s the beginning of a valid utf-8 string? */ 601 /* return length 1..4 if yes, 0 if no */ 602 int u8_isutf(const char *s) 603 { 604 int n, ret; 605 unsigned char c; 606 607 c = s[0]; 608 if (c < 128 || awk_mb_cur_max == 1) 609 return 1; /* what if it's 0? */ 610 611 n = strlen(s); 612 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 613 ret = 2; /* 110xxxxx 10xxxxxx */ 614 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 615 && (s[2] & 0xC0) == 0x80) { 616 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ 617 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 618 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 619 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 620 } else { 621 ret = 0; 622 } 623 return ret; 624 } 625 626 /* Convert (prefix of) utf8 string to utf-32 rune. 
*/ 627 /* Sets *rune to the value, returns the length. */ 628 /* No error checking: watch out. */ 629 int u8_rune(int *rune, const char *s) 630 { 631 int n, ret; 632 unsigned char c; 633 634 c = s[0]; 635 if (c < 128 || awk_mb_cur_max == 1) { 636 *rune = c; 637 return 1; 638 } 639 640 n = strlen(s); 641 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 642 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ 643 ret = 2; 644 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 645 && (s[2] & 0xC0) == 0x80) { 646 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); 647 /* 1110xxxx 10xxxxxx 10xxxxxx */ 648 ret = 3; 649 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 650 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 651 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); 652 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 653 ret = 4; 654 } else { 655 *rune = c; 656 ret = 1; 657 } 658 return ret; /* returns one byte if sequence doesn't look like utf */ 659 } 660 661 /* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */ 662 int u8_nextlen(const char *s) 663 { 664 int len; 665 666 len = u8_isutf(s); 667 if (len == 0) 668 len = 1; 669 return len; 670 } 671 672 /* return number of utf characters or single non-utf bytes */ 673 int u8_strlen(const char *s) 674 { 675 int i, len, n, totlen; 676 unsigned char c; 677 678 n = strlen(s); 679 totlen = 0; 680 for (i = 0; i < n; i += len) { 681 c = s[i]; 682 if (c < 128 || awk_mb_cur_max == 1) { 683 len = 1; 684 } else { 685 len = u8_nextlen(&s[i]); 686 } 687 totlen++; 688 if (i > n) 689 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i); 690 } 691 return totlen; 692 } 693 694 /* convert utf-8 char number in a string to its byte offset */ 695 int u8_char2byte(const char *s, int charnum) 696 { 697 int n; 698 int bytenum = 0; 699 700 while (charnum > 0) { 701 n = u8_nextlen(s); 702 s += n; 703 
bytenum += n; 704 charnum--; 705 } 706 return bytenum; 707 } 708 709 /* convert byte offset in s to utf-8 char number that starts there */ 710 int u8_byte2char(const char *s, int bytenum) 711 { 712 int i, len, b; 713 int charnum = 0; /* BUG: what origin? */ 714 /* should be 0 to match start==0 which means no match */ 715 716 b = strlen(s); 717 if (bytenum > b) { 718 return -1; /* ??? */ 719 } 720 for (i = 0; i <= bytenum; i += len) { 721 len = u8_nextlen(s+i); 722 charnum++; 723 } 724 return charnum; 725 } 726 727 /* runetochar() adapted from rune.c in the Plan 9 distributione */ 728 729 enum 730 { 731 Runeerror = 128, /* from somewhere else */ 732 Runemax = 0x10FFFF, 733 734 Bit1 = 7, 735 Bitx = 6, 736 Bit2 = 5, 737 Bit3 = 4, 738 Bit4 = 3, 739 Bit5 = 2, 740 741 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 742 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 743 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 744 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 745 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 746 T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ 747 748 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ 749 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ 750 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ 751 Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ 752 753 Maskx = (1<<Bitx)-1, /* 0011 1111 */ 754 Testx = Maskx ^ 0xFF, /* 1100 0000 */ 755 756 }; 757 758 int runetochar(char *str, int c) 759 { 760 /* one character sequence 00000-0007F => 00-7F */ 761 if (c <= Rune1) { 762 str[0] = c; 763 return 1; 764 } 765 766 /* two character sequence 00080-007FF => T2 Tx */ 767 if (c <= Rune2) { 768 str[0] = T2 | (c >> 1*Bitx); 769 str[1] = Tx | (c & Maskx); 770 return 2; 771 } 772 773 /* three character sequence 00800-0FFFF => T3 Tx Tx */ 774 if (c > Runemax) 775 c = Runeerror; 776 if (c <= Rune3) { 777 str[0] = T3 | (c >> 2*Bitx); 778 str[1] = Tx | ((c >> 1*Bitx) & Maskx); 779 str[2] = Tx | 
(c & Maskx); 780 return 3; 781 } 782 783 /* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */ 784 str[0] = T4 | (c >> 3*Bitx); 785 str[1] = Tx | ((c >> 2*Bitx) & Maskx); 786 str[2] = Tx | ((c >> 1*Bitx) & Maskx); 787 str[3] = Tx | (c & Maskx); 788 return 4; 789 } 790 791 792 /* ========== end of utf8 code =========== */ 793 794 795 796 Cell *matchop(Node **a, int n) /* ~ and match() */ 797 { 798 Cell *x, *y, *z; 799 char *s, *t; 800 int i; 801 int cstart, cpatlen, len; 802 fa *pfa; 803 int (*mf)(fa *, const char *) = match, mode = 0; 804 805 if (n == MATCHFCN) { 806 mf = pmatch; 807 mode = 1; 808 } 809 x = execute(a[1]); /* a[1] = target text */ 810 s = getsval(x); 811 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 812 i = (*mf)((fa *) a[2], s); 813 else { 814 y = execute(a[2]); /* a[2] = regular expr */ 815 t = getsval(y); 816 pfa = makedfa(t, mode); 817 i = (*mf)(pfa, s); 818 tempfree(y); 819 } 820 z = x; 821 if (n == MATCHFCN) { 822 int start = patbeg - s + 1; /* origin 1 */ 823 if (patlen < 0) { 824 start = 0; /* not found */ 825 } else { 826 cstart = u8_byte2char(s, start-1); 827 cpatlen = 0; 828 for (i = 0; i < patlen; i += len) { 829 len = u8_nextlen(patbeg+i); 830 cpatlen++; 831 } 832 833 start = cstart; 834 patlen = cpatlen; 835 } 836 837 setfval(rstartloc, (Awkfloat) start); 838 setfval(rlengthloc, (Awkfloat) patlen); 839 x = gettemp(); 840 x->tval = NUM; 841 x->fval = start; 842 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 843 x = True; 844 else 845 x = False; 846 847 tempfree(z); 848 return x; 849 } 850 851 852 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 853 { 854 Cell *x, *y; 855 int i; 856 857 x = execute(a[0]); 858 i = istrue(x); 859 tempfree(x); 860 switch (n) { 861 case BOR: 862 if (i) return(True); 863 y = execute(a[1]); 864 i = istrue(y); 865 tempfree(y); 866 if (i) return(True); 867 else return(False); 868 case AND: 869 if ( !i ) return(False); 870 y = execute(a[1]); 871 i = 
istrue(y); 872 tempfree(y); 873 if (i) return(True); 874 else return(False); 875 case NOT: 876 if (i) return(False); 877 else return(True); 878 default: /* can't happen */ 879 FATAL("unknown boolean operator %d", n); 880 } 881 return 0; /*NOTREACHED*/ 882 } 883 884 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 885 { 886 int i; 887 Cell *x, *y; 888 Awkfloat j; 889 bool x_is_nan, y_is_nan; 890 891 x = execute(a[0]); 892 y = execute(a[1]); 893 x_is_nan = isnan(x->fval); 894 y_is_nan = isnan(y->fval); 895 if (x->tval&NUM && y->tval&NUM) { 896 if ((x_is_nan || y_is_nan) && n != NE) 897 return(False); 898 j = x->fval - y->fval; 899 i = j<0? -1: (j>0? 1: 0); 900 } else { 901 i = strcmp(getsval(x), getsval(y)); 902 } 903 tempfree(x); 904 tempfree(y); 905 switch (n) { 906 case LT: if (i<0) return(True); 907 else return(False); 908 case LE: if (i<=0) return(True); 909 else return(False); 910 case NE: if (x_is_nan && y_is_nan) return(True); 911 else if (i!=0) return(True); 912 else return(False); 913 case EQ: if (i == 0) return(True); 914 else return(False); 915 case GE: if (i>=0) return(True); 916 else return(False); 917 case GT: if (i>0) return(True); 918 else return(False); 919 default: /* can't happen */ 920 FATAL("unknown relational operator %d", n); 921 } 922 return 0; /*NOTREACHED*/ 923 } 924 925 void tfree(Cell *a) /* free a tempcell */ 926 { 927 if (freeable(a)) { 928 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 929 xfree(a->sval); 930 } 931 if (a == tmps) 932 FATAL("tempcell list is curdled"); 933 a->cnext = tmps; 934 tmps = a; 935 } 936 937 Cell *gettemp(void) /* get a tempcell */ 938 { int i; 939 Cell *x; 940 941 if (!tmps) { 942 tmps = (Cell *) calloc(100, sizeof(*tmps)); 943 if (!tmps) 944 FATAL("out of space for temporaries"); 945 for (i = 1; i < 100; i++) 946 tmps[i-1].cnext = &tmps[i]; 947 tmps[i-1].cnext = NULL; 948 } 949 x = tmps; 950 tmps = x->cnext; 951 *x = tempcell; 952 return(x); 953 } 954 955 Cell *indirect(Node **a, int n) 
/* $( a[0] ) */ 956 { 957 Awkfloat val; 958 Cell *x; 959 int m; 960 char *s; 961 962 x = execute(a[0]); 963 val = getfval(x); /* freebsd: defend against super large field numbers */ 964 if ((Awkfloat)INT_MAX < val) 965 FATAL("trying to access out of range field %s", x->nval); 966 m = (int) val; 967 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 968 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 969 /* BUG: can x->nval ever be null??? */ 970 tempfree(x); 971 x = fieldadr(m); 972 x->ctype = OCELL; /* BUG? why are these needed? */ 973 x->csub = CFLD; 974 return(x); 975 } 976 977 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 978 { 979 int k, m, n; 980 int mb, nb; 981 char *s; 982 int temp; 983 Cell *x, *y, *z = NULL; 984 985 x = execute(a[0]); 986 y = execute(a[1]); 987 if (a[2] != NULL) 988 z = execute(a[2]); 989 s = getsval(x); 990 k = u8_strlen(s) + 1; 991 if (k <= 1) { 992 tempfree(x); 993 tempfree(y); 994 if (a[2] != NULL) { 995 tempfree(z); 996 } 997 x = gettemp(); 998 setsval(x, ""); 999 return(x); 1000 } 1001 m = (int) getfval(y); 1002 if (m <= 0) 1003 m = 1; 1004 else if (m > k) 1005 m = k; 1006 tempfree(y); 1007 if (a[2] != NULL) { 1008 n = (int) getfval(z); 1009 tempfree(z); 1010 } else 1011 n = k - 1; 1012 if (n < 0) 1013 n = 0; 1014 else if (n > k - m) 1015 n = k - m; 1016 /* m is start, n is length from there */ 1017 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 1018 y = gettemp(); 1019 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ 1020 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ 1021 1022 temp = s[nb]; /* with thanks to John Linderman */ 1023 s[nb] = '\0'; 1024 setsval(y, s + mb); 1025 s[nb] = temp; 1026 tempfree(x); 1027 return(y); 1028 } 1029 1030 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 1031 { 1032 Cell *x, *y, *z; 1033 char *s1, *s2, *p1, *p2, *q; 1034 Awkfloat v = 0.0; 1035 1036 x = execute(a[0]); 1037 s1 = getsval(x); 1038 y = execute(a[1]); 1039 s2 
= getsval(y); 1040 1041 z = gettemp(); 1042 for (p1 = s1; *p1 != '\0'; p1++) { 1043 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 1044 continue; 1045 if (*p2 == '\0') { 1046 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ 1047 1048 /* should be a function: used in match() as well */ 1049 int i, len; 1050 v = 0; 1051 for (i = 0; i < p1-s1+1; i += len) { 1052 len = u8_nextlen(s1+i); 1053 v++; 1054 } 1055 break; 1056 } 1057 } 1058 tempfree(x); 1059 tempfree(y); 1060 setfval(z, v); 1061 return(z); 1062 } 1063 1064 int has_utf8(char *s) /* return 1 if s contains any utf-8 (2 bytes or more) character */ 1065 { 1066 int n; 1067 1068 for (n = 0; *s != 0; s += n) { 1069 n = u8_nextlen(s); 1070 if (n > 1) 1071 return 1; 1072 } 1073 return 0; 1074 } 1075 1076 #define MAXNUMSIZE 50 1077 1078 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 1079 { 1080 char *fmt; 1081 char *p, *t; 1082 const char *os; 1083 Cell *x; 1084 int flag = 0, n; 1085 int fmtwd; /* format width */ 1086 int fmtsz = recsize; 1087 char *buf = *pbuf; 1088 int bufsize = *pbufsize; 1089 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 1090 #define BUFSZ(a) (bufsize - ((a) - buf)) 1091 1092 static bool first = true; 1093 static bool have_a_format = false; 1094 1095 if (first) { 1096 char xbuf[100]; 1097 1098 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 1099 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 1100 first = false; 1101 } 1102 1103 os = s; 1104 p = buf; 1105 if ((fmt = (char *) malloc(fmtsz)) == NULL) 1106 FATAL("out of memory in format()"); 1107 while (*s) { 1108 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 1109 if (*s != '%') { 1110 *p++ = *s++; 1111 continue; 1112 } 1113 if (*(s+1) == '%') { 1114 *p++ = '%'; 1115 s += 2; 1116 continue; 1117 } 1118 fmtwd = atoi(s+1); 1119 if (fmtwd < 0) 1120 fmtwd = -fmtwd; 1121 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 1122 for (t = fmt; (*t++ = *s) != '\0'; s++) { 1123 if 
(!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 1124 FATAL("format item %.30s... ran format() out of memory", os); 1125 /* Ignore size specifiers */ 1126 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 1127 t--; 1128 continue; 1129 } 1130 if (isalpha((uschar)*s)) 1131 break; 1132 if (*s == '$') { 1133 FATAL("'$' not permitted in awk formats"); 1134 } 1135 if (*s == '*') { 1136 if (a == NULL) { 1137 FATAL("not enough args in printf(%s)", os); 1138 } 1139 x = execute(a); 1140 a = a->nnext; 1141 snprintf(t - 1, FMTSZ(t - 1), 1142 "%d", fmtwd=(int) getfval(x)); 1143 if (fmtwd < 0) 1144 fmtwd = -fmtwd; 1145 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 1146 t = fmt + strlen(fmt); 1147 tempfree(x); 1148 } 1149 } 1150 *t = '\0'; 1151 if (fmtwd < 0) 1152 fmtwd = -fmtwd; 1153 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 1154 switch (*s) { 1155 case 'a': case 'A': 1156 if (have_a_format) 1157 flag = *s; 1158 else 1159 flag = 'f'; 1160 break; 1161 case 'f': case 'e': case 'g': case 'E': case 'G': 1162 flag = 'f'; 1163 break; 1164 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 1165 flag = (*s == 'd' || *s == 'i') ? 
'd' : 'u'; 1166 *(t-1) = 'j'; 1167 *t = *s; 1168 *++t = '\0'; 1169 break; 1170 case 's': 1171 flag = 's'; 1172 break; 1173 case 'c': 1174 flag = 'c'; 1175 break; 1176 default: 1177 WARNING("weird printf conversion %s", fmt); 1178 flag = '?'; 1179 break; 1180 } 1181 if (a == NULL) 1182 FATAL("not enough args in printf(%s)", os); 1183 x = execute(a); 1184 a = a->nnext; 1185 n = MAXNUMSIZE; 1186 if (fmtwd > n) 1187 n = fmtwd; 1188 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 1189 switch (flag) { 1190 case '?': 1191 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 1192 t = getsval(x); 1193 n = strlen(t); 1194 if (fmtwd > n) 1195 n = fmtwd; 1196 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 1197 p += strlen(p); 1198 snprintf(p, BUFSZ(p), "%s", t); 1199 break; 1200 case 'a': 1201 case 'A': 1202 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 1203 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 1204 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 1205 1206 case 's': { 1207 t = getsval(x); 1208 n = strlen(t); 1209 /* if simple format or no utf-8 in the string, sprintf works */ 1210 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { 1211 if (fmtwd > n) 1212 n = fmtwd; 1213 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 1214 FATAL("huge string/format (%d chars) in printf %.30s..." 
\ 1215 " ran format() out of memory", n, t); 1216 snprintf(p, BUFSZ(p), fmt, t); 1217 break; 1218 } 1219 1220 /* get here if string has utf-8 chars and fmt is not plain %s */ 1221 /* "%-w.ps", where -, w and .p are all optional */ 1222 /* '0' before the w is a flag character */ 1223 /* fmt points at % */ 1224 int ljust = 0, wid = 0, prec = n, pad = 0; 1225 char *f = fmt+1; 1226 if (f[0] == '-') { 1227 ljust = 1; 1228 f++; 1229 } 1230 // flags '0' and '+' are recognized but skipped 1231 if (f[0] == '0') { 1232 f++; 1233 if (f[0] == '+') 1234 f++; 1235 } 1236 if (f[0] == '+') { 1237 f++; 1238 if (f[0] == '0') 1239 f++; 1240 } 1241 if (isdigit(f[0])) { /* there is a wid */ 1242 wid = strtol(f, &f, 10); 1243 } 1244 if (f[0] == '.') { /* there is a .prec */ 1245 prec = strtol(++f, &f, 10); 1246 } 1247 if (prec > u8_strlen(t)) 1248 prec = u8_strlen(t); 1249 pad = wid>prec ? wid - prec : 0; // has to be >= 0 1250 int i, k, n; 1251 1252 if (ljust) { // print prec chars from t, then pad blanks 1253 n = u8_char2byte(t, prec); 1254 for (k = 0; k < n; k++) { 1255 //putchar(t[k]); 1256 *p++ = t[k]; 1257 } 1258 for (i = 0; i < pad; i++) { 1259 //printf(" "); 1260 *p++ = ' '; 1261 } 1262 } else { // print pad blanks, then prec chars from t 1263 for (i = 0; i < pad; i++) { 1264 //printf(" "); 1265 *p++ = ' '; 1266 } 1267 n = u8_char2byte(t, prec); 1268 for (k = 0; k < n; k++) { 1269 //putchar(t[k]); 1270 *p++ = t[k]; 1271 } 1272 } 1273 *p = 0; 1274 break; 1275 } 1276 1277 case 'c': { 1278 /* 1279 * If a numeric value is given, awk should just turn 1280 * it into a character and print it: 1281 * BEGIN { printf("%c\n", 65) } 1282 * prints "A". 1283 * 1284 * But what if the numeric value is > 128 and 1285 * represents a valid Unicode code point?!? We do 1286 * our best to convert it back into UTF-8. If we 1287 * can't, we output the encoding of the Unicode 1288 * "invalid character", 0xFFFD. 
1289 */ 1290 if (isnum(x)) { 1291 int charval = (int) getfval(x); 1292 1293 if (charval != 0) { 1294 if (charval < 128 || awk_mb_cur_max == 1) 1295 snprintf(p, BUFSZ(p), fmt, charval); 1296 else { 1297 // possible unicode character 1298 size_t count; 1299 char *bs = wide_char_to_byte_str(charval, &count); 1300 1301 if (bs == NULL) { // invalid character 1302 // use unicode invalid character, 0xFFFD 1303 static char invalid_char[] = "\357\277\275"; 1304 bs = invalid_char; 1305 count = 3; 1306 } 1307 t = bs; 1308 n = count; 1309 goto format_percent_c; 1310 } 1311 } else { 1312 *p++ = '\0'; /* explicit null byte */ 1313 *p = '\0'; /* next output will start here */ 1314 } 1315 break; 1316 } 1317 t = getsval(x); 1318 n = u8_nextlen(t); 1319 format_percent_c: 1320 if (n < 2) { /* not utf8 */ 1321 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 1322 break; 1323 } 1324 1325 // utf8 character, almost same song and dance as for %s 1326 int ljust = 0, wid = 0, prec = n, pad = 0; 1327 char *f = fmt+1; 1328 if (f[0] == '-') { 1329 ljust = 1; 1330 f++; 1331 } 1332 // flags '0' and '+' are recognized but skipped 1333 if (f[0] == '0') { 1334 f++; 1335 if (f[0] == '+') 1336 f++; 1337 } 1338 if (f[0] == '+') { 1339 f++; 1340 if (f[0] == '0') 1341 f++; 1342 } 1343 if (isdigit(f[0])) { /* there is a wid */ 1344 wid = strtol(f, &f, 10); 1345 } 1346 if (f[0] == '.') { /* there is a .prec */ 1347 prec = strtol(++f, &f, 10); 1348 } 1349 if (prec > 1) // %c --> only one character 1350 prec = 1; 1351 pad = wid>prec ? 
wid - prec : 0; // has to be >= 0 1352 int i; 1353 1354 if (ljust) { // print one char from t, then pad blanks 1355 for (i = 0; i < n; i++) 1356 *p++ = t[i]; 1357 for (i = 0; i < pad; i++) { 1358 //printf(" "); 1359 *p++ = ' '; 1360 } 1361 } else { // print pad blanks, then prec chars from t 1362 for (i = 0; i < pad; i++) { 1363 //printf(" "); 1364 *p++ = ' '; 1365 } 1366 for (i = 0; i < n; i++) 1367 *p++ = t[i]; 1368 } 1369 *p = 0; 1370 break; 1371 } 1372 default: 1373 FATAL("can't happen: bad conversion %c in format()", flag); 1374 } 1375 1376 tempfree(x); 1377 p += strlen(p); 1378 s++; 1379 } 1380 *p = '\0'; 1381 free(fmt); 1382 for ( ; a; a = a->nnext) { /* evaluate any remaining args */ 1383 x = execute(a); 1384 tempfree(x); 1385 } 1386 *pbuf = buf; 1387 *pbufsize = bufsize; 1388 return p - buf; 1389 } 1390 1391 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 1392 { 1393 Cell *x; 1394 Node *y; 1395 char *buf; 1396 int bufsz=3*recsize; 1397 1398 if ((buf = (char *) malloc(bufsz)) == NULL) 1399 FATAL("out of memory in awksprintf"); 1400 y = a[0]->nnext; 1401 x = execute(a[0]); 1402 if (format(&buf, &bufsz, getsval(x), y) == -1) 1403 FATAL("sprintf string %.30s... too long. can't happen.", buf); 1404 tempfree(x); 1405 x = gettemp(); 1406 x->sval = buf; 1407 x->tval = STR; 1408 return(x); 1409 } 1410 1411 Cell *awkprintf(Node **a, int n) /* printf */ 1412 { /* a[0] is list of args, starting with format string */ 1413 /* a[1] is redirection operator, a[2] is redirection file */ 1414 FILE *fp; 1415 Cell *x; 1416 Node *y; 1417 char *buf; 1418 int len; 1419 int bufsz=3*recsize; 1420 1421 if ((buf = (char *) malloc(bufsz)) == NULL) 1422 FATAL("out of memory in awkprintf"); 1423 y = a[0]->nnext; 1424 x = execute(a[0]); 1425 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1426 FATAL("printf string %.30s... too long. 
can't happen.", buf); 1427 tempfree(x); 1428 if (a[1] == NULL) { 1429 /* fputs(buf, stdout); */ 1430 fwrite(buf, len, 1, stdout); 1431 if (ferror(stdout)) 1432 FATAL("write error on stdout"); 1433 } else { 1434 fp = redirect(ptoi(a[1]), a[2]); 1435 /* fputs(buf, fp); */ 1436 fwrite(buf, len, 1, fp); 1437 fflush(fp); 1438 if (ferror(fp)) 1439 FATAL("write error on %s", filename(fp)); 1440 } 1441 free(buf); 1442 return(True); 1443 } 1444 1445 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1446 { 1447 Awkfloat i, j = 0; 1448 double v; 1449 Cell *x, *y, *z; 1450 1451 x = execute(a[0]); 1452 i = getfval(x); 1453 tempfree(x); 1454 if (n != UMINUS && n != UPLUS) { 1455 y = execute(a[1]); 1456 j = getfval(y); 1457 tempfree(y); 1458 } 1459 z = gettemp(); 1460 switch (n) { 1461 case ADD: 1462 i += j; 1463 break; 1464 case MINUS: 1465 i -= j; 1466 break; 1467 case MULT: 1468 i *= j; 1469 break; 1470 case DIVIDE: 1471 if (j == 0) 1472 FATAL("division by zero"); 1473 i /= j; 1474 break; 1475 case MOD: 1476 if (j == 0) 1477 FATAL("division by zero in mod"); 1478 modf(i/j, &v); 1479 i = i - j * v; 1480 break; 1481 case UMINUS: 1482 i = -i; 1483 break; 1484 case UPLUS: /* handled by getfval(), above */ 1485 break; 1486 case POWER: 1487 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1488 i = ipow(i, (int) j); 1489 else { 1490 errno = 0; 1491 i = errcheck(pow(i, j), "pow"); 1492 } 1493 break; 1494 default: /* can't happen */ 1495 FATAL("illegal arithmetic operator %d", n); 1496 } 1497 setfval(z, i); 1498 return(z); 1499 } 1500 1501 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1502 { 1503 double v; 1504 1505 if (n <= 0) 1506 return 1; 1507 v = ipow(x, n/2); 1508 if (n % 2 == 0) 1509 return v * v; 1510 else 1511 return x * v * v; 1512 } 1513 1514 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. 
*/ 1515 { 1516 Cell *x, *z; 1517 int k; 1518 Awkfloat xf; 1519 1520 x = execute(a[0]); 1521 xf = getfval(x); 1522 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1523 if (n == PREINCR || n == PREDECR) { 1524 setfval(x, xf + k); 1525 return(x); 1526 } 1527 z = gettemp(); 1528 setfval(z, xf); 1529 setfval(x, xf + k); 1530 tempfree(x); 1531 return(z); 1532 } 1533 1534 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1535 { /* this is subtle; don't muck with it. */ 1536 Cell *x, *y; 1537 Awkfloat xf, yf; 1538 double v; 1539 1540 y = execute(a[1]); 1541 x = execute(a[0]); 1542 if (n == ASSIGN) { /* ordinary assignment */ 1543 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1544 ; /* self-assignment: leave alone unless it's a field or NF */ 1545 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1546 yf = getfval(y); 1547 setsval(x, getsval(y)); 1548 x->fval = yf; 1549 x->tval |= NUM; 1550 } 1551 else if (isstr(y)) 1552 setsval(x, getsval(y)); 1553 else if (isnum(y)) 1554 setfval(x, getfval(y)); 1555 else 1556 funnyvar(y, "read value of"); 1557 tempfree(y); 1558 return(x); 1559 } 1560 xf = getfval(x); 1561 yf = getfval(y); 1562 switch (n) { 1563 case ADDEQ: 1564 xf += yf; 1565 break; 1566 case SUBEQ: 1567 xf -= yf; 1568 break; 1569 case MULTEQ: 1570 xf *= yf; 1571 break; 1572 case DIVEQ: 1573 if (yf == 0) 1574 FATAL("division by zero in /="); 1575 xf /= yf; 1576 break; 1577 case MODEQ: 1578 if (yf == 0) 1579 FATAL("division by zero in %%="); 1580 modf(xf/yf, &v); 1581 xf = xf - yf * v; 1582 break; 1583 case POWEQ: 1584 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1585 xf = ipow(xf, (int) yf); 1586 else { 1587 errno = 0; 1588 xf = errcheck(pow(xf, yf), "pow"); 1589 } 1590 break; 1591 default: 1592 FATAL("illegal assignment operator %d", n); 1593 break; 1594 } 1595 tempfree(y); 1596 setfval(x, xf); 1597 return(x); 1598 } 1599 1600 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1601 { 1602 Cell *x, *y, *z; 1603 int n1, n2; 1604 char *s = 
NULL; 1605 int ssz = 0; 1606 1607 x = execute(a[0]); 1608 n1 = strlen(getsval(x)); 1609 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); 1610 memcpy(s, x->sval, n1); 1611 1612 tempfree(x); 1613 1614 y = execute(a[1]); 1615 n2 = strlen(getsval(y)); 1616 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1617 memcpy(s + n1, y->sval, n2); 1618 s[n1 + n2] = '\0'; 1619 1620 tempfree(y); 1621 1622 z = gettemp(); 1623 z->sval = s; 1624 z->tval = STR; 1625 1626 return(z); 1627 } 1628 1629 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1630 { 1631 Cell *x; 1632 1633 if (a[0] == NULL) 1634 x = execute(a[1]); 1635 else { 1636 x = execute(a[0]); 1637 if (istrue(x)) { 1638 tempfree(x); 1639 x = execute(a[1]); 1640 } 1641 } 1642 return x; 1643 } 1644 1645 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1646 { 1647 Cell *x; 1648 int pair; 1649 1650 pair = ptoi(a[3]); 1651 if (pairstack[pair] == 0) { 1652 x = execute(a[0]); 1653 if (istrue(x)) 1654 pairstack[pair] = 1; 1655 tempfree(x); 1656 } 1657 if (pairstack[pair] == 1) { 1658 x = execute(a[1]); 1659 if (istrue(x)) 1660 pairstack[pair] = 0; 1661 tempfree(x); 1662 x = execute(a[2]); 1663 return(x); 1664 } 1665 return(False); 1666 } 1667 1668 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1669 { 1670 Cell *x = NULL, *y, *ap; 1671 const char *s, *origs, *t; 1672 const char *fs = NULL; 1673 char *origfs = NULL; 1674 int sep; 1675 char temp, num[50]; 1676 int n, tempstat, arg3type; 1677 int j; 1678 double result; 1679 1680 y = execute(a[0]); /* source string */ 1681 origs = s = strdup(getsval(y)); 1682 tempfree(y); 1683 arg3type = ptoi(a[3]); 1684 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */ 1685 fs = getsval(fsloc); 1686 } else if (arg3type == STRING) { /* split(str,arr,"string") */ 1687 x = execute(a[2]); 1688 fs = origfs = strdup(getsval(x)); 1689 tempfree(x); 1690 } else if (arg3type == REGEXPR) { 1691 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1692 } 
else { 1693 FATAL("illegal type of split"); 1694 } 1695 sep = *fs; 1696 ap = execute(a[1]); /* array name */ 1697 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ 1698 freesymtab(ap); 1699 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1700 ap->tval &= ~STR; 1701 ap->tval |= ARR; 1702 ap->sval = (char *) makesymtab(NSYMTAB); 1703 1704 n = 0; 1705 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1706 /* split(s, a, //); have to arrange that it looks like empty sep */ 1707 arg3type = 0; 1708 fs = ""; 1709 sep = 0; 1710 } 1711 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1712 fa *pfa; 1713 if (arg3type == REGEXPR) { /* it's ready already */ 1714 pfa = (fa *) a[2]; 1715 } else { 1716 pfa = makedfa(fs, 1); 1717 } 1718 if (nematch(pfa,s)) { 1719 tempstat = pfa->initstat; 1720 pfa->initstat = 2; 1721 do { 1722 n++; 1723 snprintf(num, sizeof(num), "%d", n); 1724 temp = *patbeg; 1725 setptr(patbeg, '\0'); 1726 if (is_number(s, & result)) 1727 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1728 else 1729 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1730 setptr(patbeg, temp); 1731 s = patbeg + patlen; 1732 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1733 n++; 1734 snprintf(num, sizeof(num), "%d", n); 1735 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1736 pfa->initstat = tempstat; 1737 goto spdone; 1738 } 1739 } while (nematch(pfa,s)); 1740 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1741 /* cf gsub and refldbld */ 1742 } 1743 n++; 1744 snprintf(num, sizeof(num), "%d", n); 1745 if (is_number(s, & result)) 1746 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1747 else 1748 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1749 spdone: 1750 pfa = NULL; 1751 1752 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */ 1753 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ 1754 for (;;) { 
1755 char *fr = newt; 1756 n++; 1757 if (*s == '"' ) { /* start of "..." */ 1758 for (s++ ; *s != '\0'; ) { 1759 if (*s == '"' && s[1] != '\0' && s[1] == '"') { 1760 s += 2; /* doubled quote */ 1761 *fr++ = '"'; 1762 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { 1763 s++; /* skip over closing quote */ 1764 break; 1765 } else { 1766 *fr++ = *s++; 1767 } 1768 } 1769 *fr++ = 0; 1770 } else { /* unquoted field */ 1771 while (*s != ',' && *s != '\0') 1772 *fr++ = *s++; 1773 *fr++ = 0; 1774 } 1775 snprintf(num, sizeof(num), "%d", n); 1776 if (is_number(newt, &result)) 1777 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval); 1778 else 1779 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval); 1780 if (*s++ == '\0') 1781 break; 1782 } 1783 free(newt); 1784 1785 } else if (!CSV && sep == ' ') { /* usual case: split on white space */ 1786 for (n = 0; ; ) { 1787 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1788 while (ISWS(*s)) 1789 s++; 1790 if (*s == '\0') 1791 break; 1792 n++; 1793 t = s; 1794 do 1795 s++; 1796 while (*s != '\0' && !ISWS(*s)); 1797 temp = *s; 1798 setptr(s, '\0'); 1799 snprintf(num, sizeof(num), "%d", n); 1800 if (is_number(t, & result)) 1801 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1802 else 1803 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1804 setptr(s, temp); 1805 if (*s != '\0') 1806 s++; 1807 } 1808 1809 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1810 for (n = 0; *s != '\0'; s += u8_nextlen(s)) { 1811 char buf[10]; 1812 n++; 1813 snprintf(num, sizeof(num), "%d", n); 1814 1815 for (j = 0; j < u8_nextlen(s); j++) { 1816 buf[j] = s[j]; 1817 } 1818 buf[j] = '\0'; 1819 1820 if (isdigit((uschar)buf[0])) 1821 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1822 else 1823 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1824 } 1825 1826 } else if (*s != '\0') { /* some random single character */ 1827 for (;;) { 1828 n++; 1829 t = s; 1830 while (*s != sep && *s != '\0') 1831 
s++; 1832 temp = *s; 1833 setptr(s, '\0'); 1834 snprintf(num, sizeof(num), "%d", n); 1835 if (is_number(t, & result)) 1836 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1837 else 1838 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1839 setptr(s, temp); 1840 if (*s++ == '\0') 1841 break; 1842 } 1843 } 1844 tempfree(ap); 1845 xfree(origs); 1846 xfree(origfs); 1847 x = gettemp(); 1848 x->tval = NUM; 1849 x->fval = n; 1850 return(x); 1851 } 1852 1853 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1854 { 1855 Cell *x; 1856 1857 x = execute(a[0]); 1858 if (istrue(x)) { 1859 tempfree(x); 1860 x = execute(a[1]); 1861 } else { 1862 tempfree(x); 1863 x = execute(a[2]); 1864 } 1865 return(x); 1866 } 1867 1868 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1869 { 1870 Cell *x; 1871 1872 x = execute(a[0]); 1873 if (istrue(x)) { 1874 tempfree(x); 1875 x = execute(a[1]); 1876 } else if (a[2] != NULL) { 1877 tempfree(x); 1878 x = execute(a[2]); 1879 } 1880 return(x); 1881 } 1882 1883 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1884 { 1885 Cell *x; 1886 1887 for (;;) { 1888 x = execute(a[0]); 1889 if (!istrue(x)) 1890 return(x); 1891 tempfree(x); 1892 x = execute(a[1]); 1893 if (isbreak(x)) { 1894 x = True; 1895 return(x); 1896 } 1897 if (isnext(x) || isexit(x) || isret(x)) 1898 return(x); 1899 tempfree(x); 1900 } 1901 } 1902 1903 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1904 { 1905 Cell *x; 1906 1907 for (;;) { 1908 x = execute(a[0]); 1909 if (isbreak(x)) 1910 return True; 1911 if (isnext(x) || isexit(x) || isret(x)) 1912 return(x); 1913 tempfree(x); 1914 x = execute(a[1]); 1915 if (!istrue(x)) 1916 return(x); 1917 tempfree(x); 1918 } 1919 } 1920 1921 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1922 { 1923 Cell *x; 1924 1925 x = execute(a[0]); 1926 tempfree(x); 1927 for (;;) { 1928 if (a[1]!=NULL) { 1929 x = execute(a[1]); 1930 if (!istrue(x)) return(x); 1931 else tempfree(x); 1932 } 1933 x = 
execute(a[3]); 1934 if (isbreak(x)) /* turn off break */ 1935 return True; 1936 if (isnext(x) || isexit(x) || isret(x)) 1937 return(x); 1938 tempfree(x); 1939 x = execute(a[2]); 1940 tempfree(x); 1941 } 1942 } 1943 1944 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1945 { 1946 Cell *x, *vp, *arrayp, *cp, *ncp; 1947 Array *tp; 1948 int i; 1949 1950 vp = execute(a[0]); 1951 arrayp = execute(a[1]); 1952 if (!isarr(arrayp)) { 1953 return True; 1954 } 1955 tp = (Array *) arrayp->sval; 1956 tempfree(arrayp); 1957 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1958 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1959 setsval(vp, cp->nval); 1960 ncp = cp->cnext; 1961 x = execute(a[2]); 1962 if (isbreak(x)) { 1963 tempfree(vp); 1964 return True; 1965 } 1966 if (isnext(x) || isexit(x) || isret(x)) { 1967 tempfree(vp); 1968 return(x); 1969 } 1970 tempfree(x); 1971 } 1972 } 1973 return True; 1974 } 1975 1976 static char *nawk_convert(const char *s, int (*fun_c)(int), 1977 wint_t (*fun_wc)(wint_t)) 1978 { 1979 char *buf = NULL; 1980 char *pbuf = NULL; 1981 const char *ps = NULL; 1982 size_t n = 0; 1983 wchar_t wc; 1984 const size_t sz = awk_mb_cur_max; 1985 int unused; 1986 1987 if (sz == 1) { 1988 buf = tostring(s); 1989 1990 for (pbuf = buf; *pbuf; pbuf++) 1991 *pbuf = fun_c((uschar)*pbuf); 1992 1993 return buf; 1994 } else { 1995 /* upper/lower character may be shorter/longer */ 1996 buf = tostringN(s, strlen(s) * sz + 1); 1997 1998 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1999 /* 2000 * Reset internal state here too. 2001 * Assign result to avoid a compiler warning. (Casting to void 2002 * doesn't work.) 2003 * Increment said variable to avoid a different warning. 
2004 */ 2005 unused = wctomb(NULL, L'\0'); 2006 unused++; 2007 2008 ps = s; 2009 pbuf = buf; 2010 while (n = mbtowc(&wc, ps, sz), 2011 n > 0 && n != (size_t)-1 && n != (size_t)-2) 2012 { 2013 ps += n; 2014 2015 n = wctomb(pbuf, fun_wc(wc)); 2016 if (n == (size_t)-1) 2017 FATAL("illegal wide character %s", s); 2018 2019 pbuf += n; 2020 } 2021 2022 *pbuf = '\0'; 2023 2024 if (n) 2025 FATAL("illegal byte sequence %s", s); 2026 2027 return buf; 2028 } 2029 } 2030 2031 #ifdef __DJGPP__ 2032 static wint_t towupper(wint_t wc) 2033 { 2034 if (wc >= 0 && wc < 256) 2035 return toupper(wc & 0xFF); 2036 2037 return wc; 2038 } 2039 2040 static wint_t towlower(wint_t wc) 2041 { 2042 if (wc >= 0 && wc < 256) 2043 return tolower(wc & 0xFF); 2044 2045 return wc; 2046 } 2047 #endif 2048 2049 static char *nawk_toupper(const char *s) 2050 { 2051 return nawk_convert(s, toupper, towupper); 2052 } 2053 2054 static char *nawk_tolower(const char *s) 2055 { 2056 return nawk_convert(s, tolower, towlower); 2057 } 2058 2059 2060 2061 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 2062 { 2063 Cell *x, *y; 2064 Awkfloat u; 2065 int t, sz; 2066 Awkfloat tmp; 2067 char *buf, *fmt; 2068 Node *nextarg; 2069 FILE *fp; 2070 int status = 0; 2071 time_t tv; 2072 struct tm *tm; 2073 int estatus = 0; 2074 2075 t = ptoi(a[0]); 2076 x = execute(a[1]); 2077 nextarg = a[1]->nnext; 2078 switch (t) { 2079 case FLENGTH: 2080 if (isarr(x)) 2081 u = ((Array *) x->sval)->nelem; /* GROT. 
should be function*/ 2082 else 2083 u = u8_strlen(getsval(x)); 2084 break; 2085 case FLOG: 2086 errno = 0; 2087 u = errcheck(log(getfval(x)), "log"); 2088 break; 2089 case FINT: 2090 modf(getfval(x), &u); break; 2091 case FEXP: 2092 errno = 0; 2093 u = errcheck(exp(getfval(x)), "exp"); 2094 break; 2095 case FSQRT: 2096 errno = 0; 2097 u = errcheck(sqrt(getfval(x)), "sqrt"); 2098 break; 2099 case FSIN: 2100 u = sin(getfval(x)); break; 2101 case FCOS: 2102 u = cos(getfval(x)); break; 2103 case FATAN: 2104 if (nextarg == NULL) { 2105 WARNING("atan2 requires two arguments; returning 1.0"); 2106 u = 1.0; 2107 } else { 2108 y = execute(a[1]->nnext); 2109 u = atan2(getfval(x), getfval(y)); 2110 tempfree(y); 2111 nextarg = nextarg->nnext; 2112 } 2113 break; 2114 case FCOMPL: 2115 u = ~((int)getfval(x)); 2116 break; 2117 case FAND: 2118 if (nextarg == 0) { 2119 WARNING("and requires two arguments; returning 0"); 2120 u = 0; 2121 break; 2122 } 2123 y = execute(a[1]->nnext); 2124 u = ((int)getfval(x)) & ((int)getfval(y)); 2125 tempfree(y); 2126 nextarg = nextarg->nnext; 2127 break; 2128 case FFOR: 2129 if (nextarg == 0) { 2130 WARNING("or requires two arguments; returning 0"); 2131 u = 0; 2132 break; 2133 } 2134 y = execute(a[1]->nnext); 2135 u = ((int)getfval(x)) | ((int)getfval(y)); 2136 tempfree(y); 2137 nextarg = nextarg->nnext; 2138 break; 2139 case FXOR: 2140 if (nextarg == 0) { 2141 WARNING("xor requires two arguments; returning 0"); 2142 u = 0; 2143 break; 2144 } 2145 y = execute(a[1]->nnext); 2146 u = ((int)getfval(x)) ^ ((int)getfval(y)); 2147 tempfree(y); 2148 nextarg = nextarg->nnext; 2149 break; 2150 case FLSHIFT: 2151 if (nextarg == 0) { 2152 WARNING("lshift requires two arguments; returning 0"); 2153 u = 0; 2154 break; 2155 } 2156 y = execute(a[1]->nnext); 2157 u = ((int)getfval(x)) << ((int)getfval(y)); 2158 tempfree(y); 2159 nextarg = nextarg->nnext; 2160 break; 2161 case FRSHIFT: 2162 if (nextarg == 0) { 2163 WARNING("rshift requires two arguments; returning 
0"); 2164 u = 0; 2165 break; 2166 } 2167 y = execute(a[1]->nnext); 2168 u = ((int)getfval(x)) >> ((int)getfval(y)); 2169 tempfree(y); 2170 nextarg = nextarg->nnext; 2171 break; 2172 case FSYSTEM: 2173 fflush(stdout); /* in case something is buffered already */ 2174 estatus = status = system(getsval(x)); 2175 if (status != -1) { 2176 if (WIFEXITED(status)) { 2177 estatus = WEXITSTATUS(status); 2178 } else if (WIFSIGNALED(status)) { 2179 estatus = WTERMSIG(status) + 256; 2180 #ifdef WCOREDUMP 2181 if (WCOREDUMP(status)) 2182 estatus += 256; 2183 #endif 2184 } else /* something else?!? */ 2185 estatus = 0; 2186 } 2187 /* else estatus was set to -1 */ 2188 u = estatus; 2189 break; 2190 case FRAND: 2191 /* random() returns numbers in [0..2^31-1] 2192 * in order to get a number in [0, 1), divide it by 2^31 2193 */ 2194 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 2195 break; 2196 case FSRAND: 2197 if (isrec(x)) /* no argument provided */ 2198 u = time((time_t *)0); 2199 else 2200 u = getfval(x); 2201 tmp = u; 2202 srandom((unsigned long) u); 2203 u = srand_seed; 2204 srand_seed = tmp; 2205 break; 2206 case FTOUPPER: 2207 case FTOLOWER: 2208 if (t == FTOUPPER) 2209 buf = nawk_toupper(getsval(x)); 2210 else 2211 buf = nawk_tolower(getsval(x)); 2212 tempfree(x); 2213 x = gettemp(); 2214 setsval(x, buf); 2215 free(buf); 2216 return x; 2217 case FFLUSH: 2218 if (isrec(x) || strlen(getsval(x)) == 0) { 2219 flush_all(); /* fflush() or fflush("") -> all */ 2220 u = 0; 2221 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 2222 u = EOF; 2223 else 2224 u = fflush(fp); 2225 break; 2226 case FSYSTIME: 2227 u = time((time_t *) 0); 2228 break; 2229 case FSTRFTIME: 2230 /* strftime([format [,timestamp]]) */ 2231 if (nextarg) { 2232 y = execute(nextarg); 2233 nextarg = nextarg->nnext; 2234 tv = (time_t) getfval(y); 2235 tempfree(y); 2236 } else 2237 tv = time((time_t *) 0); 2238 tm = localtime(&tv); 2239 if (tm == NULL) 2240 FATAL("bad time %ld", (long)tv); 2241 2242 if 
(isrec(x)) { 2243 /* format argument not provided, use default */ 2244 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 2245 } else 2246 fmt = tostring(getsval(x)); 2247 2248 sz = 32; 2249 buf = NULL; 2250 do { 2251 if ((buf = realloc(buf, (sz *= 2))) == NULL) 2252 FATAL("out of memory in strftime"); 2253 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 2254 2255 y = gettemp(); 2256 setsval(y, buf); 2257 free(fmt); 2258 free(buf); 2259 2260 return y; 2261 default: /* can't happen */ 2262 FATAL("illegal function type %d", t); 2263 break; 2264 } 2265 tempfree(x); 2266 x = gettemp(); 2267 setfval(x, u); 2268 if (nextarg != NULL) { 2269 WARNING("warning: function has too many arguments"); 2270 for ( ; nextarg; nextarg = nextarg->nnext) { 2271 y = execute(nextarg); 2272 tempfree(y); 2273 } 2274 } 2275 return(x); 2276 } 2277 2278 Cell *printstat(Node **a, int n) /* print a[0] */ 2279 { 2280 Node *x; 2281 Cell *y; 2282 FILE *fp; 2283 2284 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 2285 fp = stdout; 2286 else 2287 fp = redirect(ptoi(a[1]), a[2]); 2288 for (x = a[0]; x != NULL; x = x->nnext) { 2289 y = execute(x); 2290 fputs(getpssval(y), fp); 2291 tempfree(y); 2292 if (x->nnext == NULL) 2293 fputs(getsval(orsloc), fp); 2294 else 2295 fputs(getsval(ofsloc), fp); 2296 } 2297 if (a[1] != NULL) 2298 fflush(fp); 2299 if (ferror(fp)) 2300 FATAL("write error on %s", filename(fp)); 2301 return(True); 2302 } 2303 2304 Cell *nullproc(Node **a, int n) 2305 { 2306 return 0; 2307 } 2308 2309 2310 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 2311 { 2312 FILE *fp; 2313 Cell *x; 2314 char *fname; 2315 2316 x = execute(b); 2317 fname = getsval(x); 2318 fp = openfile(a, fname, NULL); 2319 if (fp == NULL) 2320 FATAL("can't open file %s", fname); 2321 tempfree(x); 2322 return fp; 2323 } 2324 2325 struct files { 2326 FILE *fp; 2327 const char *fname; 2328 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 2329 } *files; 2330 2331 size_t nfiles; 2332 
2333 static void stdinit(void) /* in case stdin, etc., are not constants */ 2334 { 2335 nfiles = FOPEN_MAX; 2336 files = (struct files *) calloc(nfiles, sizeof(*files)); 2337 if (files == NULL) 2338 FATAL("can't allocate file memory for %zu files", nfiles); 2339 files[0].fp = stdin; 2340 files[0].fname = tostring("/dev/stdin"); 2341 files[0].mode = LT; 2342 files[1].fp = stdout; 2343 files[1].fname = tostring("/dev/stdout"); 2344 files[1].mode = GT; 2345 files[2].fp = stderr; 2346 files[2].fname = tostring("/dev/stderr"); 2347 files[2].mode = GT; 2348 } 2349 2350 FILE *openfile(int a, const char *us, bool *pnewflag) 2351 { 2352 const char *s = us; 2353 size_t i; 2354 int m; 2355 FILE *fp = NULL; 2356 2357 if (*s == '\0') 2358 FATAL("null file name in print or getline"); 2359 for (i = 0; i < nfiles; i++) 2360 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 2361 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 2362 a == FFLUSH)) { 2363 if (pnewflag) 2364 *pnewflag = false; 2365 return files[i].fp; 2366 } 2367 if (a == FFLUSH) /* didn't find it, so don't create it! */ 2368 return NULL; 2369 2370 for (i = 0; i < nfiles; i++) 2371 if (files[i].fp == NULL) 2372 break; 2373 if (i >= nfiles) { 2374 struct files *nf; 2375 size_t nnf = nfiles + FOPEN_MAX; 2376 nf = (struct files *) realloc(files, nnf * sizeof(*nf)); 2377 if (nf == NULL) 2378 FATAL("cannot grow files for %s and %zu files", s, nnf); 2379 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 2380 nfiles = nnf; 2381 files = nf; 2382 } 2383 fflush(stdout); /* force a semblance of order */ 2384 m = a; 2385 if (a == GT) { 2386 fp = fopen(s, "w"); 2387 } else if (a == APPEND) { 2388 fp = fopen(s, "a"); 2389 m = GT; /* so can mix > and >> */ 2390 } else if (a == '|') { /* output pipe */ 2391 fp = popen(s, "w"); 2392 } else if (a == LE) { /* input pipe */ 2393 fp = popen(s, "r"); 2394 } else if (a == LT) { /* getline <file */ 2395 fp = strcmp(s, "-") == 0 ? 
stdin : fopen(s, "r"); /* "-" is stdin */ 2396 } else /* can't happen */ 2397 FATAL("illegal redirection %d", a); 2398 if (fp != NULL) { 2399 files[i].fname = tostring(s); 2400 files[i].fp = fp; 2401 files[i].mode = m; 2402 if (pnewflag) 2403 *pnewflag = true; 2404 if (fp != stdin && fp != stdout && fp != stderr) 2405 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 2406 } 2407 return fp; 2408 } 2409 2410 const char *filename(FILE *fp) 2411 { 2412 size_t i; 2413 2414 for (i = 0; i < nfiles; i++) 2415 if (fp == files[i].fp) 2416 return files[i].fname; 2417 return "???"; 2418 } 2419 2420 Cell *closefile(Node **a, int n) 2421 { 2422 Cell *x; 2423 size_t i; 2424 bool stat; 2425 2426 x = execute(a[0]); 2427 getsval(x); 2428 stat = true; 2429 for (i = 0; i < nfiles; i++) { 2430 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 2431 continue; 2432 if (files[i].mode == GT || files[i].mode == '|') 2433 fflush(files[i].fp); 2434 if (ferror(files[i].fp)) { 2435 if ((files[i].mode == GT && files[i].fp != stderr) 2436 || files[i].mode == '|') 2437 FATAL("write error on %s", files[i].fname); 2438 else 2439 WARNING("i/o error occurred on %s", files[i].fname); 2440 } 2441 if (files[i].fp == stdin || files[i].fp == stdout || 2442 files[i].fp == stderr) 2443 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2444 else if (files[i].mode == '|' || files[i].mode == LE) 2445 stat = pclose(files[i].fp) == -1; 2446 else 2447 stat = fclose(files[i].fp) == EOF; 2448 if (stat) 2449 WARNING("i/o error occurred closing %s", files[i].fname); 2450 xfree(files[i].fname); 2451 files[i].fname = NULL; /* watch out for ref thru this */ 2452 files[i].fp = NULL; 2453 break; 2454 } 2455 tempfree(x); 2456 x = gettemp(); 2457 setfval(x, (Awkfloat) (stat ? -1 : 0)); 2458 return(x); 2459 } 2460 2461 void closeall(void) 2462 { 2463 size_t i; 2464 bool stat = false; 2465 2466 for (i = 0; i < nfiles; i++) { 2467 if (! 
files[i].fp)
			continue;
		/* flush entries opened for output (GT or '|') before testing for errors */
		if (files[i].mode == GT || files[i].mode == '|')
			fflush(files[i].fp);
		if (ferror(files[i].fp)) {
			if ((files[i].mode == GT && files[i].fp != stderr)
			  || files[i].mode == '|')
				FATAL("write error on %s", files[i].fname);
			else
				WARNING("i/o error occurred on %s", files[i].fname);
		}
		/* the standard streams are left open */
		if (files[i].fp == stdin || files[i].fp == stdout ||
		    files[i].fp == stderr)
			continue;
		if (files[i].mode == '|' || files[i].mode == LE)
			stat = pclose(files[i].fp) == -1;
		else
			stat = fclose(files[i].fp) == EOF;
		if (stat)
			WARNING("i/o error occurred while closing %s", files[i].fname);
	}
}

/* flush_all: fflush() every stream currently recorded in the files table. */
static void flush_all(void)
{
	size_t i;

	for (i = 0; i < nfiles; i++)
		if (files[i].fp)
			fflush(files[i].fp);
}

/* backsub is defined elsewhere; dosub() calls it when the replacement text has a backslash */
void backsub(char **pb_ptr, const char **sptr_ptr);

/*
 * dosub: implement sub() and gsub().
 *
 * a[0] == NULL means a[1] is an already-compiled regular expression;
 * otherwise a[1] is evaluated and compiled with makedfa().  a[2] is the
 * replacement text and a[3] the string that is searched and updated.
 * subop is SUB (replace the first match) or GSUB (replace all matches).
 *
 * The result is assembled in a malloc'd buffer that is only allocated
 * once a first match is found; with no match the target is untouched.
 * In the replacement text '&' stands for the matched text and a
 * backslash is handed to backsub().  Returns a temporary NUM cell
 * holding the match count m.
 */
Cell *dosub(Node **a, int subop)        /* sub and gsub */
{
	fa *pfa;
	int tempstat = 0;
	char *repl;
	Cell *x;

	char *buf = NULL;
	char *pb = NULL;
	int bufsz = recsize;

	const char *r, *s;
	const char *start;
	const char *noempty = NULL;      /* empty match disallowed here */
	size_t m = 0;                    /* match count */
	size_t whichm;                   /* which match to select, 0 = global */
	int mtype;                       /* match type */

	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];
	} else {
		x = execute(a[1]);
		pfa = makedfa(getsval(x), 1);
		tempfree(x);
	}

	x = execute(a[2]);	/* replacement string */
	repl = tostring(getsval(x));	/* private copy; released with xfree() below */
	tempfree(x);

	switch (subop) {
	case SUB:
		whichm = 1;
		x = execute(a[3]);    /* source string */
		break;
	case GSUB:
		whichm = 0;
		x = execute(a[3]);    /* source string */
		break;
	default:
		FATAL("dosub: unrecognized subop: %d", subop);
	}

	start = getsval(x);
	while (pmatch(pfa, start)) {
		if (buf == NULL) {
			/* first match: allocate the output buffer and save initstat for restoring later */
			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
				FATAL("out of memory in dosub");
			tempstat = pfa->initstat;
			pfa->initstat = 2;
		}

		/* match types */
		#define	MT_IGNORE  0  /* unselected or invalid */
		#define MT_INSERT  1  /* selected, empty */
		#define MT_REPLACE 2  /* selected, not empty */

		/* an empty match just after replacement is invalid */

		if (patbeg == noempty && patlen == 0) {
			mtype = MT_IGNORE;    /* invalid, not counted */
		} else if (whichm == ++m || whichm == 0) {
			mtype = patlen ? MT_REPLACE : MT_INSERT;
		} else {
			mtype = MT_IGNORE;    /* unselected, but counted */
		}

		/* leading text: */
		if (patbeg > start) {
			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
				recsize, &pb, "dosub");
			s = start;
			while (s < patbeg)
				*pb++ = *s++;
		}

		if (mtype == MT_IGNORE)
			goto matching_text;   /* skip replacement text */

		/* expand the replacement: backslash via backsub(), '&' = matched text, else literal */
		r = repl;
		while (*r != 0) {
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
			if (*r == '\\') {
				backsub(&pb, &r);
			} else if (*r == '&') {
				r++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
					&pb, "dosub");
				for (s = patbeg; s < patbeg+patlen; )
					*pb++ = *s++;
			} else {
				*pb++ = *r++;
			}
		}

matching_text:
		if (mtype == MT_REPLACE || *patbeg == '\0')
			goto next_search;     /* skip matching text */

		/* keep the matched text; for an empty match copy one character so the scan advances */
		if (patlen == 0)
			patlen = u8_nextlen(patbeg);
		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
		s = patbeg;
		while (s < patbeg + patlen)
			*pb++ = *s++;

next_search:
		start = patbeg + patlen;
		/* stop after the selected match (never true for gsub once m >= 1) or at end of string */
		if (m == whichm || *patbeg == '\0')
			break;
		if (mtype == MT_REPLACE)
			noempty = start;

		#undef MT_IGNORE
		#undef MT_INSERT
		#undef MT_REPLACE
	}

	xfree(repl);

	if (buf != NULL) {
		pfa->initstat = tempstat;

		/* trailing text */
		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
		while ((*pb++ = *start++) != '\0')
			;

		/* store the rebuilt string back into the target cell */
		setsval(x, buf);
		free(buf);
	}

	tempfree(x);
	x = gettemp();
	x->tval = NUM;
	x->fval = m;
	return x;
}

Cell *gensub(Node **a, int nnn)	/* global selective substitute */
	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
{
	Cell *x, *y, *res, *h;
	char *rptr;
	const char *sptr;
	char *buf, *pb;
	const char *t, *q;
	fa *pfa;
	int mflag, tempstat, num, whichm;
	int bufsz = recsize;

	if ((buf = malloc(bufsz)) == NULL)
		FATAL("out of memory in gensub");
	mflag = 0;	/* if mflag == 0, can replace empty string */
	num = 0;
	x = execute(a[4]);	/* source string */
	t = getsval(x);
	res = copycell(x);	/* target string - initially copy of source */
	res->csub = CTEMP;	/* result values are temporary */
	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	h = execute(a[3]);	/* which matches should be replaced */
	sptr = getsval(h);
	if (sptr[0] == 'g' || sptr[0] == 'G')
		whichm = -1;
	else {
		/*
		 * The specified number is index of replacement, starting
		 * from 1. GNU awk treats index lower than 0 same as
		 * 1, we do same for compatibility.
		 */
		whichm = (int) getfval(h) - 1;
		if (whichm < 0)
			whichm = 0;
	}
	tempfree(h);

	if (pmatch(pfa, t)) {
		char *sl;

		tempstat = pfa->initstat;
		pfa->initstat = 2;
		pb = buf;
		rptr = getsval(y);
		/*
		 * XXX if there are any backreferences in subst string,
		 * complain now.
2694 */ 2695 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2696 if (strchr("0123456789", sl[1])) { 2697 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2698 } 2699 } 2700 2701 do { 2702 if (whichm >= 0 && whichm != num) { 2703 num++; 2704 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2705 2706 /* copy the part of string up to and including 2707 * match to output buffer */ 2708 while (t < patbeg + patlen) 2709 *pb++ = *t++; 2710 continue; 2711 } 2712 2713 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2714 if (mflag == 0) { /* can replace empty */ 2715 num++; 2716 sptr = rptr; 2717 while (*sptr != 0) { 2718 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2719 if (*sptr == '\\') { 2720 backsub(&pb, &sptr); 2721 } else if (*sptr == '&') { 2722 sptr++; 2723 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2724 for (q = patbeg; q < patbeg+patlen; ) 2725 *pb++ = *q++; 2726 } else 2727 *pb++ = *sptr++; 2728 } 2729 } 2730 if (*t == 0) /* at end */ 2731 goto done; 2732 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2733 *pb++ = *t++; 2734 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2735 FATAL("gensub result0 %.30s too big; can't happen", buf); 2736 mflag = 0; 2737 } 2738 else { /* matched nonempty string */ 2739 num++; 2740 sptr = t; 2741 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2742 while (sptr < patbeg) 2743 *pb++ = *sptr++; 2744 sptr = rptr; 2745 while (*sptr != 0) { 2746 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2747 if (*sptr == '\\') { 2748 backsub(&pb, &sptr); 2749 } else if (*sptr == '&') { 2750 sptr++; 2751 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2752 for (q = patbeg; q < patbeg+patlen; ) 2753 *pb++ = *q++; 2754 } else 2755 *pb++ = *sptr++; 2756 } 2757 t = patbeg + patlen; 2758 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2759 goto done; 2760 if (pb > buf + bufsz) 2761 
FATAL("gensub result1 %.30s too big; can't happen", buf); 2762 mflag = 1; 2763 } 2764 } while (pmatch(pfa,t)); 2765 sptr = t; 2766 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2767 while ((*pb++ = *sptr++) != 0) 2768 ; 2769 done: if (pb > buf + bufsz) 2770 FATAL("gensub result2 %.30s too big; can't happen", buf); 2771 *pb = '\0'; 2772 setsval(res, buf); 2773 pfa->initstat = tempstat; 2774 } 2775 tempfree(x); 2776 tempfree(y); 2777 free(buf); 2778 return(res); 2779 } 2780 2781 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2782 { /* sptr[0] == '\\' */ 2783 char *pb = *pb_ptr; 2784 const char *sptr = *sptr_ptr; 2785 static bool first = true; 2786 static bool do_posix = false; 2787 2788 if (first) { 2789 first = false; 2790 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2791 } 2792 2793 if (sptr[1] == '\\') { 2794 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2795 *pb++ = '\\'; 2796 *pb++ = '&'; 2797 sptr += 4; 2798 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2799 *pb++ = '\\'; 2800 sptr += 2; 2801 } else if (do_posix) { /* \\x -> \x */ 2802 sptr++; 2803 *pb++ = *sptr++; 2804 } else { /* \\x -> \\x */ 2805 *pb++ = *sptr++; 2806 *pb++ = *sptr++; 2807 } 2808 } else if (sptr[1] == '&') { /* literal & */ 2809 sptr++; 2810 *pb++ = *sptr++; 2811 } else /* literal \ */ 2812 *pb++ = *sptr++; 2813 2814 *pb_ptr = pb; 2815 *sptr_ptr = sptr; 2816 } 2817 2818 static char *wide_char_to_byte_str(int rune, size_t *outlen) 2819 { 2820 static char buf[5]; 2821 int len; 2822 2823 if (rune < 0 || rune > 0x10FFFF) 2824 return NULL; 2825 2826 memset(buf, 0, sizeof(buf)); 2827 2828 len = 0; 2829 if (rune <= 0x0000007F) { 2830 buf[len++] = rune; 2831 } else if (rune <= 0x000007FF) { 2832 // 110xxxxx 10xxxxxx 2833 buf[len++] = 0xC0 | (rune >> 6); 2834 buf[len++] = 0x80 | (rune & 0x3F); 2835 } else if (rune <= 0x0000FFFF) { 2836 // 1110xxxx 10xxxxxx 10xxxxxx 2837 buf[len++] = 0xE0 | (rune >> 12); 2838 buf[len++] = 0x80 | 
((rune >> 6) & 0x3F); 2839 buf[len++] = 0x80 | (rune & 0x3F); 2840 2841 } else { 2842 // 0x00010000 - 0x10FFFF 2843 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 2844 buf[len++] = 0xF0 | (rune >> 18); 2845 buf[len++] = 0x80 | ((rune >> 12) & 0x3F); 2846 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2847 buf[len++] = 0x80 | (rune & 0x3F); 2848 } 2849 2850 *outlen = len; 2851 buf[len++] = '\0'; 2852 2853 return buf; 2854 } 2855