1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #define DEBUG
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <wctype.h>
30 #include <fcntl.h>
31 #include <setjmp.h>
32 #include <limits.h>
33 #include <math.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include <time.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <sys/wait.h>
40 #include "awk.h"
41 #include "awkgram.tab.h"
42
43
44 static void stdinit(void);
45 static void flush_all(void);
46 static char *wide_char_to_byte_str(int rune, size_t *outlen);
47
/*
 * tempfree(x): hand Cell x back to the pool of temporaries via tfree(),
 * but only when istemp(x) holds.
 * The macro form is the one compiled in (#if 1).  The function form is an
 * alternative that also warns about an OCELL whose csub lies outside the
 * CUNK..CFREE range.
 */
#if 1
#define	tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	if (p->ctype == OCELL) {
		if (p->csub < CUNK || p->csub > CFREE)
			WARNING("bad csub %d in Cell %d %s",
				p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
60
61 /* do we really need these? */
62 /* #ifdef _NFILE */
63 /* #ifndef FOPEN_MAX */
64 /* #define FOPEN_MAX _NFILE */
65 /* #endif */
66 /* #endif */
67 /* */
68 /* #ifndef FOPEN_MAX */
69 /* #define FOPEN_MAX 40 */ /* max number of open files */
70 /* #endif */
71 /* */
72 /* #ifndef RAND_MAX */
73 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
74 /* #endif */
75
76 jmp_buf env;
77 extern int pairstack[];
78 extern Awkfloat srand_seed;
79
80 Node *winner = NULL; /* root of parse tree */
81 Cell *tmps; /* free temporary cells for execution */
82
83 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
84 Cell *True = &truecell;
85 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
86 Cell *False = &falsecell;
87 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
88 Cell *jbreak = &breakcell;
89 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
90 Cell *jcont = &contcell;
91 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
92 Cell *jnext = &nextcell;
93 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
94 Cell *jnextfile = &nextfilecell;
95 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
96 Cell *jexit = &exitcell;
97 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
98 Cell *jret = &retcell;
99 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
100
101 Node *curnode = NULL; /* the node being executed, for debugging */
102
103 /* buffer memory management */
/*
 * adjbuf: buffer memory management.  Make sure the managed buffer holds
 * at least minlen bytes, growing it with realloc() when it does not.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 means no rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none;
 *          it is re-based onto the new buffer after a successful grow
 * whatrtn: name of the calling routine if failure should cause fatal error,
 *          or 0 if the caller wants to handle the failure itself
 *
 * return 0 for realloc failure, !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *tbuf = NULL;
	int rminlen, boff;
	int overflow = 0;

	if (minlen <= *psiz)
		return 1;	/* already big enough */

	rminlen = quantum ? minlen % quantum : 0;
	boff = pbptr ? *pbptr - *pbuf : 0;
	/* round up to next multiple of quantum */
	if (rminlen) {
		int pad = quantum - rminlen;

		/* rounding must not wrap an int; treat that as out of memory */
		if (pad > 0 && minlen > INT_MAX - pad)
			overflow = 1;
		else
			minlen += pad;
	}
	if (!overflow)
		tbuf = (char *) realloc(*pbuf, minlen);
	/* whatrtn may legitimately be 0, so guard the %s with NN() */
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
	if (tbuf == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}
	*pbuf = tbuf;
	*psiz = minlen;
	if (pbptr)
		*pbptr = tbuf + boff;
	return 1;
}
137
/*
 * run: execution of parse tree starts here.
 * Calls stdinit(), executes the tree rooted at a, then calls closeall().
 * The Cell returned by execute() is not used.
 */
void run(Node *a)
{
	stdinit();
	(void) execute(a);
	closeall();
}
145
execute(Node * u)146 Cell *execute(Node *u) /* execute a node of the parse tree */
147 {
148 Cell *(*proc)(Node **, int);
149 Cell *x;
150 Node *a;
151
152 if (u == NULL)
153 return(True);
154 for (a = u; ; a = a->nnext) {
155 curnode = a;
156 if (isvalue(a)) {
157 x = (Cell *) (a->narg[0]);
158 if (isfld(x) && !donefld)
159 fldbld();
160 else if (isrec(x) && !donerec)
161 recbld();
162 return(x);
163 }
164 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
165 FATAL("illegal statement");
166 proc = proctab[a->nobj-FIRSTTOKEN];
167 x = (*proc)(a->narg, a->nobj);
168 if (isfld(x) && !donefld)
169 fldbld();
170 else if (isrec(x) && !donerec)
171 recbld();
172 if (isexpr(a))
173 return(x);
174 if (isjump(x))
175 return(x);
176 if (a->nnext == NULL)
177 return(x);
178 tempfree(x);
179 }
180 }
181
182
program(Node ** a,int n)183 Cell *program(Node **a, int n) /* execute an awk program */
184 { /* a[0] = BEGIN, a[1] = body, a[2] = END */
185 Cell *x;
186
187 if (setjmp(env) != 0)
188 goto ex;
189 if (a[0]) { /* BEGIN */
190 x = execute(a[0]);
191 if (isexit(x))
192 return(True);
193 if (isjump(x))
194 FATAL("illegal break, continue, next or nextfile from BEGIN");
195 tempfree(x);
196 }
197 if (a[1] || a[2])
198 while (getrec(&record, &recsize, true) > 0) {
199 x = execute(a[1]);
200 if (isexit(x))
201 break;
202 tempfree(x);
203 }
204 ex:
205 if (setjmp(env) != 0) /* handles exit within END */
206 goto ex1;
207 if (a[2]) { /* END */
208 x = execute(a[2]);
209 if (isbreak(x) || isnext(x) || iscont(x))
210 FATAL("illegal break, continue, next or nextfile from END");
211 tempfree(x);
212 }
213 ex1:
214 return(True);
215 }
216
217 struct Frame { /* stack frame for awk function calls */
218 int nargs; /* number of arguments in this call */
219 Cell *fcncell; /* pointer to Cell for function */
220 Cell **args; /* pointer to array of arguments after execute */
221 Cell *retval; /* return value */
222 };
223
224 #define NARGS 50 /* max args in a call */
225
226 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
227 int nframe = 0; /* number of frames allocated */
228 struct Frame *frp = NULL; /* frame pointer. bottom level unused */
229
call(Node ** a,int n)230 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
231 {
232 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
233 int i, ncall, ndef;
234 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
235 Node *x;
236 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
237 Cell *y, *z, *fcn;
238 char *s;
239
240 fcn = execute(a[0]); /* the function itself */
241 s = fcn->nval;
242 if (!isfcn(fcn))
243 FATAL("calling undefined function %s", s);
244 if (frame == NULL) {
245 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
246 if (frame == NULL)
247 FATAL("out of space for stack frames calling %s", s);
248 }
249 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
250 ncall++;
251 ndef = (int) fcn->fval; /* args in defn */
252 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
253 if (ncall > ndef)
254 WARNING("function %s called with %d args, uses only %d",
255 s, ncall, ndef);
256 if (ncall + ndef > NARGS)
257 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
258 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
259 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
260 y = execute(x);
261 oargs[i] = y;
262 DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
263 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
264 if (isfcn(y))
265 FATAL("can't use function %s as argument in %s", y->nval, s);
266 if (isarr(y))
267 args[i] = y; /* arrays by ref */
268 else
269 args[i] = copycell(y);
270 tempfree(y);
271 }
272 for ( ; i < ndef; i++) { /* add null args for ones not provided */
273 args[i] = gettemp();
274 *args[i] = newcopycell;
275 }
276 frp++; /* now ok to up frame */
277 if (frp >= frame + nframe) {
278 int dfp = frp - frame; /* old index */
279 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
280 if (frame == NULL)
281 FATAL("out of space for stack frames in %s", s);
282 frp = frame + dfp;
283 }
284 frp->fcncell = fcn;
285 frp->args = args;
286 frp->nargs = ndef; /* number defined with (excess are locals) */
287 frp->retval = gettemp();
288
289 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
290 y = execute((Node *)(fcn->sval)); /* execute body */
291 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
292
293 for (i = 0; i < ndef; i++) {
294 Cell *t = frp->args[i];
295 if (isarr(t)) {
296 if (t->csub == CCOPY) {
297 if (i >= ncall) {
298 freesymtab(t);
299 t->csub = CTEMP;
300 tempfree(t);
301 } else {
302 oargs[i]->tval = t->tval;
303 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
304 oargs[i]->sval = t->sval;
305 tempfree(t);
306 }
307 }
308 } else if (t != y) { /* kludge to prevent freeing twice */
309 t->csub = CTEMP;
310 tempfree(t);
311 } else if (t == y && t->csub == CCOPY) {
312 t->csub = CTEMP;
313 tempfree(t);
314 freed = 1;
315 }
316 }
317 tempfree(fcn);
318 if (isexit(y) || isnext(y))
319 return y;
320 if (freed == 0) {
321 tempfree(y); /* don't free twice! */
322 }
323 z = frp->retval; /* return value */
324 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
325 frp--;
326 return(z);
327 }
328
/*
 * copycell: make a copy of a cell in a temp.
 * The copy drops the CON, FLD and REC type bits (copy is not constant or
 * field) and is marked CCOPY.  A string value is duplicated with
 * tostring() and owned by the copy; otherwise the copy is flagged DONTFREE.
 */
Cell *copycell(Cell *x)
{
	Cell *dup = gettemp();

	dup->tval = x->tval & ~(CON|FLD|REC);
	dup->csub = CCOPY;	/* prevents freeing until call is over */
	dup->nval = x->nval;	/* BUG? */
	dup->fval = x->fval;
	if (isstr(x) /* || x->ctype == OCELL */) {
		dup->sval = tostring(x->sval);
		dup->tval &= ~DONTFREE;
	} else {
		dup->tval |= DONTFREE;
	}
	return dup;
}
347
arg(Node ** a,int n)348 Cell *arg(Node **a, int n) /* nth argument of a function */
349 {
350
351 n = ptoi(a[0]); /* argument number, counting from 0 */
352 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
353 if (n+1 > frp->nargs)
354 FATAL("argument #%d of function %s was not supplied",
355 n+1, frp->fcncell->nval);
356 return frp->args[n];
357 }
358
jump(Node ** a,int n)359 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
360 {
361 Cell *y;
362
363 switch (n) {
364 case EXIT:
365 if (a[0] != NULL) {
366 y = execute(a[0]);
367 errorflag = (int) getfval(y);
368 tempfree(y);
369 }
370 longjmp(env, 1);
371 case RETURN:
372 if (a[0] != NULL) {
373 y = execute(a[0]);
374 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
375 setsval(frp->retval, getsval(y));
376 frp->retval->fval = getfval(y);
377 frp->retval->tval |= NUM;
378 }
379 else if (y->tval & STR)
380 setsval(frp->retval, getsval(y));
381 else if (y->tval & NUM)
382 setfval(frp->retval, getfval(y));
383 else /* can't happen */
384 FATAL("bad type variable %d", y->tval);
385 tempfree(y);
386 }
387 return(jret);
388 case NEXT:
389 return(jnext);
390 case NEXTFILE:
391 nextfile();
392 return(jnextfile);
393 case BREAK:
394 return(jbreak);
395 case CONTINUE:
396 return(jcont);
397 default: /* can't happen */
398 FATAL("illegal jump type %d", n);
399 }
400 return 0; /* not reached */
401 }
402
awkgetline(Node ** a,int n)403 Cell *awkgetline(Node **a, int n) /* get next line from specific input */
404 { /* a[0] is variable, a[1] is operator, a[2] is filename */
405 Cell *r, *x;
406 extern Cell **fldtab;
407 FILE *fp;
408 char *buf;
409 int bufsize = recsize;
410 int mode;
411 bool newflag;
412 double result;
413
414 if ((buf = (char *) malloc(bufsize)) == NULL)
415 FATAL("out of memory in getline");
416
417 fflush(stdout); /* in case someone is waiting for a prompt */
418 r = gettemp();
419 if (a[1] != NULL) { /* getline < file */
420 x = execute(a[2]); /* filename */
421 mode = ptoi(a[1]);
422 if (mode == '|') /* input pipe */
423 mode = LE; /* arbitrary flag */
424 fp = openfile(mode, getsval(x), &newflag);
425 tempfree(x);
426 if (fp == NULL)
427 n = -1;
428 else
429 n = readrec(&buf, &bufsize, fp, newflag);
430 if (n <= 0) {
431 ;
432 } else if (a[0] != NULL) { /* getline var <file */
433 x = execute(a[0]);
434 setsval(x, buf);
435 if (is_number(x->sval, & result)) {
436 x->fval = result;
437 x->tval |= NUM;
438 }
439 tempfree(x);
440 } else { /* getline <file */
441 setsval(fldtab[0], buf);
442 if (is_number(fldtab[0]->sval, & result)) {
443 fldtab[0]->fval = result;
444 fldtab[0]->tval |= NUM;
445 }
446 }
447 } else { /* bare getline; use current input */
448 if (a[0] == NULL) /* getline */
449 n = getrec(&record, &recsize, true);
450 else { /* getline var */
451 n = getrec(&buf, &bufsize, false);
452 if (n > 0) {
453 x = execute(a[0]);
454 setsval(x, buf);
455 if (is_number(x->sval, & result)) {
456 x->fval = result;
457 x->tval |= NUM;
458 }
459 tempfree(x);
460 }
461 }
462 }
463 setfval(r, (Awkfloat) n);
464 free(buf);
465 return r;
466 }
467
getnf(Node ** a,int n)468 Cell *getnf(Node **a, int n) /* get NF */
469 {
470 if (!donefld)
471 fldbld();
472 return (Cell *) a[0];
473 }
474
475 static char *
makearraystring(Node * p,const char * func)476 makearraystring(Node *p, const char *func)
477 {
478 char *buf;
479 int bufsz = recsize;
480 size_t blen;
481
482 if ((buf = (char *) malloc(bufsz)) == NULL) {
483 FATAL("%s: out of memory", func);
484 }
485
486 blen = 0;
487 buf[blen] = '\0';
488
489 for (; p; p = p->nnext) {
490 Cell *x = execute(p); /* expr */
491 char *s = getsval(x);
492 size_t seplen = strlen(getsval(subseploc));
493 size_t nsub = p->nnext ? seplen : 0;
494 size_t slen = strlen(s);
495 size_t tlen = blen + slen + nsub;
496
497 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
498 FATAL("%s: out of memory %s[%s...]",
499 func, x->nval, buf);
500 }
501 memcpy(buf + blen, s, slen);
502 if (nsub) {
503 memcpy(buf + blen + slen, *SUBSEP, nsub);
504 }
505 buf[tlen] = '\0';
506 blen = tlen;
507 tempfree(x);
508 }
509 return buf;
510 }
511
array(Node ** a,int n)512 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
513 {
514 Cell *x, *z;
515 char *buf;
516
517 x = execute(a[0]); /* Cell* for symbol table */
518 buf = makearraystring(a[1], __func__);
519 if (!isarr(x)) {
520 DPRINTF("making %s into an array\n", NN(x->nval));
521 if (freeable(x))
522 xfree(x->sval);
523 x->tval &= ~(STR|NUM|DONTFREE);
524 x->tval |= ARR;
525 x->sval = (char *) makesymtab(NSYMTAB);
526 }
527 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
528 z->ctype = OCELL;
529 z->csub = CVAR;
530 tempfree(x);
531 free(buf);
532 return(z);
533 }
534
awkdelete(Node ** a,int n)535 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
536 {
537 Cell *x;
538
539 x = execute(a[0]); /* Cell* for symbol table */
540 if (x == symtabloc) {
541 FATAL("cannot delete SYMTAB or its elements");
542 }
543 if (!isarr(x))
544 return True;
545 if (a[1] == NULL) { /* delete the elements, not the table */
546 freesymtab(x);
547 x->tval &= ~STR;
548 x->tval |= ARR;
549 x->sval = (char *) makesymtab(NSYMTAB);
550 } else {
551 char *buf = makearraystring(a[1], __func__);
552 freeelem(x, buf);
553 free(buf);
554 }
555 tempfree(x);
556 return True;
557 }
558
intest(Node ** a,int n)559 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
560 {
561 Cell *ap, *k;
562 char *buf;
563
564 ap = execute(a[1]); /* array name */
565 if (!isarr(ap)) {
566 DPRINTF("making %s into an array\n", ap->nval);
567 if (freeable(ap))
568 xfree(ap->sval);
569 ap->tval &= ~(STR|NUM|DONTFREE);
570 ap->tval |= ARR;
571 ap->sval = (char *) makesymtab(NSYMTAB);
572 }
573 buf = makearraystring(a[0], __func__);
574 k = lookup(buf, (Array *) ap->sval);
575 tempfree(ap);
576 free(buf);
577 if (k == NULL)
578 return(False);
579 else
580 return(True);
581 }
582
583
584 /* ======== utf-8 code ========== */
585
586 /*
587 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
588 * or utf-8. u8_isutf tests whether a string starts with a valid
589 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
590 * u8_nextlen returns length of next valid sequence, which is
591 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
592 * u8_strlen returns length of string in valid utf-8 sequences
593 * and/or high-bit bytes. Conversion functions go between byte
594 * number and character number.
595 *
596 * In theory, this behaves the same as before for non-utf8 bytes.
597 *
598 * Limited checking! This is a potential security hole.
599 */
600
601 /* is s the beginning of a valid utf-8 string? */
602 /* return length 1..4 if yes, 0 if no */
u8_isutf(const char * s)603 int u8_isutf(const char *s)
604 {
605 int n, ret;
606 unsigned char c;
607
608 c = s[0];
609 if (c < 128 || awk_mb_cur_max == 1)
610 return 1; /* what if it's 0? */
611
612 n = strlen(s);
613 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
614 ret = 2; /* 110xxxxx 10xxxxxx */
615 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
616 && (s[2] & 0xC0) == 0x80) {
617 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
618 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
619 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
620 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
621 } else {
622 ret = 0;
623 }
624 return ret;
625 }
626
627 /* Convert (prefix of) utf8 string to utf-32 rune. */
628 /* Sets *rune to the value, returns the length. */
629 /* No error checking: watch out. */
u8_rune(int * rune,const char * s)630 int u8_rune(int *rune, const char *s)
631 {
632 int n, ret;
633 unsigned char c;
634
635 c = s[0];
636 if (c < 128 || awk_mb_cur_max == 1) {
637 *rune = c;
638 return 1;
639 }
640
641 n = strlen(s);
642 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
643 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
644 ret = 2;
645 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
646 && (s[2] & 0xC0) == 0x80) {
647 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
648 /* 1110xxxx 10xxxxxx 10xxxxxx */
649 ret = 3;
650 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
651 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
652 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
653 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
654 ret = 4;
655 } else {
656 *rune = c;
657 ret = 1;
658 }
659 return ret; /* returns one byte if sequence doesn't look like utf */
660 }
661
/*
 * return length of next sequence: 1 for ascii or random, 2..4 for valid utf8.
 * This is u8_isutf() with its "not utf-8" answer (0) mapped to 1.
 */
int u8_nextlen(const char *s)
{
	int len = u8_isutf(s);

	return len != 0 ? len : 1;
}
672
673 /* return number of utf characters or single non-utf bytes */
u8_strlen(const char * s)674 int u8_strlen(const char *s)
675 {
676 int i, len, n, totlen;
677 unsigned char c;
678
679 n = strlen(s);
680 totlen = 0;
681 for (i = 0; i < n; i += len) {
682 c = s[i];
683 if (c < 128 || awk_mb_cur_max == 1) {
684 len = 1;
685 } else {
686 len = u8_nextlen(&s[i]);
687 }
688 totlen++;
689 if (i > n)
690 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
691 }
692 return totlen;
693 }
694
/*
 * convert utf-8 char number in a string to its byte offset.
 * charnum counts characters from 0; the result is the number of bytes
 * occupied by the first charnum characters of s.
 * The scan stops at the terminating NUL, so a charnum larger than the
 * number of characters in s yields the byte length of s rather than
 * reading past the end of the string.
 */
int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
709
/*
 * convert byte offset in s to utf-8 char number that starts there.
 * The count is origin 1: byte offset 0 gives 1.  A negative offset
 * gives 0, and an offset beyond the end of s gives -1.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int slen = strlen(s);
	int pos = 0;
	int charnum = 0;	/* BUG: what origin? */
			/* should be 0 to match start==0 which means no match */

	if (bytenum > slen)
		return -1;	/* ??? */

	while (pos <= bytenum) {
		pos += u8_nextlen(s+pos);
		charnum++;
	}
	return charnum;
}
727
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	/*
	 * U+FFFD REPLACEMENT CHARACTER, substituted for runes above Runemax.
	 * (A value of 128 here would be emitted as the overlong, and therefore
	 * invalid, three-byte sequence E0 82 80.)
	 */
	Runeerror = 0xFFFD,
	Runemax	= 0x10FFFF,

	Bit1    = 7,
	Bitx    = 6,
	Bit2    = 5,
	Bit3    = 4,
	Bit4    = 3,
	Bit5    = 2,

	T1      = ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx      = ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2      = ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3      = ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4      = ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5      = ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	Rune1   = (1<<(Bit1+0*Bitx))-1,	/* 0000 0000 0000 0000 0111 1111 */
	Rune2   = (1<<(Bit2+1*Bitx))-1,	/* 0000 0000 0000 0111 1111 1111 */
	Rune3   = (1<<(Bit3+2*Bitx))-1,	/* 0000 0000 1111 1111 1111 1111 */
	Rune4   = (1<<(Bit4+3*Bitx))-1,	/* 0011 1111 1111 1111 1111 1111 */

	Maskx   = (1<<Bitx)-1,		/* 0011 1111 */
	Testx   = Maskx ^ 0xFF,		/* 1100 0000 */

};

/*
 * runetochar: encode rune c as utf-8 into str.
 * Writes 1 to 4 bytes and returns how many; no NUL terminator is added,
 * so str needs room for up to 4 bytes.  A value above Runemax is
 * replaced by Runeerror before encoding.
 */
int runetochar(char *str, int c)
{
	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = T2 | (c >> 1*Bitx);
		str[1] = Tx | (c & Maskx);
		return 2;
	}

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c > Runemax)
		c = Runeerror;
	if (c <= Rune3) {
		str[0] = T3 | (c >> 2*Bitx);
		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
		str[2] = Tx | (c & Maskx);
		return 3;
	}

	/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
	str[0] = T4 | (c >> 3*Bitx);
	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
	str[3] = Tx | (c & Maskx);
	return 4;
}
791
792
793 /* ========== end of utf8 code =========== */
794
795
796
matchop(Node ** a,int n)797 Cell *matchop(Node **a, int n) /* ~ and match() */
798 {
799 Cell *x, *y, *z;
800 char *s, *t;
801 int i;
802 int cstart, cpatlen, len;
803 fa *pfa;
804 int (*mf)(fa *, const char *) = match, mode = 0;
805
806 if (n == MATCHFCN) {
807 mf = pmatch;
808 mode = 1;
809 }
810 x = execute(a[1]); /* a[1] = target text */
811 s = getsval(x);
812 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
813 i = (*mf)((fa *) a[2], s);
814 else {
815 y = execute(a[2]); /* a[2] = regular expr */
816 t = getsval(y);
817 pfa = makedfa(t, mode);
818 i = (*mf)(pfa, s);
819 tempfree(y);
820 }
821 z = x;
822 if (n == MATCHFCN) {
823 int start = patbeg - s + 1; /* origin 1 */
824 if (patlen < 0) {
825 start = 0; /* not found */
826 } else {
827 cstart = u8_byte2char(s, start-1);
828 cpatlen = 0;
829 for (i = 0; i < patlen; i += len) {
830 len = u8_nextlen(patbeg+i);
831 cpatlen++;
832 }
833
834 start = cstart;
835 patlen = cpatlen;
836 }
837
838 setfval(rstartloc, (Awkfloat) start);
839 setfval(rlengthloc, (Awkfloat) patlen);
840 x = gettemp();
841 x->tval = NUM;
842 x->fval = start;
843 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
844 x = True;
845 else
846 x = False;
847
848 tempfree(z);
849 return x;
850 }
851
852
boolop(Node ** a,int n)853 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
854 {
855 Cell *x, *y;
856 int i;
857
858 x = execute(a[0]);
859 i = istrue(x);
860 tempfree(x);
861 switch (n) {
862 case BOR:
863 if (i) return(True);
864 y = execute(a[1]);
865 i = istrue(y);
866 tempfree(y);
867 if (i) return(True);
868 else return(False);
869 case AND:
870 if ( !i ) return(False);
871 y = execute(a[1]);
872 i = istrue(y);
873 tempfree(y);
874 if (i) return(True);
875 else return(False);
876 case NOT:
877 if (i) return(False);
878 else return(True);
879 default: /* can't happen */
880 FATAL("unknown boolean operator %d", n);
881 }
882 return 0; /*NOTREACHED*/
883 }
884
relop(Node ** a,int n)885 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
886 {
887 int i;
888 Cell *x, *y;
889 Awkfloat j;
890 bool x_is_nan, y_is_nan;
891
892 x = execute(a[0]);
893 y = execute(a[1]);
894 x_is_nan = isnan(x->fval);
895 y_is_nan = isnan(y->fval);
896 if (x->tval&NUM && y->tval&NUM) {
897 if ((x_is_nan || y_is_nan) && n != NE)
898 return(False);
899 j = x->fval - y->fval;
900 i = j<0? -1: (j>0? 1: 0);
901 } else {
902 i = strcmp(getsval(x), getsval(y));
903 }
904 tempfree(x);
905 tempfree(y);
906 switch (n) {
907 case LT: if (i<0) return(True);
908 else return(False);
909 case LE: if (i<=0) return(True);
910 else return(False);
911 case NE: if (x_is_nan && y_is_nan) return(True);
912 else if (i!=0) return(True);
913 else return(False);
914 case EQ: if (i == 0) return(True);
915 else return(False);
916 case GE: if (i>=0) return(True);
917 else return(False);
918 case GT: if (i>0) return(True);
919 else return(False);
920 default: /* can't happen */
921 FATAL("unknown relational operator %d", n);
922 }
923 return 0; /*NOTREACHED*/
924 }
925
/*
 * tfree: free a tempcell.
 * Releases the cell's string if it is freeable, then pushes the cell on
 * the front of the tmps free list.  Pushing the cell that is already at
 * the head of the list is a fatal error.
 */
void tfree(Cell *a)
{
	if (freeable(a)) {
		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
		xfree(a->sval);
	}
	if (tmps == a) {
		FATAL("tempcell list is curdled");
	}
	a->cnext = tmps;
	tmps = a;
}
937
gettemp(void)938 Cell *gettemp(void) /* get a tempcell */
939 { int i;
940 Cell *x;
941
942 if (!tmps) {
943 tmps = (Cell *) calloc(100, sizeof(*tmps));
944 if (!tmps)
945 FATAL("out of space for temporaries");
946 for (i = 1; i < 100; i++)
947 tmps[i-1].cnext = &tmps[i];
948 tmps[i-1].cnext = NULL;
949 }
950 x = tmps;
951 tmps = x->cnext;
952 *x = tempcell;
953 return(x);
954 }
955
indirect(Node ** a,int n)956 Cell *indirect(Node **a, int n) /* $( a[0] ) */
957 {
958 Awkfloat val;
959 Cell *x;
960 int m;
961
962 x = execute(a[0]);
963 val = getfval(x); /* freebsd: defend against super large field numbers */
964 if ((Awkfloat)INT_MAX < val)
965 FATAL("trying to access out of range field %s", x->nval);
966 m = (int) val;
967 tempfree(x);
968 x = fieldadr(m);
969 x->ctype = OCELL; /* BUG? why are these needed? */
970 x->csub = CFLD;
971 return(x);
972 }
973
substr(Node ** a,int nnn)974 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
975 {
976 int k, m, n;
977 int mb, nb;
978 char *s;
979 int temp;
980 Cell *x, *y, *z = NULL;
981
982 x = execute(a[0]);
983 y = execute(a[1]);
984 if (a[2] != NULL)
985 z = execute(a[2]);
986 s = getsval(x);
987 k = u8_strlen(s) + 1;
988 if (k <= 1) {
989 tempfree(x);
990 tempfree(y);
991 if (a[2] != NULL) {
992 tempfree(z);
993 }
994 x = gettemp();
995 setsval(x, "");
996 return(x);
997 }
998 m = (int) getfval(y);
999 if (m <= 0)
1000 m = 1;
1001 else if (m > k)
1002 m = k;
1003 tempfree(y);
1004 if (a[2] != NULL) {
1005 n = (int) getfval(z);
1006 tempfree(z);
1007 } else
1008 n = k - 1;
1009 if (n < 0)
1010 n = 0;
1011 else if (n > k - m)
1012 n = k - m;
1013 /* m is start, n is length from there */
1014 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1015 y = gettemp();
1016 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1017 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */
1018
1019 temp = s[nb]; /* with thanks to John Linderman */
1020 s[nb] = '\0';
1021 setsval(y, s + mb);
1022 s[nb] = temp;
1023 tempfree(x);
1024 return(y);
1025 }
1026
sindex(Node ** a,int nnn)1027 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1028 {
1029 Cell *x, *y, *z;
1030 char *s1, *s2, *p1, *p2, *q;
1031 Awkfloat v = 0.0;
1032
1033 x = execute(a[0]);
1034 s1 = getsval(x);
1035 y = execute(a[1]);
1036 s2 = getsval(y);
1037
1038 z = gettemp();
1039 for (p1 = s1; *p1 != '\0'; p1++) {
1040 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1041 continue;
1042 if (*p2 == '\0') {
1043 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1044
1045 /* should be a function: used in match() as well */
1046 int i, len;
1047 v = 0;
1048 for (i = 0; i < p1-s1+1; i += len) {
1049 len = u8_nextlen(s1+i);
1050 v++;
1051 }
1052 break;
1053 }
1054 }
1055 tempfree(x);
1056 tempfree(y);
1057 setfval(z, v);
1058 return(z);
1059 }
1060
/* return 1 if s contains any utf-8 (2 bytes or more) character */
int has_utf8(char *s)
{
	while (*s != 0) {
		int step = u8_nextlen(s);

		if (step > 1)
			return 1;
		s += step;
	}
	return 0;
}
1072
1073 #define MAXNUMSIZE 50
1074
format(char ** pbuf,int * pbufsize,const char * s,Node * a)1075 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1076 {
1077 char *fmt;
1078 char *p, *t;
1079 const char *os;
1080 Cell *x;
1081 int flag = 0, n;
1082 int fmtwd; /* format width */
1083 int fmtsz = recsize;
1084 char *buf = *pbuf;
1085 int bufsize = *pbufsize;
1086 #define FMTSZ(a) (fmtsz - ((a) - fmt))
1087 #define BUFSZ(a) (bufsize - ((a) - buf))
1088
1089 static bool first = true;
1090 static bool have_a_format = false;
1091
1092 if (first) {
1093 char xbuf[100];
1094
1095 snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1096 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1097 first = false;
1098 }
1099
1100 os = s;
1101 p = buf;
1102 if ((fmt = (char *) malloc(fmtsz)) == NULL)
1103 FATAL("out of memory in format()");
1104 while (*s) {
1105 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1106 if (*s != '%') {
1107 *p++ = *s++;
1108 continue;
1109 }
1110 if (*(s+1) == '%') {
1111 *p++ = '%';
1112 s += 2;
1113 continue;
1114 }
1115 fmtwd = atoi(s+1);
1116 if (fmtwd < 0)
1117 fmtwd = -fmtwd;
1118 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1119 for (t = fmt; (*t++ = *s) != '\0'; s++) {
1120 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1121 FATAL("format item %.30s... ran format() out of memory", os);
1122 /* Ignore size specifiers */
1123 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
1124 t--;
1125 continue;
1126 }
1127 if (isalpha((uschar)*s))
1128 break;
1129 if (*s == '$') {
1130 FATAL("'$' not permitted in awk formats");
1131 }
1132 if (*s == '*') {
1133 if (a == NULL) {
1134 FATAL("not enough args in printf(%s)", os);
1135 }
1136 x = execute(a);
1137 a = a->nnext;
1138 snprintf(t - 1, FMTSZ(t - 1),
1139 "%d", fmtwd=(int) getfval(x));
1140 if (fmtwd < 0)
1141 fmtwd = -fmtwd;
1142 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1143 t = fmt + strlen(fmt);
1144 tempfree(x);
1145 }
1146 }
1147 *t = '\0';
1148 if (fmtwd < 0)
1149 fmtwd = -fmtwd;
1150 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1151 switch (*s) {
1152 case 'a': case 'A':
1153 if (have_a_format)
1154 flag = *s;
1155 else
1156 flag = 'f';
1157 break;
1158 case 'f': case 'e': case 'g': case 'E': case 'G':
1159 flag = 'f';
1160 break;
1161 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1162 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1163 *(t-1) = 'j';
1164 *t = *s;
1165 *++t = '\0';
1166 break;
1167 case 's':
1168 flag = 's';
1169 break;
1170 case 'c':
1171 flag = 'c';
1172 break;
1173 default:
1174 WARNING("weird printf conversion %s", fmt);
1175 flag = '?';
1176 break;
1177 }
1178 if (a == NULL)
1179 FATAL("not enough args in printf(%s)", os);
1180 x = execute(a);
1181 a = a->nnext;
1182 n = MAXNUMSIZE;
1183 if (fmtwd > n)
1184 n = fmtwd;
1185 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1186 switch (flag) {
1187 case '?':
1188 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
1189 t = getsval(x);
1190 n = strlen(t);
1191 if (fmtwd > n)
1192 n = fmtwd;
1193 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1194 p += strlen(p);
1195 snprintf(p, BUFSZ(p), "%s", t);
1196 break;
1197 case 'a':
1198 case 'A':
1199 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1200 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1201 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1202
1203 case 's': {
1204 t = getsval(x);
1205 n = strlen(t);
1206 /* if simple format or no utf-8 in the string, sprintf works */
1207 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1208 if (fmtwd > n)
1209 n = fmtwd;
1210 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1211 FATAL("huge string/format (%d chars) in printf %.30s..." \
1212 " ran format() out of memory", n, t);
1213 snprintf(p, BUFSZ(p), fmt, t);
1214 break;
1215 }
1216
1217 /* get here if string has utf-8 chars and fmt is not plain %s */
1218 /* "%-w.ps", where -, w and .p are all optional */
1219 /* '0' before the w is a flag character */
1220 /* fmt points at % */
1221 int ljust = 0, wid = 0, prec = n, pad = 0;
1222 char *f = fmt+1;
1223 if (f[0] == '-') {
1224 ljust = 1;
1225 f++;
1226 }
1227 // flags '0' and '+' are recognized but skipped
1228 if (f[0] == '0') {
1229 f++;
1230 if (f[0] == '+')
1231 f++;
1232 }
1233 if (f[0] == '+') {
1234 f++;
1235 if (f[0] == '0')
1236 f++;
1237 }
1238 if (isdigit(f[0])) { /* there is a wid */
1239 wid = strtol(f, &f, 10);
1240 }
1241 if (f[0] == '.') { /* there is a .prec */
1242 prec = strtol(++f, &f, 10);
1243 }
1244 if (prec > u8_strlen(t))
1245 prec = u8_strlen(t);
1246 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1247 int i, k, n;
1248
1249 if (ljust) { // print prec chars from t, then pad blanks
1250 n = u8_char2byte(t, prec);
1251 for (k = 0; k < n; k++) {
1252 //putchar(t[k]);
1253 *p++ = t[k];
1254 }
1255 for (i = 0; i < pad; i++) {
1256 //printf(" ");
1257 *p++ = ' ';
1258 }
1259 } else { // print pad blanks, then prec chars from t
1260 for (i = 0; i < pad; i++) {
1261 //printf(" ");
1262 *p++ = ' ';
1263 }
1264 n = u8_char2byte(t, prec);
1265 for (k = 0; k < n; k++) {
1266 //putchar(t[k]);
1267 *p++ = t[k];
1268 }
1269 }
1270 *p = 0;
1271 break;
1272 }
1273
1274 case 'c': {
1275 /*
1276 * If a numeric value is given, awk should just turn
1277 * it into a character and print it:
1278 * BEGIN { printf("%c\n", 65) }
1279 * prints "A".
1280 *
1281 * But what if the numeric value is > 128 and
1282 * represents a valid Unicode code point?!? We do
1283 * our best to convert it back into UTF-8. If we
1284 * can't, we output the encoding of the Unicode
1285 * "invalid character", 0xFFFD.
1286 */
1287 if (isnum(x)) {
1288 int charval = (int) getfval(x);
1289
1290 if (charval != 0) {
1291 if (charval < 128 || awk_mb_cur_max == 1)
1292 snprintf(p, BUFSZ(p), fmt, charval);
1293 else {
1294 // possible unicode character
1295 size_t count;
1296 char *bs = wide_char_to_byte_str(charval, &count);
1297
1298 if (bs == NULL) { // invalid character
1299 // use unicode invalid character, 0xFFFD
1300 static char invalid_char[] = "\357\277\275";
1301 bs = invalid_char;
1302 count = 3;
1303 }
1304 t = bs;
1305 n = count;
1306 goto format_percent_c;
1307 }
1308 } else {
1309 *p++ = '\0'; /* explicit null byte */
1310 *p = '\0'; /* next output will start here */
1311 }
1312 break;
1313 }
1314 t = getsval(x);
1315 n = u8_nextlen(t);
1316 format_percent_c:
1317 if (n < 2) { /* not utf8 */
1318 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1319 break;
1320 }
1321
1322 // utf8 character, almost same song and dance as for %s
1323 int ljust = 0, wid = 0, prec = n, pad = 0;
1324 char *f = fmt+1;
1325 if (f[0] == '-') {
1326 ljust = 1;
1327 f++;
1328 }
1329 // flags '0' and '+' are recognized but skipped
1330 if (f[0] == '0') {
1331 f++;
1332 if (f[0] == '+')
1333 f++;
1334 }
1335 if (f[0] == '+') {
1336 f++;
1337 if (f[0] == '0')
1338 f++;
1339 }
1340 if (isdigit(f[0])) { /* there is a wid */
1341 wid = strtol(f, &f, 10);
1342 }
1343 if (f[0] == '.') { /* there is a .prec */
1344 prec = strtol(++f, &f, 10);
1345 }
1346 if (prec > 1) // %c --> only one character
1347 prec = 1;
1348 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1349 int i;
1350
1351 if (ljust) { // print one char from t, then pad blanks
1352 for (i = 0; i < n; i++)
1353 *p++ = t[i];
1354 for (i = 0; i < pad; i++) {
1355 //printf(" ");
1356 *p++ = ' ';
1357 }
1358 } else { // print pad blanks, then prec chars from t
1359 for (i = 0; i < pad; i++) {
1360 //printf(" ");
1361 *p++ = ' ';
1362 }
1363 for (i = 0; i < n; i++)
1364 *p++ = t[i];
1365 }
1366 *p = 0;
1367 break;
1368 }
1369 default:
1370 FATAL("can't happen: bad conversion %c in format()", flag);
1371 }
1372
1373 tempfree(x);
1374 p += strlen(p);
1375 s++;
1376 }
1377 *p = '\0';
1378 free(fmt);
1379 for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1380 x = execute(a);
1381 tempfree(x);
1382 }
1383 *pbuf = buf;
1384 *pbufsize = bufsize;
1385 return p - buf;
1386 }
1387
awksprintf(Node ** a,int n)1388 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1389 {
1390 Cell *x;
1391 Node *y;
1392 char *buf;
1393 int bufsz=3*recsize;
1394
1395 if ((buf = (char *) malloc(bufsz)) == NULL)
1396 FATAL("out of memory in awksprintf");
1397 y = a[0]->nnext;
1398 x = execute(a[0]);
1399 if (format(&buf, &bufsz, getsval(x), y) == -1)
1400 FATAL("sprintf string %.30s... too long. can't happen.", buf);
1401 tempfree(x);
1402 x = gettemp();
1403 x->sval = buf;
1404 x->tval = STR;
1405 return(x);
1406 }
1407
awkprintf(Node ** a,int n)1408 Cell *awkprintf(Node **a, int n) /* printf */
1409 { /* a[0] is list of args, starting with format string */
1410 /* a[1] is redirection operator, a[2] is redirection file */
1411 FILE *fp;
1412 Cell *x;
1413 Node *y;
1414 char *buf;
1415 int len;
1416 int bufsz=3*recsize;
1417
1418 if ((buf = (char *) malloc(bufsz)) == NULL)
1419 FATAL("out of memory in awkprintf");
1420 y = a[0]->nnext;
1421 x = execute(a[0]);
1422 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1423 FATAL("printf string %.30s... too long. can't happen.", buf);
1424 tempfree(x);
1425 if (a[1] == NULL) {
1426 /* fputs(buf, stdout); */
1427 fwrite(buf, len, 1, stdout);
1428 if (ferror(stdout))
1429 FATAL("write error on stdout");
1430 } else {
1431 fp = redirect(ptoi(a[1]), a[2]);
1432 /* fputs(buf, fp); */
1433 fwrite(buf, len, 1, fp);
1434 fflush(fp);
1435 if (ferror(fp))
1436 FATAL("write error on %s", filename(fp));
1437 }
1438 free(buf);
1439 return(True);
1440 }
1441
arith(Node ** a,int n)1442 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1443 {
1444 Awkfloat i, j = 0;
1445 double v;
1446 Cell *x, *y, *z;
1447
1448 x = execute(a[0]);
1449 i = getfval(x);
1450 tempfree(x);
1451 if (n != UMINUS && n != UPLUS) {
1452 y = execute(a[1]);
1453 j = getfval(y);
1454 tempfree(y);
1455 }
1456 z = gettemp();
1457 switch (n) {
1458 case ADD:
1459 i += j;
1460 break;
1461 case MINUS:
1462 i -= j;
1463 break;
1464 case MULT:
1465 i *= j;
1466 break;
1467 case DIVIDE:
1468 if (j == 0)
1469 FATAL("division by zero");
1470 i /= j;
1471 break;
1472 case MOD:
1473 if (j == 0)
1474 FATAL("division by zero in mod");
1475 modf(i/j, &v);
1476 i = i - j * v;
1477 break;
1478 case UMINUS:
1479 i = -i;
1480 break;
1481 case UPLUS: /* handled by getfval(), above */
1482 break;
1483 case POWER:
1484 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1485 i = ipow(i, (int) j);
1486 else {
1487 errno = 0;
1488 i = errcheck(pow(i, j), "pow");
1489 }
1490 break;
1491 default: /* can't happen */
1492 FATAL("illegal arithmetic operator %d", n);
1493 }
1494 setfval(z, i);
1495 return(z);
1496 }
1497
/*
 * ipow - raise x to the integer power n by repeated squaring.
 *
 * Returns 1 for n <= 0.  The bits of n are consumed from the most
 * significant one downward, squaring the accumulator at each step and
 * multiplying in x when the bit is set; this performs the same
 * multiplications, in the same order, as the recursive formulation
 * ipow(x, n) = [x *] ipow(x, n/2)^2.
 */
double ipow(double x, int n)
{
	double acc = 1;
	unsigned int bit;

	if (n <= 0)
		return 1;

	/* find the highest set bit of n */
	for (bit = 1; bit <= (unsigned int) n / 2; bit <<= 1)
		continue;

	for (; bit != 0; bit >>= 1) {
		if ((unsigned int) n & bit)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1510
incrdecr(Node ** a,int n)1511 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1512 {
1513 Cell *x, *z;
1514 int k;
1515 Awkfloat xf;
1516
1517 x = execute(a[0]);
1518 xf = getfval(x);
1519 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1520 if (n == PREINCR || n == PREDECR) {
1521 setfval(x, xf + k);
1522 return(x);
1523 }
1524 z = gettemp();
1525 setfval(z, xf);
1526 setfval(x, xf + k);
1527 tempfree(x);
1528 return(z);
1529 }
1530
assign(Node ** a,int n)1531 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1532 { /* this is subtle; don't muck with it. */
1533 Cell *x, *y;
1534 Awkfloat xf, yf;
1535 double v;
1536
1537 y = execute(a[1]);
1538 x = execute(a[0]);
1539 if (n == ASSIGN) { /* ordinary assignment */
1540 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1541 ; /* self-assignment: leave alone unless it's a field or NF */
1542 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1543 yf = getfval(y);
1544 setsval(x, getsval(y));
1545 x->fval = yf;
1546 x->tval |= NUM;
1547 }
1548 else if (isstr(y))
1549 setsval(x, getsval(y));
1550 else if (isnum(y))
1551 setfval(x, getfval(y));
1552 else
1553 funnyvar(y, "read value of");
1554 tempfree(y);
1555 return(x);
1556 }
1557 xf = getfval(x);
1558 yf = getfval(y);
1559 switch (n) {
1560 case ADDEQ:
1561 xf += yf;
1562 break;
1563 case SUBEQ:
1564 xf -= yf;
1565 break;
1566 case MULTEQ:
1567 xf *= yf;
1568 break;
1569 case DIVEQ:
1570 if ((x->tval & CON) != 0)
1571 FATAL("non-constant required for left side of /=");
1572 if (yf == 0)
1573 FATAL("division by zero in /=");
1574 xf /= yf;
1575 break;
1576 case MODEQ:
1577 if (yf == 0)
1578 FATAL("division by zero in %%=");
1579 modf(xf/yf, &v);
1580 xf = xf - yf * v;
1581 break;
1582 case POWEQ:
1583 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1584 xf = ipow(xf, (int) yf);
1585 else {
1586 errno = 0;
1587 xf = errcheck(pow(xf, yf), "pow");
1588 }
1589 break;
1590 default:
1591 FATAL("illegal assignment operator %d", n);
1592 break;
1593 }
1594 tempfree(y);
1595 setfval(x, xf);
1596 return(x);
1597 }
1598
cat(Node ** a,int q)1599 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1600 {
1601 Cell *x, *y, *z;
1602 int n1, n2;
1603 char *s = NULL;
1604 int ssz = 0;
1605
1606 x = execute(a[0]);
1607 n1 = strlen(getsval(x));
1608 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1609 memcpy(s, x->sval, n1);
1610
1611 tempfree(x);
1612
1613 y = execute(a[1]);
1614 n2 = strlen(getsval(y));
1615 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1616 memcpy(s + n1, y->sval, n2);
1617 s[n1 + n2] = '\0';
1618
1619 tempfree(y);
1620
1621 z = gettemp();
1622 z->sval = s;
1623 z->tval = STR;
1624
1625 return(z);
1626 }
1627
pastat(Node ** a,int n)1628 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1629 {
1630 Cell *x;
1631
1632 if (a[0] == NULL)
1633 x = execute(a[1]);
1634 else {
1635 x = execute(a[0]);
1636 if (istrue(x)) {
1637 tempfree(x);
1638 x = execute(a[1]);
1639 }
1640 }
1641 return x;
1642 }
1643
dopa2(Node ** a,int n)1644 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1645 {
1646 Cell *x;
1647 int pair;
1648
1649 pair = ptoi(a[3]);
1650 if (pairstack[pair] == 0) {
1651 x = execute(a[0]);
1652 if (istrue(x))
1653 pairstack[pair] = 1;
1654 tempfree(x);
1655 }
1656 if (pairstack[pair] == 1) {
1657 x = execute(a[1]);
1658 if (istrue(x))
1659 pairstack[pair] = 0;
1660 tempfree(x);
1661 x = execute(a[2]);
1662 return(x);
1663 }
1664 return(False);
1665 }
1666
split(Node ** a,int nnn)1667 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1668 {
1669 Cell *x = NULL, *y, *ap;
1670 const char *s, *origs, *t;
1671 const char *fs = NULL;
1672 char *origfs = NULL;
1673 int sep;
1674 char temp, num[50];
1675 int n, tempstat, arg3type;
1676 int j;
1677 double result;
1678
1679 y = execute(a[0]); /* source string */
1680 origs = s = strdup(getsval(y));
1681 tempfree(y);
1682 arg3type = ptoi(a[3]);
1683 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */
1684 fs = getsval(fsloc);
1685 } else if (arg3type == STRING) { /* split(str,arr,"string") */
1686 x = execute(a[2]);
1687 fs = origfs = strdup(getsval(x));
1688 tempfree(x);
1689 } else if (arg3type == REGEXPR) {
1690 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1691 } else {
1692 FATAL("illegal type of split");
1693 }
1694 sep = *fs;
1695 ap = execute(a[1]); /* array name */
1696 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1697 freesymtab(ap);
1698 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1699 ap->tval &= ~STR;
1700 ap->tval |= ARR;
1701 ap->sval = (char *) makesymtab(NSYMTAB);
1702
1703 n = 0;
1704 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1705 /* split(s, a, //); have to arrange that it looks like empty sep */
1706 arg3type = 0;
1707 fs = "";
1708 sep = 0;
1709 }
1710 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1711 fa *pfa;
1712 if (arg3type == REGEXPR) { /* it's ready already */
1713 pfa = (fa *) a[2];
1714 } else {
1715 pfa = makedfa(fs, 1);
1716 }
1717 if (nematch(pfa,s)) {
1718 tempstat = pfa->initstat;
1719 pfa->initstat = 2;
1720 do {
1721 n++;
1722 snprintf(num, sizeof(num), "%d", n);
1723 temp = *patbeg;
1724 setptr(patbeg, '\0');
1725 if (is_number(s, & result))
1726 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1727 else
1728 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1729 setptr(patbeg, temp);
1730 s = patbeg + patlen;
1731 if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1732 n++;
1733 snprintf(num, sizeof(num), "%d", n);
1734 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1735 pfa->initstat = tempstat;
1736 goto spdone;
1737 }
1738 } while (nematch(pfa,s));
1739 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1740 /* cf gsub and refldbld */
1741 }
1742 n++;
1743 snprintf(num, sizeof(num), "%d", n);
1744 if (is_number(s, & result))
1745 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1746 else
1747 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1748 spdone:
1749 pfa = NULL;
1750
1751 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */
1752 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1753 for (;;) {
1754 char *fr = newt;
1755 n++;
1756 if (*s == '"' ) { /* start of "..." */
1757 for (s++ ; *s != '\0'; ) {
1758 if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1759 s += 2; /* doubled quote */
1760 *fr++ = '"';
1761 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1762 s++; /* skip over closing quote */
1763 break;
1764 } else {
1765 *fr++ = *s++;
1766 }
1767 }
1768 *fr++ = 0;
1769 } else { /* unquoted field */
1770 while (*s != ',' && *s != '\0')
1771 *fr++ = *s++;
1772 *fr++ = 0;
1773 }
1774 snprintf(num, sizeof(num), "%d", n);
1775 if (is_number(newt, &result))
1776 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1777 else
1778 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1779 if (*s++ == '\0')
1780 break;
1781 }
1782 free(newt);
1783
1784 } else if (!CSV && sep == ' ') { /* usual case: split on white space */
1785 for (n = 0; ; ) {
1786 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1787 while (ISWS(*s))
1788 s++;
1789 if (*s == '\0')
1790 break;
1791 n++;
1792 t = s;
1793 do
1794 s++;
1795 while (*s != '\0' && !ISWS(*s));
1796 temp = *s;
1797 setptr(s, '\0');
1798 snprintf(num, sizeof(num), "%d", n);
1799 if (is_number(t, & result))
1800 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1801 else
1802 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1803 setptr(s, temp);
1804 if (*s != '\0')
1805 s++;
1806 }
1807
1808 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1809 for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1810 char buf[10];
1811 n++;
1812 snprintf(num, sizeof(num), "%d", n);
1813
1814 for (j = 0; j < u8_nextlen(s); j++) {
1815 buf[j] = s[j];
1816 }
1817 buf[j] = '\0';
1818
1819 if (isdigit((uschar)buf[0]))
1820 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1821 else
1822 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1823 }
1824
1825 } else if (*s != '\0') { /* some random single character */
1826 for (;;) {
1827 n++;
1828 t = s;
1829 while (*s != sep && *s != '\0')
1830 s++;
1831 temp = *s;
1832 setptr(s, '\0');
1833 snprintf(num, sizeof(num), "%d", n);
1834 if (is_number(t, & result))
1835 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1836 else
1837 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1838 setptr(s, temp);
1839 if (*s++ == '\0')
1840 break;
1841 }
1842 }
1843 tempfree(ap);
1844 xfree(origs);
1845 xfree(origfs);
1846 x = gettemp();
1847 x->tval = NUM;
1848 x->fval = n;
1849 return(x);
1850 }
1851
condexpr(Node ** a,int n)1852 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1853 {
1854 Cell *x;
1855
1856 x = execute(a[0]);
1857 if (istrue(x)) {
1858 tempfree(x);
1859 x = execute(a[1]);
1860 } else {
1861 tempfree(x);
1862 x = execute(a[2]);
1863 }
1864 return(x);
1865 }
1866
ifstat(Node ** a,int n)1867 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1868 {
1869 Cell *x;
1870
1871 x = execute(a[0]);
1872 if (istrue(x)) {
1873 tempfree(x);
1874 x = execute(a[1]);
1875 } else if (a[2] != NULL) {
1876 tempfree(x);
1877 x = execute(a[2]);
1878 }
1879 return(x);
1880 }
1881
whilestat(Node ** a,int n)1882 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1883 {
1884 Cell *x;
1885
1886 for (;;) {
1887 x = execute(a[0]);
1888 if (!istrue(x))
1889 return(x);
1890 tempfree(x);
1891 x = execute(a[1]);
1892 if (isbreak(x)) {
1893 x = True;
1894 return(x);
1895 }
1896 if (isnext(x) || isexit(x) || isret(x))
1897 return(x);
1898 tempfree(x);
1899 }
1900 }
1901
dostat(Node ** a,int n)1902 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1903 {
1904 Cell *x;
1905
1906 for (;;) {
1907 x = execute(a[0]);
1908 if (isbreak(x))
1909 return True;
1910 if (isnext(x) || isexit(x) || isret(x))
1911 return(x);
1912 tempfree(x);
1913 x = execute(a[1]);
1914 if (!istrue(x))
1915 return(x);
1916 tempfree(x);
1917 }
1918 }
1919
forstat(Node ** a,int n)1920 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1921 {
1922 Cell *x;
1923
1924 x = execute(a[0]);
1925 tempfree(x);
1926 for (;;) {
1927 if (a[1]!=NULL) {
1928 x = execute(a[1]);
1929 if (!istrue(x)) return(x);
1930 else tempfree(x);
1931 }
1932 x = execute(a[3]);
1933 if (isbreak(x)) /* turn off break */
1934 return True;
1935 if (isnext(x) || isexit(x) || isret(x))
1936 return(x);
1937 tempfree(x);
1938 x = execute(a[2]);
1939 tempfree(x);
1940 }
1941 }
1942
instat(Node ** a,int n)1943 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1944 {
1945 Cell *x, *vp, *arrayp, *cp, *ncp;
1946 Array *tp;
1947 int i;
1948
1949 vp = execute(a[0]);
1950 arrayp = execute(a[1]);
1951 if (!isarr(arrayp)) {
1952 return True;
1953 }
1954 tp = (Array *) arrayp->sval;
1955 tempfree(arrayp);
1956 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1957 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1958 setsval(vp, cp->nval);
1959 ncp = cp->cnext;
1960 x = execute(a[2]);
1961 if (isbreak(x)) {
1962 tempfree(vp);
1963 return True;
1964 }
1965 if (isnext(x) || isexit(x) || isret(x)) {
1966 tempfree(vp);
1967 return(x);
1968 }
1969 tempfree(x);
1970 }
1971 }
1972 return True;
1973 }
1974
nawk_convert(const char * s,int (* fun_c)(int),wint_t (* fun_wc)(wint_t))1975 static char *nawk_convert(const char *s, int (*fun_c)(int),
1976 wint_t (*fun_wc)(wint_t))
1977 {
1978 char *buf = NULL;
1979 char *pbuf = NULL;
1980 const char *ps = NULL;
1981 size_t n = 0;
1982 wchar_t wc;
1983 const size_t sz = awk_mb_cur_max;
1984 int unused;
1985
1986 if (sz == 1) {
1987 buf = tostring(s);
1988
1989 for (pbuf = buf; *pbuf; pbuf++)
1990 *pbuf = fun_c((uschar)*pbuf);
1991
1992 return buf;
1993 } else {
1994 /* upper/lower character may be shorter/longer */
1995 buf = tostringN(s, strlen(s) * sz + 1);
1996
1997 (void) mbtowc(NULL, NULL, 0); /* reset internal state */
1998 /*
1999 * Reset internal state here too.
2000 * Assign result to avoid a compiler warning. (Casting to void
2001 * doesn't work.)
2002 * Increment said variable to avoid a different warning.
2003 */
2004 unused = wctomb(NULL, L'\0');
2005 unused++;
2006
2007 ps = s;
2008 pbuf = buf;
2009 while (n = mbtowc(&wc, ps, sz),
2010 n > 0 && n != (size_t)-1 && n != (size_t)-2)
2011 {
2012 ps += n;
2013
2014 n = wctomb(pbuf, fun_wc(wc));
2015 if (n == (size_t)-1)
2016 FATAL("illegal wide character %s", s);
2017
2018 pbuf += n;
2019 }
2020
2021 *pbuf = '\0';
2022
2023 if (n)
2024 FATAL("illegal byte sequence %s", s);
2025
2026 return buf;
2027 }
2028 }
2029
2030 #ifdef __DJGPP__
/*
 * Fallback towupper, compiled only under __DJGPP__: values below 256
 * are mapped with toupper(); everything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? (wint_t) toupper(wc & 0xFF) : wc;
}
2038
/*
 * Fallback towlower, compiled only under __DJGPP__: values below 256
 * are mapped with tolower(); everything else is returned unchanged.
 */
static wint_t towlower(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? (wint_t) tolower(wc & 0xFF) : wc;
}
2046 #endif
2047
/* Return a newly allocated upper-cased copy of s (see nawk_convert). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2052
/* Return a newly allocated lower-cased copy of s (see nawk_convert). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2057
2058
2059
bltin(Node ** a,int n)2060 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2061 {
2062 Cell *x, *y;
2063 Awkfloat u = 0;
2064 int t, sz;
2065 Awkfloat tmp;
2066 char *buf, *fmt;
2067 Node *nextarg;
2068 FILE *fp;
2069 int status = 0;
2070 time_t tv;
2071 struct tm *tm, tmbuf;
2072 int estatus = 0;
2073
2074 t = ptoi(a[0]);
2075 x = execute(a[1]);
2076 nextarg = a[1]->nnext;
2077 switch (t) {
2078 case FLENGTH:
2079 if (isarr(x))
2080 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2081 else
2082 u = u8_strlen(getsval(x));
2083 break;
2084 case FLOG:
2085 errno = 0;
2086 u = errcheck(log(getfval(x)), "log");
2087 break;
2088 case FINT:
2089 modf(getfval(x), &u); break;
2090 case FEXP:
2091 errno = 0;
2092 u = errcheck(exp(getfval(x)), "exp");
2093 break;
2094 case FSQRT:
2095 errno = 0;
2096 u = errcheck(sqrt(getfval(x)), "sqrt");
2097 break;
2098 case FSIN:
2099 u = sin(getfval(x)); break;
2100 case FCOS:
2101 u = cos(getfval(x)); break;
2102 case FATAN:
2103 if (nextarg == NULL) {
2104 WARNING("atan2 requires two arguments; returning 1.0");
2105 u = 1.0;
2106 } else {
2107 y = execute(a[1]->nnext);
2108 u = atan2(getfval(x), getfval(y));
2109 tempfree(y);
2110 nextarg = nextarg->nnext;
2111 }
2112 break;
2113 case FCOMPL:
2114 u = ~((int)getfval(x));
2115 break;
2116 case FAND:
2117 if (nextarg == 0) {
2118 WARNING("and requires two arguments; returning 0");
2119 u = 0;
2120 break;
2121 }
2122 y = execute(a[1]->nnext);
2123 u = ((int)getfval(x)) & ((int)getfval(y));
2124 tempfree(y);
2125 nextarg = nextarg->nnext;
2126 break;
2127 case FFOR:
2128 if (nextarg == 0) {
2129 WARNING("or requires two arguments; returning 0");
2130 u = 0;
2131 break;
2132 }
2133 y = execute(a[1]->nnext);
2134 u = ((int)getfval(x)) | ((int)getfval(y));
2135 tempfree(y);
2136 nextarg = nextarg->nnext;
2137 break;
2138 case FXOR:
2139 if (nextarg == 0) {
2140 WARNING("xor requires two arguments; returning 0");
2141 u = 0;
2142 break;
2143 }
2144 y = execute(a[1]->nnext);
2145 u = ((int)getfval(x)) ^ ((int)getfval(y));
2146 tempfree(y);
2147 nextarg = nextarg->nnext;
2148 break;
2149 case FLSHIFT:
2150 if (nextarg == 0) {
2151 WARNING("lshift requires two arguments; returning 0");
2152 u = 0;
2153 break;
2154 }
2155 y = execute(a[1]->nnext);
2156 u = ((int)getfval(x)) << ((int)getfval(y));
2157 tempfree(y);
2158 nextarg = nextarg->nnext;
2159 break;
2160 case FRSHIFT:
2161 if (nextarg == 0) {
2162 WARNING("rshift requires two arguments; returning 0");
2163 u = 0;
2164 break;
2165 }
2166 y = execute(a[1]->nnext);
2167 u = ((int)getfval(x)) >> ((int)getfval(y));
2168 tempfree(y);
2169 nextarg = nextarg->nnext;
2170 break;
2171 case FSYSTEM:
2172 fflush(stdout); /* in case something is buffered already */
2173 estatus = status = system(getsval(x));
2174 if (status != -1) {
2175 if (WIFEXITED(status)) {
2176 estatus = WEXITSTATUS(status);
2177 } else if (WIFSIGNALED(status)) {
2178 estatus = WTERMSIG(status) + 256;
2179 #ifdef WCOREDUMP
2180 if (WCOREDUMP(status))
2181 estatus += 256;
2182 #endif
2183 } else /* something else?!? */
2184 estatus = 0;
2185 }
2186 /* else estatus was set to -1 */
2187 u = estatus;
2188 break;
2189 case FRAND:
2190 /* random() returns numbers in [0..2^31-1]
2191 * in order to get a number in [0, 1), divide it by 2^31
2192 */
2193 u = (Awkfloat) random() / RAND_MAX;
2194 break;
2195 case FSRAND:
2196 if (isrec(x)) /* no argument provided */
2197 u = time((time_t *)0);
2198 else
2199 u = getfval(x);
2200 tmp = u;
2201 srandom((unsigned long) u);
2202 u = srand_seed;
2203 srand_seed = tmp;
2204 break;
2205 case FTOUPPER:
2206 case FTOLOWER:
2207 if (t == FTOUPPER)
2208 buf = nawk_toupper(getsval(x));
2209 else
2210 buf = nawk_tolower(getsval(x));
2211 tempfree(x);
2212 x = gettemp();
2213 setsval(x, buf);
2214 free(buf);
2215 return x;
2216 case FFLUSH:
2217 if (isrec(x) || strlen(getsval(x)) == 0) {
2218 flush_all(); /* fflush() or fflush("") -> all */
2219 u = 0;
2220 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2221 u = EOF;
2222 else
2223 u = fflush(fp);
2224 break;
2225 case FMKTIME:
2226 memset(&tmbuf, 0, sizeof(tmbuf));
2227 tm = &tmbuf;
2228 t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2229 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2230 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2231 switch (t) {
2232 case 6:
2233 tm->tm_isdst = -1; /* let mktime figure it out */
2234 /* FALLTHROUGH */
2235 case 7:
2236 tm->tm_year -= 1900;
2237 tm->tm_mon--;
2238 u = mktime(tm);
2239 break;
2240 default:
2241 u = -1;
2242 break;
2243 }
2244 break;
2245 case FSYSTIME:
2246 u = time((time_t *) 0);
2247 break;
2248 case FSTRFTIME:
2249 /* strftime([format [,timestamp]]) */
2250 if (nextarg) {
2251 y = execute(nextarg);
2252 nextarg = nextarg->nnext;
2253 tv = (time_t) getfval(y);
2254 tempfree(y);
2255 } else
2256 tv = time((time_t *) 0);
2257 tm = localtime(&tv);
2258 if (tm == NULL)
2259 FATAL("bad time %ld", (long)tv);
2260
2261 if (isrec(x)) {
2262 /* format argument not provided, use default */
2263 fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2264 } else
2265 fmt = tostring(getsval(x));
2266
2267 sz = 32;
2268 buf = NULL;
2269 do {
2270 if ((buf = realloc(buf, (sz *= 2))) == NULL)
2271 FATAL("out of memory in strftime");
2272 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2273
2274 y = gettemp();
2275 setsval(y, buf);
2276 free(fmt);
2277 free(buf);
2278
2279 return y;
2280 default: /* can't happen */
2281 FATAL("illegal function type %d", t);
2282 break;
2283 }
2284 tempfree(x);
2285 x = gettemp();
2286 setfval(x, u);
2287 if (nextarg != NULL) {
2288 WARNING("warning: function has too many arguments");
2289 for ( ; nextarg; nextarg = nextarg->nnext) {
2290 y = execute(nextarg);
2291 tempfree(y);
2292 }
2293 }
2294 return(x);
2295 }
2296
printstat(Node ** a,int n)2297 Cell *printstat(Node **a, int n) /* print a[0] */
2298 {
2299 Node *x;
2300 Cell *y;
2301 FILE *fp;
2302
2303 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
2304 fp = stdout;
2305 else
2306 fp = redirect(ptoi(a[1]), a[2]);
2307 for (x = a[0]; x != NULL; x = x->nnext) {
2308 y = execute(x);
2309 fputs(getpssval(y), fp);
2310 tempfree(y);
2311 if (x->nnext == NULL)
2312 fputs(getsval(orsloc), fp);
2313 else
2314 fputs(getsval(ofsloc), fp);
2315 }
2316 if (a[1] != NULL)
2317 fflush(fp);
2318 if (ferror(fp))
2319 FATAL("write error on %s", filename(fp));
2320 return(True);
2321 }
2322
nullproc(Node ** a,int n)2323 Cell *nullproc(Node **a, int n)
2324 {
2325 return 0;
2326 }
2327
2328
/*
 * redirect - set up an i/o redirection.
 *
 * a is the redirection operator and b the expression giving the file
 * or command name.  Returns the stream obtained from openfile();
 * failure to open is fatal.
 */
FILE *redirect(int a, Node *b)
{
	Cell *target = execute(b);
	char *path = getsval(target);
	FILE *stream = openfile(a, path, NULL);

	if (stream == NULL)
		FATAL("can't open file %s", path);
	tempfree(target);
	return stream;
}
2343
/*
 * Table of the streams awk has open for redirection, plus the three
 * standard streams installed by stdinit().  The table is allocated in
 * stdinit() and grown by openfile() when no free slot is left.
 */
struct files {
	FILE	*fp;		/* open stream; NULL marks a free slot (see openfile) */
	const char	*fname;	/* name it was opened under: a tostring() copy, released in closefile() */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
} *files;

size_t nfiles;		/* number of slots currently allocated in files[] */
2351
stdinit(void)2352 static void stdinit(void) /* in case stdin, etc., are not constants */
2353 {
2354 nfiles = FOPEN_MAX;
2355 files = (struct files *) calloc(nfiles, sizeof(*files));
2356 if (files == NULL)
2357 FATAL("can't allocate file memory for %zu files", nfiles);
2358 files[0].fp = stdin;
2359 files[0].fname = tostring("/dev/stdin");
2360 files[0].mode = LT;
2361 files[1].fp = stdout;
2362 files[1].fname = tostring("/dev/stdout");
2363 files[1].mode = GT;
2364 files[2].fp = stderr;
2365 files[2].fname = tostring("/dev/stderr");
2366 files[2].mode = GT;
2367 }
2368
openfile(int a,const char * us,bool * pnewflag)2369 FILE *openfile(int a, const char *us, bool *pnewflag)
2370 {
2371 const char *s = us;
2372 size_t i;
2373 int m;
2374 FILE *fp = NULL;
2375 struct stat sbuf;
2376
2377 if (*s == '\0')
2378 FATAL("null file name in print or getline");
2379
2380 for (i = 0; i < nfiles; i++)
2381 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2382 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2383 a == FFLUSH)) {
2384 if (pnewflag)
2385 *pnewflag = false;
2386 return files[i].fp;
2387 }
2388 if (a == FFLUSH) /* didn't find it, so don't create it! */
2389 return NULL;
2390 for (i = 0; i < nfiles; i++)
2391 if (files[i].fp == NULL)
2392 break;
2393 if (i >= nfiles) {
2394 struct files *nf;
2395 size_t nnf = nfiles + FOPEN_MAX;
2396 nf = (struct files *) realloc(files, nnf * sizeof(*nf));
2397 if (nf == NULL)
2398 FATAL("cannot grow files for %s and %zu files", s, nnf);
2399 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2400 nfiles = nnf;
2401 files = nf;
2402 }
2403
2404 fflush(stdout); /* force a semblance of order */
2405
2406 /* don't try to read or write a directory */
2407 if (a == LT || a == GT || a == APPEND)
2408 if (stat(s, &sbuf) == 0 && S_ISDIR(sbuf.st_mode))
2409 return NULL;
2410
2411 m = a;
2412 if (a == GT) {
2413 fp = fopen(s, "w");
2414 } else if (a == APPEND) {
2415 fp = fopen(s, "a");
2416 m = GT; /* so can mix > and >> */
2417 } else if (a == '|') { /* output pipe */
2418 fp = popen(s, "w");
2419 } else if (a == LE) { /* input pipe */
2420 fp = popen(s, "r");
2421 } else if (a == LT) { /* getline <file */
2422 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
2423 } else /* can't happen */
2424 FATAL("illegal redirection %d", a);
2425 if (fp != NULL) {
2426 files[i].fname = tostring(s);
2427 files[i].fp = fp;
2428 files[i].mode = m;
2429 if (pnewflag)
2430 *pnewflag = true;
2431 if (fp != stdin && fp != stdout && fp != stderr)
2432 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2433 }
2434 return fp;
2435 }
2436
filename(FILE * fp)2437 const char *filename(FILE *fp)
2438 {
2439 size_t i;
2440
2441 for (i = 0; i < nfiles; i++)
2442 if (fp == files[i].fp)
2443 return files[i].fname;
2444 return "???";
2445 }
2446
closefile(Node ** a,int n)2447 Cell *closefile(Node **a, int n)
2448 {
2449 Cell *x;
2450 size_t i;
2451 bool stat;
2452
2453 x = execute(a[0]);
2454 getsval(x);
2455 stat = true;
2456 for (i = 0; i < nfiles; i++) {
2457 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2458 continue;
2459 if (files[i].mode == GT || files[i].mode == '|')
2460 fflush(files[i].fp);
2461 if (ferror(files[i].fp)) {
2462 if ((files[i].mode == GT && files[i].fp != stderr)
2463 || files[i].mode == '|')
2464 FATAL("write error on %s", files[i].fname);
2465 else
2466 WARNING("i/o error occurred on %s", files[i].fname);
2467 }
2468 if (files[i].fp == stdin || files[i].fp == stdout ||
2469 files[i].fp == stderr)
2470 stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2471 else if (files[i].mode == '|' || files[i].mode == LE)
2472 stat = pclose(files[i].fp) == -1;
2473 else
2474 stat = fclose(files[i].fp) == EOF;
2475 if (stat)
2476 WARNING("i/o error occurred closing %s", files[i].fname);
2477 xfree(files[i].fname);
2478 files[i].fname = NULL; /* watch out for ref thru this */
2479 files[i].fp = NULL;
2480 break;
2481 }
2482 tempfree(x);
2483 x = gettemp();
2484 setfval(x, (Awkfloat) (stat ? -1 : 0));
2485 return(x);
2486 }
2487
closeall(void)2488 void closeall(void)
2489 {
2490 size_t i;
2491 bool stat = false;
2492
2493 for (i = 0; i < nfiles; i++) {
2494 if (! files[i].fp)
2495 continue;
2496 if (files[i].mode == GT || files[i].mode == '|')
2497 fflush(files[i].fp);
2498 if (ferror(files[i].fp)) {
2499 if ((files[i].mode == GT && files[i].fp != stderr)
2500 || files[i].mode == '|')
2501 FATAL("write error on %s", files[i].fname);
2502 else
2503 WARNING("i/o error occurred on %s", files[i].fname);
2504 }
2505 if (files[i].fp == stdin || files[i].fp == stdout ||
2506 files[i].fp == stderr)
2507 continue;
2508 if (files[i].mode == '|' || files[i].mode == LE)
2509 stat = pclose(files[i].fp) == -1;
2510 else
2511 stat = fclose(files[i].fp) == EOF;
2512 if (stat)
2513 WARNING("i/o error occurred while closing %s", files[i].fname);
2514 }
2515 }
2516
flush_all(void)2517 static void flush_all(void)
2518 {
2519 size_t i;
2520
2521 for (i = 0; i < nfiles; i++)
2522 if (files[i].fp)
2523 fflush(files[i].fp);
2524 }
2525
/*
 * Forward declaration: dosub() and gensub() call backsub() whenever they
 * meet a backslash in the replacement text.
 */
void backsub(char **pb_ptr, const char **sptr_ptr);
2527
/*
 * dosub: implement sub() and gsub().
 *
 * a[0]/a[1]  regular expression: when a[0] is NULL, a[1] is an
 *            already-compiled fa; otherwise a[1] is executed and its
 *            string value is compiled with makedfa().
 * a[2]       replacement text; backslash sequences are handed to
 *            backsub() and a bare '&' stands for the matched text.
 * a[3]       source cell; when at least one match is found the edited
 *            string is stored back into it with setsval().
 * subop      SUB (only the first match is selected) or GSUB (every
 *            match is selected); any other value is FATAL.
 *
 * Returns a temporary numeric cell whose value is m, the number of
 * matches that were counted.
 */
Cell *dosub(Node **a, int subop) /* sub and gsub */
{
	fa *pfa;
	int tempstat = 0;
	char *repl;
	Cell *x;

	char *buf = NULL;
	char *pb = NULL;
	int bufsz = recsize;

	const char *r, *s;
	const char *start;
	const char *noempty = NULL; /* empty match disallowed here */
	size_t m = 0; /* match count */
	size_t whichm = 0; /* which match to select, 0 = global */
	int mtype; /* match type */

	if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];
	} else {
		x = execute(a[1]);
		pfa = makedfa(getsval(x), 1);
		tempfree(x);
	}

	/* take a private copy of the replacement; it is released with xfree() below */
	x = execute(a[2]); /* replacement string */
	repl = tostring(getsval(x));
	tempfree(x);

	switch (subop) {
	case SUB:
		whichm = 1;
		x = execute(a[3]); /* source string */
		break;
	case GSUB:
		whichm = 0;
		x = execute(a[3]); /* source string */
		break;
	default:
		FATAL("dosub: unrecognized subop: %d", subop);
	}

	start = getsval(x);
	/* patbeg and patlen are read after each pmatch(); presumably pmatch() sets them (defined elsewhere) */
	while (pmatch(pfa, start)) {
		/* the output buffer is only allocated once a first match is seen */
		if (buf == NULL) {
			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
				FATAL("out of memory in dosub");
			/* NOTE(review): initstat is forced to 2 for the rest of the scan and restored afterwards; meaning of 2 is not visible here */
			tempstat = pfa->initstat;
			pfa->initstat = 2;
		}

		/* match types */
		#define MT_IGNORE 0 /* unselected or invalid */
		#define MT_INSERT 1 /* selected, empty */
		#define MT_REPLACE 2 /* selected, not empty */

		/* an empty match just after replacement is invalid */

		if (patbeg == noempty && patlen == 0) {
			mtype = MT_IGNORE; /* invalid, not counted */
		} else if (whichm == ++m || whichm == 0) {
			mtype = patlen ? MT_REPLACE : MT_INSERT;
		} else {
			mtype = MT_IGNORE; /* unselected, but counted */
		}

		/* leading text: */
		if (patbeg > start) {
			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
				recsize, &pb, "dosub");
			s = start;
			while (s < patbeg)
				*pb++ = *s++;
		}

		if (mtype == MT_IGNORE)
			goto matching_text; /* skip replacement text */

		/* emit the replacement, expanding '&' and backslash sequences */
		r = repl;
		while (*r != 0) {
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
			if (*r == '\\') {
				backsub(&pb, &r);
			} else if (*r == '&') {
				r++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
					&pb, "dosub");
				for (s = patbeg; s < patbeg+patlen; )
					*pb++ = *s++;
			} else {
				*pb++ = *r++;
			}
		}

matching_text:
		if (mtype == MT_REPLACE || *patbeg == '\0')
			goto next_search; /* skip matching text */

		/*
		 * The matched text is kept.  For an empty match, widen it to
		 * u8_nextlen(patbeg) bytes (presumably one UTF-8 character)
		 * so the scan moves forward.
		 */
		if (patlen == 0)
			patlen = u8_nextlen(patbeg);
		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
		s = patbeg;
		while (s < patbeg + patlen)
			*pb++ = *s++;

next_search:
		start = patbeg + patlen;
		/* stop once the selected match is done or the end of the source was reached */
		if (m == whichm || *patbeg == '\0')
			break;
		if (mtype == MT_REPLACE)
			noempty = start;

		#undef MT_IGNORE
		#undef MT_INSERT
		#undef MT_REPLACE
	}

	xfree(repl);

	/* buf is non-NULL only when at least one match occurred */
	if (buf != NULL) {
		pfa->initstat = tempstat;

		/* trailing text */
		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
		while ((*pb++ = *start++) != '\0')
			;

		setsval(x, buf);
		free(buf);
	}

	tempfree(x);
	x = gettemp();
	x->tval = NUM;
	x->fval = m;
	return x;
}
2666
gensub(Node ** a,int nnn)2667 Cell *gensub(Node **a, int nnn) /* global selective substitute */
2668 /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2669 {
2670 Cell *x, *y, *res, *h;
2671 char *rptr;
2672 const char *sptr;
2673 char *buf, *pb;
2674 const char *t, *q;
2675 fa *pfa;
2676 int mflag, tempstat, num, whichm;
2677 int bufsz = recsize;
2678
2679 if ((buf = malloc(bufsz)) == NULL)
2680 FATAL("out of memory in gensub");
2681 mflag = 0; /* if mflag == 0, can replace empty string */
2682 num = 0;
2683 x = execute(a[4]); /* source string */
2684 t = getsval(x);
2685 res = copycell(x); /* target string - initially copy of source */
2686 res->csub = CTEMP; /* result values are temporary */
2687 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
2688 pfa = (fa *) a[1]; /* regular expression */
2689 else {
2690 y = execute(a[1]);
2691 pfa = makedfa(getsval(y), 1);
2692 tempfree(y);
2693 }
2694 y = execute(a[2]); /* replacement string */
2695 h = execute(a[3]); /* which matches should be replaced */
2696 sptr = getsval(h);
2697 if (sptr[0] == 'g' || sptr[0] == 'G')
2698 whichm = -1;
2699 else {
2700 /*
2701 * The specified number is index of replacement, starting
2702 * from 1. GNU awk treats index lower than 0 same as
2703 * 1, we do same for compatibility.
2704 */
2705 whichm = (int) getfval(h) - 1;
2706 if (whichm < 0)
2707 whichm = 0;
2708 }
2709 tempfree(h);
2710
2711 if (pmatch(pfa, t)) {
2712 char *sl;
2713
2714 tempstat = pfa->initstat;
2715 pfa->initstat = 2;
2716 pb = buf;
2717 rptr = getsval(y);
2718 /*
2719 * XXX if there are any backreferences in subst string,
2720 * complain now.
2721 */
2722 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2723 if (strchr("0123456789", sl[1])) {
2724 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2725 }
2726 }
2727
2728 do {
2729 if (whichm >= 0 && whichm != num) {
2730 num++;
2731 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2732
2733 /* copy the part of string up to and including
2734 * match to output buffer */
2735 while (t < patbeg + patlen)
2736 *pb++ = *t++;
2737 continue;
2738 }
2739
2740 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2741 if (mflag == 0) { /* can replace empty */
2742 num++;
2743 sptr = rptr;
2744 while (*sptr != 0) {
2745 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2746 if (*sptr == '\\') {
2747 backsub(&pb, &sptr);
2748 } else if (*sptr == '&') {
2749 sptr++;
2750 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2751 for (q = patbeg; q < patbeg+patlen; )
2752 *pb++ = *q++;
2753 } else
2754 *pb++ = *sptr++;
2755 }
2756 }
2757 if (*t == 0) /* at end */
2758 goto done;
2759 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2760 *pb++ = *t++;
2761 if (pb > buf + bufsz) /* BUG: not sure of this test */
2762 FATAL("gensub result0 %.30s too big; can't happen", buf);
2763 mflag = 0;
2764 }
2765 else { /* matched nonempty string */
2766 num++;
2767 sptr = t;
2768 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2769 while (sptr < patbeg)
2770 *pb++ = *sptr++;
2771 sptr = rptr;
2772 while (*sptr != 0) {
2773 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2774 if (*sptr == '\\') {
2775 backsub(&pb, &sptr);
2776 } else if (*sptr == '&') {
2777 sptr++;
2778 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2779 for (q = patbeg; q < patbeg+patlen; )
2780 *pb++ = *q++;
2781 } else
2782 *pb++ = *sptr++;
2783 }
2784 t = patbeg + patlen;
2785 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2786 goto done;
2787 if (pb > buf + bufsz)
2788 FATAL("gensub result1 %.30s too big; can't happen", buf);
2789 mflag = 1;
2790 }
2791 } while (pmatch(pfa,t));
2792 sptr = t;
2793 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2794 while ((*pb++ = *sptr++) != 0)
2795 ;
2796 done: if (pb > buf + bufsz)
2797 FATAL("gensub result2 %.30s too big; can't happen", buf);
2798 *pb = '\0';
2799 setsval(res, buf);
2800 pfa->initstat = tempstat;
2801 }
2802 tempfree(x);
2803 tempfree(y);
2804 free(buf);
2805 return(res);
2806 }
2807
/*
 * backsub: handle the \\& variations in sub/gsub/gensub replacement text.
 *
 * On entry (*sptr_ptr)[0] is a backslash.  The sequence starting there is
 * translated into *pb_ptr and both cursors are advanced:
 *
 *   \\\&  ->  \&                    (four input bytes consumed)
 *   \\&   ->  \   and the '&' is left for the caller to expand
 *   \\x   ->  \x  when POSIXLY_CORRECT is set, else \\x
 *   \&    ->  &                     (a literal ampersand)
 *   \x    ->  \   and x is left for the caller
 *
 * The environment is consulted once, on the first call.  At most two
 * bytes are written per call; the caller provides the room.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)
{
	static bool env_checked = false;
	static bool posix_mode = false;
	const char *src = *sptr_ptr;
	char *dst = *pb_ptr;

	if (!env_checked) {
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);
		env_checked = true;
	}

	switch (src[1]) {
	case '\\':
		if (src[2] == '&') {
			/* \\& -> \ + matched */
			*dst++ = '\\';
			src += 2;
		} else if (src[2] == '\\' && src[3] == '&') {
			/* \\\& -> \& */
			*dst++ = '\\';
			*dst++ = '&';
			src += 4;
		} else {
			/* \\x: POSIX drops the first backslash, otherwise keep both */
			if (posix_mode)
				src++;
			else
				*dst++ = *src++;
			*dst++ = *src++;
		}
		break;
	case '&':
		/* literal & */
		*dst++ = src[1];
		src += 2;
		break;
	default:
		/* literal \ */
		*dst++ = src[0];
		src += 1;
		break;
	}

	*sptr_ptr = src;
	*pb_ptr = dst;
}
2844
/*
 * wide_char_to_byte_str: encode one code point as a UTF-8 byte string.
 *
 * rune    code point; values below 0 or above 0x10FFFF are rejected.
 * outlen  receives the number of encoded bytes (1 to 4), not counting
 *         the terminating NUL; it is left untouched on failure.
 *
 * Returns a pointer to a static, NUL-terminated buffer that is
 * overwritten by the next call, or NULL when rune is out of range.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	/* lead-byte marker indexed by sequence length (slots 0 and 1 unused) */
	static const unsigned char lead[] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };
	static char buf[5];
	unsigned int bits;
	size_t nbytes;
	size_t pos;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	if (rune <= 0x7F)
		nbytes = 1;
	else if (rune <= 0x7FF)
		nbytes = 2;
	else if (rune <= 0xFFFF)
		nbytes = 3;
	else
		nbytes = 4;

	memset(buf, 0, sizeof(buf));	/* also supplies the terminator */

	if (nbytes == 1) {
		buf[0] = (char) rune;
	} else {
		/* fill continuation bytes (10xxxxxx) from the low end upward */
		bits = (unsigned int) rune;
		for (pos = nbytes - 1; pos > 0; pos--) {
			buf[pos] = (char) (0x80 | (bits & 0x3F));
			bits >>= 6;
		}
		/* whatever is left fits in the lead byte */
		buf[0] = (char) (lead[nbytes] | bits);
	}

	*outlen = nbytes;
	return buf;
}
2882