1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #define DEBUG
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <wctype.h>
30 #include <fcntl.h>
31 #include <setjmp.h>
32 #include <limits.h>
33 #include <math.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include <time.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <sys/wait.h>
40 #include "awk.h"
41 #include "awkgram.tab.h"
42
43
44 static void stdinit(void);
45 static void flush_all(void);
46 static char *wide_char_to_byte_str(int rune, size_t *outlen);
47
/*
 * tempfree(x): release cell x through tfree(), but only when istemp(x)
 * holds.  The macro form is the one that is compiled; note that it
 * evaluates its argument more than once.  The function form in the #else
 * branch is a disabled variant that also warns about an out-of-range csub.
 */
#if 1
#define	tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	if (p->ctype == OCELL) {
		if (p->csub < CUNK || p->csub > CFREE) {
			WARNING("bad csub %d in Cell %d %s",
				p->csub, p->ctype, p->sval);
		}
	}
	if (istemp(p))
		tfree(p);
}
#endif
60
61 /* do we really need these? */
62 /* #ifdef _NFILE */
63 /* #ifndef FOPEN_MAX */
64 /* #define FOPEN_MAX _NFILE */
65 /* #endif */
66 /* #endif */
67 /* */
68 /* #ifndef FOPEN_MAX */
69 /* #define FOPEN_MAX 40 */ /* max number of open files */
70 /* #endif */
71 /* */
72 /* #ifndef RAND_MAX */
73 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
74 /* #endif */
75
76 jmp_buf env;
77 extern int pairstack[];
78 extern Awkfloat srand_seed;
79
80 Node *winner = NULL; /* root of parse tree */
81 Cell *tmps; /* free temporary cells for execution */
82
83 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
84 Cell *True = &truecell;
85 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
86 Cell *False = &falsecell;
87 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
88 Cell *jbreak = &breakcell;
89 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
90 Cell *jcont = &contcell;
91 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
92 Cell *jnext = &nextcell;
93 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
94 Cell *jnextfile = &nextfilecell;
95 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
96 Cell *jexit = &exitcell;
97 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
98 Cell *jret = &retcell;
99 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
100
101 Node *curnode = NULL; /* the node being executed, for debugging */
102
103 /* buffer memory management */
/*
 * adjbuf: make sure a heap buffer holds at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 means no rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * When the buffer is grown, *pbuf and *psiz are updated and *pbptr (if
 * given) is moved so that it keeps its offset within the buffer.
 *
 * return 0 for realloc failure, !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *grown;
	int excess, offset;

	if (minlen <= *psiz)
		return 1;		/* already large enough */

	excess = (quantum != 0) ? minlen % quantum : 0;
	offset = (pbptr != NULL) ? *pbptr - *pbuf : 0;
	/* round up to next multiple of quantum */
	if (excess != 0)
		minlen += quantum - excess;

	grown = (char *) realloc(*pbuf, minlen);
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)grown);
	if (grown == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}

	*pbuf = grown;
	*psiz = minlen;
	if (pbptr != NULL)
		*pbptr = grown + offset;
	return 1;
}
137
/*
 * run: entry point for executing a parse tree.  Calls stdinit(), executes
 * the tree rooted at a (the result is discarded), then calls closeall().
 */
void run(Node *a)	/* execution of parse tree starts here */
{
	stdinit();
	(void) execute(a);
	closeall();
}
145
execute(Node * u)146 Cell *execute(Node *u) /* execute a node of the parse tree */
147 {
148 Cell *(*proc)(Node **, int);
149 Cell *x;
150 Node *a;
151
152 if (u == NULL)
153 return(True);
154 for (a = u; ; a = a->nnext) {
155 curnode = a;
156 if (isvalue(a)) {
157 x = (Cell *) (a->narg[0]);
158 if (isfld(x) && !donefld)
159 fldbld();
160 else if (isrec(x) && !donerec)
161 recbld();
162 return(x);
163 }
164 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
165 FATAL("illegal statement");
166 proc = proctab[a->nobj-FIRSTTOKEN];
167 x = (*proc)(a->narg, a->nobj);
168 if (isfld(x) && !donefld)
169 fldbld();
170 else if (isrec(x) && !donerec)
171 recbld();
172 if (isexpr(a))
173 return(x);
174 if (isjump(x))
175 return(x);
176 if (a->nnext == NULL)
177 return(x);
178 tempfree(x);
179 }
180 }
181
182
program(Node ** a,int n)183 Cell *program(Node **a, int n) /* execute an awk program */
184 { /* a[0] = BEGIN, a[1] = body, a[2] = END */
185 Cell *x;
186
187 if (setjmp(env) != 0)
188 goto ex;
189 if (a[0]) { /* BEGIN */
190 x = execute(a[0]);
191 if (isexit(x))
192 return(True);
193 if (isjump(x))
194 FATAL("illegal break, continue, next or nextfile from BEGIN");
195 tempfree(x);
196 }
197 if (a[1] || a[2])
198 while (getrec(&record, &recsize, true) > 0) {
199 x = execute(a[1]);
200 if (isexit(x))
201 break;
202 tempfree(x);
203 }
204 ex:
205 if (setjmp(env) != 0) /* handles exit within END */
206 goto ex1;
207 if (a[2]) { /* END */
208 x = execute(a[2]);
209 if (isbreak(x) || isnext(x) || iscont(x))
210 FATAL("illegal break, continue, next or nextfile from END");
211 tempfree(x);
212 }
213 ex1:
214 return(True);
215 }
216
217 struct Frame { /* stack frame for awk function calls */
218 int nargs; /* number of arguments in this call */
219 Cell *fcncell; /* pointer to Cell for function */
220 Cell **args; /* pointer to array of arguments after execute */
221 Cell *retval; /* return value */
222 };
223
224 #define NARGS 50 /* max args in a call */
225
226 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
227 int nframe = 0; /* number of frames allocated */
228 struct Frame *frp = NULL; /* frame pointer. bottom level unused */
229
call(Node ** a,int n)230 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
231 {
232 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
233 int i, ncall, ndef;
234 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
235 Node *x;
236 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
237 Cell *y, *z, *fcn;
238 char *s;
239
240 fcn = execute(a[0]); /* the function itself */
241 s = fcn->nval;
242 if (!isfcn(fcn))
243 FATAL("calling undefined function %s", s);
244 if (frame == NULL) {
245 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
246 if (frame == NULL)
247 FATAL("out of space for stack frames calling %s", s);
248 }
249 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
250 ncall++;
251 ndef = (int) fcn->fval; /* args in defn */
252 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
253 if (ncall > ndef)
254 WARNING("function %s called with %d args, uses only %d",
255 s, ncall, ndef);
256 if (ncall + ndef > NARGS)
257 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
258 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
259 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
260 y = execute(x);
261 oargs[i] = y;
262 DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
263 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
264 if (isfcn(y))
265 FATAL("can't use function %s as argument in %s", y->nval, s);
266 if (isarr(y))
267 args[i] = y; /* arrays by ref */
268 else
269 args[i] = copycell(y);
270 tempfree(y);
271 }
272 for ( ; i < ndef; i++) { /* add null args for ones not provided */
273 args[i] = gettemp();
274 *args[i] = newcopycell;
275 }
276 frp++; /* now ok to up frame */
277 if (frp >= frame + nframe) {
278 int dfp = frp - frame; /* old index */
279 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
280 if (frame == NULL)
281 FATAL("out of space for stack frames in %s", s);
282 frp = frame + dfp;
283 }
284 frp->fcncell = fcn;
285 frp->args = args;
286 frp->nargs = ndef; /* number defined with (excess are locals) */
287 frp->retval = gettemp();
288
289 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
290 y = execute((Node *)(fcn->sval)); /* execute body */
291 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
292
293 for (i = 0; i < ndef; i++) {
294 Cell *t = frp->args[i];
295 if (isarr(t)) {
296 if (t->csub == CCOPY) {
297 if (i >= ncall) {
298 freesymtab(t);
299 t->csub = CTEMP;
300 tempfree(t);
301 } else {
302 oargs[i]->tval = t->tval;
303 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
304 oargs[i]->sval = t->sval;
305 tempfree(t);
306 }
307 }
308 } else if (t != y) { /* kludge to prevent freeing twice */
309 t->csub = CTEMP;
310 tempfree(t);
311 } else if (t == y && t->csub == CCOPY) {
312 t->csub = CTEMP;
313 tempfree(t);
314 freed = 1;
315 }
316 }
317 tempfree(fcn);
318 if (isexit(y) || isnext(y))
319 return y;
320 if (freed == 0) {
321 tempfree(y); /* don't free twice! */
322 }
323 z = frp->retval; /* return value */
324 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
325 frp--;
326 return(z);
327 }
328
/*
 * copycell: return a temporary holding a copy of x.
 * The CON, FLD and REC type bits are cleared in the copy, so it is not
 * a constant or a field.  A string value is duplicated with tostring()
 * and the copy may free it; otherwise the copy is marked DONTFREE.
 * csub is set to CCOPY, which prevents freeing until the call is over.
 */
Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
{
	Cell *dup = gettemp();

	dup->csub = CCOPY;	/* prevents freeing until call is over */
	dup->nval = x->nval;	/* BUG? */
	dup->fval = x->fval;
	dup->tval = x->tval & ~(CON|FLD|REC);
	if (!isstr(x) /* && x->ctype != OCELL */) {
		dup->tval |= DONTFREE;
	} else {
		dup->sval = tostring(x->sval);
		dup->tval &= ~DONTFREE;
	}
	return dup;
}
347
arg(Node ** a,int n)348 Cell *arg(Node **a, int n) /* nth argument of a function */
349 {
350
351 n = ptoi(a[0]); /* argument number, counting from 0 */
352 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
353 if (n+1 > frp->nargs)
354 FATAL("argument #%d of function %s was not supplied",
355 n+1, frp->fcncell->nval);
356 return frp->args[n];
357 }
358
jump(Node ** a,int n)359 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
360 {
361 Cell *y;
362
363 switch (n) {
364 case EXIT:
365 if (a[0] != NULL) {
366 y = execute(a[0]);
367 errorflag = (int) getfval(y);
368 tempfree(y);
369 }
370 longjmp(env, 1);
371 case RETURN:
372 if (a[0] != NULL) {
373 y = execute(a[0]);
374 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
375 setsval(frp->retval, getsval(y));
376 frp->retval->fval = getfval(y);
377 frp->retval->tval |= NUM;
378 }
379 else if (y->tval & STR)
380 setsval(frp->retval, getsval(y));
381 else if (y->tval & NUM)
382 setfval(frp->retval, getfval(y));
383 else /* can't happen */
384 FATAL("bad type variable %d", y->tval);
385 tempfree(y);
386 }
387 return(jret);
388 case NEXT:
389 return(jnext);
390 case NEXTFILE:
391 nextfile();
392 return(jnextfile);
393 case BREAK:
394 return(jbreak);
395 case CONTINUE:
396 return(jcont);
397 default: /* can't happen */
398 FATAL("illegal jump type %d", n);
399 }
400 return 0; /* not reached */
401 }
402
awkgetline(Node ** a,int n)403 Cell *awkgetline(Node **a, int n) /* get next line from specific input */
404 { /* a[0] is variable, a[1] is operator, a[2] is filename */
405 Cell *r, *x;
406 extern Cell **fldtab;
407 FILE *fp;
408 char *buf;
409 int bufsize = recsize;
410 int mode;
411 bool newflag;
412 double result;
413
414 if ((buf = (char *) malloc(bufsize)) == NULL)
415 FATAL("out of memory in getline");
416
417 fflush(stdout); /* in case someone is waiting for a prompt */
418 r = gettemp();
419 if (a[1] != NULL) { /* getline < file */
420 x = execute(a[2]); /* filename */
421 mode = ptoi(a[1]);
422 if (mode == '|') /* input pipe */
423 mode = LE; /* arbitrary flag */
424 fp = openfile(mode, getsval(x), &newflag);
425 tempfree(x);
426 if (fp == NULL)
427 n = -1;
428 else
429 n = readrec(&buf, &bufsize, fp, newflag);
430 if (n <= 0) {
431 ;
432 } else if (a[0] != NULL) { /* getline var <file */
433 x = execute(a[0]);
434 setsval(x, buf);
435 if (is_number(x->sval, & result)) {
436 x->fval = result;
437 x->tval |= NUM;
438 }
439 tempfree(x);
440 } else { /* getline <file */
441 setsval(fldtab[0], buf);
442 if (is_number(fldtab[0]->sval, & result)) {
443 fldtab[0]->fval = result;
444 fldtab[0]->tval |= NUM;
445 }
446 }
447 } else { /* bare getline; use current input */
448 if (a[0] == NULL) /* getline */
449 n = getrec(&record, &recsize, true);
450 else { /* getline var */
451 n = getrec(&buf, &bufsize, false);
452 if (n > 0) {
453 x = execute(a[0]);
454 setsval(x, buf);
455 if (is_number(x->sval, & result)) {
456 x->fval = result;
457 x->tval |= NUM;
458 }
459 tempfree(x);
460 }
461 }
462 }
463 setfval(r, (Awkfloat) n);
464 free(buf);
465 return r;
466 }
467
getnf(Node ** a,int n)468 Cell *getnf(Node **a, int n) /* get NF */
469 {
470 if (!donefld)
471 fldbld();
472 return (Cell *) a[0];
473 }
474
475 static char *
makearraystring(Node * p,const char * func)476 makearraystring(Node *p, const char *func)
477 {
478 char *buf;
479 int bufsz = recsize;
480 size_t blen;
481
482 if ((buf = (char *) malloc(bufsz)) == NULL) {
483 FATAL("%s: out of memory", func);
484 }
485
486 blen = 0;
487 buf[blen] = '\0';
488
489 for (; p; p = p->nnext) {
490 Cell *x = execute(p); /* expr */
491 char *s = getsval(x);
492 size_t seplen = strlen(getsval(subseploc));
493 size_t nsub = p->nnext ? seplen : 0;
494 size_t slen = strlen(s);
495 size_t tlen = blen + slen + nsub;
496
497 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
498 FATAL("%s: out of memory %s[%s...]",
499 func, x->nval, buf);
500 }
501 memcpy(buf + blen, s, slen);
502 if (nsub) {
503 memcpy(buf + blen + slen, *SUBSEP, nsub);
504 }
505 buf[tlen] = '\0';
506 blen = tlen;
507 tempfree(x);
508 }
509 return buf;
510 }
511
array(Node ** a,int n)512 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
513 {
514 Cell *x, *z;
515 char *buf;
516
517 x = execute(a[0]); /* Cell* for symbol table */
518 buf = makearraystring(a[1], __func__);
519 if (!isarr(x)) {
520 DPRINTF("making %s into an array\n", NN(x->nval));
521 if (freeable(x))
522 xfree(x->sval);
523 x->tval &= ~(STR|NUM|DONTFREE);
524 x->tval |= ARR;
525 x->sval = (char *) makesymtab(NSYMTAB);
526 }
527 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
528 z->ctype = OCELL;
529 z->csub = CVAR;
530 tempfree(x);
531 free(buf);
532 return(z);
533 }
534
awkdelete(Node ** a,int n)535 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
536 {
537 Cell *x;
538
539 x = execute(a[0]); /* Cell* for symbol table */
540 if (x == symtabloc) {
541 FATAL("cannot delete SYMTAB or its elements");
542 }
543 if (!isarr(x))
544 return True;
545 if (a[1] == NULL) { /* delete the elements, not the table */
546 freesymtab(x);
547 x->tval &= ~STR;
548 x->tval |= ARR;
549 x->sval = (char *) makesymtab(NSYMTAB);
550 } else {
551 char *buf = makearraystring(a[1], __func__);
552 freeelem(x, buf);
553 free(buf);
554 }
555 tempfree(x);
556 return True;
557 }
558
intest(Node ** a,int n)559 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
560 {
561 Cell *ap, *k;
562 char *buf;
563
564 ap = execute(a[1]); /* array name */
565 if (!isarr(ap)) {
566 DPRINTF("making %s into an array\n", ap->nval);
567 if (freeable(ap))
568 xfree(ap->sval);
569 ap->tval &= ~(STR|NUM|DONTFREE);
570 ap->tval |= ARR;
571 ap->sval = (char *) makesymtab(NSYMTAB);
572 }
573 buf = makearraystring(a[0], __func__);
574 k = lookup(buf, (Array *) ap->sval);
575 tempfree(ap);
576 free(buf);
577 if (k == NULL)
578 return(False);
579 else
580 return(True);
581 }
582
583
584 /* ======== utf-8 code ========== */
585
586 /*
587 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
588 * or utf-8. u8_isutf tests whether a string starts with a valid
589 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
590 * u8_nextlen returns length of next valid sequence, which is
591 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
592 * u8_strlen returns length of string in valid utf-8 sequences
593 * and/or high-bit bytes. Conversion functions go between byte
594 * number and character number.
595 *
596 * In theory, this behaves the same as before for non-utf8 bytes.
597 *
598 * Limited checking! This is a potential security hole.
599 */
600
601 /* is s the beginning of a valid utf-8 string? */
602 /* return length 1..4 if yes, 0 if no */
u8_isutf(const char * s)603 int u8_isutf(const char *s)
604 {
605 int n, ret;
606 unsigned char c;
607
608 c = s[0];
609 if (c < 128 || awk_mb_cur_max == 1)
610 return 1; /* what if it's 0? */
611
612 n = strlen(s);
613 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
614 ret = 2; /* 110xxxxx 10xxxxxx */
615 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
616 && (s[2] & 0xC0) == 0x80) {
617 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
618 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
619 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
620 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
621 } else {
622 ret = 0;
623 }
624 return ret;
625 }
626
627 /* Convert (prefix of) utf8 string to utf-32 rune. */
628 /* Sets *rune to the value, returns the length. */
629 /* No error checking: watch out. */
u8_rune(int * rune,const char * s)630 int u8_rune(int *rune, const char *s)
631 {
632 int n, ret;
633 unsigned char c;
634
635 c = s[0];
636 if (c < 128 || awk_mb_cur_max == 1) {
637 *rune = c;
638 return 1;
639 }
640
641 n = strlen(s);
642 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
643 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
644 ret = 2;
645 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
646 && (s[2] & 0xC0) == 0x80) {
647 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
648 /* 1110xxxx 10xxxxxx 10xxxxxx */
649 ret = 3;
650 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
651 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
652 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
653 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
654 ret = 4;
655 } else {
656 *rune = c;
657 ret = 1;
658 }
659 return ret; /* returns one byte if sequence doesn't look like utf */
660 }
661
/*
 * return length of next sequence: 1 for ascii or random, 2..4 for valid utf8
 * (a 0 from u8_isutf, meaning "not utf-8", is mapped to 1)
 */
int u8_nextlen(const char *s)
{
	int len = u8_isutf(s);

	return (len != 0) ? len : 1;
}
672
673 /* return number of utf characters or single non-utf bytes */
u8_strlen(const char * s)674 int u8_strlen(const char *s)
675 {
676 int i, len, n, totlen;
677 unsigned char c;
678
679 n = strlen(s);
680 totlen = 0;
681 for (i = 0; i < n; i += len) {
682 c = s[i];
683 if (c < 128 || awk_mb_cur_max == 1) {
684 len = 1;
685 } else {
686 len = u8_nextlen(&s[i]);
687 }
688 totlen++;
689 if (i > n)
690 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
691 }
692 return totlen;
693 }
694
/*
 * convert utf-8 char number in a string to its byte offset
 *
 * charnum counts characters from 0, so the result is the total byte
 * length of the first charnum characters of s.  If s holds fewer than
 * charnum characters the walk stops at the terminating NUL and the byte
 * length of s is returned, instead of reading beyond the end of the
 * string.
 */
int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
709
/*
 * convert byte offset in s to utf-8 char number that starts there
 *
 * The result counts from 1: offset 0 gives 1.  A negative offset gives 0
 * and an offset beyond the end of the string gives -1.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int pos;
	int charnum;	/* BUG: what origin? */
			/* should be 0 to match start==0 which means no match */
	int total = strlen(s);

	if (bytenum > total) {
		return -1;	/* ??? */
	}
	charnum = 0;
	pos = 0;
	while (pos <= bytenum) {
		pos += u8_nextlen(s + pos);
		charnum++;
	}
	return charnum;
}
727
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	Runeerror = 128, /* from somewhere else */
	Runemax = 0x10FFFF,

	/* number of payload bits in a 1-byte form, a continuation byte, */
	/* and the lead byte of a 2-, 3-, 4- and 5-byte form */
	Bit1 = 7,
	Bitx = 6,
	Bit2 = 5,
	Bit3 = 4,
	Bit4 = 3,
	Bit5 = 2,

	/* fixed high bits that tag each kind of byte */
	T1 = ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx = ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2 = ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3 = ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4 = ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5 = ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	/* largest value representable in 1, 2, 3 and 4 bytes */
	Rune1 = (1<<(Bit1+0*Bitx))-1,	/* 0000 0000 0000 0000 0111 1111 */
	Rune2 = (1<<(Bit2+1*Bitx))-1,	/* 0000 0000 0000 0111 1111 1111 */
	Rune3 = (1<<(Bit3+2*Bitx))-1,	/* 0000 0000 1111 1111 1111 1111 */
	Rune4 = (1<<(Bit4+3*Bitx))-1,	/* 0011 1111 1111 1111 1111 1111 */

	Maskx = (1<<Bitx)-1,		/* 0011 1111 */
	Testx = Maskx ^ 0xFF,		/* 1100 0000 */

};

/*
 * runetochar: store the utf-8 encoding of code point c at str and return
 * the number of bytes written (1..4).  No terminating NUL is added.
 * Values up to Rune1 (including negative ones) are stored as one byte.
 * A value above Runemax is replaced by Runeerror before it is encoded in
 * the three-byte form.
 */
int runetochar(char *str, int c)
{
	int nbytes, lead, i;

	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	if (c <= Rune2) {
		/* two character sequence 00080-007FF => T2 Tx */
		nbytes = 2;
		lead = T2;
	} else {
		if (c > Runemax)
			c = Runeerror;
		if (c <= Rune3) {
			/* three character sequence 00800-0FFFF => T3 Tx Tx */
			nbytes = 3;
			lead = T3;
		} else {
			/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
			nbytes = 4;
			lead = T4;
		}
	}

	/* fill the continuation bytes from the last one backwards */
	for (i = nbytes - 1; i > 0; i--) {
		str[i] = Tx | (c & Maskx);
		c >>= Bitx;
	}
	str[0] = lead | c;
	return nbytes;
}
791
792
793 /* ========== end of utf8 code =========== */
794
795
796
matchop(Node ** a,int n)797 Cell *matchop(Node **a, int n) /* ~ and match() */
798 {
799 Cell *x, *y, *z;
800 char *s, *t;
801 int i;
802 int cstart, cpatlen, len;
803 fa *pfa;
804 int (*mf)(fa *, const char *) = match, mode = 0;
805
806 if (n == MATCHFCN) {
807 mf = pmatch;
808 mode = 1;
809 }
810 x = execute(a[1]); /* a[1] = target text */
811 s = getsval(x);
812 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
813 i = (*mf)((fa *) a[2], s);
814 else {
815 y = execute(a[2]); /* a[2] = regular expr */
816 t = getsval(y);
817 pfa = makedfa(t, mode);
818 i = (*mf)(pfa, s);
819 tempfree(y);
820 }
821 z = x;
822 if (n == MATCHFCN) {
823 int start = patbeg - s + 1; /* origin 1 */
824 if (patlen < 0) {
825 start = 0; /* not found */
826 } else {
827 cstart = u8_byte2char(s, start-1);
828 cpatlen = 0;
829 for (i = 0; i < patlen; i += len) {
830 len = u8_nextlen(patbeg+i);
831 cpatlen++;
832 }
833
834 start = cstart;
835 patlen = cpatlen;
836 }
837
838 setfval(rstartloc, (Awkfloat) start);
839 setfval(rlengthloc, (Awkfloat) patlen);
840 x = gettemp();
841 x->tval = NUM;
842 x->fval = start;
843 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
844 x = True;
845 else
846 x = False;
847
848 tempfree(z);
849 return x;
850 }
851
852
boolop(Node ** a,int n)853 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
854 {
855 Cell *x, *y;
856 int i;
857
858 x = execute(a[0]);
859 i = istrue(x);
860 tempfree(x);
861 switch (n) {
862 case BOR:
863 if (i) return(True);
864 y = execute(a[1]);
865 i = istrue(y);
866 tempfree(y);
867 if (i) return(True);
868 else return(False);
869 case AND:
870 if ( !i ) return(False);
871 y = execute(a[1]);
872 i = istrue(y);
873 tempfree(y);
874 if (i) return(True);
875 else return(False);
876 case NOT:
877 if (i) return(False);
878 else return(True);
879 default: /* can't happen */
880 FATAL("unknown boolean operator %d", n);
881 }
882 return 0; /*NOTREACHED*/
883 }
884
relop(Node ** a,int n)885 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
886 {
887 int i;
888 Cell *x, *y;
889 Awkfloat j;
890 bool x_is_nan, y_is_nan;
891
892 x = execute(a[0]);
893 y = execute(a[1]);
894 x_is_nan = isnan(x->fval);
895 y_is_nan = isnan(y->fval);
896 if (x->tval&NUM && y->tval&NUM) {
897 if ((x_is_nan || y_is_nan) && n != NE)
898 return(False);
899 j = x->fval - y->fval;
900 i = j<0? -1: (j>0? 1: 0);
901 } else {
902 i = strcmp(getsval(x), getsval(y));
903 }
904 tempfree(x);
905 tempfree(y);
906 switch (n) {
907 case LT: if (i<0) return(True);
908 else return(False);
909 case LE: if (i<=0) return(True);
910 else return(False);
911 case NE: if (x_is_nan && y_is_nan) return(True);
912 else if (i!=0) return(True);
913 else return(False);
914 case EQ: if (i == 0) return(True);
915 else return(False);
916 case GE: if (i>=0) return(True);
917 else return(False);
918 case GT: if (i>0) return(True);
919 else return(False);
920 default: /* can't happen */
921 FATAL("unknown relational operator %d", n);
922 }
923 return 0; /*NOTREACHED*/
924 }
925
/*
 * tfree: give a temporary cell back to the free list.
 * The cell's string value is released first if it is freeable; the cell
 * is then pushed on the front of tmps.  Finding the cell already at the
 * head of the list is a fatal error.
 */
void tfree(Cell *a)	/* free a tempcell */
{
	Cell *head;

	if (freeable(a)) {
		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
		xfree(a->sval);
	}
	head = tmps;
	if (a == head)
		FATAL("tempcell list is curdled");
	a->cnext = head;
	tmps = a;
}
937
gettemp(void)938 Cell *gettemp(void) /* get a tempcell */
939 { int i;
940 Cell *x;
941
942 if (!tmps) {
943 tmps = (Cell *) calloc(100, sizeof(*tmps));
944 if (!tmps)
945 FATAL("out of space for temporaries");
946 for (i = 1; i < 100; i++)
947 tmps[i-1].cnext = &tmps[i];
948 tmps[i-1].cnext = NULL;
949 }
950 x = tmps;
951 tmps = x->cnext;
952 *x = tempcell;
953 return(x);
954 }
955
indirect(Node ** a,int n)956 Cell *indirect(Node **a, int n) /* $( a[0] ) */
957 {
958 Awkfloat val;
959 Cell *x;
960 int m;
961
962 x = execute(a[0]);
963 val = getfval(x); /* freebsd: defend against super large field numbers */
964 if ((Awkfloat)INT_MAX < val)
965 FATAL("trying to access out of range field %s", x->nval);
966 m = (int) val;
967 tempfree(x);
968 x = fieldadr(m);
969 x->ctype = OCELL; /* BUG? why are these needed? */
970 x->csub = CFLD;
971 return(x);
972 }
973
substr(Node ** a,int nnn)974 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
975 {
976 int k, m, n;
977 int mb, nb;
978 char *s;
979 int temp;
980 Cell *x, *y, *z = NULL;
981
982 x = execute(a[0]);
983 y = execute(a[1]);
984 if (a[2] != NULL)
985 z = execute(a[2]);
986 s = getsval(x);
987 k = u8_strlen(s) + 1;
988 if (k <= 1) {
989 tempfree(x);
990 tempfree(y);
991 if (a[2] != NULL) {
992 tempfree(z);
993 }
994 x = gettemp();
995 setsval(x, "");
996 return(x);
997 }
998 m = (int) getfval(y);
999 if (m <= 0)
1000 m = 1;
1001 else if (m > k)
1002 m = k;
1003 tempfree(y);
1004 if (a[2] != NULL) {
1005 n = (int) getfval(z);
1006 tempfree(z);
1007 } else
1008 n = k - 1;
1009 if (n < 0)
1010 n = 0;
1011 else if (n > k - m)
1012 n = k - m;
1013 /* m is start, n is length from there */
1014 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1015 y = gettemp();
1016 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1017 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */
1018
1019 temp = s[nb]; /* with thanks to John Linderman */
1020 s[nb] = '\0';
1021 setsval(y, s + mb);
1022 s[nb] = temp;
1023 tempfree(x);
1024 return(y);
1025 }
1026
sindex(Node ** a,int nnn)1027 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1028 {
1029 Cell *x, *y, *z;
1030 char *s1, *s2, *p1, *p2, *q;
1031 Awkfloat v = 0.0;
1032
1033 x = execute(a[0]);
1034 s1 = getsval(x);
1035 y = execute(a[1]);
1036 s2 = getsval(y);
1037
1038 z = gettemp();
1039 for (p1 = s1; *p1 != '\0'; p1++) {
1040 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1041 continue;
1042 if (*p2 == '\0') {
1043 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1044
1045 /* should be a function: used in match() as well */
1046 int i, len;
1047 v = 0;
1048 for (i = 0; i < p1-s1+1; i += len) {
1049 len = u8_nextlen(s1+i);
1050 v++;
1051 }
1052 break;
1053 }
1054 }
1055 tempfree(x);
1056 tempfree(y);
1057 setfval(z, v);
1058 return(z);
1059 }
1060
/*
 * has_utf8: return 1 if s contains any utf-8 (2 bytes or more) character,
 * 0 otherwise.  The string is walked one u8_nextlen() step at a time.
 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int step = u8_nextlen(s);

		if (step > 1)
			return 1;
		s += step;
	}
	return 0;
}
1072
1073 #define MAXNUMSIZE 50
1074
format(char ** pbuf,int * pbufsize,const char * s,Node * a)1075 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1076 {
1077 char *fmt;
1078 char *p, *t;
1079 const char *os;
1080 Cell *x;
1081 int flag = 0, n;
1082 int fmtwd; /* format width */
1083 int fmtsz = recsize;
1084 char *buf = *pbuf;
1085 int bufsize = *pbufsize;
1086 #define FMTSZ(a) (fmtsz - ((a) - fmt))
1087 #define BUFSZ(a) (bufsize - ((a) - buf))
1088
1089 static bool first = true;
1090 static bool have_a_format = false;
1091
1092 if (first) {
1093 char xbuf[100];
1094
1095 snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1096 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1097 first = false;
1098 }
1099
1100 os = s;
1101 p = buf;
1102 if ((fmt = (char *) malloc(fmtsz)) == NULL)
1103 FATAL("out of memory in format()");
1104 while (*s) {
1105 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1106 if (*s != '%') {
1107 *p++ = *s++;
1108 continue;
1109 }
1110 if (*(s+1) == '%') {
1111 *p++ = '%';
1112 s += 2;
1113 continue;
1114 }
1115 fmtwd = atoi(s+1);
1116 if (fmtwd < 0)
1117 fmtwd = -fmtwd;
1118 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1119 for (t = fmt; (*t++ = *s) != '\0'; s++) {
1120 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1121 FATAL("format item %.30s... ran format() out of memory", os);
1122 /* Ignore size specifiers */
1123 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
1124 t--;
1125 continue;
1126 }
1127 if (isalpha((uschar)*s))
1128 break;
1129 if (*s == '$') {
1130 FATAL("'$' not permitted in awk formats");
1131 }
1132 if (*s == '*') {
1133 if (a == NULL) {
1134 FATAL("not enough args in printf(%s)", os);
1135 }
1136 x = execute(a);
1137 a = a->nnext;
1138 snprintf(t - 1, FMTSZ(t - 1),
1139 "%d", fmtwd=(int) getfval(x));
1140 if (fmtwd < 0)
1141 fmtwd = -fmtwd;
1142 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1143 t = fmt + strlen(fmt);
1144 tempfree(x);
1145 }
1146 }
1147 *t = '\0';
1148 if (fmtwd < 0)
1149 fmtwd = -fmtwd;
1150 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1151 switch (*s) {
1152 case 'a': case 'A':
1153 if (have_a_format)
1154 flag = *s;
1155 else
1156 flag = 'f';
1157 break;
1158 case 'f': case 'e': case 'g': case 'E': case 'G':
1159 flag = 'f';
1160 break;
1161 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1162 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1163 *(t-1) = 'j';
1164 *t = *s;
1165 *++t = '\0';
1166 break;
1167 case 's':
1168 flag = 's';
1169 break;
1170 case 'c':
1171 flag = 'c';
1172 break;
1173 default:
1174 WARNING("weird printf conversion %s", fmt);
1175 flag = '?';
1176 break;
1177 }
1178 if (a == NULL)
1179 FATAL("not enough args in printf(%s)", os);
1180 x = execute(a);
1181 a = a->nnext;
1182 n = MAXNUMSIZE;
1183 if (fmtwd > n)
1184 n = fmtwd;
1185 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1186 switch (flag) {
1187 case '?':
1188 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
1189 t = getsval(x);
1190 n = strlen(t);
1191 if (fmtwd > n)
1192 n = fmtwd;
1193 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1194 p += strlen(p);
1195 snprintf(p, BUFSZ(p), "%s", t);
1196 break;
1197 case 'a':
1198 case 'A':
1199 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1200 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1201 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1202
1203 case 's': {
1204 t = getsval(x);
1205 n = strlen(t);
1206 /* if simple format or no utf-8 in the string, sprintf works */
1207 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1208 if (fmtwd > n)
1209 n = fmtwd;
1210 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1211 FATAL("huge string/format (%d chars) in printf %.30s..." \
1212 " ran format() out of memory", n, t);
1213 snprintf(p, BUFSZ(p), fmt, t);
1214 break;
1215 }
1216
1217 /* get here if string has utf-8 chars and fmt is not plain %s */
1218 /* "%-w.ps", where -, w and .p are all optional */
1219 /* '0' before the w is a flag character */
1220 /* fmt points at % */
1221 int ljust = 0, wid = 0, prec = n, pad = 0;
1222 char *f = fmt+1;
1223 if (f[0] == '-') {
1224 ljust = 1;
1225 f++;
1226 }
1227 // flags '0' and '+' are recognized but skipped
1228 if (f[0] == '0') {
1229 f++;
1230 if (f[0] == '+')
1231 f++;
1232 }
1233 if (f[0] == '+') {
1234 f++;
1235 if (f[0] == '0')
1236 f++;
1237 }
1238 if (isdigit(f[0])) { /* there is a wid */
1239 wid = strtol(f, &f, 10);
1240 }
1241 if (f[0] == '.') { /* there is a .prec */
1242 prec = strtol(++f, &f, 10);
1243 }
1244 if (prec > u8_strlen(t))
1245 prec = u8_strlen(t);
1246 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1247 int i, k, n;
1248
1249 if (ljust) { // print prec chars from t, then pad blanks
1250 n = u8_char2byte(t, prec);
1251 for (k = 0; k < n; k++) {
1252 //putchar(t[k]);
1253 *p++ = t[k];
1254 }
1255 for (i = 0; i < pad; i++) {
1256 //printf(" ");
1257 *p++ = ' ';
1258 }
1259 } else { // print pad blanks, then prec chars from t
1260 for (i = 0; i < pad; i++) {
1261 //printf(" ");
1262 *p++ = ' ';
1263 }
1264 n = u8_char2byte(t, prec);
1265 for (k = 0; k < n; k++) {
1266 //putchar(t[k]);
1267 *p++ = t[k];
1268 }
1269 }
1270 *p = 0;
1271 break;
1272 }
1273
1274 case 'c': {
1275 /*
1276 * If a numeric value is given, awk should just turn
1277 * it into a character and print it:
1278 * BEGIN { printf("%c\n", 65) }
1279 * prints "A".
1280 *
1281 * But what if the numeric value is > 128 and
1282 * represents a valid Unicode code point?!? We do
1283 * our best to convert it back into UTF-8. If we
1284 * can't, we output the encoding of the Unicode
1285 * "invalid character", 0xFFFD.
1286 */
1287 if (isnum(x)) {
1288 int charval = (int) getfval(x);
1289
1290 if (charval != 0) {
1291 if (charval < 128 || awk_mb_cur_max == 1)
1292 snprintf(p, BUFSZ(p), fmt, charval);
1293 else {
1294 // possible unicode character
1295 size_t count;
1296 char *bs = wide_char_to_byte_str(charval, &count);
1297
1298 if (bs == NULL) { // invalid character
1299 // use unicode invalid character, 0xFFFD
1300 static char invalid_char[] = "\357\277\275";
1301 bs = invalid_char;
1302 count = 3;
1303 }
1304 t = bs;
1305 n = count;
1306 goto format_percent_c;
1307 }
1308 } else {
1309 *p++ = '\0'; /* explicit null byte */
1310 *p = '\0'; /* next output will start here */
1311 }
1312 break;
1313 }
1314 t = getsval(x);
1315 n = u8_nextlen(t);
1316 format_percent_c:
1317 if (n < 2) { /* not utf8 */
1318 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1319 break;
1320 }
1321
1322 // utf8 character, almost same song and dance as for %s
1323 int ljust = 0, wid = 0, prec = n, pad = 0;
1324 char *f = fmt+1;
1325 if (f[0] == '-') {
1326 ljust = 1;
1327 f++;
1328 }
1329 // flags '0' and '+' are recognized but skipped
1330 if (f[0] == '0') {
1331 f++;
1332 if (f[0] == '+')
1333 f++;
1334 }
1335 if (f[0] == '+') {
1336 f++;
1337 if (f[0] == '0')
1338 f++;
1339 }
1340 if (isdigit(f[0])) { /* there is a wid */
1341 wid = strtol(f, &f, 10);
1342 }
1343 if (f[0] == '.') { /* there is a .prec */
1344 prec = strtol(++f, &f, 10);
1345 }
1346 if (prec > 1) // %c --> only one character
1347 prec = 1;
1348 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1349 int i;
1350
1351 if (ljust) { // print one char from t, then pad blanks
1352 for (i = 0; i < n; i++)
1353 *p++ = t[i];
1354 for (i = 0; i < pad; i++) {
1355 //printf(" ");
1356 *p++ = ' ';
1357 }
1358 } else { // print pad blanks, then prec chars from t
1359 for (i = 0; i < pad; i++) {
1360 //printf(" ");
1361 *p++ = ' ';
1362 }
1363 for (i = 0; i < n; i++)
1364 *p++ = t[i];
1365 }
1366 *p = 0;
1367 break;
1368 }
1369 default:
1370 FATAL("can't happen: bad conversion %c in format()", flag);
1371 }
1372
1373 tempfree(x);
1374 p += strlen(p);
1375 s++;
1376 }
1377 *p = '\0';
1378 free(fmt);
1379 for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1380 x = execute(a);
1381 tempfree(x);
1382 }
1383 *pbuf = buf;
1384 *pbufsize = bufsize;
1385 return p - buf;
1386 }
1387
awksprintf(Node ** a,int n)1388 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1389 {
1390 Cell *x;
1391 Node *y;
1392 char *buf;
1393 int bufsz=3*recsize;
1394
1395 if ((buf = (char *) malloc(bufsz)) == NULL)
1396 FATAL("out of memory in awksprintf");
1397 y = a[0]->nnext;
1398 x = execute(a[0]);
1399 if (format(&buf, &bufsz, getsval(x), y) == -1)
1400 FATAL("sprintf string %.30s... too long. can't happen.", buf);
1401 tempfree(x);
1402 x = gettemp();
1403 x->sval = buf;
1404 x->tval = STR;
1405 return(x);
1406 }
1407
awkprintf(Node ** a,int n)1408 Cell *awkprintf(Node **a, int n) /* printf */
1409 { /* a[0] is list of args, starting with format string */
1410 /* a[1] is redirection operator, a[2] is redirection file */
1411 FILE *fp;
1412 Cell *x;
1413 Node *y;
1414 char *buf;
1415 int len;
1416 int bufsz=3*recsize;
1417
1418 if ((buf = (char *) malloc(bufsz)) == NULL)
1419 FATAL("out of memory in awkprintf");
1420 y = a[0]->nnext;
1421 x = execute(a[0]);
1422 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1423 FATAL("printf string %.30s... too long. can't happen.", buf);
1424 tempfree(x);
1425 if (a[1] == NULL) {
1426 /* fputs(buf, stdout); */
1427 fwrite(buf, len, 1, stdout);
1428 if (ferror(stdout))
1429 FATAL("write error on stdout");
1430 } else {
1431 fp = redirect(ptoi(a[1]), a[2]);
1432 /* fputs(buf, fp); */
1433 fwrite(buf, len, 1, fp);
1434 fflush(fp);
1435 if (ferror(fp))
1436 FATAL("write error on %s", filename(fp));
1437 }
1438 free(buf);
1439 return(True);
1440 }
1441
arith(Node ** a,int n)1442 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1443 {
1444 Awkfloat i, j = 0;
1445 double v;
1446 Cell *x, *y, *z;
1447
1448 x = execute(a[0]);
1449 i = getfval(x);
1450 tempfree(x);
1451 if (n != UMINUS && n != UPLUS) {
1452 y = execute(a[1]);
1453 j = getfval(y);
1454 tempfree(y);
1455 }
1456 z = gettemp();
1457 switch (n) {
1458 case ADD:
1459 i += j;
1460 break;
1461 case MINUS:
1462 i -= j;
1463 break;
1464 case MULT:
1465 i *= j;
1466 break;
1467 case DIVIDE:
1468 if (j == 0)
1469 FATAL("division by zero");
1470 i /= j;
1471 break;
1472 case MOD:
1473 if (j == 0)
1474 FATAL("division by zero in mod");
1475 modf(i/j, &v);
1476 i = i - j * v;
1477 break;
1478 case UMINUS:
1479 i = -i;
1480 break;
1481 case UPLUS: /* handled by getfval(), above */
1482 break;
1483 case POWER:
1484 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1485 i = ipow(i, (int) j);
1486 else {
1487 errno = 0;
1488 i = errcheck(pow(i, j), "pow");
1489 }
1490 break;
1491 default: /* can't happen */
1492 FATAL("illegal arithmetic operator %d", n);
1493 }
1494 setfval(z, i);
1495 return(z);
1496 }
1497
/*
 * ipow - raise x to the integer power n by repeated squaring.
 * Any n <= 0 yields 1.
 */
double ipow(double x, int n)	/* x**n.  ought to be done by pow, but isn't always */
{
	if (n <= 0)
		return 1;

	double half = ipow(x, n / 2);

	/* odd exponents need one extra factor of x */
	return (n % 2 != 0) ? x * half * half : half * half;
}
1510
incrdecr(Node ** a,int n)1511 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1512 {
1513 Cell *x, *z;
1514 int k;
1515 Awkfloat xf;
1516
1517 x = execute(a[0]);
1518 xf = getfval(x);
1519 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1520 if (n == PREINCR || n == PREDECR) {
1521 setfval(x, xf + k);
1522 return(x);
1523 }
1524 z = gettemp();
1525 setfval(z, xf);
1526 setfval(x, xf + k);
1527 tempfree(x);
1528 return(z);
1529 }
1530
assign(Node ** a,int n)1531 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1532 { /* this is subtle; don't muck with it. */
1533 Cell *x, *y;
1534 Awkfloat xf, yf;
1535 double v;
1536
1537 y = execute(a[1]);
1538 x = execute(a[0]);
1539 if (n == ASSIGN) { /* ordinary assignment */
1540 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1541 ; /* self-assignment: leave alone unless it's a field or NF */
1542 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1543 yf = getfval(y);
1544 setsval(x, getsval(y));
1545 x->fval = yf;
1546 x->tval |= NUM;
1547 }
1548 else if (isstr(y))
1549 setsval(x, getsval(y));
1550 else if (isnum(y))
1551 setfval(x, getfval(y));
1552 else
1553 funnyvar(y, "read value of");
1554 tempfree(y);
1555 return(x);
1556 }
1557 xf = getfval(x);
1558 yf = getfval(y);
1559 switch (n) {
1560 case ADDEQ:
1561 xf += yf;
1562 break;
1563 case SUBEQ:
1564 xf -= yf;
1565 break;
1566 case MULTEQ:
1567 xf *= yf;
1568 break;
1569 case DIVEQ:
1570 if (yf == 0)
1571 FATAL("division by zero in /=");
1572 xf /= yf;
1573 break;
1574 case MODEQ:
1575 if (yf == 0)
1576 FATAL("division by zero in %%=");
1577 modf(xf/yf, &v);
1578 xf = xf - yf * v;
1579 break;
1580 case POWEQ:
1581 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1582 xf = ipow(xf, (int) yf);
1583 else {
1584 errno = 0;
1585 xf = errcheck(pow(xf, yf), "pow");
1586 }
1587 break;
1588 default:
1589 FATAL("illegal assignment operator %d", n);
1590 break;
1591 }
1592 tempfree(y);
1593 setfval(x, xf);
1594 return(x);
1595 }
1596
cat(Node ** a,int q)1597 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1598 {
1599 Cell *x, *y, *z;
1600 int n1, n2;
1601 char *s = NULL;
1602 int ssz = 0;
1603
1604 x = execute(a[0]);
1605 n1 = strlen(getsval(x));
1606 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1607 memcpy(s, x->sval, n1);
1608
1609 tempfree(x);
1610
1611 y = execute(a[1]);
1612 n2 = strlen(getsval(y));
1613 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1614 memcpy(s + n1, y->sval, n2);
1615 s[n1 + n2] = '\0';
1616
1617 tempfree(y);
1618
1619 z = gettemp();
1620 z->sval = s;
1621 z->tval = STR;
1622
1623 return(z);
1624 }
1625
pastat(Node ** a,int n)1626 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1627 {
1628 Cell *x;
1629
1630 if (a[0] == NULL)
1631 x = execute(a[1]);
1632 else {
1633 x = execute(a[0]);
1634 if (istrue(x)) {
1635 tempfree(x);
1636 x = execute(a[1]);
1637 }
1638 }
1639 return x;
1640 }
1641
dopa2(Node ** a,int n)1642 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1643 {
1644 Cell *x;
1645 int pair;
1646
1647 pair = ptoi(a[3]);
1648 if (pairstack[pair] == 0) {
1649 x = execute(a[0]);
1650 if (istrue(x))
1651 pairstack[pair] = 1;
1652 tempfree(x);
1653 }
1654 if (pairstack[pair] == 1) {
1655 x = execute(a[1]);
1656 if (istrue(x))
1657 pairstack[pair] = 0;
1658 tempfree(x);
1659 x = execute(a[2]);
1660 return(x);
1661 }
1662 return(False);
1663 }
1664
split(Node ** a,int nnn)1665 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1666 {
1667 Cell *x = NULL, *y, *ap;
1668 const char *s, *origs, *t;
1669 const char *fs = NULL;
1670 char *origfs = NULL;
1671 int sep;
1672 char temp, num[50];
1673 int n, tempstat, arg3type;
1674 int j;
1675 double result;
1676
1677 y = execute(a[0]); /* source string */
1678 origs = s = strdup(getsval(y));
1679 tempfree(y);
1680 arg3type = ptoi(a[3]);
1681 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */
1682 fs = getsval(fsloc);
1683 } else if (arg3type == STRING) { /* split(str,arr,"string") */
1684 x = execute(a[2]);
1685 fs = origfs = strdup(getsval(x));
1686 tempfree(x);
1687 } else if (arg3type == REGEXPR) {
1688 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1689 } else {
1690 FATAL("illegal type of split");
1691 }
1692 sep = *fs;
1693 ap = execute(a[1]); /* array name */
1694 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1695 freesymtab(ap);
1696 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1697 ap->tval &= ~STR;
1698 ap->tval |= ARR;
1699 ap->sval = (char *) makesymtab(NSYMTAB);
1700
1701 n = 0;
1702 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1703 /* split(s, a, //); have to arrange that it looks like empty sep */
1704 arg3type = 0;
1705 fs = "";
1706 sep = 0;
1707 }
1708 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1709 fa *pfa;
1710 if (arg3type == REGEXPR) { /* it's ready already */
1711 pfa = (fa *) a[2];
1712 } else {
1713 pfa = makedfa(fs, 1);
1714 }
1715 if (nematch(pfa,s)) {
1716 tempstat = pfa->initstat;
1717 pfa->initstat = 2;
1718 do {
1719 n++;
1720 snprintf(num, sizeof(num), "%d", n);
1721 temp = *patbeg;
1722 setptr(patbeg, '\0');
1723 if (is_number(s, & result))
1724 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1725 else
1726 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1727 setptr(patbeg, temp);
1728 s = patbeg + patlen;
1729 if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1730 n++;
1731 snprintf(num, sizeof(num), "%d", n);
1732 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1733 pfa->initstat = tempstat;
1734 goto spdone;
1735 }
1736 } while (nematch(pfa,s));
1737 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1738 /* cf gsub and refldbld */
1739 }
1740 n++;
1741 snprintf(num, sizeof(num), "%d", n);
1742 if (is_number(s, & result))
1743 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1744 else
1745 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1746 spdone:
1747 pfa = NULL;
1748
1749 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */
1750 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1751 for (;;) {
1752 char *fr = newt;
1753 n++;
1754 if (*s == '"' ) { /* start of "..." */
1755 for (s++ ; *s != '\0'; ) {
1756 if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1757 s += 2; /* doubled quote */
1758 *fr++ = '"';
1759 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1760 s++; /* skip over closing quote */
1761 break;
1762 } else {
1763 *fr++ = *s++;
1764 }
1765 }
1766 *fr++ = 0;
1767 } else { /* unquoted field */
1768 while (*s != ',' && *s != '\0')
1769 *fr++ = *s++;
1770 *fr++ = 0;
1771 }
1772 snprintf(num, sizeof(num), "%d", n);
1773 if (is_number(newt, &result))
1774 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1775 else
1776 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1777 if (*s++ == '\0')
1778 break;
1779 }
1780 free(newt);
1781
1782 } else if (!CSV && sep == ' ') { /* usual case: split on white space */
1783 for (n = 0; ; ) {
1784 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1785 while (ISWS(*s))
1786 s++;
1787 if (*s == '\0')
1788 break;
1789 n++;
1790 t = s;
1791 do
1792 s++;
1793 while (*s != '\0' && !ISWS(*s));
1794 temp = *s;
1795 setptr(s, '\0');
1796 snprintf(num, sizeof(num), "%d", n);
1797 if (is_number(t, & result))
1798 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1799 else
1800 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1801 setptr(s, temp);
1802 if (*s != '\0')
1803 s++;
1804 }
1805
1806 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1807 for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1808 char buf[10];
1809 n++;
1810 snprintf(num, sizeof(num), "%d", n);
1811
1812 for (j = 0; j < u8_nextlen(s); j++) {
1813 buf[j] = s[j];
1814 }
1815 buf[j] = '\0';
1816
1817 if (isdigit((uschar)buf[0]))
1818 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1819 else
1820 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1821 }
1822
1823 } else if (*s != '\0') { /* some random single character */
1824 for (;;) {
1825 n++;
1826 t = s;
1827 while (*s != sep && *s != '\0')
1828 s++;
1829 temp = *s;
1830 setptr(s, '\0');
1831 snprintf(num, sizeof(num), "%d", n);
1832 if (is_number(t, & result))
1833 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1834 else
1835 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1836 setptr(s, temp);
1837 if (*s++ == '\0')
1838 break;
1839 }
1840 }
1841 tempfree(ap);
1842 xfree(origs);
1843 xfree(origfs);
1844 x = gettemp();
1845 x->tval = NUM;
1846 x->fval = n;
1847 return(x);
1848 }
1849
condexpr(Node ** a,int n)1850 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1851 {
1852 Cell *x;
1853
1854 x = execute(a[0]);
1855 if (istrue(x)) {
1856 tempfree(x);
1857 x = execute(a[1]);
1858 } else {
1859 tempfree(x);
1860 x = execute(a[2]);
1861 }
1862 return(x);
1863 }
1864
ifstat(Node ** a,int n)1865 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1866 {
1867 Cell *x;
1868
1869 x = execute(a[0]);
1870 if (istrue(x)) {
1871 tempfree(x);
1872 x = execute(a[1]);
1873 } else if (a[2] != NULL) {
1874 tempfree(x);
1875 x = execute(a[2]);
1876 }
1877 return(x);
1878 }
1879
whilestat(Node ** a,int n)1880 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1881 {
1882 Cell *x;
1883
1884 for (;;) {
1885 x = execute(a[0]);
1886 if (!istrue(x))
1887 return(x);
1888 tempfree(x);
1889 x = execute(a[1]);
1890 if (isbreak(x)) {
1891 x = True;
1892 return(x);
1893 }
1894 if (isnext(x) || isexit(x) || isret(x))
1895 return(x);
1896 tempfree(x);
1897 }
1898 }
1899
dostat(Node ** a,int n)1900 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1901 {
1902 Cell *x;
1903
1904 for (;;) {
1905 x = execute(a[0]);
1906 if (isbreak(x))
1907 return True;
1908 if (isnext(x) || isexit(x) || isret(x))
1909 return(x);
1910 tempfree(x);
1911 x = execute(a[1]);
1912 if (!istrue(x))
1913 return(x);
1914 tempfree(x);
1915 }
1916 }
1917
forstat(Node ** a,int n)1918 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1919 {
1920 Cell *x;
1921
1922 x = execute(a[0]);
1923 tempfree(x);
1924 for (;;) {
1925 if (a[1]!=NULL) {
1926 x = execute(a[1]);
1927 if (!istrue(x)) return(x);
1928 else tempfree(x);
1929 }
1930 x = execute(a[3]);
1931 if (isbreak(x)) /* turn off break */
1932 return True;
1933 if (isnext(x) || isexit(x) || isret(x))
1934 return(x);
1935 tempfree(x);
1936 x = execute(a[2]);
1937 tempfree(x);
1938 }
1939 }
1940
instat(Node ** a,int n)1941 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1942 {
1943 Cell *x, *vp, *arrayp, *cp, *ncp;
1944 Array *tp;
1945 int i;
1946
1947 vp = execute(a[0]);
1948 arrayp = execute(a[1]);
1949 if (!isarr(arrayp)) {
1950 return True;
1951 }
1952 tp = (Array *) arrayp->sval;
1953 tempfree(arrayp);
1954 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1955 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1956 setsval(vp, cp->nval);
1957 ncp = cp->cnext;
1958 x = execute(a[2]);
1959 if (isbreak(x)) {
1960 tempfree(vp);
1961 return True;
1962 }
1963 if (isnext(x) || isexit(x) || isret(x)) {
1964 tempfree(vp);
1965 return(x);
1966 }
1967 tempfree(x);
1968 }
1969 }
1970 return True;
1971 }
1972
nawk_convert(const char * s,int (* fun_c)(int),wint_t (* fun_wc)(wint_t))1973 static char *nawk_convert(const char *s, int (*fun_c)(int),
1974 wint_t (*fun_wc)(wint_t))
1975 {
1976 char *buf = NULL;
1977 char *pbuf = NULL;
1978 const char *ps = NULL;
1979 size_t n = 0;
1980 wchar_t wc;
1981 const size_t sz = awk_mb_cur_max;
1982 int unused;
1983
1984 if (sz == 1) {
1985 buf = tostring(s);
1986
1987 for (pbuf = buf; *pbuf; pbuf++)
1988 *pbuf = fun_c((uschar)*pbuf);
1989
1990 return buf;
1991 } else {
1992 /* upper/lower character may be shorter/longer */
1993 buf = tostringN(s, strlen(s) * sz + 1);
1994
1995 (void) mbtowc(NULL, NULL, 0); /* reset internal state */
1996 /*
1997 * Reset internal state here too.
1998 * Assign result to avoid a compiler warning. (Casting to void
1999 * doesn't work.)
2000 * Increment said variable to avoid a different warning.
2001 */
2002 unused = wctomb(NULL, L'\0');
2003 unused++;
2004
2005 ps = s;
2006 pbuf = buf;
2007 while (n = mbtowc(&wc, ps, sz),
2008 n > 0 && n != (size_t)-1 && n != (size_t)-2)
2009 {
2010 ps += n;
2011
2012 n = wctomb(pbuf, fun_wc(wc));
2013 if (n == (size_t)-1)
2014 FATAL("illegal wide character %s", s);
2015
2016 pbuf += n;
2017 }
2018
2019 *pbuf = '\0';
2020
2021 if (n)
2022 FATAL("illegal byte sequence %s", s);
2023
2024 return buf;
2025 }
2026 }
2027
#ifdef __DJGPP__
/*
 * Fallback towupper for DJGPP builds: code points 0..255 are mapped
 * with toupper(); everything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	if (!(wc >= 0 && wc < 256))
		return wc;

	return toupper(wc & 0xFF);
}

/*
 * Fallback towlower for DJGPP builds: code points 0..255 are mapped
 * with tolower(); everything else is returned unchanged.
 */
static wint_t towlower(wint_t wc)
{
	if (!(wc >= 0 && wc < 256))
		return wc;

	return tolower(wc & 0xFF);
}
#endif
2045
/* nawk_toupper - upper-case copy of s, produced by nawk_convert(). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2050
/* nawk_tolower - lower-case copy of s, produced by nawk_convert(). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2055
2056
2057
bltin(Node ** a,int n)2058 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2059 {
2060 Cell *x, *y;
2061 Awkfloat u = 0;
2062 int t, sz;
2063 Awkfloat tmp;
2064 char *buf, *fmt;
2065 Node *nextarg;
2066 FILE *fp;
2067 int status = 0;
2068 time_t tv;
2069 struct tm *tm, tmbuf;
2070 int estatus = 0;
2071
2072 t = ptoi(a[0]);
2073 x = execute(a[1]);
2074 nextarg = a[1]->nnext;
2075 switch (t) {
2076 case FLENGTH:
2077 if (isarr(x))
2078 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2079 else
2080 u = u8_strlen(getsval(x));
2081 break;
2082 case FLOG:
2083 errno = 0;
2084 u = errcheck(log(getfval(x)), "log");
2085 break;
2086 case FINT:
2087 modf(getfval(x), &u); break;
2088 case FEXP:
2089 errno = 0;
2090 u = errcheck(exp(getfval(x)), "exp");
2091 break;
2092 case FSQRT:
2093 errno = 0;
2094 u = errcheck(sqrt(getfval(x)), "sqrt");
2095 break;
2096 case FSIN:
2097 u = sin(getfval(x)); break;
2098 case FCOS:
2099 u = cos(getfval(x)); break;
2100 case FATAN:
2101 if (nextarg == NULL) {
2102 WARNING("atan2 requires two arguments; returning 1.0");
2103 u = 1.0;
2104 } else {
2105 y = execute(a[1]->nnext);
2106 u = atan2(getfval(x), getfval(y));
2107 tempfree(y);
2108 nextarg = nextarg->nnext;
2109 }
2110 break;
2111 case FCOMPL:
2112 u = ~((int)getfval(x));
2113 break;
2114 case FAND:
2115 if (nextarg == 0) {
2116 WARNING("and requires two arguments; returning 0");
2117 u = 0;
2118 break;
2119 }
2120 y = execute(a[1]->nnext);
2121 u = ((int)getfval(x)) & ((int)getfval(y));
2122 tempfree(y);
2123 nextarg = nextarg->nnext;
2124 break;
2125 case FFOR:
2126 if (nextarg == 0) {
2127 WARNING("or requires two arguments; returning 0");
2128 u = 0;
2129 break;
2130 }
2131 y = execute(a[1]->nnext);
2132 u = ((int)getfval(x)) | ((int)getfval(y));
2133 tempfree(y);
2134 nextarg = nextarg->nnext;
2135 break;
2136 case FXOR:
2137 if (nextarg == 0) {
2138 WARNING("xor requires two arguments; returning 0");
2139 u = 0;
2140 break;
2141 }
2142 y = execute(a[1]->nnext);
2143 u = ((int)getfval(x)) ^ ((int)getfval(y));
2144 tempfree(y);
2145 nextarg = nextarg->nnext;
2146 break;
2147 case FLSHIFT:
2148 if (nextarg == 0) {
2149 WARNING("lshift requires two arguments; returning 0");
2150 u = 0;
2151 break;
2152 }
2153 y = execute(a[1]->nnext);
2154 u = ((int)getfval(x)) << ((int)getfval(y));
2155 tempfree(y);
2156 nextarg = nextarg->nnext;
2157 break;
2158 case FRSHIFT:
2159 if (nextarg == 0) {
2160 WARNING("rshift requires two arguments; returning 0");
2161 u = 0;
2162 break;
2163 }
2164 y = execute(a[1]->nnext);
2165 u = ((int)getfval(x)) >> ((int)getfval(y));
2166 tempfree(y);
2167 nextarg = nextarg->nnext;
2168 break;
2169 case FSYSTEM:
2170 fflush(stdout); /* in case something is buffered already */
2171 estatus = status = system(getsval(x));
2172 if (status != -1) {
2173 if (WIFEXITED(status)) {
2174 estatus = WEXITSTATUS(status);
2175 } else if (WIFSIGNALED(status)) {
2176 estatus = WTERMSIG(status) + 256;
2177 #ifdef WCOREDUMP
2178 if (WCOREDUMP(status))
2179 estatus += 256;
2180 #endif
2181 } else /* something else?!? */
2182 estatus = 0;
2183 }
2184 /* else estatus was set to -1 */
2185 u = estatus;
2186 break;
2187 case FRAND:
2188 /* random() returns numbers in [0..2^31-1]
2189 * in order to get a number in [0, 1), divide it by 2^31
2190 */
2191 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
2192 break;
2193 case FSRAND:
2194 if (isrec(x)) /* no argument provided */
2195 u = time((time_t *)0);
2196 else
2197 u = getfval(x);
2198 tmp = u;
2199 srandom((unsigned long) u);
2200 u = srand_seed;
2201 srand_seed = tmp;
2202 break;
2203 case FTOUPPER:
2204 case FTOLOWER:
2205 if (t == FTOUPPER)
2206 buf = nawk_toupper(getsval(x));
2207 else
2208 buf = nawk_tolower(getsval(x));
2209 tempfree(x);
2210 x = gettemp();
2211 setsval(x, buf);
2212 free(buf);
2213 return x;
2214 case FFLUSH:
2215 if (isrec(x) || strlen(getsval(x)) == 0) {
2216 flush_all(); /* fflush() or fflush("") -> all */
2217 u = 0;
2218 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2219 u = EOF;
2220 else
2221 u = fflush(fp);
2222 break;
2223 case FMKTIME:
2224 memset(&tmbuf, 0, sizeof(tmbuf));
2225 tm = &tmbuf;
2226 t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2227 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2228 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2229 switch (t) {
2230 case 6:
2231 tm->tm_isdst = -1; /* let mktime figure it out */
2232 /* FALLTHROUGH */
2233 case 7:
2234 tm->tm_year -= 1900;
2235 tm->tm_mon--;
2236 u = mktime(tm);
2237 break;
2238 default:
2239 u = -1;
2240 break;
2241 }
2242 break;
2243 case FSYSTIME:
2244 u = time((time_t *) 0);
2245 break;
2246 case FSTRFTIME:
2247 /* strftime([format [,timestamp]]) */
2248 if (nextarg) {
2249 y = execute(nextarg);
2250 nextarg = nextarg->nnext;
2251 tv = (time_t) getfval(y);
2252 tempfree(y);
2253 } else
2254 tv = time((time_t *) 0);
2255 tm = localtime(&tv);
2256 if (tm == NULL)
2257 FATAL("bad time %ld", (long)tv);
2258
2259 if (isrec(x)) {
2260 /* format argument not provided, use default */
2261 fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2262 } else
2263 fmt = tostring(getsval(x));
2264
2265 sz = 32;
2266 buf = NULL;
2267 do {
2268 if ((buf = realloc(buf, (sz *= 2))) == NULL)
2269 FATAL("out of memory in strftime");
2270 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2271
2272 y = gettemp();
2273 setsval(y, buf);
2274 free(fmt);
2275 free(buf);
2276
2277 return y;
2278 default: /* can't happen */
2279 FATAL("illegal function type %d", t);
2280 break;
2281 }
2282 tempfree(x);
2283 x = gettemp();
2284 setfval(x, u);
2285 if (nextarg != NULL) {
2286 WARNING("warning: function has too many arguments");
2287 for ( ; nextarg; nextarg = nextarg->nnext) {
2288 y = execute(nextarg);
2289 tempfree(y);
2290 }
2291 }
2292 return(x);
2293 }
2294
printstat(Node ** a,int n)2295 Cell *printstat(Node **a, int n) /* print a[0] */
2296 {
2297 Node *x;
2298 Cell *y;
2299 FILE *fp;
2300
2301 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
2302 fp = stdout;
2303 else
2304 fp = redirect(ptoi(a[1]), a[2]);
2305 for (x = a[0]; x != NULL; x = x->nnext) {
2306 y = execute(x);
2307 fputs(getpssval(y), fp);
2308 tempfree(y);
2309 if (x->nnext == NULL)
2310 fputs(getsval(orsloc), fp);
2311 else
2312 fputs(getsval(ofsloc), fp);
2313 }
2314 if (a[1] != NULL)
2315 fflush(fp);
2316 if (ferror(fp))
2317 FATAL("write error on %s", filename(fp));
2318 return(True);
2319 }
2320
nullproc(Node ** a,int n)2321 Cell *nullproc(Node **a, int n)
2322 {
2323 return 0;
2324 }
2325
2326
/*
 * redirect - open the stream for an i/o redirection.
 * a is the redirection operator and b the expression giving the file
 * or command name.  Failure to open is fatal.
 */
FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
{
	Cell *namecell = execute(b);
	char *path = getsval(namecell);
	FILE *stream = openfile(a, path, NULL);

	if (stream == NULL)
		FATAL("can't open file %s", path);
	tempfree(namecell);
	return stream;
}
2341
/* One entry per stream tracked in the table of open files. */
struct files {
	FILE	*fp;		/* the stream; NULL marks an unused slot */
	const char	*fname;	/* name it was opened under */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table itself */

size_t nfiles;		/* number of slots in the table */
2349
stdinit(void)2350 static void stdinit(void) /* in case stdin, etc., are not constants */
2351 {
2352 nfiles = FOPEN_MAX;
2353 files = (struct files *) calloc(nfiles, sizeof(*files));
2354 if (files == NULL)
2355 FATAL("can't allocate file memory for %zu files", nfiles);
2356 files[0].fp = stdin;
2357 files[0].fname = tostring("/dev/stdin");
2358 files[0].mode = LT;
2359 files[1].fp = stdout;
2360 files[1].fname = tostring("/dev/stdout");
2361 files[1].mode = GT;
2362 files[2].fp = stderr;
2363 files[2].fname = tostring("/dev/stderr");
2364 files[2].mode = GT;
2365 }
2366
openfile(int a,const char * us,bool * pnewflag)2367 FILE *openfile(int a, const char *us, bool *pnewflag)
2368 {
2369 const char *s = us;
2370 size_t i;
2371 int m;
2372 FILE *fp = NULL;
2373 struct stat sbuf;
2374
2375 if (*s == '\0')
2376 FATAL("null file name in print or getline");
2377
2378 for (i = 0; i < nfiles; i++)
2379 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2380 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2381 a == FFLUSH)) {
2382 if (pnewflag)
2383 *pnewflag = false;
2384 return files[i].fp;
2385 }
2386 if (a == FFLUSH) /* didn't find it, so don't create it! */
2387 return NULL;
2388 for (i = 0; i < nfiles; i++)
2389 if (files[i].fp == NULL)
2390 break;
2391 if (i >= nfiles) {
2392 struct files *nf;
2393 size_t nnf = nfiles + FOPEN_MAX;
2394 nf = (struct files *) realloc(files, nnf * sizeof(*nf));
2395 if (nf == NULL)
2396 FATAL("cannot grow files for %s and %zu files", s, nnf);
2397 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2398 nfiles = nnf;
2399 files = nf;
2400 }
2401
2402 fflush(stdout); /* force a semblance of order */
2403
2404 /* don't try to read or write a directory */
2405 if (a == LT || a == GT || a == APPEND)
2406 if (stat(s, &sbuf) == 0 && S_ISDIR(sbuf.st_mode))
2407 return NULL;
2408
2409 m = a;
2410 if (a == GT) {
2411 fp = fopen(s, "w");
2412 } else if (a == APPEND) {
2413 fp = fopen(s, "a");
2414 m = GT; /* so can mix > and >> */
2415 } else if (a == '|') { /* output pipe */
2416 fp = popen(s, "w");
2417 } else if (a == LE) { /* input pipe */
2418 fp = popen(s, "r");
2419 } else if (a == LT) { /* getline <file */
2420 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
2421 } else /* can't happen */
2422 FATAL("illegal redirection %d", a);
2423 if (fp != NULL) {
2424 files[i].fname = tostring(s);
2425 files[i].fp = fp;
2426 files[i].mode = m;
2427 if (pnewflag)
2428 *pnewflag = true;
2429 if (fp != stdin && fp != stdout && fp != stderr)
2430 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2431 }
2432 return fp;
2433 }
2434
filename(FILE * fp)2435 const char *filename(FILE *fp)
2436 {
2437 size_t i;
2438
2439 for (i = 0; i < nfiles; i++)
2440 if (fp == files[i].fp)
2441 return files[i].fname;
2442 return "???";
2443 }
2444
closefile(Node ** a,int n)2445 Cell *closefile(Node **a, int n)
2446 {
2447 Cell *x;
2448 size_t i;
2449 bool stat;
2450
2451 x = execute(a[0]);
2452 getsval(x);
2453 stat = true;
2454 for (i = 0; i < nfiles; i++) {
2455 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2456 continue;
2457 if (files[i].mode == GT || files[i].mode == '|')
2458 fflush(files[i].fp);
2459 if (ferror(files[i].fp)) {
2460 if ((files[i].mode == GT && files[i].fp != stderr)
2461 || files[i].mode == '|')
2462 FATAL("write error on %s", files[i].fname);
2463 else
2464 WARNING("i/o error occurred on %s", files[i].fname);
2465 }
2466 if (files[i].fp == stdin || files[i].fp == stdout ||
2467 files[i].fp == stderr)
2468 stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2469 else if (files[i].mode == '|' || files[i].mode == LE)
2470 stat = pclose(files[i].fp) == -1;
2471 else
2472 stat = fclose(files[i].fp) == EOF;
2473 if (stat)
2474 WARNING("i/o error occurred closing %s", files[i].fname);
2475 xfree(files[i].fname);
2476 files[i].fname = NULL; /* watch out for ref thru this */
2477 files[i].fp = NULL;
2478 break;
2479 }
2480 tempfree(x);
2481 x = gettemp();
2482 setfval(x, (Awkfloat) (stat ? -1 : 0));
2483 return(x);
2484 }
2485
closeall(void)2486 void closeall(void)
2487 {
2488 size_t i;
2489 bool stat = false;
2490
2491 for (i = 0; i < nfiles; i++) {
2492 if (! files[i].fp)
2493 continue;
2494 if (files[i].mode == GT || files[i].mode == '|')
2495 fflush(files[i].fp);
2496 if (ferror(files[i].fp)) {
2497 if ((files[i].mode == GT && files[i].fp != stderr)
2498 || files[i].mode == '|')
2499 FATAL("write error on %s", files[i].fname);
2500 else
2501 WARNING("i/o error occurred on %s", files[i].fname);
2502 }
2503 if (files[i].fp == stdin || files[i].fp == stdout ||
2504 files[i].fp == stderr)
2505 continue;
2506 if (files[i].mode == '|' || files[i].mode == LE)
2507 stat = pclose(files[i].fp) == -1;
2508 else
2509 stat = fclose(files[i].fp) == EOF;
2510 if (stat)
2511 WARNING("i/o error occurred while closing %s", files[i].fname);
2512 }
2513 }
2514
flush_all(void)2515 static void flush_all(void)
2516 {
2517 size_t i;
2518
2519 for (i = 0; i < nfiles; i++)
2520 if (files[i].fp)
2521 fflush(files[i].fp);
2522 }
2523
2524 void backsub(char **pb_ptr, const char **sptr_ptr);
2525
dosub(Node ** a,int subop)2526 Cell *dosub(Node **a, int subop) /* sub and gsub */
2527 {
2528 fa *pfa;
2529 int tempstat = 0;
2530 char *repl;
2531 Cell *x;
2532
2533 char *buf = NULL;
2534 char *pb = NULL;
2535 int bufsz = recsize;
2536
2537 const char *r, *s;
2538 const char *start;
2539 const char *noempty = NULL; /* empty match disallowed here */
2540 size_t m = 0; /* match count */
2541 size_t whichm = 0; /* which match to select, 0 = global */
2542 int mtype; /* match type */
2543
2544 if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */
2545 pfa = (fa *) a[1];
2546 } else {
2547 x = execute(a[1]);
2548 pfa = makedfa(getsval(x), 1);
2549 tempfree(x);
2550 }
2551
2552 x = execute(a[2]); /* replacement string */
2553 repl = tostring(getsval(x));
2554 tempfree(x);
2555
2556 switch (subop) {
2557 case SUB:
2558 whichm = 1;
2559 x = execute(a[3]); /* source string */
2560 break;
2561 case GSUB:
2562 whichm = 0;
2563 x = execute(a[3]); /* source string */
2564 break;
2565 default:
2566 FATAL("dosub: unrecognized subop: %d", subop);
2567 }
2568
2569 start = getsval(x);
2570 while (pmatch(pfa, start)) {
2571 if (buf == NULL) {
2572 if ((pb = buf = (char *) malloc(bufsz)) == NULL)
2573 FATAL("out of memory in dosub");
2574 tempstat = pfa->initstat;
2575 pfa->initstat = 2;
2576 }
2577
2578 /* match types */
2579 #define MT_IGNORE 0 /* unselected or invalid */
2580 #define MT_INSERT 1 /* selected, empty */
2581 #define MT_REPLACE 2 /* selected, not empty */
2582
2583 /* an empty match just after replacement is invalid */
2584
2585 if (patbeg == noempty && patlen == 0) {
2586 mtype = MT_IGNORE; /* invalid, not counted */
2587 } else if (whichm == ++m || whichm == 0) {
2588 mtype = patlen ? MT_REPLACE : MT_INSERT;
2589 } else {
2590 mtype = MT_IGNORE; /* unselected, but counted */
2591 }
2592
2593 /* leading text: */
2594 if (patbeg > start) {
2595 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
2596 recsize, &pb, "dosub");
2597 s = start;
2598 while (s < patbeg)
2599 *pb++ = *s++;
2600 }
2601
2602 if (mtype == MT_IGNORE)
2603 goto matching_text; /* skip replacement text */
2604
2605 r = repl;
2606 while (*r != 0) {
2607 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
2608 if (*r == '\\') {
2609 backsub(&pb, &r);
2610 } else if (*r == '&') {
2611 r++;
2612 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
2613 &pb, "dosub");
2614 for (s = patbeg; s < patbeg+patlen; )
2615 *pb++ = *s++;
2616 } else {
2617 *pb++ = *r++;
2618 }
2619 }
2620
2621 matching_text:
2622 if (mtype == MT_REPLACE || *patbeg == '\0')
2623 goto next_search; /* skip matching text */
2624
2625 if (patlen == 0)
2626 patlen = u8_nextlen(patbeg);
2627 adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
2628 s = patbeg;
2629 while (s < patbeg + patlen)
2630 *pb++ = *s++;
2631
2632 next_search:
2633 start = patbeg + patlen;
2634 if (m == whichm || *patbeg == '\0')
2635 break;
2636 if (mtype == MT_REPLACE)
2637 noempty = start;
2638
2639 #undef MT_IGNORE
2640 #undef MT_INSERT
2641 #undef MT_REPLACE
2642 }
2643
2644 xfree(repl);
2645
2646 if (buf != NULL) {
2647 pfa->initstat = tempstat;
2648
2649 /* trailing text */
2650 adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
2651 while ((*pb++ = *start++) != '\0')
2652 ;
2653
2654 setsval(x, buf);
2655 free(buf);
2656 }
2657
2658 tempfree(x);
2659 x = gettemp();
2660 x->tval = NUM;
2661 x->fval = m;
2662 return x;
2663 }
2664
gensub(Node ** a,int nnn)2665 Cell *gensub(Node **a, int nnn) /* global selective substitute */
2666 /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2667 {
2668 Cell *x, *y, *res, *h;
2669 char *rptr;
2670 const char *sptr;
2671 char *buf, *pb;
2672 const char *t, *q;
2673 fa *pfa;
2674 int mflag, tempstat, num, whichm;
2675 int bufsz = recsize;
2676
2677 if ((buf = malloc(bufsz)) == NULL)
2678 FATAL("out of memory in gensub");
2679 mflag = 0; /* if mflag == 0, can replace empty string */
2680 num = 0;
2681 x = execute(a[4]); /* source string */
2682 t = getsval(x);
2683 res = copycell(x); /* target string - initially copy of source */
2684 res->csub = CTEMP; /* result values are temporary */
2685 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
2686 pfa = (fa *) a[1]; /* regular expression */
2687 else {
2688 y = execute(a[1]);
2689 pfa = makedfa(getsval(y), 1);
2690 tempfree(y);
2691 }
2692 y = execute(a[2]); /* replacement string */
2693 h = execute(a[3]); /* which matches should be replaced */
2694 sptr = getsval(h);
2695 if (sptr[0] == 'g' || sptr[0] == 'G')
2696 whichm = -1;
2697 else {
2698 /*
2699 * The specified number is index of replacement, starting
2700 * from 1. GNU awk treats index lower than 0 same as
2701 * 1, we do same for compatibility.
2702 */
2703 whichm = (int) getfval(h) - 1;
2704 if (whichm < 0)
2705 whichm = 0;
2706 }
2707 tempfree(h);
2708
2709 if (pmatch(pfa, t)) {
2710 char *sl;
2711
2712 tempstat = pfa->initstat;
2713 pfa->initstat = 2;
2714 pb = buf;
2715 rptr = getsval(y);
2716 /*
2717 * XXX if there are any backreferences in subst string,
2718 * complain now.
2719 */
2720 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2721 if (strchr("0123456789", sl[1])) {
2722 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2723 }
2724 }
2725
2726 do {
2727 if (whichm >= 0 && whichm != num) {
2728 num++;
2729 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2730
2731 /* copy the part of string up to and including
2732 * match to output buffer */
2733 while (t < patbeg + patlen)
2734 *pb++ = *t++;
2735 continue;
2736 }
2737
2738 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2739 if (mflag == 0) { /* can replace empty */
2740 num++;
2741 sptr = rptr;
2742 while (*sptr != 0) {
2743 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2744 if (*sptr == '\\') {
2745 backsub(&pb, &sptr);
2746 } else if (*sptr == '&') {
2747 sptr++;
2748 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2749 for (q = patbeg; q < patbeg+patlen; )
2750 *pb++ = *q++;
2751 } else
2752 *pb++ = *sptr++;
2753 }
2754 }
2755 if (*t == 0) /* at end */
2756 goto done;
2757 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2758 *pb++ = *t++;
2759 if (pb > buf + bufsz) /* BUG: not sure of this test */
2760 FATAL("gensub result0 %.30s too big; can't happen", buf);
2761 mflag = 0;
2762 }
2763 else { /* matched nonempty string */
2764 num++;
2765 sptr = t;
2766 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2767 while (sptr < patbeg)
2768 *pb++ = *sptr++;
2769 sptr = rptr;
2770 while (*sptr != 0) {
2771 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2772 if (*sptr == '\\') {
2773 backsub(&pb, &sptr);
2774 } else if (*sptr == '&') {
2775 sptr++;
2776 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2777 for (q = patbeg; q < patbeg+patlen; )
2778 *pb++ = *q++;
2779 } else
2780 *pb++ = *sptr++;
2781 }
2782 t = patbeg + patlen;
2783 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2784 goto done;
2785 if (pb > buf + bufsz)
2786 FATAL("gensub result1 %.30s too big; can't happen", buf);
2787 mflag = 1;
2788 }
2789 } while (pmatch(pfa,t));
2790 sptr = t;
2791 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2792 while ((*pb++ = *sptr++) != 0)
2793 ;
2794 done: if (pb > buf + bufsz)
2795 FATAL("gensub result2 %.30s too big; can't happen", buf);
2796 *pb = '\0';
2797 setsval(res, buf);
2798 pfa->initstat = tempstat;
2799 }
2800 tempfree(x);
2801 tempfree(y);
2802 free(buf);
2803 return(res);
2804 }
2805
/*
 * backsub - handle \\& variations in a replacement string.
 *
 * On entry (*sptr_ptr)[0] is a backslash.  One backslash sequence is
 * consumed from *sptr_ptr and its expansion is written at *pb_ptr; both
 * pointers are advanced past what was read and written:
 *
 *   \\\&   writes \&  and consumes all four characters
 *   \\&    writes \   and stops on the & (left for the caller)
 *   \\x    writes \   when POSIXLY_CORRECT is set, \\ otherwise;
 *          consumes the two backslashes
 *   \&     writes a literal &
 *   \x     writes the backslash only
 *
 * The POSIXLY_CORRECT environment variable is looked up once, on the
 * first call.  At most two bytes are written.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
{
	static bool env_checked = false;
	static bool posix_mode = false;
	const char *in = *sptr_ptr;
	char *out = *pb_ptr;

	if (!env_checked) {
		env_checked = true;
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);
	}

	if (in[1] == '&') {			/* \& -> literal & */
		*out++ = in[1];
		in += 2;
	} else if (in[1] != '\\') {		/* literal \ */
		*out++ = *in++;
	} else if (in[2] == '\\' && in[3] == '&') {	/* \\\& -> \& */
		*out++ = '\\';
		*out++ = '&';
		in += 4;
	} else if (in[2] == '&') {		/* \\& -> \ + matched */
		*out++ = '\\';
		in += 2;
	} else if (posix_mode) {		/* \\x -> \x */
		*out++ = in[1];
		in += 2;
	} else {				/* \\x -> \\x */
		*out++ = in[0];
		*out++ = in[1];
		in += 2;
	}

	*sptr_ptr = in;
	*pb_ptr = out;
}
2842
/*
 * wide_char_to_byte_str - encode a code point as a UTF-8 byte sequence.
 *
 * rune must lie in 0 .. 0x10FFFF; for any other value NULL is returned
 * and *outlen is left untouched.  Otherwise the 1 to 4 encoded bytes are
 * stored in a static buffer, followed by a NUL, *outlen receives the
 * number of encoded bytes, and the buffer is returned.  The buffer is
 * overwritten by the next call.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	size_t nbytes;
	size_t k;
	int lead;		/* marker bits of the first byte */

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	if (rune <= 0x0000007F) {
		nbytes = 1;
		lead = 0x00;	/* 0xxxxxxx */
	} else if (rune <= 0x000007FF) {
		nbytes = 2;
		lead = 0xC0;	/* 110xxxxx 10xxxxxx */
	} else if (rune <= 0x0000FFFF) {
		nbytes = 3;
		lead = 0xE0;	/* 1110xxxx 10xxxxxx 10xxxxxx */
	} else {
		nbytes = 4;
		lead = 0xF0;	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	}

	/* zero fill also supplies the terminating NUL */
	memset(buf, 0, sizeof(buf));

	/* continuation bytes, last one first, six bits at a time */
	for (k = nbytes - 1; k > 0; k--) {
		buf[k] = 0x80 | (rune & 0x3F);
		rune >>= 6;
	}
	buf[0] = lead | rune;

	*outlen = nbytes;
	return buf;
}
2880