1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #define DEBUG
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <wctype.h>
30 #include <fcntl.h>
31 #include <setjmp.h>
32 #include <limits.h>
33 #include <math.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include <time.h>
37 #include <sys/types.h>
38 #include <sys/wait.h>
39 #include "awk.h"
40 #include "awkgram.tab.h"
41
42
/* forward declarations for helpers referenced before their definitions */
static void stdinit(void);
static void flush_all(void);
static char *wide_char_to_byte_str(int rune, size_t *outlen);

/*
 * tempfree(x): pass x to tfree() when istemp(x) holds, otherwise do nothing.
 * The macro form is the one compiled in; it names its argument twice, so
 * give it a plain variable.  Change "#if 1" to "#if 0" to get the function
 * form, which additionally warns about OCELL cells whose csub lies outside
 * CUNK..CFREE.
 */
#if 1
#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	int bad_csub = (p->csub < CUNK || p->csub > CFREE);

	if (p->ctype == OCELL && bad_csub)
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	if (istemp(p))
		tfree(p);
}
#endif
59
60 /* do we really need these? */
61 /* #ifdef _NFILE */
62 /* #ifndef FOPEN_MAX */
63 /* #define FOPEN_MAX _NFILE */
64 /* #endif */
65 /* #endif */
66 /* */
67 /* #ifndef FOPEN_MAX */
68 /* #define FOPEN_MAX 40 */ /* max number of open files */
69 /* #endif */
70 /* */
71 /* #ifndef RAND_MAX */
72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
73 /* #endif */
74
75 jmp_buf env;
76 extern int pairstack[];
77 extern Awkfloat srand_seed;
78
79 Node *winner = NULL; /* root of parse tree */
80 Cell *tmps; /* free temporary cells for execution */
81
82 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
83 Cell *True = &truecell;
84 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
85 Cell *False = &falsecell;
86 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
87 Cell *jbreak = &breakcell;
88 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
89 Cell *jcont = &contcell;
90 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
91 Cell *jnext = &nextcell;
92 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
93 Cell *jnextfile = &nextfilecell;
94 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
95 Cell *jexit = &exitcell;
96 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
97 Cell *jret = &retcell;
98 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
99
100 Node *curnode = NULL; /* the node being executed, for debugging */
101
102 /* buffer memory management */
/*
 * adjbuf: make sure the managed buffer holds at least minlen bytes,
 * growing it with realloc() when it does not.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum; when nonzero the new size is rounded up
 *          to the next multiple of it
 * pbptr:   address of movable pointer into buffer, or 0 if none; it is
 *          re-aimed at the same offset inside the (possibly moved) buffer
 * whatrtn: name of the calling routine if failure should cause fatal error,
 *          or NULL to have failure reported through the return value
 *
 * return 0 for realloc failure, !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	if (minlen > *psiz) {
		char *tbuf = NULL;
		int rminlen = quantum ? minlen % quantum : 0;
		int boff = pbptr ? *pbptr - *pbuf : 0;
		int pad = rminlen ? quantum - rminlen : 0;
		int overflow = (pad > 0 && minlen > INT_MAX - pad);

		/*
		 * round up to next multiple of quantum; a size that no longer
		 * fits in an int is handled like an allocation failure
		 */
		if (!overflow) {
			minlen += pad;
			tbuf = (char *) realloc(*pbuf, minlen);
		}
		/* whatrtn may legitimately be NULL, so never hand it raw to %s */
		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
		if (tbuf == NULL) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;
		}
		*pbuf = tbuf;
		*psiz = minlen;
		if (pbptr)
			*pbptr = tbuf + boff;
	}
	return 1;
}
136
/*
 * run: execution of parse tree starts here.  Calls stdinit(), executes the
 * tree rooted at a, then calls closeall().
 */
void run(Node *a)
{
	stdinit();
	(void) execute(a);	/* the resulting cell is not used here */
	closeall();
}
144
execute(Node * u)145 Cell *execute(Node *u) /* execute a node of the parse tree */
146 {
147 Cell *(*proc)(Node **, int);
148 Cell *x;
149 Node *a;
150
151 if (u == NULL)
152 return(True);
153 for (a = u; ; a = a->nnext) {
154 curnode = a;
155 if (isvalue(a)) {
156 x = (Cell *) (a->narg[0]);
157 if (isfld(x) && !donefld)
158 fldbld();
159 else if (isrec(x) && !donerec)
160 recbld();
161 return(x);
162 }
163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
164 FATAL("illegal statement");
165 proc = proctab[a->nobj-FIRSTTOKEN];
166 x = (*proc)(a->narg, a->nobj);
167 if (isfld(x) && !donefld)
168 fldbld();
169 else if (isrec(x) && !donerec)
170 recbld();
171 if (isexpr(a))
172 return(x);
173 if (isjump(x))
174 return(x);
175 if (a->nnext == NULL)
176 return(x);
177 tempfree(x);
178 }
179 }
180
181
program(Node ** a,int n)182 Cell *program(Node **a, int n) /* execute an awk program */
183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */
184 Cell *x;
185
186 if (setjmp(env) != 0)
187 goto ex;
188 if (a[0]) { /* BEGIN */
189 x = execute(a[0]);
190 if (isexit(x))
191 return(True);
192 if (isjump(x))
193 FATAL("illegal break, continue, next or nextfile from BEGIN");
194 tempfree(x);
195 }
196 if (a[1] || a[2])
197 while (getrec(&record, &recsize, true) > 0) {
198 x = execute(a[1]);
199 if (isexit(x))
200 break;
201 tempfree(x);
202 }
203 ex:
204 if (setjmp(env) != 0) /* handles exit within END */
205 goto ex1;
206 if (a[2]) { /* END */
207 x = execute(a[2]);
208 if (isbreak(x) || isnext(x) || iscont(x))
209 FATAL("illegal break, continue, next or nextfile from END");
210 tempfree(x);
211 }
212 ex1:
213 return(True);
214 }
215
216 struct Frame { /* stack frame for awk function calls */
217 int nargs; /* number of arguments in this call */
218 Cell *fcncell; /* pointer to Cell for function */
219 Cell **args; /* pointer to array of arguments after execute */
220 Cell *retval; /* return value */
221 };
222
223 #define NARGS 50 /* max args in a call */
224
225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
226 int nframe = 0; /* number of frames allocated */
227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */
228
call(Node ** a,int n)229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
230 {
231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
232 int i, ncall, ndef;
233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
234 Node *x;
235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
236 Cell *y, *z, *fcn;
237 char *s;
238
239 fcn = execute(a[0]); /* the function itself */
240 s = fcn->nval;
241 if (!isfcn(fcn))
242 FATAL("calling undefined function %s", s);
243 if (frame == NULL) {
244 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
245 if (frame == NULL)
246 FATAL("out of space for stack frames calling %s", s);
247 }
248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
249 ncall++;
250 ndef = (int) fcn->fval; /* args in defn */
251 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
252 if (ncall > ndef)
253 WARNING("function %s called with %d args, uses only %d",
254 s, ncall, ndef);
255 if (ncall + ndef > NARGS)
256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
257 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
258 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
259 y = execute(x);
260 oargs[i] = y;
261 DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
263 if (isfcn(y))
264 FATAL("can't use function %s as argument in %s", y->nval, s);
265 if (isarr(y))
266 args[i] = y; /* arrays by ref */
267 else
268 args[i] = copycell(y);
269 tempfree(y);
270 }
271 for ( ; i < ndef; i++) { /* add null args for ones not provided */
272 args[i] = gettemp();
273 *args[i] = newcopycell;
274 }
275 frp++; /* now ok to up frame */
276 if (frp >= frame + nframe) {
277 int dfp = frp - frame; /* old index */
278 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
279 if (frame == NULL)
280 FATAL("out of space for stack frames in %s", s);
281 frp = frame + dfp;
282 }
283 frp->fcncell = fcn;
284 frp->args = args;
285 frp->nargs = ndef; /* number defined with (excess are locals) */
286 frp->retval = gettemp();
287
288 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
289 y = execute((Node *)(fcn->sval)); /* execute body */
290 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
291
292 for (i = 0; i < ndef; i++) {
293 Cell *t = frp->args[i];
294 if (isarr(t)) {
295 if (t->csub == CCOPY) {
296 if (i >= ncall) {
297 freesymtab(t);
298 t->csub = CTEMP;
299 tempfree(t);
300 } else {
301 oargs[i]->tval = t->tval;
302 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
303 oargs[i]->sval = t->sval;
304 tempfree(t);
305 }
306 }
307 } else if (t != y) { /* kludge to prevent freeing twice */
308 t->csub = CTEMP;
309 tempfree(t);
310 } else if (t == y && t->csub == CCOPY) {
311 t->csub = CTEMP;
312 tempfree(t);
313 freed = 1;
314 }
315 }
316 tempfree(fcn);
317 if (isexit(y) || isnext(y))
318 return y;
319 if (freed == 0) {
320 tempfree(y); /* don't free twice! */
321 }
322 z = frp->retval; /* return value */
323 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
324 frp--;
325 return(z);
326 }
327
/*
 * copycell: make a copy of a cell in a temp.
 *
 * The copy drops the CON, FLD and REC type bits (copy is not constant or
 * field) and is tagged CCOPY.  A string source gets its own duplicate of
 * the string; anything else is marked DONTFREE.
 */
Cell *copycell(Cell *x)
{
	Cell *dup = gettemp();

	dup->tval = x->tval & ~(CON|FLD|REC);
	dup->csub = CCOPY;	/* prevents freeing until call is over */
	dup->nval = x->nval;	/* BUG? */
	if (isstr(x) /* || x->ctype == OCELL */) {
		dup->sval = tostring(x->sval);
		dup->tval &= ~DONTFREE;
	} else {
		dup->tval |= DONTFREE;
	}
	dup->fval = x->fval;
	return dup;
}
346
arg(Node ** a,int n)347 Cell *arg(Node **a, int n) /* nth argument of a function */
348 {
349
350 n = ptoi(a[0]); /* argument number, counting from 0 */
351 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
352 if (n+1 > frp->nargs)
353 FATAL("argument #%d of function %s was not supplied",
354 n+1, frp->fcncell->nval);
355 return frp->args[n];
356 }
357
jump(Node ** a,int n)358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
359 {
360 Cell *y;
361
362 switch (n) {
363 case EXIT:
364 if (a[0] != NULL) {
365 y = execute(a[0]);
366 errorflag = (int) getfval(y);
367 tempfree(y);
368 }
369 longjmp(env, 1);
370 case RETURN:
371 if (a[0] != NULL) {
372 y = execute(a[0]);
373 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374 setsval(frp->retval, getsval(y));
375 frp->retval->fval = getfval(y);
376 frp->retval->tval |= NUM;
377 }
378 else if (y->tval & STR)
379 setsval(frp->retval, getsval(y));
380 else if (y->tval & NUM)
381 setfval(frp->retval, getfval(y));
382 else /* can't happen */
383 FATAL("bad type variable %d", y->tval);
384 tempfree(y);
385 }
386 return(jret);
387 case NEXT:
388 return(jnext);
389 case NEXTFILE:
390 nextfile();
391 return(jnextfile);
392 case BREAK:
393 return(jbreak);
394 case CONTINUE:
395 return(jcont);
396 default: /* can't happen */
397 FATAL("illegal jump type %d", n);
398 }
399 return 0; /* not reached */
400 }
401
awkgetline(Node ** a,int n)402 Cell *awkgetline(Node **a, int n) /* get next line from specific input */
403 { /* a[0] is variable, a[1] is operator, a[2] is filename */
404 Cell *r, *x;
405 extern Cell **fldtab;
406 FILE *fp;
407 char *buf;
408 int bufsize = recsize;
409 int mode;
410 bool newflag;
411 double result;
412
413 if ((buf = (char *) malloc(bufsize)) == NULL)
414 FATAL("out of memory in getline");
415
416 fflush(stdout); /* in case someone is waiting for a prompt */
417 r = gettemp();
418 if (a[1] != NULL) { /* getline < file */
419 x = execute(a[2]); /* filename */
420 mode = ptoi(a[1]);
421 if (mode == '|') /* input pipe */
422 mode = LE; /* arbitrary flag */
423 fp = openfile(mode, getsval(x), &newflag);
424 tempfree(x);
425 if (fp == NULL)
426 n = -1;
427 else
428 n = readrec(&buf, &bufsize, fp, newflag);
429 if (n <= 0) {
430 ;
431 } else if (a[0] != NULL) { /* getline var <file */
432 x = execute(a[0]);
433 setsval(x, buf);
434 if (is_number(x->sval, & result)) {
435 x->fval = result;
436 x->tval |= NUM;
437 }
438 tempfree(x);
439 } else { /* getline <file */
440 setsval(fldtab[0], buf);
441 if (is_number(fldtab[0]->sval, & result)) {
442 fldtab[0]->fval = result;
443 fldtab[0]->tval |= NUM;
444 }
445 }
446 } else { /* bare getline; use current input */
447 if (a[0] == NULL) /* getline */
448 n = getrec(&record, &recsize, true);
449 else { /* getline var */
450 n = getrec(&buf, &bufsize, false);
451 if (n > 0) {
452 x = execute(a[0]);
453 setsval(x, buf);
454 if (is_number(x->sval, & result)) {
455 x->fval = result;
456 x->tval |= NUM;
457 }
458 tempfree(x);
459 }
460 }
461 }
462 setfval(r, (Awkfloat) n);
463 free(buf);
464 return r;
465 }
466
getnf(Node ** a,int n)467 Cell *getnf(Node **a, int n) /* get NF */
468 {
469 if (!donefld)
470 fldbld();
471 return (Cell *) a[0];
472 }
473
474 static char *
makearraystring(Node * p,const char * func)475 makearraystring(Node *p, const char *func)
476 {
477 char *buf;
478 int bufsz = recsize;
479 size_t blen;
480
481 if ((buf = (char *) malloc(bufsz)) == NULL) {
482 FATAL("%s: out of memory", func);
483 }
484
485 blen = 0;
486 buf[blen] = '\0';
487
488 for (; p; p = p->nnext) {
489 Cell *x = execute(p); /* expr */
490 char *s = getsval(x);
491 size_t seplen = strlen(getsval(subseploc));
492 size_t nsub = p->nnext ? seplen : 0;
493 size_t slen = strlen(s);
494 size_t tlen = blen + slen + nsub;
495
496 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
497 FATAL("%s: out of memory %s[%s...]",
498 func, x->nval, buf);
499 }
500 memcpy(buf + blen, s, slen);
501 if (nsub) {
502 memcpy(buf + blen + slen, *SUBSEP, nsub);
503 }
504 buf[tlen] = '\0';
505 blen = tlen;
506 tempfree(x);
507 }
508 return buf;
509 }
510
array(Node ** a,int n)511 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
512 {
513 Cell *x, *z;
514 char *buf;
515
516 x = execute(a[0]); /* Cell* for symbol table */
517 buf = makearraystring(a[1], __func__);
518 if (!isarr(x)) {
519 DPRINTF("making %s into an array\n", NN(x->nval));
520 if (freeable(x))
521 xfree(x->sval);
522 x->tval &= ~(STR|NUM|DONTFREE);
523 x->tval |= ARR;
524 x->sval = (char *) makesymtab(NSYMTAB);
525 }
526 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
527 z->ctype = OCELL;
528 z->csub = CVAR;
529 tempfree(x);
530 free(buf);
531 return(z);
532 }
533
awkdelete(Node ** a,int n)534 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
535 {
536 Cell *x;
537
538 x = execute(a[0]); /* Cell* for symbol table */
539 if (x == symtabloc) {
540 FATAL("cannot delete SYMTAB or its elements");
541 }
542 if (!isarr(x))
543 return True;
544 if (a[1] == NULL) { /* delete the elements, not the table */
545 freesymtab(x);
546 x->tval &= ~STR;
547 x->tval |= ARR;
548 x->sval = (char *) makesymtab(NSYMTAB);
549 } else {
550 char *buf = makearraystring(a[1], __func__);
551 freeelem(x, buf);
552 free(buf);
553 }
554 tempfree(x);
555 return True;
556 }
557
intest(Node ** a,int n)558 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
559 {
560 Cell *ap, *k;
561 char *buf;
562
563 ap = execute(a[1]); /* array name */
564 if (!isarr(ap)) {
565 DPRINTF("making %s into an array\n", ap->nval);
566 if (freeable(ap))
567 xfree(ap->sval);
568 ap->tval &= ~(STR|NUM|DONTFREE);
569 ap->tval |= ARR;
570 ap->sval = (char *) makesymtab(NSYMTAB);
571 }
572 buf = makearraystring(a[0], __func__);
573 k = lookup(buf, (Array *) ap->sval);
574 tempfree(ap);
575 free(buf);
576 if (k == NULL)
577 return(False);
578 else
579 return(True);
580 }
581
582
583 /* ======== utf-8 code ========== */
584
585 /*
586 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
587 * or utf-8. u8_isutf tests whether a string starts with a valid
588 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
589 * u8_nextlen returns length of next valid sequence, which is
590 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
591 * u8_strlen returns length of string in valid utf-8 sequences
592 * and/or high-bit bytes. Conversion functions go between byte
593 * number and character number.
594 *
595 * In theory, this behaves the same as before for non-utf8 bytes.
596 *
597 * Limited checking! This is a potential security hole.
598 */
599
600 /* is s the beginning of a valid utf-8 string? */
601 /* return length 1..4 if yes, 0 if no */
u8_isutf(const char * s)602 int u8_isutf(const char *s)
603 {
604 int n, ret;
605 unsigned char c;
606
607 c = s[0];
608 if (c < 128 || awk_mb_cur_max == 1)
609 return 1; /* what if it's 0? */
610
611 n = strlen(s);
612 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
613 ret = 2; /* 110xxxxx 10xxxxxx */
614 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
615 && (s[2] & 0xC0) == 0x80) {
616 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
617 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
618 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
619 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
620 } else {
621 ret = 0;
622 }
623 return ret;
624 }
625
626 /* Convert (prefix of) utf8 string to utf-32 rune. */
627 /* Sets *rune to the value, returns the length. */
628 /* No error checking: watch out. */
u8_rune(int * rune,const char * s)629 int u8_rune(int *rune, const char *s)
630 {
631 int n, ret;
632 unsigned char c;
633
634 c = s[0];
635 if (c < 128 || awk_mb_cur_max == 1) {
636 *rune = c;
637 return 1;
638 }
639
640 n = strlen(s);
641 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
642 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
643 ret = 2;
644 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
645 && (s[2] & 0xC0) == 0x80) {
646 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
647 /* 1110xxxx 10xxxxxx 10xxxxxx */
648 ret = 3;
649 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
650 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
651 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
652 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
653 ret = 4;
654 } else {
655 *rune = c;
656 ret = 1;
657 }
658 return ret; /* returns one byte if sequence doesn't look like utf */
659 }
660
/*
 * u8_nextlen: return length of next sequence: 1 for ascii or random,
 * 2..4 for valid utf8.  A byte that u8_isutf() rejects counts as one.
 */
int u8_nextlen(const char *s)
{
	int len = u8_isutf(s);

	return (len != 0) ? len : 1;
}
671
672 /* return number of utf characters or single non-utf bytes */
u8_strlen(const char * s)673 int u8_strlen(const char *s)
674 {
675 int i, len, n, totlen;
676 unsigned char c;
677
678 n = strlen(s);
679 totlen = 0;
680 for (i = 0; i < n; i += len) {
681 c = s[i];
682 if (c < 128 || awk_mb_cur_max == 1) {
683 len = 1;
684 } else {
685 len = u8_nextlen(&s[i]);
686 }
687 totlen++;
688 if (i > n)
689 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
690 }
691 return totlen;
692 }
693
/*
 * u8_char2byte: convert utf-8 char number in a string to its byte offset.
 *
 * charnum counts characters from 0.  The walk stops at the terminating
 * NUL, so a charnum beyond the end of the string yields the offset of the
 * NUL rather than reading past the buffer.
 */
int u8_char2byte(const char *s, int charnum)
{
	int bytenum = 0;

	while (charnum > 0 && s[bytenum] != '\0') {
		bytenum += u8_nextlen(s + bytenum);
		charnum--;
	}
	return bytenum;
}
708
/*
 * u8_byte2char: convert byte offset in s to utf-8 char number that
 * starts there.
 *
 * The result counts from 1 for offset 0.  An offset beyond the end of the
 * string yields -1 (???).
 * BUG: what origin?  should be 0 to match start==0 which means no match
 */
int u8_byte2char(const char *s, int bytenum)
{
	int slen = strlen(s);
	int pos = 0;
	int charnum = 0;

	if (bytenum > slen)
		return -1;	/* ??? */

	while (pos <= bytenum) {
		pos += u8_nextlen(s + pos);
		charnum++;
	}
	return charnum;
}
726
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	Runeerror = 128,	/* from somewhere else */
	Runemax = 0x10FFFF,

	/* number of payload bits in a lead byte of each length, and in a trailer */
	Bit1	= 7,
	Bitx	= 6,
	Bit2	= 5,
	Bit3	= 4,
	Bit4	= 3,
	Bit5	= 2,

	/* tag bits that mark each kind of byte */
	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	/* largest value that fits in a sequence of each length */
	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0000 0000 0111 1111 */
	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0000 0000 0111 1111 1111 */
	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 0000 0000 1111 1111 1111 1111 */
	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0011 1111 1111 1111 1111 1111 */

	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */

};

/*
 * runetochar: store the utf-8 encoding of c in str and return the number
 * of bytes written (1..4).  No NUL is appended.  A value above Runemax is
 * replaced by Runeerror, but only after the two-byte test, so it comes out
 * as a three-byte sequence.
 *
 *	00000-0007F  => 00-7F
 *	00080-007FF  => T2 Tx
 *	00800-0FFFF  => T3 Tx Tx
 *	010000-1FFFFF => T4 Tx Tx Tx
 */
int runetochar(char *str, int c)
{
	int nbytes, lead, i;

	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	if (c <= Rune2) {
		nbytes = 2;
		lead = T2;
	} else {
		if (c > Runemax)
			c = Runeerror;
		if (c <= Rune3) {
			nbytes = 3;
			lead = T3;
		} else {
			nbytes = 4;
			lead = T4;
		}
	}

	/* trailers take six bits each, filled in from the last byte backwards */
	for (i = nbytes - 1; i > 0; i--) {
		str[i] = Tx | (c & Maskx);
		c >>= Bitx;
	}
	str[0] = lead | c;	/* whatever is left goes in the lead byte */
	return nbytes;
}
790
791
792 /* ========== end of utf8 code =========== */
793
794
795
matchop(Node ** a,int n)796 Cell *matchop(Node **a, int n) /* ~ and match() */
797 {
798 Cell *x, *y, *z;
799 char *s, *t;
800 int i;
801 int cstart, cpatlen, len;
802 fa *pfa;
803 int (*mf)(fa *, const char *) = match, mode = 0;
804
805 if (n == MATCHFCN) {
806 mf = pmatch;
807 mode = 1;
808 }
809 x = execute(a[1]); /* a[1] = target text */
810 s = getsval(x);
811 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
812 i = (*mf)((fa *) a[2], s);
813 else {
814 y = execute(a[2]); /* a[2] = regular expr */
815 t = getsval(y);
816 pfa = makedfa(t, mode);
817 i = (*mf)(pfa, s);
818 tempfree(y);
819 }
820 z = x;
821 if (n == MATCHFCN) {
822 int start = patbeg - s + 1; /* origin 1 */
823 if (patlen < 0) {
824 start = 0; /* not found */
825 } else {
826 cstart = u8_byte2char(s, start-1);
827 cpatlen = 0;
828 for (i = 0; i < patlen; i += len) {
829 len = u8_nextlen(patbeg+i);
830 cpatlen++;
831 }
832
833 start = cstart;
834 patlen = cpatlen;
835 }
836
837 setfval(rstartloc, (Awkfloat) start);
838 setfval(rlengthloc, (Awkfloat) patlen);
839 x = gettemp();
840 x->tval = NUM;
841 x->fval = start;
842 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
843 x = True;
844 else
845 x = False;
846
847 tempfree(z);
848 return x;
849 }
850
851
boolop(Node ** a,int n)852 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
853 {
854 Cell *x, *y;
855 int i;
856
857 x = execute(a[0]);
858 i = istrue(x);
859 tempfree(x);
860 switch (n) {
861 case BOR:
862 if (i) return(True);
863 y = execute(a[1]);
864 i = istrue(y);
865 tempfree(y);
866 if (i) return(True);
867 else return(False);
868 case AND:
869 if ( !i ) return(False);
870 y = execute(a[1]);
871 i = istrue(y);
872 tempfree(y);
873 if (i) return(True);
874 else return(False);
875 case NOT:
876 if (i) return(False);
877 else return(True);
878 default: /* can't happen */
879 FATAL("unknown boolean operator %d", n);
880 }
881 return 0; /*NOTREACHED*/
882 }
883
relop(Node ** a,int n)884 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
885 {
886 int i;
887 Cell *x, *y;
888 Awkfloat j;
889 bool x_is_nan, y_is_nan;
890
891 x = execute(a[0]);
892 y = execute(a[1]);
893 x_is_nan = isnan(x->fval);
894 y_is_nan = isnan(y->fval);
895 if (x->tval&NUM && y->tval&NUM) {
896 if ((x_is_nan || y_is_nan) && n != NE)
897 return(False);
898 j = x->fval - y->fval;
899 i = j<0? -1: (j>0? 1: 0);
900 } else {
901 i = strcmp(getsval(x), getsval(y));
902 }
903 tempfree(x);
904 tempfree(y);
905 switch (n) {
906 case LT: if (i<0) return(True);
907 else return(False);
908 case LE: if (i<=0) return(True);
909 else return(False);
910 case NE: if (x_is_nan && y_is_nan) return(True);
911 else if (i!=0) return(True);
912 else return(False);
913 case EQ: if (i == 0) return(True);
914 else return(False);
915 case GE: if (i>=0) return(True);
916 else return(False);
917 case GT: if (i>0) return(True);
918 else return(False);
919 default: /* can't happen */
920 FATAL("unknown relational operator %d", n);
921 }
922 return 0; /*NOTREACHED*/
923 }
924
/*
 * tfree: free a tempcell.  Releases the cell's string when it is freeable
 * and pushes the cell onto the front of the tmps list.  Finding the cell
 * already at the head of that list is fatal.
 */
void tfree(Cell *a)
{
	if (freeable(a)) {
		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
		xfree(a->sval);
	}

	if (tmps == a)
		FATAL("tempcell list is curdled");

	a->cnext = tmps;
	tmps = a;
}
936
gettemp(void)937 Cell *gettemp(void) /* get a tempcell */
938 { int i;
939 Cell *x;
940
941 if (!tmps) {
942 tmps = (Cell *) calloc(100, sizeof(*tmps));
943 if (!tmps)
944 FATAL("out of space for temporaries");
945 for (i = 1; i < 100; i++)
946 tmps[i-1].cnext = &tmps[i];
947 tmps[i-1].cnext = NULL;
948 }
949 x = tmps;
950 tmps = x->cnext;
951 *x = tempcell;
952 return(x);
953 }
954
indirect(Node ** a,int n)955 Cell *indirect(Node **a, int n) /* $( a[0] ) */
956 {
957 Awkfloat val;
958 Cell *x;
959 int m;
960 char *s;
961
962 x = execute(a[0]);
963 val = getfval(x); /* freebsd: defend against super large field numbers */
964 if ((Awkfloat)INT_MAX < val)
965 FATAL("trying to access out of range field %s", x->nval);
966 m = (int) val;
967 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */
968 FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
969 /* BUG: can x->nval ever be null??? */
970 tempfree(x);
971 x = fieldadr(m);
972 x->ctype = OCELL; /* BUG? why are these needed? */
973 x->csub = CFLD;
974 return(x);
975 }
976
substr(Node ** a,int nnn)977 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
978 {
979 int k, m, n;
980 int mb, nb;
981 char *s;
982 int temp;
983 Cell *x, *y, *z = NULL;
984
985 x = execute(a[0]);
986 y = execute(a[1]);
987 if (a[2] != NULL)
988 z = execute(a[2]);
989 s = getsval(x);
990 k = u8_strlen(s) + 1;
991 if (k <= 1) {
992 tempfree(x);
993 tempfree(y);
994 if (a[2] != NULL) {
995 tempfree(z);
996 }
997 x = gettemp();
998 setsval(x, "");
999 return(x);
1000 }
1001 m = (int) getfval(y);
1002 if (m <= 0)
1003 m = 1;
1004 else if (m > k)
1005 m = k;
1006 tempfree(y);
1007 if (a[2] != NULL) {
1008 n = (int) getfval(z);
1009 tempfree(z);
1010 } else
1011 n = k - 1;
1012 if (n < 0)
1013 n = 0;
1014 else if (n > k - m)
1015 n = k - m;
1016 /* m is start, n is length from there */
1017 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1018 y = gettemp();
1019 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1020 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */
1021
1022 temp = s[nb]; /* with thanks to John Linderman */
1023 s[nb] = '\0';
1024 setsval(y, s + mb);
1025 s[nb] = temp;
1026 tempfree(x);
1027 return(y);
1028 }
1029
sindex(Node ** a,int nnn)1030 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1031 {
1032 Cell *x, *y, *z;
1033 char *s1, *s2, *p1, *p2, *q;
1034 Awkfloat v = 0.0;
1035
1036 x = execute(a[0]);
1037 s1 = getsval(x);
1038 y = execute(a[1]);
1039 s2 = getsval(y);
1040
1041 z = gettemp();
1042 for (p1 = s1; *p1 != '\0'; p1++) {
1043 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1044 continue;
1045 if (*p2 == '\0') {
1046 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1047
1048 /* should be a function: used in match() as well */
1049 int i, len;
1050 v = 0;
1051 for (i = 0; i < p1-s1+1; i += len) {
1052 len = u8_nextlen(s1+i);
1053 v++;
1054 }
1055 break;
1056 }
1057 }
1058 tempfree(x);
1059 tempfree(y);
1060 setfval(z, v);
1061 return(z);
1062 }
1063
/*
 * has_utf8: return 1 if s contains any utf-8 (2 bytes or more) character,
 * 0 otherwise.
 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int n = u8_nextlen(s);

		if (n > 1)
			return 1;
		s += n;
	}
	return 0;
}
1075
1076 #define MAXNUMSIZE 50
1077
format(char ** pbuf,int * pbufsize,const char * s,Node * a)1078 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1079 {
1080 char *fmt;
1081 char *p, *t;
1082 const char *os;
1083 Cell *x;
1084 int flag = 0, n;
1085 int fmtwd; /* format width */
1086 int fmtsz = recsize;
1087 char *buf = *pbuf;
1088 int bufsize = *pbufsize;
1089 #define FMTSZ(a) (fmtsz - ((a) - fmt))
1090 #define BUFSZ(a) (bufsize - ((a) - buf))
1091
1092 static bool first = true;
1093 static bool have_a_format = false;
1094
1095 if (first) {
1096 char xbuf[100];
1097
1098 snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1099 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1100 first = false;
1101 }
1102
1103 os = s;
1104 p = buf;
1105 if ((fmt = (char *) malloc(fmtsz)) == NULL)
1106 FATAL("out of memory in format()");
1107 while (*s) {
1108 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1109 if (*s != '%') {
1110 *p++ = *s++;
1111 continue;
1112 }
1113 if (*(s+1) == '%') {
1114 *p++ = '%';
1115 s += 2;
1116 continue;
1117 }
1118 fmtwd = atoi(s+1);
1119 if (fmtwd < 0)
1120 fmtwd = -fmtwd;
1121 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1122 for (t = fmt; (*t++ = *s) != '\0'; s++) {
1123 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1124 FATAL("format item %.30s... ran format() out of memory", os);
1125 /* Ignore size specifiers */
1126 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
1127 t--;
1128 continue;
1129 }
1130 if (isalpha((uschar)*s))
1131 break;
1132 if (*s == '$') {
1133 FATAL("'$' not permitted in awk formats");
1134 }
1135 if (*s == '*') {
1136 if (a == NULL) {
1137 FATAL("not enough args in printf(%s)", os);
1138 }
1139 x = execute(a);
1140 a = a->nnext;
1141 snprintf(t - 1, FMTSZ(t - 1),
1142 "%d", fmtwd=(int) getfval(x));
1143 if (fmtwd < 0)
1144 fmtwd = -fmtwd;
1145 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1146 t = fmt + strlen(fmt);
1147 tempfree(x);
1148 }
1149 }
1150 *t = '\0';
1151 if (fmtwd < 0)
1152 fmtwd = -fmtwd;
1153 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1154 switch (*s) {
1155 case 'a': case 'A':
1156 if (have_a_format)
1157 flag = *s;
1158 else
1159 flag = 'f';
1160 break;
1161 case 'f': case 'e': case 'g': case 'E': case 'G':
1162 flag = 'f';
1163 break;
1164 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1165 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1166 *(t-1) = 'j';
1167 *t = *s;
1168 *++t = '\0';
1169 break;
1170 case 's':
1171 flag = 's';
1172 break;
1173 case 'c':
1174 flag = 'c';
1175 break;
1176 default:
1177 WARNING("weird printf conversion %s", fmt);
1178 flag = '?';
1179 break;
1180 }
1181 if (a == NULL)
1182 FATAL("not enough args in printf(%s)", os);
1183 x = execute(a);
1184 a = a->nnext;
1185 n = MAXNUMSIZE;
1186 if (fmtwd > n)
1187 n = fmtwd;
1188 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1189 switch (flag) {
1190 case '?':
1191 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
1192 t = getsval(x);
1193 n = strlen(t);
1194 if (fmtwd > n)
1195 n = fmtwd;
1196 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1197 p += strlen(p);
1198 snprintf(p, BUFSZ(p), "%s", t);
1199 break;
1200 case 'a':
1201 case 'A':
1202 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1203 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1204 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1205
1206 case 's': {
1207 t = getsval(x);
1208 n = strlen(t);
1209 /* if simple format or no utf-8 in the string, sprintf works */
1210 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1211 if (fmtwd > n)
1212 n = fmtwd;
1213 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1214 FATAL("huge string/format (%d chars) in printf %.30s..." \
1215 " ran format() out of memory", n, t);
1216 snprintf(p, BUFSZ(p), fmt, t);
1217 break;
1218 }
1219
1220 /* get here if string has utf-8 chars and fmt is not plain %s */
1221 /* "%-w.ps", where -, w and .p are all optional */
1222 /* '0' before the w is a flag character */
1223 /* fmt points at % */
1224 int ljust = 0, wid = 0, prec = n, pad = 0;
1225 char *f = fmt+1;
1226 if (f[0] == '-') {
1227 ljust = 1;
1228 f++;
1229 }
1230 // flags '0' and '+' are recognized but skipped
1231 if (f[0] == '0') {
1232 f++;
1233 if (f[0] == '+')
1234 f++;
1235 }
1236 if (f[0] == '+') {
1237 f++;
1238 if (f[0] == '0')
1239 f++;
1240 }
1241 if (isdigit(f[0])) { /* there is a wid */
1242 wid = strtol(f, &f, 10);
1243 }
1244 if (f[0] == '.') { /* there is a .prec */
1245 prec = strtol(++f, &f, 10);
1246 }
1247 if (prec > u8_strlen(t))
1248 prec = u8_strlen(t);
1249 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1250 int i, k, n;
1251
1252 if (ljust) { // print prec chars from t, then pad blanks
1253 n = u8_char2byte(t, prec);
1254 for (k = 0; k < n; k++) {
1255 //putchar(t[k]);
1256 *p++ = t[k];
1257 }
1258 for (i = 0; i < pad; i++) {
1259 //printf(" ");
1260 *p++ = ' ';
1261 }
1262 } else { // print pad blanks, then prec chars from t
1263 for (i = 0; i < pad; i++) {
1264 //printf(" ");
1265 *p++ = ' ';
1266 }
1267 n = u8_char2byte(t, prec);
1268 for (k = 0; k < n; k++) {
1269 //putchar(t[k]);
1270 *p++ = t[k];
1271 }
1272 }
1273 *p = 0;
1274 break;
1275 }
1276
1277 case 'c': {
1278 /*
1279 * If a numeric value is given, awk should just turn
1280 * it into a character and print it:
1281 * BEGIN { printf("%c\n", 65) }
1282 * prints "A".
1283 *
1284 * But what if the numeric value is > 128 and
1285 * represents a valid Unicode code point?!? We do
1286 * our best to convert it back into UTF-8. If we
1287 * can't, we output the encoding of the Unicode
1288 * "invalid character", 0xFFFD.
1289 */
1290 if (isnum(x)) {
1291 int charval = (int) getfval(x);
1292
1293 if (charval != 0) {
1294 if (charval < 128 || awk_mb_cur_max == 1)
1295 snprintf(p, BUFSZ(p), fmt, charval);
1296 else {
1297 // possible unicode character
1298 size_t count;
1299 char *bs = wide_char_to_byte_str(charval, &count);
1300
1301 if (bs == NULL) { // invalid character
1302 // use unicode invalid character, 0xFFFD
1303 static char invalid_char[] = "\357\277\275";
1304 bs = invalid_char;
1305 count = 3;
1306 }
1307 t = bs;
1308 n = count;
1309 goto format_percent_c;
1310 }
1311 } else {
1312 *p++ = '\0'; /* explicit null byte */
1313 *p = '\0'; /* next output will start here */
1314 }
1315 break;
1316 }
1317 t = getsval(x);
1318 n = u8_nextlen(t);
1319 format_percent_c:
1320 if (n < 2) { /* not utf8 */
1321 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1322 break;
1323 }
1324
1325 // utf8 character, almost same song and dance as for %s
1326 int ljust = 0, wid = 0, prec = n, pad = 0;
1327 char *f = fmt+1;
1328 if (f[0] == '-') {
1329 ljust = 1;
1330 f++;
1331 }
1332 // flags '0' and '+' are recognized but skipped
1333 if (f[0] == '0') {
1334 f++;
1335 if (f[0] == '+')
1336 f++;
1337 }
1338 if (f[0] == '+') {
1339 f++;
1340 if (f[0] == '0')
1341 f++;
1342 }
1343 if (isdigit(f[0])) { /* there is a wid */
1344 wid = strtol(f, &f, 10);
1345 }
1346 if (f[0] == '.') { /* there is a .prec */
1347 prec = strtol(++f, &f, 10);
1348 }
1349 if (prec > 1) // %c --> only one character
1350 prec = 1;
1351 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1352 int i;
1353
1354 if (ljust) { // print one char from t, then pad blanks
1355 for (i = 0; i < n; i++)
1356 *p++ = t[i];
1357 for (i = 0; i < pad; i++) {
1358 //printf(" ");
1359 *p++ = ' ';
1360 }
1361 } else { // print pad blanks, then prec chars from t
1362 for (i = 0; i < pad; i++) {
1363 //printf(" ");
1364 *p++ = ' ';
1365 }
1366 for (i = 0; i < n; i++)
1367 *p++ = t[i];
1368 }
1369 *p = 0;
1370 break;
1371 }
1372 default:
1373 FATAL("can't happen: bad conversion %c in format()", flag);
1374 }
1375
1376 tempfree(x);
1377 p += strlen(p);
1378 s++;
1379 }
1380 *p = '\0';
1381 free(fmt);
1382 for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1383 x = execute(a);
1384 tempfree(x);
1385 }
1386 *pbuf = buf;
1387 *pbufsize = bufsize;
1388 return p - buf;
1389 }
1390
awksprintf(Node ** a,int n)1391 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1392 {
1393 Cell *x;
1394 Node *y;
1395 char *buf;
1396 int bufsz=3*recsize;
1397
1398 if ((buf = (char *) malloc(bufsz)) == NULL)
1399 FATAL("out of memory in awksprintf");
1400 y = a[0]->nnext;
1401 x = execute(a[0]);
1402 if (format(&buf, &bufsz, getsval(x), y) == -1)
1403 FATAL("sprintf string %.30s... too long. can't happen.", buf);
1404 tempfree(x);
1405 x = gettemp();
1406 x->sval = buf;
1407 x->tval = STR;
1408 return(x);
1409 }
1410
awkprintf(Node ** a,int n)1411 Cell *awkprintf(Node **a, int n) /* printf */
1412 { /* a[0] is list of args, starting with format string */
1413 /* a[1] is redirection operator, a[2] is redirection file */
1414 FILE *fp;
1415 Cell *x;
1416 Node *y;
1417 char *buf;
1418 int len;
1419 int bufsz=3*recsize;
1420
1421 if ((buf = (char *) malloc(bufsz)) == NULL)
1422 FATAL("out of memory in awkprintf");
1423 y = a[0]->nnext;
1424 x = execute(a[0]);
1425 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1426 FATAL("printf string %.30s... too long. can't happen.", buf);
1427 tempfree(x);
1428 if (a[1] == NULL) {
1429 /* fputs(buf, stdout); */
1430 fwrite(buf, len, 1, stdout);
1431 if (ferror(stdout))
1432 FATAL("write error on stdout");
1433 } else {
1434 fp = redirect(ptoi(a[1]), a[2]);
1435 /* fputs(buf, fp); */
1436 fwrite(buf, len, 1, fp);
1437 fflush(fp);
1438 if (ferror(fp))
1439 FATAL("write error on %s", filename(fp));
1440 }
1441 free(buf);
1442 return(True);
1443 }
1444
arith(Node ** a,int n)1445 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1446 {
1447 Awkfloat i, j = 0;
1448 double v;
1449 Cell *x, *y, *z;
1450
1451 x = execute(a[0]);
1452 i = getfval(x);
1453 tempfree(x);
1454 if (n != UMINUS && n != UPLUS) {
1455 y = execute(a[1]);
1456 j = getfval(y);
1457 tempfree(y);
1458 }
1459 z = gettemp();
1460 switch (n) {
1461 case ADD:
1462 i += j;
1463 break;
1464 case MINUS:
1465 i -= j;
1466 break;
1467 case MULT:
1468 i *= j;
1469 break;
1470 case DIVIDE:
1471 if (j == 0)
1472 FATAL("division by zero");
1473 i /= j;
1474 break;
1475 case MOD:
1476 if (j == 0)
1477 FATAL("division by zero in mod");
1478 modf(i/j, &v);
1479 i = i - j * v;
1480 break;
1481 case UMINUS:
1482 i = -i;
1483 break;
1484 case UPLUS: /* handled by getfval(), above */
1485 break;
1486 case POWER:
1487 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1488 i = ipow(i, (int) j);
1489 else {
1490 errno = 0;
1491 i = errcheck(pow(i, j), "pow");
1492 }
1493 break;
1494 default: /* can't happen */
1495 FATAL("illegal arithmetic operator %d", n);
1496 }
1497 setfval(z, i);
1498 return(z);
1499 }
1500
/*
 * ipow - raise x to the integer power n by repeated squaring.
 * Returns 1 for n <= 0.  The bits of n are consumed from the most
 * significant downwards, squaring the accumulator at each step and
 * multiplying in x (as x * acc * acc) when the bit is set.
 */
double ipow(double x, int n)	/* x**n.  ought to be done by pow, but isn't always */
{
	double acc = 1;
	int bit;

	if (n <= 0)
		return 1;
	/* find the highest set bit of n; comparing against n/2 avoids overflow */
	for (bit = 1; bit <= n / 2; bit <<= 1)
		;
	for ( ; bit > 0; bit >>= 1) {
		if (n & bit)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1513
incrdecr(Node ** a,int n)1514 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1515 {
1516 Cell *x, *z;
1517 int k;
1518 Awkfloat xf;
1519
1520 x = execute(a[0]);
1521 xf = getfval(x);
1522 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1523 if (n == PREINCR || n == PREDECR) {
1524 setfval(x, xf + k);
1525 return(x);
1526 }
1527 z = gettemp();
1528 setfval(z, xf);
1529 setfval(x, xf + k);
1530 tempfree(x);
1531 return(z);
1532 }
1533
assign(Node ** a,int n)1534 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1535 { /* this is subtle; don't muck with it. */
1536 Cell *x, *y;
1537 Awkfloat xf, yf;
1538 double v;
1539
1540 y = execute(a[1]);
1541 x = execute(a[0]);
1542 if (n == ASSIGN) { /* ordinary assignment */
1543 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1544 ; /* self-assignment: leave alone unless it's a field or NF */
1545 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1546 yf = getfval(y);
1547 setsval(x, getsval(y));
1548 x->fval = yf;
1549 x->tval |= NUM;
1550 }
1551 else if (isstr(y))
1552 setsval(x, getsval(y));
1553 else if (isnum(y))
1554 setfval(x, getfval(y));
1555 else
1556 funnyvar(y, "read value of");
1557 tempfree(y);
1558 return(x);
1559 }
1560 xf = getfval(x);
1561 yf = getfval(y);
1562 switch (n) {
1563 case ADDEQ:
1564 xf += yf;
1565 break;
1566 case SUBEQ:
1567 xf -= yf;
1568 break;
1569 case MULTEQ:
1570 xf *= yf;
1571 break;
1572 case DIVEQ:
1573 if (yf == 0)
1574 FATAL("division by zero in /=");
1575 xf /= yf;
1576 break;
1577 case MODEQ:
1578 if (yf == 0)
1579 FATAL("division by zero in %%=");
1580 modf(xf/yf, &v);
1581 xf = xf - yf * v;
1582 break;
1583 case POWEQ:
1584 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1585 xf = ipow(xf, (int) yf);
1586 else {
1587 errno = 0;
1588 xf = errcheck(pow(xf, yf), "pow");
1589 }
1590 break;
1591 default:
1592 FATAL("illegal assignment operator %d", n);
1593 break;
1594 }
1595 tempfree(y);
1596 setfval(x, xf);
1597 return(x);
1598 }
1599
cat(Node ** a,int q)1600 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1601 {
1602 Cell *x, *y, *z;
1603 int n1, n2;
1604 char *s = NULL;
1605 int ssz = 0;
1606
1607 x = execute(a[0]);
1608 n1 = strlen(getsval(x));
1609 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1610 memcpy(s, x->sval, n1);
1611
1612 tempfree(x);
1613
1614 y = execute(a[1]);
1615 n2 = strlen(getsval(y));
1616 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1617 memcpy(s + n1, y->sval, n2);
1618 s[n1 + n2] = '\0';
1619
1620 tempfree(y);
1621
1622 z = gettemp();
1623 z->sval = s;
1624 z->tval = STR;
1625
1626 return(z);
1627 }
1628
pastat(Node ** a,int n)1629 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1630 {
1631 Cell *x;
1632
1633 if (a[0] == NULL)
1634 x = execute(a[1]);
1635 else {
1636 x = execute(a[0]);
1637 if (istrue(x)) {
1638 tempfree(x);
1639 x = execute(a[1]);
1640 }
1641 }
1642 return x;
1643 }
1644
dopa2(Node ** a,int n)1645 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1646 {
1647 Cell *x;
1648 int pair;
1649
1650 pair = ptoi(a[3]);
1651 if (pairstack[pair] == 0) {
1652 x = execute(a[0]);
1653 if (istrue(x))
1654 pairstack[pair] = 1;
1655 tempfree(x);
1656 }
1657 if (pairstack[pair] == 1) {
1658 x = execute(a[1]);
1659 if (istrue(x))
1660 pairstack[pair] = 0;
1661 tempfree(x);
1662 x = execute(a[2]);
1663 return(x);
1664 }
1665 return(False);
1666 }
1667
split(Node ** a,int nnn)1668 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1669 {
1670 Cell *x = NULL, *y, *ap;
1671 const char *s, *origs, *t;
1672 const char *fs = NULL;
1673 char *origfs = NULL;
1674 int sep;
1675 char temp, num[50];
1676 int n, tempstat, arg3type;
1677 int j;
1678 double result;
1679
1680 y = execute(a[0]); /* source string */
1681 origs = s = strdup(getsval(y));
1682 tempfree(y);
1683 arg3type = ptoi(a[3]);
1684 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */
1685 fs = getsval(fsloc);
1686 } else if (arg3type == STRING) { /* split(str,arr,"string") */
1687 x = execute(a[2]);
1688 fs = origfs = strdup(getsval(x));
1689 tempfree(x);
1690 } else if (arg3type == REGEXPR) {
1691 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1692 } else {
1693 FATAL("illegal type of split");
1694 }
1695 sep = *fs;
1696 ap = execute(a[1]); /* array name */
1697 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1698 freesymtab(ap);
1699 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1700 ap->tval &= ~STR;
1701 ap->tval |= ARR;
1702 ap->sval = (char *) makesymtab(NSYMTAB);
1703
1704 n = 0;
1705 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1706 /* split(s, a, //); have to arrange that it looks like empty sep */
1707 arg3type = 0;
1708 fs = "";
1709 sep = 0;
1710 }
1711 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1712 fa *pfa;
1713 if (arg3type == REGEXPR) { /* it's ready already */
1714 pfa = (fa *) a[2];
1715 } else {
1716 pfa = makedfa(fs, 1);
1717 }
1718 if (nematch(pfa,s)) {
1719 tempstat = pfa->initstat;
1720 pfa->initstat = 2;
1721 do {
1722 n++;
1723 snprintf(num, sizeof(num), "%d", n);
1724 temp = *patbeg;
1725 setptr(patbeg, '\0');
1726 if (is_number(s, & result))
1727 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1728 else
1729 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1730 setptr(patbeg, temp);
1731 s = patbeg + patlen;
1732 if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1733 n++;
1734 snprintf(num, sizeof(num), "%d", n);
1735 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1736 pfa->initstat = tempstat;
1737 goto spdone;
1738 }
1739 } while (nematch(pfa,s));
1740 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1741 /* cf gsub and refldbld */
1742 }
1743 n++;
1744 snprintf(num, sizeof(num), "%d", n);
1745 if (is_number(s, & result))
1746 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1747 else
1748 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1749 spdone:
1750 pfa = NULL;
1751
1752 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */
1753 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1754 for (;;) {
1755 char *fr = newt;
1756 n++;
1757 if (*s == '"' ) { /* start of "..." */
1758 for (s++ ; *s != '\0'; ) {
1759 if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1760 s += 2; /* doubled quote */
1761 *fr++ = '"';
1762 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1763 s++; /* skip over closing quote */
1764 break;
1765 } else {
1766 *fr++ = *s++;
1767 }
1768 }
1769 *fr++ = 0;
1770 } else { /* unquoted field */
1771 while (*s != ',' && *s != '\0')
1772 *fr++ = *s++;
1773 *fr++ = 0;
1774 }
1775 snprintf(num, sizeof(num), "%d", n);
1776 if (is_number(newt, &result))
1777 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1778 else
1779 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1780 if (*s++ == '\0')
1781 break;
1782 }
1783 free(newt);
1784
1785 } else if (!CSV && sep == ' ') { /* usual case: split on white space */
1786 for (n = 0; ; ) {
1787 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1788 while (ISWS(*s))
1789 s++;
1790 if (*s == '\0')
1791 break;
1792 n++;
1793 t = s;
1794 do
1795 s++;
1796 while (*s != '\0' && !ISWS(*s));
1797 temp = *s;
1798 setptr(s, '\0');
1799 snprintf(num, sizeof(num), "%d", n);
1800 if (is_number(t, & result))
1801 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1802 else
1803 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1804 setptr(s, temp);
1805 if (*s != '\0')
1806 s++;
1807 }
1808
1809 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1810 for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1811 char buf[10];
1812 n++;
1813 snprintf(num, sizeof(num), "%d", n);
1814
1815 for (j = 0; j < u8_nextlen(s); j++) {
1816 buf[j] = s[j];
1817 }
1818 buf[j] = '\0';
1819
1820 if (isdigit((uschar)buf[0]))
1821 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1822 else
1823 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1824 }
1825
1826 } else if (*s != '\0') { /* some random single character */
1827 for (;;) {
1828 n++;
1829 t = s;
1830 while (*s != sep && *s != '\0')
1831 s++;
1832 temp = *s;
1833 setptr(s, '\0');
1834 snprintf(num, sizeof(num), "%d", n);
1835 if (is_number(t, & result))
1836 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1837 else
1838 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1839 setptr(s, temp);
1840 if (*s++ == '\0')
1841 break;
1842 }
1843 }
1844 tempfree(ap);
1845 xfree(origs);
1846 xfree(origfs);
1847 x = gettemp();
1848 x->tval = NUM;
1849 x->fval = n;
1850 return(x);
1851 }
1852
condexpr(Node ** a,int n)1853 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1854 {
1855 Cell *x;
1856
1857 x = execute(a[0]);
1858 if (istrue(x)) {
1859 tempfree(x);
1860 x = execute(a[1]);
1861 } else {
1862 tempfree(x);
1863 x = execute(a[2]);
1864 }
1865 return(x);
1866 }
1867
ifstat(Node ** a,int n)1868 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1869 {
1870 Cell *x;
1871
1872 x = execute(a[0]);
1873 if (istrue(x)) {
1874 tempfree(x);
1875 x = execute(a[1]);
1876 } else if (a[2] != NULL) {
1877 tempfree(x);
1878 x = execute(a[2]);
1879 }
1880 return(x);
1881 }
1882
whilestat(Node ** a,int n)1883 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1884 {
1885 Cell *x;
1886
1887 for (;;) {
1888 x = execute(a[0]);
1889 if (!istrue(x))
1890 return(x);
1891 tempfree(x);
1892 x = execute(a[1]);
1893 if (isbreak(x)) {
1894 x = True;
1895 return(x);
1896 }
1897 if (isnext(x) || isexit(x) || isret(x))
1898 return(x);
1899 tempfree(x);
1900 }
1901 }
1902
dostat(Node ** a,int n)1903 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1904 {
1905 Cell *x;
1906
1907 for (;;) {
1908 x = execute(a[0]);
1909 if (isbreak(x))
1910 return True;
1911 if (isnext(x) || isexit(x) || isret(x))
1912 return(x);
1913 tempfree(x);
1914 x = execute(a[1]);
1915 if (!istrue(x))
1916 return(x);
1917 tempfree(x);
1918 }
1919 }
1920
forstat(Node ** a,int n)1921 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1922 {
1923 Cell *x;
1924
1925 x = execute(a[0]);
1926 tempfree(x);
1927 for (;;) {
1928 if (a[1]!=NULL) {
1929 x = execute(a[1]);
1930 if (!istrue(x)) return(x);
1931 else tempfree(x);
1932 }
1933 x = execute(a[3]);
1934 if (isbreak(x)) /* turn off break */
1935 return True;
1936 if (isnext(x) || isexit(x) || isret(x))
1937 return(x);
1938 tempfree(x);
1939 x = execute(a[2]);
1940 tempfree(x);
1941 }
1942 }
1943
instat(Node ** a,int n)1944 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1945 {
1946 Cell *x, *vp, *arrayp, *cp, *ncp;
1947 Array *tp;
1948 int i;
1949
1950 vp = execute(a[0]);
1951 arrayp = execute(a[1]);
1952 if (!isarr(arrayp)) {
1953 return True;
1954 }
1955 tp = (Array *) arrayp->sval;
1956 tempfree(arrayp);
1957 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1958 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1959 setsval(vp, cp->nval);
1960 ncp = cp->cnext;
1961 x = execute(a[2]);
1962 if (isbreak(x)) {
1963 tempfree(vp);
1964 return True;
1965 }
1966 if (isnext(x) || isexit(x) || isret(x)) {
1967 tempfree(vp);
1968 return(x);
1969 }
1970 tempfree(x);
1971 }
1972 }
1973 return True;
1974 }
1975
nawk_convert(const char * s,int (* fun_c)(int),wint_t (* fun_wc)(wint_t))1976 static char *nawk_convert(const char *s, int (*fun_c)(int),
1977 wint_t (*fun_wc)(wint_t))
1978 {
1979 char *buf = NULL;
1980 char *pbuf = NULL;
1981 const char *ps = NULL;
1982 size_t n = 0;
1983 wchar_t wc;
1984 const size_t sz = awk_mb_cur_max;
1985 int unused;
1986
1987 if (sz == 1) {
1988 buf = tostring(s);
1989
1990 for (pbuf = buf; *pbuf; pbuf++)
1991 *pbuf = fun_c((uschar)*pbuf);
1992
1993 return buf;
1994 } else {
1995 /* upper/lower character may be shorter/longer */
1996 buf = tostringN(s, strlen(s) * sz + 1);
1997
1998 (void) mbtowc(NULL, NULL, 0); /* reset internal state */
1999 /*
2000 * Reset internal state here too.
2001 * Assign result to avoid a compiler warning. (Casting to void
2002 * doesn't work.)
2003 * Increment said variable to avoid a different warning.
2004 */
2005 unused = wctomb(NULL, L'\0');
2006 unused++;
2007
2008 ps = s;
2009 pbuf = buf;
2010 while (n = mbtowc(&wc, ps, sz),
2011 n > 0 && n != (size_t)-1 && n != (size_t)-2)
2012 {
2013 ps += n;
2014
2015 n = wctomb(pbuf, fun_wc(wc));
2016 if (n == (size_t)-1)
2017 FATAL("illegal wide character %s", s);
2018
2019 pbuf += n;
2020 }
2021
2022 *pbuf = '\0';
2023
2024 if (n)
2025 FATAL("illegal byte sequence %s", s);
2026
2027 return buf;
2028 }
2029 }
2030
#ifdef __DJGPP__
/*
 * Replacement towupper() used only when building with DJGPP: values below
 * 256 are mapped through toupper(), everything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? (wint_t) toupper(wc & 0xFF) : wc;
}

/*
 * Replacement towlower() used only when building with DJGPP: values below
 * 256 are mapped through tolower(), everything else is returned unchanged.
 */
static wint_t towlower(wint_t wc)
{
	return (wc >= 0 && wc < 256) ? (wint_t) tolower(wc & 0xFF) : wc;
}
#endif
2048
/* nawk_toupper - newly allocated upper-case copy of s (see nawk_convert). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2053
/* nawk_tolower - newly allocated lower-case copy of s (see nawk_convert). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2058
2059
2060
bltin(Node ** a,int n)2061 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2062 {
2063 Cell *x, *y;
2064 Awkfloat u = 0;
2065 int t, sz;
2066 Awkfloat tmp;
2067 char *buf, *fmt;
2068 Node *nextarg;
2069 FILE *fp;
2070 int status = 0;
2071 time_t tv;
2072 struct tm *tm, tmbuf;
2073 int estatus = 0;
2074
2075 t = ptoi(a[0]);
2076 x = execute(a[1]);
2077 nextarg = a[1]->nnext;
2078 switch (t) {
2079 case FLENGTH:
2080 if (isarr(x))
2081 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2082 else
2083 u = u8_strlen(getsval(x));
2084 break;
2085 case FLOG:
2086 errno = 0;
2087 u = errcheck(log(getfval(x)), "log");
2088 break;
2089 case FINT:
2090 modf(getfval(x), &u); break;
2091 case FEXP:
2092 errno = 0;
2093 u = errcheck(exp(getfval(x)), "exp");
2094 break;
2095 case FSQRT:
2096 errno = 0;
2097 u = errcheck(sqrt(getfval(x)), "sqrt");
2098 break;
2099 case FSIN:
2100 u = sin(getfval(x)); break;
2101 case FCOS:
2102 u = cos(getfval(x)); break;
2103 case FATAN:
2104 if (nextarg == NULL) {
2105 WARNING("atan2 requires two arguments; returning 1.0");
2106 u = 1.0;
2107 } else {
2108 y = execute(a[1]->nnext);
2109 u = atan2(getfval(x), getfval(y));
2110 tempfree(y);
2111 nextarg = nextarg->nnext;
2112 }
2113 break;
2114 case FCOMPL:
2115 u = ~((int)getfval(x));
2116 break;
2117 case FAND:
2118 if (nextarg == 0) {
2119 WARNING("and requires two arguments; returning 0");
2120 u = 0;
2121 break;
2122 }
2123 y = execute(a[1]->nnext);
2124 u = ((int)getfval(x)) & ((int)getfval(y));
2125 tempfree(y);
2126 nextarg = nextarg->nnext;
2127 break;
2128 case FFOR:
2129 if (nextarg == 0) {
2130 WARNING("or requires two arguments; returning 0");
2131 u = 0;
2132 break;
2133 }
2134 y = execute(a[1]->nnext);
2135 u = ((int)getfval(x)) | ((int)getfval(y));
2136 tempfree(y);
2137 nextarg = nextarg->nnext;
2138 break;
2139 case FXOR:
2140 if (nextarg == 0) {
2141 WARNING("xor requires two arguments; returning 0");
2142 u = 0;
2143 break;
2144 }
2145 y = execute(a[1]->nnext);
2146 u = ((int)getfval(x)) ^ ((int)getfval(y));
2147 tempfree(y);
2148 nextarg = nextarg->nnext;
2149 break;
2150 case FLSHIFT:
2151 if (nextarg == 0) {
2152 WARNING("lshift requires two arguments; returning 0");
2153 u = 0;
2154 break;
2155 }
2156 y = execute(a[1]->nnext);
2157 u = ((int)getfval(x)) << ((int)getfval(y));
2158 tempfree(y);
2159 nextarg = nextarg->nnext;
2160 break;
2161 case FRSHIFT:
2162 if (nextarg == 0) {
2163 WARNING("rshift requires two arguments; returning 0");
2164 u = 0;
2165 break;
2166 }
2167 y = execute(a[1]->nnext);
2168 u = ((int)getfval(x)) >> ((int)getfval(y));
2169 tempfree(y);
2170 nextarg = nextarg->nnext;
2171 break;
2172 case FSYSTEM:
2173 fflush(stdout); /* in case something is buffered already */
2174 estatus = status = system(getsval(x));
2175 if (status != -1) {
2176 if (WIFEXITED(status)) {
2177 estatus = WEXITSTATUS(status);
2178 } else if (WIFSIGNALED(status)) {
2179 estatus = WTERMSIG(status) + 256;
2180 #ifdef WCOREDUMP
2181 if (WCOREDUMP(status))
2182 estatus += 256;
2183 #endif
2184 } else /* something else?!? */
2185 estatus = 0;
2186 }
2187 /* else estatus was set to -1 */
2188 u = estatus;
2189 break;
2190 case FRAND:
2191 /* random() returns numbers in [0..2^31-1]
2192 * in order to get a number in [0, 1), divide it by 2^31
2193 */
2194 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
2195 break;
2196 case FSRAND:
2197 if (isrec(x)) /* no argument provided */
2198 u = time((time_t *)0);
2199 else
2200 u = getfval(x);
2201 tmp = u;
2202 srandom((unsigned long) u);
2203 u = srand_seed;
2204 srand_seed = tmp;
2205 break;
2206 case FTOUPPER:
2207 case FTOLOWER:
2208 if (t == FTOUPPER)
2209 buf = nawk_toupper(getsval(x));
2210 else
2211 buf = nawk_tolower(getsval(x));
2212 tempfree(x);
2213 x = gettemp();
2214 setsval(x, buf);
2215 free(buf);
2216 return x;
2217 case FFLUSH:
2218 if (isrec(x) || strlen(getsval(x)) == 0) {
2219 flush_all(); /* fflush() or fflush("") -> all */
2220 u = 0;
2221 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2222 u = EOF;
2223 else
2224 u = fflush(fp);
2225 break;
2226 case FMKTIME:
2227 memset(&tmbuf, 0, sizeof(tmbuf));
2228 tm = &tmbuf;
2229 t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2230 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2231 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2232 switch (t) {
2233 case 6:
2234 tm->tm_isdst = -1; /* let mktime figure it out */
2235 /* FALLTHROUGH */
2236 case 7:
2237 tm->tm_year -= 1900;
2238 tm->tm_mon--;
2239 u = mktime(tm);
2240 break;
2241 default:
2242 u = -1;
2243 break;
2244 }
2245 break;
2246 case FSYSTIME:
2247 u = time((time_t *) 0);
2248 break;
2249 case FSTRFTIME:
2250 /* strftime([format [,timestamp]]) */
2251 if (nextarg) {
2252 y = execute(nextarg);
2253 nextarg = nextarg->nnext;
2254 tv = (time_t) getfval(y);
2255 tempfree(y);
2256 } else
2257 tv = time((time_t *) 0);
2258 tm = localtime(&tv);
2259 if (tm == NULL)
2260 FATAL("bad time %ld", (long)tv);
2261
2262 if (isrec(x)) {
2263 /* format argument not provided, use default */
2264 fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2265 } else
2266 fmt = tostring(getsval(x));
2267
2268 sz = 32;
2269 buf = NULL;
2270 do {
2271 if ((buf = realloc(buf, (sz *= 2))) == NULL)
2272 FATAL("out of memory in strftime");
2273 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2274
2275 y = gettemp();
2276 setsval(y, buf);
2277 free(fmt);
2278 free(buf);
2279
2280 return y;
2281 default: /* can't happen */
2282 FATAL("illegal function type %d", t);
2283 break;
2284 }
2285 tempfree(x);
2286 x = gettemp();
2287 setfval(x, u);
2288 if (nextarg != NULL) {
2289 WARNING("warning: function has too many arguments");
2290 for ( ; nextarg; nextarg = nextarg->nnext) {
2291 y = execute(nextarg);
2292 tempfree(y);
2293 }
2294 }
2295 return(x);
2296 }
2297
printstat(Node ** a,int n)2298 Cell *printstat(Node **a, int n) /* print a[0] */
2299 {
2300 Node *x;
2301 Cell *y;
2302 FILE *fp;
2303
2304 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
2305 fp = stdout;
2306 else
2307 fp = redirect(ptoi(a[1]), a[2]);
2308 for (x = a[0]; x != NULL; x = x->nnext) {
2309 y = execute(x);
2310 fputs(getpssval(y), fp);
2311 tempfree(y);
2312 if (x->nnext == NULL)
2313 fputs(getsval(orsloc), fp);
2314 else
2315 fputs(getsval(ofsloc), fp);
2316 }
2317 if (a[1] != NULL)
2318 fflush(fp);
2319 if (ferror(fp))
2320 FATAL("write error on %s", filename(fp));
2321 return(True);
2322 }
2323
nullproc(Node ** a,int n)2324 Cell *nullproc(Node **a, int n)
2325 {
2326 return 0;
2327 }
2328
2329
/*
 * redirect - resolve an I/O redirection to a stream.
 * a is the redirection operator and b the expression naming the target.
 * Failure to open the target is fatal, so the result is never NULL.
 */
FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
{
	Cell *namecell = execute(b);
	char *path = getsval(namecell);
	FILE *stream = openfile(a, path, NULL);

	if (stream == NULL)
		FATAL("can't open file %s", path);
	tempfree(namecell);
	return stream;
}
2344
/*
 * Table of streams known to the interpreter.  An entry whose fp is NULL is
 * a free slot.  The table is allocated by stdinit() and grown by
 * openfile() in steps of FOPEN_MAX entries.
 */
struct files {
	FILE	*fp;		/* open stream, or NULL if the slot is free */
	const char	*fname;	/* name the stream was opened under (allocated copy) */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
} *files;

size_t nfiles;	/* number of slots currently allocated in files[] */
2352
stdinit(void)2353 static void stdinit(void) /* in case stdin, etc., are not constants */
2354 {
2355 nfiles = FOPEN_MAX;
2356 files = (struct files *) calloc(nfiles, sizeof(*files));
2357 if (files == NULL)
2358 FATAL("can't allocate file memory for %zu files", nfiles);
2359 files[0].fp = stdin;
2360 files[0].fname = tostring("/dev/stdin");
2361 files[0].mode = LT;
2362 files[1].fp = stdout;
2363 files[1].fname = tostring("/dev/stdout");
2364 files[1].mode = GT;
2365 files[2].fp = stderr;
2366 files[2].fname = tostring("/dev/stderr");
2367 files[2].mode = GT;
2368 }
2369
openfile(int a,const char * us,bool * pnewflag)2370 FILE *openfile(int a, const char *us, bool *pnewflag)
2371 {
2372 const char *s = us;
2373 size_t i;
2374 int m;
2375 FILE *fp = NULL;
2376
2377 if (*s == '\0')
2378 FATAL("null file name in print or getline");
2379 for (i = 0; i < nfiles; i++)
2380 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2381 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2382 a == FFLUSH)) {
2383 if (pnewflag)
2384 *pnewflag = false;
2385 return files[i].fp;
2386 }
2387 if (a == FFLUSH) /* didn't find it, so don't create it! */
2388 return NULL;
2389
2390 for (i = 0; i < nfiles; i++)
2391 if (files[i].fp == NULL)
2392 break;
2393 if (i >= nfiles) {
2394 struct files *nf;
2395 size_t nnf = nfiles + FOPEN_MAX;
2396 nf = (struct files *) realloc(files, nnf * sizeof(*nf));
2397 if (nf == NULL)
2398 FATAL("cannot grow files for %s and %zu files", s, nnf);
2399 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2400 nfiles = nnf;
2401 files = nf;
2402 }
2403 fflush(stdout); /* force a semblance of order */
2404 m = a;
2405 if (a == GT) {
2406 fp = fopen(s, "w");
2407 } else if (a == APPEND) {
2408 fp = fopen(s, "a");
2409 m = GT; /* so can mix > and >> */
2410 } else if (a == '|') { /* output pipe */
2411 fp = popen(s, "w");
2412 } else if (a == LE) { /* input pipe */
2413 fp = popen(s, "r");
2414 } else if (a == LT) { /* getline <file */
2415 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
2416 } else /* can't happen */
2417 FATAL("illegal redirection %d", a);
2418 if (fp != NULL) {
2419 files[i].fname = tostring(s);
2420 files[i].fp = fp;
2421 files[i].mode = m;
2422 if (pnewflag)
2423 *pnewflag = true;
2424 if (fp != stdin && fp != stdout && fp != stderr)
2425 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2426 }
2427 return fp;
2428 }
2429
filename(FILE * fp)2430 const char *filename(FILE *fp)
2431 {
2432 size_t i;
2433
2434 for (i = 0; i < nfiles; i++)
2435 if (fp == files[i].fp)
2436 return files[i].fname;
2437 return "???";
2438 }
2439
closefile(Node ** a,int n)2440 Cell *closefile(Node **a, int n)
2441 {
2442 Cell *x;
2443 size_t i;
2444 bool stat;
2445
2446 x = execute(a[0]);
2447 getsval(x);
2448 stat = true;
2449 for (i = 0; i < nfiles; i++) {
2450 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2451 continue;
2452 if (files[i].mode == GT || files[i].mode == '|')
2453 fflush(files[i].fp);
2454 if (ferror(files[i].fp)) {
2455 if ((files[i].mode == GT && files[i].fp != stderr)
2456 || files[i].mode == '|')
2457 FATAL("write error on %s", files[i].fname);
2458 else
2459 WARNING("i/o error occurred on %s", files[i].fname);
2460 }
2461 if (files[i].fp == stdin || files[i].fp == stdout ||
2462 files[i].fp == stderr)
2463 stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2464 else if (files[i].mode == '|' || files[i].mode == LE)
2465 stat = pclose(files[i].fp) == -1;
2466 else
2467 stat = fclose(files[i].fp) == EOF;
2468 if (stat)
2469 WARNING("i/o error occurred closing %s", files[i].fname);
2470 xfree(files[i].fname);
2471 files[i].fname = NULL; /* watch out for ref thru this */
2472 files[i].fp = NULL;
2473 break;
2474 }
2475 tempfree(x);
2476 x = gettemp();
2477 setfval(x, (Awkfloat) (stat ? -1 : 0));
2478 return(x);
2479 }
2480
closeall(void)2481 void closeall(void)
2482 {
2483 size_t i;
2484 bool stat = false;
2485
2486 for (i = 0; i < nfiles; i++) {
2487 if (! files[i].fp)
2488 continue;
2489 if (files[i].mode == GT || files[i].mode == '|')
2490 fflush(files[i].fp);
2491 if (ferror(files[i].fp)) {
2492 if ((files[i].mode == GT && files[i].fp != stderr)
2493 || files[i].mode == '|')
2494 FATAL("write error on %s", files[i].fname);
2495 else
2496 WARNING("i/o error occurred on %s", files[i].fname);
2497 }
2498 if (files[i].fp == stdin || files[i].fp == stdout ||
2499 files[i].fp == stderr)
2500 continue;
2501 if (files[i].mode == '|' || files[i].mode == LE)
2502 stat = pclose(files[i].fp) == -1;
2503 else
2504 stat = fclose(files[i].fp) == EOF;
2505 if (stat)
2506 WARNING("i/o error occurred while closing %s", files[i].fname);
2507 }
2508 }
2509
flush_all(void)2510 static void flush_all(void)
2511 {
2512 size_t i;
2513
2514 for (i = 0; i < nfiles; i++)
2515 if (files[i].fp)
2516 fflush(files[i].fp);
2517 }
2518
2519 void backsub(char **pb_ptr, const char **sptr_ptr);
2520
/*
 * dosub: shared implementation of sub() and gsub().
 *
 *   a[0], a[1]  regular expression.  If a[0] is NULL, a[1] is an
 *               already-compiled fa; otherwise a[1] is executed and its
 *               string value is compiled with makedfa().
 *   a[2]        replacement string expression (copied with tostring()).
 *   a[3]        source string expression; its cell receives the rewritten
 *               text through setsval() when at least one match was found.
 *   subop       SUB selects only the first match (whichm = 1), GSUB
 *               selects every match (whichm = 0); anything else is fatal.
 *
 * In the replacement text an unescaped '&' is replaced by the matched
 * text and backslash sequences are delegated to backsub().
 *
 * Returns a fresh temporary numeric cell holding the match count m.
 */
Cell *dosub(Node **a, int subop)	/* sub and gsub */
{
	fa *pfa;
	int tempstat = 0;	/* saved pfa->initstat, restored after the loop */
	char *repl;		/* private copy of the replacement string */
	Cell *x;

	char *buf = NULL;	/* output buffer, allocated on the first match */
	char *pb = NULL;	/* next write position inside buf */
	int bufsz = recsize;

	const char *r, *s;
	const char *start;	/* where the next search begins */
	const char *noempty = NULL;	/* empty match disallowed here */
	size_t m = 0;		/* match count */
	size_t whichm = 0;	/* which match to select, 0 = global */
	int mtype;		/* match type */

	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];
	} else {
		x = execute(a[1]);
		pfa = makedfa(getsval(x), 1);
		tempfree(x);
	}

	x = execute(a[2]);	/* replacement string */
	repl = tostring(getsval(x));
	tempfree(x);

	switch (subop) {
	case SUB:
		whichm = 1;
		x = execute(a[3]);	/* source string */
		break;
	case GSUB:
		whichm = 0;
		x = execute(a[3]);	/* source string */
		break;
	default:
		FATAL("dosub: unrecognized subop: %d", subop);
	}

	start = getsval(x);
	while (pmatch(pfa, start)) {
		if (buf == NULL) {
			/* first match: allocate the output buffer lazily */
			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
				FATAL("out of memory in dosub");
			/*
			 * NOTE(review): initstat is saved and forced to 2 for the
			 * remaining searches; presumably this changes how pmatch
			 * treats the start position -- confirm against pmatch.
			 */
			tempstat = pfa->initstat;
			pfa->initstat = 2;
		}

		/* match types */
		#define MT_IGNORE  0	/* unselected or invalid */
		#define MT_INSERT  1	/* selected, empty */
		#define MT_REPLACE 2	/* selected, not empty */

		/* an empty match just after replacement is invalid */

		if (patbeg == noempty && patlen == 0) {
			mtype = MT_IGNORE;	/* invalid, not counted */
		} else if (whichm == ++m || whichm == 0) {
			mtype = patlen ? MT_REPLACE : MT_INSERT;
		} else {
			mtype = MT_IGNORE;	/* unselected, but counted */
		}

		/* leading text: copy everything between start and the match */
		if (patbeg > start) {
			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
				recsize, &pb, "dosub");
			s = start;
			while (s < patbeg)
				*pb++ = *s++;
		}

		if (mtype == MT_IGNORE)
			goto matching_text;	/* skip replacement text */

		/* emit the replacement, expanding '&' and backslash sequences */
		r = repl;
		while (*r != 0) {
			/* keep a few bytes of slack for the writes below */
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
			if (*r == '\\') {
				backsub(&pb, &r);
			} else if (*r == '&') {
				/* unescaped '&': insert the matched text */
				r++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
					&pb, "dosub");
				for (s = patbeg; s < patbeg+patlen; )
					*pb++ = *s++;
			} else {
				*pb++ = *r++;
			}
		}

	matching_text:
		/* a replaced match, or a match at the terminating NUL, copies nothing */
		if (mtype == MT_REPLACE || *patbeg == '\0')
			goto next_search;	/* skip matching text */

		/*
		 * For an empty match, step over one character so the search
		 * makes progress (u8_nextlen presumably yields the byte length
		 * of the character at patbeg -- confirm).
		 */
		if (patlen == 0)
			patlen = u8_nextlen(patbeg);
		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
		s = patbeg;
		while (s < patbeg + patlen)
			*pb++ = *s++;

	next_search:
		start = patbeg + patlen;
		/* stop once the selected match was handled or the input is exhausted */
		if (m == whichm || *patbeg == '\0')
			break;
		/* remember where a non-empty replacement ended (see noempty above) */
		if (mtype == MT_REPLACE)
			noempty = start;

		#undef MT_IGNORE
		#undef MT_INSERT
		#undef MT_REPLACE
	}

	xfree(repl);

	if (buf != NULL) {	/* at least one match was found */
		pfa->initstat = tempstat;

		/* trailing text, including the terminating NUL */
		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
		while ((*pb++ = *start++) != '\0')
			;

		setsval(x, buf);
		free(buf);
	}

	tempfree(x);
	x = gettemp();
	x->tval = NUM;
	x->fval = m;
	return x;
}
2659
gensub(Node ** a,int nnn)2660 Cell *gensub(Node **a, int nnn) /* global selective substitute */
2661 /* XXX incomplete - doesn't support backreferences \0 ... \9 */
2662 {
2663 Cell *x, *y, *res, *h;
2664 char *rptr;
2665 const char *sptr;
2666 char *buf, *pb;
2667 const char *t, *q;
2668 fa *pfa;
2669 int mflag, tempstat, num, whichm;
2670 int bufsz = recsize;
2671
2672 if ((buf = malloc(bufsz)) == NULL)
2673 FATAL("out of memory in gensub");
2674 mflag = 0; /* if mflag == 0, can replace empty string */
2675 num = 0;
2676 x = execute(a[4]); /* source string */
2677 t = getsval(x);
2678 res = copycell(x); /* target string - initially copy of source */
2679 res->csub = CTEMP; /* result values are temporary */
2680 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
2681 pfa = (fa *) a[1]; /* regular expression */
2682 else {
2683 y = execute(a[1]);
2684 pfa = makedfa(getsval(y), 1);
2685 tempfree(y);
2686 }
2687 y = execute(a[2]); /* replacement string */
2688 h = execute(a[3]); /* which matches should be replaced */
2689 sptr = getsval(h);
2690 if (sptr[0] == 'g' || sptr[0] == 'G')
2691 whichm = -1;
2692 else {
2693 /*
2694 * The specified number is index of replacement, starting
2695 * from 1. GNU awk treats index lower than 0 same as
2696 * 1, we do same for compatibility.
2697 */
2698 whichm = (int) getfval(h) - 1;
2699 if (whichm < 0)
2700 whichm = 0;
2701 }
2702 tempfree(h);
2703
2704 if (pmatch(pfa, t)) {
2705 char *sl;
2706
2707 tempstat = pfa->initstat;
2708 pfa->initstat = 2;
2709 pb = buf;
2710 rptr = getsval(y);
2711 /*
2712 * XXX if there are any backreferences in subst string,
2713 * complain now.
2714 */
2715 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2716 if (strchr("0123456789", sl[1])) {
2717 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2718 }
2719 }
2720
2721 do {
2722 if (whichm >= 0 && whichm != num) {
2723 num++;
2724 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2725
2726 /* copy the part of string up to and including
2727 * match to output buffer */
2728 while (t < patbeg + patlen)
2729 *pb++ = *t++;
2730 continue;
2731 }
2732
2733 if (patlen == 0 && *patbeg != 0) { /* matched empty string */
2734 if (mflag == 0) { /* can replace empty */
2735 num++;
2736 sptr = rptr;
2737 while (*sptr != 0) {
2738 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2739 if (*sptr == '\\') {
2740 backsub(&pb, &sptr);
2741 } else if (*sptr == '&') {
2742 sptr++;
2743 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2744 for (q = patbeg; q < patbeg+patlen; )
2745 *pb++ = *q++;
2746 } else
2747 *pb++ = *sptr++;
2748 }
2749 }
2750 if (*t == 0) /* at end */
2751 goto done;
2752 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2753 *pb++ = *t++;
2754 if (pb > buf + bufsz) /* BUG: not sure of this test */
2755 FATAL("gensub result0 %.30s too big; can't happen", buf);
2756 mflag = 0;
2757 }
2758 else { /* matched nonempty string */
2759 num++;
2760 sptr = t;
2761 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2762 while (sptr < patbeg)
2763 *pb++ = *sptr++;
2764 sptr = rptr;
2765 while (*sptr != 0) {
2766 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2767 if (*sptr == '\\') {
2768 backsub(&pb, &sptr);
2769 } else if (*sptr == '&') {
2770 sptr++;
2771 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2772 for (q = patbeg; q < patbeg+patlen; )
2773 *pb++ = *q++;
2774 } else
2775 *pb++ = *sptr++;
2776 }
2777 t = patbeg + patlen;
2778 if (patlen == 0 || *t == 0 || *(t-1) == 0)
2779 goto done;
2780 if (pb > buf + bufsz)
2781 FATAL("gensub result1 %.30s too big; can't happen", buf);
2782 mflag = 1;
2783 }
2784 } while (pmatch(pfa,t));
2785 sptr = t;
2786 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2787 while ((*pb++ = *sptr++) != 0)
2788 ;
2789 done: if (pb > buf + bufsz)
2790 FATAL("gensub result2 %.30s too big; can't happen", buf);
2791 *pb = '\0';
2792 setsval(res, buf);
2793 pfa->initstat = tempstat;
2794 }
2795 tempfree(x);
2796 tempfree(y);
2797 free(buf);
2798 return(res);
2799 }
2800
/*
 * backsub: handle \\& variations in a replacement string.
 *
 * On entry (*sptr_ptr)[0] is a backslash.  One escape sequence is
 * translated, its output is appended at *pb_ptr, and both pointers are
 * advanced past what was consumed/written:
 *
 *   \\\&   ->  \&        (4 input chars consumed)
 *   \\&    ->  \         (2 consumed; the '&' is left for the caller)
 *   \\x    ->  \         when POSIXLY_CORRECT is set in the environment
 *              \\        otherwise   (2 consumed; x left for the caller)
 *   \&     ->  &         (2 consumed)
 *   \x     ->  \         (1 consumed; x left for the caller)
 *
 * The environment is consulted only on the first call.  The caller must
 * provide room for up to two output bytes.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
{
	static bool env_checked = false;
	static bool posix_mode = false;
	const char *in = *sptr_ptr;	/* in[0] == '\\' */
	char *out = *pb_ptr;

	if (!env_checked) {
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);
		env_checked = true;
	}

	switch (in[1]) {
	case '&':			/* literal & */
		*out++ = '&';
		in += 2;
		break;

	case '\\':
		if (in[2] == '\\' && in[3] == '&') {	/* \\\& -> \& */
			*out++ = '\\';
			*out++ = '&';
			in += 4;
		} else if (in[2] == '&') {	/* \\& -> \ + matched */
			*out++ = '\\';
			in += 2;
		} else if (posix_mode) {	/* \\x -> \x */
			*out++ = in[1];
			in += 2;
		} else {			/* \\x -> \\x */
			*out++ = in[0];
			*out++ = in[1];
			in += 2;
		}
		break;

	default:			/* literal \ */
		*out++ = *in++;
		break;
	}

	*sptr_ptr = in;
	*pb_ptr = out;
}
2837
/*
 * wide_char_to_byte_str: encode one code point as a UTF-8 byte sequence.
 *
 *   rune    code point to encode; values outside 0 .. 0x10FFFF are rejected.
 *   outlen  receives the number of encoded bytes (1 to 4), not counting
 *           the terminating NUL.  Not written when NULL is returned.
 *
 * Returns a pointer to a static, NUL-terminated buffer that is
 * overwritten by the next call, or NULL if rune is out of range.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	size_t nbytes, k;
	int lead, rest;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	memset(buf, 0, sizeof(buf));

	/* choose the sequence length and the marker bits of the first byte */
	if (rune <= 0x0000007F) {
		nbytes = 1;		/* 0xxxxxxx */
		lead = 0x00;
	} else if (rune <= 0x000007FF) {
		nbytes = 2;		/* 110xxxxx 10xxxxxx */
		lead = 0xC0;
	} else if (rune <= 0x0000FFFF) {
		nbytes = 3;		/* 1110xxxx 10xxxxxx 10xxxxxx */
		lead = 0xE0;
	} else {
		nbytes = 4;		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		lead = 0xF0;
	}

	/* continuation bytes carry six bits each, filled from the end */
	rest = rune;
	for (k = nbytes - 1; k > 0; k--) {
		buf[k] = 0x80 | (rest & 0x3F);
		rest >>= 6;
	}
	/* whatever bits remain go into the first byte */
	buf[0] = lead | rest;

	*outlen = nbytes;
	buf[nbytes] = '\0';

	return buf;
}
2875