1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved.
28 */
29
30 /*
31 * awk -- mainline, yylex, etc.
32 *
33 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
34 */
35
36 #include "awk.h"
37 #include "y.tab.h"
38 #include <stdarg.h>
39 #include <unistd.h>
40 #include <locale.h>
41 #include <search.h>
42
43 static char *progfiles[NPFILE]; /* Programmes files for yylex */
44 static char **progfilep = &progfiles[0]; /* Pointer to last file */
45 static wchar_t *progptr; /* In-memory programme */
46 static int proglen; /* Length of progptr */
47 static wchar_t context[NCONTEXT]; /* Circular buffer of context */
48 static wchar_t *conptr = &context[0]; /* context ptr */
49 static FILE *progfp; /* Stdio stream for programme */
50 static char *filename;
51 #ifdef DEBUG
52 static int dflag;
53 #endif
54
55 #define AWK_EXEC_MAGIC "<MKS AWKC>"
56 #define LEN_EXEC_MAGIC 10
57
58 static char unbal[] = "unbalanced E char";
59
60 static void awkarginit(int c, char **av);
61 static int lexid(wint_t c);
62 static int lexnumber(wint_t c);
63 static int lexstring(wint_t endc);
64 static int lexregexp(wint_t endc);
65
66 static void awkvarinit(void);
67 static wint_t lexgetc(void);
68 static void lexungetc(wint_t c);
69 static size_t lexescape(wint_t endc, int regx, int cmd_line_operand);
70 static void awkierr(int perr, const char *fmt, va_list ap) __NORETURN;
71 static int usage(void);
72 void strescape(wchar_t *str);
73 static const char *toprint(wint_t);
74 char *_cmdname;
75 static wchar_t *mbconvert(char *str);
76
77 extern int isclvar(wchar_t *arg);
78
79 /*
80 * mainline for awk
81 */
82 int
main(int argc,char * argv[])83 main(int argc, char *argv[])
84 {
85 wchar_t *ap;
86 char *cmd;
87
88 cmd = argv[0];
89 _cmdname = cmd;
90
91 linebuf = emalloc(NLINE * sizeof (wchar_t));
92
93 /*
94 * At this point only messaging should be internationalized.
95 * numbers are still scanned as in the Posix locale.
96 */
97 (void) setlocale(LC_ALL, "");
98 (void) setlocale(LC_NUMERIC, "C");
99 #if !defined(TEXT_DOMAIN)
100 #define TEXT_DOMAIN "SYS_TEST"
101 #endif
102 (void) textdomain(TEXT_DOMAIN);
103
104 awkvarinit();
105 /* running = 1; */
106 while (argc > 1 && *argv[1] == '-') {
107 void *save_ptr = NULL;
108 ap = mbstowcsdup(&argv[1][1]);
109 if (ap == NULL)
110 break;
111 if (*ap == '\0') {
112 free(ap);
113 break;
114 }
115 save_ptr = (void *) ap;
116 ++argv;
117 --argc;
118 if (*ap == '-' && ap[1] == '\0')
119 break;
120 for (; *ap != '\0'; ++ap) {
121 switch (*ap) {
122 #ifdef DEBUG
123 case 'd':
124 dflag = 1;
125 continue;
126
127 #endif
128 case 'f':
129 if (argc < 2) {
130 (void) fprintf(stderr,
131 gettext("Missing script file\n"));
132 return (1);
133 }
134 *progfilep++ = argv[1];
135 --argc;
136 ++argv;
137 continue;
138
139 case 'F':
140 if (ap[1] == '\0') {
141 if (argc < 2) {
142 (void) fprintf(stderr,
143 gettext("Missing field separator\n"));
144 return (1);
145 }
146 ap = mbstowcsdup(argv[1]);
147 --argc;
148 ++argv;
149 } else
150 ++ap;
151 strescape(ap);
152 strassign(varFS, linebuf, FALLOC,
153 wcslen(linebuf));
154 break;
155
156 case 'v': {
157 wchar_t *vp;
158 wchar_t *arg;
159
160 if (argc < 2) {
161 (void) fprintf(stderr,
162 gettext("Missing variable assignment\n"));
163 return (1);
164 }
165 arg = mbconvert(argv[1]);
166 /*
167 * Ensure the variable expression
168 * is valid (correct form).
169 */
170 if (((vp = wcschr(arg, '=')) != NULL) &&
171 isclvar(arg)) {
172 *vp = '\0';
173 strescape(vp+1);
174 strassign(vlook(arg), linebuf,
175 FALLOC|FSENSE,
176 wcslen(linebuf));
177 *vp = '=';
178 } else {
179 (void) fprintf(stderr, gettext(
180 "Invalid form for variable "
181 "assignment: %S\n"), arg);
182 return (1);
183 }
184 --argc;
185 ++argv;
186 continue;
187 }
188
189 default:
190 (void) fprintf(stderr,
191 gettext("Unknown option \"-%S\"\n"), ap);
192 return (usage());
193 }
194 break;
195 }
196 if (save_ptr)
197 free(save_ptr);
198 }
199 if (progfilep == &progfiles[0]) {
200 if (argc < 2)
201 return (usage());
202 filename = "[command line]"; /* BUG: NEEDS TRANSLATION */
203 progptr = mbstowcsdup(argv[1]);
204 proglen = wcslen(progptr);
205 --argc;
206 ++argv;
207 }
208
209 argv[0] = cmd;
210
211 awkarginit(argc, argv);
212
213 /* running = 0; */
214 (void) yyparse();
215
216 lineno = 0;
217 /*
218 * Ok, done parsing, so now activate the rest of the nls stuff, set
219 * the radix character.
220 */
221 (void) setlocale(LC_ALL, "");
222 radixpoint = *localeconv()->decimal_point;
223 awk();
224 /* NOTREACHED */
225 return (0);
226 }
227
228 /*
229 * Do initial setup of buffers, etc.
230 * This must be called before most processing
231 * and especially before lexical analysis.
232 * Variables initialised here will be overruled by command
233 * line parameter initialisation.
234 */
235 static void
awkvarinit()236 awkvarinit()
237 {
238 NODE *np;
239
240 (void) setvbuf(stderr, NULL, _IONBF, 0);
241
242 if ((NIOSTREAM = sysconf(_SC_OPEN_MAX) - 4) <= 0) {
243 (void) fprintf(stderr,
244 gettext("not enough available file descriptors"));
245 exit(1);
246 }
247 ofiles = (OFILE *)emalloc(sizeof (OFILE)*NIOSTREAM);
248 #ifdef A_ZERO_POINTERS
249 (void) memset((wchar_t *)ofiles, 0, sizeof (OFILE) * NIOSTREAM);
250 #else
251 {
252 /* initialize file descriptor table */
253 OFILE *fp;
254 for (fp = ofiles; fp < &ofiles[NIOSTREAM]; fp += 1) {
255 fp->f_fp = FNULL;
256 fp->f_mode = 0;
257 fp->f_name = (char *)0;
258 }
259 }
260 #endif
261 constant = intnode((INT)0);
262
263 const0 = intnode((INT)0);
264 const1 = intnode((INT)1);
265 constundef = emptynode(CONSTANT, 0);
266 constundef->n_flags = FSTRING|FVINT;
267 constundef->n_string = _null;
268 constundef->n_strlen = 0;
269 inc_oper = emptynode(ADD, 0);
270 inc_oper->n_right = const1;
271 asn_oper = emptynode(ADD, 0);
272 field0 = node(FIELD, const0, NNULL);
273
274 {
275 RESFUNC near*rp;
276
277 for (rp = &resfuncs[0]; rp->rf_name != (LOCCHARP)NULL; ++rp) {
278 np = finstall(rp->rf_name, rp->rf_func, rp->rf_type);
279 }
280 }
281 {
282 RESERVED near*rp;
283
284 for (rp = &reserved[0]; rp->r_name != (LOCCHARP)NULL; ++rp) {
285 switch (rp->r_type) {
286 case SVAR:
287 case VAR:
288 running = 1;
289 np = vlook(rp->r_name);
290 if (rp->r_type == SVAR)
291 np->n_flags |= FSPECIAL;
292 if (rp->r_svalue != NULL)
293 strassign(np, rp->r_svalue, FSTATIC,
294 (size_t)rp->r_ivalue);
295 else {
296 constant->n_int = rp->r_ivalue;
297 (void) assign(np, constant);
298 }
299 running = 0;
300 break;
301
302 case KEYWORD:
303 kinstall(rp->r_name, (int)rp->r_ivalue);
304 break;
305 }
306 }
307 }
308
309 varNR = vlook(s_NR);
310 varFNR = vlook(s_FNR);
311 varNF = vlook(s_NF);
312 varOFMT = vlook(s_OFMT);
313 varCONVFMT = vlook(s_CONVFMT);
314 varOFS = vlook(s_OFS);
315 varORS = vlook(s_ORS);
316 varRS = vlook(s_RS);
317 varFS = vlook(s_FS);
318 varARGC = vlook(s_ARGC);
319 varSUBSEP = vlook(s_SUBSEP);
320 varENVIRON = vlook(s_ENVIRON);
321 varFILENAME = vlook(s_FILENAME);
322 varSYMTAB = vlook(s_SYMTAB);
323 incNR = node(ASG, varNR, node(ADD, varNR, const1));
324 incFNR = node(ASG, varFNR, node(ADD, varFNR, const1));
325 clrFNR = node(ASG, varFNR, const0);
326 }
327
328 /*
329 * Initialise awk ARGC, ARGV variables.
330 */
331 static void
awkarginit(int ac,char ** av)332 awkarginit(int ac, char **av)
333 {
334 int i;
335 wchar_t *cp;
336
337 ARGVsubi = node(INDEX, vlook(s_ARGV), constant);
338 running = 1;
339 constant->n_int = ac;
340 (void) assign(varARGC, constant);
341 for (i = 0; i < ac; ++i) {
342 cp = mbstowcsdup(av[i]);
343 constant->n_int = i;
344 strassign(exprreduce(ARGVsubi), cp,
345 FSTATIC|FSENSE, wcslen(cp));
346 }
347 running = 0;
348 }
349
350 /*
351 * Clean up when done parsing a function.
352 * All formal parameters, because of a deal (funparm) in
353 * yylex, get put into the symbol table in front of any
354 * global variable of the same name. When the entire
355 * function is parsed, remove these formal dummy nodes
356 * from the symbol table but retain the nodes because
357 * the generated tree points at them.
358 */
359 void
uexit(NODE * np)360 uexit(NODE *np)
361 {
362 NODE *formal;
363
364 while ((formal = getlist(&np)) != NNULL)
365 delsymtab(formal, 0);
366 }
367
368 /*
369 * The lexical analyzer.
370 */
371 int
yylex()372 yylex()
373 {
374 wint_t c, c1;
375 int i;
376 static int savetoken = 0;
377 static int wasfield;
378 static int isfuncdef;
379 static int nbrace, nparen, nbracket;
380 static struct ctosymstruct {
381 wint_t c, sym;
382 } ctosym[] = {
383 { '|', BAR }, { '^', CARAT },
384 { '~', TILDE }, { '<', LANGLE },
385 { '>', RANGLE }, { '+', PLUSC },
386 { '-', HYPHEN }, { '*', STAR },
387 { '/', SLASH }, { '%', PERCENT },
388 { '!', EXCLAMATION }, { '$', DOLLAR },
389 { '[', LSQUARE }, { ']', RSQUARE },
390 { '(', LPAREN }, { ')', RPAREN },
391 { ';', SEMI }, { '{', LBRACE },
392 { '}', RBRACE }, { 0, 0 }
393 };
394
395 if (savetoken) {
396 c = savetoken;
397 savetoken = 0;
398 } else if (redelim != '\0') {
399 c = redelim;
400 redelim = 0;
401 catterm = 0;
402 savetoken = c;
403 c = lexlast = lexregexp(c);
404 goto out;
405 } else while ((c = lexgetc()) != WEOF) {
406 if (iswalpha(c) || c == '_') {
407 c = lexid(c);
408 } else if (iswdigit(c) || c == '.') {
409 c = lexnumber(c);
410 } else if (isWblank(c)) {
411 continue;
412 } else switch (c) {
413 #if DOS || OS2
414 case 032: /* ^Z */
415 continue;
416 #endif
417
418 case '"':
419 c = lexstring(c);
420 break;
421
422 case '#':
423 while ((c = lexgetc()) != '\n' && c != WEOF)
424 ;
425 lexungetc(c);
426 continue;
427
428 case '+':
429 if ((c1 = lexgetc()) == '+')
430 c = INC;
431 else if (c1 == '=')
432 c = AADD;
433 else
434 lexungetc(c1);
435 break;
436
437 case '-':
438 if ((c1 = lexgetc()) == '-')
439 c = DEC;
440 else if (c1 == '=')
441 c = ASUB;
442 else
443 lexungetc(c1);
444 break;
445
446 case '*':
447 if ((c1 = lexgetc()) == '=')
448 c = AMUL;
449 else if (c1 == '*') {
450 if ((c1 = lexgetc()) == '=')
451 c = AEXP;
452 else {
453 c = EXP;
454 lexungetc(c1);
455 }
456 } else
457 lexungetc(c1);
458 break;
459
460 case '^':
461 if ((c1 = lexgetc()) == '=') {
462 c = AEXP;
463 } else {
464 c = EXP;
465 lexungetc(c1);
466 }
467 break;
468
469 case '/':
470 if ((c1 = lexgetc()) == '=' &&
471 lexlast != RE && lexlast != NRE &&
472 lexlast != ';' && lexlast != '\n' &&
473 lexlast != ',' && lexlast != '(')
474 c = ADIV;
475 else
476 lexungetc(c1);
477 break;
478
479 case '%':
480 if ((c1 = lexgetc()) == '=')
481 c = AREM;
482 else
483 lexungetc(c1);
484 break;
485
486 case '&':
487 if ((c1 = lexgetc()) == '&')
488 c = AND;
489 else
490 lexungetc(c1);
491 break;
492
493 case '|':
494 if ((c1 = lexgetc()) == '|')
495 c = OR;
496 else {
497 lexungetc(c1);
498 if (inprint)
499 c = PIPE;
500 }
501 break;
502
503 case '>':
504 if ((c1 = lexgetc()) == '=')
505 c = GE;
506 else if (c1 == '>')
507 c = APPEND;
508 else {
509 lexungetc(c1);
510 if (nparen == 0 && inprint)
511 c = WRITE;
512 }
513 break;
514
515 case '<':
516 if ((c1 = lexgetc()) == '=')
517 c = LE;
518 else
519 lexungetc(c1);
520 break;
521
522 case '!':
523 if ((c1 = lexgetc()) == '=')
524 c = NE;
525 else if (c1 == '~')
526 c = NRE;
527 else
528 lexungetc(c1);
529 break;
530
531 case '=':
532 if ((c1 = lexgetc()) == '=')
533 c = EQ;
534 else {
535 lexungetc(c1);
536 c = ASG;
537 }
538 break;
539
540 case '\n':
541 switch (lexlast) {
542 case ')':
543 if (catterm || inprint) {
544 c = ';';
545 break;
546 }
547 /* FALLTHROUGH */
548 case AND:
549 case OR:
550 case COMMA:
551 case '{':
552 case ELSE:
553 case ';':
554 case DO:
555 continue;
556
557 case '}':
558 if (nbrace != 0)
559 continue;
560 /* FALLTHROUGH */
561
562 default:
563 c = ';';
564 break;
565 }
566 break;
567
568 case ELSE:
569 if (lexlast != ';') {
570 savetoken = ELSE;
571 c = ';';
572 }
573 break;
574
575 case '(':
576 ++nparen;
577 break;
578
579 case ')':
580 if (--nparen < 0)
581 awkerr(unbal, "()");
582 break;
583
584 case '{':
585 nbrace++;
586 break;
587
588 case '}':
589 if (--nbrace < 0) {
590 char brk[3];
591
592 brk[0] = '{';
593 brk[1] = '}';
594 brk[2] = '\0';
595 awkerr(unbal, brk);
596 }
597 if (lexlast != ';') {
598 savetoken = c;
599 c = ';';
600 }
601 break;
602
603 case '[':
604 ++nbracket;
605 break;
606
607 case ']':
608 if (--nbracket < 0) {
609 char brk[3];
610
611 brk[0] = '[';
612 brk[1] = ']';
613 brk[2] = '\0';
614 awkerr(unbal, brk);
615 }
616 break;
617
618 case '\\':
619 if ((c1 = lexgetc()) == '\n')
620 continue;
621 lexungetc(c1);
622 break;
623
624 case ',':
625 c = COMMA;
626 break;
627
628 case '?':
629 c = QUEST;
630 break;
631
632 case ':':
633 c = COLON;
634 break;
635
636 default:
637 if (!iswprint(c))
638 awkerr(
639 gettext("invalid character \"%s\""),
640 toprint(c));
641 break;
642 }
643 break;
644 }
645
646 switch (c) {
647 case ']':
648 ++catterm;
649 break;
650
651 case VAR:
652 if (catterm) {
653 savetoken = c;
654 c = CONCAT;
655 catterm = 0;
656 } else if (!isfuncdef) {
657 if ((c1 = lexgetc()) != '(')
658 ++catterm;
659 lexungetc(c1);
660 }
661 isfuncdef = 0;
662 break;
663
664 case PARM:
665 case CONSTANT:
666 if (catterm) {
667 savetoken = c;
668 c = CONCAT;
669 catterm = 0;
670 } else {
671 if (lexlast == '$')
672 wasfield = 2;
673 ++catterm;
674 }
675 break;
676
677 case INC:
678 case DEC:
679 if (!catterm || lexlast != CONSTANT || wasfield)
680 break;
681
682 /* FALLTHROUGH */
683 case UFUNC:
684 case FUNC:
685 case GETLINE:
686 case '!':
687 case '$':
688 case '(':
689 if (catterm) {
690 savetoken = c;
691 c = CONCAT;
692 catterm = 0;
693 }
694 break;
695
696 case '}':
697 if (nbrace == 0)
698 savetoken = ';';
699 /* FALLTHROUGH */
700 case ';':
701 inprint = 0;
702 /* FALLTHROUGH */
703 default:
704 if (c == DEFFUNC)
705 isfuncdef = 1;
706 catterm = 0;
707 }
708 lexlast = c;
709 if (wasfield)
710 wasfield--;
711 /*
712 * Map character constants to symbolic names.
713 */
714 for (i = 0; ctosym[i].c != 0; i++)
715 if (c == ctosym[i].c) {
716 c = ctosym[i].sym;
717 break;
718 }
719 out:
720 #ifdef DEBUG
721 if (dflag)
722 (void) printf("%d\n", (int)c);
723 #endif
724 return ((int)c);
725 }
726
727 /*
728 * Read a number for the lexical analyzer.
729 * Input is the first character of the number.
730 * Return value is the lexical type.
731 */
732 static int
lexnumber(wint_t c)733 lexnumber(wint_t c)
734 {
735 wchar_t *cp;
736 int dotfound = 0;
737 int efound = 0;
738 INT number;
739
740 cp = linebuf;
741 do {
742 if (iswdigit(c))
743 ;
744 else if (c == '.') {
745 if (dotfound++)
746 break;
747 } else if (c == 'e' || c == 'E') {
748 if ((c = lexgetc()) != '-' && c != '+') {
749 lexungetc(c);
750 c = 'e';
751 } else
752 *cp++ = 'e';
753 if (efound++)
754 break;
755 } else
756 break;
757 *cp++ = c;
758 } while ((c = lexgetc()) != WEOF);
759 *cp = '\0';
760 if (dotfound && cp == linebuf+1)
761 return (DOT);
762 lexungetc(c);
763 errno = 0;
764 if (!dotfound && !efound &&
765 ((number = wcstol(linebuf, (wchar_t **)0, 10)), errno != ERANGE))
766 yylval.node = intnode(number);
767 else
768 yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0));
769 return (CONSTANT);
770 }
771
772 /*
773 * Read an identifier.
774 * Input is first character of identifier.
775 * Return VAR.
776 */
777 static int
lexid(wint_t c)778 lexid(wint_t c)
779 {
780 wchar_t *cp;
781 size_t i;
782 NODE *np;
783
784 cp = linebuf;
785 do {
786 *cp++ = c;
787 c = lexgetc();
788 } while (iswalpha(c) || iswdigit(c) || c == '_');
789 *cp = '\0';
790 lexungetc(c);
791 yylval.node = np = vlook(linebuf);
792
793 switch (np->n_type) {
794 case KEYWORD:
795 switch (np->n_keywtype) {
796 case PRINT:
797 case PRINTF:
798 ++inprint;
799 /* FALLTHROUGH */
800 default:
801 return ((int)np->n_keywtype);
802 }
803 /* NOTREACHED */
804
805 case ARRAY:
806 case VAR:
807 /*
808 * If reading the argument list, create a dummy node
809 * for the duration of that function. These variables
810 * can be removed from the symbol table at function end
811 * but they must still exist because the execution tree
812 * knows about them.
813 */
814 if (funparm) {
815 do_funparm:
816 np = emptynode(PARM, i = (cp-linebuf));
817 np->n_flags = FSTRING;
818 np->n_string = _null;
819 np->n_strlen = 0;
820 (void) memcpy(np->n_name, linebuf,
821 (i+1) * sizeof (wchar_t));
822 addsymtab(np);
823 yylval.node = np;
824 } else if (np == varNF || (np == varFS &&
825 (!doing_begin || begin_getline))) {
826 /*
827 * If the user program references NF or sets
828 * FS either outside of a begin block or
829 * in a begin block after a getline then the
830 * input line will be split immediately upon read
831 * rather than when a field is first referenced.
832 */
833 needsplit = 1;
834 } else if (np == varENVIRON)
835 needenviron = 1;
836 /* FALLTHROUGH */
837 case PARM:
838 return (VAR);
839
840 case UFUNC:
841 /*
842 * It is ok to redefine functions as parameters
843 */
844 if (funparm) goto do_funparm;
845 /* FALLTHROUGH */
846 case FUNC:
847 case GETLINE:
848 /*
849 * When a getline is encountered, clear the 'doing_begin' flag.
850 * This will force the 'needsplit' flag to be set, even inside
851 * a begin block, if FS is altered. (See VAR case above)
852 */
853 if (doing_begin)
854 begin_getline = 1;
855 return (np->n_type);
856 }
857 /* NOTREACHED */
858 return (0);
859 }
860
861 /*
862 * Read a string for the lexical analyzer.
863 * `endc' terminates the string.
864 */
865 static int
lexstring(wint_t endc)866 lexstring(wint_t endc)
867 {
868 size_t length = lexescape(endc, 0, 0);
869
870 yylval.node = stringnode(linebuf, FALLOC, length);
871 return (CONSTANT);
872 }
873
874 /*
875 * Read a regular expression.
876 */
877 static int
lexregexp(wint_t endc)878 lexregexp(wint_t endc)
879 {
880 (void) lexescape(endc, 1, 0);
881 yylval.node = renode(linebuf);
882 return (URE);
883 }
884
885 /*
886 * Process a string, converting the escape characters as required by
887 * 1003.2. The processed string ends up in the global linebuf[]. This
888 * routine also changes the value of 'progfd' - the program file
889 * descriptor, so it should be used with some care. It is presently used to
890 * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()).
891 */
892 void
strescape(wchar_t * str)893 strescape(wchar_t *str)
894 {
895 progptr = str;
896 proglen = wcslen(str) + 1; /* Include \0 */
897 (void) lexescape('\0', 0, 1);
898 progptr = NULL;
899 }
900
901 /*
902 * Read a string or regular expression, terminated by ``endc'',
903 * for lexical analyzer, processing escape sequences.
904 * Return string length.
905 */
906 static size_t
lexescape(wint_t endc,int regx,int cmd_line_operand)907 lexescape(wint_t endc, int regx, int cmd_line_operand)
908 {
909 static char nlre[256];
910 static char nlstr[256];
911 static char eofre[256];
912 static char eofstr[256];
913 int first_time = 1;
914 wint_t c;
915 wchar_t *cp;
916 int n, max;
917
918 if (first_time == 1) {
919 (void) strcpy(nlre, gettext("Newline in regular expression\n"));
920 (void) strcpy(nlstr, gettext("Newline in string\n"));
921 (void) strcpy(eofre, gettext("EOF in regular expression\n"));
922 (void) strcpy(eofstr, gettext("EOF in string\n"));
923 first_time = 0;
924 }
925
926 cp = linebuf;
927 while ((c = lexgetc()) != endc) {
928 if (c == '\n')
929 awkerr(regx ? nlre : nlstr);
930 if (c == '\\') {
931 switch (c = lexgetc(), c) {
932 case '\\':
933 if (regx)
934 *cp++ = '\\';
935 break;
936
937 case '/':
938 c = '/';
939 break;
940
941 case 'n':
942 c = '\n';
943 break;
944
945 case 'b':
946 c = '\b';
947 break;
948
949 case 't':
950 c = '\t';
951 break;
952
953 case 'r':
954 c = '\r';
955 break;
956
957 case 'f':
958 c = '\f';
959 break;
960
961 case 'v':
962 c = '\v';
963 break;
964
965 case 'a':
966 c = (char)0x07;
967 break;
968
969 case 'x':
970 n = 0;
971 while (iswxdigit(c = lexgetc())) {
972 if (iswdigit(c))
973 c -= '0';
974 else if (iswupper(c))
975 c -= 'A'-10;
976 else
977 c -= 'a'-10;
978 n = (n<<4) + c;
979 }
980 lexungetc(c);
981 c = n;
982 break;
983
984 case '0':
985 case '1':
986 case '2':
987 case '3':
988 case '4':
989 case '5':
990 case '6':
991 case '7':
992 #if 0
993 /*
994 * Posix.2 draft 10 disallows the use of back-referencing - it explicitly
995 * requires processing of the octal escapes both in strings and
996 * regular expressions. The following code is disabled instead of
997 * removed as back-referencing may be reintroduced in a future draft
998 * of the standard.
999 */
1000 /*
1001 * For regular expressions, we disallow
1002 * \ooo to mean octal character, in favour
1003 * of back referencing.
1004 */
1005 if (regx) {
1006 *cp++ = '\\';
1007 break;
1008 }
1009 #endif
1010 max = 3;
1011 n = 0;
1012 do {
1013 n = (n<<3) + c-'0';
1014 if ((c = lexgetc()) > '7' || c < '0')
1015 break;
1016 } while (--max);
1017 lexungetc(c);
1018 /*
1019 * an octal escape sequence must have at least
1020 * 2 digits after the backslash, otherwise
1021 * it gets passed straight thru for possible
1022 * use in backreferencing.
1023 */
1024 if (max == 3) {
1025 *cp++ = '\\';
1026 n += '0';
1027 }
1028 c = n;
1029 break;
1030
1031 case '\n':
1032 continue;
1033
1034 default:
1035 if (c != endc || cmd_line_operand) {
1036 *cp++ = '\\';
1037 if (c == endc)
1038 lexungetc(c);
1039 }
1040 }
1041 }
1042 if (c == WEOF)
1043 awkerr(regx ? eofre : eofstr);
1044 *cp++ = c;
1045 }
1046 *cp = '\0';
1047 return (cp - linebuf);
1048 }
1049
1050 /*
1051 * Build a regular expression NODE.
1052 * Argument is the string holding the expression.
1053 */
1054 NODE *
renode(wchar_t * s)1055 renode(wchar_t *s)
1056 {
1057 NODE *np;
1058 int n;
1059
1060 np = emptynode(RE, 0);
1061 np->n_left = np->n_right = NNULL;
1062 if ((n = REGWCOMP(&np->n_regexp, s)) != REG_OK) {
1063 int m;
1064 char *p;
1065
1066 m = REGWERROR(n, np->n_regexp, NULL, 0);
1067 p = (char *)emalloc(m);
1068 REGWERROR(n, np->n_regexp, p, m);
1069 awkerr("/%S/: %s", s, p);
1070 }
1071 return (np);
1072 }
1073 /*
1074 * Get a character for the lexical analyser routine.
1075 */
1076 static wint_t
lexgetc()1077 lexgetc()
1078 {
1079 wint_t c;
1080 static char **files = &progfiles[0];
1081
1082 if (progfp != FNULL && (c = fgetwc(progfp)) != WEOF)
1083 ;
1084 else {
1085 if (progptr != NULL) {
1086 if (proglen-- <= 0)
1087 c = WEOF;
1088 else
1089 c = *progptr++;
1090 } else {
1091 if (progfp != FNULL) {
1092 if (progfp != stdin)
1093 (void) fclose(progfp);
1094 else
1095 clearerr(progfp);
1096 progfp = FNULL;
1097 }
1098 if (files < progfilep) {
1099 filename = *files++;
1100 lineno = 1;
1101 if (filename[0] == '-' && filename[1] == '\0')
1102 progfp = stdin;
1103 else if ((progfp = fopen(filename, r))
1104 == FNULL) {
1105 (void) fprintf(stderr,
1106 gettext("script file \"%s\""), filename);
1107 exit(1);
1108 }
1109 c = fgetwc(progfp);
1110 }
1111 }
1112 }
1113 if (c == '\n')
1114 ++lineno;
1115 if (conptr >= &context[NCONTEXT])
1116 conptr = &context[0];
1117 if (c != WEOF)
1118 *conptr++ = c;
1119 return (c);
1120 }
1121
1122 /*
1123 * Return a character for lexical analyser.
1124 * Only one returned character is (not enforced) legitimite.
1125 */
1126 static void
lexungetc(wint_t c)1127 lexungetc(wint_t c)
1128 {
1129 if (c == '\n')
1130 --lineno;
1131 if (c != WEOF) {
1132 if (conptr == &context[0])
1133 conptr = &context[NCONTEXT];
1134 *--conptr = '\0';
1135 }
1136 if (progfp != FNULL) {
1137 (void) ungetwc(c, progfp);
1138 return;
1139 }
1140 if (c == WEOF)
1141 return;
1142 *--progptr = c;
1143 proglen++;
1144 }
1145
1146 /*
1147 * Syntax errors during parsing.
1148 */
1149 int
yyerror(const char * s,...)1150 yyerror(const char *s, ...)
1151 {
1152 if (lexlast == FUNC || lexlast == GETLINE || lexlast == KEYWORD)
1153 if (lexlast == KEYWORD)
1154 awkerr(gettext("inadmissible use of reserved keyword"));
1155 else
1156 awkerr(gettext("attempt to redefine builtin function"));
1157 awkerr(s);
1158 return (0);
1159 }
1160
/*
 * Error routine for all awk errors.
 *
 * Takes a printf-style format and arguments and hands them to
 * awkierr() with no errno text requested.
 */
void
awkerr(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(0, fmt, ap);
	va_end(ap);
}
1173
/*
 * Error routine like "awkerr", except that the message is followed by
 * the strerror() text for the errno value in effect at the call.
 */
void
awkperr(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(1, fmt, ap);
	va_end(ap);
}
1187
1188 /*
1189 * Common internal routine for awkerr, awkperr
1190 */
1191 static void
awkierr(int perr,const char * fmt,va_list ap)1192 awkierr(int perr, const char *fmt, va_list ap)
1193 {
1194 static char sep1[] = "\n>>>\t";
1195 static char sep2[] = "\t<<<";
1196 int saveerr = errno;
1197
1198 (void) fprintf(stderr, "%s: ", _cmdname);
1199 if (running) {
1200 (void) fprintf(stderr, gettext("line %u ("),
1201 curnode == NNULL ? 0 : curnode->n_lineno);
1202 if (phase == 0)
1203 (void) fprintf(stderr, "NR=%lld): ",
1204 (INT)exprint(varNR));
1205 else
1206 (void) fprintf(stderr, "%s): ",
1207 phase == BEGIN ? s_BEGIN : s_END);
1208 } else if (lineno != 0) {
1209 (void) fprintf(stderr, gettext("file \"%s\": "), filename);
1210 (void) fprintf(stderr, gettext("line %u: "), lineno);
1211 }
1212 (void) vfprintf(stderr, gettext(fmt), ap);
1213 if (perr == 1)
1214 (void) fprintf(stderr, ": %s", strerror(saveerr));
1215 if (perr != 2 && !running) {
1216 wchar_t *cp;
1217 int n;
1218 int c;
1219
1220 (void) fprintf(stderr, gettext(" Context is:%s"), sep1);
1221 cp = conptr;
1222 n = NCONTEXT;
1223 do {
1224 if (cp >= &context[NCONTEXT])
1225 cp = &context[0];
1226 if ((c = *cp++) != '\0')
1227 (void) fputs(c == '\n' ? sep1 : toprint(c),
1228 stderr);
1229 } while (--n != 0);
1230 (void) fputs(sep2, stderr);
1231 }
1232 (void) fprintf(stderr, "\n");
1233 exit(1);
1234 }
1235
1236 wchar_t *
emalloc(unsigned n)1237 emalloc(unsigned n)
1238 {
1239 wchar_t *cp;
1240
1241 if ((cp = malloc(n)) == NULL)
1242 awkerr(nomem);
1243 return (cp);
1244 }
1245
1246 wchar_t *
erealloc(wchar_t * p,unsigned n)1247 erealloc(wchar_t *p, unsigned n)
1248 {
1249 wchar_t *cp;
1250
1251 if ((cp = realloc(p, n)) == NULL)
1252 awkerr(nomem);
1253 return (cp);
1254 }
1255
1256
/*
 * usage message for awk
 *
 * Writes the synopsis to stderr and returns 2, the exit status used by
 * main() for a usage error.
 */
static int
usage()
{
	const int status = 2;

	(void) fprintf(stderr, gettext(
"Usage: awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n"
" awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] [file ...]\n"));
	return (status);
}
1268
1269
/*
 * Convert a multibyte string to a wide string held in a single
 * function-owned buffer.  The result of the previous call is freed, so
 * the returned pointer is valid only until the next call.  Returns
 * whatever mbstowcsdup() returns.
 */
static wchar_t *
mbconvert(char *str)
{
	static wchar_t *held = 0;

	free(held);		/* free(NULL) is a no-op */
	held = mbstowcsdup(str);
	return (held);
}
1279
/*
 * Convert a wide string to a multibyte string held in a single
 * function-owned buffer.  The result of the previous call is freed, so
 * the returned pointer is valid only until the next call.  Returns
 * whatever wcstombsdup() returns.
 */
char *
mbunconvert(wchar_t *str)
{
	static char *held = 0;

	free(held);		/* free(NULL) is a no-op */
	held = wcstombsdup(str);
	return (held);
}
1289
1290 /*
1291 * Solaris port - following functions are typical MKS functions written
1292 * to work for Solaris.
1293 */
1294
/*
 * Return a newly allocated wide-character copy of the multibyte string
 * s, or NULL if memory is exhausted or s contains an invalid multibyte
 * sequence.  The caller frees the result.
 */
wchar_t *
mbstowcsdup(char *s)
{
	size_t n;
	wchar_t *w;

	/* One wide character per byte, plus the terminator, is enough. */
	n = strlen(s) + 1;
	if ((w = malloc(n * sizeof (wchar_t))) == NULL)
		return (NULL);

	if (mbstowcs(w, s, n) == (size_t)-1) {
		/* Conversion failed: don't leak the buffer. */
		free(w);
		return (NULL);
	}
	return (w);
}
1310
/*
 * Return a newly allocated multibyte copy of the wide string w, or
 * NULL if memory is exhausted or w holds a character that cannot be
 * represented (errno is then as set by wcstombs).  The caller frees
 * the result.
 */
char *
wcstombsdup(wchar_t *w)
{
	size_t cap, len;
	char *mb, *shrunk;

	/* Fetch memory for worst case string length */
	cap = (wcslen(w) + 1) * MB_CUR_MAX;
	if ((mb = malloc(cap)) == NULL)
		return (NULL);

	/* Convert the string */
	if ((len = wcstombs(mb, w, cap)) == (size_t)-1) {
		int saverr = errno;

		free(mb);
		errno = saverr;
		return (NULL);
	}

	/*
	 * Shrink the string down.  If that fails the original buffer is
	 * still valid, so return it instead of leaking it.
	 */
	if ((shrunk = realloc(mb, len + 1)) == NULL)
		return (mb);
	return (shrunk);
}
1339
/*
 * Printable 'control-sequences' for the character values 0..31 and
 * 127.  Entry 0 is for value 127; the entry for value v in 0..31 is
 * at index v + 1.
 */
static const char *const upe_ctrls[] =
{
	"^?",
	"^@",  "^A",  "^B",  "^C",  "^D",  "^E",  "^F",  "^G",
	"^H",  "^I",  "^J",  "^K",  "^L",  "^M",  "^N",  "^O",
	"^P",  "^Q",  "^R",  "^S",  "^T",  "^U",  "^V",  "^W",
	"^X",  "^Y",  "^Z",  "^[",  "^\\", "^]",  "^^",  "^_"
};


/*
 * Return a printable string for the character value c:
 *   - the character itself, if it is printable;
 *   - the control sequence from upe_ctrls[] (table 5-101 in the UPE)
 *     for 127 and for values below 32;
 *   - otherwise one octal escape per byte of its multibyte encoding.
 * The result lives in static storage and is overwritten by the next
 * call.
 */
static const char *
toprint(wchar_t c)
{
	static char mbch[MB_LEN_MAX+1];
	static char buf[5 * MB_LEN_MAX + 1];
	unsigned char *bp;
	int nbytes;
	int used;

	nbytes = wctomb(mbch, c);
	if (nbytes == -1) {
		/* Should never happen */
		(void) sprintf(buf, "\\%x", c);
		return (buf);
	}
	mbch[nbytes] = '\0';

	if (iswprint(c))
		return (mbch);
	if (c == 127)
		return (upe_ctrls[0]);
	if (c < 32) {
		/* Print as in Table 5-101 in the UPE */
		return (upe_ctrls[c+1]);
	}

	/* Print as an octal escape sequence */
	used = 0;
	for (bp = (unsigned char *) mbch; nbytes > 0; --nbytes, ++bp)
		used += sprintf(buf+used, "\\%03o", *bp);
	return (buf);
}
1389
/*
 * Convert a byte offset into the multibyte form of astring into the
 * corresponding wide-character index.
 *
 * Walks astring, adding up the multibyte length of each character,
 * until at least `off' bytes are covered, and returns the number of
 * wide characters consumed.  A character that cannot be converted
 * counts as one byte.  The walk stops at the terminating null, so an
 * offset beyond the end yields the string's length instead of reading
 * past it (wctomb returns 1, not 0, for the null wide character).
 */
static int
wcoff(const wchar_t *astring, const int off)
{
	const wchar_t *s = astring;
	int c = 0;
	char mb[MB_LEN_MAX];

	while (c < off) {
		int n;

		if (*s == L'\0')
			break;
		if ((n = wctomb(mb, *s)) == 0)
			break;
		if (n == -1)
			n = 1;
		c += n;
		s++;
	}

	return (s - astring);
}
1409
1410 #define NREGHASH 64
1411 #define NREGHOLD 1024 /* max number unused entries */
1412
1413 static int nregunref;
1414
1415 struct reghashq {
1416 struct qelem hq;
1417 struct regcache *regcachep;
1418 };
1419
1420 struct regcache {
1421 struct qelem lq;
1422 wchar_t *pattern;
1423 regex_t re;
1424 int refcnt;
1425 struct reghashq hash;
1426 };
1427
1428 static struct qelem reghash[NREGHASH], reglink;
1429
1430 /*
1431 * Generate a hash value of the given wchar string.
1432 * The hashing method is similar to what Java does for strings.
1433 */
1434 static uint_t
regtxthash(const wchar_t * str)1435 regtxthash(const wchar_t *str)
1436 {
1437 int k = 0;
1438
1439 while (*str != L'\0')
1440 k = (31 * k) + *str++;
1441
1442 k += ~(k << 9);
1443 k ^= (k >> 14);
1444 k += (k << 4);
1445 k ^= (k >> 10);
1446
1447 return (k % NREGHASH);
1448 }
1449
1450 int
int_regwcomp(REGEXP * r,const wchar_t * pattern)1451 int_regwcomp(REGEXP *r, const wchar_t *pattern)
1452 {
1453 regex_t re;
1454 char *mbpattern;
1455 int ret;
1456 uint_t key;
1457 struct qelem *qp;
1458 struct regcache *rcp;
1459
1460 key = regtxthash(pattern);
1461 for (qp = reghash[key].q_forw; qp != NULL; qp = qp->q_forw) {
1462 rcp = ((struct reghashq *)qp)->regcachep;
1463 if (*rcp->pattern == *pattern &&
1464 wcscmp(rcp->pattern, pattern) == 0)
1465 break;
1466 }
1467 if (qp != NULL) {
1468 /* update link. put this one at the beginning */
1469 if (rcp != (struct regcache *)reglink.q_forw) {
1470 remque(&rcp->lq);
1471 insque(&rcp->lq, ®link);
1472 }
1473 if (rcp->refcnt == 0)
1474 nregunref--; /* no longer unref'ed */
1475 rcp->refcnt++;
1476 *(struct regcache **)r = rcp;
1477 return (REG_OK);
1478 }
1479
1480 if ((mbpattern = wcstombsdup((wchar_t *)pattern)) == NULL)
1481 return (REG_ESPACE);
1482
1483 ret = regcomp(&re, mbpattern, REG_EXTENDED);
1484
1485 free(mbpattern);
1486
1487 if (ret != REG_OK)
1488 return (ret);
1489
1490 if ((rcp = malloc(sizeof (struct regcache))) == NULL)
1491 return (REG_ESPACE);
1492 rcp->re = re;
1493 if ((rcp->pattern = wsdup(pattern)) == NULL) {
1494 regfree(&re);
1495 free(rcp);
1496 return (REG_ESPACE);
1497 }
1498 rcp->refcnt = 1;
1499 insque(&rcp->lq, ®link);
1500 insque(&rcp->hash.hq, ®hash[key]);
1501 rcp->hash.regcachep = rcp;
1502
1503 *(struct regcache **)r = rcp;
1504 return (ret);
1505 }
1506
1507 void
int_regwfree(REGEXP r)1508 int_regwfree(REGEXP r)
1509 {
1510 int cnt;
1511 struct qelem *qp, *nqp;
1512 struct regcache *rcp;
1513
1514 rcp = (struct regcache *)r;
1515
1516 if (--rcp->refcnt != 0)
1517 return;
1518
1519 /* this cache has no reference */
1520 if (++nregunref < NREGHOLD)
1521 return;
1522
1523 /*
1524 * We've got too much unref'ed regex. Free half of least
1525 * used regex.
1526 */
1527 cnt = 0;
1528 for (qp = reglink.q_forw; qp != NULL; qp = nqp) {
1529 nqp = qp->q_forw;
1530 rcp = (struct regcache *)qp;
1531 if (rcp->refcnt != 0)
1532 continue;
1533
1534 /* free half of them */
1535 if (++cnt < (NREGHOLD / 2))
1536 continue;
1537
1538 /* detach and free */
1539 remque(&rcp->lq);
1540 remque(&rcp->hash.hq);
1541
1542 /* free up */
1543 free(rcp->pattern);
1544 regfree(&rcp->re);
1545 free(rcp);
1546
1547 nregunref--;
1548 }
1549 }
1550
1551 size_t
int_regwerror(int errcode,REGEXP r,char * errbuf,size_t bufsiz)1552 int_regwerror(int errcode, REGEXP r, char *errbuf, size_t bufsiz)
1553 {
1554 struct regcache *rcp;
1555
1556 rcp = (struct regcache *)r;
1557 return (regerror(errcode, &rcp->re, errbuf, bufsiz));
1558 }
1559
1560 int
int_regwexec(REGEXP r,const wchar_t * astring,size_t nsub,int_regwmatch_t * sub,int flags)1561 int_regwexec(REGEXP r, /* compiled RE */
1562 const wchar_t *astring, /* subject string */
1563 size_t nsub, /* number of subexpressions */
1564 int_regwmatch_t *sub, /* subexpression pointers */
1565 int flags)
1566 {
1567 char *mbs;
1568 regmatch_t *mbsub = NULL;
1569 int i;
1570 struct regcache *rcp;
1571
1572 if ((mbs = wcstombsdup((wchar_t *)astring)) == NULL)
1573 return (REG_ESPACE);
1574
1575 if (nsub > 0 && sub) {
1576 if ((mbsub = malloc(nsub * sizeof (regmatch_t))) == NULL)
1577 return (REG_ESPACE);
1578 }
1579
1580 rcp = (struct regcache *)r;
1581
1582 i = regexec(&rcp->re, mbs, nsub, mbsub, flags);
1583
1584 /* Now, adjust the pointers/counts in sub */
1585 if (i == REG_OK && nsub > 0 && mbsub) {
1586 int j, k;
1587
1588 for (j = 0; j < nsub; j++) {
1589 regmatch_t *ms = &mbsub[j];
1590 int_regwmatch_t *ws = &sub[j];
1591
1592 if ((k = ms->rm_so) >= 0) {
1593 ws->rm_so = wcoff(astring, k);
1594 ws->rm_sp = astring + ws->rm_so;
1595 }
1596 if ((k = ms->rm_eo) >= 0) {
1597 ws->rm_eo = wcoff(astring, k);
1598 ws->rm_ep = astring + ws->rm_eo;
1599 }
1600 }
1601 }
1602
1603 free(mbs);
1604 if (mbsub)
1605 free(mbsub);
1606 return (i);
1607 }
1608
1609 int
int_regwdosuba(REGEXP rp,const wchar_t * rpl,const wchar_t * src,wchar_t ** dstp,int len,int * globp)1610 int_regwdosuba(REGEXP rp, /* compiled RE: Pattern */
1611 const wchar_t *rpl, /* replacement string: /rpl/ */
1612 const wchar_t *src, /* source string */
1613 wchar_t **dstp, /* destination string */
1614 int len, /* destination length */
1615 int *globp) /* IN: occurence, 0 for all; OUT: substitutions */
1616 {
1617 wchar_t *dst, *odst;
1618 const wchar_t *ip, *xp;
1619 wchar_t *op;
1620 int i;
1621 wchar_t c;
1622 int glob, iglob = *globp, oglob = 0;
1623 #define NSUB 10
1624 int_regwmatch_t rm[NSUB], *rmp;
1625 int flags;
1626 wchar_t *end;
1627 int regerr;
1628
1629 /* handle overflow of dst. we need "i" more bytes */
1630 #ifdef OVERFLOW
1631 #undef OVERFLOW
1632 #define OVERFLOW(i) { \
1633 int pos = op - dst; \
1634 dst = (wchar_t *)realloc(odst = dst, \
1635 (len += len + i) * sizeof (wchar_t)); \
1636 if (dst == NULL) \
1637 goto nospace; \
1638 op = dst + pos; \
1639 end = dst + len; \
1640 }
1641 #endif
1642
1643 *dstp = dst = (wchar_t *)malloc(len * sizeof (wchar_t));
1644 if (dst == NULL)
1645 return (REG_ESPACE);
1646
1647 if (rp == NULL || rpl == NULL || src == NULL || dst == NULL)
1648 return (REG_EFATAL);
1649
1650 glob = 0; /* match count */
1651 ip = src; /* source position */
1652 op = dst; /* destination position */
1653 end = dst + len;
1654
1655 flags = 0;
1656 while ((regerr = int_regwexec(rp, ip, NSUB, rm, flags)) == REG_OK) {
1657 /* Copy text preceding match */
1658 if (op + (i = rm[0].rm_sp - ip) >= end)
1659 OVERFLOW(i)
1660 while (i--)
1661 *op++ = *ip++;
1662
1663 if (iglob == 0 || ++glob == iglob) {
1664 oglob++;
1665 xp = rpl; /* do substitute */
1666 } else
1667 xp = L"&"; /* preserve text */
1668
1669 /* Perform replacement of matched substing */
1670 while ((c = *xp++) != '\0') {
1671 rmp = NULL;
1672 if (c == '&')
1673 rmp = &rm[0];
1674 else if (c == '\\') {
1675 if ('0' <= *xp && *xp <= '9')
1676 rmp = &rm[*xp++ - '0'];
1677 else if (*xp != '\0')
1678 c = *xp++;
1679 }
1680
1681 if (rmp == NULL) { /* Ordinary character. */
1682 *op++ = c;
1683 if (op >= end)
1684 OVERFLOW(1)
1685 } else if (rmp->rm_sp != NULL && rmp->rm_ep != NULL) {
1686 ip = rmp->rm_sp;
1687 if (op + (i = rmp->rm_ep - rmp->rm_sp) >= end)
1688 OVERFLOW(i)
1689 while (i--)
1690 *op++ = *ip++;
1691 }
1692 }
1693
1694 ip = rm[0].rm_ep;
1695 if (*ip == '\0') /* If at end break */
1696 break;
1697 else if (rm[0].rm_sp == rm[0].rm_ep) {
1698 /* If empty match copy next char */
1699 *op++ = *ip++;
1700 if (op >= end)
1701 OVERFLOW(1)
1702 }
1703 flags = REG_NOTBOL;
1704 }
1705
1706 if (regerr != REG_OK && regerr != REG_NOMATCH)
1707 return (regerr);
1708
1709 /* Copy rest of text */
1710 if (op + (i = wcslen(ip)) >= end)
1711 OVERFLOW(i)
1712 while (i--)
1713 *op++ = *ip++;
1714 *op++ = '\0';
1715
1716 if ((*dstp = dst = (wchar_t *)realloc(odst = dst,
1717 sizeof (wchar_t) * (size_t)(op - dst))) == NULL) {
1718 nospace:
1719 free(odst);
1720 return (REG_ESPACE);
1721 }
1722
1723 *globp = oglob;
1724
1725 return ((oglob == 0) ? REG_NOMATCH : REG_OK);
1726 }
1727