1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 1986, 1994 by Mortice Kern Systems Inc. All rights reserved.
28 */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 /*
33 * awk -- mainline, yylex, etc.
34 *
35 * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
36 */
37
38 #include "awk.h"
39 #include "y.tab.h"
40 #include <stdarg.h>
41 #include <unistd.h>
42 #include <locale.h>
43 #include <search.h>
44
/* Script files named with -f, in command-line order (opened by lexgetc) */
static char *progfiles[NPFILE];
/* One past the last slot of progfiles[] filled in by main() */
static char **progfilep = &progfiles[0];
/* In-memory programme text; NULL when the programme is read from files */
static wchar_t *progptr;
/* Number of characters still unread at progptr */
static int proglen;
/* Circular buffer of recently read characters, shown in diagnostics */
static wchar_t context[NCONTEXT];
/* Next slot of context[] to be written */
static wchar_t *conptr = &context[0];
/* Stdio stream for the script file currently being read */
static FILE *progfp;
/* Name of the current programme source, used in error messages */
static char *filename;
#ifdef	DEBUG
/* Set by the -d option: yylex() prints every token it returns */
static int dflag;
#endif

#define	AWK_EXEC_MAGIC	"<MKS AWKC>"
#define	LEN_EXEC_MAGIC	10

/* Message passed to awkerr() by yylex() for an unmatched bracket */
static char unbal[] = "unbalanced E char";

static void awkarginit(int c, char **av);
static int lexid(wint_t c);
static int lexnumber(wint_t c);
static int lexstring(wint_t endc);
static int lexregexp(wint_t endc);

static void awkvarinit(void);
static wint_t lexgetc(void);
static void lexungetc(wint_t c);
static size_t lexescape(wint_t endc, int regx, int cmd_line_operand);
static void awkierr(int perr, char *fmt, va_list ap);
static int usage(void);
void strescape(wchar_t *str);
static const char *toprint(wint_t);
/* Command name (argv[0]) prefixed to every diagnostic */
char *_cmdname;
static wchar_t *mbconvert(char *str);

/* Defined outside this file; main() uses it to validate -v operands */
extern int isclvar(wchar_t *arg);
80
81 /*
82 * mainline for awk
83 */
84 int
main(int argc,char * argv[])85 main(int argc, char *argv[])
86 {
87 wchar_t *ap;
88 char *cmd;
89
90 cmd = argv[0];
91 _cmdname = cmd;
92
93 linebuf = emalloc(NLINE * sizeof (wchar_t));
94
95 /*
96 * At this point only messaging should be internationalized.
97 * numbers are still scanned as in the Posix locale.
98 */
99 (void) setlocale(LC_ALL, "");
100 (void) setlocale(LC_NUMERIC, "C");
101 #if !defined(TEXT_DOMAIN)
102 #define TEXT_DOMAIN "SYS_TEST"
103 #endif
104 (void) textdomain(TEXT_DOMAIN);
105
106 awkvarinit();
107 /* running = 1; */
108 while (argc > 1 && *argv[1] == '-') {
109 void *save_ptr = NULL;
110 ap = mbstowcsdup(&argv[1][1]);
111 if (ap == NULL)
112 break;
113 if (*ap == '\0') {
114 free(ap);
115 break;
116 }
117 save_ptr = (void *) ap;
118 ++argv;
119 --argc;
120 if (*ap == '-' && ap[1] == '\0')
121 break;
122 for (; *ap != '\0'; ++ap) {
123 switch (*ap) {
124 #ifdef DEBUG
125 case 'd':
126 dflag = 1;
127 continue;
128
129 #endif
130 case 'f':
131 if (argc < 2) {
132 (void) fprintf(stderr,
133 gettext("Missing script file\n"));
134 return (1);
135 }
136 *progfilep++ = argv[1];
137 --argc;
138 ++argv;
139 continue;
140
141 case 'F':
142 if (ap[1] == '\0') {
143 if (argc < 2) {
144 (void) fprintf(stderr,
145 gettext("Missing field separator\n"));
146 return (1);
147 }
148 ap = mbstowcsdup(argv[1]);
149 --argc;
150 ++argv;
151 } else
152 ++ap;
153 strescape(ap);
154 strassign(varFS, linebuf, FALLOC,
155 wcslen(linebuf));
156 break;
157
158 case 'v': {
159 wchar_t *vp;
160 wchar_t *arg;
161
162 if (argc < 2) {
163 (void) fprintf(stderr,
164 gettext("Missing variable assignment\n"));
165 return (1);
166 }
167 arg = mbconvert(argv[1]);
168 /*
169 * Ensure the variable expression
170 * is valid (correct form).
171 */
172 if (((vp = wcschr(arg, '=')) != NULL) &&
173 isclvar(arg)) {
174 *vp = '\0';
175 strescape(vp+1);
176 strassign(vlook(arg), linebuf,
177 FALLOC|FSENSE,
178 wcslen(linebuf));
179 *vp = '=';
180 } else {
181 (void) fprintf(stderr, gettext(
182 "Invalid form for variable "
183 "assignment: %S\n"), arg);
184 return (1);
185 }
186 --argc;
187 ++argv;
188 continue;
189 }
190
191 default:
192 (void) fprintf(stderr,
193 gettext("Unknown option \"-%S\"\n"), ap);
194 return (usage());
195 }
196 break;
197 }
198 if (save_ptr)
199 free(save_ptr);
200 }
201 if (progfilep == &progfiles[0]) {
202 if (argc < 2)
203 return (usage());
204 filename = "[command line]"; /* BUG: NEEDS TRANSLATION */
205 progptr = mbstowcsdup(argv[1]);
206 proglen = wcslen(progptr);
207 --argc;
208 ++argv;
209 }
210
211 argv[0] = cmd;
212
213 awkarginit(argc, argv);
214
215 /* running = 0; */
216 (void) yyparse();
217
218 lineno = 0;
219 /*
220 * Ok, done parsing, so now activate the rest of the nls stuff, set
221 * the radix character.
222 */
223 (void) setlocale(LC_ALL, "");
224 radixpoint = *localeconv()->decimal_point;
225 awk();
226 /* NOTREACHED */
227 return (0);
228 }
229
230 /*
231 * Do initial setup of buffers, etc.
232 * This must be called before most processing
233 * and especially before lexical analysis.
234 * Variables initialised here will be overruled by command
235 * line parameter initialisation.
236 */
237 static void
awkvarinit()238 awkvarinit()
239 {
240 NODE *np;
241
242 (void) setvbuf(stderr, NULL, _IONBF, 0);
243
244 if ((NIOSTREAM = sysconf(_SC_OPEN_MAX) - 4) <= 0) {
245 (void) fprintf(stderr,
246 gettext("not enough available file descriptors"));
247 exit(1);
248 }
249 ofiles = (OFILE *)emalloc(sizeof (OFILE)*NIOSTREAM);
250 #ifdef A_ZERO_POINTERS
251 (void) memset((wchar_t *)ofiles, 0, sizeof (OFILE) * NIOSTREAM);
252 #else
253 {
254 /* initialize file descriptor table */
255 OFILE *fp;
256 for (fp = ofiles; fp < &ofiles[NIOSTREAM]; fp += 1) {
257 fp->f_fp = FNULL;
258 fp->f_mode = 0;
259 fp->f_name = (char *)0;
260 }
261 }
262 #endif
263 constant = intnode((INT)0);
264
265 const0 = intnode((INT)0);
266 const1 = intnode((INT)1);
267 constundef = emptynode(CONSTANT, 0);
268 constundef->n_flags = FSTRING|FVINT;
269 constundef->n_string = _null;
270 constundef->n_strlen = 0;
271 inc_oper = emptynode(ADD, 0);
272 inc_oper->n_right = const1;
273 asn_oper = emptynode(ADD, 0);
274 field0 = node(FIELD, const0, NNULL);
275
276 {
277 RESFUNC near*rp;
278
279 for (rp = &resfuncs[0]; rp->rf_name != (LOCCHARP)NULL; ++rp) {
280 np = finstall(rp->rf_name, rp->rf_func, rp->rf_type);
281 }
282 }
283 {
284 RESERVED near*rp;
285
286 for (rp = &reserved[0]; rp->r_name != (LOCCHARP)NULL; ++rp) {
287 switch (rp->r_type) {
288 case SVAR:
289 case VAR:
290 running = 1;
291 np = vlook(rp->r_name);
292 if (rp->r_type == SVAR)
293 np->n_flags |= FSPECIAL;
294 if (rp->r_svalue != NULL)
295 strassign(np, rp->r_svalue, FSTATIC,
296 (size_t)rp->r_ivalue);
297 else {
298 constant->n_int = rp->r_ivalue;
299 (void) assign(np, constant);
300 }
301 running = 0;
302 break;
303
304 case KEYWORD:
305 kinstall(rp->r_name, (int)rp->r_ivalue);
306 break;
307 }
308 }
309 }
310
311 varNR = vlook(s_NR);
312 varFNR = vlook(s_FNR);
313 varNF = vlook(s_NF);
314 varOFMT = vlook(s_OFMT);
315 varCONVFMT = vlook(s_CONVFMT);
316 varOFS = vlook(s_OFS);
317 varORS = vlook(s_ORS);
318 varRS = vlook(s_RS);
319 varFS = vlook(s_FS);
320 varARGC = vlook(s_ARGC);
321 varSUBSEP = vlook(s_SUBSEP);
322 varENVIRON = vlook(s_ENVIRON);
323 varFILENAME = vlook(s_FILENAME);
324 varSYMTAB = vlook(s_SYMTAB);
325 incNR = node(ASG, varNR, node(ADD, varNR, const1));
326 incFNR = node(ASG, varFNR, node(ADD, varFNR, const1));
327 clrFNR = node(ASG, varFNR, const0);
328 }
329
330 /*
331 * Initialise awk ARGC, ARGV variables.
332 */
333 static void
awkarginit(int ac,char ** av)334 awkarginit(int ac, char **av)
335 {
336 int i;
337 wchar_t *cp;
338
339 ARGVsubi = node(INDEX, vlook(s_ARGV), constant);
340 running = 1;
341 constant->n_int = ac;
342 (void) assign(varARGC, constant);
343 for (i = 0; i < ac; ++i) {
344 cp = mbstowcsdup(av[i]);
345 constant->n_int = i;
346 strassign(exprreduce(ARGVsubi), cp,
347 FSTATIC|FSENSE, wcslen(cp));
348 }
349 running = 0;
350 }
351
352 /*
353 * Clean up when done parsing a function.
354 * All formal parameters, because of a deal (funparm) in
355 * yylex, get put into the symbol table in front of any
356 * global variable of the same name. When the entire
357 * function is parsed, remove these formal dummy nodes
358 * from the symbol table but retain the nodes because
359 * the generated tree points at them.
360 */
361 void
uexit(NODE * np)362 uexit(NODE *np)
363 {
364 NODE *formal;
365
366 while ((formal = getlist(&np)) != NNULL)
367 delsymtab(formal, 0);
368 }
369
/*
 * The lexical analyzer.
 *
 * Returns the next token of the programme.  Besides recognising
 * identifiers, numbers, strings and operators it:
 *   - turns a newline into ';' unless the previous token shows that the
 *     statement continues on the next line;
 *   - inserts a ';' ahead of '}' when the previous token was not ';';
 *   - inserts CONCAT between two adjacent terms (tracked by catterm);
 *   - lexes a regular expression when redelim has been set.
 * The semantic value of a token, where there is one, is left in yylval
 * by the lexid/lexnumber/lexstring/lexregexp helpers.
 *
 * When built with DEBUG, yylex() is a thin wrapper that prints each
 * token if dflag is set, and the real scanner is named yyhex().
 */
int
yylex()
#ifdef	DEBUG
{
	int l;

	l = yyhex();
	if (dflag)
		(void) printf("%d\n", l);
	return (l);
}
yyhex()
#endif
{
	wint_t c, c1;
	int i;
	static int savetoken = 0;	/* token held back for the next call */
	static int wasfield;		/* countdown started by a '$' constant */
	static int isfuncdef;		/* previous token was DEFFUNC */
	static int nbrace, nparen, nbracket;	/* open-bracket depths */
	/* single-character tokens and the symbolic names returned for them */
	static struct ctosymstruct {
		wint_t c, sym;
	} ctosym[] = {
		{ '|', BAR }, { '^', CARAT },
		{ '~', TILDE }, { '<', LANGLE },
		{ '>', RANGLE }, { '+', PLUSC },
		{ '-', HYPHEN }, { '*', STAR },
		{ '/', SLASH }, { '%', PERCENT },
		{ '!', EXCLAMATION }, { '$', DOLLAR },
		{ '[', LSQUARE }, { ']', RSQUARE },
		{ '(', LPAREN }, { ')', RPAREN },
		{ ';', SEMI }, { '{', LBRACE },
		{ '}', RBRACE }, { 0, 0 }
	};

	if (savetoken) {
		/* deliver the token that an earlier call held back */
		c = savetoken;
		savetoken = 0;
	} else if (redelim != '\0') {
		/*
		 * redelim is non-zero: read a regular expression ending at
		 * that delimiter and queue the delimiter as the next token.
		 */
		c = redelim;
		redelim = 0;
		catterm = 0;
		savetoken = c;
		return (lexlast = lexregexp(c));
	} else while ((c = lexgetc()) != WEOF) {
		if (iswalpha(c) || c == '_') {
			c = lexid(c);
		} else if (iswdigit(c) || c == '.') {
			c = lexnumber(c);
		} else if (isWblank(c)) {
			continue;
		} else switch (c) {
#if DOS || OS2
		case 032:			/* ^Z */
			continue;
#endif

		case '"':
			c = lexstring(c);
			break;

		case '#':
			/* comment: skip to end of line, keep the newline */
			while ((c = lexgetc()) != '\n' && c != WEOF)
				;
			lexungetc(c);
			continue;

		case '+':
			if ((c1 = lexgetc()) == '+')
				c = INC;
			else if (c1 == '=')
				c = AADD;
			else
				lexungetc(c1);
			break;

		case '-':
			if ((c1 = lexgetc()) == '-')
				c = DEC;
			else if (c1 == '=')
				c = ASUB;
			else
				lexungetc(c1);
			break;

		case '*':
			if ((c1 = lexgetc()) == '=')
				c = AMUL;
			else if (c1 == '*') {
				/* "**" and "**=" are spellings of EXP/AEXP */
				if ((c1 = lexgetc()) == '=')
					c = AEXP;
				else {
					c = EXP;
					lexungetc(c1);
				}
			} else
				lexungetc(c1);
			break;

		case '^':
			if ((c1 = lexgetc()) == '=') {
				c = AEXP;
			} else {
				c = EXP;
				lexungetc(c1);
			}
			break;

		case '/':
			/*
			 * "/=" is the divide-assign operator unless the
			 * previous token is one of those listed below.
			 */
			if ((c1 = lexgetc()) == '=' &&
			    lexlast != RE && lexlast != NRE &&
			    lexlast != ';' && lexlast != '\n' &&
			    lexlast != ',' && lexlast != '(')
				c = ADIV;
			else
				lexungetc(c1);
			break;

		case '%':
			if ((c1 = lexgetc()) == '=')
				c = AREM;
			else
				lexungetc(c1);
			break;

		case '&':
			if ((c1 = lexgetc()) == '&')
				c = AND;
			else
				lexungetc(c1);
			break;

		case '|':
			if ((c1 = lexgetc()) == '|')
				c = OR;
			else {
				lexungetc(c1);
				/* a single '|' is PIPE only while inprint is set */
				if (inprint)
					c = PIPE;
			}
			break;

		case '>':
			if ((c1 = lexgetc()) == '=')
				c = GE;
			else if (c1 == '>')
				c = APPEND;
			else {
				lexungetc(c1);
				/* WRITE only in a print, outside parentheses */
				if (nparen == 0 && inprint)
					c = WRITE;
			}
			break;

		case '<':
			if ((c1 = lexgetc()) == '=')
				c = LE;
			else
				lexungetc(c1);
			break;

		case '!':
			if ((c1 = lexgetc()) == '=')
				c = NE;
			else if (c1 == '~')
				c = NRE;
			else
				lexungetc(c1);
			break;

		case '=':
			if ((c1 = lexgetc()) == '=')
				c = EQ;
			else {
				lexungetc(c1);
				c = ASG;
			}
			break;

		case '\n':
			/*
			 * A newline is ignored after the tokens listed here
			 * and otherwise becomes a ';'.
			 */
			switch (lexlast) {
			case ')':
				if (catterm || inprint) {
					c = ';';
					break;
				}
			/*FALLTHRU*/
			case AND:
			case OR:
			case COMMA:
			case '{':
			case ELSE:
			case ';':
			case DO:
				continue;

			case '}':
				if (nbrace != 0)
					continue;
				/*FALLTHRU*/

			default:
				c = ';';
				break;
			}
			break;

		/*
		 * NOTE(review): c is a raw input character in this switch
		 * (identifiers were handled by lexid above), so this case
		 * fires only if a character code equals the ELSE token
		 * value -- confirm intent.
		 */
		case ELSE:
			if (lexlast != ';') {
				savetoken = ELSE;
				c = ';';
			}
			break;

		case '(':
			++nparen;
			break;

		case ')':
			if (--nparen < 0)
				awkerr(unbal, "()");
			break;

		case '{':
			nbrace++;
			break;

		case '}':
			if (--nbrace < 0) {
				char brk[3];

				brk[0] = '{';
				brk[1] = '}';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			/* return ';' now and the '}' on the next call */
			if (lexlast != ';') {
				savetoken = c;
				c = ';';
			}
			break;

		case '[':
			++nbracket;
			break;

		case ']':
			if (--nbracket < 0) {
				char brk[3];

				brk[0] = '[';
				brk[1] = ']';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			break;

		case '\\':
			/* backslash-newline is a line continuation */
			if ((c1 = lexgetc()) == '\n')
				continue;
			lexungetc(c1);
			break;

		case ',':
			c = COMMA;
			break;

		case '?':
			c = QUEST;
			break;

		case ':':
			c = COLON;
			break;

		default:
			if (!iswprint(c))
				awkerr(
				    gettext("invalid character \"%s\""),
				    toprint(c));
			break;
		}
		break;
	}

	/*
	 * Concatenation handling: catterm is non-zero when the previous
	 * token ended a term.  If the current token starts another term,
	 * return CONCAT now and hold the token back in savetoken.
	 */
	switch (c) {
	case ']':
		++catterm;
		break;

	case VAR:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else if (!isfuncdef) {
			/* a VAR directly followed by '(' does not end a term */
			if ((c1 = lexgetc()) != '(')
				++catterm;
			lexungetc(c1);
		}
		isfuncdef = 0;
		break;

	case PARM:
	case CONSTANT:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else {
			if (lexlast == '$')
				wasfield = 2;
			++catterm;
		}
		break;

	case INC:
	case DEC:
		if (!catterm || lexlast != CONSTANT || wasfield)
			break;

	/*FALLTHRU*/
	case UFUNC:
	case FUNC:
	case GETLINE:
	case '!':
	case '$':
	case '(':
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		}
		break;

	/* { */ case '}':
		/* the '}' closing the outermost block is followed by ';' */
		if (nbrace == 0)
			savetoken = ';';
	/*FALLTHRU*/
	case ';':
		inprint = 0;
	/*FALLTHRU*/
	default:
		if (c == DEFFUNC)
			isfuncdef = 1;
		catterm = 0;
	}
	/* lexlast keeps the token as it is before the ctosym[] mapping */
	lexlast = c;
	if (wasfield)
		wasfield--;
	/*
	 * Map character constants to symbolic names.
	 */
	for (i = 0; ctosym[i].c != 0; i++)
		if (c == ctosym[i].c) {
			c = ctosym[i].sym;
			break;
		}
	return ((int)c);
}
732
733 /*
734 * Read a number for the lexical analyzer.
735 * Input is the first character of the number.
736 * Return value is the lexical type.
737 */
738 static int
lexnumber(wint_t c)739 lexnumber(wint_t c)
740 {
741 wchar_t *cp;
742 int dotfound = 0;
743 int efound = 0;
744 INT number;
745
746 cp = linebuf;
747 do {
748 if (iswdigit(c))
749 ;
750 else if (c == '.') {
751 if (dotfound++)
752 break;
753 } else if (c == 'e' || c == 'E') {
754 if ((c = lexgetc()) != '-' && c != '+') {
755 lexungetc(c);
756 c = 'e';
757 } else
758 *cp++ = 'e';
759 if (efound++)
760 break;
761 } else
762 break;
763 *cp++ = c;
764 } while ((c = lexgetc()) != WEOF);
765 *cp = '\0';
766 if (dotfound && cp == linebuf+1)
767 return (DOT);
768 lexungetc(c);
769 errno = 0;
770 if (!dotfound && !efound &&
771 ((number = wcstol(linebuf, (wchar_t **)0, 10)), errno != ERANGE))
772 yylval.node = intnode(number);
773 else
774 yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0));
775 return (CONSTANT);
776 }
777
778 /*
779 * Read an identifier.
780 * Input is first character of identifier.
781 * Return VAR.
782 */
783 static int
lexid(wint_t c)784 lexid(wint_t c)
785 {
786 wchar_t *cp;
787 size_t i;
788 NODE *np;
789
790 cp = linebuf;
791 do {
792 *cp++ = c;
793 c = lexgetc();
794 } while (iswalpha(c) || iswdigit(c) || c == '_');
795 *cp = '\0';
796 lexungetc(c);
797 yylval.node = np = vlook(linebuf);
798
799 switch (np->n_type) {
800 case KEYWORD:
801 switch (np->n_keywtype) {
802 case PRINT:
803 case PRINTF:
804 ++inprint;
805 default:
806 return ((int)np->n_keywtype);
807 }
808 /* NOTREACHED */
809
810 case ARRAY:
811 case VAR:
812 /*
813 * If reading the argument list, create a dummy node
814 * for the duration of that function. These variables
815 * can be removed from the symbol table at function end
816 * but they must still exist because the execution tree
817 * knows about them.
818 */
819 if (funparm) {
820 do_funparm:
821 np = emptynode(PARM, i = (cp-linebuf));
822 np->n_flags = FSTRING;
823 np->n_string = _null;
824 np->n_strlen = 0;
825 (void) memcpy(np->n_name, linebuf,
826 (i+1) * sizeof (wchar_t));
827 addsymtab(np);
828 yylval.node = np;
829 } else if (np == varNF || (np == varFS &&
830 (!doing_begin || begin_getline))) {
831 /*
832 * If the user program references NF or sets
833 * FS either outside of a begin block or
834 * in a begin block after a getline then the
835 * input line will be split immediately upon read
836 * rather than when a field is first referenced.
837 */
838 needsplit = 1;
839 } else if (np == varENVIRON)
840 needenviron = 1;
841 /*FALLTHRU*/
842 case PARM:
843 return (VAR);
844
845 case UFUNC:
846 /*
847 * It is ok to redefine functions as parameters
848 */
849 if (funparm) goto do_funparm;
850 /*FALLTHRU*/
851 case FUNC:
852 case GETLINE:
853 /*
854 * When a getline is encountered, clear the 'doing_begin' flag.
855 * This will force the 'needsplit' flag to be set, even inside
856 * a begin block, if FS is altered. (See VAR case above)
857 */
858 if (doing_begin)
859 begin_getline = 1;
860 return (np->n_type);
861 }
862 /* NOTREACHED */
863 return (0);
864 }
865
866 /*
867 * Read a string for the lexical analyzer.
868 * `endc' terminates the string.
869 */
870 static int
lexstring(wint_t endc)871 lexstring(wint_t endc)
872 {
873 size_t length = lexescape(endc, 0, 0);
874
875 yylval.node = stringnode(linebuf, FALLOC, length);
876 return (CONSTANT);
877 }
878
879 /*
880 * Read a regular expression.
881 */
882 static int
lexregexp(wint_t endc)883 lexregexp(wint_t endc)
884 {
885 (void) lexescape(endc, 1, 0);
886 yylval.node = renode(linebuf);
887 return (URE);
888 }
889
890 /*
891 * Process a string, converting the escape characters as required by
892 * 1003.2. The processed string ends up in the global linebuf[]. This
893 * routine also changes the value of 'progfd' - the program file
894 * descriptor, so it should be used with some care. It is presently used to
895 * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()).
896 */
897 void
strescape(wchar_t * str)898 strescape(wchar_t *str)
899 {
900 progptr = str;
901 proglen = wcslen(str) + 1; /* Include \0 */
902 (void) lexescape('\0', 0, 1);
903 progptr = NULL;
904 }
905
906 /*
907 * Read a string or regular expression, terminated by ``endc'',
908 * for lexical analyzer, processing escape sequences.
909 * Return string length.
910 */
911 static size_t
lexescape(wint_t endc,int regx,int cmd_line_operand)912 lexescape(wint_t endc, int regx, int cmd_line_operand)
913 {
914 static char nlre[256];
915 static char nlstr[256];
916 static char eofre[256];
917 static char eofstr[256];
918 int first_time = 1;
919 wint_t c;
920 wchar_t *cp;
921 int n, max;
922
923 if (first_time == 1) {
924 (void) strcpy(nlre, gettext("Newline in regular expression\n"));
925 (void) strcpy(nlstr, gettext("Newline in string\n"));
926 (void) strcpy(eofre, gettext("EOF in regular expression\n"));
927 (void) strcpy(eofstr, gettext("EOF in string\n"));
928 first_time = 0;
929 }
930
931 cp = linebuf;
932 while ((c = lexgetc()) != endc) {
933 if (c == '\n')
934 awkerr(regx ? nlre : nlstr);
935 if (c == '\\') {
936 switch (c = lexgetc(), c) {
937 case '\\':
938 if (regx)
939 *cp++ = '\\';
940 break;
941
942 case '/':
943 c = '/';
944 break;
945
946 case 'n':
947 c = '\n';
948 break;
949
950 case 'b':
951 c = '\b';
952 break;
953
954 case 't':
955 c = '\t';
956 break;
957
958 case 'r':
959 c = '\r';
960 break;
961
962 case 'f':
963 c = '\f';
964 break;
965
966 case 'v':
967 c = '\v';
968 break;
969
970 case 'a':
971 c = (char)0x07;
972 break;
973
974 case 'x':
975 n = 0;
976 while (iswxdigit(c = lexgetc())) {
977 if (iswdigit(c))
978 c -= '0';
979 else if (iswupper(c))
980 c -= 'A'-10;
981 else
982 c -= 'a'-10;
983 n = (n<<4) + c;
984 }
985 lexungetc(c);
986 c = n;
987 break;
988
989 case '0':
990 case '1':
991 case '2':
992 case '3':
993 case '4':
994 case '5':
995 case '6':
996 case '7':
997 #if 0
998 /*
999 * Posix.2 draft 10 disallows the use of back-referencing - it explicitly
1000 * requires processing of the octal escapes both in strings and
1001 * regular expressions. The following code is disabled instead of
1002 * removed as back-referencing may be reintroduced in a future draft
1003 * of the standard.
1004 */
1005 /*
1006 * For regular expressions, we disallow
1007 * \ooo to mean octal character, in favour
1008 * of back referencing.
1009 */
1010 if (regx) {
1011 *cp++ = '\\';
1012 break;
1013 }
1014 #endif
1015 max = 3;
1016 n = 0;
1017 do {
1018 n = (n<<3) + c-'0';
1019 if ((c = lexgetc()) > '7' || c < '0')
1020 break;
1021 } while (--max);
1022 lexungetc(c);
1023 /*
1024 * an octal escape sequence must have at least
1025 * 2 digits after the backslash, otherwise
1026 * it gets passed straight thru for possible
1027 * use in backreferencing.
1028 */
1029 if (max == 3) {
1030 *cp++ = '\\';
1031 n += '0';
1032 }
1033 c = n;
1034 break;
1035
1036 case '\n':
1037 continue;
1038
1039 default:
1040 if (c != endc || cmd_line_operand) {
1041 *cp++ = '\\';
1042 if (c == endc)
1043 lexungetc(c);
1044 }
1045 }
1046 }
1047 if (c == WEOF)
1048 awkerr(regx ? eofre : eofstr);
1049 *cp++ = c;
1050 }
1051 *cp = '\0';
1052 return (cp - linebuf);
1053 }
1054
1055 /*
1056 * Build a regular expression NODE.
1057 * Argument is the string holding the expression.
1058 */
1059 NODE *
renode(wchar_t * s)1060 renode(wchar_t *s)
1061 {
1062 NODE *np;
1063 int n;
1064
1065 np = emptynode(RE, 0);
1066 np->n_left = np->n_right = NNULL;
1067 if ((n = REGWCOMP(&np->n_regexp, s)) != REG_OK) {
1068 int m;
1069 char *p;
1070
1071 m = REGWERROR(n, np->n_regexp, NULL, 0);
1072 p = (char *)emalloc(m);
1073 REGWERROR(n, np->n_regexp, p, m);
1074 awkerr("/%S/: %s", s, p);
1075 }
1076 return (np);
1077 }
1078 /*
1079 * Get a character for the lexical analyser routine.
1080 */
1081 static wint_t
lexgetc()1082 lexgetc()
1083 {
1084 wint_t c;
1085 static char **files = &progfiles[0];
1086
1087 if (progfp != FNULL && (c = fgetwc(progfp)) != WEOF)
1088 ;
1089 else {
1090 if (progptr != NULL) {
1091 if (proglen-- <= 0)
1092 c = WEOF;
1093 else
1094 c = *progptr++;
1095 } else {
1096 if (progfp != FNULL)
1097 if (progfp != stdin)
1098 (void) fclose(progfp);
1099 else
1100 clearerr(progfp);
1101 progfp = FNULL;
1102 if (files < progfilep) {
1103 filename = *files++;
1104 lineno = 1;
1105 if (filename[0] == '-' && filename[1] == '\0')
1106 progfp = stdin;
1107 else if ((progfp = fopen(filename, r))
1108 == FNULL) {
1109 (void) fprintf(stderr,
1110 gettext("script file \"%s\""), filename);
1111 exit(1);
1112 }
1113 c = fgetwc(progfp);
1114 }
1115 }
1116 }
1117 if (c == '\n')
1118 ++lineno;
1119 if (conptr >= &context[NCONTEXT])
1120 conptr = &context[0];
1121 if (c != WEOF)
1122 *conptr++ = c;
1123 return (c);
1124 }
1125
1126 /*
1127 * Return a character for lexical analyser.
1128 * Only one returned character is (not enforced) legitimite.
1129 */
1130 static void
lexungetc(wint_t c)1131 lexungetc(wint_t c)
1132 {
1133 if (c == '\n')
1134 --lineno;
1135 if (c != WEOF) {
1136 if (conptr == &context[0])
1137 conptr = &context[NCONTEXT];
1138 *--conptr = '\0';
1139 }
1140 if (progfp != FNULL) {
1141 (void) ungetwc(c, progfp);
1142 return;
1143 }
1144 if (c == WEOF)
1145 return;
1146 *--progptr = c;
1147 proglen++;
1148 }
1149
1150 /*
1151 * Syntax errors during parsing.
1152 */
1153 void
yyerror(char * s,...)1154 yyerror(char *s, ...)
1155 {
1156 if (lexlast == FUNC || lexlast == GETLINE || lexlast == KEYWORD)
1157 if (lexlast == KEYWORD)
1158 awkerr(gettext("inadmissible use of reserved keyword"));
1159 else
1160 awkerr(gettext("attempt to redefine builtin function"));
1161 awkerr(s);
1162 }
1163
/*
 * Error routine for all awk errors.
 * Takes a printf-style format and arguments and passes them to
 * awkierr() with perr == 0 (no errno text).
 */
/* ARGSUSED */
void
awkerr(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(0, fmt, ap);
	va_end(ap);
}
1177
/*
 * Like "awkerr", but the message is followed by the text for the
 * current errno: the arguments are passed to awkierr() with perr == 1.
 */
/* ARGSUSED */
void
awkperr(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	awkierr(1, fmt, ap);
	va_end(ap);
}
1192
1193 /*
1194 * Common internal routine for awkerr, awkperr
1195 */
1196 static void
awkierr(int perr,char * fmt,va_list ap)1197 awkierr(int perr, char *fmt, va_list ap)
1198 {
1199 static char sep1[] = "\n>>>\t";
1200 static char sep2[] = "\t<<<";
1201 int saveerr = errno;
1202
1203 (void) fprintf(stderr, "%s: ", _cmdname);
1204 if (running) {
1205 (void) fprintf(stderr, gettext("line %u ("),
1206 curnode == NNULL ? 0 : curnode->n_lineno);
1207 if (phase == 0)
1208 (void) fprintf(stderr, "NR=%lld): ",
1209 (INT)exprint(varNR));
1210 else
1211 (void) fprintf(stderr, "%s): ",
1212 phase == BEGIN ? s_BEGIN : s_END);
1213 } else if (lineno != 0) {
1214 (void) fprintf(stderr, gettext("file \"%s\": "), filename);
1215 (void) fprintf(stderr, gettext("line %u: "), lineno);
1216 }
1217 (void) vfprintf(stderr, gettext(fmt), ap);
1218 if (perr == 1)
1219 (void) fprintf(stderr, ": %s", strerror(saveerr));
1220 if (perr != 2 && !running) {
1221 wchar_t *cp;
1222 int n;
1223 int c;
1224
1225 (void) fprintf(stderr, gettext(" Context is:%s"), sep1);
1226 cp = conptr;
1227 n = NCONTEXT;
1228 do {
1229 if (cp >= &context[NCONTEXT])
1230 cp = &context[0];
1231 if ((c = *cp++) != '\0')
1232 (void) fputs(c == '\n' ? sep1 : toprint(c),
1233 stderr);
1234 } while (--n != 0);
1235 (void) fputs(sep2, stderr);
1236 }
1237 (void) fprintf(stderr, "\n");
1238 exit(1);
1239 }
1240
1241 wchar_t *
emalloc(unsigned n)1242 emalloc(unsigned n)
1243 {
1244 wchar_t *cp;
1245
1246 if ((cp = malloc(n)) == NULL)
1247 awkerr(nomem);
1248 return (cp);
1249 }
1250
1251 wchar_t *
erealloc(wchar_t * p,unsigned n)1252 erealloc(wchar_t *p, unsigned n)
1253 {
1254 wchar_t *cp;
1255
1256 if ((cp = realloc(p, n)) == NULL)
1257 awkerr(nomem);
1258 return (cp);
1259 }
1260
1261
/*
 * Print the awk usage synopsis on stderr.
 * Returns 2, which main() uses as the exit status.
 */
static int
usage()
{
	const int status = 2;

	(void) fprintf(stderr, gettext(
"Usage: awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n"
" awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] [file ...]\n"));
	return (status);
}
1273
1274
/*
 * Convert a multibyte string to a wide string kept in a single static
 * slot: the result of the previous call is freed first, so at most one
 * result is valid at a time.  Returns whatever mbstowcsdup() returns.
 */
static wchar_t *
mbconvert(char *str)
{
	static wchar_t *prev = NULL;

	/* free(NULL) is a no-op, so the first call needs no special case */
	free(prev);
	prev = mbstowcsdup(str);
	return (prev);
}
1284
/*
 * Convert a wide string to a multibyte string kept in a single static
 * slot: the result of the previous call is freed first, so at most one
 * result is valid at a time.  Returns whatever wcstombsdup() returns.
 */
char *
mbunconvert(wchar_t *str)
{
	static char *prev = NULL;

	/* free(NULL) is a no-op, so the first call needs no special case */
	free(prev);
	prev = wcstombsdup(str);
	return (prev);
}
1294
1295 /*
1296 * Solaris port - following functions are typical MKS functions written
1297 * to work for Solaris.
1298 */
1299
/*
 * Return a newly allocated wide-character copy of the multibyte
 * string s.  The caller frees the result.
 * Returns NULL if memory cannot be allocated or s contains an invalid
 * multibyte sequence; the temporary buffer is released in that case.
 */
wchar_t *
mbstowcsdup(char *s)
{
	size_t n;
	wchar_t *w;

	/* a wide string never has more characters than s has bytes */
	n = strlen(s) + 1;
	if ((w = malloc(n * sizeof (wchar_t))) == NULL)
		return (NULL);

	if (mbstowcs(w, s, n) == (size_t)-1) {
		free(w);
		return (NULL);
	}
	return (w);
}
1316
/*
 * Return a newly allocated multibyte copy of the wide string w.
 * The caller frees the result.
 * Returns NULL if memory cannot be allocated or if w holds a character
 * that cannot be converted; in the latter case errno is left as set by
 * wcstombs().
 */
char *
wcstombsdup(wchar_t *w)
{
	size_t cap;
	size_t len;
	char *mb;
	char *shrunk;

	/* Fetch memory for worst case string length */
	cap = (wcslen(w) + 1) * MB_CUR_MAX;
	if ((mb = malloc(cap)) == NULL)
		return (NULL);

	/* Convert the string */
	len = wcstombs(mb, w, cap);
	if (len == (size_t)-1) {
		int saverr = errno;

		free(mb);
		errno = saverr;
		return (NULL);
	}

	/*
	 * Shrink the string down.  Should realloc() fail, the original
	 * (larger) buffer is still valid, so return that rather than
	 * leaking it.
	 */
	if ((shrunk = realloc(mb, len + 1)) != NULL)
		mb = shrunk;
	return (mb);
}
1345
/*
 * The upe_ctrls[] table contains the printable 'control-sequences' for the
 * character values 0..31 and 127.  The first entry is for value 127; the
 * entry for a character value v in 0..31 is at index v + 1.
 */
static const char *const upe_ctrls[] =
{
	"^?",
	"^@",  "^A",  "^B",  "^C",  "^D",  "^E",  "^F",  "^G",
	"^H",  "^I",  "^J",  "^K",  "^L",  "^M",  "^N",  "^O",
	"^P",  "^Q",  "^R",  "^S",  "^T",  "^U",  "^V",  "^W",
	"^X",  "^Y",  "^Z",  "^[",  "^\\", "^]",  "^^",  "^_"
};


/*
 * Return a printable string corresponding to the given character value.  If
 * the character is printable, simply return it as the string.  If it is in
 * the range specified by table 5-101 in the UPE, return the corresponding
 * string.  Otherwise, return an octal escape sequence, one \ooo group per
 * byte of the character's multibyte form.
 *
 * The result points at static storage (or at a table entry) and is
 * overwritten by the next call.
 */
static const char *
toprint(wint_t c)
{
	int n, len;
	unsigned char *ptr;
	static char mbch[MB_LEN_MAX+1];
	static char buf[5 * MB_LEN_MAX + 1];

	if ((n = wctomb(mbch, (wchar_t)c)) == -1) {
		/* Should never happen */
		(void) snprintf(buf, sizeof (buf), "\\%x", (unsigned int)c);
		return (buf);
	}
	mbch[n] = '\0';
	if (iswprint(c)) {
		return (mbch);
	} else if (c == 127) {
		return (upe_ctrls[0]);
	} else if ((unsigned long)c < 32) {
		/*
		 * Print as in Table 5-101 in the UPE.  The unsigned compare
		 * keeps a negative value from indexing before the table.
		 */
		return (upe_ctrls[c+1]);
	} else {
		/* Print as an octal escape sequence */
		for (len = 0, ptr = (unsigned char *) mbch; 0 < n; --n, ++ptr)
			len += snprintf(buf+len, sizeof (buf) - len,
			    "\\%03o", *ptr);
	}
	return (buf);
}
1396
/*
 * Convert a byte offset into the multibyte form of astring to the
 * corresponding index into the wide string itself.
 *
 * Walks astring, adding up the multibyte length of each character,
 * until at least `off' bytes are covered or the end of the string is
 * reached.  A character that cannot be converted counts as one byte.
 * Returns the number of wide characters walked over; an offset past
 * the end of the string yields the string's length.
 */
static int
wcoff(const wchar_t *astring, const int off)
{
	const wchar_t *s = astring;
	int c = 0;
	char mb[MB_LEN_MAX];

	while (c < off) {
		int n;

		/*
		 * wctomb() returns 1, not 0, for the terminating null, so
		 * the end of the string has to be tested for explicitly.
		 */
		if (*s == L'\0')
			break;
		if ((n = wctomb(mb, *s)) == 0)
			break;
		if (n == -1)
			n = 1;
		c += n;
		s++;
	}

	return (s - astring);
}
1416
/*
 * Cache of compiled regular expressions, used by int_regwcomp() and
 * int_regwfree().
 */
#define	NREGHASH	64	/* number of buckets in reghash[] */
#define	NREGHOLD	1024	/* max number unused entries */

/* Number of cache entries whose refcnt is currently 0 */
static int nregunref;

/* Hash-chain link; regcachep points back at the entry that embeds it */
struct reghashq {
	struct qelem hq;
	struct regcache *regcachep;
};

/* One cached pattern together with its compiled form */
struct regcache {
	struct qelem lq;	/* link in the reglink list; first member */
	wchar_t *pattern;	/* wide pattern text, compared on lookup */
	regex_t re;		/* result of regcomp() for the pattern */
	int refcnt;		/* users handed this entry and not yet freed */
	struct reghashq hash;	/* link in the reghash[] bucket chain */
};

/* Bucket heads indexed by regtxthash(), and the head of the list of all entries */
static struct qelem reghash[NREGHASH], reglink;
1436
1437 /*
1438 * Generate a hash value of the given wchar string.
1439 * The hashing method is similar to what Java does for strings.
1440 */
1441 static uint_t
regtxthash(const wchar_t * str)1442 regtxthash(const wchar_t *str)
1443 {
1444 int k = 0;
1445
1446 while (*str != L'\0')
1447 k = (31 * k) + *str++;
1448
1449 k += ~(k << 9);
1450 k ^= (k >> 14);
1451 k += (k << 4);
1452 k ^= (k >> 10);
1453
1454 return (k % NREGHASH);
1455 }
1456
1457 int
int_regwcomp(REGEXP * r,const wchar_t * pattern)1458 int_regwcomp(REGEXP *r, const wchar_t *pattern)
1459 {
1460 regex_t re;
1461 char *mbpattern;
1462 int ret;
1463 uint_t key;
1464 struct qelem *qp;
1465 struct regcache *rcp;
1466
1467 key = regtxthash(pattern);
1468 for (qp = reghash[key].q_forw; qp != NULL; qp = qp->q_forw) {
1469 rcp = ((struct reghashq *)qp)->regcachep;
1470 if (*rcp->pattern == *pattern &&
1471 wcscmp(rcp->pattern, pattern) == 0)
1472 break;
1473 }
1474 if (qp != NULL) {
1475 /* update link. put this one at the beginning */
1476 if (rcp != (struct regcache *)reglink.q_forw) {
1477 remque(&rcp->lq);
1478 insque(&rcp->lq, ®link);
1479 }
1480 if (rcp->refcnt == 0)
1481 nregunref--; /* no longer unref'ed */
1482 rcp->refcnt++;
1483 *(struct regcache **)r = rcp;
1484 return (REG_OK);
1485 }
1486
1487 if ((mbpattern = wcstombsdup((wchar_t *)pattern)) == NULL)
1488 return (REG_ESPACE);
1489
1490 ret = regcomp(&re, mbpattern, REG_EXTENDED);
1491
1492 free(mbpattern);
1493
1494 if (ret != REG_OK)
1495 return (ret);
1496
1497 if ((rcp = malloc(sizeof (struct regcache))) == NULL)
1498 return (REG_ESPACE);
1499 rcp->re = re;
1500 if ((rcp->pattern = wsdup(pattern)) == NULL) {
1501 regfree(&re);
1502 free(rcp);
1503 return (REG_ESPACE);
1504 }
1505 rcp->refcnt = 1;
1506 insque(&rcp->lq, ®link);
1507 insque(&rcp->hash.hq, ®hash[key]);
1508 rcp->hash.regcachep = rcp;
1509
1510 *(struct regcache **)r = rcp;
1511 return (ret);
1512 }
1513
1514 void
int_regwfree(REGEXP r)1515 int_regwfree(REGEXP r)
1516 {
1517 int cnt;
1518 struct qelem *qp, *nqp;
1519 struct regcache *rcp;
1520
1521 rcp = (struct regcache *)r;
1522
1523 if (--rcp->refcnt != 0)
1524 return;
1525
1526 /* this cache has no reference */
1527 if (++nregunref < NREGHOLD)
1528 return;
1529
1530 /*
1531 * We've got too much unref'ed regex. Free half of least
1532 * used regex.
1533 */
1534 cnt = 0;
1535 for (qp = reglink.q_forw; qp != NULL; qp = nqp) {
1536 nqp = qp->q_forw;
1537 rcp = (struct regcache *)qp;
1538 if (rcp->refcnt != 0)
1539 continue;
1540
1541 /* free half of them */
1542 if (++cnt < (NREGHOLD / 2))
1543 continue;
1544
1545 /* detach and free */
1546 remque(&rcp->lq);
1547 remque(&rcp->hash.hq);
1548
1549 /* free up */
1550 free(rcp->pattern);
1551 regfree(&rcp->re);
1552 free(rcp);
1553
1554 nregunref--;
1555 }
1556 }
1557
1558 size_t
int_regwerror(int errcode,REGEXP r,char * errbuf,size_t bufsiz)1559 int_regwerror(int errcode, REGEXP r, char *errbuf, size_t bufsiz)
1560 {
1561 struct regcache *rcp;
1562
1563 rcp = (struct regcache *)r;
1564 return (regerror(errcode, &rcp->re, errbuf, bufsiz));
1565 }
1566
1567 int
int_regwexec(REGEXP r,const wchar_t * astring,size_t nsub,int_regwmatch_t * sub,int flags)1568 int_regwexec(REGEXP r, /* compiled RE */
1569 const wchar_t *astring, /* subject string */
1570 size_t nsub, /* number of subexpressions */
1571 int_regwmatch_t *sub, /* subexpression pointers */
1572 int flags)
1573 {
1574 char *mbs;
1575 regmatch_t *mbsub = NULL;
1576 int i;
1577 struct regcache *rcp;
1578
1579 if ((mbs = wcstombsdup((wchar_t *)astring)) == NULL)
1580 return (REG_ESPACE);
1581
1582 if (nsub > 0 && sub) {
1583 if ((mbsub = malloc(nsub * sizeof (regmatch_t))) == NULL)
1584 return (REG_ESPACE);
1585 }
1586
1587 rcp = (struct regcache *)r;
1588
1589 i = regexec(&rcp->re, mbs, nsub, mbsub, flags);
1590
1591 /* Now, adjust the pointers/counts in sub */
1592 if (i == REG_OK && nsub > 0 && mbsub) {
1593 int j, k;
1594
1595 for (j = 0; j < nsub; j++) {
1596 regmatch_t *ms = &mbsub[j];
1597 int_regwmatch_t *ws = &sub[j];
1598
1599 if ((k = ms->rm_so) >= 0) {
1600 ws->rm_so = wcoff(astring, k);
1601 ws->rm_sp = astring + ws->rm_so;
1602 }
1603 if ((k = ms->rm_eo) >= 0) {
1604 ws->rm_eo = wcoff(astring, k);
1605 ws->rm_ep = astring + ws->rm_eo;
1606 }
1607 }
1608 }
1609
1610 free(mbs);
1611 if (mbsub)
1612 free(mbsub);
1613 return (i);
1614 }
1615
1616 int
int_regwdosuba(REGEXP rp,const wchar_t * rpl,const wchar_t * src,wchar_t ** dstp,int len,int * globp)1617 int_regwdosuba(REGEXP rp, /* compiled RE: Pattern */
1618 const wchar_t *rpl, /* replacement string: /rpl/ */
1619 const wchar_t *src, /* source string */
1620 wchar_t **dstp, /* destination string */
1621 int len, /* destination length */
1622 int *globp) /* IN: occurence, 0 for all; OUT: substitutions */
1623 {
1624 wchar_t *dst, *odst;
1625 const wchar_t *ip, *xp;
1626 wchar_t *op;
1627 int i;
1628 wchar_t c;
1629 int glob, iglob = *globp, oglob = 0;
1630 #define NSUB 10
1631 int_regwmatch_t rm[NSUB], *rmp;
1632 int flags;
1633 wchar_t *end;
1634 int regerr;
1635
1636 /* handle overflow of dst. we need "i" more bytes */
1637 #ifdef OVERFLOW
1638 #undef OVERFLOW
1639 #define OVERFLOW(i) { \
1640 int pos = op - dst; \
1641 dst = (wchar_t *)realloc(odst = dst, \
1642 (len += len + i) * sizeof (wchar_t)); \
1643 if (dst == NULL) \
1644 goto nospace; \
1645 op = dst + pos; \
1646 end = dst + len; \
1647 }
1648 #endif
1649
1650 *dstp = dst = (wchar_t *)malloc(len * sizeof (wchar_t));
1651 if (dst == NULL)
1652 return (REG_ESPACE);
1653
1654 if (rp == NULL || rpl == NULL || src == NULL || dst == NULL)
1655 return (REG_EFATAL);
1656
1657 glob = 0; /* match count */
1658 ip = src; /* source position */
1659 op = dst; /* destination position */
1660 end = dst + len;
1661
1662 flags = 0;
1663 while ((regerr = int_regwexec(rp, ip, NSUB, rm, flags)) == REG_OK) {
1664 /* Copy text preceding match */
1665 if (op + (i = rm[0].rm_sp - ip) >= end)
1666 OVERFLOW(i)
1667 while (i--)
1668 *op++ = *ip++;
1669
1670 if (iglob == 0 || ++glob == iglob) {
1671 oglob++;
1672 xp = rpl; /* do substitute */
1673 } else
1674 xp = L"&"; /* preserve text */
1675
1676 /* Perform replacement of matched substing */
1677 while ((c = *xp++) != '\0') {
1678 rmp = NULL;
1679 if (c == '&')
1680 rmp = &rm[0];
1681 else if (c == '\\') {
1682 if ('0' <= *xp && *xp <= '9')
1683 rmp = &rm[*xp++ - '0'];
1684 else if (*xp != '\0')
1685 c = *xp++;
1686 }
1687
1688 if (rmp == NULL) { /* Ordinary character. */
1689 *op++ = c;
1690 if (op >= end)
1691 OVERFLOW(1)
1692 } else if (rmp->rm_sp != NULL && rmp->rm_ep != NULL) {
1693 ip = rmp->rm_sp;
1694 if (op + (i = rmp->rm_ep - rmp->rm_sp) >= end)
1695 OVERFLOW(i)
1696 while (i--)
1697 *op++ = *ip++;
1698 }
1699 }
1700
1701 ip = rm[0].rm_ep;
1702 if (*ip == '\0') /* If at end break */
1703 break;
1704 else if (rm[0].rm_sp == rm[0].rm_ep) {
1705 /* If empty match copy next char */
1706 *op++ = *ip++;
1707 if (op >= end)
1708 OVERFLOW(1)
1709 }
1710 flags = REG_NOTBOL;
1711 }
1712
1713 if (regerr != REG_OK && regerr != REG_NOMATCH)
1714 return (regerr);
1715
1716 /* Copy rest of text */
1717 if (op + (i = wcslen(ip)) >= end)
1718 OVERFLOW(i)
1719 while (i--)
1720 *op++ = *ip++;
1721 *op++ = '\0';
1722
1723 if ((*dstp = dst = (wchar_t *)realloc(odst = dst,
1724 sizeof (wchar_t) * (size_t)(op - dst))) == NULL) {
1725 nospace:
1726 free(odst);
1727 return (REG_ESPACE);
1728 }
1729
1730 *globp = oglob;
1731
1732 return ((oglob == 0) ? REG_NOMATCH : REG_OK);
1733 }
1734