1 %{
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License, Version 1.0 only
7 * (the "License"). You may not use this file except in compliance
8 * with the License.
9 *
10 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11 * or http://www.opensolaris.org/os/licensing.
12 * See the License for the specific language governing permissions
13 * and limitations under the License.
14 *
15 * When distributing Covered Code, include this CDDL HEADER in each
16 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17 * If applicable, add the following below this CDDL HEADER, with the
18 * fields enclosed by brackets "[]" replaced with your own identifying
19 * information: Portions Copyright [yyyy] [name of copyright owner]
20 *
21 * CDDL HEADER END
22 */
23 %}
24 /*
25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31
32 /* Copyright (c) 1987, 1988 Microsoft Corporation */
33 /* All Rights Reserved */
34
35 /*
36 * Copyright 2013 Damian Bogel. All rights reserved.
37 */
38
39 /*
40 * egrep -- print lines containing (or not containing) a regular expression
41 *
42 * status returns:
43 * 0 - ok, and some matches
44 * 1 - ok, but no matches
45 * 2 - some error; matches irrelevant
46 */
47 %token CHAR MCHAR DOT MDOT CCL NCCL MCCL NMCCL OR CAT STAR PLUS QUEST
48 %left OR
49 %left CHAR MCHAR DOT CCL NCCL MCCL NMCCL '('
50 %left CAT
51 %left STAR PLUS QUEST
52
53 %{
54 #include <stdio.h>
55 #include <ctype.h>
56 #include <memory.h>
57 #include <wchar.h>
58 #include <wctype.h>
59 #include <widec.h>
60 #include <stdlib.h>
61 #include <limits.h>
62 #include <locale.h>
63
64 #define STDIN_FILENAME gettext("(standard input)")
65
66 #define BLKSIZE 512 /* size of reported disk blocks */
67 #define EBUFSIZ 8192
68 #define MAXLIN 350
69 #define NCHARS 256
70 #define MAXPOS 4000
71 #define NSTATES 64
72 #define FINAL -1
73 #define RIGHT '\n' /* serves as record separator and as $ */
74 #define LEFT '\n' /* beginning of line */
75 int gotofn[NSTATES][NCHARS];
76 int state[NSTATES];
77 int out[NSTATES];
78 int line = 1;
79 int *name;
80 int *left;
81 int *right;
82 int *parent;
83 int *foll;
84 int *positions;
85 char *chars;
86 wchar_t *lower;
87 wchar_t *upper;
88 int maxlin, maxclin, maxwclin, maxpos;
89 int nxtpos = 0;
90 int inxtpos;
91 int nxtchar = 0;
92 int *tmpstat;
93 int *initstat;
94 int istat;
95 int nstate = 1;
96 int xstate;
97 int count;
98 int icount;
99 char *input;
100
101
102 wchar_t lyylval;
103 wchar_t nextch();
104 wchar_t maxmin();
105 int compare();
106 void overflo();
107
108 char reinit = 0;
109
110 long long lnum;
111 int bflag;
112 int cflag;
113 int eflag;
114 int fflag;
115 int Hflag;
116 int hflag;
117 int iflag;
118 int lflag;
119 int nflag;
120 int qflag;
121 int vflag;
122 int nfile;
123 long long blkno;
124 long long tln;
125 int nsucc;
126 int badbotch;
127 extern char *optarg;
128 extern int optind;
129
130 int f;
131 FILE *expfile;
132 %}
133
134 %%
135 s: t
136 {
137 unary(FINAL, $1);
138 line--;
139 }
140 ;
141 t: b r
142 { $$ = node(CAT, $1, $2); }
143 | OR b r OR
144 { $$ = node(CAT, $2, $3); }
145 | OR b r
146 { $$ = node(CAT, $2, $3); }
147 | b r OR
148 { $$ = node(CAT, $1, $2); }
149 ;
150 b:
151 { /* if(multibyte)
152 $$ = mdotenter();
153 else */
154 $$ = enter(DOT);
155 $$ = unary(STAR, $$);
156 }
157 ;
158 r: CHAR
159 { $$ = iflag && isalpha($1) ?
160 node(OR, enter(tolower($1)), enter(toupper($1))) : enter($1); }
161 | MCHAR
162 { $$ = (iflag && iswalpha(lyylval)) ?
163 node(OR, mchar(towlower(lyylval)), mchar(towupper(lyylval))) :
164 mchar(lyylval); }
165 | DOT
166 { if(multibyte)
167 $$ = mdotenter();
168 else
169 $$ = enter(DOT);
170 }
171 | CCL
172 { $$ = cclenter(CCL); }
173 | NCCL
174 { $$ = cclenter(NCCL); }
175 | MCCL
176 { $$ = ccl(CCL); }
177 | NMCCL
178 { $$ = ccl(NCCL); }
179 ;
180
181 r: r OR r
182 { $$ = node(OR, $1, $3); }
183 | r r %prec CAT
184 { $$ = node(CAT, $1, $2); }
185 | r STAR
186 { $$ = unary(STAR, $1); }
187 | r PLUS
188 { $$ = unary(PLUS, $1); }
189 | r QUEST
190 { $$ = unary(QUEST, $1); }
191 | '(' r ')'
192 { $$ = $2; }
193 | error
194 ;
195
196 %%
197 void add(int *, int);
198 void clearg(void);
199 void execute(char *);
200 void follow(int);
201 int mgetc(void);
202 void synerror(void);
203
204
205 void
yyerror(char * s)206 yyerror(char *s)
207 {
208 fprintf(stderr, "egrep: %s\n", s);
209 exit(2);
210 }
211
212 int
yylex(void)213 yylex(void)
214 {
215 extern int yylval;
216 int cclcnt, x, ccount, oldccount;
217 wchar_t c, lc;
218
219 c = nextch();
220 switch(c) {
221 case '^':
222 yylval = LEFT;
223 return(CHAR);
224 case '$':
225 c = RIGHT;
226 goto defchar;
227 case '|': return (OR);
228 case '*': return (STAR);
229 case '+': return (PLUS);
230 case '?': return (QUEST);
231 case '(': return (c);
232 case ')': return (c);
233 case '.': return(DOT);
234 case '\0': return (0);
235 case RIGHT: return (OR);
236 case '[':
237 x = (multibyte ? MCCL : CCL);
238 cclcnt = 0;
239 count = nxtchar++;
240 if ((c = nextch()) == '^') {
241 x = (multibyte ? NMCCL : NCCL);
242 c = nextch();
243 }
244 lc = 0;
245 do {
246 if (iflag && iswalpha(c))
247 c = towlower(c);
248 if (c == '\0') synerror();
249 if (c == '-' && cclcnt > 0 && lc != 0) {
250 if ((c = nextch()) != 0) {
251 if(c == ']') {
252 chars[nxtchar++] = '-';
253 cclcnt++;
254 break;
255 }
256 if (iflag && iswalpha(c))
257 c = towlower(c);
258 if (!multibyte ||
259 (c & WCHAR_CSMASK) == (lc & WCHAR_CSMASK) &&
260 lc < c &&
261 !iswcntrl(c) && !iswcntrl(lc)) {
262 if (nxtchar >= maxclin)
263 if (allocchars() == 0)
264 overflo();
265 chars[nxtchar++] = '-';
266 cclcnt++;
267 }
268 }
269 }
270 ccount = oldccount = nxtchar;
271 if(ccount + MB_LEN_MAX >= maxclin)
272 if(allocchars() == 0)
273 overflo();
274 ccount += wctomb(&chars[ccount], c);
275 cclcnt += ccount - oldccount;
276 nxtchar += ccount - oldccount;
277 lc = c;
278 } while ((c = nextch()) != ']');
279 chars[count] = cclcnt;
280 return(x);
281
282 case '\\':
283 if ((c = nextch()) == '\0') synerror();
284 defchar:
285 default:
286 if (c <= 0177) {
287 yylval = c;
288 return (CHAR);
289 } else {
290 lyylval = c;
291 return (MCHAR);
292 }
293 }
294 }
295
296 wchar_t
nextch(void)297 nextch(void)
298 {
299 wchar_t lc;
300 char multic[MB_LEN_MAX];
301 int length, d;
302 if (fflag) {
303 if ((length = _mbftowc(multic, &lc, mgetc, &d)) < 0)
304 synerror();
305 if(length == 0)
306 lc = '\0';
307 }
308 else {
309 if((length = mbtowc(&lc, input, MB_LEN_MAX)) == -1)
310 synerror();
311 if(length == 0)
312 return(0);
313 input += length;
314 }
315 return(lc);
316 }
317
318 int
mgetc(void)319 mgetc(void)
320 {
321 return(getc(expfile));
322 }
323
324 void
synerror(void)325 synerror(void)
326 {
327 fprintf(stderr, gettext("egrep: syntax error\n"));
328 exit(2);
329 }
330
331 int
enter(int x)332 enter(int x)
333 {
334 if(line >= maxlin)
335 if(alloctree() == 0)
336 overflo();
337 name[line] = x;
338 left[line] = 0;
339 right[line] = 0;
340 return(line++);
341 }
342
343 int
cclenter(int x)344 cclenter(int x)
345 {
346 int linno;
347 linno = enter(x);
348 right[linno] = count;
349 return (linno);
350 }
351
352 int
node(int x,int l,int r)353 node(int x, int l, int r)
354 {
355 if(line >= maxlin)
356 if(alloctree() == 0)
357 overflo();
358 name[line] = x;
359 left[line] = l;
360 right[line] = r;
361 parent[l] = line;
362 parent[r] = line;
363 return(line++);
364 }
365
366 int
unary(int x,int d)367 unary(int x, int d)
368 {
369 if(line >= maxlin)
370 if(alloctree() == 0)
371 overflo();
372 name[line] = x;
373 left[line] = d;
374 right[line] = 0;
375 parent[d] = line;
376 return(line++);
377 }
378
379 int
allocchars(void)380 allocchars(void)
381 {
382 maxclin += MAXLIN;
383 if((chars = realloc(chars, maxclin)) == (char *)0)
384 return 0;
385 return 1;
386 }
387
388 int
alloctree(void)389 alloctree(void)
390 {
391 maxlin += MAXLIN;
392 if((name = (int *)realloc(name, maxlin*sizeof(int))) == (int *)0)
393 return 0;
394 if((left = (int *)realloc(left, maxlin*sizeof(int))) == (int *)0)
395 return 0;
396 if((right = (int *)realloc(right, maxlin*sizeof(int))) == (int *)0)
397 return 0;
398 if((parent = (int *)realloc(parent, maxlin*sizeof(int))) == (int *)0)
399 return 0;
400 if((foll = (int *)realloc(foll, maxlin*sizeof(int))) == (int *)0)
401 return 0;
402 if((tmpstat = (int *)realloc(tmpstat, maxlin*sizeof(int))) == (int *)0)
403 return 0;
404 if((initstat = (int *)realloc(initstat, maxlin*sizeof(int))) == (int *)0)
405 return 0;
406 return 1;
407 }
408
409 void
overflo(void)410 overflo(void)
411 {
412 fprintf(stderr, gettext("egrep: regular expression too long\n"));
413 exit(2);
414 }
415
416 void
cfoll(int v)417 cfoll(int v)
418 {
419 int i;
420 if (left[v] == 0) {
421 count = 0;
422 for (i=1; i<=line; i++) tmpstat[i] = 0;
423 follow(v);
424 add(foll, v);
425 }
426 else if (right[v] == 0) cfoll(left[v]);
427 else {
428 cfoll(left[v]);
429 cfoll(right[v]);
430 }
431 }
432
433 void
cgotofn(void)434 cgotofn(void)
435 {
436 int i;
437 count = 0;
438 inxtpos = nxtpos;
439 for (i=3; i<=line; i++) tmpstat[i] = 0;
440 if (cstate(line-1)==0) {
441 tmpstat[line] = 1;
442 count++;
443 out[1] = 1;
444 }
445 for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
446 count--; /*leave out position 1 */
447 icount = count;
448 tmpstat[1] = 0;
449 add(state, 1);
450 istat = nxtst(1, LEFT);
451 }
452
453 int
nxtst(int s,int c)454 nxtst(int s, int c)
455 {
456 int i, num, k;
457 int pos, curpos, number, newpos;
458 num = positions[state[s]];
459 count = icount;
460 for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
461 pos = state[s] + 1;
462 for (i=0; i<num; i++) {
463 curpos = positions[pos];
464 k = name[curpos];
465 if (k >= 0)
466 if (
467 (k == c)
468 || (k == DOT && dot(c))
469 || (k == MDOT && mdot(c))
470 || (k == CCL && dot(c) && member(c, right[curpos], 1))
471 || (k == NCCL && dot(c) && member(c, right[curpos], 0))
472 || (k == MCCL && mdot(c) && member(c, right[curpos], 1))
473 ) {
474 number = positions[foll[curpos]];
475 newpos = foll[curpos] + 1;
476 for (k=0; k<number; k++) {
477 if (tmpstat[positions[newpos]] != 1) {
478 tmpstat[positions[newpos]] = 1;
479 count++;
480 }
481 newpos++;
482 }
483 }
484 pos++;
485 }
486 if (notin(nstate)) {
487 if (++nstate >= NSTATES) {
488 for (i=1; i<NSTATES; i++)
489 out[i] = 0;
490 for (i=1; i<NSTATES; i++)
491 for (k=0; k<NCHARS; k++)
492 gotofn[i][k] = 0;
493 nstate = 1;
494 nxtpos = inxtpos;
495 reinit = 1;
496 add(state, nstate);
497 if (tmpstat[line] == 1) out[nstate] = 1;
498 return nstate;
499 }
500 add(state, nstate);
501 if (tmpstat[line] == 1) out[nstate] = 1;
502 gotofn[s][c] = nstate;
503 return nstate;
504 }
505 else {
506 gotofn[s][c] = xstate;
507 return xstate;
508 }
509 }
510
511
512 int
cstate(int v)513 cstate(int v)
514 {
515 int b;
516 if (left[v] == 0) {
517 if (tmpstat[v] != 1) {
518 tmpstat[v] = 1;
519 count++;
520 }
521 return(1);
522 }
523 else if (right[v] == 0) {
524 if (cstate(left[v]) == 0) return (0);
525 else if (name[v] == PLUS) return (1);
526 else return (0);
527 }
528 else if (name[v] == CAT) {
529 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
530 else return (1);
531 }
532 else { /* name[v] == OR */
533 b = cstate(right[v]);
534 if (cstate(left[v]) == 0 || b == 0) return (0);
535 else return (1);
536 }
537 }
538
539
540 int
dot(int c)541 dot(int c)
542 {
543 if(multibyte && c >= 0200 && (!iscntrl(c) || c == SS2 && eucw2 || c == SS3 && eucw3))
544 return(0);
545 if(c == RIGHT || c == LEFT)
546 return(0);
547 return(1);
548 }
549
550 int
mdot(int c)551 mdot(int c)
552 {
553 if(c >= 0200 && !iscntrl(c))
554 return(1);
555 return(0);
556 }
557
558 int
member(int symb,int set,int torf)559 member(int symb, int set, int torf)
560 {
561 int i, num, pos, c, lc;
562 if(symb == RIGHT || symb == LEFT)
563 return(0);
564 num = chars[set];
565 pos = set + 1;
566 lc = 0;
567 if(iflag)
568 symb = tolower(symb);
569 for (i=0; i<num; i++) {
570 c = (unsigned char)chars[pos++];
571 if(c == '-' && lc != 0 && ++i < num) {
572 c = (unsigned char)chars[pos++];
573 if(lc <= symb && symb <= c)
574 return(torf);
575 }
576 if (symb == c)
577 return (torf);
578 lc = c;
579 }
580 return(!torf);
581 }
582
583 int
notin(int n)584 notin(int n)
585 {
586 int i, j, pos;
587 for (i=1; i<=n; i++) {
588 if (positions[state[i]] == count) {
589 pos = state[i] + 1;
590 for (j=0; j < count; j++)
591 if (tmpstat[positions[pos++]] != 1) goto nxt;
592 xstate = i;
593 return (0);
594 }
595 nxt: ;
596 }
597 return (1);
598 }
599
600 void
add(int * array,int n)601 add(int *array, int n)
602 {
603 int i;
604 if (nxtpos + count >= maxpos) {
605 maxpos += MAXPOS + count;
606 if((positions = (int *)realloc(positions, maxpos *sizeof(int))) == (int *)0)
607 overflo();
608 }
609 array[n] = nxtpos;
610 positions[nxtpos++] = count;
611 for (i=3; i <= line; i++) {
612 if (tmpstat[i] == 1) {
613 positions[nxtpos++] = i;
614 }
615 }
616 }
617
618 void
follow(int v)619 follow(int v)
620 {
621 int p;
622 if (v == line) return;
623 p = parent[v];
624 switch(name[p]) {
625 case STAR:
626 case PLUS: cstate(v);
627 follow(p);
628 return;
629
630 case OR:
631 case QUEST: follow(p);
632 return;
633
634 case CAT: if (v == left[p]) {
635 if (cstate(right[p]) == 0) {
636 follow(p);
637 return;
638 }
639 }
640 else follow(p);
641 return;
642 case FINAL: if (tmpstat[line] != 1) {
643 tmpstat[line] = 1;
644 count++;
645 }
646 return;
647 }
648 }
649
650 #define USAGE "[ -bchHilnsqv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ..."
651
652 int
main(int argc,char ** argv)653 main(int argc, char **argv)
654 {
655 char c;
656 char nl = '\n';
657 int errflag = 0;
658
659 (void)setlocale(LC_ALL, "");
660
661 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
662 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't. */
663 #endif
664 (void) textdomain(TEXT_DOMAIN);
665
666 while((c = getopt(argc, argv, "ybcie:f:Hhlnvsq")) != -1)
667 switch(c) {
668
669 case 'b':
670 bflag++;
671 continue;
672
673 case 'c':
674 cflag++;
675 continue;
676
677 case 'e':
678 eflag++;
679 input = optarg;
680 continue;
681
682 case 'f':
683 fflag++;
684 expfile = fopen(optarg, "r");
685 if(expfile == NULL) {
686 fprintf(stderr,
687 gettext("egrep: can't open %s\n"), optarg);
688 exit(2);
689 }
690 continue;
691
692 case 'H':
693 if (!lflag) /* H is excluded by l as in GNU grep */
694 Hflag++;
695 hflag = 0; /* H excludes h */
696 continue;
697
698 case 'h':
699 hflag++;
700 Hflag = 0; /* h excludes H */
701 continue;
702
703 case 'y':
704 case 'i':
705 iflag++;
706 continue;
707
708 case 'l':
709 lflag++;
710 Hflag = 0; /* l excludes H */
711 continue;
712
713 case 'n':
714 nflag++;
715 continue;
716
717 case 'q':
718 case 's': /* Solaris: legacy option */
719 qflag++;
720 continue;
721
722 case 'v':
723 vflag++;
724 continue;
725
726 case '?':
727 errflag++;
728 }
729 if (errflag || ((argc <= 0) && !fflag && !eflag)) {
730 fprintf(stderr, gettext("usage: egrep %s\n"), gettext(USAGE));
731 exit(2);
732 }
733 if(!eflag && !fflag) {
734 input = argv[optind];
735 optind++;
736 }
737
738 argc -= optind;
739 argv = &argv[optind];
740
741 /* allocate initial space for arrays */
742 if((name = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
743 overflo();
744 if((left = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
745 overflo();
746 if((right = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
747 overflo();
748 if((parent = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
749 overflo();
750 if((foll = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
751 overflo();
752 if((tmpstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
753 overflo();
754 if((initstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
755 overflo();
756 if((chars = (char *)malloc(MAXLIN)) == (char *)0)
757 overflo();
758 if((lower = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
759 overflo();
760 if((upper = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
761 overflo();
762 if((positions = (int *)malloc(MAXPOS*sizeof(int))) == (int *)0)
763 overflo();
764 maxlin = MAXLIN;
765 maxclin = MAXLIN;
766 maxwclin = MAXLIN;
767 maxpos = MAXPOS;
768
769 yyparse();
770
771 cfoll(line-1);
772 cgotofn();
773 nfile = argc;
774 if (argc<=0) {
775 execute(0);
776 }
777 else while (--argc >= 0) {
778 if (reinit == 1) clearg();
779 execute(*argv++);
780 }
781 return (badbotch ? 2 : nsucc==0);
782 }
783
784 void
execute(char * file)785 execute(char *file)
786 {
787 char *p;
788 int cstat;
789 wchar_t c;
790 int t;
791 long count;
792 long count1, count2;
793 long nchars;
794 int succ;
795 char *ptr, *ptrend, *lastptr;
796 char *buf;
797 long lBufSiz;
798 FILE *f;
799 int nlflag;
800
801 lBufSiz = EBUFSIZ;
802 if ((buf = malloc (lBufSiz + EBUFSIZ)) == NULL) {
803 exit (2); /* out of memory - BAIL */
804 }
805
806 if (file) {
807 if ((f = fopen(file, "r")) == NULL) {
808 fprintf(stderr,
809 gettext("egrep: can't open %s\n"), file);
810 badbotch=1;
811 return;
812 }
813 } else {
814 f = stdin;
815 file = STDIN_FILENAME;
816 }
817 lnum = 1;
818 tln = 0;
819 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) {
820 fclose(f);
821
822 if (cflag && !qflag) {
823 if (Hflag || (nfile > 1 && !hflag))
824 fprintf(stdout, "%s:", file);
825 fprintf(stdout, "%lld\n", tln);
826 }
827 return;
828 }
829
830 blkno = count;
831 ptr = buf;
832 for(;;) {
833 if((ptrend = memchr(ptr, '\n', buf + count - ptr)) == NULL) {
834 /*
835 move the unused partial record to the head of the buffer
836 */
837 if (ptr > buf) {
838 count = buf + count - ptr;
839 memmove (buf, ptr, count);
840 ptr = buf;
841 }
842
843 /*
844 Get a bigger buffer if this one is full
845 */
846 if(count > lBufSiz) {
847 /*
848 expand the buffer
849 */
850 lBufSiz += EBUFSIZ;
851 if ((buf = realloc (buf, lBufSiz + EBUFSIZ)) == NULL) {
852 exit (2); /* out of memory - BAIL */
853 }
854
855 ptr = buf;
856 }
857
858 p = buf + count;
859 if((count1 = read(fileno(f), p, EBUFSIZ)) > 0) {
860 count += count1;
861 blkno += count1;
862 continue;
863 }
864 ptrend = ptr + count;
865 nlflag = 0;
866 } else
867 nlflag = 1;
868 *ptrend = '\n';
869 p = ptr;
870 lastptr = ptr;
871 cstat = istat;
872 succ = 0;
873 for(;;) {
874 if(out[cstat]) {
875 if(multibyte && p > ptr) {
876 wchar_t wchar;
877 int length;
878 char *endptr = p;
879 p = lastptr;
880 while(p < endptr) {
881 length = mbtowc(&wchar, p, MB_LEN_MAX);
882 if(length <= 1)
883 p++;
884 else
885 p += length;
886 }
887 if(p == endptr) {
888 succ = !vflag;
889 break;
890 }
891 cstat = 1;
892 length = mbtowc(&wchar, lastptr, MB_LEN_MAX);
893 if(length <= 1)
894 lastptr++;
895 else
896 lastptr += length;
897 p = lastptr;
898 continue;
899 }
900 succ = !vflag;
901 break;
902 }
903 c = (unsigned char)*p++;
904 if ((t = gotofn[cstat][c]) == 0)
905 cstat = nxtst(cstat, c);
906 else
907 cstat = t;
908 if(c == RIGHT) {
909 if(out[cstat]) {
910 succ = !vflag;
911 break;
912 }
913 succ = vflag;
914 break;
915 }
916 }
917 if (succ) {
918 nsucc = 1;
919 if (lflag || qflag) {
920 if (!qflag)
921 (void) printf("%s\n", file);
922 fclose(f);
923 return;
924 }
925 if (cflag) {
926 tln++;
927 } else {
928 if (Hflag || (nfile > 1 && !hflag))
929 printf("%s:", file);
930 if (bflag) {
931 nchars = blkno - (buf + count - ptrend) - 2;
932 if(nlflag)
933 nchars++;
934 printf("%lld:", nchars/BLKSIZE);
935 }
936 if (nflag)
937 printf("%lld:", lnum);
938 if(nlflag)
939 nchars = ptrend - ptr + 1;
940 else
941 nchars = ptrend - ptr;
942 fwrite(ptr, (size_t)1, (size_t)nchars, stdout);
943 }
944 }
945 if(!nlflag)
946 break;
947 ptr = ptrend + 1;
948 if(ptr >= buf + count) {
949 ptr = buf;
950 if((count = read(fileno(f), buf, EBUFSIZ)) <= 0)
951 break;
952 blkno += count;
953 }
954 lnum++;
955 if (reinit == 1)
956 clearg();
957 }
958 fclose(f);
959 if (cflag && !qflag) {
960 if (Hflag || (nfile > 1 && !hflag))
961 printf("%s:", file);
962 printf("%lld\n", tln);
963 }
964 }
965
966 void
clearg(void)967 clearg(void)
968 {
969 int i, k;
970 for (i=1; i<=nstate; i++)
971 out[i] = 0;
972 for (i=1; i<=nstate; i++)
973 for (k=0; k<NCHARS; k++)
974 gotofn[i][k] = 0;
975 nstate = 1;
976 nxtpos = inxtpos;
977 reinit = 0;
978 count = 0;
979 for (i=3; i<=line; i++) tmpstat[i] = 0;
980 if (cstate(line-1)==0) {
981 tmpstat[line] = 1;
982 count++;
983 out[1] = 1;
984 }
985 for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
986 count--; /*leave out position 1 */
987 icount = count;
988 tmpstat[1] = 0;
989 add(state, 1);
990 istat = nxtst(1, LEFT);
991 }
992
993 int
mdotenter(void)994 mdotenter(void)
995 {
996 int i, x1, x2;
997 x1 = enter(DOT);
998 x2 = enter(MDOT);
999 for(i = 1; i < (int) eucw1; i++)
1000 x2 = node(CAT, x2, enter(MDOT));
1001 x1 = node(OR, x1, x2);
1002 if(eucw2) {
1003 x2 = enter('\216');
1004 for(i = 1; i <= (int) eucw2; i++)
1005 x2 = node(CAT, x2, enter(MDOT));
1006 x1 = node(OR, x1, x2);
1007 }
1008 if(eucw3) {
1009 x2 = enter('\217');
1010 for(i = 1; i <= (int) eucw3; i++)
1011 x2 = node(CAT, x2, enter(MDOT));
1012 x1 = node(OR, x1, x2);
1013 }
1014 return(x1);
1015 }
1016
1017 int
mchar(wchar_t c)1018 mchar(wchar_t c)
1019 {
1020 char multichar[MB_LEN_MAX+1];
1021 char *p;
1022 int x1, lc, length;
1023
1024 length = wctomb(multichar, c);
1025 p = multichar;
1026 *(p + length) = '\0';
1027 x1 = enter((unsigned char)*p++);
1028 while(lc = (unsigned char)*p++)
1029 x1 = node(CAT, x1, enter(lc));
1030 return(x1);
1031 }
1032
1033 int
ccl(int type)1034 ccl(int type)
1035 {
1036 wchar_t c, lc;
1037 char multic1[MB_LEN_MAX];
1038 char multic2[MB_LEN_MAX];
1039 int x1, x2, length, current, last, cclcnt;
1040 x2 = 0;
1041 current = 0;
1042 last = genrange(type);
1043 nxtchar = count + 1;
1044 cclcnt = 0;
1045 /* create usual character class for single byte characters */
1046 while(current <= last && (isascii(c = lower[current]) || c <= 0377 && iscntrl(c))) {
1047 cclcnt++;
1048 chars[nxtchar++] = c;
1049 if(lower[current] != upper[current]) {
1050 chars[nxtchar++] = '-';
1051 chars[nxtchar++] = upper[current];
1052 cclcnt += 2;
1053 }
1054 current++;
1055 }
1056
1057 if(cclcnt)
1058 chars[count] = cclcnt;
1059 else
1060 nxtchar = count;
1061 if(current > 0)
1062 /* single byte part of character class */
1063 x2 = cclenter(type);
1064 else if(type == NCCL)
1065 /* all single byte characters match */
1066 x2 = enter(DOT);
1067 while(current <= last) {
1068 if(upper[current] == lower[current])
1069 x1 = mchar(lower[current]);
1070 else {
1071 length = wctomb(multic1, lower[current]);
1072 wctomb(multic2, upper[current]);
1073 x1 = range((unsigned char *)multic1,
1074 (unsigned char *)multic2, length);
1075 }
1076 if(x2)
1077 x2 = node(OR, x2, x1);
1078 else
1079 x2 = x1;
1080 current++;
1081 }
1082 return x2;
1083 }
1084
1085 int
range(unsigned char * p1,unsigned char * p2,int length)1086 range(unsigned char *p1, unsigned char *p2, int length)
1087 {
1088 char multic[MB_LEN_MAX+1];
1089 char *p;
1090 int i, x1, x2;
1091 if(length == 1)
1092 return(classenter(*p1, *p2));
1093 if(p1[0] == p2[0])
1094 return(node(CAT, enter(p1[0]), range(p1+1, p2+1, length - 1)));
1095 p = multic;
1096 for(i = 1; i < length; i++)
1097 *p++ = 0377;
1098 x1 = node(CAT, enter(p1[0]),
1099 range(p1+1, (unsigned char *)multic, length - 1));
1100 if((unsigned char)(p1[0] + 1) < p2[0]) {
1101 x2 = classenter(p1[0] + 1, p2[0] - 1);
1102 for(i = 1; i < length; i++)
1103 x2 = node(CAT, x2, enter(MDOT));
1104 x1 = node(OR, x1, x2);
1105 }
1106 p = multic;
1107 for(i = 1; i < length; i++)
1108 *p++ = 0200;
1109 x2 = node(CAT, enter(p2[0]),
1110 range((unsigned char *)multic, p2+1, length - 1));
1111 return node(OR, x1, x2);
1112 }
1113
1114 int
classenter(int x1,int x2)1115 classenter(int x1, int x2)
1116 {
1117 static int max, min;
1118 if(!max) {
1119 int i;
1120 for(i = 0200; i <= 0377; i++)
1121 if(!iscntrl(i))
1122 break;
1123 min = i;
1124 for(i = 0377; i >= 0200; i--)
1125 if(!iscntrl(i))
1126 break;
1127 max = i;
1128 }
1129 if(x1 <= min && x2 >= max)
1130 return enter(MDOT);
1131 if(nxtchar + 4 >= maxclin)
1132 if(allocchars() == 0)
1133 overflo();
1134 count = nxtchar++;
1135 chars[nxtchar++] = x1;
1136 chars[nxtchar++] = '-';
1137 chars[nxtchar++] = x2;
1138 chars[count] = 3;
1139 return cclenter(MCCL);
1140 }
1141
1142 int
genrange(int type)1143 genrange(int type)
1144 {
1145 char *p, *endp;
1146 int current, nel, i, last, length;
1147 wchar_t c, lc;
1148
1149 current = 0;
1150 p = &chars[count+1];
1151 endp = &chars[count+1] + chars[count];
1152 lc = 0;
1153
1154 /* convert character class into union of ranges */
1155 while(p < endp) {
1156 length = mbtowc(&c, p, MB_LEN_MAX);
1157 p += length;
1158 if(c == '-' && lc != 0) {
1159 length = mbtowc(&c, p, MB_LEN_MAX);
1160 upper[current-1] = c;
1161 p += length;
1162 } else {
1163 lower[current] = c;
1164 upper[current++] = c;
1165 }
1166 lc = c;
1167 }
1168 nel = current;
1169 /* sort lower and upper bounds of ranges */
1170 qsort((char *)lower, nel, sizeof(wchar_t), compare);
1171 qsort((char *)upper, nel, sizeof(wchar_t), compare);
1172 last = current - 1;
1173 current = 0;
1174 /* combine overlapping or adjacent ranges */
1175 for(i = 0; i < last; i++)
1176 if(upper[i] >= lower[i+1] - 1)
1177 upper[current] = upper[i+1];
1178 else {
1179 lower[++current] = lower[i+1];
1180 upper[current] = upper[i+1];
1181 }
1182 if(type == NCCL) {
1183 /* find complement of character class */
1184 int j, next;
1185 i = 0;
1186 while(i <= current && isascii(c=lower[i]) || c <= 0377 && iscntrl(c))
1187 i++;
1188 if(i > current) {
1189 /* match all multibyte characters */
1190 if(eucw2) {
1191 lower[i] = maxmin(WCHAR_CS2, 0);
1192 upper[i++] = maxmin(WCHAR_CS2, 1);
1193 }
1194 if(eucw3) {
1195 lower[i] = maxmin(WCHAR_CS3, 0);
1196 upper[i++] = maxmin(WCHAR_CS3, 1);
1197 }
1198 lower[i] = maxmin(WCHAR_CS1, 0);
1199 upper[i++] = maxmin(WCHAR_CS1, 1);
1200 return i - 1;
1201 }
1202 next = current + 1;
1203 if(next + current + 2 >= maxwclin) {
1204 maxwclin += MAXLIN + next + current + 2;
1205 if((lower = (wchar_t *)realloc(lower, maxwclin *sizeof(wchar_t))) == (wchar_t *)0 ||
1206 (upper = (wchar_t *)realloc(upper, maxwclin * sizeof(wchar_t))) == (wchar_t *)0)
1207 overflo();
1208 }
1209 if(eucw2 && lower[i] > maxmin(WCHAR_CS2, 0)) {
1210 lower[next] = maxmin(WCHAR_CS2, 0);
1211 if((lower[i] & WCHAR_CSMASK) != WCHAR_CS2) {
1212 upper[next++] = maxmin(WCHAR_CS2, 1);
1213 if((lower[i] & WCHAR_CSMASK) == WCHAR_CS1 && eucw3) {
1214 lower[next] = maxmin(WCHAR_CS3, 0);
1215 upper[next++] = maxmin(WCHAR_CS3, 1);
1216 }
1217 if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1218 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1219 upper[next++] = lower[i] - 1;
1220 }
1221 } else
1222 upper[next++] = lower[i] - 1;
1223 } else if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1224 lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1225 upper[next++] = lower[i] - 1;
1226 }
1227 for(j = i; j < current; j++) {
1228 if(upper[j] < maxmin(upper[j] & WCHAR_CSMASK, 1)) {
1229 lower[next] = upper[j] + 1;
1230 if((upper[j] & WCHAR_CSMASK) != (lower[j+1] & WCHAR_CSMASK)) {
1231 upper[next++] = maxmin(upper[j] & WCHAR_CSMASK, 1);
1232 if(eucw3 && (upper[j] & WCHAR_CSMASK) == WCHAR_CS2 && (lower[j+1] & WCHAR_CSMASK) == WCHAR_CS1) {
1233 lower[next] = maxmin(WCHAR_CS3, 0);
1234 upper[next++] = maxmin(WCHAR_CS3, 1);
1235 }
1236 if(lower[j+1] > maxmin(lower[j+1] & WCHAR_CSMASK, 0)) {
1237 lower[next] = maxmin(lower[j+1] & WCHAR_CSMASK, 0);
1238 upper[next++] = lower[j+1] - 1;
1239 }
1240 } else
1241 upper[next++] = lower[j+1] - 1;
1242 } else if(lower[j+1] > maxmin(lower[j+1], 0)) {
1243 lower[next] = maxmin(lower[j+1], 0);
1244 upper[next++] = lower[j+1] - 1;
1245 }
1246 }
1247 if(upper[current] < maxmin(upper[current] & WCHAR_CSMASK, 1)) {
1248 lower[next] = upper[current] + 1;
1249 upper[next++] = maxmin(upper[current] & WCHAR_CSMASK, 1);
1250 }
1251 if((upper[current] & WCHAR_CSMASK) != WCHAR_CS1) {
1252 if((upper[current] & WCHAR_CSMASK) == WCHAR_CS2 && eucw3) {
1253 lower[next] = maxmin(WCHAR_CS3, 0);
1254 upper[next++] = maxmin(WCHAR_CS3, 1);
1255 }
1256 lower[next] = maxmin(WCHAR_CS1, 0);
1257 upper[next++] = maxmin(WCHAR_CS1, 1);
1258 }
1259 for(j = current + 1; j < next; j++) {
1260 lower[i] = lower[j];
1261 upper[i++] = upper[j];
1262 }
1263 current = i - 1;
1264 }
1265 return(current);
1266 }
1267
1268 int
compare(wchar_t * c,wchar_t * d)1269 compare(wchar_t *c, wchar_t *d)
1270 {
1271 if(*c < *d)
1272 return -1;
1273 if(*c == *d)
1274 return 0;
1275 return 1;
1276 }
1277
1278 wchar_t
maxmin(wchar_t c,int flag)1279 maxmin(wchar_t c, int flag)
1280 {
1281 static wchar_t minmax1[2], minmax2[2], minmax3[2];
1282
1283 if(!minmax1[0]) {
1284 /* compute min and max process codes for all code sets */
1285 int length, i;
1286 char multic[MB_LEN_MAX], minmax[2];
1287 for(i = 0377; i >= 0200; i--)
1288 if(!iscntrl(i))
1289 break;
1290 minmax[1] = i;
1291 for(i = 0240; i <= 0377; i++)
1292 if(!iscntrl(i))
1293 break;
1294 minmax[0] = i;
1295 for(i = 0; i <= 1; i++) {
1296 length = MB_LEN_MAX;
1297 while(length--)
1298 multic[length] = minmax[i];
1299 mbtowc(&minmax1[i], multic, MB_LEN_MAX);
1300 if(eucw2) {
1301 multic[0] = SS2;
1302 mbtowc(&minmax2[i], multic, MB_LEN_MAX);
1303 }
1304 if(eucw3) {
1305 multic[0] = SS3;
1306 mbtowc(&minmax3[i], multic, MB_LEN_MAX);
1307 }
1308 }
1309 }
1310 switch(c) {
1311 case WCHAR_CS1: return minmax1[flag];
1312 case WCHAR_CS2: return minmax2[flag];
1313 case WCHAR_CS3: return minmax3[flag];
1314 }
1315
1316 /* NOTREACHED */
1317 return (0);
1318 }
1319