xref: /titanic_50/usr/src/cmd/egrep/egrep.y (revision 894fa7ba71de90ec2b4a468b77d5d20d75299104)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 %}
24 /*
25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 
29 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
30 /*	  All Rights Reserved  	*/
31 
32 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
33 /*	  All Rights Reserved	*/
34 
35 /*
36  * Copyright 2013 Damian Bogel. All rights reserved.
37  */
38 
39 /*
40  * egrep -- print lines containing (or not containing) a regular expression
41  *
42  *	status returns:
43  *		0 - ok, and some matches
44  *		1 - ok, but no matches
45  *		2 - some error; matches irrelevant
46  */
/*
 * Token kinds handed back by yylex().  Most of them are also stored in
 * name[] as the node type of the corresponding parse tree node.
 * The %left lines give associativity and (per yacc convention) increasing
 * precedence: OR lowest, then implicit concatenation, then the postfix
 * operators.
 */
47 %token CHAR MCHAR DOT MDOT CCL NCCL MCCL NMCCL OR CAT STAR PLUS QUEST
48 %left OR
49 %left CHAR MCHAR DOT CCL NCCL MCCL NMCCL '('
50 %left CAT
51 %left STAR PLUS QUEST
52 
53 %{
54 #include <stdio.h>
55 #include <ctype.h>
56 #include <memory.h>
57 #include <wchar.h>
58 #include <wctype.h>
59 #include <widec.h>
60 #include <stdlib.h>
61 #include <limits.h>
62 #include <locale.h>
63 
/* Name printed in place of a file name when reading standard input. */
64 #define STDIN_FILENAME gettext("(standard input)")
65 
66 #define BLKSIZE 512	/* size of reported disk blocks */
67 #define EBUFSIZ 8192	/* read() chunk size and buffer growth step in execute() */
68 #define MAXLIN 350	/* initial size and growth step of the tree/class arrays */
69 #define NCHARS 256	/* number of columns in gotofn[] (one per input byte) */
70 #define MAXPOS 4000	/* initial size and growth step of positions[] */
71 #define NSTATES 64	/* number of rows in gotofn[]; tables are reset when exceeded */
72 #define FINAL -1	/* node type of the root node created by rule `s' */
73 #define RIGHT '\n'	/* serves as record separator and as $ */
74 #define LEFT '\n'	/* beginning of line */
/*
 * Lazily built automaton tables.  gotofn[s][c] caches the state reached
 * from state s on byte c (0 = not computed yet, see execute()); state[s]
 * is the index in positions[] of the position set of s; out[s] is nonzero
 * when s is accepting.
 */
75 int gotofn[NSTATES][NCHARS];
76 int state[NSTATES];
77 int out[NSTATES];
/* Index of the next free parse tree node; node 0 is never used. */
78 int line  = 1;
/* Parse tree, one entry per node: type, children (0 = none) and parent. */
79 int *name;
80 int *left;
81 int *right;
82 int *parent;
/* foll[v]: index in positions[] of the follow set of leaf v (see cfoll()). */
83 int *foll;
/* Pool of position sets; each set is a count followed by node numbers. */
84 int *positions;
/* Character class storage: a length byte followed by the class bytes. */
85 char *chars;
/* Range bounds used while processing multibyte classes (see genrange()). */
86 wchar_t *lower;
87 wchar_t *upper;
/* Current capacities of the tree arrays, chars[], lower[]/upper[], positions[]. */
88 int maxlin, maxclin, maxwclin, maxpos;
89 int nxtpos = 0;
90 int inxtpos;
91 int nxtchar = 0;
/* Scratch membership flags (tmpstat) and the saved initial set (initstat). */
92 int *tmpstat;
93 int *initstat;
94 int istat;
95 int nstate = 1;
96 int xstate;
97 int count;
98 int icount;
/* Remaining pattern text when the expression comes from the command line. */
99 char *input;
100 
101 
/* Value of the last MCHAR token (set by yylex()). */
102 wchar_t lyylval;
103 wchar_t nextch();
104 wchar_t maxmin();
105 int compare();
106 void overflo();
107 
/* Set to 1 by nxtst() when the state tables overflowed and were reset. */
108 char reinit = 0;
109 
/* Option flags set by main() and per-file counters used by execute(). */
110 long long lnum;
111 int	bflag;
112 int	cflag;
113 int	eflag;
114 int	fflag;
115 int	Hflag;
116 int	hflag;
117 int	iflag;
118 int	lflag;
119 int	nflag;
120 int	qflag;
121 int	vflag;
122 int	nfile;
123 long long blkno;
124 long long tln;
125 int	nsucc;
126 int	badbotch;
127 extern 	char *optarg;
128 extern 	int optind;
129 
130 int	f;
/* Expression file opened for -f; read by mgetc(). */
131 FILE	*expfile;
132 %}
133 
134 %%
/*
 * s: the whole expression.  The tree is wrapped in a FINAL node; unary()
 * post-increments `line', so the decrement leaves `line' equal to the
 * index of that FINAL node.
 */
135 s:	t
136 		{
137 		  unary(FINAL, $1);
138 		  line--;
139 		}
140 	;
/*
 * t: the expression `r' preceded by the implicit prefix `b'; a leading
 * and/or trailing OR token is accepted and ignored.
 */
141 t:	b r
142 		{ $$ = node(CAT, $1, $2); }
143 	| OR b r OR
144 		{ $$ = node(CAT, $2, $3); }
145 	| OR b r
146 		{ $$ = node(CAT, $2, $3); }
147 	| b r OR
148 		{ $$ = node(CAT, $1, $2); }
149 	;
/* b: empty production that builds a STAR over a single DOT leaf. */
150 b:
151 		{ /* if(multibyte)
152 			$$ = mdotenter();
153 		  else */
154 			$$ = enter(DOT);
155 		  $$ = unary(STAR, $$);
156 		}
157 	;
/*
 * r (leaves): single characters, `.', and bracket expressions.  With
 * iflag set, an alphabetic character becomes an OR of both cases.
 */
158 r:	CHAR
159 		{ $$ = iflag && isalpha($1) ?
160 		node(OR, enter(tolower($1)), enter(toupper($1))) : enter($1); }
161 	| MCHAR
162 		{ $$ = (iflag && iswalpha(lyylval)) ?
163 		node(OR, mchar(towlower(lyylval)), mchar(towupper(lyylval))) :
164 		mchar(lyylval); }
165 	| DOT
166 		{ if(multibyte)
167 			$$ = mdotenter();
168 		  else
169 			$$ = enter(DOT);
170 		}
171 	| CCL
172 		{ $$ = cclenter(CCL); }
173 	| NCCL
174 		{ $$ = cclenter(NCCL); }
175 	| MCCL
176 		{ $$ = ccl(CCL); }
177 	| NMCCL
178 		{ $$ = ccl(NCCL); }
179 	;
180 
/* r (operators): alternation, concatenation, closures and grouping. */
181 r:	r OR r
182 		{ $$ = node(OR, $1, $3); }
183 	| r r %prec CAT
184 		{ $$ = node(CAT, $1, $2); }
185 	| r STAR
186 		{ $$ = unary(STAR, $1); }
187 	| r PLUS
188 		{ $$ = unary(PLUS, $1); }
189 	| r QUEST
190 		{ $$ = unary(QUEST, $1); }
191 	| '(' r ')'
192 		{ $$ = $2; }
193 	| error
194 	;
195 
196 %%
/* Forward declarations for routines used before their definitions below. */
197 void	add(int *, int);
198 void	clearg(void);
199 void	execute(char *);
200 void	follow(int);
201 int	mgetc(void);
202 void	synerror(void);
203 
204 
/*
 * yyerror -- error hook called by the generated parser.
 * Prints the parser's message prefixed with the program name on stderr
 * and terminates with exit status 2.
 */
void
yyerror(char *s)
{
	(void) fprintf(stderr, "egrep: %s\n", s);
	exit(2);
}
211 
/*
 * yylex -- lexical analyzer for the regular expression.
 *
 * Reads characters through nextch() and returns the token kind.  For CHAR
 * the character value is left in yylval, for MCHAR in lyylval.  For a
 * bracket expression the class bytes are appended to chars[] and `count'
 * is set to the index of the class's length byte.
 */
212 int
213 yylex(void)
214 {
215 	extern int yylval;
216 	int cclcnt, x, ccount, oldccount;
217 	wchar_t c, lc;
218 
219 	c = nextch();
220 	switch(c) {
		/* `^' is represented as the character LEFT */
221 		case '^':
222 			yylval = LEFT;
223 			return(CHAR);
		/* `$' is represented as the character RIGHT */
224 		case '$':
225 			c = RIGHT;
226 			goto defchar;
227 		case '|': return (OR);
228 		case '*': return (STAR);
229 		case '+': return (PLUS);
230 		case '?': return (QUEST);
231 		case '(': return (c);
232 		case ')': return (c);
233 		case '.': return(DOT);
234 		case '\0': return (0);
		/* a newline inside the expression acts like `|' */
235 		case RIGHT: return (OR);
236 		case '[':
237 			x = (multibyte ? MCCL : CCL);
238 			cclcnt = 0;
			/* reserve one byte for the class length, filled in below */
239 			count = nxtchar++;
240 			if ((c = nextch()) == '^') {
241 				x = (multibyte ? NMCCL : NCCL);
242 				c = nextch();
243 			}
244 			lc = 0;
245 			do {
246 				if (iflag && iswalpha(c))
247 					c = towlower(c);
248 				if (c == '\0') synerror();
				/* `-' after at least one member: possible range */
249 				if (c == '-' && cclcnt > 0 && lc != 0) {
250 					if ((c = nextch()) != 0) {
						/* trailing `-' before `]' is a literal */
251 						if(c == ']') {
252 							chars[nxtchar++] = '-';
253 							cclcnt++;
254 							break;
255 						}
256 						if (iflag && iswalpha(c))
257 							c = towlower(c);
						/*
						 * Keep the `-' only for single byte mode, or when
						 * both ends are in the same code set, increasing,
						 * and not control characters; otherwise the two
						 * ends are stored as separate members.
						 */
258 						if (!multibyte ||
259 						(c & WCHAR_CSMASK) == (lc & WCHAR_CSMASK) &&
260 						lc < c &&
261 						!iswcntrl(c) && !iswcntrl(lc)) {
262 							if (nxtchar >= maxclin)
263 								if (allocchars() == 0)
264 									overflo();
265 							chars[nxtchar++] = '-';
266 							cclcnt++;
267 						}
268 					}
269 				}
				/* append the multibyte encoding of c to chars[] */
270 				ccount = oldccount = nxtchar;
271 				if(ccount + MB_LEN_MAX >= maxclin)
272 					if(allocchars() == 0)
273 						overflo();
274 				ccount += wctomb(&chars[ccount], c);
275 				cclcnt += ccount - oldccount;
276 				nxtchar += ccount - oldccount;
277 				lc = c;
278 			} while ((c = nextch()) != ']');
			/* NOTE(review): the byte count is stored in a single char */
279 			chars[count] = cclcnt;
280 			return(x);
281 
		/* backslash: take the next character literally */
282 		case '\\':
283 			if ((c = nextch()) == '\0') synerror();
284 		defchar:
285 		default:
			/* values up to 0177 are CHAR tokens, anything else MCHAR */
286 			if (c <= 0177) {
287 				yylval = c;
288 				return (CHAR);
289 			} else {
290 				lyylval = c;
291 				return (MCHAR);
292 			}
293 	}
294 }
295 
296 wchar_t
297 nextch(void)
298 {
299 	wchar_t lc;
300 	char multic[MB_LEN_MAX];
301 	int length, d;
302 	if (fflag) {
303 		if ((length = _mbftowc(multic, &lc, mgetc, &d)) < 0)
304 			synerror();
305 		if(length == 0)
306 			lc = '\0';
307 	}
308 	else  {
309 		if((length = mbtowc(&lc, input, MB_LEN_MAX)) == -1)
310 			synerror();
311 		if(length == 0)
312 			return(0);
313 		input += length;
314 	}
315 	return(lc);
316 }
317 
318 int
319 mgetc(void)
320 {
321 	return(getc(expfile));
322 }
323 
/*
 * synerror -- report a syntax error in the expression on stderr and
 * terminate with exit status 2.
 */
void
synerror(void)
{
	(void) fprintf(stderr, gettext("egrep: syntax error\n"));
	exit(2);
}
330 
331 int
332 enter(int x)
333 {
334 	if(line >= maxlin)
335 		if(alloctree() == 0)
336 			overflo();
337 	name[line] = x;
338 	left[line] = 0;
339 	right[line] = 0;
340 	return(line++);
341 }
342 
343 int
344 cclenter(int x)
345 {
346 	int linno;
347 	linno = enter(x);
348 	right[linno] = count;
349 	return (linno);
350 }
351 
352 int
353 node(int x, int l, int r)
354 {
355 	if(line >= maxlin)
356 		if(alloctree() == 0)
357 			overflo();
358 	name[line] = x;
359 	left[line] = l;
360 	right[line] = r;
361 	parent[l] = line;
362 	parent[r] = line;
363 	return(line++);
364 }
365 
366 int
367 unary(int x, int d)
368 {
369 	if(line >= maxlin)
370 		if(alloctree() == 0)
371 			overflo();
372 	name[line] = x;
373 	left[line] = d;
374 	right[line] = 0;
375 	parent[d] = line;
376 	return(line++);
377 }
378 
379 int
380 allocchars(void)
381 {
382 	maxclin += MAXLIN;
383 	if((chars = realloc(chars, maxclin)) == (char *)0)
384 		return 0;
385 	return 1;
386 }
387 
388 int
389 alloctree(void)
390 {
391 	maxlin += MAXLIN;
392 	if((name = (int *)realloc(name, maxlin*sizeof(int))) == (int *)0)
393 		return 0;
394 	if((left = (int *)realloc(left, maxlin*sizeof(int))) == (int *)0)
395 		return 0;
396 	if((right = (int *)realloc(right, maxlin*sizeof(int))) == (int *)0)
397 		return 0;
398 	if((parent = (int *)realloc(parent, maxlin*sizeof(int))) == (int *)0)
399 		return 0;
400 	if((foll = (int *)realloc(foll, maxlin*sizeof(int))) == (int *)0)
401 		return 0;
402 	if((tmpstat = (int *)realloc(tmpstat, maxlin*sizeof(int))) == (int *)0)
403 		return 0;
404 	if((initstat = (int *)realloc(initstat, maxlin*sizeof(int))) == (int *)0)
405 		return 0;
406 	return 1;
407 }
408 
/*
 * overflo -- report that the expression does not fit (used when an
 * allocation fails) and terminate with exit status 2.
 */
void
overflo(void)
{
	(void) fprintf(stderr, gettext("egrep: regular expression too long\n"));
	exit(2);
}
415 
416 void
417 cfoll(int v)
418 {
419 	int i;
420 	if (left[v] == 0) {
421 		count = 0;
422 		for (i=1; i<=line; i++) tmpstat[i] = 0;
423 		follow(v);
424 		add(foll, v);
425 	}
426 	else if (right[v] == 0) cfoll(left[v]);
427 	else {
428 		cfoll(left[v]);
429 		cfoll(right[v]);
430 	}
431 }
432 
433 void
434 cgotofn(void)
435 {
436 	int i;
437 	count = 0;
438 	inxtpos = nxtpos;
439 	for (i=3; i<=line; i++) tmpstat[i] = 0;
440 	if (cstate(line-1)==0) {
441 		tmpstat[line] = 1;
442 		count++;
443 		out[1] = 1;
444 	}
445 	for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
446 	count--;		/*leave out position 1 */
447 	icount = count;
448 	tmpstat[1] = 0;
449 	add(state, 1);
450 	istat = nxtst(1, LEFT);
451 }
452 
/*
 * nxtst -- compute the state reached from state s on input byte c.
 *
 * Starts from the saved initial set (initstat/icount), adds the follow
 * sets of every position of s whose leaf matches c, then looks the
 * resulting set up among the existing states.  A new state is created
 * when it is not found.  The transition is cached in gotofn[s][c],
 * except when the state table overflowed and had to be reset.
 * Returns the number of the resulting state.
 */
453 int
454 nxtst(int s, int c)
455 {
456 	int i, num, k;
457 	int pos, curpos, number, newpos;
	/* each set in positions[] is a count followed by that many nodes */
458 	num = positions[state[s]];
459 	count = icount;
460 	for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
461 	pos = state[s] + 1;
462 	for (i=0; i<num; i++) {
463 		curpos = positions[pos];
464 		k = name[curpos];
		/* negative node types (FINAL) never consume input */
465 		if (k >= 0)
466 			if (
467 				(k == c)
468 				|| (k == DOT && dot(c))
469 				|| (k == MDOT && mdot(c))
470 				|| (k == CCL && dot(c) && member(c, right[curpos], 1))
471 				|| (k == NCCL && dot(c) && member(c, right[curpos], 0))
472 				|| (k == MCCL && mdot(c) && member(c, right[curpos], 1))
473 			) {
				/* merge the follow set of the matching leaf */
474 				number = positions[foll[curpos]];
475 				newpos = foll[curpos] + 1;
476 				for (k=0; k<number; k++) {
477 					if (tmpstat[positions[newpos]] != 1) {
478 						tmpstat[positions[newpos]] = 1;
479 						count++;
480 					}
481 					newpos++;
482 				}
483 			}
484 		pos++;
485 	}
	/* notin() leaves the number of an existing equal state in xstate */
486 	if (notin(nstate)) {
487 		if (++nstate >= NSTATES) {
			/*
			 * State table full: wipe the cached tables, rewind
			 * positions[] and restart with the new set as state 1.
			 * reinit tells the callers to run clearg().
			 */
488 			for (i=1; i<NSTATES; i++)
489 				out[i] = 0;
490 			for (i=1; i<NSTATES; i++)
491 				for (k=0; k<NCHARS; k++)
492 					gotofn[i][k] = 0;
493 			nstate = 1;
494 			nxtpos = inxtpos;
495 			reinit = 1;
496 			add(state, nstate);
497 			if (tmpstat[line] == 1) out[nstate] = 1;
498 			return nstate;
499 		}
500 		add(state, nstate);
		/* a set containing the FINAL node is accepting */
501 		if (tmpstat[line] == 1) out[nstate] = 1;
502 		gotofn[s][c] = nstate;
503 		return nstate;
504 	}
505 	else {
506 		gotofn[s][c] = xstate;
507 		return xstate;
508 	}
509 }
510 
511 
512 int
513 cstate(int v)
514 {
515 	int b;
516 	if (left[v] == 0) {
517 		if (tmpstat[v] != 1) {
518 			tmpstat[v] = 1;
519 			count++;
520 		}
521 		return(1);
522 	}
523 	else if (right[v] == 0) {
524 		if (cstate(left[v]) == 0) return (0);
525 		else if (name[v] == PLUS) return (1);
526 		else return (0);
527 	}
528 	else if (name[v] == CAT) {
529 		if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
530 		else return (1);
531 	}
532 	else { /* name[v] == OR */
533 		b = cstate(right[v]);
534 		if (cstate(left[v]) == 0 || b == 0) return (0);
535 		else return (1);
536 	}
537 }
538 
539 
540 int
541 dot(int c)
542 {
543 	if(multibyte && c >= 0200 && (!iscntrl(c) || c == SS2 && eucw2 || c == SS3 && eucw3))
544 		return(0);
545 	if(c == RIGHT || c == LEFT)
546 		return(0);
547 	return(1);
548 }
549 
/*
 * mdot -- does MDOT apply to byte c?  True (1) for bytes >= 0200 that
 * are not control characters, false (0) otherwise.
 */
int
mdot(int c)
{
	return ((c >= 0200 && !iscntrl(c)) ? 1 : 0);
}
557 
558 int
559 member(int symb, int set, int torf)
560 {
561 	int i, num, pos, c, lc;
562 	if(symb == RIGHT || symb == LEFT)
563 		return(0);
564 	num = chars[set];
565 	pos = set + 1;
566 	lc = 0;
567 	if(iflag)
568 		symb = tolower(symb);
569 	for (i=0; i<num; i++) {
570 		c = (unsigned char)chars[pos++];
571 		if(c == '-' && lc != 0 && ++i < num) {
572 			c = (unsigned char)chars[pos++];
573 			if(lc <= symb && symb <= c)
574 				return(torf);
575 		}
576 		if (symb == c)
577 			return (torf);
578 		lc = c;
579 	}
580 	return(!torf);
581 }
582 
583 int
584 notin(int n)
585 {
586 	int i, j, pos;
587 	for (i=1; i<=n; i++) {
588 		if (positions[state[i]] == count) {
589 			pos = state[i] + 1;
590 			for (j=0; j < count; j++)
591 				if (tmpstat[positions[pos++]] != 1) goto nxt;
592 			xstate = i;
593 			return (0);
594 		}
595 		nxt: ;
596 	}
597 	return (1);
598 }
599 
600 void
601 add(int *array, int n)
602 {
603 	int i;
604 	if (nxtpos + count >= maxpos) {
605 		maxpos += MAXPOS + count;
606 		if((positions = (int *)realloc(positions, maxpos *sizeof(int))) == (int *)0)
607 			overflo();
608 	}
609 	array[n] = nxtpos;
610 	positions[nxtpos++] = count;
611 	for (i=3; i <= line; i++) {
612 		if (tmpstat[i] == 1) {
613 			positions[nxtpos++] = i;
614 		}
615 	}
616 }
617 
618 void
619 follow(int v)
620 {
621 	int p;
622 	if (v == line) return;
623 	p = parent[v];
624 	switch(name[p]) {
625 		case STAR:
626 		case PLUS:	cstate(v);
627 				follow(p);
628 				return;
629 
630 		case OR:
631 		case QUEST:	follow(p);
632 				return;
633 
634 		case CAT:	if (v == left[p]) {
635 					if (cstate(right[p]) == 0) {
636 						follow(p);
637 						return;
638 					}
639 				}
640 				else follow(p);
641 				return;
642 		case FINAL:	if (tmpstat[line] != 1) {
643 					tmpstat[line] = 1;
644 					count++;
645 				}
646 				return;
647 	}
648 }
649 
650 #define USAGE "[ -bchHilnsqv ] [ -e exp ] [ -f file ] [ strings ] [ file ] ..."
651 
652 int
653 main(int argc, char **argv)
654 {
655 	char c;
656 	char nl = '\n';
657 	int errflag = 0;
658 
659 	(void)setlocale(LC_ALL, "");
660 
661 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
662 	#define TEXT_DOMAIN "SYS_TEST"  /* Use this only if it weren't. */
663 #endif
664 	(void) textdomain(TEXT_DOMAIN);
665 
666 	while((c = getopt(argc, argv, "ybcie:f:Hhlnvsq")) != -1)
667 		switch(c) {
668 
669 		case 'b':
670 			bflag++;
671 			continue;
672 
673 		case 'c':
674 			cflag++;
675 			continue;
676 
677 		case 'e':
678 			eflag++;
679 			input = optarg;
680 			continue;
681 
682 		case 'f':
683 			fflag++;
684 			expfile = fopen(optarg, "r");
685 			if(expfile == NULL) {
686 				fprintf(stderr,
687 				  gettext("egrep: can't open %s\n"), optarg);
688 				exit(2);
689 			}
690 			continue;
691 
692 		case 'H':
693 			if (!lflag) /* H is excluded by l as in GNU grep */
694 				Hflag++;
695 			hflag = 0; /* H excludes h */
696 			continue;
697 
698 		case 'h':
699 			hflag++;
700 			Hflag = 0; /* h excludes H */
701 			continue;
702 
703 		case 'y':
704 		case 'i':
705 			iflag++;
706 			continue;
707 
708 		case 'l':
709 			lflag++;
710 			Hflag = 0; /* l excludes H */
711 			continue;
712 
713 		case 'n':
714 			nflag++;
715 			continue;
716 
717 		case 'q':
718 		case 's': /* Solaris: legacy option */
719 			qflag++;
720 			continue;
721 
722 		case 'v':
723 			vflag++;
724 			continue;
725 
726 		case '?':
727 			errflag++;
728 		}
729 	if (errflag || ((argc <= 0) && !fflag && !eflag)) {
730 		fprintf(stderr, gettext("usage: egrep %s\n"), gettext(USAGE));
731 		exit(2);
732 	}
733 	if(!eflag && !fflag) {
734 		input = argv[optind];
735 		optind++;
736 	}
737 
738 	argc -= optind;
739 	argv = &argv[optind];
740 
741 	/* allocate initial space for arrays */
742 	if((name = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
743 		overflo();
744 	if((left = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
745 		overflo();
746 	if((right = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
747 		overflo();
748 	if((parent = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
749 		overflo();
750 	if((foll = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
751 		overflo();
752 	if((tmpstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
753 		overflo();
754 	if((initstat = (int *)malloc(MAXLIN*sizeof(int))) == (int *)0)
755 		overflo();
756 	if((chars = (char *)malloc(MAXLIN)) == (char *)0)
757 		overflo();
758 	if((lower = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
759 		overflo();
760 	if((upper = (wchar_t *)malloc(MAXLIN*sizeof(wchar_t))) == (wchar_t *)0)
761 		overflo();
762 	if((positions = (int *)malloc(MAXPOS*sizeof(int))) == (int *)0)
763 		overflo();
764 	maxlin = MAXLIN;
765 	maxclin = MAXLIN;
766 	maxwclin = MAXLIN;
767 	maxpos = MAXPOS;
768 
769 	yyparse();
770 
771 	cfoll(line-1);
772 	cgotofn();
773 	nfile = argc;
774 	if (argc<=0) {
775 		execute(0);
776 	}
777 	else while (--argc >= 0) {
778 		if (reinit == 1) clearg();
779 		execute(*argv++);
780 	}
781 	return (badbotch ? 2 : nsucc==0);
782 }
783 
784 void
785 execute(char *file)
786 {
787 	char *p;
788 	int cstat;
789 	wchar_t c;
790 	int t;
791 	long count;
792 	long count1, count2;
793 	long nchars;
794 	int succ;
795 	char *ptr, *ptrend, *lastptr;
796 	char *buf;
797 	long lBufSiz;
798 	FILE *f;
799 	int nlflag;
800 
801 	lBufSiz = EBUFSIZ;
802 	if ((buf = malloc (lBufSiz + EBUFSIZ)) == NULL) {
803 		exit (2); /* out of memory - BAIL */
804 	}
805 
806 	if (file) {
807 		if ((f = fopen(file, "r")) == NULL) {
808 			fprintf(stderr,
809 				gettext("egrep: can't open %s\n"), file);
810 			badbotch=1;
811 			return;
812 		}
813 	} else {
814 		f = stdin;
815 		file = STDIN_FILENAME;
816 	}
817 	lnum = 1;
818 	tln = 0;
819 	if((count = read(fileno(f), buf, EBUFSIZ)) <= 0) {
820 		fclose(f);
821 
822 		if (cflag && !qflag) {
823 			if (Hflag || (nfile > 1 && !hflag))
824 				fprintf(stdout, "%s:", file);
825 			fprintf(stdout, "%lld\n", tln);
826 		}
827 		return;
828 	}
829 
830 	blkno = count;
831 	ptr = buf;
832 	for(;;) {
833 		if((ptrend = memchr(ptr, '\n', buf + count - ptr)) == NULL) {
834 			/*
835 				move the unused partial record to the head of the buffer
836 			*/
837 			if (ptr > buf) {
838 				count = buf + count - ptr;
839 				memmove (buf, ptr, count);
840 				ptr = buf;
841 			}
842 
843 			/*
844 				Get a bigger buffer if this one is full
845 			*/
846 			if(count > lBufSiz) {
847 				/*
848 					expand the buffer
849 				*/
850 				lBufSiz += EBUFSIZ;
851 				if ((buf = realloc (buf, lBufSiz + EBUFSIZ)) == NULL) {
852 					exit (2); /* out of memory - BAIL */
853 				}
854 
855 				ptr = buf;
856 			}
857 
858 			p = buf + count;
859 			if((count1 = read(fileno(f), p, EBUFSIZ)) > 0) {
860 				count += count1;
861 				blkno += count1;
862 				continue;
863 			}
864 			ptrend = ptr + count;
865 			nlflag = 0;
866 		} else
867 			nlflag = 1;
868 		*ptrend = '\n';
869 		p = ptr;
870 		lastptr = ptr;
871 		cstat = istat;
872 		succ = 0;
873 		for(;;) {
874 			if(out[cstat]) {
875 				if(multibyte && p > ptr) {
876 					wchar_t wchar;
877 					int length;
878 					char *endptr = p;
879 					p = lastptr;
880 					while(p < endptr) {
881 						length = mbtowc(&wchar, p, MB_LEN_MAX);
882 						if(length <= 1)
883 							p++;
884 						else
885 							p += length;
886 					}
887 					if(p == endptr) {
888 						succ = !vflag;
889 						break;
890 					}
891 					cstat = 1;
892 					length = mbtowc(&wchar, lastptr, MB_LEN_MAX);
893 					if(length <= 1)
894 						lastptr++;
895 					else
896 						lastptr += length;
897 					p = lastptr;
898 					continue;
899 				}
900 				succ = !vflag;
901 				break;
902 			}
903 			c = (unsigned char)*p++;
904 			if ((t = gotofn[cstat][c]) == 0)
905 				cstat = nxtst(cstat, c);
906 			else
907 				cstat = t;
908 			if(c == RIGHT) {
909 				if(out[cstat]) {
910 					succ = !vflag;
911 					break;
912 				}
913 				succ = vflag;
914 				break;
915 			}
916 		}
917 		if (succ) {
918 			nsucc = 1;
919 			if (lflag || qflag) {
920 				if (!qflag)
921 					(void) printf("%s\n", file);
922 				fclose(f);
923 				return;
924 			}
925 			if (cflag) {
926 				tln++;
927 			} else {
928 				if (Hflag || (nfile > 1 && !hflag))
929 					printf("%s:", file);
930 				if (bflag) {
931 					nchars = blkno - (buf + count - ptrend) - 2;
932 					if(nlflag)
933 						nchars++;
934 					printf("%lld:", nchars/BLKSIZE);
935 				}
936 				if (nflag)
937 					printf("%lld:", lnum);
938 				if(nlflag)
939 					nchars = ptrend - ptr + 1;
940 				else
941 					nchars = ptrend - ptr;
942 				fwrite(ptr, (size_t)1, (size_t)nchars, stdout);
943 			}
944 		}
945 		if(!nlflag)
946 			break;
947 		ptr = ptrend + 1;
948 		if(ptr >= buf + count) {
949 			ptr = buf;
950 			if((count = read(fileno(f), buf, EBUFSIZ)) <= 0)
951 				break;
952 			blkno += count;
953 		}
954 		lnum++;
955 		if (reinit == 1)
956 			clearg();
957 	}
958 	fclose(f);
959 	if (cflag && !qflag) {
960 		if (Hflag || (nfile > 1 && !hflag))
961 			printf("%s:", file);
962 		printf("%lld\n", tln);
963 	}
964 }
965 
966 void
967 clearg(void)
968 {
969 	int i, k;
970 	for (i=1; i<=nstate; i++)
971 		out[i] = 0;
972 	for (i=1; i<=nstate; i++)
973 		for (k=0; k<NCHARS; k++)
974 			gotofn[i][k] = 0;
975 	nstate = 1;
976 	nxtpos = inxtpos;
977 	reinit = 0;
978 	count = 0;
979 	for (i=3; i<=line; i++) tmpstat[i] = 0;
980 	if (cstate(line-1)==0) {
981 		tmpstat[line] = 1;
982 		count++;
983 		out[1] = 1;
984 	}
985 	for (i=3; i<=line; i++) initstat[i] = tmpstat[i];
986 	count--;		/*leave out position 1 */
987 	icount = count;
988 	tmpstat[1] = 0;
989 	add(state, 1);
990 	istat = nxtst(1, LEFT);
991 }
992 
993 int
994 mdotenter(void)
995 {
996 	int i, x1, x2;
997 	x1 = enter(DOT);
998 	x2 = enter(MDOT);
999 	for(i = 1; i < (int) eucw1; i++)
1000 		x2 = node(CAT, x2, enter(MDOT));
1001 	x1 = node(OR, x1, x2);
1002 	if(eucw2) {
1003 		x2 = enter('\216');
1004 		for(i = 1; i <= (int) eucw2; i++)
1005 			x2 = node(CAT, x2, enter(MDOT));
1006 		x1 = node(OR, x1, x2);
1007 	}
1008 	if(eucw3) {
1009 		x2 = enter('\217');
1010 		for(i = 1; i <= (int) eucw3; i++)
1011 			x2 = node(CAT, x2, enter(MDOT));
1012 		x1 = node(OR, x1, x2);
1013 	}
1014 	return(x1);
1015 }
1016 
1017 int
1018 mchar(wchar_t c)
1019 {
1020 	char multichar[MB_LEN_MAX+1];
1021 	char *p;
1022 	int x1, lc, length;
1023 
1024 	length = wctomb(multichar, c);
1025 	p = multichar;
1026 	*(p + length) = '\0';
1027 	x1 = enter((unsigned char)*p++);
1028 	while(lc = (unsigned char)*p++)
1029 		x1 = node(CAT, x1, enter(lc));
1030 	return(x1);
1031 }
1032 
/*
 * ccl -- build the subtree for a bracket expression in multibyte mode.
 *
 * genrange() turns the class at chars[count] into ranges held in
 * lower[]/upper[].  The leading ranges that are ASCII or single byte
 * control characters are written back to chars[] as an ordinary class
 * leaf; each remaining range becomes a byte-sequence subtree (mchar()
 * for a single character, range() otherwise).  All parts are joined
 * with OR nodes.  type is CCL or NCCL.  Returns the root of the subtree.
 */
1033 int
1034 ccl(int type)
1035 {
1036 	wchar_t c, lc;
1037 	char multic1[MB_LEN_MAX];
1038 	char multic2[MB_LEN_MAX];
1039 	int x1, x2, length, current, last, cclcnt;
1040 	x2 = 0;
1041 	current = 0;
	/* last = index of the final range in lower[]/upper[] */
1042 	last = genrange(type);
	/* rewrite the class in place, right after its length byte */
1043 	nxtchar = count + 1;
1044 	cclcnt = 0;
1045 	/* create usual character class for single byte characters */
1046 	while(current <= last && (isascii(c = lower[current]) || c <= 0377 && iscntrl(c))) {
1047 		cclcnt++;
1048 		chars[nxtchar++] = c;
1049 		if(lower[current] != upper[current]) {
1050 			chars[nxtchar++] = '-';
1051 			chars[nxtchar++] = upper[current];
1052 			cclcnt += 2;
1053 		}
1054 		current++;
1055 	}
1056 
	/* no single byte members: give the reserved length byte back */
1057 	if(cclcnt)
1058 		chars[count] = cclcnt;
1059 	else
1060 		nxtchar = count;
1061 	if(current > 0)
1062 		/* single byte part of character class */
1063 		x2 = cclenter(type);
1064 	else if(type == NCCL)
1065 		/* all single byte characters match */
1066 		x2 = enter(DOT);
	/* remaining ranges are multibyte */
1067 	while(current <= last) {
1068 		if(upper[current] == lower[current])
1069 			x1 = mchar(lower[current]);
1070 		else {
1071 			length = wctomb(multic1, lower[current]);
1072 			wctomb(multic2, upper[current]);
1073 			x1 = range((unsigned char *)multic1,
1074 			    (unsigned char *)multic2, length);
1075 		}
1076 		if(x2)
1077 			x2 = node(OR, x2, x1);
1078 		else
1079 			x2 = x1;
1080 		current++;
1081 	}
1082 	return x2;
1083 }
1084 
/*
 * range -- build the subtree matching every byte sequence of `length'
 * bytes between the sequences p1 and p2.
 *
 * One byte: a class leaf from classenter().  Equal first bytes: that
 * byte followed by the range of the remaining bytes.  Otherwise the OR
 * of: p1[0] followed by everything from the rest of p1 up to all-0377;
 * any first byte strictly between p1[0] and p2[0] followed by
 * length - 1 MDOT bytes (only if such a byte exists); and p2[0]
 * followed by everything from all-0200 up to the rest of p2.
 * Returns the index of the root of the subtree.
 */
1085 int
1086 range(unsigned char *p1, unsigned char *p2, int length)
1087 {
1088 	char multic[MB_LEN_MAX+1];
1089 	char *p;
1090 	int i, x1, x2;
1091 	if(length == 1)
1092 		return(classenter(*p1, *p2));
1093 	if(p1[0] == p2[0])
1094 		return(node(CAT, enter(p1[0]), range(p1+1, p2+1, length - 1)));
	/* upper bound for the tail after p1[0]: length - 1 bytes of 0377 */
1095 	p = multic;
1096 	for(i = 1; i < length; i++)
1097 		*p++ = 0377;
1098 	x1 = node(CAT, enter(p1[0]),
1099 	    range(p1+1, (unsigned char *)multic, length - 1));
1100 	if((unsigned char)(p1[0] + 1) < p2[0]) {
1101 		x2 = classenter(p1[0] + 1, p2[0] - 1);
1102 		for(i = 1; i < length; i++)
1103 			x2 = node(CAT, x2, enter(MDOT));
1104 		x1 = node(OR, x1, x2);
1105 	}
	/* lower bound for the tail after p2[0]: length - 1 bytes of 0200 */
1106 	p = multic;
1107 	for(i = 1; i < length; i++)
1108 		*p++ = 0200;
1109 	x2 = node(CAT, enter(p2[0]),
1110 	    range((unsigned char *)multic, p2+1, length - 1));
1111 	return node(OR, x1, x2);
1112 }
1113 
1114 int
1115 classenter(int x1, int x2)
1116 {
1117 	static int max, min;
1118 	if(!max) {
1119 		int i;
1120 		for(i = 0200; i <= 0377; i++)
1121 			if(!iscntrl(i))
1122 				break;
1123 		min = i;
1124 		for(i = 0377; i >= 0200; i--)
1125 			if(!iscntrl(i))
1126 				break;
1127 		max = i;
1128 	}
1129 	if(x1 <= min && x2 >= max)
1130 		return enter(MDOT);
1131 	if(nxtchar + 4 >= maxclin)
1132 		if(allocchars() == 0)
1133 			overflo();
1134 	count = nxtchar++;
1135 	chars[nxtchar++] = x1;
1136 	chars[nxtchar++] = '-';
1137 	chars[nxtchar++] = x2;
1138 	chars[count] = 3;
1139 	return cclenter(MCCL);
1140 }
1141 
1142 int
1143 genrange(int type)
1144 {
1145 	char *p, *endp;
1146 	int current, nel, i, last, length;
1147 	wchar_t c, lc;
1148 
1149 	current = 0;
1150 	p = &chars[count+1];
1151 	endp = &chars[count+1] + chars[count];
1152 	lc = 0;
1153 
1154 	/* convert character class into union of ranges */
1155 	while(p < endp) {
1156 		length = mbtowc(&c, p, MB_LEN_MAX);
1157 		p += length;
1158 		if(c == '-' && lc != 0) {
1159 			length = mbtowc(&c, p, MB_LEN_MAX);
1160 			upper[current-1] = c;
1161 			p += length;
1162 		} else {
1163 			lower[current] = c;
1164 			upper[current++] = c;
1165 		}
1166 		lc = c;
1167 	}
1168 	nel = current;
1169 	/* sort lower and upper bounds of ranges */
1170 	qsort((char *)lower, nel, sizeof(wchar_t), compare);
1171 	qsort((char *)upper, nel, sizeof(wchar_t), compare);
1172 	last = current - 1;
1173 	current = 0;
1174 	/* combine overlapping or adjacent ranges */
1175 	for(i = 0; i < last; i++)
1176 		if(upper[i] >= lower[i+1] - 1)
1177 			upper[current] = upper[i+1];
1178 		else {
1179 			lower[++current] = lower[i+1];
1180 			upper[current] = upper[i+1];
1181 		}
1182 	if(type == NCCL) {
1183 		/* find complement of character class */
1184 		int j, next;
1185 		i = 0;
1186 		while(i <= current && isascii(c=lower[i]) || c <= 0377 && iscntrl(c))
1187 			i++;
1188 		if(i > current) {
1189 			/* match all multibyte characters */
1190 			if(eucw2) {
1191 				lower[i] = maxmin(WCHAR_CS2, 0);
1192 				upper[i++] = maxmin(WCHAR_CS2, 1);
1193 			}
1194 			if(eucw3) {
1195 				lower[i] = maxmin(WCHAR_CS3, 0);
1196 				upper[i++] = maxmin(WCHAR_CS3, 1);
1197 			}
1198 			lower[i] = maxmin(WCHAR_CS1, 0);
1199 			upper[i++] = maxmin(WCHAR_CS1, 1);
1200 			return i - 1;
1201 		}
1202 		next = current + 1;
1203 		if(next + current + 2 >= maxwclin) {
1204 			maxwclin += MAXLIN + next + current + 2;
1205 			if((lower = (wchar_t *)realloc(lower, maxwclin *sizeof(wchar_t))) == (wchar_t *)0 ||
1206 			   (upper = (wchar_t *)realloc(upper, maxwclin * sizeof(wchar_t))) == (wchar_t *)0)
1207 				overflo();
1208 		}
1209 		if(eucw2 && lower[i] > maxmin(WCHAR_CS2, 0)) {
1210 			lower[next] = maxmin(WCHAR_CS2, 0);
1211 			if((lower[i] & WCHAR_CSMASK) != WCHAR_CS2) {
1212 				upper[next++] = maxmin(WCHAR_CS2, 1);
1213 				if((lower[i] & WCHAR_CSMASK) == WCHAR_CS1 && eucw3) {
1214 					lower[next] = maxmin(WCHAR_CS3, 0);
1215 					upper[next++] = maxmin(WCHAR_CS3, 1);
1216 				}
1217 				if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1218 					lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1219 					upper[next++] = lower[i] - 1;
1220 				}
1221 			} else
1222 				upper[next++] = lower[i] - 1;
1223 		} else if(lower[i] > maxmin(lower[i] & WCHAR_CSMASK, 0)) {
1224 			lower[next] = maxmin(lower[i] & WCHAR_CSMASK, 0);
1225 			upper[next++] = lower[i] - 1;
1226 		}
1227 		for(j = i; j < current; j++) {
1228 			if(upper[j] < maxmin(upper[j] & WCHAR_CSMASK, 1)) {
1229 				lower[next] = upper[j] + 1;
1230 				if((upper[j] & WCHAR_CSMASK) != (lower[j+1] & WCHAR_CSMASK)) {
1231 					upper[next++] = maxmin(upper[j] & WCHAR_CSMASK, 1);
1232 					if(eucw3 && (upper[j] & WCHAR_CSMASK) == WCHAR_CS2 && (lower[j+1] & WCHAR_CSMASK) == WCHAR_CS1) {
1233 						lower[next] = maxmin(WCHAR_CS3, 0);
1234 						upper[next++] = maxmin(WCHAR_CS3, 1);
1235 					}
1236 					if(lower[j+1] > maxmin(lower[j+1] & WCHAR_CSMASK, 0)) {
1237 						lower[next] = maxmin(lower[j+1] & WCHAR_CSMASK, 0);
1238 						upper[next++] = lower[j+1] - 1;
1239 					}
1240 				} else
1241 					upper[next++] = lower[j+1] - 1;
1242 			} else if(lower[j+1] > maxmin(lower[j+1], 0)) {
1243 				lower[next] = maxmin(lower[j+1], 0);
1244 				upper[next++] = lower[j+1] - 1;
1245 			}
1246 		}
1247 		if(upper[current] < maxmin(upper[current] & WCHAR_CSMASK, 1)) {
1248 			lower[next] = upper[current] + 1;
1249 			upper[next++] = maxmin(upper[current] & WCHAR_CSMASK, 1);
1250 		}
1251 		if((upper[current] & WCHAR_CSMASK) != WCHAR_CS1) {
1252 			if((upper[current] & WCHAR_CSMASK) == WCHAR_CS2 && eucw3) {
1253 				lower[next] = maxmin(WCHAR_CS3, 0);
1254 				upper[next++] = maxmin(WCHAR_CS3, 1);
1255 			}
1256 			lower[next] = maxmin(WCHAR_CS1, 0);
1257 			upper[next++] = maxmin(WCHAR_CS1, 1);
1258 		}
1259 		for(j = current + 1; j < next; j++) {
1260 			lower[i] = lower[j];
1261 			upper[i++] = upper[j];
1262 		}
1263 		current = i - 1;
1264 	}
1265 	return(current);
1266 }
1267 
/*
 * compare -- qsort() comparison for wide characters: returns -1, 0 or
 * 1 when *c is less than, equal to or greater than *d.
 */
int
compare(wchar_t *c, wchar_t *d)
{
	return ((*c > *d) - (*c < *d));
}
1277 
1278 wchar_t
1279 maxmin(wchar_t c, int flag)
1280 {
1281 	static wchar_t minmax1[2], minmax2[2], minmax3[2];
1282 
1283 	if(!minmax1[0]) {
1284 		/* compute min and max process codes for all code sets */
1285 		int length, i;
1286 		char multic[MB_LEN_MAX], minmax[2];
1287 		for(i = 0377; i >= 0200; i--)
1288 			if(!iscntrl(i))
1289 				break;
1290 		minmax[1] = i;
1291 		for(i = 0240; i <= 0377; i++)
1292 			if(!iscntrl(i))
1293 				break;
1294 		minmax[0] = i;
1295 		for(i = 0; i <= 1; i++) {
1296 			length = MB_LEN_MAX;
1297 			while(length--)
1298 				multic[length] = minmax[i];
1299 			mbtowc(&minmax1[i], multic, MB_LEN_MAX);
1300 			if(eucw2) {
1301 				multic[0] = SS2;
1302 				mbtowc(&minmax2[i], multic, MB_LEN_MAX);
1303 			}
1304 			if(eucw3) {
1305 				multic[0] = SS3;
1306 				mbtowc(&minmax3[i], multic, MB_LEN_MAX);
1307 			}
1308 		}
1309 	}
1310 	switch(c) {
1311 		case WCHAR_CS1: return minmax1[flag];
1312 		case WCHAR_CS2: return minmax2[flag];
1313 		case WCHAR_CS3: return minmax3[flag];
1314 	}
1315 
1316 	/* NOTREACHED */
1317 	return (0);
1318 }
1319