xref: /titanic_51/usr/src/cmd/awk/b.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23*7c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
24*7c478bd9Sstevel@tonic-gate 
25*7c478bd9Sstevel@tonic-gate 
26*7c478bd9Sstevel@tonic-gate /*
27*7c478bd9Sstevel@tonic-gate  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
28*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
29*7c478bd9Sstevel@tonic-gate  */
30*7c478bd9Sstevel@tonic-gate 
31*7c478bd9Sstevel@tonic-gate #ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 2.11	*/
32*7c478bd9Sstevel@tonic-gate 
33*7c478bd9Sstevel@tonic-gate #define	DEBUG
34*7c478bd9Sstevel@tonic-gate 
35*7c478bd9Sstevel@tonic-gate #include "awk.h"
36*7c478bd9Sstevel@tonic-gate #include <ctype.h>
37*7c478bd9Sstevel@tonic-gate #include <stdio.h>
38*7c478bd9Sstevel@tonic-gate #include "y.tab.h"
39*7c478bd9Sstevel@tonic-gate 
40*7c478bd9Sstevel@tonic-gate #define	HAT	(NCHARS-1)	/* matches ^ in regular expr */
41*7c478bd9Sstevel@tonic-gate 				/* NCHARS is 2**n */
42*7c478bd9Sstevel@tonic-gate 
43*7c478bd9Sstevel@tonic-gate #define type(v)		(v)->nobj
44*7c478bd9Sstevel@tonic-gate #define left(v)		(v)->narg[0]
45*7c478bd9Sstevel@tonic-gate #define right(v)	(v)->narg[1]
46*7c478bd9Sstevel@tonic-gate #define parent(v)	(v)->nnext
47*7c478bd9Sstevel@tonic-gate 
48*7c478bd9Sstevel@tonic-gate #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
49*7c478bd9Sstevel@tonic-gate #define UNARY	case STAR: case PLUS: case QUEST:
50*7c478bd9Sstevel@tonic-gate 
51*7c478bd9Sstevel@tonic-gate /* encoding in tree Nodes:
52*7c478bd9Sstevel@tonic-gate 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
53*7c478bd9Sstevel@tonic-gate 		left is index, right contains value or pointer to value
54*7c478bd9Sstevel@tonic-gate 	unary (STAR, PLUS, QUEST): left is child, right is null
55*7c478bd9Sstevel@tonic-gate 	binary (CAT, OR): left and right are children
56*7c478bd9Sstevel@tonic-gate 	parent contains pointer to parent
57*7c478bd9Sstevel@tonic-gate */
58*7c478bd9Sstevel@tonic-gate 
59*7c478bd9Sstevel@tonic-gate 
60*7c478bd9Sstevel@tonic-gate uchar	chars[RECSIZE];
61*7c478bd9Sstevel@tonic-gate int	setvec[RECSIZE];
62*7c478bd9Sstevel@tonic-gate int	tmpset[RECSIZE];
63*7c478bd9Sstevel@tonic-gate Node	*point[RECSIZE];
64*7c478bd9Sstevel@tonic-gate 
65*7c478bd9Sstevel@tonic-gate int	rtok;		/* next token in current re */
66*7c478bd9Sstevel@tonic-gate int	rlxval;
67*7c478bd9Sstevel@tonic-gate uchar	*rlxstr;
68*7c478bd9Sstevel@tonic-gate uchar	*prestr;	/* current position in current re */
69*7c478bd9Sstevel@tonic-gate uchar	*lastre;	/* origin of last re */
70*7c478bd9Sstevel@tonic-gate 
71*7c478bd9Sstevel@tonic-gate static	int setcnt;
72*7c478bd9Sstevel@tonic-gate static	int poscnt;
73*7c478bd9Sstevel@tonic-gate 
74*7c478bd9Sstevel@tonic-gate uchar	*patbeg;
75*7c478bd9Sstevel@tonic-gate int	patlen;
76*7c478bd9Sstevel@tonic-gate 
77*7c478bd9Sstevel@tonic-gate #define	NFA	20	/* cache this many dynamic fa's */
78*7c478bd9Sstevel@tonic-gate fa	*fatab[NFA];
79*7c478bd9Sstevel@tonic-gate int	nfatab	= 0;	/* entries in fatab */
80*7c478bd9Sstevel@tonic-gate fa	*mkdfa();
81*7c478bd9Sstevel@tonic-gate 
82*7c478bd9Sstevel@tonic-gate fa *makedfa(s, anchor)	/* returns dfa for reg expr s */
83*7c478bd9Sstevel@tonic-gate 	uchar *s;
84*7c478bd9Sstevel@tonic-gate 	int anchor;
85*7c478bd9Sstevel@tonic-gate {
86*7c478bd9Sstevel@tonic-gate 	int i, use, nuse;
87*7c478bd9Sstevel@tonic-gate 	fa *pfa;
88*7c478bd9Sstevel@tonic-gate 
89*7c478bd9Sstevel@tonic-gate 	if (compile_time)	/* a constant for sure */
90*7c478bd9Sstevel@tonic-gate 		return mkdfa(s, anchor);
91*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < nfatab; i++)	/* is it there already? */
92*7c478bd9Sstevel@tonic-gate 		if (fatab[i]->anchor == anchor && strcmp(fatab[i]->restr,s) == 0) {
93*7c478bd9Sstevel@tonic-gate 			fatab[i]->use++;
94*7c478bd9Sstevel@tonic-gate 			return fatab[i];
95*7c478bd9Sstevel@tonic-gate 	}
96*7c478bd9Sstevel@tonic-gate 	pfa = mkdfa(s, anchor);
97*7c478bd9Sstevel@tonic-gate 	if (nfatab < NFA) {	/* room for another */
98*7c478bd9Sstevel@tonic-gate 		fatab[nfatab] = pfa;
99*7c478bd9Sstevel@tonic-gate 		fatab[nfatab]->use = 1;
100*7c478bd9Sstevel@tonic-gate 		nfatab++;
101*7c478bd9Sstevel@tonic-gate 		return pfa;
102*7c478bd9Sstevel@tonic-gate 	}
103*7c478bd9Sstevel@tonic-gate 	use = fatab[0]->use;	/* replace least-recently used */
104*7c478bd9Sstevel@tonic-gate 	nuse = 0;
105*7c478bd9Sstevel@tonic-gate 	for (i = 1; i < nfatab; i++)
106*7c478bd9Sstevel@tonic-gate 		if (fatab[i]->use < use) {
107*7c478bd9Sstevel@tonic-gate 			use = fatab[i]->use;
108*7c478bd9Sstevel@tonic-gate 			nuse = i;
109*7c478bd9Sstevel@tonic-gate 		}
110*7c478bd9Sstevel@tonic-gate 	freefa(fatab[nuse]);
111*7c478bd9Sstevel@tonic-gate 	fatab[nuse] = pfa;
112*7c478bd9Sstevel@tonic-gate 	pfa->use = 1;
113*7c478bd9Sstevel@tonic-gate 	return pfa;
114*7c478bd9Sstevel@tonic-gate }
115*7c478bd9Sstevel@tonic-gate 
116*7c478bd9Sstevel@tonic-gate fa *mkdfa(s, anchor)	/* does the real work of making a dfa */
117*7c478bd9Sstevel@tonic-gate 	uchar *s;
118*7c478bd9Sstevel@tonic-gate 	int anchor;	/* anchor = 1 for anchored matches, else 0 */
119*7c478bd9Sstevel@tonic-gate {
120*7c478bd9Sstevel@tonic-gate 	Node *p, *p1, *reparse();
121*7c478bd9Sstevel@tonic-gate 	fa *f;
122*7c478bd9Sstevel@tonic-gate 
123*7c478bd9Sstevel@tonic-gate 	p = reparse(s);
124*7c478bd9Sstevel@tonic-gate 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
125*7c478bd9Sstevel@tonic-gate 		/* put ALL STAR in front of reg.  exp. */
126*7c478bd9Sstevel@tonic-gate 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
127*7c478bd9Sstevel@tonic-gate 		/* put FINAL after reg.  exp. */
128*7c478bd9Sstevel@tonic-gate 
129*7c478bd9Sstevel@tonic-gate 	poscnt = 0;
130*7c478bd9Sstevel@tonic-gate 	penter(p1);	/* enter parent pointers and leaf indices */
131*7c478bd9Sstevel@tonic-gate 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
132*7c478bd9Sstevel@tonic-gate 		overflo("no room for fa");
133*7c478bd9Sstevel@tonic-gate 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
134*7c478bd9Sstevel@tonic-gate 	cfoll(f, p1);	/* set up follow sets */
135*7c478bd9Sstevel@tonic-gate 	freetr(p1);
136*7c478bd9Sstevel@tonic-gate 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
137*7c478bd9Sstevel@tonic-gate 			overflo("out of space in makedfa");
138*7c478bd9Sstevel@tonic-gate 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
139*7c478bd9Sstevel@tonic-gate 		overflo("out of space in makedfa");
140*7c478bd9Sstevel@tonic-gate 	*f->posns[1] = 0;
141*7c478bd9Sstevel@tonic-gate 	f->initstat = makeinit(f, anchor);
142*7c478bd9Sstevel@tonic-gate 	f->anchor = anchor;
143*7c478bd9Sstevel@tonic-gate 	f->restr = tostring(s);
144*7c478bd9Sstevel@tonic-gate 	return f;
145*7c478bd9Sstevel@tonic-gate }
146*7c478bd9Sstevel@tonic-gate 
147*7c478bd9Sstevel@tonic-gate int makeinit(f, anchor)
148*7c478bd9Sstevel@tonic-gate 	fa *f;
149*7c478bd9Sstevel@tonic-gate 	int anchor;
150*7c478bd9Sstevel@tonic-gate {
151*7c478bd9Sstevel@tonic-gate 	register int i, k;
152*7c478bd9Sstevel@tonic-gate 
153*7c478bd9Sstevel@tonic-gate 	f->curstat = 2;
154*7c478bd9Sstevel@tonic-gate 	f->out[2] = 0;
155*7c478bd9Sstevel@tonic-gate 	f->reset = 0;
156*7c478bd9Sstevel@tonic-gate 	k = *(f->re[0].lfollow);
157*7c478bd9Sstevel@tonic-gate 	xfree(f->posns[2]);
158*7c478bd9Sstevel@tonic-gate 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
159*7c478bd9Sstevel@tonic-gate 		overflo("out of space in makeinit");
160*7c478bd9Sstevel@tonic-gate 	for (i=0; i<=k; i++) {
161*7c478bd9Sstevel@tonic-gate 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
162*7c478bd9Sstevel@tonic-gate 	}
163*7c478bd9Sstevel@tonic-gate 	if ((f->posns[2])[1] == f->accept)
164*7c478bd9Sstevel@tonic-gate 		f->out[2] = 1;
165*7c478bd9Sstevel@tonic-gate 	for (i=0; i<NCHARS; i++)
166*7c478bd9Sstevel@tonic-gate 		f->gototab[2][i] = 0;
167*7c478bd9Sstevel@tonic-gate 	f->curstat = cgoto(f, 2, HAT);
168*7c478bd9Sstevel@tonic-gate 	if (anchor) {
169*7c478bd9Sstevel@tonic-gate 		*f->posns[2] = k-1;	/* leave out position 0 */
170*7c478bd9Sstevel@tonic-gate 		for (i=0; i<k; i++) {
171*7c478bd9Sstevel@tonic-gate 			(f->posns[0])[i] = (f->posns[2])[i];
172*7c478bd9Sstevel@tonic-gate 		}
173*7c478bd9Sstevel@tonic-gate 
174*7c478bd9Sstevel@tonic-gate 		f->out[0] = f->out[2];
175*7c478bd9Sstevel@tonic-gate 		if (f->curstat != 2)
176*7c478bd9Sstevel@tonic-gate 			--(*f->posns[f->curstat]);
177*7c478bd9Sstevel@tonic-gate 	}
178*7c478bd9Sstevel@tonic-gate 	return f->curstat;
179*7c478bd9Sstevel@tonic-gate }
180*7c478bd9Sstevel@tonic-gate 
181*7c478bd9Sstevel@tonic-gate penter(p)	/* set up parent pointers and leaf indices */
182*7c478bd9Sstevel@tonic-gate 	Node *p;
183*7c478bd9Sstevel@tonic-gate {
184*7c478bd9Sstevel@tonic-gate 	switch(type(p)) {
185*7c478bd9Sstevel@tonic-gate 	LEAF
186*7c478bd9Sstevel@tonic-gate 		left(p) = (Node *) poscnt;
187*7c478bd9Sstevel@tonic-gate 		point[poscnt++] = p;
188*7c478bd9Sstevel@tonic-gate 		break;
189*7c478bd9Sstevel@tonic-gate 	UNARY
190*7c478bd9Sstevel@tonic-gate 		penter(left(p));
191*7c478bd9Sstevel@tonic-gate 		parent(left(p)) = p;
192*7c478bd9Sstevel@tonic-gate 		break;
193*7c478bd9Sstevel@tonic-gate 	case CAT:
194*7c478bd9Sstevel@tonic-gate 	case OR:
195*7c478bd9Sstevel@tonic-gate 		penter(left(p));
196*7c478bd9Sstevel@tonic-gate 		penter(right(p));
197*7c478bd9Sstevel@tonic-gate 		parent(left(p)) = p;
198*7c478bd9Sstevel@tonic-gate 		parent(right(p)) = p;
199*7c478bd9Sstevel@tonic-gate 		break;
200*7c478bd9Sstevel@tonic-gate 	default:
201*7c478bd9Sstevel@tonic-gate 		ERROR "unknown type %d in penter", type(p) FATAL;
202*7c478bd9Sstevel@tonic-gate 		break;
203*7c478bd9Sstevel@tonic-gate 	}
204*7c478bd9Sstevel@tonic-gate }
205*7c478bd9Sstevel@tonic-gate 
206*7c478bd9Sstevel@tonic-gate freetr(p)	/* free parse tree */
207*7c478bd9Sstevel@tonic-gate 	Node *p;
208*7c478bd9Sstevel@tonic-gate {
209*7c478bd9Sstevel@tonic-gate 	switch (type(p)) {
210*7c478bd9Sstevel@tonic-gate 	LEAF
211*7c478bd9Sstevel@tonic-gate 		xfree(p);
212*7c478bd9Sstevel@tonic-gate 		break;
213*7c478bd9Sstevel@tonic-gate 	UNARY
214*7c478bd9Sstevel@tonic-gate 		freetr(left(p));
215*7c478bd9Sstevel@tonic-gate 		xfree(p);
216*7c478bd9Sstevel@tonic-gate 		break;
217*7c478bd9Sstevel@tonic-gate 	case CAT:
218*7c478bd9Sstevel@tonic-gate 	case OR:
219*7c478bd9Sstevel@tonic-gate 		freetr(left(p));
220*7c478bd9Sstevel@tonic-gate 		freetr(right(p));
221*7c478bd9Sstevel@tonic-gate 		xfree(p);
222*7c478bd9Sstevel@tonic-gate 		break;
223*7c478bd9Sstevel@tonic-gate 	default:
224*7c478bd9Sstevel@tonic-gate 		ERROR "unknown type %d in freetr", type(p) FATAL;
225*7c478bd9Sstevel@tonic-gate 		break;
226*7c478bd9Sstevel@tonic-gate 	}
227*7c478bd9Sstevel@tonic-gate }
228*7c478bd9Sstevel@tonic-gate 
229*7c478bd9Sstevel@tonic-gate uchar *cclenter(p)
230*7c478bd9Sstevel@tonic-gate 	register uchar *p;
231*7c478bd9Sstevel@tonic-gate {
232*7c478bd9Sstevel@tonic-gate 	register int i, c;
233*7c478bd9Sstevel@tonic-gate 	uchar *op;
234*7c478bd9Sstevel@tonic-gate 
235*7c478bd9Sstevel@tonic-gate 	op = p;
236*7c478bd9Sstevel@tonic-gate 	i = 0;
237*7c478bd9Sstevel@tonic-gate 	while ((c = *p++) != 0) {
238*7c478bd9Sstevel@tonic-gate 		if (c == '\\') {
239*7c478bd9Sstevel@tonic-gate 			if ((c = *p++) == 't')
240*7c478bd9Sstevel@tonic-gate 				c = '\t';
241*7c478bd9Sstevel@tonic-gate 			else if (c == 'n')
242*7c478bd9Sstevel@tonic-gate 				c = '\n';
243*7c478bd9Sstevel@tonic-gate 			else if (c == 'f')
244*7c478bd9Sstevel@tonic-gate 				c = '\f';
245*7c478bd9Sstevel@tonic-gate 			else if (c == 'r')
246*7c478bd9Sstevel@tonic-gate 				c = '\r';
247*7c478bd9Sstevel@tonic-gate 			else if (c == 'b')
248*7c478bd9Sstevel@tonic-gate 				c = '\b';
249*7c478bd9Sstevel@tonic-gate 			else if (c == '\\')
250*7c478bd9Sstevel@tonic-gate 				c = '\\';
251*7c478bd9Sstevel@tonic-gate 			else if (isdigit(c)) {
252*7c478bd9Sstevel@tonic-gate 				int n = c - '0';
253*7c478bd9Sstevel@tonic-gate 				if (isdigit(*p)) {
254*7c478bd9Sstevel@tonic-gate 					n = 8 * n + *p++ - '0';
255*7c478bd9Sstevel@tonic-gate 					if (isdigit(*p))
256*7c478bd9Sstevel@tonic-gate 						n = 8 * n + *p++ - '0';
257*7c478bd9Sstevel@tonic-gate 				}
258*7c478bd9Sstevel@tonic-gate 				c = n;
259*7c478bd9Sstevel@tonic-gate 			} /* else */
260*7c478bd9Sstevel@tonic-gate 				/* c = c; */
261*7c478bd9Sstevel@tonic-gate 		} else if (c == '-' && i > 0 && chars[i-1] != 0) {
262*7c478bd9Sstevel@tonic-gate 			if (*p != 0) {
263*7c478bd9Sstevel@tonic-gate 				c = chars[i-1];
264*7c478bd9Sstevel@tonic-gate 				while ((uchar) c < *p) {	/* fails if *p is \\ */
265*7c478bd9Sstevel@tonic-gate 					if (i >= RECSIZE-1)
266*7c478bd9Sstevel@tonic-gate 						overflo("character class too big");
267*7c478bd9Sstevel@tonic-gate 					chars[i++] = ++c;
268*7c478bd9Sstevel@tonic-gate 				}
269*7c478bd9Sstevel@tonic-gate 				p++;
270*7c478bd9Sstevel@tonic-gate 				continue;
271*7c478bd9Sstevel@tonic-gate 			}
272*7c478bd9Sstevel@tonic-gate 		}
273*7c478bd9Sstevel@tonic-gate 		if (i >= RECSIZE-1)
274*7c478bd9Sstevel@tonic-gate 			overflo("character class too big");
275*7c478bd9Sstevel@tonic-gate 		chars[i++] = c;
276*7c478bd9Sstevel@tonic-gate 	}
277*7c478bd9Sstevel@tonic-gate 	chars[i++] = '\0';
278*7c478bd9Sstevel@tonic-gate 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, chars) );
279*7c478bd9Sstevel@tonic-gate 	xfree(op);
280*7c478bd9Sstevel@tonic-gate 	return(tostring(chars));
281*7c478bd9Sstevel@tonic-gate }
282*7c478bd9Sstevel@tonic-gate 
283*7c478bd9Sstevel@tonic-gate overflo(s)
284*7c478bd9Sstevel@tonic-gate 	uchar *s;
285*7c478bd9Sstevel@tonic-gate {
286*7c478bd9Sstevel@tonic-gate 	ERROR "regular expression too big: %s", gettext((char *) s) FATAL;
287*7c478bd9Sstevel@tonic-gate }
288*7c478bd9Sstevel@tonic-gate 
289*7c478bd9Sstevel@tonic-gate cfoll(f, v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
290*7c478bd9Sstevel@tonic-gate 	fa *f;
291*7c478bd9Sstevel@tonic-gate 	register Node *v;
292*7c478bd9Sstevel@tonic-gate {
293*7c478bd9Sstevel@tonic-gate 	register int i;
294*7c478bd9Sstevel@tonic-gate 	register int *p;
295*7c478bd9Sstevel@tonic-gate 
296*7c478bd9Sstevel@tonic-gate 	switch(type(v)) {
297*7c478bd9Sstevel@tonic-gate 	LEAF
298*7c478bd9Sstevel@tonic-gate 		f->re[(int) left(v)].ltype = type(v);
299*7c478bd9Sstevel@tonic-gate 		f->re[(int) left(v)].lval = (int) right(v);
300*7c478bd9Sstevel@tonic-gate 		for (i=0; i<=f->accept; i++)
301*7c478bd9Sstevel@tonic-gate 			setvec[i] = 0;
302*7c478bd9Sstevel@tonic-gate 		setcnt = 0;
303*7c478bd9Sstevel@tonic-gate 		follow(v);	/* computes setvec and setcnt */
304*7c478bd9Sstevel@tonic-gate 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
305*7c478bd9Sstevel@tonic-gate 			overflo("follow set overflow");
306*7c478bd9Sstevel@tonic-gate 		f->re[(int) left(v)].lfollow = p;
307*7c478bd9Sstevel@tonic-gate 		*p = setcnt;
308*7c478bd9Sstevel@tonic-gate 		for (i = f->accept; i >= 0; i--)
309*7c478bd9Sstevel@tonic-gate 			if (setvec[i] == 1) *++p = i;
310*7c478bd9Sstevel@tonic-gate 		break;
311*7c478bd9Sstevel@tonic-gate 	UNARY
312*7c478bd9Sstevel@tonic-gate 		cfoll(f,left(v));
313*7c478bd9Sstevel@tonic-gate 		break;
314*7c478bd9Sstevel@tonic-gate 	case CAT:
315*7c478bd9Sstevel@tonic-gate 	case OR:
316*7c478bd9Sstevel@tonic-gate 		cfoll(f,left(v));
317*7c478bd9Sstevel@tonic-gate 		cfoll(f,right(v));
318*7c478bd9Sstevel@tonic-gate 		break;
319*7c478bd9Sstevel@tonic-gate 	default:
320*7c478bd9Sstevel@tonic-gate 		ERROR "unknown type %d in cfoll", type(v) FATAL;
321*7c478bd9Sstevel@tonic-gate 	}
322*7c478bd9Sstevel@tonic-gate }
323*7c478bd9Sstevel@tonic-gate 
324*7c478bd9Sstevel@tonic-gate first(p)		/* collects initially active leaves of p into setvec */
325*7c478bd9Sstevel@tonic-gate 	register Node *p;	/* returns 0 or 1 depending on whether p matches empty string */
326*7c478bd9Sstevel@tonic-gate {
327*7c478bd9Sstevel@tonic-gate 	register int b;
328*7c478bd9Sstevel@tonic-gate 
329*7c478bd9Sstevel@tonic-gate 	switch(type(p)) {
330*7c478bd9Sstevel@tonic-gate 	LEAF
331*7c478bd9Sstevel@tonic-gate 		if (setvec[(int) left(p)] != 1) {
332*7c478bd9Sstevel@tonic-gate 			setvec[(int) left(p)] = 1;
333*7c478bd9Sstevel@tonic-gate 			setcnt++;
334*7c478bd9Sstevel@tonic-gate 		}
335*7c478bd9Sstevel@tonic-gate 		if (type(p) == CCL && (*(uchar *) right(p)) == '\0')
336*7c478bd9Sstevel@tonic-gate 			return(0);		/* empty CCL */
337*7c478bd9Sstevel@tonic-gate 		else return(1);
338*7c478bd9Sstevel@tonic-gate 	case PLUS:
339*7c478bd9Sstevel@tonic-gate 		if (first(left(p)) == 0) return(0);
340*7c478bd9Sstevel@tonic-gate 		return(1);
341*7c478bd9Sstevel@tonic-gate 	case STAR:
342*7c478bd9Sstevel@tonic-gate 	case QUEST:
343*7c478bd9Sstevel@tonic-gate 		first(left(p));
344*7c478bd9Sstevel@tonic-gate 		return(0);
345*7c478bd9Sstevel@tonic-gate 	case CAT:
346*7c478bd9Sstevel@tonic-gate 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
347*7c478bd9Sstevel@tonic-gate 		return(1);
348*7c478bd9Sstevel@tonic-gate 	case OR:
349*7c478bd9Sstevel@tonic-gate 		b = first(right(p));
350*7c478bd9Sstevel@tonic-gate 		if (first(left(p)) == 0 || b == 0) return(0);
351*7c478bd9Sstevel@tonic-gate 		return(1);
352*7c478bd9Sstevel@tonic-gate 	}
353*7c478bd9Sstevel@tonic-gate 	ERROR "unknown type %d in first", type(p) FATAL;
354*7c478bd9Sstevel@tonic-gate 	return(-1);
355*7c478bd9Sstevel@tonic-gate }
356*7c478bd9Sstevel@tonic-gate 
357*7c478bd9Sstevel@tonic-gate follow(v)
358*7c478bd9Sstevel@tonic-gate 	Node *v;		/* collects leaves that can follow v into setvec */
359*7c478bd9Sstevel@tonic-gate {
360*7c478bd9Sstevel@tonic-gate 	Node *p;
361*7c478bd9Sstevel@tonic-gate 
362*7c478bd9Sstevel@tonic-gate 	if (type(v) == FINAL)
363*7c478bd9Sstevel@tonic-gate 		return;
364*7c478bd9Sstevel@tonic-gate 	p = parent(v);
365*7c478bd9Sstevel@tonic-gate 	switch (type(p)) {
366*7c478bd9Sstevel@tonic-gate 	case STAR:
367*7c478bd9Sstevel@tonic-gate 	case PLUS:
368*7c478bd9Sstevel@tonic-gate 		first(v);
369*7c478bd9Sstevel@tonic-gate 		follow(p);
370*7c478bd9Sstevel@tonic-gate 		return;
371*7c478bd9Sstevel@tonic-gate 
372*7c478bd9Sstevel@tonic-gate 	case OR:
373*7c478bd9Sstevel@tonic-gate 	case QUEST:
374*7c478bd9Sstevel@tonic-gate 		follow(p);
375*7c478bd9Sstevel@tonic-gate 		return;
376*7c478bd9Sstevel@tonic-gate 
377*7c478bd9Sstevel@tonic-gate 	case CAT:
378*7c478bd9Sstevel@tonic-gate 		if (v == left(p)) {	/* v is left child of p */
379*7c478bd9Sstevel@tonic-gate 			if (first(right(p)) == 0) {
380*7c478bd9Sstevel@tonic-gate 				follow(p);
381*7c478bd9Sstevel@tonic-gate 				return;
382*7c478bd9Sstevel@tonic-gate 			}
383*7c478bd9Sstevel@tonic-gate 		}
384*7c478bd9Sstevel@tonic-gate 		else		/* v is right child */
385*7c478bd9Sstevel@tonic-gate 			follow(p);
386*7c478bd9Sstevel@tonic-gate 		return;
387*7c478bd9Sstevel@tonic-gate 	default:
388*7c478bd9Sstevel@tonic-gate 		ERROR "unknown type %d in follow", type(p) FATAL;
389*7c478bd9Sstevel@tonic-gate 		break;
390*7c478bd9Sstevel@tonic-gate 	}
391*7c478bd9Sstevel@tonic-gate }
392*7c478bd9Sstevel@tonic-gate 
393*7c478bd9Sstevel@tonic-gate member(c, s)	/* is c in s? */
394*7c478bd9Sstevel@tonic-gate 	register uchar c, *s;
395*7c478bd9Sstevel@tonic-gate {
396*7c478bd9Sstevel@tonic-gate 	while (*s)
397*7c478bd9Sstevel@tonic-gate 		if (c == *s++)
398*7c478bd9Sstevel@tonic-gate 			return(1);
399*7c478bd9Sstevel@tonic-gate 	return(0);
400*7c478bd9Sstevel@tonic-gate }
401*7c478bd9Sstevel@tonic-gate 
402*7c478bd9Sstevel@tonic-gate 
403*7c478bd9Sstevel@tonic-gate match(f, p)
404*7c478bd9Sstevel@tonic-gate 	register fa *f;
405*7c478bd9Sstevel@tonic-gate 	register uchar *p;
406*7c478bd9Sstevel@tonic-gate {
407*7c478bd9Sstevel@tonic-gate 	register int s, ns;
408*7c478bd9Sstevel@tonic-gate 
409*7c478bd9Sstevel@tonic-gate 	s = f->reset?makeinit(f,0):f->initstat;
410*7c478bd9Sstevel@tonic-gate 	if (f->out[s])
411*7c478bd9Sstevel@tonic-gate 		return(1);
412*7c478bd9Sstevel@tonic-gate 	do {
413*7c478bd9Sstevel@tonic-gate 		if (ns=f->gototab[s][*p])
414*7c478bd9Sstevel@tonic-gate 			s=ns;
415*7c478bd9Sstevel@tonic-gate 		else
416*7c478bd9Sstevel@tonic-gate 			s=cgoto(f,s,*p);
417*7c478bd9Sstevel@tonic-gate 		if (f->out[s])
418*7c478bd9Sstevel@tonic-gate 			return(1);
419*7c478bd9Sstevel@tonic-gate 	} while (*p++ != 0);
420*7c478bd9Sstevel@tonic-gate 	return(0);
421*7c478bd9Sstevel@tonic-gate }
422*7c478bd9Sstevel@tonic-gate 
423*7c478bd9Sstevel@tonic-gate pmatch(f, p)
424*7c478bd9Sstevel@tonic-gate 	register fa *f;
425*7c478bd9Sstevel@tonic-gate 	register uchar *p;
426*7c478bd9Sstevel@tonic-gate {
427*7c478bd9Sstevel@tonic-gate 	register int s, ns;
428*7c478bd9Sstevel@tonic-gate 	register uchar *q;
429*7c478bd9Sstevel@tonic-gate 	int i, k;
430*7c478bd9Sstevel@tonic-gate 
431*7c478bd9Sstevel@tonic-gate         if (f->reset) {
432*7c478bd9Sstevel@tonic-gate                 f->initstat = s = makeinit(f,1);
433*7c478bd9Sstevel@tonic-gate         } else {
434*7c478bd9Sstevel@tonic-gate                 s = f->initstat;
435*7c478bd9Sstevel@tonic-gate         }
436*7c478bd9Sstevel@tonic-gate 	patbeg = p;
437*7c478bd9Sstevel@tonic-gate 	patlen = -1;
438*7c478bd9Sstevel@tonic-gate 	do {
439*7c478bd9Sstevel@tonic-gate 		q = p;
440*7c478bd9Sstevel@tonic-gate 		do {
441*7c478bd9Sstevel@tonic-gate 			if (f->out[s])		/* final state */
442*7c478bd9Sstevel@tonic-gate 				patlen = q-p;
443*7c478bd9Sstevel@tonic-gate 			if (ns=f->gototab[s][*q])
444*7c478bd9Sstevel@tonic-gate 				s=ns;
445*7c478bd9Sstevel@tonic-gate 			else
446*7c478bd9Sstevel@tonic-gate 				s=cgoto(f,s,*q);
447*7c478bd9Sstevel@tonic-gate 			if (s==1)	/* no transition */
448*7c478bd9Sstevel@tonic-gate 				if (patlen >= 0) {
449*7c478bd9Sstevel@tonic-gate 					patbeg = p;
450*7c478bd9Sstevel@tonic-gate 					return(1);
451*7c478bd9Sstevel@tonic-gate 				}
452*7c478bd9Sstevel@tonic-gate 				else
453*7c478bd9Sstevel@tonic-gate 					goto nextin;	/* no match */
454*7c478bd9Sstevel@tonic-gate 		} while (*q++ != 0);
455*7c478bd9Sstevel@tonic-gate 		if (f->out[s])
456*7c478bd9Sstevel@tonic-gate 			patlen = q-p-1;	/* don't count $ */
457*7c478bd9Sstevel@tonic-gate 		if (patlen >= 0) {
458*7c478bd9Sstevel@tonic-gate 			patbeg = p;
459*7c478bd9Sstevel@tonic-gate 			return(1);
460*7c478bd9Sstevel@tonic-gate 		}
461*7c478bd9Sstevel@tonic-gate 	nextin:
462*7c478bd9Sstevel@tonic-gate 		s = 2;
463*7c478bd9Sstevel@tonic-gate 		if (f->reset) {
464*7c478bd9Sstevel@tonic-gate 			for (i=2; i<=f->curstat; i++)
465*7c478bd9Sstevel@tonic-gate 				xfree(f->posns[i]);
466*7c478bd9Sstevel@tonic-gate 			k = *f->posns[0];
467*7c478bd9Sstevel@tonic-gate 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
468*7c478bd9Sstevel@tonic-gate 				overflo("out of space in pmatch");
469*7c478bd9Sstevel@tonic-gate 			for (i=0; i<=k; i++)
470*7c478bd9Sstevel@tonic-gate 				(f->posns[2])[i] = (f->posns[0])[i];
471*7c478bd9Sstevel@tonic-gate 			f->initstat = f->curstat = 2;
472*7c478bd9Sstevel@tonic-gate 			f->out[2] = f->out[0];
473*7c478bd9Sstevel@tonic-gate 			for (i=0; i<NCHARS; i++)
474*7c478bd9Sstevel@tonic-gate 				f->gototab[2][i] = 0;
475*7c478bd9Sstevel@tonic-gate 		}
476*7c478bd9Sstevel@tonic-gate 	} while (*p++ != 0);
477*7c478bd9Sstevel@tonic-gate 	return (0);
478*7c478bd9Sstevel@tonic-gate }
479*7c478bd9Sstevel@tonic-gate 
480*7c478bd9Sstevel@tonic-gate nematch(f, p)
481*7c478bd9Sstevel@tonic-gate 	register fa *f;
482*7c478bd9Sstevel@tonic-gate 	register uchar *p;
483*7c478bd9Sstevel@tonic-gate {
484*7c478bd9Sstevel@tonic-gate 	register int s, ns;
485*7c478bd9Sstevel@tonic-gate 	register uchar *q;
486*7c478bd9Sstevel@tonic-gate 	int i, k;
487*7c478bd9Sstevel@tonic-gate 
488*7c478bd9Sstevel@tonic-gate 	if (f->reset) {
489*7c478bd9Sstevel@tonic-gate 		f->initstat = s = makeinit(f,1);
490*7c478bd9Sstevel@tonic-gate 	} else {
491*7c478bd9Sstevel@tonic-gate 		s = f->initstat;
492*7c478bd9Sstevel@tonic-gate 	}
493*7c478bd9Sstevel@tonic-gate 	patlen = -1;
494*7c478bd9Sstevel@tonic-gate 	while (*p) {
495*7c478bd9Sstevel@tonic-gate 		q = p;
496*7c478bd9Sstevel@tonic-gate 		do {
497*7c478bd9Sstevel@tonic-gate 			if (f->out[s])		/* final state */
498*7c478bd9Sstevel@tonic-gate 				patlen = q-p;
499*7c478bd9Sstevel@tonic-gate 			if (ns=f->gototab[s][*q])
500*7c478bd9Sstevel@tonic-gate 				s=ns;
501*7c478bd9Sstevel@tonic-gate 			else
502*7c478bd9Sstevel@tonic-gate 				s=cgoto(f,s,*q);
503*7c478bd9Sstevel@tonic-gate 			if (s==1)	/* no transition */
504*7c478bd9Sstevel@tonic-gate 				if (patlen > 0) {
505*7c478bd9Sstevel@tonic-gate 					patbeg = p;
506*7c478bd9Sstevel@tonic-gate 					return(1);
507*7c478bd9Sstevel@tonic-gate 				}
508*7c478bd9Sstevel@tonic-gate 				else
509*7c478bd9Sstevel@tonic-gate 					goto nnextin;	/* no nonempty match */
510*7c478bd9Sstevel@tonic-gate 		} while (*q++ != 0);
511*7c478bd9Sstevel@tonic-gate 		if (f->out[s])
512*7c478bd9Sstevel@tonic-gate 			patlen = q-p-1;	/* don't count $ */
513*7c478bd9Sstevel@tonic-gate 		if (patlen > 0 ) {
514*7c478bd9Sstevel@tonic-gate 			patbeg = p;
515*7c478bd9Sstevel@tonic-gate 			return(1);
516*7c478bd9Sstevel@tonic-gate 		}
517*7c478bd9Sstevel@tonic-gate 	nnextin:
518*7c478bd9Sstevel@tonic-gate 		s = 2;
519*7c478bd9Sstevel@tonic-gate 		if (f->reset) {
520*7c478bd9Sstevel@tonic-gate 			for (i=2; i<=f->curstat; i++)
521*7c478bd9Sstevel@tonic-gate 				xfree(f->posns[i]);
522*7c478bd9Sstevel@tonic-gate 			k = *f->posns[0];
523*7c478bd9Sstevel@tonic-gate 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
524*7c478bd9Sstevel@tonic-gate 				overflo("out of state space");
525*7c478bd9Sstevel@tonic-gate 			for (i=0; i<=k; i++)
526*7c478bd9Sstevel@tonic-gate 				(f->posns[2])[i] = (f->posns[0])[i];
527*7c478bd9Sstevel@tonic-gate 			f->initstat = f->curstat = 2;
528*7c478bd9Sstevel@tonic-gate 			f->out[2] = f->out[0];
529*7c478bd9Sstevel@tonic-gate 			for (i=0; i<NCHARS; i++)
530*7c478bd9Sstevel@tonic-gate 				f->gototab[2][i] = 0;
531*7c478bd9Sstevel@tonic-gate 		}
532*7c478bd9Sstevel@tonic-gate 	p++;
533*7c478bd9Sstevel@tonic-gate 	}
534*7c478bd9Sstevel@tonic-gate 	return (0);
535*7c478bd9Sstevel@tonic-gate }
536*7c478bd9Sstevel@tonic-gate 
537*7c478bd9Sstevel@tonic-gate Node *regexp(), *primary(), *concat(), *alt(), *unary();
538*7c478bd9Sstevel@tonic-gate 
539*7c478bd9Sstevel@tonic-gate Node *reparse(p)
540*7c478bd9Sstevel@tonic-gate 	uchar *p;
541*7c478bd9Sstevel@tonic-gate {
542*7c478bd9Sstevel@tonic-gate 	/* parses regular expression pointed to by p */
543*7c478bd9Sstevel@tonic-gate 	/* uses relex() to scan regular expression */
544*7c478bd9Sstevel@tonic-gate 	Node *np;
545*7c478bd9Sstevel@tonic-gate 
546*7c478bd9Sstevel@tonic-gate 	dprintf( ("reparse <%s>\n", p) );
547*7c478bd9Sstevel@tonic-gate 	lastre = prestr = p;	/* prestr points to string to be parsed */
548*7c478bd9Sstevel@tonic-gate 	rtok = relex();
549*7c478bd9Sstevel@tonic-gate 	if (rtok == '\0')
550*7c478bd9Sstevel@tonic-gate 		ERROR "empty regular expression" FATAL;
551*7c478bd9Sstevel@tonic-gate 	np = regexp();
552*7c478bd9Sstevel@tonic-gate 	if (rtok == '\0')
553*7c478bd9Sstevel@tonic-gate 		return(np);
554*7c478bd9Sstevel@tonic-gate 	else
555*7c478bd9Sstevel@tonic-gate 		ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
556*7c478bd9Sstevel@tonic-gate 	/*NOTREACHED*/
557*7c478bd9Sstevel@tonic-gate }
558*7c478bd9Sstevel@tonic-gate 
559*7c478bd9Sstevel@tonic-gate Node *regexp()
560*7c478bd9Sstevel@tonic-gate {
561*7c478bd9Sstevel@tonic-gate 	return (alt(concat(primary())));
562*7c478bd9Sstevel@tonic-gate }
563*7c478bd9Sstevel@tonic-gate 
564*7c478bd9Sstevel@tonic-gate Node *primary()
565*7c478bd9Sstevel@tonic-gate {
566*7c478bd9Sstevel@tonic-gate 	Node *np;
567*7c478bd9Sstevel@tonic-gate 
568*7c478bd9Sstevel@tonic-gate 	switch (rtok) {
569*7c478bd9Sstevel@tonic-gate 	case CHAR:
570*7c478bd9Sstevel@tonic-gate 		np = op2(CHAR, NIL, (Node *) rlxval);
571*7c478bd9Sstevel@tonic-gate 		rtok = relex();
572*7c478bd9Sstevel@tonic-gate 		return (unary(np));
573*7c478bd9Sstevel@tonic-gate 	case ALL:
574*7c478bd9Sstevel@tonic-gate 		rtok = relex();
575*7c478bd9Sstevel@tonic-gate 		return (unary(op2(ALL, NIL, NIL)));
576*7c478bd9Sstevel@tonic-gate 	case DOT:
577*7c478bd9Sstevel@tonic-gate 		rtok = relex();
578*7c478bd9Sstevel@tonic-gate 		return (unary(op2(DOT, NIL, NIL)));
579*7c478bd9Sstevel@tonic-gate 	case CCL:
580*7c478bd9Sstevel@tonic-gate 		np = op2(CCL, NIL, (Node*) cclenter(rlxstr));
581*7c478bd9Sstevel@tonic-gate 		rtok = relex();
582*7c478bd9Sstevel@tonic-gate 		return (unary(np));
583*7c478bd9Sstevel@tonic-gate 	case NCCL:
584*7c478bd9Sstevel@tonic-gate 		np = op2(NCCL, NIL, (Node *) cclenter(rlxstr));
585*7c478bd9Sstevel@tonic-gate 		rtok = relex();
586*7c478bd9Sstevel@tonic-gate 		return (unary(np));
587*7c478bd9Sstevel@tonic-gate 	case '^':
588*7c478bd9Sstevel@tonic-gate 		rtok = relex();
589*7c478bd9Sstevel@tonic-gate 		return (unary(op2(CHAR, NIL, (Node *) HAT)));
590*7c478bd9Sstevel@tonic-gate 	case '$':
591*7c478bd9Sstevel@tonic-gate 		rtok = relex();
592*7c478bd9Sstevel@tonic-gate 		return (unary(op2(CHAR, NIL, NIL)));
593*7c478bd9Sstevel@tonic-gate 	case '(':
594*7c478bd9Sstevel@tonic-gate 		rtok = relex();
595*7c478bd9Sstevel@tonic-gate 		if (rtok == ')') {	/* special pleading for () */
596*7c478bd9Sstevel@tonic-gate 			rtok = relex();
597*7c478bd9Sstevel@tonic-gate 			return unary(op2(CCL, NIL, (Node *) tostring("")));
598*7c478bd9Sstevel@tonic-gate 		}
599*7c478bd9Sstevel@tonic-gate 		np = regexp();
600*7c478bd9Sstevel@tonic-gate 		if (rtok == ')') {
601*7c478bd9Sstevel@tonic-gate 			rtok = relex();
602*7c478bd9Sstevel@tonic-gate 			return (unary(np));
603*7c478bd9Sstevel@tonic-gate 		}
604*7c478bd9Sstevel@tonic-gate 		else
605*7c478bd9Sstevel@tonic-gate 			ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL;
606*7c478bd9Sstevel@tonic-gate 	default:
607*7c478bd9Sstevel@tonic-gate 		ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL;
608*7c478bd9Sstevel@tonic-gate 	}
609*7c478bd9Sstevel@tonic-gate 	/*NOTREACHED*/
610*7c478bd9Sstevel@tonic-gate }
611*7c478bd9Sstevel@tonic-gate 
612*7c478bd9Sstevel@tonic-gate Node *concat(np)
613*7c478bd9Sstevel@tonic-gate 	Node *np;
614*7c478bd9Sstevel@tonic-gate {
615*7c478bd9Sstevel@tonic-gate 	switch (rtok) {
616*7c478bd9Sstevel@tonic-gate 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
617*7c478bd9Sstevel@tonic-gate 		return (concat(op2(CAT, np, primary())));
618*7c478bd9Sstevel@tonic-gate 	default:
619*7c478bd9Sstevel@tonic-gate 		return (np);
620*7c478bd9Sstevel@tonic-gate 	}
621*7c478bd9Sstevel@tonic-gate }
622*7c478bd9Sstevel@tonic-gate 
623*7c478bd9Sstevel@tonic-gate Node *alt(np)
624*7c478bd9Sstevel@tonic-gate 	Node *np;
625*7c478bd9Sstevel@tonic-gate {
626*7c478bd9Sstevel@tonic-gate 	if (rtok == OR) {
627*7c478bd9Sstevel@tonic-gate 		rtok = relex();
628*7c478bd9Sstevel@tonic-gate 		return (alt(op2(OR, np, concat(primary()))));
629*7c478bd9Sstevel@tonic-gate 	}
630*7c478bd9Sstevel@tonic-gate 	return (np);
631*7c478bd9Sstevel@tonic-gate }
632*7c478bd9Sstevel@tonic-gate 
633*7c478bd9Sstevel@tonic-gate Node *unary(np)
634*7c478bd9Sstevel@tonic-gate 	Node *np;
635*7c478bd9Sstevel@tonic-gate {
636*7c478bd9Sstevel@tonic-gate 	switch (rtok) {
637*7c478bd9Sstevel@tonic-gate 	case STAR:
638*7c478bd9Sstevel@tonic-gate 		rtok = relex();
639*7c478bd9Sstevel@tonic-gate 		return (unary(op2(STAR, np, NIL)));
640*7c478bd9Sstevel@tonic-gate 	case PLUS:
641*7c478bd9Sstevel@tonic-gate 		rtok = relex();
642*7c478bd9Sstevel@tonic-gate 		return (unary(op2(PLUS, np, NIL)));
643*7c478bd9Sstevel@tonic-gate 	case QUEST:
644*7c478bd9Sstevel@tonic-gate 		rtok = relex();
645*7c478bd9Sstevel@tonic-gate 		return (unary(op2(QUEST, np, NIL)));
646*7c478bd9Sstevel@tonic-gate 	default:
647*7c478bd9Sstevel@tonic-gate 		return (np);
648*7c478bd9Sstevel@tonic-gate 	}
649*7c478bd9Sstevel@tonic-gate }
650*7c478bd9Sstevel@tonic-gate 
651*7c478bd9Sstevel@tonic-gate relex()		/* lexical analyzer for reparse */
652*7c478bd9Sstevel@tonic-gate {
653*7c478bd9Sstevel@tonic-gate 	register int c;
654*7c478bd9Sstevel@tonic-gate 	uchar cbuf[RECSIZE];
655*7c478bd9Sstevel@tonic-gate 	int clen, cflag;
656*7c478bd9Sstevel@tonic-gate 
657*7c478bd9Sstevel@tonic-gate 	switch (c = *prestr++) {
658*7c478bd9Sstevel@tonic-gate 	case '|': return OR;
659*7c478bd9Sstevel@tonic-gate 	case '*': return STAR;
660*7c478bd9Sstevel@tonic-gate 	case '+': return PLUS;
661*7c478bd9Sstevel@tonic-gate 	case '?': return QUEST;
662*7c478bd9Sstevel@tonic-gate 	case '.': return DOT;
663*7c478bd9Sstevel@tonic-gate 	case '\0': prestr--; return '\0';
664*7c478bd9Sstevel@tonic-gate 	case '^':
665*7c478bd9Sstevel@tonic-gate 	case '$':
666*7c478bd9Sstevel@tonic-gate 	case '(':
667*7c478bd9Sstevel@tonic-gate 	case ')':
668*7c478bd9Sstevel@tonic-gate 		return c;
669*7c478bd9Sstevel@tonic-gate 	case '\\':
670*7c478bd9Sstevel@tonic-gate 		if ((c = *prestr++) == 't')
671*7c478bd9Sstevel@tonic-gate 			c = '\t';
672*7c478bd9Sstevel@tonic-gate 		else if (c == 'n')
673*7c478bd9Sstevel@tonic-gate 			c = '\n';
674*7c478bd9Sstevel@tonic-gate 		else if (c == 'f')
675*7c478bd9Sstevel@tonic-gate 			c = '\f';
676*7c478bd9Sstevel@tonic-gate 		else if (c == 'r')
677*7c478bd9Sstevel@tonic-gate 			c = '\r';
678*7c478bd9Sstevel@tonic-gate 		else if (c == 'b')
679*7c478bd9Sstevel@tonic-gate 			c = '\b';
680*7c478bd9Sstevel@tonic-gate 		else if (c == '\\')
681*7c478bd9Sstevel@tonic-gate 			c = '\\';
682*7c478bd9Sstevel@tonic-gate 		else if (isdigit(c)) {
683*7c478bd9Sstevel@tonic-gate 			int n = c - '0';
684*7c478bd9Sstevel@tonic-gate 			if (isdigit(*prestr)) {
685*7c478bd9Sstevel@tonic-gate 				n = 8 * n + *prestr++ - '0';
686*7c478bd9Sstevel@tonic-gate 				if (isdigit(*prestr))
687*7c478bd9Sstevel@tonic-gate 					n = 8 * n + *prestr++ - '0';
688*7c478bd9Sstevel@tonic-gate 			}
689*7c478bd9Sstevel@tonic-gate 			c = n;
690*7c478bd9Sstevel@tonic-gate 		} /* else it's now in c */
691*7c478bd9Sstevel@tonic-gate 		rlxval = c;
692*7c478bd9Sstevel@tonic-gate 		return CHAR;
693*7c478bd9Sstevel@tonic-gate 	default:
694*7c478bd9Sstevel@tonic-gate 		rlxval = c;
695*7c478bd9Sstevel@tonic-gate 		return CHAR;
696*7c478bd9Sstevel@tonic-gate 	case '[':
697*7c478bd9Sstevel@tonic-gate 		clen = 0;
698*7c478bd9Sstevel@tonic-gate 		if (*prestr == '^') {
699*7c478bd9Sstevel@tonic-gate 			cflag = 1;
700*7c478bd9Sstevel@tonic-gate 			prestr++;
701*7c478bd9Sstevel@tonic-gate 		}
702*7c478bd9Sstevel@tonic-gate 		else
703*7c478bd9Sstevel@tonic-gate 			cflag = 0;
704*7c478bd9Sstevel@tonic-gate 		for (;;) {
705*7c478bd9Sstevel@tonic-gate 			if ((c = *prestr++) == '\\') {
706*7c478bd9Sstevel@tonic-gate 				cbuf[clen++] = '\\';
707*7c478bd9Sstevel@tonic-gate 				if ((c = *prestr++) == '\0')
708*7c478bd9Sstevel@tonic-gate 					ERROR "nonterminated character class %s", lastre FATAL;
709*7c478bd9Sstevel@tonic-gate 				cbuf[clen++] = c;
710*7c478bd9Sstevel@tonic-gate 			} else if (c == ']') {
711*7c478bd9Sstevel@tonic-gate 				cbuf[clen] = 0;
712*7c478bd9Sstevel@tonic-gate 				rlxstr = tostring(cbuf);
713*7c478bd9Sstevel@tonic-gate 				if (cflag == 0)
714*7c478bd9Sstevel@tonic-gate 					return CCL;
715*7c478bd9Sstevel@tonic-gate 				else
716*7c478bd9Sstevel@tonic-gate 					return NCCL;
717*7c478bd9Sstevel@tonic-gate 			} else if (c == '\n') {
718*7c478bd9Sstevel@tonic-gate 				ERROR "newline in character class %s...", lastre FATAL;
719*7c478bd9Sstevel@tonic-gate 			} else if (c == '\0') {
720*7c478bd9Sstevel@tonic-gate 				ERROR "nonterminated character class %s", lastre FATAL;
721*7c478bd9Sstevel@tonic-gate 			} else
722*7c478bd9Sstevel@tonic-gate 				cbuf[clen++] = c;
723*7c478bd9Sstevel@tonic-gate 		}
724*7c478bd9Sstevel@tonic-gate 	}
725*7c478bd9Sstevel@tonic-gate }
726*7c478bd9Sstevel@tonic-gate 
727*7c478bd9Sstevel@tonic-gate int cgoto(f, s, c)
728*7c478bd9Sstevel@tonic-gate 	fa *f;
729*7c478bd9Sstevel@tonic-gate 	int s, c;
730*7c478bd9Sstevel@tonic-gate {
731*7c478bd9Sstevel@tonic-gate 	register int i, j, k;
732*7c478bd9Sstevel@tonic-gate 	register int *p, *q;
733*7c478bd9Sstevel@tonic-gate 
734*7c478bd9Sstevel@tonic-gate 	for (i=0; i<=f->accept; i++)
735*7c478bd9Sstevel@tonic-gate 		setvec[i] = 0;
736*7c478bd9Sstevel@tonic-gate 	setcnt = 0;
737*7c478bd9Sstevel@tonic-gate 	/* compute positions of gototab[s,c] into setvec */
738*7c478bd9Sstevel@tonic-gate 	p = f->posns[s];
739*7c478bd9Sstevel@tonic-gate 	for (i=1; i<=*p; i++) {
740*7c478bd9Sstevel@tonic-gate 		if ((k = f->re[p[i]].ltype) != FINAL) {
741*7c478bd9Sstevel@tonic-gate 			if (k == CHAR && c == f->re[p[i]].lval
742*7c478bd9Sstevel@tonic-gate 				|| k == DOT && c != 0 && c != HAT
743*7c478bd9Sstevel@tonic-gate 				|| k == ALL && c != 0
744*7c478bd9Sstevel@tonic-gate 				|| k == CCL && member(c, (uchar *) f->re[p[i]].lval)
745*7c478bd9Sstevel@tonic-gate 				|| k == NCCL && !member(c, (uchar *) f->re[p[i]].lval) && c != 0 && c != HAT) {
746*7c478bd9Sstevel@tonic-gate 					q = f->re[p[i]].lfollow;
747*7c478bd9Sstevel@tonic-gate 					for (j=1; j<=*q; j++) {
748*7c478bd9Sstevel@tonic-gate 						if (setvec[q[j]] == 0) {
749*7c478bd9Sstevel@tonic-gate 							setcnt++;
750*7c478bd9Sstevel@tonic-gate 							setvec[q[j]] = 1;
751*7c478bd9Sstevel@tonic-gate 						}
752*7c478bd9Sstevel@tonic-gate 					}
753*7c478bd9Sstevel@tonic-gate 				}
754*7c478bd9Sstevel@tonic-gate 		}
755*7c478bd9Sstevel@tonic-gate 	}
756*7c478bd9Sstevel@tonic-gate 	/* determine if setvec is a previous state */
757*7c478bd9Sstevel@tonic-gate 	tmpset[0] = setcnt;
758*7c478bd9Sstevel@tonic-gate 	j = 1;
759*7c478bd9Sstevel@tonic-gate 	for (i = f->accept; i >= 0; i--)
760*7c478bd9Sstevel@tonic-gate 		if (setvec[i]) {
761*7c478bd9Sstevel@tonic-gate 			tmpset[j++] = i;
762*7c478bd9Sstevel@tonic-gate 		}
763*7c478bd9Sstevel@tonic-gate 	/* tmpset == previous state? */
764*7c478bd9Sstevel@tonic-gate 	for (i=1; i<= f->curstat; i++) {
765*7c478bd9Sstevel@tonic-gate 		p = f->posns[i];
766*7c478bd9Sstevel@tonic-gate 		if ((k = tmpset[0]) != p[0])
767*7c478bd9Sstevel@tonic-gate 			goto different;
768*7c478bd9Sstevel@tonic-gate 		for (j = 1; j <= k; j++)
769*7c478bd9Sstevel@tonic-gate 			if (tmpset[j] != p[j])
770*7c478bd9Sstevel@tonic-gate 				goto different;
771*7c478bd9Sstevel@tonic-gate 		/* setvec is state i */
772*7c478bd9Sstevel@tonic-gate 		f->gototab[s][c] = i;
773*7c478bd9Sstevel@tonic-gate 		return i;
774*7c478bd9Sstevel@tonic-gate 	different:;
775*7c478bd9Sstevel@tonic-gate 	}
776*7c478bd9Sstevel@tonic-gate 
777*7c478bd9Sstevel@tonic-gate 	/* add tmpset to current set of states */
778*7c478bd9Sstevel@tonic-gate 	if (f->curstat >= NSTATES-1) {
779*7c478bd9Sstevel@tonic-gate 		f->curstat = 2;
780*7c478bd9Sstevel@tonic-gate 		f->reset = 1;
781*7c478bd9Sstevel@tonic-gate 		for (i=2; i<NSTATES; i++)
782*7c478bd9Sstevel@tonic-gate 			xfree(f->posns[i]);
783*7c478bd9Sstevel@tonic-gate 	}
784*7c478bd9Sstevel@tonic-gate 	else
785*7c478bd9Sstevel@tonic-gate 		++(f->curstat);
786*7c478bd9Sstevel@tonic-gate 	for (i=0; i<NCHARS; i++)
787*7c478bd9Sstevel@tonic-gate 		f->gototab[f->curstat][i] = 0;
788*7c478bd9Sstevel@tonic-gate 	xfree(f->posns[f->curstat]);
789*7c478bd9Sstevel@tonic-gate 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
790*7c478bd9Sstevel@tonic-gate 		overflo("out of space in cgoto");
791*7c478bd9Sstevel@tonic-gate 
792*7c478bd9Sstevel@tonic-gate 	f->posns[f->curstat] = p;
793*7c478bd9Sstevel@tonic-gate 	f->gototab[s][c] = f->curstat;
794*7c478bd9Sstevel@tonic-gate 	for (i = 0; i <= setcnt; i++)
795*7c478bd9Sstevel@tonic-gate 		p[i] = tmpset[i];
796*7c478bd9Sstevel@tonic-gate 	if (setvec[f->accept])
797*7c478bd9Sstevel@tonic-gate 		f->out[f->curstat] = 1;
798*7c478bd9Sstevel@tonic-gate 	else
799*7c478bd9Sstevel@tonic-gate 		f->out[f->curstat] = 0;
800*7c478bd9Sstevel@tonic-gate 	return f->curstat;
801*7c478bd9Sstevel@tonic-gate }
802*7c478bd9Sstevel@tonic-gate 
803*7c478bd9Sstevel@tonic-gate 
804*7c478bd9Sstevel@tonic-gate freefa(f)
805*7c478bd9Sstevel@tonic-gate 	struct fa *f;
806*7c478bd9Sstevel@tonic-gate {
807*7c478bd9Sstevel@tonic-gate 
808*7c478bd9Sstevel@tonic-gate 	register int i;
809*7c478bd9Sstevel@tonic-gate 
810*7c478bd9Sstevel@tonic-gate 	if (f == NULL)
811*7c478bd9Sstevel@tonic-gate 		return;
812*7c478bd9Sstevel@tonic-gate 	for (i=0; i<=f->curstat; i++)
813*7c478bd9Sstevel@tonic-gate 		xfree(f->posns[i]);
814*7c478bd9Sstevel@tonic-gate 	for (i=0; i<=f->accept; i++)
815*7c478bd9Sstevel@tonic-gate 		xfree(f->re[i].lfollow);
816*7c478bd9Sstevel@tonic-gate 	xfree(f->restr);
817*7c478bd9Sstevel@tonic-gate 	xfree(f);
818*7c478bd9Sstevel@tonic-gate }
819