xref: /titanic_52/usr/src/cmd/awk/b.c (revision 1ee2e5fa222f6d33d1ff1c48f155973a5e146434)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate 
237c478bd9Sstevel@tonic-gate /*
24*1ee2e5faSnakanon  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
257c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
267c478bd9Sstevel@tonic-gate  */
277c478bd9Sstevel@tonic-gate 
28*1ee2e5faSnakanon /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29*1ee2e5faSnakanon /*	  All Rights Reserved  	*/
30*1ee2e5faSnakanon 
31*1ee2e5faSnakanon #pragma ident	"%Z%%M%	%I%	%E% SMI"
327c478bd9Sstevel@tonic-gate 
337c478bd9Sstevel@tonic-gate #define	DEBUG
347c478bd9Sstevel@tonic-gate 
357c478bd9Sstevel@tonic-gate #include "awk.h"
367c478bd9Sstevel@tonic-gate #include "y.tab.h"
377c478bd9Sstevel@tonic-gate 
387c478bd9Sstevel@tonic-gate #define	HAT	(NCHARS-1)	/* matches ^ in regular expr */
397c478bd9Sstevel@tonic-gate 				/* NCHARS is 2**n */
40*1ee2e5faSnakanon #define	MAXLIN (3 * LINE_MAX)
417c478bd9Sstevel@tonic-gate 
427c478bd9Sstevel@tonic-gate #define	type(v)		(v)->nobj
437c478bd9Sstevel@tonic-gate #define	left(v)		(v)->narg[0]
447c478bd9Sstevel@tonic-gate #define	right(v)	(v)->narg[1]
457c478bd9Sstevel@tonic-gate #define	parent(v)	(v)->nnext
467c478bd9Sstevel@tonic-gate 
477c478bd9Sstevel@tonic-gate #define	LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
487c478bd9Sstevel@tonic-gate #define	UNARY	case STAR: case PLUS: case QUEST:
497c478bd9Sstevel@tonic-gate 
50*1ee2e5faSnakanon /*
51*1ee2e5faSnakanon  * encoding in tree Nodes:
52*1ee2e5faSnakanon  *	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
53*1ee2e5faSnakanon  *		left is index, right contains value or pointer to value
54*1ee2e5faSnakanon  *	unary (STAR, PLUS, QUEST): left is child, right is null
55*1ee2e5faSnakanon  *	binary (CAT, OR): left and right are children
56*1ee2e5faSnakanon  *	parent contains pointer to parent
577c478bd9Sstevel@tonic-gate  */
587c478bd9Sstevel@tonic-gate 
59*1ee2e5faSnakanon int	setvec[MAXLIN];
60*1ee2e5faSnakanon int	tmpset[MAXLIN];
61*1ee2e5faSnakanon Node	*point[MAXLIN];
627c478bd9Sstevel@tonic-gate 
637c478bd9Sstevel@tonic-gate int	rtok;		/* next token in current re */
647c478bd9Sstevel@tonic-gate int	rlxval;
657c478bd9Sstevel@tonic-gate uchar	*rlxstr;
667c478bd9Sstevel@tonic-gate uchar	*prestr;	/* current position in current re */
677c478bd9Sstevel@tonic-gate uchar	*lastre;	/* origin of last re */
687c478bd9Sstevel@tonic-gate 
697c478bd9Sstevel@tonic-gate static	int setcnt;
707c478bd9Sstevel@tonic-gate static	int poscnt;
717c478bd9Sstevel@tonic-gate 
727c478bd9Sstevel@tonic-gate uchar	*patbeg;
737c478bd9Sstevel@tonic-gate int	patlen;
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate #define	NFA	20	/* cache this many dynamic fa's */
767c478bd9Sstevel@tonic-gate fa	*fatab[NFA];
777c478bd9Sstevel@tonic-gate int	nfatab	= 0;	/* entries in fatab */
787c478bd9Sstevel@tonic-gate 
79*1ee2e5faSnakanon static fa	*mkdfa(uchar *, int);
80*1ee2e5faSnakanon static int	makeinit(fa *, int);
81*1ee2e5faSnakanon static void	penter(Node *);
82*1ee2e5faSnakanon static void	freetr(Node *);
83*1ee2e5faSnakanon static void	overflo(char *);
84*1ee2e5faSnakanon static void	cfoll(fa *, Node *);
85*1ee2e5faSnakanon static void	follow(Node *);
86*1ee2e5faSnakanon static Node	*reparse(uchar *);
87*1ee2e5faSnakanon static int	relex(void);
88*1ee2e5faSnakanon static void	freefa(fa *);
89*1ee2e5faSnakanon static int	cgoto(fa *, int, int);
90*1ee2e5faSnakanon 
91*1ee2e5faSnakanon fa *
92*1ee2e5faSnakanon makedfa(uchar *s, int anchor)	/* returns dfa for reg expr s */
937c478bd9Sstevel@tonic-gate {
947c478bd9Sstevel@tonic-gate 	int i, use, nuse;
957c478bd9Sstevel@tonic-gate 	fa *pfa;
967c478bd9Sstevel@tonic-gate 
977c478bd9Sstevel@tonic-gate 	if (compile_time)	/* a constant for sure */
98*1ee2e5faSnakanon 		return (mkdfa(s, anchor));
99*1ee2e5faSnakanon 	for (i = 0; i < nfatab; i++) {	/* is it there already? */
100*1ee2e5faSnakanon 		if (fatab[i]->anchor == anchor &&
101*1ee2e5faSnakanon 		    strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
1027c478bd9Sstevel@tonic-gate 			fatab[i]->use++;
103*1ee2e5faSnakanon 			return (fatab[i]);
104*1ee2e5faSnakanon 		}
1057c478bd9Sstevel@tonic-gate 	}
1067c478bd9Sstevel@tonic-gate 	pfa = mkdfa(s, anchor);
1077c478bd9Sstevel@tonic-gate 	if (nfatab < NFA) {	/* room for another */
1087c478bd9Sstevel@tonic-gate 		fatab[nfatab] = pfa;
1097c478bd9Sstevel@tonic-gate 		fatab[nfatab]->use = 1;
1107c478bd9Sstevel@tonic-gate 		nfatab++;
111*1ee2e5faSnakanon 		return (pfa);
1127c478bd9Sstevel@tonic-gate 	}
1137c478bd9Sstevel@tonic-gate 	use = fatab[0]->use;	/* replace least-recently used */
1147c478bd9Sstevel@tonic-gate 	nuse = 0;
1157c478bd9Sstevel@tonic-gate 	for (i = 1; i < nfatab; i++)
1167c478bd9Sstevel@tonic-gate 		if (fatab[i]->use < use) {
1177c478bd9Sstevel@tonic-gate 			use = fatab[i]->use;
1187c478bd9Sstevel@tonic-gate 			nuse = i;
1197c478bd9Sstevel@tonic-gate 		}
1207c478bd9Sstevel@tonic-gate 	freefa(fatab[nuse]);
1217c478bd9Sstevel@tonic-gate 	fatab[nuse] = pfa;
1227c478bd9Sstevel@tonic-gate 	pfa->use = 1;
123*1ee2e5faSnakanon 	return (pfa);
1247c478bd9Sstevel@tonic-gate }
1257c478bd9Sstevel@tonic-gate 
126*1ee2e5faSnakanon fa *
127*1ee2e5faSnakanon mkdfa(uchar *s, int anchor)	/* does the real work of making a dfa */
128*1ee2e5faSnakanon 	/* anchor = 1 for anchored matches, else 0 */
1297c478bd9Sstevel@tonic-gate {
130*1ee2e5faSnakanon 	Node *p, *p1;
1317c478bd9Sstevel@tonic-gate 	fa *f;
1327c478bd9Sstevel@tonic-gate 
1337c478bd9Sstevel@tonic-gate 	p = reparse(s);
1347c478bd9Sstevel@tonic-gate 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
1357c478bd9Sstevel@tonic-gate 		/* put ALL STAR in front of reg.  exp. */
1367c478bd9Sstevel@tonic-gate 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
1377c478bd9Sstevel@tonic-gate 		/* put FINAL after reg.  exp. */
1387c478bd9Sstevel@tonic-gate 
1397c478bd9Sstevel@tonic-gate 	poscnt = 0;
1407c478bd9Sstevel@tonic-gate 	penter(p1);	/* enter parent pointers and leaf indices */
1417c478bd9Sstevel@tonic-gate 	if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
1427c478bd9Sstevel@tonic-gate 		overflo("no room for fa");
143*1ee2e5faSnakanon 	/* penter has computed number of positions in re */
144*1ee2e5faSnakanon 	f->accept = poscnt-1;
1457c478bd9Sstevel@tonic-gate 	cfoll(f, p1);	/* set up follow sets */
1467c478bd9Sstevel@tonic-gate 	freetr(p1);
147*1ee2e5faSnakanon 	if ((f->posns[0] =
148*1ee2e5faSnakanon 	    (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
1497c478bd9Sstevel@tonic-gate 			overflo("out of space in makedfa");
150*1ee2e5faSnakanon 	}
1517c478bd9Sstevel@tonic-gate 	if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
1527c478bd9Sstevel@tonic-gate 		overflo("out of space in makedfa");
1537c478bd9Sstevel@tonic-gate 	*f->posns[1] = 0;
1547c478bd9Sstevel@tonic-gate 	f->initstat = makeinit(f, anchor);
1557c478bd9Sstevel@tonic-gate 	f->anchor = anchor;
1567c478bd9Sstevel@tonic-gate 	f->restr = tostring(s);
157*1ee2e5faSnakanon 	return (f);
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate 
160*1ee2e5faSnakanon static int
161*1ee2e5faSnakanon makeinit(fa *f, int anchor)
1627c478bd9Sstevel@tonic-gate {
1637c478bd9Sstevel@tonic-gate 	register int i, k;
1647c478bd9Sstevel@tonic-gate 
1657c478bd9Sstevel@tonic-gate 	f->curstat = 2;
1667c478bd9Sstevel@tonic-gate 	f->out[2] = 0;
1677c478bd9Sstevel@tonic-gate 	f->reset = 0;
1687c478bd9Sstevel@tonic-gate 	k = *(f->re[0].lfollow);
1697c478bd9Sstevel@tonic-gate 	xfree(f->posns[2]);
1707c478bd9Sstevel@tonic-gate 	if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
1717c478bd9Sstevel@tonic-gate 		overflo("out of space in makeinit");
1727c478bd9Sstevel@tonic-gate 	for (i = 0; i <= k; i++) {
1737c478bd9Sstevel@tonic-gate 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
1747c478bd9Sstevel@tonic-gate 	}
1757c478bd9Sstevel@tonic-gate 	if ((f->posns[2])[1] == f->accept)
1767c478bd9Sstevel@tonic-gate 		f->out[2] = 1;
1777c478bd9Sstevel@tonic-gate 	for (i = 0; i < NCHARS; i++)
1787c478bd9Sstevel@tonic-gate 		f->gototab[2][i] = 0;
1797c478bd9Sstevel@tonic-gate 	f->curstat = cgoto(f, 2, HAT);
1807c478bd9Sstevel@tonic-gate 	if (anchor) {
1817c478bd9Sstevel@tonic-gate 		*f->posns[2] = k-1;	/* leave out position 0 */
1827c478bd9Sstevel@tonic-gate 		for (i = 0; i < k; i++) {
1837c478bd9Sstevel@tonic-gate 			(f->posns[0])[i] = (f->posns[2])[i];
1847c478bd9Sstevel@tonic-gate 		}
1857c478bd9Sstevel@tonic-gate 
1867c478bd9Sstevel@tonic-gate 		f->out[0] = f->out[2];
1877c478bd9Sstevel@tonic-gate 		if (f->curstat != 2)
1887c478bd9Sstevel@tonic-gate 			--(*f->posns[f->curstat]);
1897c478bd9Sstevel@tonic-gate 	}
190*1ee2e5faSnakanon 	return (f->curstat);
1917c478bd9Sstevel@tonic-gate }
1927c478bd9Sstevel@tonic-gate 
193*1ee2e5faSnakanon void
194*1ee2e5faSnakanon penter(Node *p)	/* set up parent pointers and leaf indices */
1957c478bd9Sstevel@tonic-gate {
1967c478bd9Sstevel@tonic-gate 	switch (type(p)) {
1977c478bd9Sstevel@tonic-gate 	LEAF
1987c478bd9Sstevel@tonic-gate 		left(p) = (Node *) poscnt;
1997c478bd9Sstevel@tonic-gate 		point[poscnt++] = p;
2007c478bd9Sstevel@tonic-gate 		break;
2017c478bd9Sstevel@tonic-gate 	UNARY
2027c478bd9Sstevel@tonic-gate 		penter(left(p));
2037c478bd9Sstevel@tonic-gate 		parent(left(p)) = p;
2047c478bd9Sstevel@tonic-gate 		break;
2057c478bd9Sstevel@tonic-gate 	case CAT:
2067c478bd9Sstevel@tonic-gate 	case OR:
2077c478bd9Sstevel@tonic-gate 		penter(left(p));
2087c478bd9Sstevel@tonic-gate 		penter(right(p));
2097c478bd9Sstevel@tonic-gate 		parent(left(p)) = p;
2107c478bd9Sstevel@tonic-gate 		parent(right(p)) = p;
2117c478bd9Sstevel@tonic-gate 		break;
2127c478bd9Sstevel@tonic-gate 	default:
2137c478bd9Sstevel@tonic-gate 		ERROR "unknown type %d in penter", type(p) FATAL;
2147c478bd9Sstevel@tonic-gate 		break;
2157c478bd9Sstevel@tonic-gate 	}
2167c478bd9Sstevel@tonic-gate }
2177c478bd9Sstevel@tonic-gate 
218*1ee2e5faSnakanon static void
219*1ee2e5faSnakanon freetr(Node *p)	/* free parse tree */
2207c478bd9Sstevel@tonic-gate {
2217c478bd9Sstevel@tonic-gate 	switch (type(p)) {
2227c478bd9Sstevel@tonic-gate 	LEAF
2237c478bd9Sstevel@tonic-gate 		xfree(p);
2247c478bd9Sstevel@tonic-gate 		break;
2257c478bd9Sstevel@tonic-gate 	UNARY
2267c478bd9Sstevel@tonic-gate 		freetr(left(p));
2277c478bd9Sstevel@tonic-gate 		xfree(p);
2287c478bd9Sstevel@tonic-gate 		break;
2297c478bd9Sstevel@tonic-gate 	case CAT:
2307c478bd9Sstevel@tonic-gate 	case OR:
2317c478bd9Sstevel@tonic-gate 		freetr(left(p));
2327c478bd9Sstevel@tonic-gate 		freetr(right(p));
2337c478bd9Sstevel@tonic-gate 		xfree(p);
2347c478bd9Sstevel@tonic-gate 		break;
2357c478bd9Sstevel@tonic-gate 	default:
2367c478bd9Sstevel@tonic-gate 		ERROR "unknown type %d in freetr", type(p) FATAL;
2377c478bd9Sstevel@tonic-gate 		break;
2387c478bd9Sstevel@tonic-gate 	}
2397c478bd9Sstevel@tonic-gate }
2407c478bd9Sstevel@tonic-gate 
241*1ee2e5faSnakanon uchar *
242*1ee2e5faSnakanon cclenter(uchar *p)
2437c478bd9Sstevel@tonic-gate {
2447c478bd9Sstevel@tonic-gate 	register int i, c;
245*1ee2e5faSnakanon 	uchar *op, *chars, *ret;
246*1ee2e5faSnakanon 	size_t	bsize;
2477c478bd9Sstevel@tonic-gate 
248*1ee2e5faSnakanon 	init_buf(&chars, &bsize, LINE_INCR);
2497c478bd9Sstevel@tonic-gate 	op = p;
2507c478bd9Sstevel@tonic-gate 	i = 0;
2517c478bd9Sstevel@tonic-gate 	while ((c = *p++) != 0) {
2527c478bd9Sstevel@tonic-gate 		if (c == '\\') {
2537c478bd9Sstevel@tonic-gate 			if ((c = *p++) == 't')
2547c478bd9Sstevel@tonic-gate 				c = '\t';
2557c478bd9Sstevel@tonic-gate 			else if (c == 'n')
2567c478bd9Sstevel@tonic-gate 				c = '\n';
2577c478bd9Sstevel@tonic-gate 			else if (c == 'f')
2587c478bd9Sstevel@tonic-gate 				c = '\f';
2597c478bd9Sstevel@tonic-gate 			else if (c == 'r')
2607c478bd9Sstevel@tonic-gate 				c = '\r';
2617c478bd9Sstevel@tonic-gate 			else if (c == 'b')
2627c478bd9Sstevel@tonic-gate 				c = '\b';
2637c478bd9Sstevel@tonic-gate 			else if (c == '\\')
2647c478bd9Sstevel@tonic-gate 				c = '\\';
2657c478bd9Sstevel@tonic-gate 			else if (isdigit(c)) {
2667c478bd9Sstevel@tonic-gate 				int n = c - '0';
2677c478bd9Sstevel@tonic-gate 				if (isdigit(*p)) {
2687c478bd9Sstevel@tonic-gate 					n = 8 * n + *p++ - '0';
2697c478bd9Sstevel@tonic-gate 					if (isdigit(*p))
2707c478bd9Sstevel@tonic-gate 						n = 8 * n + *p++ - '0';
2717c478bd9Sstevel@tonic-gate 				}
2727c478bd9Sstevel@tonic-gate 				c = n;
2737c478bd9Sstevel@tonic-gate 			} /* else */
2747c478bd9Sstevel@tonic-gate 				/* c = c; */
2757c478bd9Sstevel@tonic-gate 		} else if (c == '-' && i > 0 && chars[i-1] != 0) {
2767c478bd9Sstevel@tonic-gate 			if (*p != 0) {
2777c478bd9Sstevel@tonic-gate 				c = chars[i-1];
2787c478bd9Sstevel@tonic-gate 				while ((uchar)c < *p) {	/* fails if *p is \\ */
279*1ee2e5faSnakanon 					expand_buf(&chars, &bsize, i);
2807c478bd9Sstevel@tonic-gate 					chars[i++] = ++c;
2817c478bd9Sstevel@tonic-gate 				}
2827c478bd9Sstevel@tonic-gate 				p++;
2837c478bd9Sstevel@tonic-gate 				continue;
2847c478bd9Sstevel@tonic-gate 			}
2857c478bd9Sstevel@tonic-gate 		}
286*1ee2e5faSnakanon 		expand_buf(&chars, &bsize, i);
2877c478bd9Sstevel@tonic-gate 		chars[i++] = c;
2887c478bd9Sstevel@tonic-gate 	}
2897c478bd9Sstevel@tonic-gate 	chars[i++] = '\0';
2907c478bd9Sstevel@tonic-gate 	dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
2917c478bd9Sstevel@tonic-gate 	xfree(op);
292*1ee2e5faSnakanon 	ret = tostring(chars);
293*1ee2e5faSnakanon 	free(chars);
294*1ee2e5faSnakanon 	return (ret);
2957c478bd9Sstevel@tonic-gate }
2967c478bd9Sstevel@tonic-gate 
297*1ee2e5faSnakanon static void
298*1ee2e5faSnakanon overflo(char *s)
2997c478bd9Sstevel@tonic-gate {
3007c478bd9Sstevel@tonic-gate 	ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
3017c478bd9Sstevel@tonic-gate }
3027c478bd9Sstevel@tonic-gate 
303*1ee2e5faSnakanon /* enter follow set of each leaf of vertex v into lfollow[leaf] */
304*1ee2e5faSnakanon static void
305*1ee2e5faSnakanon cfoll(fa *f, Node *v)
3067c478bd9Sstevel@tonic-gate {
3077c478bd9Sstevel@tonic-gate 	register int i;
3087c478bd9Sstevel@tonic-gate 	register int *p;
3097c478bd9Sstevel@tonic-gate 
3107c478bd9Sstevel@tonic-gate 	switch (type(v)) {
3117c478bd9Sstevel@tonic-gate 	LEAF
3127c478bd9Sstevel@tonic-gate 		f->re[(int)left(v)].ltype = type(v);
3137c478bd9Sstevel@tonic-gate 		f->re[(int)left(v)].lval = (int)right(v);
3147c478bd9Sstevel@tonic-gate 		for (i = 0; i <= f->accept; i++)
3157c478bd9Sstevel@tonic-gate 			setvec[i] = 0;
3167c478bd9Sstevel@tonic-gate 		setcnt = 0;
3177c478bd9Sstevel@tonic-gate 		follow(v);	/* computes setvec and setcnt */
3187c478bd9Sstevel@tonic-gate 		if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
3197c478bd9Sstevel@tonic-gate 			overflo("follow set overflow");
3207c478bd9Sstevel@tonic-gate 		f->re[(int)left(v)].lfollow = p;
3217c478bd9Sstevel@tonic-gate 		*p = setcnt;
322*1ee2e5faSnakanon 		for (i = f->accept; i >= 0; i--) {
323*1ee2e5faSnakanon 			if (setvec[i] == 1)
324*1ee2e5faSnakanon 				*++p = i;
325*1ee2e5faSnakanon 		}
3267c478bd9Sstevel@tonic-gate 		break;
3277c478bd9Sstevel@tonic-gate 	UNARY
3287c478bd9Sstevel@tonic-gate 		cfoll(f, left(v));
3297c478bd9Sstevel@tonic-gate 		break;
3307c478bd9Sstevel@tonic-gate 	case CAT:
3317c478bd9Sstevel@tonic-gate 	case OR:
3327c478bd9Sstevel@tonic-gate 		cfoll(f, left(v));
3337c478bd9Sstevel@tonic-gate 		cfoll(f, right(v));
3347c478bd9Sstevel@tonic-gate 		break;
3357c478bd9Sstevel@tonic-gate 	default:
3367c478bd9Sstevel@tonic-gate 		ERROR "unknown type %d in cfoll", type(v) FATAL;
3377c478bd9Sstevel@tonic-gate 	}
3387c478bd9Sstevel@tonic-gate }
3397c478bd9Sstevel@tonic-gate 
340*1ee2e5faSnakanon /*
341*1ee2e5faSnakanon  * collects initially active leaves of p into setvec
342*1ee2e5faSnakanon  * returns 0 or 1 depending on whether p matches empty string
343*1ee2e5faSnakanon  */
344*1ee2e5faSnakanon static int
345*1ee2e5faSnakanon first(Node *p)
3467c478bd9Sstevel@tonic-gate {
3477c478bd9Sstevel@tonic-gate 	register int b;
3487c478bd9Sstevel@tonic-gate 
3497c478bd9Sstevel@tonic-gate 	switch (type(p)) {
3507c478bd9Sstevel@tonic-gate 	LEAF
3517c478bd9Sstevel@tonic-gate 		if (setvec[(int)left(p)] != 1) {
3527c478bd9Sstevel@tonic-gate 			setvec[(int)left(p)] = 1;
3537c478bd9Sstevel@tonic-gate 			setcnt++;
3547c478bd9Sstevel@tonic-gate 		}
3557c478bd9Sstevel@tonic-gate 		if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
3567c478bd9Sstevel@tonic-gate 			return (0);		/* empty CCL */
357*1ee2e5faSnakanon 		else
358*1ee2e5faSnakanon 			return (1);
3597c478bd9Sstevel@tonic-gate 	case PLUS:
360*1ee2e5faSnakanon 		if (first(left(p)) == 0)
361*1ee2e5faSnakanon 			return (0);
3627c478bd9Sstevel@tonic-gate 		return (1);
3637c478bd9Sstevel@tonic-gate 	case STAR:
3647c478bd9Sstevel@tonic-gate 	case QUEST:
365*1ee2e5faSnakanon 		(void) first(left(p));
3667c478bd9Sstevel@tonic-gate 		return (0);
3677c478bd9Sstevel@tonic-gate 	case CAT:
368*1ee2e5faSnakanon 		if (first(left(p)) == 0 && first(right(p)) == 0)
369*1ee2e5faSnakanon 			return (0);
3707c478bd9Sstevel@tonic-gate 		return (1);
3717c478bd9Sstevel@tonic-gate 	case OR:
3727c478bd9Sstevel@tonic-gate 		b = first(right(p));
373*1ee2e5faSnakanon 		if (first(left(p)) == 0 || b == 0)
374*1ee2e5faSnakanon 			return (0);
3757c478bd9Sstevel@tonic-gate 		return (1);
3767c478bd9Sstevel@tonic-gate 	}
3777c478bd9Sstevel@tonic-gate 	ERROR "unknown type %d in first", type(p) FATAL;
3787c478bd9Sstevel@tonic-gate 	return (-1);
3797c478bd9Sstevel@tonic-gate }
3807c478bd9Sstevel@tonic-gate 
381*1ee2e5faSnakanon /* collects leaves that can follow v into setvec */
382*1ee2e5faSnakanon static void
383*1ee2e5faSnakanon follow(Node *v)
3847c478bd9Sstevel@tonic-gate {
3857c478bd9Sstevel@tonic-gate 	Node *p;
3867c478bd9Sstevel@tonic-gate 
3877c478bd9Sstevel@tonic-gate 	if (type(v) == FINAL)
3887c478bd9Sstevel@tonic-gate 		return;
3897c478bd9Sstevel@tonic-gate 	p = parent(v);
3907c478bd9Sstevel@tonic-gate 	switch (type(p)) {
3917c478bd9Sstevel@tonic-gate 	case STAR:
3927c478bd9Sstevel@tonic-gate 	case PLUS:
393*1ee2e5faSnakanon 		(void) first(v);
3947c478bd9Sstevel@tonic-gate 		follow(p);
3957c478bd9Sstevel@tonic-gate 		return;
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate 	case OR:
3987c478bd9Sstevel@tonic-gate 	case QUEST:
3997c478bd9Sstevel@tonic-gate 		follow(p);
4007c478bd9Sstevel@tonic-gate 		return;
4017c478bd9Sstevel@tonic-gate 
4027c478bd9Sstevel@tonic-gate 	case CAT:
4037c478bd9Sstevel@tonic-gate 		if (v == left(p)) {	/* v is left child of p */
4047c478bd9Sstevel@tonic-gate 			if (first(right(p)) == 0) {
4057c478bd9Sstevel@tonic-gate 				follow(p);
4067c478bd9Sstevel@tonic-gate 				return;
4077c478bd9Sstevel@tonic-gate 			}
408*1ee2e5faSnakanon 		} else		/* v is right child */
4097c478bd9Sstevel@tonic-gate 			follow(p);
4107c478bd9Sstevel@tonic-gate 		return;
4117c478bd9Sstevel@tonic-gate 	default:
4127c478bd9Sstevel@tonic-gate 		ERROR "unknown type %d in follow", type(p) FATAL;
4137c478bd9Sstevel@tonic-gate 		break;
4147c478bd9Sstevel@tonic-gate 	}
4157c478bd9Sstevel@tonic-gate }
4167c478bd9Sstevel@tonic-gate 
417*1ee2e5faSnakanon static int
418*1ee2e5faSnakanon member(uchar c, uchar *s)	/* is c in s? */
4197c478bd9Sstevel@tonic-gate {
4207c478bd9Sstevel@tonic-gate 	while (*s)
4217c478bd9Sstevel@tonic-gate 		if (c == *s++)
4227c478bd9Sstevel@tonic-gate 			return (1);
4237c478bd9Sstevel@tonic-gate 	return (0);
4247c478bd9Sstevel@tonic-gate }
4257c478bd9Sstevel@tonic-gate 
4267c478bd9Sstevel@tonic-gate 
427*1ee2e5faSnakanon int
428*1ee2e5faSnakanon match(fa *f, uchar *p)
4297c478bd9Sstevel@tonic-gate {
4307c478bd9Sstevel@tonic-gate 	register int s, ns;
4317c478bd9Sstevel@tonic-gate 
4327c478bd9Sstevel@tonic-gate 	s = f->reset ? makeinit(f, 0) : f->initstat;
4337c478bd9Sstevel@tonic-gate 	if (f->out[s])
4347c478bd9Sstevel@tonic-gate 		return (1);
4357c478bd9Sstevel@tonic-gate 	do {
436*1ee2e5faSnakanon 		if ((ns = f->gototab[s][*p]) != 0)
4377c478bd9Sstevel@tonic-gate 			s = ns;
4387c478bd9Sstevel@tonic-gate 		else
4397c478bd9Sstevel@tonic-gate 			s = cgoto(f, s, *p);
4407c478bd9Sstevel@tonic-gate 		if (f->out[s])
4417c478bd9Sstevel@tonic-gate 			return (1);
4427c478bd9Sstevel@tonic-gate 	} while (*p++ != 0);
4437c478bd9Sstevel@tonic-gate 	return (0);
4447c478bd9Sstevel@tonic-gate }
4457c478bd9Sstevel@tonic-gate 
446*1ee2e5faSnakanon int
447*1ee2e5faSnakanon pmatch(fa *f, uchar *p)
4487c478bd9Sstevel@tonic-gate {
4497c478bd9Sstevel@tonic-gate 	register int s, ns;
4507c478bd9Sstevel@tonic-gate 	register uchar *q;
4517c478bd9Sstevel@tonic-gate 	int i, k;
4527c478bd9Sstevel@tonic-gate 
4537c478bd9Sstevel@tonic-gate 	if (f->reset) {
4547c478bd9Sstevel@tonic-gate 		f->initstat = s = makeinit(f, 1);
4557c478bd9Sstevel@tonic-gate 	} else {
4567c478bd9Sstevel@tonic-gate 		s = f->initstat;
4577c478bd9Sstevel@tonic-gate 	}
4587c478bd9Sstevel@tonic-gate 	patbeg = p;
4597c478bd9Sstevel@tonic-gate 	patlen = -1;
4607c478bd9Sstevel@tonic-gate 	do {
4617c478bd9Sstevel@tonic-gate 		q = p;
4627c478bd9Sstevel@tonic-gate 		do {
4637c478bd9Sstevel@tonic-gate 			if (f->out[s])		/* final state */
4647c478bd9Sstevel@tonic-gate 				patlen = q-p;
465*1ee2e5faSnakanon 			if ((ns = f->gototab[s][*q]) != 0)
4667c478bd9Sstevel@tonic-gate 				s = ns;
4677c478bd9Sstevel@tonic-gate 			else
4687c478bd9Sstevel@tonic-gate 				s = cgoto(f, s, *q);
469*1ee2e5faSnakanon 			if (s == 1) {	/* no transition */
4707c478bd9Sstevel@tonic-gate 				if (patlen >= 0) {
4717c478bd9Sstevel@tonic-gate 					patbeg = p;
4727c478bd9Sstevel@tonic-gate 					return (1);
473*1ee2e5faSnakanon 				} else
4747c478bd9Sstevel@tonic-gate 					goto nextin;	/* no match */
475*1ee2e5faSnakanon 			}
4767c478bd9Sstevel@tonic-gate 		} while (*q++ != 0);
4777c478bd9Sstevel@tonic-gate 		if (f->out[s])
4787c478bd9Sstevel@tonic-gate 			patlen = q - p - 1;	/* don't count $ */
4797c478bd9Sstevel@tonic-gate 		if (patlen >= 0) {
4807c478bd9Sstevel@tonic-gate 			patbeg = p;
4817c478bd9Sstevel@tonic-gate 			return (1);
4827c478bd9Sstevel@tonic-gate 		}
4837c478bd9Sstevel@tonic-gate 	nextin:
4847c478bd9Sstevel@tonic-gate 		s = 2;
4857c478bd9Sstevel@tonic-gate 		if (f->reset) {
4867c478bd9Sstevel@tonic-gate 			for (i = 2; i <= f->curstat; i++)
4877c478bd9Sstevel@tonic-gate 				xfree(f->posns[i]);
4887c478bd9Sstevel@tonic-gate 			k = *f->posns[0];
489*1ee2e5faSnakanon 			if ((f->posns[2] =
490*1ee2e5faSnakanon 			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
4917c478bd9Sstevel@tonic-gate 				overflo("out of space in pmatch");
492*1ee2e5faSnakanon 			}
4937c478bd9Sstevel@tonic-gate 			for (i = 0; i <= k; i++)
4947c478bd9Sstevel@tonic-gate 				(f->posns[2])[i] = (f->posns[0])[i];
4957c478bd9Sstevel@tonic-gate 			f->initstat = f->curstat = 2;
4967c478bd9Sstevel@tonic-gate 			f->out[2] = f->out[0];
4977c478bd9Sstevel@tonic-gate 			for (i = 0; i < NCHARS; i++)
4987c478bd9Sstevel@tonic-gate 				f->gototab[2][i] = 0;
4997c478bd9Sstevel@tonic-gate 		}
5007c478bd9Sstevel@tonic-gate 	} while (*p++ != 0);
5017c478bd9Sstevel@tonic-gate 	return (0);
5027c478bd9Sstevel@tonic-gate }
5037c478bd9Sstevel@tonic-gate 
504*1ee2e5faSnakanon int
505*1ee2e5faSnakanon nematch(fa *f, uchar *p)
5067c478bd9Sstevel@tonic-gate {
5077c478bd9Sstevel@tonic-gate 	register int s, ns;
5087c478bd9Sstevel@tonic-gate 	register uchar *q;
5097c478bd9Sstevel@tonic-gate 	int i, k;
5107c478bd9Sstevel@tonic-gate 
5117c478bd9Sstevel@tonic-gate 	if (f->reset) {
5127c478bd9Sstevel@tonic-gate 		f->initstat = s = makeinit(f, 1);
5137c478bd9Sstevel@tonic-gate 	} else {
5147c478bd9Sstevel@tonic-gate 		s = f->initstat;
5157c478bd9Sstevel@tonic-gate 	}
5167c478bd9Sstevel@tonic-gate 	patlen = -1;
5177c478bd9Sstevel@tonic-gate 	while (*p) {
5187c478bd9Sstevel@tonic-gate 		q = p;
5197c478bd9Sstevel@tonic-gate 		do {
5207c478bd9Sstevel@tonic-gate 			if (f->out[s])		/* final state */
5217c478bd9Sstevel@tonic-gate 				patlen = q-p;
522*1ee2e5faSnakanon 			if ((ns = f->gototab[s][*q]) != 0)
5237c478bd9Sstevel@tonic-gate 				s = ns;
5247c478bd9Sstevel@tonic-gate 			else
5257c478bd9Sstevel@tonic-gate 				s = cgoto(f, s, *q);
526*1ee2e5faSnakanon 			if (s == 1) {	/* no transition */
5277c478bd9Sstevel@tonic-gate 				if (patlen > 0) {
5287c478bd9Sstevel@tonic-gate 					patbeg = p;
5297c478bd9Sstevel@tonic-gate 					return (1);
530*1ee2e5faSnakanon 				} else
5317c478bd9Sstevel@tonic-gate 					goto nnextin;	/* no nonempty match */
532*1ee2e5faSnakanon 			}
5337c478bd9Sstevel@tonic-gate 		} while (*q++ != 0);
5347c478bd9Sstevel@tonic-gate 		if (f->out[s])
5357c478bd9Sstevel@tonic-gate 			patlen = q - p - 1;	/* don't count $ */
5367c478bd9Sstevel@tonic-gate 		if (patlen > 0) {
5377c478bd9Sstevel@tonic-gate 			patbeg = p;
5387c478bd9Sstevel@tonic-gate 			return (1);
5397c478bd9Sstevel@tonic-gate 		}
5407c478bd9Sstevel@tonic-gate 	nnextin:
5417c478bd9Sstevel@tonic-gate 		s = 2;
5427c478bd9Sstevel@tonic-gate 		if (f->reset) {
5437c478bd9Sstevel@tonic-gate 			for (i = 2; i <= f->curstat; i++)
5447c478bd9Sstevel@tonic-gate 				xfree(f->posns[i]);
5457c478bd9Sstevel@tonic-gate 			k = *f->posns[0];
546*1ee2e5faSnakanon 			if ((f->posns[2] =
547*1ee2e5faSnakanon 			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
5487c478bd9Sstevel@tonic-gate 				overflo("out of state space");
549*1ee2e5faSnakanon 			}
5507c478bd9Sstevel@tonic-gate 			for (i = 0; i <= k; i++)
5517c478bd9Sstevel@tonic-gate 				(f->posns[2])[i] = (f->posns[0])[i];
5527c478bd9Sstevel@tonic-gate 			f->initstat = f->curstat = 2;
5537c478bd9Sstevel@tonic-gate 			f->out[2] = f->out[0];
5547c478bd9Sstevel@tonic-gate 			for (i = 0; i < NCHARS; i++)
5557c478bd9Sstevel@tonic-gate 				f->gototab[2][i] = 0;
5567c478bd9Sstevel@tonic-gate 		}
5577c478bd9Sstevel@tonic-gate 		p++;
5587c478bd9Sstevel@tonic-gate 	}
5597c478bd9Sstevel@tonic-gate 	return (0);
5607c478bd9Sstevel@tonic-gate }
5617c478bd9Sstevel@tonic-gate 
562*1ee2e5faSnakanon static Node *regexp(void), *primary(void), *concat(Node *);
563*1ee2e5faSnakanon static Node *alt(Node *), *unary(Node *);
5647c478bd9Sstevel@tonic-gate 
565*1ee2e5faSnakanon static Node *
566*1ee2e5faSnakanon reparse(uchar *p)
5677c478bd9Sstevel@tonic-gate {
5687c478bd9Sstevel@tonic-gate 	/* parses regular expression pointed to by p */
5697c478bd9Sstevel@tonic-gate 	/* uses relex() to scan regular expression */
5707c478bd9Sstevel@tonic-gate 	Node *np;
5717c478bd9Sstevel@tonic-gate 
5727c478bd9Sstevel@tonic-gate 	dprintf(("reparse <%s>\n", p));
5737c478bd9Sstevel@tonic-gate 	lastre = prestr = p;	/* prestr points to string to be parsed */
5747c478bd9Sstevel@tonic-gate 	rtok = relex();
5757c478bd9Sstevel@tonic-gate 	if (rtok == '\0')
5767c478bd9Sstevel@tonic-gate 		ERROR "empty regular expression" FATAL;
5777c478bd9Sstevel@tonic-gate 	np = regexp();
578*1ee2e5faSnakanon 	if (rtok == '\0') {
5797c478bd9Sstevel@tonic-gate 		return (np);
580*1ee2e5faSnakanon 	} else {
581*1ee2e5faSnakanon 		ERROR "syntax error in regular expression %s at %s",
582*1ee2e5faSnakanon 		    lastre, prestr FATAL;
583*1ee2e5faSnakanon 	}
5847c478bd9Sstevel@tonic-gate 	/*NOTREACHED*/
585*1ee2e5faSnakanon 	return (NULL);
5867c478bd9Sstevel@tonic-gate }
5877c478bd9Sstevel@tonic-gate 
588*1ee2e5faSnakanon static Node *
589*1ee2e5faSnakanon regexp(void)
5907c478bd9Sstevel@tonic-gate {
5917c478bd9Sstevel@tonic-gate 	return (alt(concat(primary())));
5927c478bd9Sstevel@tonic-gate }
5937c478bd9Sstevel@tonic-gate 
594*1ee2e5faSnakanon static Node *
595*1ee2e5faSnakanon primary(void)
5967c478bd9Sstevel@tonic-gate {
5977c478bd9Sstevel@tonic-gate 	Node *np;
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate 	switch (rtok) {
6007c478bd9Sstevel@tonic-gate 	case CHAR:
6017c478bd9Sstevel@tonic-gate 		np = op2(CHAR, NIL, (Node *)rlxval);
6027c478bd9Sstevel@tonic-gate 		rtok = relex();
6037c478bd9Sstevel@tonic-gate 		return (unary(np));
6047c478bd9Sstevel@tonic-gate 	case ALL:
6057c478bd9Sstevel@tonic-gate 		rtok = relex();
6067c478bd9Sstevel@tonic-gate 		return (unary(op2(ALL, NIL, NIL)));
6077c478bd9Sstevel@tonic-gate 	case DOT:
6087c478bd9Sstevel@tonic-gate 		rtok = relex();
6097c478bd9Sstevel@tonic-gate 		return (unary(op2(DOT, NIL, NIL)));
6107c478bd9Sstevel@tonic-gate 	case CCL:
611*1ee2e5faSnakanon 		/*LINTED align*/
6127c478bd9Sstevel@tonic-gate 		np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
6137c478bd9Sstevel@tonic-gate 		rtok = relex();
6147c478bd9Sstevel@tonic-gate 		return (unary(np));
6157c478bd9Sstevel@tonic-gate 	case NCCL:
616*1ee2e5faSnakanon 		/*LINTED align*/
6177c478bd9Sstevel@tonic-gate 		np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
6187c478bd9Sstevel@tonic-gate 		rtok = relex();
6197c478bd9Sstevel@tonic-gate 		return (unary(np));
6207c478bd9Sstevel@tonic-gate 	case '^':
6217c478bd9Sstevel@tonic-gate 		rtok = relex();
6227c478bd9Sstevel@tonic-gate 		return (unary(op2(CHAR, NIL, (Node *)HAT)));
6237c478bd9Sstevel@tonic-gate 	case '$':
6247c478bd9Sstevel@tonic-gate 		rtok = relex();
6257c478bd9Sstevel@tonic-gate 		return (unary(op2(CHAR, NIL, NIL)));
6267c478bd9Sstevel@tonic-gate 	case '(':
6277c478bd9Sstevel@tonic-gate 		rtok = relex();
6287c478bd9Sstevel@tonic-gate 		if (rtok == ')') {	/* special pleading for () */
6297c478bd9Sstevel@tonic-gate 			rtok = relex();
630*1ee2e5faSnakanon 			return (unary(op2(CCL, NIL,
631*1ee2e5faSnakanon 			    /*LINTED align*/
632*1ee2e5faSnakanon 			    (Node *)tostring((uchar *)""))));
6337c478bd9Sstevel@tonic-gate 		}
6347c478bd9Sstevel@tonic-gate 		np = regexp();
6357c478bd9Sstevel@tonic-gate 		if (rtok == ')') {
6367c478bd9Sstevel@tonic-gate 			rtok = relex();
6377c478bd9Sstevel@tonic-gate 			return (unary(np));
638*1ee2e5faSnakanon 		} else {
639*1ee2e5faSnakanon 			ERROR "syntax error in regular expression %s at %s",
640*1ee2e5faSnakanon 			    lastre, prestr FATAL;
6417c478bd9Sstevel@tonic-gate 		}
6427c478bd9Sstevel@tonic-gate 	default:
643*1ee2e5faSnakanon 		ERROR "illegal primary in regular expression %s at %s",
644*1ee2e5faSnakanon 		    lastre, prestr FATAL;
6457c478bd9Sstevel@tonic-gate 	}
6467c478bd9Sstevel@tonic-gate 	/*NOTREACHED*/
647*1ee2e5faSnakanon 	return (NULL);
6487c478bd9Sstevel@tonic-gate }
6497c478bd9Sstevel@tonic-gate 
650*1ee2e5faSnakanon static Node *
651*1ee2e5faSnakanon concat(Node *np)
6527c478bd9Sstevel@tonic-gate {
6537c478bd9Sstevel@tonic-gate 	switch (rtok) {
6547c478bd9Sstevel@tonic-gate 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
6557c478bd9Sstevel@tonic-gate 		return (concat(op2(CAT, np, primary())));
6567c478bd9Sstevel@tonic-gate 	default:
6577c478bd9Sstevel@tonic-gate 		return (np);
6587c478bd9Sstevel@tonic-gate 	}
6597c478bd9Sstevel@tonic-gate }
6607c478bd9Sstevel@tonic-gate 
661*1ee2e5faSnakanon static Node *
662*1ee2e5faSnakanon alt(Node *np)
6637c478bd9Sstevel@tonic-gate {
6647c478bd9Sstevel@tonic-gate 	if (rtok == OR) {
6657c478bd9Sstevel@tonic-gate 		rtok = relex();
6667c478bd9Sstevel@tonic-gate 		return (alt(op2(OR, np, concat(primary()))));
6677c478bd9Sstevel@tonic-gate 	}
6687c478bd9Sstevel@tonic-gate 	return (np);
6697c478bd9Sstevel@tonic-gate }
6707c478bd9Sstevel@tonic-gate 
671*1ee2e5faSnakanon static Node *
672*1ee2e5faSnakanon unary(Node *np)
6737c478bd9Sstevel@tonic-gate {
6747c478bd9Sstevel@tonic-gate 	switch (rtok) {
6757c478bd9Sstevel@tonic-gate 	case STAR:
6767c478bd9Sstevel@tonic-gate 		rtok = relex();
6777c478bd9Sstevel@tonic-gate 		return (unary(op2(STAR, np, NIL)));
6787c478bd9Sstevel@tonic-gate 	case PLUS:
6797c478bd9Sstevel@tonic-gate 		rtok = relex();
6807c478bd9Sstevel@tonic-gate 		return (unary(op2(PLUS, np, NIL)));
6817c478bd9Sstevel@tonic-gate 	case QUEST:
6827c478bd9Sstevel@tonic-gate 		rtok = relex();
6837c478bd9Sstevel@tonic-gate 		return (unary(op2(QUEST, np, NIL)));
6847c478bd9Sstevel@tonic-gate 	default:
6857c478bd9Sstevel@tonic-gate 		return (np);
6867c478bd9Sstevel@tonic-gate 	}
6877c478bd9Sstevel@tonic-gate }
6887c478bd9Sstevel@tonic-gate 
689*1ee2e5faSnakanon static int
690*1ee2e5faSnakanon relex(void)		/* lexical analyzer for reparse */
6917c478bd9Sstevel@tonic-gate {
6927c478bd9Sstevel@tonic-gate 	register int c;
693*1ee2e5faSnakanon 	uchar *cbuf;
6947c478bd9Sstevel@tonic-gate 	int clen, cflag;
6957c478bd9Sstevel@tonic-gate 
6967c478bd9Sstevel@tonic-gate 	switch (c = *prestr++) {
6977c478bd9Sstevel@tonic-gate 	case '|': return OR;
6987c478bd9Sstevel@tonic-gate 	case '*': return STAR;
6997c478bd9Sstevel@tonic-gate 	case '+': return PLUS;
7007c478bd9Sstevel@tonic-gate 	case '?': return QUEST;
7017c478bd9Sstevel@tonic-gate 	case '.': return DOT;
7027c478bd9Sstevel@tonic-gate 	case '\0': prestr--; return '\0';
7037c478bd9Sstevel@tonic-gate 	case '^':
7047c478bd9Sstevel@tonic-gate 	case '$':
7057c478bd9Sstevel@tonic-gate 	case '(':
7067c478bd9Sstevel@tonic-gate 	case ')':
707*1ee2e5faSnakanon 		return (c);
7087c478bd9Sstevel@tonic-gate 	case '\\':
7097c478bd9Sstevel@tonic-gate 		if ((c = *prestr++) == 't')
7107c478bd9Sstevel@tonic-gate 			c = '\t';
7117c478bd9Sstevel@tonic-gate 		else if (c == 'n')
7127c478bd9Sstevel@tonic-gate 			c = '\n';
7137c478bd9Sstevel@tonic-gate 		else if (c == 'f')
7147c478bd9Sstevel@tonic-gate 			c = '\f';
7157c478bd9Sstevel@tonic-gate 		else if (c == 'r')
7167c478bd9Sstevel@tonic-gate 			c = '\r';
7177c478bd9Sstevel@tonic-gate 		else if (c == 'b')
7187c478bd9Sstevel@tonic-gate 			c = '\b';
7197c478bd9Sstevel@tonic-gate 		else if (c == '\\')
7207c478bd9Sstevel@tonic-gate 			c = '\\';
7217c478bd9Sstevel@tonic-gate 		else if (isdigit(c)) {
7227c478bd9Sstevel@tonic-gate 			int n = c - '0';
7237c478bd9Sstevel@tonic-gate 			if (isdigit(*prestr)) {
7247c478bd9Sstevel@tonic-gate 				n = 8 * n + *prestr++ - '0';
7257c478bd9Sstevel@tonic-gate 				if (isdigit(*prestr))
7267c478bd9Sstevel@tonic-gate 					n = 8 * n + *prestr++ - '0';
7277c478bd9Sstevel@tonic-gate 			}
7287c478bd9Sstevel@tonic-gate 			c = n;
7297c478bd9Sstevel@tonic-gate 		} /* else it's now in c */
7307c478bd9Sstevel@tonic-gate 		rlxval = c;
731*1ee2e5faSnakanon 		return (CHAR);
7327c478bd9Sstevel@tonic-gate 	default:
7337c478bd9Sstevel@tonic-gate 		rlxval = c;
734*1ee2e5faSnakanon 		return (CHAR);
7357c478bd9Sstevel@tonic-gate 	case '[':
7367c478bd9Sstevel@tonic-gate 		clen = 0;
7377c478bd9Sstevel@tonic-gate 		if (*prestr == '^') {
7387c478bd9Sstevel@tonic-gate 			cflag = 1;
7397c478bd9Sstevel@tonic-gate 			prestr++;
740*1ee2e5faSnakanon 		} else
7417c478bd9Sstevel@tonic-gate 			cflag = 0;
742*1ee2e5faSnakanon 		init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
7437c478bd9Sstevel@tonic-gate 		for (;;) {
7447c478bd9Sstevel@tonic-gate 			if ((c = *prestr++) == '\\') {
7457c478bd9Sstevel@tonic-gate 				cbuf[clen++] = '\\';
746*1ee2e5faSnakanon 				if ((c = *prestr++) == '\0') {
747*1ee2e5faSnakanon 					ERROR
748*1ee2e5faSnakanon 			"nonterminated character class %s", lastre FATAL;
749*1ee2e5faSnakanon 				}
7507c478bd9Sstevel@tonic-gate 				cbuf[clen++] = c;
7517c478bd9Sstevel@tonic-gate 			} else if (c == ']') {
7527c478bd9Sstevel@tonic-gate 				cbuf[clen] = 0;
7537c478bd9Sstevel@tonic-gate 				rlxstr = tostring(cbuf);
754*1ee2e5faSnakanon 				free(cbuf);
7557c478bd9Sstevel@tonic-gate 				if (cflag == 0)
756*1ee2e5faSnakanon 					return (CCL);
7577c478bd9Sstevel@tonic-gate 				else
758*1ee2e5faSnakanon 					return (NCCL);
7597c478bd9Sstevel@tonic-gate 			} else if (c == '\n') {
760*1ee2e5faSnakanon 				ERROR "newline in character class %s...",
761*1ee2e5faSnakanon 				    lastre FATAL;
7627c478bd9Sstevel@tonic-gate 			} else if (c == '\0') {
763*1ee2e5faSnakanon 				ERROR "nonterminated character class %s",
764*1ee2e5faSnakanon 				    lastre FATAL;
7657c478bd9Sstevel@tonic-gate 			} else
7667c478bd9Sstevel@tonic-gate 				cbuf[clen++] = c;
7677c478bd9Sstevel@tonic-gate 		}
768*1ee2e5faSnakanon 		/*NOTREACHED*/
7697c478bd9Sstevel@tonic-gate 	}
770*1ee2e5faSnakanon 	return (0);
7717c478bd9Sstevel@tonic-gate }
7727c478bd9Sstevel@tonic-gate 
773*1ee2e5faSnakanon static int
774*1ee2e5faSnakanon cgoto(fa *f, int s, int c)
7757c478bd9Sstevel@tonic-gate {
7767c478bd9Sstevel@tonic-gate 	register int i, j, k;
7777c478bd9Sstevel@tonic-gate 	register int *p, *q;
7787c478bd9Sstevel@tonic-gate 
7797c478bd9Sstevel@tonic-gate 	for (i = 0; i <= f->accept; i++)
7807c478bd9Sstevel@tonic-gate 		setvec[i] = 0;
7817c478bd9Sstevel@tonic-gate 	setcnt = 0;
7827c478bd9Sstevel@tonic-gate 	/* compute positions of gototab[s,c] into setvec */
7837c478bd9Sstevel@tonic-gate 	p = f->posns[s];
7847c478bd9Sstevel@tonic-gate 	for (i = 1; i <= *p; i++) {
7857c478bd9Sstevel@tonic-gate 		if ((k = f->re[p[i]].ltype) != FINAL) {
786*1ee2e5faSnakanon 			if (k == CHAR && c == f->re[p[i]].lval ||
787*1ee2e5faSnakanon 			    k == DOT && c != 0 && c != HAT ||
788*1ee2e5faSnakanon 			    k == ALL && c != 0 ||
789*1ee2e5faSnakanon 			    k == CCL &&
790*1ee2e5faSnakanon 			    member(c, (uchar *)f->re[p[i]].lval) ||
791*1ee2e5faSnakanon 			    k == NCCL &&
792*1ee2e5faSnakanon 			    !member(c, (uchar *)f->re[p[i]].lval) &&
793*1ee2e5faSnakanon 			    c != 0 && c != HAT) {
7947c478bd9Sstevel@tonic-gate 				q = f->re[p[i]].lfollow;
7957c478bd9Sstevel@tonic-gate 				for (j = 1; j <= *q; j++) {
7967c478bd9Sstevel@tonic-gate 					if (setvec[q[j]] == 0) {
7977c478bd9Sstevel@tonic-gate 						setcnt++;
7987c478bd9Sstevel@tonic-gate 						setvec[q[j]] = 1;
7997c478bd9Sstevel@tonic-gate 					}
8007c478bd9Sstevel@tonic-gate 				}
8017c478bd9Sstevel@tonic-gate 			}
8027c478bd9Sstevel@tonic-gate 		}
8037c478bd9Sstevel@tonic-gate 	}
8047c478bd9Sstevel@tonic-gate 	/* determine if setvec is a previous state */
8057c478bd9Sstevel@tonic-gate 	tmpset[0] = setcnt;
8067c478bd9Sstevel@tonic-gate 	j = 1;
8077c478bd9Sstevel@tonic-gate 	for (i = f->accept; i >= 0; i--)
8087c478bd9Sstevel@tonic-gate 		if (setvec[i]) {
8097c478bd9Sstevel@tonic-gate 			tmpset[j++] = i;
8107c478bd9Sstevel@tonic-gate 		}
8117c478bd9Sstevel@tonic-gate 	/* tmpset == previous state? */
8127c478bd9Sstevel@tonic-gate 	for (i = 1; i <= f->curstat; i++) {
8137c478bd9Sstevel@tonic-gate 		p = f->posns[i];
8147c478bd9Sstevel@tonic-gate 		if ((k = tmpset[0]) != p[0])
8157c478bd9Sstevel@tonic-gate 			goto different;
8167c478bd9Sstevel@tonic-gate 		for (j = 1; j <= k; j++)
8177c478bd9Sstevel@tonic-gate 			if (tmpset[j] != p[j])
8187c478bd9Sstevel@tonic-gate 				goto different;
8197c478bd9Sstevel@tonic-gate 		/* setvec is state i */
8207c478bd9Sstevel@tonic-gate 		f->gototab[s][c] = i;
821*1ee2e5faSnakanon 		return (i);
8227c478bd9Sstevel@tonic-gate 	different:;
8237c478bd9Sstevel@tonic-gate 	}
8247c478bd9Sstevel@tonic-gate 
8257c478bd9Sstevel@tonic-gate 	/* add tmpset to current set of states */
8267c478bd9Sstevel@tonic-gate 	if (f->curstat >= NSTATES-1) {
8277c478bd9Sstevel@tonic-gate 		f->curstat = 2;
8287c478bd9Sstevel@tonic-gate 		f->reset = 1;
8297c478bd9Sstevel@tonic-gate 		for (i = 2; i < NSTATES; i++)
8307c478bd9Sstevel@tonic-gate 			xfree(f->posns[i]);
831*1ee2e5faSnakanon 	} else
8327c478bd9Sstevel@tonic-gate 		++(f->curstat);
8337c478bd9Sstevel@tonic-gate 	for (i = 0; i < NCHARS; i++)
8347c478bd9Sstevel@tonic-gate 		f->gototab[f->curstat][i] = 0;
8357c478bd9Sstevel@tonic-gate 	xfree(f->posns[f->curstat]);
8367c478bd9Sstevel@tonic-gate 	if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
8377c478bd9Sstevel@tonic-gate 		overflo("out of space in cgoto");
8387c478bd9Sstevel@tonic-gate 
8397c478bd9Sstevel@tonic-gate 	f->posns[f->curstat] = p;
8407c478bd9Sstevel@tonic-gate 	f->gototab[s][c] = f->curstat;
8417c478bd9Sstevel@tonic-gate 	for (i = 0; i <= setcnt; i++)
8427c478bd9Sstevel@tonic-gate 		p[i] = tmpset[i];
8437c478bd9Sstevel@tonic-gate 	if (setvec[f->accept])
8447c478bd9Sstevel@tonic-gate 		f->out[f->curstat] = 1;
8457c478bd9Sstevel@tonic-gate 	else
8467c478bd9Sstevel@tonic-gate 		f->out[f->curstat] = 0;
847*1ee2e5faSnakanon 	return (f->curstat);
8487c478bd9Sstevel@tonic-gate }
8497c478bd9Sstevel@tonic-gate 
850*1ee2e5faSnakanon static void
851*1ee2e5faSnakanon freefa(fa *f)
8527c478bd9Sstevel@tonic-gate {
8537c478bd9Sstevel@tonic-gate 
8547c478bd9Sstevel@tonic-gate 	register int i;
8557c478bd9Sstevel@tonic-gate 
8567c478bd9Sstevel@tonic-gate 	if (f == NULL)
8577c478bd9Sstevel@tonic-gate 		return;
8587c478bd9Sstevel@tonic-gate 	for (i = 0; i <= f->curstat; i++)
8597c478bd9Sstevel@tonic-gate 		xfree(f->posns[i]);
8607c478bd9Sstevel@tonic-gate 	for (i = 0; i <= f->accept; i++)
8617c478bd9Sstevel@tonic-gate 		xfree(f->re[i].lfollow);
8627c478bd9Sstevel@tonic-gate 	xfree(f->restr);
8637c478bd9Sstevel@tonic-gate 	xfree(f);
8647c478bd9Sstevel@tonic-gate }
865