xref: /titanic_51/usr/src/ucblib/libucb/port/gen/regex.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1997, by Sun Microsystems, Inc.
24*7c478bd9Sstevel@tonic-gate  * All rights reserved.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28*7c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
29*7c478bd9Sstevel@tonic-gate 
30*7c478bd9Sstevel@tonic-gate 
31*7c478bd9Sstevel@tonic-gate /* 	Portions Copyright(c) 1988, Sun Microsystems Inc.	*/
32*7c478bd9Sstevel@tonic-gate /*	All Rights Reserved					*/
33*7c478bd9Sstevel@tonic-gate 
34*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 1.1	*/
35*7c478bd9Sstevel@tonic-gate 
36*7c478bd9Sstevel@tonic-gate /*LINTLIBRARY*/
37*7c478bd9Sstevel@tonic-gate 
38*7c478bd9Sstevel@tonic-gate /*
39*7c478bd9Sstevel@tonic-gate  * routines to do regular expression matching
40*7c478bd9Sstevel@tonic-gate  *
41*7c478bd9Sstevel@tonic-gate  * Entry points:
42*7c478bd9Sstevel@tonic-gate  *
43*7c478bd9Sstevel@tonic-gate  *	re_comp(s)
44*7c478bd9Sstevel@tonic-gate  *		char *s;
45*7c478bd9Sstevel@tonic-gate  *	 ... returns 0 if the string s was compiled successfully,
46*7c478bd9Sstevel@tonic-gate  *		     a pointer to an error message otherwise.
47*7c478bd9Sstevel@tonic-gate  *	     If passed 0 or a null string returns without changing
48*7c478bd9Sstevel@tonic-gate  *           the currently compiled re (see note 11 below).
49*7c478bd9Sstevel@tonic-gate  *
50*7c478bd9Sstevel@tonic-gate  *	re_exec(s)
51*7c478bd9Sstevel@tonic-gate  *		char *s;
52*7c478bd9Sstevel@tonic-gate  *	 ... returns 1 if the string s matches the last compiled regular
53*7c478bd9Sstevel@tonic-gate  *		       expression,
54*7c478bd9Sstevel@tonic-gate  *		     0 if the string s failed to match the last compiled
55*7c478bd9Sstevel@tonic-gate  *		       regular expression, and
56*7c478bd9Sstevel@tonic-gate  *		    -1 if the compiled regular expression was invalid
57*7c478bd9Sstevel@tonic-gate  *		       (indicating an internal error).
58*7c478bd9Sstevel@tonic-gate  *
59*7c478bd9Sstevel@tonic-gate  * The strings passed to both re_comp and re_exec may have trailing or
60*7c478bd9Sstevel@tonic-gate  * embedded newline characters; they are terminated by nulls.
61*7c478bd9Sstevel@tonic-gate  *
62*7c478bd9Sstevel@tonic-gate  * The identity of the author of these routines is lost in antiquity;
63*7c478bd9Sstevel@tonic-gate  * this is essentially the same as the re code in the original V6 ed.
64*7c478bd9Sstevel@tonic-gate  *
65*7c478bd9Sstevel@tonic-gate  * The regular expressions recognized are described below. This description
66*7c478bd9Sstevel@tonic-gate  * is essentially the same as that for ed.
67*7c478bd9Sstevel@tonic-gate  *
68*7c478bd9Sstevel@tonic-gate  *	A regular expression specifies a set of strings of characters.
69*7c478bd9Sstevel@tonic-gate  *	A member of this set of strings is said to be matched by
70*7c478bd9Sstevel@tonic-gate  *	the regular expression.  In the following specification for
71*7c478bd9Sstevel@tonic-gate  *	regular expressions the word `character' means any character but NUL.
72*7c478bd9Sstevel@tonic-gate  *
73*7c478bd9Sstevel@tonic-gate  *	1.  Any character except a special character matches itself.
74*7c478bd9Sstevel@tonic-gate  *	    Special characters are the regular expression delimiter plus
75*7c478bd9Sstevel@tonic-gate  *	    \ [ . and sometimes ^ * $.
76*7c478bd9Sstevel@tonic-gate  *	2.  A . matches any character.
77*7c478bd9Sstevel@tonic-gate  *	3.  A \ followed by any character except a digit or ( )
78*7c478bd9Sstevel@tonic-gate  *	    matches that character.
79*7c478bd9Sstevel@tonic-gate  *	4.  A nonempty string s bracketed [s] (or [^s]) matches any
80*7c478bd9Sstevel@tonic-gate  *	    character in (or not in) s. In s, \ has no special meaning,
81*7c478bd9Sstevel@tonic-gate  *	    and ] may only appear as the first letter. A substring
82*7c478bd9Sstevel@tonic-gate  *	    a-b, with a and b in ascending ASCII order, stands for
83*7c478bd9Sstevel@tonic-gate  *	    the inclusive range of ASCII characters.
84*7c478bd9Sstevel@tonic-gate  *	5.  A regular expression of form 1-4 followed by * matches a
85*7c478bd9Sstevel@tonic-gate  *	    sequence of 0 or more matches of the regular expression.
86*7c478bd9Sstevel@tonic-gate  *	6.  A regular expression, x, of form 1-8, bracketed \(x\)
87*7c478bd9Sstevel@tonic-gate  *	    matches what x matches.
88*7c478bd9Sstevel@tonic-gate  *	7.  A \ followed by a digit n matches a copy of the string that the
89*7c478bd9Sstevel@tonic-gate  *	    bracketed regular expression beginning with the nth \( matched.
90*7c478bd9Sstevel@tonic-gate  *	8.  A regular expression of form 1-8, x, followed by a regular
91*7c478bd9Sstevel@tonic-gate  *	    expression of form 1-7, y matches a match for x followed by
92*7c478bd9Sstevel@tonic-gate  *	    a match for y, with the x match being as long as possible
93*7c478bd9Sstevel@tonic-gate  *	    while still permitting a y match.
94*7c478bd9Sstevel@tonic-gate  *	9.  A regular expression of form 1-8 preceded by ^ (or followed
95*7c478bd9Sstevel@tonic-gate  *	    by $), is constrained to matches that begin at the left
96*7c478bd9Sstevel@tonic-gate  *	    (or end at the right) end of a line.
97*7c478bd9Sstevel@tonic-gate  *	10. A regular expression of form 1-9 picks out the longest among
98*7c478bd9Sstevel@tonic-gate  *	    the leftmost matches in a line.
99*7c478bd9Sstevel@tonic-gate  *	11. An empty regular expression stands for a copy of the last
100*7c478bd9Sstevel@tonic-gate  *	    regular expression encountered.
101*7c478bd9Sstevel@tonic-gate  */
102*7c478bd9Sstevel@tonic-gate 
103*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
104*7c478bd9Sstevel@tonic-gate #include <stdlib.h>
105*7c478bd9Sstevel@tonic-gate #include <stddef.h>
106*7c478bd9Sstevel@tonic-gate 
107*7c478bd9Sstevel@tonic-gate /*
108*7c478bd9Sstevel@tonic-gate  * constants for re's
109*7c478bd9Sstevel@tonic-gate  */
110*7c478bd9Sstevel@tonic-gate #define	CBRA	1
111*7c478bd9Sstevel@tonic-gate #define	CCHR	2
112*7c478bd9Sstevel@tonic-gate #define	CDOT	4
113*7c478bd9Sstevel@tonic-gate #define	CCL	6
114*7c478bd9Sstevel@tonic-gate #define	NCCL	8
115*7c478bd9Sstevel@tonic-gate #define	CDOL	10
116*7c478bd9Sstevel@tonic-gate #define	CEOF	11
117*7c478bd9Sstevel@tonic-gate #define	CKET	12
118*7c478bd9Sstevel@tonic-gate #define	CBACK	18
119*7c478bd9Sstevel@tonic-gate 
120*7c478bd9Sstevel@tonic-gate #define	CSTAR	01
121*7c478bd9Sstevel@tonic-gate 
122*7c478bd9Sstevel@tonic-gate #define	ESIZE	512
123*7c478bd9Sstevel@tonic-gate #define	NBRA	9
124*7c478bd9Sstevel@tonic-gate 
125*7c478bd9Sstevel@tonic-gate static struct re_globals {
126*7c478bd9Sstevel@tonic-gate 	char	_expbuf[ESIZE];
127*7c478bd9Sstevel@tonic-gate 	char	*_braslist[NBRA], *_braelist[NBRA];
128*7c478bd9Sstevel@tonic-gate 	char	_circf;
129*7c478bd9Sstevel@tonic-gate } *re_globals;
130*7c478bd9Sstevel@tonic-gate #define	expbuf (_re->_expbuf)
131*7c478bd9Sstevel@tonic-gate #define	braslist (_re->_braslist)
132*7c478bd9Sstevel@tonic-gate #define	braelist (_re->_braelist)
133*7c478bd9Sstevel@tonic-gate #define	circf (_re->_circf)
134*7c478bd9Sstevel@tonic-gate 
135*7c478bd9Sstevel@tonic-gate /*
136*7c478bd9Sstevel@tonic-gate  * forward declarations
137*7c478bd9Sstevel@tonic-gate  */
138*7c478bd9Sstevel@tonic-gate static int backref(int, char *);
139*7c478bd9Sstevel@tonic-gate static int advance(char *, char *);
140*7c478bd9Sstevel@tonic-gate static int cclass(char *, char, int);
141*7c478bd9Sstevel@tonic-gate 
142*7c478bd9Sstevel@tonic-gate /*
143*7c478bd9Sstevel@tonic-gate  * compile the regular expression argument into a dfa
144*7c478bd9Sstevel@tonic-gate  */
145*7c478bd9Sstevel@tonic-gate char *
146*7c478bd9Sstevel@tonic-gate re_comp(char *sp)
147*7c478bd9Sstevel@tonic-gate {
148*7c478bd9Sstevel@tonic-gate 	char	c;
149*7c478bd9Sstevel@tonic-gate 	struct re_globals *_re = re_globals;
150*7c478bd9Sstevel@tonic-gate 	char	*ep;
151*7c478bd9Sstevel@tonic-gate 	int	cclcnt, numbra = 0;
152*7c478bd9Sstevel@tonic-gate 	char	*lastep = 0;
153*7c478bd9Sstevel@tonic-gate 	char	bracket[NBRA];
154*7c478bd9Sstevel@tonic-gate 	char	*bracketp = &bracket[0];
155*7c478bd9Sstevel@tonic-gate 	char	*retoolong = "Regular expression too long";
156*7c478bd9Sstevel@tonic-gate 
157*7c478bd9Sstevel@tonic-gate 	if (_re == 0) {
158*7c478bd9Sstevel@tonic-gate 		_re = (struct re_globals *)calloc(1, sizeof (*_re));
159*7c478bd9Sstevel@tonic-gate 		if (_re == 0)
160*7c478bd9Sstevel@tonic-gate 			return ("Out of memory");
161*7c478bd9Sstevel@tonic-gate 		re_globals = _re;
162*7c478bd9Sstevel@tonic-gate 	}
163*7c478bd9Sstevel@tonic-gate 	ep = expbuf;
164*7c478bd9Sstevel@tonic-gate 
165*7c478bd9Sstevel@tonic-gate #define	comerr(msg) {expbuf[0] = 0; return (msg); }
166*7c478bd9Sstevel@tonic-gate 
167*7c478bd9Sstevel@tonic-gate 	if (sp == 0 || *sp == '\0') {
168*7c478bd9Sstevel@tonic-gate 		if (*ep == 0)
169*7c478bd9Sstevel@tonic-gate 			return ("No previous regular expression");
170*7c478bd9Sstevel@tonic-gate 		return (0);
171*7c478bd9Sstevel@tonic-gate 	}
172*7c478bd9Sstevel@tonic-gate 	if (*sp == '^') {
173*7c478bd9Sstevel@tonic-gate 		circf = 1;
174*7c478bd9Sstevel@tonic-gate 		sp++;
175*7c478bd9Sstevel@tonic-gate 	}
176*7c478bd9Sstevel@tonic-gate 	else
177*7c478bd9Sstevel@tonic-gate 		circf = 0;
178*7c478bd9Sstevel@tonic-gate 	for (;;) {
179*7c478bd9Sstevel@tonic-gate 		if (ep >= &expbuf[ESIZE])
180*7c478bd9Sstevel@tonic-gate 			comerr(retoolong);
181*7c478bd9Sstevel@tonic-gate 		if ((c = *sp++) == '\0') {
182*7c478bd9Sstevel@tonic-gate 			if (bracketp != bracket)
183*7c478bd9Sstevel@tonic-gate 				comerr("unmatched \\(");
184*7c478bd9Sstevel@tonic-gate 			*ep++ = CEOF;
185*7c478bd9Sstevel@tonic-gate 			*ep++ = 0;
186*7c478bd9Sstevel@tonic-gate 			return (0);
187*7c478bd9Sstevel@tonic-gate 		}
188*7c478bd9Sstevel@tonic-gate 		if (c != '*')
189*7c478bd9Sstevel@tonic-gate 			lastep = ep;
190*7c478bd9Sstevel@tonic-gate 		switch (c) {
191*7c478bd9Sstevel@tonic-gate 
192*7c478bd9Sstevel@tonic-gate 		case '.':
193*7c478bd9Sstevel@tonic-gate 			*ep++ = CDOT;
194*7c478bd9Sstevel@tonic-gate 			continue;
195*7c478bd9Sstevel@tonic-gate 
196*7c478bd9Sstevel@tonic-gate 		case '*':
197*7c478bd9Sstevel@tonic-gate 			if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
198*7c478bd9Sstevel@tonic-gate 				goto defchar;
199*7c478bd9Sstevel@tonic-gate 			*lastep |= CSTAR;
200*7c478bd9Sstevel@tonic-gate 			continue;
201*7c478bd9Sstevel@tonic-gate 
202*7c478bd9Sstevel@tonic-gate 		case '$':
203*7c478bd9Sstevel@tonic-gate 			if (*sp != '\0')
204*7c478bd9Sstevel@tonic-gate 				goto defchar;
205*7c478bd9Sstevel@tonic-gate 			*ep++ = CDOL;
206*7c478bd9Sstevel@tonic-gate 			continue;
207*7c478bd9Sstevel@tonic-gate 
208*7c478bd9Sstevel@tonic-gate 		case '[':
209*7c478bd9Sstevel@tonic-gate 			*ep++ = CCL;
210*7c478bd9Sstevel@tonic-gate 			*ep++ = 0;
211*7c478bd9Sstevel@tonic-gate 			cclcnt = 1;
212*7c478bd9Sstevel@tonic-gate 			if ((c = *sp++) == '^') {
213*7c478bd9Sstevel@tonic-gate 				c = *sp++;
214*7c478bd9Sstevel@tonic-gate 				ep[-2] = NCCL;
215*7c478bd9Sstevel@tonic-gate 			}
216*7c478bd9Sstevel@tonic-gate 			do {
217*7c478bd9Sstevel@tonic-gate 				if (c == '\0')
218*7c478bd9Sstevel@tonic-gate 					comerr("missing ]");
219*7c478bd9Sstevel@tonic-gate 				if (c == '-' && ep [-1] != 0) {
220*7c478bd9Sstevel@tonic-gate 					if ((c = *sp++) == ']') {
221*7c478bd9Sstevel@tonic-gate 						*ep++ = '-';
222*7c478bd9Sstevel@tonic-gate 						cclcnt++;
223*7c478bd9Sstevel@tonic-gate 						break;
224*7c478bd9Sstevel@tonic-gate 					}
225*7c478bd9Sstevel@tonic-gate 					while (ep[-1] < c) {
226*7c478bd9Sstevel@tonic-gate 						*ep = ep[-1] + 1;
227*7c478bd9Sstevel@tonic-gate 						ep++;
228*7c478bd9Sstevel@tonic-gate 						cclcnt++;
229*7c478bd9Sstevel@tonic-gate 						if (ep >= &expbuf[ESIZE])
230*7c478bd9Sstevel@tonic-gate 							comerr(retoolong);
231*7c478bd9Sstevel@tonic-gate 					}
232*7c478bd9Sstevel@tonic-gate 				}
233*7c478bd9Sstevel@tonic-gate 				*ep++ = c;
234*7c478bd9Sstevel@tonic-gate 				cclcnt++;
235*7c478bd9Sstevel@tonic-gate 				if (ep >= &expbuf[ESIZE])
236*7c478bd9Sstevel@tonic-gate 					comerr(retoolong);
237*7c478bd9Sstevel@tonic-gate 			} while ((c = *sp++) != ']');
238*7c478bd9Sstevel@tonic-gate 			lastep[1] = (char)cclcnt;
239*7c478bd9Sstevel@tonic-gate 			continue;
240*7c478bd9Sstevel@tonic-gate 
241*7c478bd9Sstevel@tonic-gate 		case '\\':
242*7c478bd9Sstevel@tonic-gate 			if ((c = *sp++) == '(') {
243*7c478bd9Sstevel@tonic-gate 				if (numbra >= NBRA)
244*7c478bd9Sstevel@tonic-gate 					comerr("too many \\(\\) pairs");
245*7c478bd9Sstevel@tonic-gate 				*bracketp++ = (char)numbra;
246*7c478bd9Sstevel@tonic-gate 				*ep++ = CBRA;
247*7c478bd9Sstevel@tonic-gate 				*ep++ = numbra++;
248*7c478bd9Sstevel@tonic-gate 				continue;
249*7c478bd9Sstevel@tonic-gate 			}
250*7c478bd9Sstevel@tonic-gate 			if (c == ')') {
251*7c478bd9Sstevel@tonic-gate 				if (bracketp <= bracket)
252*7c478bd9Sstevel@tonic-gate 					comerr("unmatched \\)");
253*7c478bd9Sstevel@tonic-gate 				*ep++ = CKET;
254*7c478bd9Sstevel@tonic-gate 				*ep++ = *--bracketp;
255*7c478bd9Sstevel@tonic-gate 				continue;
256*7c478bd9Sstevel@tonic-gate 			}
257*7c478bd9Sstevel@tonic-gate 			if (c >= '1' && c < ('1' + NBRA)) {
258*7c478bd9Sstevel@tonic-gate 				*ep++ = CBACK;
259*7c478bd9Sstevel@tonic-gate 				*ep++ = c - '1';
260*7c478bd9Sstevel@tonic-gate 				continue;
261*7c478bd9Sstevel@tonic-gate 			}
262*7c478bd9Sstevel@tonic-gate 			*ep++ = CCHR;
263*7c478bd9Sstevel@tonic-gate 			*ep++ = c;
264*7c478bd9Sstevel@tonic-gate 			continue;
265*7c478bd9Sstevel@tonic-gate 
266*7c478bd9Sstevel@tonic-gate 		defchar:
267*7c478bd9Sstevel@tonic-gate 		default:
268*7c478bd9Sstevel@tonic-gate 			*ep++ = CCHR;
269*7c478bd9Sstevel@tonic-gate 			*ep++ = c;
270*7c478bd9Sstevel@tonic-gate 		}
271*7c478bd9Sstevel@tonic-gate 	}
272*7c478bd9Sstevel@tonic-gate }
273*7c478bd9Sstevel@tonic-gate 
274*7c478bd9Sstevel@tonic-gate /*
275*7c478bd9Sstevel@tonic-gate  * match the argument string against the compiled re
276*7c478bd9Sstevel@tonic-gate  */
277*7c478bd9Sstevel@tonic-gate int
278*7c478bd9Sstevel@tonic-gate re_exec(char *p1)
279*7c478bd9Sstevel@tonic-gate {
280*7c478bd9Sstevel@tonic-gate 	struct re_globals *_re = re_globals;
281*7c478bd9Sstevel@tonic-gate 	char	*p2;
282*7c478bd9Sstevel@tonic-gate 	int	c;
283*7c478bd9Sstevel@tonic-gate 	int	rv;
284*7c478bd9Sstevel@tonic-gate 
285*7c478bd9Sstevel@tonic-gate 	if (_re == 0)
286*7c478bd9Sstevel@tonic-gate 		return (0);
287*7c478bd9Sstevel@tonic-gate 	p2 = expbuf;
288*7c478bd9Sstevel@tonic-gate 	for (c = 0; c < NBRA; c++) {
289*7c478bd9Sstevel@tonic-gate 		braslist[c] = 0;
290*7c478bd9Sstevel@tonic-gate 		braelist[c] = 0;
291*7c478bd9Sstevel@tonic-gate 	}
292*7c478bd9Sstevel@tonic-gate 	if (circf)
293*7c478bd9Sstevel@tonic-gate 		return ((advance(p1, p2)));
294*7c478bd9Sstevel@tonic-gate 	/*
295*7c478bd9Sstevel@tonic-gate 	 * fast check for first character
296*7c478bd9Sstevel@tonic-gate 	 */
297*7c478bd9Sstevel@tonic-gate 	if (*p2 == CCHR) {
298*7c478bd9Sstevel@tonic-gate 		c = p2[1];
299*7c478bd9Sstevel@tonic-gate 		do {
300*7c478bd9Sstevel@tonic-gate 			if (*p1 != c)
301*7c478bd9Sstevel@tonic-gate 				continue;
302*7c478bd9Sstevel@tonic-gate 			if (rv = advance(p1, p2))
303*7c478bd9Sstevel@tonic-gate 				return (rv);
304*7c478bd9Sstevel@tonic-gate 		} while (*p1++);
305*7c478bd9Sstevel@tonic-gate 		return (0);
306*7c478bd9Sstevel@tonic-gate 	}
307*7c478bd9Sstevel@tonic-gate 	/*
308*7c478bd9Sstevel@tonic-gate 	 * regular algorithm
309*7c478bd9Sstevel@tonic-gate 	 */
310*7c478bd9Sstevel@tonic-gate 	do
311*7c478bd9Sstevel@tonic-gate 		if (rv = advance(p1, p2))
312*7c478bd9Sstevel@tonic-gate 			return (rv);
313*7c478bd9Sstevel@tonic-gate 	while (*p1++);
314*7c478bd9Sstevel@tonic-gate 	return (0);
315*7c478bd9Sstevel@tonic-gate }
316*7c478bd9Sstevel@tonic-gate 
317*7c478bd9Sstevel@tonic-gate /*
318*7c478bd9Sstevel@tonic-gate  * try to match the next thing in the dfa
319*7c478bd9Sstevel@tonic-gate  */
320*7c478bd9Sstevel@tonic-gate static int
321*7c478bd9Sstevel@tonic-gate advance(char *lp, char *ep)
322*7c478bd9Sstevel@tonic-gate {
323*7c478bd9Sstevel@tonic-gate 	char	*curlp;
324*7c478bd9Sstevel@tonic-gate 	int	i;
325*7c478bd9Sstevel@tonic-gate 	ptrdiff_t	ct;
326*7c478bd9Sstevel@tonic-gate 	int	rv;
327*7c478bd9Sstevel@tonic-gate 	struct re_globals *_re = re_globals;
328*7c478bd9Sstevel@tonic-gate 
329*7c478bd9Sstevel@tonic-gate 	for (;;)
330*7c478bd9Sstevel@tonic-gate 		switch (*ep++) {
331*7c478bd9Sstevel@tonic-gate 
332*7c478bd9Sstevel@tonic-gate 		case CCHR:
333*7c478bd9Sstevel@tonic-gate 			if (*ep++ == *lp++)
334*7c478bd9Sstevel@tonic-gate 				continue;
335*7c478bd9Sstevel@tonic-gate 			return (0);
336*7c478bd9Sstevel@tonic-gate 
337*7c478bd9Sstevel@tonic-gate 		case CDOT:
338*7c478bd9Sstevel@tonic-gate 			if (*lp++)
339*7c478bd9Sstevel@tonic-gate 				continue;
340*7c478bd9Sstevel@tonic-gate 			return (0);
341*7c478bd9Sstevel@tonic-gate 
342*7c478bd9Sstevel@tonic-gate 		case CDOL:
343*7c478bd9Sstevel@tonic-gate 			if (*lp == '\0')
344*7c478bd9Sstevel@tonic-gate 				continue;
345*7c478bd9Sstevel@tonic-gate 			return (0);
346*7c478bd9Sstevel@tonic-gate 
347*7c478bd9Sstevel@tonic-gate 		case CEOF:
348*7c478bd9Sstevel@tonic-gate 			return (1);
349*7c478bd9Sstevel@tonic-gate 
350*7c478bd9Sstevel@tonic-gate 		case CCL:
351*7c478bd9Sstevel@tonic-gate 			if (cclass(ep, *lp++, 1)) {
352*7c478bd9Sstevel@tonic-gate 				ep += *ep;
353*7c478bd9Sstevel@tonic-gate 				continue;
354*7c478bd9Sstevel@tonic-gate 			}
355*7c478bd9Sstevel@tonic-gate 			return (0);
356*7c478bd9Sstevel@tonic-gate 
357*7c478bd9Sstevel@tonic-gate 		case NCCL:
358*7c478bd9Sstevel@tonic-gate 			if (cclass(ep, *lp++, 0)) {
359*7c478bd9Sstevel@tonic-gate 				ep += *ep;
360*7c478bd9Sstevel@tonic-gate 				continue;
361*7c478bd9Sstevel@tonic-gate 			}
362*7c478bd9Sstevel@tonic-gate 			return (0);
363*7c478bd9Sstevel@tonic-gate 
364*7c478bd9Sstevel@tonic-gate 		case CBRA:
365*7c478bd9Sstevel@tonic-gate 			braslist[*ep++] = lp;
366*7c478bd9Sstevel@tonic-gate 			continue;
367*7c478bd9Sstevel@tonic-gate 
368*7c478bd9Sstevel@tonic-gate 		case CKET:
369*7c478bd9Sstevel@tonic-gate 			braelist[*ep++] = lp;
370*7c478bd9Sstevel@tonic-gate 			continue;
371*7c478bd9Sstevel@tonic-gate 
372*7c478bd9Sstevel@tonic-gate 		case CBACK:
373*7c478bd9Sstevel@tonic-gate 			if (braelist[i = *ep++] == 0)
374*7c478bd9Sstevel@tonic-gate 				return (-1);
375*7c478bd9Sstevel@tonic-gate 			if (backref(i, lp)) {
376*7c478bd9Sstevel@tonic-gate 				lp += braelist[i] - braslist[i];
377*7c478bd9Sstevel@tonic-gate 				continue;
378*7c478bd9Sstevel@tonic-gate 			}
379*7c478bd9Sstevel@tonic-gate 			return (0);
380*7c478bd9Sstevel@tonic-gate 
381*7c478bd9Sstevel@tonic-gate 		case CBACK|CSTAR:
382*7c478bd9Sstevel@tonic-gate 			if (braelist[i = *ep++] == 0)
383*7c478bd9Sstevel@tonic-gate 				return (-1);
384*7c478bd9Sstevel@tonic-gate 			curlp = lp;
385*7c478bd9Sstevel@tonic-gate 			ct = braelist[i] - braslist[i];
386*7c478bd9Sstevel@tonic-gate 			while (backref(i, lp))
387*7c478bd9Sstevel@tonic-gate 				lp += ct;
388*7c478bd9Sstevel@tonic-gate 			while (lp >= curlp) {
389*7c478bd9Sstevel@tonic-gate 				if (rv = advance(lp, ep))
390*7c478bd9Sstevel@tonic-gate 					return (rv);
391*7c478bd9Sstevel@tonic-gate 				lp -= ct;
392*7c478bd9Sstevel@tonic-gate 			}
393*7c478bd9Sstevel@tonic-gate 			continue;
394*7c478bd9Sstevel@tonic-gate 
395*7c478bd9Sstevel@tonic-gate 		case CDOT|CSTAR:
396*7c478bd9Sstevel@tonic-gate 			curlp = lp;
397*7c478bd9Sstevel@tonic-gate 			while (*lp++)
398*7c478bd9Sstevel@tonic-gate 				;
399*7c478bd9Sstevel@tonic-gate 			goto star;
400*7c478bd9Sstevel@tonic-gate 
401*7c478bd9Sstevel@tonic-gate 		case CCHR|CSTAR:
402*7c478bd9Sstevel@tonic-gate 			curlp = lp;
403*7c478bd9Sstevel@tonic-gate 			while (*lp++ == *ep)
404*7c478bd9Sstevel@tonic-gate 				;
405*7c478bd9Sstevel@tonic-gate 			ep++;
406*7c478bd9Sstevel@tonic-gate 			goto star;
407*7c478bd9Sstevel@tonic-gate 
408*7c478bd9Sstevel@tonic-gate 		case CCL|CSTAR:
409*7c478bd9Sstevel@tonic-gate 		case NCCL|CSTAR:
410*7c478bd9Sstevel@tonic-gate 			curlp = lp;
411*7c478bd9Sstevel@tonic-gate 			while (cclass(ep, *lp++, ep[-1] == (CCL|CSTAR)))
412*7c478bd9Sstevel@tonic-gate 				;
413*7c478bd9Sstevel@tonic-gate 			ep += *ep;
414*7c478bd9Sstevel@tonic-gate 			goto star;
415*7c478bd9Sstevel@tonic-gate 
416*7c478bd9Sstevel@tonic-gate 		star:
417*7c478bd9Sstevel@tonic-gate 			do {
418*7c478bd9Sstevel@tonic-gate 				lp--;
419*7c478bd9Sstevel@tonic-gate 				if (rv = advance(lp, ep))
420*7c478bd9Sstevel@tonic-gate 					return (rv);
421*7c478bd9Sstevel@tonic-gate 			} while (lp > curlp);
422*7c478bd9Sstevel@tonic-gate 			return (0);
423*7c478bd9Sstevel@tonic-gate 
424*7c478bd9Sstevel@tonic-gate 		default:
425*7c478bd9Sstevel@tonic-gate 			return (-1);
426*7c478bd9Sstevel@tonic-gate 		}
427*7c478bd9Sstevel@tonic-gate }
428*7c478bd9Sstevel@tonic-gate 
429*7c478bd9Sstevel@tonic-gate static int
430*7c478bd9Sstevel@tonic-gate backref(int i, char *lp)
431*7c478bd9Sstevel@tonic-gate {
432*7c478bd9Sstevel@tonic-gate 	char	*bp;
433*7c478bd9Sstevel@tonic-gate 	struct re_globals *_re = re_globals;
434*7c478bd9Sstevel@tonic-gate 
435*7c478bd9Sstevel@tonic-gate 	bp = braslist[i];
436*7c478bd9Sstevel@tonic-gate 	while (*bp++ == *lp++)
437*7c478bd9Sstevel@tonic-gate 		if (bp >= braelist[i])
438*7c478bd9Sstevel@tonic-gate 			return (1);
439*7c478bd9Sstevel@tonic-gate 	return (0);
440*7c478bd9Sstevel@tonic-gate }
441*7c478bd9Sstevel@tonic-gate 
442*7c478bd9Sstevel@tonic-gate static int
443*7c478bd9Sstevel@tonic-gate cclass(char *set, char c, int af)
444*7c478bd9Sstevel@tonic-gate {
445*7c478bd9Sstevel@tonic-gate 	int	n;
446*7c478bd9Sstevel@tonic-gate 
447*7c478bd9Sstevel@tonic-gate 	if (c == 0)
448*7c478bd9Sstevel@tonic-gate 		return (0);
449*7c478bd9Sstevel@tonic-gate 	n = *set++;
450*7c478bd9Sstevel@tonic-gate 	while (--n)
451*7c478bd9Sstevel@tonic-gate 		if (*set++ == c)
452*7c478bd9Sstevel@tonic-gate 			return (af);
453*7c478bd9Sstevel@tonic-gate 	return (! af);
454*7c478bd9Sstevel@tonic-gate }
455