xref: /illumos-gate/usr/src/cmd/vgrind/regexp.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1980 Regents of the University of California.
3*7c478bd9Sstevel@tonic-gate  * All rights reserved.  The Berkeley software License Agreement
4*7c478bd9Sstevel@tonic-gate  * specifies the terms and conditions for redistribution.
5*7c478bd9Sstevel@tonic-gate  */
6*7c478bd9Sstevel@tonic-gate 
7*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
8*7c478bd9Sstevel@tonic-gate 	  /* from UCB 5.1 (Berkeley) 6/5/85 */
9*7c478bd9Sstevel@tonic-gate 
10*7c478bd9Sstevel@tonic-gate #include <ctype.h>
11*7c478bd9Sstevel@tonic-gate 
12*7c478bd9Sstevel@tonic-gate typedef int	boolean;
13*7c478bd9Sstevel@tonic-gate #define TRUE	1
14*7c478bd9Sstevel@tonic-gate #define FALSE	0
15*7c478bd9Sstevel@tonic-gate #define NIL	0
16*7c478bd9Sstevel@tonic-gate 
17*7c478bd9Sstevel@tonic-gate extern boolean	l_onecase;	/* true if upper and lower equivalent */
18*7c478bd9Sstevel@tonic-gate extern char	*l_idchars;	/* set of characters legal in identifiers
19*7c478bd9Sstevel@tonic-gate 				   in addition to letters and digits */
20*7c478bd9Sstevel@tonic-gate 
21*7c478bd9Sstevel@tonic-gate extern char	*strchr();
22*7c478bd9Sstevel@tonic-gate 
23*7c478bd9Sstevel@tonic-gate #define isidchr(c)	\
24*7c478bd9Sstevel@tonic-gate 		(isalnum(c) || ((c) != NIL && strchr(l_idchars, (c)) != NIL))
25*7c478bd9Sstevel@tonic-gate #define makelower(c)	(isupper((c)) ? tolower((c)) : (c))
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate /*  STRNCMP -	like strncmp except that we convert the
28*7c478bd9Sstevel@tonic-gate  *	 	first string to lower case before comparing
29*7c478bd9Sstevel@tonic-gate  *		if l_onecase is set.
30*7c478bd9Sstevel@tonic-gate  */
31*7c478bd9Sstevel@tonic-gate 
32*7c478bd9Sstevel@tonic-gate STRNCMP(s1, s2, len)
33*7c478bd9Sstevel@tonic-gate 	register char *s1,*s2;
34*7c478bd9Sstevel@tonic-gate 	register int len;
35*7c478bd9Sstevel@tonic-gate {
36*7c478bd9Sstevel@tonic-gate 	if (l_onecase) {
37*7c478bd9Sstevel@tonic-gate 	    do
38*7c478bd9Sstevel@tonic-gate 		if (*s2 - makelower(*s1))
39*7c478bd9Sstevel@tonic-gate 			return (*s2 - makelower(*s1));
40*7c478bd9Sstevel@tonic-gate 		else {
41*7c478bd9Sstevel@tonic-gate 			s2++;
42*7c478bd9Sstevel@tonic-gate 			s1++;
43*7c478bd9Sstevel@tonic-gate 		}
44*7c478bd9Sstevel@tonic-gate 	    while (--len);
45*7c478bd9Sstevel@tonic-gate 	} else {
46*7c478bd9Sstevel@tonic-gate 	    do
47*7c478bd9Sstevel@tonic-gate 		if (*s2 - *s1)
48*7c478bd9Sstevel@tonic-gate 			return (*s2 - *s1);
49*7c478bd9Sstevel@tonic-gate 		else {
50*7c478bd9Sstevel@tonic-gate 			s2++;
51*7c478bd9Sstevel@tonic-gate 			s1++;
52*7c478bd9Sstevel@tonic-gate 		}
53*7c478bd9Sstevel@tonic-gate 	    while (--len);
54*7c478bd9Sstevel@tonic-gate 	}
55*7c478bd9Sstevel@tonic-gate 	return(0);
56*7c478bd9Sstevel@tonic-gate }
57*7c478bd9Sstevel@tonic-gate 
58*7c478bd9Sstevel@tonic-gate /*	The following routine converts an irregular expression to
59*7c478bd9Sstevel@tonic-gate  *	internal format.
60*7c478bd9Sstevel@tonic-gate  *
61*7c478bd9Sstevel@tonic-gate  *	Either meta symbols (\a \d or \p) or character strings or
62*7c478bd9Sstevel@tonic-gate  *	operations ( alternation or parenthesizing ) can be
63*7c478bd9Sstevel@tonic-gate  *	specified.  Each starts with a descriptor byte.  The descriptor
64*7c478bd9Sstevel@tonic-gate  *	byte has STR set for strings, META set for meta symbols
65*7c478bd9Sstevel@tonic-gate  *	and OPER set for operations.
66*7c478bd9Sstevel@tonic-gate  *	The descriptor byte can also have the OPT bit set if the object
67*7c478bd9Sstevel@tonic-gate  *	defined is optional.  Also ALT can be set to indicate an alternation.
68*7c478bd9Sstevel@tonic-gate  *
69*7c478bd9Sstevel@tonic-gate  *	For metasymbols the byte following the descriptor byte identities
70*7c478bd9Sstevel@tonic-gate  *	the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '(').  For
71*7c478bd9Sstevel@tonic-gate  *	strings the byte after the descriptor is a character count for
72*7c478bd9Sstevel@tonic-gate  *	the string:
73*7c478bd9Sstevel@tonic-gate  *
74*7c478bd9Sstevel@tonic-gate  *		meta symbols := descriptor
75*7c478bd9Sstevel@tonic-gate  *				symbol
76*7c478bd9Sstevel@tonic-gate  *
77*7c478bd9Sstevel@tonic-gate  *		strings :=	descriptor
78*7c478bd9Sstevel@tonic-gate  *				character count
79*7c478bd9Sstevel@tonic-gate  *				the string
80*7c478bd9Sstevel@tonic-gate  *
81*7c478bd9Sstevel@tonic-gate  *		operations :=	descriptor
82*7c478bd9Sstevel@tonic-gate  *				symbol
83*7c478bd9Sstevel@tonic-gate  *				character count
84*7c478bd9Sstevel@tonic-gate  */
85*7c478bd9Sstevel@tonic-gate 
86*7c478bd9Sstevel@tonic-gate /*
87*7c478bd9Sstevel@tonic-gate  *  handy macros for accessing parts of match blocks
88*7c478bd9Sstevel@tonic-gate  */
89*7c478bd9Sstevel@tonic-gate #define MSYM(A) (*(A+1))	/* symbol in a meta symbol block */
90*7c478bd9Sstevel@tonic-gate #define MNEXT(A) (A+2)		/* character following a metasymbol block */
91*7c478bd9Sstevel@tonic-gate 
92*7c478bd9Sstevel@tonic-gate #define OSYM(A) (*(A+1))	/* symbol in an operation block */
93*7c478bd9Sstevel@tonic-gate #define OCNT(A) (*(A+2))	/* character count */
94*7c478bd9Sstevel@tonic-gate #define ONEXT(A) (A+3)		/* next character after the operation */
95*7c478bd9Sstevel@tonic-gate #define OPTR(A) (A+*(A+2))	/* place pointed to by the operator */
96*7c478bd9Sstevel@tonic-gate 
97*7c478bd9Sstevel@tonic-gate #define SCNT(A) (*(A+1))	/* byte count of a string */
98*7c478bd9Sstevel@tonic-gate #define SSTR(A) (A+2)		/* address of the string */
99*7c478bd9Sstevel@tonic-gate #define SNEXT(A) (A+2+*(A+1))	/* character following the string */
100*7c478bd9Sstevel@tonic-gate 
101*7c478bd9Sstevel@tonic-gate /*
102*7c478bd9Sstevel@tonic-gate  *  bit flags in the descriptor
103*7c478bd9Sstevel@tonic-gate  */
104*7c478bd9Sstevel@tonic-gate #define OPT 1
105*7c478bd9Sstevel@tonic-gate #define STR 2
106*7c478bd9Sstevel@tonic-gate #define META 4
107*7c478bd9Sstevel@tonic-gate #define ALT 8
108*7c478bd9Sstevel@tonic-gate #define OPER 16
109*7c478bd9Sstevel@tonic-gate 
110*7c478bd9Sstevel@tonic-gate char *ure;		/* pointer current position in unconverted exp */
111*7c478bd9Sstevel@tonic-gate char *ccre;		/* pointer to current position in converted exp*/
112*7c478bd9Sstevel@tonic-gate char *malloc();
113*7c478bd9Sstevel@tonic-gate 
114*7c478bd9Sstevel@tonic-gate char *
115*7c478bd9Sstevel@tonic-gate convexp(re)
116*7c478bd9Sstevel@tonic-gate     char *re;		/* unconverted irregular expression */
117*7c478bd9Sstevel@tonic-gate {
118*7c478bd9Sstevel@tonic-gate     register char *cre;		/* pointer to converted regular expression */
119*7c478bd9Sstevel@tonic-gate 
120*7c478bd9Sstevel@tonic-gate     /* allocate room for the converted expression */
121*7c478bd9Sstevel@tonic-gate     if (re == NIL)
122*7c478bd9Sstevel@tonic-gate 	return (NIL);
123*7c478bd9Sstevel@tonic-gate     if (*re == '\0')
124*7c478bd9Sstevel@tonic-gate 	return (NIL);
125*7c478bd9Sstevel@tonic-gate     cre = malloc (4 * strlen(re) + 3);
126*7c478bd9Sstevel@tonic-gate     ccre = cre;
127*7c478bd9Sstevel@tonic-gate     ure = re;
128*7c478bd9Sstevel@tonic-gate 
129*7c478bd9Sstevel@tonic-gate     /* start the conversion with a \a */
130*7c478bd9Sstevel@tonic-gate     *cre = META | OPT;
131*7c478bd9Sstevel@tonic-gate     MSYM(cre) = 'a';
132*7c478bd9Sstevel@tonic-gate     ccre = MNEXT(cre);
133*7c478bd9Sstevel@tonic-gate 
134*7c478bd9Sstevel@tonic-gate     /* start the conversion (its recursive) */
135*7c478bd9Sstevel@tonic-gate     expconv ();
136*7c478bd9Sstevel@tonic-gate     *ccre = 0;
137*7c478bd9Sstevel@tonic-gate     return (cre);
138*7c478bd9Sstevel@tonic-gate }
139*7c478bd9Sstevel@tonic-gate 
140*7c478bd9Sstevel@tonic-gate expconv()
141*7c478bd9Sstevel@tonic-gate {
142*7c478bd9Sstevel@tonic-gate     register char *cs;		/* pointer to current symbol in converted exp */
143*7c478bd9Sstevel@tonic-gate     register char c;		/* character being processed */
144*7c478bd9Sstevel@tonic-gate     register char *acs;		/* pinter to last alternate */
145*7c478bd9Sstevel@tonic-gate     register int temp;
146*7c478bd9Sstevel@tonic-gate 
147*7c478bd9Sstevel@tonic-gate     /* let the conversion begin */
148*7c478bd9Sstevel@tonic-gate     acs = NIL;
149*7c478bd9Sstevel@tonic-gate     cs = NIL;
150*7c478bd9Sstevel@tonic-gate     while (*ure != NIL) {
151*7c478bd9Sstevel@tonic-gate 	switch (c = *ure++) {
152*7c478bd9Sstevel@tonic-gate 
153*7c478bd9Sstevel@tonic-gate 	case '\\':
154*7c478bd9Sstevel@tonic-gate 	    switch (c = *ure++) {
155*7c478bd9Sstevel@tonic-gate 
156*7c478bd9Sstevel@tonic-gate 	    /* escaped characters are just characters */
157*7c478bd9Sstevel@tonic-gate 	    default:
158*7c478bd9Sstevel@tonic-gate 		if (cs == NIL || (*cs & STR) == 0) {
159*7c478bd9Sstevel@tonic-gate 		    cs = ccre;
160*7c478bd9Sstevel@tonic-gate 		    *cs = STR;
161*7c478bd9Sstevel@tonic-gate 		    SCNT(cs) = 1;
162*7c478bd9Sstevel@tonic-gate 		    ccre += 2;
163*7c478bd9Sstevel@tonic-gate 		} else
164*7c478bd9Sstevel@tonic-gate 		    SCNT(cs)++;
165*7c478bd9Sstevel@tonic-gate 		*ccre++ = c;
166*7c478bd9Sstevel@tonic-gate 		break;
167*7c478bd9Sstevel@tonic-gate 
168*7c478bd9Sstevel@tonic-gate 	    /* normal(?) metacharacters */
169*7c478bd9Sstevel@tonic-gate 	    case 'a':
170*7c478bd9Sstevel@tonic-gate 	    case 'd':
171*7c478bd9Sstevel@tonic-gate 	    case 'e':
172*7c478bd9Sstevel@tonic-gate 	    case 'p':
173*7c478bd9Sstevel@tonic-gate 		if (acs != NIL && acs != cs) {
174*7c478bd9Sstevel@tonic-gate 		    do {
175*7c478bd9Sstevel@tonic-gate 			temp = OCNT(acs);
176*7c478bd9Sstevel@tonic-gate 			OCNT(acs) = ccre - acs;
177*7c478bd9Sstevel@tonic-gate 			acs -= temp;
178*7c478bd9Sstevel@tonic-gate 		    } while (temp != 0);
179*7c478bd9Sstevel@tonic-gate 		    acs = NIL;
180*7c478bd9Sstevel@tonic-gate 		}
181*7c478bd9Sstevel@tonic-gate 		cs = ccre;
182*7c478bd9Sstevel@tonic-gate 		*cs = META;
183*7c478bd9Sstevel@tonic-gate 		MSYM(cs) = c;
184*7c478bd9Sstevel@tonic-gate 		ccre = MNEXT(cs);
185*7c478bd9Sstevel@tonic-gate 		break;
186*7c478bd9Sstevel@tonic-gate 	    }
187*7c478bd9Sstevel@tonic-gate 	    break;
188*7c478bd9Sstevel@tonic-gate 
189*7c478bd9Sstevel@tonic-gate 	/* just put the symbol in */
190*7c478bd9Sstevel@tonic-gate 	case '^':
191*7c478bd9Sstevel@tonic-gate 	case '$':
192*7c478bd9Sstevel@tonic-gate 	    if (acs != NIL && acs != cs) {
193*7c478bd9Sstevel@tonic-gate 		do {
194*7c478bd9Sstevel@tonic-gate 		    temp = OCNT(acs);
195*7c478bd9Sstevel@tonic-gate 		    OCNT(acs) = ccre - acs;
196*7c478bd9Sstevel@tonic-gate 		    acs -= temp;
197*7c478bd9Sstevel@tonic-gate 		} while (temp != 0);
198*7c478bd9Sstevel@tonic-gate 		acs = NIL;
199*7c478bd9Sstevel@tonic-gate 	    }
200*7c478bd9Sstevel@tonic-gate 	    cs = ccre;
201*7c478bd9Sstevel@tonic-gate 	    *cs = META;
202*7c478bd9Sstevel@tonic-gate 	    MSYM(cs) = c;
203*7c478bd9Sstevel@tonic-gate 	    ccre = MNEXT(cs);
204*7c478bd9Sstevel@tonic-gate 	    break;
205*7c478bd9Sstevel@tonic-gate 
206*7c478bd9Sstevel@tonic-gate 	/* mark the last match sequence as optional */
207*7c478bd9Sstevel@tonic-gate 	case '?':
208*7c478bd9Sstevel@tonic-gate 	    if (cs)
209*7c478bd9Sstevel@tonic-gate 	    	*cs = *cs | OPT;
210*7c478bd9Sstevel@tonic-gate 	    break;
211*7c478bd9Sstevel@tonic-gate 
212*7c478bd9Sstevel@tonic-gate 	/* recurse and define a subexpression */
213*7c478bd9Sstevel@tonic-gate 	case '(':
214*7c478bd9Sstevel@tonic-gate 	    if (acs != NIL && acs != cs) {
215*7c478bd9Sstevel@tonic-gate 		do {
216*7c478bd9Sstevel@tonic-gate 		    temp = OCNT(acs);
217*7c478bd9Sstevel@tonic-gate 		    OCNT(acs) = ccre - acs;
218*7c478bd9Sstevel@tonic-gate 		    acs -= temp;
219*7c478bd9Sstevel@tonic-gate 		} while (temp != 0);
220*7c478bd9Sstevel@tonic-gate 		acs = NIL;
221*7c478bd9Sstevel@tonic-gate 	    }
222*7c478bd9Sstevel@tonic-gate 	    cs = ccre;
223*7c478bd9Sstevel@tonic-gate 	    *cs = OPER;
224*7c478bd9Sstevel@tonic-gate 	    OSYM(cs) = '(';
225*7c478bd9Sstevel@tonic-gate 	    ccre = ONEXT(cs);
226*7c478bd9Sstevel@tonic-gate 	    expconv ();
227*7c478bd9Sstevel@tonic-gate 	    OCNT(cs) = ccre - cs;		/* offset to next symbol */
228*7c478bd9Sstevel@tonic-gate 	    break;
229*7c478bd9Sstevel@tonic-gate 
230*7c478bd9Sstevel@tonic-gate 	/* return from a recursion */
231*7c478bd9Sstevel@tonic-gate 	case ')':
232*7c478bd9Sstevel@tonic-gate 	    if (acs != NIL) {
233*7c478bd9Sstevel@tonic-gate 		do {
234*7c478bd9Sstevel@tonic-gate 		    temp = OCNT(acs);
235*7c478bd9Sstevel@tonic-gate 		    OCNT(acs) = ccre - acs;
236*7c478bd9Sstevel@tonic-gate 		    acs -= temp;
237*7c478bd9Sstevel@tonic-gate 		} while (temp != 0);
238*7c478bd9Sstevel@tonic-gate 		acs = NIL;
239*7c478bd9Sstevel@tonic-gate 	    }
240*7c478bd9Sstevel@tonic-gate 	    cs = ccre;
241*7c478bd9Sstevel@tonic-gate 	    *cs = META;
242*7c478bd9Sstevel@tonic-gate 	    MSYM(cs) = c;
243*7c478bd9Sstevel@tonic-gate 	    ccre = MNEXT(cs);
244*7c478bd9Sstevel@tonic-gate 	    return;
245*7c478bd9Sstevel@tonic-gate 
246*7c478bd9Sstevel@tonic-gate 	/* mark the last match sequence as having an alternate */
247*7c478bd9Sstevel@tonic-gate 	/* the third byte will contain an offset to jump over the */
248*7c478bd9Sstevel@tonic-gate 	/* alternate match in case the first did not fail */
249*7c478bd9Sstevel@tonic-gate 	case '|':
250*7c478bd9Sstevel@tonic-gate 	    if (acs != NIL && acs != cs)
251*7c478bd9Sstevel@tonic-gate 		OCNT(ccre) = ccre - acs;	/* make a back pointer */
252*7c478bd9Sstevel@tonic-gate 	    else
253*7c478bd9Sstevel@tonic-gate 		OCNT(ccre) = 0;
254*7c478bd9Sstevel@tonic-gate 	    *cs |= ALT;
255*7c478bd9Sstevel@tonic-gate 	    cs = ccre;
256*7c478bd9Sstevel@tonic-gate 	    *cs = OPER;
257*7c478bd9Sstevel@tonic-gate 	    OSYM(cs) = '|';
258*7c478bd9Sstevel@tonic-gate 	    ccre = ONEXT(cs);
259*7c478bd9Sstevel@tonic-gate 	    acs = cs;	/* remember that the pointer is to be filles */
260*7c478bd9Sstevel@tonic-gate 	    break;
261*7c478bd9Sstevel@tonic-gate 
262*7c478bd9Sstevel@tonic-gate 	/* if its not a metasymbol just build a scharacter string */
263*7c478bd9Sstevel@tonic-gate 	default:
264*7c478bd9Sstevel@tonic-gate 	    if (cs == NIL || (*cs & STR) == 0) {
265*7c478bd9Sstevel@tonic-gate 		cs = ccre;
266*7c478bd9Sstevel@tonic-gate 		*cs = STR;
267*7c478bd9Sstevel@tonic-gate 		SCNT(cs) = 1;
268*7c478bd9Sstevel@tonic-gate 		ccre = SSTR(cs);
269*7c478bd9Sstevel@tonic-gate 	    } else
270*7c478bd9Sstevel@tonic-gate 		SCNT(cs)++;
271*7c478bd9Sstevel@tonic-gate 	    *ccre++ = c;
272*7c478bd9Sstevel@tonic-gate 	    break;
273*7c478bd9Sstevel@tonic-gate 	}
274*7c478bd9Sstevel@tonic-gate     }
275*7c478bd9Sstevel@tonic-gate     if (acs != NIL) {
276*7c478bd9Sstevel@tonic-gate 	do {
277*7c478bd9Sstevel@tonic-gate 	    temp = OCNT(acs);
278*7c478bd9Sstevel@tonic-gate 	    OCNT(acs) = ccre - acs;
279*7c478bd9Sstevel@tonic-gate 	    acs -= temp;
280*7c478bd9Sstevel@tonic-gate 	} while (temp != 0);
281*7c478bd9Sstevel@tonic-gate 	acs = NIL;
282*7c478bd9Sstevel@tonic-gate     }
283*7c478bd9Sstevel@tonic-gate     return;
284*7c478bd9Sstevel@tonic-gate }
285*7c478bd9Sstevel@tonic-gate /* end of convertre */
286*7c478bd9Sstevel@tonic-gate 
287*7c478bd9Sstevel@tonic-gate 
288*7c478bd9Sstevel@tonic-gate /*
289*7c478bd9Sstevel@tonic-gate  *	The following routine recognises an irregular expresion
290*7c478bd9Sstevel@tonic-gate  *	with the following special characters:
291*7c478bd9Sstevel@tonic-gate  *
292*7c478bd9Sstevel@tonic-gate  *		\?	-	means last match was optional
293*7c478bd9Sstevel@tonic-gate  *		\a	-	matches any number of characters
294*7c478bd9Sstevel@tonic-gate  *		\d	-	matches any number of spaces and tabs
295*7c478bd9Sstevel@tonic-gate  *		\p	-	matches any number of alphanumeric
296*7c478bd9Sstevel@tonic-gate  *				characters. The
297*7c478bd9Sstevel@tonic-gate  *				characters matched will be copied into
298*7c478bd9Sstevel@tonic-gate  *				the area pointed to by 'name'.
299*7c478bd9Sstevel@tonic-gate  *		\|	-	alternation
300*7c478bd9Sstevel@tonic-gate  *		\( \)	-	grouping used mostly for alternation and
301*7c478bd9Sstevel@tonic-gate  *				optionality
302*7c478bd9Sstevel@tonic-gate  *
303*7c478bd9Sstevel@tonic-gate  *	The irregular expression must be translated to internal form
304*7c478bd9Sstevel@tonic-gate  *	prior to calling this routine
305*7c478bd9Sstevel@tonic-gate  *
306*7c478bd9Sstevel@tonic-gate  *	The value returned is the pointer to the first non \a
307*7c478bd9Sstevel@tonic-gate  *	character matched.
308*7c478bd9Sstevel@tonic-gate  */
309*7c478bd9Sstevel@tonic-gate 
310*7c478bd9Sstevel@tonic-gate boolean _escaped;		/* true if we are currently _escaped */
311*7c478bd9Sstevel@tonic-gate char *Start;			/* start of string */
312*7c478bd9Sstevel@tonic-gate 
313*7c478bd9Sstevel@tonic-gate char *
314*7c478bd9Sstevel@tonic-gate expmatch (s, re, mstring)
315*7c478bd9Sstevel@tonic-gate     register char *s;		/* string to check for a match in */
316*7c478bd9Sstevel@tonic-gate     register char *re;		/* a converted irregular expression */
317*7c478bd9Sstevel@tonic-gate     register char *mstring;	/* where to put whatever matches a \p */
318*7c478bd9Sstevel@tonic-gate {
319*7c478bd9Sstevel@tonic-gate     register char *cs;		/* the current symbol */
320*7c478bd9Sstevel@tonic-gate     register char *ptr,*s1;	/* temporary pointer */
321*7c478bd9Sstevel@tonic-gate     boolean matched;		/* a temporary boolean */
322*7c478bd9Sstevel@tonic-gate 
323*7c478bd9Sstevel@tonic-gate     /* initial conditions */
324*7c478bd9Sstevel@tonic-gate     if (re == NIL)
325*7c478bd9Sstevel@tonic-gate 	return (NIL);
326*7c478bd9Sstevel@tonic-gate     cs = re;
327*7c478bd9Sstevel@tonic-gate     matched = FALSE;
328*7c478bd9Sstevel@tonic-gate 
329*7c478bd9Sstevel@tonic-gate     /* loop till expression string is exhausted (or at least pretty tired) */
330*7c478bd9Sstevel@tonic-gate     while (*cs) {
331*7c478bd9Sstevel@tonic-gate 	switch (*cs & (OPER | STR | META)) {
332*7c478bd9Sstevel@tonic-gate 
333*7c478bd9Sstevel@tonic-gate 	/* try to match a string */
334*7c478bd9Sstevel@tonic-gate 	case STR:
335*7c478bd9Sstevel@tonic-gate 	    matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
336*7c478bd9Sstevel@tonic-gate 	    if (matched) {
337*7c478bd9Sstevel@tonic-gate 
338*7c478bd9Sstevel@tonic-gate 		/* hoorah it matches */
339*7c478bd9Sstevel@tonic-gate 		s += SCNT(cs);
340*7c478bd9Sstevel@tonic-gate 		cs = SNEXT(cs);
341*7c478bd9Sstevel@tonic-gate 	    } else if (*cs & ALT) {
342*7c478bd9Sstevel@tonic-gate 
343*7c478bd9Sstevel@tonic-gate 		/* alternation, skip to next expression */
344*7c478bd9Sstevel@tonic-gate 		cs = SNEXT(cs);
345*7c478bd9Sstevel@tonic-gate 	    } else if (*cs & OPT) {
346*7c478bd9Sstevel@tonic-gate 
347*7c478bd9Sstevel@tonic-gate 		/* the match is optional */
348*7c478bd9Sstevel@tonic-gate 		cs = SNEXT(cs);
349*7c478bd9Sstevel@tonic-gate 		matched = 1;		/* indicate a successful match */
350*7c478bd9Sstevel@tonic-gate 	    } else {
351*7c478bd9Sstevel@tonic-gate 
352*7c478bd9Sstevel@tonic-gate 		/* no match, error return */
353*7c478bd9Sstevel@tonic-gate 		return (NIL);
354*7c478bd9Sstevel@tonic-gate 	    }
355*7c478bd9Sstevel@tonic-gate 	    break;
356*7c478bd9Sstevel@tonic-gate 
357*7c478bd9Sstevel@tonic-gate 	/* an operator, do something fancy */
358*7c478bd9Sstevel@tonic-gate 	case OPER:
359*7c478bd9Sstevel@tonic-gate 	    switch (OSYM(cs)) {
360*7c478bd9Sstevel@tonic-gate 
361*7c478bd9Sstevel@tonic-gate 	    /* this is an alternation */
362*7c478bd9Sstevel@tonic-gate 	    case '|':
363*7c478bd9Sstevel@tonic-gate 		if (matched)
364*7c478bd9Sstevel@tonic-gate 
365*7c478bd9Sstevel@tonic-gate 		    /* last thing in the alternation was a match, skip ahead */
366*7c478bd9Sstevel@tonic-gate 		    cs = OPTR(cs);
367*7c478bd9Sstevel@tonic-gate 		else
368*7c478bd9Sstevel@tonic-gate 
369*7c478bd9Sstevel@tonic-gate 		    /* no match, keep trying */
370*7c478bd9Sstevel@tonic-gate 		    cs = ONEXT(cs);
371*7c478bd9Sstevel@tonic-gate 		break;
372*7c478bd9Sstevel@tonic-gate 
373*7c478bd9Sstevel@tonic-gate 	    /* this is a grouping, recurse */
374*7c478bd9Sstevel@tonic-gate 	    case '(':
375*7c478bd9Sstevel@tonic-gate 		ptr = expmatch (s, ONEXT(cs), mstring);
376*7c478bd9Sstevel@tonic-gate 		if (ptr != NIL) {
377*7c478bd9Sstevel@tonic-gate 
378*7c478bd9Sstevel@tonic-gate 		    /* the subexpression matched */
379*7c478bd9Sstevel@tonic-gate 		    matched = 1;
380*7c478bd9Sstevel@tonic-gate 		    s = ptr;
381*7c478bd9Sstevel@tonic-gate 		} else if (*cs & ALT) {
382*7c478bd9Sstevel@tonic-gate 
383*7c478bd9Sstevel@tonic-gate 		    /* alternation, skip to next expression */
384*7c478bd9Sstevel@tonic-gate 		    matched = 0;
385*7c478bd9Sstevel@tonic-gate 		} else if (*cs & OPT) {
386*7c478bd9Sstevel@tonic-gate 
387*7c478bd9Sstevel@tonic-gate 		    /* the match is optional */
388*7c478bd9Sstevel@tonic-gate 		    matched = 1;	/* indicate a successful match */
389*7c478bd9Sstevel@tonic-gate 		} else {
390*7c478bd9Sstevel@tonic-gate 
391*7c478bd9Sstevel@tonic-gate 		    /* no match, error return */
392*7c478bd9Sstevel@tonic-gate 		    return (NIL);
393*7c478bd9Sstevel@tonic-gate 		}
394*7c478bd9Sstevel@tonic-gate 		cs = OPTR(cs);
395*7c478bd9Sstevel@tonic-gate 		break;
396*7c478bd9Sstevel@tonic-gate 	    }
397*7c478bd9Sstevel@tonic-gate 	    break;
398*7c478bd9Sstevel@tonic-gate 
399*7c478bd9Sstevel@tonic-gate 	/* try to match a metasymbol */
400*7c478bd9Sstevel@tonic-gate 	case META:
401*7c478bd9Sstevel@tonic-gate 	    switch (MSYM(cs)) {
402*7c478bd9Sstevel@tonic-gate 
403*7c478bd9Sstevel@tonic-gate 	    /* try to match anything and remember what was matched */
404*7c478bd9Sstevel@tonic-gate 	    case 'p':
405*7c478bd9Sstevel@tonic-gate 		/*
406*7c478bd9Sstevel@tonic-gate 		 *  This is really the same as trying the match the
407*7c478bd9Sstevel@tonic-gate 		 *  remaining parts of the expression to any subset
408*7c478bd9Sstevel@tonic-gate 		 *  of the string.
409*7c478bd9Sstevel@tonic-gate 		 */
410*7c478bd9Sstevel@tonic-gate 		s1 = s;
411*7c478bd9Sstevel@tonic-gate 		do {
412*7c478bd9Sstevel@tonic-gate 		    ptr = expmatch (s1, MNEXT(cs), mstring);
413*7c478bd9Sstevel@tonic-gate 		    if (ptr != NIL && s1 != s) {
414*7c478bd9Sstevel@tonic-gate 
415*7c478bd9Sstevel@tonic-gate 			/* we have a match, remember the match */
416*7c478bd9Sstevel@tonic-gate 			strncpy (mstring, s, s1 - s);
417*7c478bd9Sstevel@tonic-gate 			mstring[s1 - s] = '\0';
418*7c478bd9Sstevel@tonic-gate 			return (ptr);
419*7c478bd9Sstevel@tonic-gate 		    } else if (ptr != NIL && (*cs & OPT)) {
420*7c478bd9Sstevel@tonic-gate 
421*7c478bd9Sstevel@tonic-gate 			/* it was aoptional so no match is ok */
422*7c478bd9Sstevel@tonic-gate 			return (ptr);
423*7c478bd9Sstevel@tonic-gate 		    } else if (ptr != NIL) {
424*7c478bd9Sstevel@tonic-gate 
425*7c478bd9Sstevel@tonic-gate 			/* not optional and we still matched */
426*7c478bd9Sstevel@tonic-gate 			return (NIL);
427*7c478bd9Sstevel@tonic-gate 		    }
428*7c478bd9Sstevel@tonic-gate 		    if (!isidchr(*s1))
429*7c478bd9Sstevel@tonic-gate 			return (NIL);
430*7c478bd9Sstevel@tonic-gate 		    if (*s1 == '\\')
431*7c478bd9Sstevel@tonic-gate 			_escaped = _escaped ? FALSE : TRUE;
432*7c478bd9Sstevel@tonic-gate 		    else
433*7c478bd9Sstevel@tonic-gate 			_escaped = FALSE;
434*7c478bd9Sstevel@tonic-gate 		} while (*s1++);
435*7c478bd9Sstevel@tonic-gate 		return (NIL);
436*7c478bd9Sstevel@tonic-gate 
437*7c478bd9Sstevel@tonic-gate 	    /* try to match anything */
438*7c478bd9Sstevel@tonic-gate 	    case 'a':
439*7c478bd9Sstevel@tonic-gate 		/*
440*7c478bd9Sstevel@tonic-gate 		 *  This is really the same as trying the match the
441*7c478bd9Sstevel@tonic-gate 		 *  remaining parts of the expression to any subset
442*7c478bd9Sstevel@tonic-gate 		 *  of the string.
443*7c478bd9Sstevel@tonic-gate 		 */
444*7c478bd9Sstevel@tonic-gate 		s1 = s;
445*7c478bd9Sstevel@tonic-gate 		do {
446*7c478bd9Sstevel@tonic-gate 		    ptr = expmatch (s1, MNEXT(cs), mstring);
447*7c478bd9Sstevel@tonic-gate 		    if (ptr != NIL && s1 != s) {
448*7c478bd9Sstevel@tonic-gate 
449*7c478bd9Sstevel@tonic-gate 			/* we have a match */
450*7c478bd9Sstevel@tonic-gate 			return (ptr);
451*7c478bd9Sstevel@tonic-gate 		    } else if (ptr != NIL && (*cs & OPT)) {
452*7c478bd9Sstevel@tonic-gate 
453*7c478bd9Sstevel@tonic-gate 			/* it was aoptional so no match is ok */
454*7c478bd9Sstevel@tonic-gate 			return (ptr);
455*7c478bd9Sstevel@tonic-gate 		    } else if (ptr != NIL) {
456*7c478bd9Sstevel@tonic-gate 
457*7c478bd9Sstevel@tonic-gate 			/* not optional and we still matched */
458*7c478bd9Sstevel@tonic-gate 			return (NIL);
459*7c478bd9Sstevel@tonic-gate 		    }
460*7c478bd9Sstevel@tonic-gate 		    if (*s1 == '\\')
461*7c478bd9Sstevel@tonic-gate 			_escaped = _escaped ? FALSE : TRUE;
462*7c478bd9Sstevel@tonic-gate 		    else
463*7c478bd9Sstevel@tonic-gate 			_escaped = FALSE;
464*7c478bd9Sstevel@tonic-gate 		} while (*s1++);
465*7c478bd9Sstevel@tonic-gate 		return (NIL);
466*7c478bd9Sstevel@tonic-gate 
467*7c478bd9Sstevel@tonic-gate 	    /* fail if we are currently _escaped */
468*7c478bd9Sstevel@tonic-gate 	    case 'e':
469*7c478bd9Sstevel@tonic-gate 		if (_escaped)
470*7c478bd9Sstevel@tonic-gate 		    return(NIL);
471*7c478bd9Sstevel@tonic-gate 		cs = MNEXT(cs);
472*7c478bd9Sstevel@tonic-gate 		break;
473*7c478bd9Sstevel@tonic-gate 
474*7c478bd9Sstevel@tonic-gate 	    /* match any number of tabs and spaces */
475*7c478bd9Sstevel@tonic-gate 	    case 'd':
476*7c478bd9Sstevel@tonic-gate 		ptr = s;
477*7c478bd9Sstevel@tonic-gate 		while (*s == ' ' || *s == '\t')
478*7c478bd9Sstevel@tonic-gate 		    s++;
479*7c478bd9Sstevel@tonic-gate 		if (s != ptr || s == Start) {
480*7c478bd9Sstevel@tonic-gate 
481*7c478bd9Sstevel@tonic-gate 		    /* match, be happy */
482*7c478bd9Sstevel@tonic-gate 		    matched = 1;
483*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
484*7c478bd9Sstevel@tonic-gate 		} else if (*s == '\n' || *s == '\0') {
485*7c478bd9Sstevel@tonic-gate 
486*7c478bd9Sstevel@tonic-gate 		    /* match, be happy */
487*7c478bd9Sstevel@tonic-gate 		    matched = 1;
488*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
489*7c478bd9Sstevel@tonic-gate 		} else if (*cs & ALT) {
490*7c478bd9Sstevel@tonic-gate 
491*7c478bd9Sstevel@tonic-gate 		    /* try the next part */
492*7c478bd9Sstevel@tonic-gate 		    matched = 0;
493*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
494*7c478bd9Sstevel@tonic-gate 		} else if (*cs & OPT) {
495*7c478bd9Sstevel@tonic-gate 
496*7c478bd9Sstevel@tonic-gate 		    /* doesn't matter */
497*7c478bd9Sstevel@tonic-gate 		    matched = 1;
498*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
499*7c478bd9Sstevel@tonic-gate 		} else
500*7c478bd9Sstevel@tonic-gate 
501*7c478bd9Sstevel@tonic-gate 		    /* no match, error return */
502*7c478bd9Sstevel@tonic-gate 		    return (NIL);
503*7c478bd9Sstevel@tonic-gate 		break;
504*7c478bd9Sstevel@tonic-gate 
505*7c478bd9Sstevel@tonic-gate 	    /* check for end of line */
506*7c478bd9Sstevel@tonic-gate 	    case '$':
507*7c478bd9Sstevel@tonic-gate 		if (*s == '\0' || *s == '\n') {
508*7c478bd9Sstevel@tonic-gate 
509*7c478bd9Sstevel@tonic-gate 		    /* match, be happy */
510*7c478bd9Sstevel@tonic-gate 		    s++;
511*7c478bd9Sstevel@tonic-gate 		    matched = 1;
512*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
513*7c478bd9Sstevel@tonic-gate 		} else if (*cs & ALT) {
514*7c478bd9Sstevel@tonic-gate 
515*7c478bd9Sstevel@tonic-gate 		    /* try the next part */
516*7c478bd9Sstevel@tonic-gate 		    matched = 0;
517*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
518*7c478bd9Sstevel@tonic-gate 		} else if (*cs & OPT) {
519*7c478bd9Sstevel@tonic-gate 
520*7c478bd9Sstevel@tonic-gate 		    /* doesn't matter */
521*7c478bd9Sstevel@tonic-gate 		    matched = 1;
522*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
523*7c478bd9Sstevel@tonic-gate 		} else
524*7c478bd9Sstevel@tonic-gate 
525*7c478bd9Sstevel@tonic-gate 		    /* no match, error return */
526*7c478bd9Sstevel@tonic-gate 		    return (NIL);
527*7c478bd9Sstevel@tonic-gate 		break;
528*7c478bd9Sstevel@tonic-gate 
529*7c478bd9Sstevel@tonic-gate 	    /* check for start of line */
530*7c478bd9Sstevel@tonic-gate 	    case '^':
531*7c478bd9Sstevel@tonic-gate 		if (s == Start) {
532*7c478bd9Sstevel@tonic-gate 
533*7c478bd9Sstevel@tonic-gate 		    /* match, be happy */
534*7c478bd9Sstevel@tonic-gate 		    matched = 1;
535*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
536*7c478bd9Sstevel@tonic-gate 		} else if (*cs & ALT) {
537*7c478bd9Sstevel@tonic-gate 
538*7c478bd9Sstevel@tonic-gate 		    /* try the next part */
539*7c478bd9Sstevel@tonic-gate 		    matched = 0;
540*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
541*7c478bd9Sstevel@tonic-gate 		} else if (*cs & OPT) {
542*7c478bd9Sstevel@tonic-gate 
543*7c478bd9Sstevel@tonic-gate 		    /* doesn't matter */
544*7c478bd9Sstevel@tonic-gate 		    matched = 1;
545*7c478bd9Sstevel@tonic-gate 		    cs = MNEXT(cs);
546*7c478bd9Sstevel@tonic-gate 		} else
547*7c478bd9Sstevel@tonic-gate 
548*7c478bd9Sstevel@tonic-gate 		    /* no match, error return */
549*7c478bd9Sstevel@tonic-gate 		    return (NIL);
550*7c478bd9Sstevel@tonic-gate 		break;
551*7c478bd9Sstevel@tonic-gate 
552*7c478bd9Sstevel@tonic-gate 	    /* end of a subexpression, return success */
553*7c478bd9Sstevel@tonic-gate 	    case ')':
554*7c478bd9Sstevel@tonic-gate 		return (s);
555*7c478bd9Sstevel@tonic-gate 	    }
556*7c478bd9Sstevel@tonic-gate 	    break;
557*7c478bd9Sstevel@tonic-gate 	}
558*7c478bd9Sstevel@tonic-gate     }
559*7c478bd9Sstevel@tonic-gate     return (s);
560*7c478bd9Sstevel@tonic-gate }
561