xref: /freebsd/contrib/one-true-awk/b.c (revision b5253557294400621041b8ce1dfbf11e124c1575)
12a55deb1SDavid E. O'Brien /****************************************************************
22a55deb1SDavid E. O'Brien Copyright (C) Lucent Technologies 1997
32a55deb1SDavid E. O'Brien All Rights Reserved
42a55deb1SDavid E. O'Brien 
52a55deb1SDavid E. O'Brien Permission to use, copy, modify, and distribute this software and
62a55deb1SDavid E. O'Brien its documentation for any purpose and without fee is hereby
72a55deb1SDavid E. O'Brien granted, provided that the above copyright notice appear in all
82a55deb1SDavid E. O'Brien copies and that both that the copyright notice and this
92a55deb1SDavid E. O'Brien permission notice and warranty disclaimer appear in supporting
102a55deb1SDavid E. O'Brien documentation, and that the name Lucent Technologies or any of
112a55deb1SDavid E. O'Brien its entities not be used in advertising or publicity pertaining
122a55deb1SDavid E. O'Brien to distribution of the software without specific, written prior
132a55deb1SDavid E. O'Brien permission.
142a55deb1SDavid E. O'Brien 
152a55deb1SDavid E. O'Brien LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
162a55deb1SDavid E. O'Brien INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
172a55deb1SDavid E. O'Brien IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
182a55deb1SDavid E. O'Brien SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
192a55deb1SDavid E. O'Brien WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
202a55deb1SDavid E. O'Brien IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
212a55deb1SDavid E. O'Brien ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
222a55deb1SDavid E. O'Brien THIS SOFTWARE.
232a55deb1SDavid E. O'Brien ****************************************************************/
242a55deb1SDavid E. O'Brien 
25addad6afSRong-En Fan /* lasciate ogne speranza, voi ch'intrate. */
262a55deb1SDavid E. O'Brien 
27d98dd8e5SRuslan Ermilov #include <sys/cdefs.h>
28d98dd8e5SRuslan Ermilov __FBSDID("$FreeBSD$");
29d98dd8e5SRuslan Ermilov 
302a55deb1SDavid E. O'Brien #define	DEBUG
312a55deb1SDavid E. O'Brien 
322a55deb1SDavid E. O'Brien #include <ctype.h>
33*b5253557SWarner Losh #include <limits.h>
342a55deb1SDavid E. O'Brien #include <stdio.h>
352a55deb1SDavid E. O'Brien #include <string.h>
362a55deb1SDavid E. O'Brien #include <stdlib.h>
372a55deb1SDavid E. O'Brien #include "awk.h"
382a55deb1SDavid E. O'Brien #include "ytab.h"
392a55deb1SDavid E. O'Brien 
4088b8d487SRuslan Ermilov #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
412a55deb1SDavid E. O'Brien 				/* NCHARS is 2**n */
422a55deb1SDavid E. O'Brien #define MAXLIN 22
432a55deb1SDavid E. O'Brien 
442a55deb1SDavid E. O'Brien #define type(v)		(v)->nobj	/* badly overloaded here */
452a55deb1SDavid E. O'Brien #define info(v)		(v)->ntype	/* badly overloaded here */
462a55deb1SDavid E. O'Brien #define left(v)		(v)->narg[0]
472a55deb1SDavid E. O'Brien #define right(v)	(v)->narg[1]
482a55deb1SDavid E. O'Brien #define parent(v)	(v)->nnext
492a55deb1SDavid E. O'Brien 
502a55deb1SDavid E. O'Brien #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
51addad6afSRong-En Fan #define ELEAF	case EMPTYRE:		/* empty string in regexp */
522a55deb1SDavid E. O'Brien #define UNARY	case STAR: case PLUS: case QUEST:
532a55deb1SDavid E. O'Brien 
542a55deb1SDavid E. O'Brien /* encoding in tree Nodes:
55addad6afSRong-En Fan 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
562a55deb1SDavid E. O'Brien 		left is index, right contains value or pointer to value
572a55deb1SDavid E. O'Brien 	unary (STAR, PLUS, QUEST): left is child, right is null
582a55deb1SDavid E. O'Brien 	binary (CAT, OR): left and right are children
592a55deb1SDavid E. O'Brien 	parent contains pointer to parent
602a55deb1SDavid E. O'Brien */
612a55deb1SDavid E. O'Brien 
622a55deb1SDavid E. O'Brien 
632a55deb1SDavid E. O'Brien int	*setvec;
642a55deb1SDavid E. O'Brien int	*tmpset;
652a55deb1SDavid E. O'Brien int	maxsetvec = 0;
662a55deb1SDavid E. O'Brien 
672a55deb1SDavid E. O'Brien int	rtok;		/* next token in current re */
682a55deb1SDavid E. O'Brien int	rlxval;
692a55deb1SDavid E. O'Brien static uschar	*rlxstr;
702a55deb1SDavid E. O'Brien static uschar	*prestr;	/* current position in current re */
712a55deb1SDavid E. O'Brien static uschar	*lastre;	/* origin of last re */
72*b5253557SWarner Losh static uschar	*lastatom;	/* origin of last Atom */
73*b5253557SWarner Losh static uschar	*starttok;
74*b5253557SWarner Losh static uschar 	*basestr;	/* starts with original, replaced during
75*b5253557SWarner Losh 				   repetition processing */
76*b5253557SWarner Losh static uschar 	*firstbasestr;
772a55deb1SDavid E. O'Brien 
782a55deb1SDavid E. O'Brien static	int setcnt;
792a55deb1SDavid E. O'Brien static	int poscnt;
802a55deb1SDavid E. O'Brien 
812a55deb1SDavid E. O'Brien char	*patbeg;
822a55deb1SDavid E. O'Brien int	patlen;
832a55deb1SDavid E. O'Brien 
842a55deb1SDavid E. O'Brien #define	NFA	20	/* cache this many dynamic fa's */
852a55deb1SDavid E. O'Brien fa	*fatab[NFA];
862a55deb1SDavid E. O'Brien int	nfatab	= 0;	/* entries in fatab */
872a55deb1SDavid E. O'Brien 
88813da98dSDavid E. O'Brien fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
892a55deb1SDavid E. O'Brien {
902a55deb1SDavid E. O'Brien 	int i, use, nuse;
912a55deb1SDavid E. O'Brien 	fa *pfa;
922a55deb1SDavid E. O'Brien 	static int now = 1;
932a55deb1SDavid E. O'Brien 
94*b5253557SWarner Losh 	if (setvec == 0) {	/* first time through any RE */
952a55deb1SDavid E. O'Brien 		maxsetvec = MAXLIN;
962a55deb1SDavid E. O'Brien 		setvec = (int *) malloc(maxsetvec * sizeof(int));
972a55deb1SDavid E. O'Brien 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
98*b5253557SWarner Losh 		if (setvec == 0 || tmpset == 0)
992a55deb1SDavid E. O'Brien 			overflo("out of space initializing makedfa");
1002a55deb1SDavid E. O'Brien 	}
1012a55deb1SDavid E. O'Brien 
1022a55deb1SDavid E. O'Brien 	if (compile_time)	/* a constant for sure */
1032a55deb1SDavid E. O'Brien 		return mkdfa(s, anchor);
1042a55deb1SDavid E. O'Brien 	for (i = 0; i < nfatab; i++)	/* is it there already? */
1052a55deb1SDavid E. O'Brien 		if (fatab[i]->anchor == anchor
106007c6572SDag-Erling Smørgrav 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
1072a55deb1SDavid E. O'Brien 			fatab[i]->use = now++;
1082a55deb1SDavid E. O'Brien 			return fatab[i];
1092a55deb1SDavid E. O'Brien 		}
1102a55deb1SDavid E. O'Brien 	pfa = mkdfa(s, anchor);
1112a55deb1SDavid E. O'Brien 	if (nfatab < NFA) {	/* room for another */
1122a55deb1SDavid E. O'Brien 		fatab[nfatab] = pfa;
1132a55deb1SDavid E. O'Brien 		fatab[nfatab]->use = now++;
1142a55deb1SDavid E. O'Brien 		nfatab++;
1152a55deb1SDavid E. O'Brien 		return pfa;
1162a55deb1SDavid E. O'Brien 	}
1172a55deb1SDavid E. O'Brien 	use = fatab[0]->use;	/* replace least-recently used */
1182a55deb1SDavid E. O'Brien 	nuse = 0;
1192a55deb1SDavid E. O'Brien 	for (i = 1; i < nfatab; i++)
1202a55deb1SDavid E. O'Brien 		if (fatab[i]->use < use) {
1212a55deb1SDavid E. O'Brien 			use = fatab[i]->use;
1222a55deb1SDavid E. O'Brien 			nuse = i;
1232a55deb1SDavid E. O'Brien 		}
1242a55deb1SDavid E. O'Brien 	freefa(fatab[nuse]);
1252a55deb1SDavid E. O'Brien 	fatab[nuse] = pfa;
1262a55deb1SDavid E. O'Brien 	pfa->use = now++;
1272a55deb1SDavid E. O'Brien 	return pfa;
1282a55deb1SDavid E. O'Brien }
1292a55deb1SDavid E. O'Brien 
130813da98dSDavid E. O'Brien fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
1312a55deb1SDavid E. O'Brien 				/* anchor = 1 for anchored matches, else 0 */
1322a55deb1SDavid E. O'Brien {
1332a55deb1SDavid E. O'Brien 	Node *p, *p1;
1342a55deb1SDavid E. O'Brien 	fa *f;
1352a55deb1SDavid E. O'Brien 
136*b5253557SWarner Losh 	firstbasestr = (uschar *) s;
137*b5253557SWarner Losh 	basestr = firstbasestr;
1382a55deb1SDavid E. O'Brien 	p = reparse(s);
1392a55deb1SDavid E. O'Brien 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
1402a55deb1SDavid E. O'Brien 		/* put ALL STAR in front of reg.  exp. */
1412a55deb1SDavid E. O'Brien 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
1422a55deb1SDavid E. O'Brien 		/* put FINAL after reg.  exp. */
1432a55deb1SDavid E. O'Brien 
1442a55deb1SDavid E. O'Brien 	poscnt = 0;
1452a55deb1SDavid E. O'Brien 	penter(p1);	/* enter parent pointers and leaf indices */
1462a55deb1SDavid E. O'Brien 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
1472a55deb1SDavid E. O'Brien 		overflo("out of space for fa");
1482a55deb1SDavid E. O'Brien 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
1492a55deb1SDavid E. O'Brien 	cfoll(f, p1);	/* set up follow sets */
1502a55deb1SDavid E. O'Brien 	freetr(p1);
151*b5253557SWarner Losh 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
1522a55deb1SDavid E. O'Brien 			overflo("out of space in makedfa");
1532a55deb1SDavid E. O'Brien 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
1542a55deb1SDavid E. O'Brien 		overflo("out of space in makedfa");
1552a55deb1SDavid E. O'Brien 	*f->posns[1] = 0;
1562a55deb1SDavid E. O'Brien 	f->initstat = makeinit(f, anchor);
1572a55deb1SDavid E. O'Brien 	f->anchor = anchor;
1582a55deb1SDavid E. O'Brien 	f->restr = (uschar *) tostring(s);
159*b5253557SWarner Losh 	if (firstbasestr != basestr) {
160*b5253557SWarner Losh 		if (basestr)
161*b5253557SWarner Losh 			xfree(basestr);
162*b5253557SWarner Losh 	}
1632a55deb1SDavid E. O'Brien 	return f;
1642a55deb1SDavid E. O'Brien }
1652a55deb1SDavid E. O'Brien 
1662a55deb1SDavid E. O'Brien int makeinit(fa *f, int anchor)
1672a55deb1SDavid E. O'Brien {
1682a55deb1SDavid E. O'Brien 	int i, k;
1692a55deb1SDavid E. O'Brien 
1702a55deb1SDavid E. O'Brien 	f->curstat = 2;
1712a55deb1SDavid E. O'Brien 	f->out[2] = 0;
1722a55deb1SDavid E. O'Brien 	f->reset = 0;
1732a55deb1SDavid E. O'Brien 	k = *(f->re[0].lfollow);
1742a55deb1SDavid E. O'Brien 	xfree(f->posns[2]);
175*b5253557SWarner Losh 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
1762a55deb1SDavid E. O'Brien 		overflo("out of space in makeinit");
1772a55deb1SDavid E. O'Brien 	for (i=0; i <= k; i++) {
1782a55deb1SDavid E. O'Brien 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
1792a55deb1SDavid E. O'Brien 	}
1802a55deb1SDavid E. O'Brien 	if ((f->posns[2])[1] == f->accept)
1812a55deb1SDavid E. O'Brien 		f->out[2] = 1;
1822a55deb1SDavid E. O'Brien 	for (i=0; i < NCHARS; i++)
1832a55deb1SDavid E. O'Brien 		f->gototab[2][i] = 0;
1842a55deb1SDavid E. O'Brien 	f->curstat = cgoto(f, 2, HAT);
1852a55deb1SDavid E. O'Brien 	if (anchor) {
1862a55deb1SDavid E. O'Brien 		*f->posns[2] = k-1;	/* leave out position 0 */
1872a55deb1SDavid E. O'Brien 		for (i=0; i < k; i++) {
1882a55deb1SDavid E. O'Brien 			(f->posns[0])[i] = (f->posns[2])[i];
1892a55deb1SDavid E. O'Brien 		}
1902a55deb1SDavid E. O'Brien 
1912a55deb1SDavid E. O'Brien 		f->out[0] = f->out[2];
1922a55deb1SDavid E. O'Brien 		if (f->curstat != 2)
1932a55deb1SDavid E. O'Brien 			--(*f->posns[f->curstat]);
1942a55deb1SDavid E. O'Brien 	}
1952a55deb1SDavid E. O'Brien 	return f->curstat;
1962a55deb1SDavid E. O'Brien }
1972a55deb1SDavid E. O'Brien 
1982a55deb1SDavid E. O'Brien void penter(Node *p)	/* set up parent pointers and leaf indices */
1992a55deb1SDavid E. O'Brien {
2002a55deb1SDavid E. O'Brien 	switch (type(p)) {
201addad6afSRong-En Fan 	ELEAF
2022a55deb1SDavid E. O'Brien 	LEAF
2032a55deb1SDavid E. O'Brien 		info(p) = poscnt;
2042a55deb1SDavid E. O'Brien 		poscnt++;
2052a55deb1SDavid E. O'Brien 		break;
2062a55deb1SDavid E. O'Brien 	UNARY
2072a55deb1SDavid E. O'Brien 		penter(left(p));
2082a55deb1SDavid E. O'Brien 		parent(left(p)) = p;
2092a55deb1SDavid E. O'Brien 		break;
2102a55deb1SDavid E. O'Brien 	case CAT:
2112a55deb1SDavid E. O'Brien 	case OR:
2122a55deb1SDavid E. O'Brien 		penter(left(p));
2132a55deb1SDavid E. O'Brien 		penter(right(p));
2142a55deb1SDavid E. O'Brien 		parent(left(p)) = p;
2152a55deb1SDavid E. O'Brien 		parent(right(p)) = p;
2162a55deb1SDavid E. O'Brien 		break;
2172a55deb1SDavid E. O'Brien 	default:	/* can't happen */
2182a55deb1SDavid E. O'Brien 		FATAL("can't happen: unknown type %d in penter", type(p));
2192a55deb1SDavid E. O'Brien 		break;
2202a55deb1SDavid E. O'Brien 	}
2212a55deb1SDavid E. O'Brien }
2222a55deb1SDavid E. O'Brien 
2232a55deb1SDavid E. O'Brien void freetr(Node *p)	/* free parse tree */
2242a55deb1SDavid E. O'Brien {
2252a55deb1SDavid E. O'Brien 	switch (type(p)) {
226addad6afSRong-En Fan 	ELEAF
2272a55deb1SDavid E. O'Brien 	LEAF
2282a55deb1SDavid E. O'Brien 		xfree(p);
2292a55deb1SDavid E. O'Brien 		break;
2302a55deb1SDavid E. O'Brien 	UNARY
2312a55deb1SDavid E. O'Brien 		freetr(left(p));
2322a55deb1SDavid E. O'Brien 		xfree(p);
2332a55deb1SDavid E. O'Brien 		break;
2342a55deb1SDavid E. O'Brien 	case CAT:
2352a55deb1SDavid E. O'Brien 	case OR:
2362a55deb1SDavid E. O'Brien 		freetr(left(p));
2372a55deb1SDavid E. O'Brien 		freetr(right(p));
2382a55deb1SDavid E. O'Brien 		xfree(p);
2392a55deb1SDavid E. O'Brien 		break;
2402a55deb1SDavid E. O'Brien 	default:	/* can't happen */
2412a55deb1SDavid E. O'Brien 		FATAL("can't happen: unknown type %d in freetr", type(p));
2422a55deb1SDavid E. O'Brien 		break;
2432a55deb1SDavid E. O'Brien 	}
2442a55deb1SDavid E. O'Brien }
2452a55deb1SDavid E. O'Brien 
2462a55deb1SDavid E. O'Brien /* in the parsing of regular expressions, metacharacters like . have */
2472a55deb1SDavid E. O'Brien /* to be seen literally;  \056 is not a metacharacter. */
2482a55deb1SDavid E. O'Brien 
249d86a0988SRuslan Ermilov int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
2502a55deb1SDavid E. O'Brien {			/* only pick up one 8-bit byte (2 chars) */
2512a55deb1SDavid E. O'Brien 	uschar *p;
2522a55deb1SDavid E. O'Brien 	int n = 0;
2532a55deb1SDavid E. O'Brien 	int i;
2542a55deb1SDavid E. O'Brien 
2552a55deb1SDavid E. O'Brien 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
2562a55deb1SDavid E. O'Brien 		if (isdigit(*p))
2572a55deb1SDavid E. O'Brien 			n = 16 * n + *p - '0';
2582a55deb1SDavid E. O'Brien 		else if (*p >= 'a' && *p <= 'f')
2592a55deb1SDavid E. O'Brien 			n = 16 * n + *p - 'a' + 10;
2602a55deb1SDavid E. O'Brien 		else if (*p >= 'A' && *p <= 'F')
2612a55deb1SDavid E. O'Brien 			n = 16 * n + *p - 'A' + 10;
2622a55deb1SDavid E. O'Brien 	}
263d86a0988SRuslan Ermilov 	*pp = (uschar *) p;
2642a55deb1SDavid E. O'Brien 	return n;
2652a55deb1SDavid E. O'Brien }
2662a55deb1SDavid E. O'Brien 
2672a55deb1SDavid E. O'Brien #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
2682a55deb1SDavid E. O'Brien 
269d86a0988SRuslan Ermilov int quoted(uschar **pp)	/* pick up next thing after a \\ */
2702a55deb1SDavid E. O'Brien 			/* and increment *pp */
2712a55deb1SDavid E. O'Brien {
272d86a0988SRuslan Ermilov 	uschar *p = *pp;
2732a55deb1SDavid E. O'Brien 	int c;
2742a55deb1SDavid E. O'Brien 
2752a55deb1SDavid E. O'Brien 	if ((c = *p++) == 't')
2762a55deb1SDavid E. O'Brien 		c = '\t';
2772a55deb1SDavid E. O'Brien 	else if (c == 'n')
2782a55deb1SDavid E. O'Brien 		c = '\n';
2792a55deb1SDavid E. O'Brien 	else if (c == 'f')
2802a55deb1SDavid E. O'Brien 		c = '\f';
2812a55deb1SDavid E. O'Brien 	else if (c == 'r')
2822a55deb1SDavid E. O'Brien 		c = '\r';
2832a55deb1SDavid E. O'Brien 	else if (c == 'b')
2842a55deb1SDavid E. O'Brien 		c = '\b';
2852a55deb1SDavid E. O'Brien 	else if (c == '\\')
2862a55deb1SDavid E. O'Brien 		c = '\\';
2872a55deb1SDavid E. O'Brien 	else if (c == 'x') {	/* hexadecimal goo follows */
2882a55deb1SDavid E. O'Brien 		c = hexstr(&p);	/* this adds a null if number is invalid */
2892a55deb1SDavid E. O'Brien 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
2902a55deb1SDavid E. O'Brien 		int n = c - '0';
2912a55deb1SDavid E. O'Brien 		if (isoctdigit(*p)) {
2922a55deb1SDavid E. O'Brien 			n = 8 * n + *p++ - '0';
2932a55deb1SDavid E. O'Brien 			if (isoctdigit(*p))
2942a55deb1SDavid E. O'Brien 				n = 8 * n + *p++ - '0';
2952a55deb1SDavid E. O'Brien 		}
2962a55deb1SDavid E. O'Brien 		c = n;
2972a55deb1SDavid E. O'Brien 	} /* else */
2982a55deb1SDavid E. O'Brien 		/* c = c; */
2992a55deb1SDavid E. O'Brien 	*pp = p;
3002a55deb1SDavid E. O'Brien 	return c;
3012a55deb1SDavid E. O'Brien }
3022a55deb1SDavid E. O'Brien 
303d98dd8e5SRuslan Ermilov static int collate_range_cmp(int a, int b)
304d98dd8e5SRuslan Ermilov {
305d98dd8e5SRuslan Ermilov 	static char s[2][2];
306d98dd8e5SRuslan Ermilov 
307d98dd8e5SRuslan Ermilov 	if ((uschar)a == (uschar)b)
308d98dd8e5SRuslan Ermilov 		return 0;
309d98dd8e5SRuslan Ermilov 	s[0][0] = a;
310d98dd8e5SRuslan Ermilov 	s[1][0] = b;
311d98dd8e5SRuslan Ermilov 	return (strcoll(s[0], s[1]));
312d98dd8e5SRuslan Ermilov }
313d98dd8e5SRuslan Ermilov 
314813da98dSDavid E. O'Brien char *cclenter(const char *argp)	/* add a character class */
3152a55deb1SDavid E. O'Brien {
3162a55deb1SDavid E. O'Brien 	int i, c, c2;
317d98dd8e5SRuslan Ermilov 	int j;
3182a55deb1SDavid E. O'Brien 	uschar *p = (uschar *) argp;
3192a55deb1SDavid E. O'Brien 	uschar *op, *bp;
320*b5253557SWarner Losh 	static uschar *buf = 0;
3212a55deb1SDavid E. O'Brien 	static int bufsz = 100;
3222a55deb1SDavid E. O'Brien 
3232a55deb1SDavid E. O'Brien 	op = p;
324*b5253557SWarner Losh 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
3252a55deb1SDavid E. O'Brien 		FATAL("out of space for character class [%.10s...] 1", p);
3262a55deb1SDavid E. O'Brien 	bp = buf;
3272a55deb1SDavid E. O'Brien 	for (i = 0; (c = *p++) != 0; ) {
3282a55deb1SDavid E. O'Brien 		if (c == '\\') {
329d86a0988SRuslan Ermilov 			c = quoted(&p);
3302a55deb1SDavid E. O'Brien 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
3312a55deb1SDavid E. O'Brien 			if (*p != 0) {
3322a55deb1SDavid E. O'Brien 				c = bp[-1];
3332a55deb1SDavid E. O'Brien 				c2 = *p++;
3342a55deb1SDavid E. O'Brien 				if (c2 == '\\')
335d86a0988SRuslan Ermilov 					c2 = quoted(&p);
336d98dd8e5SRuslan Ermilov 				if (collate_range_cmp(c, c2) > 0) {
3372a55deb1SDavid E. O'Brien 					bp--;
3382a55deb1SDavid E. O'Brien 					i--;
3392a55deb1SDavid E. O'Brien 					continue;
3402a55deb1SDavid E. O'Brien 				}
341d98dd8e5SRuslan Ermilov 				for (j = 0; j < NCHARS; j++) {
342d98dd8e5SRuslan Ermilov 					if ((collate_range_cmp(c, j) > 0) ||
343d98dd8e5SRuslan Ermilov 					    collate_range_cmp(j, c2) > 0)
344d98dd8e5SRuslan Ermilov 						continue;
345addad6afSRong-En Fan 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
3462a55deb1SDavid E. O'Brien 						FATAL("out of space for character class [%.10s...] 2", p);
347d98dd8e5SRuslan Ermilov 					*bp++ = j;
3482a55deb1SDavid E. O'Brien 					i++;
3492a55deb1SDavid E. O'Brien 				}
3502a55deb1SDavid E. O'Brien 				continue;
3512a55deb1SDavid E. O'Brien 			}
3522a55deb1SDavid E. O'Brien 		}
353addad6afSRong-En Fan 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
3542a55deb1SDavid E. O'Brien 			FATAL("out of space for character class [%.10s...] 3", p);
3552a55deb1SDavid E. O'Brien 		*bp++ = c;
3562a55deb1SDavid E. O'Brien 		i++;
3572a55deb1SDavid E. O'Brien 	}
3582a55deb1SDavid E. O'Brien 	*bp = 0;
3592a55deb1SDavid E. O'Brien 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
3602a55deb1SDavid E. O'Brien 	xfree(op);
3612a55deb1SDavid E. O'Brien 	return (char *) tostring((char *) buf);
3622a55deb1SDavid E. O'Brien }
3632a55deb1SDavid E. O'Brien 
364813da98dSDavid E. O'Brien void overflo(const char *s)
3652a55deb1SDavid E. O'Brien {
3662a55deb1SDavid E. O'Brien 	FATAL("regular expression too big: %.30s...", s);
3672a55deb1SDavid E. O'Brien }
3682a55deb1SDavid E. O'Brien 
3692a55deb1SDavid E. O'Brien void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
3702a55deb1SDavid E. O'Brien {
3712a55deb1SDavid E. O'Brien 	int i;
3722a55deb1SDavid E. O'Brien 	int *p;
3732a55deb1SDavid E. O'Brien 
3742a55deb1SDavid E. O'Brien 	switch (type(v)) {
375addad6afSRong-En Fan 	ELEAF
3762a55deb1SDavid E. O'Brien 	LEAF
3772a55deb1SDavid E. O'Brien 		f->re[info(v)].ltype = type(v);
3782a55deb1SDavid E. O'Brien 		f->re[info(v)].lval.np = right(v);
3792a55deb1SDavid E. O'Brien 		while (f->accept >= maxsetvec) {	/* guessing here! */
3802a55deb1SDavid E. O'Brien 			maxsetvec *= 4;
3812a55deb1SDavid E. O'Brien 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
3822a55deb1SDavid E. O'Brien 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
383*b5253557SWarner Losh 			if (setvec == 0 || tmpset == 0)
3842a55deb1SDavid E. O'Brien 				overflo("out of space in cfoll()");
3852a55deb1SDavid E. O'Brien 		}
3862a55deb1SDavid E. O'Brien 		for (i = 0; i <= f->accept; i++)
3872a55deb1SDavid E. O'Brien 			setvec[i] = 0;
3882a55deb1SDavid E. O'Brien 		setcnt = 0;
3892a55deb1SDavid E. O'Brien 		follow(v);	/* computes setvec and setcnt */
390*b5253557SWarner Losh 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
3912a55deb1SDavid E. O'Brien 			overflo("out of space building follow set");
3922a55deb1SDavid E. O'Brien 		f->re[info(v)].lfollow = p;
3932a55deb1SDavid E. O'Brien 		*p = setcnt;
3942a55deb1SDavid E. O'Brien 		for (i = f->accept; i >= 0; i--)
3952a55deb1SDavid E. O'Brien 			if (setvec[i] == 1)
3962a55deb1SDavid E. O'Brien 				*++p = i;
3972a55deb1SDavid E. O'Brien 		break;
3982a55deb1SDavid E. O'Brien 	UNARY
3992a55deb1SDavid E. O'Brien 		cfoll(f,left(v));
4002a55deb1SDavid E. O'Brien 		break;
4012a55deb1SDavid E. O'Brien 	case CAT:
4022a55deb1SDavid E. O'Brien 	case OR:
4032a55deb1SDavid E. O'Brien 		cfoll(f,left(v));
4042a55deb1SDavid E. O'Brien 		cfoll(f,right(v));
4052a55deb1SDavid E. O'Brien 		break;
4062a55deb1SDavid E. O'Brien 	default:	/* can't happen */
4072a55deb1SDavid E. O'Brien 		FATAL("can't happen: unknown type %d in cfoll", type(v));
4082a55deb1SDavid E. O'Brien 	}
4092a55deb1SDavid E. O'Brien }
4102a55deb1SDavid E. O'Brien 
4112a55deb1SDavid E. O'Brien int first(Node *p)	/* collects initially active leaves of p into setvec */
412addad6afSRong-En Fan 			/* returns 0 if p matches empty string */
4132a55deb1SDavid E. O'Brien {
4142a55deb1SDavid E. O'Brien 	int b, lp;
4152a55deb1SDavid E. O'Brien 
4162a55deb1SDavid E. O'Brien 	switch (type(p)) {
417addad6afSRong-En Fan 	ELEAF
4182a55deb1SDavid E. O'Brien 	LEAF
4192a55deb1SDavid E. O'Brien 		lp = info(p);	/* look for high-water mark of subscripts */
4202a55deb1SDavid E. O'Brien 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
4212a55deb1SDavid E. O'Brien 			maxsetvec *= 4;
4222a55deb1SDavid E. O'Brien 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
4232a55deb1SDavid E. O'Brien 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
424*b5253557SWarner Losh 			if (setvec == 0 || tmpset == 0)
4252a55deb1SDavid E. O'Brien 				overflo("out of space in first()");
4262a55deb1SDavid E. O'Brien 		}
427addad6afSRong-En Fan 		if (type(p) == EMPTYRE) {
428addad6afSRong-En Fan 			setvec[lp] = 0;
429addad6afSRong-En Fan 			return(0);
430addad6afSRong-En Fan 		}
4312a55deb1SDavid E. O'Brien 		if (setvec[lp] != 1) {
4322a55deb1SDavid E. O'Brien 			setvec[lp] = 1;
4332a55deb1SDavid E. O'Brien 			setcnt++;
4342a55deb1SDavid E. O'Brien 		}
4352a55deb1SDavid E. O'Brien 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
4362a55deb1SDavid E. O'Brien 			return(0);		/* empty CCL */
4372a55deb1SDavid E. O'Brien 		else return(1);
4382a55deb1SDavid E. O'Brien 	case PLUS:
4392a55deb1SDavid E. O'Brien 		if (first(left(p)) == 0) return(0);
4402a55deb1SDavid E. O'Brien 		return(1);
4412a55deb1SDavid E. O'Brien 	case STAR:
4422a55deb1SDavid E. O'Brien 	case QUEST:
4432a55deb1SDavid E. O'Brien 		first(left(p));
4442a55deb1SDavid E. O'Brien 		return(0);
4452a55deb1SDavid E. O'Brien 	case CAT:
4462a55deb1SDavid E. O'Brien 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
4472a55deb1SDavid E. O'Brien 		return(1);
4482a55deb1SDavid E. O'Brien 	case OR:
4492a55deb1SDavid E. O'Brien 		b = first(right(p));
4502a55deb1SDavid E. O'Brien 		if (first(left(p)) == 0 || b == 0) return(0);
4512a55deb1SDavid E. O'Brien 		return(1);
4522a55deb1SDavid E. O'Brien 	}
4532a55deb1SDavid E. O'Brien 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
4542a55deb1SDavid E. O'Brien 	return(-1);
4552a55deb1SDavid E. O'Brien }
4562a55deb1SDavid E. O'Brien 
4572a55deb1SDavid E. O'Brien void follow(Node *v)	/* collects leaves that can follow v into setvec */
4582a55deb1SDavid E. O'Brien {
4592a55deb1SDavid E. O'Brien 	Node *p;
4602a55deb1SDavid E. O'Brien 
4612a55deb1SDavid E. O'Brien 	if (type(v) == FINAL)
4622a55deb1SDavid E. O'Brien 		return;
4632a55deb1SDavid E. O'Brien 	p = parent(v);
4642a55deb1SDavid E. O'Brien 	switch (type(p)) {
4652a55deb1SDavid E. O'Brien 	case STAR:
4662a55deb1SDavid E. O'Brien 	case PLUS:
4672a55deb1SDavid E. O'Brien 		first(v);
4682a55deb1SDavid E. O'Brien 		follow(p);
4692a55deb1SDavid E. O'Brien 		return;
4702a55deb1SDavid E. O'Brien 
4712a55deb1SDavid E. O'Brien 	case OR:
4722a55deb1SDavid E. O'Brien 	case QUEST:
4732a55deb1SDavid E. O'Brien 		follow(p);
4742a55deb1SDavid E. O'Brien 		return;
4752a55deb1SDavid E. O'Brien 
4762a55deb1SDavid E. O'Brien 	case CAT:
4772a55deb1SDavid E. O'Brien 		if (v == left(p)) {	/* v is left child of p */
4782a55deb1SDavid E. O'Brien 			if (first(right(p)) == 0) {
4792a55deb1SDavid E. O'Brien 				follow(p);
4802a55deb1SDavid E. O'Brien 				return;
4812a55deb1SDavid E. O'Brien 			}
4822a55deb1SDavid E. O'Brien 		} else		/* v is right child */
4832a55deb1SDavid E. O'Brien 			follow(p);
4842a55deb1SDavid E. O'Brien 		return;
4852a55deb1SDavid E. O'Brien 	}
4862a55deb1SDavid E. O'Brien }
4872a55deb1SDavid E. O'Brien 
488813da98dSDavid E. O'Brien int member(int c, const char *sarg)	/* is c in s? */
4892a55deb1SDavid E. O'Brien {
4902a55deb1SDavid E. O'Brien 	uschar *s = (uschar *) sarg;
4912a55deb1SDavid E. O'Brien 
4922a55deb1SDavid E. O'Brien 	while (*s)
4932a55deb1SDavid E. O'Brien 		if (c == *s++)
4942a55deb1SDavid E. O'Brien 			return(1);
4952a55deb1SDavid E. O'Brien 	return(0);
4962a55deb1SDavid E. O'Brien }
4972a55deb1SDavid E. O'Brien 
498813da98dSDavid E. O'Brien int match(fa *f, const char *p0)	/* shortest match ? */
4992a55deb1SDavid E. O'Brien {
5002a55deb1SDavid E. O'Brien 	int s, ns;
5012a55deb1SDavid E. O'Brien 	uschar *p = (uschar *) p0;
5022a55deb1SDavid E. O'Brien 
5032a55deb1SDavid E. O'Brien 	s = f->reset ? makeinit(f,0) : f->initstat;
5042a55deb1SDavid E. O'Brien 	if (f->out[s])
5052a55deb1SDavid E. O'Brien 		return(1);
5062a55deb1SDavid E. O'Brien 	do {
507addad6afSRong-En Fan 		/* assert(*p < NCHARS); */
5082a55deb1SDavid E. O'Brien 		if ((ns = f->gototab[s][*p]) != 0)
5092a55deb1SDavid E. O'Brien 			s = ns;
5102a55deb1SDavid E. O'Brien 		else
5112a55deb1SDavid E. O'Brien 			s = cgoto(f, s, *p);
5122a55deb1SDavid E. O'Brien 		if (f->out[s])
5132a55deb1SDavid E. O'Brien 			return(1);
5142a55deb1SDavid E. O'Brien 	} while (*p++ != 0);
5152a55deb1SDavid E. O'Brien 	return(0);
5162a55deb1SDavid E. O'Brien }
5172a55deb1SDavid E. O'Brien 
518813da98dSDavid E. O'Brien int pmatch(fa *f, const char *p0)	/* longest match, for sub */
5192a55deb1SDavid E. O'Brien {
5202a55deb1SDavid E. O'Brien 	int s, ns;
5212a55deb1SDavid E. O'Brien 	uschar *p = (uschar *) p0;
5222a55deb1SDavid E. O'Brien 	uschar *q;
5232a55deb1SDavid E. O'Brien 	int i, k;
5242a55deb1SDavid E. O'Brien 
52562ebc626SRuslan Ermilov 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
52662ebc626SRuslan Ermilov 	if (f->reset) {
52762ebc626SRuslan Ermilov 		f->initstat = s = makeinit(f,1);
52862ebc626SRuslan Ermilov 	} else {
52962ebc626SRuslan Ermilov 		s = f->initstat;
53062ebc626SRuslan Ermilov 	}
5312a55deb1SDavid E. O'Brien 	patbeg = (char *) p;
5322a55deb1SDavid E. O'Brien 	patlen = -1;
5332a55deb1SDavid E. O'Brien 	do {
5342a55deb1SDavid E. O'Brien 		q = p;
5352a55deb1SDavid E. O'Brien 		do {
5362a55deb1SDavid E. O'Brien 			if (f->out[s])		/* final state */
5372a55deb1SDavid E. O'Brien 				patlen = q-p;
538addad6afSRong-En Fan 			/* assert(*q < NCHARS); */
5392a55deb1SDavid E. O'Brien 			if ((ns = f->gototab[s][*q]) != 0)
5402a55deb1SDavid E. O'Brien 				s = ns;
5412a55deb1SDavid E. O'Brien 			else
5422a55deb1SDavid E. O'Brien 				s = cgoto(f, s, *q);
5432a55deb1SDavid E. O'Brien 			if (s == 1) {	/* no transition */
5442a55deb1SDavid E. O'Brien 				if (patlen >= 0) {
5452a55deb1SDavid E. O'Brien 					patbeg = (char *) p;
5462a55deb1SDavid E. O'Brien 					return(1);
5472a55deb1SDavid E. O'Brien 				}
5482a55deb1SDavid E. O'Brien 				else
5492a55deb1SDavid E. O'Brien 					goto nextin;	/* no match */
5502a55deb1SDavid E. O'Brien 			}
5512a55deb1SDavid E. O'Brien 		} while (*q++ != 0);
5522a55deb1SDavid E. O'Brien 		if (f->out[s])
5532a55deb1SDavid E. O'Brien 			patlen = q-p-1;	/* don't count $ */
5542a55deb1SDavid E. O'Brien 		if (patlen >= 0) {
5552a55deb1SDavid E. O'Brien 			patbeg = (char *) p;
5562a55deb1SDavid E. O'Brien 			return(1);
5572a55deb1SDavid E. O'Brien 		}
5582a55deb1SDavid E. O'Brien 	nextin:
5592a55deb1SDavid E. O'Brien 		s = 2;
5602a55deb1SDavid E. O'Brien 		if (f->reset) {
5612a55deb1SDavid E. O'Brien 			for (i = 2; i <= f->curstat; i++)
5622a55deb1SDavid E. O'Brien 				xfree(f->posns[i]);
5632a55deb1SDavid E. O'Brien 			k = *f->posns[0];
564*b5253557SWarner Losh 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
5652a55deb1SDavid E. O'Brien 				overflo("out of space in pmatch");
5662a55deb1SDavid E. O'Brien 			for (i = 0; i <= k; i++)
5672a55deb1SDavid E. O'Brien 				(f->posns[2])[i] = (f->posns[0])[i];
5682a55deb1SDavid E. O'Brien 			f->initstat = f->curstat = 2;
5692a55deb1SDavid E. O'Brien 			f->out[2] = f->out[0];
5702a55deb1SDavid E. O'Brien 			for (i = 0; i < NCHARS; i++)
5712a55deb1SDavid E. O'Brien 				f->gototab[2][i] = 0;
5722a55deb1SDavid E. O'Brien 		}
5732a55deb1SDavid E. O'Brien 	} while (*p++ != 0);
5742a55deb1SDavid E. O'Brien 	return (0);
5752a55deb1SDavid E. O'Brien }
5762a55deb1SDavid E. O'Brien 
577813da98dSDavid E. O'Brien int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
5782a55deb1SDavid E. O'Brien {
5792a55deb1SDavid E. O'Brien 	int s, ns;
5802a55deb1SDavid E. O'Brien 	uschar *p = (uschar *) p0;
5812a55deb1SDavid E. O'Brien 	uschar *q;
5822a55deb1SDavid E. O'Brien 	int i, k;
5832a55deb1SDavid E. O'Brien 
58462ebc626SRuslan Ermilov 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
58562ebc626SRuslan Ermilov 	if (f->reset) {
58662ebc626SRuslan Ermilov 		f->initstat = s = makeinit(f,1);
58762ebc626SRuslan Ermilov 	} else {
58862ebc626SRuslan Ermilov 		s = f->initstat;
58962ebc626SRuslan Ermilov 	}
5902a55deb1SDavid E. O'Brien 	patlen = -1;
5912a55deb1SDavid E. O'Brien 	while (*p) {
5922a55deb1SDavid E. O'Brien 		q = p;
5932a55deb1SDavid E. O'Brien 		do {
5942a55deb1SDavid E. O'Brien 			if (f->out[s])		/* final state */
5952a55deb1SDavid E. O'Brien 				patlen = q-p;
596addad6afSRong-En Fan 			/* assert(*q < NCHARS); */
5972a55deb1SDavid E. O'Brien 			if ((ns = f->gototab[s][*q]) != 0)
5982a55deb1SDavid E. O'Brien 				s = ns;
5992a55deb1SDavid E. O'Brien 			else
6002a55deb1SDavid E. O'Brien 				s = cgoto(f, s, *q);
6012a55deb1SDavid E. O'Brien 			if (s == 1) {	/* no transition */
6022a55deb1SDavid E. O'Brien 				if (patlen > 0) {
6032a55deb1SDavid E. O'Brien 					patbeg = (char *) p;
6042a55deb1SDavid E. O'Brien 					return(1);
6052a55deb1SDavid E. O'Brien 				} else
6062a55deb1SDavid E. O'Brien 					goto nnextin;	/* no nonempty match */
6072a55deb1SDavid E. O'Brien 			}
6082a55deb1SDavid E. O'Brien 		} while (*q++ != 0);
6092a55deb1SDavid E. O'Brien 		if (f->out[s])
6102a55deb1SDavid E. O'Brien 			patlen = q-p-1;	/* don't count $ */
6112a55deb1SDavid E. O'Brien 		if (patlen > 0 ) {
6122a55deb1SDavid E. O'Brien 			patbeg = (char *) p;
6132a55deb1SDavid E. O'Brien 			return(1);
6142a55deb1SDavid E. O'Brien 		}
6152a55deb1SDavid E. O'Brien 	nnextin:
6162a55deb1SDavid E. O'Brien 		s = 2;
6172a55deb1SDavid E. O'Brien 		if (f->reset) {
6182a55deb1SDavid E. O'Brien 			for (i = 2; i <= f->curstat; i++)
6192a55deb1SDavid E. O'Brien 				xfree(f->posns[i]);
6202a55deb1SDavid E. O'Brien 			k = *f->posns[0];
621*b5253557SWarner Losh 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
6222a55deb1SDavid E. O'Brien 				overflo("out of state space");
6232a55deb1SDavid E. O'Brien 			for (i = 0; i <= k; i++)
6242a55deb1SDavid E. O'Brien 				(f->posns[2])[i] = (f->posns[0])[i];
6252a55deb1SDavid E. O'Brien 			f->initstat = f->curstat = 2;
6262a55deb1SDavid E. O'Brien 			f->out[2] = f->out[0];
6272a55deb1SDavid E. O'Brien 			for (i = 0; i < NCHARS; i++)
6282a55deb1SDavid E. O'Brien 				f->gototab[2][i] = 0;
6292a55deb1SDavid E. O'Brien 		}
6302a55deb1SDavid E. O'Brien 		p++;
6312a55deb1SDavid E. O'Brien 	}
6322a55deb1SDavid E. O'Brien 	return (0);
6332a55deb1SDavid E. O'Brien }
6342a55deb1SDavid E. O'Brien 
635813da98dSDavid E. O'Brien Node *reparse(const char *p)	/* parses regular expression pointed to by p */
6362a55deb1SDavid E. O'Brien {			/* uses relex() to scan regular expression */
6372a55deb1SDavid E. O'Brien 	Node *np;
6382a55deb1SDavid E. O'Brien 
6392a55deb1SDavid E. O'Brien 	dprintf( ("reparse <%s>\n", p) );
6402a55deb1SDavid E. O'Brien 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
6412a55deb1SDavid E. O'Brien 	rtok = relex();
642813da98dSDavid E. O'Brien 	/* GNU compatibility: an empty regexp matches anything */
643addad6afSRong-En Fan 	if (rtok == '\0') {
644813da98dSDavid E. O'Brien 		/* FATAL("empty regular expression"); previous */
645addad6afSRong-En Fan 		return(op2(EMPTYRE, NIL, NIL));
646addad6afSRong-En Fan 	}
6472a55deb1SDavid E. O'Brien 	np = regexp();
6482a55deb1SDavid E. O'Brien 	if (rtok != '\0')
6492a55deb1SDavid E. O'Brien 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
6502a55deb1SDavid E. O'Brien 	return(np);
6512a55deb1SDavid E. O'Brien }
6522a55deb1SDavid E. O'Brien 
6532a55deb1SDavid E. O'Brien Node *regexp(void)	/* top-level parse of reg expr */
6542a55deb1SDavid E. O'Brien {
6552a55deb1SDavid E. O'Brien 	return (alt(concat(primary())));
6562a55deb1SDavid E. O'Brien }
6572a55deb1SDavid E. O'Brien 
6582a55deb1SDavid E. O'Brien Node *primary(void)
6592a55deb1SDavid E. O'Brien {
6602a55deb1SDavid E. O'Brien 	Node *np;
661*b5253557SWarner Losh 	int savelastatom;
6622a55deb1SDavid E. O'Brien 
6632a55deb1SDavid E. O'Brien 	switch (rtok) {
6642a55deb1SDavid E. O'Brien 	case CHAR:
665*b5253557SWarner Losh 		lastatom = starttok;
6662a55deb1SDavid E. O'Brien 		np = op2(CHAR, NIL, itonp(rlxval));
6672a55deb1SDavid E. O'Brien 		rtok = relex();
6682a55deb1SDavid E. O'Brien 		return (unary(np));
6692a55deb1SDavid E. O'Brien 	case ALL:
6702a55deb1SDavid E. O'Brien 		rtok = relex();
6712a55deb1SDavid E. O'Brien 		return (unary(op2(ALL, NIL, NIL)));
672addad6afSRong-En Fan 	case EMPTYRE:
673addad6afSRong-En Fan 		rtok = relex();
674*b5253557SWarner Losh 		return (unary(op2(EMPTYRE, NIL, NIL)));
6752a55deb1SDavid E. O'Brien 	case DOT:
676*b5253557SWarner Losh 		lastatom = starttok;
6772a55deb1SDavid E. O'Brien 		rtok = relex();
6782a55deb1SDavid E. O'Brien 		return (unary(op2(DOT, NIL, NIL)));
6792a55deb1SDavid E. O'Brien 	case CCL:
6802a55deb1SDavid E. O'Brien 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
681*b5253557SWarner Losh 		lastatom = starttok;
6822a55deb1SDavid E. O'Brien 		rtok = relex();
6832a55deb1SDavid E. O'Brien 		return (unary(np));
6842a55deb1SDavid E. O'Brien 	case NCCL:
6852a55deb1SDavid E. O'Brien 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
686*b5253557SWarner Losh 		lastatom = starttok;
6872a55deb1SDavid E. O'Brien 		rtok = relex();
6882a55deb1SDavid E. O'Brien 		return (unary(np));
6892a55deb1SDavid E. O'Brien 	case '^':
6902a55deb1SDavid E. O'Brien 		rtok = relex();
6912a55deb1SDavid E. O'Brien 		return (unary(op2(CHAR, NIL, itonp(HAT))));
6922a55deb1SDavid E. O'Brien 	case '$':
6932a55deb1SDavid E. O'Brien 		rtok = relex();
6942a55deb1SDavid E. O'Brien 		return (unary(op2(CHAR, NIL, NIL)));
6952a55deb1SDavid E. O'Brien 	case '(':
696*b5253557SWarner Losh 		lastatom = starttok;
697*b5253557SWarner Losh 		savelastatom = starttok - basestr; /* Retain over recursion */
6982a55deb1SDavid E. O'Brien 		rtok = relex();
6992a55deb1SDavid E. O'Brien 		if (rtok == ')') {	/* special pleading for () */
7002a55deb1SDavid E. O'Brien 			rtok = relex();
7012a55deb1SDavid E. O'Brien 			return unary(op2(CCL, NIL, (Node *) tostring("")));
7022a55deb1SDavid E. O'Brien 		}
7032a55deb1SDavid E. O'Brien 		np = regexp();
7042a55deb1SDavid E. O'Brien 		if (rtok == ')') {
705*b5253557SWarner Losh 			lastatom = basestr + savelastatom; /* Restore */
7062a55deb1SDavid E. O'Brien 			rtok = relex();
7072a55deb1SDavid E. O'Brien 			return (unary(np));
7082a55deb1SDavid E. O'Brien 		}
7092a55deb1SDavid E. O'Brien 		else
7102a55deb1SDavid E. O'Brien 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
7112a55deb1SDavid E. O'Brien 	default:
7122a55deb1SDavid E. O'Brien 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
7132a55deb1SDavid E. O'Brien 	}
7142a55deb1SDavid E. O'Brien 	return 0;	/*NOTREACHED*/
7152a55deb1SDavid E. O'Brien }
7162a55deb1SDavid E. O'Brien 
7172a55deb1SDavid E. O'Brien Node *concat(Node *np)
7182a55deb1SDavid E. O'Brien {
7192a55deb1SDavid E. O'Brien 	switch (rtok) {
720*b5253557SWarner Losh 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
7212a55deb1SDavid E. O'Brien 		return (concat(op2(CAT, np, primary())));
722*b5253557SWarner Losh 	case EMPTYRE:
723*b5253557SWarner Losh 		rtok = relex();
724*b5253557SWarner Losh 		return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
725*b5253557SWarner Losh 				primary())));
7262a55deb1SDavid E. O'Brien 	}
7272a55deb1SDavid E. O'Brien 	return (np);
7282a55deb1SDavid E. O'Brien }
7292a55deb1SDavid E. O'Brien 
7302a55deb1SDavid E. O'Brien Node *alt(Node *np)
7312a55deb1SDavid E. O'Brien {
7322a55deb1SDavid E. O'Brien 	if (rtok == OR) {
7332a55deb1SDavid E. O'Brien 		rtok = relex();
7342a55deb1SDavid E. O'Brien 		return (alt(op2(OR, np, concat(primary()))));
7352a55deb1SDavid E. O'Brien 	}
7362a55deb1SDavid E. O'Brien 	return (np);
7372a55deb1SDavid E. O'Brien }
7382a55deb1SDavid E. O'Brien 
7392a55deb1SDavid E. O'Brien Node *unary(Node *np)
7402a55deb1SDavid E. O'Brien {
7412a55deb1SDavid E. O'Brien 	switch (rtok) {
7422a55deb1SDavid E. O'Brien 	case STAR:
7432a55deb1SDavid E. O'Brien 		rtok = relex();
7442a55deb1SDavid E. O'Brien 		return (unary(op2(STAR, np, NIL)));
7452a55deb1SDavid E. O'Brien 	case PLUS:
7462a55deb1SDavid E. O'Brien 		rtok = relex();
7472a55deb1SDavid E. O'Brien 		return (unary(op2(PLUS, np, NIL)));
7482a55deb1SDavid E. O'Brien 	case QUEST:
7492a55deb1SDavid E. O'Brien 		rtok = relex();
7502a55deb1SDavid E. O'Brien 		return (unary(op2(QUEST, np, NIL)));
7512a55deb1SDavid E. O'Brien 	default:
7522a55deb1SDavid E. O'Brien 		return (np);
7532a55deb1SDavid E. O'Brien 	}
7542a55deb1SDavid E. O'Brien }
7552a55deb1SDavid E. O'Brien 
756007c6572SDag-Erling Smørgrav /*
757007c6572SDag-Erling Smørgrav  * Character class definitions conformant to the POSIX locale as
758007c6572SDag-Erling Smørgrav  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
759007c6572SDag-Erling Smørgrav  * and operating character sets are both ASCII (ISO646) or supersets
760007c6572SDag-Erling Smørgrav  * thereof.
761007c6572SDag-Erling Smørgrav  *
762007c6572SDag-Erling Smørgrav  * Note that to avoid overflowing the temporary buffer used in
763007c6572SDag-Erling Smørgrav  * relex(), the expanded character class (prior to range expansion)
764007c6572SDag-Erling Smørgrav  * must be less than twice the size of their full name.
765007c6572SDag-Erling Smørgrav  */
766fc6b1dfeSDavid E. O'Brien 
767fc6b1dfeSDavid E. O'Brien /* Because isblank doesn't show up in any of the header files on any
768fc6b1dfeSDavid E. O'Brien  * system i use, it's defined here.  if some other locale has a richer
769fc6b1dfeSDavid E. O'Brien  * definition of "blank", define HAS_ISBLANK and provide your own
770fc6b1dfeSDavid E. O'Brien  * version.
77188b8d487SRuslan Ermilov  * the parentheses here are an attempt to find a path through the maze
77288b8d487SRuslan Ermilov  * of macro definition and/or function and/or version provided.  thanks
77388b8d487SRuslan Ermilov  * to nelson beebe for the suggestion; let's see if it works everywhere.
774fc6b1dfeSDavid E. O'Brien  */
775fc6b1dfeSDavid E. O'Brien 
77691217c1cSRuslan Ermilov /* #define HAS_ISBLANK */
777fc6b1dfeSDavid E. O'Brien #ifndef HAS_ISBLANK
778fc6b1dfeSDavid E. O'Brien 
7791b11b783SRuslan Ermilov int (xisblank)(int c)
780fc6b1dfeSDavid E. O'Brien {
781fc6b1dfeSDavid E. O'Brien 	return c==' ' || c=='\t';
782fc6b1dfeSDavid E. O'Brien }
783fc6b1dfeSDavid E. O'Brien 
784fc6b1dfeSDavid E. O'Brien #endif
785fc6b1dfeSDavid E. O'Brien 
786007c6572SDag-Erling Smørgrav struct charclass {
787007c6572SDag-Erling Smørgrav 	const char *cc_name;
788007c6572SDag-Erling Smørgrav 	int cc_namelen;
789fc6b1dfeSDavid E. O'Brien 	int (*cc_func)(int);
790007c6572SDag-Erling Smørgrav } charclasses[] = {
791fc6b1dfeSDavid E. O'Brien 	{ "alnum",	5,	isalnum },
792fc6b1dfeSDavid E. O'Brien 	{ "alpha",	5,	isalpha },
7931b11b783SRuslan Ermilov #ifndef HAS_ISBLANK
794*b5253557SWarner Losh 	{ "blank",	5,	xisblank },
7951b11b783SRuslan Ermilov #else
796fc6b1dfeSDavid E. O'Brien 	{ "blank",	5,	isblank },
7971b11b783SRuslan Ermilov #endif
798fc6b1dfeSDavid E. O'Brien 	{ "cntrl",	5,	iscntrl },
799fc6b1dfeSDavid E. O'Brien 	{ "digit",	5,	isdigit },
800fc6b1dfeSDavid E. O'Brien 	{ "graph",	5,	isgraph },
801fc6b1dfeSDavid E. O'Brien 	{ "lower",	5,	islower },
802fc6b1dfeSDavid E. O'Brien 	{ "print",	5,	isprint },
803fc6b1dfeSDavid E. O'Brien 	{ "punct",	5,	ispunct },
804fc6b1dfeSDavid E. O'Brien 	{ "space",	5,	isspace },
805fc6b1dfeSDavid E. O'Brien 	{ "upper",	5,	isupper },
806fc6b1dfeSDavid E. O'Brien 	{ "xdigit",	6,	isxdigit },
807007c6572SDag-Erling Smørgrav 	{ NULL,		0,	NULL },
808007c6572SDag-Erling Smørgrav };
809007c6572SDag-Erling Smørgrav 
810*b5253557SWarner Losh #define REPEAT_SIMPLE		0
811*b5253557SWarner Losh #define REPEAT_PLUS_APPENDED	1
812*b5253557SWarner Losh #define REPEAT_WITH_Q		2
813*b5253557SWarner Losh #define REPEAT_ZERO		3
814*b5253557SWarner Losh 
815*b5253557SWarner Losh static int
816*b5253557SWarner Losh replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
817*b5253557SWarner Losh 	       int atomlen, int firstnum, int secondnum, int special_case)
818*b5253557SWarner Losh {
819*b5253557SWarner Losh 	int i, j;
820*b5253557SWarner Losh 	uschar *buf = 0;
821*b5253557SWarner Losh 	int ret = 1;
822*b5253557SWarner Losh 	int init_q = (firstnum==0);		/* first added char will be ? */
823*b5253557SWarner Losh 	int n_q_reps = secondnum-firstnum;	/* m>n, so reduce until {1,m-n} left  */
824*b5253557SWarner Losh 	int prefix_length = reptok - basestr;	/* prefix includes first rep	*/
825*b5253557SWarner Losh 	int suffix_length = strlen((char *) reptok) - reptoklen;	/* string after rep specifier	*/
826*b5253557SWarner Losh 	int size = prefix_length +  suffix_length;
827*b5253557SWarner Losh 
828*b5253557SWarner Losh 	if (firstnum > 1) {	/* add room for reps 2 through firstnum */
829*b5253557SWarner Losh 		size += atomlen*(firstnum-1);
830*b5253557SWarner Losh 	}
831*b5253557SWarner Losh 
832*b5253557SWarner Losh 	/* Adjust size of buffer for special cases */
833*b5253557SWarner Losh 	if (special_case == REPEAT_PLUS_APPENDED) {
834*b5253557SWarner Losh 		size++;		/* for the final + */
835*b5253557SWarner Losh 	} else if (special_case == REPEAT_WITH_Q) {
836*b5253557SWarner Losh 		size += init_q + (atomlen+1)* n_q_reps;
837*b5253557SWarner Losh 	} else if (special_case == REPEAT_ZERO) {
838*b5253557SWarner Losh 		size += 2;	/* just a null ERE: () */
839*b5253557SWarner Losh 	}
840*b5253557SWarner Losh 	if ((buf = (uschar *) malloc(size+1)) == NULL)
841*b5253557SWarner Losh 		FATAL("out of space in reg expr %.10s..", lastre);
842*b5253557SWarner Losh 	memcpy(buf, basestr, prefix_length);	/* copy prefix	*/
843*b5253557SWarner Losh 	j = prefix_length;
844*b5253557SWarner Losh 	if (special_case == REPEAT_ZERO) {
845*b5253557SWarner Losh 		j -= atomlen;
846*b5253557SWarner Losh 		buf[j++] = '(';
847*b5253557SWarner Losh 		buf[j++] = ')';
848*b5253557SWarner Losh 	}
849*b5253557SWarner Losh 	for (i=1; i < firstnum; i++) {		/* copy x reps 	*/
850*b5253557SWarner Losh 		memcpy(&buf[j], atom, atomlen);
851*b5253557SWarner Losh 		j += atomlen;
852*b5253557SWarner Losh 	}
853*b5253557SWarner Losh 	if (special_case == REPEAT_PLUS_APPENDED) {
854*b5253557SWarner Losh 		buf[j++] = '+';
855*b5253557SWarner Losh 	} else if (special_case == REPEAT_WITH_Q) {
856*b5253557SWarner Losh 		if (init_q) buf[j++] = '?';
857*b5253557SWarner Losh 		for (i=0; i < n_q_reps; i++) {	/* copy x? reps */
858*b5253557SWarner Losh 			memcpy(&buf[j], atom, atomlen);
859*b5253557SWarner Losh 			j += atomlen;
860*b5253557SWarner Losh 			buf[j++] = '?';
861*b5253557SWarner Losh 		}
862*b5253557SWarner Losh 	}
863*b5253557SWarner Losh 	memcpy(&buf[j], reptok+reptoklen, suffix_length);
864*b5253557SWarner Losh 	if (special_case == REPEAT_ZERO) {
865*b5253557SWarner Losh 		buf[j+suffix_length] = '\0';
866*b5253557SWarner Losh 	} else {
867*b5253557SWarner Losh 		buf[size] = '\0';
868*b5253557SWarner Losh 	}
869*b5253557SWarner Losh 	/* free old basestr */
870*b5253557SWarner Losh 	if (firstbasestr != basestr) {
871*b5253557SWarner Losh 		if (basestr)
872*b5253557SWarner Losh 			xfree(basestr);
873*b5253557SWarner Losh 	}
874*b5253557SWarner Losh 	basestr = buf;
875*b5253557SWarner Losh 	prestr  = buf + prefix_length;
876*b5253557SWarner Losh 	if (special_case == REPEAT_ZERO) {
877*b5253557SWarner Losh 		prestr  -= atomlen;
878*b5253557SWarner Losh 		ret++;
879*b5253557SWarner Losh 	}
880*b5253557SWarner Losh 	return ret;
881*b5253557SWarner Losh }
882*b5253557SWarner Losh 
883*b5253557SWarner Losh static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
884*b5253557SWarner Losh 		  int atomlen, int firstnum, int secondnum)
885*b5253557SWarner Losh {
886*b5253557SWarner Losh 	/*
887*b5253557SWarner Losh 	   In general, the repetition specifier or "bound" is replaced here
888*b5253557SWarner Losh 	   by an equivalent ERE string, repeating the immediately previous atom
889*b5253557SWarner Losh 	   and appending ? and + as needed. Note that the first copy of the
890*b5253557SWarner Losh 	   atom is left in place, except in the special_case of a zero-repeat
891*b5253557SWarner Losh 	   (i.e., {0}).
892*b5253557SWarner Losh 	 */
893*b5253557SWarner Losh 	if (secondnum < 0) {	/* means {n,} -> repeat n-1 times followed by PLUS */
894*b5253557SWarner Losh 		if (firstnum < 2) {
895*b5253557SWarner Losh 			/* 0 or 1: should be handled before you get here */
896*b5253557SWarner Losh 			FATAL("internal error");
897*b5253557SWarner Losh 		} else {
898*b5253557SWarner Losh 			return replace_repeat(reptok, reptoklen, atom, atomlen,
899*b5253557SWarner Losh 				firstnum, secondnum, REPEAT_PLUS_APPENDED);
900*b5253557SWarner Losh 		}
901*b5253557SWarner Losh 	} else if (firstnum == secondnum) {	/* {n} or {n,n} -> simply repeat n-1 times */
902*b5253557SWarner Losh 		if (firstnum == 0) {	/* {0} or {0,0} */
903*b5253557SWarner Losh 			/* This case is unusual because the resulting
904*b5253557SWarner Losh 			   replacement string might actually be SMALLER than
905*b5253557SWarner Losh 			   the original ERE */
906*b5253557SWarner Losh 			return replace_repeat(reptok, reptoklen, atom, atomlen,
907*b5253557SWarner Losh 					firstnum, secondnum, REPEAT_ZERO);
908*b5253557SWarner Losh 		} else {		/* (firstnum >= 1) */
909*b5253557SWarner Losh 			return replace_repeat(reptok, reptoklen, atom, atomlen,
910*b5253557SWarner Losh 					firstnum, secondnum, REPEAT_SIMPLE);
911*b5253557SWarner Losh 		}
912*b5253557SWarner Losh 	} else if (firstnum < secondnum) {	/* {n,m} -> repeat n-1 times then alternate  */
913*b5253557SWarner Losh 		/*  x{n,m}  =>  xx...x{1, m-n+1}  =>  xx...x?x?x?..x?	*/
914*b5253557SWarner Losh 		return replace_repeat(reptok, reptoklen, atom, atomlen,
915*b5253557SWarner Losh 					firstnum, secondnum, REPEAT_WITH_Q);
916*b5253557SWarner Losh 	} else {	/* Error - shouldn't be here (n>m) */
917*b5253557SWarner Losh 		FATAL("internal error");
918*b5253557SWarner Losh 	}
919*b5253557SWarner Losh 	return 0;
920*b5253557SWarner Losh }
921007c6572SDag-Erling Smørgrav 
9222a55deb1SDavid E. O'Brien int relex(void)		/* lexical analyzer for reparse */
9232a55deb1SDavid E. O'Brien {
9242a55deb1SDavid E. O'Brien 	int c, n;
9252a55deb1SDavid E. O'Brien 	int cflag;
926*b5253557SWarner Losh 	static uschar *buf = 0;
9272a55deb1SDavid E. O'Brien 	static int bufsz = 100;
9282a55deb1SDavid E. O'Brien 	uschar *bp;
929007c6572SDag-Erling Smørgrav 	struct charclass *cc;
930fc6b1dfeSDavid E. O'Brien 	int i;
931*b5253557SWarner Losh 	int num, m, commafound, digitfound;
932*b5253557SWarner Losh 	const uschar *startreptok;
933*b5253557SWarner Losh 
934*b5253557SWarner Losh rescan:
935*b5253557SWarner Losh 	starttok = prestr;
9362a55deb1SDavid E. O'Brien 
9372a55deb1SDavid E. O'Brien 	switch (c = *prestr++) {
9382a55deb1SDavid E. O'Brien 	case '|': return OR;
9392a55deb1SDavid E. O'Brien 	case '*': return STAR;
9402a55deb1SDavid E. O'Brien 	case '+': return PLUS;
9412a55deb1SDavid E. O'Brien 	case '?': return QUEST;
9422a55deb1SDavid E. O'Brien 	case '.': return DOT;
9432a55deb1SDavid E. O'Brien 	case '\0': prestr--; return '\0';
9442a55deb1SDavid E. O'Brien 	case '^':
9452a55deb1SDavid E. O'Brien 	case '$':
9462a55deb1SDavid E. O'Brien 	case '(':
9472a55deb1SDavid E. O'Brien 	case ')':
9482a55deb1SDavid E. O'Brien 		return c;
9492a55deb1SDavid E. O'Brien 	case '\\':
950d86a0988SRuslan Ermilov 		rlxval = quoted(&prestr);
9512a55deb1SDavid E. O'Brien 		return CHAR;
9522a55deb1SDavid E. O'Brien 	default:
9532a55deb1SDavid E. O'Brien 		rlxval = c;
9542a55deb1SDavid E. O'Brien 		return CHAR;
9552a55deb1SDavid E. O'Brien 	case '[':
956*b5253557SWarner Losh 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
9572a55deb1SDavid E. O'Brien 			FATAL("out of space in reg expr %.10s..", lastre);
9582a55deb1SDavid E. O'Brien 		bp = buf;
9592a55deb1SDavid E. O'Brien 		if (*prestr == '^') {
9602a55deb1SDavid E. O'Brien 			cflag = 1;
9612a55deb1SDavid E. O'Brien 			prestr++;
9622a55deb1SDavid E. O'Brien 		}
9632a55deb1SDavid E. O'Brien 		else
9642a55deb1SDavid E. O'Brien 			cflag = 0;
965007c6572SDag-Erling Smørgrav 		n = 2 * strlen((const char *) prestr)+1;
966addad6afSRong-En Fan 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
9672a55deb1SDavid E. O'Brien 			FATAL("out of space for reg expr %.10s...", lastre);
9682a55deb1SDavid E. O'Brien 		for (; ; ) {
9692a55deb1SDavid E. O'Brien 			if ((c = *prestr++) == '\\') {
9702a55deb1SDavid E. O'Brien 				*bp++ = '\\';
9712a55deb1SDavid E. O'Brien 				if ((c = *prestr++) == '\0')
9722a55deb1SDavid E. O'Brien 					FATAL("nonterminated character class %.20s...", lastre);
9732a55deb1SDavid E. O'Brien 				*bp++ = c;
9742a55deb1SDavid E. O'Brien 			/* } else if (c == '\n') { */
9752a55deb1SDavid E. O'Brien 			/* 	FATAL("newline in character class %.20s...", lastre); */
976007c6572SDag-Erling Smørgrav 			} else if (c == '[' && *prestr == ':') {
977007c6572SDag-Erling Smørgrav 				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
978007c6572SDag-Erling Smørgrav 				for (cc = charclasses; cc->cc_name; cc++)
979007c6572SDag-Erling Smørgrav 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
980007c6572SDag-Erling Smørgrav 						break;
981007c6572SDag-Erling Smørgrav 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
982007c6572SDag-Erling Smørgrav 				    prestr[2 + cc->cc_namelen] == ']') {
983007c6572SDag-Erling Smørgrav 					prestr += cc->cc_namelen + 3;
984*b5253557SWarner Losh 					/*
985*b5253557SWarner Losh 					 * BUG: We begin at 1, instead of 0, since we
986*b5253557SWarner Losh 					 * would otherwise prematurely terminate the
987*b5253557SWarner Losh 					 * string for classes like [[:cntrl:]]. This
988*b5253557SWarner Losh 					 * means that we can't match the NUL character,
989*b5253557SWarner Losh 					 * not without first adapting the entire
990*b5253557SWarner Losh 					 * program to track each string's length.
991*b5253557SWarner Losh 					 */
992*b5253557SWarner Losh 					for (i = 1; i <= UCHAR_MAX; i++) {
993addad6afSRong-En Fan 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
994fc6b1dfeSDavid E. O'Brien 						    FATAL("out of space for reg expr %.10s...", lastre);
995fc6b1dfeSDavid E. O'Brien 						if (cc->cc_func(i)) {
996fc6b1dfeSDavid E. O'Brien 							*bp++ = i;
997fc6b1dfeSDavid E. O'Brien 							n++;
998fc6b1dfeSDavid E. O'Brien 						}
999fc6b1dfeSDavid E. O'Brien 					}
1000007c6572SDag-Erling Smørgrav 				} else
1001007c6572SDag-Erling Smørgrav 					*bp++ = c;
1002*b5253557SWarner Losh 			} else if (c == '[' && *prestr == '.') {
1003*b5253557SWarner Losh 				char collate_char;
1004*b5253557SWarner Losh 				prestr++;
1005*b5253557SWarner Losh 				collate_char = *prestr++;
1006*b5253557SWarner Losh 				if (*prestr == '.' && prestr[1] == ']') {
1007*b5253557SWarner Losh 					prestr += 2;
1008*b5253557SWarner Losh 					/* Found it: map via locale TBD: for
1009*b5253557SWarner Losh 					   now, simply return this char.  This
1010*b5253557SWarner Losh 					   is sufficient to pass conformance
1011*b5253557SWarner Losh 					   test awk.ex 156
1012*b5253557SWarner Losh 					 */
1013*b5253557SWarner Losh 					if (*prestr == ']') {
1014*b5253557SWarner Losh 						prestr++;
1015*b5253557SWarner Losh 						rlxval = collate_char;
1016*b5253557SWarner Losh 						return CHAR;
1017*b5253557SWarner Losh 					}
1018*b5253557SWarner Losh 				}
1019*b5253557SWarner Losh 			} else if (c == '[' && *prestr == '=') {
1020*b5253557SWarner Losh 				char equiv_char;
1021*b5253557SWarner Losh 				prestr++;
1022*b5253557SWarner Losh 				equiv_char = *prestr++;
1023*b5253557SWarner Losh 				if (*prestr == '=' && prestr[1] == ']') {
1024*b5253557SWarner Losh 					prestr += 2;
1025*b5253557SWarner Losh 					/* Found it: map via locale TBD: for now
1026*b5253557SWarner Losh 					   simply return this char. This is
1027*b5253557SWarner Losh 					   sufficient to pass conformance test
1028*b5253557SWarner Losh 					   awk.ex 156
1029*b5253557SWarner Losh 					 */
1030*b5253557SWarner Losh 					if (*prestr == ']') {
1031*b5253557SWarner Losh 						prestr++;
1032*b5253557SWarner Losh 						rlxval = equiv_char;
1033*b5253557SWarner Losh 						return CHAR;
1034*b5253557SWarner Losh 					}
1035*b5253557SWarner Losh 				}
10362a55deb1SDavid E. O'Brien 			} else if (c == '\0') {
10372a55deb1SDavid E. O'Brien 				FATAL("nonterminated character class %.20s", lastre);
10382a55deb1SDavid E. O'Brien 			} else if (bp == buf) {	/* 1st char is special */
10392a55deb1SDavid E. O'Brien 				*bp++ = c;
10402a55deb1SDavid E. O'Brien 			} else if (c == ']') {
10412a55deb1SDavid E. O'Brien 				*bp++ = 0;
10422a55deb1SDavid E. O'Brien 				rlxstr = (uschar *) tostring((char *) buf);
10432a55deb1SDavid E. O'Brien 				if (cflag == 0)
10442a55deb1SDavid E. O'Brien 					return CCL;
10452a55deb1SDavid E. O'Brien 				else
10462a55deb1SDavid E. O'Brien 					return NCCL;
10472a55deb1SDavid E. O'Brien 			} else
10482a55deb1SDavid E. O'Brien 				*bp++ = c;
10492a55deb1SDavid E. O'Brien 		}
1050*b5253557SWarner Losh 		break;
1051*b5253557SWarner Losh 	case '{':
1052*b5253557SWarner Losh 		if (isdigit(*(prestr))) {
1053*b5253557SWarner Losh 			num = 0;	/* Process as a repetition */
1054*b5253557SWarner Losh 			n = -1; m = -1;
1055*b5253557SWarner Losh 			commafound = 0;
1056*b5253557SWarner Losh 			digitfound = 0;
1057*b5253557SWarner Losh 			startreptok = prestr-1;
1058*b5253557SWarner Losh 			/* Remember start of previous atom here ? */
1059*b5253557SWarner Losh 		} else {        	/* just a { char, not a repetition */
1060*b5253557SWarner Losh 			rlxval = c;
1061*b5253557SWarner Losh 			return CHAR;
1062*b5253557SWarner Losh                 }
1063*b5253557SWarner Losh 		for (; ; ) {
1064*b5253557SWarner Losh 			if ((c = *prestr++) == '}') {
1065*b5253557SWarner Losh 				if (commafound) {
1066*b5253557SWarner Losh 					if (digitfound) { /* {n,m} */
1067*b5253557SWarner Losh 						m = num;
1068*b5253557SWarner Losh 						if (m<n)
1069*b5253557SWarner Losh 							FATAL("illegal repetition expression: class %.20s",
1070*b5253557SWarner Losh 								lastre);
1071*b5253557SWarner Losh 						if ((n==0) && (m==1)) {
1072*b5253557SWarner Losh 							return QUEST;
1073*b5253557SWarner Losh 						}
1074*b5253557SWarner Losh 					} else {	/* {n,} */
1075*b5253557SWarner Losh 						if (n==0) return STAR;
1076*b5253557SWarner Losh 						if (n==1) return PLUS;
1077*b5253557SWarner Losh 					}
1078*b5253557SWarner Losh 				} else {
1079*b5253557SWarner Losh 					if (digitfound) { /* {n} same as {n,n} */
1080*b5253557SWarner Losh 						n = num;
1081*b5253557SWarner Losh 						m = num;
1082*b5253557SWarner Losh 					} else {	/* {} */
1083*b5253557SWarner Losh 						FATAL("illegal repetition expression: class %.20s",
1084*b5253557SWarner Losh 							lastre);
1085*b5253557SWarner Losh 					}
1086*b5253557SWarner Losh 				}
1087*b5253557SWarner Losh 				if (repeat(starttok, prestr-starttok, lastatom,
1088*b5253557SWarner Losh 					   startreptok - lastatom, n, m) > 0) {
1089*b5253557SWarner Losh 					if ((n==0) && (m==0)) {
1090*b5253557SWarner Losh 						return EMPTYRE;
1091*b5253557SWarner Losh 					}
1092*b5253557SWarner Losh 					/* must rescan input for next token */
1093*b5253557SWarner Losh 					goto rescan;
1094*b5253557SWarner Losh 				}
1095*b5253557SWarner Losh 				/* Failed to replace: eat up {...} characters
1096*b5253557SWarner Losh 				   and treat like just PLUS */
1097*b5253557SWarner Losh 				return PLUS;
1098*b5253557SWarner Losh 			} else if (c == '\0') {
1099*b5253557SWarner Losh 				FATAL("nonterminated character class %.20s",
1100*b5253557SWarner Losh 					lastre);
1101*b5253557SWarner Losh 			} else if (isdigit(c)) {
1102*b5253557SWarner Losh 				num = 10 * num + c - '0';
1103*b5253557SWarner Losh 				digitfound = 1;
1104*b5253557SWarner Losh 			} else if (c == ',') {
1105*b5253557SWarner Losh 				if (commafound)
1106*b5253557SWarner Losh 					FATAL("illegal repetition expression: class %.20s",
1107*b5253557SWarner Losh 						lastre);
1108*b5253557SWarner Losh 				/* looking for {n,} or {n,m} */
1109*b5253557SWarner Losh 				commafound = 1;
1110*b5253557SWarner Losh 				n = num;
1111*b5253557SWarner Losh 				digitfound = 0; /* reset */
1112*b5253557SWarner Losh 				num = 0;
1113*b5253557SWarner Losh 			} else {
1114*b5253557SWarner Losh 				FATAL("illegal repetition expression: class %.20s",
1115*b5253557SWarner Losh 					lastre);
1116*b5253557SWarner Losh 			}
1117*b5253557SWarner Losh 		}
1118*b5253557SWarner Losh 		break;
11192a55deb1SDavid E. O'Brien 	}
11202a55deb1SDavid E. O'Brien }
11212a55deb1SDavid E. O'Brien 
11222a55deb1SDavid E. O'Brien int cgoto(fa *f, int s, int c)
11232a55deb1SDavid E. O'Brien {
11242a55deb1SDavid E. O'Brien 	int i, j, k;
11252a55deb1SDavid E. O'Brien 	int *p, *q;
11262a55deb1SDavid E. O'Brien 
1127c263f9bfSRuslan Ermilov 	assert(c == HAT || c < NCHARS);
11282a55deb1SDavid E. O'Brien 	while (f->accept >= maxsetvec) {	/* guessing here! */
11292a55deb1SDavid E. O'Brien 		maxsetvec *= 4;
11302a55deb1SDavid E. O'Brien 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
11312a55deb1SDavid E. O'Brien 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1132*b5253557SWarner Losh 		if (setvec == 0 || tmpset == 0)
11332a55deb1SDavid E. O'Brien 			overflo("out of space in cgoto()");
11342a55deb1SDavid E. O'Brien 	}
11352a55deb1SDavid E. O'Brien 	for (i = 0; i <= f->accept; i++)
11362a55deb1SDavid E. O'Brien 		setvec[i] = 0;
11372a55deb1SDavid E. O'Brien 	setcnt = 0;
11382a55deb1SDavid E. O'Brien 	/* compute positions of gototab[s,c] into setvec */
11392a55deb1SDavid E. O'Brien 	p = f->posns[s];
11402a55deb1SDavid E. O'Brien 	for (i = 1; i <= *p; i++) {
11412a55deb1SDavid E. O'Brien 		if ((k = f->re[p[i]].ltype) != FINAL) {
11422a55deb1SDavid E. O'Brien 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
11432a55deb1SDavid E. O'Brien 			 || (k == DOT && c != 0 && c != HAT)
11442a55deb1SDavid E. O'Brien 			 || (k == ALL && c != 0)
1145addad6afSRong-En Fan 			 || (k == EMPTYRE && c != 0)
11462a55deb1SDavid E. O'Brien 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
11472a55deb1SDavid E. O'Brien 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
11482a55deb1SDavid E. O'Brien 				q = f->re[p[i]].lfollow;
11492a55deb1SDavid E. O'Brien 				for (j = 1; j <= *q; j++) {
11502a55deb1SDavid E. O'Brien 					if (q[j] >= maxsetvec) {
11512a55deb1SDavid E. O'Brien 						maxsetvec *= 4;
11522a55deb1SDavid E. O'Brien 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
115391217c1cSRuslan Ermilov 						tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1154*b5253557SWarner Losh 						if (setvec == 0 || tmpset == 0)
11552a55deb1SDavid E. O'Brien 							overflo("cgoto overflow");
11562a55deb1SDavid E. O'Brien 					}
11572a55deb1SDavid E. O'Brien 					if (setvec[q[j]] == 0) {
11582a55deb1SDavid E. O'Brien 						setcnt++;
11592a55deb1SDavid E. O'Brien 						setvec[q[j]] = 1;
11602a55deb1SDavid E. O'Brien 					}
11612a55deb1SDavid E. O'Brien 				}
11622a55deb1SDavid E. O'Brien 			}
11632a55deb1SDavid E. O'Brien 		}
11642a55deb1SDavid E. O'Brien 	}
11652a55deb1SDavid E. O'Brien 	/* determine if setvec is a previous state */
11662a55deb1SDavid E. O'Brien 	tmpset[0] = setcnt;
11672a55deb1SDavid E. O'Brien 	j = 1;
11682a55deb1SDavid E. O'Brien 	for (i = f->accept; i >= 0; i--)
11692a55deb1SDavid E. O'Brien 		if (setvec[i]) {
11702a55deb1SDavid E. O'Brien 			tmpset[j++] = i;
11712a55deb1SDavid E. O'Brien 		}
11722a55deb1SDavid E. O'Brien 	/* tmpset == previous state? */
11732a55deb1SDavid E. O'Brien 	for (i = 1; i <= f->curstat; i++) {
11742a55deb1SDavid E. O'Brien 		p = f->posns[i];
11752a55deb1SDavid E. O'Brien 		if ((k = tmpset[0]) != p[0])
11762a55deb1SDavid E. O'Brien 			goto different;
11772a55deb1SDavid E. O'Brien 		for (j = 1; j <= k; j++)
11782a55deb1SDavid E. O'Brien 			if (tmpset[j] != p[j])
11792a55deb1SDavid E. O'Brien 				goto different;
11802a55deb1SDavid E. O'Brien 		/* setvec is state i */
11812a55deb1SDavid E. O'Brien 		f->gototab[s][c] = i;
11822a55deb1SDavid E. O'Brien 		return i;
11832a55deb1SDavid E. O'Brien 	  different:;
11842a55deb1SDavid E. O'Brien 	}
11852a55deb1SDavid E. O'Brien 
11862a55deb1SDavid E. O'Brien 	/* add tmpset to current set of states */
11872a55deb1SDavid E. O'Brien 	if (f->curstat >= NSTATES-1) {
11882a55deb1SDavid E. O'Brien 		f->curstat = 2;
11892a55deb1SDavid E. O'Brien 		f->reset = 1;
11902a55deb1SDavid E. O'Brien 		for (i = 2; i < NSTATES; i++)
11912a55deb1SDavid E. O'Brien 			xfree(f->posns[i]);
11922a55deb1SDavid E. O'Brien 	} else
11932a55deb1SDavid E. O'Brien 		++(f->curstat);
11942a55deb1SDavid E. O'Brien 	for (i = 0; i < NCHARS; i++)
11952a55deb1SDavid E. O'Brien 		f->gototab[f->curstat][i] = 0;
11962a55deb1SDavid E. O'Brien 	xfree(f->posns[f->curstat]);
1197*b5253557SWarner Losh 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
11982a55deb1SDavid E. O'Brien 		overflo("out of space in cgoto");
11992a55deb1SDavid E. O'Brien 
12002a55deb1SDavid E. O'Brien 	f->posns[f->curstat] = p;
12012a55deb1SDavid E. O'Brien 	f->gototab[s][c] = f->curstat;
12022a55deb1SDavid E. O'Brien 	for (i = 0; i <= setcnt; i++)
12032a55deb1SDavid E. O'Brien 		p[i] = tmpset[i];
12042a55deb1SDavid E. O'Brien 	if (setvec[f->accept])
12052a55deb1SDavid E. O'Brien 		f->out[f->curstat] = 1;
12062a55deb1SDavid E. O'Brien 	else
12072a55deb1SDavid E. O'Brien 		f->out[f->curstat] = 0;
12082a55deb1SDavid E. O'Brien 	return f->curstat;
12092a55deb1SDavid E. O'Brien }
12102a55deb1SDavid E. O'Brien 
12112a55deb1SDavid E. O'Brien 
12122a55deb1SDavid E. O'Brien void freefa(fa *f)	/* free a finite automaton */
12132a55deb1SDavid E. O'Brien {
12142a55deb1SDavid E. O'Brien 	int i;
12152a55deb1SDavid E. O'Brien 
12162a55deb1SDavid E. O'Brien 	if (f == NULL)
12172a55deb1SDavid E. O'Brien 		return;
12182a55deb1SDavid E. O'Brien 	for (i = 0; i <= f->curstat; i++)
12192a55deb1SDavid E. O'Brien 		xfree(f->posns[i]);
12202a55deb1SDavid E. O'Brien 	for (i = 0; i <= f->accept; i++) {
12212a55deb1SDavid E. O'Brien 		xfree(f->re[i].lfollow);
12222a55deb1SDavid E. O'Brien 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
12232a55deb1SDavid E. O'Brien 			xfree((f->re[i].lval.np));
12242a55deb1SDavid E. O'Brien 	}
12252a55deb1SDavid E. O'Brien 	xfree(f->restr);
12262a55deb1SDavid E. O'Brien 	xfree(f);
12272a55deb1SDavid E. O'Brien }
1228