xref: /freebsd/contrib/nvi/regex/regcomp.c (revision 416ba5c74546f32a993436a99516d35008e9f384)
1f0957ccaSPeter Wemm /*	$NetBSD: regcomp.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
2f0957ccaSPeter Wemm 
3f0957ccaSPeter Wemm /*-
4f0957ccaSPeter Wemm  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5f0957ccaSPeter Wemm  * Copyright (c) 1992, 1993, 1994
6f0957ccaSPeter Wemm  *	The Regents of the University of California.  All rights reserved.
7f0957ccaSPeter Wemm  *
8f0957ccaSPeter Wemm  * This code is derived from software contributed to Berkeley by
9f0957ccaSPeter Wemm  * Henry Spencer of the University of Toronto.
10f0957ccaSPeter Wemm  *
11f0957ccaSPeter Wemm  * Redistribution and use in source and binary forms, with or without
12f0957ccaSPeter Wemm  * modification, are permitted provided that the following conditions
13f0957ccaSPeter Wemm  * are met:
14f0957ccaSPeter Wemm  * 1. Redistributions of source code must retain the above copyright
15f0957ccaSPeter Wemm  *    notice, this list of conditions and the following disclaimer.
16f0957ccaSPeter Wemm  * 2. Redistributions in binary form must reproduce the above copyright
17f0957ccaSPeter Wemm  *    notice, this list of conditions and the following disclaimer in the
18f0957ccaSPeter Wemm  *    documentation and/or other materials provided with the distribution.
19*c271fa92SBaptiste Daroussin  * 3. Neither the name of the University nor the names of its contributors
20f0957ccaSPeter Wemm  *    may be used to endorse or promote products derived from this software
21f0957ccaSPeter Wemm  *    without specific prior written permission.
22f0957ccaSPeter Wemm  *
23f0957ccaSPeter Wemm  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24f0957ccaSPeter Wemm  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25f0957ccaSPeter Wemm  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26f0957ccaSPeter Wemm  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27f0957ccaSPeter Wemm  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28f0957ccaSPeter Wemm  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29f0957ccaSPeter Wemm  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30f0957ccaSPeter Wemm  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31f0957ccaSPeter Wemm  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32f0957ccaSPeter Wemm  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33f0957ccaSPeter Wemm  * SUCH DAMAGE.
34f0957ccaSPeter Wemm  *
35f0957ccaSPeter Wemm  *	@(#)regcomp.c	8.4 (Berkeley) 3/19/94
36f0957ccaSPeter Wemm  */
37f0957ccaSPeter Wemm 
38f0957ccaSPeter Wemm #if defined(LIBC_SCCS) && !defined(lint)
39f0957ccaSPeter Wemm static char sccsid[] = "@(#)regcomp.c	8.4 (Berkeley) 3/19/94";
40f0957ccaSPeter Wemm #endif /* LIBC_SCCS and not lint */
41f0957ccaSPeter Wemm 
42f0957ccaSPeter Wemm #include <sys/types.h>
43f0957ccaSPeter Wemm #include <stdio.h>
44f0957ccaSPeter Wemm #include <string.h>
45f0957ccaSPeter Wemm #include <ctype.h>
46f0957ccaSPeter Wemm #include <limits.h>
47f0957ccaSPeter Wemm #include <stdlib.h>
48f0957ccaSPeter Wemm #include <regex.h>
49f0957ccaSPeter Wemm 
50f0957ccaSPeter Wemm #include "utils.h"
51f0957ccaSPeter Wemm #include "regex2.h"
52f0957ccaSPeter Wemm 
53f0957ccaSPeter Wemm #include "cclass.h"
54f0957ccaSPeter Wemm #include "cname.h"
55f0957ccaSPeter Wemm 
56f0957ccaSPeter Wemm /*
57f0957ccaSPeter Wemm  * parse structure, passed up and down to avoid global variables and
58f0957ccaSPeter Wemm  * other clumsinesses
59f0957ccaSPeter Wemm  */
60f0957ccaSPeter Wemm struct parse {
61f0957ccaSPeter Wemm 	RCHAR_T *next;		/* next character in RE */
62f0957ccaSPeter Wemm 	RCHAR_T *end;		/* end of string (-> NUL normally) */
63f0957ccaSPeter Wemm 	int error;		/* has an error been seen? */
64f0957ccaSPeter Wemm 	sop *strip;		/* malloced strip */
65f0957ccaSPeter Wemm 	RCHAR_T *stripdata;	/* malloced stripdata */
66f0957ccaSPeter Wemm 	sopno ssize;		/* malloced strip size (allocated) */
67f0957ccaSPeter Wemm 	sopno slen;		/* malloced strip length (used) */
68f0957ccaSPeter Wemm 	int ncsalloc;		/* number of csets allocated */
69f0957ccaSPeter Wemm 	struct re_guts *g;
70f0957ccaSPeter Wemm #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
71f0957ccaSPeter Wemm 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
72f0957ccaSPeter Wemm 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
73f0957ccaSPeter Wemm };
74f0957ccaSPeter Wemm 
75f0957ccaSPeter Wemm /* ========= begin header generated by ./mkh ========= */
76f0957ccaSPeter Wemm #ifdef __cplusplus
77f0957ccaSPeter Wemm extern "C" {
78f0957ccaSPeter Wemm #endif
79f0957ccaSPeter Wemm 
80f0957ccaSPeter Wemm /* === regcomp.c === */
81*c271fa92SBaptiste Daroussin static void p_ere(struct parse *p, int stop, size_t reclimit);
82*c271fa92SBaptiste Daroussin static void p_ere_exp(struct parse *p, size_t reclimit);
83*c271fa92SBaptiste Daroussin static void p_str(struct parse *p);
84*c271fa92SBaptiste Daroussin static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
85*c271fa92SBaptiste Daroussin static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
86*c271fa92SBaptiste Daroussin static int p_count(struct parse *p);
87*c271fa92SBaptiste Daroussin static void p_bracket(struct parse *p);
88*c271fa92SBaptiste Daroussin static void p_b_term(struct parse *p, cset *cs);
89*c271fa92SBaptiste Daroussin static void p_b_cclass(struct parse *p, cset *cs);
90*c271fa92SBaptiste Daroussin static void p_b_eclass(struct parse *p, cset *cs);
91*c271fa92SBaptiste Daroussin static char p_b_symbol(struct parse *p);
92*c271fa92SBaptiste Daroussin static char p_b_coll_elem(struct parse *p, int endc);
93*c271fa92SBaptiste Daroussin static char othercase(int ch);
94*c271fa92SBaptiste Daroussin static void bothcases(struct parse *p, int ch);
95*c271fa92SBaptiste Daroussin static void ordinary(struct parse *p, int ch);
96*c271fa92SBaptiste Daroussin static void nonnewline(struct parse *p);
97*c271fa92SBaptiste Daroussin static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
98*c271fa92SBaptiste Daroussin static int seterr(struct parse *p, int e);
99*c271fa92SBaptiste Daroussin static cset *allocset(struct parse *p);
100*c271fa92SBaptiste Daroussin static void freeset(struct parse *p, cset *cs);
101*c271fa92SBaptiste Daroussin static int freezeset(struct parse *p, cset *cs);
102*c271fa92SBaptiste Daroussin static int firstch(struct parse *p, cset *cs);
103*c271fa92SBaptiste Daroussin static int nch(struct parse *p, cset *cs);
104*c271fa92SBaptiste Daroussin static void mcadd(struct parse *p, cset *cs, const char *cp);
105f0957ccaSPeter Wemm #ifdef notdef
106*c271fa92SBaptiste Daroussin static void mcsub(cset *cs, char *cp);
107*c271fa92SBaptiste Daroussin static int mcin(cset *cs, char *cp);
108*c271fa92SBaptiste Daroussin static char *mcfind(cset *cs, char *cp);
109f0957ccaSPeter Wemm #endif
110*c271fa92SBaptiste Daroussin static void mcinvert(struct parse *p, cset *cs);
111*c271fa92SBaptiste Daroussin static void mccase(struct parse *p, cset *cs);
112f0957ccaSPeter Wemm #ifdef notdef
113*c271fa92SBaptiste Daroussin static int isinsets(struct re_guts *g, int c);
114*c271fa92SBaptiste Daroussin static int samesets(struct re_guts *g, int c1, int c2);
115f0957ccaSPeter Wemm #endif
116*c271fa92SBaptiste Daroussin static void categorize(struct parse *p, struct re_guts *g);
117*c271fa92SBaptiste Daroussin static sopno dupl(struct parse *p, sopno start, sopno finish);
118*c271fa92SBaptiste Daroussin static void doemit(struct parse *p, sop op, size_t opnd);
119*c271fa92SBaptiste Daroussin static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
120*c271fa92SBaptiste Daroussin static void dofwd(struct parse *p, sopno pos, sop value);
121*c271fa92SBaptiste Daroussin static int enlarge(struct parse *p, sopno size);
122*c271fa92SBaptiste Daroussin static void stripsnug(struct parse *p, struct re_guts *g);
123*c271fa92SBaptiste Daroussin static void findmust(struct parse *p, struct re_guts *g);
124*c271fa92SBaptiste Daroussin static sopno pluscount(struct parse *p, struct re_guts *g);
125f0957ccaSPeter Wemm 
126f0957ccaSPeter Wemm #ifdef __cplusplus
127f0957ccaSPeter Wemm }
128f0957ccaSPeter Wemm #endif
129f0957ccaSPeter Wemm /* ========= end header generated by ./mkh ========= */
130f0957ccaSPeter Wemm 
131f0957ccaSPeter Wemm static RCHAR_T nuls[10];		/* place to point scanner in event of error */
132f0957ccaSPeter Wemm 
133f0957ccaSPeter Wemm /*
134f0957ccaSPeter Wemm  * macros for use with parse structure
135f0957ccaSPeter Wemm  * BEWARE:  these know that the parse structure is named `p' !!!
136f0957ccaSPeter Wemm  */
137f0957ccaSPeter Wemm #define	PEEK()	(*p->next)
138f0957ccaSPeter Wemm #define	PEEK2()	(*(p->next+1))
139f0957ccaSPeter Wemm #define	MORE()	(p->next < p->end)
140f0957ccaSPeter Wemm #define	MORE2()	(p->next+1 < p->end)
141f0957ccaSPeter Wemm #define	SEE(c)	(MORE() && PEEK() == (c))
142f0957ccaSPeter Wemm #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
143f0957ccaSPeter Wemm #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
144f0957ccaSPeter Wemm #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
145f0957ccaSPeter Wemm #define	NEXT()	(p->next++)
146f0957ccaSPeter Wemm #define	NEXT2()	(p->next += 2)
147f0957ccaSPeter Wemm #define	NEXTn(n)	(p->next += (n))
148f0957ccaSPeter Wemm #define	GETNEXT()	(*p->next++)
149f0957ccaSPeter Wemm #define	SETERROR(e)	seterr(p, (e))
150f0957ccaSPeter Wemm #define	REQUIRE(co, e)	((co) || SETERROR(e))
151f0957ccaSPeter Wemm #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
152f0957ccaSPeter Wemm #define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
153f0957ccaSPeter Wemm #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
154f0957ccaSPeter Wemm #define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
155f0957ccaSPeter Wemm #define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
156f0957ccaSPeter Wemm #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
157f0957ccaSPeter Wemm #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
158f0957ccaSPeter Wemm #define	HERE()		(p->slen)
159f0957ccaSPeter Wemm #define	THERE()		(p->slen - 1)
160f0957ccaSPeter Wemm #define	THERETHERE()	(p->slen - 2)
161f0957ccaSPeter Wemm #define	DROP(n)	(p->slen -= (n))
162f0957ccaSPeter Wemm 
163f0957ccaSPeter Wemm #ifndef NDEBUG
164f0957ccaSPeter Wemm static int never = 0;		/* for use in asserts; shuts lint up */
165f0957ccaSPeter Wemm #else
166f0957ccaSPeter Wemm #define	never	0		/* some <assert.h>s have bugs too */
167f0957ccaSPeter Wemm #endif
168f0957ccaSPeter Wemm 
169f0957ccaSPeter Wemm #define	MEMLIMIT	0x8000000
170f0957ccaSPeter Wemm #define MEMSIZE(p) \
171f0957ccaSPeter Wemm 	((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
172f0957ccaSPeter Wemm 	(p)->ncsalloc * sizeof(cset) + \
173f0957ccaSPeter Wemm 	(p)->ssize * sizeof(sop))
174f0957ccaSPeter Wemm #define	RECLIMIT	256
175f0957ccaSPeter Wemm 
176f0957ccaSPeter Wemm /*
177f0957ccaSPeter Wemm  - regcomp - interface for parser and compilation
178f0957ccaSPeter Wemm  */
179f0957ccaSPeter Wemm int				/* 0 success, otherwise REG_something */
regcomp(regex_t * preg,const RCHAR_T * pattern,int cflags)180f0957ccaSPeter Wemm regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags)
181f0957ccaSPeter Wemm {
182f0957ccaSPeter Wemm 	struct parse pa;
183*c271fa92SBaptiste Daroussin 	struct re_guts *g;
184*c271fa92SBaptiste Daroussin 	struct parse *p = &pa;
185*c271fa92SBaptiste Daroussin 	int i;
186*c271fa92SBaptiste Daroussin 	size_t len;
187f0957ccaSPeter Wemm #ifdef REDEBUG
188f0957ccaSPeter Wemm #	define	GOODFLAGS(f)	(f)
189f0957ccaSPeter Wemm #else
190f0957ccaSPeter Wemm #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
191f0957ccaSPeter Wemm #endif
192f0957ccaSPeter Wemm 
193f0957ccaSPeter Wemm 	cflags = GOODFLAGS(cflags);
194f0957ccaSPeter Wemm 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
195f0957ccaSPeter Wemm 		return(REG_INVARG);
196f0957ccaSPeter Wemm 
197f0957ccaSPeter Wemm 	if (cflags&REG_PEND) {
198f0957ccaSPeter Wemm 		if (preg->re_endp < pattern)
199f0957ccaSPeter Wemm 			return(REG_INVARG);
200f0957ccaSPeter Wemm 		len = preg->re_endp - pattern;
201f0957ccaSPeter Wemm 	} else
202f0957ccaSPeter Wemm 		len = STRLEN(pattern);
203f0957ccaSPeter Wemm 
204f0957ccaSPeter Wemm 	/* do the mallocs early so failure handling is easy */
205f0957ccaSPeter Wemm 	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
206f0957ccaSPeter Wemm 							(NC-1)*sizeof(cat_t));
207f0957ccaSPeter Wemm 	if (g == NULL)
208f0957ccaSPeter Wemm 		return(REG_ESPACE);
209f0957ccaSPeter Wemm 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
210f0957ccaSPeter Wemm 	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
211f0957ccaSPeter Wemm 	if (p->strip == NULL) {
212f0957ccaSPeter Wemm 		free((char *)g);
213f0957ccaSPeter Wemm 		return(REG_ESPACE);
214f0957ccaSPeter Wemm 	}
215f0957ccaSPeter Wemm 	p->stripdata = (RCHAR_T *)malloc(p->ssize * sizeof(RCHAR_T));
216f0957ccaSPeter Wemm 	if (p->stripdata == NULL) {
217f0957ccaSPeter Wemm 		free((char *)p->strip);
218f0957ccaSPeter Wemm 		free((char *)g);
219f0957ccaSPeter Wemm 		return(REG_ESPACE);
220f0957ccaSPeter Wemm 	}
221f0957ccaSPeter Wemm 	p->slen = 0;
222f0957ccaSPeter Wemm 
223f0957ccaSPeter Wemm 	/* set things up */
224f0957ccaSPeter Wemm 	p->g = g;
225f0957ccaSPeter Wemm 	p->next = (RCHAR_T *)pattern;	/* convenience; we do not modify it */
226f0957ccaSPeter Wemm 	p->end = p->next + len;
227f0957ccaSPeter Wemm 	p->error = 0;
228f0957ccaSPeter Wemm 	p->ncsalloc = 0;
229f0957ccaSPeter Wemm 	for (i = 0; i < NPAREN; i++) {
230f0957ccaSPeter Wemm 		p->pbegin[i] = 0;
231f0957ccaSPeter Wemm 		p->pend[i] = 0;
232f0957ccaSPeter Wemm 	}
233f0957ccaSPeter Wemm 	g->csetsize = NC;
234f0957ccaSPeter Wemm 	g->sets = NULL;
235f0957ccaSPeter Wemm 	g->setbits = NULL;
236f0957ccaSPeter Wemm 	g->ncsets = 0;
237f0957ccaSPeter Wemm 	g->cflags = cflags;
238f0957ccaSPeter Wemm 	g->iflags = 0;
239f0957ccaSPeter Wemm 	g->nbol = 0;
240f0957ccaSPeter Wemm 	g->neol = 0;
241f0957ccaSPeter Wemm 	g->must = NULL;
242f0957ccaSPeter Wemm 	g->mlen = 0;
243f0957ccaSPeter Wemm 	g->nsub = 0;
244f0957ccaSPeter Wemm #if 0
245f0957ccaSPeter Wemm 	g->ncategories = 1;	/* category 0 is "everything else" */
246f0957ccaSPeter Wemm 	g->categories = &g->catspace[-(CHAR_MIN)];
247*c271fa92SBaptiste Daroussin 	memset((char *)g->catspace, 0, NC*sizeof(cat_t));
248f0957ccaSPeter Wemm #endif
249f0957ccaSPeter Wemm 	g->backrefs = 0;
250f0957ccaSPeter Wemm 
251f0957ccaSPeter Wemm 	/* do it */
252f0957ccaSPeter Wemm 	EMIT(OEND, 0);
253f0957ccaSPeter Wemm 	g->firststate = THERE();
254f0957ccaSPeter Wemm 	if (cflags&REG_EXTENDED)
255f0957ccaSPeter Wemm 		p_ere(p, OUT, 0);
256f0957ccaSPeter Wemm 	else if (cflags&REG_NOSPEC)
257f0957ccaSPeter Wemm 		p_str(p);
258f0957ccaSPeter Wemm 	else
259f0957ccaSPeter Wemm 		p_bre(p, OUT, OUT, 0);
260f0957ccaSPeter Wemm 	EMIT(OEND, 0);
261f0957ccaSPeter Wemm 	g->laststate = THERE();
262f0957ccaSPeter Wemm 
263f0957ccaSPeter Wemm 	/* tidy up loose ends and fill things in */
264f0957ccaSPeter Wemm 	categorize(p, g);
265f0957ccaSPeter Wemm 	stripsnug(p, g);
266f0957ccaSPeter Wemm 	findmust(p, g);
267f0957ccaSPeter Wemm 	g->nplus = pluscount(p, g);
268f0957ccaSPeter Wemm 	g->magic = MAGIC2;
269f0957ccaSPeter Wemm 	preg->re_nsub = g->nsub;
270f0957ccaSPeter Wemm 	preg->re_g = g;
271f0957ccaSPeter Wemm 	preg->re_magic = MAGIC1;
272f0957ccaSPeter Wemm #ifndef REDEBUG
273f0957ccaSPeter Wemm 	/* not debugging, so can't rely on the assert() in regexec() */
274f0957ccaSPeter Wemm 	if (g->iflags&BAD)
275f0957ccaSPeter Wemm 		SETERROR(REG_ASSERT);
276f0957ccaSPeter Wemm #endif
277f0957ccaSPeter Wemm 
278f0957ccaSPeter Wemm 	/* win or lose, we're done */
279f0957ccaSPeter Wemm 	if (p->error != 0)	/* lose */
280f0957ccaSPeter Wemm 		regfree(preg);
281f0957ccaSPeter Wemm 	return(p->error);
282f0957ccaSPeter Wemm }
283f0957ccaSPeter Wemm 
284f0957ccaSPeter Wemm /*
285f0957ccaSPeter Wemm  - p_ere - ERE parser top level, concatenation and alternation
286f0957ccaSPeter Wemm  */
287f0957ccaSPeter Wemm static void
p_ere(struct parse * p,int stop,size_t reclimit)288*c271fa92SBaptiste Daroussin p_ere(struct parse *p, int stop, size_t reclimit)
289f0957ccaSPeter Wemm          			/* character this ERE should end at */
290f0957ccaSPeter Wemm {
291*c271fa92SBaptiste Daroussin 	char c;
292*c271fa92SBaptiste Daroussin 	sopno prevback = 0;
293*c271fa92SBaptiste Daroussin 	sopno prevfwd = 0;
294*c271fa92SBaptiste Daroussin 	sopno conc;
295*c271fa92SBaptiste Daroussin 	int first = 1;		/* is this the first alternative? */
296f0957ccaSPeter Wemm 
297f0957ccaSPeter Wemm 	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
298f0957ccaSPeter Wemm 		p->error = REG_ESPACE;
299f0957ccaSPeter Wemm 		return;
300f0957ccaSPeter Wemm 	}
301f0957ccaSPeter Wemm 
302f0957ccaSPeter Wemm 	for (;;) {
303f0957ccaSPeter Wemm 		/* do a bunch of concatenated expressions */
304f0957ccaSPeter Wemm 		conc = HERE();
305f0957ccaSPeter Wemm 		while (MORE() && (c = PEEK()) != '|' && c != stop)
306f0957ccaSPeter Wemm 			p_ere_exp(p, reclimit);
307f0957ccaSPeter Wemm 		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
308f0957ccaSPeter Wemm 
309f0957ccaSPeter Wemm 		if (!EAT('|'))
310f0957ccaSPeter Wemm 			break;		/* NOTE BREAK OUT */
311f0957ccaSPeter Wemm 
312f0957ccaSPeter Wemm 		if (first) {
313f0957ccaSPeter Wemm 			INSERT(OCH_, conc);	/* offset is wrong */
314f0957ccaSPeter Wemm 			prevfwd = conc;
315f0957ccaSPeter Wemm 			prevback = conc;
316f0957ccaSPeter Wemm 			first = 0;
317f0957ccaSPeter Wemm 		}
318f0957ccaSPeter Wemm 		ASTERN(OOR1, prevback);
319f0957ccaSPeter Wemm 		prevback = THERE();
320f0957ccaSPeter Wemm 		AHEAD(prevfwd);			/* fix previous offset */
321f0957ccaSPeter Wemm 		prevfwd = HERE();
322f0957ccaSPeter Wemm 		EMIT(OOR2, 0);			/* offset is very wrong */
323f0957ccaSPeter Wemm 	}
324f0957ccaSPeter Wemm 
325f0957ccaSPeter Wemm 	if (!first) {		/* tail-end fixups */
326f0957ccaSPeter Wemm 		AHEAD(prevfwd);
327f0957ccaSPeter Wemm 		ASTERN(O_CH, prevback);
328f0957ccaSPeter Wemm 	}
329f0957ccaSPeter Wemm 
330f0957ccaSPeter Wemm 	assert(!MORE() || SEE(stop));
331f0957ccaSPeter Wemm }
332f0957ccaSPeter Wemm 
333f0957ccaSPeter Wemm /*
334f0957ccaSPeter Wemm  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
335f0957ccaSPeter Wemm  */
336f0957ccaSPeter Wemm static void
p_ere_exp(struct parse * p,size_t reclimit)337*c271fa92SBaptiste Daroussin p_ere_exp(struct parse *p, size_t reclimit)
338f0957ccaSPeter Wemm {
339*c271fa92SBaptiste Daroussin 	char c;
340*c271fa92SBaptiste Daroussin 	sopno pos;
341*c271fa92SBaptiste Daroussin 	int count;
342*c271fa92SBaptiste Daroussin 	int count2;
343*c271fa92SBaptiste Daroussin 	sopno subno;
344f0957ccaSPeter Wemm 	int wascaret = 0;
345f0957ccaSPeter Wemm 
346f0957ccaSPeter Wemm 	assert(MORE());		/* caller should have ensured this */
347f0957ccaSPeter Wemm 	c = GETNEXT();
348f0957ccaSPeter Wemm 
349f0957ccaSPeter Wemm 	pos = HERE();
350f0957ccaSPeter Wemm 	switch (c) {
351f0957ccaSPeter Wemm 	case '(':
352f0957ccaSPeter Wemm 		(void)REQUIRE(MORE(), REG_EPAREN);
353f0957ccaSPeter Wemm 		p->g->nsub++;
354f0957ccaSPeter Wemm 		subno = p->g->nsub;
355f0957ccaSPeter Wemm 		if (subno < NPAREN)
356f0957ccaSPeter Wemm 			p->pbegin[subno] = HERE();
357f0957ccaSPeter Wemm 		EMIT(OLPAREN, subno);
358f0957ccaSPeter Wemm 		if (!SEE(')'))
359f0957ccaSPeter Wemm 			p_ere(p, ')', reclimit);
360f0957ccaSPeter Wemm 		if (subno < NPAREN) {
361f0957ccaSPeter Wemm 			p->pend[subno] = HERE();
362f0957ccaSPeter Wemm 			assert(p->pend[subno] != 0);
363f0957ccaSPeter Wemm 		}
364f0957ccaSPeter Wemm 		EMIT(ORPAREN, subno);
365f0957ccaSPeter Wemm 		(void)MUSTEAT(')', REG_EPAREN);
366f0957ccaSPeter Wemm 		break;
367f0957ccaSPeter Wemm #ifndef POSIX_MISTAKE
368f0957ccaSPeter Wemm 	case ')':		/* happens only if no current unmatched ( */
369f0957ccaSPeter Wemm 		/*
370f0957ccaSPeter Wemm 		 * You may ask, why the ifndef?  Because I didn't notice
371f0957ccaSPeter Wemm 		 * this until slightly too late for 1003.2, and none of the
372f0957ccaSPeter Wemm 		 * other 1003.2 regular-expression reviewers noticed it at
373f0957ccaSPeter Wemm 		 * all.  So an unmatched ) is legal POSIX, at least until
374f0957ccaSPeter Wemm 		 * we can get it fixed.
375f0957ccaSPeter Wemm 		 */
376f0957ccaSPeter Wemm 		SETERROR(REG_EPAREN);
377f0957ccaSPeter Wemm 		break;
378f0957ccaSPeter Wemm #endif
379f0957ccaSPeter Wemm 	case '^':
380f0957ccaSPeter Wemm 		EMIT(OBOL, 0);
381f0957ccaSPeter Wemm 		p->g->iflags |= USEBOL;
382f0957ccaSPeter Wemm 		p->g->nbol++;
383f0957ccaSPeter Wemm 		wascaret = 1;
384f0957ccaSPeter Wemm 		break;
385f0957ccaSPeter Wemm 	case '$':
386f0957ccaSPeter Wemm 		EMIT(OEOL, 0);
387f0957ccaSPeter Wemm 		p->g->iflags |= USEEOL;
388f0957ccaSPeter Wemm 		p->g->neol++;
389f0957ccaSPeter Wemm 		break;
390f0957ccaSPeter Wemm 	case '|':
391f0957ccaSPeter Wemm 		SETERROR(REG_EMPTY);
392f0957ccaSPeter Wemm 		break;
393f0957ccaSPeter Wemm 	case '*':
394f0957ccaSPeter Wemm 	case '+':
395f0957ccaSPeter Wemm 	case '?':
396f0957ccaSPeter Wemm 		SETERROR(REG_BADRPT);
397f0957ccaSPeter Wemm 		break;
398f0957ccaSPeter Wemm 	case '.':
399f0957ccaSPeter Wemm 		if (p->g->cflags&REG_NEWLINE)
400f0957ccaSPeter Wemm 			nonnewline(p);
401f0957ccaSPeter Wemm 		else
402f0957ccaSPeter Wemm 			EMIT(OANY, 0);
403f0957ccaSPeter Wemm 		break;
404f0957ccaSPeter Wemm 	case '[':
405f0957ccaSPeter Wemm 		p_bracket(p);
406f0957ccaSPeter Wemm 		break;
407f0957ccaSPeter Wemm 	case '\\':
408f0957ccaSPeter Wemm 		(void)REQUIRE(MORE(), REG_EESCAPE);
409f0957ccaSPeter Wemm 		c = GETNEXT();
410f0957ccaSPeter Wemm 		ordinary(p, c);
411f0957ccaSPeter Wemm 		break;
412f0957ccaSPeter Wemm 	case '{':		/* okay as ordinary except if digit follows */
413f0957ccaSPeter Wemm 		(void)REQUIRE(!MORE() || !ISDIGIT((UCHAR_T)PEEK()), REG_BADRPT);
414f0957ccaSPeter Wemm 		/* FALLTHROUGH */
415f0957ccaSPeter Wemm 	default:
416f0957ccaSPeter Wemm 		ordinary(p, c);
417f0957ccaSPeter Wemm 		break;
418f0957ccaSPeter Wemm 	}
419f0957ccaSPeter Wemm 
420f0957ccaSPeter Wemm 	if (!MORE())
421f0957ccaSPeter Wemm 		return;
422f0957ccaSPeter Wemm 	c = PEEK();
423f0957ccaSPeter Wemm 	/* we call { a repetition if followed by a digit */
424f0957ccaSPeter Wemm 	if (!( c == '*' || c == '+' || c == '?' ||
425f0957ccaSPeter Wemm 				(c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ))
426f0957ccaSPeter Wemm 		return;		/* no repetition, we're done */
427f0957ccaSPeter Wemm 	NEXT();
428f0957ccaSPeter Wemm 
429f0957ccaSPeter Wemm 	(void)REQUIRE(!wascaret, REG_BADRPT);
430f0957ccaSPeter Wemm 	switch (c) {
431f0957ccaSPeter Wemm 	case '*':	/* implemented as +? */
432f0957ccaSPeter Wemm 		/* this case does not require the (y|) trick, noKLUDGE */
433f0957ccaSPeter Wemm 		INSERT(OPLUS_, pos);
434f0957ccaSPeter Wemm 		ASTERN(O_PLUS, pos);
435f0957ccaSPeter Wemm 		INSERT(OQUEST_, pos);
436f0957ccaSPeter Wemm 		ASTERN(O_QUEST, pos);
437f0957ccaSPeter Wemm 		break;
438f0957ccaSPeter Wemm 	case '+':
439f0957ccaSPeter Wemm 		INSERT(OPLUS_, pos);
440f0957ccaSPeter Wemm 		ASTERN(O_PLUS, pos);
441f0957ccaSPeter Wemm 		break;
442f0957ccaSPeter Wemm 	case '?':
443f0957ccaSPeter Wemm 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
444f0957ccaSPeter Wemm 		INSERT(OCH_, pos);		/* offset slightly wrong */
445f0957ccaSPeter Wemm 		ASTERN(OOR1, pos);		/* this one's right */
446f0957ccaSPeter Wemm 		AHEAD(pos);			/* fix the OCH_ */
447f0957ccaSPeter Wemm 		EMIT(OOR2, 0);			/* offset very wrong... */
448f0957ccaSPeter Wemm 		AHEAD(THERE());			/* ...so fix it */
449f0957ccaSPeter Wemm 		ASTERN(O_CH, THERETHERE());
450f0957ccaSPeter Wemm 		break;
451f0957ccaSPeter Wemm 	case '{':
452f0957ccaSPeter Wemm 		count = p_count(p);
453f0957ccaSPeter Wemm 		if (EAT(',')) {
454f0957ccaSPeter Wemm 			if (ISDIGIT((UCHAR_T)PEEK())) {
455f0957ccaSPeter Wemm 				count2 = p_count(p);
456f0957ccaSPeter Wemm 				(void)REQUIRE(count <= count2, REG_BADBR);
457f0957ccaSPeter Wemm 			} else		/* single number with comma */
458f0957ccaSPeter Wemm 				count2 = INFINITY;
459f0957ccaSPeter Wemm 		} else		/* just a single number */
460f0957ccaSPeter Wemm 			count2 = count;
461f0957ccaSPeter Wemm 		repeat(p, pos, count, count2, 0);
462f0957ccaSPeter Wemm 		if (!EAT('}')) {	/* error heuristics */
463f0957ccaSPeter Wemm 			while (MORE() && PEEK() != '}')
464f0957ccaSPeter Wemm 				NEXT();
465f0957ccaSPeter Wemm 			(void)REQUIRE(MORE(), REG_EBRACE);
466f0957ccaSPeter Wemm 			SETERROR(REG_BADBR);
467f0957ccaSPeter Wemm 		}
468f0957ccaSPeter Wemm 		break;
469f0957ccaSPeter Wemm 	}
470f0957ccaSPeter Wemm 
471f0957ccaSPeter Wemm 	if (!MORE())
472f0957ccaSPeter Wemm 		return;
473f0957ccaSPeter Wemm 	c = PEEK();
474f0957ccaSPeter Wemm 	if (!( c == '*' || c == '+' || c == '?' ||
475f0957ccaSPeter Wemm 				(c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ) )
476f0957ccaSPeter Wemm 		return;
477f0957ccaSPeter Wemm 	SETERROR(REG_BADRPT);
478f0957ccaSPeter Wemm }
479f0957ccaSPeter Wemm 
480f0957ccaSPeter Wemm /*
481f0957ccaSPeter Wemm  - p_str - string (no metacharacters) "parser"
482f0957ccaSPeter Wemm  */
483f0957ccaSPeter Wemm static void
p_str(struct parse * p)484*c271fa92SBaptiste Daroussin p_str(struct parse *p)
485f0957ccaSPeter Wemm {
486f0957ccaSPeter Wemm 	(void)REQUIRE(MORE(), REG_EMPTY);
487f0957ccaSPeter Wemm 	while (MORE())
488f0957ccaSPeter Wemm 		ordinary(p, GETNEXT());
489f0957ccaSPeter Wemm }
490f0957ccaSPeter Wemm 
491f0957ccaSPeter Wemm /*
492f0957ccaSPeter Wemm  - p_bre - BRE parser top level, anchoring and concatenation
493f0957ccaSPeter Wemm  * Giving end1 as OUT essentially eliminates the end1/end2 check.
494f0957ccaSPeter Wemm  *
495f0957ccaSPeter Wemm  * This implementation is a bit of a kludge, in that a trailing $ is first
496f0957ccaSPeter Wemm  * taken as an ordinary character and then revised to be an anchor.  The
497f0957ccaSPeter Wemm  * only undesirable side effect is that '$' gets included as a character
498f0957ccaSPeter Wemm  * category in such cases.  This is fairly harmless; not worth fixing.
499f0957ccaSPeter Wemm  * The amount of lookahead needed to avoid this kludge is excessive.
500f0957ccaSPeter Wemm  */
501f0957ccaSPeter Wemm static void
p_bre(struct parse * p,int end1,int end2,size_t reclimit)502*c271fa92SBaptiste Daroussin p_bre(struct parse *p,
503*c271fa92SBaptiste Daroussin     int end1,		/* first terminating character */
504*c271fa92SBaptiste Daroussin     int end2,		/* second terminating character */
505*c271fa92SBaptiste Daroussin     size_t reclimit)
506f0957ccaSPeter Wemm {
507*c271fa92SBaptiste Daroussin 	sopno start;
508*c271fa92SBaptiste Daroussin 	int first = 1;			/* first subexpression? */
509*c271fa92SBaptiste Daroussin 	int wasdollar = 0;
510f0957ccaSPeter Wemm 
511f0957ccaSPeter Wemm 	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
512f0957ccaSPeter Wemm 		p->error = REG_ESPACE;
513f0957ccaSPeter Wemm 		return;
514f0957ccaSPeter Wemm 	}
515f0957ccaSPeter Wemm 
516f0957ccaSPeter Wemm 	start = HERE();
517f0957ccaSPeter Wemm 
518f0957ccaSPeter Wemm 	if (EAT('^')) {
519f0957ccaSPeter Wemm 		EMIT(OBOL, 0);
520f0957ccaSPeter Wemm 		p->g->iflags |= USEBOL;
521f0957ccaSPeter Wemm 		p->g->nbol++;
522f0957ccaSPeter Wemm 	}
523f0957ccaSPeter Wemm 	while (MORE() && !SEETWO(end1, end2)) {
524f0957ccaSPeter Wemm 		wasdollar = p_simp_re(p, first, reclimit);
525f0957ccaSPeter Wemm 		first = 0;
526f0957ccaSPeter Wemm 	}
527f0957ccaSPeter Wemm 	if (wasdollar) {	/* oops, that was a trailing anchor */
528f0957ccaSPeter Wemm 		DROP(1);
529f0957ccaSPeter Wemm 		EMIT(OEOL, 0);
530f0957ccaSPeter Wemm 		p->g->iflags |= USEEOL;
531f0957ccaSPeter Wemm 		p->g->neol++;
532f0957ccaSPeter Wemm 	}
533f0957ccaSPeter Wemm 
534f0957ccaSPeter Wemm 	(void)REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
535f0957ccaSPeter Wemm }
536f0957ccaSPeter Wemm 
537f0957ccaSPeter Wemm /*
538f0957ccaSPeter Wemm  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
539f0957ccaSPeter Wemm  */
540f0957ccaSPeter Wemm static int			/* was the simple RE an unbackslashed $? */
p_simp_re(struct parse * p,int starordinary,size_t reclimit)541*c271fa92SBaptiste Daroussin p_simp_re(struct parse *p,
542*c271fa92SBaptiste Daroussin     int starordinary,		/* is a leading * an ordinary character? */
543*c271fa92SBaptiste Daroussin     size_t reclimit)
544f0957ccaSPeter Wemm {
545*c271fa92SBaptiste Daroussin 	int c;
546*c271fa92SBaptiste Daroussin 	int count;
547*c271fa92SBaptiste Daroussin 	int count2;
548*c271fa92SBaptiste Daroussin 	sopno pos;
549*c271fa92SBaptiste Daroussin 	int i;
550*c271fa92SBaptiste Daroussin 	sopno subno;
551f0957ccaSPeter Wemm 	int backsl;
552f0957ccaSPeter Wemm 
553f0957ccaSPeter Wemm 	pos = HERE();		/* repetion op, if any, covers from here */
554f0957ccaSPeter Wemm 
555f0957ccaSPeter Wemm 	assert(MORE());		/* caller should have ensured this */
556f0957ccaSPeter Wemm 	c = GETNEXT();
557f0957ccaSPeter Wemm 	backsl = c == '\\';
558f0957ccaSPeter Wemm 	if (backsl) {
559f0957ccaSPeter Wemm 		(void)REQUIRE(MORE(), REG_EESCAPE);
560f0957ccaSPeter Wemm 		c = (unsigned char)GETNEXT();
561f0957ccaSPeter Wemm 		switch (c) {
562f0957ccaSPeter Wemm 		case '{':
563f0957ccaSPeter Wemm 			SETERROR(REG_BADRPT);
564f0957ccaSPeter Wemm 			break;
565f0957ccaSPeter Wemm 		case '(':
566f0957ccaSPeter Wemm 			p->g->nsub++;
567f0957ccaSPeter Wemm 			subno = p->g->nsub;
568f0957ccaSPeter Wemm 			if (subno < NPAREN)
569f0957ccaSPeter Wemm 				p->pbegin[subno] = HERE();
570f0957ccaSPeter Wemm 			EMIT(OLPAREN, subno);
571f0957ccaSPeter Wemm 			/* the MORE here is an error heuristic */
572f0957ccaSPeter Wemm 			if (MORE() && !SEETWO('\\', ')'))
573f0957ccaSPeter Wemm 				p_bre(p, '\\', ')', reclimit);
574f0957ccaSPeter Wemm 			if (subno < NPAREN) {
575f0957ccaSPeter Wemm 				p->pend[subno] = HERE();
576f0957ccaSPeter Wemm 				assert(p->pend[subno] != 0);
577f0957ccaSPeter Wemm 			}
578f0957ccaSPeter Wemm 			EMIT(ORPAREN, subno);
579f0957ccaSPeter Wemm 			(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
580f0957ccaSPeter Wemm 			break;
581f0957ccaSPeter Wemm 		case ')':	/* should not get here -- must be user */
582f0957ccaSPeter Wemm 		case '}':
583f0957ccaSPeter Wemm 			SETERROR(REG_EPAREN);
584f0957ccaSPeter Wemm 			break;
585f0957ccaSPeter Wemm 		case '1':
586f0957ccaSPeter Wemm 		case '2':
587f0957ccaSPeter Wemm 		case '3':
588f0957ccaSPeter Wemm 		case '4':
589f0957ccaSPeter Wemm 		case '5':
590f0957ccaSPeter Wemm 		case '6':
591f0957ccaSPeter Wemm 		case '7':
592f0957ccaSPeter Wemm 		case '8':
593f0957ccaSPeter Wemm 		case '9':
594f0957ccaSPeter Wemm 			i = c - '0';
595f0957ccaSPeter Wemm 			assert(i < NPAREN);
596f0957ccaSPeter Wemm 			if (p->pend[i] != 0) {
597f0957ccaSPeter Wemm 				assert(i <= p->g->nsub);
598f0957ccaSPeter Wemm 				EMIT(OBACK_, i);
599f0957ccaSPeter Wemm 				assert(p->pbegin[i] != 0);
600f0957ccaSPeter Wemm 				assert(p->strip[p->pbegin[i]] == OLPAREN);
601f0957ccaSPeter Wemm 				assert(p->strip[p->pend[i]] == ORPAREN);
602f0957ccaSPeter Wemm 				(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
603f0957ccaSPeter Wemm 				EMIT(O_BACK, i);
604f0957ccaSPeter Wemm 			} else
605f0957ccaSPeter Wemm 				SETERROR(REG_ESUBREG);
606f0957ccaSPeter Wemm 			p->g->backrefs = 1;
607f0957ccaSPeter Wemm 			break;
608f0957ccaSPeter Wemm 		default:
609f0957ccaSPeter Wemm 			ordinary(p, c);
610f0957ccaSPeter Wemm 			break;
611f0957ccaSPeter Wemm 		}
612f0957ccaSPeter Wemm 	} else {
613f0957ccaSPeter Wemm 		switch (c) {
614f0957ccaSPeter Wemm 		case '.':
615f0957ccaSPeter Wemm 			if (p->g->cflags&REG_NEWLINE)
616f0957ccaSPeter Wemm 				nonnewline(p);
617f0957ccaSPeter Wemm 			else
618f0957ccaSPeter Wemm 				EMIT(OANY, 0);
619f0957ccaSPeter Wemm 			break;
620f0957ccaSPeter Wemm 		case '[':
621f0957ccaSPeter Wemm 			p_bracket(p);
622f0957ccaSPeter Wemm 			break;
623f0957ccaSPeter Wemm 		case '*':
624f0957ccaSPeter Wemm 			(void)REQUIRE(starordinary, REG_BADRPT);
625f0957ccaSPeter Wemm 			/* FALLTHROUGH */
626f0957ccaSPeter Wemm 		default:
627f0957ccaSPeter Wemm 			ordinary(p, c);
628f0957ccaSPeter Wemm 			break;
629f0957ccaSPeter Wemm 		}
630f0957ccaSPeter Wemm 	}
631f0957ccaSPeter Wemm 
632f0957ccaSPeter Wemm 	if (EAT('*')) {		/* implemented as +? */
633f0957ccaSPeter Wemm 		/* this case does not require the (y|) trick, noKLUDGE */
634f0957ccaSPeter Wemm 		INSERT(OPLUS_, pos);
635f0957ccaSPeter Wemm 		ASTERN(O_PLUS, pos);
636f0957ccaSPeter Wemm 		INSERT(OQUEST_, pos);
637f0957ccaSPeter Wemm 		ASTERN(O_QUEST, pos);
638f0957ccaSPeter Wemm 	} else if (EATTWO('\\', '{')) {
639f0957ccaSPeter Wemm 		count = p_count(p);
640f0957ccaSPeter Wemm 		if (EAT(',')) {
641f0957ccaSPeter Wemm 			if (MORE() && ISDIGIT((UCHAR_T)PEEK())) {
642f0957ccaSPeter Wemm 				count2 = p_count(p);
643f0957ccaSPeter Wemm 				(void)REQUIRE(count <= count2, REG_BADBR);
644f0957ccaSPeter Wemm 			} else		/* single number with comma */
645f0957ccaSPeter Wemm 				count2 = INFINITY;
646f0957ccaSPeter Wemm 		} else		/* just a single number */
647f0957ccaSPeter Wemm 			count2 = count;
648f0957ccaSPeter Wemm 		repeat(p, pos, count, count2, reclimit);
649f0957ccaSPeter Wemm 		if (!EATTWO('\\', '}')) {	/* error heuristics */
650f0957ccaSPeter Wemm 			while (MORE() && !SEETWO('\\', '}'))
651f0957ccaSPeter Wemm 				NEXT();
652f0957ccaSPeter Wemm 			(void)REQUIRE(MORE(), REG_EBRACE);
653f0957ccaSPeter Wemm 			SETERROR(REG_BADBR);
654f0957ccaSPeter Wemm 		}
655f0957ccaSPeter Wemm 	} else if (!backsl && c == (unsigned char)'$')	/* $ (but not \$) ends it */
656f0957ccaSPeter Wemm 		return(1);
657f0957ccaSPeter Wemm 
658f0957ccaSPeter Wemm 	return(0);
659f0957ccaSPeter Wemm }
660f0957ccaSPeter Wemm 
661f0957ccaSPeter Wemm /*
662f0957ccaSPeter Wemm  - p_count - parse a repetition count
663f0957ccaSPeter Wemm  */
664f0957ccaSPeter Wemm static int			/* the value */
p_count(struct parse * p)665*c271fa92SBaptiste Daroussin p_count(struct parse *p)
666f0957ccaSPeter Wemm {
667*c271fa92SBaptiste Daroussin 	int count = 0;
668*c271fa92SBaptiste Daroussin 	int ndigits = 0;
669f0957ccaSPeter Wemm 
670f0957ccaSPeter Wemm 	while (MORE() && ISDIGIT((UCHAR_T)PEEK()) && count <= DUPMAX) {
671f0957ccaSPeter Wemm 		count = count*10 + (GETNEXT() - '0');
672f0957ccaSPeter Wemm 		ndigits++;
673f0957ccaSPeter Wemm 	}
674f0957ccaSPeter Wemm 
675f0957ccaSPeter Wemm 	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
676f0957ccaSPeter Wemm 	return(count);
677f0957ccaSPeter Wemm }
678f0957ccaSPeter Wemm 
679f0957ccaSPeter Wemm /*
680f0957ccaSPeter Wemm  - p_bracket - parse a bracketed character list
681f0957ccaSPeter Wemm  *
682f0957ccaSPeter Wemm  * Note a significant property of this code:  if the allocset() did SETERROR,
683f0957ccaSPeter Wemm  * no set operations are done.
684f0957ccaSPeter Wemm  */
685f0957ccaSPeter Wemm static void
p_bracket(struct parse * p)686*c271fa92SBaptiste Daroussin p_bracket(struct parse *p)
687f0957ccaSPeter Wemm {
688*c271fa92SBaptiste Daroussin 	cset *cs;
689*c271fa92SBaptiste Daroussin 	int invert = 0;
690f0957ccaSPeter Wemm 	static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' };
691f0957ccaSPeter Wemm 	static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' };
692f0957ccaSPeter Wemm 
693f0957ccaSPeter Wemm 	cs = allocset(p);
694f0957ccaSPeter Wemm 	if (cs == NULL)
695f0957ccaSPeter Wemm 		return;
696f0957ccaSPeter Wemm 
697f0957ccaSPeter Wemm 	/* Dept of Truly Sickening Special-Case Kludges */
698f0957ccaSPeter Wemm 	if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) {
699f0957ccaSPeter Wemm 		EMIT(OBOW, 0);
700f0957ccaSPeter Wemm 		NEXTn(6);
701f0957ccaSPeter Wemm 		return;
702f0957ccaSPeter Wemm 	}
703f0957ccaSPeter Wemm 	if (p->next + 5 < p->end && MEMCMP(p->next, eow, 6) == 0) {
704f0957ccaSPeter Wemm 		EMIT(OEOW, 0);
705f0957ccaSPeter Wemm 		NEXTn(6);
706f0957ccaSPeter Wemm 		return;
707f0957ccaSPeter Wemm 	}
708f0957ccaSPeter Wemm 
709f0957ccaSPeter Wemm 	if (EAT('^'))
710f0957ccaSPeter Wemm 		invert++;	/* make note to invert set at end */
711f0957ccaSPeter Wemm 	if (EAT(']'))
712f0957ccaSPeter Wemm 		CHadd(cs, ']');
713f0957ccaSPeter Wemm 	else if (EAT('-'))
714f0957ccaSPeter Wemm 		CHadd(cs, '-');
715f0957ccaSPeter Wemm 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
716f0957ccaSPeter Wemm 		p_b_term(p, cs);
717f0957ccaSPeter Wemm 	if (EAT('-'))
718f0957ccaSPeter Wemm 		CHadd(cs, '-');
719f0957ccaSPeter Wemm 	(void)MUSTEAT(']', REG_EBRACK);
720f0957ccaSPeter Wemm 
721f0957ccaSPeter Wemm 	if (p->error != 0)	/* don't mess things up further */
722f0957ccaSPeter Wemm 		return;
723f0957ccaSPeter Wemm 
724f0957ccaSPeter Wemm 	if (p->g->cflags&REG_ICASE) {
725*c271fa92SBaptiste Daroussin 		int i;
726*c271fa92SBaptiste Daroussin 		int ci;
727f0957ccaSPeter Wemm 
728f0957ccaSPeter Wemm 		for (i = p->g->csetsize - 1; i >= 0; i--)
729f0957ccaSPeter Wemm 			if (CHIN(cs, i) && isalpha(i)) {
730f0957ccaSPeter Wemm 				ci = othercase(i);
731f0957ccaSPeter Wemm 				if (ci != i)
732f0957ccaSPeter Wemm 					CHadd(cs, ci);
733f0957ccaSPeter Wemm 			}
734f0957ccaSPeter Wemm 		if (cs->multis != NULL)
735f0957ccaSPeter Wemm 			mccase(p, cs);
736f0957ccaSPeter Wemm 	}
737f0957ccaSPeter Wemm 	if (invert) {
738*c271fa92SBaptiste Daroussin 		int i;
739f0957ccaSPeter Wemm 
740f0957ccaSPeter Wemm 		for (i = p->g->csetsize - 1; i >= 0; i--)
741f0957ccaSPeter Wemm 			if (CHIN(cs, i))
742f0957ccaSPeter Wemm 				CHsub(cs, i);
743f0957ccaSPeter Wemm 			else
744f0957ccaSPeter Wemm 				CHadd(cs, i);
745f0957ccaSPeter Wemm 		if (p->g->cflags&REG_NEWLINE)
746f0957ccaSPeter Wemm 			CHsub(cs, '\n');
747f0957ccaSPeter Wemm 		if (cs->multis != NULL)
748f0957ccaSPeter Wemm 			mcinvert(p, cs);
749f0957ccaSPeter Wemm 	}
750f0957ccaSPeter Wemm 
751f0957ccaSPeter Wemm 	assert(cs->multis == NULL);		/* xxx */
752f0957ccaSPeter Wemm 
753f0957ccaSPeter Wemm 	if (nch(p, cs) == 1) {		/* optimize singleton sets */
754f0957ccaSPeter Wemm 		ordinary(p, firstch(p, cs));
755f0957ccaSPeter Wemm 		freeset(p, cs);
756f0957ccaSPeter Wemm 	} else
757f0957ccaSPeter Wemm 		EMIT(OANYOF, freezeset(p, cs));
758f0957ccaSPeter Wemm }
759f0957ccaSPeter Wemm 
760f0957ccaSPeter Wemm /*
761f0957ccaSPeter Wemm  - p_b_term - parse one term of a bracketed character list
762f0957ccaSPeter Wemm  */
763f0957ccaSPeter Wemm static void
p_b_term(struct parse * p,cset * cs)764*c271fa92SBaptiste Daroussin p_b_term(struct parse *p, cset *cs)
765f0957ccaSPeter Wemm {
766*c271fa92SBaptiste Daroussin 	char c;
767*c271fa92SBaptiste Daroussin 	char start, finish;
768*c271fa92SBaptiste Daroussin 	int i;
769f0957ccaSPeter Wemm 
770f0957ccaSPeter Wemm 	/* classify what we've got */
771f0957ccaSPeter Wemm 	switch ((MORE()) ? PEEK() : '\0') {
772f0957ccaSPeter Wemm 	case '[':
773f0957ccaSPeter Wemm 		c = (MORE2()) ? PEEK2() : '\0';
774f0957ccaSPeter Wemm 		break;
775f0957ccaSPeter Wemm 	case '-':
776f0957ccaSPeter Wemm 		SETERROR(REG_ERANGE);
777f0957ccaSPeter Wemm 		return;			/* NOTE RETURN */
778f0957ccaSPeter Wemm 		break;
779f0957ccaSPeter Wemm 	default:
780f0957ccaSPeter Wemm 		c = '\0';
781f0957ccaSPeter Wemm 		break;
782f0957ccaSPeter Wemm 	}
783f0957ccaSPeter Wemm 
784f0957ccaSPeter Wemm 	switch (c) {
785f0957ccaSPeter Wemm 	case ':':		/* character class */
786f0957ccaSPeter Wemm 		NEXT2();
787f0957ccaSPeter Wemm 		(void)REQUIRE(MORE(), REG_EBRACK);
788f0957ccaSPeter Wemm 		c = PEEK();
789f0957ccaSPeter Wemm 		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
790f0957ccaSPeter Wemm 		p_b_cclass(p, cs);
791f0957ccaSPeter Wemm 		(void)REQUIRE(MORE(), REG_EBRACK);
792f0957ccaSPeter Wemm 		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
793f0957ccaSPeter Wemm 		break;
794f0957ccaSPeter Wemm 	case '=':		/* equivalence class */
795f0957ccaSPeter Wemm 		NEXT2();
796f0957ccaSPeter Wemm 		(void)REQUIRE(MORE(), REG_EBRACK);
797f0957ccaSPeter Wemm 		c = PEEK();
798f0957ccaSPeter Wemm 		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
799f0957ccaSPeter Wemm 		p_b_eclass(p, cs);
800f0957ccaSPeter Wemm 		(void)REQUIRE(MORE(), REG_EBRACK);
801f0957ccaSPeter Wemm 		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
802f0957ccaSPeter Wemm 		break;
803f0957ccaSPeter Wemm 	default:		/* symbol, ordinary character, or range */
804f0957ccaSPeter Wemm /* xxx revision needed for multichar stuff */
805f0957ccaSPeter Wemm 		start = p_b_symbol(p);
806f0957ccaSPeter Wemm 		if (SEE('-') && MORE2() && PEEK2() != ']') {
807f0957ccaSPeter Wemm 			/* range */
808f0957ccaSPeter Wemm 			NEXT();
809f0957ccaSPeter Wemm 			if (EAT('-'))
810f0957ccaSPeter Wemm 				finish = '-';
811f0957ccaSPeter Wemm 			else
812f0957ccaSPeter Wemm 				finish = p_b_symbol(p);
813f0957ccaSPeter Wemm 		} else
814f0957ccaSPeter Wemm 			finish = start;
815f0957ccaSPeter Wemm /* xxx what about signed chars here... */
816f0957ccaSPeter Wemm 		(void)REQUIRE(start <= finish, REG_ERANGE);
817f0957ccaSPeter Wemm 		for (i = start; i <= finish; i++)
818f0957ccaSPeter Wemm 			CHadd(cs, i);
819f0957ccaSPeter Wemm 		break;
820f0957ccaSPeter Wemm 	}
821f0957ccaSPeter Wemm }
822f0957ccaSPeter Wemm 
823f0957ccaSPeter Wemm /*
824f0957ccaSPeter Wemm  - p_b_cclass - parse a character-class name and deal with it
825f0957ccaSPeter Wemm  */
826f0957ccaSPeter Wemm static void
p_b_cclass(struct parse * p,cset * cs)827*c271fa92SBaptiste Daroussin p_b_cclass(struct parse *p, cset *cs)
828f0957ccaSPeter Wemm {
829*c271fa92SBaptiste Daroussin 	RCHAR_T *sp = p->next;
830*c271fa92SBaptiste Daroussin 	struct cclass *cp;
831*c271fa92SBaptiste Daroussin 	size_t len;
832*c271fa92SBaptiste Daroussin 	const char *u;
833*c271fa92SBaptiste Daroussin 	char c;
834f0957ccaSPeter Wemm 
835f0957ccaSPeter Wemm 	while (MORE() && isalpha(PEEK()))
836f0957ccaSPeter Wemm 		NEXT();
837f0957ccaSPeter Wemm 	len = p->next - sp;
838f0957ccaSPeter Wemm 	for (cp = cclasses; cp->name != NULL; cp++)
839f0957ccaSPeter Wemm 		if (STRLEN(cp->name) == len && !MEMCMP(cp->name, sp, len))
840f0957ccaSPeter Wemm 			break;
841f0957ccaSPeter Wemm 	if (cp->name == NULL) {
842f0957ccaSPeter Wemm 		/* oops, didn't find it */
843f0957ccaSPeter Wemm 		SETERROR(REG_ECTYPE);
844f0957ccaSPeter Wemm 		return;
845f0957ccaSPeter Wemm 	}
846f0957ccaSPeter Wemm 
847f0957ccaSPeter Wemm 	u = cp->chars;
848f0957ccaSPeter Wemm 	while ((c = *u++) != '\0')
849f0957ccaSPeter Wemm 		CHadd(cs, c);
850f0957ccaSPeter Wemm 	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
851f0957ccaSPeter Wemm 		MCadd(p, cs, u);
852f0957ccaSPeter Wemm }
853f0957ccaSPeter Wemm 
854f0957ccaSPeter Wemm /*
855f0957ccaSPeter Wemm  - p_b_eclass - parse an equivalence-class name and deal with it
856f0957ccaSPeter Wemm  *
857f0957ccaSPeter Wemm  * This implementation is incomplete. xxx
858f0957ccaSPeter Wemm  */
859f0957ccaSPeter Wemm static void
p_b_eclass(struct parse * p,cset * cs)860*c271fa92SBaptiste Daroussin p_b_eclass(struct parse *p, cset *cs)
861f0957ccaSPeter Wemm {
862*c271fa92SBaptiste Daroussin 	char c;
863f0957ccaSPeter Wemm 
864f0957ccaSPeter Wemm 	c = p_b_coll_elem(p, '=');
865f0957ccaSPeter Wemm 	CHadd(cs, c);
866f0957ccaSPeter Wemm }
867f0957ccaSPeter Wemm 
868f0957ccaSPeter Wemm /*
869f0957ccaSPeter Wemm  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
870f0957ccaSPeter Wemm  */
871f0957ccaSPeter Wemm static char			/* value of symbol */
p_b_symbol(struct parse * p)872*c271fa92SBaptiste Daroussin p_b_symbol(struct parse *p)
873f0957ccaSPeter Wemm {
874*c271fa92SBaptiste Daroussin 	char value;
875f0957ccaSPeter Wemm 
876f0957ccaSPeter Wemm 	(void)REQUIRE(MORE(), REG_EBRACK);
877f0957ccaSPeter Wemm 	if (!EATTWO('[', '.'))
878f0957ccaSPeter Wemm 		return(GETNEXT());
879f0957ccaSPeter Wemm 
880f0957ccaSPeter Wemm 	/* collating symbol */
881f0957ccaSPeter Wemm 	value = p_b_coll_elem(p, '.');
882f0957ccaSPeter Wemm 	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
883f0957ccaSPeter Wemm 	return(value);
884f0957ccaSPeter Wemm }
885f0957ccaSPeter Wemm 
886f0957ccaSPeter Wemm /*
887f0957ccaSPeter Wemm  - p_b_coll_elem - parse a collating-element name and look it up
888f0957ccaSPeter Wemm  */
889f0957ccaSPeter Wemm static char			/* value of collating element */
p_b_coll_elem(struct parse * p,int endc)890*c271fa92SBaptiste Daroussin p_b_coll_elem(struct parse *p, int endc)
891f0957ccaSPeter Wemm 
892f0957ccaSPeter Wemm          			/* name ended by endc,']' */
893f0957ccaSPeter Wemm {
894*c271fa92SBaptiste Daroussin 	RCHAR_T *sp = p->next;
895*c271fa92SBaptiste Daroussin 	struct cname *cp;
896*c271fa92SBaptiste Daroussin 	size_t len;
897f0957ccaSPeter Wemm 
898f0957ccaSPeter Wemm 	while (MORE() && !SEETWO(endc, ']'))
899f0957ccaSPeter Wemm 		NEXT();
900f0957ccaSPeter Wemm 	if (!MORE()) {
901f0957ccaSPeter Wemm 		SETERROR(REG_EBRACK);
902f0957ccaSPeter Wemm 		return(0);
903f0957ccaSPeter Wemm 	}
904f0957ccaSPeter Wemm 	len = p->next - sp;
905f0957ccaSPeter Wemm 	for (cp = cnames; cp->name != NULL; cp++)
906f0957ccaSPeter Wemm 		if (STRLEN(cp->name) == len && MEMCMP(cp->name, sp, len))
907f0957ccaSPeter Wemm 			return(cp->code);	/* known name */
908f0957ccaSPeter Wemm 	if (len == 1)
909f0957ccaSPeter Wemm 		return(*sp);	/* single character */
910f0957ccaSPeter Wemm 	SETERROR(REG_ECOLLATE);			/* neither */
911f0957ccaSPeter Wemm 	return(0);
912f0957ccaSPeter Wemm }
913f0957ccaSPeter Wemm 
914f0957ccaSPeter Wemm /*
915f0957ccaSPeter Wemm  - othercase - return the case counterpart of an alphabetic
916f0957ccaSPeter Wemm  */
917f0957ccaSPeter Wemm static char			/* if no counterpart, return ch */
othercase(int ch)918f0957ccaSPeter Wemm othercase(int ch)
919f0957ccaSPeter Wemm {
920f0957ccaSPeter Wemm 	assert(isalpha(ch));
921f0957ccaSPeter Wemm 	if (isupper(ch))
922f0957ccaSPeter Wemm 		return(tolower(ch));
923f0957ccaSPeter Wemm 	else if (islower(ch))
924f0957ccaSPeter Wemm 		return(toupper(ch));
925f0957ccaSPeter Wemm 	else			/* peculiar, but could happen */
926f0957ccaSPeter Wemm 		return(ch);
927f0957ccaSPeter Wemm }
928f0957ccaSPeter Wemm 
929f0957ccaSPeter Wemm /*
930f0957ccaSPeter Wemm  - bothcases - emit a dualcase version of a two-case character
931f0957ccaSPeter Wemm  *
932f0957ccaSPeter Wemm  * Boy, is this implementation ever a kludge...
933f0957ccaSPeter Wemm  */
934f0957ccaSPeter Wemm static void
bothcases(struct parse * p,int ch)935*c271fa92SBaptiste Daroussin bothcases(struct parse *p, int ch)
936f0957ccaSPeter Wemm {
937*c271fa92SBaptiste Daroussin 	RCHAR_T *oldnext = p->next;
938*c271fa92SBaptiste Daroussin 	RCHAR_T *oldend = p->end;
939f0957ccaSPeter Wemm 	RCHAR_T bracket[3];
940f0957ccaSPeter Wemm 
941f0957ccaSPeter Wemm 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
942f0957ccaSPeter Wemm 	p->next = bracket;
943f0957ccaSPeter Wemm 	p->end = bracket+2;
944f0957ccaSPeter Wemm 	bracket[0] = ch;
945f0957ccaSPeter Wemm 	bracket[1] = ']';
946f0957ccaSPeter Wemm 	bracket[2] = '\0';
947f0957ccaSPeter Wemm 	p_bracket(p);
948f0957ccaSPeter Wemm 	assert(p->next == bracket+2);
949f0957ccaSPeter Wemm 	p->next = oldnext;
950f0957ccaSPeter Wemm 	p->end = oldend;
951f0957ccaSPeter Wemm }
952f0957ccaSPeter Wemm 
953f0957ccaSPeter Wemm /*
954f0957ccaSPeter Wemm  - ordinary - emit an ordinary character
955f0957ccaSPeter Wemm  */
956f0957ccaSPeter Wemm static void
ordinary(struct parse * p,int ch)957*c271fa92SBaptiste Daroussin ordinary(struct parse *p, int ch)
958f0957ccaSPeter Wemm {
959f0957ccaSPeter Wemm /*
960*c271fa92SBaptiste Daroussin 	cat_t *cap = p->g->categories;
961f0957ccaSPeter Wemm */
962f0957ccaSPeter Wemm 
963f0957ccaSPeter Wemm 	if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
964f0957ccaSPeter Wemm 		bothcases(p, ch);
965f0957ccaSPeter Wemm 	else {
966f0957ccaSPeter Wemm 		EMIT(OCHAR, (UCHAR_T)ch);
967f0957ccaSPeter Wemm /*
968f0957ccaSPeter Wemm 		if (cap[ch] == 0)
969f0957ccaSPeter Wemm 			cap[ch] = p->g->ncategories++;
970f0957ccaSPeter Wemm */
971f0957ccaSPeter Wemm 	}
972f0957ccaSPeter Wemm }
973f0957ccaSPeter Wemm 
974f0957ccaSPeter Wemm /*
975f0957ccaSPeter Wemm  - nonnewline - emit REG_NEWLINE version of OANY
976f0957ccaSPeter Wemm  *
977f0957ccaSPeter Wemm  * Boy, is this implementation ever a kludge...
978f0957ccaSPeter Wemm  */
979f0957ccaSPeter Wemm static void
nonnewline(struct parse * p)980*c271fa92SBaptiste Daroussin nonnewline(struct parse *p)
981f0957ccaSPeter Wemm {
982*c271fa92SBaptiste Daroussin 	RCHAR_T *oldnext = p->next;
983*c271fa92SBaptiste Daroussin 	RCHAR_T *oldend = p->end;
984f0957ccaSPeter Wemm 	RCHAR_T bracket[4];
985f0957ccaSPeter Wemm 
986f0957ccaSPeter Wemm 	p->next = bracket;
987f0957ccaSPeter Wemm 	p->end = bracket+3;
988f0957ccaSPeter Wemm 	bracket[0] = '^';
989f0957ccaSPeter Wemm 	bracket[1] = '\n';
990f0957ccaSPeter Wemm 	bracket[2] = ']';
991f0957ccaSPeter Wemm 	bracket[3] = '\0';
992f0957ccaSPeter Wemm 	p_bracket(p);
993f0957ccaSPeter Wemm 	assert(p->next == bracket+3);
994f0957ccaSPeter Wemm 	p->next = oldnext;
995f0957ccaSPeter Wemm 	p->end = oldend;
996f0957ccaSPeter Wemm }
997f0957ccaSPeter Wemm 
998f0957ccaSPeter Wemm /*
999f0957ccaSPeter Wemm  - repeat - generate code for a bounded repetition, recursively if needed
1000f0957ccaSPeter Wemm  */
1001f0957ccaSPeter Wemm static void
repeat(struct parse * p,sopno start,int from,int to,size_t reclimit)1002*c271fa92SBaptiste Daroussin repeat(struct parse *p,
1003*c271fa92SBaptiste Daroussin     sopno start,		/* operand from here to end of strip */
1004*c271fa92SBaptiste Daroussin     int from,			/* repeated from this number */
1005*c271fa92SBaptiste Daroussin     int to,			/* to this number of times (maybe INFINITY) */
1006*c271fa92SBaptiste Daroussin     size_t reclimit)
1007f0957ccaSPeter Wemm {
1008*c271fa92SBaptiste Daroussin 	sopno finish;
1009f0957ccaSPeter Wemm #	define	N	2
1010f0957ccaSPeter Wemm #	define	INF	3
1011f0957ccaSPeter Wemm #	define	REP(f, t)	((f)*8 + (t))
1012f0957ccaSPeter Wemm #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1013*c271fa92SBaptiste Daroussin 	sopno copy;
1014f0957ccaSPeter Wemm 
1015f0957ccaSPeter Wemm 	if (reclimit++ > RECLIMIT)
1016f0957ccaSPeter Wemm 		p->error = REG_ESPACE;
1017f0957ccaSPeter Wemm 	if (p->error)
1018f0957ccaSPeter Wemm 		return;
1019f0957ccaSPeter Wemm 
1020f0957ccaSPeter Wemm 	finish = HERE();
1021f0957ccaSPeter Wemm 
1022f0957ccaSPeter Wemm 	assert(from <= to);
1023f0957ccaSPeter Wemm 
1024f0957ccaSPeter Wemm 	switch (REP(MAP(from), MAP(to))) {
1025f0957ccaSPeter Wemm 	case REP(0, 0):			/* must be user doing this */
1026f0957ccaSPeter Wemm 		DROP(finish-start);	/* drop the operand */
1027f0957ccaSPeter Wemm 		break;
1028f0957ccaSPeter Wemm 	case REP(0, 1):			/* as x{1,1}? */
1029f0957ccaSPeter Wemm 	case REP(0, N):			/* as x{1,n}? */
1030f0957ccaSPeter Wemm 	case REP(0, INF):		/* as x{1,}? */
1031f0957ccaSPeter Wemm 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1032f0957ccaSPeter Wemm 		INSERT(OCH_, start);		/* offset is wrong... */
1033f0957ccaSPeter Wemm 		repeat(p, start+1, 1, to, reclimit);
1034f0957ccaSPeter Wemm 		ASTERN(OOR1, start);
1035f0957ccaSPeter Wemm 		AHEAD(start);			/* ... fix it */
1036f0957ccaSPeter Wemm 		EMIT(OOR2, 0);
1037f0957ccaSPeter Wemm 		AHEAD(THERE());
1038f0957ccaSPeter Wemm 		ASTERN(O_CH, THERETHERE());
1039f0957ccaSPeter Wemm 		break;
1040f0957ccaSPeter Wemm 	case REP(1, 1):			/* trivial case */
1041f0957ccaSPeter Wemm 		/* done */
1042f0957ccaSPeter Wemm 		break;
1043f0957ccaSPeter Wemm 	case REP(1, N):			/* as x?x{1,n-1} */
1044f0957ccaSPeter Wemm 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1045f0957ccaSPeter Wemm 		INSERT(OCH_, start);
1046f0957ccaSPeter Wemm 		ASTERN(OOR1, start);
1047f0957ccaSPeter Wemm 		AHEAD(start);
1048f0957ccaSPeter Wemm 		EMIT(OOR2, 0);			/* offset very wrong... */
1049f0957ccaSPeter Wemm 		AHEAD(THERE());			/* ...so fix it */
1050f0957ccaSPeter Wemm 		ASTERN(O_CH, THERETHERE());
1051f0957ccaSPeter Wemm 		copy = dupl(p, start+1, finish+1);
1052f0957ccaSPeter Wemm 		assert(copy == finish+4);
1053f0957ccaSPeter Wemm 		repeat(p, copy, 1, to-1, reclimit);
1054f0957ccaSPeter Wemm 		break;
1055f0957ccaSPeter Wemm 	case REP(1, INF):		/* as x+ */
1056f0957ccaSPeter Wemm 		INSERT(OPLUS_, start);
1057f0957ccaSPeter Wemm 		ASTERN(O_PLUS, start);
1058f0957ccaSPeter Wemm 		break;
1059f0957ccaSPeter Wemm 	case REP(N, N):			/* as xx{m-1,n-1} */
1060f0957ccaSPeter Wemm 		copy = dupl(p, start, finish);
1061f0957ccaSPeter Wemm 		repeat(p, copy, from-1, to-1, reclimit);
1062f0957ccaSPeter Wemm 		break;
1063f0957ccaSPeter Wemm 	case REP(N, INF):		/* as xx{n-1,INF} */
1064f0957ccaSPeter Wemm 		copy = dupl(p, start, finish);
1065f0957ccaSPeter Wemm 		repeat(p, copy, from-1, to, reclimit);
1066f0957ccaSPeter Wemm 		break;
1067f0957ccaSPeter Wemm 	default:			/* "can't happen" */
1068f0957ccaSPeter Wemm 		SETERROR(REG_ASSERT);	/* just in case */
1069f0957ccaSPeter Wemm 		break;
1070f0957ccaSPeter Wemm 	}
1071f0957ccaSPeter Wemm }
1072f0957ccaSPeter Wemm 
1073f0957ccaSPeter Wemm /*
1074f0957ccaSPeter Wemm  - seterr - set an error condition
1075f0957ccaSPeter Wemm  */
1076f0957ccaSPeter Wemm static int			/* useless but makes type checking happy */
seterr(struct parse * p,int e)1077*c271fa92SBaptiste Daroussin seterr(struct parse *p, int e)
1078f0957ccaSPeter Wemm {
1079f0957ccaSPeter Wemm 	if (p->error == 0)	/* keep earliest error condition */
1080f0957ccaSPeter Wemm 		p->error = e;
1081f0957ccaSPeter Wemm 	p->next = nuls;		/* try to bring things to a halt */
1082f0957ccaSPeter Wemm 	p->end = nuls;
1083f0957ccaSPeter Wemm 	return(0);		/* make the return value well-defined */
1084f0957ccaSPeter Wemm }
1085f0957ccaSPeter Wemm 
1086f0957ccaSPeter Wemm /*
1087f0957ccaSPeter Wemm  - allocset - allocate a set of characters for []
1088f0957ccaSPeter Wemm  */
1089f0957ccaSPeter Wemm static cset *
allocset(struct parse * p)1090*c271fa92SBaptiste Daroussin allocset(struct parse *p)
1091f0957ccaSPeter Wemm {
1092*c271fa92SBaptiste Daroussin 	int no = p->g->ncsets++;
1093*c271fa92SBaptiste Daroussin 	size_t nc;
1094*c271fa92SBaptiste Daroussin 	size_t nbytes;
1095*c271fa92SBaptiste Daroussin 	cset *cs;
1096*c271fa92SBaptiste Daroussin 	size_t css = (size_t)p->g->csetsize;
1097*c271fa92SBaptiste Daroussin 	int i;
1098f0957ccaSPeter Wemm 
1099f0957ccaSPeter Wemm 	if (no >= p->ncsalloc) {	/* need another column of space */
1100f0957ccaSPeter Wemm 		p->ncsalloc += CHAR_BIT;
1101f0957ccaSPeter Wemm 		nc = p->ncsalloc;
1102f0957ccaSPeter Wemm 		assert(nc % CHAR_BIT == 0);
1103f0957ccaSPeter Wemm 		nbytes = nc / CHAR_BIT * css;
1104f0957ccaSPeter Wemm 		if (MEMSIZE(p) > MEMLIMIT)
1105f0957ccaSPeter Wemm 			goto oomem;
1106f0957ccaSPeter Wemm 		if (p->g->sets == NULL)
1107f0957ccaSPeter Wemm 			p->g->sets = (cset *)malloc(nc * sizeof(cset));
1108f0957ccaSPeter Wemm 		else
1109f0957ccaSPeter Wemm 			p->g->sets = (cset *)realloc((char *)p->g->sets,
1110f0957ccaSPeter Wemm 							nc * sizeof(cset));
1111f0957ccaSPeter Wemm 		if (p->g->setbits == NULL)
1112f0957ccaSPeter Wemm 			p->g->setbits = (uch *)malloc(nbytes);
1113f0957ccaSPeter Wemm 		else {
1114f0957ccaSPeter Wemm 			p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1115f0957ccaSPeter Wemm 								nbytes);
1116f0957ccaSPeter Wemm 			/* xxx this isn't right if setbits is now NULL */
1117f0957ccaSPeter Wemm 			for (i = 0; i < no; i++)
1118f0957ccaSPeter Wemm 				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1119f0957ccaSPeter Wemm 		}
1120f0957ccaSPeter Wemm 		if (p->g->sets != NULL && p->g->setbits != NULL)
1121*c271fa92SBaptiste Daroussin 			memset((char *)p->g->setbits + (nbytes - css),
1122f0957ccaSPeter Wemm 								0, css);
1123f0957ccaSPeter Wemm 		else {
1124f0957ccaSPeter Wemm oomem:
1125f0957ccaSPeter Wemm 			no = 0;
1126f0957ccaSPeter Wemm 			SETERROR(REG_ESPACE);
1127f0957ccaSPeter Wemm 			/* caller's responsibility not to do set ops */
1128f0957ccaSPeter Wemm 			return NULL;
1129f0957ccaSPeter Wemm 		}
1130f0957ccaSPeter Wemm 	}
1131f0957ccaSPeter Wemm 
1132f0957ccaSPeter Wemm 	cs = &p->g->sets[no];
1133f0957ccaSPeter Wemm 	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1134f0957ccaSPeter Wemm 	cs->mask = 1 << ((no) % CHAR_BIT);
1135f0957ccaSPeter Wemm 	cs->hash = 0;
1136f0957ccaSPeter Wemm 	cs->smultis = 0;
1137f0957ccaSPeter Wemm 	cs->multis = NULL;
1138f0957ccaSPeter Wemm 
1139f0957ccaSPeter Wemm 	return(cs);
1140f0957ccaSPeter Wemm }
1141f0957ccaSPeter Wemm 
1142f0957ccaSPeter Wemm /*
1143f0957ccaSPeter Wemm  - freeset - free a now-unused set
1144f0957ccaSPeter Wemm  */
1145f0957ccaSPeter Wemm static void
freeset(struct parse * p,cset * cs)1146*c271fa92SBaptiste Daroussin freeset(struct parse *p, cset *cs)
1147f0957ccaSPeter Wemm {
1148*c271fa92SBaptiste Daroussin 	size_t i;
1149*c271fa92SBaptiste Daroussin 	cset *top = &p->g->sets[p->g->ncsets];
1150*c271fa92SBaptiste Daroussin 	size_t css = (size_t)p->g->csetsize;
1151f0957ccaSPeter Wemm 
1152f0957ccaSPeter Wemm 	for (i = 0; i < css; i++)
1153f0957ccaSPeter Wemm 		CHsub(cs, i);
1154f0957ccaSPeter Wemm 	if (cs == top-1)	/* recover only the easy case */
1155f0957ccaSPeter Wemm 		p->g->ncsets--;
1156f0957ccaSPeter Wemm }
1157f0957ccaSPeter Wemm 
1158f0957ccaSPeter Wemm /*
1159f0957ccaSPeter Wemm  - freezeset - final processing on a set of characters
1160f0957ccaSPeter Wemm  *
1161f0957ccaSPeter Wemm  * The main task here is merging identical sets.  This is usually a waste
1162f0957ccaSPeter Wemm  * of time (although the hash code minimizes the overhead), but can win
1163f0957ccaSPeter Wemm  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
1164f0957ccaSPeter Wemm  * is done using addition rather than xor -- all ASCII [aA] sets xor to
1165f0957ccaSPeter Wemm  * the same value!
1166f0957ccaSPeter Wemm  */
1167f0957ccaSPeter Wemm static int			/* set number */
freezeset(struct parse * p,cset * cs)1168*c271fa92SBaptiste Daroussin freezeset(struct parse *p, cset *cs)
1169f0957ccaSPeter Wemm {
1170*c271fa92SBaptiste Daroussin 	uch h = cs->hash;
1171*c271fa92SBaptiste Daroussin 	size_t i;
1172*c271fa92SBaptiste Daroussin 	cset *top = &p->g->sets[p->g->ncsets];
1173*c271fa92SBaptiste Daroussin 	cset *cs2;
1174*c271fa92SBaptiste Daroussin 	size_t css = (size_t)p->g->csetsize;
1175f0957ccaSPeter Wemm 
1176f0957ccaSPeter Wemm 	/* look for an earlier one which is the same */
1177f0957ccaSPeter Wemm 	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1178f0957ccaSPeter Wemm 		if (cs2->hash == h && cs2 != cs) {
1179f0957ccaSPeter Wemm 			/* maybe */
1180f0957ccaSPeter Wemm 			for (i = 0; i < css; i++)
1181f0957ccaSPeter Wemm 				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1182f0957ccaSPeter Wemm 					break;		/* no */
1183f0957ccaSPeter Wemm 			if (i == css)
1184f0957ccaSPeter Wemm 				break;			/* yes */
1185f0957ccaSPeter Wemm 		}
1186f0957ccaSPeter Wemm 
1187f0957ccaSPeter Wemm 	if (cs2 < top) {	/* found one */
1188f0957ccaSPeter Wemm 		freeset(p, cs);
1189f0957ccaSPeter Wemm 		cs = cs2;
1190f0957ccaSPeter Wemm 	}
1191f0957ccaSPeter Wemm 
1192f0957ccaSPeter Wemm 	return((int)(cs - p->g->sets));
1193f0957ccaSPeter Wemm }
1194f0957ccaSPeter Wemm 
1195f0957ccaSPeter Wemm /*
1196f0957ccaSPeter Wemm  - firstch - return first character in a set (which must have at least one)
1197f0957ccaSPeter Wemm  */
1198f0957ccaSPeter Wemm static int			/* character; there is no "none" value */
firstch(struct parse * p,cset * cs)1199*c271fa92SBaptiste Daroussin firstch(struct parse *p, cset *cs)
1200f0957ccaSPeter Wemm {
1201*c271fa92SBaptiste Daroussin 	size_t i;
1202*c271fa92SBaptiste Daroussin 	size_t css = (size_t)p->g->csetsize;
1203f0957ccaSPeter Wemm 
1204f0957ccaSPeter Wemm 	for (i = 0; i < css; i++)
1205f0957ccaSPeter Wemm 		if (CHIN(cs, i))
1206f0957ccaSPeter Wemm 			return((char)i);
1207f0957ccaSPeter Wemm 	assert(never);
1208f0957ccaSPeter Wemm 	return(0);		/* arbitrary */
1209f0957ccaSPeter Wemm }
1210f0957ccaSPeter Wemm 
1211f0957ccaSPeter Wemm /*
1212f0957ccaSPeter Wemm  - nch - number of characters in a set
1213f0957ccaSPeter Wemm  */
1214f0957ccaSPeter Wemm static int
nch(struct parse * p,cset * cs)1215*c271fa92SBaptiste Daroussin nch(struct parse *p, cset *cs)
1216f0957ccaSPeter Wemm {
1217*c271fa92SBaptiste Daroussin 	size_t i;
1218*c271fa92SBaptiste Daroussin 	size_t css = (size_t)p->g->csetsize;
1219*c271fa92SBaptiste Daroussin 	int n = 0;
1220f0957ccaSPeter Wemm 
1221f0957ccaSPeter Wemm 	for (i = 0; i < css; i++)
1222f0957ccaSPeter Wemm 		if (CHIN(cs, i))
1223f0957ccaSPeter Wemm 			n++;
1224f0957ccaSPeter Wemm 	return(n);
1225f0957ccaSPeter Wemm }
1226f0957ccaSPeter Wemm 
1227f0957ccaSPeter Wemm /*
1228f0957ccaSPeter Wemm  - mcadd - add a collating element to a cset
1229f0957ccaSPeter Wemm  */
1230f0957ccaSPeter Wemm static void
mcadd(struct parse * p,cset * cs,const char * cp)1231*c271fa92SBaptiste Daroussin mcadd(struct parse *p, cset *cs, const char *cp)
1232f0957ccaSPeter Wemm {
1233*c271fa92SBaptiste Daroussin 	size_t oldend = cs->smultis;
1234f0957ccaSPeter Wemm 	void *np;
1235f0957ccaSPeter Wemm 
1236f0957ccaSPeter Wemm 	cs->smultis += strlen(cp) + 1;
1237f0957ccaSPeter Wemm 	np = realloc(cs->multis, cs->smultis);
1238f0957ccaSPeter Wemm 	if (np == NULL) {
1239f0957ccaSPeter Wemm 		if (cs->multis)
1240f0957ccaSPeter Wemm 			free(cs->multis);
1241f0957ccaSPeter Wemm 		cs->multis = NULL;
1242f0957ccaSPeter Wemm 		SETERROR(REG_ESPACE);
1243f0957ccaSPeter Wemm 		return;
1244f0957ccaSPeter Wemm 	}
1245f0957ccaSPeter Wemm 	cs->multis = np;
1246f0957ccaSPeter Wemm 
1247*c271fa92SBaptiste Daroussin 	strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
1248f0957ccaSPeter Wemm }
1249f0957ccaSPeter Wemm 
1250f0957ccaSPeter Wemm /*
1251f0957ccaSPeter Wemm  - mcinvert - invert the list of collating elements in a cset
1252f0957ccaSPeter Wemm  *
1253f0957ccaSPeter Wemm  * This would have to know the set of possibilities.  Implementation
1254f0957ccaSPeter Wemm  * is deferred.
1255f0957ccaSPeter Wemm  */
1256f0957ccaSPeter Wemm static void
mcinvert(struct parse * p,cset * cs)1257*c271fa92SBaptiste Daroussin mcinvert(struct parse *p, cset *cs)
1258f0957ccaSPeter Wemm {
1259f0957ccaSPeter Wemm 	assert(cs->multis == NULL);	/* xxx */
1260f0957ccaSPeter Wemm }
1261f0957ccaSPeter Wemm 
1262f0957ccaSPeter Wemm /*
1263f0957ccaSPeter Wemm  - mccase - add case counterparts of the list of collating elements in a cset
1264f0957ccaSPeter Wemm  *
1265f0957ccaSPeter Wemm  * This would have to know the set of possibilities.  Implementation
1266f0957ccaSPeter Wemm  * is deferred.
1267f0957ccaSPeter Wemm  */
1268f0957ccaSPeter Wemm static void
mccase(struct parse * p,cset * cs)1269*c271fa92SBaptiste Daroussin mccase(struct parse *p, cset *cs)
1270f0957ccaSPeter Wemm {
1271f0957ccaSPeter Wemm 	assert(cs->multis == NULL);	/* xxx */
1272f0957ccaSPeter Wemm }
1273f0957ccaSPeter Wemm 
1274f0957ccaSPeter Wemm #ifdef notdef
1275f0957ccaSPeter Wemm /*
1276f0957ccaSPeter Wemm  - isinsets - is this character in any sets?
1277f0957ccaSPeter Wemm  */
1278f0957ccaSPeter Wemm static int			/* predicate */
isinsets(struct re_guts * g,int c)1279*c271fa92SBaptiste Daroussin isinsets(struct re_guts *g, int c)
1280f0957ccaSPeter Wemm {
1281*c271fa92SBaptiste Daroussin 	uch *col;
1282*c271fa92SBaptiste Daroussin 	int i;
1283*c271fa92SBaptiste Daroussin 	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1284*c271fa92SBaptiste Daroussin 	unsigned uc = (unsigned char)c;
1285f0957ccaSPeter Wemm 
1286f0957ccaSPeter Wemm 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1287f0957ccaSPeter Wemm 		if (col[uc] != 0)
1288f0957ccaSPeter Wemm 			return(1);
1289f0957ccaSPeter Wemm 	return(0);
1290f0957ccaSPeter Wemm }
1291f0957ccaSPeter Wemm 
1292f0957ccaSPeter Wemm /*
1293f0957ccaSPeter Wemm  - samesets - are these two characters in exactly the same sets?
1294f0957ccaSPeter Wemm  */
1295f0957ccaSPeter Wemm static int			/* predicate */
samesets(struct re_guts * g,int c1,int c2)1296*c271fa92SBaptiste Daroussin samesets(struct re_guts *g, int c1, int c2)
1297f0957ccaSPeter Wemm {
1298*c271fa92SBaptiste Daroussin 	uch *col;
1299*c271fa92SBaptiste Daroussin 	int i;
1300*c271fa92SBaptiste Daroussin 	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1301*c271fa92SBaptiste Daroussin 	unsigned uc1 = (unsigned char)c1;
1302*c271fa92SBaptiste Daroussin 	unsigned uc2 = (unsigned char)c2;
1303f0957ccaSPeter Wemm 
1304f0957ccaSPeter Wemm 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1305f0957ccaSPeter Wemm 		if (col[uc1] != col[uc2])
1306f0957ccaSPeter Wemm 			return(0);
1307f0957ccaSPeter Wemm 	return(1);
1308f0957ccaSPeter Wemm }
1309f0957ccaSPeter Wemm #endif
1310f0957ccaSPeter Wemm 
1311f0957ccaSPeter Wemm /*
1312f0957ccaSPeter Wemm  - categorize - sort out character categories
1313f0957ccaSPeter Wemm  */
1314f0957ccaSPeter Wemm static void
categorize(struct parse * p,struct re_guts * g)1315*c271fa92SBaptiste Daroussin categorize(struct parse *p, struct re_guts *g)
1316f0957ccaSPeter Wemm {
1317f0957ccaSPeter Wemm #ifdef notdef
1318*c271fa92SBaptiste Daroussin 	cat_t *cats = g->categories;
1319*c271fa92SBaptiste Daroussin 	int c;
1320*c271fa92SBaptiste Daroussin 	int c2;
1321*c271fa92SBaptiste Daroussin 	cat_t cat;
1322f0957ccaSPeter Wemm 
1323f0957ccaSPeter Wemm 	/* avoid making error situations worse */
1324f0957ccaSPeter Wemm 	if (p->error != 0)
1325f0957ccaSPeter Wemm 		return;
1326f0957ccaSPeter Wemm 
1327f0957ccaSPeter Wemm 	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1328f0957ccaSPeter Wemm 		if (cats[c] == 0 && isinsets(g, c)) {
1329f0957ccaSPeter Wemm 			cat = g->ncategories++;
1330f0957ccaSPeter Wemm 			cats[c] = cat;
1331f0957ccaSPeter Wemm 			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1332f0957ccaSPeter Wemm 				if (cats[c2] == 0 && samesets(g, c, c2))
1333f0957ccaSPeter Wemm 					cats[c2] = cat;
1334f0957ccaSPeter Wemm 		}
1335f0957ccaSPeter Wemm #endif
1336f0957ccaSPeter Wemm }
1337f0957ccaSPeter Wemm 
1338f0957ccaSPeter Wemm /*
1339f0957ccaSPeter Wemm  - dupl - emit a duplicate of a bunch of sops
1340f0957ccaSPeter Wemm  */
1341f0957ccaSPeter Wemm static sopno			/* start of duplicate */
dupl(struct parse * p,sopno start,sopno finish)1342*c271fa92SBaptiste Daroussin dupl(struct parse *p,
1343*c271fa92SBaptiste Daroussin     sopno start,		/* from here */
1344*c271fa92SBaptiste Daroussin     sopno finish)		/* to this less one */
1345f0957ccaSPeter Wemm {
1346*c271fa92SBaptiste Daroussin 	sopno ret = HERE();
1347*c271fa92SBaptiste Daroussin 	sopno len = finish - start;
1348f0957ccaSPeter Wemm 
1349f0957ccaSPeter Wemm 	assert(finish >= start);
1350f0957ccaSPeter Wemm 	if (len == 0)
1351f0957ccaSPeter Wemm 		return(ret);
1352f0957ccaSPeter Wemm 	if (!enlarge(p, p->ssize + len))	/* this many unexpected additions */
1353f0957ccaSPeter Wemm 		return ret;
1354f0957ccaSPeter Wemm 	assert(p->ssize >= p->slen + len);
1355f0957ccaSPeter Wemm 	(void) memcpy((char *)(p->strip + p->slen),
1356f0957ccaSPeter Wemm 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
1357f0957ccaSPeter Wemm 	(void) memcpy((char *)(p->stripdata + p->slen),
1358f0957ccaSPeter Wemm 		(char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T));
1359f0957ccaSPeter Wemm 	p->slen += len;
1360f0957ccaSPeter Wemm 	return(ret);
1361f0957ccaSPeter Wemm }
1362f0957ccaSPeter Wemm 
1363f0957ccaSPeter Wemm /*
1364f0957ccaSPeter Wemm  - doemit - emit a strip operator
1365f0957ccaSPeter Wemm  *
1366f0957ccaSPeter Wemm  * It might seem better to implement this as a macro with a function as
1367f0957ccaSPeter Wemm  * hard-case backup, but it's just too big and messy unless there are
1368f0957ccaSPeter Wemm  * some changes to the data structures.  Maybe later.
1369f0957ccaSPeter Wemm  */
1370f0957ccaSPeter Wemm static void
doemit(struct parse * p,sop op,size_t opnd)1371*c271fa92SBaptiste Daroussin doemit(struct parse *p, sop op, size_t opnd)
1372f0957ccaSPeter Wemm {
1373f0957ccaSPeter Wemm 	/* avoid making error situations worse */
1374f0957ccaSPeter Wemm 	if (p->error != 0)
1375f0957ccaSPeter Wemm 		return;
1376f0957ccaSPeter Wemm 
1377f0957ccaSPeter Wemm 	/* deal with oversize operands ("can't happen", more or less) */
1378f0957ccaSPeter Wemm 	assert(opnd < 1);
1379f0957ccaSPeter Wemm 
1380f0957ccaSPeter Wemm 	/* deal with undersized strip */
1381f0957ccaSPeter Wemm 	if (p->slen >= p->ssize)
1382f0957ccaSPeter Wemm 		if (!enlarge(p, (p->ssize+1) / 2 * 3))	/* +50% */
1383f0957ccaSPeter Wemm 			return;
1384f0957ccaSPeter Wemm 
1385f0957ccaSPeter Wemm 	/* finally, it's all reduced to the easy case */
1386f0957ccaSPeter Wemm 	p->strip[p->slen] = op;
1387f0957ccaSPeter Wemm 	p->stripdata[p->slen] = opnd;
1388f0957ccaSPeter Wemm 	p->slen++;
1389f0957ccaSPeter Wemm }
1390f0957ccaSPeter Wemm 
1391f0957ccaSPeter Wemm /*
1392f0957ccaSPeter Wemm  - doinsert - insert a sop into the strip
1393f0957ccaSPeter Wemm  */
1394f0957ccaSPeter Wemm static void
doinsert(struct parse * p,sop op,size_t opnd,sopno pos)1395*c271fa92SBaptiste Daroussin doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
1396f0957ccaSPeter Wemm {
1397*c271fa92SBaptiste Daroussin 	sopno sn;
1398*c271fa92SBaptiste Daroussin 	sop s;
1399*c271fa92SBaptiste Daroussin 	RCHAR_T d;
1400*c271fa92SBaptiste Daroussin 	int i;
1401f0957ccaSPeter Wemm 
1402f0957ccaSPeter Wemm 	/* avoid making error situations worse */
1403f0957ccaSPeter Wemm 	if (p->error != 0)
1404f0957ccaSPeter Wemm 		return;
1405f0957ccaSPeter Wemm 
1406f0957ccaSPeter Wemm 	sn = HERE();
1407f0957ccaSPeter Wemm 	EMIT(op, opnd);		/* do checks, ensure space */
1408f0957ccaSPeter Wemm 	assert(HERE() == sn+1);
1409f0957ccaSPeter Wemm 	s = p->strip[sn];
1410f0957ccaSPeter Wemm 	d = p->stripdata[sn];
1411f0957ccaSPeter Wemm 
1412f0957ccaSPeter Wemm 	/* adjust paren pointers */
1413f0957ccaSPeter Wemm 	assert(pos > 0);
1414f0957ccaSPeter Wemm 	for (i = 1; i < NPAREN; i++) {
1415f0957ccaSPeter Wemm 		if (p->pbegin[i] >= pos) {
1416f0957ccaSPeter Wemm 			p->pbegin[i]++;
1417f0957ccaSPeter Wemm 		}
1418f0957ccaSPeter Wemm 		if (p->pend[i] >= pos) {
1419f0957ccaSPeter Wemm 			p->pend[i]++;
1420f0957ccaSPeter Wemm 		}
1421f0957ccaSPeter Wemm 	}
1422f0957ccaSPeter Wemm 
1423f0957ccaSPeter Wemm 	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1424f0957ccaSPeter Wemm 						(HERE()-pos-1)*sizeof(sop));
1425f0957ccaSPeter Wemm 	memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos],
1426f0957ccaSPeter Wemm 						(HERE()-pos-1)*sizeof(RCHAR_T));
1427f0957ccaSPeter Wemm 	p->strip[pos] = s;
1428f0957ccaSPeter Wemm 	p->stripdata[pos] = d;
1429f0957ccaSPeter Wemm }
1430f0957ccaSPeter Wemm 
1431f0957ccaSPeter Wemm /*
1432f0957ccaSPeter Wemm  - dofwd - complete a forward reference
1433f0957ccaSPeter Wemm  */
1434f0957ccaSPeter Wemm static void
dofwd(struct parse * p,sopno pos,sop value)1435*c271fa92SBaptiste Daroussin dofwd(struct parse *p, sopno pos, sop value)
1436f0957ccaSPeter Wemm {
1437f0957ccaSPeter Wemm 	/* avoid making error situations worse */
1438f0957ccaSPeter Wemm 	if (p->error != 0)
1439f0957ccaSPeter Wemm 		return;
1440f0957ccaSPeter Wemm 
1441f0957ccaSPeter Wemm 	assert(value < 1);
1442f0957ccaSPeter Wemm 	p->stripdata[pos] = value;
1443f0957ccaSPeter Wemm }
1444f0957ccaSPeter Wemm 
1445f0957ccaSPeter Wemm /*
1446f0957ccaSPeter Wemm  - enlarge - enlarge the strip
1447f0957ccaSPeter Wemm  */
1448f0957ccaSPeter Wemm static int
enlarge(struct parse * p,sopno size)1449*c271fa92SBaptiste Daroussin enlarge(struct parse *p, sopno size)
1450f0957ccaSPeter Wemm {
1451*c271fa92SBaptiste Daroussin 	sop *sp;
1452*c271fa92SBaptiste Daroussin 	RCHAR_T *dp;
1453f0957ccaSPeter Wemm 	sopno osize;
1454f0957ccaSPeter Wemm 
1455f0957ccaSPeter Wemm 	if (p->ssize >= size)
1456f0957ccaSPeter Wemm 		return 1;
1457f0957ccaSPeter Wemm 
1458f0957ccaSPeter Wemm 	osize = p->ssize;
1459f0957ccaSPeter Wemm 	p->ssize = size;
1460f0957ccaSPeter Wemm 	if (MEMSIZE(p) > MEMLIMIT)
1461f0957ccaSPeter Wemm 		goto oomem;
1462f0957ccaSPeter Wemm 	sp = realloc(p->strip, p->ssize * sizeof(sop));
1463f0957ccaSPeter Wemm 	if (sp == NULL)
1464f0957ccaSPeter Wemm 		goto oomem;
1465f0957ccaSPeter Wemm 	p->strip = sp;
1466f0957ccaSPeter Wemm 	dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T));
1467f0957ccaSPeter Wemm 	if (dp == NULL) {
1468f0957ccaSPeter Wemm oomem:
1469f0957ccaSPeter Wemm 		p->ssize = osize;
1470f0957ccaSPeter Wemm 		SETERROR(REG_ESPACE);
1471f0957ccaSPeter Wemm 		return 0;
1472f0957ccaSPeter Wemm 	}
1473f0957ccaSPeter Wemm 	p->stripdata = dp;
1474f0957ccaSPeter Wemm 	return 1;
1475f0957ccaSPeter Wemm }
1476f0957ccaSPeter Wemm 
1477f0957ccaSPeter Wemm /*
1478f0957ccaSPeter Wemm  - stripsnug - compact the strip
1479f0957ccaSPeter Wemm  */
1480f0957ccaSPeter Wemm static void
stripsnug(struct parse * p,struct re_guts * g)1481*c271fa92SBaptiste Daroussin stripsnug(struct parse *p, struct re_guts *g)
1482f0957ccaSPeter Wemm {
1483f0957ccaSPeter Wemm 	g->nstates = p->slen;
1484f0957ccaSPeter Wemm 	g->strip = (sop *)realloc((char *)p->strip,
1485f0957ccaSPeter Wemm 	    p->slen * sizeof(sop));
1486f0957ccaSPeter Wemm 	if (g->strip == NULL) {
1487f0957ccaSPeter Wemm 		SETERROR(REG_ESPACE);
1488f0957ccaSPeter Wemm 		g->strip = p->strip;
1489f0957ccaSPeter Wemm 	}
1490f0957ccaSPeter Wemm 	g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata,
1491f0957ccaSPeter Wemm 	    p->slen * sizeof(RCHAR_T));
1492f0957ccaSPeter Wemm 	if (g->stripdata == NULL) {
1493f0957ccaSPeter Wemm 		SETERROR(REG_ESPACE);
1494f0957ccaSPeter Wemm 		g->stripdata = p->stripdata;
1495f0957ccaSPeter Wemm 	}
1496f0957ccaSPeter Wemm }
1497f0957ccaSPeter Wemm 
1498f0957ccaSPeter Wemm /*
1499f0957ccaSPeter Wemm  - findmust - fill in must and mlen with longest mandatory literal string
1500f0957ccaSPeter Wemm  *
1501f0957ccaSPeter Wemm  * This algorithm could do fancy things like analyzing the operands of |
1502f0957ccaSPeter Wemm  * for common subsequences.  Someday.  This code is simple and finds most
1503f0957ccaSPeter Wemm  * of the interesting cases.
1504f0957ccaSPeter Wemm  *
1505f0957ccaSPeter Wemm  * Note that must and mlen got initialized during setup.
1506f0957ccaSPeter Wemm  */
1507f0957ccaSPeter Wemm static void
findmust(struct parse * p,struct re_guts * g)1508*c271fa92SBaptiste Daroussin findmust(struct parse *p, struct re_guts *g)
1509f0957ccaSPeter Wemm {
1510*c271fa92SBaptiste Daroussin 	sop *scans;
1511*c271fa92SBaptiste Daroussin 	RCHAR_T *scand;
1512f0957ccaSPeter Wemm 	sop *starts = 0;
1513f0957ccaSPeter Wemm 	RCHAR_T *startd = NULL;
1514*c271fa92SBaptiste Daroussin 	sop *newstarts = 0;
1515*c271fa92SBaptiste Daroussin 	RCHAR_T *newstartd = NULL;
1516*c271fa92SBaptiste Daroussin 	sopno newlen;
1517*c271fa92SBaptiste Daroussin 	sop s;
1518*c271fa92SBaptiste Daroussin 	RCHAR_T d;
1519*c271fa92SBaptiste Daroussin 	RCHAR_T *cp;
1520*c271fa92SBaptiste Daroussin 	sopno i;
1521f0957ccaSPeter Wemm 
1522f0957ccaSPeter Wemm 	/* avoid making error situations worse */
1523f0957ccaSPeter Wemm 	if (p->error != 0)
1524f0957ccaSPeter Wemm 		return;
1525f0957ccaSPeter Wemm 
1526f0957ccaSPeter Wemm 	/* find the longest OCHAR sequence in strip */
1527f0957ccaSPeter Wemm 	newlen = 0;
1528f0957ccaSPeter Wemm 	scans = g->strip + 1;
1529f0957ccaSPeter Wemm 	scand = g->stripdata + 1;
1530f0957ccaSPeter Wemm 	do {
1531f0957ccaSPeter Wemm 		s = *scans++;
1532f0957ccaSPeter Wemm 		d = *scand++;
1533f0957ccaSPeter Wemm 		switch (s) {
1534f0957ccaSPeter Wemm 		case OCHAR:		/* sequence member */
1535f0957ccaSPeter Wemm 			if (newlen == 0) {		/* new sequence */
1536f0957ccaSPeter Wemm 				newstarts = scans - 1;
1537f0957ccaSPeter Wemm 				newstartd = scand - 1;
1538f0957ccaSPeter Wemm 			}
1539f0957ccaSPeter Wemm 			newlen++;
1540f0957ccaSPeter Wemm 			break;
1541f0957ccaSPeter Wemm 		case OPLUS_:		/* things that don't break one */
1542f0957ccaSPeter Wemm 		case OLPAREN:
1543f0957ccaSPeter Wemm 		case ORPAREN:
1544f0957ccaSPeter Wemm 			break;
1545f0957ccaSPeter Wemm 		case OQUEST_:		/* things that must be skipped */
1546f0957ccaSPeter Wemm 		case OCH_:
1547f0957ccaSPeter Wemm 			scans--;
1548f0957ccaSPeter Wemm 			scand--;
1549f0957ccaSPeter Wemm 			do {
1550f0957ccaSPeter Wemm 				scans += d;
1551f0957ccaSPeter Wemm 				scand += d;
1552f0957ccaSPeter Wemm 				s = *scans;
1553f0957ccaSPeter Wemm 				d = *scand;
1554f0957ccaSPeter Wemm 				/* assert() interferes w debug printouts */
1555f0957ccaSPeter Wemm 				if (s != O_QUEST && s != O_CH && s != OOR2) {
1556f0957ccaSPeter Wemm 					g->iflags |= BAD;
1557f0957ccaSPeter Wemm 					return;
1558f0957ccaSPeter Wemm 				}
1559f0957ccaSPeter Wemm 			} while (s != O_QUEST && s != O_CH);
1560f0957ccaSPeter Wemm 			/* fallthrough */
1561f0957ccaSPeter Wemm 		default:		/* things that break a sequence */
1562f0957ccaSPeter Wemm 			if (newlen > g->mlen) {		/* ends one */
1563f0957ccaSPeter Wemm 				starts = newstarts;
1564f0957ccaSPeter Wemm 				startd = newstartd;
1565f0957ccaSPeter Wemm 				g->mlen = newlen;
1566f0957ccaSPeter Wemm 			}
1567f0957ccaSPeter Wemm 			newlen = 0;
1568f0957ccaSPeter Wemm 			break;
1569f0957ccaSPeter Wemm 		}
1570f0957ccaSPeter Wemm 	} while (s != OEND);
1571f0957ccaSPeter Wemm 
1572f0957ccaSPeter Wemm 	if (g->mlen == 0)		/* there isn't one */
1573f0957ccaSPeter Wemm 		return;
1574f0957ccaSPeter Wemm 
1575f0957ccaSPeter Wemm 	/* turn it into a character string */
1576f0957ccaSPeter Wemm 	g->must = malloc(((size_t)g->mlen + 1) * sizeof(RCHAR_T));
1577f0957ccaSPeter Wemm 	if (g->must == NULL) {		/* argh; just forget it */
1578f0957ccaSPeter Wemm 		g->mlen = 0;
1579f0957ccaSPeter Wemm 		return;
1580f0957ccaSPeter Wemm 	}
1581f0957ccaSPeter Wemm 	cp = g->must;
1582f0957ccaSPeter Wemm 	scans = starts;
1583f0957ccaSPeter Wemm 	scand = startd;
1584f0957ccaSPeter Wemm 	for (i = g->mlen; i > 0; i--) {
1585f0957ccaSPeter Wemm 		for (;;) {
1586f0957ccaSPeter Wemm 			s = *scans++;
1587f0957ccaSPeter Wemm 			d = *scand++;
1588f0957ccaSPeter Wemm 			if (s == OCHAR)
1589f0957ccaSPeter Wemm 				break;
1590f0957ccaSPeter Wemm 		}
1591f0957ccaSPeter Wemm 		assert(cp < g->must + g->mlen);
1592f0957ccaSPeter Wemm 		*cp++ = d;
1593f0957ccaSPeter Wemm 	}
1594f0957ccaSPeter Wemm 	assert(cp == g->must + g->mlen);
1595f0957ccaSPeter Wemm 	*cp++ = '\0';		/* just on general principles */
1596f0957ccaSPeter Wemm }
1597f0957ccaSPeter Wemm 
1598f0957ccaSPeter Wemm /*
1599f0957ccaSPeter Wemm  - pluscount - count + nesting
1600f0957ccaSPeter Wemm  */
1601f0957ccaSPeter Wemm static sopno			/* nesting depth */
pluscount(struct parse * p,struct re_guts * g)1602*c271fa92SBaptiste Daroussin pluscount(struct parse *p, struct re_guts *g)
1603f0957ccaSPeter Wemm {
1604*c271fa92SBaptiste Daroussin 	sop *scan;
1605*c271fa92SBaptiste Daroussin 	sop s;
1606*c271fa92SBaptiste Daroussin 	sopno plusnest = 0;
1607*c271fa92SBaptiste Daroussin 	sopno maxnest = 0;
1608f0957ccaSPeter Wemm 
1609f0957ccaSPeter Wemm 	if (p->error != 0)
1610f0957ccaSPeter Wemm 		return(0);	/* there may not be an OEND */
1611f0957ccaSPeter Wemm 
1612f0957ccaSPeter Wemm 	scan = g->strip + 1;
1613f0957ccaSPeter Wemm 	do {
1614f0957ccaSPeter Wemm 		s = *scan++;
1615f0957ccaSPeter Wemm 		switch (s) {
1616f0957ccaSPeter Wemm 		case OPLUS_:
1617f0957ccaSPeter Wemm 			plusnest++;
1618f0957ccaSPeter Wemm 			break;
1619f0957ccaSPeter Wemm 		case O_PLUS:
1620f0957ccaSPeter Wemm 			if (plusnest > maxnest)
1621f0957ccaSPeter Wemm 				maxnest = plusnest;
1622f0957ccaSPeter Wemm 			plusnest--;
1623f0957ccaSPeter Wemm 			break;
1624f0957ccaSPeter Wemm 		}
1625f0957ccaSPeter Wemm 	} while (s != OEND);
1626f0957ccaSPeter Wemm 	if (plusnest != 0)
1627f0957ccaSPeter Wemm 		g->iflags |= BAD;
1628f0957ccaSPeter Wemm 	return(maxnest);
1629f0957ccaSPeter Wemm }
1630