1f0957ccaSPeter Wemm /* $NetBSD: regcomp.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
2f0957ccaSPeter Wemm
3f0957ccaSPeter Wemm /*-
4f0957ccaSPeter Wemm * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5f0957ccaSPeter Wemm * Copyright (c) 1992, 1993, 1994
6f0957ccaSPeter Wemm * The Regents of the University of California. All rights reserved.
7f0957ccaSPeter Wemm *
8f0957ccaSPeter Wemm * This code is derived from software contributed to Berkeley by
9f0957ccaSPeter Wemm * Henry Spencer of the University of Toronto.
10f0957ccaSPeter Wemm *
11f0957ccaSPeter Wemm * Redistribution and use in source and binary forms, with or without
12f0957ccaSPeter Wemm * modification, are permitted provided that the following conditions
13f0957ccaSPeter Wemm * are met:
14f0957ccaSPeter Wemm * 1. Redistributions of source code must retain the above copyright
15f0957ccaSPeter Wemm * notice, this list of conditions and the following disclaimer.
16f0957ccaSPeter Wemm * 2. Redistributions in binary form must reproduce the above copyright
17f0957ccaSPeter Wemm * notice, this list of conditions and the following disclaimer in the
18f0957ccaSPeter Wemm * documentation and/or other materials provided with the distribution.
19*c271fa92SBaptiste Daroussin * 3. Neither the name of the University nor the names of its contributors
20f0957ccaSPeter Wemm * may be used to endorse or promote products derived from this software
21f0957ccaSPeter Wemm * without specific prior written permission.
22f0957ccaSPeter Wemm *
23f0957ccaSPeter Wemm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24f0957ccaSPeter Wemm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25f0957ccaSPeter Wemm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26f0957ccaSPeter Wemm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27f0957ccaSPeter Wemm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28f0957ccaSPeter Wemm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29f0957ccaSPeter Wemm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30f0957ccaSPeter Wemm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31f0957ccaSPeter Wemm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32f0957ccaSPeter Wemm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33f0957ccaSPeter Wemm * SUCH DAMAGE.
34f0957ccaSPeter Wemm *
35f0957ccaSPeter Wemm * @(#)regcomp.c 8.4 (Berkeley) 3/19/94
36f0957ccaSPeter Wemm */
37f0957ccaSPeter Wemm
38f0957ccaSPeter Wemm #if defined(LIBC_SCCS) && !defined(lint)
39f0957ccaSPeter Wemm static char sccsid[] = "@(#)regcomp.c 8.4 (Berkeley) 3/19/94";
40f0957ccaSPeter Wemm #endif /* LIBC_SCCS and not lint */
41f0957ccaSPeter Wemm
42f0957ccaSPeter Wemm #include <sys/types.h>
43f0957ccaSPeter Wemm #include <stdio.h>
44f0957ccaSPeter Wemm #include <string.h>
45f0957ccaSPeter Wemm #include <ctype.h>
46f0957ccaSPeter Wemm #include <limits.h>
47f0957ccaSPeter Wemm #include <stdlib.h>
48f0957ccaSPeter Wemm #include <regex.h>
49f0957ccaSPeter Wemm
50f0957ccaSPeter Wemm #include "utils.h"
51f0957ccaSPeter Wemm #include "regex2.h"
52f0957ccaSPeter Wemm
53f0957ccaSPeter Wemm #include "cclass.h"
54f0957ccaSPeter Wemm #include "cname.h"
55f0957ccaSPeter Wemm
56f0957ccaSPeter Wemm /*
57f0957ccaSPeter Wemm * parse structure, passed up and down to avoid global variables and
58f0957ccaSPeter Wemm * other clumsinesses
59f0957ccaSPeter Wemm */
60f0957ccaSPeter Wemm struct parse {
61f0957ccaSPeter Wemm RCHAR_T *next; /* next character in RE */
62f0957ccaSPeter Wemm RCHAR_T *end; /* end of string (-> NUL normally) */
63f0957ccaSPeter Wemm int error; /* has an error been seen? */
64f0957ccaSPeter Wemm sop *strip; /* malloced strip */
65f0957ccaSPeter Wemm RCHAR_T *stripdata; /* malloced stripdata */
66f0957ccaSPeter Wemm sopno ssize; /* malloced strip size (allocated) */
67f0957ccaSPeter Wemm sopno slen; /* malloced strip length (used) */
68f0957ccaSPeter Wemm int ncsalloc; /* number of csets allocated */
69f0957ccaSPeter Wemm struct re_guts *g;
70f0957ccaSPeter Wemm # define NPAREN 10 /* we need to remember () 1-9 for back refs */
71f0957ccaSPeter Wemm sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
72f0957ccaSPeter Wemm sopno pend[NPAREN]; /* -> ) ([0] unused) */
73f0957ccaSPeter Wemm };
74f0957ccaSPeter Wemm
75f0957ccaSPeter Wemm /* ========= begin header generated by ./mkh ========= */
76f0957ccaSPeter Wemm #ifdef __cplusplus
77f0957ccaSPeter Wemm extern "C" {
78f0957ccaSPeter Wemm #endif
79f0957ccaSPeter Wemm
80f0957ccaSPeter Wemm /* === regcomp.c === */
81*c271fa92SBaptiste Daroussin static void p_ere(struct parse *p, int stop, size_t reclimit);
82*c271fa92SBaptiste Daroussin static void p_ere_exp(struct parse *p, size_t reclimit);
83*c271fa92SBaptiste Daroussin static void p_str(struct parse *p);
84*c271fa92SBaptiste Daroussin static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
85*c271fa92SBaptiste Daroussin static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
86*c271fa92SBaptiste Daroussin static int p_count(struct parse *p);
87*c271fa92SBaptiste Daroussin static void p_bracket(struct parse *p);
88*c271fa92SBaptiste Daroussin static void p_b_term(struct parse *p, cset *cs);
89*c271fa92SBaptiste Daroussin static void p_b_cclass(struct parse *p, cset *cs);
90*c271fa92SBaptiste Daroussin static void p_b_eclass(struct parse *p, cset *cs);
91*c271fa92SBaptiste Daroussin static char p_b_symbol(struct parse *p);
92*c271fa92SBaptiste Daroussin static char p_b_coll_elem(struct parse *p, int endc);
93*c271fa92SBaptiste Daroussin static char othercase(int ch);
94*c271fa92SBaptiste Daroussin static void bothcases(struct parse *p, int ch);
95*c271fa92SBaptiste Daroussin static void ordinary(struct parse *p, int ch);
96*c271fa92SBaptiste Daroussin static void nonnewline(struct parse *p);
97*c271fa92SBaptiste Daroussin static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
98*c271fa92SBaptiste Daroussin static int seterr(struct parse *p, int e);
99*c271fa92SBaptiste Daroussin static cset *allocset(struct parse *p);
100*c271fa92SBaptiste Daroussin static void freeset(struct parse *p, cset *cs);
101*c271fa92SBaptiste Daroussin static int freezeset(struct parse *p, cset *cs);
102*c271fa92SBaptiste Daroussin static int firstch(struct parse *p, cset *cs);
103*c271fa92SBaptiste Daroussin static int nch(struct parse *p, cset *cs);
104*c271fa92SBaptiste Daroussin static void mcadd(struct parse *p, cset *cs, const char *cp);
105f0957ccaSPeter Wemm #ifdef notdef
106*c271fa92SBaptiste Daroussin static void mcsub(cset *cs, char *cp);
107*c271fa92SBaptiste Daroussin static int mcin(cset *cs, char *cp);
108*c271fa92SBaptiste Daroussin static char *mcfind(cset *cs, char *cp);
109f0957ccaSPeter Wemm #endif
110*c271fa92SBaptiste Daroussin static void mcinvert(struct parse *p, cset *cs);
111*c271fa92SBaptiste Daroussin static void mccase(struct parse *p, cset *cs);
112f0957ccaSPeter Wemm #ifdef notdef
113*c271fa92SBaptiste Daroussin static int isinsets(struct re_guts *g, int c);
114*c271fa92SBaptiste Daroussin static int samesets(struct re_guts *g, int c1, int c2);
115f0957ccaSPeter Wemm #endif
116*c271fa92SBaptiste Daroussin static void categorize(struct parse *p, struct re_guts *g);
117*c271fa92SBaptiste Daroussin static sopno dupl(struct parse *p, sopno start, sopno finish);
118*c271fa92SBaptiste Daroussin static void doemit(struct parse *p, sop op, size_t opnd);
119*c271fa92SBaptiste Daroussin static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
120*c271fa92SBaptiste Daroussin static void dofwd(struct parse *p, sopno pos, sop value);
121*c271fa92SBaptiste Daroussin static int enlarge(struct parse *p, sopno size);
122*c271fa92SBaptiste Daroussin static void stripsnug(struct parse *p, struct re_guts *g);
123*c271fa92SBaptiste Daroussin static void findmust(struct parse *p, struct re_guts *g);
124*c271fa92SBaptiste Daroussin static sopno pluscount(struct parse *p, struct re_guts *g);
125f0957ccaSPeter Wemm
126f0957ccaSPeter Wemm #ifdef __cplusplus
127f0957ccaSPeter Wemm }
128f0957ccaSPeter Wemm #endif
129f0957ccaSPeter Wemm /* ========= end header generated by ./mkh ========= */
130f0957ccaSPeter Wemm
131f0957ccaSPeter Wemm static RCHAR_T nuls[10]; /* place to point scanner in event of error */
132f0957ccaSPeter Wemm
133f0957ccaSPeter Wemm /*
134f0957ccaSPeter Wemm * macros for use with parse structure
135f0957ccaSPeter Wemm * BEWARE: these know that the parse structure is named `p' !!!
136f0957ccaSPeter Wemm */
137f0957ccaSPeter Wemm #define PEEK() (*p->next)
138f0957ccaSPeter Wemm #define PEEK2() (*(p->next+1))
139f0957ccaSPeter Wemm #define MORE() (p->next < p->end)
140f0957ccaSPeter Wemm #define MORE2() (p->next+1 < p->end)
141f0957ccaSPeter Wemm #define SEE(c) (MORE() && PEEK() == (c))
142f0957ccaSPeter Wemm #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
143f0957ccaSPeter Wemm #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
144f0957ccaSPeter Wemm #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
145f0957ccaSPeter Wemm #define NEXT() (p->next++)
146f0957ccaSPeter Wemm #define NEXT2() (p->next += 2)
147f0957ccaSPeter Wemm #define NEXTn(n) (p->next += (n))
148f0957ccaSPeter Wemm #define GETNEXT() (*p->next++)
149f0957ccaSPeter Wemm #define SETERROR(e) seterr(p, (e))
150f0957ccaSPeter Wemm #define REQUIRE(co, e) ((co) || SETERROR(e))
151f0957ccaSPeter Wemm #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
152f0957ccaSPeter Wemm #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
153f0957ccaSPeter Wemm #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
154f0957ccaSPeter Wemm #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
155f0957ccaSPeter Wemm #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
156f0957ccaSPeter Wemm #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
157f0957ccaSPeter Wemm #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
158f0957ccaSPeter Wemm #define HERE() (p->slen)
159f0957ccaSPeter Wemm #define THERE() (p->slen - 1)
160f0957ccaSPeter Wemm #define THERETHERE() (p->slen - 2)
161f0957ccaSPeter Wemm #define DROP(n) (p->slen -= (n))
162f0957ccaSPeter Wemm
163f0957ccaSPeter Wemm #ifndef NDEBUG
164f0957ccaSPeter Wemm static int never = 0; /* for use in asserts; shuts lint up */
165f0957ccaSPeter Wemm #else
166f0957ccaSPeter Wemm #define never 0 /* some <assert.h>s have bugs too */
167f0957ccaSPeter Wemm #endif
168f0957ccaSPeter Wemm
169f0957ccaSPeter Wemm #define MEMLIMIT 0x8000000
170f0957ccaSPeter Wemm #define MEMSIZE(p) \
171f0957ccaSPeter Wemm ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
172f0957ccaSPeter Wemm (p)->ncsalloc * sizeof(cset) + \
173f0957ccaSPeter Wemm (p)->ssize * sizeof(sop))
174f0957ccaSPeter Wemm #define RECLIMIT 256
175f0957ccaSPeter Wemm
176f0957ccaSPeter Wemm /*
177f0957ccaSPeter Wemm - regcomp - interface for parser and compilation
178f0957ccaSPeter Wemm */
179f0957ccaSPeter Wemm int /* 0 success, otherwise REG_something */
regcomp(regex_t * preg,const RCHAR_T * pattern,int cflags)180f0957ccaSPeter Wemm regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags)
181f0957ccaSPeter Wemm {
182f0957ccaSPeter Wemm struct parse pa;
183*c271fa92SBaptiste Daroussin struct re_guts *g;
184*c271fa92SBaptiste Daroussin struct parse *p = &pa;
185*c271fa92SBaptiste Daroussin int i;
186*c271fa92SBaptiste Daroussin size_t len;
187f0957ccaSPeter Wemm #ifdef REDEBUG
188f0957ccaSPeter Wemm # define GOODFLAGS(f) (f)
189f0957ccaSPeter Wemm #else
190f0957ccaSPeter Wemm # define GOODFLAGS(f) ((f)&~REG_DUMP)
191f0957ccaSPeter Wemm #endif
192f0957ccaSPeter Wemm
193f0957ccaSPeter Wemm cflags = GOODFLAGS(cflags);
194f0957ccaSPeter Wemm if ((cflags®_EXTENDED) && (cflags®_NOSPEC))
195f0957ccaSPeter Wemm return(REG_INVARG);
196f0957ccaSPeter Wemm
197f0957ccaSPeter Wemm if (cflags®_PEND) {
198f0957ccaSPeter Wemm if (preg->re_endp < pattern)
199f0957ccaSPeter Wemm return(REG_INVARG);
200f0957ccaSPeter Wemm len = preg->re_endp - pattern;
201f0957ccaSPeter Wemm } else
202f0957ccaSPeter Wemm len = STRLEN(pattern);
203f0957ccaSPeter Wemm
204f0957ccaSPeter Wemm /* do the mallocs early so failure handling is easy */
205f0957ccaSPeter Wemm g = (struct re_guts *)malloc(sizeof(struct re_guts) +
206f0957ccaSPeter Wemm (NC-1)*sizeof(cat_t));
207f0957ccaSPeter Wemm if (g == NULL)
208f0957ccaSPeter Wemm return(REG_ESPACE);
209f0957ccaSPeter Wemm p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
210f0957ccaSPeter Wemm p->strip = (sop *)malloc(p->ssize * sizeof(sop));
211f0957ccaSPeter Wemm if (p->strip == NULL) {
212f0957ccaSPeter Wemm free((char *)g);
213f0957ccaSPeter Wemm return(REG_ESPACE);
214f0957ccaSPeter Wemm }
215f0957ccaSPeter Wemm p->stripdata = (RCHAR_T *)malloc(p->ssize * sizeof(RCHAR_T));
216f0957ccaSPeter Wemm if (p->stripdata == NULL) {
217f0957ccaSPeter Wemm free((char *)p->strip);
218f0957ccaSPeter Wemm free((char *)g);
219f0957ccaSPeter Wemm return(REG_ESPACE);
220f0957ccaSPeter Wemm }
221f0957ccaSPeter Wemm p->slen = 0;
222f0957ccaSPeter Wemm
223f0957ccaSPeter Wemm /* set things up */
224f0957ccaSPeter Wemm p->g = g;
225f0957ccaSPeter Wemm p->next = (RCHAR_T *)pattern; /* convenience; we do not modify it */
226f0957ccaSPeter Wemm p->end = p->next + len;
227f0957ccaSPeter Wemm p->error = 0;
228f0957ccaSPeter Wemm p->ncsalloc = 0;
229f0957ccaSPeter Wemm for (i = 0; i < NPAREN; i++) {
230f0957ccaSPeter Wemm p->pbegin[i] = 0;
231f0957ccaSPeter Wemm p->pend[i] = 0;
232f0957ccaSPeter Wemm }
233f0957ccaSPeter Wemm g->csetsize = NC;
234f0957ccaSPeter Wemm g->sets = NULL;
235f0957ccaSPeter Wemm g->setbits = NULL;
236f0957ccaSPeter Wemm g->ncsets = 0;
237f0957ccaSPeter Wemm g->cflags = cflags;
238f0957ccaSPeter Wemm g->iflags = 0;
239f0957ccaSPeter Wemm g->nbol = 0;
240f0957ccaSPeter Wemm g->neol = 0;
241f0957ccaSPeter Wemm g->must = NULL;
242f0957ccaSPeter Wemm g->mlen = 0;
243f0957ccaSPeter Wemm g->nsub = 0;
244f0957ccaSPeter Wemm #if 0
245f0957ccaSPeter Wemm g->ncategories = 1; /* category 0 is "everything else" */
246f0957ccaSPeter Wemm g->categories = &g->catspace[-(CHAR_MIN)];
247*c271fa92SBaptiste Daroussin memset((char *)g->catspace, 0, NC*sizeof(cat_t));
248f0957ccaSPeter Wemm #endif
249f0957ccaSPeter Wemm g->backrefs = 0;
250f0957ccaSPeter Wemm
251f0957ccaSPeter Wemm /* do it */
252f0957ccaSPeter Wemm EMIT(OEND, 0);
253f0957ccaSPeter Wemm g->firststate = THERE();
254f0957ccaSPeter Wemm if (cflags®_EXTENDED)
255f0957ccaSPeter Wemm p_ere(p, OUT, 0);
256f0957ccaSPeter Wemm else if (cflags®_NOSPEC)
257f0957ccaSPeter Wemm p_str(p);
258f0957ccaSPeter Wemm else
259f0957ccaSPeter Wemm p_bre(p, OUT, OUT, 0);
260f0957ccaSPeter Wemm EMIT(OEND, 0);
261f0957ccaSPeter Wemm g->laststate = THERE();
262f0957ccaSPeter Wemm
263f0957ccaSPeter Wemm /* tidy up loose ends and fill things in */
264f0957ccaSPeter Wemm categorize(p, g);
265f0957ccaSPeter Wemm stripsnug(p, g);
266f0957ccaSPeter Wemm findmust(p, g);
267f0957ccaSPeter Wemm g->nplus = pluscount(p, g);
268f0957ccaSPeter Wemm g->magic = MAGIC2;
269f0957ccaSPeter Wemm preg->re_nsub = g->nsub;
270f0957ccaSPeter Wemm preg->re_g = g;
271f0957ccaSPeter Wemm preg->re_magic = MAGIC1;
272f0957ccaSPeter Wemm #ifndef REDEBUG
273f0957ccaSPeter Wemm /* not debugging, so can't rely on the assert() in regexec() */
274f0957ccaSPeter Wemm if (g->iflags&BAD)
275f0957ccaSPeter Wemm SETERROR(REG_ASSERT);
276f0957ccaSPeter Wemm #endif
277f0957ccaSPeter Wemm
278f0957ccaSPeter Wemm /* win or lose, we're done */
279f0957ccaSPeter Wemm if (p->error != 0) /* lose */
280f0957ccaSPeter Wemm regfree(preg);
281f0957ccaSPeter Wemm return(p->error);
282f0957ccaSPeter Wemm }
283f0957ccaSPeter Wemm
284f0957ccaSPeter Wemm /*
285f0957ccaSPeter Wemm - p_ere - ERE parser top level, concatenation and alternation
286f0957ccaSPeter Wemm */
287f0957ccaSPeter Wemm static void
p_ere(struct parse * p,int stop,size_t reclimit)288*c271fa92SBaptiste Daroussin p_ere(struct parse *p, int stop, size_t reclimit)
289f0957ccaSPeter Wemm /* character this ERE should end at */
290f0957ccaSPeter Wemm {
291*c271fa92SBaptiste Daroussin char c;
292*c271fa92SBaptiste Daroussin sopno prevback = 0;
293*c271fa92SBaptiste Daroussin sopno prevfwd = 0;
294*c271fa92SBaptiste Daroussin sopno conc;
295*c271fa92SBaptiste Daroussin int first = 1; /* is this the first alternative? */
296f0957ccaSPeter Wemm
297f0957ccaSPeter Wemm if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
298f0957ccaSPeter Wemm p->error = REG_ESPACE;
299f0957ccaSPeter Wemm return;
300f0957ccaSPeter Wemm }
301f0957ccaSPeter Wemm
302f0957ccaSPeter Wemm for (;;) {
303f0957ccaSPeter Wemm /* do a bunch of concatenated expressions */
304f0957ccaSPeter Wemm conc = HERE();
305f0957ccaSPeter Wemm while (MORE() && (c = PEEK()) != '|' && c != stop)
306f0957ccaSPeter Wemm p_ere_exp(p, reclimit);
307f0957ccaSPeter Wemm (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
308f0957ccaSPeter Wemm
309f0957ccaSPeter Wemm if (!EAT('|'))
310f0957ccaSPeter Wemm break; /* NOTE BREAK OUT */
311f0957ccaSPeter Wemm
312f0957ccaSPeter Wemm if (first) {
313f0957ccaSPeter Wemm INSERT(OCH_, conc); /* offset is wrong */
314f0957ccaSPeter Wemm prevfwd = conc;
315f0957ccaSPeter Wemm prevback = conc;
316f0957ccaSPeter Wemm first = 0;
317f0957ccaSPeter Wemm }
318f0957ccaSPeter Wemm ASTERN(OOR1, prevback);
319f0957ccaSPeter Wemm prevback = THERE();
320f0957ccaSPeter Wemm AHEAD(prevfwd); /* fix previous offset */
321f0957ccaSPeter Wemm prevfwd = HERE();
322f0957ccaSPeter Wemm EMIT(OOR2, 0); /* offset is very wrong */
323f0957ccaSPeter Wemm }
324f0957ccaSPeter Wemm
325f0957ccaSPeter Wemm if (!first) { /* tail-end fixups */
326f0957ccaSPeter Wemm AHEAD(prevfwd);
327f0957ccaSPeter Wemm ASTERN(O_CH, prevback);
328f0957ccaSPeter Wemm }
329f0957ccaSPeter Wemm
330f0957ccaSPeter Wemm assert(!MORE() || SEE(stop));
331f0957ccaSPeter Wemm }
332f0957ccaSPeter Wemm
333f0957ccaSPeter Wemm /*
334f0957ccaSPeter Wemm - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
335f0957ccaSPeter Wemm */
336f0957ccaSPeter Wemm static void
p_ere_exp(struct parse * p,size_t reclimit)337*c271fa92SBaptiste Daroussin p_ere_exp(struct parse *p, size_t reclimit)
338f0957ccaSPeter Wemm {
339*c271fa92SBaptiste Daroussin char c;
340*c271fa92SBaptiste Daroussin sopno pos;
341*c271fa92SBaptiste Daroussin int count;
342*c271fa92SBaptiste Daroussin int count2;
343*c271fa92SBaptiste Daroussin sopno subno;
344f0957ccaSPeter Wemm int wascaret = 0;
345f0957ccaSPeter Wemm
346f0957ccaSPeter Wemm assert(MORE()); /* caller should have ensured this */
347f0957ccaSPeter Wemm c = GETNEXT();
348f0957ccaSPeter Wemm
349f0957ccaSPeter Wemm pos = HERE();
350f0957ccaSPeter Wemm switch (c) {
351f0957ccaSPeter Wemm case '(':
352f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EPAREN);
353f0957ccaSPeter Wemm p->g->nsub++;
354f0957ccaSPeter Wemm subno = p->g->nsub;
355f0957ccaSPeter Wemm if (subno < NPAREN)
356f0957ccaSPeter Wemm p->pbegin[subno] = HERE();
357f0957ccaSPeter Wemm EMIT(OLPAREN, subno);
358f0957ccaSPeter Wemm if (!SEE(')'))
359f0957ccaSPeter Wemm p_ere(p, ')', reclimit);
360f0957ccaSPeter Wemm if (subno < NPAREN) {
361f0957ccaSPeter Wemm p->pend[subno] = HERE();
362f0957ccaSPeter Wemm assert(p->pend[subno] != 0);
363f0957ccaSPeter Wemm }
364f0957ccaSPeter Wemm EMIT(ORPAREN, subno);
365f0957ccaSPeter Wemm (void)MUSTEAT(')', REG_EPAREN);
366f0957ccaSPeter Wemm break;
367f0957ccaSPeter Wemm #ifndef POSIX_MISTAKE
368f0957ccaSPeter Wemm case ')': /* happens only if no current unmatched ( */
369f0957ccaSPeter Wemm /*
370f0957ccaSPeter Wemm * You may ask, why the ifndef? Because I didn't notice
371f0957ccaSPeter Wemm * this until slightly too late for 1003.2, and none of the
372f0957ccaSPeter Wemm * other 1003.2 regular-expression reviewers noticed it at
373f0957ccaSPeter Wemm * all. So an unmatched ) is legal POSIX, at least until
374f0957ccaSPeter Wemm * we can get it fixed.
375f0957ccaSPeter Wemm */
376f0957ccaSPeter Wemm SETERROR(REG_EPAREN);
377f0957ccaSPeter Wemm break;
378f0957ccaSPeter Wemm #endif
379f0957ccaSPeter Wemm case '^':
380f0957ccaSPeter Wemm EMIT(OBOL, 0);
381f0957ccaSPeter Wemm p->g->iflags |= USEBOL;
382f0957ccaSPeter Wemm p->g->nbol++;
383f0957ccaSPeter Wemm wascaret = 1;
384f0957ccaSPeter Wemm break;
385f0957ccaSPeter Wemm case '$':
386f0957ccaSPeter Wemm EMIT(OEOL, 0);
387f0957ccaSPeter Wemm p->g->iflags |= USEEOL;
388f0957ccaSPeter Wemm p->g->neol++;
389f0957ccaSPeter Wemm break;
390f0957ccaSPeter Wemm case '|':
391f0957ccaSPeter Wemm SETERROR(REG_EMPTY);
392f0957ccaSPeter Wemm break;
393f0957ccaSPeter Wemm case '*':
394f0957ccaSPeter Wemm case '+':
395f0957ccaSPeter Wemm case '?':
396f0957ccaSPeter Wemm SETERROR(REG_BADRPT);
397f0957ccaSPeter Wemm break;
398f0957ccaSPeter Wemm case '.':
399f0957ccaSPeter Wemm if (p->g->cflags®_NEWLINE)
400f0957ccaSPeter Wemm nonnewline(p);
401f0957ccaSPeter Wemm else
402f0957ccaSPeter Wemm EMIT(OANY, 0);
403f0957ccaSPeter Wemm break;
404f0957ccaSPeter Wemm case '[':
405f0957ccaSPeter Wemm p_bracket(p);
406f0957ccaSPeter Wemm break;
407f0957ccaSPeter Wemm case '\\':
408f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EESCAPE);
409f0957ccaSPeter Wemm c = GETNEXT();
410f0957ccaSPeter Wemm ordinary(p, c);
411f0957ccaSPeter Wemm break;
412f0957ccaSPeter Wemm case '{': /* okay as ordinary except if digit follows */
413f0957ccaSPeter Wemm (void)REQUIRE(!MORE() || !ISDIGIT((UCHAR_T)PEEK()), REG_BADRPT);
414f0957ccaSPeter Wemm /* FALLTHROUGH */
415f0957ccaSPeter Wemm default:
416f0957ccaSPeter Wemm ordinary(p, c);
417f0957ccaSPeter Wemm break;
418f0957ccaSPeter Wemm }
419f0957ccaSPeter Wemm
420f0957ccaSPeter Wemm if (!MORE())
421f0957ccaSPeter Wemm return;
422f0957ccaSPeter Wemm c = PEEK();
423f0957ccaSPeter Wemm /* we call { a repetition if followed by a digit */
424f0957ccaSPeter Wemm if (!( c == '*' || c == '+' || c == '?' ||
425f0957ccaSPeter Wemm (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ))
426f0957ccaSPeter Wemm return; /* no repetition, we're done */
427f0957ccaSPeter Wemm NEXT();
428f0957ccaSPeter Wemm
429f0957ccaSPeter Wemm (void)REQUIRE(!wascaret, REG_BADRPT);
430f0957ccaSPeter Wemm switch (c) {
431f0957ccaSPeter Wemm case '*': /* implemented as +? */
432f0957ccaSPeter Wemm /* this case does not require the (y|) trick, noKLUDGE */
433f0957ccaSPeter Wemm INSERT(OPLUS_, pos);
434f0957ccaSPeter Wemm ASTERN(O_PLUS, pos);
435f0957ccaSPeter Wemm INSERT(OQUEST_, pos);
436f0957ccaSPeter Wemm ASTERN(O_QUEST, pos);
437f0957ccaSPeter Wemm break;
438f0957ccaSPeter Wemm case '+':
439f0957ccaSPeter Wemm INSERT(OPLUS_, pos);
440f0957ccaSPeter Wemm ASTERN(O_PLUS, pos);
441f0957ccaSPeter Wemm break;
442f0957ccaSPeter Wemm case '?':
443f0957ccaSPeter Wemm /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
444f0957ccaSPeter Wemm INSERT(OCH_, pos); /* offset slightly wrong */
445f0957ccaSPeter Wemm ASTERN(OOR1, pos); /* this one's right */
446f0957ccaSPeter Wemm AHEAD(pos); /* fix the OCH_ */
447f0957ccaSPeter Wemm EMIT(OOR2, 0); /* offset very wrong... */
448f0957ccaSPeter Wemm AHEAD(THERE()); /* ...so fix it */
449f0957ccaSPeter Wemm ASTERN(O_CH, THERETHERE());
450f0957ccaSPeter Wemm break;
451f0957ccaSPeter Wemm case '{':
452f0957ccaSPeter Wemm count = p_count(p);
453f0957ccaSPeter Wemm if (EAT(',')) {
454f0957ccaSPeter Wemm if (ISDIGIT((UCHAR_T)PEEK())) {
455f0957ccaSPeter Wemm count2 = p_count(p);
456f0957ccaSPeter Wemm (void)REQUIRE(count <= count2, REG_BADBR);
457f0957ccaSPeter Wemm } else /* single number with comma */
458f0957ccaSPeter Wemm count2 = INFINITY;
459f0957ccaSPeter Wemm } else /* just a single number */
460f0957ccaSPeter Wemm count2 = count;
461f0957ccaSPeter Wemm repeat(p, pos, count, count2, 0);
462f0957ccaSPeter Wemm if (!EAT('}')) { /* error heuristics */
463f0957ccaSPeter Wemm while (MORE() && PEEK() != '}')
464f0957ccaSPeter Wemm NEXT();
465f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EBRACE);
466f0957ccaSPeter Wemm SETERROR(REG_BADBR);
467f0957ccaSPeter Wemm }
468f0957ccaSPeter Wemm break;
469f0957ccaSPeter Wemm }
470f0957ccaSPeter Wemm
471f0957ccaSPeter Wemm if (!MORE())
472f0957ccaSPeter Wemm return;
473f0957ccaSPeter Wemm c = PEEK();
474f0957ccaSPeter Wemm if (!( c == '*' || c == '+' || c == '?' ||
475f0957ccaSPeter Wemm (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ) )
476f0957ccaSPeter Wemm return;
477f0957ccaSPeter Wemm SETERROR(REG_BADRPT);
478f0957ccaSPeter Wemm }
479f0957ccaSPeter Wemm
480f0957ccaSPeter Wemm /*
481f0957ccaSPeter Wemm - p_str - string (no metacharacters) "parser"
482f0957ccaSPeter Wemm */
483f0957ccaSPeter Wemm static void
p_str(struct parse * p)484*c271fa92SBaptiste Daroussin p_str(struct parse *p)
485f0957ccaSPeter Wemm {
486f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EMPTY);
487f0957ccaSPeter Wemm while (MORE())
488f0957ccaSPeter Wemm ordinary(p, GETNEXT());
489f0957ccaSPeter Wemm }
490f0957ccaSPeter Wemm
491f0957ccaSPeter Wemm /*
492f0957ccaSPeter Wemm - p_bre - BRE parser top level, anchoring and concatenation
493f0957ccaSPeter Wemm * Giving end1 as OUT essentially eliminates the end1/end2 check.
494f0957ccaSPeter Wemm *
495f0957ccaSPeter Wemm * This implementation is a bit of a kludge, in that a trailing $ is first
496f0957ccaSPeter Wemm * taken as an ordinary character and then revised to be an anchor. The
497f0957ccaSPeter Wemm * only undesirable side effect is that '$' gets included as a character
498f0957ccaSPeter Wemm * category in such cases. This is fairly harmless; not worth fixing.
499f0957ccaSPeter Wemm * The amount of lookahead needed to avoid this kludge is excessive.
500f0957ccaSPeter Wemm */
501f0957ccaSPeter Wemm static void
p_bre(struct parse * p,int end1,int end2,size_t reclimit)502*c271fa92SBaptiste Daroussin p_bre(struct parse *p,
503*c271fa92SBaptiste Daroussin int end1, /* first terminating character */
504*c271fa92SBaptiste Daroussin int end2, /* second terminating character */
505*c271fa92SBaptiste Daroussin size_t reclimit)
506f0957ccaSPeter Wemm {
507*c271fa92SBaptiste Daroussin sopno start;
508*c271fa92SBaptiste Daroussin int first = 1; /* first subexpression? */
509*c271fa92SBaptiste Daroussin int wasdollar = 0;
510f0957ccaSPeter Wemm
511f0957ccaSPeter Wemm if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
512f0957ccaSPeter Wemm p->error = REG_ESPACE;
513f0957ccaSPeter Wemm return;
514f0957ccaSPeter Wemm }
515f0957ccaSPeter Wemm
516f0957ccaSPeter Wemm start = HERE();
517f0957ccaSPeter Wemm
518f0957ccaSPeter Wemm if (EAT('^')) {
519f0957ccaSPeter Wemm EMIT(OBOL, 0);
520f0957ccaSPeter Wemm p->g->iflags |= USEBOL;
521f0957ccaSPeter Wemm p->g->nbol++;
522f0957ccaSPeter Wemm }
523f0957ccaSPeter Wemm while (MORE() && !SEETWO(end1, end2)) {
524f0957ccaSPeter Wemm wasdollar = p_simp_re(p, first, reclimit);
525f0957ccaSPeter Wemm first = 0;
526f0957ccaSPeter Wemm }
527f0957ccaSPeter Wemm if (wasdollar) { /* oops, that was a trailing anchor */
528f0957ccaSPeter Wemm DROP(1);
529f0957ccaSPeter Wemm EMIT(OEOL, 0);
530f0957ccaSPeter Wemm p->g->iflags |= USEEOL;
531f0957ccaSPeter Wemm p->g->neol++;
532f0957ccaSPeter Wemm }
533f0957ccaSPeter Wemm
534f0957ccaSPeter Wemm (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
535f0957ccaSPeter Wemm }
536f0957ccaSPeter Wemm
537f0957ccaSPeter Wemm /*
538f0957ccaSPeter Wemm - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
539f0957ccaSPeter Wemm */
540f0957ccaSPeter Wemm static int /* was the simple RE an unbackslashed $? */
p_simp_re(struct parse * p,int starordinary,size_t reclimit)541*c271fa92SBaptiste Daroussin p_simp_re(struct parse *p,
542*c271fa92SBaptiste Daroussin int starordinary, /* is a leading * an ordinary character? */
543*c271fa92SBaptiste Daroussin size_t reclimit)
544f0957ccaSPeter Wemm {
545*c271fa92SBaptiste Daroussin int c;
546*c271fa92SBaptiste Daroussin int count;
547*c271fa92SBaptiste Daroussin int count2;
548*c271fa92SBaptiste Daroussin sopno pos;
549*c271fa92SBaptiste Daroussin int i;
550*c271fa92SBaptiste Daroussin sopno subno;
551f0957ccaSPeter Wemm int backsl;
552f0957ccaSPeter Wemm
553f0957ccaSPeter Wemm pos = HERE(); /* repetion op, if any, covers from here */
554f0957ccaSPeter Wemm
555f0957ccaSPeter Wemm assert(MORE()); /* caller should have ensured this */
556f0957ccaSPeter Wemm c = GETNEXT();
557f0957ccaSPeter Wemm backsl = c == '\\';
558f0957ccaSPeter Wemm if (backsl) {
559f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EESCAPE);
560f0957ccaSPeter Wemm c = (unsigned char)GETNEXT();
561f0957ccaSPeter Wemm switch (c) {
562f0957ccaSPeter Wemm case '{':
563f0957ccaSPeter Wemm SETERROR(REG_BADRPT);
564f0957ccaSPeter Wemm break;
565f0957ccaSPeter Wemm case '(':
566f0957ccaSPeter Wemm p->g->nsub++;
567f0957ccaSPeter Wemm subno = p->g->nsub;
568f0957ccaSPeter Wemm if (subno < NPAREN)
569f0957ccaSPeter Wemm p->pbegin[subno] = HERE();
570f0957ccaSPeter Wemm EMIT(OLPAREN, subno);
571f0957ccaSPeter Wemm /* the MORE here is an error heuristic */
572f0957ccaSPeter Wemm if (MORE() && !SEETWO('\\', ')'))
573f0957ccaSPeter Wemm p_bre(p, '\\', ')', reclimit);
574f0957ccaSPeter Wemm if (subno < NPAREN) {
575f0957ccaSPeter Wemm p->pend[subno] = HERE();
576f0957ccaSPeter Wemm assert(p->pend[subno] != 0);
577f0957ccaSPeter Wemm }
578f0957ccaSPeter Wemm EMIT(ORPAREN, subno);
579f0957ccaSPeter Wemm (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
580f0957ccaSPeter Wemm break;
581f0957ccaSPeter Wemm case ')': /* should not get here -- must be user */
582f0957ccaSPeter Wemm case '}':
583f0957ccaSPeter Wemm SETERROR(REG_EPAREN);
584f0957ccaSPeter Wemm break;
585f0957ccaSPeter Wemm case '1':
586f0957ccaSPeter Wemm case '2':
587f0957ccaSPeter Wemm case '3':
588f0957ccaSPeter Wemm case '4':
589f0957ccaSPeter Wemm case '5':
590f0957ccaSPeter Wemm case '6':
591f0957ccaSPeter Wemm case '7':
592f0957ccaSPeter Wemm case '8':
593f0957ccaSPeter Wemm case '9':
594f0957ccaSPeter Wemm i = c - '0';
595f0957ccaSPeter Wemm assert(i < NPAREN);
596f0957ccaSPeter Wemm if (p->pend[i] != 0) {
597f0957ccaSPeter Wemm assert(i <= p->g->nsub);
598f0957ccaSPeter Wemm EMIT(OBACK_, i);
599f0957ccaSPeter Wemm assert(p->pbegin[i] != 0);
600f0957ccaSPeter Wemm assert(p->strip[p->pbegin[i]] == OLPAREN);
601f0957ccaSPeter Wemm assert(p->strip[p->pend[i]] == ORPAREN);
602f0957ccaSPeter Wemm (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
603f0957ccaSPeter Wemm EMIT(O_BACK, i);
604f0957ccaSPeter Wemm } else
605f0957ccaSPeter Wemm SETERROR(REG_ESUBREG);
606f0957ccaSPeter Wemm p->g->backrefs = 1;
607f0957ccaSPeter Wemm break;
608f0957ccaSPeter Wemm default:
609f0957ccaSPeter Wemm ordinary(p, c);
610f0957ccaSPeter Wemm break;
611f0957ccaSPeter Wemm }
612f0957ccaSPeter Wemm } else {
613f0957ccaSPeter Wemm switch (c) {
614f0957ccaSPeter Wemm case '.':
615f0957ccaSPeter Wemm if (p->g->cflags®_NEWLINE)
616f0957ccaSPeter Wemm nonnewline(p);
617f0957ccaSPeter Wemm else
618f0957ccaSPeter Wemm EMIT(OANY, 0);
619f0957ccaSPeter Wemm break;
620f0957ccaSPeter Wemm case '[':
621f0957ccaSPeter Wemm p_bracket(p);
622f0957ccaSPeter Wemm break;
623f0957ccaSPeter Wemm case '*':
624f0957ccaSPeter Wemm (void)REQUIRE(starordinary, REG_BADRPT);
625f0957ccaSPeter Wemm /* FALLTHROUGH */
626f0957ccaSPeter Wemm default:
627f0957ccaSPeter Wemm ordinary(p, c);
628f0957ccaSPeter Wemm break;
629f0957ccaSPeter Wemm }
630f0957ccaSPeter Wemm }
631f0957ccaSPeter Wemm
632f0957ccaSPeter Wemm if (EAT('*')) { /* implemented as +? */
633f0957ccaSPeter Wemm /* this case does not require the (y|) trick, noKLUDGE */
634f0957ccaSPeter Wemm INSERT(OPLUS_, pos);
635f0957ccaSPeter Wemm ASTERN(O_PLUS, pos);
636f0957ccaSPeter Wemm INSERT(OQUEST_, pos);
637f0957ccaSPeter Wemm ASTERN(O_QUEST, pos);
638f0957ccaSPeter Wemm } else if (EATTWO('\\', '{')) {
639f0957ccaSPeter Wemm count = p_count(p);
640f0957ccaSPeter Wemm if (EAT(',')) {
641f0957ccaSPeter Wemm if (MORE() && ISDIGIT((UCHAR_T)PEEK())) {
642f0957ccaSPeter Wemm count2 = p_count(p);
643f0957ccaSPeter Wemm (void)REQUIRE(count <= count2, REG_BADBR);
644f0957ccaSPeter Wemm } else /* single number with comma */
645f0957ccaSPeter Wemm count2 = INFINITY;
646f0957ccaSPeter Wemm } else /* just a single number */
647f0957ccaSPeter Wemm count2 = count;
648f0957ccaSPeter Wemm repeat(p, pos, count, count2, reclimit);
649f0957ccaSPeter Wemm if (!EATTWO('\\', '}')) { /* error heuristics */
650f0957ccaSPeter Wemm while (MORE() && !SEETWO('\\', '}'))
651f0957ccaSPeter Wemm NEXT();
652f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EBRACE);
653f0957ccaSPeter Wemm SETERROR(REG_BADBR);
654f0957ccaSPeter Wemm }
655f0957ccaSPeter Wemm } else if (!backsl && c == (unsigned char)'$') /* $ (but not \$) ends it */
656f0957ccaSPeter Wemm return(1);
657f0957ccaSPeter Wemm
658f0957ccaSPeter Wemm return(0);
659f0957ccaSPeter Wemm }
660f0957ccaSPeter Wemm
661f0957ccaSPeter Wemm /*
662f0957ccaSPeter Wemm - p_count - parse a repetition count
663f0957ccaSPeter Wemm */
664f0957ccaSPeter Wemm static int /* the value */
p_count(struct parse * p)665*c271fa92SBaptiste Daroussin p_count(struct parse *p)
666f0957ccaSPeter Wemm {
667*c271fa92SBaptiste Daroussin int count = 0;
668*c271fa92SBaptiste Daroussin int ndigits = 0;
669f0957ccaSPeter Wemm
670f0957ccaSPeter Wemm while (MORE() && ISDIGIT((UCHAR_T)PEEK()) && count <= DUPMAX) {
671f0957ccaSPeter Wemm count = count*10 + (GETNEXT() - '0');
672f0957ccaSPeter Wemm ndigits++;
673f0957ccaSPeter Wemm }
674f0957ccaSPeter Wemm
675f0957ccaSPeter Wemm (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
676f0957ccaSPeter Wemm return(count);
677f0957ccaSPeter Wemm }
678f0957ccaSPeter Wemm
679f0957ccaSPeter Wemm /*
680f0957ccaSPeter Wemm - p_bracket - parse a bracketed character list
681f0957ccaSPeter Wemm *
682f0957ccaSPeter Wemm * Note a significant property of this code: if the allocset() did SETERROR,
683f0957ccaSPeter Wemm * no set operations are done.
684f0957ccaSPeter Wemm */
685f0957ccaSPeter Wemm static void
p_bracket(struct parse * p)686*c271fa92SBaptiste Daroussin p_bracket(struct parse *p)
687f0957ccaSPeter Wemm {
688*c271fa92SBaptiste Daroussin cset *cs;
689*c271fa92SBaptiste Daroussin int invert = 0;
690f0957ccaSPeter Wemm static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' };
691f0957ccaSPeter Wemm static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' };
692f0957ccaSPeter Wemm
693f0957ccaSPeter Wemm cs = allocset(p);
694f0957ccaSPeter Wemm if (cs == NULL)
695f0957ccaSPeter Wemm return;
696f0957ccaSPeter Wemm
697f0957ccaSPeter Wemm /* Dept of Truly Sickening Special-Case Kludges */
698f0957ccaSPeter Wemm if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) {
699f0957ccaSPeter Wemm EMIT(OBOW, 0);
700f0957ccaSPeter Wemm NEXTn(6);
701f0957ccaSPeter Wemm return;
702f0957ccaSPeter Wemm }
703f0957ccaSPeter Wemm if (p->next + 5 < p->end && MEMCMP(p->next, eow, 6) == 0) {
704f0957ccaSPeter Wemm EMIT(OEOW, 0);
705f0957ccaSPeter Wemm NEXTn(6);
706f0957ccaSPeter Wemm return;
707f0957ccaSPeter Wemm }
708f0957ccaSPeter Wemm
709f0957ccaSPeter Wemm if (EAT('^'))
710f0957ccaSPeter Wemm invert++; /* make note to invert set at end */
711f0957ccaSPeter Wemm if (EAT(']'))
712f0957ccaSPeter Wemm CHadd(cs, ']');
713f0957ccaSPeter Wemm else if (EAT('-'))
714f0957ccaSPeter Wemm CHadd(cs, '-');
715f0957ccaSPeter Wemm while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
716f0957ccaSPeter Wemm p_b_term(p, cs);
717f0957ccaSPeter Wemm if (EAT('-'))
718f0957ccaSPeter Wemm CHadd(cs, '-');
719f0957ccaSPeter Wemm (void)MUSTEAT(']', REG_EBRACK);
720f0957ccaSPeter Wemm
721f0957ccaSPeter Wemm if (p->error != 0) /* don't mess things up further */
722f0957ccaSPeter Wemm return;
723f0957ccaSPeter Wemm
724f0957ccaSPeter Wemm if (p->g->cflags®_ICASE) {
725*c271fa92SBaptiste Daroussin int i;
726*c271fa92SBaptiste Daroussin int ci;
727f0957ccaSPeter Wemm
728f0957ccaSPeter Wemm for (i = p->g->csetsize - 1; i >= 0; i--)
729f0957ccaSPeter Wemm if (CHIN(cs, i) && isalpha(i)) {
730f0957ccaSPeter Wemm ci = othercase(i);
731f0957ccaSPeter Wemm if (ci != i)
732f0957ccaSPeter Wemm CHadd(cs, ci);
733f0957ccaSPeter Wemm }
734f0957ccaSPeter Wemm if (cs->multis != NULL)
735f0957ccaSPeter Wemm mccase(p, cs);
736f0957ccaSPeter Wemm }
737f0957ccaSPeter Wemm if (invert) {
738*c271fa92SBaptiste Daroussin int i;
739f0957ccaSPeter Wemm
740f0957ccaSPeter Wemm for (i = p->g->csetsize - 1; i >= 0; i--)
741f0957ccaSPeter Wemm if (CHIN(cs, i))
742f0957ccaSPeter Wemm CHsub(cs, i);
743f0957ccaSPeter Wemm else
744f0957ccaSPeter Wemm CHadd(cs, i);
745f0957ccaSPeter Wemm if (p->g->cflags®_NEWLINE)
746f0957ccaSPeter Wemm CHsub(cs, '\n');
747f0957ccaSPeter Wemm if (cs->multis != NULL)
748f0957ccaSPeter Wemm mcinvert(p, cs);
749f0957ccaSPeter Wemm }
750f0957ccaSPeter Wemm
751f0957ccaSPeter Wemm assert(cs->multis == NULL); /* xxx */
752f0957ccaSPeter Wemm
753f0957ccaSPeter Wemm if (nch(p, cs) == 1) { /* optimize singleton sets */
754f0957ccaSPeter Wemm ordinary(p, firstch(p, cs));
755f0957ccaSPeter Wemm freeset(p, cs);
756f0957ccaSPeter Wemm } else
757f0957ccaSPeter Wemm EMIT(OANYOF, freezeset(p, cs));
758f0957ccaSPeter Wemm }
759f0957ccaSPeter Wemm
760f0957ccaSPeter Wemm /*
761f0957ccaSPeter Wemm - p_b_term - parse one term of a bracketed character list
762f0957ccaSPeter Wemm */
763f0957ccaSPeter Wemm static void
p_b_term(struct parse * p,cset * cs)764*c271fa92SBaptiste Daroussin p_b_term(struct parse *p, cset *cs)
765f0957ccaSPeter Wemm {
766*c271fa92SBaptiste Daroussin char c;
767*c271fa92SBaptiste Daroussin char start, finish;
768*c271fa92SBaptiste Daroussin int i;
769f0957ccaSPeter Wemm
770f0957ccaSPeter Wemm /* classify what we've got */
771f0957ccaSPeter Wemm switch ((MORE()) ? PEEK() : '\0') {
772f0957ccaSPeter Wemm case '[':
773f0957ccaSPeter Wemm c = (MORE2()) ? PEEK2() : '\0';
774f0957ccaSPeter Wemm break;
775f0957ccaSPeter Wemm case '-':
776f0957ccaSPeter Wemm SETERROR(REG_ERANGE);
777f0957ccaSPeter Wemm return; /* NOTE RETURN */
778f0957ccaSPeter Wemm break;
779f0957ccaSPeter Wemm default:
780f0957ccaSPeter Wemm c = '\0';
781f0957ccaSPeter Wemm break;
782f0957ccaSPeter Wemm }
783f0957ccaSPeter Wemm
784f0957ccaSPeter Wemm switch (c) {
785f0957ccaSPeter Wemm case ':': /* character class */
786f0957ccaSPeter Wemm NEXT2();
787f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EBRACK);
788f0957ccaSPeter Wemm c = PEEK();
789f0957ccaSPeter Wemm (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
790f0957ccaSPeter Wemm p_b_cclass(p, cs);
791f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EBRACK);
792f0957ccaSPeter Wemm (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
793f0957ccaSPeter Wemm break;
794f0957ccaSPeter Wemm case '=': /* equivalence class */
795f0957ccaSPeter Wemm NEXT2();
796f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EBRACK);
797f0957ccaSPeter Wemm c = PEEK();
798f0957ccaSPeter Wemm (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
799f0957ccaSPeter Wemm p_b_eclass(p, cs);
800f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EBRACK);
801f0957ccaSPeter Wemm (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
802f0957ccaSPeter Wemm break;
803f0957ccaSPeter Wemm default: /* symbol, ordinary character, or range */
804f0957ccaSPeter Wemm /* xxx revision needed for multichar stuff */
805f0957ccaSPeter Wemm start = p_b_symbol(p);
806f0957ccaSPeter Wemm if (SEE('-') && MORE2() && PEEK2() != ']') {
807f0957ccaSPeter Wemm /* range */
808f0957ccaSPeter Wemm NEXT();
809f0957ccaSPeter Wemm if (EAT('-'))
810f0957ccaSPeter Wemm finish = '-';
811f0957ccaSPeter Wemm else
812f0957ccaSPeter Wemm finish = p_b_symbol(p);
813f0957ccaSPeter Wemm } else
814f0957ccaSPeter Wemm finish = start;
815f0957ccaSPeter Wemm /* xxx what about signed chars here... */
816f0957ccaSPeter Wemm (void)REQUIRE(start <= finish, REG_ERANGE);
817f0957ccaSPeter Wemm for (i = start; i <= finish; i++)
818f0957ccaSPeter Wemm CHadd(cs, i);
819f0957ccaSPeter Wemm break;
820f0957ccaSPeter Wemm }
821f0957ccaSPeter Wemm }
822f0957ccaSPeter Wemm
823f0957ccaSPeter Wemm /*
824f0957ccaSPeter Wemm - p_b_cclass - parse a character-class name and deal with it
825f0957ccaSPeter Wemm */
826f0957ccaSPeter Wemm static void
p_b_cclass(struct parse * p,cset * cs)827*c271fa92SBaptiste Daroussin p_b_cclass(struct parse *p, cset *cs)
828f0957ccaSPeter Wemm {
829*c271fa92SBaptiste Daroussin RCHAR_T *sp = p->next;
830*c271fa92SBaptiste Daroussin struct cclass *cp;
831*c271fa92SBaptiste Daroussin size_t len;
832*c271fa92SBaptiste Daroussin const char *u;
833*c271fa92SBaptiste Daroussin char c;
834f0957ccaSPeter Wemm
835f0957ccaSPeter Wemm while (MORE() && isalpha(PEEK()))
836f0957ccaSPeter Wemm NEXT();
837f0957ccaSPeter Wemm len = p->next - sp;
838f0957ccaSPeter Wemm for (cp = cclasses; cp->name != NULL; cp++)
839f0957ccaSPeter Wemm if (STRLEN(cp->name) == len && !MEMCMP(cp->name, sp, len))
840f0957ccaSPeter Wemm break;
841f0957ccaSPeter Wemm if (cp->name == NULL) {
842f0957ccaSPeter Wemm /* oops, didn't find it */
843f0957ccaSPeter Wemm SETERROR(REG_ECTYPE);
844f0957ccaSPeter Wemm return;
845f0957ccaSPeter Wemm }
846f0957ccaSPeter Wemm
847f0957ccaSPeter Wemm u = cp->chars;
848f0957ccaSPeter Wemm while ((c = *u++) != '\0')
849f0957ccaSPeter Wemm CHadd(cs, c);
850f0957ccaSPeter Wemm for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
851f0957ccaSPeter Wemm MCadd(p, cs, u);
852f0957ccaSPeter Wemm }
853f0957ccaSPeter Wemm
854f0957ccaSPeter Wemm /*
855f0957ccaSPeter Wemm - p_b_eclass - parse an equivalence-class name and deal with it
856f0957ccaSPeter Wemm *
857f0957ccaSPeter Wemm * This implementation is incomplete. xxx
858f0957ccaSPeter Wemm */
859f0957ccaSPeter Wemm static void
p_b_eclass(struct parse * p,cset * cs)860*c271fa92SBaptiste Daroussin p_b_eclass(struct parse *p, cset *cs)
861f0957ccaSPeter Wemm {
862*c271fa92SBaptiste Daroussin char c;
863f0957ccaSPeter Wemm
864f0957ccaSPeter Wemm c = p_b_coll_elem(p, '=');
865f0957ccaSPeter Wemm CHadd(cs, c);
866f0957ccaSPeter Wemm }
867f0957ccaSPeter Wemm
868f0957ccaSPeter Wemm /*
869f0957ccaSPeter Wemm - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
870f0957ccaSPeter Wemm */
871f0957ccaSPeter Wemm static char /* value of symbol */
p_b_symbol(struct parse * p)872*c271fa92SBaptiste Daroussin p_b_symbol(struct parse *p)
873f0957ccaSPeter Wemm {
874*c271fa92SBaptiste Daroussin char value;
875f0957ccaSPeter Wemm
876f0957ccaSPeter Wemm (void)REQUIRE(MORE(), REG_EBRACK);
877f0957ccaSPeter Wemm if (!EATTWO('[', '.'))
878f0957ccaSPeter Wemm return(GETNEXT());
879f0957ccaSPeter Wemm
880f0957ccaSPeter Wemm /* collating symbol */
881f0957ccaSPeter Wemm value = p_b_coll_elem(p, '.');
882f0957ccaSPeter Wemm (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
883f0957ccaSPeter Wemm return(value);
884f0957ccaSPeter Wemm }
885f0957ccaSPeter Wemm
886f0957ccaSPeter Wemm /*
887f0957ccaSPeter Wemm - p_b_coll_elem - parse a collating-element name and look it up
888f0957ccaSPeter Wemm */
889f0957ccaSPeter Wemm static char /* value of collating element */
p_b_coll_elem(struct parse * p,int endc)890*c271fa92SBaptiste Daroussin p_b_coll_elem(struct parse *p, int endc)
891f0957ccaSPeter Wemm
892f0957ccaSPeter Wemm /* name ended by endc,']' */
893f0957ccaSPeter Wemm {
894*c271fa92SBaptiste Daroussin RCHAR_T *sp = p->next;
895*c271fa92SBaptiste Daroussin struct cname *cp;
896*c271fa92SBaptiste Daroussin size_t len;
897f0957ccaSPeter Wemm
898f0957ccaSPeter Wemm while (MORE() && !SEETWO(endc, ']'))
899f0957ccaSPeter Wemm NEXT();
900f0957ccaSPeter Wemm if (!MORE()) {
901f0957ccaSPeter Wemm SETERROR(REG_EBRACK);
902f0957ccaSPeter Wemm return(0);
903f0957ccaSPeter Wemm }
904f0957ccaSPeter Wemm len = p->next - sp;
905f0957ccaSPeter Wemm for (cp = cnames; cp->name != NULL; cp++)
906f0957ccaSPeter Wemm if (STRLEN(cp->name) == len && MEMCMP(cp->name, sp, len))
907f0957ccaSPeter Wemm return(cp->code); /* known name */
908f0957ccaSPeter Wemm if (len == 1)
909f0957ccaSPeter Wemm return(*sp); /* single character */
910f0957ccaSPeter Wemm SETERROR(REG_ECOLLATE); /* neither */
911f0957ccaSPeter Wemm return(0);
912f0957ccaSPeter Wemm }
913f0957ccaSPeter Wemm
914f0957ccaSPeter Wemm /*
915f0957ccaSPeter Wemm - othercase - return the case counterpart of an alphabetic
916f0957ccaSPeter Wemm */
917f0957ccaSPeter Wemm static char /* if no counterpart, return ch */
othercase(int ch)918f0957ccaSPeter Wemm othercase(int ch)
919f0957ccaSPeter Wemm {
920f0957ccaSPeter Wemm assert(isalpha(ch));
921f0957ccaSPeter Wemm if (isupper(ch))
922f0957ccaSPeter Wemm return(tolower(ch));
923f0957ccaSPeter Wemm else if (islower(ch))
924f0957ccaSPeter Wemm return(toupper(ch));
925f0957ccaSPeter Wemm else /* peculiar, but could happen */
926f0957ccaSPeter Wemm return(ch);
927f0957ccaSPeter Wemm }
928f0957ccaSPeter Wemm
929f0957ccaSPeter Wemm /*
930f0957ccaSPeter Wemm - bothcases - emit a dualcase version of a two-case character
931f0957ccaSPeter Wemm *
932f0957ccaSPeter Wemm * Boy, is this implementation ever a kludge...
933f0957ccaSPeter Wemm */
934f0957ccaSPeter Wemm static void
bothcases(struct parse * p,int ch)935*c271fa92SBaptiste Daroussin bothcases(struct parse *p, int ch)
936f0957ccaSPeter Wemm {
937*c271fa92SBaptiste Daroussin RCHAR_T *oldnext = p->next;
938*c271fa92SBaptiste Daroussin RCHAR_T *oldend = p->end;
939f0957ccaSPeter Wemm RCHAR_T bracket[3];
940f0957ccaSPeter Wemm
941f0957ccaSPeter Wemm assert(othercase(ch) != ch); /* p_bracket() would recurse */
942f0957ccaSPeter Wemm p->next = bracket;
943f0957ccaSPeter Wemm p->end = bracket+2;
944f0957ccaSPeter Wemm bracket[0] = ch;
945f0957ccaSPeter Wemm bracket[1] = ']';
946f0957ccaSPeter Wemm bracket[2] = '\0';
947f0957ccaSPeter Wemm p_bracket(p);
948f0957ccaSPeter Wemm assert(p->next == bracket+2);
949f0957ccaSPeter Wemm p->next = oldnext;
950f0957ccaSPeter Wemm p->end = oldend;
951f0957ccaSPeter Wemm }
952f0957ccaSPeter Wemm
953f0957ccaSPeter Wemm /*
954f0957ccaSPeter Wemm - ordinary - emit an ordinary character
955f0957ccaSPeter Wemm */
956f0957ccaSPeter Wemm static void
ordinary(struct parse * p,int ch)957*c271fa92SBaptiste Daroussin ordinary(struct parse *p, int ch)
958f0957ccaSPeter Wemm {
959f0957ccaSPeter Wemm /*
960*c271fa92SBaptiste Daroussin cat_t *cap = p->g->categories;
961f0957ccaSPeter Wemm */
962f0957ccaSPeter Wemm
963f0957ccaSPeter Wemm if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch)
964f0957ccaSPeter Wemm bothcases(p, ch);
965f0957ccaSPeter Wemm else {
966f0957ccaSPeter Wemm EMIT(OCHAR, (UCHAR_T)ch);
967f0957ccaSPeter Wemm /*
968f0957ccaSPeter Wemm if (cap[ch] == 0)
969f0957ccaSPeter Wemm cap[ch] = p->g->ncategories++;
970f0957ccaSPeter Wemm */
971f0957ccaSPeter Wemm }
972f0957ccaSPeter Wemm }
973f0957ccaSPeter Wemm
974f0957ccaSPeter Wemm /*
975f0957ccaSPeter Wemm - nonnewline - emit REG_NEWLINE version of OANY
976f0957ccaSPeter Wemm *
977f0957ccaSPeter Wemm * Boy, is this implementation ever a kludge...
978f0957ccaSPeter Wemm */
979f0957ccaSPeter Wemm static void
nonnewline(struct parse * p)980*c271fa92SBaptiste Daroussin nonnewline(struct parse *p)
981f0957ccaSPeter Wemm {
982*c271fa92SBaptiste Daroussin RCHAR_T *oldnext = p->next;
983*c271fa92SBaptiste Daroussin RCHAR_T *oldend = p->end;
984f0957ccaSPeter Wemm RCHAR_T bracket[4];
985f0957ccaSPeter Wemm
986f0957ccaSPeter Wemm p->next = bracket;
987f0957ccaSPeter Wemm p->end = bracket+3;
988f0957ccaSPeter Wemm bracket[0] = '^';
989f0957ccaSPeter Wemm bracket[1] = '\n';
990f0957ccaSPeter Wemm bracket[2] = ']';
991f0957ccaSPeter Wemm bracket[3] = '\0';
992f0957ccaSPeter Wemm p_bracket(p);
993f0957ccaSPeter Wemm assert(p->next == bracket+3);
994f0957ccaSPeter Wemm p->next = oldnext;
995f0957ccaSPeter Wemm p->end = oldend;
996f0957ccaSPeter Wemm }
997f0957ccaSPeter Wemm
998f0957ccaSPeter Wemm /*
999f0957ccaSPeter Wemm - repeat - generate code for a bounded repetition, recursively if needed
1000f0957ccaSPeter Wemm */
1001f0957ccaSPeter Wemm static void
repeat(struct parse * p,sopno start,int from,int to,size_t reclimit)1002*c271fa92SBaptiste Daroussin repeat(struct parse *p,
1003*c271fa92SBaptiste Daroussin sopno start, /* operand from here to end of strip */
1004*c271fa92SBaptiste Daroussin int from, /* repeated from this number */
1005*c271fa92SBaptiste Daroussin int to, /* to this number of times (maybe INFINITY) */
1006*c271fa92SBaptiste Daroussin size_t reclimit)
1007f0957ccaSPeter Wemm {
1008*c271fa92SBaptiste Daroussin sopno finish;
1009f0957ccaSPeter Wemm # define N 2
1010f0957ccaSPeter Wemm # define INF 3
1011f0957ccaSPeter Wemm # define REP(f, t) ((f)*8 + (t))
1012f0957ccaSPeter Wemm # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1013*c271fa92SBaptiste Daroussin sopno copy;
1014f0957ccaSPeter Wemm
1015f0957ccaSPeter Wemm if (reclimit++ > RECLIMIT)
1016f0957ccaSPeter Wemm p->error = REG_ESPACE;
1017f0957ccaSPeter Wemm if (p->error)
1018f0957ccaSPeter Wemm return;
1019f0957ccaSPeter Wemm
1020f0957ccaSPeter Wemm finish = HERE();
1021f0957ccaSPeter Wemm
1022f0957ccaSPeter Wemm assert(from <= to);
1023f0957ccaSPeter Wemm
1024f0957ccaSPeter Wemm switch (REP(MAP(from), MAP(to))) {
1025f0957ccaSPeter Wemm case REP(0, 0): /* must be user doing this */
1026f0957ccaSPeter Wemm DROP(finish-start); /* drop the operand */
1027f0957ccaSPeter Wemm break;
1028f0957ccaSPeter Wemm case REP(0, 1): /* as x{1,1}? */
1029f0957ccaSPeter Wemm case REP(0, N): /* as x{1,n}? */
1030f0957ccaSPeter Wemm case REP(0, INF): /* as x{1,}? */
1031f0957ccaSPeter Wemm /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1032f0957ccaSPeter Wemm INSERT(OCH_, start); /* offset is wrong... */
1033f0957ccaSPeter Wemm repeat(p, start+1, 1, to, reclimit);
1034f0957ccaSPeter Wemm ASTERN(OOR1, start);
1035f0957ccaSPeter Wemm AHEAD(start); /* ... fix it */
1036f0957ccaSPeter Wemm EMIT(OOR2, 0);
1037f0957ccaSPeter Wemm AHEAD(THERE());
1038f0957ccaSPeter Wemm ASTERN(O_CH, THERETHERE());
1039f0957ccaSPeter Wemm break;
1040f0957ccaSPeter Wemm case REP(1, 1): /* trivial case */
1041f0957ccaSPeter Wemm /* done */
1042f0957ccaSPeter Wemm break;
1043f0957ccaSPeter Wemm case REP(1, N): /* as x?x{1,n-1} */
1044f0957ccaSPeter Wemm /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1045f0957ccaSPeter Wemm INSERT(OCH_, start);
1046f0957ccaSPeter Wemm ASTERN(OOR1, start);
1047f0957ccaSPeter Wemm AHEAD(start);
1048f0957ccaSPeter Wemm EMIT(OOR2, 0); /* offset very wrong... */
1049f0957ccaSPeter Wemm AHEAD(THERE()); /* ...so fix it */
1050f0957ccaSPeter Wemm ASTERN(O_CH, THERETHERE());
1051f0957ccaSPeter Wemm copy = dupl(p, start+1, finish+1);
1052f0957ccaSPeter Wemm assert(copy == finish+4);
1053f0957ccaSPeter Wemm repeat(p, copy, 1, to-1, reclimit);
1054f0957ccaSPeter Wemm break;
1055f0957ccaSPeter Wemm case REP(1, INF): /* as x+ */
1056f0957ccaSPeter Wemm INSERT(OPLUS_, start);
1057f0957ccaSPeter Wemm ASTERN(O_PLUS, start);
1058f0957ccaSPeter Wemm break;
1059f0957ccaSPeter Wemm case REP(N, N): /* as xx{m-1,n-1} */
1060f0957ccaSPeter Wemm copy = dupl(p, start, finish);
1061f0957ccaSPeter Wemm repeat(p, copy, from-1, to-1, reclimit);
1062f0957ccaSPeter Wemm break;
1063f0957ccaSPeter Wemm case REP(N, INF): /* as xx{n-1,INF} */
1064f0957ccaSPeter Wemm copy = dupl(p, start, finish);
1065f0957ccaSPeter Wemm repeat(p, copy, from-1, to, reclimit);
1066f0957ccaSPeter Wemm break;
1067f0957ccaSPeter Wemm default: /* "can't happen" */
1068f0957ccaSPeter Wemm SETERROR(REG_ASSERT); /* just in case */
1069f0957ccaSPeter Wemm break;
1070f0957ccaSPeter Wemm }
1071f0957ccaSPeter Wemm }
1072f0957ccaSPeter Wemm
1073f0957ccaSPeter Wemm /*
1074f0957ccaSPeter Wemm - seterr - set an error condition
1075f0957ccaSPeter Wemm */
1076f0957ccaSPeter Wemm static int /* useless but makes type checking happy */
seterr(struct parse * p,int e)1077*c271fa92SBaptiste Daroussin seterr(struct parse *p, int e)
1078f0957ccaSPeter Wemm {
1079f0957ccaSPeter Wemm if (p->error == 0) /* keep earliest error condition */
1080f0957ccaSPeter Wemm p->error = e;
1081f0957ccaSPeter Wemm p->next = nuls; /* try to bring things to a halt */
1082f0957ccaSPeter Wemm p->end = nuls;
1083f0957ccaSPeter Wemm return(0); /* make the return value well-defined */
1084f0957ccaSPeter Wemm }
1085f0957ccaSPeter Wemm
1086f0957ccaSPeter Wemm /*
1087f0957ccaSPeter Wemm - allocset - allocate a set of characters for []
1088f0957ccaSPeter Wemm */
1089f0957ccaSPeter Wemm static cset *
allocset(struct parse * p)1090*c271fa92SBaptiste Daroussin allocset(struct parse *p)
1091f0957ccaSPeter Wemm {
1092*c271fa92SBaptiste Daroussin int no = p->g->ncsets++;
1093*c271fa92SBaptiste Daroussin size_t nc;
1094*c271fa92SBaptiste Daroussin size_t nbytes;
1095*c271fa92SBaptiste Daroussin cset *cs;
1096*c271fa92SBaptiste Daroussin size_t css = (size_t)p->g->csetsize;
1097*c271fa92SBaptiste Daroussin int i;
1098f0957ccaSPeter Wemm
1099f0957ccaSPeter Wemm if (no >= p->ncsalloc) { /* need another column of space */
1100f0957ccaSPeter Wemm p->ncsalloc += CHAR_BIT;
1101f0957ccaSPeter Wemm nc = p->ncsalloc;
1102f0957ccaSPeter Wemm assert(nc % CHAR_BIT == 0);
1103f0957ccaSPeter Wemm nbytes = nc / CHAR_BIT * css;
1104f0957ccaSPeter Wemm if (MEMSIZE(p) > MEMLIMIT)
1105f0957ccaSPeter Wemm goto oomem;
1106f0957ccaSPeter Wemm if (p->g->sets == NULL)
1107f0957ccaSPeter Wemm p->g->sets = (cset *)malloc(nc * sizeof(cset));
1108f0957ccaSPeter Wemm else
1109f0957ccaSPeter Wemm p->g->sets = (cset *)realloc((char *)p->g->sets,
1110f0957ccaSPeter Wemm nc * sizeof(cset));
1111f0957ccaSPeter Wemm if (p->g->setbits == NULL)
1112f0957ccaSPeter Wemm p->g->setbits = (uch *)malloc(nbytes);
1113f0957ccaSPeter Wemm else {
1114f0957ccaSPeter Wemm p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1115f0957ccaSPeter Wemm nbytes);
1116f0957ccaSPeter Wemm /* xxx this isn't right if setbits is now NULL */
1117f0957ccaSPeter Wemm for (i = 0; i < no; i++)
1118f0957ccaSPeter Wemm p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1119f0957ccaSPeter Wemm }
1120f0957ccaSPeter Wemm if (p->g->sets != NULL && p->g->setbits != NULL)
1121*c271fa92SBaptiste Daroussin memset((char *)p->g->setbits + (nbytes - css),
1122f0957ccaSPeter Wemm 0, css);
1123f0957ccaSPeter Wemm else {
1124f0957ccaSPeter Wemm oomem:
1125f0957ccaSPeter Wemm no = 0;
1126f0957ccaSPeter Wemm SETERROR(REG_ESPACE);
1127f0957ccaSPeter Wemm /* caller's responsibility not to do set ops */
1128f0957ccaSPeter Wemm return NULL;
1129f0957ccaSPeter Wemm }
1130f0957ccaSPeter Wemm }
1131f0957ccaSPeter Wemm
1132f0957ccaSPeter Wemm cs = &p->g->sets[no];
1133f0957ccaSPeter Wemm cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1134f0957ccaSPeter Wemm cs->mask = 1 << ((no) % CHAR_BIT);
1135f0957ccaSPeter Wemm cs->hash = 0;
1136f0957ccaSPeter Wemm cs->smultis = 0;
1137f0957ccaSPeter Wemm cs->multis = NULL;
1138f0957ccaSPeter Wemm
1139f0957ccaSPeter Wemm return(cs);
1140f0957ccaSPeter Wemm }
1141f0957ccaSPeter Wemm
1142f0957ccaSPeter Wemm /*
1143f0957ccaSPeter Wemm - freeset - free a now-unused set
1144f0957ccaSPeter Wemm */
1145f0957ccaSPeter Wemm static void
freeset(struct parse * p,cset * cs)1146*c271fa92SBaptiste Daroussin freeset(struct parse *p, cset *cs)
1147f0957ccaSPeter Wemm {
1148*c271fa92SBaptiste Daroussin size_t i;
1149*c271fa92SBaptiste Daroussin cset *top = &p->g->sets[p->g->ncsets];
1150*c271fa92SBaptiste Daroussin size_t css = (size_t)p->g->csetsize;
1151f0957ccaSPeter Wemm
1152f0957ccaSPeter Wemm for (i = 0; i < css; i++)
1153f0957ccaSPeter Wemm CHsub(cs, i);
1154f0957ccaSPeter Wemm if (cs == top-1) /* recover only the easy case */
1155f0957ccaSPeter Wemm p->g->ncsets--;
1156f0957ccaSPeter Wemm }
1157f0957ccaSPeter Wemm
1158f0957ccaSPeter Wemm /*
1159f0957ccaSPeter Wemm - freezeset - final processing on a set of characters
1160f0957ccaSPeter Wemm *
1161f0957ccaSPeter Wemm * The main task here is merging identical sets. This is usually a waste
1162f0957ccaSPeter Wemm * of time (although the hash code minimizes the overhead), but can win
1163f0957ccaSPeter Wemm * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1164f0957ccaSPeter Wemm * is done using addition rather than xor -- all ASCII [aA] sets xor to
1165f0957ccaSPeter Wemm * the same value!
1166f0957ccaSPeter Wemm */
1167f0957ccaSPeter Wemm static int /* set number */
freezeset(struct parse * p,cset * cs)1168*c271fa92SBaptiste Daroussin freezeset(struct parse *p, cset *cs)
1169f0957ccaSPeter Wemm {
1170*c271fa92SBaptiste Daroussin uch h = cs->hash;
1171*c271fa92SBaptiste Daroussin size_t i;
1172*c271fa92SBaptiste Daroussin cset *top = &p->g->sets[p->g->ncsets];
1173*c271fa92SBaptiste Daroussin cset *cs2;
1174*c271fa92SBaptiste Daroussin size_t css = (size_t)p->g->csetsize;
1175f0957ccaSPeter Wemm
1176f0957ccaSPeter Wemm /* look for an earlier one which is the same */
1177f0957ccaSPeter Wemm for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1178f0957ccaSPeter Wemm if (cs2->hash == h && cs2 != cs) {
1179f0957ccaSPeter Wemm /* maybe */
1180f0957ccaSPeter Wemm for (i = 0; i < css; i++)
1181f0957ccaSPeter Wemm if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1182f0957ccaSPeter Wemm break; /* no */
1183f0957ccaSPeter Wemm if (i == css)
1184f0957ccaSPeter Wemm break; /* yes */
1185f0957ccaSPeter Wemm }
1186f0957ccaSPeter Wemm
1187f0957ccaSPeter Wemm if (cs2 < top) { /* found one */
1188f0957ccaSPeter Wemm freeset(p, cs);
1189f0957ccaSPeter Wemm cs = cs2;
1190f0957ccaSPeter Wemm }
1191f0957ccaSPeter Wemm
1192f0957ccaSPeter Wemm return((int)(cs - p->g->sets));
1193f0957ccaSPeter Wemm }
1194f0957ccaSPeter Wemm
1195f0957ccaSPeter Wemm /*
1196f0957ccaSPeter Wemm - firstch - return first character in a set (which must have at least one)
1197f0957ccaSPeter Wemm */
1198f0957ccaSPeter Wemm static int /* character; there is no "none" value */
firstch(struct parse * p,cset * cs)1199*c271fa92SBaptiste Daroussin firstch(struct parse *p, cset *cs)
1200f0957ccaSPeter Wemm {
1201*c271fa92SBaptiste Daroussin size_t i;
1202*c271fa92SBaptiste Daroussin size_t css = (size_t)p->g->csetsize;
1203f0957ccaSPeter Wemm
1204f0957ccaSPeter Wemm for (i = 0; i < css; i++)
1205f0957ccaSPeter Wemm if (CHIN(cs, i))
1206f0957ccaSPeter Wemm return((char)i);
1207f0957ccaSPeter Wemm assert(never);
1208f0957ccaSPeter Wemm return(0); /* arbitrary */
1209f0957ccaSPeter Wemm }
1210f0957ccaSPeter Wemm
1211f0957ccaSPeter Wemm /*
1212f0957ccaSPeter Wemm - nch - number of characters in a set
1213f0957ccaSPeter Wemm */
1214f0957ccaSPeter Wemm static int
nch(struct parse * p,cset * cs)1215*c271fa92SBaptiste Daroussin nch(struct parse *p, cset *cs)
1216f0957ccaSPeter Wemm {
1217*c271fa92SBaptiste Daroussin size_t i;
1218*c271fa92SBaptiste Daroussin size_t css = (size_t)p->g->csetsize;
1219*c271fa92SBaptiste Daroussin int n = 0;
1220f0957ccaSPeter Wemm
1221f0957ccaSPeter Wemm for (i = 0; i < css; i++)
1222f0957ccaSPeter Wemm if (CHIN(cs, i))
1223f0957ccaSPeter Wemm n++;
1224f0957ccaSPeter Wemm return(n);
1225f0957ccaSPeter Wemm }
1226f0957ccaSPeter Wemm
1227f0957ccaSPeter Wemm /*
1228f0957ccaSPeter Wemm - mcadd - add a collating element to a cset
1229f0957ccaSPeter Wemm */
1230f0957ccaSPeter Wemm static void
mcadd(struct parse * p,cset * cs,const char * cp)1231*c271fa92SBaptiste Daroussin mcadd(struct parse *p, cset *cs, const char *cp)
1232f0957ccaSPeter Wemm {
1233*c271fa92SBaptiste Daroussin size_t oldend = cs->smultis;
1234f0957ccaSPeter Wemm void *np;
1235f0957ccaSPeter Wemm
1236f0957ccaSPeter Wemm cs->smultis += strlen(cp) + 1;
1237f0957ccaSPeter Wemm np = realloc(cs->multis, cs->smultis);
1238f0957ccaSPeter Wemm if (np == NULL) {
1239f0957ccaSPeter Wemm if (cs->multis)
1240f0957ccaSPeter Wemm free(cs->multis);
1241f0957ccaSPeter Wemm cs->multis = NULL;
1242f0957ccaSPeter Wemm SETERROR(REG_ESPACE);
1243f0957ccaSPeter Wemm return;
1244f0957ccaSPeter Wemm }
1245f0957ccaSPeter Wemm cs->multis = np;
1246f0957ccaSPeter Wemm
1247*c271fa92SBaptiste Daroussin strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
1248f0957ccaSPeter Wemm }
1249f0957ccaSPeter Wemm
1250f0957ccaSPeter Wemm /*
1251f0957ccaSPeter Wemm - mcinvert - invert the list of collating elements in a cset
1252f0957ccaSPeter Wemm *
1253f0957ccaSPeter Wemm * This would have to know the set of possibilities. Implementation
1254f0957ccaSPeter Wemm * is deferred.
1255f0957ccaSPeter Wemm */
1256f0957ccaSPeter Wemm static void
mcinvert(struct parse * p,cset * cs)1257*c271fa92SBaptiste Daroussin mcinvert(struct parse *p, cset *cs)
1258f0957ccaSPeter Wemm {
1259f0957ccaSPeter Wemm assert(cs->multis == NULL); /* xxx */
1260f0957ccaSPeter Wemm }
1261f0957ccaSPeter Wemm
1262f0957ccaSPeter Wemm /*
1263f0957ccaSPeter Wemm - mccase - add case counterparts of the list of collating elements in a cset
1264f0957ccaSPeter Wemm *
1265f0957ccaSPeter Wemm * This would have to know the set of possibilities. Implementation
1266f0957ccaSPeter Wemm * is deferred.
1267f0957ccaSPeter Wemm */
1268f0957ccaSPeter Wemm static void
mccase(struct parse * p,cset * cs)1269*c271fa92SBaptiste Daroussin mccase(struct parse *p, cset *cs)
1270f0957ccaSPeter Wemm {
1271f0957ccaSPeter Wemm assert(cs->multis == NULL); /* xxx */
1272f0957ccaSPeter Wemm }
1273f0957ccaSPeter Wemm
1274f0957ccaSPeter Wemm #ifdef notdef
1275f0957ccaSPeter Wemm /*
1276f0957ccaSPeter Wemm - isinsets - is this character in any sets?
1277f0957ccaSPeter Wemm */
1278f0957ccaSPeter Wemm static int /* predicate */
isinsets(struct re_guts * g,int c)1279*c271fa92SBaptiste Daroussin isinsets(struct re_guts *g, int c)
1280f0957ccaSPeter Wemm {
1281*c271fa92SBaptiste Daroussin uch *col;
1282*c271fa92SBaptiste Daroussin int i;
1283*c271fa92SBaptiste Daroussin int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1284*c271fa92SBaptiste Daroussin unsigned uc = (unsigned char)c;
1285f0957ccaSPeter Wemm
1286f0957ccaSPeter Wemm for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1287f0957ccaSPeter Wemm if (col[uc] != 0)
1288f0957ccaSPeter Wemm return(1);
1289f0957ccaSPeter Wemm return(0);
1290f0957ccaSPeter Wemm }
1291f0957ccaSPeter Wemm
1292f0957ccaSPeter Wemm /*
1293f0957ccaSPeter Wemm - samesets - are these two characters in exactly the same sets?
1294f0957ccaSPeter Wemm */
1295f0957ccaSPeter Wemm static int /* predicate */
samesets(struct re_guts * g,int c1,int c2)1296*c271fa92SBaptiste Daroussin samesets(struct re_guts *g, int c1, int c2)
1297f0957ccaSPeter Wemm {
1298*c271fa92SBaptiste Daroussin uch *col;
1299*c271fa92SBaptiste Daroussin int i;
1300*c271fa92SBaptiste Daroussin int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1301*c271fa92SBaptiste Daroussin unsigned uc1 = (unsigned char)c1;
1302*c271fa92SBaptiste Daroussin unsigned uc2 = (unsigned char)c2;
1303f0957ccaSPeter Wemm
1304f0957ccaSPeter Wemm for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1305f0957ccaSPeter Wemm if (col[uc1] != col[uc2])
1306f0957ccaSPeter Wemm return(0);
1307f0957ccaSPeter Wemm return(1);
1308f0957ccaSPeter Wemm }
1309f0957ccaSPeter Wemm #endif
1310f0957ccaSPeter Wemm
1311f0957ccaSPeter Wemm /*
1312f0957ccaSPeter Wemm - categorize - sort out character categories
1313f0957ccaSPeter Wemm */
1314f0957ccaSPeter Wemm static void
categorize(struct parse * p,struct re_guts * g)1315*c271fa92SBaptiste Daroussin categorize(struct parse *p, struct re_guts *g)
1316f0957ccaSPeter Wemm {
1317f0957ccaSPeter Wemm #ifdef notdef
1318*c271fa92SBaptiste Daroussin cat_t *cats = g->categories;
1319*c271fa92SBaptiste Daroussin int c;
1320*c271fa92SBaptiste Daroussin int c2;
1321*c271fa92SBaptiste Daroussin cat_t cat;
1322f0957ccaSPeter Wemm
1323f0957ccaSPeter Wemm /* avoid making error situations worse */
1324f0957ccaSPeter Wemm if (p->error != 0)
1325f0957ccaSPeter Wemm return;
1326f0957ccaSPeter Wemm
1327f0957ccaSPeter Wemm for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1328f0957ccaSPeter Wemm if (cats[c] == 0 && isinsets(g, c)) {
1329f0957ccaSPeter Wemm cat = g->ncategories++;
1330f0957ccaSPeter Wemm cats[c] = cat;
1331f0957ccaSPeter Wemm for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1332f0957ccaSPeter Wemm if (cats[c2] == 0 && samesets(g, c, c2))
1333f0957ccaSPeter Wemm cats[c2] = cat;
1334f0957ccaSPeter Wemm }
1335f0957ccaSPeter Wemm #endif
1336f0957ccaSPeter Wemm }
1337f0957ccaSPeter Wemm
1338f0957ccaSPeter Wemm /*
1339f0957ccaSPeter Wemm - dupl - emit a duplicate of a bunch of sops
1340f0957ccaSPeter Wemm */
1341f0957ccaSPeter Wemm static sopno /* start of duplicate */
dupl(struct parse * p,sopno start,sopno finish)1342*c271fa92SBaptiste Daroussin dupl(struct parse *p,
1343*c271fa92SBaptiste Daroussin sopno start, /* from here */
1344*c271fa92SBaptiste Daroussin sopno finish) /* to this less one */
1345f0957ccaSPeter Wemm {
1346*c271fa92SBaptiste Daroussin sopno ret = HERE();
1347*c271fa92SBaptiste Daroussin sopno len = finish - start;
1348f0957ccaSPeter Wemm
1349f0957ccaSPeter Wemm assert(finish >= start);
1350f0957ccaSPeter Wemm if (len == 0)
1351f0957ccaSPeter Wemm return(ret);
1352f0957ccaSPeter Wemm if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
1353f0957ccaSPeter Wemm return ret;
1354f0957ccaSPeter Wemm assert(p->ssize >= p->slen + len);
1355f0957ccaSPeter Wemm (void) memcpy((char *)(p->strip + p->slen),
1356f0957ccaSPeter Wemm (char *)(p->strip + start), (size_t)len*sizeof(sop));
1357f0957ccaSPeter Wemm (void) memcpy((char *)(p->stripdata + p->slen),
1358f0957ccaSPeter Wemm (char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T));
1359f0957ccaSPeter Wemm p->slen += len;
1360f0957ccaSPeter Wemm return(ret);
1361f0957ccaSPeter Wemm }
1362f0957ccaSPeter Wemm
1363f0957ccaSPeter Wemm /*
1364f0957ccaSPeter Wemm - doemit - emit a strip operator
1365f0957ccaSPeter Wemm *
1366f0957ccaSPeter Wemm * It might seem better to implement this as a macro with a function as
1367f0957ccaSPeter Wemm * hard-case backup, but it's just too big and messy unless there are
1368f0957ccaSPeter Wemm * some changes to the data structures. Maybe later.
1369f0957ccaSPeter Wemm */
1370f0957ccaSPeter Wemm static void
doemit(struct parse * p,sop op,size_t opnd)1371*c271fa92SBaptiste Daroussin doemit(struct parse *p, sop op, size_t opnd)
1372f0957ccaSPeter Wemm {
1373f0957ccaSPeter Wemm /* avoid making error situations worse */
1374f0957ccaSPeter Wemm if (p->error != 0)
1375f0957ccaSPeter Wemm return;
1376f0957ccaSPeter Wemm
1377f0957ccaSPeter Wemm /* deal with oversize operands ("can't happen", more or less) */
1378f0957ccaSPeter Wemm assert(opnd < 1);
1379f0957ccaSPeter Wemm
1380f0957ccaSPeter Wemm /* deal with undersized strip */
1381f0957ccaSPeter Wemm if (p->slen >= p->ssize)
1382f0957ccaSPeter Wemm if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */
1383f0957ccaSPeter Wemm return;
1384f0957ccaSPeter Wemm
1385f0957ccaSPeter Wemm /* finally, it's all reduced to the easy case */
1386f0957ccaSPeter Wemm p->strip[p->slen] = op;
1387f0957ccaSPeter Wemm p->stripdata[p->slen] = opnd;
1388f0957ccaSPeter Wemm p->slen++;
1389f0957ccaSPeter Wemm }
1390f0957ccaSPeter Wemm
1391f0957ccaSPeter Wemm /*
1392f0957ccaSPeter Wemm - doinsert - insert a sop into the strip
1393f0957ccaSPeter Wemm */
1394f0957ccaSPeter Wemm static void
doinsert(struct parse * p,sop op,size_t opnd,sopno pos)1395*c271fa92SBaptiste Daroussin doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
1396f0957ccaSPeter Wemm {
1397*c271fa92SBaptiste Daroussin sopno sn;
1398*c271fa92SBaptiste Daroussin sop s;
1399*c271fa92SBaptiste Daroussin RCHAR_T d;
1400*c271fa92SBaptiste Daroussin int i;
1401f0957ccaSPeter Wemm
1402f0957ccaSPeter Wemm /* avoid making error situations worse */
1403f0957ccaSPeter Wemm if (p->error != 0)
1404f0957ccaSPeter Wemm return;
1405f0957ccaSPeter Wemm
1406f0957ccaSPeter Wemm sn = HERE();
1407f0957ccaSPeter Wemm EMIT(op, opnd); /* do checks, ensure space */
1408f0957ccaSPeter Wemm assert(HERE() == sn+1);
1409f0957ccaSPeter Wemm s = p->strip[sn];
1410f0957ccaSPeter Wemm d = p->stripdata[sn];
1411f0957ccaSPeter Wemm
1412f0957ccaSPeter Wemm /* adjust paren pointers */
1413f0957ccaSPeter Wemm assert(pos > 0);
1414f0957ccaSPeter Wemm for (i = 1; i < NPAREN; i++) {
1415f0957ccaSPeter Wemm if (p->pbegin[i] >= pos) {
1416f0957ccaSPeter Wemm p->pbegin[i]++;
1417f0957ccaSPeter Wemm }
1418f0957ccaSPeter Wemm if (p->pend[i] >= pos) {
1419f0957ccaSPeter Wemm p->pend[i]++;
1420f0957ccaSPeter Wemm }
1421f0957ccaSPeter Wemm }
1422f0957ccaSPeter Wemm
1423f0957ccaSPeter Wemm memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1424f0957ccaSPeter Wemm (HERE()-pos-1)*sizeof(sop));
1425f0957ccaSPeter Wemm memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos],
1426f0957ccaSPeter Wemm (HERE()-pos-1)*sizeof(RCHAR_T));
1427f0957ccaSPeter Wemm p->strip[pos] = s;
1428f0957ccaSPeter Wemm p->stripdata[pos] = d;
1429f0957ccaSPeter Wemm }
1430f0957ccaSPeter Wemm
1431f0957ccaSPeter Wemm /*
1432f0957ccaSPeter Wemm - dofwd - complete a forward reference
1433f0957ccaSPeter Wemm */
1434f0957ccaSPeter Wemm static void
dofwd(struct parse * p,sopno pos,sop value)1435*c271fa92SBaptiste Daroussin dofwd(struct parse *p, sopno pos, sop value)
1436f0957ccaSPeter Wemm {
1437f0957ccaSPeter Wemm /* avoid making error situations worse */
1438f0957ccaSPeter Wemm if (p->error != 0)
1439f0957ccaSPeter Wemm return;
1440f0957ccaSPeter Wemm
1441f0957ccaSPeter Wemm assert(value < 1);
1442f0957ccaSPeter Wemm p->stripdata[pos] = value;
1443f0957ccaSPeter Wemm }
1444f0957ccaSPeter Wemm
1445f0957ccaSPeter Wemm /*
1446f0957ccaSPeter Wemm - enlarge - enlarge the strip
1447f0957ccaSPeter Wemm */
1448f0957ccaSPeter Wemm static int
enlarge(struct parse * p,sopno size)1449*c271fa92SBaptiste Daroussin enlarge(struct parse *p, sopno size)
1450f0957ccaSPeter Wemm {
1451*c271fa92SBaptiste Daroussin sop *sp;
1452*c271fa92SBaptiste Daroussin RCHAR_T *dp;
1453f0957ccaSPeter Wemm sopno osize;
1454f0957ccaSPeter Wemm
1455f0957ccaSPeter Wemm if (p->ssize >= size)
1456f0957ccaSPeter Wemm return 1;
1457f0957ccaSPeter Wemm
1458f0957ccaSPeter Wemm osize = p->ssize;
1459f0957ccaSPeter Wemm p->ssize = size;
1460f0957ccaSPeter Wemm if (MEMSIZE(p) > MEMLIMIT)
1461f0957ccaSPeter Wemm goto oomem;
1462f0957ccaSPeter Wemm sp = realloc(p->strip, p->ssize * sizeof(sop));
1463f0957ccaSPeter Wemm if (sp == NULL)
1464f0957ccaSPeter Wemm goto oomem;
1465f0957ccaSPeter Wemm p->strip = sp;
1466f0957ccaSPeter Wemm dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T));
1467f0957ccaSPeter Wemm if (dp == NULL) {
1468f0957ccaSPeter Wemm oomem:
1469f0957ccaSPeter Wemm p->ssize = osize;
1470f0957ccaSPeter Wemm SETERROR(REG_ESPACE);
1471f0957ccaSPeter Wemm return 0;
1472f0957ccaSPeter Wemm }
1473f0957ccaSPeter Wemm p->stripdata = dp;
1474f0957ccaSPeter Wemm return 1;
1475f0957ccaSPeter Wemm }
1476f0957ccaSPeter Wemm
1477f0957ccaSPeter Wemm /*
1478f0957ccaSPeter Wemm - stripsnug - compact the strip
1479f0957ccaSPeter Wemm */
1480f0957ccaSPeter Wemm static void
stripsnug(struct parse * p,struct re_guts * g)1481*c271fa92SBaptiste Daroussin stripsnug(struct parse *p, struct re_guts *g)
1482f0957ccaSPeter Wemm {
1483f0957ccaSPeter Wemm g->nstates = p->slen;
1484f0957ccaSPeter Wemm g->strip = (sop *)realloc((char *)p->strip,
1485f0957ccaSPeter Wemm p->slen * sizeof(sop));
1486f0957ccaSPeter Wemm if (g->strip == NULL) {
1487f0957ccaSPeter Wemm SETERROR(REG_ESPACE);
1488f0957ccaSPeter Wemm g->strip = p->strip;
1489f0957ccaSPeter Wemm }
1490f0957ccaSPeter Wemm g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata,
1491f0957ccaSPeter Wemm p->slen * sizeof(RCHAR_T));
1492f0957ccaSPeter Wemm if (g->stripdata == NULL) {
1493f0957ccaSPeter Wemm SETERROR(REG_ESPACE);
1494f0957ccaSPeter Wemm g->stripdata = p->stripdata;
1495f0957ccaSPeter Wemm }
1496f0957ccaSPeter Wemm }
1497f0957ccaSPeter Wemm
1498f0957ccaSPeter Wemm /*
1499f0957ccaSPeter Wemm - findmust - fill in must and mlen with longest mandatory literal string
1500f0957ccaSPeter Wemm *
1501f0957ccaSPeter Wemm * This algorithm could do fancy things like analyzing the operands of |
1502f0957ccaSPeter Wemm * for common subsequences. Someday. This code is simple and finds most
1503f0957ccaSPeter Wemm * of the interesting cases.
1504f0957ccaSPeter Wemm *
1505f0957ccaSPeter Wemm * Note that must and mlen got initialized during setup.
1506f0957ccaSPeter Wemm */
1507f0957ccaSPeter Wemm static void
findmust(struct parse * p,struct re_guts * g)1508*c271fa92SBaptiste Daroussin findmust(struct parse *p, struct re_guts *g)
1509f0957ccaSPeter Wemm {
1510*c271fa92SBaptiste Daroussin sop *scans;
1511*c271fa92SBaptiste Daroussin RCHAR_T *scand;
1512f0957ccaSPeter Wemm sop *starts = 0;
1513f0957ccaSPeter Wemm RCHAR_T *startd = NULL;
1514*c271fa92SBaptiste Daroussin sop *newstarts = 0;
1515*c271fa92SBaptiste Daroussin RCHAR_T *newstartd = NULL;
1516*c271fa92SBaptiste Daroussin sopno newlen;
1517*c271fa92SBaptiste Daroussin sop s;
1518*c271fa92SBaptiste Daroussin RCHAR_T d;
1519*c271fa92SBaptiste Daroussin RCHAR_T *cp;
1520*c271fa92SBaptiste Daroussin sopno i;
1521f0957ccaSPeter Wemm
1522f0957ccaSPeter Wemm /* avoid making error situations worse */
1523f0957ccaSPeter Wemm if (p->error != 0)
1524f0957ccaSPeter Wemm return;
1525f0957ccaSPeter Wemm
1526f0957ccaSPeter Wemm /* find the longest OCHAR sequence in strip */
1527f0957ccaSPeter Wemm newlen = 0;
1528f0957ccaSPeter Wemm scans = g->strip + 1;
1529f0957ccaSPeter Wemm scand = g->stripdata + 1;
1530f0957ccaSPeter Wemm do {
1531f0957ccaSPeter Wemm s = *scans++;
1532f0957ccaSPeter Wemm d = *scand++;
1533f0957ccaSPeter Wemm switch (s) {
1534f0957ccaSPeter Wemm case OCHAR: /* sequence member */
1535f0957ccaSPeter Wemm if (newlen == 0) { /* new sequence */
1536f0957ccaSPeter Wemm newstarts = scans - 1;
1537f0957ccaSPeter Wemm newstartd = scand - 1;
1538f0957ccaSPeter Wemm }
1539f0957ccaSPeter Wemm newlen++;
1540f0957ccaSPeter Wemm break;
1541f0957ccaSPeter Wemm case OPLUS_: /* things that don't break one */
1542f0957ccaSPeter Wemm case OLPAREN:
1543f0957ccaSPeter Wemm case ORPAREN:
1544f0957ccaSPeter Wemm break;
1545f0957ccaSPeter Wemm case OQUEST_: /* things that must be skipped */
1546f0957ccaSPeter Wemm case OCH_:
1547f0957ccaSPeter Wemm scans--;
1548f0957ccaSPeter Wemm scand--;
1549f0957ccaSPeter Wemm do {
1550f0957ccaSPeter Wemm scans += d;
1551f0957ccaSPeter Wemm scand += d;
1552f0957ccaSPeter Wemm s = *scans;
1553f0957ccaSPeter Wemm d = *scand;
1554f0957ccaSPeter Wemm /* assert() interferes w debug printouts */
1555f0957ccaSPeter Wemm if (s != O_QUEST && s != O_CH && s != OOR2) {
1556f0957ccaSPeter Wemm g->iflags |= BAD;
1557f0957ccaSPeter Wemm return;
1558f0957ccaSPeter Wemm }
1559f0957ccaSPeter Wemm } while (s != O_QUEST && s != O_CH);
1560f0957ccaSPeter Wemm /* fallthrough */
1561f0957ccaSPeter Wemm default: /* things that break a sequence */
1562f0957ccaSPeter Wemm if (newlen > g->mlen) { /* ends one */
1563f0957ccaSPeter Wemm starts = newstarts;
1564f0957ccaSPeter Wemm startd = newstartd;
1565f0957ccaSPeter Wemm g->mlen = newlen;
1566f0957ccaSPeter Wemm }
1567f0957ccaSPeter Wemm newlen = 0;
1568f0957ccaSPeter Wemm break;
1569f0957ccaSPeter Wemm }
1570f0957ccaSPeter Wemm } while (s != OEND);
1571f0957ccaSPeter Wemm
1572f0957ccaSPeter Wemm if (g->mlen == 0) /* there isn't one */
1573f0957ccaSPeter Wemm return;
1574f0957ccaSPeter Wemm
1575f0957ccaSPeter Wemm /* turn it into a character string */
1576f0957ccaSPeter Wemm g->must = malloc(((size_t)g->mlen + 1) * sizeof(RCHAR_T));
1577f0957ccaSPeter Wemm if (g->must == NULL) { /* argh; just forget it */
1578f0957ccaSPeter Wemm g->mlen = 0;
1579f0957ccaSPeter Wemm return;
1580f0957ccaSPeter Wemm }
1581f0957ccaSPeter Wemm cp = g->must;
1582f0957ccaSPeter Wemm scans = starts;
1583f0957ccaSPeter Wemm scand = startd;
1584f0957ccaSPeter Wemm for (i = g->mlen; i > 0; i--) {
1585f0957ccaSPeter Wemm for (;;) {
1586f0957ccaSPeter Wemm s = *scans++;
1587f0957ccaSPeter Wemm d = *scand++;
1588f0957ccaSPeter Wemm if (s == OCHAR)
1589f0957ccaSPeter Wemm break;
1590f0957ccaSPeter Wemm }
1591f0957ccaSPeter Wemm assert(cp < g->must + g->mlen);
1592f0957ccaSPeter Wemm *cp++ = d;
1593f0957ccaSPeter Wemm }
1594f0957ccaSPeter Wemm assert(cp == g->must + g->mlen);
1595f0957ccaSPeter Wemm *cp++ = '\0'; /* just on general principles */
1596f0957ccaSPeter Wemm }
1597f0957ccaSPeter Wemm
1598f0957ccaSPeter Wemm /*
1599f0957ccaSPeter Wemm - pluscount - count + nesting
1600f0957ccaSPeter Wemm */
1601f0957ccaSPeter Wemm static sopno /* nesting depth */
pluscount(struct parse * p,struct re_guts * g)1602*c271fa92SBaptiste Daroussin pluscount(struct parse *p, struct re_guts *g)
1603f0957ccaSPeter Wemm {
1604*c271fa92SBaptiste Daroussin sop *scan;
1605*c271fa92SBaptiste Daroussin sop s;
1606*c271fa92SBaptiste Daroussin sopno plusnest = 0;
1607*c271fa92SBaptiste Daroussin sopno maxnest = 0;
1608f0957ccaSPeter Wemm
1609f0957ccaSPeter Wemm if (p->error != 0)
1610f0957ccaSPeter Wemm return(0); /* there may not be an OEND */
1611f0957ccaSPeter Wemm
1612f0957ccaSPeter Wemm scan = g->strip + 1;
1613f0957ccaSPeter Wemm do {
1614f0957ccaSPeter Wemm s = *scan++;
1615f0957ccaSPeter Wemm switch (s) {
1616f0957ccaSPeter Wemm case OPLUS_:
1617f0957ccaSPeter Wemm plusnest++;
1618f0957ccaSPeter Wemm break;
1619f0957ccaSPeter Wemm case O_PLUS:
1620f0957ccaSPeter Wemm if (plusnest > maxnest)
1621f0957ccaSPeter Wemm maxnest = plusnest;
1622f0957ccaSPeter Wemm plusnest--;
1623f0957ccaSPeter Wemm break;
1624f0957ccaSPeter Wemm }
1625f0957ccaSPeter Wemm } while (s != OEND);
1626f0957ccaSPeter Wemm if (plusnest != 0)
1627f0957ccaSPeter Wemm g->iflags |= BAD;
1628f0957ccaSPeter Wemm return(maxnest);
1629f0957ccaSPeter Wemm }
1630