xref: /freebsd/lib/libc/regex/regcomp.c (revision 7fed38d0a00297914313d5b57f3fae8ef7d4e627)
158f0484fSRodney W. Grimes /*-
258f0484fSRodney W. Grimes  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
358f0484fSRodney W. Grimes  * Copyright (c) 1992, 1993, 1994
458f0484fSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
558f0484fSRodney W. Grimes  *
658f0484fSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
758f0484fSRodney W. Grimes  * Henry Spencer.
858f0484fSRodney W. Grimes  *
958f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
1058f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
1158f0484fSRodney W. Grimes  * are met:
1258f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1358f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1458f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1558f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
1658f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1758f0484fSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
1858f0484fSRodney W. Grimes  *    must display the following acknowledgement:
1958f0484fSRodney W. Grimes  *	This product includes software developed by the University of
2058f0484fSRodney W. Grimes  *	California, Berkeley and its contributors.
2158f0484fSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
2258f0484fSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
2358f0484fSRodney W. Grimes  *    without specific prior written permission.
2458f0484fSRodney W. Grimes  *
2558f0484fSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2658f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2758f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2858f0484fSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2958f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3058f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3158f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3258f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3358f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3458f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3558f0484fSRodney W. Grimes  * SUCH DAMAGE.
3658f0484fSRodney W. Grimes  *
3758f0484fSRodney W. Grimes  *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
3858f0484fSRodney W. Grimes  */
3958f0484fSRodney W. Grimes 
4058f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint)
4158f0484fSRodney W. Grimes static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
4258f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */
43333fc21eSDavid E. O'Brien #include <sys/cdefs.h>
44333fc21eSDavid E. O'Brien __FBSDID("$FreeBSD$");
4558f0484fSRodney W. Grimes 
4658f0484fSRodney W. Grimes #include <sys/types.h>
4758f0484fSRodney W. Grimes #include <stdio.h>
4858f0484fSRodney W. Grimes #include <string.h>
4958f0484fSRodney W. Grimes #include <ctype.h>
5058f0484fSRodney W. Grimes #include <limits.h>
5158f0484fSRodney W. Grimes #include <stdlib.h>
5258f0484fSRodney W. Grimes #include <regex.h>
5358f0484fSRodney W. Grimes 
54c61cea72SAndrey A. Chernov #include "collate.h"
55c61cea72SAndrey A. Chernov 
5658f0484fSRodney W. Grimes #include "utils.h"
5758f0484fSRodney W. Grimes #include "regex2.h"
5858f0484fSRodney W. Grimes 
5958f0484fSRodney W. Grimes #include "cclass.h"
6058f0484fSRodney W. Grimes #include "cname.h"
6158f0484fSRodney W. Grimes 
6258f0484fSRodney W. Grimes /*
6358f0484fSRodney W. Grimes  * parse structure, passed up and down to avoid global variables and
6458f0484fSRodney W. Grimes  * other clumsinesses
6558f0484fSRodney W. Grimes  */
6658f0484fSRodney W. Grimes struct parse {
6758f0484fSRodney W. Grimes 	char *next;		/* next character in RE */
6858f0484fSRodney W. Grimes 	char *end;		/* end of string (-> NUL normally) */
6958f0484fSRodney W. Grimes 	int error;		/* has an error been seen? */
7058f0484fSRodney W. Grimes 	sop *strip;		/* malloced strip */
7158f0484fSRodney W. Grimes 	sopno ssize;		/* malloced strip size (allocated) */
7258f0484fSRodney W. Grimes 	sopno slen;		/* malloced strip length (used) */
7358f0484fSRodney W. Grimes 	int ncsalloc;		/* number of csets allocated */
7458f0484fSRodney W. Grimes 	struct re_guts *g;
7558f0484fSRodney W. Grimes #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
7658f0484fSRodney W. Grimes 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
7758f0484fSRodney W. Grimes 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
7858f0484fSRodney W. Grimes };
7958f0484fSRodney W. Grimes 
8058f0484fSRodney W. Grimes /* ========= begin header generated by ./mkh ========= */
8158f0484fSRodney W. Grimes #ifdef __cplusplus
8258f0484fSRodney W. Grimes extern "C" {
8358f0484fSRodney W. Grimes #endif
8458f0484fSRodney W. Grimes 
8558f0484fSRodney W. Grimes /* === regcomp.c === */
86c05ac53bSDavid E. O'Brien static void p_ere(struct parse *p, int stop);
87c05ac53bSDavid E. O'Brien static void p_ere_exp(struct parse *p);
88c05ac53bSDavid E. O'Brien static void p_str(struct parse *p);
89c05ac53bSDavid E. O'Brien static void p_bre(struct parse *p, int end1, int end2);
90c05ac53bSDavid E. O'Brien static int p_simp_re(struct parse *p, int starordinary);
91c05ac53bSDavid E. O'Brien static int p_count(struct parse *p);
92c05ac53bSDavid E. O'Brien static void p_bracket(struct parse *p);
93c05ac53bSDavid E. O'Brien static void p_b_term(struct parse *p, cset *cs);
94c05ac53bSDavid E. O'Brien static void p_b_cclass(struct parse *p, cset *cs);
95c05ac53bSDavid E. O'Brien static void p_b_eclass(struct parse *p, cset *cs);
96c05ac53bSDavid E. O'Brien static char p_b_symbol(struct parse *p);
97c05ac53bSDavid E. O'Brien static char p_b_coll_elem(struct parse *p, int endc);
98c05ac53bSDavid E. O'Brien static char othercase(int ch);
99c05ac53bSDavid E. O'Brien static void bothcases(struct parse *p, int ch);
100c05ac53bSDavid E. O'Brien static void ordinary(struct parse *p, int ch);
101c05ac53bSDavid E. O'Brien static void nonnewline(struct parse *p);
102c05ac53bSDavid E. O'Brien static void repeat(struct parse *p, sopno start, int from, int to);
103c05ac53bSDavid E. O'Brien static int seterr(struct parse *p, int e);
104c05ac53bSDavid E. O'Brien static cset *allocset(struct parse *p);
105c05ac53bSDavid E. O'Brien static void freeset(struct parse *p, cset *cs);
106c05ac53bSDavid E. O'Brien static int freezeset(struct parse *p, cset *cs);
107c05ac53bSDavid E. O'Brien static int firstch(struct parse *p, cset *cs);
108c05ac53bSDavid E. O'Brien static int nch(struct parse *p, cset *cs);
109c05ac53bSDavid E. O'Brien static void mcadd(struct parse *p, cset *cs, char *cp);
11051295a4dSJordan K. Hubbard #if used
111c05ac53bSDavid E. O'Brien static void mcsub(cset *cs, char *cp);
112c05ac53bSDavid E. O'Brien static int mcin(cset *cs, char *cp);
113c05ac53bSDavid E. O'Brien static char *mcfind(cset *cs, char *cp);
11451295a4dSJordan K. Hubbard #endif
115c05ac53bSDavid E. O'Brien static void mcinvert(struct parse *p, cset *cs);
116c05ac53bSDavid E. O'Brien static void mccase(struct parse *p, cset *cs);
117c05ac53bSDavid E. O'Brien static int isinsets(struct re_guts *g, int c);
118c05ac53bSDavid E. O'Brien static int samesets(struct re_guts *g, int c1, int c2);
119c05ac53bSDavid E. O'Brien static void categorize(struct parse *p, struct re_guts *g);
120c05ac53bSDavid E. O'Brien static sopno dupl(struct parse *p, sopno start, sopno finish);
121c05ac53bSDavid E. O'Brien static void doemit(struct parse *p, sop op, size_t opnd);
122c05ac53bSDavid E. O'Brien static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
123c05ac53bSDavid E. O'Brien static void dofwd(struct parse *p, sopno pos, sop value);
124c05ac53bSDavid E. O'Brien static void enlarge(struct parse *p, sopno size);
125c05ac53bSDavid E. O'Brien static void stripsnug(struct parse *p, struct re_guts *g);
126c05ac53bSDavid E. O'Brien static void findmust(struct parse *p, struct re_guts *g);
127c05ac53bSDavid E. O'Brien static int altoffset(sop *scan, int offset, int mccs);
128c05ac53bSDavid E. O'Brien static void computejumps(struct parse *p, struct re_guts *g);
129c05ac53bSDavid E. O'Brien static void computematchjumps(struct parse *p, struct re_guts *g);
130c05ac53bSDavid E. O'Brien static sopno pluscount(struct parse *p, struct re_guts *g);
13158f0484fSRodney W. Grimes 
13258f0484fSRodney W. Grimes #ifdef __cplusplus
13358f0484fSRodney W. Grimes }
13458f0484fSRodney W. Grimes #endif
13558f0484fSRodney W. Grimes /* ========= end header generated by ./mkh ========= */
13658f0484fSRodney W. Grimes 
13758f0484fSRodney W. Grimes static char nuls[10];		/* place to point scanner in event of error */
13858f0484fSRodney W. Grimes 
13958f0484fSRodney W. Grimes /*
14058f0484fSRodney W. Grimes  * macros for use with parse structure
14158f0484fSRodney W. Grimes  * BEWARE:  these know that the parse structure is named `p' !!!
14258f0484fSRodney W. Grimes  */
14358f0484fSRodney W. Grimes #define	PEEK()	(*p->next)
14458f0484fSRodney W. Grimes #define	PEEK2()	(*(p->next+1))
14558f0484fSRodney W. Grimes #define	MORE()	(p->next < p->end)
14658f0484fSRodney W. Grimes #define	MORE2()	(p->next+1 < p->end)
14758f0484fSRodney W. Grimes #define	SEE(c)	(MORE() && PEEK() == (c))
14858f0484fSRodney W. Grimes #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
14958f0484fSRodney W. Grimes #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
15058f0484fSRodney W. Grimes #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
15158f0484fSRodney W. Grimes #define	NEXT()	(p->next++)
15258f0484fSRodney W. Grimes #define	NEXT2()	(p->next += 2)
15358f0484fSRodney W. Grimes #define	NEXTn(n)	(p->next += (n))
15458f0484fSRodney W. Grimes #define	GETNEXT()	(*p->next++)
15558f0484fSRodney W. Grimes #define	SETERROR(e)	seterr(p, (e))
15658f0484fSRodney W. Grimes #define	REQUIRE(co, e)	((co) || SETERROR(e))
15758f0484fSRodney W. Grimes #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
15858f0484fSRodney W. Grimes #define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
15958f0484fSRodney W. Grimes #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
16058f0484fSRodney W. Grimes #define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
16158f0484fSRodney W. Grimes #define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
16258f0484fSRodney W. Grimes #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
16358f0484fSRodney W. Grimes #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
16458f0484fSRodney W. Grimes #define	HERE()		(p->slen)
16558f0484fSRodney W. Grimes #define	THERE()		(p->slen - 1)
16658f0484fSRodney W. Grimes #define	THERETHERE()	(p->slen - 2)
16758f0484fSRodney W. Grimes #define	DROP(n)	(p->slen -= (n))
16858f0484fSRodney W. Grimes 
16958f0484fSRodney W. Grimes #ifndef NDEBUG
17058f0484fSRodney W. Grimes static int never = 0;		/* for use in asserts; shuts lint up */
17158f0484fSRodney W. Grimes #else
17258f0484fSRodney W. Grimes #define	never	0		/* some <assert.h>s have bugs too */
17358f0484fSRodney W. Grimes #endif
17458f0484fSRodney W. Grimes 
1756049d9f0SDaniel C. Sobral /* Macro used by computejump()/computematchjump() */
1766049d9f0SDaniel C. Sobral #define MIN(a,b)	((a)<(b)?(a):(b))
1776049d9f0SDaniel C. Sobral 
17858f0484fSRodney W. Grimes /*
17958f0484fSRodney W. Grimes  - regcomp - interface for parser and compilation
18058f0484fSRodney W. Grimes  = extern int regcomp(regex_t *, const char *, int);
18158f0484fSRodney W. Grimes  = #define	REG_BASIC	0000
18258f0484fSRodney W. Grimes  = #define	REG_EXTENDED	0001
18358f0484fSRodney W. Grimes  = #define	REG_ICASE	0002
18458f0484fSRodney W. Grimes  = #define	REG_NOSUB	0004
18558f0484fSRodney W. Grimes  = #define	REG_NEWLINE	0010
18658f0484fSRodney W. Grimes  = #define	REG_NOSPEC	0020
18758f0484fSRodney W. Grimes  = #define	REG_PEND	0040
18858f0484fSRodney W. Grimes  = #define	REG_DUMP	0200
18958f0484fSRodney W. Grimes  */
19058f0484fSRodney W. Grimes int				/* 0 success, otherwise REG_something */
19158f0484fSRodney W. Grimes regcomp(preg, pattern, cflags)
19258f0484fSRodney W. Grimes regex_t *preg;
19358f0484fSRodney W. Grimes const char *pattern;
19458f0484fSRodney W. Grimes int cflags;
19558f0484fSRodney W. Grimes {
19658f0484fSRodney W. Grimes 	struct parse pa;
1978fb3f3f6SDavid E. O'Brien 	struct re_guts *g;
1988fb3f3f6SDavid E. O'Brien 	struct parse *p = &pa;
1998fb3f3f6SDavid E. O'Brien 	int i;
2008fb3f3f6SDavid E. O'Brien 	size_t len;
20158f0484fSRodney W. Grimes #ifdef REDEBUG
20258f0484fSRodney W. Grimes #	define	GOODFLAGS(f)	(f)
20358f0484fSRodney W. Grimes #else
20458f0484fSRodney W. Grimes #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
20558f0484fSRodney W. Grimes #endif
20658f0484fSRodney W. Grimes 
20758f0484fSRodney W. Grimes 	cflags = GOODFLAGS(cflags);
20858f0484fSRodney W. Grimes 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
20958f0484fSRodney W. Grimes 		return(REG_INVARG);
21058f0484fSRodney W. Grimes 
21158f0484fSRodney W. Grimes 	if (cflags&REG_PEND) {
21258f0484fSRodney W. Grimes 		if (preg->re_endp < pattern)
21358f0484fSRodney W. Grimes 			return(REG_INVARG);
21458f0484fSRodney W. Grimes 		len = preg->re_endp - pattern;
21558f0484fSRodney W. Grimes 	} else
21658f0484fSRodney W. Grimes 		len = strlen((char *)pattern);
21758f0484fSRodney W. Grimes 
21858f0484fSRodney W. Grimes 	/* do the mallocs early so failure handling is easy */
21958f0484fSRodney W. Grimes 	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
22058f0484fSRodney W. Grimes 							(NC-1)*sizeof(cat_t));
22158f0484fSRodney W. Grimes 	if (g == NULL)
22258f0484fSRodney W. Grimes 		return(REG_ESPACE);
22358f0484fSRodney W. Grimes 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
22458f0484fSRodney W. Grimes 	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
22558f0484fSRodney W. Grimes 	p->slen = 0;
22658f0484fSRodney W. Grimes 	if (p->strip == NULL) {
22758f0484fSRodney W. Grimes 		free((char *)g);
22858f0484fSRodney W. Grimes 		return(REG_ESPACE);
22958f0484fSRodney W. Grimes 	}
23058f0484fSRodney W. Grimes 
23158f0484fSRodney W. Grimes 	/* set things up */
23258f0484fSRodney W. Grimes 	p->g = g;
23358f0484fSRodney W. Grimes 	p->next = (char *)pattern;	/* convenience; we do not modify it */
23458f0484fSRodney W. Grimes 	p->end = p->next + len;
23558f0484fSRodney W. Grimes 	p->error = 0;
23658f0484fSRodney W. Grimes 	p->ncsalloc = 0;
23758f0484fSRodney W. Grimes 	for (i = 0; i < NPAREN; i++) {
23858f0484fSRodney W. Grimes 		p->pbegin[i] = 0;
23958f0484fSRodney W. Grimes 		p->pend[i] = 0;
24058f0484fSRodney W. Grimes 	}
24158f0484fSRodney W. Grimes 	g->csetsize = NC;
24258f0484fSRodney W. Grimes 	g->sets = NULL;
24358f0484fSRodney W. Grimes 	g->setbits = NULL;
24458f0484fSRodney W. Grimes 	g->ncsets = 0;
24558f0484fSRodney W. Grimes 	g->cflags = cflags;
24658f0484fSRodney W. Grimes 	g->iflags = 0;
24758f0484fSRodney W. Grimes 	g->nbol = 0;
24858f0484fSRodney W. Grimes 	g->neol = 0;
24958f0484fSRodney W. Grimes 	g->must = NULL;
250e6a886d8SDaniel C. Sobral 	g->moffset = -1;
2516b709b74SDaniel C. Sobral 	g->charjump = NULL;
2526b709b74SDaniel C. Sobral 	g->matchjump = NULL;
25358f0484fSRodney W. Grimes 	g->mlen = 0;
25458f0484fSRodney W. Grimes 	g->nsub = 0;
25558f0484fSRodney W. Grimes 	g->ncategories = 1;	/* category 0 is "everything else" */
25658f0484fSRodney W. Grimes 	g->categories = &g->catspace[-(CHAR_MIN)];
25758f0484fSRodney W. Grimes 	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
25858f0484fSRodney W. Grimes 	g->backrefs = 0;
25958f0484fSRodney W. Grimes 
26058f0484fSRodney W. Grimes 	/* do it */
26158f0484fSRodney W. Grimes 	EMIT(OEND, 0);
26258f0484fSRodney W. Grimes 	g->firststate = THERE();
26358f0484fSRodney W. Grimes 	if (cflags&REG_EXTENDED)
26458f0484fSRodney W. Grimes 		p_ere(p, OUT);
26558f0484fSRodney W. Grimes 	else if (cflags&REG_NOSPEC)
26658f0484fSRodney W. Grimes 		p_str(p);
26758f0484fSRodney W. Grimes 	else
26858f0484fSRodney W. Grimes 		p_bre(p, OUT, OUT);
26958f0484fSRodney W. Grimes 	EMIT(OEND, 0);
27058f0484fSRodney W. Grimes 	g->laststate = THERE();
27158f0484fSRodney W. Grimes 
27258f0484fSRodney W. Grimes 	/* tidy up loose ends and fill things in */
27358f0484fSRodney W. Grimes 	categorize(p, g);
27458f0484fSRodney W. Grimes 	stripsnug(p, g);
27558f0484fSRodney W. Grimes 	findmust(p, g);
2766049d9f0SDaniel C. Sobral 	/* only use Boyer-Moore algorithm if the pattern is bigger
2776049d9f0SDaniel C. Sobral 	 * than three characters
2786049d9f0SDaniel C. Sobral 	 */
2796049d9f0SDaniel C. Sobral 	if(g->mlen > 3) {
2806049d9f0SDaniel C. Sobral 		computejumps(p, g);
2816049d9f0SDaniel C. Sobral 		computematchjumps(p, g);
2824d41cae8SDaniel C. Sobral 		if(g->matchjump == NULL && g->charjump != NULL) {
2836049d9f0SDaniel C. Sobral 			free(g->charjump);
2846049d9f0SDaniel C. Sobral 			g->charjump = NULL;
2856049d9f0SDaniel C. Sobral 		}
2866049d9f0SDaniel C. Sobral 	}
28758f0484fSRodney W. Grimes 	g->nplus = pluscount(p, g);
28858f0484fSRodney W. Grimes 	g->magic = MAGIC2;
28958f0484fSRodney W. Grimes 	preg->re_nsub = g->nsub;
29058f0484fSRodney W. Grimes 	preg->re_g = g;
29158f0484fSRodney W. Grimes 	preg->re_magic = MAGIC1;
29258f0484fSRodney W. Grimes #ifndef REDEBUG
29358f0484fSRodney W. Grimes 	/* not debugging, so can't rely on the assert() in regexec() */
29458f0484fSRodney W. Grimes 	if (g->iflags&BAD)
29558f0484fSRodney W. Grimes 		SETERROR(REG_ASSERT);
29658f0484fSRodney W. Grimes #endif
29758f0484fSRodney W. Grimes 
29858f0484fSRodney W. Grimes 	/* win or lose, we're done */
29958f0484fSRodney W. Grimes 	if (p->error != 0)	/* lose */
30058f0484fSRodney W. Grimes 		regfree(preg);
30158f0484fSRodney W. Grimes 	return(p->error);
30258f0484fSRodney W. Grimes }
30358f0484fSRodney W. Grimes 
30458f0484fSRodney W. Grimes /*
30558f0484fSRodney W. Grimes  - p_ere - ERE parser top level, concatenation and alternation
3068fb3f3f6SDavid E. O'Brien  == static void p_ere(struct parse *p, int stop);
30758f0484fSRodney W. Grimes  */
30858f0484fSRodney W. Grimes static void
30958f0484fSRodney W. Grimes p_ere(p, stop)
3108fb3f3f6SDavid E. O'Brien struct parse *p;
31158f0484fSRodney W. Grimes int stop;			/* character this ERE should end at */
31258f0484fSRodney W. Grimes {
3138fb3f3f6SDavid E. O'Brien 	char c;
3148fb3f3f6SDavid E. O'Brien 	sopno prevback;
3158fb3f3f6SDavid E. O'Brien 	sopno prevfwd;
3168fb3f3f6SDavid E. O'Brien 	sopno conc;
3178fb3f3f6SDavid E. O'Brien 	int first = 1;		/* is this the first alternative? */
31858f0484fSRodney W. Grimes 
31958f0484fSRodney W. Grimes 	for (;;) {
32058f0484fSRodney W. Grimes 		/* do a bunch of concatenated expressions */
32158f0484fSRodney W. Grimes 		conc = HERE();
32258f0484fSRodney W. Grimes 		while (MORE() && (c = PEEK()) != '|' && c != stop)
32358f0484fSRodney W. Grimes 			p_ere_exp(p);
32451295a4dSJordan K. Hubbard 		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
32558f0484fSRodney W. Grimes 
32658f0484fSRodney W. Grimes 		if (!EAT('|'))
32758f0484fSRodney W. Grimes 			break;		/* NOTE BREAK OUT */
32858f0484fSRodney W. Grimes 
32958f0484fSRodney W. Grimes 		if (first) {
33058f0484fSRodney W. Grimes 			INSERT(OCH_, conc);	/* offset is wrong */
33158f0484fSRodney W. Grimes 			prevfwd = conc;
33258f0484fSRodney W. Grimes 			prevback = conc;
33358f0484fSRodney W. Grimes 			first = 0;
33458f0484fSRodney W. Grimes 		}
33558f0484fSRodney W. Grimes 		ASTERN(OOR1, prevback);
33658f0484fSRodney W. Grimes 		prevback = THERE();
33758f0484fSRodney W. Grimes 		AHEAD(prevfwd);			/* fix previous offset */
33858f0484fSRodney W. Grimes 		prevfwd = HERE();
33958f0484fSRodney W. Grimes 		EMIT(OOR2, 0);			/* offset is very wrong */
34058f0484fSRodney W. Grimes 	}
34158f0484fSRodney W. Grimes 
34258f0484fSRodney W. Grimes 	if (!first) {		/* tail-end fixups */
34358f0484fSRodney W. Grimes 		AHEAD(prevfwd);
34458f0484fSRodney W. Grimes 		ASTERN(O_CH, prevback);
34558f0484fSRodney W. Grimes 	}
34658f0484fSRodney W. Grimes 
34758f0484fSRodney W. Grimes 	assert(!MORE() || SEE(stop));
34858f0484fSRodney W. Grimes }
34958f0484fSRodney W. Grimes 
35058f0484fSRodney W. Grimes /*
35158f0484fSRodney W. Grimes  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
3528fb3f3f6SDavid E. O'Brien  == static void p_ere_exp(struct parse *p);
35358f0484fSRodney W. Grimes  */
35458f0484fSRodney W. Grimes static void
35558f0484fSRodney W. Grimes p_ere_exp(p)
3568fb3f3f6SDavid E. O'Brien struct parse *p;
35758f0484fSRodney W. Grimes {
3588fb3f3f6SDavid E. O'Brien 	char c;
3598fb3f3f6SDavid E. O'Brien 	sopno pos;
3608fb3f3f6SDavid E. O'Brien 	int count;
3618fb3f3f6SDavid E. O'Brien 	int count2;
3628fb3f3f6SDavid E. O'Brien 	sopno subno;
36358f0484fSRodney W. Grimes 	int wascaret = 0;
36458f0484fSRodney W. Grimes 
36558f0484fSRodney W. Grimes 	assert(MORE());		/* caller should have ensured this */
36658f0484fSRodney W. Grimes 	c = GETNEXT();
36758f0484fSRodney W. Grimes 
36858f0484fSRodney W. Grimes 	pos = HERE();
36958f0484fSRodney W. Grimes 	switch (c) {
37058f0484fSRodney W. Grimes 	case '(':
37151295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EPAREN);
37258f0484fSRodney W. Grimes 		p->g->nsub++;
37358f0484fSRodney W. Grimes 		subno = p->g->nsub;
37458f0484fSRodney W. Grimes 		if (subno < NPAREN)
37558f0484fSRodney W. Grimes 			p->pbegin[subno] = HERE();
37658f0484fSRodney W. Grimes 		EMIT(OLPAREN, subno);
37758f0484fSRodney W. Grimes 		if (!SEE(')'))
37858f0484fSRodney W. Grimes 			p_ere(p, ')');
37958f0484fSRodney W. Grimes 		if (subno < NPAREN) {
38058f0484fSRodney W. Grimes 			p->pend[subno] = HERE();
38158f0484fSRodney W. Grimes 			assert(p->pend[subno] != 0);
38258f0484fSRodney W. Grimes 		}
38358f0484fSRodney W. Grimes 		EMIT(ORPAREN, subno);
38451295a4dSJordan K. Hubbard 		(void)MUSTEAT(')', REG_EPAREN);
38558f0484fSRodney W. Grimes 		break;
38658f0484fSRodney W. Grimes #ifndef POSIX_MISTAKE
38758f0484fSRodney W. Grimes 	case ')':		/* happens only if no current unmatched ( */
38858f0484fSRodney W. Grimes 		/*
38958f0484fSRodney W. Grimes 		 * You may ask, why the ifndef?  Because I didn't notice
39058f0484fSRodney W. Grimes 		 * this until slightly too late for 1003.2, and none of the
39158f0484fSRodney W. Grimes 		 * other 1003.2 regular-expression reviewers noticed it at
39258f0484fSRodney W. Grimes 		 * all.  So an unmatched ) is legal POSIX, at least until
39358f0484fSRodney W. Grimes 		 * we can get it fixed.
39458f0484fSRodney W. Grimes 		 */
39558f0484fSRodney W. Grimes 		SETERROR(REG_EPAREN);
39658f0484fSRodney W. Grimes 		break;
39758f0484fSRodney W. Grimes #endif
39858f0484fSRodney W. Grimes 	case '^':
39958f0484fSRodney W. Grimes 		EMIT(OBOL, 0);
40058f0484fSRodney W. Grimes 		p->g->iflags |= USEBOL;
40158f0484fSRodney W. Grimes 		p->g->nbol++;
40258f0484fSRodney W. Grimes 		wascaret = 1;
40358f0484fSRodney W. Grimes 		break;
40458f0484fSRodney W. Grimes 	case '$':
40558f0484fSRodney W. Grimes 		EMIT(OEOL, 0);
40658f0484fSRodney W. Grimes 		p->g->iflags |= USEEOL;
40758f0484fSRodney W. Grimes 		p->g->neol++;
40858f0484fSRodney W. Grimes 		break;
40958f0484fSRodney W. Grimes 	case '|':
41058f0484fSRodney W. Grimes 		SETERROR(REG_EMPTY);
41158f0484fSRodney W. Grimes 		break;
41258f0484fSRodney W. Grimes 	case '*':
41358f0484fSRodney W. Grimes 	case '+':
41458f0484fSRodney W. Grimes 	case '?':
41558f0484fSRodney W. Grimes 		SETERROR(REG_BADRPT);
41658f0484fSRodney W. Grimes 		break;
41758f0484fSRodney W. Grimes 	case '.':
41858f0484fSRodney W. Grimes 		if (p->g->cflags&REG_NEWLINE)
41958f0484fSRodney W. Grimes 			nonnewline(p);
42058f0484fSRodney W. Grimes 		else
42158f0484fSRodney W. Grimes 			EMIT(OANY, 0);
42258f0484fSRodney W. Grimes 		break;
42358f0484fSRodney W. Grimes 	case '[':
42458f0484fSRodney W. Grimes 		p_bracket(p);
42558f0484fSRodney W. Grimes 		break;
42658f0484fSRodney W. Grimes 	case '\\':
42751295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EESCAPE);
42858f0484fSRodney W. Grimes 		c = GETNEXT();
42958f0484fSRodney W. Grimes 		ordinary(p, c);
43058f0484fSRodney W. Grimes 		break;
43158f0484fSRodney W. Grimes 	case '{':		/* okay as ordinary except if digit follows */
43289b86d02SAndrey A. Chernov 		(void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
43358f0484fSRodney W. Grimes 		/* FALLTHROUGH */
43458f0484fSRodney W. Grimes 	default:
43558f0484fSRodney W. Grimes 		ordinary(p, c);
43658f0484fSRodney W. Grimes 		break;
43758f0484fSRodney W. Grimes 	}
43858f0484fSRodney W. Grimes 
43958f0484fSRodney W. Grimes 	if (!MORE())
44058f0484fSRodney W. Grimes 		return;
44158f0484fSRodney W. Grimes 	c = PEEK();
44258f0484fSRodney W. Grimes 	/* we call { a repetition if followed by a digit */
44358f0484fSRodney W. Grimes 	if (!( c == '*' || c == '+' || c == '?' ||
44489b86d02SAndrey A. Chernov 				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
44558f0484fSRodney W. Grimes 		return;		/* no repetition, we're done */
44658f0484fSRodney W. Grimes 	NEXT();
44758f0484fSRodney W. Grimes 
44851295a4dSJordan K. Hubbard 	(void)REQUIRE(!wascaret, REG_BADRPT);
44958f0484fSRodney W. Grimes 	switch (c) {
45058f0484fSRodney W. Grimes 	case '*':	/* implemented as +? */
45158f0484fSRodney W. Grimes 		/* this case does not require the (y|) trick, noKLUDGE */
45258f0484fSRodney W. Grimes 		INSERT(OPLUS_, pos);
45358f0484fSRodney W. Grimes 		ASTERN(O_PLUS, pos);
45458f0484fSRodney W. Grimes 		INSERT(OQUEST_, pos);
45558f0484fSRodney W. Grimes 		ASTERN(O_QUEST, pos);
45658f0484fSRodney W. Grimes 		break;
45758f0484fSRodney W. Grimes 	case '+':
45858f0484fSRodney W. Grimes 		INSERT(OPLUS_, pos);
45958f0484fSRodney W. Grimes 		ASTERN(O_PLUS, pos);
46058f0484fSRodney W. Grimes 		break;
46158f0484fSRodney W. Grimes 	case '?':
46258f0484fSRodney W. Grimes 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
46358f0484fSRodney W. Grimes 		INSERT(OCH_, pos);		/* offset slightly wrong */
46458f0484fSRodney W. Grimes 		ASTERN(OOR1, pos);		/* this one's right */
46558f0484fSRodney W. Grimes 		AHEAD(pos);			/* fix the OCH_ */
46658f0484fSRodney W. Grimes 		EMIT(OOR2, 0);			/* offset very wrong... */
46758f0484fSRodney W. Grimes 		AHEAD(THERE());			/* ...so fix it */
46858f0484fSRodney W. Grimes 		ASTERN(O_CH, THERETHERE());
46958f0484fSRodney W. Grimes 		break;
47058f0484fSRodney W. Grimes 	case '{':
47158f0484fSRodney W. Grimes 		count = p_count(p);
47258f0484fSRodney W. Grimes 		if (EAT(',')) {
47389b86d02SAndrey A. Chernov 			if (isdigit((uch)PEEK())) {
47458f0484fSRodney W. Grimes 				count2 = p_count(p);
47551295a4dSJordan K. Hubbard 				(void)REQUIRE(count <= count2, REG_BADBR);
47658f0484fSRodney W. Grimes 			} else		/* single number with comma */
47758f0484fSRodney W. Grimes 				count2 = INFINITY;
47858f0484fSRodney W. Grimes 		} else		/* just a single number */
47958f0484fSRodney W. Grimes 			count2 = count;
48058f0484fSRodney W. Grimes 		repeat(p, pos, count, count2);
48158f0484fSRodney W. Grimes 		if (!EAT('}')) {	/* error heuristics */
48258f0484fSRodney W. Grimes 			while (MORE() && PEEK() != '}')
48358f0484fSRodney W. Grimes 				NEXT();
48451295a4dSJordan K. Hubbard 			(void)REQUIRE(MORE(), REG_EBRACE);
48558f0484fSRodney W. Grimes 			SETERROR(REG_BADBR);
48658f0484fSRodney W. Grimes 		}
48758f0484fSRodney W. Grimes 		break;
48858f0484fSRodney W. Grimes 	}
48958f0484fSRodney W. Grimes 
49058f0484fSRodney W. Grimes 	if (!MORE())
49158f0484fSRodney W. Grimes 		return;
49258f0484fSRodney W. Grimes 	c = PEEK();
49358f0484fSRodney W. Grimes 	if (!( c == '*' || c == '+' || c == '?' ||
49489b86d02SAndrey A. Chernov 				(c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
49558f0484fSRodney W. Grimes 		return;
49658f0484fSRodney W. Grimes 	SETERROR(REG_BADRPT);
49758f0484fSRodney W. Grimes }
49858f0484fSRodney W. Grimes 
49958f0484fSRodney W. Grimes /*
50058f0484fSRodney W. Grimes  - p_str - string (no metacharacters) "parser"
5018fb3f3f6SDavid E. O'Brien  == static void p_str(struct parse *p);
50258f0484fSRodney W. Grimes  */
50358f0484fSRodney W. Grimes static void
50458f0484fSRodney W. Grimes p_str(p)
5058fb3f3f6SDavid E. O'Brien struct parse *p;
50658f0484fSRodney W. Grimes {
50751295a4dSJordan K. Hubbard 	(void)REQUIRE(MORE(), REG_EMPTY);
50858f0484fSRodney W. Grimes 	while (MORE())
50958f0484fSRodney W. Grimes 		ordinary(p, GETNEXT());
51058f0484fSRodney W. Grimes }
51158f0484fSRodney W. Grimes 
51258f0484fSRodney W. Grimes /*
51358f0484fSRodney W. Grimes  - p_bre - BRE parser top level, anchoring and concatenation
5148fb3f3f6SDavid E. O'Brien  == static void p_bre(struct parse *p, int end1, \
5158fb3f3f6SDavid E. O'Brien  ==	int end2);
51658f0484fSRodney W. Grimes  * Giving end1 as OUT essentially eliminates the end1/end2 check.
51758f0484fSRodney W. Grimes  *
51858f0484fSRodney W. Grimes  * This implementation is a bit of a kludge, in that a trailing $ is first
51958f0484fSRodney W. Grimes  * taken as an ordinary character and then revised to be an anchor.  The
52058f0484fSRodney W. Grimes  * only undesirable side effect is that '$' gets included as a character
52158f0484fSRodney W. Grimes  * category in such cases.  This is fairly harmless; not worth fixing.
52258f0484fSRodney W. Grimes  * The amount of lookahead needed to avoid this kludge is excessive.
52358f0484fSRodney W. Grimes  */
52458f0484fSRodney W. Grimes static void
52558f0484fSRodney W. Grimes p_bre(p, end1, end2)
5268fb3f3f6SDavid E. O'Brien struct parse *p;
5278fb3f3f6SDavid E. O'Brien int end1;			/* first terminating character */
5288fb3f3f6SDavid E. O'Brien int end2;			/* second terminating character */
52958f0484fSRodney W. Grimes {
5308fb3f3f6SDavid E. O'Brien 	sopno start = HERE();
5318fb3f3f6SDavid E. O'Brien 	int first = 1;			/* first subexpression? */
5328fb3f3f6SDavid E. O'Brien 	int wasdollar = 0;
53358f0484fSRodney W. Grimes 
53458f0484fSRodney W. Grimes 	if (EAT('^')) {
53558f0484fSRodney W. Grimes 		EMIT(OBOL, 0);
53658f0484fSRodney W. Grimes 		p->g->iflags |= USEBOL;
53758f0484fSRodney W. Grimes 		p->g->nbol++;
53858f0484fSRodney W. Grimes 	}
53958f0484fSRodney W. Grimes 	while (MORE() && !SEETWO(end1, end2)) {
54058f0484fSRodney W. Grimes 		wasdollar = p_simp_re(p, first);
54158f0484fSRodney W. Grimes 		first = 0;
54258f0484fSRodney W. Grimes 	}
54358f0484fSRodney W. Grimes 	if (wasdollar) {	/* oops, that was a trailing anchor */
54458f0484fSRodney W. Grimes 		DROP(1);
54558f0484fSRodney W. Grimes 		EMIT(OEOL, 0);
54658f0484fSRodney W. Grimes 		p->g->iflags |= USEEOL;
54758f0484fSRodney W. Grimes 		p->g->neol++;
54858f0484fSRodney W. Grimes 	}
54958f0484fSRodney W. Grimes 
55051295a4dSJordan K. Hubbard 	(void)REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
55158f0484fSRodney W. Grimes }
55258f0484fSRodney W. Grimes 
55358f0484fSRodney W. Grimes /*
55458f0484fSRodney W. Grimes  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
5558fb3f3f6SDavid E. O'Brien  == static int p_simp_re(struct parse *p, int starordinary);
55658f0484fSRodney W. Grimes  */
55758f0484fSRodney W. Grimes static int			/* was the simple RE an unbackslashed $? */
55858f0484fSRodney W. Grimes p_simp_re(p, starordinary)
5598fb3f3f6SDavid E. O'Brien struct parse *p;
56058f0484fSRodney W. Grimes int starordinary;		/* is a leading * an ordinary character? */
56158f0484fSRodney W. Grimes {
5628fb3f3f6SDavid E. O'Brien 	int c;
5638fb3f3f6SDavid E. O'Brien 	int count;
5648fb3f3f6SDavid E. O'Brien 	int count2;
5658fb3f3f6SDavid E. O'Brien 	sopno pos;
5668fb3f3f6SDavid E. O'Brien 	int i;
5678fb3f3f6SDavid E. O'Brien 	sopno subno;
56858f0484fSRodney W. Grimes #	define	BACKSL	(1<<CHAR_BIT)
56958f0484fSRodney W. Grimes 
57058f0484fSRodney W. Grimes 	pos = HERE();		/* repetion op, if any, covers from here */
57158f0484fSRodney W. Grimes 
57258f0484fSRodney W. Grimes 	assert(MORE());		/* caller should have ensured this */
57358f0484fSRodney W. Grimes 	c = GETNEXT();
57458f0484fSRodney W. Grimes 	if (c == '\\') {
57551295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EESCAPE);
57689b86d02SAndrey A. Chernov 		c = BACKSL | GETNEXT();
57758f0484fSRodney W. Grimes 	}
57858f0484fSRodney W. Grimes 	switch (c) {
57958f0484fSRodney W. Grimes 	case '.':
58058f0484fSRodney W. Grimes 		if (p->g->cflags&REG_NEWLINE)
58158f0484fSRodney W. Grimes 			nonnewline(p);
58258f0484fSRodney W. Grimes 		else
58358f0484fSRodney W. Grimes 			EMIT(OANY, 0);
58458f0484fSRodney W. Grimes 		break;
58558f0484fSRodney W. Grimes 	case '[':
58658f0484fSRodney W. Grimes 		p_bracket(p);
58758f0484fSRodney W. Grimes 		break;
58858f0484fSRodney W. Grimes 	case BACKSL|'{':
58958f0484fSRodney W. Grimes 		SETERROR(REG_BADRPT);
59058f0484fSRodney W. Grimes 		break;
59158f0484fSRodney W. Grimes 	case BACKSL|'(':
59258f0484fSRodney W. Grimes 		p->g->nsub++;
59358f0484fSRodney W. Grimes 		subno = p->g->nsub;
59458f0484fSRodney W. Grimes 		if (subno < NPAREN)
59558f0484fSRodney W. Grimes 			p->pbegin[subno] = HERE();
59658f0484fSRodney W. Grimes 		EMIT(OLPAREN, subno);
59758f0484fSRodney W. Grimes 		/* the MORE here is an error heuristic */
59858f0484fSRodney W. Grimes 		if (MORE() && !SEETWO('\\', ')'))
59958f0484fSRodney W. Grimes 			p_bre(p, '\\', ')');
60058f0484fSRodney W. Grimes 		if (subno < NPAREN) {
60158f0484fSRodney W. Grimes 			p->pend[subno] = HERE();
60258f0484fSRodney W. Grimes 			assert(p->pend[subno] != 0);
60358f0484fSRodney W. Grimes 		}
60458f0484fSRodney W. Grimes 		EMIT(ORPAREN, subno);
60551295a4dSJordan K. Hubbard 		(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
60658f0484fSRodney W. Grimes 		break;
60758f0484fSRodney W. Grimes 	case BACKSL|')':	/* should not get here -- must be user */
60858f0484fSRodney W. Grimes 	case BACKSL|'}':
60958f0484fSRodney W. Grimes 		SETERROR(REG_EPAREN);
61058f0484fSRodney W. Grimes 		break;
61158f0484fSRodney W. Grimes 	case BACKSL|'1':
61258f0484fSRodney W. Grimes 	case BACKSL|'2':
61358f0484fSRodney W. Grimes 	case BACKSL|'3':
61458f0484fSRodney W. Grimes 	case BACKSL|'4':
61558f0484fSRodney W. Grimes 	case BACKSL|'5':
61658f0484fSRodney W. Grimes 	case BACKSL|'6':
61758f0484fSRodney W. Grimes 	case BACKSL|'7':
61858f0484fSRodney W. Grimes 	case BACKSL|'8':
61958f0484fSRodney W. Grimes 	case BACKSL|'9':
62058f0484fSRodney W. Grimes 		i = (c&~BACKSL) - '0';
62158f0484fSRodney W. Grimes 		assert(i < NPAREN);
62258f0484fSRodney W. Grimes 		if (p->pend[i] != 0) {
62358f0484fSRodney W. Grimes 			assert(i <= p->g->nsub);
62458f0484fSRodney W. Grimes 			EMIT(OBACK_, i);
62558f0484fSRodney W. Grimes 			assert(p->pbegin[i] != 0);
62658f0484fSRodney W. Grimes 			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
62758f0484fSRodney W. Grimes 			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
62858f0484fSRodney W. Grimes 			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
62958f0484fSRodney W. Grimes 			EMIT(O_BACK, i);
63058f0484fSRodney W. Grimes 		} else
63158f0484fSRodney W. Grimes 			SETERROR(REG_ESUBREG);
63258f0484fSRodney W. Grimes 		p->g->backrefs = 1;
63358f0484fSRodney W. Grimes 		break;
63458f0484fSRodney W. Grimes 	case '*':
63551295a4dSJordan K. Hubbard 		(void)REQUIRE(starordinary, REG_BADRPT);
63658f0484fSRodney W. Grimes 		/* FALLTHROUGH */
63758f0484fSRodney W. Grimes 	default:
63889b86d02SAndrey A. Chernov 		ordinary(p, (char)c);
63958f0484fSRodney W. Grimes 		break;
64058f0484fSRodney W. Grimes 	}
64158f0484fSRodney W. Grimes 
64258f0484fSRodney W. Grimes 	if (EAT('*')) {		/* implemented as +? */
64358f0484fSRodney W. Grimes 		/* this case does not require the (y|) trick, noKLUDGE */
64458f0484fSRodney W. Grimes 		INSERT(OPLUS_, pos);
64558f0484fSRodney W. Grimes 		ASTERN(O_PLUS, pos);
64658f0484fSRodney W. Grimes 		INSERT(OQUEST_, pos);
64758f0484fSRodney W. Grimes 		ASTERN(O_QUEST, pos);
64858f0484fSRodney W. Grimes 	} else if (EATTWO('\\', '{')) {
64958f0484fSRodney W. Grimes 		count = p_count(p);
65058f0484fSRodney W. Grimes 		if (EAT(',')) {
65189b86d02SAndrey A. Chernov 			if (MORE() && isdigit((uch)PEEK())) {
65258f0484fSRodney W. Grimes 				count2 = p_count(p);
65351295a4dSJordan K. Hubbard 				(void)REQUIRE(count <= count2, REG_BADBR);
65458f0484fSRodney W. Grimes 			} else		/* single number with comma */
65558f0484fSRodney W. Grimes 				count2 = INFINITY;
65658f0484fSRodney W. Grimes 		} else		/* just a single number */
65758f0484fSRodney W. Grimes 			count2 = count;
65858f0484fSRodney W. Grimes 		repeat(p, pos, count, count2);
65958f0484fSRodney W. Grimes 		if (!EATTWO('\\', '}')) {	/* error heuristics */
66058f0484fSRodney W. Grimes 			while (MORE() && !SEETWO('\\', '}'))
66158f0484fSRodney W. Grimes 				NEXT();
66251295a4dSJordan K. Hubbard 			(void)REQUIRE(MORE(), REG_EBRACE);
66358f0484fSRodney W. Grimes 			SETERROR(REG_BADBR);
66458f0484fSRodney W. Grimes 		}
66589b86d02SAndrey A. Chernov 	} else if (c == '$')     /* $ (but not \$) ends it */
66658f0484fSRodney W. Grimes 		return(1);
66758f0484fSRodney W. Grimes 
66858f0484fSRodney W. Grimes 	return(0);
66958f0484fSRodney W. Grimes }
67058f0484fSRodney W. Grimes 
67158f0484fSRodney W. Grimes /*
67258f0484fSRodney W. Grimes  - p_count - parse a repetition count
6738fb3f3f6SDavid E. O'Brien  == static int p_count(struct parse *p);
67458f0484fSRodney W. Grimes  */
67558f0484fSRodney W. Grimes static int			/* the value */
67658f0484fSRodney W. Grimes p_count(p)
6778fb3f3f6SDavid E. O'Brien struct parse *p;
67858f0484fSRodney W. Grimes {
6798fb3f3f6SDavid E. O'Brien 	int count = 0;
6808fb3f3f6SDavid E. O'Brien 	int ndigits = 0;
68158f0484fSRodney W. Grimes 
68289b86d02SAndrey A. Chernov 	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
68358f0484fSRodney W. Grimes 		count = count*10 + (GETNEXT() - '0');
68458f0484fSRodney W. Grimes 		ndigits++;
68558f0484fSRodney W. Grimes 	}
68658f0484fSRodney W. Grimes 
68751295a4dSJordan K. Hubbard 	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
68858f0484fSRodney W. Grimes 	return(count);
68958f0484fSRodney W. Grimes }
69058f0484fSRodney W. Grimes 
69158f0484fSRodney W. Grimes /*
69258f0484fSRodney W. Grimes  - p_bracket - parse a bracketed character list
6938fb3f3f6SDavid E. O'Brien  == static void p_bracket(struct parse *p);
69458f0484fSRodney W. Grimes  *
69558f0484fSRodney W. Grimes  * Note a significant property of this code:  if the allocset() did SETERROR,
69658f0484fSRodney W. Grimes  * no set operations are done.
69758f0484fSRodney W. Grimes  */
69858f0484fSRodney W. Grimes static void
69958f0484fSRodney W. Grimes p_bracket(p)
7008fb3f3f6SDavid E. O'Brien struct parse *p;
70158f0484fSRodney W. Grimes {
7028fb3f3f6SDavid E. O'Brien 	cset *cs = allocset(p);
7038fb3f3f6SDavid E. O'Brien 	int invert = 0;
70458f0484fSRodney W. Grimes 
70558f0484fSRodney W. Grimes 	/* Dept of Truly Sickening Special-Case Kludges */
70658f0484fSRodney W. Grimes 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
70758f0484fSRodney W. Grimes 		EMIT(OBOW, 0);
70858f0484fSRodney W. Grimes 		NEXTn(6);
70958f0484fSRodney W. Grimes 		return;
71058f0484fSRodney W. Grimes 	}
71158f0484fSRodney W. Grimes 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
71258f0484fSRodney W. Grimes 		EMIT(OEOW, 0);
71358f0484fSRodney W. Grimes 		NEXTn(6);
71458f0484fSRodney W. Grimes 		return;
71558f0484fSRodney W. Grimes 	}
71658f0484fSRodney W. Grimes 
71758f0484fSRodney W. Grimes 	if (EAT('^'))
71858f0484fSRodney W. Grimes 		invert++;	/* make note to invert set at end */
71958f0484fSRodney W. Grimes 	if (EAT(']'))
72058f0484fSRodney W. Grimes 		CHadd(cs, ']');
72158f0484fSRodney W. Grimes 	else if (EAT('-'))
72258f0484fSRodney W. Grimes 		CHadd(cs, '-');
72358f0484fSRodney W. Grimes 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
72458f0484fSRodney W. Grimes 		p_b_term(p, cs);
72558f0484fSRodney W. Grimes 	if (EAT('-'))
72658f0484fSRodney W. Grimes 		CHadd(cs, '-');
72751295a4dSJordan K. Hubbard 	(void)MUSTEAT(']', REG_EBRACK);
72858f0484fSRodney W. Grimes 
72958f0484fSRodney W. Grimes 	if (p->error != 0)	/* don't mess things up further */
73058f0484fSRodney W. Grimes 		return;
73158f0484fSRodney W. Grimes 
73258f0484fSRodney W. Grimes 	if (p->g->cflags&REG_ICASE) {
7338fb3f3f6SDavid E. O'Brien 		int i;
7348fb3f3f6SDavid E. O'Brien 		int ci;
73558f0484fSRodney W. Grimes 
73658f0484fSRodney W. Grimes 		for (i = p->g->csetsize - 1; i >= 0; i--)
73758f0484fSRodney W. Grimes 			if (CHIN(cs, i) && isalpha(i)) {
73858f0484fSRodney W. Grimes 				ci = othercase(i);
73958f0484fSRodney W. Grimes 				if (ci != i)
74058f0484fSRodney W. Grimes 					CHadd(cs, ci);
74158f0484fSRodney W. Grimes 			}
74258f0484fSRodney W. Grimes 		if (cs->multis != NULL)
74358f0484fSRodney W. Grimes 			mccase(p, cs);
74458f0484fSRodney W. Grimes 	}
74558f0484fSRodney W. Grimes 	if (invert) {
7468fb3f3f6SDavid E. O'Brien 		int i;
74758f0484fSRodney W. Grimes 
74858f0484fSRodney W. Grimes 		for (i = p->g->csetsize - 1; i >= 0; i--)
74958f0484fSRodney W. Grimes 			if (CHIN(cs, i))
75058f0484fSRodney W. Grimes 				CHsub(cs, i);
75158f0484fSRodney W. Grimes 			else
75258f0484fSRodney W. Grimes 				CHadd(cs, i);
75358f0484fSRodney W. Grimes 		if (p->g->cflags&REG_NEWLINE)
75458f0484fSRodney W. Grimes 			CHsub(cs, '\n');
75558f0484fSRodney W. Grimes 		if (cs->multis != NULL)
75658f0484fSRodney W. Grimes 			mcinvert(p, cs);
75758f0484fSRodney W. Grimes 	}
75858f0484fSRodney W. Grimes 
75958f0484fSRodney W. Grimes 	assert(cs->multis == NULL);		/* xxx */
76058f0484fSRodney W. Grimes 
76158f0484fSRodney W. Grimes 	if (nch(p, cs) == 1) {		/* optimize singleton sets */
76258f0484fSRodney W. Grimes 		ordinary(p, firstch(p, cs));
76358f0484fSRodney W. Grimes 		freeset(p, cs);
76458f0484fSRodney W. Grimes 	} else
76558f0484fSRodney W. Grimes 		EMIT(OANYOF, freezeset(p, cs));
76658f0484fSRodney W. Grimes }
76758f0484fSRodney W. Grimes 
76858f0484fSRodney W. Grimes /*
76958f0484fSRodney W. Grimes  - p_b_term - parse one term of a bracketed character list
7708fb3f3f6SDavid E. O'Brien  == static void p_b_term(struct parse *p, cset *cs);
77158f0484fSRodney W. Grimes  */
77258f0484fSRodney W. Grimes static void
77358f0484fSRodney W. Grimes p_b_term(p, cs)
7748fb3f3f6SDavid E. O'Brien struct parse *p;
7758fb3f3f6SDavid E. O'Brien cset *cs;
77658f0484fSRodney W. Grimes {
7778fb3f3f6SDavid E. O'Brien 	char c;
7788fb3f3f6SDavid E. O'Brien 	char start, finish;
7798fb3f3f6SDavid E. O'Brien 	int i;
78058f0484fSRodney W. Grimes 
78158f0484fSRodney W. Grimes 	/* classify what we've got */
78258f0484fSRodney W. Grimes 	switch ((MORE()) ? PEEK() : '\0') {
78358f0484fSRodney W. Grimes 	case '[':
78458f0484fSRodney W. Grimes 		c = (MORE2()) ? PEEK2() : '\0';
78558f0484fSRodney W. Grimes 		break;
78658f0484fSRodney W. Grimes 	case '-':
78758f0484fSRodney W. Grimes 		SETERROR(REG_ERANGE);
78858f0484fSRodney W. Grimes 		return;			/* NOTE RETURN */
78958f0484fSRodney W. Grimes 		break;
79058f0484fSRodney W. Grimes 	default:
79158f0484fSRodney W. Grimes 		c = '\0';
79258f0484fSRodney W. Grimes 		break;
79358f0484fSRodney W. Grimes 	}
79458f0484fSRodney W. Grimes 
79558f0484fSRodney W. Grimes 	switch (c) {
79658f0484fSRodney W. Grimes 	case ':':		/* character class */
79758f0484fSRodney W. Grimes 		NEXT2();
79851295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
79958f0484fSRodney W. Grimes 		c = PEEK();
80051295a4dSJordan K. Hubbard 		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
80158f0484fSRodney W. Grimes 		p_b_cclass(p, cs);
80251295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
80351295a4dSJordan K. Hubbard 		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
80458f0484fSRodney W. Grimes 		break;
80558f0484fSRodney W. Grimes 	case '=':		/* equivalence class */
80658f0484fSRodney W. Grimes 		NEXT2();
80751295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
80858f0484fSRodney W. Grimes 		c = PEEK();
80951295a4dSJordan K. Hubbard 		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
81058f0484fSRodney W. Grimes 		p_b_eclass(p, cs);
81151295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
81251295a4dSJordan K. Hubbard 		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
81358f0484fSRodney W. Grimes 		break;
81458f0484fSRodney W. Grimes 	default:		/* symbol, ordinary character, or range */
81558f0484fSRodney W. Grimes /* xxx revision needed for multichar stuff */
81658f0484fSRodney W. Grimes 		start = p_b_symbol(p);
81758f0484fSRodney W. Grimes 		if (SEE('-') && MORE2() && PEEK2() != ']') {
81858f0484fSRodney W. Grimes 			/* range */
81958f0484fSRodney W. Grimes 			NEXT();
82058f0484fSRodney W. Grimes 			if (EAT('-'))
82158f0484fSRodney W. Grimes 				finish = '-';
82258f0484fSRodney W. Grimes 			else
82358f0484fSRodney W. Grimes 				finish = p_b_symbol(p);
82458f0484fSRodney W. Grimes 		} else
82558f0484fSRodney W. Grimes 			finish = start;
8265c551438SAndrey A. Chernov 		if (start == finish)
8275c551438SAndrey A. Chernov 			CHadd(cs, start);
8285c551438SAndrey A. Chernov 		else {
829b5a6eb18SAndrey A. Chernov 			if (__collate_load_error) {
830b5a6eb18SAndrey A. Chernov 				(void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
831b5a6eb18SAndrey A. Chernov 				for (i = (uch)start; i <= (uch)finish; i++)
832b5a6eb18SAndrey A. Chernov 					CHadd(cs, i);
833b5a6eb18SAndrey A. Chernov 			} else {
834c61cea72SAndrey A. Chernov 				(void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
8355c551438SAndrey A. Chernov 				for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
836c61cea72SAndrey A. Chernov 					if (   __collate_range_cmp(start, i) <= 0
837c61cea72SAndrey A. Chernov 					    && __collate_range_cmp(i, finish) <= 0
8385c551438SAndrey A. Chernov 					   )
83958f0484fSRodney W. Grimes 						CHadd(cs, i);
8405c551438SAndrey A. Chernov 				}
8415c551438SAndrey A. Chernov 			}
842b5a6eb18SAndrey A. Chernov 		}
84358f0484fSRodney W. Grimes 		break;
84458f0484fSRodney W. Grimes 	}
84558f0484fSRodney W. Grimes }
84658f0484fSRodney W. Grimes 
84758f0484fSRodney W. Grimes /*
84858f0484fSRodney W. Grimes  - p_b_cclass - parse a character-class name and deal with it
8498fb3f3f6SDavid E. O'Brien  == static void p_b_cclass(struct parse *p, cset *cs);
85058f0484fSRodney W. Grimes  */
85158f0484fSRodney W. Grimes static void
85258f0484fSRodney W. Grimes p_b_cclass(p, cs)
8538fb3f3f6SDavid E. O'Brien struct parse *p;
8548fb3f3f6SDavid E. O'Brien cset *cs;
85558f0484fSRodney W. Grimes {
8568fb3f3f6SDavid E. O'Brien 	int c;
8578fb3f3f6SDavid E. O'Brien 	char *sp = p->next;
8588fb3f3f6SDavid E. O'Brien 	struct cclass *cp;
8598fb3f3f6SDavid E. O'Brien 	size_t len;
86058f0484fSRodney W. Grimes 
861b5363c4aSAndrey A. Chernov 	while (MORE() && isalpha((uch)PEEK()))
86258f0484fSRodney W. Grimes 		NEXT();
86358f0484fSRodney W. Grimes 	len = p->next - sp;
86458f0484fSRodney W. Grimes 	for (cp = cclasses; cp->name != NULL; cp++)
86558f0484fSRodney W. Grimes 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
86658f0484fSRodney W. Grimes 			break;
86758f0484fSRodney W. Grimes 	if (cp->name == NULL) {
86858f0484fSRodney W. Grimes 		/* oops, didn't find it */
86958f0484fSRodney W. Grimes 		SETERROR(REG_ECTYPE);
87058f0484fSRodney W. Grimes 		return;
87158f0484fSRodney W. Grimes 	}
87258f0484fSRodney W. Grimes 
873b5363c4aSAndrey A. Chernov 	switch (cp->fidx) {
874b5363c4aSAndrey A. Chernov 	case CALNUM:
875b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
876b5363c4aSAndrey A. Chernov 			if (isalnum((uch)c))
87758f0484fSRodney W. Grimes 				CHadd(cs, c);
878b5363c4aSAndrey A. Chernov 		break;
879b5363c4aSAndrey A. Chernov 	case CALPHA:
880b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
881b5363c4aSAndrey A. Chernov 			if (isalpha((uch)c))
882b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
883b5363c4aSAndrey A. Chernov 		break;
884b5363c4aSAndrey A. Chernov 	case CBLANK:
885b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
886b5363c4aSAndrey A. Chernov 			if (isblank((uch)c))
887b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
888b5363c4aSAndrey A. Chernov 		break;
889b5363c4aSAndrey A. Chernov 	case CCNTRL:
890b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
891b5363c4aSAndrey A. Chernov 			if (iscntrl((uch)c))
892b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
893b5363c4aSAndrey A. Chernov 		break;
894b5363c4aSAndrey A. Chernov 	case CDIGIT:
895b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
896b5363c4aSAndrey A. Chernov 			if (isdigit((uch)c))
897b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
898b5363c4aSAndrey A. Chernov 		break;
899b5363c4aSAndrey A. Chernov 	case CGRAPH:
900b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
901b5363c4aSAndrey A. Chernov 			if (isgraph((uch)c))
902b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
903b5363c4aSAndrey A. Chernov 		break;
904b5363c4aSAndrey A. Chernov 	case CLOWER:
905b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
906b5363c4aSAndrey A. Chernov 			if (islower((uch)c))
907b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
908b5363c4aSAndrey A. Chernov 		break;
909b5363c4aSAndrey A. Chernov 	case CPRINT:
910b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
911b5363c4aSAndrey A. Chernov 			if (isprint((uch)c))
912b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
913b5363c4aSAndrey A. Chernov 		break;
914b5363c4aSAndrey A. Chernov 	case CPUNCT:
915b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
916b5363c4aSAndrey A. Chernov 			if (ispunct((uch)c))
917b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
918b5363c4aSAndrey A. Chernov 		break;
919b5363c4aSAndrey A. Chernov 	case CSPACE:
920b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
921b5363c4aSAndrey A. Chernov 			if (isspace((uch)c))
922b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
923b5363c4aSAndrey A. Chernov 		break;
924b5363c4aSAndrey A. Chernov 	case CUPPER:
925b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
926b5363c4aSAndrey A. Chernov 			if (isupper((uch)c))
927b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
928b5363c4aSAndrey A. Chernov 		break;
929b5363c4aSAndrey A. Chernov 	case CXDIGIT:
930b5363c4aSAndrey A. Chernov 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
931b5363c4aSAndrey A. Chernov 			if (isxdigit((uch)c))
932b5363c4aSAndrey A. Chernov 				CHadd(cs, c);
933b5363c4aSAndrey A. Chernov 		break;
934b5363c4aSAndrey A. Chernov 	}
935b5363c4aSAndrey A. Chernov #if 0
93658f0484fSRodney W. Grimes 	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
93758f0484fSRodney W. Grimes 		MCadd(p, cs, u);
938b5363c4aSAndrey A. Chernov #endif
93958f0484fSRodney W. Grimes }
94058f0484fSRodney W. Grimes 
94158f0484fSRodney W. Grimes /*
94258f0484fSRodney W. Grimes  - p_b_eclass - parse an equivalence-class name and deal with it
9438fb3f3f6SDavid E. O'Brien  == static void p_b_eclass(struct parse *p, cset *cs);
94458f0484fSRodney W. Grimes  *
94558f0484fSRodney W. Grimes  * This implementation is incomplete. xxx
94658f0484fSRodney W. Grimes  */
94758f0484fSRodney W. Grimes static void
94858f0484fSRodney W. Grimes p_b_eclass(p, cs)
9498fb3f3f6SDavid E. O'Brien struct parse *p;
9508fb3f3f6SDavid E. O'Brien cset *cs;
95158f0484fSRodney W. Grimes {
9528fb3f3f6SDavid E. O'Brien 	char c;
95358f0484fSRodney W. Grimes 
95458f0484fSRodney W. Grimes 	c = p_b_coll_elem(p, '=');
95558f0484fSRodney W. Grimes 	CHadd(cs, c);
95658f0484fSRodney W. Grimes }
95758f0484fSRodney W. Grimes 
95858f0484fSRodney W. Grimes /*
95958f0484fSRodney W. Grimes  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
9608fb3f3f6SDavid E. O'Brien  == static char p_b_symbol(struct parse *p);
96158f0484fSRodney W. Grimes  */
96258f0484fSRodney W. Grimes static char			/* value of symbol */
96358f0484fSRodney W. Grimes p_b_symbol(p)
9648fb3f3f6SDavid E. O'Brien struct parse *p;
96558f0484fSRodney W. Grimes {
9668fb3f3f6SDavid E. O'Brien 	char value;
96758f0484fSRodney W. Grimes 
96851295a4dSJordan K. Hubbard 	(void)REQUIRE(MORE(), REG_EBRACK);
96958f0484fSRodney W. Grimes 	if (!EATTWO('[', '.'))
97058f0484fSRodney W. Grimes 		return(GETNEXT());
97158f0484fSRodney W. Grimes 
97258f0484fSRodney W. Grimes 	/* collating symbol */
97358f0484fSRodney W. Grimes 	value = p_b_coll_elem(p, '.');
97451295a4dSJordan K. Hubbard 	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
97558f0484fSRodney W. Grimes 	return(value);
97658f0484fSRodney W. Grimes }
97758f0484fSRodney W. Grimes 
97858f0484fSRodney W. Grimes /*
97958f0484fSRodney W. Grimes  - p_b_coll_elem - parse a collating-element name and look it up
9808fb3f3f6SDavid E. O'Brien  == static char p_b_coll_elem(struct parse *p, int endc);
98158f0484fSRodney W. Grimes  */
98258f0484fSRodney W. Grimes static char			/* value of collating element */
98358f0484fSRodney W. Grimes p_b_coll_elem(p, endc)
9848fb3f3f6SDavid E. O'Brien struct parse *p;
98558f0484fSRodney W. Grimes int endc;			/* name ended by endc,']' */
98658f0484fSRodney W. Grimes {
9878fb3f3f6SDavid E. O'Brien 	char *sp = p->next;
9888fb3f3f6SDavid E. O'Brien 	struct cname *cp;
9898fb3f3f6SDavid E. O'Brien 	int len;
99058f0484fSRodney W. Grimes 
99158f0484fSRodney W. Grimes 	while (MORE() && !SEETWO(endc, ']'))
99258f0484fSRodney W. Grimes 		NEXT();
99358f0484fSRodney W. Grimes 	if (!MORE()) {
99458f0484fSRodney W. Grimes 		SETERROR(REG_EBRACK);
99558f0484fSRodney W. Grimes 		return(0);
99658f0484fSRodney W. Grimes 	}
99758f0484fSRodney W. Grimes 	len = p->next - sp;
99858f0484fSRodney W. Grimes 	for (cp = cnames; cp->name != NULL; cp++)
99958f0484fSRodney W. Grimes 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
100058f0484fSRodney W. Grimes 			return(cp->code);	/* known name */
100158f0484fSRodney W. Grimes 	if (len == 1)
100258f0484fSRodney W. Grimes 		return(*sp);	/* single character */
100358f0484fSRodney W. Grimes 	SETERROR(REG_ECOLLATE);			/* neither */
100458f0484fSRodney W. Grimes 	return(0);
100558f0484fSRodney W. Grimes }
100658f0484fSRodney W. Grimes 
100758f0484fSRodney W. Grimes /*
100858f0484fSRodney W. Grimes  - othercase - return the case counterpart of an alphabetic
100958f0484fSRodney W. Grimes  == static char othercase(int ch);
101058f0484fSRodney W. Grimes  */
101158f0484fSRodney W. Grimes static char			/* if no counterpart, return ch */
101258f0484fSRodney W. Grimes othercase(ch)
101358f0484fSRodney W. Grimes int ch;
101458f0484fSRodney W. Grimes {
101589b86d02SAndrey A. Chernov 	ch = (uch)ch;
101658f0484fSRodney W. Grimes 	assert(isalpha(ch));
101758f0484fSRodney W. Grimes 	if (isupper(ch))
101858f0484fSRodney W. Grimes 		return(tolower(ch));
101958f0484fSRodney W. Grimes 	else if (islower(ch))
102058f0484fSRodney W. Grimes 		return(toupper(ch));
102158f0484fSRodney W. Grimes 	else			/* peculiar, but could happen */
102258f0484fSRodney W. Grimes 		return(ch);
102358f0484fSRodney W. Grimes }
102458f0484fSRodney W. Grimes 
102558f0484fSRodney W. Grimes /*
102658f0484fSRodney W. Grimes  - bothcases - emit a dualcase version of a two-case character
10278fb3f3f6SDavid E. O'Brien  == static void bothcases(struct parse *p, int ch);
102858f0484fSRodney W. Grimes  *
102958f0484fSRodney W. Grimes  * Boy, is this implementation ever a kludge...
103058f0484fSRodney W. Grimes  */
103158f0484fSRodney W. Grimes static void
103258f0484fSRodney W. Grimes bothcases(p, ch)
10338fb3f3f6SDavid E. O'Brien struct parse *p;
103458f0484fSRodney W. Grimes int ch;
103558f0484fSRodney W. Grimes {
10368fb3f3f6SDavid E. O'Brien 	char *oldnext = p->next;
10378fb3f3f6SDavid E. O'Brien 	char *oldend = p->end;
103858f0484fSRodney W. Grimes 	char bracket[3];
103958f0484fSRodney W. Grimes 
104089b86d02SAndrey A. Chernov 	ch = (uch)ch;
104158f0484fSRodney W. Grimes 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
104258f0484fSRodney W. Grimes 	p->next = bracket;
104358f0484fSRodney W. Grimes 	p->end = bracket+2;
104458f0484fSRodney W. Grimes 	bracket[0] = ch;
104558f0484fSRodney W. Grimes 	bracket[1] = ']';
104658f0484fSRodney W. Grimes 	bracket[2] = '\0';
104758f0484fSRodney W. Grimes 	p_bracket(p);
104858f0484fSRodney W. Grimes 	assert(p->next == bracket+2);
104958f0484fSRodney W. Grimes 	p->next = oldnext;
105058f0484fSRodney W. Grimes 	p->end = oldend;
105158f0484fSRodney W. Grimes }
105258f0484fSRodney W. Grimes 
105358f0484fSRodney W. Grimes /*
105458f0484fSRodney W. Grimes  - ordinary - emit an ordinary character
10558fb3f3f6SDavid E. O'Brien  == static void ordinary(struct parse *p, int ch);
105658f0484fSRodney W. Grimes  */
105758f0484fSRodney W. Grimes static void
105858f0484fSRodney W. Grimes ordinary(p, ch)
10598fb3f3f6SDavid E. O'Brien struct parse *p;
10608fb3f3f6SDavid E. O'Brien int ch;
106158f0484fSRodney W. Grimes {
10628fb3f3f6SDavid E. O'Brien 	cat_t *cap = p->g->categories;
106358f0484fSRodney W. Grimes 
106489b86d02SAndrey A. Chernov 	if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
106558f0484fSRodney W. Grimes 		bothcases(p, ch);
106658f0484fSRodney W. Grimes 	else {
106789b86d02SAndrey A. Chernov 		EMIT(OCHAR, (uch)ch);
106858f0484fSRodney W. Grimes 		if (cap[ch] == 0)
106958f0484fSRodney W. Grimes 			cap[ch] = p->g->ncategories++;
107058f0484fSRodney W. Grimes 	}
107158f0484fSRodney W. Grimes }
107258f0484fSRodney W. Grimes 
107358f0484fSRodney W. Grimes /*
107458f0484fSRodney W. Grimes  - nonnewline - emit REG_NEWLINE version of OANY
10758fb3f3f6SDavid E. O'Brien  == static void nonnewline(struct parse *p);
107658f0484fSRodney W. Grimes  *
107758f0484fSRodney W. Grimes  * Boy, is this implementation ever a kludge...
107858f0484fSRodney W. Grimes  */
107958f0484fSRodney W. Grimes static void
108058f0484fSRodney W. Grimes nonnewline(p)
10818fb3f3f6SDavid E. O'Brien struct parse *p;
108258f0484fSRodney W. Grimes {
10838fb3f3f6SDavid E. O'Brien 	char *oldnext = p->next;
10848fb3f3f6SDavid E. O'Brien 	char *oldend = p->end;
108558f0484fSRodney W. Grimes 	char bracket[4];
108658f0484fSRodney W. Grimes 
108758f0484fSRodney W. Grimes 	p->next = bracket;
108858f0484fSRodney W. Grimes 	p->end = bracket+3;
108958f0484fSRodney W. Grimes 	bracket[0] = '^';
109058f0484fSRodney W. Grimes 	bracket[1] = '\n';
109158f0484fSRodney W. Grimes 	bracket[2] = ']';
109258f0484fSRodney W. Grimes 	bracket[3] = '\0';
109358f0484fSRodney W. Grimes 	p_bracket(p);
109458f0484fSRodney W. Grimes 	assert(p->next == bracket+3);
109558f0484fSRodney W. Grimes 	p->next = oldnext;
109658f0484fSRodney W. Grimes 	p->end = oldend;
109758f0484fSRodney W. Grimes }
109858f0484fSRodney W. Grimes 
109958f0484fSRodney W. Grimes /*
110058f0484fSRodney W. Grimes  - repeat - generate code for a bounded repetition, recursively if needed
11018fb3f3f6SDavid E. O'Brien  == static void repeat(struct parse *p, sopno start, int from, int to);
110258f0484fSRodney W. Grimes  */
110358f0484fSRodney W. Grimes static void
110458f0484fSRodney W. Grimes repeat(p, start, from, to)
11058fb3f3f6SDavid E. O'Brien struct parse *p;
110658f0484fSRodney W. Grimes sopno start;			/* operand from here to end of strip */
110758f0484fSRodney W. Grimes int from;			/* repeated from this number */
110858f0484fSRodney W. Grimes int to;				/* to this number of times (maybe INFINITY) */
110958f0484fSRodney W. Grimes {
11108fb3f3f6SDavid E. O'Brien 	sopno finish = HERE();
111158f0484fSRodney W. Grimes #	define	N	2
111258f0484fSRodney W. Grimes #	define	INF	3
111358f0484fSRodney W. Grimes #	define	REP(f, t)	((f)*8 + (t))
111458f0484fSRodney W. Grimes #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
11158fb3f3f6SDavid E. O'Brien 	sopno copy;
111658f0484fSRodney W. Grimes 
111758f0484fSRodney W. Grimes 	if (p->error != 0)	/* head off possible runaway recursion */
111858f0484fSRodney W. Grimes 		return;
111958f0484fSRodney W. Grimes 
112058f0484fSRodney W. Grimes 	assert(from <= to);
112158f0484fSRodney W. Grimes 
112258f0484fSRodney W. Grimes 	switch (REP(MAP(from), MAP(to))) {
112358f0484fSRodney W. Grimes 	case REP(0, 0):			/* must be user doing this */
112458f0484fSRodney W. Grimes 		DROP(finish-start);	/* drop the operand */
112558f0484fSRodney W. Grimes 		break;
112658f0484fSRodney W. Grimes 	case REP(0, 1):			/* as x{1,1}? */
112758f0484fSRodney W. Grimes 	case REP(0, N):			/* as x{1,n}? */
112858f0484fSRodney W. Grimes 	case REP(0, INF):		/* as x{1,}? */
112958f0484fSRodney W. Grimes 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
113058f0484fSRodney W. Grimes 		INSERT(OCH_, start);		/* offset is wrong... */
113158f0484fSRodney W. Grimes 		repeat(p, start+1, 1, to);
113258f0484fSRodney W. Grimes 		ASTERN(OOR1, start);
113358f0484fSRodney W. Grimes 		AHEAD(start);			/* ... fix it */
113458f0484fSRodney W. Grimes 		EMIT(OOR2, 0);
113558f0484fSRodney W. Grimes 		AHEAD(THERE());
113658f0484fSRodney W. Grimes 		ASTERN(O_CH, THERETHERE());
113758f0484fSRodney W. Grimes 		break;
113858f0484fSRodney W. Grimes 	case REP(1, 1):			/* trivial case */
113958f0484fSRodney W. Grimes 		/* done */
114058f0484fSRodney W. Grimes 		break;
114158f0484fSRodney W. Grimes 	case REP(1, N):			/* as x?x{1,n-1} */
114258f0484fSRodney W. Grimes 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
114358f0484fSRodney W. Grimes 		INSERT(OCH_, start);
114458f0484fSRodney W. Grimes 		ASTERN(OOR1, start);
114558f0484fSRodney W. Grimes 		AHEAD(start);
114658f0484fSRodney W. Grimes 		EMIT(OOR2, 0);			/* offset very wrong... */
114758f0484fSRodney W. Grimes 		AHEAD(THERE());			/* ...so fix it */
114858f0484fSRodney W. Grimes 		ASTERN(O_CH, THERETHERE());
114958f0484fSRodney W. Grimes 		copy = dupl(p, start+1, finish+1);
115058f0484fSRodney W. Grimes 		assert(copy == finish+4);
115158f0484fSRodney W. Grimes 		repeat(p, copy, 1, to-1);
115258f0484fSRodney W. Grimes 		break;
115358f0484fSRodney W. Grimes 	case REP(1, INF):		/* as x+ */
115458f0484fSRodney W. Grimes 		INSERT(OPLUS_, start);
115558f0484fSRodney W. Grimes 		ASTERN(O_PLUS, start);
115658f0484fSRodney W. Grimes 		break;
115758f0484fSRodney W. Grimes 	case REP(N, N):			/* as xx{m-1,n-1} */
115858f0484fSRodney W. Grimes 		copy = dupl(p, start, finish);
115958f0484fSRodney W. Grimes 		repeat(p, copy, from-1, to-1);
116058f0484fSRodney W. Grimes 		break;
116158f0484fSRodney W. Grimes 	case REP(N, INF):		/* as xx{n-1,INF} */
116258f0484fSRodney W. Grimes 		copy = dupl(p, start, finish);
116358f0484fSRodney W. Grimes 		repeat(p, copy, from-1, to);
116458f0484fSRodney W. Grimes 		break;
116558f0484fSRodney W. Grimes 	default:			/* "can't happen" */
116658f0484fSRodney W. Grimes 		SETERROR(REG_ASSERT);	/* just in case */
116758f0484fSRodney W. Grimes 		break;
116858f0484fSRodney W. Grimes 	}
116958f0484fSRodney W. Grimes }
117058f0484fSRodney W. Grimes 
117158f0484fSRodney W. Grimes /*
117258f0484fSRodney W. Grimes  - seterr - set an error condition
11738fb3f3f6SDavid E. O'Brien  == static int seterr(struct parse *p, int e);
117458f0484fSRodney W. Grimes  */
117558f0484fSRodney W. Grimes static int			/* useless but makes type checking happy */
117658f0484fSRodney W. Grimes seterr(p, e)
11778fb3f3f6SDavid E. O'Brien struct parse *p;
117858f0484fSRodney W. Grimes int e;
117958f0484fSRodney W. Grimes {
118058f0484fSRodney W. Grimes 	if (p->error == 0)	/* keep earliest error condition */
118158f0484fSRodney W. Grimes 		p->error = e;
118258f0484fSRodney W. Grimes 	p->next = nuls;		/* try to bring things to a halt */
118358f0484fSRodney W. Grimes 	p->end = nuls;
118458f0484fSRodney W. Grimes 	return(0);		/* make the return value well-defined */
118558f0484fSRodney W. Grimes }
118658f0484fSRodney W. Grimes 
118758f0484fSRodney W. Grimes /*
118858f0484fSRodney W. Grimes  - allocset - allocate a set of characters for []
11898fb3f3f6SDavid E. O'Brien  == static cset *allocset(struct parse *p);
119058f0484fSRodney W. Grimes  */
119158f0484fSRodney W. Grimes static cset *
119258f0484fSRodney W. Grimes allocset(p)
11938fb3f3f6SDavid E. O'Brien struct parse *p;
119458f0484fSRodney W. Grimes {
11958fb3f3f6SDavid E. O'Brien 	int no = p->g->ncsets++;
11968fb3f3f6SDavid E. O'Brien 	size_t nc;
11978fb3f3f6SDavid E. O'Brien 	size_t nbytes;
11988fb3f3f6SDavid E. O'Brien 	cset *cs;
11998fb3f3f6SDavid E. O'Brien 	size_t css = (size_t)p->g->csetsize;
12008fb3f3f6SDavid E. O'Brien 	int i;
120158f0484fSRodney W. Grimes 
120258f0484fSRodney W. Grimes 	if (no >= p->ncsalloc) {	/* need another column of space */
120358f0484fSRodney W. Grimes 		p->ncsalloc += CHAR_BIT;
120458f0484fSRodney W. Grimes 		nc = p->ncsalloc;
120558f0484fSRodney W. Grimes 		assert(nc % CHAR_BIT == 0);
120658f0484fSRodney W. Grimes 		nbytes = nc / CHAR_BIT * css;
120758f0484fSRodney W. Grimes 		if (p->g->sets == NULL)
120858f0484fSRodney W. Grimes 			p->g->sets = (cset *)malloc(nc * sizeof(cset));
120958f0484fSRodney W. Grimes 		else
1210e8420087SWarner Losh 			p->g->sets = (cset *)reallocf((char *)p->g->sets,
121158f0484fSRodney W. Grimes 							nc * sizeof(cset));
121258f0484fSRodney W. Grimes 		if (p->g->setbits == NULL)
121358f0484fSRodney W. Grimes 			p->g->setbits = (uch *)malloc(nbytes);
121458f0484fSRodney W. Grimes 		else {
1215e8420087SWarner Losh 			p->g->setbits = (uch *)reallocf((char *)p->g->setbits,
121658f0484fSRodney W. Grimes 								nbytes);
121758f0484fSRodney W. Grimes 			/* xxx this isn't right if setbits is now NULL */
121858f0484fSRodney W. Grimes 			for (i = 0; i < no; i++)
121958f0484fSRodney W. Grimes 				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
122058f0484fSRodney W. Grimes 		}
122158f0484fSRodney W. Grimes 		if (p->g->sets != NULL && p->g->setbits != NULL)
122258f0484fSRodney W. Grimes 			(void) memset((char *)p->g->setbits + (nbytes - css),
122358f0484fSRodney W. Grimes 								0, css);
122458f0484fSRodney W. Grimes 		else {
122558f0484fSRodney W. Grimes 			no = 0;
122658f0484fSRodney W. Grimes 			SETERROR(REG_ESPACE);
122758f0484fSRodney W. Grimes 			/* caller's responsibility not to do set ops */
122858f0484fSRodney W. Grimes 		}
122958f0484fSRodney W. Grimes 	}
123058f0484fSRodney W. Grimes 
123158f0484fSRodney W. Grimes 	assert(p->g->sets != NULL);	/* xxx */
123258f0484fSRodney W. Grimes 	cs = &p->g->sets[no];
123358f0484fSRodney W. Grimes 	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
123458f0484fSRodney W. Grimes 	cs->mask = 1 << ((no) % CHAR_BIT);
123558f0484fSRodney W. Grimes 	cs->hash = 0;
123658f0484fSRodney W. Grimes 	cs->smultis = 0;
123758f0484fSRodney W. Grimes 	cs->multis = NULL;
123858f0484fSRodney W. Grimes 
123958f0484fSRodney W. Grimes 	return(cs);
124058f0484fSRodney W. Grimes }
124158f0484fSRodney W. Grimes 
124258f0484fSRodney W. Grimes /*
124358f0484fSRodney W. Grimes  - freeset - free a now-unused set
12448fb3f3f6SDavid E. O'Brien  == static void freeset(struct parse *p, cset *cs);
124558f0484fSRodney W. Grimes  */
124658f0484fSRodney W. Grimes static void
124758f0484fSRodney W. Grimes freeset(p, cs)
12488fb3f3f6SDavid E. O'Brien struct parse *p;
12498fb3f3f6SDavid E. O'Brien cset *cs;
125058f0484fSRodney W. Grimes {
12518fb3f3f6SDavid E. O'Brien 	int i;
12528fb3f3f6SDavid E. O'Brien 	cset *top = &p->g->sets[p->g->ncsets];
12538fb3f3f6SDavid E. O'Brien 	size_t css = (size_t)p->g->csetsize;
125458f0484fSRodney W. Grimes 
125558f0484fSRodney W. Grimes 	for (i = 0; i < css; i++)
125658f0484fSRodney W. Grimes 		CHsub(cs, i);
125758f0484fSRodney W. Grimes 	if (cs == top-1)	/* recover only the easy case */
125858f0484fSRodney W. Grimes 		p->g->ncsets--;
125958f0484fSRodney W. Grimes }
126058f0484fSRodney W. Grimes 
126158f0484fSRodney W. Grimes /*
126258f0484fSRodney W. Grimes  - freezeset - final processing on a set of characters
12638fb3f3f6SDavid E. O'Brien  == static int freezeset(struct parse *p, cset *cs);
126458f0484fSRodney W. Grimes  *
126558f0484fSRodney W. Grimes  * The main task here is merging identical sets.  This is usually a waste
126658f0484fSRodney W. Grimes  * of time (although the hash code minimizes the overhead), but can win
126758f0484fSRodney W. Grimes  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
126858f0484fSRodney W. Grimes  * is done using addition rather than xor -- all ASCII [aA] sets xor to
126958f0484fSRodney W. Grimes  * the same value!
127058f0484fSRodney W. Grimes  */
127158f0484fSRodney W. Grimes static int			/* set number */
127258f0484fSRodney W. Grimes freezeset(p, cs)
12738fb3f3f6SDavid E. O'Brien struct parse *p;
12748fb3f3f6SDavid E. O'Brien cset *cs;
127558f0484fSRodney W. Grimes {
12768fb3f3f6SDavid E. O'Brien 	short h = cs->hash;
12778fb3f3f6SDavid E. O'Brien 	int i;
12788fb3f3f6SDavid E. O'Brien 	cset *top = &p->g->sets[p->g->ncsets];
12798fb3f3f6SDavid E. O'Brien 	cset *cs2;
12808fb3f3f6SDavid E. O'Brien 	size_t css = (size_t)p->g->csetsize;
128158f0484fSRodney W. Grimes 
128258f0484fSRodney W. Grimes 	/* look for an earlier one which is the same */
128358f0484fSRodney W. Grimes 	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
128458f0484fSRodney W. Grimes 		if (cs2->hash == h && cs2 != cs) {
128558f0484fSRodney W. Grimes 			/* maybe */
128658f0484fSRodney W. Grimes 			for (i = 0; i < css; i++)
128758f0484fSRodney W. Grimes 				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
128858f0484fSRodney W. Grimes 					break;		/* no */
128958f0484fSRodney W. Grimes 			if (i == css)
129058f0484fSRodney W. Grimes 				break;			/* yes */
129158f0484fSRodney W. Grimes 		}
129258f0484fSRodney W. Grimes 
129358f0484fSRodney W. Grimes 	if (cs2 < top) {	/* found one */
129458f0484fSRodney W. Grimes 		freeset(p, cs);
129558f0484fSRodney W. Grimes 		cs = cs2;
129658f0484fSRodney W. Grimes 	}
129758f0484fSRodney W. Grimes 
129858f0484fSRodney W. Grimes 	return((int)(cs - p->g->sets));
129958f0484fSRodney W. Grimes }
130058f0484fSRodney W. Grimes 
130158f0484fSRodney W. Grimes /*
130258f0484fSRodney W. Grimes  - firstch - return first character in a set (which must have at least one)
13038fb3f3f6SDavid E. O'Brien  == static int firstch(struct parse *p, cset *cs);
130458f0484fSRodney W. Grimes  */
130558f0484fSRodney W. Grimes static int			/* character; there is no "none" value */
130658f0484fSRodney W. Grimes firstch(p, cs)
13078fb3f3f6SDavid E. O'Brien struct parse *p;
13088fb3f3f6SDavid E. O'Brien cset *cs;
130958f0484fSRodney W. Grimes {
13108fb3f3f6SDavid E. O'Brien 	int i;
13118fb3f3f6SDavid E. O'Brien 	size_t css = (size_t)p->g->csetsize;
131258f0484fSRodney W. Grimes 
131358f0484fSRodney W. Grimes 	for (i = 0; i < css; i++)
131458f0484fSRodney W. Grimes 		if (CHIN(cs, i))
131589b86d02SAndrey A. Chernov 			return((char)i);
131658f0484fSRodney W. Grimes 	assert(never);
131758f0484fSRodney W. Grimes 	return(0);		/* arbitrary */
131858f0484fSRodney W. Grimes }
131958f0484fSRodney W. Grimes 
132058f0484fSRodney W. Grimes /*
132158f0484fSRodney W. Grimes  - nch - number of characters in a set
13228fb3f3f6SDavid E. O'Brien  == static int nch(struct parse *p, cset *cs);
132358f0484fSRodney W. Grimes  */
132458f0484fSRodney W. Grimes static int
132558f0484fSRodney W. Grimes nch(p, cs)
13268fb3f3f6SDavid E. O'Brien struct parse *p;
13278fb3f3f6SDavid E. O'Brien cset *cs;
132858f0484fSRodney W. Grimes {
13298fb3f3f6SDavid E. O'Brien 	int i;
13308fb3f3f6SDavid E. O'Brien 	size_t css = (size_t)p->g->csetsize;
13318fb3f3f6SDavid E. O'Brien 	int n = 0;
133258f0484fSRodney W. Grimes 
133358f0484fSRodney W. Grimes 	for (i = 0; i < css; i++)
133458f0484fSRodney W. Grimes 		if (CHIN(cs, i))
133558f0484fSRodney W. Grimes 			n++;
133658f0484fSRodney W. Grimes 	return(n);
133758f0484fSRodney W. Grimes }
133858f0484fSRodney W. Grimes 
133958f0484fSRodney W. Grimes /*
134058f0484fSRodney W. Grimes  - mcadd - add a collating element to a cset
13418fb3f3f6SDavid E. O'Brien  == static void mcadd(struct parse *p, cset *cs, \
13428fb3f3f6SDavid E. O'Brien  ==	char *cp);
134358f0484fSRodney W. Grimes  */
134458f0484fSRodney W. Grimes static void
134558f0484fSRodney W. Grimes mcadd(p, cs, cp)
13468fb3f3f6SDavid E. O'Brien struct parse *p;
13478fb3f3f6SDavid E. O'Brien cset *cs;
13488fb3f3f6SDavid E. O'Brien char *cp;
134958f0484fSRodney W. Grimes {
13508fb3f3f6SDavid E. O'Brien 	size_t oldend = cs->smultis;
135158f0484fSRodney W. Grimes 
135258f0484fSRodney W. Grimes 	cs->smultis += strlen(cp) + 1;
135358f0484fSRodney W. Grimes 	if (cs->multis == NULL)
135458f0484fSRodney W. Grimes 		cs->multis = malloc(cs->smultis);
135558f0484fSRodney W. Grimes 	else
1356e8420087SWarner Losh 		cs->multis = reallocf(cs->multis, cs->smultis);
135758f0484fSRodney W. Grimes 	if (cs->multis == NULL) {
135858f0484fSRodney W. Grimes 		SETERROR(REG_ESPACE);
135958f0484fSRodney W. Grimes 		return;
136058f0484fSRodney W. Grimes 	}
136158f0484fSRodney W. Grimes 
136258f0484fSRodney W. Grimes 	(void) strcpy(cs->multis + oldend - 1, cp);
136358f0484fSRodney W. Grimes 	cs->multis[cs->smultis - 1] = '\0';
136458f0484fSRodney W. Grimes }
136558f0484fSRodney W. Grimes 
136651295a4dSJordan K. Hubbard #if used
136758f0484fSRodney W. Grimes /*
136858f0484fSRodney W. Grimes  - mcsub - subtract a collating element from a cset
13698fb3f3f6SDavid E. O'Brien  == static void mcsub(cset *cs, char *cp);
137058f0484fSRodney W. Grimes  */
137158f0484fSRodney W. Grimes static void
137258f0484fSRodney W. Grimes mcsub(cs, cp)
13738fb3f3f6SDavid E. O'Brien cset *cs;
13748fb3f3f6SDavid E. O'Brien char *cp;
137558f0484fSRodney W. Grimes {
13768fb3f3f6SDavid E. O'Brien 	char *fp = mcfind(cs, cp);
13778fb3f3f6SDavid E. O'Brien 	size_t len = strlen(fp);
137858f0484fSRodney W. Grimes 
137958f0484fSRodney W. Grimes 	assert(fp != NULL);
138058f0484fSRodney W. Grimes 	(void) memmove(fp, fp + len + 1,
138158f0484fSRodney W. Grimes 				cs->smultis - (fp + len + 1 - cs->multis));
138258f0484fSRodney W. Grimes 	cs->smultis -= len;
138358f0484fSRodney W. Grimes 
138458f0484fSRodney W. Grimes 	if (cs->smultis == 0) {
138558f0484fSRodney W. Grimes 		free(cs->multis);
138658f0484fSRodney W. Grimes 		cs->multis = NULL;
138758f0484fSRodney W. Grimes 		return;
138858f0484fSRodney W. Grimes 	}
138958f0484fSRodney W. Grimes 
1390e8420087SWarner Losh 	cs->multis = reallocf(cs->multis, cs->smultis);
139158f0484fSRodney W. Grimes 	assert(cs->multis != NULL);
139258f0484fSRodney W. Grimes }
139358f0484fSRodney W. Grimes 
139458f0484fSRodney W. Grimes /*
139558f0484fSRodney W. Grimes  - mcin - is a collating element in a cset?
13968fb3f3f6SDavid E. O'Brien  == static int mcin(cset *cs, char *cp);
139758f0484fSRodney W. Grimes  */
139858f0484fSRodney W. Grimes static int
139958f0484fSRodney W. Grimes mcin(cs, cp)
14008fb3f3f6SDavid E. O'Brien cset *cs;
14018fb3f3f6SDavid E. O'Brien char *cp;
140258f0484fSRodney W. Grimes {
140358f0484fSRodney W. Grimes 	return(mcfind(cs, cp) != NULL);
140458f0484fSRodney W. Grimes }
140558f0484fSRodney W. Grimes 
140658f0484fSRodney W. Grimes /*
140758f0484fSRodney W. Grimes  - mcfind - find a collating element in a cset
14088fb3f3f6SDavid E. O'Brien  == static char *mcfind(cset *cs, char *cp);
140958f0484fSRodney W. Grimes  */
141058f0484fSRodney W. Grimes static char *
141158f0484fSRodney W. Grimes mcfind(cs, cp)
14128fb3f3f6SDavid E. O'Brien cset *cs;
14138fb3f3f6SDavid E. O'Brien char *cp;
141458f0484fSRodney W. Grimes {
14158fb3f3f6SDavid E. O'Brien 	char *p;
141658f0484fSRodney W. Grimes 
141758f0484fSRodney W. Grimes 	if (cs->multis == NULL)
141858f0484fSRodney W. Grimes 		return(NULL);
141958f0484fSRodney W. Grimes 	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
142058f0484fSRodney W. Grimes 		if (strcmp(cp, p) == 0)
142158f0484fSRodney W. Grimes 			return(p);
142258f0484fSRodney W. Grimes 	return(NULL);
142358f0484fSRodney W. Grimes }
142451295a4dSJordan K. Hubbard #endif
142558f0484fSRodney W. Grimes 
142658f0484fSRodney W. Grimes /*
142758f0484fSRodney W. Grimes  - mcinvert - invert the list of collating elements in a cset
14288fb3f3f6SDavid E. O'Brien  == static void mcinvert(struct parse *p, cset *cs);
142958f0484fSRodney W. Grimes  *
143058f0484fSRodney W. Grimes  * This would have to know the set of possibilities.  Implementation
143158f0484fSRodney W. Grimes  * is deferred.
143258f0484fSRodney W. Grimes  */
143358f0484fSRodney W. Grimes static void
143458f0484fSRodney W. Grimes mcinvert(p, cs)
14358fb3f3f6SDavid E. O'Brien struct parse *p;
14368fb3f3f6SDavid E. O'Brien cset *cs;
143758f0484fSRodney W. Grimes {
143858f0484fSRodney W. Grimes 	assert(cs->multis == NULL);	/* xxx */
143958f0484fSRodney W. Grimes }
144058f0484fSRodney W. Grimes 
144158f0484fSRodney W. Grimes /*
144258f0484fSRodney W. Grimes  - mccase - add case counterparts of the list of collating elements in a cset
14438fb3f3f6SDavid E. O'Brien  == static void mccase(struct parse *p, cset *cs);
144458f0484fSRodney W. Grimes  *
144558f0484fSRodney W. Grimes  * This would have to know the set of possibilities.  Implementation
144658f0484fSRodney W. Grimes  * is deferred.
144758f0484fSRodney W. Grimes  */
144858f0484fSRodney W. Grimes static void
144958f0484fSRodney W. Grimes mccase(p, cs)
14508fb3f3f6SDavid E. O'Brien struct parse *p;
14518fb3f3f6SDavid E. O'Brien cset *cs;
145258f0484fSRodney W. Grimes {
145358f0484fSRodney W. Grimes 	assert(cs->multis == NULL);	/* xxx */
145458f0484fSRodney W. Grimes }
145558f0484fSRodney W. Grimes 
145658f0484fSRodney W. Grimes /*
145758f0484fSRodney W. Grimes  - isinsets - is this character in any sets?
14588fb3f3f6SDavid E. O'Brien  == static int isinsets(struct re_guts *g, int c);
145958f0484fSRodney W. Grimes  */
146058f0484fSRodney W. Grimes static int			/* predicate */
146158f0484fSRodney W. Grimes isinsets(g, c)
14628fb3f3f6SDavid E. O'Brien struct re_guts *g;
146358f0484fSRodney W. Grimes int c;
146458f0484fSRodney W. Grimes {
14658fb3f3f6SDavid E. O'Brien 	uch *col;
14668fb3f3f6SDavid E. O'Brien 	int i;
14678fb3f3f6SDavid E. O'Brien 	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
14688fb3f3f6SDavid E. O'Brien 	unsigned uc = (uch)c;
146958f0484fSRodney W. Grimes 
147058f0484fSRodney W. Grimes 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
147158f0484fSRodney W. Grimes 		if (col[uc] != 0)
147258f0484fSRodney W. Grimes 			return(1);
147358f0484fSRodney W. Grimes 	return(0);
147458f0484fSRodney W. Grimes }
147558f0484fSRodney W. Grimes 
147658f0484fSRodney W. Grimes /*
147758f0484fSRodney W. Grimes  - samesets - are these two characters in exactly the same sets?
14788fb3f3f6SDavid E. O'Brien  == static int samesets(struct re_guts *g, int c1, int c2);
147958f0484fSRodney W. Grimes  */
148058f0484fSRodney W. Grimes static int			/* predicate */
148158f0484fSRodney W. Grimes samesets(g, c1, c2)
14828fb3f3f6SDavid E. O'Brien struct re_guts *g;
148358f0484fSRodney W. Grimes int c1;
148458f0484fSRodney W. Grimes int c2;
148558f0484fSRodney W. Grimes {
14868fb3f3f6SDavid E. O'Brien 	uch *col;
14878fb3f3f6SDavid E. O'Brien 	int i;
14888fb3f3f6SDavid E. O'Brien 	int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
14898fb3f3f6SDavid E. O'Brien 	unsigned uc1 = (uch)c1;
14908fb3f3f6SDavid E. O'Brien 	unsigned uc2 = (uch)c2;
149158f0484fSRodney W. Grimes 
149258f0484fSRodney W. Grimes 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
149358f0484fSRodney W. Grimes 		if (col[uc1] != col[uc2])
149458f0484fSRodney W. Grimes 			return(0);
149558f0484fSRodney W. Grimes 	return(1);
149658f0484fSRodney W. Grimes }
149758f0484fSRodney W. Grimes 
149858f0484fSRodney W. Grimes /*
149958f0484fSRodney W. Grimes  - categorize - sort out character categories
15008fb3f3f6SDavid E. O'Brien  == static void categorize(struct parse *p, struct re_guts *g);
150158f0484fSRodney W. Grimes  */
150258f0484fSRodney W. Grimes static void
150358f0484fSRodney W. Grimes categorize(p, g)
150458f0484fSRodney W. Grimes struct parse *p;
15058fb3f3f6SDavid E. O'Brien struct re_guts *g;
150658f0484fSRodney W. Grimes {
15078fb3f3f6SDavid E. O'Brien 	cat_t *cats = g->categories;
15088fb3f3f6SDavid E. O'Brien 	int c;
15098fb3f3f6SDavid E. O'Brien 	int c2;
15108fb3f3f6SDavid E. O'Brien 	cat_t cat;
151158f0484fSRodney W. Grimes 
151258f0484fSRodney W. Grimes 	/* avoid making error situations worse */
151358f0484fSRodney W. Grimes 	if (p->error != 0)
151458f0484fSRodney W. Grimes 		return;
151558f0484fSRodney W. Grimes 
151658f0484fSRodney W. Grimes 	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
151758f0484fSRodney W. Grimes 		if (cats[c] == 0 && isinsets(g, c)) {
151858f0484fSRodney W. Grimes 			cat = g->ncategories++;
151958f0484fSRodney W. Grimes 			cats[c] = cat;
152058f0484fSRodney W. Grimes 			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
152158f0484fSRodney W. Grimes 				if (cats[c2] == 0 && samesets(g, c, c2))
152258f0484fSRodney W. Grimes 					cats[c2] = cat;
152358f0484fSRodney W. Grimes 		}
152458f0484fSRodney W. Grimes }
152558f0484fSRodney W. Grimes 
152658f0484fSRodney W. Grimes /*
152758f0484fSRodney W. Grimes  - dupl - emit a duplicate of a bunch of sops
15288fb3f3f6SDavid E. O'Brien  == static sopno dupl(struct parse *p, sopno start, sopno finish);
152958f0484fSRodney W. Grimes  */
153058f0484fSRodney W. Grimes static sopno			/* start of duplicate */
153158f0484fSRodney W. Grimes dupl(p, start, finish)
15328fb3f3f6SDavid E. O'Brien struct parse *p;
153358f0484fSRodney W. Grimes sopno start;			/* from here */
153458f0484fSRodney W. Grimes sopno finish;			/* to this less one */
153558f0484fSRodney W. Grimes {
15368fb3f3f6SDavid E. O'Brien 	sopno ret = HERE();
15378fb3f3f6SDavid E. O'Brien 	sopno len = finish - start;
153858f0484fSRodney W. Grimes 
153958f0484fSRodney W. Grimes 	assert(finish >= start);
154058f0484fSRodney W. Grimes 	if (len == 0)
154158f0484fSRodney W. Grimes 		return(ret);
154258f0484fSRodney W. Grimes 	enlarge(p, p->ssize + len);	/* this many unexpected additions */
154358f0484fSRodney W. Grimes 	assert(p->ssize >= p->slen + len);
154458f0484fSRodney W. Grimes 	(void) memcpy((char *)(p->strip + p->slen),
154558f0484fSRodney W. Grimes 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
154658f0484fSRodney W. Grimes 	p->slen += len;
154758f0484fSRodney W. Grimes 	return(ret);
154858f0484fSRodney W. Grimes }
154958f0484fSRodney W. Grimes 
155058f0484fSRodney W. Grimes /*
155158f0484fSRodney W. Grimes  - doemit - emit a strip operator
15528fb3f3f6SDavid E. O'Brien  == static void doemit(struct parse *p, sop op, size_t opnd);
155358f0484fSRodney W. Grimes  *
155458f0484fSRodney W. Grimes  * It might seem better to implement this as a macro with a function as
155558f0484fSRodney W. Grimes  * hard-case backup, but it's just too big and messy unless there are
155658f0484fSRodney W. Grimes  * some changes to the data structures.  Maybe later.
155758f0484fSRodney W. Grimes  */
155858f0484fSRodney W. Grimes static void
155958f0484fSRodney W. Grimes doemit(p, op, opnd)
15608fb3f3f6SDavid E. O'Brien struct parse *p;
156158f0484fSRodney W. Grimes sop op;
156258f0484fSRodney W. Grimes size_t opnd;
156358f0484fSRodney W. Grimes {
156458f0484fSRodney W. Grimes 	/* avoid making error situations worse */
156558f0484fSRodney W. Grimes 	if (p->error != 0)
156658f0484fSRodney W. Grimes 		return;
156758f0484fSRodney W. Grimes 
156858f0484fSRodney W. Grimes 	/* deal with oversize operands ("can't happen", more or less) */
156958f0484fSRodney W. Grimes 	assert(opnd < 1<<OPSHIFT);
157058f0484fSRodney W. Grimes 
157158f0484fSRodney W. Grimes 	/* deal with undersized strip */
157258f0484fSRodney W. Grimes 	if (p->slen >= p->ssize)
157358f0484fSRodney W. Grimes 		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
157458f0484fSRodney W. Grimes 	assert(p->slen < p->ssize);
157558f0484fSRodney W. Grimes 
157658f0484fSRodney W. Grimes 	/* finally, it's all reduced to the easy case */
157758f0484fSRodney W. Grimes 	p->strip[p->slen++] = SOP(op, opnd);
157858f0484fSRodney W. Grimes }
157958f0484fSRodney W. Grimes 
158058f0484fSRodney W. Grimes /*
158158f0484fSRodney W. Grimes  - doinsert - insert a sop into the strip
15828fb3f3f6SDavid E. O'Brien  == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
158358f0484fSRodney W. Grimes  */
158458f0484fSRodney W. Grimes static void
158558f0484fSRodney W. Grimes doinsert(p, op, opnd, pos)
15868fb3f3f6SDavid E. O'Brien struct parse *p;
158758f0484fSRodney W. Grimes sop op;
158858f0484fSRodney W. Grimes size_t opnd;
158958f0484fSRodney W. Grimes sopno pos;
159058f0484fSRodney W. Grimes {
15918fb3f3f6SDavid E. O'Brien 	sopno sn;
15928fb3f3f6SDavid E. O'Brien 	sop s;
15938fb3f3f6SDavid E. O'Brien 	int i;
159458f0484fSRodney W. Grimes 
159558f0484fSRodney W. Grimes 	/* avoid making error situations worse */
159658f0484fSRodney W. Grimes 	if (p->error != 0)
159758f0484fSRodney W. Grimes 		return;
159858f0484fSRodney W. Grimes 
159958f0484fSRodney W. Grimes 	sn = HERE();
160058f0484fSRodney W. Grimes 	EMIT(op, opnd);		/* do checks, ensure space */
160158f0484fSRodney W. Grimes 	assert(HERE() == sn+1);
160258f0484fSRodney W. Grimes 	s = p->strip[sn];
160358f0484fSRodney W. Grimes 
160458f0484fSRodney W. Grimes 	/* adjust paren pointers */
160558f0484fSRodney W. Grimes 	assert(pos > 0);
160658f0484fSRodney W. Grimes 	for (i = 1; i < NPAREN; i++) {
160758f0484fSRodney W. Grimes 		if (p->pbegin[i] >= pos) {
160858f0484fSRodney W. Grimes 			p->pbegin[i]++;
160958f0484fSRodney W. Grimes 		}
161058f0484fSRodney W. Grimes 		if (p->pend[i] >= pos) {
161158f0484fSRodney W. Grimes 			p->pend[i]++;
161258f0484fSRodney W. Grimes 		}
161358f0484fSRodney W. Grimes 	}
161458f0484fSRodney W. Grimes 
161558f0484fSRodney W. Grimes 	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
161658f0484fSRodney W. Grimes 						(HERE()-pos-1)*sizeof(sop));
161758f0484fSRodney W. Grimes 	p->strip[pos] = s;
161858f0484fSRodney W. Grimes }
161958f0484fSRodney W. Grimes 
162058f0484fSRodney W. Grimes /*
162158f0484fSRodney W. Grimes  - dofwd - complete a forward reference
16228fb3f3f6SDavid E. O'Brien  == static void dofwd(struct parse *p, sopno pos, sop value);
162358f0484fSRodney W. Grimes  */
162458f0484fSRodney W. Grimes static void
162558f0484fSRodney W. Grimes dofwd(p, pos, value)
16268fb3f3f6SDavid E. O'Brien struct parse *p;
16278fb3f3f6SDavid E. O'Brien sopno pos;
162858f0484fSRodney W. Grimes sop value;
162958f0484fSRodney W. Grimes {
163058f0484fSRodney W. Grimes 	/* avoid making error situations worse */
163158f0484fSRodney W. Grimes 	if (p->error != 0)
163258f0484fSRodney W. Grimes 		return;
163358f0484fSRodney W. Grimes 
163458f0484fSRodney W. Grimes 	assert(value < 1<<OPSHIFT);
163558f0484fSRodney W. Grimes 	p->strip[pos] = OP(p->strip[pos]) | value;
163658f0484fSRodney W. Grimes }
163758f0484fSRodney W. Grimes 
163858f0484fSRodney W. Grimes /*
163958f0484fSRodney W. Grimes  - enlarge - enlarge the strip
16408fb3f3f6SDavid E. O'Brien  == static void enlarge(struct parse *p, sopno size);
164158f0484fSRodney W. Grimes  */
164258f0484fSRodney W. Grimes static void
164358f0484fSRodney W. Grimes enlarge(p, size)
16448fb3f3f6SDavid E. O'Brien struct parse *p;
16458fb3f3f6SDavid E. O'Brien sopno size;
164658f0484fSRodney W. Grimes {
16478fb3f3f6SDavid E. O'Brien 	sop *sp;
164858f0484fSRodney W. Grimes 
164958f0484fSRodney W. Grimes 	if (p->ssize >= size)
165058f0484fSRodney W. Grimes 		return;
165158f0484fSRodney W. Grimes 
165258f0484fSRodney W. Grimes 	sp = (sop *)realloc(p->strip, size*sizeof(sop));
165358f0484fSRodney W. Grimes 	if (sp == NULL) {
165458f0484fSRodney W. Grimes 		SETERROR(REG_ESPACE);
165558f0484fSRodney W. Grimes 		return;
165658f0484fSRodney W. Grimes 	}
165758f0484fSRodney W. Grimes 	p->strip = sp;
165858f0484fSRodney W. Grimes 	p->ssize = size;
165958f0484fSRodney W. Grimes }
166058f0484fSRodney W. Grimes 
166158f0484fSRodney W. Grimes /*
166258f0484fSRodney W. Grimes  - stripsnug - compact the strip
16638fb3f3f6SDavid E. O'Brien  == static void stripsnug(struct parse *p, struct re_guts *g);
166458f0484fSRodney W. Grimes  */
166558f0484fSRodney W. Grimes static void
166658f0484fSRodney W. Grimes stripsnug(p, g)
16678fb3f3f6SDavid E. O'Brien struct parse *p;
16688fb3f3f6SDavid E. O'Brien struct re_guts *g;
166958f0484fSRodney W. Grimes {
167058f0484fSRodney W. Grimes 	g->nstates = p->slen;
167158f0484fSRodney W. Grimes 	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
167258f0484fSRodney W. Grimes 	if (g->strip == NULL) {
167358f0484fSRodney W. Grimes 		SETERROR(REG_ESPACE);
167458f0484fSRodney W. Grimes 		g->strip = p->strip;
167558f0484fSRodney W. Grimes 	}
167658f0484fSRodney W. Grimes }
167758f0484fSRodney W. Grimes 
167858f0484fSRodney W. Grimes /*
167958f0484fSRodney W. Grimes  - findmust - fill in must and mlen with longest mandatory literal string
16808fb3f3f6SDavid E. O'Brien  == static void findmust(struct parse *p, struct re_guts *g);
168158f0484fSRodney W. Grimes  *
168258f0484fSRodney W. Grimes  * This algorithm could do fancy things like analyzing the operands of |
168358f0484fSRodney W. Grimes  * for common subsequences.  Someday.  This code is simple and finds most
168458f0484fSRodney W. Grimes  * of the interesting cases.
168558f0484fSRodney W. Grimes  *
168658f0484fSRodney W. Grimes  * Note that must and mlen got initialized during setup.
168758f0484fSRodney W. Grimes  */
168858f0484fSRodney W. Grimes static void
168958f0484fSRodney W. Grimes findmust(p, g)
169058f0484fSRodney W. Grimes struct parse *p;
16918fb3f3f6SDavid E. O'Brien struct re_guts *g;
169258f0484fSRodney W. Grimes {
16938fb3f3f6SDavid E. O'Brien 	sop *scan;
169458f0484fSRodney W. Grimes 	sop *start;
16958fb3f3f6SDavid E. O'Brien 	sop *newstart;
16968fb3f3f6SDavid E. O'Brien 	sopno newlen;
16978fb3f3f6SDavid E. O'Brien 	sop s;
16988fb3f3f6SDavid E. O'Brien 	char *cp;
16998fb3f3f6SDavid E. O'Brien 	sopno i;
1700e6a886d8SDaniel C. Sobral 	int offset;
1701e6a886d8SDaniel C. Sobral 	int cs, mccs;
170258f0484fSRodney W. Grimes 
170358f0484fSRodney W. Grimes 	/* avoid making error situations worse */
170458f0484fSRodney W. Grimes 	if (p->error != 0)
170558f0484fSRodney W. Grimes 		return;
170658f0484fSRodney W. Grimes 
1707e6a886d8SDaniel C. Sobral 	/* Find out if we can handle OANYOF or not */
1708e6a886d8SDaniel C. Sobral 	mccs = 0;
1709e6a886d8SDaniel C. Sobral 	for (cs = 0; cs < g->ncsets; cs++)
1710e6a886d8SDaniel C. Sobral 		if (g->sets[cs].multis != NULL)
1711e6a886d8SDaniel C. Sobral 			mccs = 1;
1712e6a886d8SDaniel C. Sobral 
171358f0484fSRodney W. Grimes 	/* find the longest OCHAR sequence in strip */
171458f0484fSRodney W. Grimes 	newlen = 0;
1715e6a886d8SDaniel C. Sobral 	offset = 0;
1716e6a886d8SDaniel C. Sobral 	g->moffset = 0;
171758f0484fSRodney W. Grimes 	scan = g->strip + 1;
171858f0484fSRodney W. Grimes 	do {
171958f0484fSRodney W. Grimes 		s = *scan++;
172058f0484fSRodney W. Grimes 		switch (OP(s)) {
172158f0484fSRodney W. Grimes 		case OCHAR:		/* sequence member */
172258f0484fSRodney W. Grimes 			if (newlen == 0)		/* new sequence */
172358f0484fSRodney W. Grimes 				newstart = scan - 1;
172458f0484fSRodney W. Grimes 			newlen++;
172558f0484fSRodney W. Grimes 			break;
172658f0484fSRodney W. Grimes 		case OPLUS_:		/* things that don't break one */
172758f0484fSRodney W. Grimes 		case OLPAREN:
172858f0484fSRodney W. Grimes 		case ORPAREN:
172958f0484fSRodney W. Grimes 			break;
173058f0484fSRodney W. Grimes 		case OQUEST_:		/* things that must be skipped */
173158f0484fSRodney W. Grimes 		case OCH_:
1732e6a886d8SDaniel C. Sobral 			offset = altoffset(scan, offset, mccs);
173358f0484fSRodney W. Grimes 			scan--;
173458f0484fSRodney W. Grimes 			do {
173558f0484fSRodney W. Grimes 				scan += OPND(s);
173658f0484fSRodney W. Grimes 				s = *scan;
173758f0484fSRodney W. Grimes 				/* assert() interferes w debug printouts */
173858f0484fSRodney W. Grimes 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
173958f0484fSRodney W. Grimes 							OP(s) != OOR2) {
174058f0484fSRodney W. Grimes 					g->iflags |= BAD;
174158f0484fSRodney W. Grimes 					return;
174258f0484fSRodney W. Grimes 				}
174358f0484fSRodney W. Grimes 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
17447fed38d0SPhilippe Charnier 			/* FALLTHROUGH */
1745e6a886d8SDaniel C. Sobral 		case OBOW:		/* things that break a sequence */
1746e6a886d8SDaniel C. Sobral 		case OEOW:
1747e6a886d8SDaniel C. Sobral 		case OBOL:
1748e6a886d8SDaniel C. Sobral 		case OEOL:
1749e6a886d8SDaniel C. Sobral 		case O_QUEST:
1750e6a886d8SDaniel C. Sobral 		case O_CH:
1751e6a886d8SDaniel C. Sobral 		case OEND:
175258f0484fSRodney W. Grimes 			if (newlen > g->mlen) {		/* ends one */
175358f0484fSRodney W. Grimes 				start = newstart;
175458f0484fSRodney W. Grimes 				g->mlen = newlen;
1755e6a886d8SDaniel C. Sobral 				if (offset > -1) {
1756e6a886d8SDaniel C. Sobral 					g->moffset += offset;
1757e6a886d8SDaniel C. Sobral 					offset = newlen;
1758e6a886d8SDaniel C. Sobral 				} else
1759e6a886d8SDaniel C. Sobral 					g->moffset = offset;
1760e6a886d8SDaniel C. Sobral 			} else {
1761e6a886d8SDaniel C. Sobral 				if (offset > -1)
1762e6a886d8SDaniel C. Sobral 					offset += newlen;
176358f0484fSRodney W. Grimes 			}
176458f0484fSRodney W. Grimes 			newlen = 0;
176558f0484fSRodney W. Grimes 			break;
1766e6a886d8SDaniel C. Sobral 		case OANY:
1767e6a886d8SDaniel C. Sobral 			if (newlen > g->mlen) {		/* ends one */
1768e6a886d8SDaniel C. Sobral 				start = newstart;
1769e6a886d8SDaniel C. Sobral 				g->mlen = newlen;
1770e6a886d8SDaniel C. Sobral 				if (offset > -1) {
1771e6a886d8SDaniel C. Sobral 					g->moffset += offset;
1772e6a886d8SDaniel C. Sobral 					offset = newlen;
1773e6a886d8SDaniel C. Sobral 				} else
1774e6a886d8SDaniel C. Sobral 					g->moffset = offset;
1775e6a886d8SDaniel C. Sobral 			} else {
1776e6a886d8SDaniel C. Sobral 				if (offset > -1)
1777e6a886d8SDaniel C. Sobral 					offset += newlen;
1778e6a886d8SDaniel C. Sobral 			}
1779e6a886d8SDaniel C. Sobral 			if (offset > -1)
1780e6a886d8SDaniel C. Sobral 				offset++;
1781e6a886d8SDaniel C. Sobral 			newlen = 0;
1782e6a886d8SDaniel C. Sobral 			break;
1783e6a886d8SDaniel C. Sobral 		case OANYOF:		/* may or may not invalidate offset */
1784e6a886d8SDaniel C. Sobral 			/* First, everything as OANY */
1785e6a886d8SDaniel C. Sobral 			if (newlen > g->mlen) {		/* ends one */
1786e6a886d8SDaniel C. Sobral 				start = newstart;
1787e6a886d8SDaniel C. Sobral 				g->mlen = newlen;
1788e6a886d8SDaniel C. Sobral 				if (offset > -1) {
1789e6a886d8SDaniel C. Sobral 					g->moffset += offset;
1790e6a886d8SDaniel C. Sobral 					offset = newlen;
1791e6a886d8SDaniel C. Sobral 				} else
1792e6a886d8SDaniel C. Sobral 					g->moffset = offset;
1793e6a886d8SDaniel C. Sobral 			} else {
1794e6a886d8SDaniel C. Sobral 				if (offset > -1)
1795e6a886d8SDaniel C. Sobral 					offset += newlen;
1796e6a886d8SDaniel C. Sobral 			}
1797e6a886d8SDaniel C. Sobral 			if (offset > -1)
1798e6a886d8SDaniel C. Sobral 				offset++;
1799e6a886d8SDaniel C. Sobral 			newlen = 0;
1800e6a886d8SDaniel C. Sobral 			/* And, now, if we found out we can't deal with
1801e6a886d8SDaniel C. Sobral 			 * it, make offset = -1.
1802e6a886d8SDaniel C. Sobral 			 */
1803e6a886d8SDaniel C. Sobral 			if (mccs)
1804e6a886d8SDaniel C. Sobral 				offset = -1;
1805e6a886d8SDaniel C. Sobral 			break;
1806e6a886d8SDaniel C. Sobral 		default:
1807e6a886d8SDaniel C. Sobral 			/* Anything here makes it impossible or too hard
1808e6a886d8SDaniel C. Sobral 			 * to calculate the offset -- so we give up;
1809e6a886d8SDaniel C. Sobral 			 * save the last known good offset, in case the
1810e6a886d8SDaniel C. Sobral 			 * must sequence doesn't occur later.
1811e6a886d8SDaniel C. Sobral 			 */
1812e6a886d8SDaniel C. Sobral 			if (newlen > g->mlen) {		/* ends one */
1813e6a886d8SDaniel C. Sobral 				start = newstart;
1814e6a886d8SDaniel C. Sobral 				g->mlen = newlen;
1815e6a886d8SDaniel C. Sobral 				if (offset > -1)
1816e6a886d8SDaniel C. Sobral 					g->moffset += offset;
1817e6a886d8SDaniel C. Sobral 				else
1818e6a886d8SDaniel C. Sobral 					g->moffset = offset;
1819e6a886d8SDaniel C. Sobral 			}
1820e6a886d8SDaniel C. Sobral 			offset = -1;
1821e6a886d8SDaniel C. Sobral 			newlen = 0;
1822e6a886d8SDaniel C. Sobral 			break;
182358f0484fSRodney W. Grimes 		}
182458f0484fSRodney W. Grimes 	} while (OP(s) != OEND);
182558f0484fSRodney W. Grimes 
1826e6a886d8SDaniel C. Sobral 	if (g->mlen == 0) {		/* there isn't one */
1827e6a886d8SDaniel C. Sobral 		g->moffset = -1;
182858f0484fSRodney W. Grimes 		return;
1829e6a886d8SDaniel C. Sobral 	}
183058f0484fSRodney W. Grimes 
183158f0484fSRodney W. Grimes 	/* turn it into a character string */
183258f0484fSRodney W. Grimes 	g->must = malloc((size_t)g->mlen + 1);
183358f0484fSRodney W. Grimes 	if (g->must == NULL) {		/* argh; just forget it */
183458f0484fSRodney W. Grimes 		g->mlen = 0;
1835e6a886d8SDaniel C. Sobral 		g->moffset = -1;
183658f0484fSRodney W. Grimes 		return;
183758f0484fSRodney W. Grimes 	}
183858f0484fSRodney W. Grimes 	cp = g->must;
183958f0484fSRodney W. Grimes 	scan = start;
184058f0484fSRodney W. Grimes 	for (i = g->mlen; i > 0; i--) {
184158f0484fSRodney W. Grimes 		while (OP(s = *scan++) != OCHAR)
184258f0484fSRodney W. Grimes 			continue;
184358f0484fSRodney W. Grimes 		assert(cp < g->must + g->mlen);
184458f0484fSRodney W. Grimes 		*cp++ = (char)OPND(s);
184558f0484fSRodney W. Grimes 	}
184658f0484fSRodney W. Grimes 	assert(cp == g->must + g->mlen);
184758f0484fSRodney W. Grimes 	*cp++ = '\0';		/* just on general principles */
184858f0484fSRodney W. Grimes }
184958f0484fSRodney W. Grimes 
185058f0484fSRodney W. Grimes /*
1851e6a886d8SDaniel C. Sobral  - altoffset - choose biggest offset among multiple choices
18529868274bSDaniel C. Sobral  == static int altoffset(sop *scan, int offset, int mccs);
1853e6a886d8SDaniel C. Sobral  *
1854e6a886d8SDaniel C. Sobral  * Compute, recursively if necessary, the largest offset among multiple
1855e6a886d8SDaniel C. Sobral  * re paths.
1856e6a886d8SDaniel C. Sobral  */
1857e6a886d8SDaniel C. Sobral static int
1858e6a886d8SDaniel C. Sobral altoffset(scan, offset, mccs)
1859e6a886d8SDaniel C. Sobral sop *scan;
1860e6a886d8SDaniel C. Sobral int offset;
1861e6a886d8SDaniel C. Sobral int mccs;
1862e6a886d8SDaniel C. Sobral {
1863e6a886d8SDaniel C. Sobral 	int largest;
1864e6a886d8SDaniel C. Sobral 	int try;
1865e6a886d8SDaniel C. Sobral 	sop s;
1866e6a886d8SDaniel C. Sobral 
1867e6a886d8SDaniel C. Sobral 	/* If we gave up already on offsets, return */
1868e6a886d8SDaniel C. Sobral 	if (offset == -1)
1869e6a886d8SDaniel C. Sobral 		return -1;
1870e6a886d8SDaniel C. Sobral 
1871e6a886d8SDaniel C. Sobral 	largest = 0;
1872e6a886d8SDaniel C. Sobral 	try = 0;
1873e6a886d8SDaniel C. Sobral 	s = *scan++;
1874e6a886d8SDaniel C. Sobral 	while (OP(s) != O_QUEST && OP(s) != O_CH) {
1875e6a886d8SDaniel C. Sobral 		switch (OP(s)) {
1876e6a886d8SDaniel C. Sobral 		case OOR1:
1877e6a886d8SDaniel C. Sobral 			if (try > largest)
1878e6a886d8SDaniel C. Sobral 				largest = try;
1879e6a886d8SDaniel C. Sobral 			try = 0;
1880e6a886d8SDaniel C. Sobral 			break;
1881e6a886d8SDaniel C. Sobral 		case OQUEST_:
1882e6a886d8SDaniel C. Sobral 		case OCH_:
1883e6a886d8SDaniel C. Sobral 			try = altoffset(scan, try, mccs);
1884e6a886d8SDaniel C. Sobral 			if (try == -1)
1885e6a886d8SDaniel C. Sobral 				return -1;
1886e6a886d8SDaniel C. Sobral 			scan--;
1887e6a886d8SDaniel C. Sobral 			do {
1888e6a886d8SDaniel C. Sobral 				scan += OPND(s);
1889e6a886d8SDaniel C. Sobral 				s = *scan;
1890e6a886d8SDaniel C. Sobral 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
1891e6a886d8SDaniel C. Sobral 							OP(s) != OOR2)
1892e6a886d8SDaniel C. Sobral 					return -1;
1893e6a886d8SDaniel C. Sobral 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
18948f9e434fSDaniel C. Sobral 			/* We must skip to the next position, or we'll
18958f9e434fSDaniel C. Sobral 			 * leave altoffset() too early.
18968f9e434fSDaniel C. Sobral 			 */
18978f9e434fSDaniel C. Sobral 			scan++;
1898e6a886d8SDaniel C. Sobral 			break;
1899e6a886d8SDaniel C. Sobral 		case OANYOF:
1900e6a886d8SDaniel C. Sobral 			if (mccs)
1901e6a886d8SDaniel C. Sobral 				return -1;
1902e6a886d8SDaniel C. Sobral 		case OCHAR:
1903e6a886d8SDaniel C. Sobral 		case OANY:
1904e6a886d8SDaniel C. Sobral 			try++;
1905e6a886d8SDaniel C. Sobral 		case OBOW:
1906e6a886d8SDaniel C. Sobral 		case OEOW:
1907e6a886d8SDaniel C. Sobral 		case OLPAREN:
1908e6a886d8SDaniel C. Sobral 		case ORPAREN:
1909e6a886d8SDaniel C. Sobral 		case OOR2:
1910e6a886d8SDaniel C. Sobral 			break;
1911e6a886d8SDaniel C. Sobral 		default:
1912e6a886d8SDaniel C. Sobral 			try = -1;
1913e6a886d8SDaniel C. Sobral 			break;
1914e6a886d8SDaniel C. Sobral 		}
1915e6a886d8SDaniel C. Sobral 		if (try == -1)
1916e6a886d8SDaniel C. Sobral 			return -1;
1917e6a886d8SDaniel C. Sobral 		s = *scan++;
1918e6a886d8SDaniel C. Sobral 	}
1919e6a886d8SDaniel C. Sobral 
1920e6a886d8SDaniel C. Sobral 	if (try > largest)
1921e6a886d8SDaniel C. Sobral 		largest = try;
1922e6a886d8SDaniel C. Sobral 
1923e6a886d8SDaniel C. Sobral 	return largest+offset;
1924e6a886d8SDaniel C. Sobral }
1925e6a886d8SDaniel C. Sobral 
1926e6a886d8SDaniel C. Sobral /*
19276049d9f0SDaniel C. Sobral  - computejumps - compute char jumps for BM scan
19288fb3f3f6SDavid E. O'Brien  == static void computejumps(struct parse *p, struct re_guts *g);
19296049d9f0SDaniel C. Sobral  *
19306049d9f0SDaniel C. Sobral  * This algorithm assumes g->must exists and is has size greater than
19316049d9f0SDaniel C. Sobral  * zero. It's based on the algorithm found on Computer Algorithms by
19326049d9f0SDaniel C. Sobral  * Sara Baase.
19336049d9f0SDaniel C. Sobral  *
19346049d9f0SDaniel C. Sobral  * A char jump is the number of characters one needs to jump based on
19356049d9f0SDaniel C. Sobral  * the value of the character from the text that was mismatched.
19366049d9f0SDaniel C. Sobral  */
19376049d9f0SDaniel C. Sobral static void
19386049d9f0SDaniel C. Sobral computejumps(p, g)
19396049d9f0SDaniel C. Sobral struct parse *p;
19406049d9f0SDaniel C. Sobral struct re_guts *g;
19416049d9f0SDaniel C. Sobral {
19426049d9f0SDaniel C. Sobral 	int ch;
19436049d9f0SDaniel C. Sobral 	int mindex;
19446049d9f0SDaniel C. Sobral 
19456049d9f0SDaniel C. Sobral 	/* Avoid making errors worse */
19466049d9f0SDaniel C. Sobral 	if (p->error != 0)
19476049d9f0SDaniel C. Sobral 		return;
19486049d9f0SDaniel C. Sobral 
1949517bffcaSDaniel C. Sobral 	g->charjump = (int*) malloc((NC + 1) * sizeof(int));
19506049d9f0SDaniel C. Sobral 	if (g->charjump == NULL)	/* Not a fatal error */
19516049d9f0SDaniel C. Sobral 		return;
1952c5e125bbSDaniel C. Sobral 	/* Adjust for signed chars, if necessary */
1953c5e125bbSDaniel C. Sobral 	g->charjump = &g->charjump[-(CHAR_MIN)];
19546049d9f0SDaniel C. Sobral 
19556049d9f0SDaniel C. Sobral 	/* If the character does not exist in the pattern, the jump
19566049d9f0SDaniel C. Sobral 	 * is equal to the number of characters in the pattern.
19576049d9f0SDaniel C. Sobral 	 */
1958c5e125bbSDaniel C. Sobral 	for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
19596049d9f0SDaniel C. Sobral 		g->charjump[ch] = g->mlen;
19606049d9f0SDaniel C. Sobral 
19616049d9f0SDaniel C. Sobral 	/* If the character does exist, compute the jump that would
19626049d9f0SDaniel C. Sobral 	 * take us to the last character in the pattern equal to it
19636049d9f0SDaniel C. Sobral 	 * (notice that we match right to left, so that last character
19646049d9f0SDaniel C. Sobral 	 * is the first one that would be matched).
19656049d9f0SDaniel C. Sobral 	 */
19666049d9f0SDaniel C. Sobral 	for (mindex = 0; mindex < g->mlen; mindex++)
1967c5e125bbSDaniel C. Sobral 		g->charjump[g->must[mindex]] = g->mlen - mindex - 1;
19686049d9f0SDaniel C. Sobral }
19696049d9f0SDaniel C. Sobral 
19706049d9f0SDaniel C. Sobral /*
19716049d9f0SDaniel C. Sobral  - computematchjumps - compute match jumps for BM scan
19728fb3f3f6SDavid E. O'Brien  == static void computematchjumps(struct parse *p, struct re_guts *g);
19736049d9f0SDaniel C. Sobral  *
19746049d9f0SDaniel C. Sobral  * This algorithm assumes g->must exists and is has size greater than
19756049d9f0SDaniel C. Sobral  * zero. It's based on the algorithm found on Computer Algorithms by
19766049d9f0SDaniel C. Sobral  * Sara Baase.
19776049d9f0SDaniel C. Sobral  *
19786049d9f0SDaniel C. Sobral  * A match jump is the number of characters one needs to advance based
19796049d9f0SDaniel C. Sobral  * on the already-matched suffix.
19806049d9f0SDaniel C. Sobral  * Notice that all values here are minus (g->mlen-1), because of the way
19816049d9f0SDaniel C. Sobral  * the search algorithm works.
19826049d9f0SDaniel C. Sobral  */
19836049d9f0SDaniel C. Sobral static void
19846049d9f0SDaniel C. Sobral computematchjumps(p, g)
19856049d9f0SDaniel C. Sobral struct parse *p;
19866049d9f0SDaniel C. Sobral struct re_guts *g;
19876049d9f0SDaniel C. Sobral {
19886049d9f0SDaniel C. Sobral 	int mindex;		/* General "must" iterator */
19896049d9f0SDaniel C. Sobral 	int suffix;		/* Keeps track of matching suffix */
19906049d9f0SDaniel C. Sobral 	int ssuffix;		/* Keeps track of suffixes' suffix */
19916049d9f0SDaniel C. Sobral 	int* pmatches;		/* pmatches[k] points to the next i
19926049d9f0SDaniel C. Sobral 				 * such that i+1...mlen is a substring
19936049d9f0SDaniel C. Sobral 				 * of k+1...k+mlen-i-1
19946049d9f0SDaniel C. Sobral 				 */
19956049d9f0SDaniel C. Sobral 
19966049d9f0SDaniel C. Sobral 	/* Avoid making errors worse */
19976049d9f0SDaniel C. Sobral 	if (p->error != 0)
19986049d9f0SDaniel C. Sobral 		return;
19996049d9f0SDaniel C. Sobral 
2000517bffcaSDaniel C. Sobral 	pmatches = (int*) malloc(g->mlen * sizeof(unsigned int));
20016049d9f0SDaniel C. Sobral 	if (pmatches == NULL) {
20026049d9f0SDaniel C. Sobral 		g->matchjump = NULL;
20036049d9f0SDaniel C. Sobral 		return;
20046049d9f0SDaniel C. Sobral 	}
20056049d9f0SDaniel C. Sobral 
2006517bffcaSDaniel C. Sobral 	g->matchjump = (int*) malloc(g->mlen * sizeof(unsigned int));
20076049d9f0SDaniel C. Sobral 	if (g->matchjump == NULL)	/* Not a fatal error */
20086049d9f0SDaniel C. Sobral 		return;
20096049d9f0SDaniel C. Sobral 
20106049d9f0SDaniel C. Sobral 	/* Set maximum possible jump for each character in the pattern */
20116049d9f0SDaniel C. Sobral 	for (mindex = 0; mindex < g->mlen; mindex++)
20126049d9f0SDaniel C. Sobral 		g->matchjump[mindex] = 2*g->mlen - mindex - 1;
20136049d9f0SDaniel C. Sobral 
20146049d9f0SDaniel C. Sobral 	/* Compute pmatches[] */
20156049d9f0SDaniel C. Sobral 	for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
20166049d9f0SDaniel C. Sobral 	    mindex--, suffix--) {
20176049d9f0SDaniel C. Sobral 		pmatches[mindex] = suffix;
20186049d9f0SDaniel C. Sobral 
20196049d9f0SDaniel C. Sobral 		/* If a mismatch is found, interrupting the substring,
20206049d9f0SDaniel C. Sobral 		 * compute the matchjump for that position. If no
20216049d9f0SDaniel C. Sobral 		 * mismatch is found, then a text substring mismatched
20226049d9f0SDaniel C. Sobral 		 * against the suffix will also mismatch against the
20236049d9f0SDaniel C. Sobral 		 * substring.
20246049d9f0SDaniel C. Sobral 		 */
20256049d9f0SDaniel C. Sobral 		while (suffix < g->mlen
20266049d9f0SDaniel C. Sobral 		    && g->must[mindex] != g->must[suffix]) {
20276049d9f0SDaniel C. Sobral 			g->matchjump[suffix] = MIN(g->matchjump[suffix],
20286049d9f0SDaniel C. Sobral 			    g->mlen - mindex - 1);
20296049d9f0SDaniel C. Sobral 			suffix = pmatches[suffix];
20306049d9f0SDaniel C. Sobral 		}
20316049d9f0SDaniel C. Sobral 	}
20326049d9f0SDaniel C. Sobral 
20336049d9f0SDaniel C. Sobral 	/* Compute the matchjump up to the last substring found to jump
20346049d9f0SDaniel C. Sobral 	 * to the beginning of the largest must pattern prefix matching
20356049d9f0SDaniel C. Sobral 	 * it's own suffix.
20366049d9f0SDaniel C. Sobral 	 */
20376049d9f0SDaniel C. Sobral 	for (mindex = 0; mindex <= suffix; mindex++)
20386049d9f0SDaniel C. Sobral 		g->matchjump[mindex] = MIN(g->matchjump[mindex],
20396049d9f0SDaniel C. Sobral 		    g->mlen + suffix - mindex);
20406049d9f0SDaniel C. Sobral 
20416049d9f0SDaniel C. Sobral         ssuffix = pmatches[suffix];
20426049d9f0SDaniel C. Sobral         while (suffix < g->mlen) {
20439868274bSDaniel C. Sobral                 while (suffix <= ssuffix && suffix < g->mlen) {
20446049d9f0SDaniel C. Sobral                         g->matchjump[suffix] = MIN(g->matchjump[suffix],
20456049d9f0SDaniel C. Sobral 			    g->mlen + ssuffix - suffix);
20466049d9f0SDaniel C. Sobral                         suffix++;
20476049d9f0SDaniel C. Sobral                 }
20488e2f75b8SDaniel C. Sobral 		if (suffix < g->mlen)
20496049d9f0SDaniel C. Sobral                 	ssuffix = pmatches[ssuffix];
20506049d9f0SDaniel C. Sobral         }
20516049d9f0SDaniel C. Sobral 
20526049d9f0SDaniel C. Sobral 	free(pmatches);
20536049d9f0SDaniel C. Sobral }
20546049d9f0SDaniel C. Sobral 
20556049d9f0SDaniel C. Sobral /*
205658f0484fSRodney W. Grimes  - pluscount - count + nesting
20578fb3f3f6SDavid E. O'Brien  == static sopno pluscount(struct parse *p, struct re_guts *g);
205858f0484fSRodney W. Grimes  */
205958f0484fSRodney W. Grimes static sopno			/* nesting depth */
206058f0484fSRodney W. Grimes pluscount(p, g)
206158f0484fSRodney W. Grimes struct parse *p;
20628fb3f3f6SDavid E. O'Brien struct re_guts *g;
206358f0484fSRodney W. Grimes {
20648fb3f3f6SDavid E. O'Brien 	sop *scan;
20658fb3f3f6SDavid E. O'Brien 	sop s;
20668fb3f3f6SDavid E. O'Brien 	sopno plusnest = 0;
20678fb3f3f6SDavid E. O'Brien 	sopno maxnest = 0;
206858f0484fSRodney W. Grimes 
206958f0484fSRodney W. Grimes 	if (p->error != 0)
207058f0484fSRodney W. Grimes 		return(0);	/* there may not be an OEND */
207158f0484fSRodney W. Grimes 
207258f0484fSRodney W. Grimes 	scan = g->strip + 1;
207358f0484fSRodney W. Grimes 	do {
207458f0484fSRodney W. Grimes 		s = *scan++;
207558f0484fSRodney W. Grimes 		switch (OP(s)) {
207658f0484fSRodney W. Grimes 		case OPLUS_:
207758f0484fSRodney W. Grimes 			plusnest++;
207858f0484fSRodney W. Grimes 			break;
207958f0484fSRodney W. Grimes 		case O_PLUS:
208058f0484fSRodney W. Grimes 			if (plusnest > maxnest)
208158f0484fSRodney W. Grimes 				maxnest = plusnest;
208258f0484fSRodney W. Grimes 			plusnest--;
208358f0484fSRodney W. Grimes 			break;
208458f0484fSRodney W. Grimes 		}
208558f0484fSRodney W. Grimes 	} while (OP(s) != OEND);
208658f0484fSRodney W. Grimes 	if (plusnest != 0)
208758f0484fSRodney W. Grimes 		g->iflags |= BAD;
208858f0484fSRodney W. Grimes 	return(maxnest);
208958f0484fSRodney W. Grimes }
2090