xref: /freebsd/lib/libc/regex/regcomp.c (revision 51295a4d3e4c551df85249433c490208dc7fd23d)
158f0484fSRodney W. Grimes /*-
258f0484fSRodney W. Grimes  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
358f0484fSRodney W. Grimes  * Copyright (c) 1992, 1993, 1994
458f0484fSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
558f0484fSRodney W. Grimes  *
658f0484fSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
758f0484fSRodney W. Grimes  * Henry Spencer.
858f0484fSRodney W. Grimes  *
958f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
1058f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
1158f0484fSRodney W. Grimes  * are met:
1258f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1358f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1458f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1558f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
1658f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1758f0484fSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
1858f0484fSRodney W. Grimes  *    must display the following acknowledgement:
1958f0484fSRodney W. Grimes  *	This product includes software developed by the University of
2058f0484fSRodney W. Grimes  *	California, Berkeley and its contributors.
2158f0484fSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
2258f0484fSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
2358f0484fSRodney W. Grimes  *    without specific prior written permission.
2458f0484fSRodney W. Grimes  *
2558f0484fSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2658f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2758f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2858f0484fSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2958f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3058f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3158f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3258f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3358f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3458f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3558f0484fSRodney W. Grimes  * SUCH DAMAGE.
3658f0484fSRodney W. Grimes  *
3758f0484fSRodney W. Grimes  *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
3858f0484fSRodney W. Grimes  */
3958f0484fSRodney W. Grimes 
4058f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint)
4158f0484fSRodney W. Grimes static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
4258f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */
4358f0484fSRodney W. Grimes 
4458f0484fSRodney W. Grimes #include <sys/types.h>
4558f0484fSRodney W. Grimes #include <stdio.h>
4658f0484fSRodney W. Grimes #include <string.h>
4758f0484fSRodney W. Grimes #include <ctype.h>
4858f0484fSRodney W. Grimes #include <limits.h>
4958f0484fSRodney W. Grimes #include <stdlib.h>
5058f0484fSRodney W. Grimes #include <regex.h>
5158f0484fSRodney W. Grimes 
5258f0484fSRodney W. Grimes #include "utils.h"
5358f0484fSRodney W. Grimes #include "regex2.h"
5458f0484fSRodney W. Grimes 
5558f0484fSRodney W. Grimes #include "cclass.h"
5658f0484fSRodney W. Grimes #include "cname.h"
5758f0484fSRodney W. Grimes 
5858f0484fSRodney W. Grimes /*
5958f0484fSRodney W. Grimes  * parse structure, passed up and down to avoid global variables and
6058f0484fSRodney W. Grimes  * other clumsinesses
6158f0484fSRodney W. Grimes  */
6258f0484fSRodney W. Grimes struct parse {
6358f0484fSRodney W. Grimes 	char *next;		/* next character in RE */
6458f0484fSRodney W. Grimes 	char *end;		/* end of string (-> NUL normally) */
6558f0484fSRodney W. Grimes 	int error;		/* has an error been seen? */
6658f0484fSRodney W. Grimes 	sop *strip;		/* malloced strip */
6758f0484fSRodney W. Grimes 	sopno ssize;		/* malloced strip size (allocated) */
6858f0484fSRodney W. Grimes 	sopno slen;		/* malloced strip length (used) */
6958f0484fSRodney W. Grimes 	int ncsalloc;		/* number of csets allocated */
7058f0484fSRodney W. Grimes 	struct re_guts *g;
7158f0484fSRodney W. Grimes #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
7258f0484fSRodney W. Grimes 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
7358f0484fSRodney W. Grimes 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
7458f0484fSRodney W. Grimes };
7558f0484fSRodney W. Grimes 
7658f0484fSRodney W. Grimes /* ========= begin header generated by ./mkh ========= */
7758f0484fSRodney W. Grimes #ifdef __cplusplus
7858f0484fSRodney W. Grimes extern "C" {
7958f0484fSRodney W. Grimes #endif
8058f0484fSRodney W. Grimes 
8158f0484fSRodney W. Grimes /* === regcomp.c === */
8258f0484fSRodney W. Grimes static void p_ere __P((struct parse *p, int stop));
8358f0484fSRodney W. Grimes static void p_ere_exp __P((struct parse *p));
8458f0484fSRodney W. Grimes static void p_str __P((struct parse *p));
8558f0484fSRodney W. Grimes static void p_bre __P((struct parse *p, int end1, int end2));
8658f0484fSRodney W. Grimes static int p_simp_re __P((struct parse *p, int starordinary));
8758f0484fSRodney W. Grimes static int p_count __P((struct parse *p));
8858f0484fSRodney W. Grimes static void p_bracket __P((struct parse *p));
8958f0484fSRodney W. Grimes static void p_b_term __P((struct parse *p, cset *cs));
9058f0484fSRodney W. Grimes static void p_b_cclass __P((struct parse *p, cset *cs));
9158f0484fSRodney W. Grimes static void p_b_eclass __P((struct parse *p, cset *cs));
9258f0484fSRodney W. Grimes static char p_b_symbol __P((struct parse *p));
9358f0484fSRodney W. Grimes static char p_b_coll_elem __P((struct parse *p, int endc));
9458f0484fSRodney W. Grimes static char othercase __P((int ch));
9558f0484fSRodney W. Grimes static void bothcases __P((struct parse *p, int ch));
9658f0484fSRodney W. Grimes static void ordinary __P((struct parse *p, int ch));
9758f0484fSRodney W. Grimes static void nonnewline __P((struct parse *p));
9858f0484fSRodney W. Grimes static void repeat __P((struct parse *p, sopno start, int from, int to));
9958f0484fSRodney W. Grimes static int seterr __P((struct parse *p, int e));
10058f0484fSRodney W. Grimes static cset *allocset __P((struct parse *p));
10158f0484fSRodney W. Grimes static void freeset __P((struct parse *p, cset *cs));
10258f0484fSRodney W. Grimes static int freezeset __P((struct parse *p, cset *cs));
10358f0484fSRodney W. Grimes static int firstch __P((struct parse *p, cset *cs));
10458f0484fSRodney W. Grimes static int nch __P((struct parse *p, cset *cs));
10558f0484fSRodney W. Grimes static void mcadd __P((struct parse *p, cset *cs, char *cp));
10651295a4dSJordan K. Hubbard #if used
10758f0484fSRodney W. Grimes static void mcsub __P((cset *cs, char *cp));
10858f0484fSRodney W. Grimes static int mcin __P((cset *cs, char *cp));
10958f0484fSRodney W. Grimes static char *mcfind __P((cset *cs, char *cp));
11051295a4dSJordan K. Hubbard #endif
11158f0484fSRodney W. Grimes static void mcinvert __P((struct parse *p, cset *cs));
11258f0484fSRodney W. Grimes static void mccase __P((struct parse *p, cset *cs));
11358f0484fSRodney W. Grimes static int isinsets __P((struct re_guts *g, int c));
11458f0484fSRodney W. Grimes static int samesets __P((struct re_guts *g, int c1, int c2));
11558f0484fSRodney W. Grimes static void categorize __P((struct parse *p, struct re_guts *g));
11658f0484fSRodney W. Grimes static sopno dupl __P((struct parse *p, sopno start, sopno finish));
11758f0484fSRodney W. Grimes static void doemit __P((struct parse *p, sop op, size_t opnd));
11858f0484fSRodney W. Grimes static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
11958f0484fSRodney W. Grimes static void dofwd __P((struct parse *p, sopno pos, sop value));
12058f0484fSRodney W. Grimes static void enlarge __P((struct parse *p, sopno size));
12158f0484fSRodney W. Grimes static void stripsnug __P((struct parse *p, struct re_guts *g));
12258f0484fSRodney W. Grimes static void findmust __P((struct parse *p, struct re_guts *g));
12358f0484fSRodney W. Grimes static sopno pluscount __P((struct parse *p, struct re_guts *g));
12458f0484fSRodney W. Grimes 
12558f0484fSRodney W. Grimes #ifdef __cplusplus
12658f0484fSRodney W. Grimes }
12758f0484fSRodney W. Grimes #endif
12858f0484fSRodney W. Grimes /* ========= end header generated by ./mkh ========= */
12958f0484fSRodney W. Grimes 
13058f0484fSRodney W. Grimes static char nuls[10];		/* place to point scanner in event of error */
13158f0484fSRodney W. Grimes 
13258f0484fSRodney W. Grimes /*
13358f0484fSRodney W. Grimes  * macros for use with parse structure
13458f0484fSRodney W. Grimes  * BEWARE:  these know that the parse structure is named `p' !!!
13558f0484fSRodney W. Grimes  */
13658f0484fSRodney W. Grimes #define	PEEK()	(*p->next)
13758f0484fSRodney W. Grimes #define	PEEK2()	(*(p->next+1))
13858f0484fSRodney W. Grimes #define	MORE()	(p->next < p->end)
13958f0484fSRodney W. Grimes #define	MORE2()	(p->next+1 < p->end)
14058f0484fSRodney W. Grimes #define	SEE(c)	(MORE() && PEEK() == (c))
14158f0484fSRodney W. Grimes #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
14258f0484fSRodney W. Grimes #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
14358f0484fSRodney W. Grimes #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
14458f0484fSRodney W. Grimes #define	NEXT()	(p->next++)
14558f0484fSRodney W. Grimes #define	NEXT2()	(p->next += 2)
14658f0484fSRodney W. Grimes #define	NEXTn(n)	(p->next += (n))
14758f0484fSRodney W. Grimes #define	GETNEXT()	(*p->next++)
14858f0484fSRodney W. Grimes #define	SETERROR(e)	seterr(p, (e))
14958f0484fSRodney W. Grimes #define	REQUIRE(co, e)	((co) || SETERROR(e))
15058f0484fSRodney W. Grimes #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
15158f0484fSRodney W. Grimes #define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
15258f0484fSRodney W. Grimes #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
15358f0484fSRodney W. Grimes #define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
15458f0484fSRodney W. Grimes #define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
15558f0484fSRodney W. Grimes #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
15658f0484fSRodney W. Grimes #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
15758f0484fSRodney W. Grimes #define	HERE()		(p->slen)
15858f0484fSRodney W. Grimes #define	THERE()		(p->slen - 1)
15958f0484fSRodney W. Grimes #define	THERETHERE()	(p->slen - 2)
16058f0484fSRodney W. Grimes #define	DROP(n)	(p->slen -= (n))
16158f0484fSRodney W. Grimes 
16258f0484fSRodney W. Grimes #ifndef NDEBUG
16358f0484fSRodney W. Grimes static int never = 0;		/* for use in asserts; shuts lint up */
16458f0484fSRodney W. Grimes #else
16558f0484fSRodney W. Grimes #define	never	0		/* some <assert.h>s have bugs too */
16658f0484fSRodney W. Grimes #endif
16758f0484fSRodney W. Grimes 
16858f0484fSRodney W. Grimes /*
16958f0484fSRodney W. Grimes  - regcomp - interface for parser and compilation
17058f0484fSRodney W. Grimes  = extern int regcomp(regex_t *, const char *, int);
17158f0484fSRodney W. Grimes  = #define	REG_BASIC	0000
17258f0484fSRodney W. Grimes  = #define	REG_EXTENDED	0001
17358f0484fSRodney W. Grimes  = #define	REG_ICASE	0002
17458f0484fSRodney W. Grimes  = #define	REG_NOSUB	0004
17558f0484fSRodney W. Grimes  = #define	REG_NEWLINE	0010
17658f0484fSRodney W. Grimes  = #define	REG_NOSPEC	0020
17758f0484fSRodney W. Grimes  = #define	REG_PEND	0040
17858f0484fSRodney W. Grimes  = #define	REG_DUMP	0200
17958f0484fSRodney W. Grimes  */
18058f0484fSRodney W. Grimes int				/* 0 success, otherwise REG_something */
18158f0484fSRodney W. Grimes regcomp(preg, pattern, cflags)
18258f0484fSRodney W. Grimes regex_t *preg;
18358f0484fSRodney W. Grimes const char *pattern;
18458f0484fSRodney W. Grimes int cflags;
18558f0484fSRodney W. Grimes {
18658f0484fSRodney W. Grimes 	struct parse pa;
18758f0484fSRodney W. Grimes 	register struct re_guts *g;
18858f0484fSRodney W. Grimes 	register struct parse *p = &pa;
18958f0484fSRodney W. Grimes 	register int i;
19058f0484fSRodney W. Grimes 	register size_t len;
19158f0484fSRodney W. Grimes #ifdef REDEBUG
19258f0484fSRodney W. Grimes #	define	GOODFLAGS(f)	(f)
19358f0484fSRodney W. Grimes #else
19458f0484fSRodney W. Grimes #	define	GOODFLAGS(f)	((f)&~REG_DUMP)
19558f0484fSRodney W. Grimes #endif
19658f0484fSRodney W. Grimes 
19758f0484fSRodney W. Grimes 	cflags = GOODFLAGS(cflags);
19858f0484fSRodney W. Grimes 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
19958f0484fSRodney W. Grimes 		return(REG_INVARG);
20058f0484fSRodney W. Grimes 
20158f0484fSRodney W. Grimes 	if (cflags&REG_PEND) {
20258f0484fSRodney W. Grimes 		if (preg->re_endp < pattern)
20358f0484fSRodney W. Grimes 			return(REG_INVARG);
20458f0484fSRodney W. Grimes 		len = preg->re_endp - pattern;
20558f0484fSRodney W. Grimes 	} else
20658f0484fSRodney W. Grimes 		len = strlen((char *)pattern);
20758f0484fSRodney W. Grimes 
20858f0484fSRodney W. Grimes 	/* do the mallocs early so failure handling is easy */
20958f0484fSRodney W. Grimes 	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
21058f0484fSRodney W. Grimes 							(NC-1)*sizeof(cat_t));
21158f0484fSRodney W. Grimes 	if (g == NULL)
21258f0484fSRodney W. Grimes 		return(REG_ESPACE);
21358f0484fSRodney W. Grimes 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
21458f0484fSRodney W. Grimes 	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
21558f0484fSRodney W. Grimes 	p->slen = 0;
21658f0484fSRodney W. Grimes 	if (p->strip == NULL) {
21758f0484fSRodney W. Grimes 		free((char *)g);
21858f0484fSRodney W. Grimes 		return(REG_ESPACE);
21958f0484fSRodney W. Grimes 	}
22058f0484fSRodney W. Grimes 
22158f0484fSRodney W. Grimes 	/* set things up */
22258f0484fSRodney W. Grimes 	p->g = g;
22358f0484fSRodney W. Grimes 	p->next = (char *)pattern;	/* convenience; we do not modify it */
22458f0484fSRodney W. Grimes 	p->end = p->next + len;
22558f0484fSRodney W. Grimes 	p->error = 0;
22658f0484fSRodney W. Grimes 	p->ncsalloc = 0;
22758f0484fSRodney W. Grimes 	for (i = 0; i < NPAREN; i++) {
22858f0484fSRodney W. Grimes 		p->pbegin[i] = 0;
22958f0484fSRodney W. Grimes 		p->pend[i] = 0;
23058f0484fSRodney W. Grimes 	}
23158f0484fSRodney W. Grimes 	g->csetsize = NC;
23258f0484fSRodney W. Grimes 	g->sets = NULL;
23358f0484fSRodney W. Grimes 	g->setbits = NULL;
23458f0484fSRodney W. Grimes 	g->ncsets = 0;
23558f0484fSRodney W. Grimes 	g->cflags = cflags;
23658f0484fSRodney W. Grimes 	g->iflags = 0;
23758f0484fSRodney W. Grimes 	g->nbol = 0;
23858f0484fSRodney W. Grimes 	g->neol = 0;
23958f0484fSRodney W. Grimes 	g->must = NULL;
24058f0484fSRodney W. Grimes 	g->mlen = 0;
24158f0484fSRodney W. Grimes 	g->nsub = 0;
24258f0484fSRodney W. Grimes 	g->ncategories = 1;	/* category 0 is "everything else" */
24358f0484fSRodney W. Grimes 	g->categories = &g->catspace[-(CHAR_MIN)];
24458f0484fSRodney W. Grimes 	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
24558f0484fSRodney W. Grimes 	g->backrefs = 0;
24658f0484fSRodney W. Grimes 
24758f0484fSRodney W. Grimes 	/* do it */
24858f0484fSRodney W. Grimes 	EMIT(OEND, 0);
24958f0484fSRodney W. Grimes 	g->firststate = THERE();
25058f0484fSRodney W. Grimes 	if (cflags&REG_EXTENDED)
25158f0484fSRodney W. Grimes 		p_ere(p, OUT);
25258f0484fSRodney W. Grimes 	else if (cflags&REG_NOSPEC)
25358f0484fSRodney W. Grimes 		p_str(p);
25458f0484fSRodney W. Grimes 	else
25558f0484fSRodney W. Grimes 		p_bre(p, OUT, OUT);
25658f0484fSRodney W. Grimes 	EMIT(OEND, 0);
25758f0484fSRodney W. Grimes 	g->laststate = THERE();
25858f0484fSRodney W. Grimes 
25958f0484fSRodney W. Grimes 	/* tidy up loose ends and fill things in */
26058f0484fSRodney W. Grimes 	categorize(p, g);
26158f0484fSRodney W. Grimes 	stripsnug(p, g);
26258f0484fSRodney W. Grimes 	findmust(p, g);
26358f0484fSRodney W. Grimes 	g->nplus = pluscount(p, g);
26458f0484fSRodney W. Grimes 	g->magic = MAGIC2;
26558f0484fSRodney W. Grimes 	preg->re_nsub = g->nsub;
26658f0484fSRodney W. Grimes 	preg->re_g = g;
26758f0484fSRodney W. Grimes 	preg->re_magic = MAGIC1;
26858f0484fSRodney W. Grimes #ifndef REDEBUG
26958f0484fSRodney W. Grimes 	/* not debugging, so can't rely on the assert() in regexec() */
27058f0484fSRodney W. Grimes 	if (g->iflags&BAD)
27158f0484fSRodney W. Grimes 		SETERROR(REG_ASSERT);
27258f0484fSRodney W. Grimes #endif
27358f0484fSRodney W. Grimes 
27458f0484fSRodney W. Grimes 	/* win or lose, we're done */
27558f0484fSRodney W. Grimes 	if (p->error != 0)	/* lose */
27658f0484fSRodney W. Grimes 		regfree(preg);
27758f0484fSRodney W. Grimes 	return(p->error);
27858f0484fSRodney W. Grimes }
27958f0484fSRodney W. Grimes 
28058f0484fSRodney W. Grimes /*
28158f0484fSRodney W. Grimes  - p_ere - ERE parser top level, concatenation and alternation
28258f0484fSRodney W. Grimes  == static void p_ere(register struct parse *p, int stop);
28358f0484fSRodney W. Grimes  */
28458f0484fSRodney W. Grimes static void
28558f0484fSRodney W. Grimes p_ere(p, stop)
28658f0484fSRodney W. Grimes register struct parse *p;
28758f0484fSRodney W. Grimes int stop;			/* character this ERE should end at */
28858f0484fSRodney W. Grimes {
28958f0484fSRodney W. Grimes 	register char c;
29058f0484fSRodney W. Grimes 	register sopno prevback;
29158f0484fSRodney W. Grimes 	register sopno prevfwd;
29258f0484fSRodney W. Grimes 	register sopno conc;
29358f0484fSRodney W. Grimes 	register int first = 1;		/* is this the first alternative? */
29458f0484fSRodney W. Grimes 
29558f0484fSRodney W. Grimes 	for (;;) {
29658f0484fSRodney W. Grimes 		/* do a bunch of concatenated expressions */
29758f0484fSRodney W. Grimes 		conc = HERE();
29858f0484fSRodney W. Grimes 		while (MORE() && (c = PEEK()) != '|' && c != stop)
29958f0484fSRodney W. Grimes 			p_ere_exp(p);
30051295a4dSJordan K. Hubbard 		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
30158f0484fSRodney W. Grimes 
30258f0484fSRodney W. Grimes 		if (!EAT('|'))
30358f0484fSRodney W. Grimes 			break;		/* NOTE BREAK OUT */
30458f0484fSRodney W. Grimes 
30558f0484fSRodney W. Grimes 		if (first) {
30658f0484fSRodney W. Grimes 			INSERT(OCH_, conc);	/* offset is wrong */
30758f0484fSRodney W. Grimes 			prevfwd = conc;
30858f0484fSRodney W. Grimes 			prevback = conc;
30958f0484fSRodney W. Grimes 			first = 0;
31058f0484fSRodney W. Grimes 		}
31158f0484fSRodney W. Grimes 		ASTERN(OOR1, prevback);
31258f0484fSRodney W. Grimes 		prevback = THERE();
31358f0484fSRodney W. Grimes 		AHEAD(prevfwd);			/* fix previous offset */
31458f0484fSRodney W. Grimes 		prevfwd = HERE();
31558f0484fSRodney W. Grimes 		EMIT(OOR2, 0);			/* offset is very wrong */
31658f0484fSRodney W. Grimes 	}
31758f0484fSRodney W. Grimes 
31858f0484fSRodney W. Grimes 	if (!first) {		/* tail-end fixups */
31958f0484fSRodney W. Grimes 		AHEAD(prevfwd);
32058f0484fSRodney W. Grimes 		ASTERN(O_CH, prevback);
32158f0484fSRodney W. Grimes 	}
32258f0484fSRodney W. Grimes 
32358f0484fSRodney W. Grimes 	assert(!MORE() || SEE(stop));
32458f0484fSRodney W. Grimes }
32558f0484fSRodney W. Grimes 
32658f0484fSRodney W. Grimes /*
32758f0484fSRodney W. Grimes  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
32858f0484fSRodney W. Grimes  == static void p_ere_exp(register struct parse *p);
32958f0484fSRodney W. Grimes  */
33058f0484fSRodney W. Grimes static void
33158f0484fSRodney W. Grimes p_ere_exp(p)
33258f0484fSRodney W. Grimes register struct parse *p;
33358f0484fSRodney W. Grimes {
33458f0484fSRodney W. Grimes 	register char c;
33558f0484fSRodney W. Grimes 	register sopno pos;
33658f0484fSRodney W. Grimes 	register int count;
33758f0484fSRodney W. Grimes 	register int count2;
33858f0484fSRodney W. Grimes 	register sopno subno;
33958f0484fSRodney W. Grimes 	int wascaret = 0;
34058f0484fSRodney W. Grimes 
34158f0484fSRodney W. Grimes 	assert(MORE());		/* caller should have ensured this */
34258f0484fSRodney W. Grimes 	c = GETNEXT();
34358f0484fSRodney W. Grimes 
34458f0484fSRodney W. Grimes 	pos = HERE();
34558f0484fSRodney W. Grimes 	switch (c) {
34658f0484fSRodney W. Grimes 	case '(':
34751295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EPAREN);
34858f0484fSRodney W. Grimes 		p->g->nsub++;
34958f0484fSRodney W. Grimes 		subno = p->g->nsub;
35058f0484fSRodney W. Grimes 		if (subno < NPAREN)
35158f0484fSRodney W. Grimes 			p->pbegin[subno] = HERE();
35258f0484fSRodney W. Grimes 		EMIT(OLPAREN, subno);
35358f0484fSRodney W. Grimes 		if (!SEE(')'))
35458f0484fSRodney W. Grimes 			p_ere(p, ')');
35558f0484fSRodney W. Grimes 		if (subno < NPAREN) {
35658f0484fSRodney W. Grimes 			p->pend[subno] = HERE();
35758f0484fSRodney W. Grimes 			assert(p->pend[subno] != 0);
35858f0484fSRodney W. Grimes 		}
35958f0484fSRodney W. Grimes 		EMIT(ORPAREN, subno);
36051295a4dSJordan K. Hubbard 		(void)MUSTEAT(')', REG_EPAREN);
36158f0484fSRodney W. Grimes 		break;
36258f0484fSRodney W. Grimes #ifndef POSIX_MISTAKE
36358f0484fSRodney W. Grimes 	case ')':		/* happens only if no current unmatched ( */
36458f0484fSRodney W. Grimes 		/*
36558f0484fSRodney W. Grimes 		 * You may ask, why the ifndef?  Because I didn't notice
36658f0484fSRodney W. Grimes 		 * this until slightly too late for 1003.2, and none of the
36758f0484fSRodney W. Grimes 		 * other 1003.2 regular-expression reviewers noticed it at
36858f0484fSRodney W. Grimes 		 * all.  So an unmatched ) is legal POSIX, at least until
36958f0484fSRodney W. Grimes 		 * we can get it fixed.
37058f0484fSRodney W. Grimes 		 */
37158f0484fSRodney W. Grimes 		SETERROR(REG_EPAREN);
37258f0484fSRodney W. Grimes 		break;
37358f0484fSRodney W. Grimes #endif
37458f0484fSRodney W. Grimes 	case '^':
37558f0484fSRodney W. Grimes 		EMIT(OBOL, 0);
37658f0484fSRodney W. Grimes 		p->g->iflags |= USEBOL;
37758f0484fSRodney W. Grimes 		p->g->nbol++;
37858f0484fSRodney W. Grimes 		wascaret = 1;
37958f0484fSRodney W. Grimes 		break;
38058f0484fSRodney W. Grimes 	case '$':
38158f0484fSRodney W. Grimes 		EMIT(OEOL, 0);
38258f0484fSRodney W. Grimes 		p->g->iflags |= USEEOL;
38358f0484fSRodney W. Grimes 		p->g->neol++;
38458f0484fSRodney W. Grimes 		break;
38558f0484fSRodney W. Grimes 	case '|':
38658f0484fSRodney W. Grimes 		SETERROR(REG_EMPTY);
38758f0484fSRodney W. Grimes 		break;
38858f0484fSRodney W. Grimes 	case '*':
38958f0484fSRodney W. Grimes 	case '+':
39058f0484fSRodney W. Grimes 	case '?':
39158f0484fSRodney W. Grimes 		SETERROR(REG_BADRPT);
39258f0484fSRodney W. Grimes 		break;
39358f0484fSRodney W. Grimes 	case '.':
39458f0484fSRodney W. Grimes 		if (p->g->cflags&REG_NEWLINE)
39558f0484fSRodney W. Grimes 			nonnewline(p);
39658f0484fSRodney W. Grimes 		else
39758f0484fSRodney W. Grimes 			EMIT(OANY, 0);
39858f0484fSRodney W. Grimes 		break;
39958f0484fSRodney W. Grimes 	case '[':
40058f0484fSRodney W. Grimes 		p_bracket(p);
40158f0484fSRodney W. Grimes 		break;
40258f0484fSRodney W. Grimes 	case '\\':
40351295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EESCAPE);
40458f0484fSRodney W. Grimes 		c = GETNEXT();
40558f0484fSRodney W. Grimes 		ordinary(p, c);
40658f0484fSRodney W. Grimes 		break;
40758f0484fSRodney W. Grimes 	case '{':		/* okay as ordinary except if digit follows */
40851295a4dSJordan K. Hubbard 		(void)REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
40958f0484fSRodney W. Grimes 		/* FALLTHROUGH */
41058f0484fSRodney W. Grimes 	default:
41158f0484fSRodney W. Grimes 		ordinary(p, c);
41258f0484fSRodney W. Grimes 		break;
41358f0484fSRodney W. Grimes 	}
41458f0484fSRodney W. Grimes 
41558f0484fSRodney W. Grimes 	if (!MORE())
41658f0484fSRodney W. Grimes 		return;
41758f0484fSRodney W. Grimes 	c = PEEK();
41858f0484fSRodney W. Grimes 	/* we call { a repetition if followed by a digit */
41958f0484fSRodney W. Grimes 	if (!( c == '*' || c == '+' || c == '?' ||
42058f0484fSRodney W. Grimes 				(c == '{' && MORE2() && isdigit(PEEK2())) ))
42158f0484fSRodney W. Grimes 		return;		/* no repetition, we're done */
42258f0484fSRodney W. Grimes 	NEXT();
42358f0484fSRodney W. Grimes 
42451295a4dSJordan K. Hubbard 	(void)REQUIRE(!wascaret, REG_BADRPT);
42558f0484fSRodney W. Grimes 	switch (c) {
42658f0484fSRodney W. Grimes 	case '*':	/* implemented as +? */
42758f0484fSRodney W. Grimes 		/* this case does not require the (y|) trick, noKLUDGE */
42858f0484fSRodney W. Grimes 		INSERT(OPLUS_, pos);
42958f0484fSRodney W. Grimes 		ASTERN(O_PLUS, pos);
43058f0484fSRodney W. Grimes 		INSERT(OQUEST_, pos);
43158f0484fSRodney W. Grimes 		ASTERN(O_QUEST, pos);
43258f0484fSRodney W. Grimes 		break;
43358f0484fSRodney W. Grimes 	case '+':
43458f0484fSRodney W. Grimes 		INSERT(OPLUS_, pos);
43558f0484fSRodney W. Grimes 		ASTERN(O_PLUS, pos);
43658f0484fSRodney W. Grimes 		break;
43758f0484fSRodney W. Grimes 	case '?':
43858f0484fSRodney W. Grimes 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
43958f0484fSRodney W. Grimes 		INSERT(OCH_, pos);		/* offset slightly wrong */
44058f0484fSRodney W. Grimes 		ASTERN(OOR1, pos);		/* this one's right */
44158f0484fSRodney W. Grimes 		AHEAD(pos);			/* fix the OCH_ */
44258f0484fSRodney W. Grimes 		EMIT(OOR2, 0);			/* offset very wrong... */
44358f0484fSRodney W. Grimes 		AHEAD(THERE());			/* ...so fix it */
44458f0484fSRodney W. Grimes 		ASTERN(O_CH, THERETHERE());
44558f0484fSRodney W. Grimes 		break;
44658f0484fSRodney W. Grimes 	case '{':
44758f0484fSRodney W. Grimes 		count = p_count(p);
44858f0484fSRodney W. Grimes 		if (EAT(',')) {
44958f0484fSRodney W. Grimes 			if (isdigit(PEEK())) {
45058f0484fSRodney W. Grimes 				count2 = p_count(p);
45151295a4dSJordan K. Hubbard 				(void)REQUIRE(count <= count2, REG_BADBR);
45258f0484fSRodney W. Grimes 			} else		/* single number with comma */
45358f0484fSRodney W. Grimes 				count2 = INFINITY;
45458f0484fSRodney W. Grimes 		} else		/* just a single number */
45558f0484fSRodney W. Grimes 			count2 = count;
45658f0484fSRodney W. Grimes 		repeat(p, pos, count, count2);
45758f0484fSRodney W. Grimes 		if (!EAT('}')) {	/* error heuristics */
45858f0484fSRodney W. Grimes 			while (MORE() && PEEK() != '}')
45958f0484fSRodney W. Grimes 				NEXT();
46051295a4dSJordan K. Hubbard 			(void)REQUIRE(MORE(), REG_EBRACE);
46158f0484fSRodney W. Grimes 			SETERROR(REG_BADBR);
46258f0484fSRodney W. Grimes 		}
46358f0484fSRodney W. Grimes 		break;
46458f0484fSRodney W. Grimes 	}
46558f0484fSRodney W. Grimes 
46658f0484fSRodney W. Grimes 	if (!MORE())
46758f0484fSRodney W. Grimes 		return;
46858f0484fSRodney W. Grimes 	c = PEEK();
46958f0484fSRodney W. Grimes 	if (!( c == '*' || c == '+' || c == '?' ||
47058f0484fSRodney W. Grimes 				(c == '{' && MORE2() && isdigit(PEEK2())) ) )
47158f0484fSRodney W. Grimes 		return;
47258f0484fSRodney W. Grimes 	SETERROR(REG_BADRPT);
47358f0484fSRodney W. Grimes }
47458f0484fSRodney W. Grimes 
47558f0484fSRodney W. Grimes /*
47658f0484fSRodney W. Grimes  - p_str - string (no metacharacters) "parser"
47758f0484fSRodney W. Grimes  == static void p_str(register struct parse *p);
47858f0484fSRodney W. Grimes  */
47958f0484fSRodney W. Grimes static void
48058f0484fSRodney W. Grimes p_str(p)
48158f0484fSRodney W. Grimes register struct parse *p;
48258f0484fSRodney W. Grimes {
48351295a4dSJordan K. Hubbard 	(void)REQUIRE(MORE(), REG_EMPTY);
48458f0484fSRodney W. Grimes 	while (MORE())
48558f0484fSRodney W. Grimes 		ordinary(p, GETNEXT());
48658f0484fSRodney W. Grimes }
48758f0484fSRodney W. Grimes 
48858f0484fSRodney W. Grimes /*
48958f0484fSRodney W. Grimes  - p_bre - BRE parser top level, anchoring and concatenation
49058f0484fSRodney W. Grimes  == static void p_bre(register struct parse *p, register int end1, \
49158f0484fSRodney W. Grimes  ==	register int end2);
49258f0484fSRodney W. Grimes  * Giving end1 as OUT essentially eliminates the end1/end2 check.
49358f0484fSRodney W. Grimes  *
49458f0484fSRodney W. Grimes  * This implementation is a bit of a kludge, in that a trailing $ is first
49558f0484fSRodney W. Grimes  * taken as an ordinary character and then revised to be an anchor.  The
49658f0484fSRodney W. Grimes  * only undesirable side effect is that '$' gets included as a character
49758f0484fSRodney W. Grimes  * category in such cases.  This is fairly harmless; not worth fixing.
49858f0484fSRodney W. Grimes  * The amount of lookahead needed to avoid this kludge is excessive.
49958f0484fSRodney W. Grimes  */
50058f0484fSRodney W. Grimes static void
50158f0484fSRodney W. Grimes p_bre(p, end1, end2)
50258f0484fSRodney W. Grimes register struct parse *p;
50358f0484fSRodney W. Grimes register int end1;		/* first terminating character */
50458f0484fSRodney W. Grimes register int end2;		/* second terminating character */
50558f0484fSRodney W. Grimes {
50658f0484fSRodney W. Grimes 	register sopno start = HERE();
50758f0484fSRodney W. Grimes 	register int first = 1;			/* first subexpression? */
50858f0484fSRodney W. Grimes 	register int wasdollar = 0;
50958f0484fSRodney W. Grimes 
51058f0484fSRodney W. Grimes 	if (EAT('^')) {
51158f0484fSRodney W. Grimes 		EMIT(OBOL, 0);
51258f0484fSRodney W. Grimes 		p->g->iflags |= USEBOL;
51358f0484fSRodney W. Grimes 		p->g->nbol++;
51458f0484fSRodney W. Grimes 	}
51558f0484fSRodney W. Grimes 	while (MORE() && !SEETWO(end1, end2)) {
51658f0484fSRodney W. Grimes 		wasdollar = p_simp_re(p, first);
51758f0484fSRodney W. Grimes 		first = 0;
51858f0484fSRodney W. Grimes 	}
51958f0484fSRodney W. Grimes 	if (wasdollar) {	/* oops, that was a trailing anchor */
52058f0484fSRodney W. Grimes 		DROP(1);
52158f0484fSRodney W. Grimes 		EMIT(OEOL, 0);
52258f0484fSRodney W. Grimes 		p->g->iflags |= USEEOL;
52358f0484fSRodney W. Grimes 		p->g->neol++;
52458f0484fSRodney W. Grimes 	}
52558f0484fSRodney W. Grimes 
52651295a4dSJordan K. Hubbard 	(void)REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
52758f0484fSRodney W. Grimes }
52858f0484fSRodney W. Grimes 
52958f0484fSRodney W. Grimes /*
53058f0484fSRodney W. Grimes  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
53158f0484fSRodney W. Grimes  == static int p_simp_re(register struct parse *p, int starordinary);
53258f0484fSRodney W. Grimes  */
53358f0484fSRodney W. Grimes static int			/* was the simple RE an unbackslashed $? */
53458f0484fSRodney W. Grimes p_simp_re(p, starordinary)
53558f0484fSRodney W. Grimes register struct parse *p;
53658f0484fSRodney W. Grimes int starordinary;		/* is a leading * an ordinary character? */
53758f0484fSRodney W. Grimes {
53858f0484fSRodney W. Grimes 	register int c;
53958f0484fSRodney W. Grimes 	register int count;
54058f0484fSRodney W. Grimes 	register int count2;
54158f0484fSRodney W. Grimes 	register sopno pos;
54258f0484fSRodney W. Grimes 	register int i;
54358f0484fSRodney W. Grimes 	register sopno subno;
54458f0484fSRodney W. Grimes #	define	BACKSL	(1<<CHAR_BIT)
54558f0484fSRodney W. Grimes 
54658f0484fSRodney W. Grimes 	pos = HERE();		/* repetion op, if any, covers from here */
54758f0484fSRodney W. Grimes 
54858f0484fSRodney W. Grimes 	assert(MORE());		/* caller should have ensured this */
54958f0484fSRodney W. Grimes 	c = GETNEXT();
55058f0484fSRodney W. Grimes 	if (c == '\\') {
55151295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EESCAPE);
55258f0484fSRodney W. Grimes 		c = BACKSL | (unsigned char)GETNEXT();
55358f0484fSRodney W. Grimes 	}
55458f0484fSRodney W. Grimes 	switch (c) {
55558f0484fSRodney W. Grimes 	case '.':
55658f0484fSRodney W. Grimes 		if (p->g->cflags&REG_NEWLINE)
55758f0484fSRodney W. Grimes 			nonnewline(p);
55858f0484fSRodney W. Grimes 		else
55958f0484fSRodney W. Grimes 			EMIT(OANY, 0);
56058f0484fSRodney W. Grimes 		break;
56158f0484fSRodney W. Grimes 	case '[':
56258f0484fSRodney W. Grimes 		p_bracket(p);
56358f0484fSRodney W. Grimes 		break;
56458f0484fSRodney W. Grimes 	case BACKSL|'{':
56558f0484fSRodney W. Grimes 		SETERROR(REG_BADRPT);
56658f0484fSRodney W. Grimes 		break;
56758f0484fSRodney W. Grimes 	case BACKSL|'(':
56858f0484fSRodney W. Grimes 		p->g->nsub++;
56958f0484fSRodney W. Grimes 		subno = p->g->nsub;
57058f0484fSRodney W. Grimes 		if (subno < NPAREN)
57158f0484fSRodney W. Grimes 			p->pbegin[subno] = HERE();
57258f0484fSRodney W. Grimes 		EMIT(OLPAREN, subno);
57358f0484fSRodney W. Grimes 		/* the MORE here is an error heuristic */
57458f0484fSRodney W. Grimes 		if (MORE() && !SEETWO('\\', ')'))
57558f0484fSRodney W. Grimes 			p_bre(p, '\\', ')');
57658f0484fSRodney W. Grimes 		if (subno < NPAREN) {
57758f0484fSRodney W. Grimes 			p->pend[subno] = HERE();
57858f0484fSRodney W. Grimes 			assert(p->pend[subno] != 0);
57958f0484fSRodney W. Grimes 		}
58058f0484fSRodney W. Grimes 		EMIT(ORPAREN, subno);
58151295a4dSJordan K. Hubbard 		(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
58258f0484fSRodney W. Grimes 		break;
58358f0484fSRodney W. Grimes 	case BACKSL|')':	/* should not get here -- must be user */
58458f0484fSRodney W. Grimes 	case BACKSL|'}':
58558f0484fSRodney W. Grimes 		SETERROR(REG_EPAREN);
58658f0484fSRodney W. Grimes 		break;
58758f0484fSRodney W. Grimes 	case BACKSL|'1':
58858f0484fSRodney W. Grimes 	case BACKSL|'2':
58958f0484fSRodney W. Grimes 	case BACKSL|'3':
59058f0484fSRodney W. Grimes 	case BACKSL|'4':
59158f0484fSRodney W. Grimes 	case BACKSL|'5':
59258f0484fSRodney W. Grimes 	case BACKSL|'6':
59358f0484fSRodney W. Grimes 	case BACKSL|'7':
59458f0484fSRodney W. Grimes 	case BACKSL|'8':
59558f0484fSRodney W. Grimes 	case BACKSL|'9':
59658f0484fSRodney W. Grimes 		i = (c&~BACKSL) - '0';
59758f0484fSRodney W. Grimes 		assert(i < NPAREN);
59858f0484fSRodney W. Grimes 		if (p->pend[i] != 0) {
59958f0484fSRodney W. Grimes 			assert(i <= p->g->nsub);
60058f0484fSRodney W. Grimes 			EMIT(OBACK_, i);
60158f0484fSRodney W. Grimes 			assert(p->pbegin[i] != 0);
60258f0484fSRodney W. Grimes 			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
60358f0484fSRodney W. Grimes 			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
60458f0484fSRodney W. Grimes 			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
60558f0484fSRodney W. Grimes 			EMIT(O_BACK, i);
60658f0484fSRodney W. Grimes 		} else
60758f0484fSRodney W. Grimes 			SETERROR(REG_ESUBREG);
60858f0484fSRodney W. Grimes 		p->g->backrefs = 1;
60958f0484fSRodney W. Grimes 		break;
61058f0484fSRodney W. Grimes 	case '*':
61151295a4dSJordan K. Hubbard 		(void)REQUIRE(starordinary, REG_BADRPT);
61258f0484fSRodney W. Grimes 		/* FALLTHROUGH */
61358f0484fSRodney W. Grimes 	default:
61458f0484fSRodney W. Grimes 		ordinary(p, c &~ BACKSL);
61558f0484fSRodney W. Grimes 		break;
61658f0484fSRodney W. Grimes 	}
61758f0484fSRodney W. Grimes 
61858f0484fSRodney W. Grimes 	if (EAT('*')) {		/* implemented as +? */
61958f0484fSRodney W. Grimes 		/* this case does not require the (y|) trick, noKLUDGE */
62058f0484fSRodney W. Grimes 		INSERT(OPLUS_, pos);
62158f0484fSRodney W. Grimes 		ASTERN(O_PLUS, pos);
62258f0484fSRodney W. Grimes 		INSERT(OQUEST_, pos);
62358f0484fSRodney W. Grimes 		ASTERN(O_QUEST, pos);
62458f0484fSRodney W. Grimes 	} else if (EATTWO('\\', '{')) {
62558f0484fSRodney W. Grimes 		count = p_count(p);
62658f0484fSRodney W. Grimes 		if (EAT(',')) {
62758f0484fSRodney W. Grimes 			if (MORE() && isdigit(PEEK())) {
62858f0484fSRodney W. Grimes 				count2 = p_count(p);
62951295a4dSJordan K. Hubbard 				(void)REQUIRE(count <= count2, REG_BADBR);
63058f0484fSRodney W. Grimes 			} else		/* single number with comma */
63158f0484fSRodney W. Grimes 				count2 = INFINITY;
63258f0484fSRodney W. Grimes 		} else		/* just a single number */
63358f0484fSRodney W. Grimes 			count2 = count;
63458f0484fSRodney W. Grimes 		repeat(p, pos, count, count2);
63558f0484fSRodney W. Grimes 		if (!EATTWO('\\', '}')) {	/* error heuristics */
63658f0484fSRodney W. Grimes 			while (MORE() && !SEETWO('\\', '}'))
63758f0484fSRodney W. Grimes 				NEXT();
63851295a4dSJordan K. Hubbard 			(void)REQUIRE(MORE(), REG_EBRACE);
63958f0484fSRodney W. Grimes 			SETERROR(REG_BADBR);
64058f0484fSRodney W. Grimes 		}
64158f0484fSRodney W. Grimes 	} else if (c == (unsigned char)'$')	/* $ (but not \$) ends it */
64258f0484fSRodney W. Grimes 		return(1);
64358f0484fSRodney W. Grimes 
64458f0484fSRodney W. Grimes 	return(0);
64558f0484fSRodney W. Grimes }
64658f0484fSRodney W. Grimes 
64758f0484fSRodney W. Grimes /*
64858f0484fSRodney W. Grimes  - p_count - parse a repetition count
64958f0484fSRodney W. Grimes  == static int p_count(register struct parse *p);
65058f0484fSRodney W. Grimes  */
65158f0484fSRodney W. Grimes static int			/* the value */
65258f0484fSRodney W. Grimes p_count(p)
65358f0484fSRodney W. Grimes register struct parse *p;
65458f0484fSRodney W. Grimes {
65558f0484fSRodney W. Grimes 	register int count = 0;
65658f0484fSRodney W. Grimes 	register int ndigits = 0;
65758f0484fSRodney W. Grimes 
65858f0484fSRodney W. Grimes 	while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
65958f0484fSRodney W. Grimes 		count = count*10 + (GETNEXT() - '0');
66058f0484fSRodney W. Grimes 		ndigits++;
66158f0484fSRodney W. Grimes 	}
66258f0484fSRodney W. Grimes 
66351295a4dSJordan K. Hubbard 	(void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
66458f0484fSRodney W. Grimes 	return(count);
66558f0484fSRodney W. Grimes }
66658f0484fSRodney W. Grimes 
66758f0484fSRodney W. Grimes /*
66858f0484fSRodney W. Grimes  - p_bracket - parse a bracketed character list
66958f0484fSRodney W. Grimes  == static void p_bracket(register struct parse *p);
67058f0484fSRodney W. Grimes  *
67158f0484fSRodney W. Grimes  * Note a significant property of this code:  if the allocset() did SETERROR,
67258f0484fSRodney W. Grimes  * no set operations are done.
67358f0484fSRodney W. Grimes  */
67458f0484fSRodney W. Grimes static void
67558f0484fSRodney W. Grimes p_bracket(p)
67658f0484fSRodney W. Grimes register struct parse *p;
67758f0484fSRodney W. Grimes {
67858f0484fSRodney W. Grimes 	register cset *cs = allocset(p);
67958f0484fSRodney W. Grimes 	register int invert = 0;
68058f0484fSRodney W. Grimes 
68158f0484fSRodney W. Grimes 	/* Dept of Truly Sickening Special-Case Kludges */
68258f0484fSRodney W. Grimes 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
68358f0484fSRodney W. Grimes 		EMIT(OBOW, 0);
68458f0484fSRodney W. Grimes 		NEXTn(6);
68558f0484fSRodney W. Grimes 		return;
68658f0484fSRodney W. Grimes 	}
68758f0484fSRodney W. Grimes 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
68858f0484fSRodney W. Grimes 		EMIT(OEOW, 0);
68958f0484fSRodney W. Grimes 		NEXTn(6);
69058f0484fSRodney W. Grimes 		return;
69158f0484fSRodney W. Grimes 	}
69258f0484fSRodney W. Grimes 
69358f0484fSRodney W. Grimes 	if (EAT('^'))
69458f0484fSRodney W. Grimes 		invert++;	/* make note to invert set at end */
69558f0484fSRodney W. Grimes 	if (EAT(']'))
69658f0484fSRodney W. Grimes 		CHadd(cs, ']');
69758f0484fSRodney W. Grimes 	else if (EAT('-'))
69858f0484fSRodney W. Grimes 		CHadd(cs, '-');
69958f0484fSRodney W. Grimes 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
70058f0484fSRodney W. Grimes 		p_b_term(p, cs);
70158f0484fSRodney W. Grimes 	if (EAT('-'))
70258f0484fSRodney W. Grimes 		CHadd(cs, '-');
70351295a4dSJordan K. Hubbard 	(void)MUSTEAT(']', REG_EBRACK);
70458f0484fSRodney W. Grimes 
70558f0484fSRodney W. Grimes 	if (p->error != 0)	/* don't mess things up further */
70658f0484fSRodney W. Grimes 		return;
70758f0484fSRodney W. Grimes 
70858f0484fSRodney W. Grimes 	if (p->g->cflags&REG_ICASE) {
70958f0484fSRodney W. Grimes 		register int i;
71058f0484fSRodney W. Grimes 		register int ci;
71158f0484fSRodney W. Grimes 
71258f0484fSRodney W. Grimes 		for (i = p->g->csetsize - 1; i >= 0; i--)
71358f0484fSRodney W. Grimes 			if (CHIN(cs, i) && isalpha(i)) {
71458f0484fSRodney W. Grimes 				ci = othercase(i);
71558f0484fSRodney W. Grimes 				if (ci != i)
71658f0484fSRodney W. Grimes 					CHadd(cs, ci);
71758f0484fSRodney W. Grimes 			}
71858f0484fSRodney W. Grimes 		if (cs->multis != NULL)
71958f0484fSRodney W. Grimes 			mccase(p, cs);
72058f0484fSRodney W. Grimes 	}
72158f0484fSRodney W. Grimes 	if (invert) {
72258f0484fSRodney W. Grimes 		register int i;
72358f0484fSRodney W. Grimes 
72458f0484fSRodney W. Grimes 		for (i = p->g->csetsize - 1; i >= 0; i--)
72558f0484fSRodney W. Grimes 			if (CHIN(cs, i))
72658f0484fSRodney W. Grimes 				CHsub(cs, i);
72758f0484fSRodney W. Grimes 			else
72858f0484fSRodney W. Grimes 				CHadd(cs, i);
72958f0484fSRodney W. Grimes 		if (p->g->cflags&REG_NEWLINE)
73058f0484fSRodney W. Grimes 			CHsub(cs, '\n');
73158f0484fSRodney W. Grimes 		if (cs->multis != NULL)
73258f0484fSRodney W. Grimes 			mcinvert(p, cs);
73358f0484fSRodney W. Grimes 	}
73458f0484fSRodney W. Grimes 
73558f0484fSRodney W. Grimes 	assert(cs->multis == NULL);		/* xxx */
73658f0484fSRodney W. Grimes 
73758f0484fSRodney W. Grimes 	if (nch(p, cs) == 1) {		/* optimize singleton sets */
73858f0484fSRodney W. Grimes 		ordinary(p, firstch(p, cs));
73958f0484fSRodney W. Grimes 		freeset(p, cs);
74058f0484fSRodney W. Grimes 	} else
74158f0484fSRodney W. Grimes 		EMIT(OANYOF, freezeset(p, cs));
74258f0484fSRodney W. Grimes }
74358f0484fSRodney W. Grimes 
74458f0484fSRodney W. Grimes /*
74558f0484fSRodney W. Grimes  - p_b_term - parse one term of a bracketed character list
74658f0484fSRodney W. Grimes  == static void p_b_term(register struct parse *p, register cset *cs);
74758f0484fSRodney W. Grimes  */
74858f0484fSRodney W. Grimes static void
74958f0484fSRodney W. Grimes p_b_term(p, cs)
75058f0484fSRodney W. Grimes register struct parse *p;
75158f0484fSRodney W. Grimes register cset *cs;
75258f0484fSRodney W. Grimes {
75358f0484fSRodney W. Grimes 	register char c;
75458f0484fSRodney W. Grimes 	register char start, finish;
75558f0484fSRodney W. Grimes 	register int i;
75658f0484fSRodney W. Grimes 
75758f0484fSRodney W. Grimes 	/* classify what we've got */
75858f0484fSRodney W. Grimes 	switch ((MORE()) ? PEEK() : '\0') {
75958f0484fSRodney W. Grimes 	case '[':
76058f0484fSRodney W. Grimes 		c = (MORE2()) ? PEEK2() : '\0';
76158f0484fSRodney W. Grimes 		break;
76258f0484fSRodney W. Grimes 	case '-':
76358f0484fSRodney W. Grimes 		SETERROR(REG_ERANGE);
76458f0484fSRodney W. Grimes 		return;			/* NOTE RETURN */
76558f0484fSRodney W. Grimes 		break;
76658f0484fSRodney W. Grimes 	default:
76758f0484fSRodney W. Grimes 		c = '\0';
76858f0484fSRodney W. Grimes 		break;
76958f0484fSRodney W. Grimes 	}
77058f0484fSRodney W. Grimes 
77158f0484fSRodney W. Grimes 	switch (c) {
77258f0484fSRodney W. Grimes 	case ':':		/* character class */
77358f0484fSRodney W. Grimes 		NEXT2();
77451295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
77558f0484fSRodney W. Grimes 		c = PEEK();
77651295a4dSJordan K. Hubbard 		(void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
77758f0484fSRodney W. Grimes 		p_b_cclass(p, cs);
77851295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
77951295a4dSJordan K. Hubbard 		(void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
78058f0484fSRodney W. Grimes 		break;
78158f0484fSRodney W. Grimes 	case '=':		/* equivalence class */
78258f0484fSRodney W. Grimes 		NEXT2();
78351295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
78458f0484fSRodney W. Grimes 		c = PEEK();
78551295a4dSJordan K. Hubbard 		(void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
78658f0484fSRodney W. Grimes 		p_b_eclass(p, cs);
78751295a4dSJordan K. Hubbard 		(void)REQUIRE(MORE(), REG_EBRACK);
78851295a4dSJordan K. Hubbard 		(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
78958f0484fSRodney W. Grimes 		break;
79058f0484fSRodney W. Grimes 	default:		/* symbol, ordinary character, or range */
79158f0484fSRodney W. Grimes /* xxx revision needed for multichar stuff */
79258f0484fSRodney W. Grimes 		start = p_b_symbol(p);
79358f0484fSRodney W. Grimes 		if (SEE('-') && MORE2() && PEEK2() != ']') {
79458f0484fSRodney W. Grimes 			/* range */
79558f0484fSRodney W. Grimes 			NEXT();
79658f0484fSRodney W. Grimes 			if (EAT('-'))
79758f0484fSRodney W. Grimes 				finish = '-';
79858f0484fSRodney W. Grimes 			else
79958f0484fSRodney W. Grimes 				finish = p_b_symbol(p);
80058f0484fSRodney W. Grimes 		} else
80158f0484fSRodney W. Grimes 			finish = start;
80258f0484fSRodney W. Grimes /* xxx what about signed chars here... */
80351295a4dSJordan K. Hubbard 		(void)REQUIRE(start <= finish, REG_ERANGE);
80458f0484fSRodney W. Grimes 		for (i = start; i <= finish; i++)
80558f0484fSRodney W. Grimes 			CHadd(cs, i);
80658f0484fSRodney W. Grimes 		break;
80758f0484fSRodney W. Grimes 	}
80858f0484fSRodney W. Grimes }
80958f0484fSRodney W. Grimes 
81058f0484fSRodney W. Grimes /*
81158f0484fSRodney W. Grimes  - p_b_cclass - parse a character-class name and deal with it
81258f0484fSRodney W. Grimes  == static void p_b_cclass(register struct parse *p, register cset *cs);
81358f0484fSRodney W. Grimes  */
81458f0484fSRodney W. Grimes static void
81558f0484fSRodney W. Grimes p_b_cclass(p, cs)
81658f0484fSRodney W. Grimes register struct parse *p;
81758f0484fSRodney W. Grimes register cset *cs;
81858f0484fSRodney W. Grimes {
81958f0484fSRodney W. Grimes 	register char *sp = p->next;
82058f0484fSRodney W. Grimes 	register struct cclass *cp;
82158f0484fSRodney W. Grimes 	register size_t len;
82258f0484fSRodney W. Grimes 	register char *u;
82358f0484fSRodney W. Grimes 	register char c;
82458f0484fSRodney W. Grimes 
82542ce22e4SAndrey A. Chernov 	while (MORE() && isalpha((unsigned char)PEEK()))
82658f0484fSRodney W. Grimes 		NEXT();
82758f0484fSRodney W. Grimes 	len = p->next - sp;
82858f0484fSRodney W. Grimes 	for (cp = cclasses; cp->name != NULL; cp++)
82958f0484fSRodney W. Grimes 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
83058f0484fSRodney W. Grimes 			break;
83158f0484fSRodney W. Grimes 	if (cp->name == NULL) {
83258f0484fSRodney W. Grimes 		/* oops, didn't find it */
83358f0484fSRodney W. Grimes 		SETERROR(REG_ECTYPE);
83458f0484fSRodney W. Grimes 		return;
83558f0484fSRodney W. Grimes 	}
83658f0484fSRodney W. Grimes 
83758f0484fSRodney W. Grimes 	u = cp->chars;
83858f0484fSRodney W. Grimes 	while ((c = *u++) != '\0')
83958f0484fSRodney W. Grimes 		CHadd(cs, c);
84058f0484fSRodney W. Grimes 	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
84158f0484fSRodney W. Grimes 		MCadd(p, cs, u);
84258f0484fSRodney W. Grimes }
84358f0484fSRodney W. Grimes 
84458f0484fSRodney W. Grimes /*
84558f0484fSRodney W. Grimes  - p_b_eclass - parse an equivalence-class name and deal with it
84658f0484fSRodney W. Grimes  == static void p_b_eclass(register struct parse *p, register cset *cs);
84758f0484fSRodney W. Grimes  *
84858f0484fSRodney W. Grimes  * This implementation is incomplete. xxx
84958f0484fSRodney W. Grimes  */
85058f0484fSRodney W. Grimes static void
85158f0484fSRodney W. Grimes p_b_eclass(p, cs)
85258f0484fSRodney W. Grimes register struct parse *p;
85358f0484fSRodney W. Grimes register cset *cs;
85458f0484fSRodney W. Grimes {
85558f0484fSRodney W. Grimes 	register char c;
85658f0484fSRodney W. Grimes 
85758f0484fSRodney W. Grimes 	c = p_b_coll_elem(p, '=');
85858f0484fSRodney W. Grimes 	CHadd(cs, c);
85958f0484fSRodney W. Grimes }
86058f0484fSRodney W. Grimes 
86158f0484fSRodney W. Grimes /*
86258f0484fSRodney W. Grimes  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
86358f0484fSRodney W. Grimes  == static char p_b_symbol(register struct parse *p);
86458f0484fSRodney W. Grimes  */
86558f0484fSRodney W. Grimes static char			/* value of symbol */
86658f0484fSRodney W. Grimes p_b_symbol(p)
86758f0484fSRodney W. Grimes register struct parse *p;
86858f0484fSRodney W. Grimes {
86958f0484fSRodney W. Grimes 	register char value;
87058f0484fSRodney W. Grimes 
87151295a4dSJordan K. Hubbard 	(void)REQUIRE(MORE(), REG_EBRACK);
87258f0484fSRodney W. Grimes 	if (!EATTWO('[', '.'))
87358f0484fSRodney W. Grimes 		return(GETNEXT());
87458f0484fSRodney W. Grimes 
87558f0484fSRodney W. Grimes 	/* collating symbol */
87658f0484fSRodney W. Grimes 	value = p_b_coll_elem(p, '.');
87751295a4dSJordan K. Hubbard 	(void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
87858f0484fSRodney W. Grimes 	return(value);
87958f0484fSRodney W. Grimes }
88058f0484fSRodney W. Grimes 
88158f0484fSRodney W. Grimes /*
88258f0484fSRodney W. Grimes  - p_b_coll_elem - parse a collating-element name and look it up
88358f0484fSRodney W. Grimes  == static char p_b_coll_elem(register struct parse *p, int endc);
88458f0484fSRodney W. Grimes  */
88558f0484fSRodney W. Grimes static char			/* value of collating element */
88658f0484fSRodney W. Grimes p_b_coll_elem(p, endc)
88758f0484fSRodney W. Grimes register struct parse *p;
88858f0484fSRodney W. Grimes int endc;			/* name ended by endc,']' */
88958f0484fSRodney W. Grimes {
89058f0484fSRodney W. Grimes 	register char *sp = p->next;
89158f0484fSRodney W. Grimes 	register struct cname *cp;
89258f0484fSRodney W. Grimes 	register int len;
89358f0484fSRodney W. Grimes 
89458f0484fSRodney W. Grimes 	while (MORE() && !SEETWO(endc, ']'))
89558f0484fSRodney W. Grimes 		NEXT();
89658f0484fSRodney W. Grimes 	if (!MORE()) {
89758f0484fSRodney W. Grimes 		SETERROR(REG_EBRACK);
89858f0484fSRodney W. Grimes 		return(0);
89958f0484fSRodney W. Grimes 	}
90058f0484fSRodney W. Grimes 	len = p->next - sp;
90158f0484fSRodney W. Grimes 	for (cp = cnames; cp->name != NULL; cp++)
90258f0484fSRodney W. Grimes 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
90358f0484fSRodney W. Grimes 			return(cp->code);	/* known name */
90458f0484fSRodney W. Grimes 	if (len == 1)
90558f0484fSRodney W. Grimes 		return(*sp);	/* single character */
90658f0484fSRodney W. Grimes 	SETERROR(REG_ECOLLATE);			/* neither */
90758f0484fSRodney W. Grimes 	return(0);
90858f0484fSRodney W. Grimes }
90958f0484fSRodney W. Grimes 
91058f0484fSRodney W. Grimes /*
91158f0484fSRodney W. Grimes  - othercase - return the case counterpart of an alphabetic
91258f0484fSRodney W. Grimes  == static char othercase(int ch);
91358f0484fSRodney W. Grimes  */
91458f0484fSRodney W. Grimes static char			/* if no counterpart, return ch */
91558f0484fSRodney W. Grimes othercase(ch)
91658f0484fSRodney W. Grimes int ch;
91758f0484fSRodney W. Grimes {
91842ce22e4SAndrey A. Chernov 	ch = (unsigned char)ch;
91958f0484fSRodney W. Grimes 	assert(isalpha(ch));
92058f0484fSRodney W. Grimes 	if (isupper(ch))
92158f0484fSRodney W. Grimes 		return(tolower(ch));
92258f0484fSRodney W. Grimes 	else if (islower(ch))
92358f0484fSRodney W. Grimes 		return(toupper(ch));
92458f0484fSRodney W. Grimes 	else			/* peculiar, but could happen */
92558f0484fSRodney W. Grimes 		return(ch);
92658f0484fSRodney W. Grimes }
92758f0484fSRodney W. Grimes 
92858f0484fSRodney W. Grimes /*
92958f0484fSRodney W. Grimes  - bothcases - emit a dualcase version of a two-case character
93058f0484fSRodney W. Grimes  == static void bothcases(register struct parse *p, int ch);
93158f0484fSRodney W. Grimes  *
93258f0484fSRodney W. Grimes  * Boy, is this implementation ever a kludge...
93358f0484fSRodney W. Grimes  */
93458f0484fSRodney W. Grimes static void
93558f0484fSRodney W. Grimes bothcases(p, ch)
93658f0484fSRodney W. Grimes register struct parse *p;
93758f0484fSRodney W. Grimes int ch;
93858f0484fSRodney W. Grimes {
93958f0484fSRodney W. Grimes 	register char *oldnext = p->next;
94058f0484fSRodney W. Grimes 	register char *oldend = p->end;
94158f0484fSRodney W. Grimes 	char bracket[3];
94258f0484fSRodney W. Grimes 
94342ce22e4SAndrey A. Chernov 	ch = (unsigned char)ch;
94458f0484fSRodney W. Grimes 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
94558f0484fSRodney W. Grimes 	p->next = bracket;
94658f0484fSRodney W. Grimes 	p->end = bracket+2;
94758f0484fSRodney W. Grimes 	bracket[0] = ch;
94858f0484fSRodney W. Grimes 	bracket[1] = ']';
94958f0484fSRodney W. Grimes 	bracket[2] = '\0';
95058f0484fSRodney W. Grimes 	p_bracket(p);
95158f0484fSRodney W. Grimes 	assert(p->next == bracket+2);
95258f0484fSRodney W. Grimes 	p->next = oldnext;
95358f0484fSRodney W. Grimes 	p->end = oldend;
95458f0484fSRodney W. Grimes }
95558f0484fSRodney W. Grimes 
95658f0484fSRodney W. Grimes /*
95758f0484fSRodney W. Grimes  - ordinary - emit an ordinary character
95858f0484fSRodney W. Grimes  == static void ordinary(register struct parse *p, register int ch);
95958f0484fSRodney W. Grimes  */
96058f0484fSRodney W. Grimes static void
96158f0484fSRodney W. Grimes ordinary(p, ch)
96258f0484fSRodney W. Grimes register struct parse *p;
96358f0484fSRodney W. Grimes register int ch;
96458f0484fSRodney W. Grimes {
96558f0484fSRodney W. Grimes 	register cat_t *cap = p->g->categories;
96658f0484fSRodney W. Grimes 
96742ce22e4SAndrey A. Chernov 	if ((p->g->cflags&REG_ICASE) && isalpha((unsigned char)ch) && othercase(ch) != ch)
96858f0484fSRodney W. Grimes 		bothcases(p, ch);
96958f0484fSRodney W. Grimes 	else {
97058f0484fSRodney W. Grimes 		EMIT(OCHAR, (unsigned char)ch);
97158f0484fSRodney W. Grimes 		if (cap[ch] == 0)
97258f0484fSRodney W. Grimes 			cap[ch] = p->g->ncategories++;
97358f0484fSRodney W. Grimes 	}
97458f0484fSRodney W. Grimes }
97558f0484fSRodney W. Grimes 
97658f0484fSRodney W. Grimes /*
97758f0484fSRodney W. Grimes  - nonnewline - emit REG_NEWLINE version of OANY
97858f0484fSRodney W. Grimes  == static void nonnewline(register struct parse *p);
97958f0484fSRodney W. Grimes  *
98058f0484fSRodney W. Grimes  * Boy, is this implementation ever a kludge...
98158f0484fSRodney W. Grimes  */
98258f0484fSRodney W. Grimes static void
98358f0484fSRodney W. Grimes nonnewline(p)
98458f0484fSRodney W. Grimes register struct parse *p;
98558f0484fSRodney W. Grimes {
98658f0484fSRodney W. Grimes 	register char *oldnext = p->next;
98758f0484fSRodney W. Grimes 	register char *oldend = p->end;
98858f0484fSRodney W. Grimes 	char bracket[4];
98958f0484fSRodney W. Grimes 
99058f0484fSRodney W. Grimes 	p->next = bracket;
99158f0484fSRodney W. Grimes 	p->end = bracket+3;
99258f0484fSRodney W. Grimes 	bracket[0] = '^';
99358f0484fSRodney W. Grimes 	bracket[1] = '\n';
99458f0484fSRodney W. Grimes 	bracket[2] = ']';
99558f0484fSRodney W. Grimes 	bracket[3] = '\0';
99658f0484fSRodney W. Grimes 	p_bracket(p);
99758f0484fSRodney W. Grimes 	assert(p->next == bracket+3);
99858f0484fSRodney W. Grimes 	p->next = oldnext;
99958f0484fSRodney W. Grimes 	p->end = oldend;
100058f0484fSRodney W. Grimes }
100158f0484fSRodney W. Grimes 
100258f0484fSRodney W. Grimes /*
100358f0484fSRodney W. Grimes  - repeat - generate code for a bounded repetition, recursively if needed
100458f0484fSRodney W. Grimes  == static void repeat(register struct parse *p, sopno start, int from, int to);
100558f0484fSRodney W. Grimes  */
100658f0484fSRodney W. Grimes static void
100758f0484fSRodney W. Grimes repeat(p, start, from, to)
100858f0484fSRodney W. Grimes register struct parse *p;
100958f0484fSRodney W. Grimes sopno start;			/* operand from here to end of strip */
101058f0484fSRodney W. Grimes int from;			/* repeated from this number */
101158f0484fSRodney W. Grimes int to;				/* to this number of times (maybe INFINITY) */
101258f0484fSRodney W. Grimes {
101358f0484fSRodney W. Grimes 	register sopno finish = HERE();
101458f0484fSRodney W. Grimes #	define	N	2
101558f0484fSRodney W. Grimes #	define	INF	3
101658f0484fSRodney W. Grimes #	define	REP(f, t)	((f)*8 + (t))
101758f0484fSRodney W. Grimes #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
101858f0484fSRodney W. Grimes 	register sopno copy;
101958f0484fSRodney W. Grimes 
102058f0484fSRodney W. Grimes 	if (p->error != 0)	/* head off possible runaway recursion */
102158f0484fSRodney W. Grimes 		return;
102258f0484fSRodney W. Grimes 
102358f0484fSRodney W. Grimes 	assert(from <= to);
102458f0484fSRodney W. Grimes 
102558f0484fSRodney W. Grimes 	switch (REP(MAP(from), MAP(to))) {
102658f0484fSRodney W. Grimes 	case REP(0, 0):			/* must be user doing this */
102758f0484fSRodney W. Grimes 		DROP(finish-start);	/* drop the operand */
102858f0484fSRodney W. Grimes 		break;
102958f0484fSRodney W. Grimes 	case REP(0, 1):			/* as x{1,1}? */
103058f0484fSRodney W. Grimes 	case REP(0, N):			/* as x{1,n}? */
103158f0484fSRodney W. Grimes 	case REP(0, INF):		/* as x{1,}? */
103258f0484fSRodney W. Grimes 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
103358f0484fSRodney W. Grimes 		INSERT(OCH_, start);		/* offset is wrong... */
103458f0484fSRodney W. Grimes 		repeat(p, start+1, 1, to);
103558f0484fSRodney W. Grimes 		ASTERN(OOR1, start);
103658f0484fSRodney W. Grimes 		AHEAD(start);			/* ... fix it */
103758f0484fSRodney W. Grimes 		EMIT(OOR2, 0);
103858f0484fSRodney W. Grimes 		AHEAD(THERE());
103958f0484fSRodney W. Grimes 		ASTERN(O_CH, THERETHERE());
104058f0484fSRodney W. Grimes 		break;
104158f0484fSRodney W. Grimes 	case REP(1, 1):			/* trivial case */
104258f0484fSRodney W. Grimes 		/* done */
104358f0484fSRodney W. Grimes 		break;
104458f0484fSRodney W. Grimes 	case REP(1, N):			/* as x?x{1,n-1} */
104558f0484fSRodney W. Grimes 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
104658f0484fSRodney W. Grimes 		INSERT(OCH_, start);
104758f0484fSRodney W. Grimes 		ASTERN(OOR1, start);
104858f0484fSRodney W. Grimes 		AHEAD(start);
104958f0484fSRodney W. Grimes 		EMIT(OOR2, 0);			/* offset very wrong... */
105058f0484fSRodney W. Grimes 		AHEAD(THERE());			/* ...so fix it */
105158f0484fSRodney W. Grimes 		ASTERN(O_CH, THERETHERE());
105258f0484fSRodney W. Grimes 		copy = dupl(p, start+1, finish+1);
105358f0484fSRodney W. Grimes 		assert(copy == finish+4);
105458f0484fSRodney W. Grimes 		repeat(p, copy, 1, to-1);
105558f0484fSRodney W. Grimes 		break;
105658f0484fSRodney W. Grimes 	case REP(1, INF):		/* as x+ */
105758f0484fSRodney W. Grimes 		INSERT(OPLUS_, start);
105858f0484fSRodney W. Grimes 		ASTERN(O_PLUS, start);
105958f0484fSRodney W. Grimes 		break;
106058f0484fSRodney W. Grimes 	case REP(N, N):			/* as xx{m-1,n-1} */
106158f0484fSRodney W. Grimes 		copy = dupl(p, start, finish);
106258f0484fSRodney W. Grimes 		repeat(p, copy, from-1, to-1);
106358f0484fSRodney W. Grimes 		break;
106458f0484fSRodney W. Grimes 	case REP(N, INF):		/* as xx{n-1,INF} */
106558f0484fSRodney W. Grimes 		copy = dupl(p, start, finish);
106658f0484fSRodney W. Grimes 		repeat(p, copy, from-1, to);
106758f0484fSRodney W. Grimes 		break;
106858f0484fSRodney W. Grimes 	default:			/* "can't happen" */
106958f0484fSRodney W. Grimes 		SETERROR(REG_ASSERT);	/* just in case */
107058f0484fSRodney W. Grimes 		break;
107158f0484fSRodney W. Grimes 	}
107258f0484fSRodney W. Grimes }
107358f0484fSRodney W. Grimes 
107458f0484fSRodney W. Grimes /*
107558f0484fSRodney W. Grimes  - seterr - set an error condition
107658f0484fSRodney W. Grimes  == static int seterr(register struct parse *p, int e);
107758f0484fSRodney W. Grimes  */
107858f0484fSRodney W. Grimes static int			/* useless but makes type checking happy */
107958f0484fSRodney W. Grimes seterr(p, e)
108058f0484fSRodney W. Grimes register struct parse *p;
108158f0484fSRodney W. Grimes int e;
108258f0484fSRodney W. Grimes {
108358f0484fSRodney W. Grimes 	if (p->error == 0)	/* keep earliest error condition */
108458f0484fSRodney W. Grimes 		p->error = e;
108558f0484fSRodney W. Grimes 	p->next = nuls;		/* try to bring things to a halt */
108658f0484fSRodney W. Grimes 	p->end = nuls;
108758f0484fSRodney W. Grimes 	return(0);		/* make the return value well-defined */
108858f0484fSRodney W. Grimes }
108958f0484fSRodney W. Grimes 
109058f0484fSRodney W. Grimes /*
109158f0484fSRodney W. Grimes  - allocset - allocate a set of characters for []
109258f0484fSRodney W. Grimes  == static cset *allocset(register struct parse *p);
109358f0484fSRodney W. Grimes  */
109458f0484fSRodney W. Grimes static cset *
109558f0484fSRodney W. Grimes allocset(p)
109658f0484fSRodney W. Grimes register struct parse *p;
109758f0484fSRodney W. Grimes {
109858f0484fSRodney W. Grimes 	register int no = p->g->ncsets++;
109958f0484fSRodney W. Grimes 	register size_t nc;
110058f0484fSRodney W. Grimes 	register size_t nbytes;
110158f0484fSRodney W. Grimes 	register cset *cs;
110258f0484fSRodney W. Grimes 	register size_t css = (size_t)p->g->csetsize;
110358f0484fSRodney W. Grimes 	register int i;
110458f0484fSRodney W. Grimes 
110558f0484fSRodney W. Grimes 	if (no >= p->ncsalloc) {	/* need another column of space */
110658f0484fSRodney W. Grimes 		p->ncsalloc += CHAR_BIT;
110758f0484fSRodney W. Grimes 		nc = p->ncsalloc;
110858f0484fSRodney W. Grimes 		assert(nc % CHAR_BIT == 0);
110958f0484fSRodney W. Grimes 		nbytes = nc / CHAR_BIT * css;
111058f0484fSRodney W. Grimes 		if (p->g->sets == NULL)
111158f0484fSRodney W. Grimes 			p->g->sets = (cset *)malloc(nc * sizeof(cset));
111258f0484fSRodney W. Grimes 		else
111358f0484fSRodney W. Grimes 			p->g->sets = (cset *)realloc((char *)p->g->sets,
111458f0484fSRodney W. Grimes 							nc * sizeof(cset));
111558f0484fSRodney W. Grimes 		if (p->g->setbits == NULL)
111658f0484fSRodney W. Grimes 			p->g->setbits = (uch *)malloc(nbytes);
111758f0484fSRodney W. Grimes 		else {
111858f0484fSRodney W. Grimes 			p->g->setbits = (uch *)realloc((char *)p->g->setbits,
111958f0484fSRodney W. Grimes 								nbytes);
112058f0484fSRodney W. Grimes 			/* xxx this isn't right if setbits is now NULL */
112158f0484fSRodney W. Grimes 			for (i = 0; i < no; i++)
112258f0484fSRodney W. Grimes 				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
112358f0484fSRodney W. Grimes 		}
112458f0484fSRodney W. Grimes 		if (p->g->sets != NULL && p->g->setbits != NULL)
112558f0484fSRodney W. Grimes 			(void) memset((char *)p->g->setbits + (nbytes - css),
112658f0484fSRodney W. Grimes 								0, css);
112758f0484fSRodney W. Grimes 		else {
112858f0484fSRodney W. Grimes 			no = 0;
112958f0484fSRodney W. Grimes 			SETERROR(REG_ESPACE);
113058f0484fSRodney W. Grimes 			/* caller's responsibility not to do set ops */
113158f0484fSRodney W. Grimes 		}
113258f0484fSRodney W. Grimes 	}
113358f0484fSRodney W. Grimes 
113458f0484fSRodney W. Grimes 	assert(p->g->sets != NULL);	/* xxx */
113558f0484fSRodney W. Grimes 	cs = &p->g->sets[no];
113658f0484fSRodney W. Grimes 	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
113758f0484fSRodney W. Grimes 	cs->mask = 1 << ((no) % CHAR_BIT);
113858f0484fSRodney W. Grimes 	cs->hash = 0;
113958f0484fSRodney W. Grimes 	cs->smultis = 0;
114058f0484fSRodney W. Grimes 	cs->multis = NULL;
114158f0484fSRodney W. Grimes 
114258f0484fSRodney W. Grimes 	return(cs);
114358f0484fSRodney W. Grimes }
114458f0484fSRodney W. Grimes 
114558f0484fSRodney W. Grimes /*
114658f0484fSRodney W. Grimes  - freeset - free a now-unused set
114758f0484fSRodney W. Grimes  == static void freeset(register struct parse *p, register cset *cs);
114858f0484fSRodney W. Grimes  */
114958f0484fSRodney W. Grimes static void
115058f0484fSRodney W. Grimes freeset(p, cs)
115158f0484fSRodney W. Grimes register struct parse *p;
115258f0484fSRodney W. Grimes register cset *cs;
115358f0484fSRodney W. Grimes {
115458f0484fSRodney W. Grimes 	register int i;
115558f0484fSRodney W. Grimes 	register cset *top = &p->g->sets[p->g->ncsets];
115658f0484fSRodney W. Grimes 	register size_t css = (size_t)p->g->csetsize;
115758f0484fSRodney W. Grimes 
115858f0484fSRodney W. Grimes 	for (i = 0; i < css; i++)
115958f0484fSRodney W. Grimes 		CHsub(cs, i);
116058f0484fSRodney W. Grimes 	if (cs == top-1)	/* recover only the easy case */
116158f0484fSRodney W. Grimes 		p->g->ncsets--;
116258f0484fSRodney W. Grimes }
116358f0484fSRodney W. Grimes 
116458f0484fSRodney W. Grimes /*
116558f0484fSRodney W. Grimes  - freezeset - final processing on a set of characters
116658f0484fSRodney W. Grimes  == static int freezeset(register struct parse *p, register cset *cs);
116758f0484fSRodney W. Grimes  *
116858f0484fSRodney W. Grimes  * The main task here is merging identical sets.  This is usually a waste
116958f0484fSRodney W. Grimes  * of time (although the hash code minimizes the overhead), but can win
117058f0484fSRodney W. Grimes  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
117158f0484fSRodney W. Grimes  * is done using addition rather than xor -- all ASCII [aA] sets xor to
117258f0484fSRodney W. Grimes  * the same value!
117358f0484fSRodney W. Grimes  */
117458f0484fSRodney W. Grimes static int			/* set number */
117558f0484fSRodney W. Grimes freezeset(p, cs)
117658f0484fSRodney W. Grimes register struct parse *p;
117758f0484fSRodney W. Grimes register cset *cs;
117858f0484fSRodney W. Grimes {
117958f0484fSRodney W. Grimes 	register uch h = cs->hash;
118058f0484fSRodney W. Grimes 	register int i;
118158f0484fSRodney W. Grimes 	register cset *top = &p->g->sets[p->g->ncsets];
118258f0484fSRodney W. Grimes 	register cset *cs2;
118358f0484fSRodney W. Grimes 	register size_t css = (size_t)p->g->csetsize;
118458f0484fSRodney W. Grimes 
118558f0484fSRodney W. Grimes 	/* look for an earlier one which is the same */
118658f0484fSRodney W. Grimes 	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
118758f0484fSRodney W. Grimes 		if (cs2->hash == h && cs2 != cs) {
118858f0484fSRodney W. Grimes 			/* maybe */
118958f0484fSRodney W. Grimes 			for (i = 0; i < css; i++)
119058f0484fSRodney W. Grimes 				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
119158f0484fSRodney W. Grimes 					break;		/* no */
119258f0484fSRodney W. Grimes 			if (i == css)
119358f0484fSRodney W. Grimes 				break;			/* yes */
119458f0484fSRodney W. Grimes 		}
119558f0484fSRodney W. Grimes 
119658f0484fSRodney W. Grimes 	if (cs2 < top) {	/* found one */
119758f0484fSRodney W. Grimes 		freeset(p, cs);
119858f0484fSRodney W. Grimes 		cs = cs2;
119958f0484fSRodney W. Grimes 	}
120058f0484fSRodney W. Grimes 
120158f0484fSRodney W. Grimes 	return((int)(cs - p->g->sets));
120258f0484fSRodney W. Grimes }
120358f0484fSRodney W. Grimes 
120458f0484fSRodney W. Grimes /*
120558f0484fSRodney W. Grimes  - firstch - return first character in a set (which must have at least one)
120658f0484fSRodney W. Grimes  == static int firstch(register struct parse *p, register cset *cs);
120758f0484fSRodney W. Grimes  */
120858f0484fSRodney W. Grimes static int			/* character; there is no "none" value */
120958f0484fSRodney W. Grimes firstch(p, cs)
121058f0484fSRodney W. Grimes register struct parse *p;
121158f0484fSRodney W. Grimes register cset *cs;
121258f0484fSRodney W. Grimes {
121358f0484fSRodney W. Grimes 	register int i;
121458f0484fSRodney W. Grimes 	register size_t css = (size_t)p->g->csetsize;
121558f0484fSRodney W. Grimes 
121658f0484fSRodney W. Grimes 	for (i = 0; i < css; i++)
121758f0484fSRodney W. Grimes 		if (CHIN(cs, i))
121842ce22e4SAndrey A. Chernov 			return((unsigned char)i);
121958f0484fSRodney W. Grimes 	assert(never);
122058f0484fSRodney W. Grimes 	return(0);		/* arbitrary */
122158f0484fSRodney W. Grimes }
122258f0484fSRodney W. Grimes 
122358f0484fSRodney W. Grimes /*
122458f0484fSRodney W. Grimes  - nch - number of characters in a set
122558f0484fSRodney W. Grimes  == static int nch(register struct parse *p, register cset *cs);
122658f0484fSRodney W. Grimes  */
122758f0484fSRodney W. Grimes static int
122858f0484fSRodney W. Grimes nch(p, cs)
122958f0484fSRodney W. Grimes register struct parse *p;
123058f0484fSRodney W. Grimes register cset *cs;
123158f0484fSRodney W. Grimes {
123258f0484fSRodney W. Grimes 	register int i;
123358f0484fSRodney W. Grimes 	register size_t css = (size_t)p->g->csetsize;
123458f0484fSRodney W. Grimes 	register int n = 0;
123558f0484fSRodney W. Grimes 
123658f0484fSRodney W. Grimes 	for (i = 0; i < css; i++)
123758f0484fSRodney W. Grimes 		if (CHIN(cs, i))
123858f0484fSRodney W. Grimes 			n++;
123958f0484fSRodney W. Grimes 	return(n);
124058f0484fSRodney W. Grimes }
124158f0484fSRodney W. Grimes 
124258f0484fSRodney W. Grimes /*
124358f0484fSRodney W. Grimes  - mcadd - add a collating element to a cset
124458f0484fSRodney W. Grimes  == static void mcadd(register struct parse *p, register cset *cs, \
124558f0484fSRodney W. Grimes  ==	register char *cp);
124658f0484fSRodney W. Grimes  */
124758f0484fSRodney W. Grimes static void
124858f0484fSRodney W. Grimes mcadd(p, cs, cp)
124958f0484fSRodney W. Grimes register struct parse *p;
125058f0484fSRodney W. Grimes register cset *cs;
125158f0484fSRodney W. Grimes register char *cp;
125258f0484fSRodney W. Grimes {
125358f0484fSRodney W. Grimes 	register size_t oldend = cs->smultis;
125458f0484fSRodney W. Grimes 
125558f0484fSRodney W. Grimes 	cs->smultis += strlen(cp) + 1;
125658f0484fSRodney W. Grimes 	if (cs->multis == NULL)
125758f0484fSRodney W. Grimes 		cs->multis = malloc(cs->smultis);
125858f0484fSRodney W. Grimes 	else
125958f0484fSRodney W. Grimes 		cs->multis = realloc(cs->multis, cs->smultis);
126058f0484fSRodney W. Grimes 	if (cs->multis == NULL) {
126158f0484fSRodney W. Grimes 		SETERROR(REG_ESPACE);
126258f0484fSRodney W. Grimes 		return;
126358f0484fSRodney W. Grimes 	}
126458f0484fSRodney W. Grimes 
126558f0484fSRodney W. Grimes 	(void) strcpy(cs->multis + oldend - 1, cp);
126658f0484fSRodney W. Grimes 	cs->multis[cs->smultis - 1] = '\0';
126758f0484fSRodney W. Grimes }
126858f0484fSRodney W. Grimes 
126951295a4dSJordan K. Hubbard #if used
127058f0484fSRodney W. Grimes /*
127158f0484fSRodney W. Grimes  - mcsub - subtract a collating element from a cset
127258f0484fSRodney W. Grimes  == static void mcsub(register cset *cs, register char *cp);
127358f0484fSRodney W. Grimes  */
127458f0484fSRodney W. Grimes static void
127558f0484fSRodney W. Grimes mcsub(cs, cp)
127658f0484fSRodney W. Grimes register cset *cs;
127758f0484fSRodney W. Grimes register char *cp;
127858f0484fSRodney W. Grimes {
127958f0484fSRodney W. Grimes 	register char *fp = mcfind(cs, cp);
128058f0484fSRodney W. Grimes 	register size_t len = strlen(fp);
128158f0484fSRodney W. Grimes 
128258f0484fSRodney W. Grimes 	assert(fp != NULL);
128358f0484fSRodney W. Grimes 	(void) memmove(fp, fp + len + 1,
128458f0484fSRodney W. Grimes 				cs->smultis - (fp + len + 1 - cs->multis));
128558f0484fSRodney W. Grimes 	cs->smultis -= len;
128658f0484fSRodney W. Grimes 
128758f0484fSRodney W. Grimes 	if (cs->smultis == 0) {
128858f0484fSRodney W. Grimes 		free(cs->multis);
128958f0484fSRodney W. Grimes 		cs->multis = NULL;
129058f0484fSRodney W. Grimes 		return;
129158f0484fSRodney W. Grimes 	}
129258f0484fSRodney W. Grimes 
129358f0484fSRodney W. Grimes 	cs->multis = realloc(cs->multis, cs->smultis);
129458f0484fSRodney W. Grimes 	assert(cs->multis != NULL);
129558f0484fSRodney W. Grimes }
129658f0484fSRodney W. Grimes 
129758f0484fSRodney W. Grimes /*
129858f0484fSRodney W. Grimes  - mcin - is a collating element in a cset?
129958f0484fSRodney W. Grimes  == static int mcin(register cset *cs, register char *cp);
130058f0484fSRodney W. Grimes  */
130158f0484fSRodney W. Grimes static int
130258f0484fSRodney W. Grimes mcin(cs, cp)
130358f0484fSRodney W. Grimes register cset *cs;
130458f0484fSRodney W. Grimes register char *cp;
130558f0484fSRodney W. Grimes {
130658f0484fSRodney W. Grimes 	return(mcfind(cs, cp) != NULL);
130758f0484fSRodney W. Grimes }
130858f0484fSRodney W. Grimes 
130958f0484fSRodney W. Grimes /*
131058f0484fSRodney W. Grimes  - mcfind - find a collating element in a cset
131158f0484fSRodney W. Grimes  == static char *mcfind(register cset *cs, register char *cp);
131258f0484fSRodney W. Grimes  */
131358f0484fSRodney W. Grimes static char *
131458f0484fSRodney W. Grimes mcfind(cs, cp)
131558f0484fSRodney W. Grimes register cset *cs;
131658f0484fSRodney W. Grimes register char *cp;
131758f0484fSRodney W. Grimes {
131858f0484fSRodney W. Grimes 	register char *p;
131958f0484fSRodney W. Grimes 
132058f0484fSRodney W. Grimes 	if (cs->multis == NULL)
132158f0484fSRodney W. Grimes 		return(NULL);
132258f0484fSRodney W. Grimes 	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
132358f0484fSRodney W. Grimes 		if (strcmp(cp, p) == 0)
132458f0484fSRodney W. Grimes 			return(p);
132558f0484fSRodney W. Grimes 	return(NULL);
132658f0484fSRodney W. Grimes }
132751295a4dSJordan K. Hubbard #endif
132858f0484fSRodney W. Grimes 
132958f0484fSRodney W. Grimes /*
133058f0484fSRodney W. Grimes  - mcinvert - invert the list of collating elements in a cset
133158f0484fSRodney W. Grimes  == static void mcinvert(register struct parse *p, register cset *cs);
133258f0484fSRodney W. Grimes  *
133358f0484fSRodney W. Grimes  * This would have to know the set of possibilities.  Implementation
133458f0484fSRodney W. Grimes  * is deferred.
133558f0484fSRodney W. Grimes  */
133658f0484fSRodney W. Grimes static void
133758f0484fSRodney W. Grimes mcinvert(p, cs)
133858f0484fSRodney W. Grimes register struct parse *p;
133958f0484fSRodney W. Grimes register cset *cs;
134058f0484fSRodney W. Grimes {
134158f0484fSRodney W. Grimes 	assert(cs->multis == NULL);	/* xxx */
134258f0484fSRodney W. Grimes }
134358f0484fSRodney W. Grimes 
134458f0484fSRodney W. Grimes /*
134558f0484fSRodney W. Grimes  - mccase - add case counterparts of the list of collating elements in a cset
134658f0484fSRodney W. Grimes  == static void mccase(register struct parse *p, register cset *cs);
134758f0484fSRodney W. Grimes  *
134858f0484fSRodney W. Grimes  * This would have to know the set of possibilities.  Implementation
134958f0484fSRodney W. Grimes  * is deferred.
135058f0484fSRodney W. Grimes  */
135158f0484fSRodney W. Grimes static void
135258f0484fSRodney W. Grimes mccase(p, cs)
135358f0484fSRodney W. Grimes register struct parse *p;
135458f0484fSRodney W. Grimes register cset *cs;
135558f0484fSRodney W. Grimes {
135658f0484fSRodney W. Grimes 	assert(cs->multis == NULL);	/* xxx */
135758f0484fSRodney W. Grimes }
135858f0484fSRodney W. Grimes 
135958f0484fSRodney W. Grimes /*
136058f0484fSRodney W. Grimes  - isinsets - is this character in any sets?
136158f0484fSRodney W. Grimes  == static int isinsets(register struct re_guts *g, int c);
136258f0484fSRodney W. Grimes  */
136358f0484fSRodney W. Grimes static int			/* predicate */
136458f0484fSRodney W. Grimes isinsets(g, c)
136558f0484fSRodney W. Grimes register struct re_guts *g;
136658f0484fSRodney W. Grimes int c;
136758f0484fSRodney W. Grimes {
136858f0484fSRodney W. Grimes 	register uch *col;
136958f0484fSRodney W. Grimes 	register int i;
137058f0484fSRodney W. Grimes 	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
137158f0484fSRodney W. Grimes 	register unsigned uc = (unsigned char)c;
137258f0484fSRodney W. Grimes 
137358f0484fSRodney W. Grimes 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
137458f0484fSRodney W. Grimes 		if (col[uc] != 0)
137558f0484fSRodney W. Grimes 			return(1);
137658f0484fSRodney W. Grimes 	return(0);
137758f0484fSRodney W. Grimes }
137858f0484fSRodney W. Grimes 
137958f0484fSRodney W. Grimes /*
138058f0484fSRodney W. Grimes  - samesets - are these two characters in exactly the same sets?
138158f0484fSRodney W. Grimes  == static int samesets(register struct re_guts *g, int c1, int c2);
138258f0484fSRodney W. Grimes  */
138358f0484fSRodney W. Grimes static int			/* predicate */
138458f0484fSRodney W. Grimes samesets(g, c1, c2)
138558f0484fSRodney W. Grimes register struct re_guts *g;
138658f0484fSRodney W. Grimes int c1;
138758f0484fSRodney W. Grimes int c2;
138858f0484fSRodney W. Grimes {
138958f0484fSRodney W. Grimes 	register uch *col;
139058f0484fSRodney W. Grimes 	register int i;
139158f0484fSRodney W. Grimes 	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
139258f0484fSRodney W. Grimes 	register unsigned uc1 = (unsigned char)c1;
139358f0484fSRodney W. Grimes 	register unsigned uc2 = (unsigned char)c2;
139458f0484fSRodney W. Grimes 
139558f0484fSRodney W. Grimes 	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
139658f0484fSRodney W. Grimes 		if (col[uc1] != col[uc2])
139758f0484fSRodney W. Grimes 			return(0);
139858f0484fSRodney W. Grimes 	return(1);
139958f0484fSRodney W. Grimes }
140058f0484fSRodney W. Grimes 
140158f0484fSRodney W. Grimes /*
140258f0484fSRodney W. Grimes  - categorize - sort out character categories
140358f0484fSRodney W. Grimes  == static void categorize(struct parse *p, register struct re_guts *g);
140458f0484fSRodney W. Grimes  */
140558f0484fSRodney W. Grimes static void
140658f0484fSRodney W. Grimes categorize(p, g)
140758f0484fSRodney W. Grimes struct parse *p;
140858f0484fSRodney W. Grimes register struct re_guts *g;
140958f0484fSRodney W. Grimes {
141058f0484fSRodney W. Grimes 	register cat_t *cats = g->categories;
141158f0484fSRodney W. Grimes 	register int c;
141258f0484fSRodney W. Grimes 	register int c2;
141358f0484fSRodney W. Grimes 	register cat_t cat;
141458f0484fSRodney W. Grimes 
141558f0484fSRodney W. Grimes 	/* avoid making error situations worse */
141658f0484fSRodney W. Grimes 	if (p->error != 0)
141758f0484fSRodney W. Grimes 		return;
141858f0484fSRodney W. Grimes 
141958f0484fSRodney W. Grimes 	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
142058f0484fSRodney W. Grimes 		if (cats[c] == 0 && isinsets(g, c)) {
142158f0484fSRodney W. Grimes 			cat = g->ncategories++;
142258f0484fSRodney W. Grimes 			cats[c] = cat;
142358f0484fSRodney W. Grimes 			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
142458f0484fSRodney W. Grimes 				if (cats[c2] == 0 && samesets(g, c, c2))
142558f0484fSRodney W. Grimes 					cats[c2] = cat;
142658f0484fSRodney W. Grimes 		}
142758f0484fSRodney W. Grimes }
142858f0484fSRodney W. Grimes 
142958f0484fSRodney W. Grimes /*
143058f0484fSRodney W. Grimes  - dupl - emit a duplicate of a bunch of sops
143158f0484fSRodney W. Grimes  == static sopno dupl(register struct parse *p, sopno start, sopno finish);
143258f0484fSRodney W. Grimes  */
143358f0484fSRodney W. Grimes static sopno			/* start of duplicate */
143458f0484fSRodney W. Grimes dupl(p, start, finish)
143558f0484fSRodney W. Grimes register struct parse *p;
143658f0484fSRodney W. Grimes sopno start;			/* from here */
143758f0484fSRodney W. Grimes sopno finish;			/* to this less one */
143858f0484fSRodney W. Grimes {
143958f0484fSRodney W. Grimes 	register sopno ret = HERE();
144058f0484fSRodney W. Grimes 	register sopno len = finish - start;
144158f0484fSRodney W. Grimes 
144258f0484fSRodney W. Grimes 	assert(finish >= start);
144358f0484fSRodney W. Grimes 	if (len == 0)
144458f0484fSRodney W. Grimes 		return(ret);
144558f0484fSRodney W. Grimes 	enlarge(p, p->ssize + len);	/* this many unexpected additions */
144658f0484fSRodney W. Grimes 	assert(p->ssize >= p->slen + len);
144758f0484fSRodney W. Grimes 	(void) memcpy((char *)(p->strip + p->slen),
144858f0484fSRodney W. Grimes 		(char *)(p->strip + start), (size_t)len*sizeof(sop));
144958f0484fSRodney W. Grimes 	p->slen += len;
145058f0484fSRodney W. Grimes 	return(ret);
145158f0484fSRodney W. Grimes }
145258f0484fSRodney W. Grimes 
145358f0484fSRodney W. Grimes /*
145458f0484fSRodney W. Grimes  - doemit - emit a strip operator
145558f0484fSRodney W. Grimes  == static void doemit(register struct parse *p, sop op, size_t opnd);
145658f0484fSRodney W. Grimes  *
145758f0484fSRodney W. Grimes  * It might seem better to implement this as a macro with a function as
145858f0484fSRodney W. Grimes  * hard-case backup, but it's just too big and messy unless there are
145958f0484fSRodney W. Grimes  * some changes to the data structures.  Maybe later.
146058f0484fSRodney W. Grimes  */
146158f0484fSRodney W. Grimes static void
146258f0484fSRodney W. Grimes doemit(p, op, opnd)
146358f0484fSRodney W. Grimes register struct parse *p;
146458f0484fSRodney W. Grimes sop op;
146558f0484fSRodney W. Grimes size_t opnd;
146658f0484fSRodney W. Grimes {
146758f0484fSRodney W. Grimes 	/* avoid making error situations worse */
146858f0484fSRodney W. Grimes 	if (p->error != 0)
146958f0484fSRodney W. Grimes 		return;
147058f0484fSRodney W. Grimes 
147158f0484fSRodney W. Grimes 	/* deal with oversize operands ("can't happen", more or less) */
147258f0484fSRodney W. Grimes 	assert(opnd < 1<<OPSHIFT);
147358f0484fSRodney W. Grimes 
147458f0484fSRodney W. Grimes 	/* deal with undersized strip */
147558f0484fSRodney W. Grimes 	if (p->slen >= p->ssize)
147658f0484fSRodney W. Grimes 		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
147758f0484fSRodney W. Grimes 	assert(p->slen < p->ssize);
147858f0484fSRodney W. Grimes 
147958f0484fSRodney W. Grimes 	/* finally, it's all reduced to the easy case */
148058f0484fSRodney W. Grimes 	p->strip[p->slen++] = SOP(op, opnd);
148158f0484fSRodney W. Grimes }
148258f0484fSRodney W. Grimes 
148358f0484fSRodney W. Grimes /*
148458f0484fSRodney W. Grimes  - doinsert - insert a sop into the strip
148558f0484fSRodney W. Grimes  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
148658f0484fSRodney W. Grimes  */
148758f0484fSRodney W. Grimes static void
148858f0484fSRodney W. Grimes doinsert(p, op, opnd, pos)
148958f0484fSRodney W. Grimes register struct parse *p;
149058f0484fSRodney W. Grimes sop op;
149158f0484fSRodney W. Grimes size_t opnd;
149258f0484fSRodney W. Grimes sopno pos;
149358f0484fSRodney W. Grimes {
149458f0484fSRodney W. Grimes 	register sopno sn;
149558f0484fSRodney W. Grimes 	register sop s;
149658f0484fSRodney W. Grimes 	register int i;
149758f0484fSRodney W. Grimes 
149858f0484fSRodney W. Grimes 	/* avoid making error situations worse */
149958f0484fSRodney W. Grimes 	if (p->error != 0)
150058f0484fSRodney W. Grimes 		return;
150158f0484fSRodney W. Grimes 
150258f0484fSRodney W. Grimes 	sn = HERE();
150358f0484fSRodney W. Grimes 	EMIT(op, opnd);		/* do checks, ensure space */
150458f0484fSRodney W. Grimes 	assert(HERE() == sn+1);
150558f0484fSRodney W. Grimes 	s = p->strip[sn];
150658f0484fSRodney W. Grimes 
150758f0484fSRodney W. Grimes 	/* adjust paren pointers */
150858f0484fSRodney W. Grimes 	assert(pos > 0);
150958f0484fSRodney W. Grimes 	for (i = 1; i < NPAREN; i++) {
151058f0484fSRodney W. Grimes 		if (p->pbegin[i] >= pos) {
151158f0484fSRodney W. Grimes 			p->pbegin[i]++;
151258f0484fSRodney W. Grimes 		}
151358f0484fSRodney W. Grimes 		if (p->pend[i] >= pos) {
151458f0484fSRodney W. Grimes 			p->pend[i]++;
151558f0484fSRodney W. Grimes 		}
151658f0484fSRodney W. Grimes 	}
151758f0484fSRodney W. Grimes 
151858f0484fSRodney W. Grimes 	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
151958f0484fSRodney W. Grimes 						(HERE()-pos-1)*sizeof(sop));
152058f0484fSRodney W. Grimes 	p->strip[pos] = s;
152158f0484fSRodney W. Grimes }
152258f0484fSRodney W. Grimes 
152358f0484fSRodney W. Grimes /*
152458f0484fSRodney W. Grimes  - dofwd - complete a forward reference
152558f0484fSRodney W. Grimes  == static void dofwd(register struct parse *p, sopno pos, sop value);
152658f0484fSRodney W. Grimes  */
152758f0484fSRodney W. Grimes static void
152858f0484fSRodney W. Grimes dofwd(p, pos, value)
152958f0484fSRodney W. Grimes register struct parse *p;
153058f0484fSRodney W. Grimes register sopno pos;
153158f0484fSRodney W. Grimes sop value;
153258f0484fSRodney W. Grimes {
153358f0484fSRodney W. Grimes 	/* avoid making error situations worse */
153458f0484fSRodney W. Grimes 	if (p->error != 0)
153558f0484fSRodney W. Grimes 		return;
153658f0484fSRodney W. Grimes 
153758f0484fSRodney W. Grimes 	assert(value < 1<<OPSHIFT);
153858f0484fSRodney W. Grimes 	p->strip[pos] = OP(p->strip[pos]) | value;
153958f0484fSRodney W. Grimes }
154058f0484fSRodney W. Grimes 
154158f0484fSRodney W. Grimes /*
154258f0484fSRodney W. Grimes  - enlarge - enlarge the strip
154358f0484fSRodney W. Grimes  == static void enlarge(register struct parse *p, sopno size);
154458f0484fSRodney W. Grimes  */
154558f0484fSRodney W. Grimes static void
154658f0484fSRodney W. Grimes enlarge(p, size)
154758f0484fSRodney W. Grimes register struct parse *p;
154858f0484fSRodney W. Grimes register sopno size;
154958f0484fSRodney W. Grimes {
155058f0484fSRodney W. Grimes 	register sop *sp;
155158f0484fSRodney W. Grimes 
155258f0484fSRodney W. Grimes 	if (p->ssize >= size)
155358f0484fSRodney W. Grimes 		return;
155458f0484fSRodney W. Grimes 
155558f0484fSRodney W. Grimes 	sp = (sop *)realloc(p->strip, size*sizeof(sop));
155658f0484fSRodney W. Grimes 	if (sp == NULL) {
155758f0484fSRodney W. Grimes 		SETERROR(REG_ESPACE);
155858f0484fSRodney W. Grimes 		return;
155958f0484fSRodney W. Grimes 	}
156058f0484fSRodney W. Grimes 	p->strip = sp;
156158f0484fSRodney W. Grimes 	p->ssize = size;
156258f0484fSRodney W. Grimes }
156358f0484fSRodney W. Grimes 
156458f0484fSRodney W. Grimes /*
156558f0484fSRodney W. Grimes  - stripsnug - compact the strip
156658f0484fSRodney W. Grimes  == static void stripsnug(register struct parse *p, register struct re_guts *g);
156758f0484fSRodney W. Grimes  */
156858f0484fSRodney W. Grimes static void
156958f0484fSRodney W. Grimes stripsnug(p, g)
157058f0484fSRodney W. Grimes register struct parse *p;
157158f0484fSRodney W. Grimes register struct re_guts *g;
157258f0484fSRodney W. Grimes {
157358f0484fSRodney W. Grimes 	g->nstates = p->slen;
157458f0484fSRodney W. Grimes 	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
157558f0484fSRodney W. Grimes 	if (g->strip == NULL) {
157658f0484fSRodney W. Grimes 		SETERROR(REG_ESPACE);
157758f0484fSRodney W. Grimes 		g->strip = p->strip;
157858f0484fSRodney W. Grimes 	}
157958f0484fSRodney W. Grimes }
158058f0484fSRodney W. Grimes 
158158f0484fSRodney W. Grimes /*
158258f0484fSRodney W. Grimes  - findmust - fill in must and mlen with longest mandatory literal string
158358f0484fSRodney W. Grimes  == static void findmust(register struct parse *p, register struct re_guts *g);
158458f0484fSRodney W. Grimes  *
158558f0484fSRodney W. Grimes  * This algorithm could do fancy things like analyzing the operands of |
158658f0484fSRodney W. Grimes  * for common subsequences.  Someday.  This code is simple and finds most
158758f0484fSRodney W. Grimes  * of the interesting cases.
158858f0484fSRodney W. Grimes  *
158958f0484fSRodney W. Grimes  * Note that must and mlen got initialized during setup.
159058f0484fSRodney W. Grimes  */
159158f0484fSRodney W. Grimes static void
159258f0484fSRodney W. Grimes findmust(p, g)
159358f0484fSRodney W. Grimes struct parse *p;
159458f0484fSRodney W. Grimes register struct re_guts *g;
159558f0484fSRodney W. Grimes {
159658f0484fSRodney W. Grimes 	register sop *scan;
159758f0484fSRodney W. Grimes 	sop *start;
159858f0484fSRodney W. Grimes 	register sop *newstart;
159958f0484fSRodney W. Grimes 	register sopno newlen;
160058f0484fSRodney W. Grimes 	register sop s;
160158f0484fSRodney W. Grimes 	register char *cp;
160258f0484fSRodney W. Grimes 	register sopno i;
160358f0484fSRodney W. Grimes 
160458f0484fSRodney W. Grimes 	/* avoid making error situations worse */
160558f0484fSRodney W. Grimes 	if (p->error != 0)
160658f0484fSRodney W. Grimes 		return;
160758f0484fSRodney W. Grimes 
160858f0484fSRodney W. Grimes 	/* find the longest OCHAR sequence in strip */
160958f0484fSRodney W. Grimes 	newlen = 0;
161058f0484fSRodney W. Grimes 	scan = g->strip + 1;
161158f0484fSRodney W. Grimes 	do {
161258f0484fSRodney W. Grimes 		s = *scan++;
161358f0484fSRodney W. Grimes 		switch (OP(s)) {
161458f0484fSRodney W. Grimes 		case OCHAR:		/* sequence member */
161558f0484fSRodney W. Grimes 			if (newlen == 0)		/* new sequence */
161658f0484fSRodney W. Grimes 				newstart = scan - 1;
161758f0484fSRodney W. Grimes 			newlen++;
161858f0484fSRodney W. Grimes 			break;
161958f0484fSRodney W. Grimes 		case OPLUS_:		/* things that don't break one */
162058f0484fSRodney W. Grimes 		case OLPAREN:
162158f0484fSRodney W. Grimes 		case ORPAREN:
162258f0484fSRodney W. Grimes 			break;
162358f0484fSRodney W. Grimes 		case OQUEST_:		/* things that must be skipped */
162458f0484fSRodney W. Grimes 		case OCH_:
162558f0484fSRodney W. Grimes 			scan--;
162658f0484fSRodney W. Grimes 			do {
162758f0484fSRodney W. Grimes 				scan += OPND(s);
162858f0484fSRodney W. Grimes 				s = *scan;
162958f0484fSRodney W. Grimes 				/* assert() interferes w debug printouts */
163058f0484fSRodney W. Grimes 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
163158f0484fSRodney W. Grimes 							OP(s) != OOR2) {
163258f0484fSRodney W. Grimes 					g->iflags |= BAD;
163358f0484fSRodney W. Grimes 					return;
163458f0484fSRodney W. Grimes 				}
163558f0484fSRodney W. Grimes 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
163658f0484fSRodney W. Grimes 			/* fallthrough */
163758f0484fSRodney W. Grimes 		default:		/* things that break a sequence */
163858f0484fSRodney W. Grimes 			if (newlen > g->mlen) {		/* ends one */
163958f0484fSRodney W. Grimes 				start = newstart;
164058f0484fSRodney W. Grimes 				g->mlen = newlen;
164158f0484fSRodney W. Grimes 			}
164258f0484fSRodney W. Grimes 			newlen = 0;
164358f0484fSRodney W. Grimes 			break;
164458f0484fSRodney W. Grimes 		}
164558f0484fSRodney W. Grimes 	} while (OP(s) != OEND);
164658f0484fSRodney W. Grimes 
164758f0484fSRodney W. Grimes 	if (g->mlen == 0)		/* there isn't one */
164858f0484fSRodney W. Grimes 		return;
164958f0484fSRodney W. Grimes 
165058f0484fSRodney W. Grimes 	/* turn it into a character string */
165158f0484fSRodney W. Grimes 	g->must = malloc((size_t)g->mlen + 1);
165258f0484fSRodney W. Grimes 	if (g->must == NULL) {		/* argh; just forget it */
165358f0484fSRodney W. Grimes 		g->mlen = 0;
165458f0484fSRodney W. Grimes 		return;
165558f0484fSRodney W. Grimes 	}
165658f0484fSRodney W. Grimes 	cp = g->must;
165758f0484fSRodney W. Grimes 	scan = start;
165858f0484fSRodney W. Grimes 	for (i = g->mlen; i > 0; i--) {
165958f0484fSRodney W. Grimes 		while (OP(s = *scan++) != OCHAR)
166058f0484fSRodney W. Grimes 			continue;
166158f0484fSRodney W. Grimes 		assert(cp < g->must + g->mlen);
166258f0484fSRodney W. Grimes 		*cp++ = (char)OPND(s);
166358f0484fSRodney W. Grimes 	}
166458f0484fSRodney W. Grimes 	assert(cp == g->must + g->mlen);
166558f0484fSRodney W. Grimes 	*cp++ = '\0';		/* just on general principles */
166658f0484fSRodney W. Grimes }
166758f0484fSRodney W. Grimes 
166858f0484fSRodney W. Grimes /*
166958f0484fSRodney W. Grimes  - pluscount - count + nesting
167058f0484fSRodney W. Grimes  == static sopno pluscount(register struct parse *p, register struct re_guts *g);
167158f0484fSRodney W. Grimes  */
167258f0484fSRodney W. Grimes static sopno			/* nesting depth */
167358f0484fSRodney W. Grimes pluscount(p, g)
167458f0484fSRodney W. Grimes struct parse *p;
167558f0484fSRodney W. Grimes register struct re_guts *g;
167658f0484fSRodney W. Grimes {
167758f0484fSRodney W. Grimes 	register sop *scan;
167858f0484fSRodney W. Grimes 	register sop s;
167958f0484fSRodney W. Grimes 	register sopno plusnest = 0;
168058f0484fSRodney W. Grimes 	register sopno maxnest = 0;
168158f0484fSRodney W. Grimes 
168258f0484fSRodney W. Grimes 	if (p->error != 0)
168358f0484fSRodney W. Grimes 		return(0);	/* there may not be an OEND */
168458f0484fSRodney W. Grimes 
168558f0484fSRodney W. Grimes 	scan = g->strip + 1;
168658f0484fSRodney W. Grimes 	do {
168758f0484fSRodney W. Grimes 		s = *scan++;
168858f0484fSRodney W. Grimes 		switch (OP(s)) {
168958f0484fSRodney W. Grimes 		case OPLUS_:
169058f0484fSRodney W. Grimes 			plusnest++;
169158f0484fSRodney W. Grimes 			break;
169258f0484fSRodney W. Grimes 		case O_PLUS:
169358f0484fSRodney W. Grimes 			if (plusnest > maxnest)
169458f0484fSRodney W. Grimes 				maxnest = plusnest;
169558f0484fSRodney W. Grimes 			plusnest--;
169658f0484fSRodney W. Grimes 			break;
169758f0484fSRodney W. Grimes 		}
169858f0484fSRodney W. Grimes 	} while (OP(s) != OEND);
169958f0484fSRodney W. Grimes 	if (plusnest != 0)
170058f0484fSRodney W. Grimes 		g->iflags |= BAD;
170158f0484fSRodney W. Grimes 	return(maxnest);
170258f0484fSRodney W. Grimes }
1703