xref: /titanic_41/usr/src/lib/libc/port/locale/regcomp.c (revision 62c3776affc53153ce74e14a6e7ce91950c28102)
14297a3b0SGarrett D'Amore /*
2*62c3776aSGarrett D'Amore  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
36b5e5868SGarrett D'Amore  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
433f5ff17SMilan Jurik  * Copyright 2012 Milan Jurik. All rights reserved.
54297a3b0SGarrett D'Amore  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
64297a3b0SGarrett D'Amore  * Copyright (c) 1992, 1993, 1994
74297a3b0SGarrett D'Amore  *	The Regents of the University of California.  All rights reserved.
84297a3b0SGarrett D'Amore  *
94297a3b0SGarrett D'Amore  * This code is derived from software contributed to Berkeley by
104297a3b0SGarrett D'Amore  * Henry Spencer.
114297a3b0SGarrett D'Amore  *
124297a3b0SGarrett D'Amore  * Redistribution and use in source and binary forms, with or without
134297a3b0SGarrett D'Amore  * modification, are permitted provided that the following conditions
144297a3b0SGarrett D'Amore  * are met:
154297a3b0SGarrett D'Amore  * 1. Redistributions of source code must retain the above copyright
164297a3b0SGarrett D'Amore  *    notice, this list of conditions and the following disclaimer.
174297a3b0SGarrett D'Amore  * 2. Redistributions in binary form must reproduce the above copyright
184297a3b0SGarrett D'Amore  *    notice, this list of conditions and the following disclaimer in the
194297a3b0SGarrett D'Amore  *    documentation and/or other materials provided with the distribution.
204297a3b0SGarrett D'Amore  * 4. Neither the name of the University nor the names of its contributors
214297a3b0SGarrett D'Amore  *    may be used to endorse or promote products derived from this software
224297a3b0SGarrett D'Amore  *    without specific prior written permission.
234297a3b0SGarrett D'Amore  *
244297a3b0SGarrett D'Amore  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
254297a3b0SGarrett D'Amore  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
264297a3b0SGarrett D'Amore  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
274297a3b0SGarrett D'Amore  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
284297a3b0SGarrett D'Amore  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
294297a3b0SGarrett D'Amore  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
304297a3b0SGarrett D'Amore  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
314297a3b0SGarrett D'Amore  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
324297a3b0SGarrett D'Amore  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
334297a3b0SGarrett D'Amore  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
344297a3b0SGarrett D'Amore  * SUCH DAMAGE.
354297a3b0SGarrett D'Amore  */
364297a3b0SGarrett D'Amore 
374297a3b0SGarrett D'Amore #include "lint.h"
384297a3b0SGarrett D'Amore #include "file64.h"
394297a3b0SGarrett D'Amore #include <sys/types.h>
404297a3b0SGarrett D'Amore #include <stdio.h>
414297a3b0SGarrett D'Amore #include <string.h>
424297a3b0SGarrett D'Amore #include <ctype.h>
434297a3b0SGarrett D'Amore #include <limits.h>
444297a3b0SGarrett D'Amore #include <stdlib.h>
454297a3b0SGarrett D'Amore #include <regex.h>
464297a3b0SGarrett D'Amore #include <wchar.h>
474297a3b0SGarrett D'Amore #include <wctype.h>
484297a3b0SGarrett D'Amore 
494297a3b0SGarrett D'Amore #include "runetype.h"
504297a3b0SGarrett D'Amore #include "collate.h"
514297a3b0SGarrett D'Amore 
524297a3b0SGarrett D'Amore #include "utils.h"
534297a3b0SGarrett D'Amore #include "regex2.h"
544297a3b0SGarrett D'Amore 
554297a3b0SGarrett D'Amore #include "cname.h"
564297a3b0SGarrett D'Amore #include "mblocal.h"
574297a3b0SGarrett D'Amore 
584297a3b0SGarrett D'Amore /*
594297a3b0SGarrett D'Amore  * parse structure, passed up and down to avoid global variables and
604297a3b0SGarrett D'Amore  * other clumsinesses
614297a3b0SGarrett D'Amore  */
624297a3b0SGarrett D'Amore struct parse {
634297a3b0SGarrett D'Amore 	char *next;		/* next character in RE */
644297a3b0SGarrett D'Amore 	char *end;		/* end of string (-> NUL normally) */
654297a3b0SGarrett D'Amore 	int error;		/* has an error been seen? */
664297a3b0SGarrett D'Amore 	sop *strip;		/* malloced strip */
674297a3b0SGarrett D'Amore 	sopno ssize;		/* malloced strip size (allocated) */
684297a3b0SGarrett D'Amore 	sopno slen;		/* malloced strip length (used) */
694297a3b0SGarrett D'Amore 	int ncsalloc;		/* number of csets allocated */
704297a3b0SGarrett D'Amore 	struct re_guts *g;
714297a3b0SGarrett D'Amore #define	NPAREN	10		/* we need to remember () 1-9 for back refs */
724297a3b0SGarrett D'Amore 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
734297a3b0SGarrett D'Amore 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
744297a3b0SGarrett D'Amore };
754297a3b0SGarrett D'Amore 
764297a3b0SGarrett D'Amore /* ========= begin header generated by ./mkh ========= */
774297a3b0SGarrett D'Amore #ifdef __cplusplus
784297a3b0SGarrett D'Amore extern "C" {
794297a3b0SGarrett D'Amore #endif
804297a3b0SGarrett D'Amore 
814297a3b0SGarrett D'Amore /* === regcomp.c === */
824297a3b0SGarrett D'Amore static void p_ere(struct parse *p, wint_t stop);
834297a3b0SGarrett D'Amore static void p_ere_exp(struct parse *p);
844297a3b0SGarrett D'Amore static void p_str(struct parse *p);
854297a3b0SGarrett D'Amore static void p_bre(struct parse *p, wint_t end1, wint_t end2);
864297a3b0SGarrett D'Amore static int p_simp_re(struct parse *p, int starordinary);
874297a3b0SGarrett D'Amore static int p_count(struct parse *p);
884297a3b0SGarrett D'Amore static void p_bracket(struct parse *p);
894297a3b0SGarrett D'Amore static void p_b_term(struct parse *p, cset *cs);
904297a3b0SGarrett D'Amore static void p_b_cclass(struct parse *p, cset *cs);
914297a3b0SGarrett D'Amore static void p_b_eclass(struct parse *p, cset *cs);
924297a3b0SGarrett D'Amore static wint_t p_b_symbol(struct parse *p);
934297a3b0SGarrett D'Amore static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
944297a3b0SGarrett D'Amore static wint_t othercase(wint_t ch);
954297a3b0SGarrett D'Amore static void bothcases(struct parse *p, wint_t ch);
964297a3b0SGarrett D'Amore static void ordinary(struct parse *p, wint_t ch);
974297a3b0SGarrett D'Amore static void nonnewline(struct parse *p);
984297a3b0SGarrett D'Amore static void repeat(struct parse *p, sopno start, int from, int to);
994297a3b0SGarrett D'Amore static int seterr(struct parse *p, int e);
1004297a3b0SGarrett D'Amore static cset *allocset(struct parse *p);
1014297a3b0SGarrett D'Amore static void freeset(struct parse *p, cset *cs);
1024297a3b0SGarrett D'Amore static void CHadd(struct parse *p, cset *cs, wint_t ch);
1034297a3b0SGarrett D'Amore static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
1044297a3b0SGarrett D'Amore static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
1054297a3b0SGarrett D'Amore static wint_t singleton(cset *cs);
1064297a3b0SGarrett D'Amore static sopno dupl(struct parse *p, sopno start, sopno finish);
1074297a3b0SGarrett D'Amore static void doemit(struct parse *p, sop op, size_t opnd);
1084297a3b0SGarrett D'Amore static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
1094297a3b0SGarrett D'Amore static void dofwd(struct parse *p, sopno pos, sop value);
1104297a3b0SGarrett D'Amore static void enlarge(struct parse *p, sopno size);
1114297a3b0SGarrett D'Amore static void stripsnug(struct parse *p, struct re_guts *g);
1124297a3b0SGarrett D'Amore static void findmust(struct parse *p, struct re_guts *g);
1134297a3b0SGarrett D'Amore static int altoffset(sop *scan, int offset);
1144297a3b0SGarrett D'Amore static void computejumps(struct parse *p, struct re_guts *g);
1154297a3b0SGarrett D'Amore static void computematchjumps(struct parse *p, struct re_guts *g);
1164297a3b0SGarrett D'Amore static sopno pluscount(struct parse *p, struct re_guts *g);
1174297a3b0SGarrett D'Amore static wint_t wgetnext(struct parse *p);
1184297a3b0SGarrett D'Amore 
1194297a3b0SGarrett D'Amore #ifdef __cplusplus
1204297a3b0SGarrett D'Amore }
1214297a3b0SGarrett D'Amore #endif
1224297a3b0SGarrett D'Amore /* ========= end header generated by ./mkh ========= */
1234297a3b0SGarrett D'Amore 
1244297a3b0SGarrett D'Amore static char nuls[10];		/* place to point scanner in event of error */
1254297a3b0SGarrett D'Amore 
1264297a3b0SGarrett D'Amore /*
1274297a3b0SGarrett D'Amore  * macros for use with parse structure
1284297a3b0SGarrett D'Amore  * BEWARE:  these know that the parse structure is named `p' !!!
1294297a3b0SGarrett D'Amore  */
1304297a3b0SGarrett D'Amore #define	PEEK()	(*p->next)
1314297a3b0SGarrett D'Amore #define	PEEK2()	(*(p->next+1))
1324297a3b0SGarrett D'Amore #define	MORE()	(p->next < p->end)
1334297a3b0SGarrett D'Amore #define	MORE2()	(p->next+1 < p->end)
1344297a3b0SGarrett D'Amore #define	SEE(c)	(MORE() && PEEK() == (c))
1354297a3b0SGarrett D'Amore #define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
1364297a3b0SGarrett D'Amore #define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
1374297a3b0SGarrett D'Amore #define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
1384297a3b0SGarrett D'Amore #define	NEXT()	(p->next++)
1394297a3b0SGarrett D'Amore #define	NEXT2()	(p->next += 2)
1404297a3b0SGarrett D'Amore #define	NEXTn(n)	(p->next += (n))
1414297a3b0SGarrett D'Amore #define	GETNEXT()	(*p->next++)
1424297a3b0SGarrett D'Amore #define	WGETNEXT()	wgetnext(p)
1434297a3b0SGarrett D'Amore #define	SETERROR(e)	((void)seterr(p, (e)))
1444297a3b0SGarrett D'Amore #define	REQUIRE(co, e)	((co) || seterr(p, e))
1454297a3b0SGarrett D'Amore #define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
1464297a3b0SGarrett D'Amore #define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
1474297a3b0SGarrett D'Amore #define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
1484297a3b0SGarrett D'Amore #define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
1494297a3b0SGarrett D'Amore #define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
1504297a3b0SGarrett D'Amore #define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
1514297a3b0SGarrett D'Amore #define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
1524297a3b0SGarrett D'Amore #define	HERE()		(p->slen)
1534297a3b0SGarrett D'Amore #define	THERE()		(p->slen - 1)
1544297a3b0SGarrett D'Amore #define	THERETHERE()	(p->slen - 2)
1554297a3b0SGarrett D'Amore #define	DROP(n)	(p->slen -= (n))
1564297a3b0SGarrett D'Amore 
1574297a3b0SGarrett D'Amore #ifndef NDEBUG
1584297a3b0SGarrett D'Amore static int never = 0;		/* for use in asserts; shuts lint up */
1594297a3b0SGarrett D'Amore #else
1604297a3b0SGarrett D'Amore #define	never	0		/* some <assert.h>s have bugs too */
1614297a3b0SGarrett D'Amore #endif
1624297a3b0SGarrett D'Amore 
1634297a3b0SGarrett D'Amore /*
1644297a3b0SGarrett D'Amore  * regcomp - interface for parser and compilation
1654297a3b0SGarrett D'Amore  */
1664297a3b0SGarrett D'Amore int				/* 0 success, otherwise REG_something */
regcomp(regex_t * _RESTRICT_KYWD preg,const char * _RESTRICT_KYWD pattern,int cflags)1674297a3b0SGarrett D'Amore regcomp(regex_t *_RESTRICT_KYWD preg,
1684297a3b0SGarrett D'Amore 	const char *_RESTRICT_KYWD pattern,
1694297a3b0SGarrett D'Amore 	int cflags)
1704297a3b0SGarrett D'Amore {
1714297a3b0SGarrett D'Amore 	struct parse pa;
1724297a3b0SGarrett D'Amore 	struct re_guts *g;
1734297a3b0SGarrett D'Amore 	struct parse *p = &pa;
1744297a3b0SGarrett D'Amore 	int i;
1754297a3b0SGarrett D'Amore 	size_t len;
1764297a3b0SGarrett D'Amore #ifdef REDEBUG
1774297a3b0SGarrett D'Amore #define	GOODFLAGS(f)	(f)
1784297a3b0SGarrett D'Amore #else
1794297a3b0SGarrett D'Amore #define	GOODFLAGS(f)	((f)&~REG_DUMP)
1804297a3b0SGarrett D'Amore #endif
1814297a3b0SGarrett D'Amore 
1824297a3b0SGarrett D'Amore 	/* We had REG_INVARG, but we don't have that on Solaris. */
1834297a3b0SGarrett D'Amore 	cflags = GOODFLAGS(cflags);
1844297a3b0SGarrett D'Amore 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
1854297a3b0SGarrett D'Amore 		return (REG_EFATAL);
1864297a3b0SGarrett D'Amore 
1874297a3b0SGarrett D'Amore 	if (cflags&REG_PEND) {
1884297a3b0SGarrett D'Amore 		if (preg->re_endp < pattern)
1894297a3b0SGarrett D'Amore 			return (REG_EFATAL);
1904297a3b0SGarrett D'Amore 		len = preg->re_endp - pattern;
1914297a3b0SGarrett D'Amore 	} else
1924297a3b0SGarrett D'Amore 		len = strlen((char *)pattern);
1934297a3b0SGarrett D'Amore 
1944297a3b0SGarrett D'Amore 	/* do the mallocs early so failure handling is easy */
1954297a3b0SGarrett D'Amore 	g = (struct re_guts *)malloc(sizeof (struct re_guts));
1964297a3b0SGarrett D'Amore 	if (g == NULL)
1974297a3b0SGarrett D'Amore 		return (REG_ESPACE);
1984297a3b0SGarrett D'Amore 	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
1994297a3b0SGarrett D'Amore 	p->strip = (sop *)malloc(p->ssize * sizeof (sop));
2004297a3b0SGarrett D'Amore 	p->slen = 0;
2014297a3b0SGarrett D'Amore 	if (p->strip == NULL) {
2024297a3b0SGarrett D'Amore 		free((char *)g);
2034297a3b0SGarrett D'Amore 		return (REG_ESPACE);
2044297a3b0SGarrett D'Amore 	}
2054297a3b0SGarrett D'Amore 
2064297a3b0SGarrett D'Amore 	/* set things up */
2074297a3b0SGarrett D'Amore 	p->g = g;
2084297a3b0SGarrett D'Amore 	p->next = (char *)pattern;	/* convenience; we do not modify it */
2094297a3b0SGarrett D'Amore 	p->end = p->next + len;
2104297a3b0SGarrett D'Amore 	p->error = 0;
2114297a3b0SGarrett D'Amore 	p->ncsalloc = 0;
2124297a3b0SGarrett D'Amore 	for (i = 0; i < NPAREN; i++) {
2134297a3b0SGarrett D'Amore 		p->pbegin[i] = 0;
2144297a3b0SGarrett D'Amore 		p->pend[i] = 0;
2154297a3b0SGarrett D'Amore 	}
2164297a3b0SGarrett D'Amore 	g->sets = NULL;
2174297a3b0SGarrett D'Amore 	g->ncsets = 0;
2184297a3b0SGarrett D'Amore 	g->cflags = cflags;
2194297a3b0SGarrett D'Amore 	g->iflags = 0;
2204297a3b0SGarrett D'Amore 	g->nbol = 0;
2214297a3b0SGarrett D'Amore 	g->neol = 0;
2224297a3b0SGarrett D'Amore 	g->must = NULL;
2234297a3b0SGarrett D'Amore 	g->moffset = -1;
2244297a3b0SGarrett D'Amore 	g->charjump = NULL;
2254297a3b0SGarrett D'Amore 	g->matchjump = NULL;
2264297a3b0SGarrett D'Amore 	g->mlen = 0;
2274297a3b0SGarrett D'Amore 	g->nsub = 0;
2284297a3b0SGarrett D'Amore 	g->backrefs = 0;
2294297a3b0SGarrett D'Amore 
2304297a3b0SGarrett D'Amore 	/* do it */
2314297a3b0SGarrett D'Amore 	EMIT(OEND, 0);
2324297a3b0SGarrett D'Amore 	g->firststate = THERE();
2334297a3b0SGarrett D'Amore 	if (cflags&REG_EXTENDED)
2344297a3b0SGarrett D'Amore 		p_ere(p, OUT);
2354297a3b0SGarrett D'Amore 	else if (cflags&REG_NOSPEC)
2364297a3b0SGarrett D'Amore 		p_str(p);
2374297a3b0SGarrett D'Amore 	else
2384297a3b0SGarrett D'Amore 		p_bre(p, OUT, OUT);
2394297a3b0SGarrett D'Amore 	EMIT(OEND, 0);
2404297a3b0SGarrett D'Amore 	g->laststate = THERE();
2414297a3b0SGarrett D'Amore 
2424297a3b0SGarrett D'Amore 	/* tidy up loose ends and fill things in */
2434297a3b0SGarrett D'Amore 	stripsnug(p, g);
2444297a3b0SGarrett D'Amore 	findmust(p, g);
2454297a3b0SGarrett D'Amore 	/*
2464297a3b0SGarrett D'Amore 	 * only use Boyer-Moore algorithm if the pattern is bigger
2474297a3b0SGarrett D'Amore 	 * than three characters
2484297a3b0SGarrett D'Amore 	 */
2494297a3b0SGarrett D'Amore 	if (g->mlen > 3) {
2504297a3b0SGarrett D'Amore 		computejumps(p, g);
2514297a3b0SGarrett D'Amore 		computematchjumps(p, g);
2524297a3b0SGarrett D'Amore 		if (g->matchjump == NULL && g->charjump != NULL) {
2534297a3b0SGarrett D'Amore 			free(g->charjump);
2544297a3b0SGarrett D'Amore 			g->charjump = NULL;
2554297a3b0SGarrett D'Amore 		}
2564297a3b0SGarrett D'Amore 	}
2574297a3b0SGarrett D'Amore 	g->nplus = pluscount(p, g);
2584297a3b0SGarrett D'Amore 	g->magic = MAGIC2;
2594297a3b0SGarrett D'Amore 	preg->re_nsub = g->nsub;
2604297a3b0SGarrett D'Amore 	preg->re_g = g;
2614297a3b0SGarrett D'Amore 	preg->re_magic = MAGIC1;
2624297a3b0SGarrett D'Amore #ifndef REDEBUG
2634297a3b0SGarrett D'Amore 	/* not debugging, so can't rely on the assert() in regexec() */
2644297a3b0SGarrett D'Amore 	if (g->iflags&BAD)
2654297a3b0SGarrett D'Amore 		SETERROR(REG_EFATAL);
2664297a3b0SGarrett D'Amore #endif
2674297a3b0SGarrett D'Amore 
2684297a3b0SGarrett D'Amore 	/* win or lose, we're done */
2694297a3b0SGarrett D'Amore 	if (p->error != 0)	/* lose */
2704297a3b0SGarrett D'Amore 		regfree(preg);
2714297a3b0SGarrett D'Amore 	return (p->error);
2724297a3b0SGarrett D'Amore }
2734297a3b0SGarrett D'Amore 
2744297a3b0SGarrett D'Amore /*
2754297a3b0SGarrett D'Amore  * p_ere - ERE parser top level, concatenation and alternation
2764297a3b0SGarrett D'Amore  */
2774297a3b0SGarrett D'Amore static void
p_ere(struct parse * p,wint_t stop)2784297a3b0SGarrett D'Amore p_ere(struct parse *p,
2794297a3b0SGarrett D'Amore     wint_t stop)		/* character this ERE should end at */
2804297a3b0SGarrett D'Amore {
2814297a3b0SGarrett D'Amore 	char c;
2824297a3b0SGarrett D'Amore 	sopno prevback;
2834297a3b0SGarrett D'Amore 	sopno prevfwd;
2844297a3b0SGarrett D'Amore 	sopno conc;
2854297a3b0SGarrett D'Amore 	int first = 1;		/* is this the first alternative? */
2864297a3b0SGarrett D'Amore 
2874297a3b0SGarrett D'Amore 	for (;;) {
2884297a3b0SGarrett D'Amore 		/* do a bunch of concatenated expressions */
2894297a3b0SGarrett D'Amore 		conc = HERE();
2904297a3b0SGarrett D'Amore 		while (MORE() && (c = PEEK()) != '|' && c != stop)
2914297a3b0SGarrett D'Amore 			p_ere_exp(p);
2924297a3b0SGarrett D'Amore 		/* require nonempty */
2934297a3b0SGarrett D'Amore 		(void) REQUIRE(HERE() != conc, REG_BADPAT);
2944297a3b0SGarrett D'Amore 
2954297a3b0SGarrett D'Amore 		if (!EAT('|'))
2964297a3b0SGarrett D'Amore 			break;		/* NOTE BREAK OUT */
2974297a3b0SGarrett D'Amore 
2984297a3b0SGarrett D'Amore 		if (first) {
2994297a3b0SGarrett D'Amore 			INSERT(OCH_, conc);	/* offset is wrong */
3004297a3b0SGarrett D'Amore 			prevfwd = conc;
3014297a3b0SGarrett D'Amore 			prevback = conc;
3024297a3b0SGarrett D'Amore 			first = 0;
3034297a3b0SGarrett D'Amore 		}
3044297a3b0SGarrett D'Amore 		ASTERN(OOR1, prevback);
3054297a3b0SGarrett D'Amore 		prevback = THERE();
3064297a3b0SGarrett D'Amore 		AHEAD(prevfwd);			/* fix previous offset */
3074297a3b0SGarrett D'Amore 		prevfwd = HERE();
3084297a3b0SGarrett D'Amore 		EMIT(OOR2, 0);			/* offset is very wrong */
3094297a3b0SGarrett D'Amore 	}
3104297a3b0SGarrett D'Amore 
3114297a3b0SGarrett D'Amore 	if (!first) {		/* tail-end fixups */
3124297a3b0SGarrett D'Amore 		AHEAD(prevfwd);
3134297a3b0SGarrett D'Amore 		ASTERN(O_CH, prevback);
3144297a3b0SGarrett D'Amore 	}
3154297a3b0SGarrett D'Amore 
3164297a3b0SGarrett D'Amore 	assert(!MORE() || SEE(stop));
3174297a3b0SGarrett D'Amore }
3184297a3b0SGarrett D'Amore 
3194297a3b0SGarrett D'Amore /*
3204297a3b0SGarrett D'Amore  * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
3214297a3b0SGarrett D'Amore  */
3224297a3b0SGarrett D'Amore static void
p_ere_exp(struct parse * p)3234297a3b0SGarrett D'Amore p_ere_exp(struct parse *p)
3244297a3b0SGarrett D'Amore {
3254297a3b0SGarrett D'Amore 	char c;
3264297a3b0SGarrett D'Amore 	wint_t wc;
3274297a3b0SGarrett D'Amore 	sopno pos;
3284297a3b0SGarrett D'Amore 	int count;
3294297a3b0SGarrett D'Amore 	int count2;
3304297a3b0SGarrett D'Amore 	sopno subno;
3314297a3b0SGarrett D'Amore 	int wascaret = 0;
3324297a3b0SGarrett D'Amore 
3334297a3b0SGarrett D'Amore 	assert(MORE());		/* caller should have ensured this */
3344297a3b0SGarrett D'Amore 	c = GETNEXT();
3354297a3b0SGarrett D'Amore 
3364297a3b0SGarrett D'Amore 	pos = HERE();
3374297a3b0SGarrett D'Amore 	switch (c) {
3384297a3b0SGarrett D'Amore 	case '(':
3394297a3b0SGarrett D'Amore 		(void) REQUIRE(MORE(), REG_EPAREN);
3404297a3b0SGarrett D'Amore 		p->g->nsub++;
3414297a3b0SGarrett D'Amore 		subno = p->g->nsub;
3424297a3b0SGarrett D'Amore 		if (subno < NPAREN)
3434297a3b0SGarrett D'Amore 			p->pbegin[subno] = HERE();
3444297a3b0SGarrett D'Amore 		EMIT(OLPAREN, subno);
3454297a3b0SGarrett D'Amore 		if (!SEE(')'))
3464297a3b0SGarrett D'Amore 			p_ere(p, ')');
3474297a3b0SGarrett D'Amore 		if (subno < NPAREN) {
3484297a3b0SGarrett D'Amore 			p->pend[subno] = HERE();
3494297a3b0SGarrett D'Amore 			assert(p->pend[subno] != 0);
3504297a3b0SGarrett D'Amore 		}
3514297a3b0SGarrett D'Amore 		EMIT(ORPAREN, subno);
3524297a3b0SGarrett D'Amore 		(void) MUSTEAT(')', REG_EPAREN);
3534297a3b0SGarrett D'Amore 		break;
3544297a3b0SGarrett D'Amore #ifndef POSIX_MISTAKE
3554297a3b0SGarrett D'Amore 	case ')':		/* happens only if no current unmatched ( */
3564297a3b0SGarrett D'Amore 		/*
3574297a3b0SGarrett D'Amore 		 * You may ask, why the ifndef?  Because I didn't notice
3584297a3b0SGarrett D'Amore 		 * this until slightly too late for 1003.2, and none of the
3594297a3b0SGarrett D'Amore 		 * other 1003.2 regular-expression reviewers noticed it at
3604297a3b0SGarrett D'Amore 		 * all.  So an unmatched ) is legal POSIX, at least until
3614297a3b0SGarrett D'Amore 		 * we can get it fixed.
3624297a3b0SGarrett D'Amore 		 */
3634297a3b0SGarrett D'Amore 		SETERROR(REG_EPAREN);
3644297a3b0SGarrett D'Amore 		break;
3654297a3b0SGarrett D'Amore #endif
3664297a3b0SGarrett D'Amore 	case '^':
3674297a3b0SGarrett D'Amore 		EMIT(OBOL, 0);
3684297a3b0SGarrett D'Amore 		p->g->iflags |= USEBOL;
3694297a3b0SGarrett D'Amore 		p->g->nbol++;
3704297a3b0SGarrett D'Amore 		wascaret = 1;
3714297a3b0SGarrett D'Amore 		break;
3724297a3b0SGarrett D'Amore 	case '$':
3734297a3b0SGarrett D'Amore 		EMIT(OEOL, 0);
3744297a3b0SGarrett D'Amore 		p->g->iflags |= USEEOL;
3754297a3b0SGarrett D'Amore 		p->g->neol++;
3764297a3b0SGarrett D'Amore 		break;
3774297a3b0SGarrett D'Amore 	case '|':
3784297a3b0SGarrett D'Amore 		SETERROR(REG_BADPAT);
3794297a3b0SGarrett D'Amore 		break;
3804297a3b0SGarrett D'Amore 	case '*':
3814297a3b0SGarrett D'Amore 	case '+':
3824297a3b0SGarrett D'Amore 	case '?':
3834297a3b0SGarrett D'Amore 		SETERROR(REG_BADRPT);
3844297a3b0SGarrett D'Amore 		break;
3854297a3b0SGarrett D'Amore 	case '.':
3864297a3b0SGarrett D'Amore 		if (p->g->cflags&REG_NEWLINE)
3874297a3b0SGarrett D'Amore 			nonnewline(p);
3884297a3b0SGarrett D'Amore 		else
3894297a3b0SGarrett D'Amore 			EMIT(OANY, 0);
3904297a3b0SGarrett D'Amore 		break;
3914297a3b0SGarrett D'Amore 	case '[':
3924297a3b0SGarrett D'Amore 		p_bracket(p);
3934297a3b0SGarrett D'Amore 		break;
3944297a3b0SGarrett D'Amore 	case '\\':
3954297a3b0SGarrett D'Amore 		(void) REQUIRE(MORE(), REG_EESCAPE);
3964297a3b0SGarrett D'Amore 		wc = WGETNEXT();
39784441f85SGarrett D'Amore 		switch (wc) {
39884441f85SGarrett D'Amore 		case '<':
39984441f85SGarrett D'Amore 			EMIT(OBOW, 0);
40084441f85SGarrett D'Amore 			break;
40184441f85SGarrett D'Amore 		case '>':
40284441f85SGarrett D'Amore 			EMIT(OEOW, 0);
40384441f85SGarrett D'Amore 			break;
40484441f85SGarrett D'Amore 		default:
4054297a3b0SGarrett D'Amore 			ordinary(p, wc);
4064297a3b0SGarrett D'Amore 			break;
40784441f85SGarrett D'Amore 		}
40884441f85SGarrett D'Amore 		break;
4094297a3b0SGarrett D'Amore 	case '{':		/* okay as ordinary except if digit follows */
4104297a3b0SGarrett D'Amore 		(void) REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
4114297a3b0SGarrett D'Amore 		/* FALLTHROUGH */
4124297a3b0SGarrett D'Amore 	default:
4134297a3b0SGarrett D'Amore 		p->next--;
4144297a3b0SGarrett D'Amore 		wc = WGETNEXT();
4154297a3b0SGarrett D'Amore 		ordinary(p, wc);
4164297a3b0SGarrett D'Amore 		break;
4174297a3b0SGarrett D'Amore 	}
4184297a3b0SGarrett D'Amore 
4194297a3b0SGarrett D'Amore 	if (!MORE())
4204297a3b0SGarrett D'Amore 		return;
4214297a3b0SGarrett D'Amore 	c = PEEK();
4224297a3b0SGarrett D'Amore 	/* we call { a repetition if followed by a digit */
4234297a3b0SGarrett D'Amore 	if (!(c == '*' || c == '+' || c == '?' ||
4244297a3b0SGarrett D'Amore 	    (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
4254297a3b0SGarrett D'Amore 		return;		/* no repetition, we're done */
4264297a3b0SGarrett D'Amore 	NEXT();
4274297a3b0SGarrett D'Amore 
4284297a3b0SGarrett D'Amore 	(void) REQUIRE(!wascaret, REG_BADRPT);
4294297a3b0SGarrett D'Amore 	switch (c) {
4304297a3b0SGarrett D'Amore 	case '*':	/* implemented as +? */
4314297a3b0SGarrett D'Amore 		/* this case does not require the (y|) trick, noKLUDGE */
4324297a3b0SGarrett D'Amore 		INSERT(OPLUS_, pos);
4334297a3b0SGarrett D'Amore 		ASTERN(O_PLUS, pos);
4344297a3b0SGarrett D'Amore 		INSERT(OQUEST_, pos);
4354297a3b0SGarrett D'Amore 		ASTERN(O_QUEST, pos);
4364297a3b0SGarrett D'Amore 		break;
4374297a3b0SGarrett D'Amore 	case '+':
4384297a3b0SGarrett D'Amore 		INSERT(OPLUS_, pos);
4394297a3b0SGarrett D'Amore 		ASTERN(O_PLUS, pos);
4404297a3b0SGarrett D'Amore 		break;
4414297a3b0SGarrett D'Amore 	case '?':
4424297a3b0SGarrett D'Amore 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
4434297a3b0SGarrett D'Amore 		INSERT(OCH_, pos);		/* offset slightly wrong */
4444297a3b0SGarrett D'Amore 		ASTERN(OOR1, pos);		/* this one's right */
4454297a3b0SGarrett D'Amore 		AHEAD(pos);			/* fix the OCH_ */
4464297a3b0SGarrett D'Amore 		EMIT(OOR2, 0);			/* offset very wrong... */
4474297a3b0SGarrett D'Amore 		AHEAD(THERE());			/* ...so fix it */
4484297a3b0SGarrett D'Amore 		ASTERN(O_CH, THERETHERE());
4494297a3b0SGarrett D'Amore 		break;
4504297a3b0SGarrett D'Amore 	case '{':
4514297a3b0SGarrett D'Amore 		count = p_count(p);
4524297a3b0SGarrett D'Amore 		if (EAT(',')) {
4534297a3b0SGarrett D'Amore 			if (isdigit((uch)PEEK())) {
4544297a3b0SGarrett D'Amore 				count2 = p_count(p);
4554297a3b0SGarrett D'Amore 				(void) REQUIRE(count <= count2, REG_BADBR);
4564297a3b0SGarrett D'Amore 			} else		/* single number with comma */
4574297a3b0SGarrett D'Amore 				count2 = INFINITY;
4584297a3b0SGarrett D'Amore 		} else		/* just a single number */
4594297a3b0SGarrett D'Amore 			count2 = count;
4604297a3b0SGarrett D'Amore 		repeat(p, pos, count, count2);
4614297a3b0SGarrett D'Amore 		if (!EAT('}')) {	/* error heuristics */
4624297a3b0SGarrett D'Amore 			while (MORE() && PEEK() != '}')
4634297a3b0SGarrett D'Amore 				NEXT();
4644297a3b0SGarrett D'Amore 			(void) REQUIRE(MORE(), REG_EBRACE);
4654297a3b0SGarrett D'Amore 			SETERROR(REG_BADBR);
4664297a3b0SGarrett D'Amore 		}
4674297a3b0SGarrett D'Amore 		break;
4684297a3b0SGarrett D'Amore 	}
4694297a3b0SGarrett D'Amore 
4704297a3b0SGarrett D'Amore 	if (!MORE())
4714297a3b0SGarrett D'Amore 		return;
4724297a3b0SGarrett D'Amore 	c = PEEK();
4734297a3b0SGarrett D'Amore 	if (!(c == '*' || c == '+' || c == '?' ||
4744297a3b0SGarrett D'Amore 	    (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
4754297a3b0SGarrett D'Amore 		return;
4764297a3b0SGarrett D'Amore 	SETERROR(REG_BADRPT);
4774297a3b0SGarrett D'Amore }
4784297a3b0SGarrett D'Amore 
4794297a3b0SGarrett D'Amore /*
4804297a3b0SGarrett D'Amore  * p_str - string (no metacharacters) "parser"
4814297a3b0SGarrett D'Amore  */
4824297a3b0SGarrett D'Amore static void
p_str(struct parse * p)4834297a3b0SGarrett D'Amore p_str(struct parse *p)
4844297a3b0SGarrett D'Amore {
4854297a3b0SGarrett D'Amore 	(void) REQUIRE(MORE(), REG_BADPAT);
4864297a3b0SGarrett D'Amore 	while (MORE())
4874297a3b0SGarrett D'Amore 		ordinary(p, WGETNEXT());
4884297a3b0SGarrett D'Amore }
4894297a3b0SGarrett D'Amore 
4904297a3b0SGarrett D'Amore /*
4914297a3b0SGarrett D'Amore  * p_bre - BRE parser top level, anchoring and concatenation
4924297a3b0SGarrett D'Amore  * Giving end1 as OUT essentially eliminates the end1/end2 check.
4934297a3b0SGarrett D'Amore  *
4944297a3b0SGarrett D'Amore  * This implementation is a bit of a kludge, in that a trailing $ is first
4954297a3b0SGarrett D'Amore  * taken as an ordinary character and then revised to be an anchor.
4964297a3b0SGarrett D'Amore  * The amount of lookahead needed to avoid this kludge is excessive.
4974297a3b0SGarrett D'Amore  */
4984297a3b0SGarrett D'Amore static void
p_bre(struct parse * p,wint_t end1,wint_t end2)4994297a3b0SGarrett D'Amore p_bre(struct parse *p,
5004297a3b0SGarrett D'Amore     wint_t end1,		/* first terminating character */
5014297a3b0SGarrett D'Amore     wint_t end2)		/* second terminating character */
5024297a3b0SGarrett D'Amore {
5034297a3b0SGarrett D'Amore 	sopno start = HERE();
5044297a3b0SGarrett D'Amore 	int first = 1;			/* first subexpression? */
5054297a3b0SGarrett D'Amore 	int wasdollar = 0;
5064297a3b0SGarrett D'Amore 
5074297a3b0SGarrett D'Amore 	if (EAT('^')) {
5084297a3b0SGarrett D'Amore 		EMIT(OBOL, 0);
5094297a3b0SGarrett D'Amore 		p->g->iflags |= USEBOL;
5104297a3b0SGarrett D'Amore 		p->g->nbol++;
5114297a3b0SGarrett D'Amore 	}
5124297a3b0SGarrett D'Amore 	while (MORE() && !SEETWO(end1, end2)) {
5134297a3b0SGarrett D'Amore 		wasdollar = p_simp_re(p, first);
5144297a3b0SGarrett D'Amore 		first = 0;
5154297a3b0SGarrett D'Amore 	}
5164297a3b0SGarrett D'Amore 	if (wasdollar) {	/* oops, that was a trailing anchor */
5174297a3b0SGarrett D'Amore 		DROP(1);
5184297a3b0SGarrett D'Amore 		EMIT(OEOL, 0);
5194297a3b0SGarrett D'Amore 		p->g->iflags |= USEEOL;
5204297a3b0SGarrett D'Amore 		p->g->neol++;
5214297a3b0SGarrett D'Amore 	}
5224297a3b0SGarrett D'Amore 
5234297a3b0SGarrett D'Amore 	(void) REQUIRE(HERE() != start, REG_BADPAT);	/* require nonempty */
5244297a3b0SGarrett D'Amore }
5254297a3b0SGarrett D'Amore 
5264297a3b0SGarrett D'Amore /*
5274297a3b0SGarrett D'Amore  * p_simp_re - parse a simple RE, an atom possibly followed by a repetition
5284297a3b0SGarrett D'Amore  */
5294297a3b0SGarrett D'Amore static int			/* was the simple RE an unbackslashed $? */
p_simp_re(struct parse * p,int starordinary)5304297a3b0SGarrett D'Amore p_simp_re(struct parse *p,
5314297a3b0SGarrett D'Amore 	int starordinary)	/* is a leading * an ordinary character? */
5324297a3b0SGarrett D'Amore {
5334297a3b0SGarrett D'Amore 	int c;
5344297a3b0SGarrett D'Amore 	int count;
5354297a3b0SGarrett D'Amore 	int count2;
5364297a3b0SGarrett D'Amore 	sopno pos;
5374297a3b0SGarrett D'Amore 	int i;
5384297a3b0SGarrett D'Amore 	wint_t wc;
5394297a3b0SGarrett D'Amore 	sopno subno;
5404297a3b0SGarrett D'Amore #define	BACKSL	(1<<CHAR_BIT)
5414297a3b0SGarrett D'Amore 
5424297a3b0SGarrett D'Amore 	pos = HERE();		/* repetion op, if any, covers from here */
5434297a3b0SGarrett D'Amore 
5444297a3b0SGarrett D'Amore 	assert(MORE());		/* caller should have ensured this */
5454297a3b0SGarrett D'Amore 	c = GETNEXT();
5464297a3b0SGarrett D'Amore 	if (c == '\\') {
5474297a3b0SGarrett D'Amore 		(void) REQUIRE(MORE(), REG_EESCAPE);
5484297a3b0SGarrett D'Amore 		c = BACKSL | GETNEXT();
5494297a3b0SGarrett D'Amore 	}
5504297a3b0SGarrett D'Amore 	switch (c) {
5514297a3b0SGarrett D'Amore 	case '.':
5524297a3b0SGarrett D'Amore 		if (p->g->cflags&REG_NEWLINE)
5534297a3b0SGarrett D'Amore 			nonnewline(p);
5544297a3b0SGarrett D'Amore 		else
5554297a3b0SGarrett D'Amore 			EMIT(OANY, 0);
5564297a3b0SGarrett D'Amore 		break;
5574297a3b0SGarrett D'Amore 	case '[':
5584297a3b0SGarrett D'Amore 		p_bracket(p);
5594297a3b0SGarrett D'Amore 		break;
56084441f85SGarrett D'Amore 	case BACKSL|'<':
56184441f85SGarrett D'Amore 		EMIT(OBOW, 0);
56284441f85SGarrett D'Amore 		break;
56384441f85SGarrett D'Amore 	case BACKSL|'>':
56484441f85SGarrett D'Amore 		EMIT(OEOW, 0);
56584441f85SGarrett D'Amore 		break;
5664297a3b0SGarrett D'Amore 	case BACKSL|'{':
5674297a3b0SGarrett D'Amore 		SETERROR(REG_BADRPT);
5684297a3b0SGarrett D'Amore 		break;
5694297a3b0SGarrett D'Amore 	case BACKSL|'(':
5704297a3b0SGarrett D'Amore 		p->g->nsub++;
5714297a3b0SGarrett D'Amore 		subno = p->g->nsub;
5724297a3b0SGarrett D'Amore 		if (subno < NPAREN)
5734297a3b0SGarrett D'Amore 			p->pbegin[subno] = HERE();
5744297a3b0SGarrett D'Amore 		EMIT(OLPAREN, subno);
5754297a3b0SGarrett D'Amore 		/* the MORE here is an error heuristic */
5764297a3b0SGarrett D'Amore 		if (MORE() && !SEETWO('\\', ')'))
5774297a3b0SGarrett D'Amore 			p_bre(p, '\\', ')');
5784297a3b0SGarrett D'Amore 		if (subno < NPAREN) {
5794297a3b0SGarrett D'Amore 			p->pend[subno] = HERE();
5804297a3b0SGarrett D'Amore 			assert(p->pend[subno] != 0);
5814297a3b0SGarrett D'Amore 		}
5824297a3b0SGarrett D'Amore 		EMIT(ORPAREN, subno);
5834297a3b0SGarrett D'Amore 		(void) REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
5844297a3b0SGarrett D'Amore 		break;
5854297a3b0SGarrett D'Amore 	case BACKSL|')':	/* should not get here -- must be user */
5864297a3b0SGarrett D'Amore 	case BACKSL|'}':
5874297a3b0SGarrett D'Amore 		SETERROR(REG_EPAREN);
5884297a3b0SGarrett D'Amore 		break;
5894297a3b0SGarrett D'Amore 	case BACKSL|'1':
5904297a3b0SGarrett D'Amore 	case BACKSL|'2':
5914297a3b0SGarrett D'Amore 	case BACKSL|'3':
5924297a3b0SGarrett D'Amore 	case BACKSL|'4':
5934297a3b0SGarrett D'Amore 	case BACKSL|'5':
5944297a3b0SGarrett D'Amore 	case BACKSL|'6':
5954297a3b0SGarrett D'Amore 	case BACKSL|'7':
5964297a3b0SGarrett D'Amore 	case BACKSL|'8':
5974297a3b0SGarrett D'Amore 	case BACKSL|'9':
5984297a3b0SGarrett D'Amore 		i = (c&~BACKSL) - '0';
5994297a3b0SGarrett D'Amore 		assert(i < NPAREN);
6004297a3b0SGarrett D'Amore 		if (p->pend[i] != 0) {
6014297a3b0SGarrett D'Amore 			assert(i <= p->g->nsub);
6024297a3b0SGarrett D'Amore 			EMIT(OBACK_, i);
6034297a3b0SGarrett D'Amore 			assert(p->pbegin[i] != 0);
6044297a3b0SGarrett D'Amore 			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
6054297a3b0SGarrett D'Amore 			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
6064297a3b0SGarrett D'Amore 			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
6074297a3b0SGarrett D'Amore 			EMIT(O_BACK, i);
6084297a3b0SGarrett D'Amore 		} else
6094297a3b0SGarrett D'Amore 			SETERROR(REG_ESUBREG);
6104297a3b0SGarrett D'Amore 		p->g->backrefs = 1;
6114297a3b0SGarrett D'Amore 		break;
6124297a3b0SGarrett D'Amore 	case '*':
6134297a3b0SGarrett D'Amore 		(void) REQUIRE(starordinary, REG_BADRPT);
6144297a3b0SGarrett D'Amore 		/* FALLTHROUGH */
6154297a3b0SGarrett D'Amore 	default:
6164297a3b0SGarrett D'Amore 		p->next--;
6174297a3b0SGarrett D'Amore 		wc = WGETNEXT();
6184297a3b0SGarrett D'Amore 		ordinary(p, wc);
6194297a3b0SGarrett D'Amore 		break;
6204297a3b0SGarrett D'Amore 	}
6214297a3b0SGarrett D'Amore 
6224297a3b0SGarrett D'Amore 	if (EAT('*')) {		/* implemented as +? */
6234297a3b0SGarrett D'Amore 		/* this case does not require the (y|) trick, noKLUDGE */
6244297a3b0SGarrett D'Amore 		INSERT(OPLUS_, pos);
6254297a3b0SGarrett D'Amore 		ASTERN(O_PLUS, pos);
6264297a3b0SGarrett D'Amore 		INSERT(OQUEST_, pos);
6274297a3b0SGarrett D'Amore 		ASTERN(O_QUEST, pos);
6284297a3b0SGarrett D'Amore 	} else if (EATTWO('\\', '{')) {
6294297a3b0SGarrett D'Amore 		count = p_count(p);
6304297a3b0SGarrett D'Amore 		if (EAT(',')) {
6314297a3b0SGarrett D'Amore 			if (MORE() && isdigit((uch)PEEK())) {
6324297a3b0SGarrett D'Amore 				count2 = p_count(p);
6334297a3b0SGarrett D'Amore 				(void) REQUIRE(count <= count2, REG_BADBR);
6344297a3b0SGarrett D'Amore 			} else		/* single number with comma */
6354297a3b0SGarrett D'Amore 				count2 = INFINITY;
6364297a3b0SGarrett D'Amore 		} else		/* just a single number */
6374297a3b0SGarrett D'Amore 			count2 = count;
6384297a3b0SGarrett D'Amore 		repeat(p, pos, count, count2);
6394297a3b0SGarrett D'Amore 		if (!EATTWO('\\', '}')) {	/* error heuristics */
6404297a3b0SGarrett D'Amore 			while (MORE() && !SEETWO('\\', '}'))
6414297a3b0SGarrett D'Amore 				NEXT();
6424297a3b0SGarrett D'Amore 			(void) REQUIRE(MORE(), REG_EBRACE);
6434297a3b0SGarrett D'Amore 			SETERROR(REG_BADBR);
6444297a3b0SGarrett D'Amore 		}
6454297a3b0SGarrett D'Amore 	} else if (c == '$')	/* $ (but not \$) ends it */
6464297a3b0SGarrett D'Amore 		return (1);
6474297a3b0SGarrett D'Amore 
6484297a3b0SGarrett D'Amore 	return (0);
6494297a3b0SGarrett D'Amore }
6504297a3b0SGarrett D'Amore 
6514297a3b0SGarrett D'Amore /*
6524297a3b0SGarrett D'Amore  * p_count - parse a repetition count
6534297a3b0SGarrett D'Amore  */
6544297a3b0SGarrett D'Amore static int			/* the value */
p_count(struct parse * p)6554297a3b0SGarrett D'Amore p_count(struct parse *p)
6564297a3b0SGarrett D'Amore {
6574297a3b0SGarrett D'Amore 	int count = 0;
6584297a3b0SGarrett D'Amore 	int ndigits = 0;
6594297a3b0SGarrett D'Amore 
6604297a3b0SGarrett D'Amore 	while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
6614297a3b0SGarrett D'Amore 		count = count*10 + (GETNEXT() - '0');
6624297a3b0SGarrett D'Amore 		ndigits++;
6634297a3b0SGarrett D'Amore 	}
6644297a3b0SGarrett D'Amore 
6654297a3b0SGarrett D'Amore 	(void) REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
6664297a3b0SGarrett D'Amore 	return (count);
6674297a3b0SGarrett D'Amore }
6684297a3b0SGarrett D'Amore 
6694297a3b0SGarrett D'Amore /*
6704297a3b0SGarrett D'Amore  * p_bracket - parse a bracketed character list
6714297a3b0SGarrett D'Amore  */
6724297a3b0SGarrett D'Amore static void
p_bracket(struct parse * p)6734297a3b0SGarrett D'Amore p_bracket(struct parse *p)
6744297a3b0SGarrett D'Amore {
6754297a3b0SGarrett D'Amore 	cset *cs;
6764297a3b0SGarrett D'Amore 	wint_t ch;
6774297a3b0SGarrett D'Amore 
6784297a3b0SGarrett D'Amore 	/* Dept of Truly Sickening Special-Case Kludges */
6794297a3b0SGarrett D'Amore 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
6804297a3b0SGarrett D'Amore 		EMIT(OBOW, 0);
6814297a3b0SGarrett D'Amore 		NEXTn(6);
6824297a3b0SGarrett D'Amore 		return;
6834297a3b0SGarrett D'Amore 	}
6844297a3b0SGarrett D'Amore 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
6854297a3b0SGarrett D'Amore 		EMIT(OEOW, 0);
6864297a3b0SGarrett D'Amore 		NEXTn(6);
6874297a3b0SGarrett D'Amore 		return;
6884297a3b0SGarrett D'Amore 	}
6894297a3b0SGarrett D'Amore 
6904297a3b0SGarrett D'Amore 	if ((cs = allocset(p)) == NULL)
6914297a3b0SGarrett D'Amore 		return;
6924297a3b0SGarrett D'Amore 
6934297a3b0SGarrett D'Amore 	if (p->g->cflags&REG_ICASE)
6944297a3b0SGarrett D'Amore 		cs->icase = 1;
6954297a3b0SGarrett D'Amore 	if (EAT('^'))
6964297a3b0SGarrett D'Amore 		cs->invert = 1;
6974297a3b0SGarrett D'Amore 	if (EAT(']'))
6984297a3b0SGarrett D'Amore 		CHadd(p, cs, ']');
6994297a3b0SGarrett D'Amore 	else if (EAT('-'))
7004297a3b0SGarrett D'Amore 		CHadd(p, cs, '-');
7014297a3b0SGarrett D'Amore 	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
7024297a3b0SGarrett D'Amore 		p_b_term(p, cs);
7034297a3b0SGarrett D'Amore 	if (EAT('-'))
7044297a3b0SGarrett D'Amore 		CHadd(p, cs, '-');
7054297a3b0SGarrett D'Amore 	(void) MUSTEAT(']', REG_EBRACK);
7064297a3b0SGarrett D'Amore 
7074297a3b0SGarrett D'Amore 	if (p->error != 0)	/* don't mess things up further */
7084297a3b0SGarrett D'Amore 		return;
7094297a3b0SGarrett D'Amore 
7104297a3b0SGarrett D'Amore 	if (cs->invert && p->g->cflags&REG_NEWLINE)
7114297a3b0SGarrett D'Amore 		cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
7124297a3b0SGarrett D'Amore 
7134297a3b0SGarrett D'Amore 	if ((ch = singleton(cs)) != OUT) {	/* optimize singleton sets */
7144297a3b0SGarrett D'Amore 		ordinary(p, ch);
7154297a3b0SGarrett D'Amore 		freeset(p, cs);
7164297a3b0SGarrett D'Amore 	} else
7174297a3b0SGarrett D'Amore 		EMIT(OANYOF, (int)(cs - p->g->sets));
7184297a3b0SGarrett D'Amore }
7194297a3b0SGarrett D'Amore 
7204297a3b0SGarrett D'Amore /*
7214297a3b0SGarrett D'Amore  * p_b_term - parse one term of a bracketed character list
7224297a3b0SGarrett D'Amore  */
7234297a3b0SGarrett D'Amore static void
p_b_term(struct parse * p,cset * cs)7244297a3b0SGarrett D'Amore p_b_term(struct parse *p, cset *cs)
7254297a3b0SGarrett D'Amore {
7264297a3b0SGarrett D'Amore 	char c;
7274297a3b0SGarrett D'Amore 	wint_t start, finish;
7284297a3b0SGarrett D'Amore 	wint_t i;
729*62c3776aSGarrett D'Amore 	locale_t loc = uselocale(NULL);
7304297a3b0SGarrett D'Amore 
7314297a3b0SGarrett D'Amore 	/* classify what we've got */
7324297a3b0SGarrett D'Amore 	switch ((MORE()) ? PEEK() : '\0') {
7334297a3b0SGarrett D'Amore 	case '[':
7344297a3b0SGarrett D'Amore 		c = (MORE2()) ? PEEK2() : '\0';
7354297a3b0SGarrett D'Amore 		break;
7364297a3b0SGarrett D'Amore 	case '-':
7374297a3b0SGarrett D'Amore 		SETERROR(REG_ERANGE);
7384297a3b0SGarrett D'Amore 		return;			/* NOTE RETURN */
7394297a3b0SGarrett D'Amore 	default:
7404297a3b0SGarrett D'Amore 		c = '\0';
7414297a3b0SGarrett D'Amore 		break;
7424297a3b0SGarrett D'Amore 	}
7434297a3b0SGarrett D'Amore 
7444297a3b0SGarrett D'Amore 	switch (c) {
7454297a3b0SGarrett D'Amore 	case ':':		/* character class */
7464297a3b0SGarrett D'Amore 		NEXT2();
7474297a3b0SGarrett D'Amore 		(void) REQUIRE(MORE(), REG_EBRACK);
7484297a3b0SGarrett D'Amore 		c = PEEK();
7494297a3b0SGarrett D'Amore 		(void) REQUIRE(c != '-' && c != ']', REG_ECTYPE);
7504297a3b0SGarrett D'Amore 		p_b_cclass(p, cs);
7514297a3b0SGarrett D'Amore 		(void) REQUIRE(MORE(), REG_EBRACK);
7524297a3b0SGarrett D'Amore 		(void) REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
7534297a3b0SGarrett D'Amore 		break;
7544297a3b0SGarrett D'Amore 	case '=':		/* equivalence class */
7554297a3b0SGarrett D'Amore 		NEXT2();
7564297a3b0SGarrett D'Amore 		(void) REQUIRE(MORE(), REG_EBRACK);
7574297a3b0SGarrett D'Amore 		c = PEEK();
7584297a3b0SGarrett D'Amore 		(void) REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
7594297a3b0SGarrett D'Amore 		p_b_eclass(p, cs);
7604297a3b0SGarrett D'Amore 		(void) REQUIRE(MORE(), REG_EBRACK);
7614297a3b0SGarrett D'Amore 		(void) REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
7624297a3b0SGarrett D'Amore 		break;
7634297a3b0SGarrett D'Amore 	default:		/* symbol, ordinary character, or range */
7644297a3b0SGarrett D'Amore 		start = p_b_symbol(p);
7654297a3b0SGarrett D'Amore 		if (SEE('-') && MORE2() && PEEK2() != ']') {
7664297a3b0SGarrett D'Amore 			/* range */
7674297a3b0SGarrett D'Amore 			NEXT();
7684297a3b0SGarrett D'Amore 			if (EAT('-'))
7694297a3b0SGarrett D'Amore 				finish = '-';
7704297a3b0SGarrett D'Amore 			else
7714297a3b0SGarrett D'Amore 				finish = p_b_symbol(p);
7724297a3b0SGarrett D'Amore 		} else
7734297a3b0SGarrett D'Amore 			finish = start;
7744297a3b0SGarrett D'Amore 		if (start == finish)
7754297a3b0SGarrett D'Amore 			CHadd(p, cs, start);
7764297a3b0SGarrett D'Amore 		else {
777*62c3776aSGarrett D'Amore 			if (loc->collate->lc_is_posix) {
7784297a3b0SGarrett D'Amore 				(void) REQUIRE((uch)start <= (uch)finish,
7794297a3b0SGarrett D'Amore 				    REG_ERANGE);
7804297a3b0SGarrett D'Amore 				CHaddrange(p, cs, start, finish);
7814297a3b0SGarrett D'Amore 			} else {
7826b5e5868SGarrett D'Amore 				(void) REQUIRE(_collate_range_cmp(start,
783*62c3776aSGarrett D'Amore 				    finish, loc) <= 0, REG_ERANGE);
7844297a3b0SGarrett D'Amore 				for (i = 0; i <= UCHAR_MAX; i++) {
785*62c3776aSGarrett D'Amore 					if (_collate_range_cmp(start, i, loc)
786*62c3776aSGarrett D'Amore 					    <= 0 &&
787*62c3776aSGarrett D'Amore 					    _collate_range_cmp(i, finish, loc)
788*62c3776aSGarrett D'Amore 					    <= 0)
7894297a3b0SGarrett D'Amore 						CHadd(p, cs, i);
7904297a3b0SGarrett D'Amore 				}
7914297a3b0SGarrett D'Amore 			}
7924297a3b0SGarrett D'Amore 		}
7934297a3b0SGarrett D'Amore 		break;
7944297a3b0SGarrett D'Amore 	}
7954297a3b0SGarrett D'Amore }
7964297a3b0SGarrett D'Amore 
7974297a3b0SGarrett D'Amore /*
7984297a3b0SGarrett D'Amore  * p_b_cclass - parse a character-class name and deal with it
7994297a3b0SGarrett D'Amore  */
8004297a3b0SGarrett D'Amore static void
p_b_cclass(struct parse * p,cset * cs)8014297a3b0SGarrett D'Amore p_b_cclass(struct parse *p, cset *cs)
8024297a3b0SGarrett D'Amore {
8034297a3b0SGarrett D'Amore 	char *sp = p->next;
8044297a3b0SGarrett D'Amore 	size_t len;
8054297a3b0SGarrett D'Amore 	wctype_t wct;
8064297a3b0SGarrett D'Amore 	char clname[16];
8074297a3b0SGarrett D'Amore 
8084297a3b0SGarrett D'Amore 	while (MORE() && isalpha((uch)PEEK()))
8094297a3b0SGarrett D'Amore 		NEXT();
8104297a3b0SGarrett D'Amore 	len = p->next - sp;
8114297a3b0SGarrett D'Amore 	if (len >= sizeof (clname) - 1) {
8124297a3b0SGarrett D'Amore 		SETERROR(REG_ECTYPE);
8134297a3b0SGarrett D'Amore 		return;
8144297a3b0SGarrett D'Amore 	}
8154297a3b0SGarrett D'Amore 	(void) memcpy(clname, sp, len);
8164297a3b0SGarrett D'Amore 	clname[len] = '\0';
8174297a3b0SGarrett D'Amore 	if ((wct = wctype(clname)) == 0) {
8184297a3b0SGarrett D'Amore 		SETERROR(REG_ECTYPE);
8194297a3b0SGarrett D'Amore 		return;
8204297a3b0SGarrett D'Amore 	}
8214297a3b0SGarrett D'Amore 	CHaddtype(p, cs, wct);
8224297a3b0SGarrett D'Amore }
8234297a3b0SGarrett D'Amore 
8244297a3b0SGarrett D'Amore /*
8254297a3b0SGarrett D'Amore  * p_b_eclass - parse an equivalence-class name and deal with it
8264297a3b0SGarrett D'Amore  *
8274297a3b0SGarrett D'Amore  * This implementation is incomplete. xxx
8284297a3b0SGarrett D'Amore  */
8294297a3b0SGarrett D'Amore static void
p_b_eclass(struct parse * p,cset * cs)8304297a3b0SGarrett D'Amore p_b_eclass(struct parse *p, cset *cs)
8314297a3b0SGarrett D'Amore {
8324297a3b0SGarrett D'Amore 	wint_t c;
8334297a3b0SGarrett D'Amore 
8344297a3b0SGarrett D'Amore 	c = p_b_coll_elem(p, '=');
8354297a3b0SGarrett D'Amore 	CHadd(p, cs, c);
8364297a3b0SGarrett D'Amore }
8374297a3b0SGarrett D'Amore 
8384297a3b0SGarrett D'Amore /*
8394297a3b0SGarrett D'Amore  * p_b_symbol - parse a character or [..]ed multicharacter collating symbol
8404297a3b0SGarrett D'Amore  */
8414297a3b0SGarrett D'Amore static wint_t			/* value of symbol */
p_b_symbol(struct parse * p)8424297a3b0SGarrett D'Amore p_b_symbol(struct parse *p)
8434297a3b0SGarrett D'Amore {
8444297a3b0SGarrett D'Amore 	wint_t value;
8454297a3b0SGarrett D'Amore 
8464297a3b0SGarrett D'Amore 	(void) REQUIRE(MORE(), REG_EBRACK);
8474297a3b0SGarrett D'Amore 	if (!EATTWO('[', '.'))
8484297a3b0SGarrett D'Amore 		return (WGETNEXT());
8494297a3b0SGarrett D'Amore 
8504297a3b0SGarrett D'Amore 	/* collating symbol */
8514297a3b0SGarrett D'Amore 	value = p_b_coll_elem(p, '.');
8524297a3b0SGarrett D'Amore 	(void) REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
8534297a3b0SGarrett D'Amore 	return (value);
8544297a3b0SGarrett D'Amore }
8554297a3b0SGarrett D'Amore 
8564297a3b0SGarrett D'Amore /*
8574297a3b0SGarrett D'Amore  * p_b_coll_elem - parse a collating-element name and look it up
8584297a3b0SGarrett D'Amore  */
8594297a3b0SGarrett D'Amore static wint_t			/* value of collating element */
p_b_coll_elem(struct parse * p,wint_t endc)8604297a3b0SGarrett D'Amore p_b_coll_elem(struct parse *p,
8614297a3b0SGarrett D'Amore 	wint_t endc)		/* name ended by endc,']' */
8624297a3b0SGarrett D'Amore {
8634297a3b0SGarrett D'Amore 	char *sp = p->next;
8644297a3b0SGarrett D'Amore 	struct cname *cp;
8654297a3b0SGarrett D'Amore 	int len;
8664297a3b0SGarrett D'Amore 	mbstate_t mbs;
8674297a3b0SGarrett D'Amore 	wchar_t wc;
8684297a3b0SGarrett D'Amore 	size_t clen;
8694297a3b0SGarrett D'Amore 
8704297a3b0SGarrett D'Amore 	while (MORE() && !SEETWO(endc, ']'))
8714297a3b0SGarrett D'Amore 		NEXT();
8724297a3b0SGarrett D'Amore 	if (!MORE()) {
8734297a3b0SGarrett D'Amore 		SETERROR(REG_EBRACK);
8744297a3b0SGarrett D'Amore 		return (0);
8754297a3b0SGarrett D'Amore 	}
8764297a3b0SGarrett D'Amore 	len = p->next - sp;
8774297a3b0SGarrett D'Amore 	for (cp = cnames; cp->name != NULL; cp++)
8784297a3b0SGarrett D'Amore 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
8794297a3b0SGarrett D'Amore 			return (cp->code);	/* known name */
8804297a3b0SGarrett D'Amore 	(void) memset(&mbs, 0, sizeof (mbs));
8814297a3b0SGarrett D'Amore 	if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
8824297a3b0SGarrett D'Amore 		return (wc);			/* single character */
8834297a3b0SGarrett D'Amore 	else if (clen == (size_t)-1 || clen == (size_t)-2)
8844297a3b0SGarrett D'Amore 		SETERROR(REG_ECHAR);
8854297a3b0SGarrett D'Amore 	else
8864297a3b0SGarrett D'Amore 		SETERROR(REG_ECOLLATE);		/* neither */
8874297a3b0SGarrett D'Amore 	return (0);
8884297a3b0SGarrett D'Amore }
8894297a3b0SGarrett D'Amore 
8904297a3b0SGarrett D'Amore /*
8914297a3b0SGarrett D'Amore  * othercase - return the case counterpart of an alphabetic
8924297a3b0SGarrett D'Amore  */
8934297a3b0SGarrett D'Amore static wint_t			/* if no counterpart, return ch */
othercase(wint_t ch)8944297a3b0SGarrett D'Amore othercase(wint_t ch)
8954297a3b0SGarrett D'Amore {
8964297a3b0SGarrett D'Amore 	assert(iswalpha(ch));
8974297a3b0SGarrett D'Amore 	if (iswupper(ch))
8984297a3b0SGarrett D'Amore 		return (towlower(ch));
8994297a3b0SGarrett D'Amore 	else if (iswlower(ch))
9004297a3b0SGarrett D'Amore 		return (towupper(ch));
9014297a3b0SGarrett D'Amore 	else			/* peculiar, but could happen */
9024297a3b0SGarrett D'Amore 		return (ch);
9034297a3b0SGarrett D'Amore }
9044297a3b0SGarrett D'Amore 
9054297a3b0SGarrett D'Amore /*
9064297a3b0SGarrett D'Amore  * bothcases - emit a dualcase version of a two-case character
9074297a3b0SGarrett D'Amore  *
9084297a3b0SGarrett D'Amore  * Boy, is this implementation ever a kludge...
9094297a3b0SGarrett D'Amore  */
9104297a3b0SGarrett D'Amore static void
bothcases(struct parse * p,wint_t ch)9114297a3b0SGarrett D'Amore bothcases(struct parse *p, wint_t ch)
9124297a3b0SGarrett D'Amore {
9134297a3b0SGarrett D'Amore 	char *oldnext = p->next;
9144297a3b0SGarrett D'Amore 	char *oldend = p->end;
9154297a3b0SGarrett D'Amore 	char bracket[3 + MB_LEN_MAX];
9164297a3b0SGarrett D'Amore 	size_t n;
9174297a3b0SGarrett D'Amore 	mbstate_t mbs;
9184297a3b0SGarrett D'Amore 
9194297a3b0SGarrett D'Amore 	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
9204297a3b0SGarrett D'Amore 	p->next = bracket;
9214297a3b0SGarrett D'Amore 	(void) memset(&mbs, 0, sizeof (mbs));
9224297a3b0SGarrett D'Amore 	n = wcrtomb(bracket, ch, &mbs);
9234297a3b0SGarrett D'Amore 	assert(n != (size_t)-1);
9244297a3b0SGarrett D'Amore 	bracket[n] = ']';
9254297a3b0SGarrett D'Amore 	bracket[n + 1] = '\0';
9264297a3b0SGarrett D'Amore 	p->end = bracket+n+1;
9274297a3b0SGarrett D'Amore 	p_bracket(p);
9284297a3b0SGarrett D'Amore 	assert(p->next == p->end);
9294297a3b0SGarrett D'Amore 	p->next = oldnext;
9304297a3b0SGarrett D'Amore 	p->end = oldend;
9314297a3b0SGarrett D'Amore }
9324297a3b0SGarrett D'Amore 
9334297a3b0SGarrett D'Amore /*
9344297a3b0SGarrett D'Amore  * ordinary - emit an ordinary character
9354297a3b0SGarrett D'Amore  */
9364297a3b0SGarrett D'Amore static void
ordinary(struct parse * p,wint_t ch)9374297a3b0SGarrett D'Amore ordinary(struct parse *p, wint_t ch)
9384297a3b0SGarrett D'Amore {
9394297a3b0SGarrett D'Amore 	cset *cs;
9404297a3b0SGarrett D'Amore 
9414297a3b0SGarrett D'Amore 	if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
9424297a3b0SGarrett D'Amore 		bothcases(p, ch);
9434297a3b0SGarrett D'Amore 	else if ((ch & OPDMASK) == ch)
9444297a3b0SGarrett D'Amore 		EMIT(OCHAR, ch);
9454297a3b0SGarrett D'Amore 	else {
9464297a3b0SGarrett D'Amore 		/*
9474297a3b0SGarrett D'Amore 		 * Kludge: character is too big to fit into an OCHAR operand.
9484297a3b0SGarrett D'Amore 		 * Emit a singleton set.
9494297a3b0SGarrett D'Amore 		 */
9504297a3b0SGarrett D'Amore 		if ((cs = allocset(p)) == NULL)
9514297a3b0SGarrett D'Amore 			return;
9524297a3b0SGarrett D'Amore 		CHadd(p, cs, ch);
9534297a3b0SGarrett D'Amore 		EMIT(OANYOF, (int)(cs - p->g->sets));
9544297a3b0SGarrett D'Amore 	}
9554297a3b0SGarrett D'Amore }
9564297a3b0SGarrett D'Amore 
9574297a3b0SGarrett D'Amore /*
9584297a3b0SGarrett D'Amore  * nonnewline - emit REG_NEWLINE version of OANY
9594297a3b0SGarrett D'Amore  *
9604297a3b0SGarrett D'Amore  * Boy, is this implementation ever a kludge...
9614297a3b0SGarrett D'Amore  */
9624297a3b0SGarrett D'Amore static void
nonnewline(struct parse * p)9634297a3b0SGarrett D'Amore nonnewline(struct parse *p)
9644297a3b0SGarrett D'Amore {
9654297a3b0SGarrett D'Amore 	char *oldnext = p->next;
9664297a3b0SGarrett D'Amore 	char *oldend = p->end;
9674297a3b0SGarrett D'Amore 	char bracket[4];
9684297a3b0SGarrett D'Amore 
9694297a3b0SGarrett D'Amore 	p->next = bracket;
9704297a3b0SGarrett D'Amore 	p->end = bracket+3;
9714297a3b0SGarrett D'Amore 	bracket[0] = '^';
9724297a3b0SGarrett D'Amore 	bracket[1] = '\n';
9734297a3b0SGarrett D'Amore 	bracket[2] = ']';
9744297a3b0SGarrett D'Amore 	bracket[3] = '\0';
9754297a3b0SGarrett D'Amore 	p_bracket(p);
9764297a3b0SGarrett D'Amore 	assert(p->next == bracket+3);
9774297a3b0SGarrett D'Amore 	p->next = oldnext;
9784297a3b0SGarrett D'Amore 	p->end = oldend;
9794297a3b0SGarrett D'Amore }
9804297a3b0SGarrett D'Amore 
9814297a3b0SGarrett D'Amore /*
9824297a3b0SGarrett D'Amore  * repeat - generate code for a bounded repetition, recursively if needed
9834297a3b0SGarrett D'Amore  */
9844297a3b0SGarrett D'Amore static void
repeat(struct parse * p,sopno start,int from,int to)9854297a3b0SGarrett D'Amore repeat(struct parse *p,
9864297a3b0SGarrett D'Amore     sopno start,		/* operand from here to end of strip */
9874297a3b0SGarrett D'Amore     int from,			/* repeated from this number */
9884297a3b0SGarrett D'Amore     int to)			/* to this number of times (maybe INFINITY) */
9894297a3b0SGarrett D'Amore {
9904297a3b0SGarrett D'Amore 	sopno finish = HERE();
9914297a3b0SGarrett D'Amore #define	N	2
9924297a3b0SGarrett D'Amore #define	INF	3
9934297a3b0SGarrett D'Amore #define	REP(f, t)	((f)*8 + (t))
9944297a3b0SGarrett D'Amore #define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
9954297a3b0SGarrett D'Amore 	sopno copy;
9964297a3b0SGarrett D'Amore 
9974297a3b0SGarrett D'Amore 	if (p->error != 0)	/* head off possible runaway recursion */
9984297a3b0SGarrett D'Amore 		return;
9994297a3b0SGarrett D'Amore 
10004297a3b0SGarrett D'Amore 	assert(from <= to);
10014297a3b0SGarrett D'Amore 
10024297a3b0SGarrett D'Amore 	switch (REP(MAP(from), MAP(to))) {
10034297a3b0SGarrett D'Amore 	case REP(0, 0):			/* must be user doing this */
10044297a3b0SGarrett D'Amore 		DROP(finish-start);	/* drop the operand */
10054297a3b0SGarrett D'Amore 		break;
10064297a3b0SGarrett D'Amore 	case REP(0, 1):			/* as x{1,1}? */
10074297a3b0SGarrett D'Amore 	case REP(0, N):			/* as x{1,n}? */
10084297a3b0SGarrett D'Amore 	case REP(0, INF):		/* as x{1,}? */
10094297a3b0SGarrett D'Amore 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
10104297a3b0SGarrett D'Amore 		INSERT(OCH_, start);		/* offset is wrong... */
10114297a3b0SGarrett D'Amore 		repeat(p, start+1, 1, to);
10124297a3b0SGarrett D'Amore 		ASTERN(OOR1, start);
10134297a3b0SGarrett D'Amore 		AHEAD(start);			/* ... fix it */
10144297a3b0SGarrett D'Amore 		EMIT(OOR2, 0);
10154297a3b0SGarrett D'Amore 		AHEAD(THERE());
10164297a3b0SGarrett D'Amore 		ASTERN(O_CH, THERETHERE());
10174297a3b0SGarrett D'Amore 		break;
10184297a3b0SGarrett D'Amore 	case REP(1, 1):			/* trivial case */
10194297a3b0SGarrett D'Amore 		/* done */
10204297a3b0SGarrett D'Amore 		break;
10214297a3b0SGarrett D'Amore 	case REP(1, N):			/* as x?x{1,n-1} */
10224297a3b0SGarrett D'Amore 		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
10234297a3b0SGarrett D'Amore 		INSERT(OCH_, start);
10244297a3b0SGarrett D'Amore 		ASTERN(OOR1, start);
10254297a3b0SGarrett D'Amore 		AHEAD(start);
10264297a3b0SGarrett D'Amore 		EMIT(OOR2, 0);			/* offset very wrong... */
10274297a3b0SGarrett D'Amore 		AHEAD(THERE());			/* ...so fix it */
10284297a3b0SGarrett D'Amore 		ASTERN(O_CH, THERETHERE());
10294297a3b0SGarrett D'Amore 		copy = dupl(p, start+1, finish+1);
10304297a3b0SGarrett D'Amore 		assert(copy == finish+4);
10314297a3b0SGarrett D'Amore 		repeat(p, copy, 1, to-1);
10324297a3b0SGarrett D'Amore 		break;
10334297a3b0SGarrett D'Amore 	case REP(1, INF):		/* as x+ */
10344297a3b0SGarrett D'Amore 		INSERT(OPLUS_, start);
10354297a3b0SGarrett D'Amore 		ASTERN(O_PLUS, start);
10364297a3b0SGarrett D'Amore 		break;
10374297a3b0SGarrett D'Amore 	case REP(N, N):			/* as xx{m-1,n-1} */
10384297a3b0SGarrett D'Amore 		copy = dupl(p, start, finish);
10394297a3b0SGarrett D'Amore 		repeat(p, copy, from-1, to-1);
10404297a3b0SGarrett D'Amore 		break;
10414297a3b0SGarrett D'Amore 	case REP(N, INF):		/* as xx{n-1,INF} */
10424297a3b0SGarrett D'Amore 		copy = dupl(p, start, finish);
10434297a3b0SGarrett D'Amore 		repeat(p, copy, from-1, to);
10444297a3b0SGarrett D'Amore 		break;
10454297a3b0SGarrett D'Amore 	default:			/* "can't happen" */
10464297a3b0SGarrett D'Amore 		SETERROR(REG_EFATAL);	/* just in case */
10474297a3b0SGarrett D'Amore 		break;
10484297a3b0SGarrett D'Amore 	}
10494297a3b0SGarrett D'Amore }
10504297a3b0SGarrett D'Amore 
10514297a3b0SGarrett D'Amore /*
10524297a3b0SGarrett D'Amore  * wgetnext - helper function for WGETNEXT() macro. Gets the next wide
10534297a3b0SGarrett D'Amore  * character from the parse struct, signals a REG_ILLSEQ error if the
10544297a3b0SGarrett D'Amore  * character can't be converted. Returns the number of bytes consumed.
10554297a3b0SGarrett D'Amore  */
10564297a3b0SGarrett D'Amore static wint_t
wgetnext(struct parse * p)10574297a3b0SGarrett D'Amore wgetnext(struct parse *p)
10584297a3b0SGarrett D'Amore {
10594297a3b0SGarrett D'Amore 	mbstate_t mbs;
10604297a3b0SGarrett D'Amore 	wchar_t wc;
10614297a3b0SGarrett D'Amore 	size_t n;
10624297a3b0SGarrett D'Amore 
10634297a3b0SGarrett D'Amore 	(void) memset(&mbs, 0, sizeof (mbs));
10644297a3b0SGarrett D'Amore 	n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
10654297a3b0SGarrett D'Amore 	if (n == (size_t)-1 || n == (size_t)-2) {
10664297a3b0SGarrett D'Amore 		SETERROR(REG_ECHAR);
10674297a3b0SGarrett D'Amore 		return (0);
10684297a3b0SGarrett D'Amore 	}
10694297a3b0SGarrett D'Amore 	if (n == 0)
10704297a3b0SGarrett D'Amore 		n = 1;
10714297a3b0SGarrett D'Amore 	p->next += n;
10724297a3b0SGarrett D'Amore 	return (wc);
10734297a3b0SGarrett D'Amore }
10744297a3b0SGarrett D'Amore 
10754297a3b0SGarrett D'Amore /*
10764297a3b0SGarrett D'Amore  * seterr - set an error condition
10774297a3b0SGarrett D'Amore  */
10784297a3b0SGarrett D'Amore static int			/* useless but makes type checking happy */
seterr(struct parse * p,int e)10794297a3b0SGarrett D'Amore seterr(struct parse *p, int e)
10804297a3b0SGarrett D'Amore {
10814297a3b0SGarrett D'Amore 	if (p->error == 0)	/* keep earliest error condition */
10824297a3b0SGarrett D'Amore 		p->error = e;
10834297a3b0SGarrett D'Amore 	p->next = nuls;		/* try to bring things to a halt */
10844297a3b0SGarrett D'Amore 	p->end = nuls;
10854297a3b0SGarrett D'Amore 	return (0);		/* make the return value well-defined */
10864297a3b0SGarrett D'Amore }
10874297a3b0SGarrett D'Amore 
10884297a3b0SGarrett D'Amore /*
10894297a3b0SGarrett D'Amore  * allocset - allocate a set of characters for []
10904297a3b0SGarrett D'Amore  */
10914297a3b0SGarrett D'Amore static cset *
allocset(struct parse * p)10924297a3b0SGarrett D'Amore allocset(struct parse *p)
10934297a3b0SGarrett D'Amore {
10944297a3b0SGarrett D'Amore 	cset *cs, *ncs;
10954297a3b0SGarrett D'Amore 
10964297a3b0SGarrett D'Amore 	ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof (*ncs));
10974297a3b0SGarrett D'Amore 	if (ncs == NULL) {
10984297a3b0SGarrett D'Amore 		SETERROR(REG_ESPACE);
10994297a3b0SGarrett D'Amore 		return (NULL);
11004297a3b0SGarrett D'Amore 	}
11014297a3b0SGarrett D'Amore 	p->g->sets = ncs;
11024297a3b0SGarrett D'Amore 	cs = &p->g->sets[p->g->ncsets++];
11034297a3b0SGarrett D'Amore 	(void) memset(cs, 0, sizeof (*cs));
11044297a3b0SGarrett D'Amore 
11054297a3b0SGarrett D'Amore 	return (cs);
11064297a3b0SGarrett D'Amore }
11074297a3b0SGarrett D'Amore 
11084297a3b0SGarrett D'Amore /*
11094297a3b0SGarrett D'Amore  * freeset - free a now-unused set
11104297a3b0SGarrett D'Amore  */
11114297a3b0SGarrett D'Amore static void
freeset(struct parse * p,cset * cs)11124297a3b0SGarrett D'Amore freeset(struct parse *p, cset *cs)
11134297a3b0SGarrett D'Amore {
11144297a3b0SGarrett D'Amore 	cset *top = &p->g->sets[p->g->ncsets];
11154297a3b0SGarrett D'Amore 
11164297a3b0SGarrett D'Amore 	free(cs->wides);
11174297a3b0SGarrett D'Amore 	free(cs->ranges);
11184297a3b0SGarrett D'Amore 	free(cs->types);
11194297a3b0SGarrett D'Amore 	(void) memset(cs, 0, sizeof (*cs));
11204297a3b0SGarrett D'Amore 	if (cs == top-1)	/* recover only the easy case */
11214297a3b0SGarrett D'Amore 		p->g->ncsets--;
11224297a3b0SGarrett D'Amore }
11234297a3b0SGarrett D'Amore 
11244297a3b0SGarrett D'Amore /*
11254297a3b0SGarrett D'Amore  * singleton - Determine whether a set contains only one character,
11264297a3b0SGarrett D'Amore  * returning it if so, otherwise returning OUT.
11274297a3b0SGarrett D'Amore  */
11284297a3b0SGarrett D'Amore static wint_t
singleton(cset * cs)11294297a3b0SGarrett D'Amore singleton(cset *cs)
11304297a3b0SGarrett D'Amore {
11314297a3b0SGarrett D'Amore 	wint_t i, s, n;
11324297a3b0SGarrett D'Amore 
11334297a3b0SGarrett D'Amore 	for (i = n = 0; i < NC; i++)
11344297a3b0SGarrett D'Amore 		if (CHIN(cs, i)) {
11354297a3b0SGarrett D'Amore 			n++;
11364297a3b0SGarrett D'Amore 			s = i;
11374297a3b0SGarrett D'Amore 		}
11384297a3b0SGarrett D'Amore 	if (n == 1)
11394297a3b0SGarrett D'Amore 		return (s);
11404297a3b0SGarrett D'Amore 	if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
11414297a3b0SGarrett D'Amore 	    cs->icase == 0)
11424297a3b0SGarrett D'Amore 		return (cs->wides[0]);
11434297a3b0SGarrett D'Amore 	/* Don't bother handling the other cases. */
11444297a3b0SGarrett D'Amore 	return (OUT);
11454297a3b0SGarrett D'Amore }
11464297a3b0SGarrett D'Amore 
11474297a3b0SGarrett D'Amore /*
11484297a3b0SGarrett D'Amore  * CHadd - add character to character set.
11494297a3b0SGarrett D'Amore  */
11504297a3b0SGarrett D'Amore static void
CHadd(struct parse * p,cset * cs,wint_t ch)11514297a3b0SGarrett D'Amore CHadd(struct parse *p, cset *cs, wint_t ch)
11524297a3b0SGarrett D'Amore {
11534297a3b0SGarrett D'Amore 	wint_t nch, *newwides;
11544297a3b0SGarrett D'Amore 	assert(ch >= 0);
11554297a3b0SGarrett D'Amore 	if (ch < NC)
11564297a3b0SGarrett D'Amore 		cs->bmp[ch >> 3] |= 1 << (ch & 7);
11574297a3b0SGarrett D'Amore 	else {
11584297a3b0SGarrett D'Amore 		newwides = realloc(cs->wides, (cs->nwides + 1) *
11594297a3b0SGarrett D'Amore 		    sizeof (*cs->wides));
11604297a3b0SGarrett D'Amore 		if (newwides == NULL) {
11614297a3b0SGarrett D'Amore 			SETERROR(REG_ESPACE);
11624297a3b0SGarrett D'Amore 			return;
11634297a3b0SGarrett D'Amore 		}
11644297a3b0SGarrett D'Amore 		cs->wides = newwides;
11654297a3b0SGarrett D'Amore 		cs->wides[cs->nwides++] = ch;
11664297a3b0SGarrett D'Amore 	}
11674297a3b0SGarrett D'Amore 	if (cs->icase) {
11684297a3b0SGarrett D'Amore 		if ((nch = towlower(ch)) < NC)
11694297a3b0SGarrett D'Amore 			cs->bmp[nch >> 3] |= 1 << (nch & 7);
11704297a3b0SGarrett D'Amore 		if ((nch = towupper(ch)) < NC)
11714297a3b0SGarrett D'Amore 			cs->bmp[nch >> 3] |= 1 << (nch & 7);
11724297a3b0SGarrett D'Amore 	}
11734297a3b0SGarrett D'Amore }
11744297a3b0SGarrett D'Amore 
11754297a3b0SGarrett D'Amore /*
11764297a3b0SGarrett D'Amore  * CHaddrange - add all characters in the range [min,max] to a character set.
11774297a3b0SGarrett D'Amore  */
11784297a3b0SGarrett D'Amore static void
CHaddrange(struct parse * p,cset * cs,wint_t min,wint_t max)11794297a3b0SGarrett D'Amore CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max)
11804297a3b0SGarrett D'Amore {
11814297a3b0SGarrett D'Amore 	crange *newranges;
11824297a3b0SGarrett D'Amore 
11834297a3b0SGarrett D'Amore 	for (; min < NC && min <= max; min++)
11844297a3b0SGarrett D'Amore 		CHadd(p, cs, min);
11854297a3b0SGarrett D'Amore 	if (min >= max)
11864297a3b0SGarrett D'Amore 		return;
11874297a3b0SGarrett D'Amore 	newranges = realloc(cs->ranges, (cs->nranges + 1) *
11884297a3b0SGarrett D'Amore 	    sizeof (*cs->ranges));
11894297a3b0SGarrett D'Amore 	if (newranges == NULL) {
11904297a3b0SGarrett D'Amore 		SETERROR(REG_ESPACE);
11914297a3b0SGarrett D'Amore 		return;
11924297a3b0SGarrett D'Amore 	}
11934297a3b0SGarrett D'Amore 	cs->ranges = newranges;
11944297a3b0SGarrett D'Amore 	cs->ranges[cs->nranges].min = min;
11954297a3b0SGarrett D'Amore 	cs->ranges[cs->nranges].min = max;
11964297a3b0SGarrett D'Amore 	cs->nranges++;
11974297a3b0SGarrett D'Amore }
11984297a3b0SGarrett D'Amore 
11994297a3b0SGarrett D'Amore /*
12004297a3b0SGarrett D'Amore  * CHaddtype - add all characters of a certain type to a character set.
12014297a3b0SGarrett D'Amore  */
12024297a3b0SGarrett D'Amore static void
CHaddtype(struct parse * p,cset * cs,wctype_t wct)12034297a3b0SGarrett D'Amore CHaddtype(struct parse *p, cset *cs, wctype_t wct)
12044297a3b0SGarrett D'Amore {
12054297a3b0SGarrett D'Amore 	wint_t i;
12064297a3b0SGarrett D'Amore 	wctype_t *newtypes;
12074297a3b0SGarrett D'Amore 
12084297a3b0SGarrett D'Amore 	for (i = 0; i < NC; i++)
12094297a3b0SGarrett D'Amore 		if (iswctype(i, wct))
12104297a3b0SGarrett D'Amore 			CHadd(p, cs, i);
12114297a3b0SGarrett D'Amore 	newtypes = realloc(cs->types, (cs->ntypes + 1) *
12124297a3b0SGarrett D'Amore 	    sizeof (*cs->types));
12134297a3b0SGarrett D'Amore 	if (newtypes == NULL) {
12144297a3b0SGarrett D'Amore 		SETERROR(REG_ESPACE);
12154297a3b0SGarrett D'Amore 		return;
12164297a3b0SGarrett D'Amore 	}
12174297a3b0SGarrett D'Amore 	cs->types = newtypes;
12184297a3b0SGarrett D'Amore 	cs->types[cs->ntypes++] = wct;
12194297a3b0SGarrett D'Amore }
12204297a3b0SGarrett D'Amore 
12214297a3b0SGarrett D'Amore /*
12224297a3b0SGarrett D'Amore  * dupl - emit a duplicate of a bunch of sops
12234297a3b0SGarrett D'Amore  */
12244297a3b0SGarrett D'Amore static sopno			/* start of duplicate */
dupl(struct parse * p,sopno start,sopno finish)12254297a3b0SGarrett D'Amore dupl(struct parse *p,
12264297a3b0SGarrett D'Amore 	sopno start,		/* from here */
12274297a3b0SGarrett D'Amore 	sopno finish)		/* to this less one */
12284297a3b0SGarrett D'Amore {
12294297a3b0SGarrett D'Amore 	sopno ret = HERE();
12304297a3b0SGarrett D'Amore 	sopno len = finish - start;
12314297a3b0SGarrett D'Amore 
12324297a3b0SGarrett D'Amore 	assert(finish >= start);
12334297a3b0SGarrett D'Amore 	if (len == 0)
12344297a3b0SGarrett D'Amore 		return (ret);
12354297a3b0SGarrett D'Amore 	enlarge(p, p->ssize + len);	/* this many unexpected additions */
12364297a3b0SGarrett D'Amore 	assert(p->ssize >= p->slen + len);
12374297a3b0SGarrett D'Amore 	(void) memcpy((char *)(p->strip + p->slen),
12384297a3b0SGarrett D'Amore 	    (char *)(p->strip + start), (size_t)len*sizeof (sop));
12394297a3b0SGarrett D'Amore 	p->slen += len;
12404297a3b0SGarrett D'Amore 	return (ret);
12414297a3b0SGarrett D'Amore }
12424297a3b0SGarrett D'Amore 
12434297a3b0SGarrett D'Amore /*
12444297a3b0SGarrett D'Amore  * doemit - emit a strip operator
12454297a3b0SGarrett D'Amore  *
12464297a3b0SGarrett D'Amore  * It might seem better to implement this as a macro with a function as
12474297a3b0SGarrett D'Amore  * hard-case backup, but it's just too big and messy unless there are
12484297a3b0SGarrett D'Amore  * some changes to the data structures.  Maybe later.
12494297a3b0SGarrett D'Amore  */
12504297a3b0SGarrett D'Amore static void
doemit(struct parse * p,sop op,size_t opnd)12514297a3b0SGarrett D'Amore doemit(struct parse *p, sop op, size_t opnd)
12524297a3b0SGarrett D'Amore {
12534297a3b0SGarrett D'Amore 	/* avoid making error situations worse */
12544297a3b0SGarrett D'Amore 	if (p->error != 0)
12554297a3b0SGarrett D'Amore 		return;
12564297a3b0SGarrett D'Amore 
12574297a3b0SGarrett D'Amore 	/* deal with oversize operands ("can't happen", more or less) */
12584297a3b0SGarrett D'Amore 	assert(opnd < 1<<OPSHIFT);
12594297a3b0SGarrett D'Amore 
12604297a3b0SGarrett D'Amore 	/* deal with undersized strip */
12614297a3b0SGarrett D'Amore 	if (p->slen >= p->ssize)
12624297a3b0SGarrett D'Amore 		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
12634297a3b0SGarrett D'Amore 	assert(p->slen < p->ssize);
12644297a3b0SGarrett D'Amore 
12654297a3b0SGarrett D'Amore 	/* finally, it's all reduced to the easy case */
12664297a3b0SGarrett D'Amore 	p->strip[p->slen++] = SOP(op, opnd);
12674297a3b0SGarrett D'Amore }
12684297a3b0SGarrett D'Amore 
12694297a3b0SGarrett D'Amore /*
12704297a3b0SGarrett D'Amore  * doinsert - insert a sop into the strip
12714297a3b0SGarrett D'Amore  */
12724297a3b0SGarrett D'Amore static void
doinsert(struct parse * p,sop op,size_t opnd,sopno pos)12734297a3b0SGarrett D'Amore doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
12744297a3b0SGarrett D'Amore {
12754297a3b0SGarrett D'Amore 	sopno sn;
12764297a3b0SGarrett D'Amore 	sop s;
12774297a3b0SGarrett D'Amore 	int i;
12784297a3b0SGarrett D'Amore 
12794297a3b0SGarrett D'Amore 	/* avoid making error situations worse */
12804297a3b0SGarrett D'Amore 	if (p->error != 0)
12814297a3b0SGarrett D'Amore 		return;
12824297a3b0SGarrett D'Amore 
12834297a3b0SGarrett D'Amore 	sn = HERE();
12844297a3b0SGarrett D'Amore 	EMIT(op, opnd);		/* do checks, ensure space */
12854297a3b0SGarrett D'Amore 	assert(HERE() == sn+1);
12864297a3b0SGarrett D'Amore 	s = p->strip[sn];
12874297a3b0SGarrett D'Amore 
12884297a3b0SGarrett D'Amore 	/* adjust paren pointers */
12894297a3b0SGarrett D'Amore 	assert(pos > 0);
12904297a3b0SGarrett D'Amore 	for (i = 1; i < NPAREN; i++) {
12914297a3b0SGarrett D'Amore 		if (p->pbegin[i] >= pos) {
12924297a3b0SGarrett D'Amore 			p->pbegin[i]++;
12934297a3b0SGarrett D'Amore 		}
12944297a3b0SGarrett D'Amore 		if (p->pend[i] >= pos) {
12954297a3b0SGarrett D'Amore 			p->pend[i]++;
12964297a3b0SGarrett D'Amore 		}
12974297a3b0SGarrett D'Amore 	}
12984297a3b0SGarrett D'Amore 
1299eda71b4aSGarrett D'Amore 	(void) memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
13004297a3b0SGarrett D'Amore 	    (HERE()-pos-1)*sizeof (sop));
13014297a3b0SGarrett D'Amore 	p->strip[pos] = s;
13024297a3b0SGarrett D'Amore }
13034297a3b0SGarrett D'Amore 
13044297a3b0SGarrett D'Amore /*
13054297a3b0SGarrett D'Amore  * dofwd - complete a forward reference
13064297a3b0SGarrett D'Amore  */
13074297a3b0SGarrett D'Amore static void
dofwd(struct parse * p,sopno pos,sop value)13084297a3b0SGarrett D'Amore dofwd(struct parse *p, sopno pos, sop value)
13094297a3b0SGarrett D'Amore {
13104297a3b0SGarrett D'Amore 	/* avoid making error situations worse */
13114297a3b0SGarrett D'Amore 	if (p->error != 0)
13124297a3b0SGarrett D'Amore 		return;
13134297a3b0SGarrett D'Amore 
13144297a3b0SGarrett D'Amore 	assert(value < 1<<OPSHIFT);
13154297a3b0SGarrett D'Amore 	p->strip[pos] = OP(p->strip[pos]) | value;
13164297a3b0SGarrett D'Amore }
13174297a3b0SGarrett D'Amore 
13184297a3b0SGarrett D'Amore /*
13194297a3b0SGarrett D'Amore  * enlarge - enlarge the strip
13204297a3b0SGarrett D'Amore  */
13214297a3b0SGarrett D'Amore static void
enlarge(struct parse * p,sopno size)13224297a3b0SGarrett D'Amore enlarge(struct parse *p, sopno size)
13234297a3b0SGarrett D'Amore {
13244297a3b0SGarrett D'Amore 	sop *sp;
13254297a3b0SGarrett D'Amore 
13264297a3b0SGarrett D'Amore 	if (p->ssize >= size)
13274297a3b0SGarrett D'Amore 		return;
13284297a3b0SGarrett D'Amore 
13294297a3b0SGarrett D'Amore 	sp = (sop *)realloc(p->strip, size*sizeof (sop));
13304297a3b0SGarrett D'Amore 	if (sp == NULL) {
13314297a3b0SGarrett D'Amore 		SETERROR(REG_ESPACE);
13324297a3b0SGarrett D'Amore 		return;
13334297a3b0SGarrett D'Amore 	}
13344297a3b0SGarrett D'Amore 	p->strip = sp;
13354297a3b0SGarrett D'Amore 	p->ssize = size;
13364297a3b0SGarrett D'Amore }
13374297a3b0SGarrett D'Amore 
13384297a3b0SGarrett D'Amore /*
13394297a3b0SGarrett D'Amore  * stripsnug - compact the strip
13404297a3b0SGarrett D'Amore  */
13414297a3b0SGarrett D'Amore static void
stripsnug(struct parse * p,struct re_guts * g)13424297a3b0SGarrett D'Amore stripsnug(struct parse *p, struct re_guts *g)
13434297a3b0SGarrett D'Amore {
13444297a3b0SGarrett D'Amore 	g->nstates = p->slen;
13454297a3b0SGarrett D'Amore 	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof (sop));
13464297a3b0SGarrett D'Amore 	if (g->strip == NULL) {
13474297a3b0SGarrett D'Amore 		SETERROR(REG_ESPACE);
13484297a3b0SGarrett D'Amore 		g->strip = p->strip;
13494297a3b0SGarrett D'Amore 	}
13504297a3b0SGarrett D'Amore }
13514297a3b0SGarrett D'Amore 
13524297a3b0SGarrett D'Amore /*
13534297a3b0SGarrett D'Amore  * findmust - fill in must and mlen with longest mandatory literal string
13544297a3b0SGarrett D'Amore  *
13554297a3b0SGarrett D'Amore  * This algorithm could do fancy things like analyzing the operands of |
13564297a3b0SGarrett D'Amore  * for common subsequences.  Someday.  This code is simple and finds most
13574297a3b0SGarrett D'Amore  * of the interesting cases.
13584297a3b0SGarrett D'Amore  *
13594297a3b0SGarrett D'Amore  * Note that must and mlen got initialized during setup.
13604297a3b0SGarrett D'Amore  */
13614297a3b0SGarrett D'Amore static void
findmust(struct parse * p,struct re_guts * g)13624297a3b0SGarrett D'Amore findmust(struct parse *p, struct re_guts *g)
13634297a3b0SGarrett D'Amore {
13644297a3b0SGarrett D'Amore 	sop *scan;
13654297a3b0SGarrett D'Amore 	sop *start;
13664297a3b0SGarrett D'Amore 	sop *newstart;
13674297a3b0SGarrett D'Amore 	sopno newlen;
13684297a3b0SGarrett D'Amore 	sop s;
13694297a3b0SGarrett D'Amore 	char *cp;
13704297a3b0SGarrett D'Amore 	int offset;
13714297a3b0SGarrett D'Amore 	char buf[MB_LEN_MAX];
13724297a3b0SGarrett D'Amore 	size_t clen;
13734297a3b0SGarrett D'Amore 	mbstate_t mbs;
1374*62c3776aSGarrett D'Amore 	locale_t loc = uselocale(NULL);
13754297a3b0SGarrett D'Amore 
13764297a3b0SGarrett D'Amore 	/* avoid making error situations worse */
13774297a3b0SGarrett D'Amore 	if (p->error != 0)
13784297a3b0SGarrett D'Amore 		return;
13794297a3b0SGarrett D'Amore 
13804297a3b0SGarrett D'Amore 	/*
13814297a3b0SGarrett D'Amore 	 * It's not generally safe to do a ``char'' substring search on
13824297a3b0SGarrett D'Amore 	 * multibyte character strings, but it's safe for at least
13834297a3b0SGarrett D'Amore 	 * UTF-8 (see RFC 3629).
13844297a3b0SGarrett D'Amore 	 */
13854297a3b0SGarrett D'Amore 	if (MB_CUR_MAX > 1 &&
1386*62c3776aSGarrett D'Amore 	    strcmp(loc->runelocale->__encoding, "UTF-8") != 0)
13874297a3b0SGarrett D'Amore 		return;
13884297a3b0SGarrett D'Amore 
13894297a3b0SGarrett D'Amore 	/* find the longest OCHAR sequence in strip */
13904297a3b0SGarrett D'Amore 	newlen = 0;
13914297a3b0SGarrett D'Amore 	offset = 0;
13924297a3b0SGarrett D'Amore 	g->moffset = 0;
13934297a3b0SGarrett D'Amore 	scan = g->strip + 1;
13944297a3b0SGarrett D'Amore 	do {
13954297a3b0SGarrett D'Amore 		s = *scan++;
13964297a3b0SGarrett D'Amore 		switch (OP(s)) {
13974297a3b0SGarrett D'Amore 		case OCHAR:		/* sequence member */
13984297a3b0SGarrett D'Amore 			if (newlen == 0) {		/* new sequence */
13994297a3b0SGarrett D'Amore 				(void) memset(&mbs, 0, sizeof (mbs));
14004297a3b0SGarrett D'Amore 				newstart = scan - 1;
14014297a3b0SGarrett D'Amore 			}
14024297a3b0SGarrett D'Amore 			clen = wcrtomb(buf, OPND(s), &mbs);
14034297a3b0SGarrett D'Amore 			if (clen == (size_t)-1)
14044297a3b0SGarrett D'Amore 				goto toohard;
14054297a3b0SGarrett D'Amore 			newlen += clen;
14064297a3b0SGarrett D'Amore 			break;
14074297a3b0SGarrett D'Amore 		case OPLUS_:		/* things that don't break one */
14084297a3b0SGarrett D'Amore 		case OLPAREN:
14094297a3b0SGarrett D'Amore 		case ORPAREN:
14104297a3b0SGarrett D'Amore 			break;
14114297a3b0SGarrett D'Amore 		case OQUEST_:		/* things that must be skipped */
14124297a3b0SGarrett D'Amore 		case OCH_:
14134297a3b0SGarrett D'Amore 			offset = altoffset(scan, offset);
14144297a3b0SGarrett D'Amore 			scan--;
14154297a3b0SGarrett D'Amore 			do {
14164297a3b0SGarrett D'Amore 				scan += OPND(s);
14174297a3b0SGarrett D'Amore 				s = *scan;
14184297a3b0SGarrett D'Amore 				/* assert() interferes w debug printouts */
14194297a3b0SGarrett D'Amore 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
14204297a3b0SGarrett D'Amore 				    OP(s) != OOR2) {
14214297a3b0SGarrett D'Amore 					g->iflags |= BAD;
14224297a3b0SGarrett D'Amore 					return;
14234297a3b0SGarrett D'Amore 				}
14244297a3b0SGarrett D'Amore 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
14254297a3b0SGarrett D'Amore 			/* FALLTHROUGH */
14264297a3b0SGarrett D'Amore 		case OBOW:		/* things that break a sequence */
14274297a3b0SGarrett D'Amore 		case OEOW:
14284297a3b0SGarrett D'Amore 		case OBOL:
14294297a3b0SGarrett D'Amore 		case OEOL:
14304297a3b0SGarrett D'Amore 		case O_QUEST:
14314297a3b0SGarrett D'Amore 		case O_CH:
14324297a3b0SGarrett D'Amore 		case OEND:
14334297a3b0SGarrett D'Amore 			if (newlen > g->mlen) {		/* ends one */
14344297a3b0SGarrett D'Amore 				start = newstart;
14354297a3b0SGarrett D'Amore 				g->mlen = newlen;
14364297a3b0SGarrett D'Amore 				if (offset > -1) {
14374297a3b0SGarrett D'Amore 					g->moffset += offset;
14384297a3b0SGarrett D'Amore 					offset = newlen;
14394297a3b0SGarrett D'Amore 				} else
14404297a3b0SGarrett D'Amore 					g->moffset = offset;
14414297a3b0SGarrett D'Amore 			} else {
14424297a3b0SGarrett D'Amore 				if (offset > -1)
14434297a3b0SGarrett D'Amore 					offset += newlen;
14444297a3b0SGarrett D'Amore 			}
14454297a3b0SGarrett D'Amore 			newlen = 0;
14464297a3b0SGarrett D'Amore 			break;
14474297a3b0SGarrett D'Amore 		case OANY:
14484297a3b0SGarrett D'Amore 			if (newlen > g->mlen) {		/* ends one */
14494297a3b0SGarrett D'Amore 				start = newstart;
14504297a3b0SGarrett D'Amore 				g->mlen = newlen;
14514297a3b0SGarrett D'Amore 				if (offset > -1) {
14524297a3b0SGarrett D'Amore 					g->moffset += offset;
14534297a3b0SGarrett D'Amore 					offset = newlen;
14544297a3b0SGarrett D'Amore 				} else
14554297a3b0SGarrett D'Amore 					g->moffset = offset;
14564297a3b0SGarrett D'Amore 			} else {
14574297a3b0SGarrett D'Amore 				if (offset > -1)
14584297a3b0SGarrett D'Amore 					offset += newlen;
14594297a3b0SGarrett D'Amore 			}
14604297a3b0SGarrett D'Amore 			if (offset > -1)
14614297a3b0SGarrett D'Amore 				offset++;
14624297a3b0SGarrett D'Amore 			newlen = 0;
14634297a3b0SGarrett D'Amore 			break;
14644297a3b0SGarrett D'Amore 		case OANYOF:		/* may or may not invalidate offset */
14654297a3b0SGarrett D'Amore 			/* First, everything as OANY */
14664297a3b0SGarrett D'Amore 			if (newlen > g->mlen) {		/* ends one */
14674297a3b0SGarrett D'Amore 				start = newstart;
14684297a3b0SGarrett D'Amore 				g->mlen = newlen;
14694297a3b0SGarrett D'Amore 				if (offset > -1) {
14704297a3b0SGarrett D'Amore 					g->moffset += offset;
14714297a3b0SGarrett D'Amore 					offset = newlen;
14724297a3b0SGarrett D'Amore 				} else
14734297a3b0SGarrett D'Amore 					g->moffset = offset;
14744297a3b0SGarrett D'Amore 			} else {
14754297a3b0SGarrett D'Amore 				if (offset > -1)
14764297a3b0SGarrett D'Amore 					offset += newlen;
14774297a3b0SGarrett D'Amore 			}
14784297a3b0SGarrett D'Amore 			if (offset > -1)
14794297a3b0SGarrett D'Amore 				offset++;
14804297a3b0SGarrett D'Amore 			newlen = 0;
14814297a3b0SGarrett D'Amore 			break;
14824297a3b0SGarrett D'Amore 		toohard:
14834297a3b0SGarrett D'Amore 		default:
14844297a3b0SGarrett D'Amore 			/*
14854297a3b0SGarrett D'Amore 			 * Anything here makes it impossible or too hard
14864297a3b0SGarrett D'Amore 			 * to calculate the offset -- so we give up;
14874297a3b0SGarrett D'Amore 			 * save the last known good offset, in case the
14884297a3b0SGarrett D'Amore 			 * must sequence doesn't occur later.
14894297a3b0SGarrett D'Amore 			 */
14904297a3b0SGarrett D'Amore 			if (newlen > g->mlen) {		/* ends one */
14914297a3b0SGarrett D'Amore 				start = newstart;
14924297a3b0SGarrett D'Amore 				g->mlen = newlen;
14934297a3b0SGarrett D'Amore 				if (offset > -1)
14944297a3b0SGarrett D'Amore 					g->moffset += offset;
14954297a3b0SGarrett D'Amore 				else
14964297a3b0SGarrett D'Amore 					g->moffset = offset;
14974297a3b0SGarrett D'Amore 			}
14984297a3b0SGarrett D'Amore 			offset = -1;
14994297a3b0SGarrett D'Amore 			newlen = 0;
15004297a3b0SGarrett D'Amore 			break;
15014297a3b0SGarrett D'Amore 		}
15024297a3b0SGarrett D'Amore 	} while (OP(s) != OEND);
15034297a3b0SGarrett D'Amore 
15044297a3b0SGarrett D'Amore 	if (g->mlen == 0) {		/* there isn't one */
15054297a3b0SGarrett D'Amore 		g->moffset = -1;
15064297a3b0SGarrett D'Amore 		return;
15074297a3b0SGarrett D'Amore 	}
15084297a3b0SGarrett D'Amore 
15094297a3b0SGarrett D'Amore 	/* turn it into a character string */
15104297a3b0SGarrett D'Amore 	g->must = malloc((size_t)g->mlen + 1);
15114297a3b0SGarrett D'Amore 	if (g->must == NULL) {		/* argh; just forget it */
15124297a3b0SGarrett D'Amore 		g->mlen = 0;
15134297a3b0SGarrett D'Amore 		g->moffset = -1;
15144297a3b0SGarrett D'Amore 		return;
15154297a3b0SGarrett D'Amore 	}
15164297a3b0SGarrett D'Amore 	cp = g->must;
15174297a3b0SGarrett D'Amore 	scan = start;
15184297a3b0SGarrett D'Amore 	(void) memset(&mbs, 0, sizeof (mbs));
15194297a3b0SGarrett D'Amore 	while (cp < g->must + g->mlen) {
15204297a3b0SGarrett D'Amore 		while (OP(s = *scan++) != OCHAR)
15214297a3b0SGarrett D'Amore 			continue;
15224297a3b0SGarrett D'Amore 		clen = wcrtomb(cp, OPND(s), &mbs);
15234297a3b0SGarrett D'Amore 		assert(clen != (size_t)-1);
15244297a3b0SGarrett D'Amore 		cp += clen;
15254297a3b0SGarrett D'Amore 	}
15264297a3b0SGarrett D'Amore 	assert(cp == g->must + g->mlen);
15274297a3b0SGarrett D'Amore 	*cp++ = '\0';		/* just on general principles */
15284297a3b0SGarrett D'Amore }
15294297a3b0SGarrett D'Amore 
15304297a3b0SGarrett D'Amore /*
15314297a3b0SGarrett D'Amore  * altoffset - choose biggest offset among multiple choices
15324297a3b0SGarrett D'Amore  *
15334297a3b0SGarrett D'Amore  * Compute, recursively if necessary, the largest offset among multiple
15344297a3b0SGarrett D'Amore  * re paths.
15354297a3b0SGarrett D'Amore  */
15364297a3b0SGarrett D'Amore static int
altoffset(sop * scan,int offset)15374297a3b0SGarrett D'Amore altoffset(sop *scan, int offset)
15384297a3b0SGarrett D'Amore {
15394297a3b0SGarrett D'Amore 	int largest;
15404297a3b0SGarrett D'Amore 	int try;
15414297a3b0SGarrett D'Amore 	sop s;
15424297a3b0SGarrett D'Amore 
15434297a3b0SGarrett D'Amore 	/* If we gave up already on offsets, return */
15444297a3b0SGarrett D'Amore 	if (offset == -1)
15454297a3b0SGarrett D'Amore 		return (-1);
15464297a3b0SGarrett D'Amore 
15474297a3b0SGarrett D'Amore 	largest = 0;
15484297a3b0SGarrett D'Amore 	try = 0;
15494297a3b0SGarrett D'Amore 	s = *scan++;
15504297a3b0SGarrett D'Amore 	while (OP(s) != O_QUEST && OP(s) != O_CH) {
15514297a3b0SGarrett D'Amore 		switch (OP(s)) {
15524297a3b0SGarrett D'Amore 		case OOR1:
15534297a3b0SGarrett D'Amore 			if (try > largest)
15544297a3b0SGarrett D'Amore 				largest = try;
15554297a3b0SGarrett D'Amore 			try = 0;
15564297a3b0SGarrett D'Amore 			break;
15574297a3b0SGarrett D'Amore 		case OQUEST_:
15584297a3b0SGarrett D'Amore 		case OCH_:
15594297a3b0SGarrett D'Amore 			try = altoffset(scan, try);
15604297a3b0SGarrett D'Amore 			if (try == -1)
15614297a3b0SGarrett D'Amore 				return (-1);
15624297a3b0SGarrett D'Amore 			scan--;
15634297a3b0SGarrett D'Amore 			do {
15644297a3b0SGarrett D'Amore 				scan += OPND(s);
15654297a3b0SGarrett D'Amore 				s = *scan;
15664297a3b0SGarrett D'Amore 				if (OP(s) != O_QUEST && OP(s) != O_CH &&
15674297a3b0SGarrett D'Amore 				    OP(s) != OOR2)
15684297a3b0SGarrett D'Amore 					return (-1);
15694297a3b0SGarrett D'Amore 			} while (OP(s) != O_QUEST && OP(s) != O_CH);
15704297a3b0SGarrett D'Amore 			/*
15714297a3b0SGarrett D'Amore 			 * We must skip to the next position, or we'll
15724297a3b0SGarrett D'Amore 			 * leave altoffset() too early.
15734297a3b0SGarrett D'Amore 			 */
15744297a3b0SGarrett D'Amore 			scan++;
15754297a3b0SGarrett D'Amore 			break;
15764297a3b0SGarrett D'Amore 		case OANYOF:
15774297a3b0SGarrett D'Amore 		case OCHAR:
15784297a3b0SGarrett D'Amore 		case OANY:
15794297a3b0SGarrett D'Amore 			try++;
15804297a3b0SGarrett D'Amore 			/*FALLTHRU*/
15814297a3b0SGarrett D'Amore 		case OBOW:
15824297a3b0SGarrett D'Amore 		case OEOW:
15834297a3b0SGarrett D'Amore 		case OLPAREN:
15844297a3b0SGarrett D'Amore 		case ORPAREN:
15854297a3b0SGarrett D'Amore 		case OOR2:
15864297a3b0SGarrett D'Amore 			break;
15874297a3b0SGarrett D'Amore 		default:
15884297a3b0SGarrett D'Amore 			try = -1;
15894297a3b0SGarrett D'Amore 			break;
15904297a3b0SGarrett D'Amore 		}
15914297a3b0SGarrett D'Amore 		if (try == -1)
15924297a3b0SGarrett D'Amore 			return (-1);
15934297a3b0SGarrett D'Amore 		s = *scan++;
15944297a3b0SGarrett D'Amore 	}
15954297a3b0SGarrett D'Amore 
15964297a3b0SGarrett D'Amore 	if (try > largest)
15974297a3b0SGarrett D'Amore 		largest = try;
15984297a3b0SGarrett D'Amore 
15994297a3b0SGarrett D'Amore 	return (largest+offset);
16004297a3b0SGarrett D'Amore }
16014297a3b0SGarrett D'Amore 
16024297a3b0SGarrett D'Amore /*
16034297a3b0SGarrett D'Amore  * computejumps - compute char jumps for BM scan
16044297a3b0SGarrett D'Amore  *
16054297a3b0SGarrett D'Amore  * This algorithm assumes g->must exists and is has size greater than
16064297a3b0SGarrett D'Amore  * zero. It's based on the algorithm found on Computer Algorithms by
16074297a3b0SGarrett D'Amore  * Sara Baase.
16084297a3b0SGarrett D'Amore  *
16094297a3b0SGarrett D'Amore  * A char jump is the number of characters one needs to jump based on
16104297a3b0SGarrett D'Amore  * the value of the character from the text that was mismatched.
16114297a3b0SGarrett D'Amore  */
16124297a3b0SGarrett D'Amore static void
computejumps(struct parse * p,struct re_guts * g)16134297a3b0SGarrett D'Amore computejumps(struct parse *p, struct re_guts *g)
16144297a3b0SGarrett D'Amore {
16154297a3b0SGarrett D'Amore 	int ch;
16164297a3b0SGarrett D'Amore 	int mindex;
16174297a3b0SGarrett D'Amore 
16184297a3b0SGarrett D'Amore 	/* Avoid making errors worse */
16194297a3b0SGarrett D'Amore 	if (p->error != 0)
16204297a3b0SGarrett D'Amore 		return;
16214297a3b0SGarrett D'Amore 
16224297a3b0SGarrett D'Amore 	g->charjump = (int *)malloc((NC + 1) * sizeof (int));
16234297a3b0SGarrett D'Amore 	if (g->charjump == NULL)	/* Not a fatal error */
16244297a3b0SGarrett D'Amore 		return;
16254297a3b0SGarrett D'Amore 	/* Adjust for signed chars, if necessary */
16264297a3b0SGarrett D'Amore 	g->charjump = &g->charjump[-(CHAR_MIN)];
16274297a3b0SGarrett D'Amore 
16284297a3b0SGarrett D'Amore 	/*
16294297a3b0SGarrett D'Amore 	 * If the character does not exist in the pattern, the jump
16304297a3b0SGarrett D'Amore 	 * is equal to the number of characters in the pattern.
16314297a3b0SGarrett D'Amore 	 */
16324297a3b0SGarrett D'Amore 	for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
16334297a3b0SGarrett D'Amore 		g->charjump[ch] = g->mlen;
16344297a3b0SGarrett D'Amore 
16354297a3b0SGarrett D'Amore 	/*
16364297a3b0SGarrett D'Amore 	 * If the character does exist, compute the jump that would
16374297a3b0SGarrett D'Amore 	 * take us to the last character in the pattern equal to it
16384297a3b0SGarrett D'Amore 	 * (notice that we match right to left, so that last character
16394297a3b0SGarrett D'Amore 	 * is the first one that would be matched).
16404297a3b0SGarrett D'Amore 	 */
16414297a3b0SGarrett D'Amore 	for (mindex = 0; mindex < g->mlen; mindex++)
16424297a3b0SGarrett D'Amore 		g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
16434297a3b0SGarrett D'Amore }
16444297a3b0SGarrett D'Amore 
16454297a3b0SGarrett D'Amore /*
16464297a3b0SGarrett D'Amore  * computematchjumps - compute match jumps for BM scan
16474297a3b0SGarrett D'Amore  *
16484297a3b0SGarrett D'Amore  * This algorithm assumes g->must exists and is has size greater than
16494297a3b0SGarrett D'Amore  * zero. It's based on the algorithm found on Computer Algorithms by
16504297a3b0SGarrett D'Amore  * Sara Baase.
16514297a3b0SGarrett D'Amore  *
16524297a3b0SGarrett D'Amore  * A match jump is the number of characters one needs to advance based
16534297a3b0SGarrett D'Amore  * on the already-matched suffix.
16544297a3b0SGarrett D'Amore  * Notice that all values here are minus (g->mlen-1), because of the way
16554297a3b0SGarrett D'Amore  * the search algorithm works.
16564297a3b0SGarrett D'Amore  */
16574297a3b0SGarrett D'Amore static void
computematchjumps(struct parse * p,struct re_guts * g)16584297a3b0SGarrett D'Amore computematchjumps(struct parse *p, struct re_guts *g)
16594297a3b0SGarrett D'Amore {
16604297a3b0SGarrett D'Amore 	int mindex;		/* General "must" iterator */
16614297a3b0SGarrett D'Amore 	int suffix;		/* Keeps track of matching suffix */
16624297a3b0SGarrett D'Amore 	int ssuffix;		/* Keeps track of suffixes' suffix */
16634297a3b0SGarrett D'Amore 	int *pmatches;
16644297a3b0SGarrett D'Amore 				/*
16654297a3b0SGarrett D'Amore 				 * pmatches[k] points to the next i
16664297a3b0SGarrett D'Amore 				 * such that i+1...mlen is a substring
16674297a3b0SGarrett D'Amore 				 * of k+1...k+mlen-i-1
16684297a3b0SGarrett D'Amore 				 */
16694297a3b0SGarrett D'Amore 
16704297a3b0SGarrett D'Amore 	/* Avoid making errors worse */
16714297a3b0SGarrett D'Amore 	if (p->error != 0)
16724297a3b0SGarrett D'Amore 		return;
16734297a3b0SGarrett D'Amore 
16744297a3b0SGarrett D'Amore 	pmatches = (int *)malloc(g->mlen * sizeof (unsigned int));
16754297a3b0SGarrett D'Amore 	if (pmatches == NULL) {
16764297a3b0SGarrett D'Amore 		g->matchjump = NULL;
16774297a3b0SGarrett D'Amore 		return;
16784297a3b0SGarrett D'Amore 	}
16794297a3b0SGarrett D'Amore 
16804297a3b0SGarrett D'Amore 	g->matchjump = (int *)malloc(g->mlen * sizeof (unsigned int));
16814297a3b0SGarrett D'Amore 	if (g->matchjump == NULL)	/* Not a fatal error */
16824297a3b0SGarrett D'Amore 		return;
16834297a3b0SGarrett D'Amore 
16844297a3b0SGarrett D'Amore 	/* Set maximum possible jump for each character in the pattern */
16854297a3b0SGarrett D'Amore 	for (mindex = 0; mindex < g->mlen; mindex++)
16864297a3b0SGarrett D'Amore 		g->matchjump[mindex] = 2*g->mlen - mindex - 1;
16874297a3b0SGarrett D'Amore 
16884297a3b0SGarrett D'Amore 	/* Compute pmatches[] */
16894297a3b0SGarrett D'Amore 	for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
16904297a3b0SGarrett D'Amore 	    mindex--, suffix--) {
16914297a3b0SGarrett D'Amore 		pmatches[mindex] = suffix;
16924297a3b0SGarrett D'Amore 
16934297a3b0SGarrett D'Amore 		/*
16944297a3b0SGarrett D'Amore 		 * If a mismatch is found, interrupting the substring,
16954297a3b0SGarrett D'Amore 		 * compute the matchjump for that position. If no
16964297a3b0SGarrett D'Amore 		 * mismatch is found, then a text substring mismatched
16974297a3b0SGarrett D'Amore 		 * against the suffix will also mismatch against the
16984297a3b0SGarrett D'Amore 		 * substring.
16994297a3b0SGarrett D'Amore 		 */
17004297a3b0SGarrett D'Amore 		while (suffix < g->mlen && g->must[mindex] != g->must[suffix]) {
17014297a3b0SGarrett D'Amore 			g->matchjump[suffix] = MIN(g->matchjump[suffix],
17024297a3b0SGarrett D'Amore 			    g->mlen - mindex - 1);
17034297a3b0SGarrett D'Amore 			suffix = pmatches[suffix];
17044297a3b0SGarrett D'Amore 		}
17054297a3b0SGarrett D'Amore 	}
17064297a3b0SGarrett D'Amore 
17074297a3b0SGarrett D'Amore 	/*
17084297a3b0SGarrett D'Amore 	 * Compute the matchjump up to the last substring found to jump
17094297a3b0SGarrett D'Amore 	 * to the beginning of the largest must pattern prefix matching
17104297a3b0SGarrett D'Amore 	 * it's own suffix.
17114297a3b0SGarrett D'Amore 	 */
17124297a3b0SGarrett D'Amore 	for (mindex = 0; mindex <= suffix; mindex++)
17134297a3b0SGarrett D'Amore 		g->matchjump[mindex] = MIN(g->matchjump[mindex],
17144297a3b0SGarrett D'Amore 		    g->mlen + suffix - mindex);
17154297a3b0SGarrett D'Amore 
17164297a3b0SGarrett D'Amore 	ssuffix = pmatches[suffix];
17174297a3b0SGarrett D'Amore 	while (suffix < g->mlen) {
17184297a3b0SGarrett D'Amore 		while (suffix <= ssuffix && suffix < g->mlen) {
17194297a3b0SGarrett D'Amore 			g->matchjump[suffix] = MIN(g->matchjump[suffix],
17204297a3b0SGarrett D'Amore 			    g->mlen + ssuffix - suffix);
17214297a3b0SGarrett D'Amore 			suffix++;
17224297a3b0SGarrett D'Amore 		}
17234297a3b0SGarrett D'Amore 		if (suffix < g->mlen)
17244297a3b0SGarrett D'Amore 			ssuffix = pmatches[ssuffix];
17254297a3b0SGarrett D'Amore 	}
17264297a3b0SGarrett D'Amore 
17274297a3b0SGarrett D'Amore 	free(pmatches);
17284297a3b0SGarrett D'Amore }
17294297a3b0SGarrett D'Amore 
17304297a3b0SGarrett D'Amore /*
17314297a3b0SGarrett D'Amore  * pluscount - count + nesting
17324297a3b0SGarrett D'Amore  */
17334297a3b0SGarrett D'Amore static sopno			/* nesting depth */
pluscount(struct parse * p,struct re_guts * g)17344297a3b0SGarrett D'Amore pluscount(struct parse *p, struct re_guts *g)
17354297a3b0SGarrett D'Amore {
17364297a3b0SGarrett D'Amore 	sop *scan;
17374297a3b0SGarrett D'Amore 	sop s;
17384297a3b0SGarrett D'Amore 	sopno plusnest = 0;
17394297a3b0SGarrett D'Amore 	sopno maxnest = 0;
17404297a3b0SGarrett D'Amore 
17414297a3b0SGarrett D'Amore 	if (p->error != 0)
17424297a3b0SGarrett D'Amore 		return (0);	/* there may not be an OEND */
17434297a3b0SGarrett D'Amore 
17444297a3b0SGarrett D'Amore 	scan = g->strip + 1;
17454297a3b0SGarrett D'Amore 	do {
17464297a3b0SGarrett D'Amore 		s = *scan++;
17474297a3b0SGarrett D'Amore 		switch (OP(s)) {
17484297a3b0SGarrett D'Amore 		case OPLUS_:
17494297a3b0SGarrett D'Amore 			plusnest++;
17504297a3b0SGarrett D'Amore 			break;
17514297a3b0SGarrett D'Amore 		case O_PLUS:
17524297a3b0SGarrett D'Amore 			if (plusnest > maxnest)
17534297a3b0SGarrett D'Amore 				maxnest = plusnest;
17544297a3b0SGarrett D'Amore 			plusnest--;
17554297a3b0SGarrett D'Amore 			break;
17564297a3b0SGarrett D'Amore 		}
17574297a3b0SGarrett D'Amore 	} while (OP(s) != OEND);
17584297a3b0SGarrett D'Amore 	if (plusnest != 0)
17594297a3b0SGarrett D'Amore 		g->iflags |= BAD;
17604297a3b0SGarrett D'Amore 	return (maxnest);
17614297a3b0SGarrett D'Amore }
1762