158f0484fSRodney W. Grimes /*- 258f0484fSRodney W. Grimes * Copyright (c) 1992, 1993, 1994 Henry Spencer. 358f0484fSRodney W. Grimes * Copyright (c) 1992, 1993, 1994 458f0484fSRodney W. Grimes * The Regents of the University of California. All rights reserved. 558f0484fSRodney W. Grimes * 658f0484fSRodney W. Grimes * This code is derived from software contributed to Berkeley by 758f0484fSRodney W. Grimes * Henry Spencer. 858f0484fSRodney W. Grimes * 958f0484fSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 1058f0484fSRodney W. Grimes * modification, are permitted provided that the following conditions 1158f0484fSRodney W. Grimes * are met: 1258f0484fSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 1358f0484fSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 1458f0484fSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 1558f0484fSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 1658f0484fSRodney W. Grimes * documentation and/or other materials provided with the distribution. 1758f0484fSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 1858f0484fSRodney W. Grimes * must display the following acknowledgement: 1958f0484fSRodney W. Grimes * This product includes software developed by the University of 2058f0484fSRodney W. Grimes * California, Berkeley and its contributors. 2158f0484fSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 2258f0484fSRodney W. Grimes * may be used to endorse or promote products derived from this software 2358f0484fSRodney W. Grimes * without specific prior written permission. 2458f0484fSRodney W. Grimes * 2558f0484fSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2658f0484fSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2758f0484fSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2858f0484fSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2958f0484fSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3058f0484fSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3158f0484fSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3258f0484fSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3358f0484fSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3458f0484fSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3558f0484fSRodney W. Grimes * SUCH DAMAGE. 3658f0484fSRodney W. Grimes * 3758f0484fSRodney W. Grimes * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 3858f0484fSRodney W. Grimes */ 3958f0484fSRodney W. Grimes 4058f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint) 4158f0484fSRodney W. Grimes static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; 4258f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */ 43333fc21eSDavid E. O'Brien #include <sys/cdefs.h> 44333fc21eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 4558f0484fSRodney W. Grimes 4658f0484fSRodney W. Grimes #include <sys/types.h> 4758f0484fSRodney W. Grimes #include <stdio.h> 4858f0484fSRodney W. Grimes #include <string.h> 4958f0484fSRodney W. Grimes #include <ctype.h> 5058f0484fSRodney W. Grimes #include <limits.h> 5158f0484fSRodney W. Grimes #include <stdlib.h> 5258f0484fSRodney W. Grimes #include <regex.h> 5358f0484fSRodney W. Grimes 54c61cea72SAndrey A. Chernov #include "collate.h" 55c61cea72SAndrey A. Chernov 5658f0484fSRodney W. Grimes #include "utils.h" 5758f0484fSRodney W. Grimes #include "regex2.h" 5858f0484fSRodney W. Grimes 5958f0484fSRodney W. Grimes #include "cclass.h" 6058f0484fSRodney W. Grimes #include "cname.h" 6158f0484fSRodney W. Grimes 6258f0484fSRodney W. Grimes /* 6358f0484fSRodney W. Grimes * parse structure, passed up and down to avoid global variables and 6458f0484fSRodney W. Grimes * other clumsinesses 6558f0484fSRodney W. Grimes */ 6658f0484fSRodney W. Grimes struct parse { 6758f0484fSRodney W. Grimes char *next; /* next character in RE */ 6858f0484fSRodney W. Grimes char *end; /* end of string (-> NUL normally) */ 6958f0484fSRodney W. Grimes int error; /* has an error been seen? */ 7058f0484fSRodney W. Grimes sop *strip; /* malloced strip */ 7158f0484fSRodney W. Grimes sopno ssize; /* malloced strip size (allocated) */ 7258f0484fSRodney W. Grimes sopno slen; /* malloced strip length (used) */ 7358f0484fSRodney W. Grimes int ncsalloc; /* number of csets allocated */ 7458f0484fSRodney W. Grimes struct re_guts *g; 7558f0484fSRodney W. Grimes # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 7658f0484fSRodney W. Grimes sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 7758f0484fSRodney W. Grimes sopno pend[NPAREN]; /* -> ) ([0] unused) */ 7858f0484fSRodney W. Grimes }; 7958f0484fSRodney W. Grimes 8058f0484fSRodney W. Grimes /* ========= begin header generated by ./mkh ========= */ 8158f0484fSRodney W. Grimes #ifdef __cplusplus 8258f0484fSRodney W. Grimes extern "C" { 8358f0484fSRodney W. Grimes #endif 8458f0484fSRodney W. Grimes 8558f0484fSRodney W. Grimes /* === regcomp.c === */ 86c05ac53bSDavid E. O'Brien static void p_ere(struct parse *p, int stop); 87c05ac53bSDavid E. O'Brien static void p_ere_exp(struct parse *p); 88c05ac53bSDavid E. O'Brien static void p_str(struct parse *p); 89c05ac53bSDavid E. O'Brien static void p_bre(struct parse *p, int end1, int end2); 90c05ac53bSDavid E. O'Brien static int p_simp_re(struct parse *p, int starordinary); 91c05ac53bSDavid E. O'Brien static int p_count(struct parse *p); 92c05ac53bSDavid E. O'Brien static void p_bracket(struct parse *p); 93c05ac53bSDavid E. O'Brien static void p_b_term(struct parse *p, cset *cs); 94c05ac53bSDavid E. O'Brien static void p_b_cclass(struct parse *p, cset *cs); 95c05ac53bSDavid E. O'Brien static void p_b_eclass(struct parse *p, cset *cs); 96c05ac53bSDavid E. O'Brien static char p_b_symbol(struct parse *p); 97c05ac53bSDavid E. O'Brien static char p_b_coll_elem(struct parse *p, int endc); 98c05ac53bSDavid E. O'Brien static char othercase(int ch); 99c05ac53bSDavid E. O'Brien static void bothcases(struct parse *p, int ch); 100c05ac53bSDavid E. O'Brien static void ordinary(struct parse *p, int ch); 101c05ac53bSDavid E. O'Brien static void nonnewline(struct parse *p); 102c05ac53bSDavid E. O'Brien static void repeat(struct parse *p, sopno start, int from, int to); 103c05ac53bSDavid E. O'Brien static int seterr(struct parse *p, int e); 104c05ac53bSDavid E. O'Brien static cset *allocset(struct parse *p); 105c05ac53bSDavid E. O'Brien static void freeset(struct parse *p, cset *cs); 106c05ac53bSDavid E. O'Brien static int freezeset(struct parse *p, cset *cs); 107c05ac53bSDavid E. O'Brien static int firstch(struct parse *p, cset *cs); 108c05ac53bSDavid E. O'Brien static int nch(struct parse *p, cset *cs); 109c05ac53bSDavid E. O'Brien static void mcadd(struct parse *p, cset *cs, char *cp); 11051295a4dSJordan K. Hubbard #if used 111c05ac53bSDavid E. O'Brien static void mcsub(cset *cs, char *cp); 112c05ac53bSDavid E. O'Brien static int mcin(cset *cs, char *cp); 113c05ac53bSDavid E. O'Brien static char *mcfind(cset *cs, char *cp); 11451295a4dSJordan K. Hubbard #endif 115c05ac53bSDavid E. O'Brien static void mcinvert(struct parse *p, cset *cs); 116c05ac53bSDavid E. O'Brien static void mccase(struct parse *p, cset *cs); 117c05ac53bSDavid E. O'Brien static int isinsets(struct re_guts *g, int c); 118c05ac53bSDavid E. O'Brien static int samesets(struct re_guts *g, int c1, int c2); 119c05ac53bSDavid E. O'Brien static void categorize(struct parse *p, struct re_guts *g); 120c05ac53bSDavid E. O'Brien static sopno dupl(struct parse *p, sopno start, sopno finish); 121c05ac53bSDavid E. O'Brien static void doemit(struct parse *p, sop op, size_t opnd); 122c05ac53bSDavid E. O'Brien static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); 123c05ac53bSDavid E. O'Brien static void dofwd(struct parse *p, sopno pos, sop value); 124c05ac53bSDavid E. O'Brien static void enlarge(struct parse *p, sopno size); 125c05ac53bSDavid E. O'Brien static void stripsnug(struct parse *p, struct re_guts *g); 126c05ac53bSDavid E. O'Brien static void findmust(struct parse *p, struct re_guts *g); 127c05ac53bSDavid E. O'Brien static int altoffset(sop *scan, int offset, int mccs); 128c05ac53bSDavid E. O'Brien static void computejumps(struct parse *p, struct re_guts *g); 129c05ac53bSDavid E. O'Brien static void computematchjumps(struct parse *p, struct re_guts *g); 130c05ac53bSDavid E. O'Brien static sopno pluscount(struct parse *p, struct re_guts *g); 13158f0484fSRodney W. Grimes 13258f0484fSRodney W. Grimes #ifdef __cplusplus 13358f0484fSRodney W. Grimes } 13458f0484fSRodney W. Grimes #endif 13558f0484fSRodney W. Grimes /* ========= end header generated by ./mkh ========= */ 13658f0484fSRodney W. Grimes 13758f0484fSRodney W. Grimes static char nuls[10]; /* place to point scanner in event of error */ 13858f0484fSRodney W. Grimes 13958f0484fSRodney W. Grimes /* 14058f0484fSRodney W. Grimes * macros for use with parse structure 14158f0484fSRodney W. Grimes * BEWARE: these know that the parse structure is named `p' !!! 14258f0484fSRodney W. Grimes */ 14358f0484fSRodney W. Grimes #define PEEK() (*p->next) 14458f0484fSRodney W. Grimes #define PEEK2() (*(p->next+1)) 14558f0484fSRodney W. Grimes #define MORE() (p->next < p->end) 14658f0484fSRodney W. Grimes #define MORE2() (p->next+1 < p->end) 14758f0484fSRodney W. Grimes #define SEE(c) (MORE() && PEEK() == (c)) 14858f0484fSRodney W. Grimes #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 14958f0484fSRodney W. Grimes #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 15058f0484fSRodney W. Grimes #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 15158f0484fSRodney W. Grimes #define NEXT() (p->next++) 15258f0484fSRodney W. Grimes #define NEXT2() (p->next += 2) 15358f0484fSRodney W. Grimes #define NEXTn(n) (p->next += (n)) 15458f0484fSRodney W. Grimes #define GETNEXT() (*p->next++) 15558f0484fSRodney W. Grimes #define SETERROR(e) seterr(p, (e)) 15658f0484fSRodney W. Grimes #define REQUIRE(co, e) ((co) || SETERROR(e)) 15758f0484fSRodney W. Grimes #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 15858f0484fSRodney W. Grimes #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 15958f0484fSRodney W. Grimes #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 16058f0484fSRodney W. Grimes #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 16158f0484fSRodney W. Grimes #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 16258f0484fSRodney W. Grimes #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 16358f0484fSRodney W. Grimes #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 16458f0484fSRodney W. Grimes #define HERE() (p->slen) 16558f0484fSRodney W. Grimes #define THERE() (p->slen - 1) 16658f0484fSRodney W. Grimes #define THERETHERE() (p->slen - 2) 16758f0484fSRodney W. Grimes #define DROP(n) (p->slen -= (n)) 16858f0484fSRodney W. Grimes 16958f0484fSRodney W. Grimes #ifndef NDEBUG 17058f0484fSRodney W. Grimes static int never = 0; /* for use in asserts; shuts lint up */ 17158f0484fSRodney W. Grimes #else 17258f0484fSRodney W. Grimes #define never 0 /* some <assert.h>s have bugs too */ 17358f0484fSRodney W. Grimes #endif 17458f0484fSRodney W. Grimes 1756049d9f0SDaniel C. Sobral /* Macro used by computejump()/computematchjump() */ 1766049d9f0SDaniel C. Sobral #define MIN(a,b) ((a)<(b)?(a):(b)) 1776049d9f0SDaniel C. Sobral 17858f0484fSRodney W. Grimes /* 17958f0484fSRodney W. Grimes - regcomp - interface for parser and compilation 18058f0484fSRodney W. Grimes = extern int regcomp(regex_t *, const char *, int); 18158f0484fSRodney W. Grimes = #define REG_BASIC 0000 18258f0484fSRodney W. Grimes = #define REG_EXTENDED 0001 18358f0484fSRodney W. Grimes = #define REG_ICASE 0002 18458f0484fSRodney W. Grimes = #define REG_NOSUB 0004 18558f0484fSRodney W. Grimes = #define REG_NEWLINE 0010 18658f0484fSRodney W. Grimes = #define REG_NOSPEC 0020 18758f0484fSRodney W. Grimes = #define REG_PEND 0040 18858f0484fSRodney W. Grimes = #define REG_DUMP 0200 18958f0484fSRodney W. Grimes */ 19058f0484fSRodney W. Grimes int /* 0 success, otherwise REG_something */ 19158f0484fSRodney W. Grimes regcomp(preg, pattern, cflags) 19258f0484fSRodney W. Grimes regex_t *preg; 19358f0484fSRodney W. Grimes const char *pattern; 19458f0484fSRodney W. Grimes int cflags; 19558f0484fSRodney W. Grimes { 19658f0484fSRodney W. Grimes struct parse pa; 1978fb3f3f6SDavid E. O'Brien struct re_guts *g; 1988fb3f3f6SDavid E. O'Brien struct parse *p = &pa; 1998fb3f3f6SDavid E. O'Brien int i; 2008fb3f3f6SDavid E. O'Brien size_t len; 20158f0484fSRodney W. Grimes #ifdef REDEBUG 20258f0484fSRodney W. Grimes # define GOODFLAGS(f) (f) 20358f0484fSRodney W. Grimes #else 20458f0484fSRodney W. Grimes # define GOODFLAGS(f) ((f)&~REG_DUMP) 20558f0484fSRodney W. Grimes #endif 20658f0484fSRodney W. Grimes 20758f0484fSRodney W. Grimes cflags = GOODFLAGS(cflags); 20858f0484fSRodney W. Grimes if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 20958f0484fSRodney W. Grimes return(REG_INVARG); 21058f0484fSRodney W. Grimes 21158f0484fSRodney W. Grimes if (cflags®_PEND) { 21258f0484fSRodney W. Grimes if (preg->re_endp < pattern) 21358f0484fSRodney W. Grimes return(REG_INVARG); 21458f0484fSRodney W. Grimes len = preg->re_endp - pattern; 21558f0484fSRodney W. Grimes } else 21658f0484fSRodney W. Grimes len = strlen((char *)pattern); 21758f0484fSRodney W. Grimes 21858f0484fSRodney W. Grimes /* do the mallocs early so failure handling is easy */ 21958f0484fSRodney W. Grimes g = (struct re_guts *)malloc(sizeof(struct re_guts) + 22058f0484fSRodney W. Grimes (NC-1)*sizeof(cat_t)); 22158f0484fSRodney W. Grimes if (g == NULL) 22258f0484fSRodney W. Grimes return(REG_ESPACE); 22358f0484fSRodney W. Grimes p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 22458f0484fSRodney W. Grimes p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 22558f0484fSRodney W. Grimes p->slen = 0; 22658f0484fSRodney W. Grimes if (p->strip == NULL) { 22758f0484fSRodney W. Grimes free((char *)g); 22858f0484fSRodney W. Grimes return(REG_ESPACE); 22958f0484fSRodney W. Grimes } 23058f0484fSRodney W. Grimes 23158f0484fSRodney W. Grimes /* set things up */ 23258f0484fSRodney W. Grimes p->g = g; 23358f0484fSRodney W. Grimes p->next = (char *)pattern; /* convenience; we do not modify it */ 23458f0484fSRodney W. Grimes p->end = p->next + len; 23558f0484fSRodney W. Grimes p->error = 0; 23658f0484fSRodney W. Grimes p->ncsalloc = 0; 23758f0484fSRodney W. Grimes for (i = 0; i < NPAREN; i++) { 23858f0484fSRodney W. Grimes p->pbegin[i] = 0; 23958f0484fSRodney W. Grimes p->pend[i] = 0; 24058f0484fSRodney W. Grimes } 24158f0484fSRodney W. Grimes g->csetsize = NC; 24258f0484fSRodney W. Grimes g->sets = NULL; 24358f0484fSRodney W. Grimes g->setbits = NULL; 24458f0484fSRodney W. Grimes g->ncsets = 0; 24558f0484fSRodney W. Grimes g->cflags = cflags; 24658f0484fSRodney W. Grimes g->iflags = 0; 24758f0484fSRodney W. Grimes g->nbol = 0; 24858f0484fSRodney W. Grimes g->neol = 0; 24958f0484fSRodney W. Grimes g->must = NULL; 250e6a886d8SDaniel C. Sobral g->moffset = -1; 2516b709b74SDaniel C. Sobral g->charjump = NULL; 2526b709b74SDaniel C. Sobral g->matchjump = NULL; 25358f0484fSRodney W. Grimes g->mlen = 0; 25458f0484fSRodney W. Grimes g->nsub = 0; 25558f0484fSRodney W. Grimes g->ncategories = 1; /* category 0 is "everything else" */ 25658f0484fSRodney W. Grimes g->categories = &g->catspace[-(CHAR_MIN)]; 25758f0484fSRodney W. Grimes (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 25858f0484fSRodney W. Grimes g->backrefs = 0; 25958f0484fSRodney W. Grimes 26058f0484fSRodney W. Grimes /* do it */ 26158f0484fSRodney W. Grimes EMIT(OEND, 0); 26258f0484fSRodney W. Grimes g->firststate = THERE(); 26358f0484fSRodney W. Grimes if (cflags®_EXTENDED) 26458f0484fSRodney W. Grimes p_ere(p, OUT); 26558f0484fSRodney W. Grimes else if (cflags®_NOSPEC) 26658f0484fSRodney W. Grimes p_str(p); 26758f0484fSRodney W. Grimes else 26858f0484fSRodney W. Grimes p_bre(p, OUT, OUT); 26958f0484fSRodney W. Grimes EMIT(OEND, 0); 27058f0484fSRodney W. Grimes g->laststate = THERE(); 27158f0484fSRodney W. Grimes 27258f0484fSRodney W. Grimes /* tidy up loose ends and fill things in */ 27358f0484fSRodney W. Grimes categorize(p, g); 27458f0484fSRodney W. Grimes stripsnug(p, g); 27558f0484fSRodney W. Grimes findmust(p, g); 2766049d9f0SDaniel C. Sobral /* only use Boyer-Moore algorithm if the pattern is bigger 2776049d9f0SDaniel C. Sobral * than three characters 2786049d9f0SDaniel C. Sobral */ 2796049d9f0SDaniel C. Sobral if(g->mlen > 3) { 2806049d9f0SDaniel C. Sobral computejumps(p, g); 2816049d9f0SDaniel C. Sobral computematchjumps(p, g); 2824d41cae8SDaniel C. Sobral if(g->matchjump == NULL && g->charjump != NULL) { 2836049d9f0SDaniel C. Sobral free(g->charjump); 2846049d9f0SDaniel C. Sobral g->charjump = NULL; 2856049d9f0SDaniel C. Sobral } 2866049d9f0SDaniel C. Sobral } 28758f0484fSRodney W. Grimes g->nplus = pluscount(p, g); 28858f0484fSRodney W. Grimes g->magic = MAGIC2; 28958f0484fSRodney W. Grimes preg->re_nsub = g->nsub; 29058f0484fSRodney W. Grimes preg->re_g = g; 29158f0484fSRodney W. Grimes preg->re_magic = MAGIC1; 29258f0484fSRodney W. Grimes #ifndef REDEBUG 29358f0484fSRodney W. Grimes /* not debugging, so can't rely on the assert() in regexec() */ 29458f0484fSRodney W. Grimes if (g->iflags&BAD) 29558f0484fSRodney W. Grimes SETERROR(REG_ASSERT); 29658f0484fSRodney W. Grimes #endif 29758f0484fSRodney W. Grimes 29858f0484fSRodney W. Grimes /* win or lose, we're done */ 29958f0484fSRodney W. Grimes if (p->error != 0) /* lose */ 30058f0484fSRodney W. Grimes regfree(preg); 30158f0484fSRodney W. Grimes return(p->error); 30258f0484fSRodney W. Grimes } 30358f0484fSRodney W. Grimes 30458f0484fSRodney W. Grimes /* 30558f0484fSRodney W. Grimes - p_ere - ERE parser top level, concatenation and alternation 3068fb3f3f6SDavid E. O'Brien == static void p_ere(struct parse *p, int stop); 30758f0484fSRodney W. Grimes */ 30858f0484fSRodney W. Grimes static void 30958f0484fSRodney W. Grimes p_ere(p, stop) 3108fb3f3f6SDavid E. O'Brien struct parse *p; 31158f0484fSRodney W. Grimes int stop; /* character this ERE should end at */ 31258f0484fSRodney W. Grimes { 3138fb3f3f6SDavid E. O'Brien char c; 3148fb3f3f6SDavid E. O'Brien sopno prevback; 3158fb3f3f6SDavid E. O'Brien sopno prevfwd; 3168fb3f3f6SDavid E. O'Brien sopno conc; 3178fb3f3f6SDavid E. O'Brien int first = 1; /* is this the first alternative? */ 31858f0484fSRodney W. Grimes 31958f0484fSRodney W. Grimes for (;;) { 32058f0484fSRodney W. Grimes /* do a bunch of concatenated expressions */ 32158f0484fSRodney W. Grimes conc = HERE(); 32258f0484fSRodney W. Grimes while (MORE() && (c = PEEK()) != '|' && c != stop) 32358f0484fSRodney W. Grimes p_ere_exp(p); 32451295a4dSJordan K. Hubbard (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 32558f0484fSRodney W. Grimes 32658f0484fSRodney W. Grimes if (!EAT('|')) 32758f0484fSRodney W. Grimes break; /* NOTE BREAK OUT */ 32858f0484fSRodney W. Grimes 32958f0484fSRodney W. Grimes if (first) { 33058f0484fSRodney W. Grimes INSERT(OCH_, conc); /* offset is wrong */ 33158f0484fSRodney W. Grimes prevfwd = conc; 33258f0484fSRodney W. Grimes prevback = conc; 33358f0484fSRodney W. Grimes first = 0; 33458f0484fSRodney W. Grimes } 33558f0484fSRodney W. Grimes ASTERN(OOR1, prevback); 33658f0484fSRodney W. Grimes prevback = THERE(); 33758f0484fSRodney W. Grimes AHEAD(prevfwd); /* fix previous offset */ 33858f0484fSRodney W. Grimes prevfwd = HERE(); 33958f0484fSRodney W. Grimes EMIT(OOR2, 0); /* offset is very wrong */ 34058f0484fSRodney W. Grimes } 34158f0484fSRodney W. Grimes 34258f0484fSRodney W. Grimes if (!first) { /* tail-end fixups */ 34358f0484fSRodney W. Grimes AHEAD(prevfwd); 34458f0484fSRodney W. Grimes ASTERN(O_CH, prevback); 34558f0484fSRodney W. Grimes } 34658f0484fSRodney W. Grimes 34758f0484fSRodney W. Grimes assert(!MORE() || SEE(stop)); 34858f0484fSRodney W. Grimes } 34958f0484fSRodney W. Grimes 35058f0484fSRodney W. Grimes /* 35158f0484fSRodney W. Grimes - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 3528fb3f3f6SDavid E. O'Brien == static void p_ere_exp(struct parse *p); 35358f0484fSRodney W. Grimes */ 35458f0484fSRodney W. Grimes static void 35558f0484fSRodney W. Grimes p_ere_exp(p) 3568fb3f3f6SDavid E. O'Brien struct parse *p; 35758f0484fSRodney W. Grimes { 3588fb3f3f6SDavid E. O'Brien char c; 3598fb3f3f6SDavid E. O'Brien sopno pos; 3608fb3f3f6SDavid E. O'Brien int count; 3618fb3f3f6SDavid E. O'Brien int count2; 3628fb3f3f6SDavid E. O'Brien sopno subno; 36358f0484fSRodney W. Grimes int wascaret = 0; 36458f0484fSRodney W. Grimes 36558f0484fSRodney W. Grimes assert(MORE()); /* caller should have ensured this */ 36658f0484fSRodney W. Grimes c = GETNEXT(); 36758f0484fSRodney W. Grimes 36858f0484fSRodney W. Grimes pos = HERE(); 36958f0484fSRodney W. Grimes switch (c) { 37058f0484fSRodney W. Grimes case '(': 37151295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EPAREN); 37258f0484fSRodney W. Grimes p->g->nsub++; 37358f0484fSRodney W. Grimes subno = p->g->nsub; 37458f0484fSRodney W. Grimes if (subno < NPAREN) 37558f0484fSRodney W. Grimes p->pbegin[subno] = HERE(); 37658f0484fSRodney W. Grimes EMIT(OLPAREN, subno); 37758f0484fSRodney W. Grimes if (!SEE(')')) 37858f0484fSRodney W. Grimes p_ere(p, ')'); 37958f0484fSRodney W. Grimes if (subno < NPAREN) { 38058f0484fSRodney W. Grimes p->pend[subno] = HERE(); 38158f0484fSRodney W. Grimes assert(p->pend[subno] != 0); 38258f0484fSRodney W. Grimes } 38358f0484fSRodney W. Grimes EMIT(ORPAREN, subno); 38451295a4dSJordan K. Hubbard (void)MUSTEAT(')', REG_EPAREN); 38558f0484fSRodney W. Grimes break; 38658f0484fSRodney W. Grimes #ifndef POSIX_MISTAKE 38758f0484fSRodney W. Grimes case ')': /* happens only if no current unmatched ( */ 38858f0484fSRodney W. Grimes /* 38958f0484fSRodney W. Grimes * You may ask, why the ifndef? Because I didn't notice 39058f0484fSRodney W. Grimes * this until slightly too late for 1003.2, and none of the 39158f0484fSRodney W. Grimes * other 1003.2 regular-expression reviewers noticed it at 39258f0484fSRodney W. Grimes * all. So an unmatched ) is legal POSIX, at least until 39358f0484fSRodney W. Grimes * we can get it fixed. 39458f0484fSRodney W. Grimes */ 39558f0484fSRodney W. Grimes SETERROR(REG_EPAREN); 39658f0484fSRodney W. Grimes break; 39758f0484fSRodney W. Grimes #endif 39858f0484fSRodney W. Grimes case '^': 39958f0484fSRodney W. Grimes EMIT(OBOL, 0); 40058f0484fSRodney W. Grimes p->g->iflags |= USEBOL; 40158f0484fSRodney W. Grimes p->g->nbol++; 40258f0484fSRodney W. Grimes wascaret = 1; 40358f0484fSRodney W. Grimes break; 40458f0484fSRodney W. Grimes case '$': 40558f0484fSRodney W. Grimes EMIT(OEOL, 0); 40658f0484fSRodney W. Grimes p->g->iflags |= USEEOL; 40758f0484fSRodney W. Grimes p->g->neol++; 40858f0484fSRodney W. Grimes break; 40958f0484fSRodney W. Grimes case '|': 41058f0484fSRodney W. Grimes SETERROR(REG_EMPTY); 41158f0484fSRodney W. Grimes break; 41258f0484fSRodney W. Grimes case '*': 41358f0484fSRodney W. Grimes case '+': 41458f0484fSRodney W. Grimes case '?': 41558f0484fSRodney W. Grimes SETERROR(REG_BADRPT); 41658f0484fSRodney W. Grimes break; 41758f0484fSRodney W. Grimes case '.': 41858f0484fSRodney W. Grimes if (p->g->cflags®_NEWLINE) 41958f0484fSRodney W. Grimes nonnewline(p); 42058f0484fSRodney W. Grimes else 42158f0484fSRodney W. Grimes EMIT(OANY, 0); 42258f0484fSRodney W. Grimes break; 42358f0484fSRodney W. Grimes case '[': 42458f0484fSRodney W. Grimes p_bracket(p); 42558f0484fSRodney W. Grimes break; 42658f0484fSRodney W. Grimes case '\\': 42751295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EESCAPE); 42858f0484fSRodney W. Grimes c = GETNEXT(); 42958f0484fSRodney W. Grimes ordinary(p, c); 43058f0484fSRodney W. Grimes break; 43158f0484fSRodney W. Grimes case '{': /* okay as ordinary except if digit follows */ 43289b86d02SAndrey A. Chernov (void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); 43358f0484fSRodney W. Grimes /* FALLTHROUGH */ 43458f0484fSRodney W. Grimes default: 43558f0484fSRodney W. Grimes ordinary(p, c); 43658f0484fSRodney W. Grimes break; 43758f0484fSRodney W. Grimes } 43858f0484fSRodney W. Grimes 43958f0484fSRodney W. Grimes if (!MORE()) 44058f0484fSRodney W. Grimes return; 44158f0484fSRodney W. Grimes c = PEEK(); 44258f0484fSRodney W. Grimes /* we call { a repetition if followed by a digit */ 44358f0484fSRodney W. Grimes if (!( c == '*' || c == '+' || c == '?' || 44489b86d02SAndrey A. Chernov (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) 44558f0484fSRodney W. Grimes return; /* no repetition, we're done */ 44658f0484fSRodney W. Grimes NEXT(); 44758f0484fSRodney W. Grimes 44851295a4dSJordan K. Hubbard (void)REQUIRE(!wascaret, REG_BADRPT); 44958f0484fSRodney W. Grimes switch (c) { 45058f0484fSRodney W. Grimes case '*': /* implemented as +? */ 45158f0484fSRodney W. Grimes /* this case does not require the (y|) trick, noKLUDGE */ 45258f0484fSRodney W. Grimes INSERT(OPLUS_, pos); 45358f0484fSRodney W. Grimes ASTERN(O_PLUS, pos); 45458f0484fSRodney W. Grimes INSERT(OQUEST_, pos); 45558f0484fSRodney W. Grimes ASTERN(O_QUEST, pos); 45658f0484fSRodney W. Grimes break; 45758f0484fSRodney W. Grimes case '+': 45858f0484fSRodney W. Grimes INSERT(OPLUS_, pos); 45958f0484fSRodney W. Grimes ASTERN(O_PLUS, pos); 46058f0484fSRodney W. Grimes break; 46158f0484fSRodney W. Grimes case '?': 46258f0484fSRodney W. Grimes /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 46358f0484fSRodney W. Grimes INSERT(OCH_, pos); /* offset slightly wrong */ 46458f0484fSRodney W. Grimes ASTERN(OOR1, pos); /* this one's right */ 46558f0484fSRodney W. Grimes AHEAD(pos); /* fix the OCH_ */ 46658f0484fSRodney W. Grimes EMIT(OOR2, 0); /* offset very wrong... */ 46758f0484fSRodney W. Grimes AHEAD(THERE()); /* ...so fix it */ 46858f0484fSRodney W. Grimes ASTERN(O_CH, THERETHERE()); 46958f0484fSRodney W. Grimes break; 47058f0484fSRodney W. Grimes case '{': 47158f0484fSRodney W. Grimes count = p_count(p); 47258f0484fSRodney W. Grimes if (EAT(',')) { 47389b86d02SAndrey A. Chernov if (isdigit((uch)PEEK())) { 47458f0484fSRodney W. Grimes count2 = p_count(p); 47551295a4dSJordan K. Hubbard (void)REQUIRE(count <= count2, REG_BADBR); 47658f0484fSRodney W. Grimes } else /* single number with comma */ 47758f0484fSRodney W. Grimes count2 = INFINITY; 47858f0484fSRodney W. Grimes } else /* just a single number */ 47958f0484fSRodney W. Grimes count2 = count; 48058f0484fSRodney W. Grimes repeat(p, pos, count, count2); 48158f0484fSRodney W. Grimes if (!EAT('}')) { /* error heuristics */ 48258f0484fSRodney W. Grimes while (MORE() && PEEK() != '}') 48358f0484fSRodney W. Grimes NEXT(); 48451295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACE); 48558f0484fSRodney W. Grimes SETERROR(REG_BADBR); 48658f0484fSRodney W. Grimes } 48758f0484fSRodney W. Grimes break; 48858f0484fSRodney W. Grimes } 48958f0484fSRodney W. Grimes 49058f0484fSRodney W. Grimes if (!MORE()) 49158f0484fSRodney W. Grimes return; 49258f0484fSRodney W. Grimes c = PEEK(); 49358f0484fSRodney W. Grimes if (!( c == '*' || c == '+' || c == '?' || 49489b86d02SAndrey A. Chernov (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) 49558f0484fSRodney W. Grimes return; 49658f0484fSRodney W. Grimes SETERROR(REG_BADRPT); 49758f0484fSRodney W. Grimes } 49858f0484fSRodney W. Grimes 49958f0484fSRodney W. Grimes /* 50058f0484fSRodney W. Grimes - p_str - string (no metacharacters) "parser" 5018fb3f3f6SDavid E. O'Brien == static void p_str(struct parse *p); 50258f0484fSRodney W. Grimes */ 50358f0484fSRodney W. Grimes static void 50458f0484fSRodney W. Grimes p_str(p) 5058fb3f3f6SDavid E. O'Brien struct parse *p; 50658f0484fSRodney W. Grimes { 50751295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EMPTY); 50858f0484fSRodney W. Grimes while (MORE()) 50958f0484fSRodney W. Grimes ordinary(p, GETNEXT()); 51058f0484fSRodney W. Grimes } 51158f0484fSRodney W. Grimes 51258f0484fSRodney W. Grimes /* 51358f0484fSRodney W. Grimes - p_bre - BRE parser top level, anchoring and concatenation 5148fb3f3f6SDavid E. O'Brien == static void p_bre(struct parse *p, int end1, \ 5158fb3f3f6SDavid E. O'Brien == int end2); 51658f0484fSRodney W. Grimes * Giving end1 as OUT essentially eliminates the end1/end2 check. 51758f0484fSRodney W. Grimes * 51858f0484fSRodney W. Grimes * This implementation is a bit of a kludge, in that a trailing $ is first 51958f0484fSRodney W. Grimes * taken as an ordinary character and then revised to be an anchor. The 52058f0484fSRodney W. Grimes * only undesirable side effect is that '$' gets included as a character 52158f0484fSRodney W. Grimes * category in such cases. This is fairly harmless; not worth fixing. 52258f0484fSRodney W. Grimes * The amount of lookahead needed to avoid this kludge is excessive. 52358f0484fSRodney W. Grimes */ 52458f0484fSRodney W. Grimes static void 52558f0484fSRodney W. Grimes p_bre(p, end1, end2) 5268fb3f3f6SDavid E. O'Brien struct parse *p; 5278fb3f3f6SDavid E. O'Brien int end1; /* first terminating character */ 5288fb3f3f6SDavid E. O'Brien int end2; /* second terminating character */ 52958f0484fSRodney W. Grimes { 5308fb3f3f6SDavid E. O'Brien sopno start = HERE(); 5318fb3f3f6SDavid E. O'Brien int first = 1; /* first subexpression? */ 5328fb3f3f6SDavid E. O'Brien int wasdollar = 0; 53358f0484fSRodney W. Grimes 53458f0484fSRodney W. Grimes if (EAT('^')) { 53558f0484fSRodney W. Grimes EMIT(OBOL, 0); 53658f0484fSRodney W. Grimes p->g->iflags |= USEBOL; 53758f0484fSRodney W. Grimes p->g->nbol++; 53858f0484fSRodney W. Grimes } 53958f0484fSRodney W. Grimes while (MORE() && !SEETWO(end1, end2)) { 54058f0484fSRodney W. Grimes wasdollar = p_simp_re(p, first); 54158f0484fSRodney W. Grimes first = 0; 54258f0484fSRodney W. Grimes } 54358f0484fSRodney W. Grimes if (wasdollar) { /* oops, that was a trailing anchor */ 54458f0484fSRodney W. Grimes DROP(1); 54558f0484fSRodney W. Grimes EMIT(OEOL, 0); 54658f0484fSRodney W. Grimes p->g->iflags |= USEEOL; 54758f0484fSRodney W. Grimes p->g->neol++; 54858f0484fSRodney W. Grimes } 54958f0484fSRodney W. Grimes 55051295a4dSJordan K. Hubbard (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 55158f0484fSRodney W. Grimes } 55258f0484fSRodney W. Grimes 55358f0484fSRodney W. Grimes /* 55458f0484fSRodney W. Grimes - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 5558fb3f3f6SDavid E. O'Brien == static int p_simp_re(struct parse *p, int starordinary); 55658f0484fSRodney W. Grimes */ 55758f0484fSRodney W. Grimes static int /* was the simple RE an unbackslashed $? */ 55858f0484fSRodney W. Grimes p_simp_re(p, starordinary) 5598fb3f3f6SDavid E. O'Brien struct parse *p; 56058f0484fSRodney W. Grimes int starordinary; /* is a leading * an ordinary character? */ 56158f0484fSRodney W. Grimes { 5628fb3f3f6SDavid E. O'Brien int c; 5638fb3f3f6SDavid E. O'Brien int count; 5648fb3f3f6SDavid E. O'Brien int count2; 5658fb3f3f6SDavid E. O'Brien sopno pos; 5668fb3f3f6SDavid E. O'Brien int i; 5678fb3f3f6SDavid E. O'Brien sopno subno; 56858f0484fSRodney W. Grimes # define BACKSL (1<<CHAR_BIT) 56958f0484fSRodney W. Grimes 57058f0484fSRodney W. Grimes pos = HERE(); /* repetion op, if any, covers from here */ 57158f0484fSRodney W. Grimes 57258f0484fSRodney W. Grimes assert(MORE()); /* caller should have ensured this */ 57358f0484fSRodney W. Grimes c = GETNEXT(); 57458f0484fSRodney W. Grimes if (c == '\\') { 57551295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EESCAPE); 57689b86d02SAndrey A. Chernov c = BACKSL | GETNEXT(); 57758f0484fSRodney W. Grimes } 57858f0484fSRodney W. Grimes switch (c) { 57958f0484fSRodney W. Grimes case '.': 58058f0484fSRodney W. Grimes if (p->g->cflags®_NEWLINE) 58158f0484fSRodney W. Grimes nonnewline(p); 58258f0484fSRodney W. Grimes else 58358f0484fSRodney W. Grimes EMIT(OANY, 0); 58458f0484fSRodney W. Grimes break; 58558f0484fSRodney W. Grimes case '[': 58658f0484fSRodney W. Grimes p_bracket(p); 58758f0484fSRodney W. Grimes break; 58858f0484fSRodney W. Grimes case BACKSL|'{': 58958f0484fSRodney W. Grimes SETERROR(REG_BADRPT); 59058f0484fSRodney W. Grimes break; 59158f0484fSRodney W. Grimes case BACKSL|'(': 59258f0484fSRodney W. Grimes p->g->nsub++; 59358f0484fSRodney W. Grimes subno = p->g->nsub; 59458f0484fSRodney W. Grimes if (subno < NPAREN) 59558f0484fSRodney W. Grimes p->pbegin[subno] = HERE(); 59658f0484fSRodney W. Grimes EMIT(OLPAREN, subno); 59758f0484fSRodney W. Grimes /* the MORE here is an error heuristic */ 59858f0484fSRodney W. Grimes if (MORE() && !SEETWO('\\', ')')) 59958f0484fSRodney W. Grimes p_bre(p, '\\', ')'); 60058f0484fSRodney W. Grimes if (subno < NPAREN) { 60158f0484fSRodney W. Grimes p->pend[subno] = HERE(); 60258f0484fSRodney W. Grimes assert(p->pend[subno] != 0); 60358f0484fSRodney W. Grimes } 60458f0484fSRodney W. Grimes EMIT(ORPAREN, subno); 60551295a4dSJordan K. Hubbard (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 60658f0484fSRodney W. Grimes break; 60758f0484fSRodney W. Grimes case BACKSL|')': /* should not get here -- must be user */ 60858f0484fSRodney W. Grimes case BACKSL|'}': 60958f0484fSRodney W. Grimes SETERROR(REG_EPAREN); 61058f0484fSRodney W. Grimes break; 61158f0484fSRodney W. Grimes case BACKSL|'1': 61258f0484fSRodney W. Grimes case BACKSL|'2': 61358f0484fSRodney W. Grimes case BACKSL|'3': 61458f0484fSRodney W. Grimes case BACKSL|'4': 61558f0484fSRodney W. Grimes case BACKSL|'5': 61658f0484fSRodney W. Grimes case BACKSL|'6': 61758f0484fSRodney W. Grimes case BACKSL|'7': 61858f0484fSRodney W. Grimes case BACKSL|'8': 61958f0484fSRodney W. Grimes case BACKSL|'9': 62058f0484fSRodney W. Grimes i = (c&~BACKSL) - '0'; 62158f0484fSRodney W. Grimes assert(i < NPAREN); 62258f0484fSRodney W. Grimes if (p->pend[i] != 0) { 62358f0484fSRodney W. Grimes assert(i <= p->g->nsub); 62458f0484fSRodney W. Grimes EMIT(OBACK_, i); 62558f0484fSRodney W. Grimes assert(p->pbegin[i] != 0); 62658f0484fSRodney W. Grimes assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 62758f0484fSRodney W. Grimes assert(OP(p->strip[p->pend[i]]) == ORPAREN); 62858f0484fSRodney W. Grimes (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 62958f0484fSRodney W. Grimes EMIT(O_BACK, i); 63058f0484fSRodney W. Grimes } else 63158f0484fSRodney W. Grimes SETERROR(REG_ESUBREG); 63258f0484fSRodney W. Grimes p->g->backrefs = 1; 63358f0484fSRodney W. Grimes break; 63458f0484fSRodney W. Grimes case '*': 63551295a4dSJordan K. Hubbard (void)REQUIRE(starordinary, REG_BADRPT); 63658f0484fSRodney W. Grimes /* FALLTHROUGH */ 63758f0484fSRodney W. Grimes default: 63889b86d02SAndrey A. Chernov ordinary(p, (char)c); 63958f0484fSRodney W. Grimes break; 64058f0484fSRodney W. Grimes } 64158f0484fSRodney W. Grimes 64258f0484fSRodney W. Grimes if (EAT('*')) { /* implemented as +? */ 64358f0484fSRodney W. Grimes /* this case does not require the (y|) trick, noKLUDGE */ 64458f0484fSRodney W. Grimes INSERT(OPLUS_, pos); 64558f0484fSRodney W. Grimes ASTERN(O_PLUS, pos); 64658f0484fSRodney W. Grimes INSERT(OQUEST_, pos); 64758f0484fSRodney W. Grimes ASTERN(O_QUEST, pos); 64858f0484fSRodney W. Grimes } else if (EATTWO('\\', '{')) { 64958f0484fSRodney W. Grimes count = p_count(p); 65058f0484fSRodney W. Grimes if (EAT(',')) { 65189b86d02SAndrey A. Chernov if (MORE() && isdigit((uch)PEEK())) { 65258f0484fSRodney W. Grimes count2 = p_count(p); 65351295a4dSJordan K. Hubbard (void)REQUIRE(count <= count2, REG_BADBR); 65458f0484fSRodney W. Grimes } else /* single number with comma */ 65558f0484fSRodney W. Grimes count2 = INFINITY; 65658f0484fSRodney W. Grimes } else /* just a single number */ 65758f0484fSRodney W. Grimes count2 = count; 65858f0484fSRodney W. Grimes repeat(p, pos, count, count2); 65958f0484fSRodney W. Grimes if (!EATTWO('\\', '}')) { /* error heuristics */ 66058f0484fSRodney W. Grimes while (MORE() && !SEETWO('\\', '}')) 66158f0484fSRodney W. Grimes NEXT(); 66251295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACE); 66358f0484fSRodney W. Grimes SETERROR(REG_BADBR); 66458f0484fSRodney W. Grimes } 66589b86d02SAndrey A. Chernov } else if (c == '$') /* $ (but not \$) ends it */ 66658f0484fSRodney W. Grimes return(1); 66758f0484fSRodney W. Grimes 66858f0484fSRodney W. Grimes return(0); 66958f0484fSRodney W. Grimes } 67058f0484fSRodney W. Grimes 67158f0484fSRodney W. Grimes /* 67258f0484fSRodney W. Grimes - p_count - parse a repetition count 6738fb3f3f6SDavid E. O'Brien == static int p_count(struct parse *p); 67458f0484fSRodney W. Grimes */ 67558f0484fSRodney W. Grimes static int /* the value */ 67658f0484fSRodney W. Grimes p_count(p) 6778fb3f3f6SDavid E. O'Brien struct parse *p; 67858f0484fSRodney W. Grimes { 6798fb3f3f6SDavid E. O'Brien int count = 0; 6808fb3f3f6SDavid E. O'Brien int ndigits = 0; 68158f0484fSRodney W. Grimes 68289b86d02SAndrey A. Chernov while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { 68358f0484fSRodney W. Grimes count = count*10 + (GETNEXT() - '0'); 68458f0484fSRodney W. Grimes ndigits++; 68558f0484fSRodney W. Grimes } 68658f0484fSRodney W. Grimes 68751295a4dSJordan K. Hubbard (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 68858f0484fSRodney W. Grimes return(count); 68958f0484fSRodney W. Grimes } 69058f0484fSRodney W. Grimes 69158f0484fSRodney W. Grimes /* 69258f0484fSRodney W. Grimes - p_bracket - parse a bracketed character list 6938fb3f3f6SDavid E. O'Brien == static void p_bracket(struct parse *p); 69458f0484fSRodney W. Grimes * 69558f0484fSRodney W. Grimes * Note a significant property of this code: if the allocset() did SETERROR, 69658f0484fSRodney W. Grimes * no set operations are done. 69758f0484fSRodney W. Grimes */ 69858f0484fSRodney W. Grimes static void 69958f0484fSRodney W. Grimes p_bracket(p) 7008fb3f3f6SDavid E. O'Brien struct parse *p; 70158f0484fSRodney W. Grimes { 7028fb3f3f6SDavid E. O'Brien cset *cs = allocset(p); 7038fb3f3f6SDavid E. O'Brien int invert = 0; 70458f0484fSRodney W. Grimes 70558f0484fSRodney W. Grimes /* Dept of Truly Sickening Special-Case Kludges */ 70658f0484fSRodney W. Grimes if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 70758f0484fSRodney W. Grimes EMIT(OBOW, 0); 70858f0484fSRodney W. Grimes NEXTn(6); 70958f0484fSRodney W. Grimes return; 71058f0484fSRodney W. Grimes } 71158f0484fSRodney W. Grimes if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 71258f0484fSRodney W. Grimes EMIT(OEOW, 0); 71358f0484fSRodney W. Grimes NEXTn(6); 71458f0484fSRodney W. Grimes return; 71558f0484fSRodney W. Grimes } 71658f0484fSRodney W. Grimes 71758f0484fSRodney W. Grimes if (EAT('^')) 71858f0484fSRodney W. Grimes invert++; /* make note to invert set at end */ 71958f0484fSRodney W. Grimes if (EAT(']')) 72058f0484fSRodney W. Grimes CHadd(cs, ']'); 72158f0484fSRodney W. Grimes else if (EAT('-')) 72258f0484fSRodney W. Grimes CHadd(cs, '-'); 72358f0484fSRodney W. Grimes while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 72458f0484fSRodney W. Grimes p_b_term(p, cs); 72558f0484fSRodney W. Grimes if (EAT('-')) 72658f0484fSRodney W. Grimes CHadd(cs, '-'); 72751295a4dSJordan K. Hubbard (void)MUSTEAT(']', REG_EBRACK); 72858f0484fSRodney W. Grimes 72958f0484fSRodney W. Grimes if (p->error != 0) /* don't mess things up further */ 73058f0484fSRodney W. Grimes return; 73158f0484fSRodney W. Grimes 73258f0484fSRodney W. Grimes if (p->g->cflags®_ICASE) { 7338fb3f3f6SDavid E. O'Brien int i; 7348fb3f3f6SDavid E. O'Brien int ci; 73558f0484fSRodney W. Grimes 73658f0484fSRodney W. Grimes for (i = p->g->csetsize - 1; i >= 0; i--) 73758f0484fSRodney W. Grimes if (CHIN(cs, i) && isalpha(i)) { 73858f0484fSRodney W. Grimes ci = othercase(i); 73958f0484fSRodney W. Grimes if (ci != i) 74058f0484fSRodney W. Grimes CHadd(cs, ci); 74158f0484fSRodney W. Grimes } 74258f0484fSRodney W. Grimes if (cs->multis != NULL) 74358f0484fSRodney W. Grimes mccase(p, cs); 74458f0484fSRodney W. Grimes } 74558f0484fSRodney W. Grimes if (invert) { 7468fb3f3f6SDavid E. O'Brien int i; 74758f0484fSRodney W. Grimes 74858f0484fSRodney W. Grimes for (i = p->g->csetsize - 1; i >= 0; i--) 74958f0484fSRodney W. Grimes if (CHIN(cs, i)) 75058f0484fSRodney W. Grimes CHsub(cs, i); 75158f0484fSRodney W. Grimes else 75258f0484fSRodney W. Grimes CHadd(cs, i); 75358f0484fSRodney W. Grimes if (p->g->cflags®_NEWLINE) 75458f0484fSRodney W. Grimes CHsub(cs, '\n'); 75558f0484fSRodney W. Grimes if (cs->multis != NULL) 75658f0484fSRodney W. Grimes mcinvert(p, cs); 75758f0484fSRodney W. Grimes } 75858f0484fSRodney W. Grimes 75958f0484fSRodney W. Grimes assert(cs->multis == NULL); /* xxx */ 76058f0484fSRodney W. Grimes 76158f0484fSRodney W. Grimes if (nch(p, cs) == 1) { /* optimize singleton sets */ 76258f0484fSRodney W. Grimes ordinary(p, firstch(p, cs)); 76358f0484fSRodney W. Grimes freeset(p, cs); 76458f0484fSRodney W. Grimes } else 76558f0484fSRodney W. Grimes EMIT(OANYOF, freezeset(p, cs)); 76658f0484fSRodney W. Grimes } 76758f0484fSRodney W. Grimes 76858f0484fSRodney W. Grimes /* 76958f0484fSRodney W. Grimes - p_b_term - parse one term of a bracketed character list 7708fb3f3f6SDavid E. O'Brien == static void p_b_term(struct parse *p, cset *cs); 77158f0484fSRodney W. Grimes */ 77258f0484fSRodney W. Grimes static void 77358f0484fSRodney W. Grimes p_b_term(p, cs) 7748fb3f3f6SDavid E. O'Brien struct parse *p; 7758fb3f3f6SDavid E. O'Brien cset *cs; 77658f0484fSRodney W. Grimes { 7778fb3f3f6SDavid E. O'Brien char c; 7788fb3f3f6SDavid E. O'Brien char start, finish; 7798fb3f3f6SDavid E. O'Brien int i; 78058f0484fSRodney W. Grimes 78158f0484fSRodney W. Grimes /* classify what we've got */ 78258f0484fSRodney W. Grimes switch ((MORE()) ? PEEK() : '\0') { 78358f0484fSRodney W. Grimes case '[': 78458f0484fSRodney W. Grimes c = (MORE2()) ? PEEK2() : '\0'; 78558f0484fSRodney W. Grimes break; 78658f0484fSRodney W. Grimes case '-': 78758f0484fSRodney W. Grimes SETERROR(REG_ERANGE); 78858f0484fSRodney W. Grimes return; /* NOTE RETURN */ 78958f0484fSRodney W. Grimes break; 79058f0484fSRodney W. Grimes default: 79158f0484fSRodney W. Grimes c = '\0'; 79258f0484fSRodney W. Grimes break; 79358f0484fSRodney W. Grimes } 79458f0484fSRodney W. Grimes 79558f0484fSRodney W. Grimes switch (c) { 79658f0484fSRodney W. Grimes case ':': /* character class */ 79758f0484fSRodney W. Grimes NEXT2(); 79851295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 79958f0484fSRodney W. Grimes c = PEEK(); 80051295a4dSJordan K. Hubbard (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE); 80158f0484fSRodney W. Grimes p_b_cclass(p, cs); 80251295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 80351295a4dSJordan K. Hubbard (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 80458f0484fSRodney W. Grimes break; 80558f0484fSRodney W. Grimes case '=': /* equivalence class */ 80658f0484fSRodney W. Grimes NEXT2(); 80751295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 80858f0484fSRodney W. Grimes c = PEEK(); 80951295a4dSJordan K. Hubbard (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 81058f0484fSRodney W. Grimes p_b_eclass(p, cs); 81151295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 81251295a4dSJordan K. Hubbard (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 81358f0484fSRodney W. Grimes break; 81458f0484fSRodney W. Grimes default: /* symbol, ordinary character, or range */ 81558f0484fSRodney W. Grimes /* xxx revision needed for multichar stuff */ 81658f0484fSRodney W. Grimes start = p_b_symbol(p); 81758f0484fSRodney W. Grimes if (SEE('-') && MORE2() && PEEK2() != ']') { 81858f0484fSRodney W. Grimes /* range */ 81958f0484fSRodney W. Grimes NEXT(); 82058f0484fSRodney W. Grimes if (EAT('-')) 82158f0484fSRodney W. Grimes finish = '-'; 82258f0484fSRodney W. Grimes else 82358f0484fSRodney W. Grimes finish = p_b_symbol(p); 82458f0484fSRodney W. Grimes } else 82558f0484fSRodney W. Grimes finish = start; 8265c551438SAndrey A. Chernov if (start == finish) 8275c551438SAndrey A. Chernov CHadd(cs, start); 8285c551438SAndrey A. Chernov else { 829b5a6eb18SAndrey A. Chernov if (__collate_load_error) { 830b5a6eb18SAndrey A. Chernov (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE); 831b5a6eb18SAndrey A. Chernov for (i = (uch)start; i <= (uch)finish; i++) 832b5a6eb18SAndrey A. Chernov CHadd(cs, i); 833b5a6eb18SAndrey A. Chernov } else { 834c61cea72SAndrey A. Chernov (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE); 8355c551438SAndrey A. Chernov for (i = CHAR_MIN; i <= CHAR_MAX; i++) { 836c61cea72SAndrey A. Chernov if ( __collate_range_cmp(start, i) <= 0 837c61cea72SAndrey A. Chernov && __collate_range_cmp(i, finish) <= 0 8385c551438SAndrey A. Chernov ) 83958f0484fSRodney W. Grimes CHadd(cs, i); 8405c551438SAndrey A. Chernov } 8415c551438SAndrey A. Chernov } 842b5a6eb18SAndrey A. Chernov } 84358f0484fSRodney W. Grimes break; 84458f0484fSRodney W. Grimes } 84558f0484fSRodney W. Grimes } 84658f0484fSRodney W. Grimes 84758f0484fSRodney W. Grimes /* 84858f0484fSRodney W. Grimes - p_b_cclass - parse a character-class name and deal with it 8498fb3f3f6SDavid E. O'Brien == static void p_b_cclass(struct parse *p, cset *cs); 85058f0484fSRodney W. Grimes */ 85158f0484fSRodney W. Grimes static void 85258f0484fSRodney W. Grimes p_b_cclass(p, cs) 8538fb3f3f6SDavid E. O'Brien struct parse *p; 8548fb3f3f6SDavid E. O'Brien cset *cs; 85558f0484fSRodney W. Grimes { 8568fb3f3f6SDavid E. O'Brien int c; 8578fb3f3f6SDavid E. O'Brien char *sp = p->next; 8588fb3f3f6SDavid E. O'Brien struct cclass *cp; 8598fb3f3f6SDavid E. O'Brien size_t len; 86058f0484fSRodney W. Grimes 861b5363c4aSAndrey A. Chernov while (MORE() && isalpha((uch)PEEK())) 86258f0484fSRodney W. Grimes NEXT(); 86358f0484fSRodney W. Grimes len = p->next - sp; 86458f0484fSRodney W. Grimes for (cp = cclasses; cp->name != NULL; cp++) 86558f0484fSRodney W. Grimes if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 86658f0484fSRodney W. Grimes break; 86758f0484fSRodney W. Grimes if (cp->name == NULL) { 86858f0484fSRodney W. Grimes /* oops, didn't find it */ 86958f0484fSRodney W. Grimes SETERROR(REG_ECTYPE); 87058f0484fSRodney W. Grimes return; 87158f0484fSRodney W. Grimes } 87258f0484fSRodney W. Grimes 873b5363c4aSAndrey A. Chernov switch (cp->fidx) { 874b5363c4aSAndrey A. Chernov case CALNUM: 875b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 876b5363c4aSAndrey A. Chernov if (isalnum((uch)c)) 87758f0484fSRodney W. Grimes CHadd(cs, c); 878b5363c4aSAndrey A. Chernov break; 879b5363c4aSAndrey A. Chernov case CALPHA: 880b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 881b5363c4aSAndrey A. Chernov if (isalpha((uch)c)) 882b5363c4aSAndrey A. Chernov CHadd(cs, c); 883b5363c4aSAndrey A. Chernov break; 884b5363c4aSAndrey A. Chernov case CBLANK: 885b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 886b5363c4aSAndrey A. Chernov if (isblank((uch)c)) 887b5363c4aSAndrey A. Chernov CHadd(cs, c); 888b5363c4aSAndrey A. Chernov break; 889b5363c4aSAndrey A. Chernov case CCNTRL: 890b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 891b5363c4aSAndrey A. Chernov if (iscntrl((uch)c)) 892b5363c4aSAndrey A. Chernov CHadd(cs, c); 893b5363c4aSAndrey A. Chernov break; 894b5363c4aSAndrey A. Chernov case CDIGIT: 895b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 896b5363c4aSAndrey A. Chernov if (isdigit((uch)c)) 897b5363c4aSAndrey A. Chernov CHadd(cs, c); 898b5363c4aSAndrey A. Chernov break; 899b5363c4aSAndrey A. Chernov case CGRAPH: 900b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 901b5363c4aSAndrey A. Chernov if (isgraph((uch)c)) 902b5363c4aSAndrey A. Chernov CHadd(cs, c); 903b5363c4aSAndrey A. Chernov break; 904b5363c4aSAndrey A. Chernov case CLOWER: 905b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 906b5363c4aSAndrey A. Chernov if (islower((uch)c)) 907b5363c4aSAndrey A. Chernov CHadd(cs, c); 908b5363c4aSAndrey A. Chernov break; 909b5363c4aSAndrey A. Chernov case CPRINT: 910b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 911b5363c4aSAndrey A. Chernov if (isprint((uch)c)) 912b5363c4aSAndrey A. Chernov CHadd(cs, c); 913b5363c4aSAndrey A. Chernov break; 914b5363c4aSAndrey A. Chernov case CPUNCT: 915b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 916b5363c4aSAndrey A. Chernov if (ispunct((uch)c)) 917b5363c4aSAndrey A. Chernov CHadd(cs, c); 918b5363c4aSAndrey A. Chernov break; 919b5363c4aSAndrey A. Chernov case CSPACE: 920b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 921b5363c4aSAndrey A. Chernov if (isspace((uch)c)) 922b5363c4aSAndrey A. Chernov CHadd(cs, c); 923b5363c4aSAndrey A. Chernov break; 924b5363c4aSAndrey A. Chernov case CUPPER: 925b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 926b5363c4aSAndrey A. Chernov if (isupper((uch)c)) 927b5363c4aSAndrey A. Chernov CHadd(cs, c); 928b5363c4aSAndrey A. Chernov break; 929b5363c4aSAndrey A. Chernov case CXDIGIT: 930b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 931b5363c4aSAndrey A. Chernov if (isxdigit((uch)c)) 932b5363c4aSAndrey A. Chernov CHadd(cs, c); 933b5363c4aSAndrey A. Chernov break; 934b5363c4aSAndrey A. Chernov } 935b5363c4aSAndrey A. Chernov #if 0 93658f0484fSRodney W. Grimes for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 93758f0484fSRodney W. Grimes MCadd(p, cs, u); 938b5363c4aSAndrey A. Chernov #endif 93958f0484fSRodney W. Grimes } 94058f0484fSRodney W. Grimes 94158f0484fSRodney W. Grimes /* 94258f0484fSRodney W. Grimes - p_b_eclass - parse an equivalence-class name and deal with it 9438fb3f3f6SDavid E. O'Brien == static void p_b_eclass(struct parse *p, cset *cs); 94458f0484fSRodney W. Grimes * 94558f0484fSRodney W. Grimes * This implementation is incomplete. xxx 94658f0484fSRodney W. Grimes */ 94758f0484fSRodney W. Grimes static void 94858f0484fSRodney W. Grimes p_b_eclass(p, cs) 9498fb3f3f6SDavid E. O'Brien struct parse *p; 9508fb3f3f6SDavid E. O'Brien cset *cs; 95158f0484fSRodney W. Grimes { 9528fb3f3f6SDavid E. O'Brien char c; 95358f0484fSRodney W. Grimes 95458f0484fSRodney W. Grimes c = p_b_coll_elem(p, '='); 95558f0484fSRodney W. Grimes CHadd(cs, c); 95658f0484fSRodney W. Grimes } 95758f0484fSRodney W. Grimes 95858f0484fSRodney W. Grimes /* 95958f0484fSRodney W. Grimes - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 9608fb3f3f6SDavid E. O'Brien == static char p_b_symbol(struct parse *p); 96158f0484fSRodney W. Grimes */ 96258f0484fSRodney W. Grimes static char /* value of symbol */ 96358f0484fSRodney W. Grimes p_b_symbol(p) 9648fb3f3f6SDavid E. O'Brien struct parse *p; 96558f0484fSRodney W. Grimes { 9668fb3f3f6SDavid E. O'Brien char value; 96758f0484fSRodney W. Grimes 96851295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 96958f0484fSRodney W. Grimes if (!EATTWO('[', '.')) 97058f0484fSRodney W. Grimes return(GETNEXT()); 97158f0484fSRodney W. Grimes 97258f0484fSRodney W. Grimes /* collating symbol */ 97358f0484fSRodney W. Grimes value = p_b_coll_elem(p, '.'); 97451295a4dSJordan K. Hubbard (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 97558f0484fSRodney W. Grimes return(value); 97658f0484fSRodney W. Grimes } 97758f0484fSRodney W. Grimes 97858f0484fSRodney W. Grimes /* 97958f0484fSRodney W. Grimes - p_b_coll_elem - parse a collating-element name and look it up 9808fb3f3f6SDavid E. O'Brien == static char p_b_coll_elem(struct parse *p, int endc); 98158f0484fSRodney W. Grimes */ 98258f0484fSRodney W. Grimes static char /* value of collating element */ 98358f0484fSRodney W. Grimes p_b_coll_elem(p, endc) 9848fb3f3f6SDavid E. O'Brien struct parse *p; 98558f0484fSRodney W. Grimes int endc; /* name ended by endc,']' */ 98658f0484fSRodney W. Grimes { 9878fb3f3f6SDavid E. O'Brien char *sp = p->next; 9888fb3f3f6SDavid E. O'Brien struct cname *cp; 9898fb3f3f6SDavid E. O'Brien int len; 99058f0484fSRodney W. Grimes 99158f0484fSRodney W. Grimes while (MORE() && !SEETWO(endc, ']')) 99258f0484fSRodney W. Grimes NEXT(); 99358f0484fSRodney W. Grimes if (!MORE()) { 99458f0484fSRodney W. Grimes SETERROR(REG_EBRACK); 99558f0484fSRodney W. Grimes return(0); 99658f0484fSRodney W. Grimes } 99758f0484fSRodney W. Grimes len = p->next - sp; 99858f0484fSRodney W. Grimes for (cp = cnames; cp->name != NULL; cp++) 99958f0484fSRodney W. Grimes if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 100058f0484fSRodney W. Grimes return(cp->code); /* known name */ 100158f0484fSRodney W. Grimes if (len == 1) 100258f0484fSRodney W. Grimes return(*sp); /* single character */ 100358f0484fSRodney W. Grimes SETERROR(REG_ECOLLATE); /* neither */ 100458f0484fSRodney W. Grimes return(0); 100558f0484fSRodney W. Grimes } 100658f0484fSRodney W. Grimes 100758f0484fSRodney W. Grimes /* 100858f0484fSRodney W. Grimes - othercase - return the case counterpart of an alphabetic 100958f0484fSRodney W. Grimes == static char othercase(int ch); 101058f0484fSRodney W. Grimes */ 101158f0484fSRodney W. Grimes static char /* if no counterpart, return ch */ 101258f0484fSRodney W. Grimes othercase(ch) 101358f0484fSRodney W. Grimes int ch; 101458f0484fSRodney W. Grimes { 101589b86d02SAndrey A. Chernov ch = (uch)ch; 101658f0484fSRodney W. Grimes assert(isalpha(ch)); 101758f0484fSRodney W. Grimes if (isupper(ch)) 101858f0484fSRodney W. Grimes return(tolower(ch)); 101958f0484fSRodney W. Grimes else if (islower(ch)) 102058f0484fSRodney W. Grimes return(toupper(ch)); 102158f0484fSRodney W. Grimes else /* peculiar, but could happen */ 102258f0484fSRodney W. Grimes return(ch); 102358f0484fSRodney W. Grimes } 102458f0484fSRodney W. Grimes 102558f0484fSRodney W. Grimes /* 102658f0484fSRodney W. Grimes - bothcases - emit a dualcase version of a two-case character 10278fb3f3f6SDavid E. O'Brien == static void bothcases(struct parse *p, int ch); 102858f0484fSRodney W. Grimes * 102958f0484fSRodney W. Grimes * Boy, is this implementation ever a kludge... 103058f0484fSRodney W. Grimes */ 103158f0484fSRodney W. Grimes static void 103258f0484fSRodney W. Grimes bothcases(p, ch) 10338fb3f3f6SDavid E. O'Brien struct parse *p; 103458f0484fSRodney W. Grimes int ch; 103558f0484fSRodney W. Grimes { 10368fb3f3f6SDavid E. O'Brien char *oldnext = p->next; 10378fb3f3f6SDavid E. O'Brien char *oldend = p->end; 103858f0484fSRodney W. Grimes char bracket[3]; 103958f0484fSRodney W. Grimes 104089b86d02SAndrey A. Chernov ch = (uch)ch; 104158f0484fSRodney W. Grimes assert(othercase(ch) != ch); /* p_bracket() would recurse */ 104258f0484fSRodney W. Grimes p->next = bracket; 104358f0484fSRodney W. Grimes p->end = bracket+2; 104458f0484fSRodney W. Grimes bracket[0] = ch; 104558f0484fSRodney W. Grimes bracket[1] = ']'; 104658f0484fSRodney W. Grimes bracket[2] = '\0'; 104758f0484fSRodney W. Grimes p_bracket(p); 104858f0484fSRodney W. Grimes assert(p->next == bracket+2); 104958f0484fSRodney W. Grimes p->next = oldnext; 105058f0484fSRodney W. Grimes p->end = oldend; 105158f0484fSRodney W. Grimes } 105258f0484fSRodney W. Grimes 105358f0484fSRodney W. Grimes /* 105458f0484fSRodney W. Grimes - ordinary - emit an ordinary character 10558fb3f3f6SDavid E. O'Brien == static void ordinary(struct parse *p, int ch); 105658f0484fSRodney W. Grimes */ 105758f0484fSRodney W. Grimes static void 105858f0484fSRodney W. Grimes ordinary(p, ch) 10598fb3f3f6SDavid E. O'Brien struct parse *p; 10608fb3f3f6SDavid E. O'Brien int ch; 106158f0484fSRodney W. Grimes { 10628fb3f3f6SDavid E. O'Brien cat_t *cap = p->g->categories; 106358f0484fSRodney W. Grimes 106489b86d02SAndrey A. Chernov if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) 106558f0484fSRodney W. Grimes bothcases(p, ch); 106658f0484fSRodney W. Grimes else { 106789b86d02SAndrey A. Chernov EMIT(OCHAR, (uch)ch); 106858f0484fSRodney W. Grimes if (cap[ch] == 0) 106958f0484fSRodney W. Grimes cap[ch] = p->g->ncategories++; 107058f0484fSRodney W. Grimes } 107158f0484fSRodney W. Grimes } 107258f0484fSRodney W. Grimes 107358f0484fSRodney W. Grimes /* 107458f0484fSRodney W. Grimes - nonnewline - emit REG_NEWLINE version of OANY 10758fb3f3f6SDavid E. O'Brien == static void nonnewline(struct parse *p); 107658f0484fSRodney W. Grimes * 107758f0484fSRodney W. Grimes * Boy, is this implementation ever a kludge... 107858f0484fSRodney W. Grimes */ 107958f0484fSRodney W. Grimes static void 108058f0484fSRodney W. Grimes nonnewline(p) 10818fb3f3f6SDavid E. O'Brien struct parse *p; 108258f0484fSRodney W. Grimes { 10838fb3f3f6SDavid E. O'Brien char *oldnext = p->next; 10848fb3f3f6SDavid E. O'Brien char *oldend = p->end; 108558f0484fSRodney W. Grimes char bracket[4]; 108658f0484fSRodney W. Grimes 108758f0484fSRodney W. Grimes p->next = bracket; 108858f0484fSRodney W. Grimes p->end = bracket+3; 108958f0484fSRodney W. Grimes bracket[0] = '^'; 109058f0484fSRodney W. Grimes bracket[1] = '\n'; 109158f0484fSRodney W. Grimes bracket[2] = ']'; 109258f0484fSRodney W. Grimes bracket[3] = '\0'; 109358f0484fSRodney W. Grimes p_bracket(p); 109458f0484fSRodney W. Grimes assert(p->next == bracket+3); 109558f0484fSRodney W. Grimes p->next = oldnext; 109658f0484fSRodney W. Grimes p->end = oldend; 109758f0484fSRodney W. Grimes } 109858f0484fSRodney W. Grimes 109958f0484fSRodney W. Grimes /* 110058f0484fSRodney W. Grimes - repeat - generate code for a bounded repetition, recursively if needed 11018fb3f3f6SDavid E. O'Brien == static void repeat(struct parse *p, sopno start, int from, int to); 110258f0484fSRodney W. Grimes */ 110358f0484fSRodney W. Grimes static void 110458f0484fSRodney W. Grimes repeat(p, start, from, to) 11058fb3f3f6SDavid E. O'Brien struct parse *p; 110658f0484fSRodney W. Grimes sopno start; /* operand from here to end of strip */ 110758f0484fSRodney W. Grimes int from; /* repeated from this number */ 110858f0484fSRodney W. Grimes int to; /* to this number of times (maybe INFINITY) */ 110958f0484fSRodney W. Grimes { 11108fb3f3f6SDavid E. O'Brien sopno finish = HERE(); 111158f0484fSRodney W. Grimes # define N 2 111258f0484fSRodney W. Grimes # define INF 3 111358f0484fSRodney W. Grimes # define REP(f, t) ((f)*8 + (t)) 111458f0484fSRodney W. Grimes # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 11158fb3f3f6SDavid E. O'Brien sopno copy; 111658f0484fSRodney W. Grimes 111758f0484fSRodney W. Grimes if (p->error != 0) /* head off possible runaway recursion */ 111858f0484fSRodney W. Grimes return; 111958f0484fSRodney W. Grimes 112058f0484fSRodney W. Grimes assert(from <= to); 112158f0484fSRodney W. Grimes 112258f0484fSRodney W. Grimes switch (REP(MAP(from), MAP(to))) { 112358f0484fSRodney W. Grimes case REP(0, 0): /* must be user doing this */ 112458f0484fSRodney W. Grimes DROP(finish-start); /* drop the operand */ 112558f0484fSRodney W. Grimes break; 112658f0484fSRodney W. Grimes case REP(0, 1): /* as x{1,1}? */ 112758f0484fSRodney W. Grimes case REP(0, N): /* as x{1,n}? */ 112858f0484fSRodney W. Grimes case REP(0, INF): /* as x{1,}? */ 112958f0484fSRodney W. Grimes /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 113058f0484fSRodney W. Grimes INSERT(OCH_, start); /* offset is wrong... */ 113158f0484fSRodney W. Grimes repeat(p, start+1, 1, to); 113258f0484fSRodney W. Grimes ASTERN(OOR1, start); 113358f0484fSRodney W. Grimes AHEAD(start); /* ... fix it */ 113458f0484fSRodney W. Grimes EMIT(OOR2, 0); 113558f0484fSRodney W. Grimes AHEAD(THERE()); 113658f0484fSRodney W. Grimes ASTERN(O_CH, THERETHERE()); 113758f0484fSRodney W. Grimes break; 113858f0484fSRodney W. Grimes case REP(1, 1): /* trivial case */ 113958f0484fSRodney W. Grimes /* done */ 114058f0484fSRodney W. Grimes break; 114158f0484fSRodney W. Grimes case REP(1, N): /* as x?x{1,n-1} */ 114258f0484fSRodney W. Grimes /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 114358f0484fSRodney W. Grimes INSERT(OCH_, start); 114458f0484fSRodney W. Grimes ASTERN(OOR1, start); 114558f0484fSRodney W. Grimes AHEAD(start); 114658f0484fSRodney W. Grimes EMIT(OOR2, 0); /* offset very wrong... */ 114758f0484fSRodney W. Grimes AHEAD(THERE()); /* ...so fix it */ 114858f0484fSRodney W. Grimes ASTERN(O_CH, THERETHERE()); 114958f0484fSRodney W. Grimes copy = dupl(p, start+1, finish+1); 115058f0484fSRodney W. Grimes assert(copy == finish+4); 115158f0484fSRodney W. Grimes repeat(p, copy, 1, to-1); 115258f0484fSRodney W. Grimes break; 115358f0484fSRodney W. Grimes case REP(1, INF): /* as x+ */ 115458f0484fSRodney W. Grimes INSERT(OPLUS_, start); 115558f0484fSRodney W. Grimes ASTERN(O_PLUS, start); 115658f0484fSRodney W. Grimes break; 115758f0484fSRodney W. Grimes case REP(N, N): /* as xx{m-1,n-1} */ 115858f0484fSRodney W. Grimes copy = dupl(p, start, finish); 115958f0484fSRodney W. Grimes repeat(p, copy, from-1, to-1); 116058f0484fSRodney W. Grimes break; 116158f0484fSRodney W. Grimes case REP(N, INF): /* as xx{n-1,INF} */ 116258f0484fSRodney W. Grimes copy = dupl(p, start, finish); 116358f0484fSRodney W. Grimes repeat(p, copy, from-1, to); 116458f0484fSRodney W. Grimes break; 116558f0484fSRodney W. Grimes default: /* "can't happen" */ 116658f0484fSRodney W. Grimes SETERROR(REG_ASSERT); /* just in case */ 116758f0484fSRodney W. Grimes break; 116858f0484fSRodney W. Grimes } 116958f0484fSRodney W. Grimes } 117058f0484fSRodney W. Grimes 117158f0484fSRodney W. Grimes /* 117258f0484fSRodney W. Grimes - seterr - set an error condition 11738fb3f3f6SDavid E. O'Brien == static int seterr(struct parse *p, int e); 117458f0484fSRodney W. Grimes */ 117558f0484fSRodney W. Grimes static int /* useless but makes type checking happy */ 117658f0484fSRodney W. Grimes seterr(p, e) 11778fb3f3f6SDavid E. O'Brien struct parse *p; 117858f0484fSRodney W. Grimes int e; 117958f0484fSRodney W. Grimes { 118058f0484fSRodney W. Grimes if (p->error == 0) /* keep earliest error condition */ 118158f0484fSRodney W. Grimes p->error = e; 118258f0484fSRodney W. Grimes p->next = nuls; /* try to bring things to a halt */ 118358f0484fSRodney W. Grimes p->end = nuls; 118458f0484fSRodney W. Grimes return(0); /* make the return value well-defined */ 118558f0484fSRodney W. Grimes } 118658f0484fSRodney W. Grimes 118758f0484fSRodney W. Grimes /* 118858f0484fSRodney W. Grimes - allocset - allocate a set of characters for [] 11898fb3f3f6SDavid E. O'Brien == static cset *allocset(struct parse *p); 119058f0484fSRodney W. Grimes */ 119158f0484fSRodney W. Grimes static cset * 119258f0484fSRodney W. Grimes allocset(p) 11938fb3f3f6SDavid E. O'Brien struct parse *p; 119458f0484fSRodney W. Grimes { 11958fb3f3f6SDavid E. O'Brien int no = p->g->ncsets++; 11968fb3f3f6SDavid E. O'Brien size_t nc; 11978fb3f3f6SDavid E. O'Brien size_t nbytes; 11988fb3f3f6SDavid E. O'Brien cset *cs; 11998fb3f3f6SDavid E. O'Brien size_t css = (size_t)p->g->csetsize; 12008fb3f3f6SDavid E. O'Brien int i; 120158f0484fSRodney W. Grimes 120258f0484fSRodney W. Grimes if (no >= p->ncsalloc) { /* need another column of space */ 120358f0484fSRodney W. Grimes p->ncsalloc += CHAR_BIT; 120458f0484fSRodney W. Grimes nc = p->ncsalloc; 120558f0484fSRodney W. Grimes assert(nc % CHAR_BIT == 0); 120658f0484fSRodney W. Grimes nbytes = nc / CHAR_BIT * css; 120758f0484fSRodney W. Grimes if (p->g->sets == NULL) 120858f0484fSRodney W. Grimes p->g->sets = (cset *)malloc(nc * sizeof(cset)); 120958f0484fSRodney W. Grimes else 1210e8420087SWarner Losh p->g->sets = (cset *)reallocf((char *)p->g->sets, 121158f0484fSRodney W. Grimes nc * sizeof(cset)); 121258f0484fSRodney W. Grimes if (p->g->setbits == NULL) 121358f0484fSRodney W. Grimes p->g->setbits = (uch *)malloc(nbytes); 121458f0484fSRodney W. Grimes else { 1215e8420087SWarner Losh p->g->setbits = (uch *)reallocf((char *)p->g->setbits, 121658f0484fSRodney W. Grimes nbytes); 121758f0484fSRodney W. Grimes /* xxx this isn't right if setbits is now NULL */ 121858f0484fSRodney W. Grimes for (i = 0; i < no; i++) 121958f0484fSRodney W. Grimes p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 122058f0484fSRodney W. Grimes } 122158f0484fSRodney W. Grimes if (p->g->sets != NULL && p->g->setbits != NULL) 122258f0484fSRodney W. Grimes (void) memset((char *)p->g->setbits + (nbytes - css), 122358f0484fSRodney W. Grimes 0, css); 122458f0484fSRodney W. Grimes else { 122558f0484fSRodney W. Grimes no = 0; 122658f0484fSRodney W. Grimes SETERROR(REG_ESPACE); 122758f0484fSRodney W. Grimes /* caller's responsibility not to do set ops */ 122858f0484fSRodney W. Grimes } 122958f0484fSRodney W. Grimes } 123058f0484fSRodney W. Grimes 123158f0484fSRodney W. Grimes assert(p->g->sets != NULL); /* xxx */ 123258f0484fSRodney W. Grimes cs = &p->g->sets[no]; 123358f0484fSRodney W. Grimes cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 123458f0484fSRodney W. Grimes cs->mask = 1 << ((no) % CHAR_BIT); 123558f0484fSRodney W. Grimes cs->hash = 0; 123658f0484fSRodney W. Grimes cs->smultis = 0; 123758f0484fSRodney W. Grimes cs->multis = NULL; 123858f0484fSRodney W. Grimes 123958f0484fSRodney W. Grimes return(cs); 124058f0484fSRodney W. Grimes } 124158f0484fSRodney W. Grimes 124258f0484fSRodney W. Grimes /* 124358f0484fSRodney W. Grimes - freeset - free a now-unused set 12448fb3f3f6SDavid E. O'Brien == static void freeset(struct parse *p, cset *cs); 124558f0484fSRodney W. Grimes */ 124658f0484fSRodney W. Grimes static void 124758f0484fSRodney W. Grimes freeset(p, cs) 12488fb3f3f6SDavid E. O'Brien struct parse *p; 12498fb3f3f6SDavid E. O'Brien cset *cs; 125058f0484fSRodney W. Grimes { 12518fb3f3f6SDavid E. O'Brien int i; 12528fb3f3f6SDavid E. O'Brien cset *top = &p->g->sets[p->g->ncsets]; 12538fb3f3f6SDavid E. O'Brien size_t css = (size_t)p->g->csetsize; 125458f0484fSRodney W. Grimes 125558f0484fSRodney W. Grimes for (i = 0; i < css; i++) 125658f0484fSRodney W. Grimes CHsub(cs, i); 125758f0484fSRodney W. Grimes if (cs == top-1) /* recover only the easy case */ 125858f0484fSRodney W. Grimes p->g->ncsets--; 125958f0484fSRodney W. Grimes } 126058f0484fSRodney W. Grimes 126158f0484fSRodney W. Grimes /* 126258f0484fSRodney W. Grimes - freezeset - final processing on a set of characters 12638fb3f3f6SDavid E. O'Brien == static int freezeset(struct parse *p, cset *cs); 126458f0484fSRodney W. Grimes * 126558f0484fSRodney W. Grimes * The main task here is merging identical sets. This is usually a waste 126658f0484fSRodney W. Grimes * of time (although the hash code minimizes the overhead), but can win 126758f0484fSRodney W. Grimes * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 126858f0484fSRodney W. Grimes * is done using addition rather than xor -- all ASCII [aA] sets xor to 126958f0484fSRodney W. Grimes * the same value! 127058f0484fSRodney W. Grimes */ 127158f0484fSRodney W. Grimes static int /* set number */ 127258f0484fSRodney W. Grimes freezeset(p, cs) 12738fb3f3f6SDavid E. O'Brien struct parse *p; 12748fb3f3f6SDavid E. O'Brien cset *cs; 127558f0484fSRodney W. Grimes { 12768fb3f3f6SDavid E. O'Brien short h = cs->hash; 12778fb3f3f6SDavid E. O'Brien int i; 12788fb3f3f6SDavid E. O'Brien cset *top = &p->g->sets[p->g->ncsets]; 12798fb3f3f6SDavid E. O'Brien cset *cs2; 12808fb3f3f6SDavid E. O'Brien size_t css = (size_t)p->g->csetsize; 128158f0484fSRodney W. Grimes 128258f0484fSRodney W. Grimes /* look for an earlier one which is the same */ 128358f0484fSRodney W. Grimes for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 128458f0484fSRodney W. Grimes if (cs2->hash == h && cs2 != cs) { 128558f0484fSRodney W. Grimes /* maybe */ 128658f0484fSRodney W. Grimes for (i = 0; i < css; i++) 128758f0484fSRodney W. Grimes if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 128858f0484fSRodney W. Grimes break; /* no */ 128958f0484fSRodney W. Grimes if (i == css) 129058f0484fSRodney W. Grimes break; /* yes */ 129158f0484fSRodney W. Grimes } 129258f0484fSRodney W. Grimes 129358f0484fSRodney W. Grimes if (cs2 < top) { /* found one */ 129458f0484fSRodney W. Grimes freeset(p, cs); 129558f0484fSRodney W. Grimes cs = cs2; 129658f0484fSRodney W. Grimes } 129758f0484fSRodney W. Grimes 129858f0484fSRodney W. Grimes return((int)(cs - p->g->sets)); 129958f0484fSRodney W. Grimes } 130058f0484fSRodney W. Grimes 130158f0484fSRodney W. Grimes /* 130258f0484fSRodney W. Grimes - firstch - return first character in a set (which must have at least one) 13038fb3f3f6SDavid E. O'Brien == static int firstch(struct parse *p, cset *cs); 130458f0484fSRodney W. Grimes */ 130558f0484fSRodney W. Grimes static int /* character; there is no "none" value */ 130658f0484fSRodney W. Grimes firstch(p, cs) 13078fb3f3f6SDavid E. O'Brien struct parse *p; 13088fb3f3f6SDavid E. O'Brien cset *cs; 130958f0484fSRodney W. Grimes { 13108fb3f3f6SDavid E. O'Brien int i; 13118fb3f3f6SDavid E. O'Brien size_t css = (size_t)p->g->csetsize; 131258f0484fSRodney W. Grimes 131358f0484fSRodney W. Grimes for (i = 0; i < css; i++) 131458f0484fSRodney W. Grimes if (CHIN(cs, i)) 131589b86d02SAndrey A. Chernov return((char)i); 131658f0484fSRodney W. Grimes assert(never); 131758f0484fSRodney W. Grimes return(0); /* arbitrary */ 131858f0484fSRodney W. Grimes } 131958f0484fSRodney W. Grimes 132058f0484fSRodney W. Grimes /* 132158f0484fSRodney W. Grimes - nch - number of characters in a set 13228fb3f3f6SDavid E. O'Brien == static int nch(struct parse *p, cset *cs); 132358f0484fSRodney W. Grimes */ 132458f0484fSRodney W. Grimes static int 132558f0484fSRodney W. Grimes nch(p, cs) 13268fb3f3f6SDavid E. O'Brien struct parse *p; 13278fb3f3f6SDavid E. O'Brien cset *cs; 132858f0484fSRodney W. Grimes { 13298fb3f3f6SDavid E. O'Brien int i; 13308fb3f3f6SDavid E. O'Brien size_t css = (size_t)p->g->csetsize; 13318fb3f3f6SDavid E. O'Brien int n = 0; 133258f0484fSRodney W. Grimes 133358f0484fSRodney W. Grimes for (i = 0; i < css; i++) 133458f0484fSRodney W. Grimes if (CHIN(cs, i)) 133558f0484fSRodney W. Grimes n++; 133658f0484fSRodney W. Grimes return(n); 133758f0484fSRodney W. Grimes } 133858f0484fSRodney W. Grimes 133958f0484fSRodney W. Grimes /* 134058f0484fSRodney W. Grimes - mcadd - add a collating element to a cset 13418fb3f3f6SDavid E. O'Brien == static void mcadd(struct parse *p, cset *cs, \ 13428fb3f3f6SDavid E. O'Brien == char *cp); 134358f0484fSRodney W. Grimes */ 134458f0484fSRodney W. Grimes static void 134558f0484fSRodney W. Grimes mcadd(p, cs, cp) 13468fb3f3f6SDavid E. O'Brien struct parse *p; 13478fb3f3f6SDavid E. O'Brien cset *cs; 13488fb3f3f6SDavid E. O'Brien char *cp; 134958f0484fSRodney W. Grimes { 13508fb3f3f6SDavid E. O'Brien size_t oldend = cs->smultis; 135158f0484fSRodney W. Grimes 135258f0484fSRodney W. Grimes cs->smultis += strlen(cp) + 1; 135358f0484fSRodney W. Grimes if (cs->multis == NULL) 135458f0484fSRodney W. Grimes cs->multis = malloc(cs->smultis); 135558f0484fSRodney W. Grimes else 1356e8420087SWarner Losh cs->multis = reallocf(cs->multis, cs->smultis); 135758f0484fSRodney W. Grimes if (cs->multis == NULL) { 135858f0484fSRodney W. Grimes SETERROR(REG_ESPACE); 135958f0484fSRodney W. Grimes return; 136058f0484fSRodney W. Grimes } 136158f0484fSRodney W. Grimes 136258f0484fSRodney W. Grimes (void) strcpy(cs->multis + oldend - 1, cp); 136358f0484fSRodney W. Grimes cs->multis[cs->smultis - 1] = '\0'; 136458f0484fSRodney W. Grimes } 136558f0484fSRodney W. Grimes 136651295a4dSJordan K. Hubbard #if used 136758f0484fSRodney W. Grimes /* 136858f0484fSRodney W. Grimes - mcsub - subtract a collating element from a cset 13698fb3f3f6SDavid E. O'Brien == static void mcsub(cset *cs, char *cp); 137058f0484fSRodney W. Grimes */ 137158f0484fSRodney W. Grimes static void 137258f0484fSRodney W. Grimes mcsub(cs, cp) 13738fb3f3f6SDavid E. O'Brien cset *cs; 13748fb3f3f6SDavid E. O'Brien char *cp; 137558f0484fSRodney W. Grimes { 13768fb3f3f6SDavid E. O'Brien char *fp = mcfind(cs, cp); 13778fb3f3f6SDavid E. O'Brien size_t len = strlen(fp); 137858f0484fSRodney W. Grimes 137958f0484fSRodney W. Grimes assert(fp != NULL); 138058f0484fSRodney W. Grimes (void) memmove(fp, fp + len + 1, 138158f0484fSRodney W. Grimes cs->smultis - (fp + len + 1 - cs->multis)); 138258f0484fSRodney W. Grimes cs->smultis -= len; 138358f0484fSRodney W. Grimes 138458f0484fSRodney W. Grimes if (cs->smultis == 0) { 138558f0484fSRodney W. Grimes free(cs->multis); 138658f0484fSRodney W. Grimes cs->multis = NULL; 138758f0484fSRodney W. Grimes return; 138858f0484fSRodney W. Grimes } 138958f0484fSRodney W. Grimes 1390e8420087SWarner Losh cs->multis = reallocf(cs->multis, cs->smultis); 139158f0484fSRodney W. Grimes assert(cs->multis != NULL); 139258f0484fSRodney W. Grimes } 139358f0484fSRodney W. Grimes 139458f0484fSRodney W. Grimes /* 139558f0484fSRodney W. Grimes - mcin - is a collating element in a cset? 13968fb3f3f6SDavid E. O'Brien == static int mcin(cset *cs, char *cp); 139758f0484fSRodney W. Grimes */ 139858f0484fSRodney W. Grimes static int 139958f0484fSRodney W. Grimes mcin(cs, cp) 14008fb3f3f6SDavid E. O'Brien cset *cs; 14018fb3f3f6SDavid E. O'Brien char *cp; 140258f0484fSRodney W. Grimes { 140358f0484fSRodney W. Grimes return(mcfind(cs, cp) != NULL); 140458f0484fSRodney W. Grimes } 140558f0484fSRodney W. Grimes 140658f0484fSRodney W. Grimes /* 140758f0484fSRodney W. Grimes - mcfind - find a collating element in a cset 14088fb3f3f6SDavid E. O'Brien == static char *mcfind(cset *cs, char *cp); 140958f0484fSRodney W. Grimes */ 141058f0484fSRodney W. Grimes static char * 141158f0484fSRodney W. Grimes mcfind(cs, cp) 14128fb3f3f6SDavid E. O'Brien cset *cs; 14138fb3f3f6SDavid E. O'Brien char *cp; 141458f0484fSRodney W. Grimes { 14158fb3f3f6SDavid E. O'Brien char *p; 141658f0484fSRodney W. Grimes 141758f0484fSRodney W. Grimes if (cs->multis == NULL) 141858f0484fSRodney W. Grimes return(NULL); 141958f0484fSRodney W. Grimes for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 142058f0484fSRodney W. Grimes if (strcmp(cp, p) == 0) 142158f0484fSRodney W. Grimes return(p); 142258f0484fSRodney W. Grimes return(NULL); 142358f0484fSRodney W. Grimes } 142451295a4dSJordan K. Hubbard #endif 142558f0484fSRodney W. Grimes 142658f0484fSRodney W. Grimes /* 142758f0484fSRodney W. Grimes - mcinvert - invert the list of collating elements in a cset 14288fb3f3f6SDavid E. O'Brien == static void mcinvert(struct parse *p, cset *cs); 142958f0484fSRodney W. Grimes * 143058f0484fSRodney W. Grimes * This would have to know the set of possibilities. Implementation 143158f0484fSRodney W. Grimes * is deferred. 143258f0484fSRodney W. Grimes */ 143358f0484fSRodney W. Grimes static void 143458f0484fSRodney W. Grimes mcinvert(p, cs) 14358fb3f3f6SDavid E. O'Brien struct parse *p; 14368fb3f3f6SDavid E. O'Brien cset *cs; 143758f0484fSRodney W. Grimes { 143858f0484fSRodney W. Grimes assert(cs->multis == NULL); /* xxx */ 143958f0484fSRodney W. Grimes } 144058f0484fSRodney W. Grimes 144158f0484fSRodney W. Grimes /* 144258f0484fSRodney W. Grimes - mccase - add case counterparts of the list of collating elements in a cset 14438fb3f3f6SDavid E. O'Brien == static void mccase(struct parse *p, cset *cs); 144458f0484fSRodney W. Grimes * 144558f0484fSRodney W. Grimes * This would have to know the set of possibilities. Implementation 144658f0484fSRodney W. Grimes * is deferred. 144758f0484fSRodney W. Grimes */ 144858f0484fSRodney W. Grimes static void 144958f0484fSRodney W. Grimes mccase(p, cs) 14508fb3f3f6SDavid E. O'Brien struct parse *p; 14518fb3f3f6SDavid E. O'Brien cset *cs; 145258f0484fSRodney W. Grimes { 145358f0484fSRodney W. Grimes assert(cs->multis == NULL); /* xxx */ 145458f0484fSRodney W. Grimes } 145558f0484fSRodney W. Grimes 145658f0484fSRodney W. Grimes /* 145758f0484fSRodney W. Grimes - isinsets - is this character in any sets? 14588fb3f3f6SDavid E. O'Brien == static int isinsets(struct re_guts *g, int c); 145958f0484fSRodney W. Grimes */ 146058f0484fSRodney W. Grimes static int /* predicate */ 146158f0484fSRodney W. Grimes isinsets(g, c) 14628fb3f3f6SDavid E. O'Brien struct re_guts *g; 146358f0484fSRodney W. Grimes int c; 146458f0484fSRodney W. Grimes { 14658fb3f3f6SDavid E. O'Brien uch *col; 14668fb3f3f6SDavid E. O'Brien int i; 14678fb3f3f6SDavid E. O'Brien int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 14688fb3f3f6SDavid E. O'Brien unsigned uc = (uch)c; 146958f0484fSRodney W. Grimes 147058f0484fSRodney W. Grimes for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 147158f0484fSRodney W. Grimes if (col[uc] != 0) 147258f0484fSRodney W. Grimes return(1); 147358f0484fSRodney W. Grimes return(0); 147458f0484fSRodney W. Grimes } 147558f0484fSRodney W. Grimes 147658f0484fSRodney W. Grimes /* 147758f0484fSRodney W. Grimes - samesets - are these two characters in exactly the same sets? 14788fb3f3f6SDavid E. O'Brien == static int samesets(struct re_guts *g, int c1, int c2); 147958f0484fSRodney W. Grimes */ 148058f0484fSRodney W. Grimes static int /* predicate */ 148158f0484fSRodney W. Grimes samesets(g, c1, c2) 14828fb3f3f6SDavid E. O'Brien struct re_guts *g; 148358f0484fSRodney W. Grimes int c1; 148458f0484fSRodney W. Grimes int c2; 148558f0484fSRodney W. Grimes { 14868fb3f3f6SDavid E. O'Brien uch *col; 14878fb3f3f6SDavid E. O'Brien int i; 14888fb3f3f6SDavid E. O'Brien int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 14898fb3f3f6SDavid E. O'Brien unsigned uc1 = (uch)c1; 14908fb3f3f6SDavid E. O'Brien unsigned uc2 = (uch)c2; 149158f0484fSRodney W. Grimes 149258f0484fSRodney W. Grimes for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 149358f0484fSRodney W. Grimes if (col[uc1] != col[uc2]) 149458f0484fSRodney W. Grimes return(0); 149558f0484fSRodney W. Grimes return(1); 149658f0484fSRodney W. Grimes } 149758f0484fSRodney W. Grimes 149858f0484fSRodney W. Grimes /* 149958f0484fSRodney W. Grimes - categorize - sort out character categories 15008fb3f3f6SDavid E. O'Brien == static void categorize(struct parse *p, struct re_guts *g); 150158f0484fSRodney W. Grimes */ 150258f0484fSRodney W. Grimes static void 150358f0484fSRodney W. Grimes categorize(p, g) 150458f0484fSRodney W. Grimes struct parse *p; 15058fb3f3f6SDavid E. O'Brien struct re_guts *g; 150658f0484fSRodney W. Grimes { 15078fb3f3f6SDavid E. O'Brien cat_t *cats = g->categories; 15088fb3f3f6SDavid E. O'Brien int c; 15098fb3f3f6SDavid E. O'Brien int c2; 15108fb3f3f6SDavid E. O'Brien cat_t cat; 151158f0484fSRodney W. Grimes 151258f0484fSRodney W. Grimes /* avoid making error situations worse */ 151358f0484fSRodney W. Grimes if (p->error != 0) 151458f0484fSRodney W. Grimes return; 151558f0484fSRodney W. Grimes 151658f0484fSRodney W. Grimes for (c = CHAR_MIN; c <= CHAR_MAX; c++) 151758f0484fSRodney W. Grimes if (cats[c] == 0 && isinsets(g, c)) { 151858f0484fSRodney W. Grimes cat = g->ncategories++; 151958f0484fSRodney W. Grimes cats[c] = cat; 152058f0484fSRodney W. Grimes for (c2 = c+1; c2 <= CHAR_MAX; c2++) 152158f0484fSRodney W. Grimes if (cats[c2] == 0 && samesets(g, c, c2)) 152258f0484fSRodney W. Grimes cats[c2] = cat; 152358f0484fSRodney W. Grimes } 152458f0484fSRodney W. Grimes } 152558f0484fSRodney W. Grimes 152658f0484fSRodney W. Grimes /* 152758f0484fSRodney W. Grimes - dupl - emit a duplicate of a bunch of sops 15288fb3f3f6SDavid E. O'Brien == static sopno dupl(struct parse *p, sopno start, sopno finish); 152958f0484fSRodney W. Grimes */ 153058f0484fSRodney W. Grimes static sopno /* start of duplicate */ 153158f0484fSRodney W. Grimes dupl(p, start, finish) 15328fb3f3f6SDavid E. O'Brien struct parse *p; 153358f0484fSRodney W. Grimes sopno start; /* from here */ 153458f0484fSRodney W. Grimes sopno finish; /* to this less one */ 153558f0484fSRodney W. Grimes { 15368fb3f3f6SDavid E. O'Brien sopno ret = HERE(); 15378fb3f3f6SDavid E. O'Brien sopno len = finish - start; 153858f0484fSRodney W. Grimes 153958f0484fSRodney W. Grimes assert(finish >= start); 154058f0484fSRodney W. Grimes if (len == 0) 154158f0484fSRodney W. Grimes return(ret); 154258f0484fSRodney W. Grimes enlarge(p, p->ssize + len); /* this many unexpected additions */ 154358f0484fSRodney W. Grimes assert(p->ssize >= p->slen + len); 154458f0484fSRodney W. Grimes (void) memcpy((char *)(p->strip + p->slen), 154558f0484fSRodney W. Grimes (char *)(p->strip + start), (size_t)len*sizeof(sop)); 154658f0484fSRodney W. Grimes p->slen += len; 154758f0484fSRodney W. Grimes return(ret); 154858f0484fSRodney W. Grimes } 154958f0484fSRodney W. Grimes 155058f0484fSRodney W. Grimes /* 155158f0484fSRodney W. Grimes - doemit - emit a strip operator 15528fb3f3f6SDavid E. O'Brien == static void doemit(struct parse *p, sop op, size_t opnd); 155358f0484fSRodney W. Grimes * 155458f0484fSRodney W. Grimes * It might seem better to implement this as a macro with a function as 155558f0484fSRodney W. Grimes * hard-case backup, but it's just too big and messy unless there are 155658f0484fSRodney W. Grimes * some changes to the data structures. Maybe later. 155758f0484fSRodney W. Grimes */ 155858f0484fSRodney W. Grimes static void 155958f0484fSRodney W. Grimes doemit(p, op, opnd) 15608fb3f3f6SDavid E. O'Brien struct parse *p; 156158f0484fSRodney W. Grimes sop op; 156258f0484fSRodney W. Grimes size_t opnd; 156358f0484fSRodney W. Grimes { 156458f0484fSRodney W. Grimes /* avoid making error situations worse */ 156558f0484fSRodney W. Grimes if (p->error != 0) 156658f0484fSRodney W. Grimes return; 156758f0484fSRodney W. Grimes 156858f0484fSRodney W. Grimes /* deal with oversize operands ("can't happen", more or less) */ 156958f0484fSRodney W. Grimes assert(opnd < 1<<OPSHIFT); 157058f0484fSRodney W. Grimes 157158f0484fSRodney W. Grimes /* deal with undersized strip */ 157258f0484fSRodney W. Grimes if (p->slen >= p->ssize) 157358f0484fSRodney W. Grimes enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 157458f0484fSRodney W. Grimes assert(p->slen < p->ssize); 157558f0484fSRodney W. Grimes 157658f0484fSRodney W. Grimes /* finally, it's all reduced to the easy case */ 157758f0484fSRodney W. Grimes p->strip[p->slen++] = SOP(op, opnd); 157858f0484fSRodney W. Grimes } 157958f0484fSRodney W. Grimes 158058f0484fSRodney W. Grimes /* 158158f0484fSRodney W. Grimes - doinsert - insert a sop into the strip 15828fb3f3f6SDavid E. O'Brien == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); 158358f0484fSRodney W. Grimes */ 158458f0484fSRodney W. Grimes static void 158558f0484fSRodney W. Grimes doinsert(p, op, opnd, pos) 15868fb3f3f6SDavid E. O'Brien struct parse *p; 158758f0484fSRodney W. Grimes sop op; 158858f0484fSRodney W. Grimes size_t opnd; 158958f0484fSRodney W. Grimes sopno pos; 159058f0484fSRodney W. Grimes { 15918fb3f3f6SDavid E. O'Brien sopno sn; 15928fb3f3f6SDavid E. O'Brien sop s; 15938fb3f3f6SDavid E. O'Brien int i; 159458f0484fSRodney W. Grimes 159558f0484fSRodney W. Grimes /* avoid making error situations worse */ 159658f0484fSRodney W. Grimes if (p->error != 0) 159758f0484fSRodney W. Grimes return; 159858f0484fSRodney W. Grimes 159958f0484fSRodney W. Grimes sn = HERE(); 160058f0484fSRodney W. Grimes EMIT(op, opnd); /* do checks, ensure space */ 160158f0484fSRodney W. Grimes assert(HERE() == sn+1); 160258f0484fSRodney W. Grimes s = p->strip[sn]; 160358f0484fSRodney W. Grimes 160458f0484fSRodney W. Grimes /* adjust paren pointers */ 160558f0484fSRodney W. Grimes assert(pos > 0); 160658f0484fSRodney W. Grimes for (i = 1; i < NPAREN; i++) { 160758f0484fSRodney W. Grimes if (p->pbegin[i] >= pos) { 160858f0484fSRodney W. Grimes p->pbegin[i]++; 160958f0484fSRodney W. Grimes } 161058f0484fSRodney W. Grimes if (p->pend[i] >= pos) { 161158f0484fSRodney W. Grimes p->pend[i]++; 161258f0484fSRodney W. Grimes } 161358f0484fSRodney W. Grimes } 161458f0484fSRodney W. Grimes 161558f0484fSRodney W. Grimes memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 161658f0484fSRodney W. Grimes (HERE()-pos-1)*sizeof(sop)); 161758f0484fSRodney W. Grimes p->strip[pos] = s; 161858f0484fSRodney W. Grimes } 161958f0484fSRodney W. Grimes 162058f0484fSRodney W. Grimes /* 162158f0484fSRodney W. Grimes - dofwd - complete a forward reference 16228fb3f3f6SDavid E. O'Brien == static void dofwd(struct parse *p, sopno pos, sop value); 162358f0484fSRodney W. Grimes */ 162458f0484fSRodney W. Grimes static void 162558f0484fSRodney W. Grimes dofwd(p, pos, value) 16268fb3f3f6SDavid E. O'Brien struct parse *p; 16278fb3f3f6SDavid E. O'Brien sopno pos; 162858f0484fSRodney W. Grimes sop value; 162958f0484fSRodney W. Grimes { 163058f0484fSRodney W. Grimes /* avoid making error situations worse */ 163158f0484fSRodney W. Grimes if (p->error != 0) 163258f0484fSRodney W. Grimes return; 163358f0484fSRodney W. Grimes 163458f0484fSRodney W. Grimes assert(value < 1<<OPSHIFT); 163558f0484fSRodney W. Grimes p->strip[pos] = OP(p->strip[pos]) | value; 163658f0484fSRodney W. Grimes } 163758f0484fSRodney W. Grimes 163858f0484fSRodney W. Grimes /* 163958f0484fSRodney W. Grimes - enlarge - enlarge the strip 16408fb3f3f6SDavid E. O'Brien == static void enlarge(struct parse *p, sopno size); 164158f0484fSRodney W. Grimes */ 164258f0484fSRodney W. Grimes static void 164358f0484fSRodney W. Grimes enlarge(p, size) 16448fb3f3f6SDavid E. O'Brien struct parse *p; 16458fb3f3f6SDavid E. O'Brien sopno size; 164658f0484fSRodney W. Grimes { 16478fb3f3f6SDavid E. O'Brien sop *sp; 164858f0484fSRodney W. Grimes 164958f0484fSRodney W. Grimes if (p->ssize >= size) 165058f0484fSRodney W. Grimes return; 165158f0484fSRodney W. Grimes 165258f0484fSRodney W. Grimes sp = (sop *)realloc(p->strip, size*sizeof(sop)); 165358f0484fSRodney W. Grimes if (sp == NULL) { 165458f0484fSRodney W. Grimes SETERROR(REG_ESPACE); 165558f0484fSRodney W. Grimes return; 165658f0484fSRodney W. Grimes } 165758f0484fSRodney W. Grimes p->strip = sp; 165858f0484fSRodney W. Grimes p->ssize = size; 165958f0484fSRodney W. Grimes } 166058f0484fSRodney W. Grimes 166158f0484fSRodney W. Grimes /* 166258f0484fSRodney W. Grimes - stripsnug - compact the strip 16638fb3f3f6SDavid E. O'Brien == static void stripsnug(struct parse *p, struct re_guts *g); 166458f0484fSRodney W. Grimes */ 166558f0484fSRodney W. Grimes static void 166658f0484fSRodney W. Grimes stripsnug(p, g) 16678fb3f3f6SDavid E. O'Brien struct parse *p; 16688fb3f3f6SDavid E. O'Brien struct re_guts *g; 166958f0484fSRodney W. Grimes { 167058f0484fSRodney W. Grimes g->nstates = p->slen; 167158f0484fSRodney W. Grimes g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); 167258f0484fSRodney W. Grimes if (g->strip == NULL) { 167358f0484fSRodney W. Grimes SETERROR(REG_ESPACE); 167458f0484fSRodney W. Grimes g->strip = p->strip; 167558f0484fSRodney W. Grimes } 167658f0484fSRodney W. Grimes } 167758f0484fSRodney W. Grimes 167858f0484fSRodney W. Grimes /* 167958f0484fSRodney W. Grimes - findmust - fill in must and mlen with longest mandatory literal string 16808fb3f3f6SDavid E. O'Brien == static void findmust(struct parse *p, struct re_guts *g); 168158f0484fSRodney W. Grimes * 168258f0484fSRodney W. Grimes * This algorithm could do fancy things like analyzing the operands of | 168358f0484fSRodney W. Grimes * for common subsequences. Someday. This code is simple and finds most 168458f0484fSRodney W. Grimes * of the interesting cases. 168558f0484fSRodney W. Grimes * 168658f0484fSRodney W. Grimes * Note that must and mlen got initialized during setup. 168758f0484fSRodney W. Grimes */ 168858f0484fSRodney W. Grimes static void 168958f0484fSRodney W. Grimes findmust(p, g) 169058f0484fSRodney W. Grimes struct parse *p; 16918fb3f3f6SDavid E. O'Brien struct re_guts *g; 169258f0484fSRodney W. Grimes { 16938fb3f3f6SDavid E. O'Brien sop *scan; 169458f0484fSRodney W. Grimes sop *start; 16958fb3f3f6SDavid E. O'Brien sop *newstart; 16968fb3f3f6SDavid E. O'Brien sopno newlen; 16978fb3f3f6SDavid E. O'Brien sop s; 16988fb3f3f6SDavid E. O'Brien char *cp; 16998fb3f3f6SDavid E. O'Brien sopno i; 1700e6a886d8SDaniel C. Sobral int offset; 1701e6a886d8SDaniel C. Sobral int cs, mccs; 170258f0484fSRodney W. Grimes 170358f0484fSRodney W. Grimes /* avoid making error situations worse */ 170458f0484fSRodney W. Grimes if (p->error != 0) 170558f0484fSRodney W. Grimes return; 170658f0484fSRodney W. Grimes 1707e6a886d8SDaniel C. Sobral /* Find out if we can handle OANYOF or not */ 1708e6a886d8SDaniel C. Sobral mccs = 0; 1709e6a886d8SDaniel C. Sobral for (cs = 0; cs < g->ncsets; cs++) 1710e6a886d8SDaniel C. Sobral if (g->sets[cs].multis != NULL) 1711e6a886d8SDaniel C. Sobral mccs = 1; 1712e6a886d8SDaniel C. Sobral 171358f0484fSRodney W. Grimes /* find the longest OCHAR sequence in strip */ 171458f0484fSRodney W. Grimes newlen = 0; 1715e6a886d8SDaniel C. Sobral offset = 0; 1716e6a886d8SDaniel C. Sobral g->moffset = 0; 171758f0484fSRodney W. Grimes scan = g->strip + 1; 171858f0484fSRodney W. Grimes do { 171958f0484fSRodney W. Grimes s = *scan++; 172058f0484fSRodney W. Grimes switch (OP(s)) { 172158f0484fSRodney W. Grimes case OCHAR: /* sequence member */ 172258f0484fSRodney W. Grimes if (newlen == 0) /* new sequence */ 172358f0484fSRodney W. Grimes newstart = scan - 1; 172458f0484fSRodney W. Grimes newlen++; 172558f0484fSRodney W. Grimes break; 172658f0484fSRodney W. Grimes case OPLUS_: /* things that don't break one */ 172758f0484fSRodney W. Grimes case OLPAREN: 172858f0484fSRodney W. Grimes case ORPAREN: 172958f0484fSRodney W. Grimes break; 173058f0484fSRodney W. Grimes case OQUEST_: /* things that must be skipped */ 173158f0484fSRodney W. Grimes case OCH_: 1732e6a886d8SDaniel C. Sobral offset = altoffset(scan, offset, mccs); 173358f0484fSRodney W. Grimes scan--; 173458f0484fSRodney W. Grimes do { 173558f0484fSRodney W. Grimes scan += OPND(s); 173658f0484fSRodney W. Grimes s = *scan; 173758f0484fSRodney W. Grimes /* assert() interferes w debug printouts */ 173858f0484fSRodney W. Grimes if (OP(s) != O_QUEST && OP(s) != O_CH && 173958f0484fSRodney W. Grimes OP(s) != OOR2) { 174058f0484fSRodney W. Grimes g->iflags |= BAD; 174158f0484fSRodney W. Grimes return; 174258f0484fSRodney W. Grimes } 174358f0484fSRodney W. Grimes } while (OP(s) != O_QUEST && OP(s) != O_CH); 17447fed38d0SPhilippe Charnier /* FALLTHROUGH */ 1745e6a886d8SDaniel C. Sobral case OBOW: /* things that break a sequence */ 1746e6a886d8SDaniel C. Sobral case OEOW: 1747e6a886d8SDaniel C. Sobral case OBOL: 1748e6a886d8SDaniel C. Sobral case OEOL: 1749e6a886d8SDaniel C. Sobral case O_QUEST: 1750e6a886d8SDaniel C. Sobral case O_CH: 1751e6a886d8SDaniel C. Sobral case OEND: 175258f0484fSRodney W. Grimes if (newlen > g->mlen) { /* ends one */ 175358f0484fSRodney W. Grimes start = newstart; 175458f0484fSRodney W. Grimes g->mlen = newlen; 1755e6a886d8SDaniel C. Sobral if (offset > -1) { 1756e6a886d8SDaniel C. Sobral g->moffset += offset; 1757e6a886d8SDaniel C. Sobral offset = newlen; 1758e6a886d8SDaniel C. Sobral } else 1759e6a886d8SDaniel C. Sobral g->moffset = offset; 1760e6a886d8SDaniel C. Sobral } else { 1761e6a886d8SDaniel C. Sobral if (offset > -1) 1762e6a886d8SDaniel C. Sobral offset += newlen; 176358f0484fSRodney W. Grimes } 176458f0484fSRodney W. Grimes newlen = 0; 176558f0484fSRodney W. Grimes break; 1766e6a886d8SDaniel C. Sobral case OANY: 1767e6a886d8SDaniel C. Sobral if (newlen > g->mlen) { /* ends one */ 1768e6a886d8SDaniel C. Sobral start = newstart; 1769e6a886d8SDaniel C. Sobral g->mlen = newlen; 1770e6a886d8SDaniel C. Sobral if (offset > -1) { 1771e6a886d8SDaniel C. Sobral g->moffset += offset; 1772e6a886d8SDaniel C. Sobral offset = newlen; 1773e6a886d8SDaniel C. Sobral } else 1774e6a886d8SDaniel C. Sobral g->moffset = offset; 1775e6a886d8SDaniel C. Sobral } else { 1776e6a886d8SDaniel C. Sobral if (offset > -1) 1777e6a886d8SDaniel C. Sobral offset += newlen; 1778e6a886d8SDaniel C. Sobral } 1779e6a886d8SDaniel C. Sobral if (offset > -1) 1780e6a886d8SDaniel C. Sobral offset++; 1781e6a886d8SDaniel C. Sobral newlen = 0; 1782e6a886d8SDaniel C. Sobral break; 1783e6a886d8SDaniel C. Sobral case OANYOF: /* may or may not invalidate offset */ 1784e6a886d8SDaniel C. Sobral /* First, everything as OANY */ 1785e6a886d8SDaniel C. Sobral if (newlen > g->mlen) { /* ends one */ 1786e6a886d8SDaniel C. Sobral start = newstart; 1787e6a886d8SDaniel C. Sobral g->mlen = newlen; 1788e6a886d8SDaniel C. Sobral if (offset > -1) { 1789e6a886d8SDaniel C. Sobral g->moffset += offset; 1790e6a886d8SDaniel C. Sobral offset = newlen; 1791e6a886d8SDaniel C. Sobral } else 1792e6a886d8SDaniel C. Sobral g->moffset = offset; 1793e6a886d8SDaniel C. Sobral } else { 1794e6a886d8SDaniel C. Sobral if (offset > -1) 1795e6a886d8SDaniel C. Sobral offset += newlen; 1796e6a886d8SDaniel C. Sobral } 1797e6a886d8SDaniel C. Sobral if (offset > -1) 1798e6a886d8SDaniel C. Sobral offset++; 1799e6a886d8SDaniel C. Sobral newlen = 0; 1800e6a886d8SDaniel C. Sobral /* And, now, if we found out we can't deal with 1801e6a886d8SDaniel C. Sobral * it, make offset = -1. 1802e6a886d8SDaniel C. Sobral */ 1803e6a886d8SDaniel C. Sobral if (mccs) 1804e6a886d8SDaniel C. Sobral offset = -1; 1805e6a886d8SDaniel C. Sobral break; 1806e6a886d8SDaniel C. Sobral default: 1807e6a886d8SDaniel C. Sobral /* Anything here makes it impossible or too hard 1808e6a886d8SDaniel C. Sobral * to calculate the offset -- so we give up; 1809e6a886d8SDaniel C. Sobral * save the last known good offset, in case the 1810e6a886d8SDaniel C. Sobral * must sequence doesn't occur later. 1811e6a886d8SDaniel C. Sobral */ 1812e6a886d8SDaniel C. Sobral if (newlen > g->mlen) { /* ends one */ 1813e6a886d8SDaniel C. Sobral start = newstart; 1814e6a886d8SDaniel C. Sobral g->mlen = newlen; 1815e6a886d8SDaniel C. Sobral if (offset > -1) 1816e6a886d8SDaniel C. Sobral g->moffset += offset; 1817e6a886d8SDaniel C. Sobral else 1818e6a886d8SDaniel C. Sobral g->moffset = offset; 1819e6a886d8SDaniel C. Sobral } 1820e6a886d8SDaniel C. Sobral offset = -1; 1821e6a886d8SDaniel C. Sobral newlen = 0; 1822e6a886d8SDaniel C. Sobral break; 182358f0484fSRodney W. Grimes } 182458f0484fSRodney W. Grimes } while (OP(s) != OEND); 182558f0484fSRodney W. Grimes 1826e6a886d8SDaniel C. Sobral if (g->mlen == 0) { /* there isn't one */ 1827e6a886d8SDaniel C. Sobral g->moffset = -1; 182858f0484fSRodney W. Grimes return; 1829e6a886d8SDaniel C. Sobral } 183058f0484fSRodney W. Grimes 183158f0484fSRodney W. Grimes /* turn it into a character string */ 183258f0484fSRodney W. Grimes g->must = malloc((size_t)g->mlen + 1); 183358f0484fSRodney W. Grimes if (g->must == NULL) { /* argh; just forget it */ 183458f0484fSRodney W. Grimes g->mlen = 0; 1835e6a886d8SDaniel C. Sobral g->moffset = -1; 183658f0484fSRodney W. Grimes return; 183758f0484fSRodney W. Grimes } 183858f0484fSRodney W. Grimes cp = g->must; 183958f0484fSRodney W. Grimes scan = start; 184058f0484fSRodney W. Grimes for (i = g->mlen; i > 0; i--) { 184158f0484fSRodney W. Grimes while (OP(s = *scan++) != OCHAR) 184258f0484fSRodney W. Grimes continue; 184358f0484fSRodney W. Grimes assert(cp < g->must + g->mlen); 184458f0484fSRodney W. Grimes *cp++ = (char)OPND(s); 184558f0484fSRodney W. Grimes } 184658f0484fSRodney W. Grimes assert(cp == g->must + g->mlen); 184758f0484fSRodney W. Grimes *cp++ = '\0'; /* just on general principles */ 184858f0484fSRodney W. Grimes } 184958f0484fSRodney W. Grimes 185058f0484fSRodney W. Grimes /* 1851e6a886d8SDaniel C. Sobral - altoffset - choose biggest offset among multiple choices 18529868274bSDaniel C. Sobral == static int altoffset(sop *scan, int offset, int mccs); 1853e6a886d8SDaniel C. Sobral * 1854e6a886d8SDaniel C. Sobral * Compute, recursively if necessary, the largest offset among multiple 1855e6a886d8SDaniel C. Sobral * re paths. 1856e6a886d8SDaniel C. Sobral */ 1857e6a886d8SDaniel C. Sobral static int 1858e6a886d8SDaniel C. Sobral altoffset(scan, offset, mccs) 1859e6a886d8SDaniel C. Sobral sop *scan; 1860e6a886d8SDaniel C. Sobral int offset; 1861e6a886d8SDaniel C. Sobral int mccs; 1862e6a886d8SDaniel C. Sobral { 1863e6a886d8SDaniel C. Sobral int largest; 1864e6a886d8SDaniel C. Sobral int try; 1865e6a886d8SDaniel C. Sobral sop s; 1866e6a886d8SDaniel C. Sobral 1867e6a886d8SDaniel C. Sobral /* If we gave up already on offsets, return */ 1868e6a886d8SDaniel C. Sobral if (offset == -1) 1869e6a886d8SDaniel C. Sobral return -1; 1870e6a886d8SDaniel C. Sobral 1871e6a886d8SDaniel C. Sobral largest = 0; 1872e6a886d8SDaniel C. Sobral try = 0; 1873e6a886d8SDaniel C. Sobral s = *scan++; 1874e6a886d8SDaniel C. Sobral while (OP(s) != O_QUEST && OP(s) != O_CH) { 1875e6a886d8SDaniel C. Sobral switch (OP(s)) { 1876e6a886d8SDaniel C. Sobral case OOR1: 1877e6a886d8SDaniel C. Sobral if (try > largest) 1878e6a886d8SDaniel C. Sobral largest = try; 1879e6a886d8SDaniel C. Sobral try = 0; 1880e6a886d8SDaniel C. Sobral break; 1881e6a886d8SDaniel C. Sobral case OQUEST_: 1882e6a886d8SDaniel C. Sobral case OCH_: 1883e6a886d8SDaniel C. Sobral try = altoffset(scan, try, mccs); 1884e6a886d8SDaniel C. Sobral if (try == -1) 1885e6a886d8SDaniel C. Sobral return -1; 1886e6a886d8SDaniel C. Sobral scan--; 1887e6a886d8SDaniel C. Sobral do { 1888e6a886d8SDaniel C. Sobral scan += OPND(s); 1889e6a886d8SDaniel C. Sobral s = *scan; 1890e6a886d8SDaniel C. Sobral if (OP(s) != O_QUEST && OP(s) != O_CH && 1891e6a886d8SDaniel C. Sobral OP(s) != OOR2) 1892e6a886d8SDaniel C. Sobral return -1; 1893e6a886d8SDaniel C. Sobral } while (OP(s) != O_QUEST && OP(s) != O_CH); 18948f9e434fSDaniel C. Sobral /* We must skip to the next position, or we'll 18958f9e434fSDaniel C. Sobral * leave altoffset() too early. 18968f9e434fSDaniel C. Sobral */ 18978f9e434fSDaniel C. Sobral scan++; 1898e6a886d8SDaniel C. Sobral break; 1899e6a886d8SDaniel C. Sobral case OANYOF: 1900e6a886d8SDaniel C. Sobral if (mccs) 1901e6a886d8SDaniel C. Sobral return -1; 1902e6a886d8SDaniel C. Sobral case OCHAR: 1903e6a886d8SDaniel C. Sobral case OANY: 1904e6a886d8SDaniel C. Sobral try++; 1905e6a886d8SDaniel C. Sobral case OBOW: 1906e6a886d8SDaniel C. Sobral case OEOW: 1907e6a886d8SDaniel C. Sobral case OLPAREN: 1908e6a886d8SDaniel C. Sobral case ORPAREN: 1909e6a886d8SDaniel C. Sobral case OOR2: 1910e6a886d8SDaniel C. Sobral break; 1911e6a886d8SDaniel C. Sobral default: 1912e6a886d8SDaniel C. Sobral try = -1; 1913e6a886d8SDaniel C. Sobral break; 1914e6a886d8SDaniel C. Sobral } 1915e6a886d8SDaniel C. Sobral if (try == -1) 1916e6a886d8SDaniel C. Sobral return -1; 1917e6a886d8SDaniel C. Sobral s = *scan++; 1918e6a886d8SDaniel C. Sobral } 1919e6a886d8SDaniel C. Sobral 1920e6a886d8SDaniel C. Sobral if (try > largest) 1921e6a886d8SDaniel C. Sobral largest = try; 1922e6a886d8SDaniel C. Sobral 1923e6a886d8SDaniel C. Sobral return largest+offset; 1924e6a886d8SDaniel C. Sobral } 1925e6a886d8SDaniel C. Sobral 1926e6a886d8SDaniel C. Sobral /* 19276049d9f0SDaniel C. Sobral - computejumps - compute char jumps for BM scan 19288fb3f3f6SDavid E. O'Brien == static void computejumps(struct parse *p, struct re_guts *g); 19296049d9f0SDaniel C. Sobral * 19306049d9f0SDaniel C. Sobral * This algorithm assumes g->must exists and is has size greater than 19316049d9f0SDaniel C. Sobral * zero. It's based on the algorithm found on Computer Algorithms by 19326049d9f0SDaniel C. Sobral * Sara Baase. 19336049d9f0SDaniel C. Sobral * 19346049d9f0SDaniel C. Sobral * A char jump is the number of characters one needs to jump based on 19356049d9f0SDaniel C. Sobral * the value of the character from the text that was mismatched. 19366049d9f0SDaniel C. Sobral */ 19376049d9f0SDaniel C. Sobral static void 19386049d9f0SDaniel C. Sobral computejumps(p, g) 19396049d9f0SDaniel C. Sobral struct parse *p; 19406049d9f0SDaniel C. Sobral struct re_guts *g; 19416049d9f0SDaniel C. Sobral { 19426049d9f0SDaniel C. Sobral int ch; 19436049d9f0SDaniel C. Sobral int mindex; 19446049d9f0SDaniel C. Sobral 19456049d9f0SDaniel C. Sobral /* Avoid making errors worse */ 19466049d9f0SDaniel C. Sobral if (p->error != 0) 19476049d9f0SDaniel C. Sobral return; 19486049d9f0SDaniel C. Sobral 1949517bffcaSDaniel C. Sobral g->charjump = (int*) malloc((NC + 1) * sizeof(int)); 19506049d9f0SDaniel C. Sobral if (g->charjump == NULL) /* Not a fatal error */ 19516049d9f0SDaniel C. Sobral return; 1952c5e125bbSDaniel C. Sobral /* Adjust for signed chars, if necessary */ 1953c5e125bbSDaniel C. Sobral g->charjump = &g->charjump[-(CHAR_MIN)]; 19546049d9f0SDaniel C. Sobral 19556049d9f0SDaniel C. Sobral /* If the character does not exist in the pattern, the jump 19566049d9f0SDaniel C. Sobral * is equal to the number of characters in the pattern. 19576049d9f0SDaniel C. Sobral */ 1958c5e125bbSDaniel C. Sobral for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++) 19596049d9f0SDaniel C. Sobral g->charjump[ch] = g->mlen; 19606049d9f0SDaniel C. Sobral 19616049d9f0SDaniel C. Sobral /* If the character does exist, compute the jump that would 19626049d9f0SDaniel C. Sobral * take us to the last character in the pattern equal to it 19636049d9f0SDaniel C. Sobral * (notice that we match right to left, so that last character 19646049d9f0SDaniel C. Sobral * is the first one that would be matched). 19656049d9f0SDaniel C. Sobral */ 19666049d9f0SDaniel C. Sobral for (mindex = 0; mindex < g->mlen; mindex++) 1967c5e125bbSDaniel C. Sobral g->charjump[g->must[mindex]] = g->mlen - mindex - 1; 19686049d9f0SDaniel C. Sobral } 19696049d9f0SDaniel C. Sobral 19706049d9f0SDaniel C. Sobral /* 19716049d9f0SDaniel C. Sobral - computematchjumps - compute match jumps for BM scan 19728fb3f3f6SDavid E. O'Brien == static void computematchjumps(struct parse *p, struct re_guts *g); 19736049d9f0SDaniel C. Sobral * 19746049d9f0SDaniel C. Sobral * This algorithm assumes g->must exists and is has size greater than 19756049d9f0SDaniel C. Sobral * zero. It's based on the algorithm found on Computer Algorithms by 19766049d9f0SDaniel C. Sobral * Sara Baase. 19776049d9f0SDaniel C. Sobral * 19786049d9f0SDaniel C. Sobral * A match jump is the number of characters one needs to advance based 19796049d9f0SDaniel C. Sobral * on the already-matched suffix. 19806049d9f0SDaniel C. Sobral * Notice that all values here are minus (g->mlen-1), because of the way 19816049d9f0SDaniel C. Sobral * the search algorithm works. 19826049d9f0SDaniel C. Sobral */ 19836049d9f0SDaniel C. Sobral static void 19846049d9f0SDaniel C. Sobral computematchjumps(p, g) 19856049d9f0SDaniel C. Sobral struct parse *p; 19866049d9f0SDaniel C. Sobral struct re_guts *g; 19876049d9f0SDaniel C. Sobral { 19886049d9f0SDaniel C. Sobral int mindex; /* General "must" iterator */ 19896049d9f0SDaniel C. Sobral int suffix; /* Keeps track of matching suffix */ 19906049d9f0SDaniel C. Sobral int ssuffix; /* Keeps track of suffixes' suffix */ 19916049d9f0SDaniel C. Sobral int* pmatches; /* pmatches[k] points to the next i 19926049d9f0SDaniel C. Sobral * such that i+1...mlen is a substring 19936049d9f0SDaniel C. Sobral * of k+1...k+mlen-i-1 19946049d9f0SDaniel C. Sobral */ 19956049d9f0SDaniel C. Sobral 19966049d9f0SDaniel C. Sobral /* Avoid making errors worse */ 19976049d9f0SDaniel C. Sobral if (p->error != 0) 19986049d9f0SDaniel C. Sobral return; 19996049d9f0SDaniel C. Sobral 2000517bffcaSDaniel C. Sobral pmatches = (int*) malloc(g->mlen * sizeof(unsigned int)); 20016049d9f0SDaniel C. Sobral if (pmatches == NULL) { 20026049d9f0SDaniel C. Sobral g->matchjump = NULL; 20036049d9f0SDaniel C. Sobral return; 20046049d9f0SDaniel C. Sobral } 20056049d9f0SDaniel C. Sobral 2006517bffcaSDaniel C. Sobral g->matchjump = (int*) malloc(g->mlen * sizeof(unsigned int)); 20076049d9f0SDaniel C. Sobral if (g->matchjump == NULL) /* Not a fatal error */ 20086049d9f0SDaniel C. Sobral return; 20096049d9f0SDaniel C. Sobral 20106049d9f0SDaniel C. Sobral /* Set maximum possible jump for each character in the pattern */ 20116049d9f0SDaniel C. Sobral for (mindex = 0; mindex < g->mlen; mindex++) 20126049d9f0SDaniel C. Sobral g->matchjump[mindex] = 2*g->mlen - mindex - 1; 20136049d9f0SDaniel C. Sobral 20146049d9f0SDaniel C. Sobral /* Compute pmatches[] */ 20156049d9f0SDaniel C. Sobral for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0; 20166049d9f0SDaniel C. Sobral mindex--, suffix--) { 20176049d9f0SDaniel C. Sobral pmatches[mindex] = suffix; 20186049d9f0SDaniel C. Sobral 20196049d9f0SDaniel C. Sobral /* If a mismatch is found, interrupting the substring, 20206049d9f0SDaniel C. Sobral * compute the matchjump for that position. If no 20216049d9f0SDaniel C. Sobral * mismatch is found, then a text substring mismatched 20226049d9f0SDaniel C. Sobral * against the suffix will also mismatch against the 20236049d9f0SDaniel C. Sobral * substring. 20246049d9f0SDaniel C. Sobral */ 20256049d9f0SDaniel C. Sobral while (suffix < g->mlen 20266049d9f0SDaniel C. Sobral && g->must[mindex] != g->must[suffix]) { 20276049d9f0SDaniel C. Sobral g->matchjump[suffix] = MIN(g->matchjump[suffix], 20286049d9f0SDaniel C. Sobral g->mlen - mindex - 1); 20296049d9f0SDaniel C. Sobral suffix = pmatches[suffix]; 20306049d9f0SDaniel C. Sobral } 20316049d9f0SDaniel C. Sobral } 20326049d9f0SDaniel C. Sobral 20336049d9f0SDaniel C. Sobral /* Compute the matchjump up to the last substring found to jump 20346049d9f0SDaniel C. Sobral * to the beginning of the largest must pattern prefix matching 20356049d9f0SDaniel C. Sobral * it's own suffix. 20366049d9f0SDaniel C. Sobral */ 20376049d9f0SDaniel C. Sobral for (mindex = 0; mindex <= suffix; mindex++) 20386049d9f0SDaniel C. Sobral g->matchjump[mindex] = MIN(g->matchjump[mindex], 20396049d9f0SDaniel C. Sobral g->mlen + suffix - mindex); 20406049d9f0SDaniel C. Sobral 20416049d9f0SDaniel C. Sobral ssuffix = pmatches[suffix]; 20426049d9f0SDaniel C. Sobral while (suffix < g->mlen) { 20439868274bSDaniel C. Sobral while (suffix <= ssuffix && suffix < g->mlen) { 20446049d9f0SDaniel C. Sobral g->matchjump[suffix] = MIN(g->matchjump[suffix], 20456049d9f0SDaniel C. Sobral g->mlen + ssuffix - suffix); 20466049d9f0SDaniel C. Sobral suffix++; 20476049d9f0SDaniel C. Sobral } 20488e2f75b8SDaniel C. Sobral if (suffix < g->mlen) 20496049d9f0SDaniel C. Sobral ssuffix = pmatches[ssuffix]; 20506049d9f0SDaniel C. Sobral } 20516049d9f0SDaniel C. Sobral 20526049d9f0SDaniel C. Sobral free(pmatches); 20536049d9f0SDaniel C. Sobral } 20546049d9f0SDaniel C. Sobral 20556049d9f0SDaniel C. Sobral /* 205658f0484fSRodney W. Grimes - pluscount - count + nesting 20578fb3f3f6SDavid E. O'Brien == static sopno pluscount(struct parse *p, struct re_guts *g); 205858f0484fSRodney W. Grimes */ 205958f0484fSRodney W. Grimes static sopno /* nesting depth */ 206058f0484fSRodney W. Grimes pluscount(p, g) 206158f0484fSRodney W. Grimes struct parse *p; 20628fb3f3f6SDavid E. O'Brien struct re_guts *g; 206358f0484fSRodney W. Grimes { 20648fb3f3f6SDavid E. O'Brien sop *scan; 20658fb3f3f6SDavid E. O'Brien sop s; 20668fb3f3f6SDavid E. O'Brien sopno plusnest = 0; 20678fb3f3f6SDavid E. O'Brien sopno maxnest = 0; 206858f0484fSRodney W. Grimes 206958f0484fSRodney W. Grimes if (p->error != 0) 207058f0484fSRodney W. Grimes return(0); /* there may not be an OEND */ 207158f0484fSRodney W. Grimes 207258f0484fSRodney W. Grimes scan = g->strip + 1; 207358f0484fSRodney W. Grimes do { 207458f0484fSRodney W. Grimes s = *scan++; 207558f0484fSRodney W. Grimes switch (OP(s)) { 207658f0484fSRodney W. Grimes case OPLUS_: 207758f0484fSRodney W. Grimes plusnest++; 207858f0484fSRodney W. Grimes break; 207958f0484fSRodney W. Grimes case O_PLUS: 208058f0484fSRodney W. Grimes if (plusnest > maxnest) 208158f0484fSRodney W. Grimes maxnest = plusnest; 208258f0484fSRodney W. Grimes plusnest--; 208358f0484fSRodney W. Grimes break; 208458f0484fSRodney W. Grimes } 208558f0484fSRodney W. Grimes } while (OP(s) != OEND); 208658f0484fSRodney W. Grimes if (plusnest != 0) 208758f0484fSRodney W. Grimes g->iflags |= BAD; 208858f0484fSRodney W. Grimes return(maxnest); 208958f0484fSRodney W. Grimes } 2090