158f0484fSRodney W. Grimes /*- 258f0484fSRodney W. Grimes * Copyright (c) 1992, 1993, 1994 Henry Spencer. 358f0484fSRodney W. Grimes * Copyright (c) 1992, 1993, 1994 458f0484fSRodney W. Grimes * The Regents of the University of California. All rights reserved. 558f0484fSRodney W. Grimes * 658f0484fSRodney W. Grimes * This code is derived from software contributed to Berkeley by 758f0484fSRodney W. Grimes * Henry Spencer. 858f0484fSRodney W. Grimes * 958f0484fSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 1058f0484fSRodney W. Grimes * modification, are permitted provided that the following conditions 1158f0484fSRodney W. Grimes * are met: 1258f0484fSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 1358f0484fSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 1458f0484fSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 1558f0484fSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 1658f0484fSRodney W. Grimes * documentation and/or other materials provided with the distribution. 1758f0484fSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 1858f0484fSRodney W. Grimes * must display the following acknowledgement: 1958f0484fSRodney W. Grimes * This product includes software developed by the University of 2058f0484fSRodney W. Grimes * California, Berkeley and its contributors. 2158f0484fSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 2258f0484fSRodney W. Grimes * may be used to endorse or promote products derived from this software 2358f0484fSRodney W. Grimes * without specific prior written permission. 2458f0484fSRodney W. Grimes * 2558f0484fSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2658f0484fSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2758f0484fSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2858f0484fSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2958f0484fSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3058f0484fSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3158f0484fSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3258f0484fSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3358f0484fSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3458f0484fSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3558f0484fSRodney W. Grimes * SUCH DAMAGE. 3658f0484fSRodney W. Grimes * 3758f0484fSRodney W. Grimes * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 3858f0484fSRodney W. Grimes */ 3958f0484fSRodney W. Grimes 4058f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint) 4158f0484fSRodney W. Grimes static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; 4258f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */ 4358f0484fSRodney W. Grimes 4458f0484fSRodney W. Grimes #include <sys/types.h> 4558f0484fSRodney W. Grimes #include <stdio.h> 4658f0484fSRodney W. Grimes #include <string.h> 4758f0484fSRodney W. Grimes #include <ctype.h> 4858f0484fSRodney W. Grimes #include <limits.h> 4958f0484fSRodney W. Grimes #include <stdlib.h> 5058f0484fSRodney W. Grimes #include <regex.h> 5158f0484fSRodney W. Grimes 52c61cea72SAndrey A. Chernov #include "collate.h" 53c61cea72SAndrey A. Chernov 5458f0484fSRodney W. Grimes #include "utils.h" 5558f0484fSRodney W. Grimes #include "regex2.h" 5658f0484fSRodney W. Grimes 5758f0484fSRodney W. Grimes #include "cclass.h" 5858f0484fSRodney W. Grimes #include "cname.h" 5958f0484fSRodney W. Grimes 6058f0484fSRodney W. Grimes /* 6158f0484fSRodney W. Grimes * parse structure, passed up and down to avoid global variables and 6258f0484fSRodney W. Grimes * other clumsinesses 6358f0484fSRodney W. Grimes */ 6458f0484fSRodney W. Grimes struct parse { 6558f0484fSRodney W. Grimes char *next; /* next character in RE */ 6658f0484fSRodney W. Grimes char *end; /* end of string (-> NUL normally) */ 6758f0484fSRodney W. Grimes int error; /* has an error been seen? */ 6858f0484fSRodney W. Grimes sop *strip; /* malloced strip */ 6958f0484fSRodney W. Grimes sopno ssize; /* malloced strip size (allocated) */ 7058f0484fSRodney W. Grimes sopno slen; /* malloced strip length (used) */ 7158f0484fSRodney W. Grimes int ncsalloc; /* number of csets allocated */ 7258f0484fSRodney W. Grimes struct re_guts *g; 7358f0484fSRodney W. Grimes # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 7458f0484fSRodney W. Grimes sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 7558f0484fSRodney W. Grimes sopno pend[NPAREN]; /* -> ) ([0] unused) */ 7658f0484fSRodney W. Grimes }; 7758f0484fSRodney W. Grimes 7858f0484fSRodney W. Grimes /* ========= begin header generated by ./mkh ========= */ 7958f0484fSRodney W. Grimes #ifdef __cplusplus 8058f0484fSRodney W. Grimes extern "C" { 8158f0484fSRodney W. Grimes #endif 8258f0484fSRodney W. Grimes 8358f0484fSRodney W. Grimes /* === regcomp.c === */ 8458f0484fSRodney W. Grimes static void p_ere __P((struct parse *p, int stop)); 8558f0484fSRodney W. Grimes static void p_ere_exp __P((struct parse *p)); 8658f0484fSRodney W. Grimes static void p_str __P((struct parse *p)); 8758f0484fSRodney W. Grimes static void p_bre __P((struct parse *p, int end1, int end2)); 8858f0484fSRodney W. Grimes static int p_simp_re __P((struct parse *p, int starordinary)); 8958f0484fSRodney W. Grimes static int p_count __P((struct parse *p)); 9058f0484fSRodney W. Grimes static void p_bracket __P((struct parse *p)); 9158f0484fSRodney W. Grimes static void p_b_term __P((struct parse *p, cset *cs)); 9258f0484fSRodney W. Grimes static void p_b_cclass __P((struct parse *p, cset *cs)); 9358f0484fSRodney W. Grimes static void p_b_eclass __P((struct parse *p, cset *cs)); 9458f0484fSRodney W. Grimes static char p_b_symbol __P((struct parse *p)); 9558f0484fSRodney W. Grimes static char p_b_coll_elem __P((struct parse *p, int endc)); 9658f0484fSRodney W. Grimes static char othercase __P((int ch)); 9758f0484fSRodney W. Grimes static void bothcases __P((struct parse *p, int ch)); 9858f0484fSRodney W. Grimes static void ordinary __P((struct parse *p, int ch)); 9958f0484fSRodney W. Grimes static void nonnewline __P((struct parse *p)); 10058f0484fSRodney W. Grimes static void repeat __P((struct parse *p, sopno start, int from, int to)); 10158f0484fSRodney W. Grimes static int seterr __P((struct parse *p, int e)); 10258f0484fSRodney W. Grimes static cset *allocset __P((struct parse *p)); 10358f0484fSRodney W. Grimes static void freeset __P((struct parse *p, cset *cs)); 10458f0484fSRodney W. Grimes static int freezeset __P((struct parse *p, cset *cs)); 10558f0484fSRodney W. Grimes static int firstch __P((struct parse *p, cset *cs)); 10658f0484fSRodney W. Grimes static int nch __P((struct parse *p, cset *cs)); 10758f0484fSRodney W. Grimes static void mcadd __P((struct parse *p, cset *cs, char *cp)); 10851295a4dSJordan K. Hubbard #if used 10958f0484fSRodney W. Grimes static void mcsub __P((cset *cs, char *cp)); 11058f0484fSRodney W. Grimes static int mcin __P((cset *cs, char *cp)); 11158f0484fSRodney W. Grimes static char *mcfind __P((cset *cs, char *cp)); 11251295a4dSJordan K. Hubbard #endif 11358f0484fSRodney W. Grimes static void mcinvert __P((struct parse *p, cset *cs)); 11458f0484fSRodney W. Grimes static void mccase __P((struct parse *p, cset *cs)); 11558f0484fSRodney W. Grimes static int isinsets __P((struct re_guts *g, int c)); 11658f0484fSRodney W. Grimes static int samesets __P((struct re_guts *g, int c1, int c2)); 11758f0484fSRodney W. Grimes static void categorize __P((struct parse *p, struct re_guts *g)); 11858f0484fSRodney W. Grimes static sopno dupl __P((struct parse *p, sopno start, sopno finish)); 11958f0484fSRodney W. Grimes static void doemit __P((struct parse *p, sop op, size_t opnd)); 12058f0484fSRodney W. Grimes static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos)); 12158f0484fSRodney W. Grimes static void dofwd __P((struct parse *p, sopno pos, sop value)); 12258f0484fSRodney W. Grimes static void enlarge __P((struct parse *p, sopno size)); 12358f0484fSRodney W. Grimes static void stripsnug __P((struct parse *p, struct re_guts *g)); 12458f0484fSRodney W. Grimes static void findmust __P((struct parse *p, struct re_guts *g)); 12558f0484fSRodney W. Grimes static sopno pluscount __P((struct parse *p, struct re_guts *g)); 12658f0484fSRodney W. Grimes 12758f0484fSRodney W. Grimes #ifdef __cplusplus 12858f0484fSRodney W. Grimes } 12958f0484fSRodney W. Grimes #endif 13058f0484fSRodney W. Grimes /* ========= end header generated by ./mkh ========= */ 13158f0484fSRodney W. Grimes 13258f0484fSRodney W. Grimes static char nuls[10]; /* place to point scanner in event of error */ 13358f0484fSRodney W. Grimes 13458f0484fSRodney W. Grimes /* 13558f0484fSRodney W. Grimes * macros for use with parse structure 13658f0484fSRodney W. Grimes * BEWARE: these know that the parse structure is named `p' !!! 13758f0484fSRodney W. Grimes */ 13858f0484fSRodney W. Grimes #define PEEK() (*p->next) 13958f0484fSRodney W. Grimes #define PEEK2() (*(p->next+1)) 14058f0484fSRodney W. Grimes #define MORE() (p->next < p->end) 14158f0484fSRodney W. Grimes #define MORE2() (p->next+1 < p->end) 14258f0484fSRodney W. Grimes #define SEE(c) (MORE() && PEEK() == (c)) 14358f0484fSRodney W. Grimes #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 14458f0484fSRodney W. Grimes #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 14558f0484fSRodney W. Grimes #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 14658f0484fSRodney W. Grimes #define NEXT() (p->next++) 14758f0484fSRodney W. Grimes #define NEXT2() (p->next += 2) 14858f0484fSRodney W. Grimes #define NEXTn(n) (p->next += (n)) 14958f0484fSRodney W. Grimes #define GETNEXT() (*p->next++) 15058f0484fSRodney W. Grimes #define SETERROR(e) seterr(p, (e)) 15158f0484fSRodney W. Grimes #define REQUIRE(co, e) ((co) || SETERROR(e)) 15258f0484fSRodney W. Grimes #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 15358f0484fSRodney W. Grimes #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 15458f0484fSRodney W. Grimes #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 15558f0484fSRodney W. Grimes #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 15658f0484fSRodney W. Grimes #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 15758f0484fSRodney W. Grimes #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 15858f0484fSRodney W. Grimes #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 15958f0484fSRodney W. Grimes #define HERE() (p->slen) 16058f0484fSRodney W. Grimes #define THERE() (p->slen - 1) 16158f0484fSRodney W. Grimes #define THERETHERE() (p->slen - 2) 16258f0484fSRodney W. Grimes #define DROP(n) (p->slen -= (n)) 16358f0484fSRodney W. Grimes 16458f0484fSRodney W. Grimes #ifndef NDEBUG 16558f0484fSRodney W. Grimes static int never = 0; /* for use in asserts; shuts lint up */ 16658f0484fSRodney W. Grimes #else 16758f0484fSRodney W. Grimes #define never 0 /* some <assert.h>s have bugs too */ 16858f0484fSRodney W. Grimes #endif 16958f0484fSRodney W. Grimes 17058f0484fSRodney W. Grimes /* 17158f0484fSRodney W. Grimes - regcomp - interface for parser and compilation 17258f0484fSRodney W. Grimes = extern int regcomp(regex_t *, const char *, int); 17358f0484fSRodney W. Grimes = #define REG_BASIC 0000 17458f0484fSRodney W. Grimes = #define REG_EXTENDED 0001 17558f0484fSRodney W. Grimes = #define REG_ICASE 0002 17658f0484fSRodney W. Grimes = #define REG_NOSUB 0004 17758f0484fSRodney W. Grimes = #define REG_NEWLINE 0010 17858f0484fSRodney W. Grimes = #define REG_NOSPEC 0020 17958f0484fSRodney W. Grimes = #define REG_PEND 0040 18058f0484fSRodney W. Grimes = #define REG_DUMP 0200 18158f0484fSRodney W. Grimes */ 18258f0484fSRodney W. Grimes int /* 0 success, otherwise REG_something */ 18358f0484fSRodney W. Grimes regcomp(preg, pattern, cflags) 18458f0484fSRodney W. Grimes regex_t *preg; 18558f0484fSRodney W. Grimes const char *pattern; 18658f0484fSRodney W. Grimes int cflags; 18758f0484fSRodney W. Grimes { 18858f0484fSRodney W. Grimes struct parse pa; 18958f0484fSRodney W. Grimes register struct re_guts *g; 19058f0484fSRodney W. Grimes register struct parse *p = &pa; 19158f0484fSRodney W. Grimes register int i; 19258f0484fSRodney W. Grimes register size_t len; 19358f0484fSRodney W. Grimes #ifdef REDEBUG 19458f0484fSRodney W. Grimes # define GOODFLAGS(f) (f) 19558f0484fSRodney W. Grimes #else 19658f0484fSRodney W. Grimes # define GOODFLAGS(f) ((f)&~REG_DUMP) 19758f0484fSRodney W. Grimes #endif 19858f0484fSRodney W. Grimes 19958f0484fSRodney W. Grimes cflags = GOODFLAGS(cflags); 20058f0484fSRodney W. Grimes if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 20158f0484fSRodney W. Grimes return(REG_INVARG); 20258f0484fSRodney W. Grimes 20358f0484fSRodney W. Grimes if (cflags®_PEND) { 20458f0484fSRodney W. Grimes if (preg->re_endp < pattern) 20558f0484fSRodney W. Grimes return(REG_INVARG); 20658f0484fSRodney W. Grimes len = preg->re_endp - pattern; 20758f0484fSRodney W. Grimes } else 20858f0484fSRodney W. Grimes len = strlen((char *)pattern); 20958f0484fSRodney W. Grimes 21058f0484fSRodney W. Grimes /* do the mallocs early so failure handling is easy */ 21158f0484fSRodney W. Grimes g = (struct re_guts *)malloc(sizeof(struct re_guts) + 21258f0484fSRodney W. Grimes (NC-1)*sizeof(cat_t)); 21358f0484fSRodney W. Grimes if (g == NULL) 21458f0484fSRodney W. Grimes return(REG_ESPACE); 21558f0484fSRodney W. Grimes p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 21658f0484fSRodney W. Grimes p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 21758f0484fSRodney W. Grimes p->slen = 0; 21858f0484fSRodney W. Grimes if (p->strip == NULL) { 21958f0484fSRodney W. Grimes free((char *)g); 22058f0484fSRodney W. Grimes return(REG_ESPACE); 22158f0484fSRodney W. Grimes } 22258f0484fSRodney W. Grimes 22358f0484fSRodney W. Grimes /* set things up */ 22458f0484fSRodney W. Grimes p->g = g; 22558f0484fSRodney W. Grimes p->next = (char *)pattern; /* convenience; we do not modify it */ 22658f0484fSRodney W. Grimes p->end = p->next + len; 22758f0484fSRodney W. Grimes p->error = 0; 22858f0484fSRodney W. Grimes p->ncsalloc = 0; 22958f0484fSRodney W. Grimes for (i = 0; i < NPAREN; i++) { 23058f0484fSRodney W. Grimes p->pbegin[i] = 0; 23158f0484fSRodney W. Grimes p->pend[i] = 0; 23258f0484fSRodney W. Grimes } 23358f0484fSRodney W. Grimes g->csetsize = NC; 23458f0484fSRodney W. Grimes g->sets = NULL; 23558f0484fSRodney W. Grimes g->setbits = NULL; 23658f0484fSRodney W. Grimes g->ncsets = 0; 23758f0484fSRodney W. Grimes g->cflags = cflags; 23858f0484fSRodney W. Grimes g->iflags = 0; 23958f0484fSRodney W. Grimes g->nbol = 0; 24058f0484fSRodney W. Grimes g->neol = 0; 24158f0484fSRodney W. Grimes g->must = NULL; 24258f0484fSRodney W. Grimes g->mlen = 0; 24358f0484fSRodney W. Grimes g->nsub = 0; 24458f0484fSRodney W. Grimes g->ncategories = 1; /* category 0 is "everything else" */ 24558f0484fSRodney W. Grimes g->categories = &g->catspace[-(CHAR_MIN)]; 24658f0484fSRodney W. Grimes (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 24758f0484fSRodney W. Grimes g->backrefs = 0; 24858f0484fSRodney W. Grimes 24958f0484fSRodney W. Grimes /* do it */ 25058f0484fSRodney W. Grimes EMIT(OEND, 0); 25158f0484fSRodney W. Grimes g->firststate = THERE(); 25258f0484fSRodney W. Grimes if (cflags®_EXTENDED) 25358f0484fSRodney W. Grimes p_ere(p, OUT); 25458f0484fSRodney W. Grimes else if (cflags®_NOSPEC) 25558f0484fSRodney W. Grimes p_str(p); 25658f0484fSRodney W. Grimes else 25758f0484fSRodney W. Grimes p_bre(p, OUT, OUT); 25858f0484fSRodney W. Grimes EMIT(OEND, 0); 25958f0484fSRodney W. Grimes g->laststate = THERE(); 26058f0484fSRodney W. Grimes 26158f0484fSRodney W. Grimes /* tidy up loose ends and fill things in */ 26258f0484fSRodney W. Grimes categorize(p, g); 26358f0484fSRodney W. Grimes stripsnug(p, g); 26458f0484fSRodney W. Grimes findmust(p, g); 26558f0484fSRodney W. Grimes g->nplus = pluscount(p, g); 26658f0484fSRodney W. Grimes g->magic = MAGIC2; 26758f0484fSRodney W. Grimes preg->re_nsub = g->nsub; 26858f0484fSRodney W. Grimes preg->re_g = g; 26958f0484fSRodney W. Grimes preg->re_magic = MAGIC1; 27058f0484fSRodney W. Grimes #ifndef REDEBUG 27158f0484fSRodney W. Grimes /* not debugging, so can't rely on the assert() in regexec() */ 27258f0484fSRodney W. Grimes if (g->iflags&BAD) 27358f0484fSRodney W. Grimes SETERROR(REG_ASSERT); 27458f0484fSRodney W. Grimes #endif 27558f0484fSRodney W. Grimes 27658f0484fSRodney W. Grimes /* win or lose, we're done */ 27758f0484fSRodney W. Grimes if (p->error != 0) /* lose */ 27858f0484fSRodney W. Grimes regfree(preg); 27958f0484fSRodney W. Grimes return(p->error); 28058f0484fSRodney W. Grimes } 28158f0484fSRodney W. Grimes 28258f0484fSRodney W. Grimes /* 28358f0484fSRodney W. Grimes - p_ere - ERE parser top level, concatenation and alternation 28458f0484fSRodney W. Grimes == static void p_ere(register struct parse *p, int stop); 28558f0484fSRodney W. Grimes */ 28658f0484fSRodney W. Grimes static void 28758f0484fSRodney W. Grimes p_ere(p, stop) 28858f0484fSRodney W. Grimes register struct parse *p; 28958f0484fSRodney W. Grimes int stop; /* character this ERE should end at */ 29058f0484fSRodney W. Grimes { 29158f0484fSRodney W. Grimes register char c; 29258f0484fSRodney W. Grimes register sopno prevback; 29358f0484fSRodney W. Grimes register sopno prevfwd; 29458f0484fSRodney W. Grimes register sopno conc; 29558f0484fSRodney W. Grimes register int first = 1; /* is this the first alternative? */ 29658f0484fSRodney W. Grimes 29758f0484fSRodney W. Grimes for (;;) { 29858f0484fSRodney W. Grimes /* do a bunch of concatenated expressions */ 29958f0484fSRodney W. Grimes conc = HERE(); 30058f0484fSRodney W. Grimes while (MORE() && (c = PEEK()) != '|' && c != stop) 30158f0484fSRodney W. Grimes p_ere_exp(p); 30251295a4dSJordan K. Hubbard (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 30358f0484fSRodney W. Grimes 30458f0484fSRodney W. Grimes if (!EAT('|')) 30558f0484fSRodney W. Grimes break; /* NOTE BREAK OUT */ 30658f0484fSRodney W. Grimes 30758f0484fSRodney W. Grimes if (first) { 30858f0484fSRodney W. Grimes INSERT(OCH_, conc); /* offset is wrong */ 30958f0484fSRodney W. Grimes prevfwd = conc; 31058f0484fSRodney W. Grimes prevback = conc; 31158f0484fSRodney W. Grimes first = 0; 31258f0484fSRodney W. Grimes } 31358f0484fSRodney W. Grimes ASTERN(OOR1, prevback); 31458f0484fSRodney W. Grimes prevback = THERE(); 31558f0484fSRodney W. Grimes AHEAD(prevfwd); /* fix previous offset */ 31658f0484fSRodney W. Grimes prevfwd = HERE(); 31758f0484fSRodney W. Grimes EMIT(OOR2, 0); /* offset is very wrong */ 31858f0484fSRodney W. Grimes } 31958f0484fSRodney W. Grimes 32058f0484fSRodney W. Grimes if (!first) { /* tail-end fixups */ 32158f0484fSRodney W. Grimes AHEAD(prevfwd); 32258f0484fSRodney W. Grimes ASTERN(O_CH, prevback); 32358f0484fSRodney W. Grimes } 32458f0484fSRodney W. Grimes 32558f0484fSRodney W. Grimes assert(!MORE() || SEE(stop)); 32658f0484fSRodney W. Grimes } 32758f0484fSRodney W. Grimes 32858f0484fSRodney W. Grimes /* 32958f0484fSRodney W. Grimes - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 33058f0484fSRodney W. Grimes == static void p_ere_exp(register struct parse *p); 33158f0484fSRodney W. Grimes */ 33258f0484fSRodney W. Grimes static void 33358f0484fSRodney W. Grimes p_ere_exp(p) 33458f0484fSRodney W. Grimes register struct parse *p; 33558f0484fSRodney W. Grimes { 33658f0484fSRodney W. Grimes register char c; 33758f0484fSRodney W. Grimes register sopno pos; 33858f0484fSRodney W. Grimes register int count; 33958f0484fSRodney W. Grimes register int count2; 34058f0484fSRodney W. Grimes register sopno subno; 34158f0484fSRodney W. Grimes int wascaret = 0; 34258f0484fSRodney W. Grimes 34358f0484fSRodney W. Grimes assert(MORE()); /* caller should have ensured this */ 34458f0484fSRodney W. Grimes c = GETNEXT(); 34558f0484fSRodney W. Grimes 34658f0484fSRodney W. Grimes pos = HERE(); 34758f0484fSRodney W. Grimes switch (c) { 34858f0484fSRodney W. Grimes case '(': 34951295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EPAREN); 35058f0484fSRodney W. Grimes p->g->nsub++; 35158f0484fSRodney W. Grimes subno = p->g->nsub; 35258f0484fSRodney W. Grimes if (subno < NPAREN) 35358f0484fSRodney W. Grimes p->pbegin[subno] = HERE(); 35458f0484fSRodney W. Grimes EMIT(OLPAREN, subno); 35558f0484fSRodney W. Grimes if (!SEE(')')) 35658f0484fSRodney W. Grimes p_ere(p, ')'); 35758f0484fSRodney W. Grimes if (subno < NPAREN) { 35858f0484fSRodney W. Grimes p->pend[subno] = HERE(); 35958f0484fSRodney W. Grimes assert(p->pend[subno] != 0); 36058f0484fSRodney W. Grimes } 36158f0484fSRodney W. Grimes EMIT(ORPAREN, subno); 36251295a4dSJordan K. Hubbard (void)MUSTEAT(')', REG_EPAREN); 36358f0484fSRodney W. Grimes break; 36458f0484fSRodney W. Grimes #ifndef POSIX_MISTAKE 36558f0484fSRodney W. Grimes case ')': /* happens only if no current unmatched ( */ 36658f0484fSRodney W. Grimes /* 36758f0484fSRodney W. Grimes * You may ask, why the ifndef? Because I didn't notice 36858f0484fSRodney W. Grimes * this until slightly too late for 1003.2, and none of the 36958f0484fSRodney W. Grimes * other 1003.2 regular-expression reviewers noticed it at 37058f0484fSRodney W. Grimes * all. So an unmatched ) is legal POSIX, at least until 37158f0484fSRodney W. Grimes * we can get it fixed. 37258f0484fSRodney W. Grimes */ 37358f0484fSRodney W. Grimes SETERROR(REG_EPAREN); 37458f0484fSRodney W. Grimes break; 37558f0484fSRodney W. Grimes #endif 37658f0484fSRodney W. Grimes case '^': 37758f0484fSRodney W. Grimes EMIT(OBOL, 0); 37858f0484fSRodney W. Grimes p->g->iflags |= USEBOL; 37958f0484fSRodney W. Grimes p->g->nbol++; 38058f0484fSRodney W. Grimes wascaret = 1; 38158f0484fSRodney W. Grimes break; 38258f0484fSRodney W. Grimes case '$': 38358f0484fSRodney W. Grimes EMIT(OEOL, 0); 38458f0484fSRodney W. Grimes p->g->iflags |= USEEOL; 38558f0484fSRodney W. Grimes p->g->neol++; 38658f0484fSRodney W. Grimes break; 38758f0484fSRodney W. Grimes case '|': 38858f0484fSRodney W. Grimes SETERROR(REG_EMPTY); 38958f0484fSRodney W. Grimes break; 39058f0484fSRodney W. Grimes case '*': 39158f0484fSRodney W. Grimes case '+': 39258f0484fSRodney W. Grimes case '?': 39358f0484fSRodney W. Grimes SETERROR(REG_BADRPT); 39458f0484fSRodney W. Grimes break; 39558f0484fSRodney W. Grimes case '.': 39658f0484fSRodney W. Grimes if (p->g->cflags®_NEWLINE) 39758f0484fSRodney W. Grimes nonnewline(p); 39858f0484fSRodney W. Grimes else 39958f0484fSRodney W. Grimes EMIT(OANY, 0); 40058f0484fSRodney W. Grimes break; 40158f0484fSRodney W. Grimes case '[': 40258f0484fSRodney W. Grimes p_bracket(p); 40358f0484fSRodney W. Grimes break; 40458f0484fSRodney W. Grimes case '\\': 40551295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EESCAPE); 40658f0484fSRodney W. Grimes c = GETNEXT(); 40758f0484fSRodney W. Grimes ordinary(p, c); 40858f0484fSRodney W. Grimes break; 40958f0484fSRodney W. Grimes case '{': /* okay as ordinary except if digit follows */ 41051295a4dSJordan K. Hubbard (void)REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); 41158f0484fSRodney W. Grimes /* FALLTHROUGH */ 41258f0484fSRodney W. Grimes default: 41358f0484fSRodney W. Grimes ordinary(p, c); 41458f0484fSRodney W. Grimes break; 41558f0484fSRodney W. Grimes } 41658f0484fSRodney W. Grimes 41758f0484fSRodney W. Grimes if (!MORE()) 41858f0484fSRodney W. Grimes return; 41958f0484fSRodney W. Grimes c = PEEK(); 42058f0484fSRodney W. Grimes /* we call { a repetition if followed by a digit */ 42158f0484fSRodney W. Grimes if (!( c == '*' || c == '+' || c == '?' || 42258f0484fSRodney W. Grimes (c == '{' && MORE2() && isdigit(PEEK2())) )) 42358f0484fSRodney W. Grimes return; /* no repetition, we're done */ 42458f0484fSRodney W. Grimes NEXT(); 42558f0484fSRodney W. Grimes 42651295a4dSJordan K. Hubbard (void)REQUIRE(!wascaret, REG_BADRPT); 42758f0484fSRodney W. Grimes switch (c) { 42858f0484fSRodney W. Grimes case '*': /* implemented as +? */ 42958f0484fSRodney W. Grimes /* this case does not require the (y|) trick, noKLUDGE */ 43058f0484fSRodney W. Grimes INSERT(OPLUS_, pos); 43158f0484fSRodney W. Grimes ASTERN(O_PLUS, pos); 43258f0484fSRodney W. Grimes INSERT(OQUEST_, pos); 43358f0484fSRodney W. Grimes ASTERN(O_QUEST, pos); 43458f0484fSRodney W. Grimes break; 43558f0484fSRodney W. Grimes case '+': 43658f0484fSRodney W. Grimes INSERT(OPLUS_, pos); 43758f0484fSRodney W. Grimes ASTERN(O_PLUS, pos); 43858f0484fSRodney W. Grimes break; 43958f0484fSRodney W. Grimes case '?': 44058f0484fSRodney W. Grimes /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 44158f0484fSRodney W. Grimes INSERT(OCH_, pos); /* offset slightly wrong */ 44258f0484fSRodney W. Grimes ASTERN(OOR1, pos); /* this one's right */ 44358f0484fSRodney W. Grimes AHEAD(pos); /* fix the OCH_ */ 44458f0484fSRodney W. Grimes EMIT(OOR2, 0); /* offset very wrong... */ 44558f0484fSRodney W. Grimes AHEAD(THERE()); /* ...so fix it */ 44658f0484fSRodney W. Grimes ASTERN(O_CH, THERETHERE()); 44758f0484fSRodney W. Grimes break; 44858f0484fSRodney W. Grimes case '{': 44958f0484fSRodney W. Grimes count = p_count(p); 45058f0484fSRodney W. Grimes if (EAT(',')) { 45158f0484fSRodney W. Grimes if (isdigit(PEEK())) { 45258f0484fSRodney W. Grimes count2 = p_count(p); 45351295a4dSJordan K. Hubbard (void)REQUIRE(count <= count2, REG_BADBR); 45458f0484fSRodney W. Grimes } else /* single number with comma */ 45558f0484fSRodney W. Grimes count2 = INFINITY; 45658f0484fSRodney W. Grimes } else /* just a single number */ 45758f0484fSRodney W. Grimes count2 = count; 45858f0484fSRodney W. Grimes repeat(p, pos, count, count2); 45958f0484fSRodney W. Grimes if (!EAT('}')) { /* error heuristics */ 46058f0484fSRodney W. Grimes while (MORE() && PEEK() != '}') 46158f0484fSRodney W. Grimes NEXT(); 46251295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACE); 46358f0484fSRodney W. Grimes SETERROR(REG_BADBR); 46458f0484fSRodney W. Grimes } 46558f0484fSRodney W. Grimes break; 46658f0484fSRodney W. Grimes } 46758f0484fSRodney W. Grimes 46858f0484fSRodney W. Grimes if (!MORE()) 46958f0484fSRodney W. Grimes return; 47058f0484fSRodney W. Grimes c = PEEK(); 47158f0484fSRodney W. Grimes if (!( c == '*' || c == '+' || c == '?' || 47258f0484fSRodney W. Grimes (c == '{' && MORE2() && isdigit(PEEK2())) ) ) 47358f0484fSRodney W. Grimes return; 47458f0484fSRodney W. Grimes SETERROR(REG_BADRPT); 47558f0484fSRodney W. Grimes } 47658f0484fSRodney W. Grimes 47758f0484fSRodney W. Grimes /* 47858f0484fSRodney W. Grimes - p_str - string (no metacharacters) "parser" 47958f0484fSRodney W. Grimes == static void p_str(register struct parse *p); 48058f0484fSRodney W. Grimes */ 48158f0484fSRodney W. Grimes static void 48258f0484fSRodney W. Grimes p_str(p) 48358f0484fSRodney W. Grimes register struct parse *p; 48458f0484fSRodney W. Grimes { 48551295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EMPTY); 48658f0484fSRodney W. Grimes while (MORE()) 48758f0484fSRodney W. Grimes ordinary(p, GETNEXT()); 48858f0484fSRodney W. Grimes } 48958f0484fSRodney W. Grimes 49058f0484fSRodney W. Grimes /* 49158f0484fSRodney W. Grimes - p_bre - BRE parser top level, anchoring and concatenation 49258f0484fSRodney W. Grimes == static void p_bre(register struct parse *p, register int end1, \ 49358f0484fSRodney W. Grimes == register int end2); 49458f0484fSRodney W. Grimes * Giving end1 as OUT essentially eliminates the end1/end2 check. 49558f0484fSRodney W. Grimes * 49658f0484fSRodney W. Grimes * This implementation is a bit of a kludge, in that a trailing $ is first 49758f0484fSRodney W. Grimes * taken as an ordinary character and then revised to be an anchor. The 49858f0484fSRodney W. Grimes * only undesirable side effect is that '$' gets included as a character 49958f0484fSRodney W. Grimes * category in such cases. This is fairly harmless; not worth fixing. 50058f0484fSRodney W. Grimes * The amount of lookahead needed to avoid this kludge is excessive. 50158f0484fSRodney W. Grimes */ 50258f0484fSRodney W. Grimes static void 50358f0484fSRodney W. Grimes p_bre(p, end1, end2) 50458f0484fSRodney W. Grimes register struct parse *p; 50558f0484fSRodney W. Grimes register int end1; /* first terminating character */ 50658f0484fSRodney W. Grimes register int end2; /* second terminating character */ 50758f0484fSRodney W. Grimes { 50858f0484fSRodney W. Grimes register sopno start = HERE(); 50958f0484fSRodney W. Grimes register int first = 1; /* first subexpression? */ 51058f0484fSRodney W. Grimes register int wasdollar = 0; 51158f0484fSRodney W. Grimes 51258f0484fSRodney W. Grimes if (EAT('^')) { 51358f0484fSRodney W. Grimes EMIT(OBOL, 0); 51458f0484fSRodney W. Grimes p->g->iflags |= USEBOL; 51558f0484fSRodney W. Grimes p->g->nbol++; 51658f0484fSRodney W. Grimes } 51758f0484fSRodney W. Grimes while (MORE() && !SEETWO(end1, end2)) { 51858f0484fSRodney W. Grimes wasdollar = p_simp_re(p, first); 51958f0484fSRodney W. Grimes first = 0; 52058f0484fSRodney W. Grimes } 52158f0484fSRodney W. Grimes if (wasdollar) { /* oops, that was a trailing anchor */ 52258f0484fSRodney W. Grimes DROP(1); 52358f0484fSRodney W. Grimes EMIT(OEOL, 0); 52458f0484fSRodney W. Grimes p->g->iflags |= USEEOL; 52558f0484fSRodney W. Grimes p->g->neol++; 52658f0484fSRodney W. Grimes } 52758f0484fSRodney W. Grimes 52851295a4dSJordan K. Hubbard (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 52958f0484fSRodney W. Grimes } 53058f0484fSRodney W. Grimes 53158f0484fSRodney W. Grimes /* 53258f0484fSRodney W. Grimes - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 53358f0484fSRodney W. Grimes == static int p_simp_re(register struct parse *p, int starordinary); 53458f0484fSRodney W. Grimes */ 53558f0484fSRodney W. Grimes static int /* was the simple RE an unbackslashed $? */ 53658f0484fSRodney W. Grimes p_simp_re(p, starordinary) 53758f0484fSRodney W. Grimes register struct parse *p; 53858f0484fSRodney W. Grimes int starordinary; /* is a leading * an ordinary character? */ 53958f0484fSRodney W. Grimes { 54058f0484fSRodney W. Grimes register int c; 54158f0484fSRodney W. Grimes register int count; 54258f0484fSRodney W. Grimes register int count2; 54358f0484fSRodney W. Grimes register sopno pos; 54458f0484fSRodney W. Grimes register int i; 54558f0484fSRodney W. Grimes register sopno subno; 54658f0484fSRodney W. Grimes # define BACKSL (1<<CHAR_BIT) 54758f0484fSRodney W. Grimes 54858f0484fSRodney W. Grimes pos = HERE(); /* repetion op, if any, covers from here */ 54958f0484fSRodney W. Grimes 55058f0484fSRodney W. Grimes assert(MORE()); /* caller should have ensured this */ 55158f0484fSRodney W. Grimes c = GETNEXT(); 55258f0484fSRodney W. Grimes if (c == '\\') { 55351295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EESCAPE); 55458f0484fSRodney W. Grimes c = BACKSL | (unsigned char)GETNEXT(); 55558f0484fSRodney W. Grimes } 55658f0484fSRodney W. Grimes switch (c) { 55758f0484fSRodney W. Grimes case '.': 55858f0484fSRodney W. Grimes if (p->g->cflags®_NEWLINE) 55958f0484fSRodney W. Grimes nonnewline(p); 56058f0484fSRodney W. Grimes else 56158f0484fSRodney W. Grimes EMIT(OANY, 0); 56258f0484fSRodney W. Grimes break; 56358f0484fSRodney W. Grimes case '[': 56458f0484fSRodney W. Grimes p_bracket(p); 56558f0484fSRodney W. Grimes break; 56658f0484fSRodney W. Grimes case BACKSL|'{': 56758f0484fSRodney W. Grimes SETERROR(REG_BADRPT); 56858f0484fSRodney W. Grimes break; 56958f0484fSRodney W. Grimes case BACKSL|'(': 57058f0484fSRodney W. Grimes p->g->nsub++; 57158f0484fSRodney W. Grimes subno = p->g->nsub; 57258f0484fSRodney W. Grimes if (subno < NPAREN) 57358f0484fSRodney W. Grimes p->pbegin[subno] = HERE(); 57458f0484fSRodney W. Grimes EMIT(OLPAREN, subno); 57558f0484fSRodney W. Grimes /* the MORE here is an error heuristic */ 57658f0484fSRodney W. Grimes if (MORE() && !SEETWO('\\', ')')) 57758f0484fSRodney W. Grimes p_bre(p, '\\', ')'); 57858f0484fSRodney W. Grimes if (subno < NPAREN) { 57958f0484fSRodney W. Grimes p->pend[subno] = HERE(); 58058f0484fSRodney W. Grimes assert(p->pend[subno] != 0); 58158f0484fSRodney W. Grimes } 58258f0484fSRodney W. Grimes EMIT(ORPAREN, subno); 58351295a4dSJordan K. Hubbard (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 58458f0484fSRodney W. Grimes break; 58558f0484fSRodney W. Grimes case BACKSL|')': /* should not get here -- must be user */ 58658f0484fSRodney W. Grimes case BACKSL|'}': 58758f0484fSRodney W. Grimes SETERROR(REG_EPAREN); 58858f0484fSRodney W. Grimes break; 58958f0484fSRodney W. Grimes case BACKSL|'1': 59058f0484fSRodney W. Grimes case BACKSL|'2': 59158f0484fSRodney W. Grimes case BACKSL|'3': 59258f0484fSRodney W. Grimes case BACKSL|'4': 59358f0484fSRodney W. Grimes case BACKSL|'5': 59458f0484fSRodney W. Grimes case BACKSL|'6': 59558f0484fSRodney W. Grimes case BACKSL|'7': 59658f0484fSRodney W. Grimes case BACKSL|'8': 59758f0484fSRodney W. Grimes case BACKSL|'9': 59858f0484fSRodney W. Grimes i = (c&~BACKSL) - '0'; 59958f0484fSRodney W. Grimes assert(i < NPAREN); 60058f0484fSRodney W. Grimes if (p->pend[i] != 0) { 60158f0484fSRodney W. Grimes assert(i <= p->g->nsub); 60258f0484fSRodney W. Grimes EMIT(OBACK_, i); 60358f0484fSRodney W. Grimes assert(p->pbegin[i] != 0); 60458f0484fSRodney W. Grimes assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 60558f0484fSRodney W. Grimes assert(OP(p->strip[p->pend[i]]) == ORPAREN); 60658f0484fSRodney W. Grimes (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 60758f0484fSRodney W. Grimes EMIT(O_BACK, i); 60858f0484fSRodney W. Grimes } else 60958f0484fSRodney W. Grimes SETERROR(REG_ESUBREG); 61058f0484fSRodney W. Grimes p->g->backrefs = 1; 61158f0484fSRodney W. Grimes break; 61258f0484fSRodney W. Grimes case '*': 61351295a4dSJordan K. Hubbard (void)REQUIRE(starordinary, REG_BADRPT); 61458f0484fSRodney W. Grimes /* FALLTHROUGH */ 61558f0484fSRodney W. Grimes default: 61658f0484fSRodney W. Grimes ordinary(p, c &~ BACKSL); 61758f0484fSRodney W. Grimes break; 61858f0484fSRodney W. Grimes } 61958f0484fSRodney W. Grimes 62058f0484fSRodney W. Grimes if (EAT('*')) { /* implemented as +? */ 62158f0484fSRodney W. Grimes /* this case does not require the (y|) trick, noKLUDGE */ 62258f0484fSRodney W. Grimes INSERT(OPLUS_, pos); 62358f0484fSRodney W. Grimes ASTERN(O_PLUS, pos); 62458f0484fSRodney W. Grimes INSERT(OQUEST_, pos); 62558f0484fSRodney W. Grimes ASTERN(O_QUEST, pos); 62658f0484fSRodney W. Grimes } else if (EATTWO('\\', '{')) { 62758f0484fSRodney W. Grimes count = p_count(p); 62858f0484fSRodney W. Grimes if (EAT(',')) { 62958f0484fSRodney W. Grimes if (MORE() && isdigit(PEEK())) { 63058f0484fSRodney W. Grimes count2 = p_count(p); 63151295a4dSJordan K. Hubbard (void)REQUIRE(count <= count2, REG_BADBR); 63258f0484fSRodney W. Grimes } else /* single number with comma */ 63358f0484fSRodney W. Grimes count2 = INFINITY; 63458f0484fSRodney W. Grimes } else /* just a single number */ 63558f0484fSRodney W. Grimes count2 = count; 63658f0484fSRodney W. Grimes repeat(p, pos, count, count2); 63758f0484fSRodney W. Grimes if (!EATTWO('\\', '}')) { /* error heuristics */ 63858f0484fSRodney W. Grimes while (MORE() && !SEETWO('\\', '}')) 63958f0484fSRodney W. Grimes NEXT(); 64051295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACE); 64158f0484fSRodney W. Grimes SETERROR(REG_BADBR); 64258f0484fSRodney W. Grimes } 64358f0484fSRodney W. Grimes } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 64458f0484fSRodney W. Grimes return(1); 64558f0484fSRodney W. Grimes 64658f0484fSRodney W. Grimes return(0); 64758f0484fSRodney W. Grimes } 64858f0484fSRodney W. Grimes 64958f0484fSRodney W. Grimes /* 65058f0484fSRodney W. Grimes - p_count - parse a repetition count 65158f0484fSRodney W. Grimes == static int p_count(register struct parse *p); 65258f0484fSRodney W. Grimes */ 65358f0484fSRodney W. Grimes static int /* the value */ 65458f0484fSRodney W. Grimes p_count(p) 65558f0484fSRodney W. Grimes register struct parse *p; 65658f0484fSRodney W. Grimes { 65758f0484fSRodney W. Grimes register int count = 0; 65858f0484fSRodney W. Grimes register int ndigits = 0; 65958f0484fSRodney W. Grimes 66058f0484fSRodney W. Grimes while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { 66158f0484fSRodney W. Grimes count = count*10 + (GETNEXT() - '0'); 66258f0484fSRodney W. Grimes ndigits++; 66358f0484fSRodney W. Grimes } 66458f0484fSRodney W. Grimes 66551295a4dSJordan K. Hubbard (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 66658f0484fSRodney W. Grimes return(count); 66758f0484fSRodney W. Grimes } 66858f0484fSRodney W. Grimes 66958f0484fSRodney W. Grimes /* 67058f0484fSRodney W. Grimes - p_bracket - parse a bracketed character list 67158f0484fSRodney W. Grimes == static void p_bracket(register struct parse *p); 67258f0484fSRodney W. Grimes * 67358f0484fSRodney W. Grimes * Note a significant property of this code: if the allocset() did SETERROR, 67458f0484fSRodney W. Grimes * no set operations are done. 67558f0484fSRodney W. Grimes */ 67658f0484fSRodney W. Grimes static void 67758f0484fSRodney W. Grimes p_bracket(p) 67858f0484fSRodney W. Grimes register struct parse *p; 67958f0484fSRodney W. Grimes { 68058f0484fSRodney W. Grimes register cset *cs = allocset(p); 68158f0484fSRodney W. Grimes register int invert = 0; 68258f0484fSRodney W. Grimes 68358f0484fSRodney W. Grimes /* Dept of Truly Sickening Special-Case Kludges */ 68458f0484fSRodney W. Grimes if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 68558f0484fSRodney W. Grimes EMIT(OBOW, 0); 68658f0484fSRodney W. Grimes NEXTn(6); 68758f0484fSRodney W. Grimes return; 68858f0484fSRodney W. Grimes } 68958f0484fSRodney W. Grimes if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 69058f0484fSRodney W. Grimes EMIT(OEOW, 0); 69158f0484fSRodney W. Grimes NEXTn(6); 69258f0484fSRodney W. Grimes return; 69358f0484fSRodney W. Grimes } 69458f0484fSRodney W. Grimes 69558f0484fSRodney W. Grimes if (EAT('^')) 69658f0484fSRodney W. Grimes invert++; /* make note to invert set at end */ 69758f0484fSRodney W. Grimes if (EAT(']')) 69858f0484fSRodney W. Grimes CHadd(cs, ']'); 69958f0484fSRodney W. Grimes else if (EAT('-')) 70058f0484fSRodney W. Grimes CHadd(cs, '-'); 70158f0484fSRodney W. Grimes while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 70258f0484fSRodney W. Grimes p_b_term(p, cs); 70358f0484fSRodney W. Grimes if (EAT('-')) 70458f0484fSRodney W. Grimes CHadd(cs, '-'); 70551295a4dSJordan K. Hubbard (void)MUSTEAT(']', REG_EBRACK); 70658f0484fSRodney W. Grimes 70758f0484fSRodney W. Grimes if (p->error != 0) /* don't mess things up further */ 70858f0484fSRodney W. Grimes return; 70958f0484fSRodney W. Grimes 71058f0484fSRodney W. Grimes if (p->g->cflags®_ICASE) { 71158f0484fSRodney W. Grimes register int i; 71258f0484fSRodney W. Grimes register int ci; 71358f0484fSRodney W. Grimes 71458f0484fSRodney W. Grimes for (i = p->g->csetsize - 1; i >= 0; i--) 71558f0484fSRodney W. Grimes if (CHIN(cs, i) && isalpha(i)) { 71658f0484fSRodney W. Grimes ci = othercase(i); 71758f0484fSRodney W. Grimes if (ci != i) 71858f0484fSRodney W. Grimes CHadd(cs, ci); 71958f0484fSRodney W. Grimes } 72058f0484fSRodney W. Grimes if (cs->multis != NULL) 72158f0484fSRodney W. Grimes mccase(p, cs); 72258f0484fSRodney W. Grimes } 72358f0484fSRodney W. Grimes if (invert) { 72458f0484fSRodney W. Grimes register int i; 72558f0484fSRodney W. Grimes 72658f0484fSRodney W. Grimes for (i = p->g->csetsize - 1; i >= 0; i--) 72758f0484fSRodney W. Grimes if (CHIN(cs, i)) 72858f0484fSRodney W. Grimes CHsub(cs, i); 72958f0484fSRodney W. Grimes else 73058f0484fSRodney W. Grimes CHadd(cs, i); 73158f0484fSRodney W. Grimes if (p->g->cflags®_NEWLINE) 73258f0484fSRodney W. Grimes CHsub(cs, '\n'); 73358f0484fSRodney W. Grimes if (cs->multis != NULL) 73458f0484fSRodney W. Grimes mcinvert(p, cs); 73558f0484fSRodney W. Grimes } 73658f0484fSRodney W. Grimes 73758f0484fSRodney W. Grimes assert(cs->multis == NULL); /* xxx */ 73858f0484fSRodney W. Grimes 73958f0484fSRodney W. Grimes if (nch(p, cs) == 1) { /* optimize singleton sets */ 74058f0484fSRodney W. Grimes ordinary(p, firstch(p, cs)); 74158f0484fSRodney W. Grimes freeset(p, cs); 74258f0484fSRodney W. Grimes } else 74358f0484fSRodney W. Grimes EMIT(OANYOF, freezeset(p, cs)); 74458f0484fSRodney W. Grimes } 74558f0484fSRodney W. Grimes 74658f0484fSRodney W. Grimes /* 74758f0484fSRodney W. Grimes - p_b_term - parse one term of a bracketed character list 74858f0484fSRodney W. Grimes == static void p_b_term(register struct parse *p, register cset *cs); 74958f0484fSRodney W. Grimes */ 75058f0484fSRodney W. Grimes static void 75158f0484fSRodney W. Grimes p_b_term(p, cs) 75258f0484fSRodney W. Grimes register struct parse *p; 75358f0484fSRodney W. Grimes register cset *cs; 75458f0484fSRodney W. Grimes { 75558f0484fSRodney W. Grimes register char c; 75658f0484fSRodney W. Grimes register char start, finish; 75758f0484fSRodney W. Grimes register int i; 75858f0484fSRodney W. Grimes 75958f0484fSRodney W. Grimes /* classify what we've got */ 76058f0484fSRodney W. Grimes switch ((MORE()) ? PEEK() : '\0') { 76158f0484fSRodney W. Grimes case '[': 76258f0484fSRodney W. Grimes c = (MORE2()) ? PEEK2() : '\0'; 76358f0484fSRodney W. Grimes break; 76458f0484fSRodney W. Grimes case '-': 76558f0484fSRodney W. Grimes SETERROR(REG_ERANGE); 76658f0484fSRodney W. Grimes return; /* NOTE RETURN */ 76758f0484fSRodney W. Grimes break; 76858f0484fSRodney W. Grimes default: 76958f0484fSRodney W. Grimes c = '\0'; 77058f0484fSRodney W. Grimes break; 77158f0484fSRodney W. Grimes } 77258f0484fSRodney W. Grimes 77358f0484fSRodney W. Grimes switch (c) { 77458f0484fSRodney W. Grimes case ':': /* character class */ 77558f0484fSRodney W. Grimes NEXT2(); 77651295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 77758f0484fSRodney W. Grimes c = PEEK(); 77851295a4dSJordan K. Hubbard (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE); 77958f0484fSRodney W. Grimes p_b_cclass(p, cs); 78051295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 78151295a4dSJordan K. Hubbard (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 78258f0484fSRodney W. Grimes break; 78358f0484fSRodney W. Grimes case '=': /* equivalence class */ 78458f0484fSRodney W. Grimes NEXT2(); 78551295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 78658f0484fSRodney W. Grimes c = PEEK(); 78751295a4dSJordan K. Hubbard (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 78858f0484fSRodney W. Grimes p_b_eclass(p, cs); 78951295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 79051295a4dSJordan K. Hubbard (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 79158f0484fSRodney W. Grimes break; 79258f0484fSRodney W. Grimes default: /* symbol, ordinary character, or range */ 79358f0484fSRodney W. Grimes /* xxx revision needed for multichar stuff */ 79458f0484fSRodney W. Grimes start = p_b_symbol(p); 79558f0484fSRodney W. Grimes if (SEE('-') && MORE2() && PEEK2() != ']') { 79658f0484fSRodney W. Grimes /* range */ 79758f0484fSRodney W. Grimes NEXT(); 79858f0484fSRodney W. Grimes if (EAT('-')) 79958f0484fSRodney W. Grimes finish = '-'; 80058f0484fSRodney W. Grimes else 80158f0484fSRodney W. Grimes finish = p_b_symbol(p); 80258f0484fSRodney W. Grimes } else 80358f0484fSRodney W. Grimes finish = start; 8045c551438SAndrey A. Chernov if (start == finish) 8055c551438SAndrey A. Chernov CHadd(cs, start); 8065c551438SAndrey A. Chernov else { 807c61cea72SAndrey A. Chernov (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE); 8085c551438SAndrey A. Chernov for (i = CHAR_MIN; i <= CHAR_MAX; i++) { 809c61cea72SAndrey A. Chernov if ( __collate_range_cmp(start, i) <= 0 810c61cea72SAndrey A. Chernov && __collate_range_cmp(i, finish) <= 0 8115c551438SAndrey A. Chernov ) 81258f0484fSRodney W. Grimes CHadd(cs, i); 8135c551438SAndrey A. Chernov } 8145c551438SAndrey A. Chernov } 81558f0484fSRodney W. Grimes break; 81658f0484fSRodney W. Grimes } 81758f0484fSRodney W. Grimes } 81858f0484fSRodney W. Grimes 81958f0484fSRodney W. Grimes /* 82058f0484fSRodney W. Grimes - p_b_cclass - parse a character-class name and deal with it 82158f0484fSRodney W. Grimes == static void p_b_cclass(register struct parse *p, register cset *cs); 82258f0484fSRodney W. Grimes */ 82358f0484fSRodney W. Grimes static void 82458f0484fSRodney W. Grimes p_b_cclass(p, cs) 82558f0484fSRodney W. Grimes register struct parse *p; 82658f0484fSRodney W. Grimes register cset *cs; 82758f0484fSRodney W. Grimes { 828b5363c4aSAndrey A. Chernov register int c; 82958f0484fSRodney W. Grimes register char *sp = p->next; 83058f0484fSRodney W. Grimes register struct cclass *cp; 83158f0484fSRodney W. Grimes register size_t len; 83258f0484fSRodney W. Grimes 833b5363c4aSAndrey A. Chernov while (MORE() && isalpha((uch)PEEK())) 83458f0484fSRodney W. Grimes NEXT(); 83558f0484fSRodney W. Grimes len = p->next - sp; 83658f0484fSRodney W. Grimes for (cp = cclasses; cp->name != NULL; cp++) 83758f0484fSRodney W. Grimes if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 83858f0484fSRodney W. Grimes break; 83958f0484fSRodney W. Grimes if (cp->name == NULL) { 84058f0484fSRodney W. Grimes /* oops, didn't find it */ 84158f0484fSRodney W. Grimes SETERROR(REG_ECTYPE); 84258f0484fSRodney W. Grimes return; 84358f0484fSRodney W. Grimes } 84458f0484fSRodney W. Grimes 845b5363c4aSAndrey A. Chernov switch (cp->fidx) { 846b5363c4aSAndrey A. Chernov case CALNUM: 847b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 848b5363c4aSAndrey A. Chernov if (isalnum((uch)c)) 84958f0484fSRodney W. Grimes CHadd(cs, c); 850b5363c4aSAndrey A. Chernov break; 851b5363c4aSAndrey A. Chernov case CALPHA: 852b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 853b5363c4aSAndrey A. Chernov if (isalpha((uch)c)) 854b5363c4aSAndrey A. Chernov CHadd(cs, c); 855b5363c4aSAndrey A. Chernov break; 856b5363c4aSAndrey A. Chernov case CBLANK: 857b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 858b5363c4aSAndrey A. Chernov if (isblank((uch)c)) 859b5363c4aSAndrey A. Chernov CHadd(cs, c); 860b5363c4aSAndrey A. Chernov break; 861b5363c4aSAndrey A. Chernov case CCNTRL: 862b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 863b5363c4aSAndrey A. Chernov if (iscntrl((uch)c)) 864b5363c4aSAndrey A. Chernov CHadd(cs, c); 865b5363c4aSAndrey A. Chernov break; 866b5363c4aSAndrey A. Chernov case CDIGIT: 867b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 868b5363c4aSAndrey A. Chernov if (isdigit((uch)c)) 869b5363c4aSAndrey A. Chernov CHadd(cs, c); 870b5363c4aSAndrey A. Chernov break; 871b5363c4aSAndrey A. Chernov case CGRAPH: 872b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 873b5363c4aSAndrey A. Chernov if (isgraph((uch)c)) 874b5363c4aSAndrey A. Chernov CHadd(cs, c); 875b5363c4aSAndrey A. Chernov break; 876b5363c4aSAndrey A. Chernov case CLOWER: 877b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 878b5363c4aSAndrey A. Chernov if (islower((uch)c)) 879b5363c4aSAndrey A. Chernov CHadd(cs, c); 880b5363c4aSAndrey A. Chernov break; 881b5363c4aSAndrey A. Chernov case CPRINT: 882b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 883b5363c4aSAndrey A. Chernov if (isprint((uch)c)) 884b5363c4aSAndrey A. Chernov CHadd(cs, c); 885b5363c4aSAndrey A. Chernov break; 886b5363c4aSAndrey A. Chernov case CPUNCT: 887b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 888b5363c4aSAndrey A. Chernov if (ispunct((uch)c)) 889b5363c4aSAndrey A. Chernov CHadd(cs, c); 890b5363c4aSAndrey A. Chernov break; 891b5363c4aSAndrey A. Chernov case CSPACE: 892b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 893b5363c4aSAndrey A. Chernov if (isspace((uch)c)) 894b5363c4aSAndrey A. Chernov CHadd(cs, c); 895b5363c4aSAndrey A. Chernov break; 896b5363c4aSAndrey A. Chernov case CUPPER: 897b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 898b5363c4aSAndrey A. Chernov if (isupper((uch)c)) 899b5363c4aSAndrey A. Chernov CHadd(cs, c); 900b5363c4aSAndrey A. Chernov break; 901b5363c4aSAndrey A. Chernov case CXDIGIT: 902b5363c4aSAndrey A. Chernov for (c = CHAR_MIN; c <= CHAR_MAX; c++) 903b5363c4aSAndrey A. Chernov if (isxdigit((uch)c)) 904b5363c4aSAndrey A. Chernov CHadd(cs, c); 905b5363c4aSAndrey A. Chernov break; 906b5363c4aSAndrey A. Chernov } 907b5363c4aSAndrey A. Chernov #if 0 90858f0484fSRodney W. Grimes for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 90958f0484fSRodney W. Grimes MCadd(p, cs, u); 910b5363c4aSAndrey A. Chernov #endif 91158f0484fSRodney W. Grimes } 91258f0484fSRodney W. Grimes 91358f0484fSRodney W. Grimes /* 91458f0484fSRodney W. Grimes - p_b_eclass - parse an equivalence-class name and deal with it 91558f0484fSRodney W. Grimes == static void p_b_eclass(register struct parse *p, register cset *cs); 91658f0484fSRodney W. Grimes * 91758f0484fSRodney W. Grimes * This implementation is incomplete. xxx 91858f0484fSRodney W. Grimes */ 91958f0484fSRodney W. Grimes static void 92058f0484fSRodney W. Grimes p_b_eclass(p, cs) 92158f0484fSRodney W. Grimes register struct parse *p; 92258f0484fSRodney W. Grimes register cset *cs; 92358f0484fSRodney W. Grimes { 92458f0484fSRodney W. Grimes register char c; 92558f0484fSRodney W. Grimes 92658f0484fSRodney W. Grimes c = p_b_coll_elem(p, '='); 92758f0484fSRodney W. Grimes CHadd(cs, c); 92858f0484fSRodney W. Grimes } 92958f0484fSRodney W. Grimes 93058f0484fSRodney W. Grimes /* 93158f0484fSRodney W. Grimes - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 93258f0484fSRodney W. Grimes == static char p_b_symbol(register struct parse *p); 93358f0484fSRodney W. Grimes */ 93458f0484fSRodney W. Grimes static char /* value of symbol */ 93558f0484fSRodney W. Grimes p_b_symbol(p) 93658f0484fSRodney W. Grimes register struct parse *p; 93758f0484fSRodney W. Grimes { 93858f0484fSRodney W. Grimes register char value; 93958f0484fSRodney W. Grimes 94051295a4dSJordan K. Hubbard (void)REQUIRE(MORE(), REG_EBRACK); 94158f0484fSRodney W. Grimes if (!EATTWO('[', '.')) 94258f0484fSRodney W. Grimes return(GETNEXT()); 94358f0484fSRodney W. Grimes 94458f0484fSRodney W. Grimes /* collating symbol */ 94558f0484fSRodney W. Grimes value = p_b_coll_elem(p, '.'); 94651295a4dSJordan K. Hubbard (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 94758f0484fSRodney W. Grimes return(value); 94858f0484fSRodney W. Grimes } 94958f0484fSRodney W. Grimes 95058f0484fSRodney W. Grimes /* 95158f0484fSRodney W. Grimes - p_b_coll_elem - parse a collating-element name and look it up 95258f0484fSRodney W. Grimes == static char p_b_coll_elem(register struct parse *p, int endc); 95358f0484fSRodney W. Grimes */ 95458f0484fSRodney W. Grimes static char /* value of collating element */ 95558f0484fSRodney W. Grimes p_b_coll_elem(p, endc) 95658f0484fSRodney W. Grimes register struct parse *p; 95758f0484fSRodney W. Grimes int endc; /* name ended by endc,']' */ 95858f0484fSRodney W. Grimes { 95958f0484fSRodney W. Grimes register char *sp = p->next; 96058f0484fSRodney W. Grimes register struct cname *cp; 96158f0484fSRodney W. Grimes register int len; 96258f0484fSRodney W. Grimes 96358f0484fSRodney W. Grimes while (MORE() && !SEETWO(endc, ']')) 96458f0484fSRodney W. Grimes NEXT(); 96558f0484fSRodney W. Grimes if (!MORE()) { 96658f0484fSRodney W. Grimes SETERROR(REG_EBRACK); 96758f0484fSRodney W. Grimes return(0); 96858f0484fSRodney W. Grimes } 96958f0484fSRodney W. Grimes len = p->next - sp; 97058f0484fSRodney W. Grimes for (cp = cnames; cp->name != NULL; cp++) 97158f0484fSRodney W. Grimes if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 97258f0484fSRodney W. Grimes return(cp->code); /* known name */ 97358f0484fSRodney W. Grimes if (len == 1) 97458f0484fSRodney W. Grimes return(*sp); /* single character */ 97558f0484fSRodney W. Grimes SETERROR(REG_ECOLLATE); /* neither */ 97658f0484fSRodney W. Grimes return(0); 97758f0484fSRodney W. Grimes } 97858f0484fSRodney W. Grimes 97958f0484fSRodney W. Grimes /* 98058f0484fSRodney W. Grimes - othercase - return the case counterpart of an alphabetic 98158f0484fSRodney W. Grimes == static char othercase(int ch); 98258f0484fSRodney W. Grimes */ 98358f0484fSRodney W. Grimes static char /* if no counterpart, return ch */ 98458f0484fSRodney W. Grimes othercase(ch) 98558f0484fSRodney W. Grimes int ch; 98658f0484fSRodney W. Grimes { 98742ce22e4SAndrey A. Chernov ch = (unsigned char)ch; 98858f0484fSRodney W. Grimes assert(isalpha(ch)); 98958f0484fSRodney W. Grimes if (isupper(ch)) 99058f0484fSRodney W. Grimes return(tolower(ch)); 99158f0484fSRodney W. Grimes else if (islower(ch)) 99258f0484fSRodney W. Grimes return(toupper(ch)); 99358f0484fSRodney W. Grimes else /* peculiar, but could happen */ 99458f0484fSRodney W. Grimes return(ch); 99558f0484fSRodney W. Grimes } 99658f0484fSRodney W. Grimes 99758f0484fSRodney W. Grimes /* 99858f0484fSRodney W. Grimes - bothcases - emit a dualcase version of a two-case character 99958f0484fSRodney W. Grimes == static void bothcases(register struct parse *p, int ch); 100058f0484fSRodney W. Grimes * 100158f0484fSRodney W. Grimes * Boy, is this implementation ever a kludge... 100258f0484fSRodney W. Grimes */ 100358f0484fSRodney W. Grimes static void 100458f0484fSRodney W. Grimes bothcases(p, ch) 100558f0484fSRodney W. Grimes register struct parse *p; 100658f0484fSRodney W. Grimes int ch; 100758f0484fSRodney W. Grimes { 100858f0484fSRodney W. Grimes register char *oldnext = p->next; 100958f0484fSRodney W. Grimes register char *oldend = p->end; 101058f0484fSRodney W. Grimes char bracket[3]; 101158f0484fSRodney W. Grimes 101242ce22e4SAndrey A. Chernov ch = (unsigned char)ch; 101358f0484fSRodney W. Grimes assert(othercase(ch) != ch); /* p_bracket() would recurse */ 101458f0484fSRodney W. Grimes p->next = bracket; 101558f0484fSRodney W. Grimes p->end = bracket+2; 101658f0484fSRodney W. Grimes bracket[0] = ch; 101758f0484fSRodney W. Grimes bracket[1] = ']'; 101858f0484fSRodney W. Grimes bracket[2] = '\0'; 101958f0484fSRodney W. Grimes p_bracket(p); 102058f0484fSRodney W. Grimes assert(p->next == bracket+2); 102158f0484fSRodney W. Grimes p->next = oldnext; 102258f0484fSRodney W. Grimes p->end = oldend; 102358f0484fSRodney W. Grimes } 102458f0484fSRodney W. Grimes 102558f0484fSRodney W. Grimes /* 102658f0484fSRodney W. Grimes - ordinary - emit an ordinary character 102758f0484fSRodney W. Grimes == static void ordinary(register struct parse *p, register int ch); 102858f0484fSRodney W. Grimes */ 102958f0484fSRodney W. Grimes static void 103058f0484fSRodney W. Grimes ordinary(p, ch) 103158f0484fSRodney W. Grimes register struct parse *p; 103258f0484fSRodney W. Grimes register int ch; 103358f0484fSRodney W. Grimes { 103458f0484fSRodney W. Grimes register cat_t *cap = p->g->categories; 103558f0484fSRodney W. Grimes 103642ce22e4SAndrey A. Chernov if ((p->g->cflags®_ICASE) && isalpha((unsigned char)ch) && othercase(ch) != ch) 103758f0484fSRodney W. Grimes bothcases(p, ch); 103858f0484fSRodney W. Grimes else { 103958f0484fSRodney W. Grimes EMIT(OCHAR, (unsigned char)ch); 104058f0484fSRodney W. Grimes if (cap[ch] == 0) 104158f0484fSRodney W. Grimes cap[ch] = p->g->ncategories++; 104258f0484fSRodney W. Grimes } 104358f0484fSRodney W. Grimes } 104458f0484fSRodney W. Grimes 104558f0484fSRodney W. Grimes /* 104658f0484fSRodney W. Grimes - nonnewline - emit REG_NEWLINE version of OANY 104758f0484fSRodney W. Grimes == static void nonnewline(register struct parse *p); 104858f0484fSRodney W. Grimes * 104958f0484fSRodney W. Grimes * Boy, is this implementation ever a kludge... 105058f0484fSRodney W. Grimes */ 105158f0484fSRodney W. Grimes static void 105258f0484fSRodney W. Grimes nonnewline(p) 105358f0484fSRodney W. Grimes register struct parse *p; 105458f0484fSRodney W. Grimes { 105558f0484fSRodney W. Grimes register char *oldnext = p->next; 105658f0484fSRodney W. Grimes register char *oldend = p->end; 105758f0484fSRodney W. Grimes char bracket[4]; 105858f0484fSRodney W. Grimes 105958f0484fSRodney W. Grimes p->next = bracket; 106058f0484fSRodney W. Grimes p->end = bracket+3; 106158f0484fSRodney W. Grimes bracket[0] = '^'; 106258f0484fSRodney W. Grimes bracket[1] = '\n'; 106358f0484fSRodney W. Grimes bracket[2] = ']'; 106458f0484fSRodney W. Grimes bracket[3] = '\0'; 106558f0484fSRodney W. Grimes p_bracket(p); 106658f0484fSRodney W. Grimes assert(p->next == bracket+3); 106758f0484fSRodney W. Grimes p->next = oldnext; 106858f0484fSRodney W. Grimes p->end = oldend; 106958f0484fSRodney W. Grimes } 107058f0484fSRodney W. Grimes 107158f0484fSRodney W. Grimes /* 107258f0484fSRodney W. Grimes - repeat - generate code for a bounded repetition, recursively if needed 107358f0484fSRodney W. Grimes == static void repeat(register struct parse *p, sopno start, int from, int to); 107458f0484fSRodney W. Grimes */ 107558f0484fSRodney W. Grimes static void 107658f0484fSRodney W. Grimes repeat(p, start, from, to) 107758f0484fSRodney W. Grimes register struct parse *p; 107858f0484fSRodney W. Grimes sopno start; /* operand from here to end of strip */ 107958f0484fSRodney W. Grimes int from; /* repeated from this number */ 108058f0484fSRodney W. Grimes int to; /* to this number of times (maybe INFINITY) */ 108158f0484fSRodney W. Grimes { 108258f0484fSRodney W. Grimes register sopno finish = HERE(); 108358f0484fSRodney W. Grimes # define N 2 108458f0484fSRodney W. Grimes # define INF 3 108558f0484fSRodney W. Grimes # define REP(f, t) ((f)*8 + (t)) 108658f0484fSRodney W. Grimes # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 108758f0484fSRodney W. Grimes register sopno copy; 108858f0484fSRodney W. Grimes 108958f0484fSRodney W. Grimes if (p->error != 0) /* head off possible runaway recursion */ 109058f0484fSRodney W. Grimes return; 109158f0484fSRodney W. Grimes 109258f0484fSRodney W. Grimes assert(from <= to); 109358f0484fSRodney W. Grimes 109458f0484fSRodney W. Grimes switch (REP(MAP(from), MAP(to))) { 109558f0484fSRodney W. Grimes case REP(0, 0): /* must be user doing this */ 109658f0484fSRodney W. Grimes DROP(finish-start); /* drop the operand */ 109758f0484fSRodney W. Grimes break; 109858f0484fSRodney W. Grimes case REP(0, 1): /* as x{1,1}? */ 109958f0484fSRodney W. Grimes case REP(0, N): /* as x{1,n}? */ 110058f0484fSRodney W. Grimes case REP(0, INF): /* as x{1,}? */ 110158f0484fSRodney W. Grimes /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 110258f0484fSRodney W. Grimes INSERT(OCH_, start); /* offset is wrong... */ 110358f0484fSRodney W. Grimes repeat(p, start+1, 1, to); 110458f0484fSRodney W. Grimes ASTERN(OOR1, start); 110558f0484fSRodney W. Grimes AHEAD(start); /* ... fix it */ 110658f0484fSRodney W. Grimes EMIT(OOR2, 0); 110758f0484fSRodney W. Grimes AHEAD(THERE()); 110858f0484fSRodney W. Grimes ASTERN(O_CH, THERETHERE()); 110958f0484fSRodney W. Grimes break; 111058f0484fSRodney W. Grimes case REP(1, 1): /* trivial case */ 111158f0484fSRodney W. Grimes /* done */ 111258f0484fSRodney W. Grimes break; 111358f0484fSRodney W. Grimes case REP(1, N): /* as x?x{1,n-1} */ 111458f0484fSRodney W. Grimes /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 111558f0484fSRodney W. Grimes INSERT(OCH_, start); 111658f0484fSRodney W. Grimes ASTERN(OOR1, start); 111758f0484fSRodney W. Grimes AHEAD(start); 111858f0484fSRodney W. Grimes EMIT(OOR2, 0); /* offset very wrong... */ 111958f0484fSRodney W. Grimes AHEAD(THERE()); /* ...so fix it */ 112058f0484fSRodney W. Grimes ASTERN(O_CH, THERETHERE()); 112158f0484fSRodney W. Grimes copy = dupl(p, start+1, finish+1); 112258f0484fSRodney W. Grimes assert(copy == finish+4); 112358f0484fSRodney W. Grimes repeat(p, copy, 1, to-1); 112458f0484fSRodney W. Grimes break; 112558f0484fSRodney W. Grimes case REP(1, INF): /* as x+ */ 112658f0484fSRodney W. Grimes INSERT(OPLUS_, start); 112758f0484fSRodney W. Grimes ASTERN(O_PLUS, start); 112858f0484fSRodney W. Grimes break; 112958f0484fSRodney W. Grimes case REP(N, N): /* as xx{m-1,n-1} */ 113058f0484fSRodney W. Grimes copy = dupl(p, start, finish); 113158f0484fSRodney W. Grimes repeat(p, copy, from-1, to-1); 113258f0484fSRodney W. Grimes break; 113358f0484fSRodney W. Grimes case REP(N, INF): /* as xx{n-1,INF} */ 113458f0484fSRodney W. Grimes copy = dupl(p, start, finish); 113558f0484fSRodney W. Grimes repeat(p, copy, from-1, to); 113658f0484fSRodney W. Grimes break; 113758f0484fSRodney W. Grimes default: /* "can't happen" */ 113858f0484fSRodney W. Grimes SETERROR(REG_ASSERT); /* just in case */ 113958f0484fSRodney W. Grimes break; 114058f0484fSRodney W. Grimes } 114158f0484fSRodney W. Grimes } 114258f0484fSRodney W. Grimes 114358f0484fSRodney W. Grimes /* 114458f0484fSRodney W. Grimes - seterr - set an error condition 114558f0484fSRodney W. Grimes == static int seterr(register struct parse *p, int e); 114658f0484fSRodney W. Grimes */ 114758f0484fSRodney W. Grimes static int /* useless but makes type checking happy */ 114858f0484fSRodney W. Grimes seterr(p, e) 114958f0484fSRodney W. Grimes register struct parse *p; 115058f0484fSRodney W. Grimes int e; 115158f0484fSRodney W. Grimes { 115258f0484fSRodney W. Grimes if (p->error == 0) /* keep earliest error condition */ 115358f0484fSRodney W. Grimes p->error = e; 115458f0484fSRodney W. Grimes p->next = nuls; /* try to bring things to a halt */ 115558f0484fSRodney W. Grimes p->end = nuls; 115658f0484fSRodney W. Grimes return(0); /* make the return value well-defined */ 115758f0484fSRodney W. Grimes } 115858f0484fSRodney W. Grimes 115958f0484fSRodney W. Grimes /* 116058f0484fSRodney W. Grimes - allocset - allocate a set of characters for [] 116158f0484fSRodney W. Grimes == static cset *allocset(register struct parse *p); 116258f0484fSRodney W. Grimes */ 116358f0484fSRodney W. Grimes static cset * 116458f0484fSRodney W. Grimes allocset(p) 116558f0484fSRodney W. Grimes register struct parse *p; 116658f0484fSRodney W. Grimes { 116758f0484fSRodney W. Grimes register int no = p->g->ncsets++; 116858f0484fSRodney W. Grimes register size_t nc; 116958f0484fSRodney W. Grimes register size_t nbytes; 117058f0484fSRodney W. Grimes register cset *cs; 117158f0484fSRodney W. Grimes register size_t css = (size_t)p->g->csetsize; 117258f0484fSRodney W. Grimes register int i; 117358f0484fSRodney W. Grimes 117458f0484fSRodney W. Grimes if (no >= p->ncsalloc) { /* need another column of space */ 117558f0484fSRodney W. Grimes p->ncsalloc += CHAR_BIT; 117658f0484fSRodney W. Grimes nc = p->ncsalloc; 117758f0484fSRodney W. Grimes assert(nc % CHAR_BIT == 0); 117858f0484fSRodney W. Grimes nbytes = nc / CHAR_BIT * css; 117958f0484fSRodney W. Grimes if (p->g->sets == NULL) 118058f0484fSRodney W. Grimes p->g->sets = (cset *)malloc(nc * sizeof(cset)); 118158f0484fSRodney W. Grimes else 118258f0484fSRodney W. Grimes p->g->sets = (cset *)realloc((char *)p->g->sets, 118358f0484fSRodney W. Grimes nc * sizeof(cset)); 118458f0484fSRodney W. Grimes if (p->g->setbits == NULL) 118558f0484fSRodney W. Grimes p->g->setbits = (uch *)malloc(nbytes); 118658f0484fSRodney W. Grimes else { 118758f0484fSRodney W. Grimes p->g->setbits = (uch *)realloc((char *)p->g->setbits, 118858f0484fSRodney W. Grimes nbytes); 118958f0484fSRodney W. Grimes /* xxx this isn't right if setbits is now NULL */ 119058f0484fSRodney W. Grimes for (i = 0; i < no; i++) 119158f0484fSRodney W. Grimes p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 119258f0484fSRodney W. Grimes } 119358f0484fSRodney W. Grimes if (p->g->sets != NULL && p->g->setbits != NULL) 119458f0484fSRodney W. Grimes (void) memset((char *)p->g->setbits + (nbytes - css), 119558f0484fSRodney W. Grimes 0, css); 119658f0484fSRodney W. Grimes else { 119758f0484fSRodney W. Grimes no = 0; 119858f0484fSRodney W. Grimes SETERROR(REG_ESPACE); 119958f0484fSRodney W. Grimes /* caller's responsibility not to do set ops */ 120058f0484fSRodney W. Grimes } 120158f0484fSRodney W. Grimes } 120258f0484fSRodney W. Grimes 120358f0484fSRodney W. Grimes assert(p->g->sets != NULL); /* xxx */ 120458f0484fSRodney W. Grimes cs = &p->g->sets[no]; 120558f0484fSRodney W. Grimes cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 120658f0484fSRodney W. Grimes cs->mask = 1 << ((no) % CHAR_BIT); 120758f0484fSRodney W. Grimes cs->hash = 0; 120858f0484fSRodney W. Grimes cs->smultis = 0; 120958f0484fSRodney W. Grimes cs->multis = NULL; 121058f0484fSRodney W. Grimes 121158f0484fSRodney W. Grimes return(cs); 121258f0484fSRodney W. Grimes } 121358f0484fSRodney W. Grimes 121458f0484fSRodney W. Grimes /* 121558f0484fSRodney W. Grimes - freeset - free a now-unused set 121658f0484fSRodney W. Grimes == static void freeset(register struct parse *p, register cset *cs); 121758f0484fSRodney W. Grimes */ 121858f0484fSRodney W. Grimes static void 121958f0484fSRodney W. Grimes freeset(p, cs) 122058f0484fSRodney W. Grimes register struct parse *p; 122158f0484fSRodney W. Grimes register cset *cs; 122258f0484fSRodney W. Grimes { 122358f0484fSRodney W. Grimes register int i; 122458f0484fSRodney W. Grimes register cset *top = &p->g->sets[p->g->ncsets]; 122558f0484fSRodney W. Grimes register size_t css = (size_t)p->g->csetsize; 122658f0484fSRodney W. Grimes 122758f0484fSRodney W. Grimes for (i = 0; i < css; i++) 122858f0484fSRodney W. Grimes CHsub(cs, i); 122958f0484fSRodney W. Grimes if (cs == top-1) /* recover only the easy case */ 123058f0484fSRodney W. Grimes p->g->ncsets--; 123158f0484fSRodney W. Grimes } 123258f0484fSRodney W. Grimes 123358f0484fSRodney W. Grimes /* 123458f0484fSRodney W. Grimes - freezeset - final processing on a set of characters 123558f0484fSRodney W. Grimes == static int freezeset(register struct parse *p, register cset *cs); 123658f0484fSRodney W. Grimes * 123758f0484fSRodney W. Grimes * The main task here is merging identical sets. This is usually a waste 123858f0484fSRodney W. Grimes * of time (although the hash code minimizes the overhead), but can win 123958f0484fSRodney W. Grimes * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 124058f0484fSRodney W. Grimes * is done using addition rather than xor -- all ASCII [aA] sets xor to 124158f0484fSRodney W. Grimes * the same value! 124258f0484fSRodney W. Grimes */ 124358f0484fSRodney W. Grimes static int /* set number */ 124458f0484fSRodney W. Grimes freezeset(p, cs) 124558f0484fSRodney W. Grimes register struct parse *p; 124658f0484fSRodney W. Grimes register cset *cs; 124758f0484fSRodney W. Grimes { 124830735075SAndrey A. Chernov register short h = cs->hash; 124958f0484fSRodney W. Grimes register int i; 125058f0484fSRodney W. Grimes register cset *top = &p->g->sets[p->g->ncsets]; 125158f0484fSRodney W. Grimes register cset *cs2; 125258f0484fSRodney W. Grimes register size_t css = (size_t)p->g->csetsize; 125358f0484fSRodney W. Grimes 125458f0484fSRodney W. Grimes /* look for an earlier one which is the same */ 125558f0484fSRodney W. Grimes for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 125658f0484fSRodney W. Grimes if (cs2->hash == h && cs2 != cs) { 125758f0484fSRodney W. Grimes /* maybe */ 125858f0484fSRodney W. Grimes for (i = 0; i < css; i++) 125958f0484fSRodney W. Grimes if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 126058f0484fSRodney W. Grimes break; /* no */ 126158f0484fSRodney W. Grimes if (i == css) 126258f0484fSRodney W. Grimes break; /* yes */ 126358f0484fSRodney W. Grimes } 126458f0484fSRodney W. Grimes 126558f0484fSRodney W. Grimes if (cs2 < top) { /* found one */ 126658f0484fSRodney W. Grimes freeset(p, cs); 126758f0484fSRodney W. Grimes cs = cs2; 126858f0484fSRodney W. Grimes } 126958f0484fSRodney W. Grimes 127058f0484fSRodney W. Grimes return((int)(cs - p->g->sets)); 127158f0484fSRodney W. Grimes } 127258f0484fSRodney W. Grimes 127358f0484fSRodney W. Grimes /* 127458f0484fSRodney W. Grimes - firstch - return first character in a set (which must have at least one) 127558f0484fSRodney W. Grimes == static int firstch(register struct parse *p, register cset *cs); 127658f0484fSRodney W. Grimes */ 127758f0484fSRodney W. Grimes static int /* character; there is no "none" value */ 127858f0484fSRodney W. Grimes firstch(p, cs) 127958f0484fSRodney W. Grimes register struct parse *p; 128058f0484fSRodney W. Grimes register cset *cs; 128158f0484fSRodney W. Grimes { 128258f0484fSRodney W. Grimes register int i; 128358f0484fSRodney W. Grimes register size_t css = (size_t)p->g->csetsize; 128458f0484fSRodney W. Grimes 128558f0484fSRodney W. Grimes for (i = 0; i < css; i++) 128658f0484fSRodney W. Grimes if (CHIN(cs, i)) 128742ce22e4SAndrey A. Chernov return((unsigned char)i); 128858f0484fSRodney W. Grimes assert(never); 128958f0484fSRodney W. Grimes return(0); /* arbitrary */ 129058f0484fSRodney W. Grimes } 129158f0484fSRodney W. Grimes 129258f0484fSRodney W. Grimes /* 129358f0484fSRodney W. Grimes - nch - number of characters in a set 129458f0484fSRodney W. Grimes == static int nch(register struct parse *p, register cset *cs); 129558f0484fSRodney W. Grimes */ 129658f0484fSRodney W. Grimes static int 129758f0484fSRodney W. Grimes nch(p, cs) 129858f0484fSRodney W. Grimes register struct parse *p; 129958f0484fSRodney W. Grimes register cset *cs; 130058f0484fSRodney W. Grimes { 130158f0484fSRodney W. Grimes register int i; 130258f0484fSRodney W. Grimes register size_t css = (size_t)p->g->csetsize; 130358f0484fSRodney W. Grimes register int n = 0; 130458f0484fSRodney W. Grimes 130558f0484fSRodney W. Grimes for (i = 0; i < css; i++) 130658f0484fSRodney W. Grimes if (CHIN(cs, i)) 130758f0484fSRodney W. Grimes n++; 130858f0484fSRodney W. Grimes return(n); 130958f0484fSRodney W. Grimes } 131058f0484fSRodney W. Grimes 131158f0484fSRodney W. Grimes /* 131258f0484fSRodney W. Grimes - mcadd - add a collating element to a cset 131358f0484fSRodney W. Grimes == static void mcadd(register struct parse *p, register cset *cs, \ 131458f0484fSRodney W. Grimes == register char *cp); 131558f0484fSRodney W. Grimes */ 131658f0484fSRodney W. Grimes static void 131758f0484fSRodney W. Grimes mcadd(p, cs, cp) 131858f0484fSRodney W. Grimes register struct parse *p; 131958f0484fSRodney W. Grimes register cset *cs; 132058f0484fSRodney W. Grimes register char *cp; 132158f0484fSRodney W. Grimes { 132258f0484fSRodney W. Grimes register size_t oldend = cs->smultis; 132358f0484fSRodney W. Grimes 132458f0484fSRodney W. Grimes cs->smultis += strlen(cp) + 1; 132558f0484fSRodney W. Grimes if (cs->multis == NULL) 132658f0484fSRodney W. Grimes cs->multis = malloc(cs->smultis); 132758f0484fSRodney W. Grimes else 132858f0484fSRodney W. Grimes cs->multis = realloc(cs->multis, cs->smultis); 132958f0484fSRodney W. Grimes if (cs->multis == NULL) { 133058f0484fSRodney W. Grimes SETERROR(REG_ESPACE); 133158f0484fSRodney W. Grimes return; 133258f0484fSRodney W. Grimes } 133358f0484fSRodney W. Grimes 133458f0484fSRodney W. Grimes (void) strcpy(cs->multis + oldend - 1, cp); 133558f0484fSRodney W. Grimes cs->multis[cs->smultis - 1] = '\0'; 133658f0484fSRodney W. Grimes } 133758f0484fSRodney W. Grimes 133851295a4dSJordan K. Hubbard #if used 133958f0484fSRodney W. Grimes /* 134058f0484fSRodney W. Grimes - mcsub - subtract a collating element from a cset 134158f0484fSRodney W. Grimes == static void mcsub(register cset *cs, register char *cp); 134258f0484fSRodney W. Grimes */ 134358f0484fSRodney W. Grimes static void 134458f0484fSRodney W. Grimes mcsub(cs, cp) 134558f0484fSRodney W. Grimes register cset *cs; 134658f0484fSRodney W. Grimes register char *cp; 134758f0484fSRodney W. Grimes { 134858f0484fSRodney W. Grimes register char *fp = mcfind(cs, cp); 134958f0484fSRodney W. Grimes register size_t len = strlen(fp); 135058f0484fSRodney W. Grimes 135158f0484fSRodney W. Grimes assert(fp != NULL); 135258f0484fSRodney W. Grimes (void) memmove(fp, fp + len + 1, 135358f0484fSRodney W. Grimes cs->smultis - (fp + len + 1 - cs->multis)); 135458f0484fSRodney W. Grimes cs->smultis -= len; 135558f0484fSRodney W. Grimes 135658f0484fSRodney W. Grimes if (cs->smultis == 0) { 135758f0484fSRodney W. Grimes free(cs->multis); 135858f0484fSRodney W. Grimes cs->multis = NULL; 135958f0484fSRodney W. Grimes return; 136058f0484fSRodney W. Grimes } 136158f0484fSRodney W. Grimes 136258f0484fSRodney W. Grimes cs->multis = realloc(cs->multis, cs->smultis); 136358f0484fSRodney W. Grimes assert(cs->multis != NULL); 136458f0484fSRodney W. Grimes } 136558f0484fSRodney W. Grimes 136658f0484fSRodney W. Grimes /* 136758f0484fSRodney W. Grimes - mcin - is a collating element in a cset? 136858f0484fSRodney W. Grimes == static int mcin(register cset *cs, register char *cp); 136958f0484fSRodney W. Grimes */ 137058f0484fSRodney W. Grimes static int 137158f0484fSRodney W. Grimes mcin(cs, cp) 137258f0484fSRodney W. Grimes register cset *cs; 137358f0484fSRodney W. Grimes register char *cp; 137458f0484fSRodney W. Grimes { 137558f0484fSRodney W. Grimes return(mcfind(cs, cp) != NULL); 137658f0484fSRodney W. Grimes } 137758f0484fSRodney W. Grimes 137858f0484fSRodney W. Grimes /* 137958f0484fSRodney W. Grimes - mcfind - find a collating element in a cset 138058f0484fSRodney W. Grimes == static char *mcfind(register cset *cs, register char *cp); 138158f0484fSRodney W. Grimes */ 138258f0484fSRodney W. Grimes static char * 138358f0484fSRodney W. Grimes mcfind(cs, cp) 138458f0484fSRodney W. Grimes register cset *cs; 138558f0484fSRodney W. Grimes register char *cp; 138658f0484fSRodney W. Grimes { 138758f0484fSRodney W. Grimes register char *p; 138858f0484fSRodney W. Grimes 138958f0484fSRodney W. Grimes if (cs->multis == NULL) 139058f0484fSRodney W. Grimes return(NULL); 139158f0484fSRodney W. Grimes for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 139258f0484fSRodney W. Grimes if (strcmp(cp, p) == 0) 139358f0484fSRodney W. Grimes return(p); 139458f0484fSRodney W. Grimes return(NULL); 139558f0484fSRodney W. Grimes } 139651295a4dSJordan K. Hubbard #endif 139758f0484fSRodney W. Grimes 139858f0484fSRodney W. Grimes /* 139958f0484fSRodney W. Grimes - mcinvert - invert the list of collating elements in a cset 140058f0484fSRodney W. Grimes == static void mcinvert(register struct parse *p, register cset *cs); 140158f0484fSRodney W. Grimes * 140258f0484fSRodney W. Grimes * This would have to know the set of possibilities. Implementation 140358f0484fSRodney W. Grimes * is deferred. 140458f0484fSRodney W. Grimes */ 140558f0484fSRodney W. Grimes static void 140658f0484fSRodney W. Grimes mcinvert(p, cs) 140758f0484fSRodney W. Grimes register struct parse *p; 140858f0484fSRodney W. Grimes register cset *cs; 140958f0484fSRodney W. Grimes { 141058f0484fSRodney W. Grimes assert(cs->multis == NULL); /* xxx */ 141158f0484fSRodney W. Grimes } 141258f0484fSRodney W. Grimes 141358f0484fSRodney W. Grimes /* 141458f0484fSRodney W. Grimes - mccase - add case counterparts of the list of collating elements in a cset 141558f0484fSRodney W. Grimes == static void mccase(register struct parse *p, register cset *cs); 141658f0484fSRodney W. Grimes * 141758f0484fSRodney W. Grimes * This would have to know the set of possibilities. Implementation 141858f0484fSRodney W. Grimes * is deferred. 141958f0484fSRodney W. Grimes */ 142058f0484fSRodney W. Grimes static void 142158f0484fSRodney W. Grimes mccase(p, cs) 142258f0484fSRodney W. Grimes register struct parse *p; 142358f0484fSRodney W. Grimes register cset *cs; 142458f0484fSRodney W. Grimes { 142558f0484fSRodney W. Grimes assert(cs->multis == NULL); /* xxx */ 142658f0484fSRodney W. Grimes } 142758f0484fSRodney W. Grimes 142858f0484fSRodney W. Grimes /* 142958f0484fSRodney W. Grimes - isinsets - is this character in any sets? 143058f0484fSRodney W. Grimes == static int isinsets(register struct re_guts *g, int c); 143158f0484fSRodney W. Grimes */ 143258f0484fSRodney W. Grimes static int /* predicate */ 143358f0484fSRodney W. Grimes isinsets(g, c) 143458f0484fSRodney W. Grimes register struct re_guts *g; 143558f0484fSRodney W. Grimes int c; 143658f0484fSRodney W. Grimes { 143758f0484fSRodney W. Grimes register uch *col; 143858f0484fSRodney W. Grimes register int i; 143958f0484fSRodney W. Grimes register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 144058f0484fSRodney W. Grimes register unsigned uc = (unsigned char)c; 144158f0484fSRodney W. Grimes 144258f0484fSRodney W. Grimes for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 144358f0484fSRodney W. Grimes if (col[uc] != 0) 144458f0484fSRodney W. Grimes return(1); 144558f0484fSRodney W. Grimes return(0); 144658f0484fSRodney W. Grimes } 144758f0484fSRodney W. Grimes 144858f0484fSRodney W. Grimes /* 144958f0484fSRodney W. Grimes - samesets - are these two characters in exactly the same sets? 145058f0484fSRodney W. Grimes == static int samesets(register struct re_guts *g, int c1, int c2); 145158f0484fSRodney W. Grimes */ 145258f0484fSRodney W. Grimes static int /* predicate */ 145358f0484fSRodney W. Grimes samesets(g, c1, c2) 145458f0484fSRodney W. Grimes register struct re_guts *g; 145558f0484fSRodney W. Grimes int c1; 145658f0484fSRodney W. Grimes int c2; 145758f0484fSRodney W. Grimes { 145858f0484fSRodney W. Grimes register uch *col; 145958f0484fSRodney W. Grimes register int i; 146058f0484fSRodney W. Grimes register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 146158f0484fSRodney W. Grimes register unsigned uc1 = (unsigned char)c1; 146258f0484fSRodney W. Grimes register unsigned uc2 = (unsigned char)c2; 146358f0484fSRodney W. Grimes 146458f0484fSRodney W. Grimes for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 146558f0484fSRodney W. Grimes if (col[uc1] != col[uc2]) 146658f0484fSRodney W. Grimes return(0); 146758f0484fSRodney W. Grimes return(1); 146858f0484fSRodney W. Grimes } 146958f0484fSRodney W. Grimes 147058f0484fSRodney W. Grimes /* 147158f0484fSRodney W. Grimes - categorize - sort out character categories 147258f0484fSRodney W. Grimes == static void categorize(struct parse *p, register struct re_guts *g); 147358f0484fSRodney W. Grimes */ 147458f0484fSRodney W. Grimes static void 147558f0484fSRodney W. Grimes categorize(p, g) 147658f0484fSRodney W. Grimes struct parse *p; 147758f0484fSRodney W. Grimes register struct re_guts *g; 147858f0484fSRodney W. Grimes { 147958f0484fSRodney W. Grimes register cat_t *cats = g->categories; 148058f0484fSRodney W. Grimes register int c; 148158f0484fSRodney W. Grimes register int c2; 148258f0484fSRodney W. Grimes register cat_t cat; 148358f0484fSRodney W. Grimes 148458f0484fSRodney W. Grimes /* avoid making error situations worse */ 148558f0484fSRodney W. Grimes if (p->error != 0) 148658f0484fSRodney W. Grimes return; 148758f0484fSRodney W. Grimes 148858f0484fSRodney W. Grimes for (c = CHAR_MIN; c <= CHAR_MAX; c++) 148958f0484fSRodney W. Grimes if (cats[c] == 0 && isinsets(g, c)) { 149058f0484fSRodney W. Grimes cat = g->ncategories++; 149158f0484fSRodney W. Grimes cats[c] = cat; 149258f0484fSRodney W. Grimes for (c2 = c+1; c2 <= CHAR_MAX; c2++) 149358f0484fSRodney W. Grimes if (cats[c2] == 0 && samesets(g, c, c2)) 149458f0484fSRodney W. Grimes cats[c2] = cat; 149558f0484fSRodney W. Grimes } 149658f0484fSRodney W. Grimes } 149758f0484fSRodney W. Grimes 149858f0484fSRodney W. Grimes /* 149958f0484fSRodney W. Grimes - dupl - emit a duplicate of a bunch of sops 150058f0484fSRodney W. Grimes == static sopno dupl(register struct parse *p, sopno start, sopno finish); 150158f0484fSRodney W. Grimes */ 150258f0484fSRodney W. Grimes static sopno /* start of duplicate */ 150358f0484fSRodney W. Grimes dupl(p, start, finish) 150458f0484fSRodney W. Grimes register struct parse *p; 150558f0484fSRodney W. Grimes sopno start; /* from here */ 150658f0484fSRodney W. Grimes sopno finish; /* to this less one */ 150758f0484fSRodney W. Grimes { 150858f0484fSRodney W. Grimes register sopno ret = HERE(); 150958f0484fSRodney W. Grimes register sopno len = finish - start; 151058f0484fSRodney W. Grimes 151158f0484fSRodney W. Grimes assert(finish >= start); 151258f0484fSRodney W. Grimes if (len == 0) 151358f0484fSRodney W. Grimes return(ret); 151458f0484fSRodney W. Grimes enlarge(p, p->ssize + len); /* this many unexpected additions */ 151558f0484fSRodney W. Grimes assert(p->ssize >= p->slen + len); 151658f0484fSRodney W. Grimes (void) memcpy((char *)(p->strip + p->slen), 151758f0484fSRodney W. Grimes (char *)(p->strip + start), (size_t)len*sizeof(sop)); 151858f0484fSRodney W. Grimes p->slen += len; 151958f0484fSRodney W. Grimes return(ret); 152058f0484fSRodney W. Grimes } 152158f0484fSRodney W. Grimes 152258f0484fSRodney W. Grimes /* 152358f0484fSRodney W. Grimes - doemit - emit a strip operator 152458f0484fSRodney W. Grimes == static void doemit(register struct parse *p, sop op, size_t opnd); 152558f0484fSRodney W. Grimes * 152658f0484fSRodney W. Grimes * It might seem better to implement this as a macro with a function as 152758f0484fSRodney W. Grimes * hard-case backup, but it's just too big and messy unless there are 152858f0484fSRodney W. Grimes * some changes to the data structures. Maybe later. 152958f0484fSRodney W. Grimes */ 153058f0484fSRodney W. Grimes static void 153158f0484fSRodney W. Grimes doemit(p, op, opnd) 153258f0484fSRodney W. Grimes register struct parse *p; 153358f0484fSRodney W. Grimes sop op; 153458f0484fSRodney W. Grimes size_t opnd; 153558f0484fSRodney W. Grimes { 153658f0484fSRodney W. Grimes /* avoid making error situations worse */ 153758f0484fSRodney W. Grimes if (p->error != 0) 153858f0484fSRodney W. Grimes return; 153958f0484fSRodney W. Grimes 154058f0484fSRodney W. Grimes /* deal with oversize operands ("can't happen", more or less) */ 154158f0484fSRodney W. Grimes assert(opnd < 1<<OPSHIFT); 154258f0484fSRodney W. Grimes 154358f0484fSRodney W. Grimes /* deal with undersized strip */ 154458f0484fSRodney W. Grimes if (p->slen >= p->ssize) 154558f0484fSRodney W. Grimes enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 154658f0484fSRodney W. Grimes assert(p->slen < p->ssize); 154758f0484fSRodney W. Grimes 154858f0484fSRodney W. Grimes /* finally, it's all reduced to the easy case */ 154958f0484fSRodney W. Grimes p->strip[p->slen++] = SOP(op, opnd); 155058f0484fSRodney W. Grimes } 155158f0484fSRodney W. Grimes 155258f0484fSRodney W. Grimes /* 155358f0484fSRodney W. Grimes - doinsert - insert a sop into the strip 155458f0484fSRodney W. Grimes == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 155558f0484fSRodney W. Grimes */ 155658f0484fSRodney W. Grimes static void 155758f0484fSRodney W. Grimes doinsert(p, op, opnd, pos) 155858f0484fSRodney W. Grimes register struct parse *p; 155958f0484fSRodney W. Grimes sop op; 156058f0484fSRodney W. Grimes size_t opnd; 156158f0484fSRodney W. Grimes sopno pos; 156258f0484fSRodney W. Grimes { 156358f0484fSRodney W. Grimes register sopno sn; 156458f0484fSRodney W. Grimes register sop s; 156558f0484fSRodney W. Grimes register int i; 156658f0484fSRodney W. Grimes 156758f0484fSRodney W. Grimes /* avoid making error situations worse */ 156858f0484fSRodney W. Grimes if (p->error != 0) 156958f0484fSRodney W. Grimes return; 157058f0484fSRodney W. Grimes 157158f0484fSRodney W. Grimes sn = HERE(); 157258f0484fSRodney W. Grimes EMIT(op, opnd); /* do checks, ensure space */ 157358f0484fSRodney W. Grimes assert(HERE() == sn+1); 157458f0484fSRodney W. Grimes s = p->strip[sn]; 157558f0484fSRodney W. Grimes 157658f0484fSRodney W. Grimes /* adjust paren pointers */ 157758f0484fSRodney W. Grimes assert(pos > 0); 157858f0484fSRodney W. Grimes for (i = 1; i < NPAREN; i++) { 157958f0484fSRodney W. Grimes if (p->pbegin[i] >= pos) { 158058f0484fSRodney W. Grimes p->pbegin[i]++; 158158f0484fSRodney W. Grimes } 158258f0484fSRodney W. Grimes if (p->pend[i] >= pos) { 158358f0484fSRodney W. Grimes p->pend[i]++; 158458f0484fSRodney W. Grimes } 158558f0484fSRodney W. Grimes } 158658f0484fSRodney W. Grimes 158758f0484fSRodney W. Grimes memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 158858f0484fSRodney W. Grimes (HERE()-pos-1)*sizeof(sop)); 158958f0484fSRodney W. Grimes p->strip[pos] = s; 159058f0484fSRodney W. Grimes } 159158f0484fSRodney W. Grimes 159258f0484fSRodney W. Grimes /* 159358f0484fSRodney W. Grimes - dofwd - complete a forward reference 159458f0484fSRodney W. Grimes == static void dofwd(register struct parse *p, sopno pos, sop value); 159558f0484fSRodney W. Grimes */ 159658f0484fSRodney W. Grimes static void 159758f0484fSRodney W. Grimes dofwd(p, pos, value) 159858f0484fSRodney W. Grimes register struct parse *p; 159958f0484fSRodney W. Grimes register sopno pos; 160058f0484fSRodney W. Grimes sop value; 160158f0484fSRodney W. Grimes { 160258f0484fSRodney W. Grimes /* avoid making error situations worse */ 160358f0484fSRodney W. Grimes if (p->error != 0) 160458f0484fSRodney W. Grimes return; 160558f0484fSRodney W. Grimes 160658f0484fSRodney W. Grimes assert(value < 1<<OPSHIFT); 160758f0484fSRodney W. Grimes p->strip[pos] = OP(p->strip[pos]) | value; 160858f0484fSRodney W. Grimes } 160958f0484fSRodney W. Grimes 161058f0484fSRodney W. Grimes /* 161158f0484fSRodney W. Grimes - enlarge - enlarge the strip 161258f0484fSRodney W. Grimes == static void enlarge(register struct parse *p, sopno size); 161358f0484fSRodney W. Grimes */ 161458f0484fSRodney W. Grimes static void 161558f0484fSRodney W. Grimes enlarge(p, size) 161658f0484fSRodney W. Grimes register struct parse *p; 161758f0484fSRodney W. Grimes register sopno size; 161858f0484fSRodney W. Grimes { 161958f0484fSRodney W. Grimes register sop *sp; 162058f0484fSRodney W. Grimes 162158f0484fSRodney W. Grimes if (p->ssize >= size) 162258f0484fSRodney W. Grimes return; 162358f0484fSRodney W. Grimes 162458f0484fSRodney W. Grimes sp = (sop *)realloc(p->strip, size*sizeof(sop)); 162558f0484fSRodney W. Grimes if (sp == NULL) { 162658f0484fSRodney W. Grimes SETERROR(REG_ESPACE); 162758f0484fSRodney W. Grimes return; 162858f0484fSRodney W. Grimes } 162958f0484fSRodney W. Grimes p->strip = sp; 163058f0484fSRodney W. Grimes p->ssize = size; 163158f0484fSRodney W. Grimes } 163258f0484fSRodney W. Grimes 163358f0484fSRodney W. Grimes /* 163458f0484fSRodney W. Grimes - stripsnug - compact the strip 163558f0484fSRodney W. Grimes == static void stripsnug(register struct parse *p, register struct re_guts *g); 163658f0484fSRodney W. Grimes */ 163758f0484fSRodney W. Grimes static void 163858f0484fSRodney W. Grimes stripsnug(p, g) 163958f0484fSRodney W. Grimes register struct parse *p; 164058f0484fSRodney W. Grimes register struct re_guts *g; 164158f0484fSRodney W. Grimes { 164258f0484fSRodney W. Grimes g->nstates = p->slen; 164358f0484fSRodney W. Grimes g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); 164458f0484fSRodney W. Grimes if (g->strip == NULL) { 164558f0484fSRodney W. Grimes SETERROR(REG_ESPACE); 164658f0484fSRodney W. Grimes g->strip = p->strip; 164758f0484fSRodney W. Grimes } 164858f0484fSRodney W. Grimes } 164958f0484fSRodney W. Grimes 165058f0484fSRodney W. Grimes /* 165158f0484fSRodney W. Grimes - findmust - fill in must and mlen with longest mandatory literal string 165258f0484fSRodney W. Grimes == static void findmust(register struct parse *p, register struct re_guts *g); 165358f0484fSRodney W. Grimes * 165458f0484fSRodney W. Grimes * This algorithm could do fancy things like analyzing the operands of | 165558f0484fSRodney W. Grimes * for common subsequences. Someday. This code is simple and finds most 165658f0484fSRodney W. Grimes * of the interesting cases. 165758f0484fSRodney W. Grimes * 165858f0484fSRodney W. Grimes * Note that must and mlen got initialized during setup. 165958f0484fSRodney W. Grimes */ 166058f0484fSRodney W. Grimes static void 166158f0484fSRodney W. Grimes findmust(p, g) 166258f0484fSRodney W. Grimes struct parse *p; 166358f0484fSRodney W. Grimes register struct re_guts *g; 166458f0484fSRodney W. Grimes { 166558f0484fSRodney W. Grimes register sop *scan; 166658f0484fSRodney W. Grimes sop *start; 166758f0484fSRodney W. Grimes register sop *newstart; 166858f0484fSRodney W. Grimes register sopno newlen; 166958f0484fSRodney W. Grimes register sop s; 167058f0484fSRodney W. Grimes register char *cp; 167158f0484fSRodney W. Grimes register sopno i; 167258f0484fSRodney W. Grimes 167358f0484fSRodney W. Grimes /* avoid making error situations worse */ 167458f0484fSRodney W. Grimes if (p->error != 0) 167558f0484fSRodney W. Grimes return; 167658f0484fSRodney W. Grimes 167758f0484fSRodney W. Grimes /* find the longest OCHAR sequence in strip */ 167858f0484fSRodney W. Grimes newlen = 0; 167958f0484fSRodney W. Grimes scan = g->strip + 1; 168058f0484fSRodney W. Grimes do { 168158f0484fSRodney W. Grimes s = *scan++; 168258f0484fSRodney W. Grimes switch (OP(s)) { 168358f0484fSRodney W. Grimes case OCHAR: /* sequence member */ 168458f0484fSRodney W. Grimes if (newlen == 0) /* new sequence */ 168558f0484fSRodney W. Grimes newstart = scan - 1; 168658f0484fSRodney W. Grimes newlen++; 168758f0484fSRodney W. Grimes break; 168858f0484fSRodney W. Grimes case OPLUS_: /* things that don't break one */ 168958f0484fSRodney W. Grimes case OLPAREN: 169058f0484fSRodney W. Grimes case ORPAREN: 169158f0484fSRodney W. Grimes break; 169258f0484fSRodney W. Grimes case OQUEST_: /* things that must be skipped */ 169358f0484fSRodney W. Grimes case OCH_: 169458f0484fSRodney W. Grimes scan--; 169558f0484fSRodney W. Grimes do { 169658f0484fSRodney W. Grimes scan += OPND(s); 169758f0484fSRodney W. Grimes s = *scan; 169858f0484fSRodney W. Grimes /* assert() interferes w debug printouts */ 169958f0484fSRodney W. Grimes if (OP(s) != O_QUEST && OP(s) != O_CH && 170058f0484fSRodney W. Grimes OP(s) != OOR2) { 170158f0484fSRodney W. Grimes g->iflags |= BAD; 170258f0484fSRodney W. Grimes return; 170358f0484fSRodney W. Grimes } 170458f0484fSRodney W. Grimes } while (OP(s) != O_QUEST && OP(s) != O_CH); 170558f0484fSRodney W. Grimes /* fallthrough */ 170658f0484fSRodney W. Grimes default: /* things that break a sequence */ 170758f0484fSRodney W. Grimes if (newlen > g->mlen) { /* ends one */ 170858f0484fSRodney W. Grimes start = newstart; 170958f0484fSRodney W. Grimes g->mlen = newlen; 171058f0484fSRodney W. Grimes } 171158f0484fSRodney W. Grimes newlen = 0; 171258f0484fSRodney W. Grimes break; 171358f0484fSRodney W. Grimes } 171458f0484fSRodney W. Grimes } while (OP(s) != OEND); 171558f0484fSRodney W. Grimes 171658f0484fSRodney W. Grimes if (g->mlen == 0) /* there isn't one */ 171758f0484fSRodney W. Grimes return; 171858f0484fSRodney W. Grimes 171958f0484fSRodney W. Grimes /* turn it into a character string */ 172058f0484fSRodney W. Grimes g->must = malloc((size_t)g->mlen + 1); 172158f0484fSRodney W. Grimes if (g->must == NULL) { /* argh; just forget it */ 172258f0484fSRodney W. Grimes g->mlen = 0; 172358f0484fSRodney W. Grimes return; 172458f0484fSRodney W. Grimes } 172558f0484fSRodney W. Grimes cp = g->must; 172658f0484fSRodney W. Grimes scan = start; 172758f0484fSRodney W. Grimes for (i = g->mlen; i > 0; i--) { 172858f0484fSRodney W. Grimes while (OP(s = *scan++) != OCHAR) 172958f0484fSRodney W. Grimes continue; 173058f0484fSRodney W. Grimes assert(cp < g->must + g->mlen); 173158f0484fSRodney W. Grimes *cp++ = (char)OPND(s); 173258f0484fSRodney W. Grimes } 173358f0484fSRodney W. Grimes assert(cp == g->must + g->mlen); 173458f0484fSRodney W. Grimes *cp++ = '\0'; /* just on general principles */ 173558f0484fSRodney W. Grimes } 173658f0484fSRodney W. Grimes 173758f0484fSRodney W. Grimes /* 173858f0484fSRodney W. Grimes - pluscount - count + nesting 173958f0484fSRodney W. Grimes == static sopno pluscount(register struct parse *p, register struct re_guts *g); 174058f0484fSRodney W. Grimes */ 174158f0484fSRodney W. Grimes static sopno /* nesting depth */ 174258f0484fSRodney W. Grimes pluscount(p, g) 174358f0484fSRodney W. Grimes struct parse *p; 174458f0484fSRodney W. Grimes register struct re_guts *g; 174558f0484fSRodney W. Grimes { 174658f0484fSRodney W. Grimes register sop *scan; 174758f0484fSRodney W. Grimes register sop s; 174858f0484fSRodney W. Grimes register sopno plusnest = 0; 174958f0484fSRodney W. Grimes register sopno maxnest = 0; 175058f0484fSRodney W. Grimes 175158f0484fSRodney W. Grimes if (p->error != 0) 175258f0484fSRodney W. Grimes return(0); /* there may not be an OEND */ 175358f0484fSRodney W. Grimes 175458f0484fSRodney W. Grimes scan = g->strip + 1; 175558f0484fSRodney W. Grimes do { 175658f0484fSRodney W. Grimes s = *scan++; 175758f0484fSRodney W. Grimes switch (OP(s)) { 175858f0484fSRodney W. Grimes case OPLUS_: 175958f0484fSRodney W. Grimes plusnest++; 176058f0484fSRodney W. Grimes break; 176158f0484fSRodney W. Grimes case O_PLUS: 176258f0484fSRodney W. Grimes if (plusnest > maxnest) 176358f0484fSRodney W. Grimes maxnest = plusnest; 176458f0484fSRodney W. Grimes plusnest--; 176558f0484fSRodney W. Grimes break; 176658f0484fSRodney W. Grimes } 176758f0484fSRodney W. Grimes } while (OP(s) != OEND); 176858f0484fSRodney W. Grimes if (plusnest != 0) 176958f0484fSRodney W. Grimes g->iflags |= BAD; 177058f0484fSRodney W. Grimes return(maxnest); 177158f0484fSRodney W. Grimes } 1772