10b57cec5SDimitry Andric/*- 20b57cec5SDimitry Andric * This code is derived from OpenBSD's libc/regex, original license follows: 30b57cec5SDimitry Andric * 40b57cec5SDimitry Andric * Copyright (c) 1992, 1993, 1994 Henry Spencer. 50b57cec5SDimitry Andric * Copyright (c) 1992, 1993, 1994 60b57cec5SDimitry Andric * The Regents of the University of California. All rights reserved. 70b57cec5SDimitry Andric * 80b57cec5SDimitry Andric * This code is derived from software contributed to Berkeley by 90b57cec5SDimitry Andric * Henry Spencer. 100b57cec5SDimitry Andric * 110b57cec5SDimitry Andric * Redistribution and use in source and binary forms, with or without 120b57cec5SDimitry Andric * modification, are permitted provided that the following conditions 130b57cec5SDimitry Andric * are met: 140b57cec5SDimitry Andric * 1. Redistributions of source code must retain the above copyright 150b57cec5SDimitry Andric * notice, this list of conditions and the following disclaimer. 160b57cec5SDimitry Andric * 2. Redistributions in binary form must reproduce the above copyright 170b57cec5SDimitry Andric * notice, this list of conditions and the following disclaimer in the 180b57cec5SDimitry Andric * documentation and/or other materials provided with the distribution. 190b57cec5SDimitry Andric * 3. Neither the name of the University nor the names of its contributors 200b57cec5SDimitry Andric * may be used to endorse or promote products derived from this software 210b57cec5SDimitry Andric * without specific prior written permission. 220b57cec5SDimitry Andric * 230b57cec5SDimitry Andric * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 240b57cec5SDimitry Andric * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 250b57cec5SDimitry Andric * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 260b57cec5SDimitry Andric * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 270b57cec5SDimitry Andric * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 280b57cec5SDimitry Andric * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 290b57cec5SDimitry Andric * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 300b57cec5SDimitry Andric * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 310b57cec5SDimitry Andric * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 320b57cec5SDimitry Andric * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 330b57cec5SDimitry Andric * SUCH DAMAGE. 340b57cec5SDimitry Andric * 350b57cec5SDimitry Andric * @(#)engine.c 8.5 (Berkeley) 3/20/94 360b57cec5SDimitry Andric */ 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric/* 390b57cec5SDimitry Andric * The matching engine and friends. This file is #included by regexec.c 400b57cec5SDimitry Andric * after suitable #defines of a variety of macros used herein, so that 410b57cec5SDimitry Andric * different state representations can be used without duplicating masses 420b57cec5SDimitry Andric * of code. 430b57cec5SDimitry Andric */ 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric#ifdef SNAMES 460b57cec5SDimitry Andric#define matcher smatcher 470b57cec5SDimitry Andric#define fast sfast 480b57cec5SDimitry Andric#define slow sslow 490b57cec5SDimitry Andric#define dissect sdissect 500b57cec5SDimitry Andric#define backref sbackref 510b57cec5SDimitry Andric#define step sstep 520b57cec5SDimitry Andric#define print sprint 530b57cec5SDimitry Andric#define at sat 540b57cec5SDimitry Andric#define match smat 550b57cec5SDimitry Andric#define nope snope 5681ad6265SDimitry Andric#define step_back sstep_back 570b57cec5SDimitry Andric#endif 580b57cec5SDimitry Andric#ifdef LNAMES 590b57cec5SDimitry Andric#define matcher lmatcher 600b57cec5SDimitry Andric#define fast lfast 610b57cec5SDimitry Andric#define slow lslow 620b57cec5SDimitry Andric#define dissect ldissect 630b57cec5SDimitry Andric#define backref lbackref 640b57cec5SDimitry Andric#define step lstep 650b57cec5SDimitry Andric#define print lprint 660b57cec5SDimitry Andric#define at lat 670b57cec5SDimitry Andric#define match lmat 680b57cec5SDimitry Andric#define nope lnope 6981ad6265SDimitry Andric#define step_back lstep_back 700b57cec5SDimitry Andric#endif 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric/* another structure passed up and down to avoid zillions of parameters */ 730b57cec5SDimitry Andricstruct match { 740b57cec5SDimitry Andric struct re_guts *g; 750b57cec5SDimitry Andric int eflags; 760b57cec5SDimitry Andric llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 770b57cec5SDimitry Andric const char *offp; /* offsets work from here */ 780b57cec5SDimitry Andric const char *beginp; /* start of string -- virtual NUL precedes */ 790b57cec5SDimitry Andric const char *endp; /* end of string -- virtual NUL here */ 800b57cec5SDimitry Andric const char *coldp; /* can be no match starting before here */ 810b57cec5SDimitry Andric const char **lastpos; /* [nplus+1] */ 820b57cec5SDimitry Andric STATEVARS; 830b57cec5SDimitry Andric states st; /* current states */ 840b57cec5SDimitry Andric states fresh; /* states for a fresh start */ 850b57cec5SDimitry Andric states tmp; /* temporary */ 860b57cec5SDimitry Andric states empty; /* empty set of states */ 870b57cec5SDimitry Andric}; 880b57cec5SDimitry Andric 890b57cec5SDimitry Andricstatic int matcher(struct re_guts *, const char *, size_t, 900b57cec5SDimitry Andric llvm_regmatch_t[], int); 910b57cec5SDimitry Andricstatic const char *dissect(struct match *, const char *, const char *, sopno, 920b57cec5SDimitry Andric sopno); 930b57cec5SDimitry Andricstatic const char *backref(struct match *, const char *, const char *, sopno, 940b57cec5SDimitry Andric sopno, sopno, int); 950b57cec5SDimitry Andricstatic const char *fast(struct match *, const char *, const char *, sopno, sopno); 960b57cec5SDimitry Andricstatic const char *slow(struct match *, const char *, const char *, sopno, sopno); 970b57cec5SDimitry Andricstatic states step(struct re_guts *, sopno, sopno, states, int, states); 980b57cec5SDimitry Andric#define MAX_RECURSION 100 990b57cec5SDimitry Andric#define BOL (OUT+1) 1000b57cec5SDimitry Andric#define EOL (BOL+1) 1010b57cec5SDimitry Andric#define BOLEOL (BOL+2) 1020b57cec5SDimitry Andric#define NOTHING (BOL+3) 1030b57cec5SDimitry Andric#define BOW (BOL+4) 1040b57cec5SDimitry Andric#define EOW (BOL+5) 1050b57cec5SDimitry Andric#define CODEMAX (BOL+5) /* highest code used */ 1060b57cec5SDimitry Andric#define NONCHAR(c) ((c) > CHAR_MAX) 1070b57cec5SDimitry Andric#define NNONCHAR (CODEMAX-CHAR_MAX) 1080b57cec5SDimitry Andric#ifdef REDEBUG 109*bdd1243dSDimitry Andricstatic void print(struct match *, const char *, states, int, FILE *); 1100b57cec5SDimitry Andric#endif 1110b57cec5SDimitry Andric#ifdef REDEBUG 112*bdd1243dSDimitry Andricstatic void at( 113*bdd1243dSDimitry Andric struct match *, const char *, const char *, const char *, sopno, sopno); 1140b57cec5SDimitry Andric#endif 1150b57cec5SDimitry Andric#ifdef REDEBUG 1160b57cec5SDimitry Andricstatic char *pchar(int); 1170b57cec5SDimitry Andric#endif 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric#ifdef REDEBUG 1200b57cec5SDimitry Andric#define SP(t, s, c) print(m, t, s, c, stdout) 1210b57cec5SDimitry Andric#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 1220b57cec5SDimitry Andric#define NOTE(str) { if (m->eflags®_TRACE) (void)printf("=%s\n", (str)); } 1230b57cec5SDimitry Andricstatic int nope = 0; 1240b57cec5SDimitry Andric#else 1250b57cec5SDimitry Andric#define SP(t, s, c) /* nothing */ 1260b57cec5SDimitry Andric#define AT(t, p1, p2, s1, s2) /* nothing */ 1270b57cec5SDimitry Andric#define NOTE(s) /* nothing */ 1280b57cec5SDimitry Andric#endif 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric/* 1310b57cec5SDimitry Andric - matcher - the actual matching engine 1320b57cec5SDimitry Andric */ 1330b57cec5SDimitry Andricstatic int /* 0 success, REG_NOMATCH failure */ 1340b57cec5SDimitry Andricmatcher(struct re_guts *g, const char *string, size_t nmatch, 1350b57cec5SDimitry Andric llvm_regmatch_t pmatch[], 1360b57cec5SDimitry Andric int eflags) 1370b57cec5SDimitry Andric{ 1380b57cec5SDimitry Andric const char *endp; 1390b57cec5SDimitry Andric size_t i; 1400b57cec5SDimitry Andric struct match mv; 1410b57cec5SDimitry Andric struct match *m = &mv; 1420b57cec5SDimitry Andric const char *dp; 1430b57cec5SDimitry Andric const sopno gf = g->firststate+1; /* +1 for OEND */ 1440b57cec5SDimitry Andric const sopno gl = g->laststate; 1450b57cec5SDimitry Andric const char *start; 1460b57cec5SDimitry Andric const char *stop; 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric /* simplify the situation where possible */ 1490b57cec5SDimitry Andric if (g->cflags®_NOSUB) 1500b57cec5SDimitry Andric nmatch = 0; 1510b57cec5SDimitry Andric if (eflags®_STARTEND) { 1520b57cec5SDimitry Andric start = string + pmatch[0].rm_so; 1530b57cec5SDimitry Andric stop = string + pmatch[0].rm_eo; 1540b57cec5SDimitry Andric } else { 1550b57cec5SDimitry Andric start = string; 1560b57cec5SDimitry Andric stop = start + strlen(start); 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric if (stop < start) 1590b57cec5SDimitry Andric return(REG_INVARG); 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric /* prescreening; this does wonders for this rather slow code */ 1620b57cec5SDimitry Andric if (g->must != NULL) { 1630b57cec5SDimitry Andric for (dp = start; dp < stop; dp++) 1640b57cec5SDimitry Andric if (*dp == g->must[0] && stop - dp >= g->mlen && 1650b57cec5SDimitry Andric memcmp(dp, g->must, (size_t)g->mlen) == 0) 1660b57cec5SDimitry Andric break; 1670b57cec5SDimitry Andric if (dp == stop) /* we didn't find g->must */ 1680b57cec5SDimitry Andric return(REG_NOMATCH); 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric /* match struct setup */ 1720b57cec5SDimitry Andric m->g = g; 1730b57cec5SDimitry Andric m->eflags = eflags; 1740b57cec5SDimitry Andric m->pmatch = NULL; 1750b57cec5SDimitry Andric m->lastpos = NULL; 1760b57cec5SDimitry Andric m->offp = string; 1770b57cec5SDimitry Andric m->beginp = start; 1780b57cec5SDimitry Andric m->endp = stop; 1790b57cec5SDimitry Andric STATESETUP(m, 4); 1800b57cec5SDimitry Andric SETUP(m->st); 1810b57cec5SDimitry Andric SETUP(m->fresh); 1820b57cec5SDimitry Andric SETUP(m->tmp); 1830b57cec5SDimitry Andric SETUP(m->empty); 1840b57cec5SDimitry Andric CLEAR(m->empty); 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric /* this loop does only one repetition except for backrefs */ 1870b57cec5SDimitry Andric for (;;) { 1880b57cec5SDimitry Andric endp = fast(m, start, stop, gf, gl); 1890b57cec5SDimitry Andric if (endp == NULL) { /* a miss */ 1900b57cec5SDimitry Andric free(m->pmatch); 1910b57cec5SDimitry Andric free((void*)m->lastpos); 1920b57cec5SDimitry Andric STATETEARDOWN(m); 1930b57cec5SDimitry Andric return(REG_NOMATCH); 1940b57cec5SDimitry Andric } 1950b57cec5SDimitry Andric if (nmatch == 0 && !g->backrefs) 1960b57cec5SDimitry Andric break; /* no further info needed */ 1970b57cec5SDimitry Andric 1980b57cec5SDimitry Andric /* where? */ 1990b57cec5SDimitry Andric assert(m->coldp != NULL); 2000b57cec5SDimitry Andric for (;;) { 2010b57cec5SDimitry Andric NOTE("finding start"); 2020b57cec5SDimitry Andric endp = slow(m, m->coldp, stop, gf, gl); 2030b57cec5SDimitry Andric if (endp != NULL) 2040b57cec5SDimitry Andric break; 2050b57cec5SDimitry Andric assert(m->coldp < m->endp); 2060b57cec5SDimitry Andric m->coldp++; 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric if (nmatch == 1 && !g->backrefs) 2090b57cec5SDimitry Andric break; /* no further info needed */ 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric /* oh my, they want the subexpressions... */ 2120b57cec5SDimitry Andric if (m->pmatch == NULL) 2130b57cec5SDimitry Andric m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * 2140b57cec5SDimitry Andric sizeof(llvm_regmatch_t)); 2150b57cec5SDimitry Andric if (m->pmatch == NULL) { 2160b57cec5SDimitry Andric STATETEARDOWN(m); 2170b57cec5SDimitry Andric return(REG_ESPACE); 2180b57cec5SDimitry Andric } 2190b57cec5SDimitry Andric for (i = 1; i <= m->g->nsub; i++) 2200b57cec5SDimitry Andric m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 2210b57cec5SDimitry Andric if (!g->backrefs && !(m->eflags®_BACKR)) { 2220b57cec5SDimitry Andric NOTE("dissecting"); 2230b57cec5SDimitry Andric dp = dissect(m, m->coldp, endp, gf, gl); 2240b57cec5SDimitry Andric } else { 2250b57cec5SDimitry Andric if (g->nplus > 0 && m->lastpos == NULL) 2260b57cec5SDimitry Andric m->lastpos = (const char **)malloc((g->nplus+1) * 2270b57cec5SDimitry Andric sizeof(char *)); 2280b57cec5SDimitry Andric if (g->nplus > 0 && m->lastpos == NULL) { 2290b57cec5SDimitry Andric free(m->pmatch); 2300b57cec5SDimitry Andric STATETEARDOWN(m); 2310b57cec5SDimitry Andric return(REG_ESPACE); 2320b57cec5SDimitry Andric } 2330b57cec5SDimitry Andric NOTE("backref dissect"); 2340b57cec5SDimitry Andric dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); 2350b57cec5SDimitry Andric } 2360b57cec5SDimitry Andric if (dp != NULL) 2370b57cec5SDimitry Andric break; 2380b57cec5SDimitry Andric 2390b57cec5SDimitry Andric /* uh-oh... we couldn't find a subexpression-level match */ 2400b57cec5SDimitry Andric assert(g->backrefs); /* must be back references doing it */ 2410b57cec5SDimitry Andric assert(g->nplus == 0 || m->lastpos != NULL); 2420b57cec5SDimitry Andric for (;;) { 2430b57cec5SDimitry Andric if (dp != NULL || endp <= m->coldp) 2440b57cec5SDimitry Andric break; /* defeat */ 2450b57cec5SDimitry Andric NOTE("backoff"); 2460b57cec5SDimitry Andric endp = slow(m, m->coldp, endp-1, gf, gl); 2470b57cec5SDimitry Andric if (endp == NULL) 2480b57cec5SDimitry Andric break; /* defeat */ 2490b57cec5SDimitry Andric /* try it on a shorter possibility */ 2500b57cec5SDimitry Andric#ifndef NDEBUG 2510b57cec5SDimitry Andric for (i = 1; i <= m->g->nsub; i++) { 2520b57cec5SDimitry Andric assert(m->pmatch[i].rm_so == -1); 2530b57cec5SDimitry Andric assert(m->pmatch[i].rm_eo == -1); 2540b57cec5SDimitry Andric } 2550b57cec5SDimitry Andric#endif 2560b57cec5SDimitry Andric NOTE("backoff dissect"); 2570b57cec5SDimitry Andric dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); 2580b57cec5SDimitry Andric } 2590b57cec5SDimitry Andric assert(dp == NULL || dp == endp); 2600b57cec5SDimitry Andric if (dp != NULL) /* found a shorter one */ 2610b57cec5SDimitry Andric break; 2620b57cec5SDimitry Andric 2630b57cec5SDimitry Andric /* despite initial appearances, there is no match here */ 2640b57cec5SDimitry Andric NOTE("false alarm"); 2650b57cec5SDimitry Andric if (m->coldp == stop) 2660b57cec5SDimitry Andric break; 2670b57cec5SDimitry Andric start = m->coldp + 1; /* recycle starting later */ 2680b57cec5SDimitry Andric } 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric /* fill in the details if requested */ 2710b57cec5SDimitry Andric if (nmatch > 0) { 2720b57cec5SDimitry Andric pmatch[0].rm_so = m->coldp - m->offp; 2730b57cec5SDimitry Andric pmatch[0].rm_eo = endp - m->offp; 2740b57cec5SDimitry Andric } 2750b57cec5SDimitry Andric if (nmatch > 1) { 2760b57cec5SDimitry Andric assert(m->pmatch != NULL); 2770b57cec5SDimitry Andric for (i = 1; i < nmatch; i++) 2780b57cec5SDimitry Andric if (i <= m->g->nsub) 2790b57cec5SDimitry Andric pmatch[i] = m->pmatch[i]; 2800b57cec5SDimitry Andric else { 2810b57cec5SDimitry Andric pmatch[i].rm_so = -1; 2820b57cec5SDimitry Andric pmatch[i].rm_eo = -1; 2830b57cec5SDimitry Andric } 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric if (m->pmatch != NULL) 2870b57cec5SDimitry Andric free((char *)m->pmatch); 2880b57cec5SDimitry Andric if (m->lastpos != NULL) 2890b57cec5SDimitry Andric free((char *)m->lastpos); 2900b57cec5SDimitry Andric STATETEARDOWN(m); 2910b57cec5SDimitry Andric return(0); 2920b57cec5SDimitry Andric} 2930b57cec5SDimitry Andric 29481ad6265SDimitry Andric/* Step back from "stop" to a position where the strip startst..stopst might 29581ad6265SDimitry Andric * match. This can always conservatively return "stop - 1", but may return an 29681ad6265SDimitry Andric * earlier position if matches at later positions are impossible. */ 29781ad6265SDimitry Andricstatic const char * 29881ad6265SDimitry Andricstep_back(struct re_guts *g, const char *start, const char *stop, sopno startst, 29981ad6265SDimitry Andric sopno stopst) 30081ad6265SDimitry Andric{ 30181ad6265SDimitry Andric /* Always step back at least one character. */ 30281ad6265SDimitry Andric assert(stop > start); 30381ad6265SDimitry Andric const char *res = stop - 1; 30481ad6265SDimitry Andric 30581ad6265SDimitry Andric /* Check whether the strip startst..stropst starts with a fixed character, 30681ad6265SDimitry Andric * ignoring any closing parentheses. If not, return a conservative result. */ 30781ad6265SDimitry Andric for (;;) { 30881ad6265SDimitry Andric if (startst >= stopst) 30981ad6265SDimitry Andric return res; 31081ad6265SDimitry Andric if (OP(g->strip[startst]) != ORPAREN) 31181ad6265SDimitry Andric break; 31281ad6265SDimitry Andric startst++; 31381ad6265SDimitry Andric } 31481ad6265SDimitry Andric if (OP(g->strip[startst]) != OCHAR) 31581ad6265SDimitry Andric return res; 31681ad6265SDimitry Andric 31781ad6265SDimitry Andric /* Find the character that starts the following match. */ 31881ad6265SDimitry Andric char ch = OPND(g->strip[startst]); 31981ad6265SDimitry Andric for (; res != start; --res) { 320*bdd1243dSDimitry Andric if (*res == ch) { 321*bdd1243dSDimitry Andric /* Try to check the next fixed character as well. */ 322*bdd1243dSDimitry Andric sopno nextst = startst + 1; 323*bdd1243dSDimitry Andric const char *next = res + 1; 324*bdd1243dSDimitry Andric if (nextst >= stopst || OP(g->strip[nextst]) != OCHAR || next >= stop || 325*bdd1243dSDimitry Andric *next == (char)OPND(g->strip[nextst])) 32681ad6265SDimitry Andric break; 32781ad6265SDimitry Andric } 328*bdd1243dSDimitry Andric } 32981ad6265SDimitry Andric return res; 33081ad6265SDimitry Andric} 33181ad6265SDimitry Andric 3320b57cec5SDimitry Andric/* 3330b57cec5SDimitry Andric - dissect - figure out what matched what, no back references 3340b57cec5SDimitry Andric */ 3350b57cec5SDimitry Andricstatic const char * /* == stop (success) always */ 3360b57cec5SDimitry Andricdissect(struct match *m, const char *start, const char *stop, sopno startst, 3370b57cec5SDimitry Andric sopno stopst) 3380b57cec5SDimitry Andric{ 3390b57cec5SDimitry Andric int i; 3400b57cec5SDimitry Andric sopno ss; /* start sop of current subRE */ 3410b57cec5SDimitry Andric sopno es; /* end sop of current subRE */ 3420b57cec5SDimitry Andric const char *sp; /* start of string matched by it */ 3430b57cec5SDimitry Andric const char *stp; /* string matched by it cannot pass here */ 3440b57cec5SDimitry Andric const char *rest; /* start of rest of string */ 3450b57cec5SDimitry Andric const char *tail; /* string unmatched by rest of RE */ 3460b57cec5SDimitry Andric sopno ssub; /* start sop of subsubRE */ 3470b57cec5SDimitry Andric sopno esub; /* end sop of subsubRE */ 3480b57cec5SDimitry Andric const char *ssp; /* start of string matched by subsubRE */ 3490b57cec5SDimitry Andric const char *sep; /* end of string matched by subsubRE */ 3500b57cec5SDimitry Andric const char *oldssp; /* previous ssp */ 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric AT("diss", start, stop, startst, stopst); 3530b57cec5SDimitry Andric sp = start; 3540b57cec5SDimitry Andric for (ss = startst; ss < stopst; ss = es) { 3550b57cec5SDimitry Andric /* identify end of subRE */ 3560b57cec5SDimitry Andric es = ss; 3570b57cec5SDimitry Andric switch (OP(m->g->strip[es])) { 3580b57cec5SDimitry Andric case OPLUS_: 3590b57cec5SDimitry Andric case OQUEST_: 3600b57cec5SDimitry Andric es += OPND(m->g->strip[es]); 3610b57cec5SDimitry Andric break; 3620b57cec5SDimitry Andric case OCH_: 3630b57cec5SDimitry Andric while (OP(m->g->strip[es]) != O_CH) 3640b57cec5SDimitry Andric es += OPND(m->g->strip[es]); 3650b57cec5SDimitry Andric break; 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric es++; 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric /* figure out what it matched */ 3700b57cec5SDimitry Andric switch (OP(m->g->strip[ss])) { 3710b57cec5SDimitry Andric case OEND: 3720b57cec5SDimitry Andric assert(nope); 3730b57cec5SDimitry Andric break; 3740b57cec5SDimitry Andric case OCHAR: 3750b57cec5SDimitry Andric sp++; 3760b57cec5SDimitry Andric break; 3770b57cec5SDimitry Andric case OBOL: 3780b57cec5SDimitry Andric case OEOL: 3790b57cec5SDimitry Andric case OBOW: 3800b57cec5SDimitry Andric case OEOW: 3810b57cec5SDimitry Andric break; 3820b57cec5SDimitry Andric case OANY: 3830b57cec5SDimitry Andric case OANYOF: 3840b57cec5SDimitry Andric sp++; 3850b57cec5SDimitry Andric break; 3860b57cec5SDimitry Andric case OBACK_: 3870b57cec5SDimitry Andric case O_BACK: 3880b57cec5SDimitry Andric assert(nope); 3890b57cec5SDimitry Andric break; 3900b57cec5SDimitry Andric /* cases where length of match is hard to find */ 3910b57cec5SDimitry Andric case OQUEST_: 3920b57cec5SDimitry Andric stp = stop; 3930b57cec5SDimitry Andric for (;;) { 3940b57cec5SDimitry Andric /* how long could this one be? */ 3950b57cec5SDimitry Andric rest = slow(m, sp, stp, ss, es); 3960b57cec5SDimitry Andric assert(rest != NULL); /* it did match */ 3970b57cec5SDimitry Andric /* could the rest match the rest? */ 3980b57cec5SDimitry Andric tail = slow(m, rest, stop, es, stopst); 3990b57cec5SDimitry Andric if (tail == stop) 4000b57cec5SDimitry Andric break; /* yes! */ 4010b57cec5SDimitry Andric /* no -- try a shorter match for this one */ 40281ad6265SDimitry Andric stp = step_back(m->g, sp, rest, es, stopst); 4030b57cec5SDimitry Andric assert(stp >= sp); /* it did work */ 4040b57cec5SDimitry Andric } 4050b57cec5SDimitry Andric ssub = ss + 1; 4060b57cec5SDimitry Andric esub = es - 1; 4070b57cec5SDimitry Andric /* did innards match? */ 4080b57cec5SDimitry Andric if (slow(m, sp, rest, ssub, esub) != NULL) { 4090b57cec5SDimitry Andric const char *dp = dissect(m, sp, rest, ssub, esub); 4100b57cec5SDimitry Andric (void)dp; /* avoid warning if assertions off */ 4110b57cec5SDimitry Andric assert(dp == rest); 4120b57cec5SDimitry Andric } else /* no */ 4130b57cec5SDimitry Andric assert(sp == rest); 4140b57cec5SDimitry Andric sp = rest; 4150b57cec5SDimitry Andric break; 4160b57cec5SDimitry Andric case OPLUS_: 4170b57cec5SDimitry Andric stp = stop; 4180b57cec5SDimitry Andric for (;;) { 4190b57cec5SDimitry Andric /* how long could this one be? */ 4200b57cec5SDimitry Andric rest = slow(m, sp, stp, ss, es); 4210b57cec5SDimitry Andric assert(rest != NULL); /* it did match */ 4220b57cec5SDimitry Andric /* could the rest match the rest? */ 4230b57cec5SDimitry Andric tail = slow(m, rest, stop, es, stopst); 4240b57cec5SDimitry Andric if (tail == stop) 4250b57cec5SDimitry Andric break; /* yes! */ 4260b57cec5SDimitry Andric /* no -- try a shorter match for this one */ 42781ad6265SDimitry Andric stp = step_back(m->g, sp, rest, es, stopst); 4280b57cec5SDimitry Andric assert(stp >= sp); /* it did work */ 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric ssub = ss + 1; 4310b57cec5SDimitry Andric esub = es - 1; 4320b57cec5SDimitry Andric ssp = sp; 4330b57cec5SDimitry Andric oldssp = ssp; 4340b57cec5SDimitry Andric for (;;) { /* find last match of innards */ 4350b57cec5SDimitry Andric sep = slow(m, ssp, rest, ssub, esub); 4360b57cec5SDimitry Andric if (sep == NULL || sep == ssp) 4370b57cec5SDimitry Andric break; /* failed or matched null */ 4380b57cec5SDimitry Andric oldssp = ssp; /* on to next try */ 4390b57cec5SDimitry Andric ssp = sep; 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric if (sep == NULL) { 4420b57cec5SDimitry Andric /* last successful match */ 4430b57cec5SDimitry Andric sep = ssp; 4440b57cec5SDimitry Andric ssp = oldssp; 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric assert(sep == rest); /* must exhaust substring */ 4470b57cec5SDimitry Andric assert(slow(m, ssp, sep, ssub, esub) == rest); 4480b57cec5SDimitry Andric { 4490b57cec5SDimitry Andric const char *dp = dissect(m, ssp, sep, ssub, esub); 4500b57cec5SDimitry Andric (void)dp; /* avoid warning if assertions off */ 4510b57cec5SDimitry Andric assert(dp == sep); 4520b57cec5SDimitry Andric } 4530b57cec5SDimitry Andric sp = rest; 4540b57cec5SDimitry Andric break; 4550b57cec5SDimitry Andric case OCH_: 4560b57cec5SDimitry Andric stp = stop; 4570b57cec5SDimitry Andric for (;;) { 4580b57cec5SDimitry Andric /* how long could this one be? */ 4590b57cec5SDimitry Andric rest = slow(m, sp, stp, ss, es); 4600b57cec5SDimitry Andric assert(rest != NULL); /* it did match */ 4610b57cec5SDimitry Andric /* could the rest match the rest? */ 4620b57cec5SDimitry Andric tail = slow(m, rest, stop, es, stopst); 4630b57cec5SDimitry Andric if (tail == stop) 4640b57cec5SDimitry Andric break; /* yes! */ 4650b57cec5SDimitry Andric /* no -- try a shorter match for this one */ 4660b57cec5SDimitry Andric stp = rest - 1; 4670b57cec5SDimitry Andric assert(stp >= sp); /* it did work */ 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric ssub = ss + 1; 4700b57cec5SDimitry Andric esub = ss + OPND(m->g->strip[ss]) - 1; 4710b57cec5SDimitry Andric assert(OP(m->g->strip[esub]) == OOR1); 4720b57cec5SDimitry Andric for (;;) { /* find first matching branch */ 4730b57cec5SDimitry Andric if (slow(m, sp, rest, ssub, esub) == rest) 4740b57cec5SDimitry Andric break; /* it matched all of it */ 4750b57cec5SDimitry Andric /* that one missed, try next one */ 4760b57cec5SDimitry Andric assert(OP(m->g->strip[esub]) == OOR1); 4770b57cec5SDimitry Andric esub++; 4780b57cec5SDimitry Andric assert(OP(m->g->strip[esub]) == OOR2); 4790b57cec5SDimitry Andric ssub = esub + 1; 4800b57cec5SDimitry Andric esub += OPND(m->g->strip[esub]); 4810b57cec5SDimitry Andric if (OP(m->g->strip[esub]) == OOR2) 4820b57cec5SDimitry Andric esub--; 4830b57cec5SDimitry Andric else 4840b57cec5SDimitry Andric assert(OP(m->g->strip[esub]) == O_CH); 4850b57cec5SDimitry Andric } 4860b57cec5SDimitry Andric { 4870b57cec5SDimitry Andric const char *dp = dissect(m, sp, rest, ssub, esub); 4880b57cec5SDimitry Andric (void)dp; /* avoid warning if assertions off */ 4890b57cec5SDimitry Andric assert(dp == rest); 4900b57cec5SDimitry Andric } 4910b57cec5SDimitry Andric sp = rest; 4920b57cec5SDimitry Andric break; 4930b57cec5SDimitry Andric case O_PLUS: 4940b57cec5SDimitry Andric case O_QUEST: 4950b57cec5SDimitry Andric case OOR1: 4960b57cec5SDimitry Andric case OOR2: 4970b57cec5SDimitry Andric case O_CH: 4980b57cec5SDimitry Andric assert(nope); 4990b57cec5SDimitry Andric break; 5000b57cec5SDimitry Andric case OLPAREN: 5010b57cec5SDimitry Andric i = OPND(m->g->strip[ss]); 5020b57cec5SDimitry Andric assert(0 < i && i <= m->g->nsub); 5030b57cec5SDimitry Andric m->pmatch[i].rm_so = sp - m->offp; 5040b57cec5SDimitry Andric break; 5050b57cec5SDimitry Andric case ORPAREN: 5060b57cec5SDimitry Andric i = OPND(m->g->strip[ss]); 5070b57cec5SDimitry Andric assert(0 < i && i <= m->g->nsub); 5080b57cec5SDimitry Andric m->pmatch[i].rm_eo = sp - m->offp; 5090b57cec5SDimitry Andric break; 5100b57cec5SDimitry Andric default: /* uh oh */ 5110b57cec5SDimitry Andric assert(nope); 5120b57cec5SDimitry Andric break; 5130b57cec5SDimitry Andric } 5140b57cec5SDimitry Andric } 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric assert(sp == stop); 5170b57cec5SDimitry Andric return(sp); 5180b57cec5SDimitry Andric} 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric/* 5210b57cec5SDimitry Andric - backref - figure out what matched what, figuring in back references 5220b57cec5SDimitry Andric */ 5230b57cec5SDimitry Andricstatic const char * /* == stop (success) or NULL (failure) */ 5240b57cec5SDimitry Andricbackref(struct match *m, const char *start, const char *stop, sopno startst, 5250b57cec5SDimitry Andric sopno stopst, sopno lev, int rec) /* PLUS nesting level */ 5260b57cec5SDimitry Andric{ 5270b57cec5SDimitry Andric int i; 5280b57cec5SDimitry Andric sopno ss; /* start sop of current subRE */ 5290b57cec5SDimitry Andric const char *sp; /* start of string matched by it */ 5300b57cec5SDimitry Andric sopno ssub; /* start sop of subsubRE */ 5310b57cec5SDimitry Andric sopno esub; /* end sop of subsubRE */ 5320b57cec5SDimitry Andric const char *ssp; /* start of string matched by subsubRE */ 5330b57cec5SDimitry Andric const char *dp; 5340b57cec5SDimitry Andric size_t len; 5350b57cec5SDimitry Andric int hard; 5360b57cec5SDimitry Andric sop s; 5370b57cec5SDimitry Andric llvm_regoff_t offsave; 5380b57cec5SDimitry Andric cset *cs; 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric AT("back", start, stop, startst, stopst); 5410b57cec5SDimitry Andric sp = start; 5420b57cec5SDimitry Andric 5430b57cec5SDimitry Andric /* get as far as we can with easy stuff */ 5440b57cec5SDimitry Andric hard = 0; 5450b57cec5SDimitry Andric for (ss = startst; !hard && ss < stopst; ss++) 5460b57cec5SDimitry Andric switch (OP(s = m->g->strip[ss])) { 5470b57cec5SDimitry Andric case OCHAR: 5480b57cec5SDimitry Andric if (sp == stop || *sp++ != (char)OPND(s)) 5490b57cec5SDimitry Andric return(NULL); 5500b57cec5SDimitry Andric break; 5510b57cec5SDimitry Andric case OANY: 5520b57cec5SDimitry Andric if (sp == stop) 5530b57cec5SDimitry Andric return(NULL); 5540b57cec5SDimitry Andric sp++; 5550b57cec5SDimitry Andric break; 5560b57cec5SDimitry Andric case OANYOF: 5570b57cec5SDimitry Andric cs = &m->g->sets[OPND(s)]; 5580b57cec5SDimitry Andric if (sp == stop || !CHIN(cs, *sp++)) 5590b57cec5SDimitry Andric return(NULL); 5600b57cec5SDimitry Andric break; 5610b57cec5SDimitry Andric case OBOL: 5620b57cec5SDimitry Andric if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 5630b57cec5SDimitry Andric (sp < m->endp && *(sp-1) == '\n' && 5640b57cec5SDimitry Andric (m->g->cflags®_NEWLINE)) ) 5650b57cec5SDimitry Andric { /* yes */ } 5660b57cec5SDimitry Andric else 5670b57cec5SDimitry Andric return(NULL); 5680b57cec5SDimitry Andric break; 5690b57cec5SDimitry Andric case OEOL: 5700b57cec5SDimitry Andric if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 5710b57cec5SDimitry Andric (sp < m->endp && *sp == '\n' && 5720b57cec5SDimitry Andric (m->g->cflags®_NEWLINE)) ) 5730b57cec5SDimitry Andric { /* yes */ } 5740b57cec5SDimitry Andric else 5750b57cec5SDimitry Andric return(NULL); 5760b57cec5SDimitry Andric break; 5770b57cec5SDimitry Andric case OBOW: 5780b57cec5SDimitry Andric if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 5790b57cec5SDimitry Andric (sp < m->endp && *(sp-1) == '\n' && 5800b57cec5SDimitry Andric (m->g->cflags®_NEWLINE)) || 5810b57cec5SDimitry Andric (sp > m->beginp && 5820b57cec5SDimitry Andric !ISWORD(*(sp-1))) ) && 5830b57cec5SDimitry Andric (sp < m->endp && ISWORD(*sp)) ) 5840b57cec5SDimitry Andric { /* yes */ } 5850b57cec5SDimitry Andric else 5860b57cec5SDimitry Andric return(NULL); 5870b57cec5SDimitry Andric break; 5880b57cec5SDimitry Andric case OEOW: 5890b57cec5SDimitry Andric if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 5900b57cec5SDimitry Andric (sp < m->endp && *sp == '\n' && 5910b57cec5SDimitry Andric (m->g->cflags®_NEWLINE)) || 5920b57cec5SDimitry Andric (sp < m->endp && !ISWORD(*sp)) ) && 5930b57cec5SDimitry Andric (sp > m->beginp && ISWORD(*(sp-1))) ) 5940b57cec5SDimitry Andric { /* yes */ } 5950b57cec5SDimitry Andric else 5960b57cec5SDimitry Andric return(NULL); 5970b57cec5SDimitry Andric break; 5980b57cec5SDimitry Andric case O_QUEST: 599*bdd1243dSDimitry Andric case O_CH: 6000b57cec5SDimitry Andric break; 6010b57cec5SDimitry Andric case OOR1: /* matches null but needs to skip */ 6020b57cec5SDimitry Andric ss++; 6030b57cec5SDimitry Andric s = m->g->strip[ss]; 6040b57cec5SDimitry Andric do { 6050b57cec5SDimitry Andric assert(OP(s) == OOR2); 6060b57cec5SDimitry Andric ss += OPND(s); 6070b57cec5SDimitry Andric } while (OP(s = m->g->strip[ss]) != O_CH); 6080b57cec5SDimitry Andric /* note that the ss++ gets us past the O_CH */ 6090b57cec5SDimitry Andric break; 6100b57cec5SDimitry Andric default: /* have to make a choice */ 6110b57cec5SDimitry Andric hard = 1; 6120b57cec5SDimitry Andric break; 6130b57cec5SDimitry Andric } 6140b57cec5SDimitry Andric if (!hard) { /* that was it! */ 6150b57cec5SDimitry Andric if (sp != stop) 6160b57cec5SDimitry Andric return(NULL); 6170b57cec5SDimitry Andric return(sp); 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric ss--; /* adjust for the for's final increment */ 6200b57cec5SDimitry Andric 6210b57cec5SDimitry Andric /* the hard stuff */ 6220b57cec5SDimitry Andric AT("hard", sp, stop, ss, stopst); 6230b57cec5SDimitry Andric s = m->g->strip[ss]; 6240b57cec5SDimitry Andric switch (OP(s)) { 6250b57cec5SDimitry Andric case OBACK_: /* the vilest depths */ 6260b57cec5SDimitry Andric i = OPND(s); 6270b57cec5SDimitry Andric assert(0 < i && i <= m->g->nsub); 6280b57cec5SDimitry Andric if (m->pmatch[i].rm_eo == -1) 6290b57cec5SDimitry Andric return(NULL); 6300b57cec5SDimitry Andric assert(m->pmatch[i].rm_so != -1); 6310b57cec5SDimitry Andric len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 6320b57cec5SDimitry Andric if (len == 0 && rec++ > MAX_RECURSION) 6330b57cec5SDimitry Andric return(NULL); 6340b57cec5SDimitry Andric assert(stop - m->beginp >= len); 6350b57cec5SDimitry Andric if (sp > stop - len) 6360b57cec5SDimitry Andric return(NULL); /* not enough left to match */ 6370b57cec5SDimitry Andric ssp = m->offp + m->pmatch[i].rm_so; 6380b57cec5SDimitry Andric if (memcmp(sp, ssp, len) != 0) 6390b57cec5SDimitry Andric return(NULL); 6400b57cec5SDimitry Andric while (m->g->strip[ss] != SOP(O_BACK, i)) 6410b57cec5SDimitry Andric ss++; 6420b57cec5SDimitry Andric return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); 6430b57cec5SDimitry Andric break; 6440b57cec5SDimitry Andric case OQUEST_: /* to null or not */ 6450b57cec5SDimitry Andric dp = backref(m, sp, stop, ss+1, stopst, lev, rec); 6460b57cec5SDimitry Andric if (dp != NULL) 6470b57cec5SDimitry Andric return(dp); /* not */ 6480b57cec5SDimitry Andric return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); 6490b57cec5SDimitry Andric break; 6500b57cec5SDimitry Andric case OPLUS_: 6510b57cec5SDimitry Andric assert(m->lastpos != NULL); 6520b57cec5SDimitry Andric assert(lev+1 <= m->g->nplus); 6530b57cec5SDimitry Andric m->lastpos[lev+1] = sp; 6540b57cec5SDimitry Andric return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); 6550b57cec5SDimitry Andric break; 6560b57cec5SDimitry Andric case O_PLUS: 6570b57cec5SDimitry Andric if (sp == m->lastpos[lev]) /* last pass matched null */ 6580b57cec5SDimitry Andric return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); 6590b57cec5SDimitry Andric /* try another pass */ 6600b57cec5SDimitry Andric m->lastpos[lev] = sp; 6610b57cec5SDimitry Andric dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); 6620b57cec5SDimitry Andric if (dp == NULL) 6630b57cec5SDimitry Andric return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); 6640b57cec5SDimitry Andric else 6650b57cec5SDimitry Andric return(dp); 6660b57cec5SDimitry Andric break; 6670b57cec5SDimitry Andric case OCH_: /* find the right one, if any */ 6680b57cec5SDimitry Andric ssub = ss + 1; 6690b57cec5SDimitry Andric esub = ss + OPND(s) - 1; 6700b57cec5SDimitry Andric assert(OP(m->g->strip[esub]) == OOR1); 6710b57cec5SDimitry Andric for (;;) { /* find first matching branch */ 672*bdd1243dSDimitry Andric dp = backref(m, sp, stop, ssub, stopst, lev, rec); 6730b57cec5SDimitry Andric if (dp != NULL) 6740b57cec5SDimitry Andric return(dp); 6750b57cec5SDimitry Andric /* that one missed, try next one */ 6760b57cec5SDimitry Andric if (OP(m->g->strip[esub]) == O_CH) 6770b57cec5SDimitry Andric return(NULL); /* there is none */ 6780b57cec5SDimitry Andric esub++; 6790b57cec5SDimitry Andric assert(OP(m->g->strip[esub]) == OOR2); 6800b57cec5SDimitry Andric ssub = esub + 1; 6810b57cec5SDimitry Andric esub += OPND(m->g->strip[esub]); 6820b57cec5SDimitry Andric if (OP(m->g->strip[esub]) == OOR2) 6830b57cec5SDimitry Andric esub--; 6840b57cec5SDimitry Andric else 6850b57cec5SDimitry Andric assert(OP(m->g->strip[esub]) == O_CH); 6860b57cec5SDimitry Andric } 6870b57cec5SDimitry Andric break; 6880b57cec5SDimitry Andric case OLPAREN: /* must undo assignment if rest fails */ 6890b57cec5SDimitry Andric i = OPND(s); 6900b57cec5SDimitry Andric assert(0 < i && i <= m->g->nsub); 6910b57cec5SDimitry Andric offsave = m->pmatch[i].rm_so; 6920b57cec5SDimitry Andric m->pmatch[i].rm_so = sp - m->offp; 6930b57cec5SDimitry Andric dp = backref(m, sp, stop, ss+1, stopst, lev, rec); 6940b57cec5SDimitry Andric if (dp != NULL) 6950b57cec5SDimitry Andric return(dp); 6960b57cec5SDimitry Andric m->pmatch[i].rm_so = offsave; 6970b57cec5SDimitry Andric return(NULL); 6980b57cec5SDimitry Andric break; 6990b57cec5SDimitry Andric case ORPAREN: /* must undo assignment if rest fails */ 7000b57cec5SDimitry Andric i = OPND(s); 7010b57cec5SDimitry Andric assert(0 < i && i <= m->g->nsub); 7020b57cec5SDimitry Andric offsave = m->pmatch[i].rm_eo; 7030b57cec5SDimitry Andric m->pmatch[i].rm_eo = sp - m->offp; 7040b57cec5SDimitry Andric dp = backref(m, sp, stop, ss+1, stopst, lev, rec); 7050b57cec5SDimitry Andric if (dp != NULL) 7060b57cec5SDimitry Andric return(dp); 7070b57cec5SDimitry Andric m->pmatch[i].rm_eo = offsave; 7080b57cec5SDimitry Andric return(NULL); 7090b57cec5SDimitry Andric break; 7100b57cec5SDimitry Andric default: /* uh oh */ 7110b57cec5SDimitry Andric assert(nope); 7120b57cec5SDimitry Andric break; 7130b57cec5SDimitry Andric } 7140b57cec5SDimitry Andric 7150b57cec5SDimitry Andric /* "can't happen" */ 7160b57cec5SDimitry Andric assert(nope); 7170b57cec5SDimitry Andric /* NOTREACHED */ 7180b57cec5SDimitry Andric return NULL; 7190b57cec5SDimitry Andric} 7200b57cec5SDimitry Andric 7210b57cec5SDimitry Andric/* 7220b57cec5SDimitry Andric - fast - step through the string at top speed 7230b57cec5SDimitry Andric */ 7240b57cec5SDimitry Andricstatic const char * /* where tentative match ended, or NULL */ 7250b57cec5SDimitry Andricfast(struct match *m, const char *start, const char *stop, sopno startst, 7260b57cec5SDimitry Andric sopno stopst) 7270b57cec5SDimitry Andric{ 7280b57cec5SDimitry Andric states st = m->st; 7290b57cec5SDimitry Andric states fresh = m->fresh; 7300b57cec5SDimitry Andric states tmp = m->tmp; 7310b57cec5SDimitry Andric const char *p = start; 7320b57cec5SDimitry Andric int c = (start == m->beginp) ? OUT : *(start-1); 7330b57cec5SDimitry Andric int lastc; /* previous c */ 7340b57cec5SDimitry Andric int flagch; 7350b57cec5SDimitry Andric int i; 7360b57cec5SDimitry Andric const char *coldp; /* last p after which no match was underway */ 7370b57cec5SDimitry Andric 7380b57cec5SDimitry Andric CLEAR(st); 7390b57cec5SDimitry Andric SET1(st, startst); 7400b57cec5SDimitry Andric st = step(m->g, startst, stopst, st, NOTHING, st); 7410b57cec5SDimitry Andric ASSIGN(fresh, st); 7420b57cec5SDimitry Andric SP("start", st, *p); 7430b57cec5SDimitry Andric coldp = NULL; 7440b57cec5SDimitry Andric for (;;) { 7450b57cec5SDimitry Andric /* next character */ 7460b57cec5SDimitry Andric lastc = c; 7470b57cec5SDimitry Andric c = (p == m->endp) ? OUT : *p; 7480b57cec5SDimitry Andric if (EQ(st, fresh)) 7490b57cec5SDimitry Andric coldp = p; 7500b57cec5SDimitry Andric 7510b57cec5SDimitry Andric /* is there an EOL and/or BOL between lastc and c? */ 7520b57cec5SDimitry Andric flagch = '\0'; 7530b57cec5SDimitry Andric i = 0; 7540b57cec5SDimitry Andric if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 7550b57cec5SDimitry Andric (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 7560b57cec5SDimitry Andric flagch = BOL; 7570b57cec5SDimitry Andric i = m->g->nbol; 7580b57cec5SDimitry Andric } 7590b57cec5SDimitry Andric if ( (c == '\n' && m->g->cflags®_NEWLINE) || 7600b57cec5SDimitry Andric (c == OUT && !(m->eflags®_NOTEOL)) ) { 7610b57cec5SDimitry Andric flagch = (flagch == BOL) ? BOLEOL : EOL; 7620b57cec5SDimitry Andric i += m->g->neol; 7630b57cec5SDimitry Andric } 7640b57cec5SDimitry Andric if (i != 0) { 7650b57cec5SDimitry Andric for (; i > 0; i--) 7660b57cec5SDimitry Andric st = step(m->g, startst, stopst, st, flagch, st); 7670b57cec5SDimitry Andric SP("boleol", st, c); 7680b57cec5SDimitry Andric } 7690b57cec5SDimitry Andric 7700b57cec5SDimitry Andric /* how about a word boundary? */ 7710b57cec5SDimitry Andric if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 7720b57cec5SDimitry Andric (c != OUT && ISWORD(c)) ) { 7730b57cec5SDimitry Andric flagch = BOW; 7740b57cec5SDimitry Andric } 7750b57cec5SDimitry Andric if ( (lastc != OUT && ISWORD(lastc)) && 7760b57cec5SDimitry Andric (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 7770b57cec5SDimitry Andric flagch = EOW; 7780b57cec5SDimitry Andric } 7790b57cec5SDimitry Andric if (flagch == BOW || flagch == EOW) { 7800b57cec5SDimitry Andric st = step(m->g, startst, stopst, st, flagch, st); 7810b57cec5SDimitry Andric SP("boweow", st, c); 7820b57cec5SDimitry Andric } 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric /* are we done? */ 7850b57cec5SDimitry Andric if (ISSET(st, stopst) || p == stop) 7860b57cec5SDimitry Andric break; /* NOTE BREAK OUT */ 7870b57cec5SDimitry Andric 7880b57cec5SDimitry Andric /* no, we must deal with this character */ 7890b57cec5SDimitry Andric ASSIGN(tmp, st); 7900b57cec5SDimitry Andric ASSIGN(st, fresh); 7910b57cec5SDimitry Andric assert(c != OUT); 7920b57cec5SDimitry Andric st = step(m->g, startst, stopst, tmp, c, st); 7930b57cec5SDimitry Andric SP("aft", st, c); 7940b57cec5SDimitry Andric assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 7950b57cec5SDimitry Andric p++; 7960b57cec5SDimitry Andric } 7970b57cec5SDimitry Andric 7980b57cec5SDimitry Andric assert(coldp != NULL); 7990b57cec5SDimitry Andric m->coldp = coldp; 8000b57cec5SDimitry Andric if (ISSET(st, stopst)) 8010b57cec5SDimitry Andric return(p+1); 8020b57cec5SDimitry Andric else 8030b57cec5SDimitry Andric return(NULL); 8040b57cec5SDimitry Andric} 8050b57cec5SDimitry Andric 8060b57cec5SDimitry Andric/* 8070b57cec5SDimitry Andric - slow - step through the string more deliberately 8080b57cec5SDimitry Andric */ 8090b57cec5SDimitry Andricstatic const char * /* where it ended */ 8100b57cec5SDimitry Andricslow(struct match *m, const char *start, const char *stop, sopno startst, 8110b57cec5SDimitry Andric sopno stopst) 8120b57cec5SDimitry Andric{ 813*bdd1243dSDimitry Andric /* Quickly skip over fixed character matches at the start. */ 814*bdd1243dSDimitry Andric const char *p = start; 815*bdd1243dSDimitry Andric for (; startst < stopst; ++startst) { 816*bdd1243dSDimitry Andric int hard = 0; 817*bdd1243dSDimitry Andric sop s = m->g->strip[startst]; 818*bdd1243dSDimitry Andric switch (OP(s)) { 819*bdd1243dSDimitry Andric case OLPAREN: 820*bdd1243dSDimitry Andric case ORPAREN: 821*bdd1243dSDimitry Andric /* Not relevant here. */ 822*bdd1243dSDimitry Andric break; 823*bdd1243dSDimitry Andric case OCHAR: 824*bdd1243dSDimitry Andric if (p == stop || *p != (char)OPND(s)) 825*bdd1243dSDimitry Andric return NULL; 826*bdd1243dSDimitry Andric ++p; 827*bdd1243dSDimitry Andric break; 828*bdd1243dSDimitry Andric default: 829*bdd1243dSDimitry Andric hard = 1; 830*bdd1243dSDimitry Andric break; 831*bdd1243dSDimitry Andric } 832*bdd1243dSDimitry Andric if (hard) 833*bdd1243dSDimitry Andric break; 834*bdd1243dSDimitry Andric } 835*bdd1243dSDimitry Andric 8360b57cec5SDimitry Andric states st = m->st; 8370b57cec5SDimitry Andric states empty = m->empty; 8380b57cec5SDimitry Andric states tmp = m->tmp; 839*bdd1243dSDimitry Andric int c = (p == m->beginp) ? OUT : *(p-1); 8400b57cec5SDimitry Andric int lastc; /* previous c */ 8410b57cec5SDimitry Andric int flagch; 8420b57cec5SDimitry Andric int i; 8430b57cec5SDimitry Andric const char *matchp; /* last p at which a match ended */ 8440b57cec5SDimitry Andric 8450b57cec5SDimitry Andric AT("slow", start, stop, startst, stopst); 8460b57cec5SDimitry Andric CLEAR(st); 8470b57cec5SDimitry Andric SET1(st, startst); 8480b57cec5SDimitry Andric SP("sstart", st, *p); 8490b57cec5SDimitry Andric st = step(m->g, startst, stopst, st, NOTHING, st); 8500b57cec5SDimitry Andric matchp = NULL; 8510b57cec5SDimitry Andric for (;;) { 8520b57cec5SDimitry Andric /* next character */ 8530b57cec5SDimitry Andric lastc = c; 8540b57cec5SDimitry Andric c = (p == m->endp) ? OUT : *p; 8550b57cec5SDimitry Andric 8560b57cec5SDimitry Andric /* is there an EOL and/or BOL between lastc and c? */ 8570b57cec5SDimitry Andric flagch = '\0'; 8580b57cec5SDimitry Andric i = 0; 8590b57cec5SDimitry Andric if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 8600b57cec5SDimitry Andric (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 8610b57cec5SDimitry Andric flagch = BOL; 8620b57cec5SDimitry Andric i = m->g->nbol; 8630b57cec5SDimitry Andric } 8640b57cec5SDimitry Andric if ( (c == '\n' && m->g->cflags®_NEWLINE) || 8650b57cec5SDimitry Andric (c == OUT && !(m->eflags®_NOTEOL)) ) { 8660b57cec5SDimitry Andric flagch = (flagch == BOL) ? BOLEOL : EOL; 8670b57cec5SDimitry Andric i += m->g->neol; 8680b57cec5SDimitry Andric } 8690b57cec5SDimitry Andric if (i != 0) { 8700b57cec5SDimitry Andric for (; i > 0; i--) 8710b57cec5SDimitry Andric st = step(m->g, startst, stopst, st, flagch, st); 8720b57cec5SDimitry Andric SP("sboleol", st, c); 8730b57cec5SDimitry Andric } 8740b57cec5SDimitry Andric 8750b57cec5SDimitry Andric /* how about a word boundary? */ 8760b57cec5SDimitry Andric if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 8770b57cec5SDimitry Andric (c != OUT && ISWORD(c)) ) { 8780b57cec5SDimitry Andric flagch = BOW; 8790b57cec5SDimitry Andric } 8800b57cec5SDimitry Andric if ( (lastc != OUT && ISWORD(lastc)) && 8810b57cec5SDimitry Andric (flagch == EOL || (c != OUT && !ISWORD(c))) ) { 8820b57cec5SDimitry Andric flagch = EOW; 8830b57cec5SDimitry Andric } 8840b57cec5SDimitry Andric if (flagch == BOW || flagch == EOW) { 8850b57cec5SDimitry Andric st = step(m->g, startst, stopst, st, flagch, st); 8860b57cec5SDimitry Andric SP("sboweow", st, c); 8870b57cec5SDimitry Andric } 8880b57cec5SDimitry Andric 8890b57cec5SDimitry Andric /* are we done? */ 8900b57cec5SDimitry Andric if (ISSET(st, stopst)) 8910b57cec5SDimitry Andric matchp = p; 8920b57cec5SDimitry Andric if (EQ(st, empty) || p == stop) 8930b57cec5SDimitry Andric break; /* NOTE BREAK OUT */ 8940b57cec5SDimitry Andric 8950b57cec5SDimitry Andric /* no, we must deal with this character */ 8960b57cec5SDimitry Andric ASSIGN(tmp, st); 8970b57cec5SDimitry Andric ASSIGN(st, empty); 8980b57cec5SDimitry Andric assert(c != OUT); 8990b57cec5SDimitry Andric st = step(m->g, startst, stopst, tmp, c, st); 9000b57cec5SDimitry Andric SP("saft", st, c); 9010b57cec5SDimitry Andric assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 9020b57cec5SDimitry Andric p++; 9030b57cec5SDimitry Andric } 9040b57cec5SDimitry Andric 9050b57cec5SDimitry Andric return(matchp); 9060b57cec5SDimitry Andric} 9070b57cec5SDimitry Andric 9080b57cec5SDimitry Andric 9090b57cec5SDimitry Andric/* 9100b57cec5SDimitry Andric - step - map set of states reachable before char to set reachable after 9110b57cec5SDimitry Andric */ 9120b57cec5SDimitry Andricstatic states 9130b57cec5SDimitry Andricstep(struct re_guts *g, 9140b57cec5SDimitry Andric sopno start, /* start state within strip */ 9150b57cec5SDimitry Andric sopno stop, /* state after stop state within strip */ 9160b57cec5SDimitry Andric states bef, /* states reachable before */ 9170b57cec5SDimitry Andric int ch, /* character or NONCHAR code */ 9180b57cec5SDimitry Andric states aft) /* states already known reachable after */ 9190b57cec5SDimitry Andric{ 9200b57cec5SDimitry Andric cset *cs; 9210b57cec5SDimitry Andric sop s; 9220b57cec5SDimitry Andric sopno pc; 9230b57cec5SDimitry Andric onestate here; /* note, macros know this name */ 9240b57cec5SDimitry Andric sopno look; 9250b57cec5SDimitry Andric int i; 9260b57cec5SDimitry Andric 9270b57cec5SDimitry Andric for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 9280b57cec5SDimitry Andric s = g->strip[pc]; 9290b57cec5SDimitry Andric switch (OP(s)) { 9300b57cec5SDimitry Andric case OEND: 9310b57cec5SDimitry Andric assert(pc == stop-1); 9320b57cec5SDimitry Andric break; 9330b57cec5SDimitry Andric case OCHAR: 9340b57cec5SDimitry Andric /* only characters can match */ 9350b57cec5SDimitry Andric assert(!NONCHAR(ch) || ch != (char)OPND(s)); 9360b57cec5SDimitry Andric if (ch == (char)OPND(s)) 9370b57cec5SDimitry Andric FWD(aft, bef, 1); 9380b57cec5SDimitry Andric break; 9390b57cec5SDimitry Andric case OBOL: 9400b57cec5SDimitry Andric if (ch == BOL || ch == BOLEOL) 9410b57cec5SDimitry Andric FWD(aft, bef, 1); 9420b57cec5SDimitry Andric break; 9430b57cec5SDimitry Andric case OEOL: 9440b57cec5SDimitry Andric if (ch == EOL || ch == BOLEOL) 9450b57cec5SDimitry Andric FWD(aft, bef, 1); 9460b57cec5SDimitry Andric break; 9470b57cec5SDimitry Andric case OBOW: 9480b57cec5SDimitry Andric if (ch == BOW) 9490b57cec5SDimitry Andric FWD(aft, bef, 1); 9500b57cec5SDimitry Andric break; 9510b57cec5SDimitry Andric case OEOW: 9520b57cec5SDimitry Andric if (ch == EOW) 9530b57cec5SDimitry Andric FWD(aft, bef, 1); 9540b57cec5SDimitry Andric break; 9550b57cec5SDimitry Andric case OANY: 9560b57cec5SDimitry Andric if (!NONCHAR(ch)) 9570b57cec5SDimitry Andric FWD(aft, bef, 1); 9580b57cec5SDimitry Andric break; 9590b57cec5SDimitry Andric case OANYOF: 9600b57cec5SDimitry Andric cs = &g->sets[OPND(s)]; 9610b57cec5SDimitry Andric if (!NONCHAR(ch) && CHIN(cs, ch)) 9620b57cec5SDimitry Andric FWD(aft, bef, 1); 9630b57cec5SDimitry Andric break; 9640b57cec5SDimitry Andric case OBACK_: /* ignored here */ 9650b57cec5SDimitry Andric case O_BACK: 9660b57cec5SDimitry Andric FWD(aft, aft, 1); 9670b57cec5SDimitry Andric break; 9680b57cec5SDimitry Andric case OPLUS_: /* forward, this is just an empty */ 9690b57cec5SDimitry Andric FWD(aft, aft, 1); 9700b57cec5SDimitry Andric break; 9710b57cec5SDimitry Andric case O_PLUS: /* both forward and back */ 9720b57cec5SDimitry Andric FWD(aft, aft, 1); 9730b57cec5SDimitry Andric i = ISSETBACK(aft, OPND(s)); 9740b57cec5SDimitry Andric BACK(aft, aft, OPND(s)); 9750b57cec5SDimitry Andric if (!i && ISSETBACK(aft, OPND(s))) { 9760b57cec5SDimitry Andric /* oho, must reconsider loop body */ 9770b57cec5SDimitry Andric pc -= OPND(s) + 1; 9780b57cec5SDimitry Andric INIT(here, pc); 9790b57cec5SDimitry Andric } 9800b57cec5SDimitry Andric break; 9810b57cec5SDimitry Andric case OQUEST_: /* two branches, both forward */ 9820b57cec5SDimitry Andric FWD(aft, aft, 1); 9830b57cec5SDimitry Andric FWD(aft, aft, OPND(s)); 9840b57cec5SDimitry Andric break; 9850b57cec5SDimitry Andric case O_QUEST: /* just an empty */ 9860b57cec5SDimitry Andric FWD(aft, aft, 1); 9870b57cec5SDimitry Andric break; 9880b57cec5SDimitry Andric case OLPAREN: /* not significant here */ 9890b57cec5SDimitry Andric case ORPAREN: 9900b57cec5SDimitry Andric FWD(aft, aft, 1); 9910b57cec5SDimitry Andric break; 9920b57cec5SDimitry Andric case OCH_: /* mark the first two branches */ 9930b57cec5SDimitry Andric FWD(aft, aft, 1); 9940b57cec5SDimitry Andric assert(OP(g->strip[pc+OPND(s)]) == OOR2); 9950b57cec5SDimitry Andric FWD(aft, aft, OPND(s)); 9960b57cec5SDimitry Andric break; 9970b57cec5SDimitry Andric case OOR1: /* done a branch, find the O_CH */ 9980b57cec5SDimitry Andric if (ISSTATEIN(aft, here)) { 9990b57cec5SDimitry Andric for (look = 1; 10000b57cec5SDimitry Andric OP(s = g->strip[pc+look]) != O_CH; 10010b57cec5SDimitry Andric look += OPND(s)) 10020b57cec5SDimitry Andric assert(OP(s) == OOR2); 10030b57cec5SDimitry Andric FWD(aft, aft, look); 10040b57cec5SDimitry Andric } 10050b57cec5SDimitry Andric break; 10060b57cec5SDimitry Andric case OOR2: /* propagate OCH_'s marking */ 10070b57cec5SDimitry Andric FWD(aft, aft, 1); 10080b57cec5SDimitry Andric if (OP(g->strip[pc+OPND(s)]) != O_CH) { 10090b57cec5SDimitry Andric assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10100b57cec5SDimitry Andric FWD(aft, aft, OPND(s)); 10110b57cec5SDimitry Andric } 10120b57cec5SDimitry Andric break; 10130b57cec5SDimitry Andric case O_CH: /* just empty */ 10140b57cec5SDimitry Andric FWD(aft, aft, 1); 10150b57cec5SDimitry Andric break; 10160b57cec5SDimitry Andric default: /* ooooops... */ 10170b57cec5SDimitry Andric assert(nope); 10180b57cec5SDimitry Andric break; 10190b57cec5SDimitry Andric } 10200b57cec5SDimitry Andric } 10210b57cec5SDimitry Andric 10220b57cec5SDimitry Andric return(aft); 10230b57cec5SDimitry Andric} 10240b57cec5SDimitry Andric 10250b57cec5SDimitry Andric#ifdef REDEBUG 10260b57cec5SDimitry Andric/* 10270b57cec5SDimitry Andric - print - print a set of states 10280b57cec5SDimitry Andric */ 10290b57cec5SDimitry Andricstatic void 1030*bdd1243dSDimitry Andricprint(struct match *m, const char *caption, states st, int ch, FILE *d) 10310b57cec5SDimitry Andric{ 10320b57cec5SDimitry Andric struct re_guts *g = m->g; 10330b57cec5SDimitry Andric int i; 10340b57cec5SDimitry Andric int first = 1; 10350b57cec5SDimitry Andric 10360b57cec5SDimitry Andric if (!(m->eflags®_TRACE)) 10370b57cec5SDimitry Andric return; 10380b57cec5SDimitry Andric 10390b57cec5SDimitry Andric (void)fprintf(d, "%s", caption); 10400b57cec5SDimitry Andric if (ch != '\0') 10410b57cec5SDimitry Andric (void)fprintf(d, " %s", pchar(ch)); 10420b57cec5SDimitry Andric for (i = 0; i < g->nstates; i++) 10430b57cec5SDimitry Andric if (ISSET(st, i)) { 10440b57cec5SDimitry Andric (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 10450b57cec5SDimitry Andric first = 0; 10460b57cec5SDimitry Andric } 10470b57cec5SDimitry Andric (void)fprintf(d, "\n"); 10480b57cec5SDimitry Andric} 10490b57cec5SDimitry Andric 10500b57cec5SDimitry Andric/* 10510b57cec5SDimitry Andric - at - print current situation 10520b57cec5SDimitry Andric */ 10530b57cec5SDimitry Andricstatic void 1054*bdd1243dSDimitry Andricat(struct match *m, const char *title, const char *start, const char *stop, 1055*bdd1243dSDimitry Andric sopno startst, sopno stopst) 10560b57cec5SDimitry Andric{ 10570b57cec5SDimitry Andric if (!(m->eflags®_TRACE)) 10580b57cec5SDimitry Andric return; 10590b57cec5SDimitry Andric 10600b57cec5SDimitry Andric (void)printf("%s %s-", title, pchar(*start)); 10610b57cec5SDimitry Andric (void)printf("%s ", pchar(*stop)); 10620b57cec5SDimitry Andric (void)printf("%ld-%ld\n", (long)startst, (long)stopst); 10630b57cec5SDimitry Andric} 10640b57cec5SDimitry Andric 10650b57cec5SDimitry Andric#ifndef PCHARDONE 10660b57cec5SDimitry Andric#define PCHARDONE /* never again */ 10670b57cec5SDimitry Andric/* 10680b57cec5SDimitry Andric - pchar - make a character printable 10690b57cec5SDimitry Andric * 10700b57cec5SDimitry Andric * Is this identical to regchar() over in debug.c? Well, yes. But a 10710b57cec5SDimitry Andric * duplicate here avoids having a debugging-capable regexec.o tied to 10720b57cec5SDimitry Andric * a matching debug.o, and this is convenient. It all disappears in 10730b57cec5SDimitry Andric * the non-debug compilation anyway, so it doesn't matter much. 10740b57cec5SDimitry Andric */ 10750b57cec5SDimitry Andricstatic char * /* -> representation */ 10760b57cec5SDimitry Andricpchar(int ch) 10770b57cec5SDimitry Andric{ 10780b57cec5SDimitry Andric static char pbuf[10]; 10790b57cec5SDimitry Andric 1080*bdd1243dSDimitry Andric if (isprint(ch) || ch == ' ') 10810b57cec5SDimitry Andric (void)snprintf(pbuf, sizeof pbuf, "%c", ch); 10820b57cec5SDimitry Andric else 10830b57cec5SDimitry Andric (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); 10840b57cec5SDimitry Andric return(pbuf); 10850b57cec5SDimitry Andric} 10860b57cec5SDimitry Andric#endif 10870b57cec5SDimitry Andric#endif 10880b57cec5SDimitry Andric 10890b57cec5SDimitry Andric#undef matcher 10900b57cec5SDimitry Andric#undef fast 10910b57cec5SDimitry Andric#undef slow 10920b57cec5SDimitry Andric#undef dissect 10930b57cec5SDimitry Andric#undef backref 10940b57cec5SDimitry Andric#undef step 10950b57cec5SDimitry Andric#undef print 10960b57cec5SDimitry Andric#undef at 10970b57cec5SDimitry Andric#undef match 10980b57cec5SDimitry Andric#undef nope 109981ad6265SDimitry Andric#undef step_back 1100