14297a3b0SGarrett D'Amore /* 26b5e5868SGarrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 3*33f5ff17SMilan Jurik * Copyright 2012 Milan Jurik. All rights reserved. 44297a3b0SGarrett D'Amore * Copyright (c) 1992, 1993, 1994 Henry Spencer. 54297a3b0SGarrett D'Amore * Copyright (c) 1992, 1993, 1994 64297a3b0SGarrett D'Amore * The Regents of the University of California. All rights reserved. 74297a3b0SGarrett D'Amore * 84297a3b0SGarrett D'Amore * This code is derived from software contributed to Berkeley by 94297a3b0SGarrett D'Amore * Henry Spencer. 104297a3b0SGarrett D'Amore * 114297a3b0SGarrett D'Amore * Redistribution and use in source and binary forms, with or without 124297a3b0SGarrett D'Amore * modification, are permitted provided that the following conditions 134297a3b0SGarrett D'Amore * are met: 144297a3b0SGarrett D'Amore * 1. Redistributions of source code must retain the above copyright 154297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer. 164297a3b0SGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright 174297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer in the 184297a3b0SGarrett D'Amore * documentation and/or other materials provided with the distribution. 194297a3b0SGarrett D'Amore * 4. Neither the name of the University nor the names of its contributors 204297a3b0SGarrett D'Amore * may be used to endorse or promote products derived from this software 214297a3b0SGarrett D'Amore * without specific prior written permission. 224297a3b0SGarrett D'Amore * 234297a3b0SGarrett D'Amore * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 244297a3b0SGarrett D'Amore * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 254297a3b0SGarrett D'Amore * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 264297a3b0SGarrett D'Amore * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 274297a3b0SGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 284297a3b0SGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 294297a3b0SGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 304297a3b0SGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 314297a3b0SGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 324297a3b0SGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 334297a3b0SGarrett D'Amore * SUCH DAMAGE. 344297a3b0SGarrett D'Amore */ 354297a3b0SGarrett D'Amore 364297a3b0SGarrett D'Amore /* 374297a3b0SGarrett D'Amore * The matching engine and friends. This file is #included by regexec.c 384297a3b0SGarrett D'Amore * after suitable #defines of a variety of macros used herein, so that 394297a3b0SGarrett D'Amore * different state representations can be used without duplicating masses 404297a3b0SGarrett D'Amore * of code. 414297a3b0SGarrett D'Amore */ 424297a3b0SGarrett D'Amore 434297a3b0SGarrett D'Amore #ifdef SNAMES 444297a3b0SGarrett D'Amore #define matcher smatcher 454297a3b0SGarrett D'Amore #define fast sfast 464297a3b0SGarrett D'Amore #define slow sslow 474297a3b0SGarrett D'Amore #define dissect sdissect 484297a3b0SGarrett D'Amore #define backref sbackref 494297a3b0SGarrett D'Amore #define step sstep 504297a3b0SGarrett D'Amore #define print sprint 514297a3b0SGarrett D'Amore #define at sat 524297a3b0SGarrett D'Amore #define match smat 534297a3b0SGarrett D'Amore #endif 544297a3b0SGarrett D'Amore #ifdef LNAMES 554297a3b0SGarrett D'Amore #define matcher lmatcher 564297a3b0SGarrett D'Amore #define fast lfast 574297a3b0SGarrett D'Amore #define slow lslow 584297a3b0SGarrett D'Amore #define dissect ldissect 594297a3b0SGarrett D'Amore #define backref lbackref 604297a3b0SGarrett D'Amore #define step lstep 614297a3b0SGarrett D'Amore #define print lprint 624297a3b0SGarrett D'Amore #define at lat 634297a3b0SGarrett D'Amore #define match lmat 644297a3b0SGarrett D'Amore #endif 654297a3b0SGarrett D'Amore #ifdef MNAMES 664297a3b0SGarrett D'Amore #define matcher mmatcher 674297a3b0SGarrett D'Amore #define fast mfast 684297a3b0SGarrett D'Amore #define slow mslow 694297a3b0SGarrett D'Amore #define dissect mdissect 704297a3b0SGarrett D'Amore #define backref mbackref 714297a3b0SGarrett D'Amore #define step mstep 724297a3b0SGarrett D'Amore #define print mprint 734297a3b0SGarrett D'Amore #define at mat 744297a3b0SGarrett D'Amore #define match mmat 754297a3b0SGarrett D'Amore #endif 764297a3b0SGarrett D'Amore 774297a3b0SGarrett D'Amore /* another structure passed up and down to avoid zillions of parameters */ 784297a3b0SGarrett D'Amore struct match { 794297a3b0SGarrett D'Amore struct re_guts *g; 804297a3b0SGarrett D'Amore int eflags; 814297a3b0SGarrett D'Amore regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 824297a3b0SGarrett D'Amore const char *offp; /* offsets work from here */ 834297a3b0SGarrett D'Amore const char *beginp; /* start of string -- virtual NUL precedes */ 844297a3b0SGarrett D'Amore const char *endp; /* end of string -- virtual NUL here */ 854297a3b0SGarrett D'Amore const char *coldp; /* can be no match starting before here */ 864297a3b0SGarrett D'Amore const char **lastpos; /* [nplus+1] */ 874297a3b0SGarrett D'Amore STATEVARS; 884297a3b0SGarrett D'Amore states st; /* current states */ 894297a3b0SGarrett D'Amore states fresh; /* states for a fresh start */ 904297a3b0SGarrett D'Amore states tmp; /* temporary */ 914297a3b0SGarrett D'Amore states empty; /* empty set of states */ 924297a3b0SGarrett D'Amore mbstate_t mbs; /* multibyte conversion state */ 934297a3b0SGarrett D'Amore }; 944297a3b0SGarrett D'Amore 954297a3b0SGarrett D'Amore /* ========= begin header generated by ./mkh ========= */ 964297a3b0SGarrett D'Amore #ifdef __cplusplus 974297a3b0SGarrett D'Amore extern "C" { 984297a3b0SGarrett D'Amore #endif 994297a3b0SGarrett D'Amore 1004297a3b0SGarrett D'Amore /* === engine.c === */ 1014297a3b0SGarrett D'Amore static int matcher(struct re_guts *, const char *, size_t, regmatch_t[], int); 1024297a3b0SGarrett D'Amore static const char *dissect(struct match *, const char *, const char *, 1034297a3b0SGarrett D'Amore sopno, sopno); 1044297a3b0SGarrett D'Amore static const char *backref(struct match *, const char *, const char *, sopno, 1054297a3b0SGarrett D'Amore sopno, sopno, int); 1064297a3b0SGarrett D'Amore static const char *fast(struct match *, const char *, const char *, sopno, 1074297a3b0SGarrett D'Amore sopno); 1084297a3b0SGarrett D'Amore static const char *slow(struct match *, const char *, const char *, sopno, 1094297a3b0SGarrett D'Amore sopno); 1104297a3b0SGarrett D'Amore static states step(struct re_guts *, sopno, sopno, states, wint_t, states); 1114297a3b0SGarrett D'Amore #define MAX_RECURSION 100 1124297a3b0SGarrett D'Amore #define BOL (OUT-1) 1134297a3b0SGarrett D'Amore #define EOL (BOL-1) 1144297a3b0SGarrett D'Amore #define BOLEOL (BOL-2) 1154297a3b0SGarrett D'Amore #define NOTHING (BOL-3) 1164297a3b0SGarrett D'Amore #define BOW (BOL-4) 1174297a3b0SGarrett D'Amore #define EOW (BOL-5) 1184297a3b0SGarrett D'Amore #define BADCHAR (BOL-6) 1194297a3b0SGarrett D'Amore #define NONCHAR(c) ((c) <= OUT) 1204297a3b0SGarrett D'Amore #ifdef REDEBUG 1214297a3b0SGarrett D'Amore static void print(struct match *, const char *, states, int, FILE *); 1224297a3b0SGarrett D'Amore #endif 1234297a3b0SGarrett D'Amore #ifdef REDEBUG 1244297a3b0SGarrett D'Amore static void at(struct match *, const char *, const char *, const char *, 1254297a3b0SGarrett D'Amore sopno, sopno); 1264297a3b0SGarrett D'Amore #endif 1274297a3b0SGarrett D'Amore #ifdef REDEBUG 1284297a3b0SGarrett D'Amore static const char *pchar(int ch); 1294297a3b0SGarrett D'Amore #endif 1304297a3b0SGarrett D'Amore 1314297a3b0SGarrett D'Amore #ifdef __cplusplus 1324297a3b0SGarrett D'Amore } 1334297a3b0SGarrett D'Amore #endif 1344297a3b0SGarrett D'Amore /* ========= end header generated by ./mkh ========= */ 1354297a3b0SGarrett D'Amore 1364297a3b0SGarrett D'Amore #ifdef REDEBUG 1374297a3b0SGarrett D'Amore #define SP(t, s, c) print(m, t, s, c, stdout) 1384297a3b0SGarrett D'Amore #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 1394297a3b0SGarrett D'Amore #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 1404297a3b0SGarrett D'Amore #else 1414297a3b0SGarrett D'Amore #define SP(t, s, c) /* nothing */ 1424297a3b0SGarrett D'Amore #define AT(t, p1, p2, s1, s2) /* nothing */ 1434297a3b0SGarrett D'Amore #define NOTE(s) /* nothing */ 1444297a3b0SGarrett D'Amore #endif 1454297a3b0SGarrett D'Amore 1464297a3b0SGarrett D'Amore /* 1474297a3b0SGarrett D'Amore * matcher - the actual matching engine 1484297a3b0SGarrett D'Amore */ 1494297a3b0SGarrett D'Amore static int /* 0 success, REG_NOMATCH failure */ 1504297a3b0SGarrett D'Amore matcher(struct re_guts *g, 1514297a3b0SGarrett D'Amore const char *string, 1524297a3b0SGarrett D'Amore size_t nmatch, 1534297a3b0SGarrett D'Amore regmatch_t pmatch[], 1544297a3b0SGarrett D'Amore int eflags) 1554297a3b0SGarrett D'Amore { 1564297a3b0SGarrett D'Amore const char *endp; 1574297a3b0SGarrett D'Amore int i; 1584297a3b0SGarrett D'Amore struct match mv; 1594297a3b0SGarrett D'Amore struct match *m = &mv; 1604297a3b0SGarrett D'Amore const char *dp; 1614297a3b0SGarrett D'Amore const sopno gf = g->firststate+1; /* +1 for OEND */ 1624297a3b0SGarrett D'Amore const sopno gl = g->laststate; 1634297a3b0SGarrett D'Amore const char *start; 1644297a3b0SGarrett D'Amore const char *stop; 1654297a3b0SGarrett D'Amore /* Boyer-Moore algorithms variables */ 1664297a3b0SGarrett D'Amore const char *pp; 1674297a3b0SGarrett D'Amore int cj, mj; 1684297a3b0SGarrett D'Amore const char *mustfirst; 1694297a3b0SGarrett D'Amore const char *mustlast; 1704297a3b0SGarrett D'Amore int *matchjump; 1714297a3b0SGarrett D'Amore int *charjump; 1724297a3b0SGarrett D'Amore 1734297a3b0SGarrett D'Amore /* simplify the situation where possible */ 1744297a3b0SGarrett D'Amore if (g->cflags®_NOSUB) 1754297a3b0SGarrett D'Amore nmatch = 0; 17684441f85SGarrett D'Amore 1774297a3b0SGarrett D'Amore if (eflags®_STARTEND) { 1784297a3b0SGarrett D'Amore start = string + pmatch[0].rm_so; 1794297a3b0SGarrett D'Amore stop = string + pmatch[0].rm_eo; 1804297a3b0SGarrett D'Amore } else { 1814297a3b0SGarrett D'Amore start = string; 1824297a3b0SGarrett D'Amore stop = start + strlen(start); 1834297a3b0SGarrett D'Amore } 18484441f85SGarrett D'Amore 1854297a3b0SGarrett D'Amore if (stop < start) 1864297a3b0SGarrett D'Amore return (REG_EFATAL); 1874297a3b0SGarrett D'Amore 1884297a3b0SGarrett D'Amore /* prescreening; this does wonders for this rather slow code */ 1894297a3b0SGarrett D'Amore if (g->must != NULL) { 1904297a3b0SGarrett D'Amore if (g->charjump != NULL && g->matchjump != NULL) { 1914297a3b0SGarrett D'Amore mustfirst = g->must; 1924297a3b0SGarrett D'Amore mustlast = g->must + g->mlen - 1; 1934297a3b0SGarrett D'Amore charjump = g->charjump; 1944297a3b0SGarrett D'Amore matchjump = g->matchjump; 1954297a3b0SGarrett D'Amore pp = mustlast; 1964297a3b0SGarrett D'Amore for (dp = start+g->mlen-1; dp < stop; ) { 1974297a3b0SGarrett D'Amore /* Fast skip non-matches */ 1984297a3b0SGarrett D'Amore while (dp < stop && charjump[(int)*dp]) 1994297a3b0SGarrett D'Amore dp += charjump[(int)*dp]; 2004297a3b0SGarrett D'Amore 2014297a3b0SGarrett D'Amore if (dp >= stop) 2024297a3b0SGarrett D'Amore break; 2034297a3b0SGarrett D'Amore 2044297a3b0SGarrett D'Amore /* Greedy matcher */ 2054297a3b0SGarrett D'Amore /* 2064297a3b0SGarrett D'Amore * We depend on not being used for 2074297a3b0SGarrett D'Amore * for strings of length 1 2084297a3b0SGarrett D'Amore */ 2094297a3b0SGarrett D'Amore while (*--dp == *--pp && pp != mustfirst) 2104297a3b0SGarrett D'Amore ; 2114297a3b0SGarrett D'Amore 2124297a3b0SGarrett D'Amore if (*dp == *pp) 2134297a3b0SGarrett D'Amore break; 2144297a3b0SGarrett D'Amore 2154297a3b0SGarrett D'Amore /* Jump to next possible match */ 2164297a3b0SGarrett D'Amore mj = matchjump[pp - mustfirst]; 2174297a3b0SGarrett D'Amore cj = charjump[(int)*dp]; 2184297a3b0SGarrett D'Amore dp += (cj < mj ? mj : cj); 2194297a3b0SGarrett D'Amore pp = mustlast; 2204297a3b0SGarrett D'Amore } 2214297a3b0SGarrett D'Amore if (pp != mustfirst) 2224297a3b0SGarrett D'Amore return (REG_NOMATCH); 2234297a3b0SGarrett D'Amore } else { 2244297a3b0SGarrett D'Amore for (dp = start; dp < stop; dp++) 2254297a3b0SGarrett D'Amore if (*dp == g->must[0] && 2264297a3b0SGarrett D'Amore stop - dp >= g->mlen && 2274297a3b0SGarrett D'Amore memcmp(dp, g->must, (size_t)g->mlen) == 0) 2284297a3b0SGarrett D'Amore break; 2294297a3b0SGarrett D'Amore if (dp == stop) /* we didn't find g->must */ 2304297a3b0SGarrett D'Amore return (REG_NOMATCH); 2314297a3b0SGarrett D'Amore } 2324297a3b0SGarrett D'Amore } 2334297a3b0SGarrett D'Amore 2344297a3b0SGarrett D'Amore /* match struct setup */ 2354297a3b0SGarrett D'Amore m->g = g; 2364297a3b0SGarrett D'Amore m->eflags = eflags; 2374297a3b0SGarrett D'Amore m->pmatch = NULL; 2384297a3b0SGarrett D'Amore m->lastpos = NULL; 2394297a3b0SGarrett D'Amore m->offp = string; 2404297a3b0SGarrett D'Amore m->beginp = start; 2414297a3b0SGarrett D'Amore m->endp = stop; 2424297a3b0SGarrett D'Amore STATESETUP(m, 4); 2434297a3b0SGarrett D'Amore SETUP(m->st); 2444297a3b0SGarrett D'Amore SETUP(m->fresh); 2454297a3b0SGarrett D'Amore SETUP(m->tmp); 2464297a3b0SGarrett D'Amore SETUP(m->empty); 2474297a3b0SGarrett D'Amore CLEAR(m->empty); 2484297a3b0SGarrett D'Amore ZAPSTATE(&m->mbs); 2494297a3b0SGarrett D'Amore 2504297a3b0SGarrett D'Amore /* Adjust start according to moffset, to speed things up */ 2514297a3b0SGarrett D'Amore if (g->moffset > -1) 2524297a3b0SGarrett D'Amore start = ((dp - g->moffset) < start) ? start : dp - g->moffset; 2534297a3b0SGarrett D'Amore 2544297a3b0SGarrett D'Amore SP("mloop", m->st, *start); 2554297a3b0SGarrett D'Amore 2564297a3b0SGarrett D'Amore /* this loop does only one repetition except for backrefs */ 2574297a3b0SGarrett D'Amore for (;;) { 2584297a3b0SGarrett D'Amore endp = fast(m, start, stop, gf, gl); 2594297a3b0SGarrett D'Amore if (endp == NULL) { /* a miss */ 2604297a3b0SGarrett D'Amore if (m->pmatch != NULL) 2614297a3b0SGarrett D'Amore free((char *)m->pmatch); 2624297a3b0SGarrett D'Amore if (m->lastpos != NULL) 2634297a3b0SGarrett D'Amore free((char *)m->lastpos); 2644297a3b0SGarrett D'Amore STATETEARDOWN(m); 2654297a3b0SGarrett D'Amore return (REG_NOMATCH); 2664297a3b0SGarrett D'Amore } 2674297a3b0SGarrett D'Amore if (nmatch == 0 && !g->backrefs) 2684297a3b0SGarrett D'Amore break; /* no further info needed */ 2694297a3b0SGarrett D'Amore 2704297a3b0SGarrett D'Amore /* where? */ 2714297a3b0SGarrett D'Amore assert(m->coldp != NULL); 2724297a3b0SGarrett D'Amore for (;;) { 2734297a3b0SGarrett D'Amore NOTE("finding start"); 2744297a3b0SGarrett D'Amore endp = slow(m, m->coldp, stop, gf, gl); 2754297a3b0SGarrett D'Amore if (endp != NULL) 2764297a3b0SGarrett D'Amore break; 2774297a3b0SGarrett D'Amore assert(m->coldp < m->endp); 2784297a3b0SGarrett D'Amore m->coldp += XMBRTOWC(NULL, m->coldp, 2794297a3b0SGarrett D'Amore m->endp - m->coldp, &m->mbs, 0); 2804297a3b0SGarrett D'Amore } 2814297a3b0SGarrett D'Amore if (nmatch == 1 && !g->backrefs) 2824297a3b0SGarrett D'Amore break; /* no further info needed */ 2834297a3b0SGarrett D'Amore 2844297a3b0SGarrett D'Amore /* oh my, he wants the subexpressions... */ 2854297a3b0SGarrett D'Amore if (m->pmatch == NULL) 2864297a3b0SGarrett D'Amore m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 2874297a3b0SGarrett D'Amore sizeof (regmatch_t)); 2884297a3b0SGarrett D'Amore if (m->pmatch == NULL) { 2894297a3b0SGarrett D'Amore STATETEARDOWN(m); 2904297a3b0SGarrett D'Amore return (REG_ESPACE); 2914297a3b0SGarrett D'Amore } 2924297a3b0SGarrett D'Amore for (i = 1; i <= m->g->nsub; i++) 2934297a3b0SGarrett D'Amore m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 2944297a3b0SGarrett D'Amore /* NB: FreeBSD has REG_BACKR, we do not */ 2954297a3b0SGarrett D'Amore if (!g->backrefs /* && !(m->eflags®_BACKR) */) { 2964297a3b0SGarrett D'Amore NOTE("dissecting"); 2974297a3b0SGarrett D'Amore dp = dissect(m, m->coldp, endp, gf, gl); 2984297a3b0SGarrett D'Amore } else { 2994297a3b0SGarrett D'Amore if (g->nplus > 0 && m->lastpos == NULL) 3004297a3b0SGarrett D'Amore m->lastpos = malloc((g->nplus+1) * 3014297a3b0SGarrett D'Amore sizeof (const char *)); 3024297a3b0SGarrett D'Amore if (g->nplus > 0 && m->lastpos == NULL) { 3034297a3b0SGarrett D'Amore free(m->pmatch); 3044297a3b0SGarrett D'Amore STATETEARDOWN(m); 3054297a3b0SGarrett D'Amore return (REG_ESPACE); 3064297a3b0SGarrett D'Amore } 3074297a3b0SGarrett D'Amore NOTE("backref dissect"); 3084297a3b0SGarrett D'Amore dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); 3094297a3b0SGarrett D'Amore } 3104297a3b0SGarrett D'Amore if (dp != NULL) 3114297a3b0SGarrett D'Amore break; 3124297a3b0SGarrett D'Amore 3134297a3b0SGarrett D'Amore /* uh-oh... we couldn't find a subexpression-level match */ 3144297a3b0SGarrett D'Amore assert(g->backrefs); /* must be back references doing it */ 3154297a3b0SGarrett D'Amore assert(g->nplus == 0 || m->lastpos != NULL); 3164297a3b0SGarrett D'Amore for (;;) { 3174297a3b0SGarrett D'Amore if (dp != NULL || endp <= m->coldp) 3184297a3b0SGarrett D'Amore break; /* defeat */ 3194297a3b0SGarrett D'Amore NOTE("backoff"); 3204297a3b0SGarrett D'Amore endp = slow(m, m->coldp, endp-1, gf, gl); 3214297a3b0SGarrett D'Amore if (endp == NULL) 3224297a3b0SGarrett D'Amore break; /* defeat */ 3234297a3b0SGarrett D'Amore /* try it on a shorter possibility */ 3244297a3b0SGarrett D'Amore #ifndef NDEBUG 3254297a3b0SGarrett D'Amore for (i = 1; i <= m->g->nsub; i++) { 3264297a3b0SGarrett D'Amore assert(m->pmatch[i].rm_so == -1); 3274297a3b0SGarrett D'Amore assert(m->pmatch[i].rm_eo == -1); 3284297a3b0SGarrett D'Amore } 3294297a3b0SGarrett D'Amore #endif 3304297a3b0SGarrett D'Amore NOTE("backoff dissect"); 3314297a3b0SGarrett D'Amore dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); 3324297a3b0SGarrett D'Amore } 3334297a3b0SGarrett D'Amore assert(dp == NULL || dp == endp); 3344297a3b0SGarrett D'Amore if (dp != NULL) /* found a shorter one */ 3354297a3b0SGarrett D'Amore break; 3364297a3b0SGarrett D'Amore 3374297a3b0SGarrett D'Amore /* despite initial appearances, there is no match here */ 3384297a3b0SGarrett D'Amore NOTE("false alarm"); 3394297a3b0SGarrett D'Amore /* recycle starting later */ 3404297a3b0SGarrett D'Amore start = m->coldp + XMBRTOWC(NULL, m->coldp, 3414297a3b0SGarrett D'Amore stop - m->coldp, &m->mbs, 0); 3424297a3b0SGarrett D'Amore assert(start <= stop); 3434297a3b0SGarrett D'Amore } 3444297a3b0SGarrett D'Amore 3454297a3b0SGarrett D'Amore /* fill in the details if requested */ 3464297a3b0SGarrett D'Amore if (nmatch > 0) { 3474297a3b0SGarrett D'Amore pmatch[0].rm_so = m->coldp - m->offp; 3484297a3b0SGarrett D'Amore pmatch[0].rm_eo = endp - m->offp; 3494297a3b0SGarrett D'Amore } 3504297a3b0SGarrett D'Amore if (nmatch > 1) { 3514297a3b0SGarrett D'Amore assert(m->pmatch != NULL); 3524297a3b0SGarrett D'Amore for (i = 1; i < nmatch; i++) 3534297a3b0SGarrett D'Amore if (i <= m->g->nsub) 3544297a3b0SGarrett D'Amore pmatch[i] = m->pmatch[i]; 3554297a3b0SGarrett D'Amore else { 3564297a3b0SGarrett D'Amore pmatch[i].rm_so = -1; 3574297a3b0SGarrett D'Amore pmatch[i].rm_eo = -1; 3584297a3b0SGarrett D'Amore } 3594297a3b0SGarrett D'Amore } 3604297a3b0SGarrett D'Amore 3614297a3b0SGarrett D'Amore if (m->pmatch != NULL) 3624297a3b0SGarrett D'Amore free((char *)m->pmatch); 3634297a3b0SGarrett D'Amore if (m->lastpos != NULL) 3644297a3b0SGarrett D'Amore free((char *)m->lastpos); 3654297a3b0SGarrett D'Amore STATETEARDOWN(m); 3664297a3b0SGarrett D'Amore return (0); 3674297a3b0SGarrett D'Amore } 3684297a3b0SGarrett D'Amore 3694297a3b0SGarrett D'Amore /* 3704297a3b0SGarrett D'Amore * dissect - figure out what matched what, no back references 3714297a3b0SGarrett D'Amore */ 3724297a3b0SGarrett D'Amore static const char * 3734297a3b0SGarrett D'Amore dissect(struct match *m, const char *start, const char *stop, sopno startst, 3744297a3b0SGarrett D'Amore sopno stopst) 3754297a3b0SGarrett D'Amore { 3764297a3b0SGarrett D'Amore int i; 3774297a3b0SGarrett D'Amore sopno ss; /* start sop of current subRE */ 3784297a3b0SGarrett D'Amore sopno es; /* end sop of current subRE */ 3794297a3b0SGarrett D'Amore const char *sp; /* start of string matched by it */ 3804297a3b0SGarrett D'Amore const char *stp; /* string matched by it cannot pass here */ 3814297a3b0SGarrett D'Amore const char *rest; /* start of rest of string */ 3824297a3b0SGarrett D'Amore const char *tail; /* string unmatched by rest of RE */ 3834297a3b0SGarrett D'Amore sopno ssub; /* start sop of subsubRE */ 3844297a3b0SGarrett D'Amore sopno esub; /* end sop of subsubRE */ 3854297a3b0SGarrett D'Amore const char *ssp; /* start of string matched by subsubRE */ 3864297a3b0SGarrett D'Amore const char *sep; /* end of string matched by subsubRE */ 3874297a3b0SGarrett D'Amore const char *oldssp; /* previous ssp */ 3884297a3b0SGarrett D'Amore const char *dp; 3894297a3b0SGarrett D'Amore 3904297a3b0SGarrett D'Amore AT("diss", start, stop, startst, stopst); 3914297a3b0SGarrett D'Amore sp = start; 3924297a3b0SGarrett D'Amore for (ss = startst; ss < stopst; ss = es) { 3934297a3b0SGarrett D'Amore /* identify end of subRE */ 3944297a3b0SGarrett D'Amore es = ss; 3954297a3b0SGarrett D'Amore switch (OP(m->g->strip[es])) { 3964297a3b0SGarrett D'Amore case OPLUS_: 3974297a3b0SGarrett D'Amore case OQUEST_: 3984297a3b0SGarrett D'Amore es += OPND(m->g->strip[es]); 3994297a3b0SGarrett D'Amore break; 4004297a3b0SGarrett D'Amore case OCH_: 4014297a3b0SGarrett D'Amore while (OP(m->g->strip[es]) != O_CH) 4024297a3b0SGarrett D'Amore es += OPND(m->g->strip[es]); 4034297a3b0SGarrett D'Amore break; 4044297a3b0SGarrett D'Amore } 4054297a3b0SGarrett D'Amore es++; 4064297a3b0SGarrett D'Amore 4074297a3b0SGarrett D'Amore /* figure out what it matched */ 4084297a3b0SGarrett D'Amore switch (OP(m->g->strip[ss])) { 4094297a3b0SGarrett D'Amore case OEND: 4104297a3b0SGarrett D'Amore assert(0); 4114297a3b0SGarrett D'Amore break; 4124297a3b0SGarrett D'Amore case OCHAR: 4134297a3b0SGarrett D'Amore sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); 4144297a3b0SGarrett D'Amore break; 4154297a3b0SGarrett D'Amore case OBOL: 4164297a3b0SGarrett D'Amore case OEOL: 4174297a3b0SGarrett D'Amore case OBOW: 4184297a3b0SGarrett D'Amore case OEOW: 4194297a3b0SGarrett D'Amore break; 4204297a3b0SGarrett D'Amore case OANY: 4214297a3b0SGarrett D'Amore case OANYOF: 4224297a3b0SGarrett D'Amore sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); 4234297a3b0SGarrett D'Amore break; 4244297a3b0SGarrett D'Amore case OBACK_: 4254297a3b0SGarrett D'Amore case O_BACK: 4264297a3b0SGarrett D'Amore assert(0); 4274297a3b0SGarrett D'Amore break; 4284297a3b0SGarrett D'Amore /* cases where length of match is hard to find */ 4294297a3b0SGarrett D'Amore case OQUEST_: 4304297a3b0SGarrett D'Amore stp = stop; 4314297a3b0SGarrett D'Amore for (;;) { 4324297a3b0SGarrett D'Amore /* how long could this one be? */ 4334297a3b0SGarrett D'Amore rest = slow(m, sp, stp, ss, es); 4344297a3b0SGarrett D'Amore assert(rest != NULL); /* it did match */ 4354297a3b0SGarrett D'Amore /* could the rest match the rest? */ 4364297a3b0SGarrett D'Amore tail = slow(m, rest, stop, es, stopst); 4374297a3b0SGarrett D'Amore if (tail == stop) 4384297a3b0SGarrett D'Amore break; /* yes! */ 4394297a3b0SGarrett D'Amore /* no -- try a shorter match for this one */ 4404297a3b0SGarrett D'Amore stp = rest - 1; 4414297a3b0SGarrett D'Amore assert(stp >= sp); /* it did work */ 4424297a3b0SGarrett D'Amore } 4434297a3b0SGarrett D'Amore ssub = ss + 1; 4444297a3b0SGarrett D'Amore esub = es - 1; 4454297a3b0SGarrett D'Amore /* did innards match? */ 4464297a3b0SGarrett D'Amore if (slow(m, sp, rest, ssub, esub) != NULL) { 4474297a3b0SGarrett D'Amore dp = dissect(m, sp, rest, ssub, esub); 4484297a3b0SGarrett D'Amore assert(dp == rest); 4494297a3b0SGarrett D'Amore #if defined(__lint) 4504297a3b0SGarrett D'Amore (void) dp; 4514297a3b0SGarrett D'Amore #endif 4524297a3b0SGarrett D'Amore } else /* no */ 4534297a3b0SGarrett D'Amore assert(sp == rest); 4544297a3b0SGarrett D'Amore sp = rest; 4554297a3b0SGarrett D'Amore break; 4564297a3b0SGarrett D'Amore case OPLUS_: 4574297a3b0SGarrett D'Amore stp = stop; 4584297a3b0SGarrett D'Amore for (;;) { 4594297a3b0SGarrett D'Amore /* how long could this one be? */ 4604297a3b0SGarrett D'Amore rest = slow(m, sp, stp, ss, es); 4614297a3b0SGarrett D'Amore assert(rest != NULL); /* it did match */ 4624297a3b0SGarrett D'Amore /* could the rest match the rest? */ 4634297a3b0SGarrett D'Amore tail = slow(m, rest, stop, es, stopst); 4644297a3b0SGarrett D'Amore if (tail == stop) 4654297a3b0SGarrett D'Amore break; /* yes! */ 4664297a3b0SGarrett D'Amore /* no -- try a shorter match for this one */ 4674297a3b0SGarrett D'Amore stp = rest - 1; 4684297a3b0SGarrett D'Amore assert(stp >= sp); /* it did work */ 4694297a3b0SGarrett D'Amore } 4704297a3b0SGarrett D'Amore ssub = ss + 1; 4714297a3b0SGarrett D'Amore esub = es - 1; 4724297a3b0SGarrett D'Amore ssp = sp; 4734297a3b0SGarrett D'Amore oldssp = ssp; 4744297a3b0SGarrett D'Amore for (;;) { /* find last match of innards */ 4754297a3b0SGarrett D'Amore sep = slow(m, ssp, rest, ssub, esub); 4764297a3b0SGarrett D'Amore if (sep == NULL || sep == ssp) 4774297a3b0SGarrett D'Amore break; /* failed or matched null */ 4784297a3b0SGarrett D'Amore oldssp = ssp; /* on to next try */ 4794297a3b0SGarrett D'Amore ssp = sep; 4804297a3b0SGarrett D'Amore } 4814297a3b0SGarrett D'Amore if (sep == NULL) { 4824297a3b0SGarrett D'Amore /* last successful match */ 4834297a3b0SGarrett D'Amore sep = ssp; 4844297a3b0SGarrett D'Amore ssp = oldssp; 4854297a3b0SGarrett D'Amore } 4864297a3b0SGarrett D'Amore assert(sep == rest); /* must exhaust substring */ 4874297a3b0SGarrett D'Amore assert(slow(m, ssp, sep, ssub, esub) == rest); 4884297a3b0SGarrett D'Amore dp = dissect(m, ssp, sep, ssub, esub); 4894297a3b0SGarrett D'Amore assert(dp == sep); 4904297a3b0SGarrett D'Amore sp = rest; 4914297a3b0SGarrett D'Amore break; 4924297a3b0SGarrett D'Amore case OCH_: 4934297a3b0SGarrett D'Amore stp = stop; 4944297a3b0SGarrett D'Amore for (;;) { 4954297a3b0SGarrett D'Amore /* how long could this one be? */ 4964297a3b0SGarrett D'Amore rest = slow(m, sp, stp, ss, es); 4974297a3b0SGarrett D'Amore assert(rest != NULL); /* it did match */ 4984297a3b0SGarrett D'Amore /* could the rest match the rest? */ 4994297a3b0SGarrett D'Amore tail = slow(m, rest, stop, es, stopst); 5004297a3b0SGarrett D'Amore if (tail == stop) 5014297a3b0SGarrett D'Amore break; /* yes! */ 5024297a3b0SGarrett D'Amore /* no -- try a shorter match for this one */ 5034297a3b0SGarrett D'Amore stp = rest - 1; 5044297a3b0SGarrett D'Amore assert(stp >= sp); /* it did work */ 5054297a3b0SGarrett D'Amore } 5064297a3b0SGarrett D'Amore ssub = ss + 1; 5074297a3b0SGarrett D'Amore esub = ss + OPND(m->g->strip[ss]) - 1; 5084297a3b0SGarrett D'Amore assert(OP(m->g->strip[esub]) == OOR1); 5094297a3b0SGarrett D'Amore for (;;) { /* find first matching branch */ 5104297a3b0SGarrett D'Amore if (slow(m, sp, rest, ssub, esub) == rest) 5114297a3b0SGarrett D'Amore break; /* it matched all of it */ 5124297a3b0SGarrett D'Amore /* that one missed, try next one */ 5134297a3b0SGarrett D'Amore assert(OP(m->g->strip[esub]) == OOR1); 5144297a3b0SGarrett D'Amore esub++; 5154297a3b0SGarrett D'Amore assert(OP(m->g->strip[esub]) == OOR2); 5164297a3b0SGarrett D'Amore ssub = esub + 1; 5174297a3b0SGarrett D'Amore esub += OPND(m->g->strip[esub]); 5184297a3b0SGarrett D'Amore if (OP(m->g->strip[esub]) == OOR2) 5194297a3b0SGarrett D'Amore esub--; 5204297a3b0SGarrett D'Amore else 5214297a3b0SGarrett D'Amore assert(OP(m->g->strip[esub]) == O_CH); 5224297a3b0SGarrett D'Amore } 5234297a3b0SGarrett D'Amore dp = dissect(m, sp, rest, ssub, esub); 5244297a3b0SGarrett D'Amore assert(dp == rest); 5254297a3b0SGarrett D'Amore sp = rest; 5264297a3b0SGarrett D'Amore break; 5274297a3b0SGarrett D'Amore case O_PLUS: 5284297a3b0SGarrett D'Amore case O_QUEST: 5294297a3b0SGarrett D'Amore case OOR1: 5304297a3b0SGarrett D'Amore case OOR2: 5314297a3b0SGarrett D'Amore case O_CH: 5324297a3b0SGarrett D'Amore assert(0); 5334297a3b0SGarrett D'Amore break; 5344297a3b0SGarrett D'Amore case OLPAREN: 5354297a3b0SGarrett D'Amore i = OPND(m->g->strip[ss]); 5364297a3b0SGarrett D'Amore assert(0 < i && i <= m->g->nsub); 5374297a3b0SGarrett D'Amore m->pmatch[i].rm_so = sp - m->offp; 5384297a3b0SGarrett D'Amore break; 5394297a3b0SGarrett D'Amore case ORPAREN: 5404297a3b0SGarrett D'Amore i = OPND(m->g->strip[ss]); 5414297a3b0SGarrett D'Amore assert(0 < i && i <= m->g->nsub); 5424297a3b0SGarrett D'Amore m->pmatch[i].rm_eo = sp - m->offp; 5434297a3b0SGarrett D'Amore break; 5444297a3b0SGarrett D'Amore default: /* uh oh */ 5454297a3b0SGarrett D'Amore assert(0); 5464297a3b0SGarrett D'Amore break; 5474297a3b0SGarrett D'Amore } 5484297a3b0SGarrett D'Amore } 5494297a3b0SGarrett D'Amore 5504297a3b0SGarrett D'Amore assert(sp == stop); 5514297a3b0SGarrett D'Amore return (sp); 5524297a3b0SGarrett D'Amore } 5534297a3b0SGarrett D'Amore 5544297a3b0SGarrett D'Amore /* 5554297a3b0SGarrett D'Amore * backref - figure out what matched what, figuring in back references 5564297a3b0SGarrett D'Amore */ 5574297a3b0SGarrett D'Amore static const char * 5584297a3b0SGarrett D'Amore backref(struct match *m, const char *start, const char *stop, sopno startst, 5594297a3b0SGarrett D'Amore sopno stopst, sopno lev, /* PLUS nesting level */ 5604297a3b0SGarrett D'Amore int rec) 5614297a3b0SGarrett D'Amore { 5624297a3b0SGarrett D'Amore int i; 5634297a3b0SGarrett D'Amore sopno ss; /* start sop of current subRE */ 5644297a3b0SGarrett D'Amore const char *sp; /* start of string matched by it */ 5654297a3b0SGarrett D'Amore sopno ssub; /* start sop of subsubRE */ 5664297a3b0SGarrett D'Amore sopno esub; /* end sop of subsubRE */ 5674297a3b0SGarrett D'Amore const char *ssp; /* start of string matched by subsubRE */ 5684297a3b0SGarrett D'Amore const char *dp; 5694297a3b0SGarrett D'Amore size_t len; 5704297a3b0SGarrett D'Amore int hard; 5714297a3b0SGarrett D'Amore sop s; 5724297a3b0SGarrett D'Amore regoff_t offsave; 5734297a3b0SGarrett D'Amore cset *cs; 5744297a3b0SGarrett D'Amore wint_t wc; 5754297a3b0SGarrett D'Amore 5764297a3b0SGarrett D'Amore AT("back", start, stop, startst, stopst); 5774297a3b0SGarrett D'Amore sp = start; 5784297a3b0SGarrett D'Amore 5794297a3b0SGarrett D'Amore /* get as far as we can with easy stuff */ 5804297a3b0SGarrett D'Amore hard = 0; 5814297a3b0SGarrett D'Amore for (ss = startst; !hard && ss < stopst; ss++) 5824297a3b0SGarrett D'Amore switch (OP(s = m->g->strip[ss])) { 5834297a3b0SGarrett D'Amore case OCHAR: 5844297a3b0SGarrett D'Amore if (sp == stop) 5854297a3b0SGarrett D'Amore return (NULL); 5864297a3b0SGarrett D'Amore sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 5874297a3b0SGarrett D'Amore if (wc != OPND(s)) 5884297a3b0SGarrett D'Amore return (NULL); 5894297a3b0SGarrett D'Amore break; 5904297a3b0SGarrett D'Amore case OANY: 5914297a3b0SGarrett D'Amore if (sp == stop) 5924297a3b0SGarrett D'Amore return (NULL); 5934297a3b0SGarrett D'Amore sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 5944297a3b0SGarrett D'Amore if (wc == BADCHAR) 5954297a3b0SGarrett D'Amore return (NULL); 5964297a3b0SGarrett D'Amore break; 5974297a3b0SGarrett D'Amore case OANYOF: 5984297a3b0SGarrett D'Amore if (sp == stop) 5994297a3b0SGarrett D'Amore return (NULL); 6004297a3b0SGarrett D'Amore cs = &m->g->sets[OPND(s)]; 6014297a3b0SGarrett D'Amore sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); 6024297a3b0SGarrett D'Amore if (wc == BADCHAR || !CHIN(cs, wc)) 6034297a3b0SGarrett D'Amore return (NULL); 6044297a3b0SGarrett D'Amore break; 6054297a3b0SGarrett D'Amore case OBOL: 6064297a3b0SGarrett D'Amore if ((sp == m->beginp && !(m->eflags®_NOTBOL)) || 6074297a3b0SGarrett D'Amore (sp < m->endp && *(sp-1) == '\n' && 6084297a3b0SGarrett D'Amore (m->g->cflags®_NEWLINE))) { 6094297a3b0SGarrett D'Amore break; 6104297a3b0SGarrett D'Amore } 6114297a3b0SGarrett D'Amore return (NULL); 6124297a3b0SGarrett D'Amore case OEOL: 6134297a3b0SGarrett D'Amore if ((sp == m->endp && !(m->eflags®_NOTEOL)) || 6144297a3b0SGarrett D'Amore (sp < m->endp && *sp == '\n' && 6154297a3b0SGarrett D'Amore (m->g->cflags®_NEWLINE))) { 6164297a3b0SGarrett D'Amore break; 6174297a3b0SGarrett D'Amore } 6184297a3b0SGarrett D'Amore return (NULL); 6194297a3b0SGarrett D'Amore case OBOW: 6204297a3b0SGarrett D'Amore if (((sp == m->beginp && !(m->eflags®_NOTBOL)) || 6214297a3b0SGarrett D'Amore (sp < m->endp && *(sp-1) == '\n' && 6224297a3b0SGarrett D'Amore (m->g->cflags®_NEWLINE)) || 6234297a3b0SGarrett D'Amore (sp > m->beginp && !ISWORD(*(sp-1)))) && 6244297a3b0SGarrett D'Amore (sp < m->endp && ISWORD(*sp))) { 6254297a3b0SGarrett D'Amore break; 626*33f5ff17SMilan Jurik } 6274297a3b0SGarrett D'Amore return (NULL); 6284297a3b0SGarrett D'Amore case OEOW: 6294297a3b0SGarrett D'Amore if (((sp == m->endp && !(m->eflags®_NOTEOL)) || 6304297a3b0SGarrett D'Amore (sp < m->endp && *sp == '\n' && 6314297a3b0SGarrett D'Amore (m->g->cflags®_NEWLINE)) || 6324297a3b0SGarrett D'Amore (sp < m->endp && !ISWORD(*sp))) && 6334297a3b0SGarrett D'Amore (sp > m->beginp && ISWORD(*(sp-1)))) { 6344297a3b0SGarrett D'Amore break; 635*33f5ff17SMilan Jurik } 6364297a3b0SGarrett D'Amore return (NULL); 6374297a3b0SGarrett D'Amore case O_QUEST: 6384297a3b0SGarrett D'Amore break; 6394297a3b0SGarrett D'Amore case OOR1: /* matches null but needs to skip */ 6404297a3b0SGarrett D'Amore ss++; 6414297a3b0SGarrett D'Amore s = m->g->strip[ss]; 6424297a3b0SGarrett D'Amore do { 6434297a3b0SGarrett D'Amore assert(OP(s) == OOR2); 6444297a3b0SGarrett D'Amore ss += OPND(s); 6454297a3b0SGarrett D'Amore } while (OP(s = m->g->strip[ss]) != O_CH); 6464297a3b0SGarrett D'Amore /* note that the ss++ gets us past the O_CH */ 6474297a3b0SGarrett D'Amore break; 6484297a3b0SGarrett D'Amore default: /* have to make a choice */ 6494297a3b0SGarrett D'Amore hard = 1; 6504297a3b0SGarrett D'Amore break; 6514297a3b0SGarrett D'Amore } 6524297a3b0SGarrett D'Amore if (!hard) { /* that was it! */ 6534297a3b0SGarrett D'Amore if (sp != stop) 6544297a3b0SGarrett D'Amore return (NULL); 6554297a3b0SGarrett D'Amore return (sp); 6564297a3b0SGarrett D'Amore } 6574297a3b0SGarrett D'Amore ss--; /* adjust for the for's final increment */ 6584297a3b0SGarrett D'Amore 6594297a3b0SGarrett D'Amore /* the hard stuff */ 6604297a3b0SGarrett D'Amore AT("hard", sp, stop, ss, stopst); 6614297a3b0SGarrett D'Amore s = m->g->strip[ss]; 6624297a3b0SGarrett D'Amore switch (OP(s)) { 6634297a3b0SGarrett D'Amore case OBACK_: /* the vilest depths */ 6644297a3b0SGarrett D'Amore i = OPND(s); 6654297a3b0SGarrett D'Amore assert(0 < i && i <= m->g->nsub); 6664297a3b0SGarrett D'Amore if (m->pmatch[i].rm_eo == -1) 6674297a3b0SGarrett D'Amore return (NULL); 6684297a3b0SGarrett D'Amore assert(m->pmatch[i].rm_so != -1); 6694297a3b0SGarrett D'Amore len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 6704297a3b0SGarrett D'Amore if (len == 0 && rec++ > MAX_RECURSION) 6714297a3b0SGarrett D'Amore return (NULL); 6724297a3b0SGarrett D'Amore assert(stop - m->beginp >= len); 6734297a3b0SGarrett D'Amore if (sp > stop - len) 6744297a3b0SGarrett D'Amore return (NULL); /* not enough left to match */ 6754297a3b0SGarrett D'Amore ssp = m->offp + m->pmatch[i].rm_so; 6764297a3b0SGarrett D'Amore if (memcmp(sp, ssp, len) != 0) 6774297a3b0SGarrett D'Amore return (NULL); 6784297a3b0SGarrett D'Amore while (m->g->strip[ss] != SOP(O_BACK, i)) 6794297a3b0SGarrett D'Amore ss++; 6804297a3b0SGarrett D'Amore return (backref(m, sp+len, stop, ss+1, stopst, lev, rec)); 6814297a3b0SGarrett D'Amore case OQUEST_: /* to null or not */ 6824297a3b0SGarrett D'Amore dp = backref(m, sp, stop, ss+1, stopst, lev, rec); 6834297a3b0SGarrett D'Amore if (dp != NULL) 6844297a3b0SGarrett D'Amore return (dp); /* not */ 6854297a3b0SGarrett D'Amore return (backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); 6864297a3b0SGarrett D'Amore case OPLUS_: 6874297a3b0SGarrett D'Amore assert(m->lastpos != NULL); 6884297a3b0SGarrett D'Amore assert(lev+1 <= m->g->nplus); 6894297a3b0SGarrett D'Amore m->lastpos[lev+1] = sp; 6904297a3b0SGarrett D'Amore return (backref(m, sp, stop, ss+1, stopst, lev+1, rec)); 6914297a3b0SGarrett D'Amore case O_PLUS: 6924297a3b0SGarrett D'Amore if (sp == m->lastpos[lev]) /* last pass matched null */ 6934297a3b0SGarrett D'Amore return (backref(m, sp, stop, ss+1, stopst, lev-1, rec)); 6944297a3b0SGarrett D'Amore /* try another pass */ 6954297a3b0SGarrett D'Amore m->lastpos[lev] = sp; 6964297a3b0SGarrett D'Amore dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); 6974297a3b0SGarrett D'Amore if (dp == NULL) 6984297a3b0SGarrett D'Amore return (backref(m, sp, stop, ss+1, stopst, lev-1, rec)); 6994297a3b0SGarrett D'Amore return (dp); 7004297a3b0SGarrett D'Amore case OCH_: /* find the right one, if any */ 7014297a3b0SGarrett D'Amore ssub = ss + 1; 7024297a3b0SGarrett D'Amore esub = ss + OPND(s) - 1; 7034297a3b0SGarrett D'Amore assert(OP(m->g->strip[esub]) == OOR1); 7044297a3b0SGarrett D'Amore for (;;) { /* find first matching branch */ 7054297a3b0SGarrett D'Amore dp = backref(m, sp, stop, ssub, esub, lev, rec); 7064297a3b0SGarrett D'Amore if (dp != NULL) 7074297a3b0SGarrett D'Amore return (dp); 7084297a3b0SGarrett D'Amore /* that one missed, try next one */ 7094297a3b0SGarrett D'Amore if (OP(m->g->strip[esub]) == O_CH) 7104297a3b0SGarrett D'Amore return (NULL); /* there is none */ 7114297a3b0SGarrett D'Amore esub++; 7124297a3b0SGarrett D'Amore assert(OP(m->g->strip[esub]) == OOR2); 7134297a3b0SGarrett D'Amore ssub = esub + 1; 7144297a3b0SGarrett D'Amore esub += OPND(m->g->strip[esub]); 7154297a3b0SGarrett D'Amore if (OP(m->g->strip[esub]) == OOR2) 7164297a3b0SGarrett D'Amore esub--; 7174297a3b0SGarrett D'Amore else 7184297a3b0SGarrett D'Amore assert(OP(m->g->strip[esub]) == O_CH); 7194297a3b0SGarrett D'Amore } 720*33f5ff17SMilan Jurik /* NOTREACHED */ 7214297a3b0SGarrett D'Amore break; 7224297a3b0SGarrett D'Amore case OLPAREN: /* must undo assignment if rest fails */ 7234297a3b0SGarrett D'Amore i = OPND(s); 7244297a3b0SGarrett D'Amore assert(0 < i && i <= m->g->nsub); 7254297a3b0SGarrett D'Amore offsave = m->pmatch[i].rm_so; 7264297a3b0SGarrett D'Amore m->pmatch[i].rm_so = sp - m->offp; 7274297a3b0SGarrett D'Amore dp = backref(m, sp, stop, ss+1, stopst, lev, rec); 7284297a3b0SGarrett D'Amore if (dp != NULL) 7294297a3b0SGarrett D'Amore return (dp); 7304297a3b0SGarrett D'Amore m->pmatch[i].rm_so = offsave; 7314297a3b0SGarrett D'Amore return (NULL); 7324297a3b0SGarrett D'Amore case ORPAREN: /* must undo assignment if rest fails */ 7334297a3b0SGarrett D'Amore i = OPND(s); 7344297a3b0SGarrett D'Amore assert(0 < i && i <= m->g->nsub); 7354297a3b0SGarrett D'Amore offsave = m->pmatch[i].rm_eo; 7364297a3b0SGarrett D'Amore m->pmatch[i].rm_eo = sp - m->offp; 7374297a3b0SGarrett D'Amore dp = backref(m, sp, stop, ss+1, stopst, lev, rec); 7384297a3b0SGarrett D'Amore if (dp != NULL) 7394297a3b0SGarrett D'Amore return (dp); 7404297a3b0SGarrett D'Amore m->pmatch[i].rm_eo = offsave; 7414297a3b0SGarrett D'Amore return (NULL); 7424297a3b0SGarrett D'Amore default: /* uh oh */ 7434297a3b0SGarrett D'Amore assert(0); 7444297a3b0SGarrett D'Amore break; 7454297a3b0SGarrett D'Amore } 7464297a3b0SGarrett D'Amore 7474297a3b0SGarrett D'Amore /* "can't happen" */ 7484297a3b0SGarrett D'Amore assert(0); 7494297a3b0SGarrett D'Amore return (NULL); 7504297a3b0SGarrett D'Amore } 7514297a3b0SGarrett D'Amore 7524297a3b0SGarrett D'Amore /* 7534297a3b0SGarrett D'Amore * fast - step through the string at top speed 7544297a3b0SGarrett D'Amore */ 7554297a3b0SGarrett D'Amore static const char * 7564297a3b0SGarrett D'Amore fast(struct match *m, const char *start, const char *stop, sopno startst, 7574297a3b0SGarrett D'Amore sopno stopst) 7584297a3b0SGarrett D'Amore { 7594297a3b0SGarrett D'Amore states st = m->st; 7604297a3b0SGarrett D'Amore states fresh = m->fresh; 7614297a3b0SGarrett D'Amore states tmp = m->tmp; 7624297a3b0SGarrett D'Amore const char *p = start; 7634297a3b0SGarrett D'Amore wint_t c; 7644297a3b0SGarrett D'Amore wint_t lastc; /* previous c */ 7654297a3b0SGarrett D'Amore wint_t flagch; 7664297a3b0SGarrett D'Amore int i; 7674297a3b0SGarrett D'Amore const char *coldp; /* last p after which no match was underway */ 7684297a3b0SGarrett D'Amore size_t clen; 7694297a3b0SGarrett D'Amore 7704297a3b0SGarrett D'Amore CLEAR(st); 7714297a3b0SGarrett D'Amore SET1(st, startst); 7724297a3b0SGarrett D'Amore SP("fast", st, *p); 7734297a3b0SGarrett D'Amore st = step(m->g, startst, stopst, st, NOTHING, st); 7744297a3b0SGarrett D'Amore ASSIGN(fresh, st); 7754297a3b0SGarrett D'Amore SP("start", st, *p); 7764297a3b0SGarrett D'Amore coldp = NULL; 7774297a3b0SGarrett D'Amore if (start == m->beginp) 7784297a3b0SGarrett D'Amore c = OUT; 7794297a3b0SGarrett D'Amore else { 7804297a3b0SGarrett D'Amore /* 7814297a3b0SGarrett D'Amore * XXX Wrong if the previous character was multi-byte. 7824297a3b0SGarrett D'Amore * Newline never is (in encodings supported by FreeBSD), 7834297a3b0SGarrett D'Amore * so this only breaks the ISWORD tests below. 7844297a3b0SGarrett D'Amore */ 7854297a3b0SGarrett D'Amore c = (uch)*(start - 1); 7864297a3b0SGarrett D'Amore } 7874297a3b0SGarrett D'Amore for (;;) { 7884297a3b0SGarrett D'Amore /* next character */ 7894297a3b0SGarrett D'Amore lastc = c; 7904297a3b0SGarrett D'Amore if (p == m->endp) { 7914297a3b0SGarrett D'Amore clen = 0; 7924297a3b0SGarrett D'Amore c = OUT; 7934297a3b0SGarrett D'Amore } else 7944297a3b0SGarrett D'Amore clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); 7954297a3b0SGarrett D'Amore if (EQ(st, fresh)) 7964297a3b0SGarrett D'Amore coldp = p; 7974297a3b0SGarrett D'Amore 7984297a3b0SGarrett D'Amore /* is there an EOL and/or BOL between lastc and c? */ 7994297a3b0SGarrett D'Amore flagch = '\0'; 8004297a3b0SGarrett D'Amore i = 0; 8014297a3b0SGarrett D'Amore if ((lastc == '\n' && m->g->cflags®_NEWLINE) || 8024297a3b0SGarrett D'Amore (lastc == OUT && !(m->eflags®_NOTBOL))) { 8034297a3b0SGarrett D'Amore flagch = BOL; 8044297a3b0SGarrett D'Amore i = m->g->nbol; 8054297a3b0SGarrett D'Amore } 8064297a3b0SGarrett D'Amore if ((c == '\n' && m->g->cflags®_NEWLINE) || 8074297a3b0SGarrett D'Amore (c == OUT && !(m->eflags®_NOTEOL))) { 8084297a3b0SGarrett D'Amore flagch = (flagch == BOL) ? BOLEOL : EOL; 8094297a3b0SGarrett D'Amore i += m->g->neol; 8104297a3b0SGarrett D'Amore } 8114297a3b0SGarrett D'Amore if (i != 0) { 8124297a3b0SGarrett D'Amore for (; i > 0; i--) 8134297a3b0SGarrett D'Amore st = step(m->g, startst, stopst, st, 8144297a3b0SGarrett D'Amore flagch, st); 8154297a3b0SGarrett D'Amore SP("boleol", st, c); 8164297a3b0SGarrett D'Amore } 8174297a3b0SGarrett D'Amore 8184297a3b0SGarrett D'Amore /* how about a word boundary? */ 8194297a3b0SGarrett D'Amore if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 8204297a3b0SGarrett D'Amore (c != OUT && ISWORD(c))) { 8214297a3b0SGarrett D'Amore flagch = BOW; 8224297a3b0SGarrett D'Amore } 8234297a3b0SGarrett D'Amore if ((lastc != OUT && ISWORD(lastc)) && 8244297a3b0SGarrett D'Amore (flagch == EOL || (c != OUT && !ISWORD(c)))) { 8254297a3b0SGarrett D'Amore flagch = EOW; 8264297a3b0SGarrett D'Amore } 8274297a3b0SGarrett D'Amore if (flagch == BOW || flagch == EOW) { 8284297a3b0SGarrett D'Amore st = step(m->g, startst, stopst, st, flagch, st); 8294297a3b0SGarrett D'Amore SP("boweow", st, c); 8304297a3b0SGarrett D'Amore } 8314297a3b0SGarrett D'Amore 8324297a3b0SGarrett D'Amore /* are we done? */ 8334297a3b0SGarrett D'Amore if (ISSET(st, stopst) || p == stop || clen > stop - p) 8344297a3b0SGarrett D'Amore break; /* NOTE BREAK OUT */ 8354297a3b0SGarrett D'Amore 8364297a3b0SGarrett D'Amore /* no, we must deal with this character */ 8374297a3b0SGarrett D'Amore ASSIGN(tmp, st); 8384297a3b0SGarrett D'Amore ASSIGN(st, fresh); 8394297a3b0SGarrett D'Amore assert(c != OUT); 8404297a3b0SGarrett D'Amore st = step(m->g, startst, stopst, tmp, c, st); 8414297a3b0SGarrett D'Amore SP("aft", st, c); 8424297a3b0SGarrett D'Amore assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 8434297a3b0SGarrett D'Amore p += clen; 8444297a3b0SGarrett D'Amore } 8454297a3b0SGarrett D'Amore 8464297a3b0SGarrett D'Amore assert(coldp != NULL); 8474297a3b0SGarrett D'Amore m->coldp = coldp; 8484297a3b0SGarrett D'Amore if (ISSET(st, stopst)) 8494297a3b0SGarrett D'Amore return (p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); 8504297a3b0SGarrett D'Amore else 8514297a3b0SGarrett D'Amore return (NULL); 8524297a3b0SGarrett D'Amore } 8534297a3b0SGarrett D'Amore 8544297a3b0SGarrett D'Amore /* 8554297a3b0SGarrett D'Amore * slow - step through the string more deliberately 8564297a3b0SGarrett D'Amore */ 8574297a3b0SGarrett D'Amore static const char * 8584297a3b0SGarrett D'Amore slow(struct match *m, const char *start, const char *stop, sopno startst, 8594297a3b0SGarrett D'Amore sopno stopst) 8604297a3b0SGarrett D'Amore { 8614297a3b0SGarrett D'Amore states st = m->st; 8624297a3b0SGarrett D'Amore states empty = m->empty; 8634297a3b0SGarrett D'Amore states tmp = m->tmp; 8644297a3b0SGarrett D'Amore const char *p = start; 8654297a3b0SGarrett D'Amore wint_t c; 8664297a3b0SGarrett D'Amore wint_t lastc; /* previous c */ 8674297a3b0SGarrett D'Amore wint_t flagch; 8684297a3b0SGarrett D'Amore int i; 8694297a3b0SGarrett D'Amore const char *matchp; /* last p at which a match ended */ 8704297a3b0SGarrett D'Amore size_t clen; 8714297a3b0SGarrett D'Amore 8724297a3b0SGarrett D'Amore AT("slow", start, stop, startst, stopst); 8734297a3b0SGarrett D'Amore CLEAR(st); 8744297a3b0SGarrett D'Amore SET1(st, startst); 8754297a3b0SGarrett D'Amore SP("sstart", st, *p); 8764297a3b0SGarrett D'Amore st = step(m->g, startst, stopst, st, NOTHING, st); 8774297a3b0SGarrett D'Amore matchp = NULL; 8784297a3b0SGarrett D'Amore if (start == m->beginp) 8794297a3b0SGarrett D'Amore c = OUT; 8804297a3b0SGarrett D'Amore else { 8814297a3b0SGarrett D'Amore /* 8824297a3b0SGarrett D'Amore * XXX Wrong if the previous character was multi-byte. 8834297a3b0SGarrett D'Amore * Newline never is (in encodings supported by FreeBSD), 8844297a3b0SGarrett D'Amore * so this only breaks the ISWORD tests below. 8854297a3b0SGarrett D'Amore */ 8864297a3b0SGarrett D'Amore c = (uch)*(start - 1); 8874297a3b0SGarrett D'Amore } 8884297a3b0SGarrett D'Amore for (;;) { 8894297a3b0SGarrett D'Amore /* next character */ 8904297a3b0SGarrett D'Amore lastc = c; 8914297a3b0SGarrett D'Amore if (p == m->endp) { 8924297a3b0SGarrett D'Amore c = OUT; 8934297a3b0SGarrett D'Amore clen = 0; 8944297a3b0SGarrett D'Amore } else 8954297a3b0SGarrett D'Amore clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); 8964297a3b0SGarrett D'Amore 8974297a3b0SGarrett D'Amore /* is there an EOL and/or BOL between lastc and c? */ 8984297a3b0SGarrett D'Amore flagch = '\0'; 8994297a3b0SGarrett D'Amore i = 0; 9004297a3b0SGarrett D'Amore if ((lastc == '\n' && m->g->cflags®_NEWLINE) || 9014297a3b0SGarrett D'Amore (lastc == OUT && !(m->eflags®_NOTBOL))) { 9024297a3b0SGarrett D'Amore flagch = BOL; 9034297a3b0SGarrett D'Amore i = m->g->nbol; 9044297a3b0SGarrett D'Amore } 9054297a3b0SGarrett D'Amore if ((c == '\n' && m->g->cflags®_NEWLINE) || 9064297a3b0SGarrett D'Amore (c == OUT && !(m->eflags®_NOTEOL))) { 9074297a3b0SGarrett D'Amore flagch = (flagch == BOL) ? BOLEOL : EOL; 9084297a3b0SGarrett D'Amore i += m->g->neol; 9094297a3b0SGarrett D'Amore } 9104297a3b0SGarrett D'Amore if (i != 0) { 9114297a3b0SGarrett D'Amore for (; i > 0; i--) 9124297a3b0SGarrett D'Amore st = step(m->g, startst, stopst, st, 9134297a3b0SGarrett D'Amore flagch, st); 9144297a3b0SGarrett D'Amore SP("sboleol", st, c); 9154297a3b0SGarrett D'Amore } 9164297a3b0SGarrett D'Amore 9174297a3b0SGarrett D'Amore /* how about a word boundary? */ 9184297a3b0SGarrett D'Amore if ((flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && 9194297a3b0SGarrett D'Amore (c != OUT && ISWORD(c))) { 9204297a3b0SGarrett D'Amore flagch = BOW; 9214297a3b0SGarrett D'Amore } 9224297a3b0SGarrett D'Amore if ((lastc != OUT && ISWORD(lastc)) && 9234297a3b0SGarrett D'Amore (flagch == EOL || (c != OUT && !ISWORD(c)))) { 9244297a3b0SGarrett D'Amore flagch = EOW; 9254297a3b0SGarrett D'Amore } 9264297a3b0SGarrett D'Amore if (flagch == BOW || flagch == EOW) { 9274297a3b0SGarrett D'Amore st = step(m->g, startst, stopst, st, flagch, st); 9284297a3b0SGarrett D'Amore SP("sboweow", st, c); 9294297a3b0SGarrett D'Amore } 9304297a3b0SGarrett D'Amore 9314297a3b0SGarrett D'Amore /* are we done? */ 9324297a3b0SGarrett D'Amore if (ISSET(st, stopst)) 9334297a3b0SGarrett D'Amore matchp = p; 9344297a3b0SGarrett D'Amore if (EQ(st, empty) || p == stop || clen > stop - p) 9354297a3b0SGarrett D'Amore break; /* NOTE BREAK OUT */ 9364297a3b0SGarrett D'Amore 9374297a3b0SGarrett D'Amore /* no, we must deal with this character */ 9384297a3b0SGarrett D'Amore ASSIGN(tmp, st); 9394297a3b0SGarrett D'Amore ASSIGN(st, empty); 9404297a3b0SGarrett D'Amore assert(c != OUT); 9414297a3b0SGarrett D'Amore st = step(m->g, startst, stopst, tmp, c, st); 9424297a3b0SGarrett D'Amore SP("saft", st, c); 9434297a3b0SGarrett D'Amore assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); 9444297a3b0SGarrett D'Amore p += clen; 9454297a3b0SGarrett D'Amore } 9464297a3b0SGarrett D'Amore 9474297a3b0SGarrett D'Amore return (matchp); 9484297a3b0SGarrett D'Amore } 9494297a3b0SGarrett D'Amore 9504297a3b0SGarrett D'Amore 9514297a3b0SGarrett D'Amore /* 9524297a3b0SGarrett D'Amore * step - map set of states reachable before char to set reachable after 9534297a3b0SGarrett D'Amore */ 9544297a3b0SGarrett D'Amore static states 9554297a3b0SGarrett D'Amore step(struct re_guts *g, 9564297a3b0SGarrett D'Amore sopno start, /* start state within strip */ 9574297a3b0SGarrett D'Amore sopno stop, /* state after stop state within strip */ 9584297a3b0SGarrett D'Amore states bef, /* states reachable before */ 9594297a3b0SGarrett D'Amore wint_t ch, /* character or NONCHAR code */ 9604297a3b0SGarrett D'Amore states aft) /* states already known reachable after */ 9614297a3b0SGarrett D'Amore { 9624297a3b0SGarrett D'Amore cset *cs; 9634297a3b0SGarrett D'Amore sop s; 9644297a3b0SGarrett D'Amore sopno pc; 9654297a3b0SGarrett D'Amore onestate here; /* note, macros know this name */ 9664297a3b0SGarrett D'Amore sopno look; 9674297a3b0SGarrett D'Amore int i; 9684297a3b0SGarrett D'Amore 9694297a3b0SGarrett D'Amore for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 9704297a3b0SGarrett D'Amore s = g->strip[pc]; 9714297a3b0SGarrett D'Amore switch (OP(s)) { 9724297a3b0SGarrett D'Amore case OEND: 9734297a3b0SGarrett D'Amore assert(pc == stop-1); 9744297a3b0SGarrett D'Amore break; 9754297a3b0SGarrett D'Amore case OCHAR: 9764297a3b0SGarrett D'Amore /* only characters can match */ 9774297a3b0SGarrett D'Amore assert(!NONCHAR(ch) || ch != OPND(s)); 9784297a3b0SGarrett D'Amore if (ch == OPND(s)) 9794297a3b0SGarrett D'Amore FWD(aft, bef, 1); 9804297a3b0SGarrett D'Amore break; 9814297a3b0SGarrett D'Amore case OBOL: 9824297a3b0SGarrett D'Amore if (ch == BOL || ch == BOLEOL) 9834297a3b0SGarrett D'Amore FWD(aft, bef, 1); 9844297a3b0SGarrett D'Amore break; 9854297a3b0SGarrett D'Amore case OEOL: 9864297a3b0SGarrett D'Amore if (ch == EOL || ch == BOLEOL) 9874297a3b0SGarrett D'Amore FWD(aft, bef, 1); 9884297a3b0SGarrett D'Amore break; 9894297a3b0SGarrett D'Amore case OBOW: 9904297a3b0SGarrett D'Amore if (ch == BOW) 9914297a3b0SGarrett D'Amore FWD(aft, bef, 1); 9924297a3b0SGarrett D'Amore break; 9934297a3b0SGarrett D'Amore case OEOW: 9944297a3b0SGarrett D'Amore if (ch == EOW) 9954297a3b0SGarrett D'Amore FWD(aft, bef, 1); 9964297a3b0SGarrett D'Amore break; 9974297a3b0SGarrett D'Amore case OANY: 9984297a3b0SGarrett D'Amore if (!NONCHAR(ch)) 9994297a3b0SGarrett D'Amore FWD(aft, bef, 1); 10004297a3b0SGarrett D'Amore break; 10014297a3b0SGarrett D'Amore case OANYOF: 10024297a3b0SGarrett D'Amore cs = &g->sets[OPND(s)]; 10034297a3b0SGarrett D'Amore if (!NONCHAR(ch) && CHIN(cs, ch)) 10044297a3b0SGarrett D'Amore FWD(aft, bef, 1); 10054297a3b0SGarrett D'Amore break; 10064297a3b0SGarrett D'Amore case OBACK_: /* ignored here */ 10074297a3b0SGarrett D'Amore case O_BACK: 10084297a3b0SGarrett D'Amore FWD(aft, aft, 1); 10094297a3b0SGarrett D'Amore break; 10104297a3b0SGarrett D'Amore case OPLUS_: /* forward, this is just an empty */ 10114297a3b0SGarrett D'Amore FWD(aft, aft, 1); 10124297a3b0SGarrett D'Amore break; 10134297a3b0SGarrett D'Amore case O_PLUS: /* both forward and back */ 10144297a3b0SGarrett D'Amore FWD(aft, aft, 1); 10154297a3b0SGarrett D'Amore i = ISSETBACK(aft, OPND(s)); 10164297a3b0SGarrett D'Amore BACK(aft, aft, OPND(s)); 10174297a3b0SGarrett D'Amore if (!i && ISSETBACK(aft, OPND(s))) { 10184297a3b0SGarrett D'Amore /* oho, must reconsider loop body */ 10194297a3b0SGarrett D'Amore pc -= OPND(s) + 1; 10204297a3b0SGarrett D'Amore INIT(here, pc); 10214297a3b0SGarrett D'Amore } 10224297a3b0SGarrett D'Amore break; 10234297a3b0SGarrett D'Amore case OQUEST_: /* two branches, both forward */ 10244297a3b0SGarrett D'Amore FWD(aft, aft, 1); 10254297a3b0SGarrett D'Amore FWD(aft, aft, OPND(s)); 10264297a3b0SGarrett D'Amore break; 10274297a3b0SGarrett D'Amore case O_QUEST: /* just an empty */ 10284297a3b0SGarrett D'Amore FWD(aft, aft, 1); 10294297a3b0SGarrett D'Amore break; 10304297a3b0SGarrett D'Amore case OLPAREN: /* not significant here */ 10314297a3b0SGarrett D'Amore case ORPAREN: 10324297a3b0SGarrett D'Amore FWD(aft, aft, 1); 10334297a3b0SGarrett D'Amore break; 10344297a3b0SGarrett D'Amore case OCH_: /* mark the first two branches */ 10354297a3b0SGarrett D'Amore FWD(aft, aft, 1); 10364297a3b0SGarrett D'Amore assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10374297a3b0SGarrett D'Amore FWD(aft, aft, OPND(s)); 10384297a3b0SGarrett D'Amore break; 10394297a3b0SGarrett D'Amore case OOR1: /* done a branch, find the O_CH */ 10404297a3b0SGarrett D'Amore if (ISSTATEIN(aft, here)) { 10414297a3b0SGarrett D'Amore for (look = 1; 10424297a3b0SGarrett D'Amore OP(s = g->strip[pc+look]) != O_CH; 10434297a3b0SGarrett D'Amore look += OPND(s)) 10444297a3b0SGarrett D'Amore assert(OP(s) == OOR2); 10454297a3b0SGarrett D'Amore FWD(aft, aft, look + 1); 10464297a3b0SGarrett D'Amore } 10474297a3b0SGarrett D'Amore break; 10484297a3b0SGarrett D'Amore case OOR2: /* propagate OCH_'s marking */ 10494297a3b0SGarrett D'Amore FWD(aft, aft, 1); 10504297a3b0SGarrett D'Amore if (OP(g->strip[pc+OPND(s)]) != O_CH) { 10514297a3b0SGarrett D'Amore assert(OP(g->strip[pc+OPND(s)]) == OOR2); 10524297a3b0SGarrett D'Amore FWD(aft, aft, OPND(s)); 10534297a3b0SGarrett D'Amore } 10544297a3b0SGarrett D'Amore break; 10554297a3b0SGarrett D'Amore case O_CH: /* just empty */ 10564297a3b0SGarrett D'Amore FWD(aft, aft, 1); 10574297a3b0SGarrett D'Amore break; 10584297a3b0SGarrett D'Amore default: /* ooooops... */ 10594297a3b0SGarrett D'Amore assert(0); 10604297a3b0SGarrett D'Amore break; 10614297a3b0SGarrett D'Amore } 10624297a3b0SGarrett D'Amore } 10634297a3b0SGarrett D'Amore 10644297a3b0SGarrett D'Amore return (aft); 10654297a3b0SGarrett D'Amore } 10664297a3b0SGarrett D'Amore 10674297a3b0SGarrett D'Amore #ifdef REDEBUG 10684297a3b0SGarrett D'Amore /* 10694297a3b0SGarrett D'Amore * print - print a set of states 10704297a3b0SGarrett D'Amore */ 10714297a3b0SGarrett D'Amore static void 10724297a3b0SGarrett D'Amore print(struct match *m, const char *caption, states st, int ch, FILE *d) 10734297a3b0SGarrett D'Amore { 10744297a3b0SGarrett D'Amore struct re_guts *g = m->g; 10754297a3b0SGarrett D'Amore int i; 10764297a3b0SGarrett D'Amore int first = 1; 10774297a3b0SGarrett D'Amore 10784297a3b0SGarrett D'Amore if (!(m->eflags®_TRACE)) 10794297a3b0SGarrett D'Amore return; 10804297a3b0SGarrett D'Amore 10814297a3b0SGarrett D'Amore (void) fprintf(d, "%s", caption); 10824297a3b0SGarrett D'Amore if (ch != '\0') 10834297a3b0SGarrett D'Amore (void) fprintf(d, " %s", pchar(ch)); 10844297a3b0SGarrett D'Amore for (i = 0; i < g->nstates; i++) 10854297a3b0SGarrett D'Amore if (ISSET(st, i)) { 10864297a3b0SGarrett D'Amore (void) fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 10874297a3b0SGarrett D'Amore first = 0; 10884297a3b0SGarrett D'Amore } 10894297a3b0SGarrett D'Amore (void) fprintf(d, "\n"); 10904297a3b0SGarrett D'Amore } 10914297a3b0SGarrett D'Amore 10924297a3b0SGarrett D'Amore /* 10934297a3b0SGarrett D'Amore * at - print current situation 10944297a3b0SGarrett D'Amore */ 10954297a3b0SGarrett D'Amore static void 10964297a3b0SGarrett D'Amore at(struct match *m, const char *title, const char *start, const char *stop, 10974297a3b0SGarrett D'Amore sopno startst, sopno stopst) 10984297a3b0SGarrett D'Amore { 10994297a3b0SGarrett D'Amore if (!(m->eflags®_TRACE)) 11004297a3b0SGarrett D'Amore return; 11014297a3b0SGarrett D'Amore 11024297a3b0SGarrett D'Amore (void) printf("%s %s-", title, pchar(*start)); 11034297a3b0SGarrett D'Amore (void) printf("%s ", pchar(*stop)); 11044297a3b0SGarrett D'Amore (void) printf("%ld-%ld\n", (long)startst, (long)stopst); 11054297a3b0SGarrett D'Amore } 11064297a3b0SGarrett D'Amore 11074297a3b0SGarrett D'Amore #ifndef PCHARDONE 11084297a3b0SGarrett D'Amore #define PCHARDONE /* never again */ 11094297a3b0SGarrett D'Amore /* 11104297a3b0SGarrett D'Amore * pchar - make a character printable 11114297a3b0SGarrett D'Amore * 11124297a3b0SGarrett D'Amore * Is this identical to regchar() over in debug.c? Well, yes. But a 11134297a3b0SGarrett D'Amore * duplicate here avoids having a debugging-capable regexec.o tied to 11144297a3b0SGarrett D'Amore * a matching debug.o, and this is convenient. It all disappears in 11154297a3b0SGarrett D'Amore * the non-debug compilation anyway, so it doesn't matter much. 11164297a3b0SGarrett D'Amore */ 11174297a3b0SGarrett D'Amore static const char * 11184297a3b0SGarrett D'Amore pchar(int ch) 11194297a3b0SGarrett D'Amore { 11204297a3b0SGarrett D'Amore static char pbuf[10]; 11214297a3b0SGarrett D'Amore 11224297a3b0SGarrett D'Amore if (isprint((uch)ch) || ch == ' ') 11234297a3b0SGarrett D'Amore (void) sprintf(pbuf, "%c", ch); 11244297a3b0SGarrett D'Amore else 11254297a3b0SGarrett D'Amore (void) sprintf(pbuf, "\\%o", ch); 11264297a3b0SGarrett D'Amore return (pbuf); 11274297a3b0SGarrett D'Amore } 11284297a3b0SGarrett D'Amore #endif 11294297a3b0SGarrett D'Amore #endif 11304297a3b0SGarrett D'Amore 11314297a3b0SGarrett D'Amore #undef matcher 11324297a3b0SGarrett D'Amore #undef fast 11334297a3b0SGarrett D'Amore #undef slow 11344297a3b0SGarrett D'Amore #undef dissect 11354297a3b0SGarrett D'Amore #undef backref 11364297a3b0SGarrett D'Amore #undef step 11374297a3b0SGarrett D'Amore #undef print 11384297a3b0SGarrett D'Amore #undef at 11394297a3b0SGarrett D'Amore #undef match 1140