1f0957ccaSPeter Wemm /* $NetBSD: engine.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */ 2f0957ccaSPeter Wemm 3f0957ccaSPeter Wemm /*- 4f0957ccaSPeter Wemm * Copyright (c) 1992, 1993, 1994 Henry Spencer. 5f0957ccaSPeter Wemm * Copyright (c) 1992, 1993, 1994 6f0957ccaSPeter Wemm * The Regents of the University of California. All rights reserved. 7f0957ccaSPeter Wemm * 8f0957ccaSPeter Wemm * This code is derived from software contributed to Berkeley by 9f0957ccaSPeter Wemm * Henry Spencer of the University of Toronto. 10f0957ccaSPeter Wemm * 11f0957ccaSPeter Wemm * Redistribution and use in source and binary forms, with or without 12f0957ccaSPeter Wemm * modification, are permitted provided that the following conditions 13f0957ccaSPeter Wemm * are met: 14f0957ccaSPeter Wemm * 1. Redistributions of source code must retain the above copyright 15f0957ccaSPeter Wemm * notice, this list of conditions and the following disclaimer. 16f0957ccaSPeter Wemm * 2. Redistributions in binary form must reproduce the above copyright 17f0957ccaSPeter Wemm * notice, this list of conditions and the following disclaimer in the 18f0957ccaSPeter Wemm * documentation and/or other materials provided with the distribution. 19*c271fa92SBaptiste Daroussin * 3. Neither the name of the University nor the names of its contributors 20f0957ccaSPeter Wemm * may be used to endorse or promote products derived from this software 21f0957ccaSPeter Wemm * without specific prior written permission. 22f0957ccaSPeter Wemm * 23f0957ccaSPeter Wemm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24f0957ccaSPeter Wemm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25f0957ccaSPeter Wemm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26f0957ccaSPeter Wemm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27f0957ccaSPeter Wemm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28f0957ccaSPeter Wemm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29f0957ccaSPeter Wemm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30f0957ccaSPeter Wemm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31f0957ccaSPeter Wemm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32f0957ccaSPeter Wemm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33f0957ccaSPeter Wemm * SUCH DAMAGE. 34f0957ccaSPeter Wemm * 35f0957ccaSPeter Wemm * @(#)engine.c 8.4 (Berkeley) 3/19/94 36f0957ccaSPeter Wemm */ 37f0957ccaSPeter Wemm 38f0957ccaSPeter Wemm /* 39f0957ccaSPeter Wemm * The matching engine and friends. This file is #included by regexec.c 40f0957ccaSPeter Wemm * after suitable #defines of a variety of macros used herein, so that 41f0957ccaSPeter Wemm * different state representations can be used without duplicating masses 42f0957ccaSPeter Wemm * of code. 43f0957ccaSPeter Wemm */ 44f0957ccaSPeter Wemm 45f0957ccaSPeter Wemm #ifdef SNAMES 46f0957ccaSPeter Wemm #define matcher smatcher 47f0957ccaSPeter Wemm #define fast sfast 48f0957ccaSPeter Wemm #define slow sslow 49f0957ccaSPeter Wemm #define dissect sdissect 50f0957ccaSPeter Wemm #define backref sbackref 51f0957ccaSPeter Wemm #define step sstep 52f0957ccaSPeter Wemm #define print sprint 53f0957ccaSPeter Wemm #define at sat 54f0957ccaSPeter Wemm #define match smat 55f0957ccaSPeter Wemm #endif 56f0957ccaSPeter Wemm #ifdef LNAMES 57f0957ccaSPeter Wemm #define matcher lmatcher 58f0957ccaSPeter Wemm #define fast lfast 59f0957ccaSPeter Wemm #define slow lslow 60f0957ccaSPeter Wemm #define dissect ldissect 61f0957ccaSPeter Wemm #define backref lbackref 62f0957ccaSPeter Wemm #define step lstep 63f0957ccaSPeter Wemm #define print lprint 64f0957ccaSPeter Wemm #define at lat 65f0957ccaSPeter Wemm #define match lmat 66f0957ccaSPeter Wemm #endif 67f0957ccaSPeter Wemm 68f0957ccaSPeter Wemm /* another structure passed up and down to avoid zillions of parameters */ 69f0957ccaSPeter Wemm struct match { 70f0957ccaSPeter Wemm struct re_guts *g; 71f0957ccaSPeter Wemm int eflags; 72f0957ccaSPeter Wemm regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 73f0957ccaSPeter Wemm const RCHAR_T *offp; /* offsets work from here */ 74f0957ccaSPeter Wemm const RCHAR_T *beginp; /* start of string -- virtual NUL precedes */ 75f0957ccaSPeter Wemm const RCHAR_T *endp; /* end of string -- virtual NUL here */ 76f0957ccaSPeter Wemm const RCHAR_T *coldp; /* can be no match starting before here */ 77f0957ccaSPeter Wemm const RCHAR_T **lastpos; /* [nplus+1] */ 78f0957ccaSPeter Wemm STATEVARS; 79f0957ccaSPeter Wemm states st; /* current states */ 80f0957ccaSPeter Wemm states fresh; /* states for a fresh start */ 81f0957ccaSPeter Wemm states tmp; /* temporary */ 82f0957ccaSPeter Wemm states empty; /* empty set of states */ 83f0957ccaSPeter Wemm }; 84f0957ccaSPeter Wemm 85f0957ccaSPeter Wemm /* ========= begin header generated by ./mkh ========= */ 86f0957ccaSPeter Wemm #ifdef __cplusplus 87f0957ccaSPeter Wemm extern "C" { 88f0957ccaSPeter Wemm #endif 89f0957ccaSPeter Wemm 90f0957ccaSPeter Wemm /* === engine.c === */ 91*c271fa92SBaptiste Daroussin static int matcher(struct re_guts *g, const RCHAR_T *string, size_t nmatch, regmatch_t pmatch[], int eflags); 92*c271fa92SBaptiste Daroussin static const RCHAR_T *dissect(struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, sopno stopst); 93*c271fa92SBaptiste Daroussin static const RCHAR_T *backref(struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, sopno stopst, sopno lev); 94*c271fa92SBaptiste Daroussin static const RCHAR_T *fast(struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, sopno stopst); 95*c271fa92SBaptiste Daroussin static const RCHAR_T *slow(struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, sopno stopst); 96*c271fa92SBaptiste Daroussin static states step(struct re_guts *g, sopno start, sopno stop, states bef, int flag, RCHAR_T ch, states aft); 97f0957ccaSPeter Wemm #define BOL (1) 98f0957ccaSPeter Wemm #define EOL (BOL+1) 99f0957ccaSPeter Wemm #define BOLEOL (BOL+2) 100f0957ccaSPeter Wemm #define NOTHING (BOL+3) 101f0957ccaSPeter Wemm #define BOW (BOL+4) 102f0957ccaSPeter Wemm #define EOW (BOL+5) 103f0957ccaSPeter Wemm #ifdef REDEBUG 104*c271fa92SBaptiste Daroussin static void print(struct match *m, char *caption, states st, int ch, FILE *d); 105f0957ccaSPeter Wemm #endif 106f0957ccaSPeter Wemm #ifdef REDEBUG 107*c271fa92SBaptiste Daroussin static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst); 108f0957ccaSPeter Wemm #endif 109f0957ccaSPeter Wemm #ifdef REDEBUG 110*c271fa92SBaptiste Daroussin static char *pchar(int ch); 111f0957ccaSPeter Wemm #endif 112f0957ccaSPeter Wemm 113f0957ccaSPeter Wemm #ifdef __cplusplus 114f0957ccaSPeter Wemm } 115f0957ccaSPeter Wemm #endif 116f0957ccaSPeter Wemm /* ========= end header generated by ./mkh ========= */ 117f0957ccaSPeter Wemm 118f0957ccaSPeter Wemm #ifdef REDEBUG 119f0957ccaSPeter Wemm #define SP(t, s, c) print(m, t, s, c, stdout) 120f0957ccaSPeter Wemm #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 121f0957ccaSPeter Wemm #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 122f0957ccaSPeter Wemm #else 123f0957ccaSPeter Wemm #define SP(t, s, c) /* nothing */ 124f0957ccaSPeter Wemm #define AT(t, p1, p2, s1, s2) /* nothing */ 125f0957ccaSPeter Wemm #define NOTE(s) /* nothing */ 126f0957ccaSPeter Wemm #endif 127f0957ccaSPeter Wemm 128f0957ccaSPeter Wemm /* 129f0957ccaSPeter Wemm - matcher - the actual matching engine 130f0957ccaSPeter Wemm */ 131f0957ccaSPeter Wemm static int /* 0 success, REG_NOMATCH failure */ 132*c271fa92SBaptiste Daroussin matcher(struct re_guts *g, const RCHAR_T *string, size_t nmatch, 133*c271fa92SBaptiste Daroussin regmatch_t pmatch[], int eflags) 134f0957ccaSPeter Wemm { 135*c271fa92SBaptiste Daroussin const RCHAR_T *endp; 136*c271fa92SBaptiste Daroussin size_t i; 137f0957ccaSPeter Wemm struct match mv; 138*c271fa92SBaptiste Daroussin struct match *m = &mv; 139*c271fa92SBaptiste Daroussin const RCHAR_T *dp; 140*c271fa92SBaptiste Daroussin const sopno gf = g->firststate+1; /* +1 for OEND */ 141*c271fa92SBaptiste Daroussin const sopno gl = g->laststate; 142f0957ccaSPeter Wemm const RCHAR_T *start; 143f0957ccaSPeter Wemm const RCHAR_T *stop; 144f0957ccaSPeter Wemm 145f0957ccaSPeter Wemm /* simplify the situation where possible */ 146f0957ccaSPeter Wemm if (g->cflags®_NOSUB) 147f0957ccaSPeter Wemm nmatch = 0; 148f0957ccaSPeter Wemm if (eflags®_STARTEND) { 149f0957ccaSPeter Wemm start = string + pmatch[0].rm_so; 150f0957ccaSPeter Wemm stop = string + pmatch[0].rm_eo; 151f0957ccaSPeter Wemm } else { 152f0957ccaSPeter Wemm start = string; 153f0957ccaSPeter Wemm stop = start + STRLEN(start); 154f0957ccaSPeter Wemm } 155f0957ccaSPeter Wemm if (stop < start) 156f0957ccaSPeter Wemm return(REG_INVARG); 157f0957ccaSPeter Wemm 158f0957ccaSPeter Wemm /* prescreening; this does wonders for this rather slow code */ 159f0957ccaSPeter Wemm if (g->must != NULL) { 160f0957ccaSPeter Wemm for (dp = start; dp < stop; dp++) 161f0957ccaSPeter Wemm if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen && 162f0957ccaSPeter Wemm MEMCMP(dp, g->must, g->mlen) == 0) 163f0957ccaSPeter Wemm break; 164f0957ccaSPeter Wemm if (dp == stop) /* we didn't find g->must */ 165f0957ccaSPeter Wemm return(REG_NOMATCH); 166f0957ccaSPeter Wemm } 167f0957ccaSPeter Wemm 168f0957ccaSPeter Wemm /* match struct setup */ 169f0957ccaSPeter Wemm m->g = g; 170f0957ccaSPeter Wemm m->eflags = eflags; 171f0957ccaSPeter Wemm m->pmatch = NULL; 172f0957ccaSPeter Wemm m->lastpos = NULL; 173f0957ccaSPeter Wemm m->offp = string; 174f0957ccaSPeter Wemm m->beginp = start; 175f0957ccaSPeter Wemm m->endp = stop; 176f0957ccaSPeter Wemm STATESETUP(m, 4); 177f0957ccaSPeter Wemm SETUP(m->st); 178f0957ccaSPeter Wemm SETUP(m->fresh); 179f0957ccaSPeter Wemm SETUP(m->tmp); 180f0957ccaSPeter Wemm SETUP(m->empty); 181f0957ccaSPeter Wemm CLEAR(m->empty); 182f0957ccaSPeter Wemm 183f0957ccaSPeter Wemm /* this loop does only one repetition except for backrefs */ 184f0957ccaSPeter Wemm for (;;) { 185f0957ccaSPeter Wemm endp = fast(m, start, stop, gf, gl); 186f0957ccaSPeter Wemm if (endp == NULL) { /* a miss */ 187f0957ccaSPeter Wemm STATETEARDOWN(m); 188f0957ccaSPeter Wemm return(REG_NOMATCH); 189f0957ccaSPeter Wemm } 190f0957ccaSPeter Wemm if (nmatch == 0 && !g->backrefs) 191f0957ccaSPeter Wemm break; /* no further info needed */ 192f0957ccaSPeter Wemm 193f0957ccaSPeter Wemm /* where? */ 194f0957ccaSPeter Wemm assert(m->coldp != NULL); 195f0957ccaSPeter Wemm for (;;) { 196f0957ccaSPeter Wemm NOTE("finding start"); 197f0957ccaSPeter Wemm endp = slow(m, m->coldp, stop, gf, gl); 198f0957ccaSPeter Wemm if (endp != NULL) 199f0957ccaSPeter Wemm break; 200f0957ccaSPeter Wemm assert(m->coldp < m->endp); 201f0957ccaSPeter Wemm m->coldp++; 202f0957ccaSPeter Wemm } 203f0957ccaSPeter Wemm if (nmatch == 1 && !g->backrefs) 204f0957ccaSPeter Wemm break; /* no further info needed */ 205f0957ccaSPeter Wemm 206f0957ccaSPeter Wemm /* oh my, he wants the subexpressions... */ 207f0957ccaSPeter Wemm if (m->pmatch == NULL) 208f0957ccaSPeter Wemm m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 209f0957ccaSPeter Wemm sizeof(regmatch_t)); 210f0957ccaSPeter Wemm if (m->pmatch == NULL) { 211f0957ccaSPeter Wemm STATETEARDOWN(m); 212f0957ccaSPeter Wemm return(REG_ESPACE); 213f0957ccaSPeter Wemm } 214f0957ccaSPeter Wemm for (i = 1; i <= m->g->nsub; i++) 215f0957ccaSPeter Wemm m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 216f0957ccaSPeter Wemm if (!g->backrefs && !(m->eflags®_BACKR)) { 217f0957ccaSPeter Wemm NOTE("dissecting"); 218f0957ccaSPeter Wemm dp = dissect(m, m->coldp, endp, gf, gl); 219f0957ccaSPeter Wemm } else { 220f0957ccaSPeter Wemm if (g->nplus > 0 && m->lastpos == NULL) 221f0957ccaSPeter Wemm m->lastpos = (const RCHAR_T **)malloc((g->nplus+1) * 222f0957ccaSPeter Wemm sizeof(const RCHAR_T *)); 223f0957ccaSPeter Wemm if (g->nplus > 0 && m->lastpos == NULL) { 224f0957ccaSPeter Wemm free(m->pmatch); 225f0957ccaSPeter Wemm STATETEARDOWN(m); 226f0957ccaSPeter Wemm return(REG_ESPACE); 227f0957ccaSPeter Wemm } 228f0957ccaSPeter Wemm NOTE("backref dissect"); 229f0957ccaSPeter Wemm dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 230f0957ccaSPeter Wemm } 231f0957ccaSPeter Wemm if (dp != NULL) 232f0957ccaSPeter Wemm break; 233f0957ccaSPeter Wemm 234f0957ccaSPeter Wemm /* uh-oh... we couldn't find a subexpression-level match */ 235f0957ccaSPeter Wemm assert(g->backrefs); /* must be back references doing it */ 236f0957ccaSPeter Wemm assert(g->nplus == 0 || m->lastpos != NULL); 237f0957ccaSPeter Wemm for (;;) { 238f0957ccaSPeter Wemm if (dp != NULL || endp <= m->coldp) 239f0957ccaSPeter Wemm break; /* defeat */ 240f0957ccaSPeter Wemm NOTE("backoff"); 241f0957ccaSPeter Wemm endp = slow(m, m->coldp, endp-1, gf, gl); 242f0957ccaSPeter Wemm if (endp == NULL) 243f0957ccaSPeter Wemm break; /* defeat */ 244f0957ccaSPeter Wemm /* try it on a shorter possibility */ 245f0957ccaSPeter Wemm #ifndef NDEBUG 246f0957ccaSPeter Wemm for (i = 1; i <= m->g->nsub; i++) { 247f0957ccaSPeter Wemm assert(m->pmatch[i].rm_so == -1); 248f0957ccaSPeter Wemm assert(m->pmatch[i].rm_eo == -1); 249f0957ccaSPeter Wemm } 250f0957ccaSPeter Wemm #endif 251f0957ccaSPeter Wemm NOTE("backoff dissect"); 252f0957ccaSPeter Wemm dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 253f0957ccaSPeter Wemm } 254f0957ccaSPeter Wemm assert(dp == NULL || dp == endp); 255f0957ccaSPeter Wemm if (dp != NULL) /* found a shorter one */ 256f0957ccaSPeter Wemm break; 257f0957ccaSPeter Wemm 258f0957ccaSPeter Wemm /* despite initial appearances, there is no match here */ 259f0957ccaSPeter Wemm NOTE("false alarm"); 260f0957ccaSPeter Wemm start = m->coldp + 1; /* recycle starting later */ 261f0957ccaSPeter Wemm assert(start <= stop); 262f0957ccaSPeter Wemm } 263f0957ccaSPeter Wemm 264f0957ccaSPeter Wemm /* fill in the details if requested */ 265f0957ccaSPeter Wemm if (nmatch > 0) { 266f0957ccaSPeter Wemm pmatch[0].rm_so = m->coldp - m->offp; 267f0957ccaSPeter Wemm pmatch[0].rm_eo = endp - m->offp; 268f0957ccaSPeter Wemm } 269f0957ccaSPeter Wemm if (nmatch > 1) { 270f0957ccaSPeter Wemm assert(m->pmatch != NULL); 271f0957ccaSPeter Wemm for (i = 1; i < nmatch; i++) 272f0957ccaSPeter Wemm if (i <= m->g->nsub) 273f0957ccaSPeter Wemm pmatch[i] = m->pmatch[i]; 274f0957ccaSPeter Wemm else { 275f0957ccaSPeter Wemm pmatch[i].rm_so = -1; 276f0957ccaSPeter Wemm pmatch[i].rm_eo = -1; 277f0957ccaSPeter Wemm } 278f0957ccaSPeter Wemm } 279f0957ccaSPeter Wemm 280f0957ccaSPeter Wemm if (m->pmatch != NULL) 281f0957ccaSPeter Wemm free((char *)m->pmatch); 282f0957ccaSPeter Wemm if (m->lastpos != NULL) 283f0957ccaSPeter Wemm free((char *)m->lastpos); 284f0957ccaSPeter Wemm STATETEARDOWN(m); 285f0957ccaSPeter Wemm return(0); 286f0957ccaSPeter Wemm } 287f0957ccaSPeter Wemm 288f0957ccaSPeter Wemm /* 289f0957ccaSPeter Wemm - dissect - figure out what matched what, no back references 290f0957ccaSPeter Wemm */ 291f0957ccaSPeter Wemm static const RCHAR_T * /* == stop (success) always */ 292*c271fa92SBaptiste Daroussin dissect(struct match *m, const RCHAR_T *start, const RCHAR_T *stop, 293*c271fa92SBaptiste Daroussin sopno startst, sopno stopst) 294f0957ccaSPeter Wemm { 295*c271fa92SBaptiste Daroussin int i; 296*c271fa92SBaptiste Daroussin sopno ss; /* start sop of current subRE */ 297*c271fa92SBaptiste Daroussin sopno es; /* end sop of current subRE */ 298*c271fa92SBaptiste Daroussin const RCHAR_T *sp; /* start of string matched by it */ 299*c271fa92SBaptiste Daroussin const RCHAR_T *stp; /* string matched by it cannot pass here */ 300*c271fa92SBaptiste Daroussin const RCHAR_T *rest; /* start of rest of string */ 301*c271fa92SBaptiste Daroussin const RCHAR_T *tail; /* string unmatched by rest of RE */ 302*c271fa92SBaptiste Daroussin sopno ssub; /* start sop of subsubRE */ 303*c271fa92SBaptiste Daroussin sopno esub; /* end sop of subsubRE */ 304*c271fa92SBaptiste Daroussin const RCHAR_T *ssp; /* start of string matched by subsubRE */ 305*c271fa92SBaptiste Daroussin const RCHAR_T *sep; /* end of string matched by subsubRE */ 306*c271fa92SBaptiste Daroussin const RCHAR_T *oldssp; /* previous ssp */ 307*c271fa92SBaptiste Daroussin const RCHAR_T *dp; 308f0957ccaSPeter Wemm 309f0957ccaSPeter Wemm AT("diss", start, stop, startst, stopst); 310f0957ccaSPeter Wemm sp = start; 311f0957ccaSPeter Wemm for (ss = startst; ss < stopst; ss = es) { 312f0957ccaSPeter Wemm /* identify end of subRE */ 313f0957ccaSPeter Wemm es = ss; 314f0957ccaSPeter Wemm switch (m->g->strip[es]) { 315f0957ccaSPeter Wemm case OPLUS_: 316f0957ccaSPeter Wemm case OQUEST_: 317f0957ccaSPeter Wemm es += m->g->stripdata[es]; 318f0957ccaSPeter Wemm break; 319f0957ccaSPeter Wemm case OCH_: 320f0957ccaSPeter Wemm while (m->g->strip[es] != O_CH) 321f0957ccaSPeter Wemm es += m->g->stripdata[es]; 322f0957ccaSPeter Wemm break; 323f0957ccaSPeter Wemm } 324f0957ccaSPeter Wemm es++; 325f0957ccaSPeter Wemm 326f0957ccaSPeter Wemm /* figure out what it matched */ 327f0957ccaSPeter Wemm switch (m->g->strip[ss]) { 328f0957ccaSPeter Wemm case OEND: 329f0957ccaSPeter Wemm assert(nope); 330f0957ccaSPeter Wemm break; 331f0957ccaSPeter Wemm case OCHAR: 332f0957ccaSPeter Wemm sp++; 333f0957ccaSPeter Wemm break; 334f0957ccaSPeter Wemm case OBOL: 335f0957ccaSPeter Wemm case OEOL: 336f0957ccaSPeter Wemm case OBOW: 337f0957ccaSPeter Wemm case OEOW: 338f0957ccaSPeter Wemm break; 339f0957ccaSPeter Wemm case OANY: 340f0957ccaSPeter Wemm case OANYOF: 341f0957ccaSPeter Wemm sp++; 342f0957ccaSPeter Wemm break; 343f0957ccaSPeter Wemm case OBACK_: 344f0957ccaSPeter Wemm case O_BACK: 345f0957ccaSPeter Wemm assert(nope); 346f0957ccaSPeter Wemm break; 347f0957ccaSPeter Wemm /* cases where length of match is hard to find */ 348f0957ccaSPeter Wemm case OQUEST_: 349f0957ccaSPeter Wemm stp = stop; 350f0957ccaSPeter Wemm for (;;) { 351f0957ccaSPeter Wemm /* how long could this one be? */ 352f0957ccaSPeter Wemm rest = slow(m, sp, stp, ss, es); 353f0957ccaSPeter Wemm assert(rest != NULL); /* it did match */ 354f0957ccaSPeter Wemm /* could the rest match the rest? */ 355f0957ccaSPeter Wemm tail = slow(m, rest, stop, es, stopst); 356f0957ccaSPeter Wemm if (tail == stop) 357f0957ccaSPeter Wemm break; /* yes! */ 358f0957ccaSPeter Wemm /* no -- try a shorter match for this one */ 359f0957ccaSPeter Wemm stp = rest - 1; 360f0957ccaSPeter Wemm assert(stp >= sp); /* it did work */ 361f0957ccaSPeter Wemm } 362f0957ccaSPeter Wemm ssub = ss + 1; 363f0957ccaSPeter Wemm esub = es - 1; 364f0957ccaSPeter Wemm /* did innards match? */ 365f0957ccaSPeter Wemm if (slow(m, sp, rest, ssub, esub) != NULL) { 366f0957ccaSPeter Wemm dp = dissect(m, sp, rest, ssub, esub); 367f0957ccaSPeter Wemm assert(dp == rest); 368f0957ccaSPeter Wemm } else /* no */ 369f0957ccaSPeter Wemm assert(sp == rest); 370f0957ccaSPeter Wemm sp = rest; 371f0957ccaSPeter Wemm break; 372f0957ccaSPeter Wemm case OPLUS_: 373f0957ccaSPeter Wemm stp = stop; 374f0957ccaSPeter Wemm for (;;) { 375f0957ccaSPeter Wemm /* how long could this one be? */ 376f0957ccaSPeter Wemm rest = slow(m, sp, stp, ss, es); 377f0957ccaSPeter Wemm assert(rest != NULL); /* it did match */ 378f0957ccaSPeter Wemm /* could the rest match the rest? */ 379f0957ccaSPeter Wemm tail = slow(m, rest, stop, es, stopst); 380f0957ccaSPeter Wemm if (tail == stop) 381f0957ccaSPeter Wemm break; /* yes! */ 382f0957ccaSPeter Wemm /* no -- try a shorter match for this one */ 383f0957ccaSPeter Wemm stp = rest - 1; 384f0957ccaSPeter Wemm assert(stp >= sp); /* it did work */ 385f0957ccaSPeter Wemm } 386f0957ccaSPeter Wemm ssub = ss + 1; 387f0957ccaSPeter Wemm esub = es - 1; 388f0957ccaSPeter Wemm ssp = sp; 389f0957ccaSPeter Wemm oldssp = ssp; 390f0957ccaSPeter Wemm for (;;) { /* find last match of innards */ 391f0957ccaSPeter Wemm sep = slow(m, ssp, rest, ssub, esub); 392f0957ccaSPeter Wemm if (sep == NULL || sep == ssp) 393f0957ccaSPeter Wemm break; /* failed or matched null */ 394f0957ccaSPeter Wemm oldssp = ssp; /* on to next try */ 395f0957ccaSPeter Wemm ssp = sep; 396f0957ccaSPeter Wemm } 397f0957ccaSPeter Wemm if (sep == NULL) { 398f0957ccaSPeter Wemm /* last successful match */ 399f0957ccaSPeter Wemm sep = ssp; 400f0957ccaSPeter Wemm ssp = oldssp; 401f0957ccaSPeter Wemm } 402f0957ccaSPeter Wemm assert(sep == rest); /* must exhaust substring */ 403f0957ccaSPeter Wemm assert(slow(m, ssp, sep, ssub, esub) == rest); 404f0957ccaSPeter Wemm dp = dissect(m, ssp, sep, ssub, esub); 405f0957ccaSPeter Wemm assert(dp == sep); 406f0957ccaSPeter Wemm sp = rest; 407f0957ccaSPeter Wemm break; 408f0957ccaSPeter Wemm case OCH_: 409f0957ccaSPeter Wemm stp = stop; 410f0957ccaSPeter Wemm for (;;) { 411f0957ccaSPeter Wemm /* how long could this one be? */ 412f0957ccaSPeter Wemm rest = slow(m, sp, stp, ss, es); 413f0957ccaSPeter Wemm assert(rest != NULL); /* it did match */ 414f0957ccaSPeter Wemm /* could the rest match the rest? */ 415f0957ccaSPeter Wemm tail = slow(m, rest, stop, es, stopst); 416f0957ccaSPeter Wemm if (tail == stop) 417f0957ccaSPeter Wemm break; /* yes! */ 418f0957ccaSPeter Wemm /* no -- try a shorter match for this one */ 419f0957ccaSPeter Wemm stp = rest - 1; 420f0957ccaSPeter Wemm assert(stp >= sp); /* it did work */ 421f0957ccaSPeter Wemm } 422f0957ccaSPeter Wemm ssub = ss + 1; 423f0957ccaSPeter Wemm esub = ss + m->g->stripdata[ss] - 1; 424f0957ccaSPeter Wemm assert(m->g->strip[esub] == OOR1); 425f0957ccaSPeter Wemm for (;;) { /* find first matching branch */ 426f0957ccaSPeter Wemm if (slow(m, sp, rest, ssub, esub) == rest) 427f0957ccaSPeter Wemm break; /* it matched all of it */ 428f0957ccaSPeter Wemm /* that one missed, try next one */ 429f0957ccaSPeter Wemm assert(m->g->strip[esub] == OOR1); 430f0957ccaSPeter Wemm esub++; 431f0957ccaSPeter Wemm assert(m->g->strip[esub] == OOR2); 432f0957ccaSPeter Wemm ssub = esub + 1; 433f0957ccaSPeter Wemm esub += m->g->stripdata[esub]; 434f0957ccaSPeter Wemm if (m->g->strip[esub] == OOR2) 435f0957ccaSPeter Wemm esub--; 436f0957ccaSPeter Wemm else 437f0957ccaSPeter Wemm assert(m->g->strip[esub] == O_CH); 438f0957ccaSPeter Wemm } 439f0957ccaSPeter Wemm dp = dissect(m, sp, rest, ssub, esub); 440f0957ccaSPeter Wemm assert(dp == rest); 441f0957ccaSPeter Wemm sp = rest; 442f0957ccaSPeter Wemm break; 443f0957ccaSPeter Wemm case O_PLUS: 444f0957ccaSPeter Wemm case O_QUEST: 445f0957ccaSPeter Wemm case OOR1: 446f0957ccaSPeter Wemm case OOR2: 447f0957ccaSPeter Wemm case O_CH: 448f0957ccaSPeter Wemm assert(nope); 449f0957ccaSPeter Wemm break; 450f0957ccaSPeter Wemm case OLPAREN: 451f0957ccaSPeter Wemm i = m->g->stripdata[ss]; 452f0957ccaSPeter Wemm assert(0 < i && i <= m->g->nsub); 453f0957ccaSPeter Wemm m->pmatch[i].rm_so = sp - m->offp; 454f0957ccaSPeter Wemm break; 455f0957ccaSPeter Wemm case ORPAREN: 456f0957ccaSPeter Wemm i = m->g->stripdata[ss]; 457f0957ccaSPeter Wemm assert(0 < i && i <= m->g->nsub); 458f0957ccaSPeter Wemm m->pmatch[i].rm_eo = sp - m->offp; 459f0957ccaSPeter Wemm break; 460f0957ccaSPeter Wemm default: /* uh oh */ 461f0957ccaSPeter Wemm assert(nope); 462f0957ccaSPeter Wemm break; 463f0957ccaSPeter Wemm } 464f0957ccaSPeter Wemm } 465f0957ccaSPeter Wemm 466f0957ccaSPeter Wemm assert(sp == stop); 467f0957ccaSPeter Wemm return(sp); 468f0957ccaSPeter Wemm } 469f0957ccaSPeter Wemm 470f0957ccaSPeter Wemm /* 471f0957ccaSPeter Wemm - backref - figure out what matched what, figuring in back references 472f0957ccaSPeter Wemm */ 473f0957ccaSPeter Wemm static const RCHAR_T * /* == stop (success) or NULL (failure) */ 474*c271fa92SBaptiste Daroussin backref(struct match *m, const RCHAR_T *start, const RCHAR_T *stop, 475*c271fa92SBaptiste Daroussin sopno startst, sopno stopst, sopno lev) /* PLUS nesting level */ 476f0957ccaSPeter Wemm { 477*c271fa92SBaptiste Daroussin int i; 478*c271fa92SBaptiste Daroussin sopno ss; /* start sop of current subRE */ 479*c271fa92SBaptiste Daroussin const RCHAR_T *sp; /* start of string matched by it */ 480*c271fa92SBaptiste Daroussin sopno ssub; /* start sop of subsubRE */ 481*c271fa92SBaptiste Daroussin sopno esub; /* end sop of subsubRE */ 482*c271fa92SBaptiste Daroussin const RCHAR_T *ssp; /* start of string matched by subsubRE */ 483*c271fa92SBaptiste Daroussin const RCHAR_T *dp; 484*c271fa92SBaptiste Daroussin size_t len; 485*c271fa92SBaptiste Daroussin int hard; 486*c271fa92SBaptiste Daroussin sop s; 487*c271fa92SBaptiste Daroussin RCHAR_T d; 488*c271fa92SBaptiste Daroussin regoff_t offsave; 489*c271fa92SBaptiste Daroussin cset *cs; 490f0957ccaSPeter Wemm 491f0957ccaSPeter Wemm AT("back", start, stop, startst, stopst); 492f0957ccaSPeter Wemm sp = start; 493f0957ccaSPeter Wemm 494f0957ccaSPeter Wemm /* get as far as we can with easy stuff */ 495f0957ccaSPeter Wemm hard = 0; 496f0957ccaSPeter Wemm for (ss = startst; !hard && ss < stopst; ss++) { 497f0957ccaSPeter Wemm s = m->g->strip[ss]; 498f0957ccaSPeter Wemm d = m->g->stripdata[ss]; 499f0957ccaSPeter Wemm switch (s) { 500f0957ccaSPeter Wemm case OCHAR: 501f0957ccaSPeter Wemm if (sp == stop || *sp++ != d) 502f0957ccaSPeter Wemm return(NULL); 503f0957ccaSPeter Wemm break; 504f0957ccaSPeter Wemm case OANY: 505f0957ccaSPeter Wemm if (sp == stop) 506f0957ccaSPeter Wemm return(NULL); 507f0957ccaSPeter Wemm sp++; 508f0957ccaSPeter Wemm break; 509f0957ccaSPeter Wemm case OANYOF: 510f0957ccaSPeter Wemm cs = &m->g->sets[d]; 511f0957ccaSPeter Wemm if (sp == stop || !CHIN(cs, *sp++)) 512f0957ccaSPeter Wemm return(NULL); 513f0957ccaSPeter Wemm break; 514f0957ccaSPeter Wemm case OBOL: 515f0957ccaSPeter Wemm if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 516f0957ccaSPeter Wemm (sp < m->endp && *(sp-1) == '\n' && 517f0957ccaSPeter Wemm (m->g->cflags®_NEWLINE)) ) 518f0957ccaSPeter Wemm { /* yes */ } 519f0957ccaSPeter Wemm else 520f0957ccaSPeter Wemm return(NULL); 521f0957ccaSPeter Wemm break; 522f0957ccaSPeter Wemm case OEOL: 523f0957ccaSPeter Wemm if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 524f0957ccaSPeter Wemm (sp < m->endp && *sp == '\n' && 525f0957ccaSPeter Wemm (m->g->cflags®_NEWLINE)) ) 526f0957ccaSPeter Wemm { /* yes */ } 527f0957ccaSPeter Wemm else 528f0957ccaSPeter Wemm return(NULL); 529f0957ccaSPeter Wemm break; 530f0957ccaSPeter Wemm case OBOW: 531f0957ccaSPeter Wemm if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 532f0957ccaSPeter Wemm (sp < m->endp && *(sp-1) == '\n' && 533f0957ccaSPeter Wemm (m->g->cflags®_NEWLINE)) || 534f0957ccaSPeter Wemm (sp > m->beginp && 535f0957ccaSPeter Wemm !ISWORD(*(sp-1))) ) && 536f0957ccaSPeter Wemm (sp < m->endp && ISWORD(*sp)) ) 537f0957ccaSPeter Wemm { /* yes */ } 538f0957ccaSPeter Wemm else 539f0957ccaSPeter Wemm return(NULL); 540f0957ccaSPeter Wemm break; 541f0957ccaSPeter Wemm case OEOW: 542f0957ccaSPeter Wemm if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 543f0957ccaSPeter Wemm (sp < m->endp && *sp == '\n' && 544f0957ccaSPeter Wemm (m->g->cflags®_NEWLINE)) || 545f0957ccaSPeter Wemm (sp < m->endp && !ISWORD(*sp)) ) && 546f0957ccaSPeter Wemm (sp > m->beginp && ISWORD(*(sp-1))) ) 547f0957ccaSPeter Wemm { /* yes */ } 548f0957ccaSPeter Wemm else 549f0957ccaSPeter Wemm return(NULL); 550f0957ccaSPeter Wemm break; 551f0957ccaSPeter Wemm case O_QUEST: 552f0957ccaSPeter Wemm break; 553f0957ccaSPeter Wemm case OOR1: /* matches null but needs to skip */ 554f0957ccaSPeter Wemm ss++; 555f0957ccaSPeter Wemm s = m->g->strip[ss]; 556f0957ccaSPeter Wemm d = m->g->stripdata[ss]; 557f0957ccaSPeter Wemm do { 558f0957ccaSPeter Wemm assert(s == OOR2); 559f0957ccaSPeter Wemm ss += d; 560f0957ccaSPeter Wemm s = m->g->strip[ss]; 561f0957ccaSPeter Wemm d = m->g->stripdata[ss]; 562f0957ccaSPeter Wemm } while (s != O_CH); 563f0957ccaSPeter Wemm /* note that the ss++ gets us past the O_CH */ 564f0957ccaSPeter Wemm break; 565f0957ccaSPeter Wemm default: /* have to make a choice */ 566f0957ccaSPeter Wemm hard = 1; 567f0957ccaSPeter Wemm break; 568f0957ccaSPeter Wemm } 569f0957ccaSPeter Wemm } 570f0957ccaSPeter Wemm if (!hard) { /* that was it! */ 571f0957ccaSPeter Wemm if (sp != stop) 572f0957ccaSPeter Wemm return(NULL); 573f0957ccaSPeter Wemm return(sp); 574f0957ccaSPeter Wemm } 575f0957ccaSPeter Wemm ss--; /* adjust for the for's final increment */ 576f0957ccaSPeter Wemm 577f0957ccaSPeter Wemm /* the hard stuff */ 578f0957ccaSPeter Wemm AT("hard", sp, stop, ss, stopst); 579f0957ccaSPeter Wemm s = m->g->strip[ss]; 580f0957ccaSPeter Wemm d = m->g->stripdata[ss]; 581f0957ccaSPeter Wemm switch (s) { 582f0957ccaSPeter Wemm case OBACK_: /* the vilest depths */ 583f0957ccaSPeter Wemm i = d; 584f0957ccaSPeter Wemm assert(0 < i && i <= m->g->nsub); 585f0957ccaSPeter Wemm if (m->pmatch[i].rm_eo == -1) 586f0957ccaSPeter Wemm return(NULL); 587f0957ccaSPeter Wemm assert(m->pmatch[i].rm_so != -1); 588f0957ccaSPeter Wemm len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 589f0957ccaSPeter Wemm assert(stop - m->beginp >= len); 590f0957ccaSPeter Wemm if (sp > stop - len) 591f0957ccaSPeter Wemm return(NULL); /* not enough left to match */ 592f0957ccaSPeter Wemm ssp = m->offp + m->pmatch[i].rm_so; 593f0957ccaSPeter Wemm if (memcmp(sp, ssp, len) != 0) 594f0957ccaSPeter Wemm return(NULL); 595f0957ccaSPeter Wemm while (m->g->strip[ss] != O_BACK || m->g->stripdata[ss] != i) 596f0957ccaSPeter Wemm ss++; 597f0957ccaSPeter Wemm return(backref(m, sp+len, stop, ss+1, stopst, lev)); 598f0957ccaSPeter Wemm break; 599f0957ccaSPeter Wemm case OQUEST_: /* to null or not */ 600f0957ccaSPeter Wemm dp = backref(m, sp, stop, ss+1, stopst, lev); 601f0957ccaSPeter Wemm if (dp != NULL) 602f0957ccaSPeter Wemm return(dp); /* not */ 603f0957ccaSPeter Wemm return(backref(m, sp, stop, ss+d+1, stopst, lev)); 604f0957ccaSPeter Wemm break; 605f0957ccaSPeter Wemm case OPLUS_: 606f0957ccaSPeter Wemm assert(m->lastpos != NULL); 607f0957ccaSPeter Wemm assert(lev+1 <= m->g->nplus); 608f0957ccaSPeter Wemm m->lastpos[lev+1] = sp; 609f0957ccaSPeter Wemm return(backref(m, sp, stop, ss+1, stopst, lev+1)); 610f0957ccaSPeter Wemm break; 611f0957ccaSPeter Wemm case O_PLUS: 612f0957ccaSPeter Wemm if (sp == m->lastpos[lev]) /* last pass matched null */ 613f0957ccaSPeter Wemm return(backref(m, sp, stop, ss+1, stopst, lev-1)); 614f0957ccaSPeter Wemm /* try another pass */ 615f0957ccaSPeter Wemm m->lastpos[lev] = sp; 616f0957ccaSPeter Wemm dp = backref(m, sp, stop, ss-d+1, stopst, lev); 617f0957ccaSPeter Wemm if (dp == NULL) 618f0957ccaSPeter Wemm return(backref(m, sp, stop, ss+1, stopst, lev-1)); 619f0957ccaSPeter Wemm else 620f0957ccaSPeter Wemm return(dp); 621f0957ccaSPeter Wemm break; 622f0957ccaSPeter Wemm case OCH_: /* find the right one, if any */ 623f0957ccaSPeter Wemm ssub = ss + 1; 624f0957ccaSPeter Wemm esub = ss + d - 1; 625f0957ccaSPeter Wemm assert(m->g->strip[esub] == OOR1); 626f0957ccaSPeter Wemm for (;;) { /* find first matching branch */ 627f0957ccaSPeter Wemm dp = backref(m, sp, stop, ssub, esub, lev); 628f0957ccaSPeter Wemm if (dp != NULL) 629f0957ccaSPeter Wemm return(dp); 630f0957ccaSPeter Wemm /* that one missed, try next one */ 631f0957ccaSPeter Wemm if (m->g->strip[esub] == O_CH) 632f0957ccaSPeter Wemm return(NULL); /* there is none */ 633f0957ccaSPeter Wemm esub++; 634f0957ccaSPeter Wemm assert(m->g->strip[esub] == OOR2); 635f0957ccaSPeter Wemm ssub = esub + 1; 636f0957ccaSPeter Wemm esub += m->g->stripdata[esub]; 637f0957ccaSPeter Wemm if (m->g->strip[esub] == OOR2) 638f0957ccaSPeter Wemm esub--; 639f0957ccaSPeter Wemm else 640f0957ccaSPeter Wemm assert(m->g->strip[esub] == O_CH); 641f0957ccaSPeter Wemm } 642f0957ccaSPeter Wemm break; 643f0957ccaSPeter Wemm case OLPAREN: /* must undo assignment if rest fails */ 644f0957ccaSPeter Wemm i = d; 645f0957ccaSPeter Wemm assert(0 < i && i <= m->g->nsub); 646f0957ccaSPeter Wemm offsave = m->pmatch[i].rm_so; 647f0957ccaSPeter Wemm m->pmatch[i].rm_so = sp - m->offp; 648f0957ccaSPeter Wemm dp = backref(m, sp, stop, ss+1, stopst, lev); 649f0957ccaSPeter Wemm if (dp != NULL) 650f0957ccaSPeter Wemm return(dp); 651f0957ccaSPeter Wemm m->pmatch[i].rm_so = offsave; 652f0957ccaSPeter Wemm return(NULL); 653f0957ccaSPeter Wemm break; 654f0957ccaSPeter Wemm case ORPAREN: /* must undo assignment if rest fails */ 655f0957ccaSPeter Wemm i = d; 656f0957ccaSPeter Wemm assert(0 < i && i <= m->g->nsub); 657f0957ccaSPeter Wemm offsave = m->pmatch[i].rm_eo; 658f0957ccaSPeter Wemm m->pmatch[i].rm_eo = sp - m->offp; 659f0957ccaSPeter Wemm dp = backref(m, sp, stop, ss+1, stopst, lev); 660f0957ccaSPeter Wemm if (dp != NULL) 661f0957ccaSPeter Wemm return(dp); 662f0957ccaSPeter Wemm m->pmatch[i].rm_eo = offsave; 663f0957ccaSPeter Wemm return(NULL); 664f0957ccaSPeter Wemm break; 665f0957ccaSPeter Wemm default: /* uh oh */ 666f0957ccaSPeter Wemm assert(nope); 667f0957ccaSPeter Wemm break; 668f0957ccaSPeter Wemm } 669f0957ccaSPeter Wemm 670f0957ccaSPeter Wemm /* "can't happen" */ 671f0957ccaSPeter Wemm assert(nope); 672f0957ccaSPeter Wemm /* NOTREACHED */ 673f0957ccaSPeter Wemm return NULL; 674f0957ccaSPeter Wemm } 675f0957ccaSPeter Wemm 676f0957ccaSPeter Wemm /* 677f0957ccaSPeter Wemm - fast - step through the string at top speed 678f0957ccaSPeter Wemm */ 679f0957ccaSPeter Wemm static const RCHAR_T * /* where tentative match ended, or NULL */ 680*c271fa92SBaptiste Daroussin fast(struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, 681*c271fa92SBaptiste Daroussin sopno stopst) 682f0957ccaSPeter Wemm { 683*c271fa92SBaptiste Daroussin states st = m->st; 684*c271fa92SBaptiste Daroussin states fresh = m->fresh; 685*c271fa92SBaptiste Daroussin states tmp = m->tmp; 686*c271fa92SBaptiste Daroussin const RCHAR_T *p = start; 687*c271fa92SBaptiste Daroussin RCHAR_T c = (start == m->beginp) ? OUT : *(start-1); 688*c271fa92SBaptiste Daroussin RCHAR_T lastc; /* previous c */ 689*c271fa92SBaptiste Daroussin int flag; 690*c271fa92SBaptiste Daroussin int i; 691*c271fa92SBaptiste Daroussin const RCHAR_T *coldp; /* last p after which no match was underway */ 692f0957ccaSPeter Wemm 693f0957ccaSPeter Wemm CLEAR(st); 694f0957ccaSPeter Wemm SET1(st, startst); 695f0957ccaSPeter Wemm st = step(m->g, startst, stopst, st, NOTHING, OUT, st); 696f0957ccaSPeter Wemm ASSIGN(fresh, st); 697f0957ccaSPeter Wemm SP("start", st, *p); 698f0957ccaSPeter Wemm coldp = NULL; 699f0957ccaSPeter Wemm for (;;) { 700f0957ccaSPeter Wemm /* next character */ 701f0957ccaSPeter Wemm lastc = c; 702f0957ccaSPeter Wemm c = (p == m->endp) ? OUT : *p; 703f0957ccaSPeter Wemm if (EQ(st, fresh)) 704f0957ccaSPeter Wemm coldp = p; 705f0957ccaSPeter Wemm 706f0957ccaSPeter Wemm /* is there an EOL and/or BOL between lastc and c? */ 707f0957ccaSPeter Wemm flag = 0; 708f0957ccaSPeter Wemm i = 0; 709f0957ccaSPeter Wemm if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 710f0957ccaSPeter Wemm (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 711f0957ccaSPeter Wemm flag = BOL; 712f0957ccaSPeter Wemm i = m->g->nbol; 713f0957ccaSPeter Wemm } 714f0957ccaSPeter Wemm if ( (c == '\n' && m->g->cflags®_NEWLINE) || 715f0957ccaSPeter Wemm (c == OUT && !(m->eflags®_NOTEOL)) ) { 716f0957ccaSPeter Wemm flag = (flag == BOL) ? BOLEOL : EOL; 717f0957ccaSPeter Wemm i += m->g->neol; 718f0957ccaSPeter Wemm } 719f0957ccaSPeter Wemm if (i != 0) { 720f0957ccaSPeter Wemm for (; i > 0; i--) 721f0957ccaSPeter Wemm st = step(m->g, startst, stopst, st, flag, OUT, st); 722f0957ccaSPeter Wemm SP("boleol", st, c); 723f0957ccaSPeter Wemm } 724f0957ccaSPeter Wemm 725f0957ccaSPeter Wemm /* how about a word boundary? */ 726f0957ccaSPeter Wemm if ( (flag == BOL || (lastc != OUT && !ISWORD(lastc))) && 727f0957ccaSPeter Wemm (c != OUT && ISWORD(c)) ) { 728f0957ccaSPeter Wemm flag = BOW; 729f0957ccaSPeter Wemm } 730f0957ccaSPeter Wemm if ( (lastc != OUT && ISWORD(lastc)) && 731f0957ccaSPeter Wemm (flag == EOL || (c != OUT && !ISWORD(c))) ) { 732f0957ccaSPeter Wemm flag = EOW; 733f0957ccaSPeter Wemm } 734f0957ccaSPeter Wemm if (flag == BOW || flag == EOW) { 735f0957ccaSPeter Wemm st = step(m->g, startst, stopst, st, flag, OUT, st); 736f0957ccaSPeter Wemm SP("boweow", st, c); 737f0957ccaSPeter Wemm } 738f0957ccaSPeter Wemm 739f0957ccaSPeter Wemm /* are we done? */ 740f0957ccaSPeter Wemm if (ISSET(st, stopst) || p == stop) 741f0957ccaSPeter Wemm break; /* NOTE BREAK OUT */ 742f0957ccaSPeter Wemm 743f0957ccaSPeter Wemm /* no, we must deal with this character */ 744f0957ccaSPeter Wemm ASSIGN(tmp, st); 745f0957ccaSPeter Wemm ASSIGN(st, fresh); 746f0957ccaSPeter Wemm assert(c != OUT); 747f0957ccaSPeter Wemm st = step(m->g, startst, stopst, tmp, 0, c, st); 748f0957ccaSPeter Wemm SP("aft", st, c); 749f0957ccaSPeter Wemm assert(EQ(step(m->g, startst, stopst, st, NOTHING, OUT, st), st)); 750f0957ccaSPeter Wemm p++; 751f0957ccaSPeter Wemm } 752f0957ccaSPeter Wemm 753f0957ccaSPeter Wemm assert(coldp != NULL); 754f0957ccaSPeter Wemm m->coldp = coldp; 755f0957ccaSPeter Wemm if (ISSET(st, stopst)) 756f0957ccaSPeter Wemm return(p+1); 757f0957ccaSPeter Wemm else 758f0957ccaSPeter Wemm return(NULL); 759f0957ccaSPeter Wemm } 760f0957ccaSPeter Wemm 761f0957ccaSPeter Wemm /* 762f0957ccaSPeter Wemm - slow - step through the string more deliberately 763f0957ccaSPeter Wemm */ 764f0957ccaSPeter Wemm static const RCHAR_T * /* where it ended */ 765*c271fa92SBaptiste Daroussin slow(struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, 766*c271fa92SBaptiste Daroussin sopno stopst) 767f0957ccaSPeter Wemm { 768*c271fa92SBaptiste Daroussin states st = m->st; 769*c271fa92SBaptiste Daroussin states empty = m->empty; 770*c271fa92SBaptiste Daroussin states tmp = m->tmp; 771*c271fa92SBaptiste Daroussin const RCHAR_T *p = start; 772*c271fa92SBaptiste Daroussin RCHAR_T c = (start == m->beginp) ? OUT : *(start-1); 773*c271fa92SBaptiste Daroussin RCHAR_T lastc; /* previous c */ 774*c271fa92SBaptiste Daroussin int flag; 775*c271fa92SBaptiste Daroussin int i; 776*c271fa92SBaptiste Daroussin const RCHAR_T *matchp; /* last p at which a match ended */ 777f0957ccaSPeter Wemm 778f0957ccaSPeter Wemm AT("slow", start, stop, startst, stopst); 779f0957ccaSPeter Wemm CLEAR(st); 780f0957ccaSPeter Wemm SET1(st, startst); 781f0957ccaSPeter Wemm SP("sstart", st, *p); 782f0957ccaSPeter Wemm st = step(m->g, startst, stopst, st, NOTHING, OUT, st); 783f0957ccaSPeter Wemm matchp = NULL; 784f0957ccaSPeter Wemm for (;;) { 785f0957ccaSPeter Wemm /* next character */ 786f0957ccaSPeter Wemm lastc = c; 787f0957ccaSPeter Wemm c = (p == m->endp) ? OUT : *p; 788f0957ccaSPeter Wemm 789f0957ccaSPeter Wemm /* is there an EOL and/or BOL between lastc and c? */ 790f0957ccaSPeter Wemm flag = 0; 791f0957ccaSPeter Wemm i = 0; 792f0957ccaSPeter Wemm if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 793f0957ccaSPeter Wemm (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 794f0957ccaSPeter Wemm flag = BOL; 795f0957ccaSPeter Wemm i = m->g->nbol; 796f0957ccaSPeter Wemm } 797f0957ccaSPeter Wemm if ( (c == '\n' && m->g->cflags®_NEWLINE) || 798f0957ccaSPeter Wemm (c == OUT && !(m->eflags®_NOTEOL)) ) { 799f0957ccaSPeter Wemm flag = (flag == BOL) ? BOLEOL : EOL; 800f0957ccaSPeter Wemm i += m->g->neol; 801f0957ccaSPeter Wemm } 802f0957ccaSPeter Wemm if (i != 0) { 803f0957ccaSPeter Wemm for (; i > 0; i--) 804f0957ccaSPeter Wemm st = step(m->g, startst, stopst, st, flag, OUT, st); 805f0957ccaSPeter Wemm SP("sboleol", st, c); 806f0957ccaSPeter Wemm } 807f0957ccaSPeter Wemm 808f0957ccaSPeter Wemm /* how about a word boundary? */ 809f0957ccaSPeter Wemm if ( (flag == BOL || (lastc != OUT && !ISWORD(lastc))) && 810f0957ccaSPeter Wemm (c != OUT && ISWORD(c)) ) { 811f0957ccaSPeter Wemm flag = BOW; 812f0957ccaSPeter Wemm } 813f0957ccaSPeter Wemm if ( (lastc != OUT && ISWORD(lastc)) && 814f0957ccaSPeter Wemm (flag == EOL || (c != OUT && !ISWORD(c))) ) { 815f0957ccaSPeter Wemm flag = EOW; 816f0957ccaSPeter Wemm } 817f0957ccaSPeter Wemm if (flag == BOW || flag == EOW) { 818f0957ccaSPeter Wemm st = step(m->g, startst, stopst, st, flag, OUT, st); 819f0957ccaSPeter Wemm SP("sboweow", st, c); 820f0957ccaSPeter Wemm } 821f0957ccaSPeter Wemm 822f0957ccaSPeter Wemm /* are we done? */ 823f0957ccaSPeter Wemm if (ISSET(st, stopst)) 824f0957ccaSPeter Wemm matchp = p; 825f0957ccaSPeter Wemm if (EQ(st, empty) || p == stop) 826f0957ccaSPeter Wemm break; /* NOTE BREAK OUT */ 827f0957ccaSPeter Wemm 828f0957ccaSPeter Wemm /* no, we must deal with this character */ 829f0957ccaSPeter Wemm ASSIGN(tmp, st); 830f0957ccaSPeter Wemm ASSIGN(st, empty); 831f0957ccaSPeter Wemm assert(c != OUT); 832f0957ccaSPeter Wemm st = step(m->g, startst, stopst, tmp, 0, c, st); 833f0957ccaSPeter Wemm SP("saft", st, c); 834f0957ccaSPeter Wemm assert(EQ(step(m->g, startst, stopst, st, NOTHING, OUT, st), st)); 835f0957ccaSPeter Wemm p++; 836f0957ccaSPeter Wemm } 837f0957ccaSPeter Wemm 838f0957ccaSPeter Wemm return(matchp); 839f0957ccaSPeter Wemm } 840f0957ccaSPeter Wemm 841f0957ccaSPeter Wemm 842f0957ccaSPeter Wemm /* 843f0957ccaSPeter Wemm - step - map set of states reachable before char to set reachable after 844f0957ccaSPeter Wemm */ 845f0957ccaSPeter Wemm static states 846*c271fa92SBaptiste Daroussin step(struct re_guts *g, 847*c271fa92SBaptiste Daroussin sopno start, /* start state within strip */ 848*c271fa92SBaptiste Daroussin sopno stop, /* state after stop state within strip */ 849*c271fa92SBaptiste Daroussin states bef, /* states reachable before */ 850*c271fa92SBaptiste Daroussin int flag, /* NONCHAR flag */ 851*c271fa92SBaptiste Daroussin RCHAR_T ch, /* character code */ 852*c271fa92SBaptiste Daroussin states aft) /* states already known reachable after */ 853f0957ccaSPeter Wemm { 854*c271fa92SBaptiste Daroussin cset *cs; 855*c271fa92SBaptiste Daroussin sop s; 856*c271fa92SBaptiste Daroussin RCHAR_T d; 857*c271fa92SBaptiste Daroussin sopno pc; 858*c271fa92SBaptiste Daroussin onestate here; /* note, macros know this name */ 859*c271fa92SBaptiste Daroussin sopno look; 860*c271fa92SBaptiste Daroussin int i; 861f0957ccaSPeter Wemm 862f0957ccaSPeter Wemm for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 863f0957ccaSPeter Wemm s = g->strip[pc]; 864f0957ccaSPeter Wemm d = g->stripdata[pc]; 865f0957ccaSPeter Wemm switch (s) { 866f0957ccaSPeter Wemm case OEND: 867f0957ccaSPeter Wemm assert(pc == stop-1); 868f0957ccaSPeter Wemm break; 869f0957ccaSPeter Wemm case OCHAR: 870f0957ccaSPeter Wemm /* only characters can match */ 871f0957ccaSPeter Wemm assert(!flag || ch != d); 872f0957ccaSPeter Wemm if (ch == d) 873f0957ccaSPeter Wemm FWD(aft, bef, 1); 874f0957ccaSPeter Wemm break; 875f0957ccaSPeter Wemm case OBOL: 876f0957ccaSPeter Wemm if (flag == BOL || flag == BOLEOL) 877f0957ccaSPeter Wemm FWD(aft, bef, 1); 878f0957ccaSPeter Wemm break; 879f0957ccaSPeter Wemm case OEOL: 880f0957ccaSPeter Wemm if (flag == EOL || flag == BOLEOL) 881f0957ccaSPeter Wemm FWD(aft, bef, 1); 882f0957ccaSPeter Wemm break; 883f0957ccaSPeter Wemm case OBOW: 884f0957ccaSPeter Wemm if (flag == BOW) 885f0957ccaSPeter Wemm FWD(aft, bef, 1); 886f0957ccaSPeter Wemm break; 887f0957ccaSPeter Wemm case OEOW: 888f0957ccaSPeter Wemm if (flag == EOW) 889f0957ccaSPeter Wemm FWD(aft, bef, 1); 890f0957ccaSPeter Wemm break; 891f0957ccaSPeter Wemm case OANY: 892f0957ccaSPeter Wemm if (!flag) 893f0957ccaSPeter Wemm FWD(aft, bef, 1); 894f0957ccaSPeter Wemm break; 895f0957ccaSPeter Wemm case OANYOF: 896f0957ccaSPeter Wemm cs = &g->sets[d]; 897f0957ccaSPeter Wemm if (!flag && CHIN(cs, ch)) 898f0957ccaSPeter Wemm FWD(aft, bef, 1); 899f0957ccaSPeter Wemm break; 900f0957ccaSPeter Wemm case OBACK_: /* ignored here */ 901f0957ccaSPeter Wemm case O_BACK: 902f0957ccaSPeter Wemm FWD(aft, aft, 1); 903f0957ccaSPeter Wemm break; 904f0957ccaSPeter Wemm case OPLUS_: /* forward, this is just an empty */ 905f0957ccaSPeter Wemm FWD(aft, aft, 1); 906f0957ccaSPeter Wemm break; 907f0957ccaSPeter Wemm case O_PLUS: /* both forward and back */ 908f0957ccaSPeter Wemm FWD(aft, aft, 1); 909f0957ccaSPeter Wemm i = ISSETBACK(aft, d); 910f0957ccaSPeter Wemm BACK(aft, aft, d); 911f0957ccaSPeter Wemm if (!i && ISSETBACK(aft, d)) { 912f0957ccaSPeter Wemm /* oho, must reconsider loop body */ 913f0957ccaSPeter Wemm pc -= d + 1; 914f0957ccaSPeter Wemm INIT(here, pc); 915f0957ccaSPeter Wemm } 916f0957ccaSPeter Wemm break; 917f0957ccaSPeter Wemm case OQUEST_: /* two branches, both forward */ 918f0957ccaSPeter Wemm FWD(aft, aft, 1); 919f0957ccaSPeter Wemm FWD(aft, aft, d); 920f0957ccaSPeter Wemm break; 921f0957ccaSPeter Wemm case O_QUEST: /* just an empty */ 922f0957ccaSPeter Wemm FWD(aft, aft, 1); 923f0957ccaSPeter Wemm break; 924f0957ccaSPeter Wemm case OLPAREN: /* not significant here */ 925f0957ccaSPeter Wemm case ORPAREN: 926f0957ccaSPeter Wemm FWD(aft, aft, 1); 927f0957ccaSPeter Wemm break; 928f0957ccaSPeter Wemm case OCH_: /* mark the first two branches */ 929f0957ccaSPeter Wemm FWD(aft, aft, 1); 930f0957ccaSPeter Wemm assert(OP(g->strip[pc+d]) == OOR2); 931f0957ccaSPeter Wemm FWD(aft, aft, d); 932f0957ccaSPeter Wemm break; 933f0957ccaSPeter Wemm case OOR1: /* done a branch, find the O_CH */ 934f0957ccaSPeter Wemm if (ISSTATEIN(aft, here)) { 935f0957ccaSPeter Wemm for (look = 1; /**/; look += d) { 936f0957ccaSPeter Wemm s = g->strip[pc+look]; 937f0957ccaSPeter Wemm d = g->stripdata[pc+look]; 938f0957ccaSPeter Wemm if (s == O_CH) 939f0957ccaSPeter Wemm break; 940f0957ccaSPeter Wemm assert(s == OOR2); 941f0957ccaSPeter Wemm } 942f0957ccaSPeter Wemm FWD(aft, aft, look); 943f0957ccaSPeter Wemm } 944f0957ccaSPeter Wemm break; 945f0957ccaSPeter Wemm case OOR2: /* propagate OCH_'s marking */ 946f0957ccaSPeter Wemm FWD(aft, aft, 1); 947f0957ccaSPeter Wemm if (g->strip[pc+d] != O_CH) { 948f0957ccaSPeter Wemm assert(g->strip[pc+d] == OOR2); 949f0957ccaSPeter Wemm FWD(aft, aft, d); 950f0957ccaSPeter Wemm } 951f0957ccaSPeter Wemm break; 952f0957ccaSPeter Wemm case O_CH: /* just empty */ 953f0957ccaSPeter Wemm FWD(aft, aft, 1); 954f0957ccaSPeter Wemm break; 955f0957ccaSPeter Wemm default: /* ooooops... */ 956f0957ccaSPeter Wemm assert(nope); 957f0957ccaSPeter Wemm break; 958f0957ccaSPeter Wemm } 959f0957ccaSPeter Wemm } 960f0957ccaSPeter Wemm 961f0957ccaSPeter Wemm return(aft); 962f0957ccaSPeter Wemm } 963f0957ccaSPeter Wemm 964f0957ccaSPeter Wemm #ifdef REDEBUG 965f0957ccaSPeter Wemm /* 966f0957ccaSPeter Wemm - print - print a set of states 967f0957ccaSPeter Wemm */ 968f0957ccaSPeter Wemm static void 969*c271fa92SBaptiste Daroussin print(struct match *m, char *caption, states st, int ch, FILE *d) 970f0957ccaSPeter Wemm { 971*c271fa92SBaptiste Daroussin struct re_guts *g = m->g; 972*c271fa92SBaptiste Daroussin int i; 973*c271fa92SBaptiste Daroussin int first = 1; 974f0957ccaSPeter Wemm 975f0957ccaSPeter Wemm if (!(m->eflags®_TRACE)) 976f0957ccaSPeter Wemm return; 977f0957ccaSPeter Wemm 978f0957ccaSPeter Wemm fprintf(d, "%s", caption); 979f0957ccaSPeter Wemm if (ch != '\0') 980f0957ccaSPeter Wemm fprintf(d, " %s", pchar(ch)); 981f0957ccaSPeter Wemm for (i = 0; i < g->nstates; i++) 982f0957ccaSPeter Wemm if (ISSET(st, i)) { 983f0957ccaSPeter Wemm fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 984f0957ccaSPeter Wemm first = 0; 985f0957ccaSPeter Wemm } 986f0957ccaSPeter Wemm fprintf(d, "\n"); 987f0957ccaSPeter Wemm } 988f0957ccaSPeter Wemm 989f0957ccaSPeter Wemm /* 990f0957ccaSPeter Wemm - at - print current situation 991f0957ccaSPeter Wemm */ 992f0957ccaSPeter Wemm static void 993*c271fa92SBaptiste Daroussin at(struct match *m, char *title, char *start, char *stop, sopno startst, 994*c271fa92SBaptiste Daroussin sopno stopst) 995f0957ccaSPeter Wemm { 996f0957ccaSPeter Wemm if (!(m->eflags®_TRACE)) 997f0957ccaSPeter Wemm return; 998f0957ccaSPeter Wemm 999f0957ccaSPeter Wemm printf("%s %s-", title, pchar(*start)); 1000f0957ccaSPeter Wemm printf("%s ", pchar(*stop)); 1001f0957ccaSPeter Wemm printf("%ld-%ld\n", (long)startst, (long)stopst); 1002f0957ccaSPeter Wemm } 1003f0957ccaSPeter Wemm 1004f0957ccaSPeter Wemm #ifndef PCHARDONE 1005f0957ccaSPeter Wemm #define PCHARDONE /* never again */ 1006f0957ccaSPeter Wemm /* 1007f0957ccaSPeter Wemm - pchar - make a character printable 1008f0957ccaSPeter Wemm * 1009f0957ccaSPeter Wemm * Is this identical to regchar() over in debug.c? Well, yes. But a 1010f0957ccaSPeter Wemm * duplicate here avoids having a debugging-capable regexec.o tied to 1011f0957ccaSPeter Wemm * a matching debug.o, and this is convenient. It all disappears in 1012f0957ccaSPeter Wemm * the non-debug compilation anyway, so it doesn't matter much. 1013f0957ccaSPeter Wemm */ 1014f0957ccaSPeter Wemm static char * /* -> representation */ 1015*c271fa92SBaptiste Daroussin pchar(int ch) 1016f0957ccaSPeter Wemm { 1017f0957ccaSPeter Wemm static char pbuf[10]; 1018f0957ccaSPeter Wemm 1019f0957ccaSPeter Wemm if (isprint(ch) || ch == ' ') 1020f0957ccaSPeter Wemm snprintf(pbuf, sizeof(pbuf), "%c", ch); 1021f0957ccaSPeter Wemm else 1022f0957ccaSPeter Wemm snprintf(pbuf, sizeof(pbuf), "\\%o", ch); 1023f0957ccaSPeter Wemm return(pbuf); 1024f0957ccaSPeter Wemm } 1025f0957ccaSPeter Wemm #endif 1026f0957ccaSPeter Wemm #endif 1027f0957ccaSPeter Wemm 1028f0957ccaSPeter Wemm #undef matcher 1029f0957ccaSPeter Wemm #undef fast 1030f0957ccaSPeter Wemm #undef slow 1031f0957ccaSPeter Wemm #undef dissect 1032f0957ccaSPeter Wemm #undef backref 1033f0957ccaSPeter Wemm #undef step 1034f0957ccaSPeter Wemm #undef print 1035f0957ccaSPeter Wemm #undef at 1036f0957ccaSPeter Wemm #undef match 1037