1 /* $NetBSD: engine.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */ 2 3 /*- 4 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 5 * Copyright (c) 1992, 1993, 1994 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Henry Spencer of the University of Toronto. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)engine.c 8.4 (Berkeley) 3/19/94 40 */ 41 42 /* 43 * The matching engine and friends. This file is #included by regexec.c 44 * after suitable #defines of a variety of macros used herein, so that 45 * different state representations can be used without duplicating masses 46 * of code. 47 */ 48 49 #ifdef SNAMES 50 #define matcher smatcher 51 #define fast sfast 52 #define slow sslow 53 #define dissect sdissect 54 #define backref sbackref 55 #define step sstep 56 #define print sprint 57 #define at sat 58 #define match smat 59 #endif 60 #ifdef LNAMES 61 #define matcher lmatcher 62 #define fast lfast 63 #define slow lslow 64 #define dissect ldissect 65 #define backref lbackref 66 #define step lstep 67 #define print lprint 68 #define at lat 69 #define match lmat 70 #endif 71 72 /* another structure passed up and down to avoid zillions of parameters */ 73 struct match { 74 struct re_guts *g; 75 int eflags; 76 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ 77 const RCHAR_T *offp; /* offsets work from here */ 78 const RCHAR_T *beginp; /* start of string -- virtual NUL precedes */ 79 const RCHAR_T *endp; /* end of string -- virtual NUL here */ 80 const RCHAR_T *coldp; /* can be no match starting before here */ 81 const RCHAR_T **lastpos; /* [nplus+1] */ 82 STATEVARS; 83 states st; /* current states */ 84 states fresh; /* states for a fresh start */ 85 states tmp; /* temporary */ 86 states empty; /* empty set of states */ 87 }; 88 89 /* ========= begin header generated by ./mkh ========= */ 90 #ifdef __cplusplus 91 extern "C" { 92 #endif 93 94 /* === engine.c === */ 95 static int matcher __P((struct re_guts *g, const RCHAR_T *string, size_t nmatch, regmatch_t pmatch[], int eflags)); 96 static const RCHAR_T *dissect __P((struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, sopno stopst)); 97 static const RCHAR_T *backref __P((struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, sopno stopst, sopno lev)); 98 static const RCHAR_T *fast __P((struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, sopno stopst)); 99 static const RCHAR_T *slow __P((struct match *m, const RCHAR_T *start, const RCHAR_T *stop, sopno startst, sopno stopst)); 100 static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int flag, RCHAR_T ch, states aft)); 101 #define BOL (1) 102 #define EOL (BOL+1) 103 #define BOLEOL (BOL+2) 104 #define NOTHING (BOL+3) 105 #define BOW (BOL+4) 106 #define EOW (BOL+5) 107 #ifdef REDEBUG 108 static void print __P((struct match *m, char *caption, states st, int ch, FILE *d)); 109 #endif 110 #ifdef REDEBUG 111 static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst)); 112 #endif 113 #ifdef REDEBUG 114 static char *pchar __P((int ch)); 115 #endif 116 117 #ifdef __cplusplus 118 } 119 #endif 120 /* ========= end header generated by ./mkh ========= */ 121 122 #ifdef REDEBUG 123 #define SP(t, s, c) print(m, t, s, c, stdout) 124 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) 125 #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 126 #else 127 #define SP(t, s, c) /* nothing */ 128 #define AT(t, p1, p2, s1, s2) /* nothing */ 129 #define NOTE(s) /* nothing */ 130 #endif 131 132 /* 133 - matcher - the actual matching engine 134 == static int matcher(register struct re_guts *g, const RCHAR_T *string, \ 135 == size_t nmatch, regmatch_t pmatch[], int eflags); 136 */ 137 static int /* 0 success, REG_NOMATCH failure */ 138 matcher(g, string, nmatch, pmatch, eflags) 139 register struct re_guts *g; 140 const RCHAR_T *string; 141 size_t nmatch; 142 regmatch_t pmatch[]; 143 int eflags; 144 { 145 register const RCHAR_T *endp; 146 register size_t i; 147 struct match mv; 148 register struct match *m = &mv; 149 register const RCHAR_T *dp; 150 register const sopno gf = g->firststate+1; /* +1 for OEND */ 151 register const sopno gl = g->laststate; 152 const RCHAR_T *start; 153 const RCHAR_T *stop; 154 155 /* simplify the situation where possible */ 156 if (g->cflags®_NOSUB) 157 nmatch = 0; 158 if (eflags®_STARTEND) { 159 start = string + pmatch[0].rm_so; 160 stop = string + pmatch[0].rm_eo; 161 } else { 162 start = string; 163 stop = start + STRLEN(start); 164 } 165 if (stop < start) 166 return(REG_INVARG); 167 168 /* prescreening; this does wonders for this rather slow code */ 169 if (g->must != NULL) { 170 for (dp = start; dp < stop; dp++) 171 if (*dp == g->must[0] && (size_t)(stop - dp) >= g->mlen && 172 MEMCMP(dp, g->must, g->mlen) == 0) 173 break; 174 if (dp == stop) /* we didn't find g->must */ 175 return(REG_NOMATCH); 176 } 177 178 /* match struct setup */ 179 m->g = g; 180 m->eflags = eflags; 181 m->pmatch = NULL; 182 m->lastpos = NULL; 183 m->offp = string; 184 m->beginp = start; 185 m->endp = stop; 186 STATESETUP(m, 4); 187 SETUP(m->st); 188 SETUP(m->fresh); 189 SETUP(m->tmp); 190 SETUP(m->empty); 191 CLEAR(m->empty); 192 193 /* this loop does only one repetition except for backrefs */ 194 for (;;) { 195 endp = fast(m, start, stop, gf, gl); 196 if (endp == NULL) { /* a miss */ 197 STATETEARDOWN(m); 198 return(REG_NOMATCH); 199 } 200 if (nmatch == 0 && !g->backrefs) 201 break; /* no further info needed */ 202 203 /* where? */ 204 assert(m->coldp != NULL); 205 for (;;) { 206 NOTE("finding start"); 207 endp = slow(m, m->coldp, stop, gf, gl); 208 if (endp != NULL) 209 break; 210 assert(m->coldp < m->endp); 211 m->coldp++; 212 } 213 if (nmatch == 1 && !g->backrefs) 214 break; /* no further info needed */ 215 216 /* oh my, he wants the subexpressions... */ 217 if (m->pmatch == NULL) 218 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * 219 sizeof(regmatch_t)); 220 if (m->pmatch == NULL) { 221 STATETEARDOWN(m); 222 return(REG_ESPACE); 223 } 224 for (i = 1; i <= m->g->nsub; i++) 225 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; 226 if (!g->backrefs && !(m->eflags®_BACKR)) { 227 NOTE("dissecting"); 228 dp = dissect(m, m->coldp, endp, gf, gl); 229 } else { 230 if (g->nplus > 0 && m->lastpos == NULL) 231 m->lastpos = (const RCHAR_T **)malloc((g->nplus+1) * 232 sizeof(const RCHAR_T *)); 233 if (g->nplus > 0 && m->lastpos == NULL) { 234 free(m->pmatch); 235 STATETEARDOWN(m); 236 return(REG_ESPACE); 237 } 238 NOTE("backref dissect"); 239 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 240 } 241 if (dp != NULL) 242 break; 243 244 /* uh-oh... we couldn't find a subexpression-level match */ 245 assert(g->backrefs); /* must be back references doing it */ 246 assert(g->nplus == 0 || m->lastpos != NULL); 247 for (;;) { 248 if (dp != NULL || endp <= m->coldp) 249 break; /* defeat */ 250 NOTE("backoff"); 251 endp = slow(m, m->coldp, endp-1, gf, gl); 252 if (endp == NULL) 253 break; /* defeat */ 254 /* try it on a shorter possibility */ 255 #ifndef NDEBUG 256 for (i = 1; i <= m->g->nsub; i++) { 257 assert(m->pmatch[i].rm_so == -1); 258 assert(m->pmatch[i].rm_eo == -1); 259 } 260 #endif 261 NOTE("backoff dissect"); 262 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); 263 } 264 assert(dp == NULL || dp == endp); 265 if (dp != NULL) /* found a shorter one */ 266 break; 267 268 /* despite initial appearances, there is no match here */ 269 NOTE("false alarm"); 270 start = m->coldp + 1; /* recycle starting later */ 271 assert(start <= stop); 272 } 273 274 /* fill in the details if requested */ 275 if (nmatch > 0) { 276 pmatch[0].rm_so = m->coldp - m->offp; 277 pmatch[0].rm_eo = endp - m->offp; 278 } 279 if (nmatch > 1) { 280 assert(m->pmatch != NULL); 281 for (i = 1; i < nmatch; i++) 282 if (i <= m->g->nsub) 283 pmatch[i] = m->pmatch[i]; 284 else { 285 pmatch[i].rm_so = -1; 286 pmatch[i].rm_eo = -1; 287 } 288 } 289 290 if (m->pmatch != NULL) 291 free((char *)m->pmatch); 292 if (m->lastpos != NULL) 293 free((char *)m->lastpos); 294 STATETEARDOWN(m); 295 return(0); 296 } 297 298 /* 299 - dissect - figure out what matched what, no back references 300 == static const RCHAR_T *dissect(register struct match *m, const RCHAR_T *start, \ 301 == const RCHAR_T *stop, sopno startst, sopno stopst); 302 */ 303 static const RCHAR_T * /* == stop (success) always */ 304 dissect(m, start, stop, startst, stopst) 305 register struct match *m; 306 const RCHAR_T *start; 307 const RCHAR_T *stop; 308 sopno startst; 309 sopno stopst; 310 { 311 register int i; 312 register sopno ss; /* start sop of current subRE */ 313 register sopno es; /* end sop of current subRE */ 314 register const RCHAR_T *sp; /* start of string matched by it */ 315 register const RCHAR_T *stp; /* string matched by it cannot pass here */ 316 register const RCHAR_T *rest; /* start of rest of string */ 317 register const RCHAR_T *tail; /* string unmatched by rest of RE */ 318 register sopno ssub; /* start sop of subsubRE */ 319 register sopno esub; /* end sop of subsubRE */ 320 register const RCHAR_T *ssp; /* start of string matched by subsubRE */ 321 register const RCHAR_T *sep; /* end of string matched by subsubRE */ 322 register const RCHAR_T *oldssp; /* previous ssp */ 323 register const RCHAR_T *dp; 324 325 AT("diss", start, stop, startst, stopst); 326 sp = start; 327 for (ss = startst; ss < stopst; ss = es) { 328 /* identify end of subRE */ 329 es = ss; 330 switch (m->g->strip[es]) { 331 case OPLUS_: 332 case OQUEST_: 333 es += m->g->stripdata[es]; 334 break; 335 case OCH_: 336 while (m->g->strip[es] != O_CH) 337 es += m->g->stripdata[es]; 338 break; 339 } 340 es++; 341 342 /* figure out what it matched */ 343 switch (m->g->strip[ss]) { 344 case OEND: 345 assert(nope); 346 break; 347 case OCHAR: 348 sp++; 349 break; 350 case OBOL: 351 case OEOL: 352 case OBOW: 353 case OEOW: 354 break; 355 case OANY: 356 case OANYOF: 357 sp++; 358 break; 359 case OBACK_: 360 case O_BACK: 361 assert(nope); 362 break; 363 /* cases where length of match is hard to find */ 364 case OQUEST_: 365 stp = stop; 366 for (;;) { 367 /* how long could this one be? */ 368 rest = slow(m, sp, stp, ss, es); 369 assert(rest != NULL); /* it did match */ 370 /* could the rest match the rest? */ 371 tail = slow(m, rest, stop, es, stopst); 372 if (tail == stop) 373 break; /* yes! */ 374 /* no -- try a shorter match for this one */ 375 stp = rest - 1; 376 assert(stp >= sp); /* it did work */ 377 } 378 ssub = ss + 1; 379 esub = es - 1; 380 /* did innards match? */ 381 if (slow(m, sp, rest, ssub, esub) != NULL) { 382 dp = dissect(m, sp, rest, ssub, esub); 383 assert(dp == rest); 384 } else /* no */ 385 assert(sp == rest); 386 sp = rest; 387 break; 388 case OPLUS_: 389 stp = stop; 390 for (;;) { 391 /* how long could this one be? */ 392 rest = slow(m, sp, stp, ss, es); 393 assert(rest != NULL); /* it did match */ 394 /* could the rest match the rest? */ 395 tail = slow(m, rest, stop, es, stopst); 396 if (tail == stop) 397 break; /* yes! */ 398 /* no -- try a shorter match for this one */ 399 stp = rest - 1; 400 assert(stp >= sp); /* it did work */ 401 } 402 ssub = ss + 1; 403 esub = es - 1; 404 ssp = sp; 405 oldssp = ssp; 406 for (;;) { /* find last match of innards */ 407 sep = slow(m, ssp, rest, ssub, esub); 408 if (sep == NULL || sep == ssp) 409 break; /* failed or matched null */ 410 oldssp = ssp; /* on to next try */ 411 ssp = sep; 412 } 413 if (sep == NULL) { 414 /* last successful match */ 415 sep = ssp; 416 ssp = oldssp; 417 } 418 assert(sep == rest); /* must exhaust substring */ 419 assert(slow(m, ssp, sep, ssub, esub) == rest); 420 dp = dissect(m, ssp, sep, ssub, esub); 421 assert(dp == sep); 422 sp = rest; 423 break; 424 case OCH_: 425 stp = stop; 426 for (;;) { 427 /* how long could this one be? */ 428 rest = slow(m, sp, stp, ss, es); 429 assert(rest != NULL); /* it did match */ 430 /* could the rest match the rest? */ 431 tail = slow(m, rest, stop, es, stopst); 432 if (tail == stop) 433 break; /* yes! */ 434 /* no -- try a shorter match for this one */ 435 stp = rest - 1; 436 assert(stp >= sp); /* it did work */ 437 } 438 ssub = ss + 1; 439 esub = ss + m->g->stripdata[ss] - 1; 440 assert(m->g->strip[esub] == OOR1); 441 for (;;) { /* find first matching branch */ 442 if (slow(m, sp, rest, ssub, esub) == rest) 443 break; /* it matched all of it */ 444 /* that one missed, try next one */ 445 assert(m->g->strip[esub] == OOR1); 446 esub++; 447 assert(m->g->strip[esub] == OOR2); 448 ssub = esub + 1; 449 esub += m->g->stripdata[esub]; 450 if (m->g->strip[esub] == OOR2) 451 esub--; 452 else 453 assert(m->g->strip[esub] == O_CH); 454 } 455 dp = dissect(m, sp, rest, ssub, esub); 456 assert(dp == rest); 457 sp = rest; 458 break; 459 case O_PLUS: 460 case O_QUEST: 461 case OOR1: 462 case OOR2: 463 case O_CH: 464 assert(nope); 465 break; 466 case OLPAREN: 467 i = m->g->stripdata[ss]; 468 assert(0 < i && i <= m->g->nsub); 469 m->pmatch[i].rm_so = sp - m->offp; 470 break; 471 case ORPAREN: 472 i = m->g->stripdata[ss]; 473 assert(0 < i && i <= m->g->nsub); 474 m->pmatch[i].rm_eo = sp - m->offp; 475 break; 476 default: /* uh oh */ 477 assert(nope); 478 break; 479 } 480 } 481 482 assert(sp == stop); 483 return(sp); 484 } 485 486 /* 487 - backref - figure out what matched what, figuring in back references 488 == static const RCHAR_T *backref(register struct match *m, const RCHAR_T *start, \ 489 == const RCHAR_T *stop, sopno startst, sopno stopst, sopno lev); 490 */ 491 static const RCHAR_T * /* == stop (success) or NULL (failure) */ 492 backref(m, start, stop, startst, stopst, lev) 493 register struct match *m; 494 const RCHAR_T *start; 495 const RCHAR_T *stop; 496 sopno startst; 497 sopno stopst; 498 sopno lev; /* PLUS nesting level */ 499 { 500 register int i; 501 register sopno ss; /* start sop of current subRE */ 502 register const RCHAR_T *sp; /* start of string matched by it */ 503 register sopno ssub; /* start sop of subsubRE */ 504 register sopno esub; /* end sop of subsubRE */ 505 register const RCHAR_T *ssp; /* start of string matched by subsubRE */ 506 register const RCHAR_T *dp; 507 register size_t len; 508 register int hard; 509 register sop s; 510 register RCHAR_T d; 511 register regoff_t offsave; 512 register cset *cs; 513 514 AT("back", start, stop, startst, stopst); 515 sp = start; 516 517 /* get as far as we can with easy stuff */ 518 hard = 0; 519 for (ss = startst; !hard && ss < stopst; ss++) { 520 s = m->g->strip[ss]; 521 d = m->g->stripdata[ss]; 522 switch (s) { 523 case OCHAR: 524 if (sp == stop || *sp++ != d) 525 return(NULL); 526 break; 527 case OANY: 528 if (sp == stop) 529 return(NULL); 530 sp++; 531 break; 532 case OANYOF: 533 cs = &m->g->sets[d]; 534 if (sp == stop || !CHIN(cs, *sp++)) 535 return(NULL); 536 break; 537 case OBOL: 538 if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 539 (sp < m->endp && *(sp-1) == '\n' && 540 (m->g->cflags®_NEWLINE)) ) 541 { /* yes */ } 542 else 543 return(NULL); 544 break; 545 case OEOL: 546 if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || 547 (sp < m->endp && *sp == '\n' && 548 (m->g->cflags®_NEWLINE)) ) 549 { /* yes */ } 550 else 551 return(NULL); 552 break; 553 case OBOW: 554 if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || 555 (sp < m->endp && *(sp-1) == '\n' && 556 (m->g->cflags®_NEWLINE)) || 557 (sp > m->beginp && 558 !ISWORD(*(sp-1))) ) && 559 (sp < m->endp && ISWORD(*sp)) ) 560 { /* yes */ } 561 else 562 return(NULL); 563 break; 564 case OEOW: 565 if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || 566 (sp < m->endp && *sp == '\n' && 567 (m->g->cflags®_NEWLINE)) || 568 (sp < m->endp && !ISWORD(*sp)) ) && 569 (sp > m->beginp && ISWORD(*(sp-1))) ) 570 { /* yes */ } 571 else 572 return(NULL); 573 break; 574 case O_QUEST: 575 break; 576 case OOR1: /* matches null but needs to skip */ 577 ss++; 578 s = m->g->strip[ss]; 579 d = m->g->stripdata[ss]; 580 do { 581 assert(s == OOR2); 582 ss += d; 583 s = m->g->strip[ss]; 584 d = m->g->stripdata[ss]; 585 } while (s != O_CH); 586 /* note that the ss++ gets us past the O_CH */ 587 break; 588 default: /* have to make a choice */ 589 hard = 1; 590 break; 591 } 592 } 593 if (!hard) { /* that was it! */ 594 if (sp != stop) 595 return(NULL); 596 return(sp); 597 } 598 ss--; /* adjust for the for's final increment */ 599 600 /* the hard stuff */ 601 AT("hard", sp, stop, ss, stopst); 602 s = m->g->strip[ss]; 603 d = m->g->stripdata[ss]; 604 switch (s) { 605 case OBACK_: /* the vilest depths */ 606 i = d; 607 assert(0 < i && i <= m->g->nsub); 608 if (m->pmatch[i].rm_eo == -1) 609 return(NULL); 610 assert(m->pmatch[i].rm_so != -1); 611 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; 612 assert(stop - m->beginp >= len); 613 if (sp > stop - len) 614 return(NULL); /* not enough left to match */ 615 ssp = m->offp + m->pmatch[i].rm_so; 616 if (memcmp(sp, ssp, len) != 0) 617 return(NULL); 618 while (m->g->strip[ss] != O_BACK || m->g->stripdata[ss] != i) 619 ss++; 620 return(backref(m, sp+len, stop, ss+1, stopst, lev)); 621 break; 622 case OQUEST_: /* to null or not */ 623 dp = backref(m, sp, stop, ss+1, stopst, lev); 624 if (dp != NULL) 625 return(dp); /* not */ 626 return(backref(m, sp, stop, ss+d+1, stopst, lev)); 627 break; 628 case OPLUS_: 629 assert(m->lastpos != NULL); 630 assert(lev+1 <= m->g->nplus); 631 m->lastpos[lev+1] = sp; 632 return(backref(m, sp, stop, ss+1, stopst, lev+1)); 633 break; 634 case O_PLUS: 635 if (sp == m->lastpos[lev]) /* last pass matched null */ 636 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 637 /* try another pass */ 638 m->lastpos[lev] = sp; 639 dp = backref(m, sp, stop, ss-d+1, stopst, lev); 640 if (dp == NULL) 641 return(backref(m, sp, stop, ss+1, stopst, lev-1)); 642 else 643 return(dp); 644 break; 645 case OCH_: /* find the right one, if any */ 646 ssub = ss + 1; 647 esub = ss + d - 1; 648 assert(m->g->strip[esub] == OOR1); 649 for (;;) { /* find first matching branch */ 650 dp = backref(m, sp, stop, ssub, esub, lev); 651 if (dp != NULL) 652 return(dp); 653 /* that one missed, try next one */ 654 if (m->g->strip[esub] == O_CH) 655 return(NULL); /* there is none */ 656 esub++; 657 assert(m->g->strip[esub] == OOR2); 658 ssub = esub + 1; 659 esub += m->g->stripdata[esub]; 660 if (m->g->strip[esub] == OOR2) 661 esub--; 662 else 663 assert(m->g->strip[esub] == O_CH); 664 } 665 break; 666 case OLPAREN: /* must undo assignment if rest fails */ 667 i = d; 668 assert(0 < i && i <= m->g->nsub); 669 offsave = m->pmatch[i].rm_so; 670 m->pmatch[i].rm_so = sp - m->offp; 671 dp = backref(m, sp, stop, ss+1, stopst, lev); 672 if (dp != NULL) 673 return(dp); 674 m->pmatch[i].rm_so = offsave; 675 return(NULL); 676 break; 677 case ORPAREN: /* must undo assignment if rest fails */ 678 i = d; 679 assert(0 < i && i <= m->g->nsub); 680 offsave = m->pmatch[i].rm_eo; 681 m->pmatch[i].rm_eo = sp - m->offp; 682 dp = backref(m, sp, stop, ss+1, stopst, lev); 683 if (dp != NULL) 684 return(dp); 685 m->pmatch[i].rm_eo = offsave; 686 return(NULL); 687 break; 688 default: /* uh oh */ 689 assert(nope); 690 break; 691 } 692 693 /* "can't happen" */ 694 assert(nope); 695 /* NOTREACHED */ 696 return NULL; 697 } 698 699 /* 700 - fast - step through the string at top speed 701 == static const RCHAR_T *fast(register struct match *m, const RCHAR_T *start, \ 702 == const RCHAR_T *stop, sopno startst, sopno stopst); 703 */ 704 static const RCHAR_T * /* where tentative match ended, or NULL */ 705 fast(m, start, stop, startst, stopst) 706 register struct match *m; 707 const RCHAR_T *start; 708 const RCHAR_T *stop; 709 sopno startst; 710 sopno stopst; 711 { 712 register states st = m->st; 713 register states fresh = m->fresh; 714 register states tmp = m->tmp; 715 register const RCHAR_T *p = start; 716 register RCHAR_T c = (start == m->beginp) ? OUT : *(start-1); 717 register RCHAR_T lastc; /* previous c */ 718 register int flag; 719 register int i; 720 register const RCHAR_T *coldp; /* last p after which no match was underway */ 721 722 CLEAR(st); 723 SET1(st, startst); 724 st = step(m->g, startst, stopst, st, NOTHING, OUT, st); 725 ASSIGN(fresh, st); 726 SP("start", st, *p); 727 coldp = NULL; 728 for (;;) { 729 /* next character */ 730 lastc = c; 731 c = (p == m->endp) ? OUT : *p; 732 if (EQ(st, fresh)) 733 coldp = p; 734 735 /* is there an EOL and/or BOL between lastc and c? */ 736 flag = 0; 737 i = 0; 738 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 739 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 740 flag = BOL; 741 i = m->g->nbol; 742 } 743 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 744 (c == OUT && !(m->eflags®_NOTEOL)) ) { 745 flag = (flag == BOL) ? BOLEOL : EOL; 746 i += m->g->neol; 747 } 748 if (i != 0) { 749 for (; i > 0; i--) 750 st = step(m->g, startst, stopst, st, flag, OUT, st); 751 SP("boleol", st, c); 752 } 753 754 /* how about a word boundary? */ 755 if ( (flag == BOL || (lastc != OUT && !ISWORD(lastc))) && 756 (c != OUT && ISWORD(c)) ) { 757 flag = BOW; 758 } 759 if ( (lastc != OUT && ISWORD(lastc)) && 760 (flag == EOL || (c != OUT && !ISWORD(c))) ) { 761 flag = EOW; 762 } 763 if (flag == BOW || flag == EOW) { 764 st = step(m->g, startst, stopst, st, flag, OUT, st); 765 SP("boweow", st, c); 766 } 767 768 /* are we done? */ 769 if (ISSET(st, stopst) || p == stop) 770 break; /* NOTE BREAK OUT */ 771 772 /* no, we must deal with this character */ 773 ASSIGN(tmp, st); 774 ASSIGN(st, fresh); 775 assert(c != OUT); 776 st = step(m->g, startst, stopst, tmp, 0, c, st); 777 SP("aft", st, c); 778 assert(EQ(step(m->g, startst, stopst, st, NOTHING, OUT, st), st)); 779 p++; 780 } 781 782 assert(coldp != NULL); 783 m->coldp = coldp; 784 if (ISSET(st, stopst)) 785 return(p+1); 786 else 787 return(NULL); 788 } 789 790 /* 791 - slow - step through the string more deliberately 792 == static const RCHAR_T *slow(register struct match *m, const RCHAR_T *start, \ 793 == const RCHAR_T *stop, sopno startst, sopno stopst); 794 */ 795 static const RCHAR_T * /* where it ended */ 796 slow(m, start, stop, startst, stopst) 797 register struct match *m; 798 const RCHAR_T *start; 799 const RCHAR_T *stop; 800 sopno startst; 801 sopno stopst; 802 { 803 register states st = m->st; 804 register states empty = m->empty; 805 register states tmp = m->tmp; 806 register const RCHAR_T *p = start; 807 register RCHAR_T c = (start == m->beginp) ? OUT : *(start-1); 808 register RCHAR_T lastc; /* previous c */ 809 register int flag; 810 register int i; 811 register const RCHAR_T *matchp; /* last p at which a match ended */ 812 813 AT("slow", start, stop, startst, stopst); 814 CLEAR(st); 815 SET1(st, startst); 816 SP("sstart", st, *p); 817 st = step(m->g, startst, stopst, st, NOTHING, OUT, st); 818 matchp = NULL; 819 for (;;) { 820 /* next character */ 821 lastc = c; 822 c = (p == m->endp) ? OUT : *p; 823 824 /* is there an EOL and/or BOL between lastc and c? */ 825 flag = 0; 826 i = 0; 827 if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || 828 (lastc == OUT && !(m->eflags®_NOTBOL)) ) { 829 flag = BOL; 830 i = m->g->nbol; 831 } 832 if ( (c == '\n' && m->g->cflags®_NEWLINE) || 833 (c == OUT && !(m->eflags®_NOTEOL)) ) { 834 flag = (flag == BOL) ? BOLEOL : EOL; 835 i += m->g->neol; 836 } 837 if (i != 0) { 838 for (; i > 0; i--) 839 st = step(m->g, startst, stopst, st, flag, OUT, st); 840 SP("sboleol", st, c); 841 } 842 843 /* how about a word boundary? */ 844 if ( (flag == BOL || (lastc != OUT && !ISWORD(lastc))) && 845 (c != OUT && ISWORD(c)) ) { 846 flag = BOW; 847 } 848 if ( (lastc != OUT && ISWORD(lastc)) && 849 (flag == EOL || (c != OUT && !ISWORD(c))) ) { 850 flag = EOW; 851 } 852 if (flag == BOW || flag == EOW) { 853 st = step(m->g, startst, stopst, st, flag, OUT, st); 854 SP("sboweow", st, c); 855 } 856 857 /* are we done? */ 858 if (ISSET(st, stopst)) 859 matchp = p; 860 if (EQ(st, empty) || p == stop) 861 break; /* NOTE BREAK OUT */ 862 863 /* no, we must deal with this character */ 864 ASSIGN(tmp, st); 865 ASSIGN(st, empty); 866 assert(c != OUT); 867 st = step(m->g, startst, stopst, tmp, 0, c, st); 868 SP("saft", st, c); 869 assert(EQ(step(m->g, startst, stopst, st, NOTHING, OUT, st), st)); 870 p++; 871 } 872 873 return(matchp); 874 } 875 876 877 /* 878 - step - map set of states reachable before char to set reachable after 879 == static states step(register struct re_guts *g, sopno start, sopno stop, \ 880 == register states bef, int flag, RCHAR_T ch, register states aft); 881 == #define BOL (1) 882 == #define EOL (BOL+1) 883 == #define BOLEOL (BOL+2) 884 == #define NOTHING (BOL+3) 885 == #define BOW (BOL+4) 886 == #define EOW (BOL+5) 887 */ 888 static states 889 step(g, start, stop, bef, flag, ch, aft) 890 register struct re_guts *g; 891 sopno start; /* start state within strip */ 892 sopno stop; /* state after stop state within strip */ 893 register states bef; /* states reachable before */ 894 int flag; /* NONCHAR flag */ 895 RCHAR_T ch; /* character code */ 896 register states aft; /* states already known reachable after */ 897 { 898 register cset *cs; 899 register sop s; 900 register RCHAR_T d; 901 register sopno pc; 902 register onestate here; /* note, macros know this name */ 903 register sopno look; 904 register int i; 905 906 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { 907 s = g->strip[pc]; 908 d = g->stripdata[pc]; 909 switch (s) { 910 case OEND: 911 assert(pc == stop-1); 912 break; 913 case OCHAR: 914 /* only characters can match */ 915 assert(!flag || ch != d); 916 if (ch == d) 917 FWD(aft, bef, 1); 918 break; 919 case OBOL: 920 if (flag == BOL || flag == BOLEOL) 921 FWD(aft, bef, 1); 922 break; 923 case OEOL: 924 if (flag == EOL || flag == BOLEOL) 925 FWD(aft, bef, 1); 926 break; 927 case OBOW: 928 if (flag == BOW) 929 FWD(aft, bef, 1); 930 break; 931 case OEOW: 932 if (flag == EOW) 933 FWD(aft, bef, 1); 934 break; 935 case OANY: 936 if (!flag) 937 FWD(aft, bef, 1); 938 break; 939 case OANYOF: 940 cs = &g->sets[d]; 941 if (!flag && CHIN(cs, ch)) 942 FWD(aft, bef, 1); 943 break; 944 case OBACK_: /* ignored here */ 945 case O_BACK: 946 FWD(aft, aft, 1); 947 break; 948 case OPLUS_: /* forward, this is just an empty */ 949 FWD(aft, aft, 1); 950 break; 951 case O_PLUS: /* both forward and back */ 952 FWD(aft, aft, 1); 953 i = ISSETBACK(aft, d); 954 BACK(aft, aft, d); 955 if (!i && ISSETBACK(aft, d)) { 956 /* oho, must reconsider loop body */ 957 pc -= d + 1; 958 INIT(here, pc); 959 } 960 break; 961 case OQUEST_: /* two branches, both forward */ 962 FWD(aft, aft, 1); 963 FWD(aft, aft, d); 964 break; 965 case O_QUEST: /* just an empty */ 966 FWD(aft, aft, 1); 967 break; 968 case OLPAREN: /* not significant here */ 969 case ORPAREN: 970 FWD(aft, aft, 1); 971 break; 972 case OCH_: /* mark the first two branches */ 973 FWD(aft, aft, 1); 974 assert(OP(g->strip[pc+d]) == OOR2); 975 FWD(aft, aft, d); 976 break; 977 case OOR1: /* done a branch, find the O_CH */ 978 if (ISSTATEIN(aft, here)) { 979 for (look = 1; /**/; look += d) { 980 s = g->strip[pc+look]; 981 d = g->stripdata[pc+look]; 982 if (s == O_CH) 983 break; 984 assert(s == OOR2); 985 } 986 FWD(aft, aft, look); 987 } 988 break; 989 case OOR2: /* propagate OCH_'s marking */ 990 FWD(aft, aft, 1); 991 if (g->strip[pc+d] != O_CH) { 992 assert(g->strip[pc+d] == OOR2); 993 FWD(aft, aft, d); 994 } 995 break; 996 case O_CH: /* just empty */ 997 FWD(aft, aft, 1); 998 break; 999 default: /* ooooops... */ 1000 assert(nope); 1001 break; 1002 } 1003 } 1004 1005 return(aft); 1006 } 1007 1008 #ifdef REDEBUG 1009 /* 1010 - print - print a set of states 1011 == #ifdef REDEBUG 1012 == static void print(struct match *m, char *caption, states st, \ 1013 == int ch, FILE *d); 1014 == #endif 1015 */ 1016 static void 1017 print(m, caption, st, ch, d) 1018 struct match *m; 1019 char *caption; 1020 states st; 1021 int ch; 1022 FILE *d; 1023 { 1024 register struct re_guts *g = m->g; 1025 register int i; 1026 register int first = 1; 1027 1028 if (!(m->eflags®_TRACE)) 1029 return; 1030 1031 fprintf(d, "%s", caption); 1032 if (ch != '\0') 1033 fprintf(d, " %s", pchar(ch)); 1034 for (i = 0; i < g->nstates; i++) 1035 if (ISSET(st, i)) { 1036 fprintf(d, "%s%d", (first) ? "\t" : ", ", i); 1037 first = 0; 1038 } 1039 fprintf(d, "\n"); 1040 } 1041 1042 /* 1043 - at - print current situation 1044 == #ifdef REDEBUG 1045 == static void at(struct match *m, char *title, char *start, char *stop, \ 1046 == sopno startst, sopno stopst); 1047 == #endif 1048 */ 1049 static void 1050 at(m, title, start, stop, startst, stopst) 1051 struct match *m; 1052 char *title; 1053 char *start; 1054 char *stop; 1055 sopno startst; 1056 sopno stopst; 1057 { 1058 if (!(m->eflags®_TRACE)) 1059 return; 1060 1061 printf("%s %s-", title, pchar(*start)); 1062 printf("%s ", pchar(*stop)); 1063 printf("%ld-%ld\n", (long)startst, (long)stopst); 1064 } 1065 1066 #ifndef PCHARDONE 1067 #define PCHARDONE /* never again */ 1068 /* 1069 - pchar - make a character printable 1070 == #ifdef REDEBUG 1071 == static char *pchar(int ch); 1072 == #endif 1073 * 1074 * Is this identical to regchar() over in debug.c? Well, yes. But a 1075 * duplicate here avoids having a debugging-capable regexec.o tied to 1076 * a matching debug.o, and this is convenient. It all disappears in 1077 * the non-debug compilation anyway, so it doesn't matter much. 1078 */ 1079 static char * /* -> representation */ 1080 pchar(ch) 1081 int ch; 1082 { 1083 static char pbuf[10]; 1084 1085 if (isprint(ch) || ch == ' ') 1086 snprintf(pbuf, sizeof(pbuf), "%c", ch); 1087 else 1088 snprintf(pbuf, sizeof(pbuf), "\\%o", ch); 1089 return(pbuf); 1090 } 1091 #endif 1092 #endif 1093 1094 #undef matcher 1095 #undef fast 1096 #undef slow 1097 #undef dissect 1098 #undef backref 1099 #undef step 1100 #undef print 1101 #undef at 1102 #undef match 1103