Lines Matching +full:c +full:- +full:states

1 /*-
4 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5 * Copyright (c) 1992, 1993, 1994
35 * @(#)engine.c 8.5 (Berkeley) 3/20/94
39 * The matching engine and friends. This file is #included by regexec.c
78 const char *beginp; /* start of string -- virtual NUL precedes */
79 const char *endp; /* end of string -- virtual NUL here */
83 states st; /* current states */
84 states fresh; /* states for a fresh start */
85 states tmp; /* temporary */
86 states empty; /* empty set of states */
97 static states step(struct re_guts *, sopno, sopno, states, int, states);
106 #define NONCHAR(c) ((c) > CHAR_MAX)
107 #define NNONCHAR (CODEMAX-CHAR_MAX)
109 static void print(struct match *, const char *, states, int, FILE *);
120 #define SP(t, s, c) print(m, t, s, c, stdout)
122 #define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
125 #define SP(t, s, c) /* nothing */
131 - matcher - the actual matching engine
143 const sopno gf = g->firststate+1; /* +1 for OEND */
144 const sopno gl = g->laststate;
149 if (g->cflags&REG_NOSUB)
162 if (g->must != NULL) {
164 if (*dp == g->must[0] && stop - dp >= g->mlen &&
165 memcmp(dp, g->must, (size_t)g->mlen) == 0)
167 if (dp == stop) /* we didn't find g->must */
172 m->g = g;
173 m->eflags = eflags;
174 m->pmatch = NULL;
175 m->lastpos = NULL;
176 m->offp = string;
177 m->beginp = start;
178 m->endp = stop;
180 SETUP(m->st);
181 SETUP(m->fresh);
182 SETUP(m->tmp);
183 SETUP(m->empty);
184 CLEAR(m->empty);
190 free(m->pmatch);
191 free((void*)m->lastpos);
195 if (nmatch == 0 && !g->backrefs)
199 assert(m->coldp != NULL);
202 endp = slow(m, m->coldp, stop, gf, gl);
205 assert(m->coldp < m->endp);
206 m->coldp++;
208 if (nmatch == 1 && !g->backrefs)
212 if (m->pmatch == NULL)
213 m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
215 if (m->pmatch == NULL) {
219 for (i = 1; i <= m->g->nsub; i++)
220 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
221 if (!g->backrefs && !(m->eflags&REG_BACKR)) {
223 dp = dissect(m, m->coldp, endp, gf, gl);
225 if (g->nplus > 0 && m->lastpos == NULL)
226 m->lastpos = (const char **)malloc((g->nplus+1) *
228 if (g->nplus > 0 && m->lastpos == NULL) {
229 free(m->pmatch);
234 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
239 /* uh-oh... we couldn't find a subexpression-level match */
240 assert(g->backrefs); /* must be back references doing it */
241 assert(g->nplus == 0 || m->lastpos != NULL);
243 if (dp != NULL || endp <= m->coldp)
246 endp = slow(m, m->coldp, endp-1, gf, gl);
251 for (i = 1; i <= m->g->nsub; i++) {
252 assert(m->pmatch[i].rm_so == -1);
253 assert(m->pmatch[i].rm_eo == -1);
257 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
265 if (m->coldp == stop)
267 start = m->coldp + 1; /* recycle starting later */
272 pmatch[0].rm_so = m->coldp - m->offp;
273 pmatch[0].rm_eo = endp - m->offp;
276 assert(m->pmatch != NULL);
278 if (i <= m->g->nsub)
279 pmatch[i] = m->pmatch[i];
281 pmatch[i].rm_so = -1;
282 pmatch[i].rm_eo = -1;
286 if (m->pmatch != NULL)
287 free((char *)m->pmatch);
288 if (m->lastpos != NULL)
289 free((char *)m->lastpos);
295 * match. This can always conservatively return "stop - 1", but may return an
303 const char *res = stop - 1;
310 if (OP(g->strip[startst]) != ORPAREN)
314 if (OP(g->strip[startst]) != OCHAR)
318 char ch = OPND(g->strip[startst]);
319 for (; res != start; --res) {
324 if (nextst >= stopst || OP(g->strip[nextst]) != OCHAR || next >= stop ||
325 *next == (char)OPND(g->strip[nextst]))
333 - dissect - figure out what matched what, no back references
357 switch (OP(m->g->strip[es])) {
360 es += OPND(m->g->strip[es]);
363 while (OP(m->g->strip[es]) != O_CH)
364 es += OPND(m->g->strip[es]);
370 switch (OP(m->g->strip[ss])) {
401 /* no -- try a shorter match for this one */
402 stp = step_back(m->g, sp, rest, es, stopst);
406 esub = es - 1;
426 /* no -- try a shorter match for this one */
427 stp = step_back(m->g, sp, rest, es, stopst);
431 esub = es - 1;
465 /* no -- try a shorter match for this one */
466 stp = rest - 1;
470 esub = ss + OPND(m->g->strip[ss]) - 1;
471 assert(OP(m->g->strip[esub]) == OOR1);
476 assert(OP(m->g->strip[esub]) == OOR1);
478 assert(OP(m->g->strip[esub]) == OOR2);
480 esub += OPND(m->g->strip[esub]);
481 if (OP(m->g->strip[esub]) == OOR2)
482 esub--;
484 assert(OP(m->g->strip[esub]) == O_CH);
501 i = OPND(m->g->strip[ss]);
502 assert(0 < i && i <= m->g->nsub);
503 m->pmatch[i].rm_so = sp - m->offp;
506 i = OPND(m->g->strip[ss]);
507 assert(0 < i && i <= m->g->nsub);
508 m->pmatch[i].rm_eo = sp - m->offp;
521 - backref - figure out what matched what, figuring in back references
546 switch (OP(s = m->g->strip[ss])) {
557 cs = &m->g->sets[OPND(s)];
562 if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
563 (sp < m->endp && *(sp-1) == '\n' &&
564 (m->g->cflags&REG_NEWLINE)) )
570 if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
571 (sp < m->endp && *sp == '\n' &&
572 (m->g->cflags&REG_NEWLINE)) )
578 if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
579 (sp < m->endp && *(sp-1) == '\n' &&
580 (m->g->cflags&REG_NEWLINE)) ||
581 (sp > m->beginp &&
582 !ISWORD(*(sp-1))) ) &&
583 (sp < m->endp && ISWORD(*sp)) )
589 if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
590 (sp < m->endp && *sp == '\n' &&
591 (m->g->cflags&REG_NEWLINE)) ||
592 (sp < m->endp && !ISWORD(*sp)) ) &&
593 (sp > m->beginp && ISWORD(*(sp-1))) )
603 s = m->g->strip[ss];
607 } while (OP(s = m->g->strip[ss]) != O_CH);
619 ss--; /* adjust for the for's final increment */
623 s = m->g->strip[ss];
627 assert(0 < i && i <= m->g->nsub);
628 if (m->pmatch[i].rm_eo == -1)
630 assert(m->pmatch[i].rm_so != -1);
631 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
634 assert(stop - m->beginp >= len);
635 if (sp > stop - len)
637 ssp = m->offp + m->pmatch[i].rm_so;
640 while (m->g->strip[ss] != SOP(O_BACK, i))
651 assert(m->lastpos != NULL);
652 assert(lev+1 <= m->g->nplus);
653 m->lastpos[lev+1] = sp;
657 if (sp == m->lastpos[lev]) /* last pass matched null */
658 return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
660 m->lastpos[lev] = sp;
661 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
663 return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
669 esub = ss + OPND(s) - 1;
670 assert(OP(m->g->strip[esub]) == OOR1);
676 if (OP(m->g->strip[esub]) == O_CH)
679 assert(OP(m->g->strip[esub]) == OOR2);
681 esub += OPND(m->g->strip[esub]);
682 if (OP(m->g->strip[esub]) == OOR2)
683 esub--;
685 assert(OP(m->g->strip[esub]) == O_CH);
690 assert(0 < i && i <= m->g->nsub);
691 offsave = m->pmatch[i].rm_so;
692 m->pmatch[i].rm_so = sp - m->offp;
696 m->pmatch[i].rm_so = offsave;
701 assert(0 < i && i <= m->g->nsub);
702 offsave = m->pmatch[i].rm_eo;
703 m->pmatch[i].rm_eo = sp - m->offp;
707 m->pmatch[i].rm_eo = offsave;
722 - fast - step through the string at top speed
728 states st = m->st;
729 states fresh = m->fresh;
730 states tmp = m->tmp;
732 int c = (start == m->beginp) ? OUT : *(start-1);
733 int lastc; /* previous c */
740 st = step(m->g, startst, stopst, st, NOTHING, st);
746 lastc = c;
747 c = (p == m->endp) ? OUT : *p;
751 /* is there an EOL and/or BOL between lastc and c? */
754 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
755 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
757 i = m->g->nbol;
759 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
760 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
762 i += m->g->neol;
765 for (; i > 0; i--)
766 st = step(m->g, startst, stopst, st, flagch, st);
767 SP("boleol", st, c);
772 (c != OUT && ISWORD(c)) ) {
776 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
780 st = step(m->g, startst, stopst, st, flagch, st);
781 SP("boweow", st, c);
791 assert(c != OUT);
792 st = step(m->g, startst, stopst, tmp, c, st);
793 SP("aft", st, c);
794 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
799 m->coldp = coldp;
807 - slow - step through the string more deliberately
817 sop s = m->g->strip[startst];
836 states st = m->st;
837 states empty = m->empty;
838 states tmp = m->tmp;
839 int c = (p == m->beginp) ? OUT : *(p-1);
840 int lastc; /* previous c */
849 st = step(m->g, startst, stopst, st, NOTHING, st);
853 lastc = c;
854 c = (p == m->endp) ? OUT : *p;
856 /* is there an EOL and/or BOL between lastc and c? */
859 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
860 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
862 i = m->g->nbol;
864 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
865 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
867 i += m->g->neol;
870 for (; i > 0; i--)
871 st = step(m->g, startst, stopst, st, flagch, st);
872 SP("sboleol", st, c);
877 (c != OUT && ISWORD(c)) ) {
881 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
885 st = step(m->g, startst, stopst, st, flagch, st);
886 SP("sboweow", st, c);
898 assert(c != OUT);
899 st = step(m->g, startst, stopst, tmp, c, st);
900 SP("saft", st, c);
901 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
910 - step - map set of states reachable before char to set reachable after
912 static states
916 states bef, /* states reachable before */
918 states aft) /* states already known reachable after */
928 s = g->strip[pc];
931 assert(pc == stop-1);
960 cs = &g->sets[OPND(s)];
977 pc -= OPND(s) + 1;
994 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1000 OP(s = g->strip[pc+look]) != O_CH;
1008 if (OP(g->strip[pc+OPND(s)]) != O_CH) {
1009 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1027 - print - print a set of states
1030 print(struct match *m, const char *caption, states st, int ch, FILE *d)
1032 struct re_guts *g = m->g;
1036 if (!(m->eflags&REG_TRACE))
1042 for (i = 0; i < g->nstates; i++)
1051 - at - print current situation
1057 if (!(m->eflags&REG_TRACE))
1060 (void)printf("%s %s-", title, pchar(*start));
1062 (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
1068 - pchar - make a character printable
1070 * Is this identical to regchar() over in debug.c? Well, yes. But a
1071 * duplicate here avoids having a debugging-capable regexec.o tied to
1073 * the non-debug compilation anyway, so it doesn't matter much.
1075 static char * /* -> representation */
1081 (void)snprintf(pbuf, sizeof pbuf, "%c", ch);