1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Knowledge Ventures * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 24 /* 25 * posix regex record executor 26 * multiple record sized-buffer interface 27 */ 28 29 #include "reglib.h" 30 31 /* 32 * call regnexec() on records selected by Boyer-Moore 33 */ 34 35 int 36 regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record) 37 { 38 register unsigned char* buf = (unsigned char*)s; 39 register unsigned char* beg; 40 register unsigned char* l; 41 register unsigned char* r; 42 register unsigned char* x; 43 register size_t* skip; 44 register size_t* fail; 45 register Bm_mask_t** mask; 46 register size_t index; 47 register int n; 48 unsigned char* end; 49 size_t mid; 50 int complete; 51 int exactlen; 52 int leftlen; 53 int rightlen; 54 int inv; 55 Bm_mask_t m; 56 Env_t* env; 57 Rex_t* e; 58 59 if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM) 60 return REG_BADPAT; 61 inv = (flags & REG_INVERT) != 0; 62 buf = beg = (unsigned char*)s; 63 end = buf + len; 64 mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right); 65 skip = e->re.bm.skip; 66 fail = e->re.bm.fail; 67 mask = e->re.bm.mask; 68 complete = e->re.bm.complete && !nmatch; 69 exactlen = e->re.bm.size; 70 leftlen = e->re.bm.left + exactlen; 71 rightlen = exactlen + e->re.bm.right; 72 index = leftlen++; 73 for (;;) 74 { 75 while ((index += skip[buf[index]]) < mid); 76 if (index < HIT) 77 goto impossible; 78 index -= HIT; 79 m = mask[n = exactlen - 1][buf[index]]; 80 do 81 { 82 if (!n--) 83 goto possible; 84 } while (m &= mask[n][buf[--index]]); 85 if ((index += fail[n + 1]) < len) 86 continue; 87 impossible: 88 if (inv) 89 { 90 l = r = buf + len; 91 goto invert; 92 } 93 n = 0; 94 goto done; 95 possible: 96 r = (l = buf + index) + exactlen; 97 while (l > beg) 98 if (*--l == sep) 99 { 100 l++; 101 break; 102 } 103 if ((r - l) < leftlen) 104 goto spanned; 105 while (r < end && *r != sep) 106 r++; 107 if ((r - (buf + index)) < rightlen) 108 goto spanned; 109 if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags))) 110 { 111 if (inv) 112 { 113 invert: 114 x = beg; 115 while (beg < l) 116 { 117 while (x < l && *x != sep) 118 x++; 119 if (n = (*record)(handle, (char*)beg, x - beg)) 120 goto done; 121 beg = ++x; 122 } 123 } 124 else if (n = (*record)(handle, (char*)l, r - l)) 125 goto done; 126 if ((index = (r - buf) + leftlen) >= len) 127 { 128 n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0; 129 goto done; 130 } 131 beg = r + 1; 132 } 133 else if (n != REG_NOMATCH) 134 goto done; 135 else 136 { 137 spanned: 138 if ((index += exactlen) >= mid) 139 goto impossible; 140 } 141 } 142 done: 143 env->rex = e; 144 return n; 145 } 146