1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
23
24 /*
25 * posix regex record executor
26 * multiple record sized-buffer interface
27 */
28
29 #include "reglib.h"
30
31 /*
32 * call regnexec() on records selected by Boyer-Moore
33 */
34
35 int
regrexec(const regex_t * p,const char * s,size_t len,size_t nmatch,regmatch_t * match,regflags_t flags,int sep,void * handle,regrecord_t record)36 regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
37 {
38 register unsigned char* buf = (unsigned char*)s;
39 register unsigned char* beg;
40 register unsigned char* l;
41 register unsigned char* r;
42 register unsigned char* x;
43 register size_t* skip;
44 register size_t* fail;
45 register Bm_mask_t** mask;
46 register size_t index;
47 register int n;
48 unsigned char* end;
49 size_t mid;
50 int complete;
51 int exactlen;
52 int leftlen;
53 int rightlen;
54 int inv;
55 Bm_mask_t m;
56 Env_t* env;
57 Rex_t* e;
58
59 if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
60 return REG_BADPAT;
61 inv = (flags & REG_INVERT) != 0;
62 buf = beg = (unsigned char*)s;
63 end = buf + len;
64 mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
65 skip = e->re.bm.skip;
66 fail = e->re.bm.fail;
67 mask = e->re.bm.mask;
68 complete = e->re.bm.complete && !nmatch;
69 exactlen = e->re.bm.size;
70 leftlen = e->re.bm.left + exactlen;
71 rightlen = exactlen + e->re.bm.right;
72 index = leftlen++;
73 for (;;)
74 {
75 while ((index += skip[buf[index]]) < mid);
76 if (index < HIT)
77 goto impossible;
78 index -= HIT;
79 m = mask[n = exactlen - 1][buf[index]];
80 do
81 {
82 if (!n--)
83 goto possible;
84 } while (m &= mask[n][buf[--index]]);
85 if ((index += fail[n + 1]) < len)
86 continue;
87 impossible:
88 if (inv)
89 {
90 l = r = buf + len;
91 goto invert;
92 }
93 n = 0;
94 goto done;
95 possible:
96 r = (l = buf + index) + exactlen;
97 while (l > beg)
98 if (*--l == sep)
99 {
100 l++;
101 break;
102 }
103 if ((r - l) < leftlen)
104 goto spanned;
105 while (r < end && *r != sep)
106 r++;
107 if ((r - (buf + index)) < rightlen)
108 goto spanned;
109 if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
110 {
111 if (inv)
112 {
113 invert:
114 x = beg;
115 while (beg < l)
116 {
117 while (x < l && *x != sep)
118 x++;
119 if (n = (*record)(handle, (char*)beg, x - beg))
120 goto done;
121 beg = ++x;
122 }
123 }
124 else if (n = (*record)(handle, (char*)l, r - l))
125 goto done;
126 if ((index = (r - buf) + leftlen) >= len)
127 {
128 n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
129 goto done;
130 }
131 beg = r + 1;
132 }
133 else if (n != REG_NOMATCH)
134 goto done;
135 else
136 {
137 spanned:
138 if ((index += exactlen) >= mid)
139 goto impossible;
140 }
141 }
142 done:
143 env->rex = e;
144 return n;
145 }
146