1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin * *
3da2e3ebdSchin * This software is part of the ast package *
4*3e14f97fSRoger A. Faulkner * Copyright (c) 1985-2010 AT&T Intellectual Property *
5da2e3ebdSchin * and is licensed under the *
6da2e3ebdSchin * Common Public License, Version 1.0 *
77c2fbfb3SApril Chin * by AT&T Intellectual Property *
8da2e3ebdSchin * *
9da2e3ebdSchin * A copy of the License is available at *
10da2e3ebdSchin * http://www.opensource.org/licenses/cpl1.0.txt *
11da2e3ebdSchin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12da2e3ebdSchin * *
13da2e3ebdSchin * Information and Software Systems Research *
14da2e3ebdSchin * AT&T Research *
15da2e3ebdSchin * Florham Park NJ *
16da2e3ebdSchin * *
17da2e3ebdSchin * Glenn Fowler <gsf@research.att.com> *
18da2e3ebdSchin * David Korn <dgk@research.att.com> *
19da2e3ebdSchin * Phong Vo <kpv@research.att.com> *
20da2e3ebdSchin * *
21da2e3ebdSchin ***********************************************************************/
22da2e3ebdSchin #pragma prototyped
23da2e3ebdSchin
24da2e3ebdSchin /*
25da2e3ebdSchin * posix regex record executor
26da2e3ebdSchin * multiple record sized-buffer interface
27da2e3ebdSchin */
28da2e3ebdSchin
29da2e3ebdSchin #include "reglib.h"
30da2e3ebdSchin
31da2e3ebdSchin /*
32da2e3ebdSchin * call regnexec() on records selected by Boyer-Moore
33da2e3ebdSchin */
34da2e3ebdSchin
35da2e3ebdSchin int
regrexec(const regex_t * p,const char * s,size_t len,size_t nmatch,regmatch_t * match,regflags_t flags,int sep,void * handle,regrecord_t record)36da2e3ebdSchin regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
37da2e3ebdSchin {
38da2e3ebdSchin register unsigned char* buf = (unsigned char*)s;
39da2e3ebdSchin register unsigned char* beg;
40da2e3ebdSchin register unsigned char* l;
41da2e3ebdSchin register unsigned char* r;
42da2e3ebdSchin register unsigned char* x;
43da2e3ebdSchin register size_t* skip;
44da2e3ebdSchin register size_t* fail;
45da2e3ebdSchin register Bm_mask_t** mask;
46da2e3ebdSchin register size_t index;
47da2e3ebdSchin register int n;
48da2e3ebdSchin unsigned char* end;
49da2e3ebdSchin size_t mid;
50da2e3ebdSchin int complete;
51da2e3ebdSchin int exactlen;
52da2e3ebdSchin int leftlen;
53da2e3ebdSchin int rightlen;
54da2e3ebdSchin int inv;
55da2e3ebdSchin Bm_mask_t m;
56da2e3ebdSchin Env_t* env;
57da2e3ebdSchin Rex_t* e;
58da2e3ebdSchin
59da2e3ebdSchin if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
60da2e3ebdSchin return REG_BADPAT;
61da2e3ebdSchin inv = (flags & REG_INVERT) != 0;
62da2e3ebdSchin buf = beg = (unsigned char*)s;
63da2e3ebdSchin end = buf + len;
64da2e3ebdSchin mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
65da2e3ebdSchin skip = e->re.bm.skip;
66da2e3ebdSchin fail = e->re.bm.fail;
67da2e3ebdSchin mask = e->re.bm.mask;
68da2e3ebdSchin complete = e->re.bm.complete && !nmatch;
69da2e3ebdSchin exactlen = e->re.bm.size;
70da2e3ebdSchin leftlen = e->re.bm.left + exactlen;
71da2e3ebdSchin rightlen = exactlen + e->re.bm.right;
72da2e3ebdSchin index = leftlen++;
73da2e3ebdSchin for (;;)
74da2e3ebdSchin {
75da2e3ebdSchin while ((index += skip[buf[index]]) < mid);
76da2e3ebdSchin if (index < HIT)
77da2e3ebdSchin goto impossible;
78da2e3ebdSchin index -= HIT;
79da2e3ebdSchin m = mask[n = exactlen - 1][buf[index]];
80da2e3ebdSchin do
81da2e3ebdSchin {
82da2e3ebdSchin if (!n--)
83da2e3ebdSchin goto possible;
84da2e3ebdSchin } while (m &= mask[n][buf[--index]]);
85da2e3ebdSchin if ((index += fail[n + 1]) < len)
86da2e3ebdSchin continue;
87da2e3ebdSchin impossible:
88da2e3ebdSchin if (inv)
89da2e3ebdSchin {
90da2e3ebdSchin l = r = buf + len;
91da2e3ebdSchin goto invert;
92da2e3ebdSchin }
93da2e3ebdSchin n = 0;
94da2e3ebdSchin goto done;
95da2e3ebdSchin possible:
96da2e3ebdSchin r = (l = buf + index) + exactlen;
97da2e3ebdSchin while (l > beg)
98da2e3ebdSchin if (*--l == sep)
99da2e3ebdSchin {
100da2e3ebdSchin l++;
101da2e3ebdSchin break;
102da2e3ebdSchin }
103da2e3ebdSchin if ((r - l) < leftlen)
104da2e3ebdSchin goto spanned;
105da2e3ebdSchin while (r < end && *r != sep)
106da2e3ebdSchin r++;
107da2e3ebdSchin if ((r - (buf + index)) < rightlen)
108da2e3ebdSchin goto spanned;
109da2e3ebdSchin if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
110da2e3ebdSchin {
111da2e3ebdSchin if (inv)
112da2e3ebdSchin {
113da2e3ebdSchin invert:
114da2e3ebdSchin x = beg;
115da2e3ebdSchin while (beg < l)
116da2e3ebdSchin {
117da2e3ebdSchin while (x < l && *x != sep)
118da2e3ebdSchin x++;
119da2e3ebdSchin if (n = (*record)(handle, (char*)beg, x - beg))
120da2e3ebdSchin goto done;
121da2e3ebdSchin beg = ++x;
122da2e3ebdSchin }
123da2e3ebdSchin }
124da2e3ebdSchin else if (n = (*record)(handle, (char*)l, r - l))
125da2e3ebdSchin goto done;
126da2e3ebdSchin if ((index = (r - buf) + leftlen) >= len)
127da2e3ebdSchin {
128da2e3ebdSchin n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
129da2e3ebdSchin goto done;
130da2e3ebdSchin }
131da2e3ebdSchin beg = r + 1;
132da2e3ebdSchin }
133da2e3ebdSchin else if (n != REG_NOMATCH)
134da2e3ebdSchin goto done;
135da2e3ebdSchin else
136da2e3ebdSchin {
137da2e3ebdSchin spanned:
138da2e3ebdSchin if ((index += exactlen) >= mid)
139da2e3ebdSchin goto impossible;
140da2e3ebdSchin }
141da2e3ebdSchin }
142da2e3ebdSchin done:
143da2e3ebdSchin env->rex = e;
144da2e3ebdSchin return n;
145da2e3ebdSchin }
146