xref: /titanic_50/usr/src/lib/libast/common/regex/regrexec.c (revision edcc07547a39d6570197493a9836083bd6b2a197)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1985-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * posix regex record executor
26  * multiple record sized-buffer interface
27  */
28 
29 #include "reglib.h"
30 
31 /*
32  * call regnexec() on records selected by Boyer-Moore
33  */
34 
/*
 * Scan the len-byte buffer s for sep-delimited records and hand the
 * selected ones to (*record)(handle, start, length).
 *
 * p must be a compiled pattern whose env->rex node has type REX_BM;
 * if s, p or p->env is null, or the node has another type, REG_BADPAT
 * is returned. Candidate positions are located with the skip/fail/mask
 * tables in e->re.bm. Each candidate is widened to its enclosing record
 * and, unless the `complete' shortcut applies, confirmed by regnexec()
 * with the caller's nmatch/match/flags.
 *
 * Without REG_INVERT in flags each confirmed record is reported. With
 * REG_INVERT the records between the previous reporting point and the
 * confirmed record are reported instead, and whatever follows the last
 * confirmed record is reported at the end.
 *
 * Returns 0 when the scan finishes, otherwise the first non-zero value
 * returned by record() or the first regnexec() result that is neither 0
 * nor REG_NOMATCH. env->rex is overwritten before regnexec() is called
 * and is reset to e on every path through `done'.
 */
35 int
36 regrexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags, int sep, void* handle, regrecord_t record)
37 {
38 	register unsigned char*	buf = (unsigned char*)s;
39 	register unsigned char*	beg;
40 	register unsigned char*	l;
41 	register unsigned char*	r;
42 	register unsigned char*	x;
43 	register size_t*	skip;
44 	register size_t*	fail;
45 	register Bm_mask_t**	mask;
46 	register size_t		index;
47 	register int		n;
48 	unsigned char*		end;
49 	size_t			mid;
50 	int			complete;
51 	int			exactlen;
52 	int			leftlen;
53 	int			rightlen;
54 	int			inv;
55 	Bm_mask_t		m;
56 	Env_t*			env;
57 	Rex_t*			e;
58 
	/* only a pattern whose top node is REX_BM can be handled here */
59 	if (!s || !p || !(env = p->env) || (e = env->rex)->type != REX_BM)
60 		return REG_BADPAT;
61 	inv = (flags & REG_INVERT) != 0;
	/* beg tracks the start of the first record not yet reported */
62 	buf = beg = (unsigned char*)s;
63 	end = buf + len;
	/* mid = len - bm.right, clamped at 0: upper bound for the skip loop */
64 	mid = (len < e->re.bm.right) ? 0 : (len - e->re.bm.right);
65 	skip = e->re.bm.skip;
66 	fail = e->re.bm.fail;
67 	mask = e->re.bm.mask;
	/* regnexec() is bypassed only when bm.complete is set and nmatch is 0 */
68 	complete = e->re.bm.complete && !nmatch;
69 	exactlen = e->re.bm.size;
70 	leftlen = e->re.bm.left + exactlen;
71 	rightlen = exactlen + e->re.bm.right;
	/*
	 * index starts at bm.left + bm.size; leftlen is then bumped by one
	 * and that larger value is what the checks and advances below use
	 */
72 	index = leftlen++;
73 	for (;;)
74 	{
		/*
		 * advance by skip[] until index reaches mid; a result of at
		 * least HIT is treated as a candidate and HIT is subtracted
		 * to recover the buffer offset (presumably skip[] holds HIT
		 * for the byte that ends the exact string -- confirm in the
		 * table builder)
		 *
		 * NOTE(review): buf[index] is read before index is compared
		 * with mid, so if index >= len on entry this reads beyond
		 * the len bytes supplied -- confirm callers guarantee a long
		 * enough or padded buffer
		 */
75 		while ((index += skip[buf[index]]) < mid);
76 		if (index < HIT)
77 			goto impossible;
78 		index -= HIT;
		/*
		 * walk backwards over exactlen bytes, and-ing the per-position
		 * masks into m; once n has counted down from exactlen - 1 to 0
		 * the candidate is accepted with index on its first byte
		 */
79 		m = mask[n = exactlen - 1][buf[index]];
80 		do
81 		{
82 			if (!n--)
83 				goto possible;
84 		} while (m &= mask[n][buf[--index]]);
		/* m became 0: shift by fail[] and retry while still inside the buffer */
85 		if ((index += fail[n + 1]) < len)
86 			continue;
		/*
		 * no further candidate: inverted mode treats the end of the
		 * buffer as an empty match so that `invert' reports every
		 * record from beg onward; otherwise return 0
		 */
87  impossible:
88 		if (inv)
89 		{
90 			l = r = buf + len;
91 			goto invert;
92 		}
93 		n = 0;
94 		goto done;
		/*
		 * candidate at buf + index: r is one past the exact string and
		 * l is moved back to just after the previous sep, stopping at
		 * beg if no sep is found
		 */
95  possible:
96 		r = (l = buf + index) + exactlen;
97 		while (l > beg)
98 			if (*--l == sep)
99 			{
100 				l++;
101 				break;
102 			}
		/* fewer than leftlen bytes between the record start and r: reject */
103 		if ((r - l) < leftlen)
104 			goto spanned;
		/* extend r to the next sep or to the end of the buffer */
105 		while (r < end && *r != sep)
106 			r++;
		/* fewer than rightlen bytes between the candidate and the record end: reject */
107 		if ((r - (buf + index)) < rightlen)
108 			goto spanned;
		/*
		 * accept directly when complete; otherwise run regnexec() on
		 * the record [l, r) with env->rex set to e when the record is
		 * longer than 128 bytes and to e->next otherwise
		 *
		 * NOTE(review): if e->next were null the assignment is false,
		 * regnexec() is not called and n still holds the value left
		 * by the mask loop -- confirm e->next is always set for REX_BM
		 */
109 		if (complete || (env->rex = ((r - l) > 128) ? e : e->next) && !(n = regnexec(p, (char*)l, r - l, nmatch, match, flags)))
110 		{
111 			if (inv)
112 			{
				/*
				 * report each sep-delimited record in [beg, l);
				 * also entered from `impossible' with l at the
				 * end of the buffer
				 */
113  invert:
114 				x = beg;
115 				while (beg < l)
116 				{
117 					while (x < l && *x != sep)
118 						x++;
					/* a non-zero return from record() ends the scan */
119 					if (n = (*record)(handle, (char*)beg, x - beg))
120 						goto done;
121 					beg = ++x;
122 				}
123 			}
124 			else if (n = (*record)(handle, (char*)l, r - l))
125 				goto done;
			/*
			 * next search position is leftlen bytes past r; if that
			 * is outside the buffer the scan is over, and inverted
			 * mode passes any bytes after r to record() in one call
			 */
126 			if ((index = (r - buf) + leftlen) >= len)
127 			{
128 				n = (inv && (++r - buf) < len) ? (*record)(handle, (char*)r, (buf + len) - r): 0;
129 				goto done;
130 			}
			/* the next unreported record starts after the sep at r */
131 			beg = r + 1;
132 		}
		/* any regnexec() result other than REG_NOMATCH is returned as is */
133 		else if (n != REG_NOMATCH)
134 			goto done;
135 		else
136 		{
			/*
			 * rejected candidate or REG_NOMATCH: step over the exact
			 * string and continue unless that reaches mid
			 */
137  spanned:
138 			if ((index += exactlen) >= mid)
139 				goto impossible;
140 		}
141 	}
	/* undo the env->rex substitution made for regnexec() */
142  done:
143 	env->rex = e;
144 	return n;
145 }
146