xref: /titanic_50/usr/src/lib/libast/common/regex/regnexec.c (revision 11c2b4c0e543fe2e1e5910cde1f4422cc3218160)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1985-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * posix regex executor
26  * single sized-record interface
27  */
28 
29 #include "reglib.h"
30 
31 #if _AST_REGEX_DEBUG
32 
33 #define DEBUG_TEST(f,y,n)	((debug&(debug_flag=f))?(y):(n))
34 #define DEBUG_CODE(f,y,n)	do if(debug&(f)){y}else{n} while(0)
35 #define DEBUG_INIT()		do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
36 
37 static unsigned long	debug;
38 static unsigned long	debug_flag;
39 
40 static const char*	rexnames[] =
41 {
42 	"REX_NULL",
43 	"REX_ALT",
44 	"REX_ALT_CATCH",
45 	"REX_BACK",
46 	"REX_BEG",
47 	"REX_BEG_STR",
48 	"REX_BM",
49 	"REX_CAT",
50 	"REX_CLASS",
51 	"REX_COLL_CLASS",
52 	"REX_CONJ",
53 	"REX_CONJ_LEFT",
54 	"REX_CONJ_RIGHT",
55 	"REX_DONE",
56 	"REX_DOT",
57 	"REX_END",
58 	"REX_END_STR",
59 	"REX_EXEC",
60 	"REX_FIN_STR",
61 	"REX_GROUP",
62 	"REX_GROUP_CATCH",
63 	"REX_GROUP_AHEAD",
64 	"REX_GROUP_AHEAD_CATCH",
65 	"REX_GROUP_AHEAD_NOT",
66 	"REX_GROUP_BEHIND",
67 	"REX_GROUP_BEHIND_CATCH",
68 	"REX_GROUP_BEHIND_NOT",
69 	"REX_GROUP_BEHIND_NOT_CATCH",
70 	"REX_GROUP_COND",
71 	"REX_GROUP_COND_CATCH",
72 	"REX_GROUP_CUT",
73 	"REX_GROUP_CUT_CATCH",
74 	"REX_KMP",
75 	"REX_NEG",
76 	"REX_NEG_CATCH",
77 	"REX_NEST",
78 	"REX_ONECHAR",
79 	"REX_REP",
80 	"REX_REP_CATCH",
81 	"REX_STRING",
82 	"REX_TRIE",
83 	"REX_WBEG",
84 	"REX_WEND",
85 	"REX_WORD",
86 	"REX_WORD_NOT"
87 };
88 
89 static const char* rexname(Rex_t* rex)
90 {
91 	if (!rex)
92 		return "NIL";
93 	if (rex->type >= elementsof(rexnames))
94 		return "ERROR";
95 	return rexnames[rex->type];
96 }
97 
98 #else
99 
100 #define DEBUG_INIT()
101 #define DEBUG_TEST(f,y,n)	(n)
102 #define DEBUG_CODE(f,y,n)	do {n} while(0)
103 
104 #endif
105 
106 #define BEG_ALT		1	/* beginning of an alt			*/
107 #define BEG_ONE		2	/* beginning of one iteration of a rep	*/
108 #define BEG_REP		3	/* beginning of a repetition		*/
109 #define BEG_SUB		4	/* beginning of a subexpression		*/
110 #define END_ANY		5	/* end of any of above			*/
111 
112 /*
113  * returns from parse()
114  */
115 
116 #define NONE		0	/* no parse found			*/
117 #define GOOD		1	/* some parse was found			*/
118 #define CUT		2	/* no match and no backtrack		*/
119 #define BEST		3	/* an unbeatable parse was found	*/
120 #define BAD		4	/* error ocurred			*/
121 
122 /*
123  * REG_SHELL_DOT test
124  */
125 
126 #define LEADING(e,r,s)	(*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
127 
128 /*
129  * Pos_t is for comparing parses. An entry is made in the
130  * array at the beginning and at the end of each Group_t,
131  * each iteration in a Group_t, and each Binary_t.
132  */
133 
134 typedef struct
135 {
136 	unsigned char*	p;		/* where in string		*/
137 	size_t		length;		/* length in string		*/
138 	short		serial;		/* preorder subpattern number	*/
139 	short		be;		/* which end of pair		*/
140 } Pos_t;
141 
142 /* ===== begin library support ===== */
143 
144 #define vector(t,v,i)	(((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
145 
146 static Vector_t*
147 vecopen(int inc, int siz)
148 {
149 	Vector_t*	v;
150 	Stk_t*		sp;
151 
152 	if (inc <= 0)
153 		inc = 16;
154 	if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155 		return 0;
156 	if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
157 	{
158 		stkclose(sp);
159 		return 0;
160 	}
161 	v->stk = sp;
162 	v->vec = (char*)v + sizeof(Vector_t);
163 	v->max = v->inc = inc;
164 	v->siz = siz;
165 	v->cur = 0;
166 	return v;
167 }
168 
169 static void*
170 vecseek(Vector_t** p, int index)
171 {
172 	Vector_t*	v = *p;
173 
174 	if (index >= v->max)
175 	{
176 		while ((v->max += v->inc) <= index);
177 		if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178 			return 0;
179 		*p = v;
180 		v->vec = (char*)v + sizeof(Vector_t);
181 	}
182 	return v->vec + index * v->siz;
183 }
184 
185 static void
186 vecclose(Vector_t* v)
187 {
188 	if (v)
189 		stkclose(v->stk);
190 }
191 
192 typedef struct
193 {
194 	Stk_pos_t	pos;
195 	char		data[1];
196 } Stk_frame_t;
197 
198 #define stknew(s,p)	((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199 #define stkold(s,p)	stkset(s,(p)->base,(p)->offset)
200 
201 #define stkframe(s)	(*((Stk_frame_t**)stktop(s)-1))
202 #define stkdata(s,t)	((t*)stkframe(s)->data)
203 #define stkpop(s)	stkold(s,&(stkframe(s)->pos))
204 
205 static void*
206 stkpush(Stk_t* sp, size_t size)
207 {
208 	Stk_frame_t*	f;
209 	Stk_pos_t	p;
210 
211 	stknew(sp, &p);
212 	size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213 	if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214 		return 0;
215 	f->pos = p;
216 	stkframe(sp) = f;
217 	return f->data;
218 }
219 
220 /* ===== end library support ===== */
221 
222 /*
223  * Match_frame_t is for saving and restoring match records
224  * around alternate attempts, so that fossils will not be
225  * left in the match array.  These are the only entries in
226  * the match array that are not otherwise guaranteed to
227  * have current data in them when they get used.
228  */
229 
230 typedef struct
231 {
232 	size_t			size;
233 	regmatch_t*		match;
234 	regmatch_t		save[1];
235 } Match_frame_t;
236 
237 #define matchframe	stkdata(stkstd,Match_frame_t)
238 #define matchpush(e,x)	((x)->re.group.number?_matchpush(e,x):0)
239 #define matchcopy(e,x)	((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size):(void*)0)
240 #define matchpop(e,x)	((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size),stkpop(stkstd):(void*)0)
241 
242 #define pospop(e)	(--(e)->pos->cur)
243 
244 /*
245  * allocate a frame and push a match onto the stack
246  */
247 
248 static int
249 _matchpush(Env_t* env, Rex_t* rex)
250 {
251 	Match_frame_t*	f;
252 	regmatch_t*	m;
253 	regmatch_t*	e;
254 	regmatch_t*	s;
255 	int		num;
256 
257 	if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
258 		num = 0;
259 	if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
260 	{
261 		env->error = REG_ESPACE;
262 		return 1;
263 	}
264 	f->size = num * sizeof(regmatch_t);
265 	f->match = m = env->match + rex->re.group.number;
266 	e = m + num;
267 	s = f->save;
268 	while (m < e)
269 	{
270 		*s++ = *m;
271 		*m++ = state.nomatch;
272 	}
273 	return 0;
274 }
275 
276 /*
277  * allocate a frame and push a pos onto the stack
278  */
279 
280 static int
281 pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
282 {
283 	Pos_t*	pos;
284 
285 	if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
286 	{
287 		env->error = REG_ESPACE;
288 		return 1;
289 	}
290 	pos->serial = rex->serial;
291 	pos->p = p;
292 	pos->be = be;
293 	env->pos->cur++;
294 	return 0;
295 }
296 
297 /*
298  * two matches are known to have the same length
299  * os is start of old pos array, ns is start of new,
300  * oend and nend are end+1 pointers to ends of arrays.
301  * oe and ne are ends (not end+1) of subarrays.
302  * returns 1 if new is better, -1 if old, else 0.
303  */
304 
305 #if _AST_REGEX_DEBUG
306 
307 static void
308 showmatch(regmatch_t* p)
309 {
310 	sfputc(sfstdout, '(');
311 	if (p->rm_so < 0)
312 		sfputc(sfstdout, '?');
313 	else
314 		sfprintf(sfstdout, "%d", p->rm_so);
315 	sfputc(sfstdout, ',');
316 	if (p->rm_eo < 0)
317 		sfputc(sfstdout, '?');
318 	else
319 		sfprintf(sfstdout, "%d", p->rm_eo);
320 	sfputc(sfstdout, ')');
321 }
322 
323 static int
324 better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
325 #define better	_better
326 {
327 	int	i;
328 
329 	DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n           new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
330 	i = better(env, os, ns, oend, nend, 0);
331 	DEBUG_TEST(0x0040,(sfprintf(sfstdout, "           %s\n", i <= 0 ? "OLD" : "NEW")),(0));
332 	return i;
333 }
334 
335 #endif
336 
337 static int
338 better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
339 {
340 	Pos_t*	oe;
341 	Pos_t*	ne;
342 	int	k;
343 	int	n;
344 
345 	if (env->error)
346 		return -1;
347 	for (;;)
348 	{
349 		DEBUG_CODE(0x0080,{sfprintf(sfstdout, "   %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n   %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
350 		if (ns >= nend)
351 			return DEBUG_TEST(0x8000,(os < oend),(0));
352 		if (os >= oend)
353 			return DEBUG_TEST(0x8000,(-1),(1));
354 		n = os->serial;
355 		if (ns->serial > n)
356 			return -1;
357 		if (n > ns->serial)
358 		{
359 			env->error = REG_PANIC;
360 			return -1;
361 		}
362 		if (ns->p > os->p)
363 			return 1;
364 		if (os->p > ns->p)
365 			return -1;
366 		oe = os;
367 		k = 0;
368 		for (;;)
369 			if ((++oe)->serial == n)
370 			{
371 				if (oe->be != END_ANY)
372 					k++;
373 				else if (k-- <= 0)
374 					break;
375 			}
376 		ne = ns;
377 		k = 0;
378 		for (;;)
379 			if ((++ne)->serial == n)
380 			{
381 				if (ne->be != END_ANY)
382 					k++;
383 				else if (k-- <= 0)
384 					break;
385 			}
386 		if (ne->p > oe->p)
387 			return 1;
388 		if (oe->p > ne->p)
389 			return -1;
390 		if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
391 			return k;
392 		os = oe + 1;
393 		ns = ne + 1;
394 	}
395 }
396 
397 #undef	better
398 
399 #define follow(e,r,c,s)	((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
400 
401 static int		parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
402 
403 static int
404 parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
405 {
406 	int	i;
407 	int	r = NONE;
408 	Rex_t	catcher;
409 
410 	DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->lo, n, rex->hi, env->end - s, s)),(0));
411 	if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
412 	{
413 		if (env->stack && pospush(env, rex, s, END_ANY))
414 			return BAD;
415 		i = follow(env, rex, cont, s);
416 		if (env->stack)
417 			pospop(env);
418 		switch (i)
419 		{
420 		case BAD:
421 			return BAD;
422 		case CUT:
423 			return CUT;
424 		case BEST:
425 		case GOOD:
426 			return BEST;
427 		}
428 	}
429 	if (n < rex->hi)
430 	{
431 		catcher.type = REX_REP_CATCH;
432 		catcher.serial = rex->serial;
433 		catcher.re.rep_catch.ref = rex;
434 		catcher.re.rep_catch.cont = cont;
435 		catcher.re.rep_catch.beg = s;
436 		catcher.re.rep_catch.n = n + 1;
437 		catcher.next = rex->next;
438 		if (n == 0)
439 			rex->re.rep_catch.beg = s;
440 		if (env->stack)
441 		{
442 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
443 			if (matchpush(env, rex))
444 				return BAD;
445 			if (pospush(env, rex, s, BEG_ONE))
446 				return BAD;
447 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
448 		}
449 		r = parse(env, rex->re.group.expr.rex, &catcher, s);
450 		DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d `%-.*s'\n", __LINE__, debug_flag, r, env->end - s, s)),(0));
451 		if (env->stack)
452 		{
453 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
454 			pospop(env);
455 			matchpop(env, rex);
456 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
457 		}
458 		switch (r)
459 		{
460 		case BAD:
461 			return BAD;
462 		case BEST:
463 			return BEST;
464 		case CUT:
465 			r = NONE;
466 			break;
467 		case GOOD:
468 			if (rex->flags & REG_MINIMAL)
469 				return BEST;
470 			r = GOOD;
471 			break;
472 		}
473 	}
474 	if (n < rex->lo)
475 		return r;
476 	if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
477 	{
478 		if (env->stack && pospush(env, rex, s, END_ANY))
479 			return BAD;
480 		i = follow(env, rex, cont, s);
481 		if (env->stack)
482 			pospop(env);
483 		switch (i)
484 		{
485 		case BAD:
486 			r = BAD;
487 			break;
488 		case CUT:
489 			r = CUT;
490 			break;
491 		case BEST:
492 			r = BEST;
493 			break;
494 		case GOOD:
495 			r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
496 			break;
497 		}
498 	}
499 	return r;
500 }
501 
502 static int
503 parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
504 {
505 	unsigned char*	p;
506 	int		r;
507 
508 	if (p = rex->map)
509 	{
510 		for (;;)
511 		{
512 			if (s >= env->end)
513 				return NONE;
514 			while (x->c != p[*s])
515 				if (!(x = x->sib))
516 					return NONE;
517 			if (x->end)
518 				break;
519 			x = x->son;
520 			s++;
521 		}
522 	}
523 	else
524 	{
525 		for (;;)
526 		{
527 			if (s >= env->end)
528 				return NONE;
529 			while (x->c != *s)
530 				if (!(x = x->sib))
531 					return NONE;
532 			if (x->end)
533 				break;
534 			x = x->son;
535 			s++;
536 		}
537 	}
538 	s++;
539 	if (rex->flags & REG_MINIMAL)
540 		switch (follow(env, rex, cont, s))
541 		{
542 		case BAD:
543 			return BAD;
544 		case CUT:
545 			return CUT;
546 		case BEST:
547 		case GOOD:
548 			return BEST;
549 		}
550 	if (x->son)
551 		switch (parsetrie(env, x->son, rex, cont, s))
552 		{
553 		case BAD:
554 			return BAD;
555 		case CUT:
556 			return CUT;
557 		case BEST:
558 			return BEST;
559 		case GOOD:
560 			if (rex->flags & REG_MINIMAL)
561 				return BEST;
562 			r = GOOD;
563 			break;
564 		default:
565 			r = NONE;
566 			break;
567 		}
568 	else
569 		r = NONE;
570 	if (!(rex->flags & REG_MINIMAL))
571 		switch (follow(env, rex, cont, s))
572 		{
573 		case BAD:
574 			return BAD;
575 		case CUT:
576 			return CUT;
577 		case BEST:
578 			return BEST;
579 		case GOOD:
580 			return GOOD;
581 	}
582 	return r;
583 }
584 
585 static int
586 collelt(register Celt_t* ce, char* key, int c, int x)
587 {
588 	Ckey_t	elt;
589 
590 	mbxfrm(elt, key, COLL_KEY_MAX);
591 	for (;; ce++)
592 	{
593 		switch (ce->typ)
594 		{
595 		case COLL_call:
596 			if (!x && (*ce->fun)(c))
597 				return 1;
598 			continue;
599 		case COLL_char:
600 			if (!strcmp((char*)ce->beg, (char*)elt))
601 				return 1;
602 			continue;
603 		case COLL_range:
604 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
605 				return 1;
606 			continue;
607 		case COLL_range_lc:
608 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
609 				return 1;
610 			continue;
611 		case COLL_range_uc:
612 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
613 				return 1;
614 			continue;
615 		}
616 		break;
617 	}
618 	return 0;
619 }
620 
621 static int
622 collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
623 {
624 	if (!x)
625 	{
626 		if (collelt(ce, key, c, x))
627 			return 1;
628 		if (iswlower(c))
629 			c = towupper(c);
630 		else if (iswupper(c))
631 			c = towlower(c);
632 		else
633 			return 0;
634 		x = wctomb(key, c);
635 		key[x] = 0;
636 		return collelt(ce, key, c, 0);
637 	}
638 	while (*nxt)
639 	{
640 		if (collic(ce, key, nxt + 1, c, x))
641 			return 1;
642 		if (islower(*nxt))
643 			*nxt = toupper(*nxt);
644 		else if (isupper(*nxt))
645 			*nxt = tolower(*nxt);
646 		else
647 			return 0;
648 		nxt++;
649 	}
650 	return collelt(ce, key, c, x);
651 }
652 
653 static int
654 collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
655 {
656 	unsigned char*		t;
657 	wchar_t			c;
658 	int			w;
659 	int			r;
660 	int			x;
661 	int			ic;
662 	Ckey_t			key;
663 	Ckey_t			elt;
664 
665 	ic = (rex->flags & REG_ICASE);
666 	if ((w = MBSIZE(s)) > 1)
667 	{
668 		memcpy((char*)key, (char*)s, w);
669 		key[w] = 0;
670 		t = s;
671 		c = mbchar(t);
672 #if !_lib_wctype
673 		c &= 0xff;
674 #endif
675 		x = 0;
676 	}
677 	else
678 	{
679 		c = s[0];
680 		if (ic && isupper(c))
681 			c = tolower(c);
682 		key[0] = c;
683 		key[1] = 0;
684 		if (isalpha(c))
685 		{
686 			x = e - s;
687 			if (x > COLL_KEY_MAX)
688 				x = COLL_KEY_MAX;
689 			while (w < x)
690 			{
691 				c = s[w];
692 				if (!isalpha(c))
693 					break;
694 				r = mbxfrm(elt, key, COLL_KEY_MAX);
695 				if (ic && isupper(c))
696 					c = tolower(c);
697 				key[w] = c;
698 				key[w + 1] = 0;
699 				if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
700 					break;
701 				w++;
702 			}
703 		}
704 		key[w] = 0;
705 		c = key[0];
706 		x = w - 1;
707 	}
708 	r = 1;
709 	for (;;)
710 	{
711 		if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
712 			break;
713 		if (!x)
714 		{
715 			r = 0;
716 			break;
717 		}
718 		w = x--;
719 		key[w] = 0;
720 	}
721 	*p = s + w;
722 	return rex->re.collate.invert ? !r : r;
723 }
724 
725 static unsigned char*
726 nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
727 {
728 	register int	c;
729 	register int	cc;
730 	unsigned int	n;
731 	int		oc;
732 
733 	if (type[co] & (REX_NEST_literal|REX_NEST_quote))
734 	{
735 		n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
736 		while (s < e)
737 		{
738 			c = *s++;
739 			if (c == co)
740 				return s;
741 			else if (type[c] & n)
742 			{
743 				if (s >= e || (type[c] & REX_NEST_terminator))
744 					break;
745 				s++;
746 			}
747 		}
748 	}
749 	else
750 	{
751 		cc = type[co] >> REX_NEST_SHIFT;
752 		oc = type[co] & (REX_NEST_open|REX_NEST_close);
753 		n = 1;
754 		while (s < e)
755 		{
756 			c = *s++;
757 			switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
758 			{
759 			case REX_NEST_delimiter:
760 			case REX_NEST_terminator:
761 				return oc ? 0 : s;
762 			case REX_NEST_separator:
763 				if (!oc)
764 					return s;
765 				break;
766 			case REX_NEST_escape:
767 				if (s >= e)
768 					return 0;
769 				s++;
770 				break;
771 			case REX_NEST_open|REX_NEST_close:
772 				if (c == cc)
773 				{
774 					if (!--n)
775 						return s;
776 				}
777 				/*FALLTHROUGH*/
778 			case REX_NEST_open:
779 				if (c == co)
780 				{
781 					if (!++n)
782 						return 0;
783 				}
784 				else if (!(s = nestmatch(s, e, type, c)))
785 					return 0;
786 				break;
787 			case REX_NEST_close:
788 				if (c != cc)
789 					return 0;
790 				if (!--n)
791 					return s;
792 				break;
793 			}
794 		}
795 		return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
796 	}
797 	return 0;
798 }
799 
800 static int
801 parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
802 {
803 	int		c;
804 	int		d;
805 	int		i;
806 	int		m;
807 	int		n;
808 	int		r;
809 	int*		f;
810 	unsigned char*	p;
811 	unsigned char*	t;
812 	unsigned char*	b;
813 	unsigned char*	e;
814 	char*		u;
815 	regmatch_t*	o;
816 	Trie_node_t*	x;
817 	Rex_t*		q;
818 	Rex_t		catcher;
819 	Rex_t		next;
820 
821 	for (;;)
822 	{
823 DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
824 		switch (rex->type)
825 		{
826 		case REX_ALT:
827 			if (env->stack)
828 			{
829 				if (matchpush(env, rex))
830 					return BAD;
831 				if (pospush(env, rex, s, BEG_ALT))
832 					return BAD;
833 				catcher.type = REX_ALT_CATCH;
834 				catcher.serial = rex->serial;
835 				catcher.re.alt_catch.cont = cont;
836 				catcher.next = rex->next;
837 				r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
838 				if (r < BEST || (rex->flags & REG_MINIMAL))
839 				{
840 					matchcopy(env, rex);
841 					((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
842 					n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
843 					if (n != NONE)
844 						r = n;
845 				}
846 				pospop(env);
847 				matchpop(env, rex);
848 			}
849 			else
850 			{
851 				if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
852 					r = parse(env, rex->re.group.expr.binary.right, cont, s);
853 				if (r == GOOD)
854 					r = BEST;
855 			}
856 			return r;
857 		case REX_ALT_CATCH:
858 			if (pospush(env, rex, s, END_ANY))
859 				return BAD;
860 			r = follow(env, rex, rex->re.alt_catch.cont, s);
861 			pospop(env);
862 			return r;
863 		case REX_BACK:
864 			o = &env->match[rex->lo];
865 			if (o->rm_so < 0)
866 				return NONE;
867 			i = o->rm_eo - o->rm_so;
868 			e = s + i;
869 			if (e > env->end)
870 				return NONE;
871 			t = env->beg + o->rm_so;
872 			if (!(p = rex->map))
873 			{
874 				while (s < e)
875 					if (*s++ != *t++)
876 						return NONE;
877 			}
878 			else if (!mbwide())
879 			{
880 				while (s < e)
881 					if (p[*s++] != p[*t++])
882 						return NONE;
883 			}
884 			else
885 			{
886 				while (s < e)
887 				{
888 					c = mbchar(s);
889 					d = mbchar(t);
890 					if (towupper(c) != towupper(d))
891 						return NONE;
892 				}
893 			}
894 			break;
895 		case REX_BEG:
896 			if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
897 				return NONE;
898 			break;
899 		case REX_CLASS:
900 			if (LEADING(env, rex, s))
901 				return NONE;
902 			n = rex->hi;
903 			if (n > env->end - s)
904 				n = env->end - s;
905 			m = rex->lo;
906 			if (m > n)
907 				return NONE;
908 			r = NONE;
909 			if (!(rex->flags & REG_MINIMAL))
910 			{
911 				for (i = 0; i < n; i++)
912 					if (!settst(rex->re.charclass, s[i]))
913 					{
914 						n = i;
915 						break;
916 					}
917 				for (s += n; n-- >= m; s--)
918 					switch (follow(env, rex, cont, s))
919 					{
920 					case BAD:
921 						return BAD;
922 					case CUT:
923 						return CUT;
924 					case BEST:
925 						return BEST;
926 					case GOOD:
927 						r = GOOD;
928 						break;
929 					}
930 			}
931 			else
932 			{
933 				for (e = s + m; s < e; s++)
934 					if (!settst(rex->re.charclass, *s))
935 						return r;
936 				e += n - m;
937 				for (;;)
938 				{
939 					switch (follow(env, rex, cont, s))
940 					{
941 					case BAD:
942 						return BAD;
943 					case CUT:
944 						return CUT;
945 					case BEST:
946 					case GOOD:
947 						return BEST;
948 					}
949 					if (s >= e || !settst(rex->re.charclass, *s))
950 						break;
951 					s++;
952 				}
953 			}
954 			return r;
955 		case REX_COLL_CLASS:
956 			if (LEADING(env, rex, s))
957 				return NONE;
958 			n = rex->hi;
959 			if (n > env->end - s)
960 				n = env->end - s;
961 			m = rex->lo;
962 			if (m > n)
963 				return NONE;
964 			r = NONE;
965 			e = env->end;
966 			if (!(rex->flags & REG_MINIMAL))
967 			{
968 				if (!(b = (unsigned char*)stkpush(stkstd, n)))
969 				{
970 					env->error = REG_ESPACE;
971 					return BAD;
972 				}
973 				for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
974 				{
975 					b[i] = t - s;
976 					s = t;
977 				}
978 				for (; i-- >= rex->lo; s -= b[i])
979 					switch (follow(env, rex, cont, s))
980 					{
981 					case BAD:
982 						stkpop(stkstd);
983 						return BAD;
984 					case CUT:
985 						stkpop(stkstd);
986 						return CUT;
987 					case BEST:
988 						stkpop(stkstd);
989 						return BEST;
990 					case GOOD:
991 						r = GOOD;
992 						break;
993 					}
994 				stkpop(stkstd);
995 			}
996 			else
997 			{
998 				for (i = 0; i < m && s < e; i++, s = t)
999 					if (!collmatch(rex, s, e, &t))
1000 						return r;
1001 				while (i++ <= n)
1002 				{
1003 					switch (follow(env, rex, cont, s))
1004 					{
1005 					case BAD:
1006 						return BAD;
1007 					case CUT:
1008 						return CUT;
1009 					case BEST:
1010 					case GOOD:
1011 						return BEST;
1012 					}
1013 					if (s >= e || !collmatch(rex, s, e, &s))
1014 						break;
1015 				}
1016 			}
1017 			return r;
1018 		case REX_CONJ:
1019 			next.type = REX_CONJ_RIGHT;
1020 			next.re.conj_right.cont = cont;
1021 			next.next = rex->next;
1022 			catcher.type = REX_CONJ_LEFT;
1023 			catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1024 			catcher.re.conj_left.cont = &next;
1025 			catcher.re.conj_left.beg = s;
1026 			catcher.next = 0;
1027 			return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1028 		case REX_CONJ_LEFT:
1029 			rex->re.conj_left.cont->re.conj_right.end = s;
1030 			cont = rex->re.conj_left.cont;
1031 			s = rex->re.conj_left.beg;
1032 			rex = rex->re.conj_left.right;
1033 			continue;
1034 		case REX_CONJ_RIGHT:
1035 			if (rex->re.conj_right.end != s)
1036 				return NONE;
1037 			cont = rex->re.conj_right.cont;
1038 			break;
1039 		case REX_DONE:
1040 			if (!env->stack)
1041 				return BEST;
1042 			n = s - env->beg;
1043 			r = env->nsub;
1044 			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1045 			if ((i = env->best[0].rm_eo) >= 0)
1046 			{
1047 				if (rex->flags & REG_MINIMAL)
1048 				{
1049 					if (n > i)
1050 						return GOOD;
1051 				}
1052 				else
1053 				{
1054 					if (n < i)
1055 						return GOOD;
1056 				}
1057 				if (n == i && better(env,
1058 						     (Pos_t*)env->bestpos->vec,
1059 				   		     (Pos_t*)env->pos->vec,
1060 				   		     (Pos_t*)env->bestpos->vec+env->bestpos->cur,
1061 				   		     (Pos_t*)env->pos->vec+env->pos->cur,
1062 						     0) <= 0)
1063 					return GOOD;
1064 			}
1065 			env->best[0].rm_eo = n;
1066 			memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1067 			n = env->pos->cur;
1068 			if (!vector(Pos_t, env->bestpos, n))
1069 			{
1070 				env->error = REG_ESPACE;
1071 				return BAD;
1072 			}
1073 			env->bestpos->cur = n;
1074 			memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1075 			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1076 			return GOOD;
1077 		case REX_DOT:
1078 			if (LEADING(env, rex, s))
1079 				return NONE;
1080 			n = rex->hi;
1081 			if (n > env->end - s)
1082 				n = env->end - s;
1083 			m = rex->lo;
1084 			if (m > n)
1085 				return NONE;
1086 			if ((c = rex->explicit) >= 0 && !mbwide())
1087 				for (i = 0; i < n; i++)
1088 					if (s[i] == c)
1089 					{
1090 						n = i;
1091 						break;
1092 					}
1093 			r = NONE;
1094 			if (!(rex->flags & REG_MINIMAL))
1095 			{
1096 				if (!mbwide())
1097 				{
1098 					for (s += n; n-- >= m; s--)
1099 						switch (follow(env, rex, cont, s))
1100 						{
1101 						case BAD:
1102 							return BAD;
1103 						case CUT:
1104 							return CUT;
1105 						case BEST:
1106 							return BEST;
1107 						case GOOD:
1108 							r = GOOD;
1109 							break;
1110 						}
1111 				}
1112 				else
1113 				{
1114 					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1115 					{
1116 						env->error = REG_ESPACE;
1117 						return BAD;
1118 					}
1119 					e = env->end;
1120 					for (i = 0; s < e && i < n && *s != c; i++)
1121 						s += b[i] = MBSIZE(s);
1122 					for (; i-- >= m; s -= b[i])
1123 						switch (follow(env, rex, cont, s))
1124 						{
1125 						case BAD:
1126 							stkpop(stkstd);
1127 							return BAD;
1128 						case CUT:
1129 							stkpop(stkstd);
1130 							return CUT;
1131 						case BEST:
1132 							stkpop(stkstd);
1133 							return BEST;
1134 						case GOOD:
1135 							r = GOOD;
1136 							break;
1137 						}
1138 					stkpop(stkstd);
1139 				}
1140 			}
1141 			else
1142 			{
1143 				if (!mbwide())
1144 				{
1145 					e = s + n;
1146 					for (s += m; s <= e; s++)
1147 						switch (follow(env, rex, cont, s))
1148 						{
1149 						case BAD:
1150 							return BAD;
1151 						case CUT:
1152 							return CUT;
1153 						case BEST:
1154 						case GOOD:
1155 							return BEST;
1156 						}
1157 				}
1158 				else
1159 				{
1160 					e = env->end;
1161 					for (i = 0; s < e && i < m && *s != c; i++)
1162 						s += MBSIZE(s);
1163 					if (i >= m)
1164 						for (; s <= e && i <= n; s += MBSIZE(s), i++)
1165 							switch (follow(env, rex, cont, s))
1166 							{
1167 							case BAD:
1168 								return BAD;
1169 							case CUT:
1170 								return CUT;
1171 							case BEST:
1172 							case GOOD:
1173 								return BEST;
1174 							}
1175 				}
1176 			}
1177 			return r;
1178 		case REX_END:
1179 			if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1180 				return NONE;
1181 			break;
1182 		case REX_GROUP:
1183 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1184 			if (env->stack)
1185 			{
1186 				if (rex->re.group.number)
1187 					env->match[rex->re.group.number].rm_so = s - env->beg;
1188 				if (pospush(env, rex, s, BEG_SUB))
1189 					return BAD;
1190 				catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1191 			}
1192 			catcher.type = REX_GROUP_CATCH;
1193 			catcher.serial = rex->serial;
1194 			catcher.re.group_catch.cont = cont;
1195 			catcher.next = rex->next;
1196 			r = parse(env, rex->re.group.expr.rex, &catcher, s);
1197 			if (env->stack)
1198 			{
1199 				pospop(env);
1200 				if (rex->re.group.number)
1201 					env->match[rex->re.group.number].rm_so = -1;
1202 			}
1203 			return r;
1204 		case REX_GROUP_CATCH:
1205 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1206 			if (env->stack)
1207 			{
1208 				if (rex->re.group_catch.eo)
1209 					*rex->re.group_catch.eo = s - env->beg;
1210 				if (pospush(env, rex, s, END_ANY))
1211 					return BAD;
1212 			}
1213 			r = follow(env, rex, rex->re.group_catch.cont, s);
1214 			if (env->stack)
1215 			{
1216 				pospop(env);
1217 				if (rex->re.group_catch.eo)
1218 					*rex->re.group_catch.eo = -1;
1219 			}
1220 			return r;
1221 		case REX_GROUP_AHEAD:
1222 			catcher.type = REX_GROUP_AHEAD_CATCH;
1223 			catcher.flags = rex->flags;
1224 			catcher.serial = rex->serial;
1225 			catcher.re.rep_catch.beg = s;
1226 			catcher.re.rep_catch.cont = cont;
1227 			catcher.next = rex->next;
1228 			return parse(env, rex->re.group.expr.rex, &catcher, s);
1229 		case REX_GROUP_AHEAD_CATCH:
1230 			return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1231 		case REX_GROUP_AHEAD_NOT:
1232 			r = parse(env, rex->re.group.expr.rex, NiL, s);
1233 			if (r == NONE)
1234 				r = follow(env, rex, cont, s);
1235 			else if (r != BAD)
1236 				r = NONE;
1237 			return r;
1238 		case REX_GROUP_BEHIND:
1239 			if ((s - env->beg) < rex->re.group.size)
1240 				return NONE;
1241 			catcher.type = REX_GROUP_BEHIND_CATCH;
1242 			catcher.flags = rex->flags;
1243 			catcher.serial = rex->serial;
1244 			catcher.re.behind_catch.beg = s;
1245 			catcher.re.behind_catch.end = e = env->end;
1246 			catcher.re.behind_catch.cont = cont;
1247 			catcher.next = rex->next;
1248 			for (t = s - rex->re.group.size; t >= env->beg; t--)
1249 			{
1250 				env->end = s;
1251 				r = parse(env, rex->re.group.expr.rex, &catcher, t);
1252 				env->end = e;
1253 				if (r != NONE)
1254 					return r;
1255 			}
1256 			return NONE;
1257 		case REX_GROUP_BEHIND_CATCH:
1258 			if (s != rex->re.behind_catch.beg)
1259 				return NONE;
1260 			env->end = rex->re.behind_catch.end;
1261 			return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1262 		case REX_GROUP_BEHIND_NOT:
1263 			if ((s - env->beg) < rex->re.group.size)
1264 				r = NONE;
1265 			else
1266 			{
1267 				catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1268 				catcher.re.neg_catch.beg = s;
1269 				catcher.next = 0;
1270 				e = env->end;
1271 				env->end = s;
1272 				for (t = s - rex->re.group.size; t >= env->beg; t--)
1273 				{
1274 					r = parse(env, rex->re.group.expr.rex, &catcher, t);
1275 					if (r != NONE)
1276 						break;
1277 				}
1278 				env->end = e;
1279 			}
1280 			if (r == NONE)
1281 				r = follow(env, rex, cont, s);
1282 			else if (r != BAD)
1283 				r = NONE;
1284 			return r;
1285 		case REX_GROUP_BEHIND_NOT_CATCH:
1286 			return s == rex->re.neg_catch.beg ? GOOD : NONE;
1287 		case REX_GROUP_COND:
1288 			if (q = rex->re.group.expr.binary.right)
1289 			{
1290 				catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1291 				catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1292 			}
1293 			else
1294 				catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1295 			if (q = rex->re.group.expr.binary.left)
1296 			{
1297 				catcher.type = REX_GROUP_COND_CATCH;
1298 				catcher.flags = rex->flags;
1299 				catcher.serial = rex->serial;
1300 				catcher.re.cond_catch.yes = 0;
1301 				catcher.re.cond_catch.beg = s;
1302 				catcher.re.cond_catch.cont = cont;
1303 				catcher.next = rex->next;
1304 				r = parse(env, q, &catcher, s);
1305 				if (r == BAD || catcher.re.cond_catch.yes)
1306 					return r;
1307 			}
1308 			else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1309 				r = GOOD;
1310 			else
1311 				r = NONE;
1312 			if (q = catcher.re.cond_catch.next[r != NONE])
1313 			{
1314 				catcher.type = REX_CAT;
1315 				catcher.flags = q->flags;
1316 				catcher.serial = q->serial;
1317 				catcher.re.group_catch.cont = cont;
1318 				catcher.next = rex->next;
1319 				return parse(env, q, &catcher, s);
1320 			}
1321 			return follow(env, rex, cont, s);
1322 		case REX_GROUP_COND_CATCH:
1323 			rex->re.cond_catch.yes = 1;
1324 			catcher.type = REX_CAT;
1325 			catcher.flags = rex->flags;
1326 			catcher.serial = rex->serial;
1327 			catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1328 			catcher.next = rex->next;
1329 			return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1330 		case REX_CAT:
1331 			return follow(env, rex, rex->re.group_catch.cont, s);
1332 		case REX_GROUP_CUT:
1333 			catcher.type = REX_GROUP_CUT_CATCH;
1334 			catcher.flags = rex->flags;
1335 			catcher.serial = rex->serial;
1336 			catcher.re.group_catch.cont = cont;
1337 			catcher.next = rex->next;
1338 			return parse(env, rex->re.group.expr.rex, &catcher, s);
1339 		case REX_GROUP_CUT_CATCH:
1340 			switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1341 			{
1342 			case GOOD:
1343 				r = BEST;
1344 				break;
1345 			case NONE:
1346 				r = CUT;
1347 				break;
1348 			}
1349 			return r;
1350 		case REX_KMP:
1351 			f = rex->re.string.fail;
1352 			b = rex->re.string.base;
1353 			n = rex->re.string.size;
1354 			t = s;
1355 			e = env->end;
1356 			if (p = rex->map)
1357 			{
1358 				while (t + n <= e)
1359 				{
1360 					for (i = -1; t < e; t++)
1361 					{
1362 						while (i >= 0 && b[i+1] != p[*t])
1363 							i = f[i];
1364 						if (b[i+1] == p[*t])
1365 							i++;
1366 						if (i + 1 == n)
1367 						{
1368 							t++;
1369 							if (env->stack)
1370 								env->best[0].rm_so = t - s - n;
1371 							switch (follow(env, rex, cont, t))
1372 							{
1373 							case BAD:
1374 								return BAD;
1375 							case CUT:
1376 								return CUT;
1377 							case BEST:
1378 							case GOOD:
1379 								return BEST;
1380 							}
1381 							t -= n - 1;
1382 							break;
1383 						}
1384 					}
1385 				}
1386 			}
1387 			else
1388 			{
1389 				while (t + n <= e)
1390 				{
1391 					for (i = -1; t < e; t++)
1392 					{
1393 						while (i >= 0 && b[i+1] != *t)
1394 							i = f[i];
1395 						if (b[i+1] == *t)
1396 							i++;
1397 						if (i + 1 == n)
1398 						{
1399 							t++;
1400 							if (env->stack)
1401 								env->best[0].rm_so = t - s - n;
1402 							switch (follow(env, rex, cont, t))
1403 							{
1404 							case BAD:
1405 								return BAD;
1406 							case CUT:
1407 								return CUT;
1408 							case BEST:
1409 							case GOOD:
1410 								return BEST;
1411 							}
1412 							t -= n - 1;
1413 							break;
1414 						}
1415 					}
1416 				}
1417 			}
1418 			return NONE;
1419 		case REX_NEG:
1420 			if (LEADING(env, rex, s))
1421 				return NONE;
1422 			i = env->end - s;
1423 			n = ((i + 7) >> 3) + 1;
1424 			catcher.type = REX_NEG_CATCH;
1425 			catcher.re.neg_catch.beg = s;
1426 			if (!(p = (unsigned char*)stkpush(stkstd, n)))
1427 				return BAD;
1428 			memset(catcher.re.neg_catch.index = p, 0, n);
1429 			catcher.next = rex->next;
1430 			if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1431 				r = BAD;
1432 			else
1433 			{
1434 				r = NONE;
1435 				for (; i >= 0; i--)
1436 					if (!bittst(p, i))
1437 					{
1438 						switch (follow(env, rex, cont, s + i))
1439 						{
1440 						case BAD:
1441 							r = BAD;
1442 							break;
1443 						case BEST:
1444 							r = BEST;
1445 							break;
1446 						case CUT:
1447 							r = CUT;
1448 							break;
1449 						case GOOD:
1450 							r = GOOD;
1451 							/*FALLTHROUGH*/
1452 						default:
1453 							continue;
1454 						}
1455 						break;
1456 					}
1457 			}
1458 			stkpop(stkstd);
1459 			return r;
1460 		case REX_NEG_CATCH:
1461 			bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1462 			return NONE;
1463 		case REX_NEST:
1464 			do
1465 			{
1466 				if ((c = *s++) == rex->re.nest.primary)
1467 				{
1468 					if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1469 						return NONE;
1470 					break;
1471 				}
1472 				if (rex->re.nest.primary >= 0)
1473 					return NONE;
1474 			    	if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1475 					break;
1476 			    	if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1477 					return NONE;
1478 			} while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1479 			break;
1480 		case REX_NULL:
1481 			break;
1482 		case REX_ONECHAR:
1483 			n = rex->hi;
1484 			if (n > env->end - s)
1485 				n = env->end - s;
1486 			m = rex->lo;
1487 			if (m > n)
1488 				return NONE;
1489 			r = NONE;
1490 			c = rex->re.onechar;
1491 			if (!(rex->flags & REG_MINIMAL))
1492 			{
1493 				if (!mbwide())
1494 				{
1495 					if (p = rex->map)
1496 					{
1497 						for (i = 0; i < n; i++, s++)
1498 							if (p[*s] != c)
1499 								break;
1500 					}
1501 					else
1502 					{
1503 						for (i = 0; i < n; i++, s++)
1504 							if (*s != c)
1505 								break;
1506 					}
1507 					for (; i-- >= m; s--)
1508 						switch (follow(env, rex, cont, s))
1509 						{
1510 						case BAD:
1511 							return BAD;
1512 						case BEST:
1513 							return BEST;
1514 						case CUT:
1515 							return CUT;
1516 						case GOOD:
1517 							r = GOOD;
1518 							break;
1519 						}
1520 				}
1521 				else
1522 				{
1523 					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1524 					{
1525 						env->error = REG_ESPACE;
1526 						return BAD;
1527 					}
1528 					e = env->end;
1529 					if (!(rex->flags & REG_ICASE))
1530 					{
1531 						for (i = 0; s < e && i < n; i++, s = t)
1532 						{
1533 							t = s;
1534 							if (mbchar(t) != c)
1535 								break;
1536 							b[i] = t - s;
1537 						}
1538 					}
1539 					else
1540 					{
1541 						for (i = 0; s < e && i < n; i++, s = t)
1542 						{
1543 							t = s;
1544 							if (towupper(mbchar(t)) != c)
1545 								break;
1546 							b[i] = t - s;
1547 						}
1548 					}
1549 					for (; i-- >= m; s -= b[i])
1550 						switch (follow(env, rex, cont, s))
1551 						{
1552 						case BAD:
1553 							stkpop(stkstd);
1554 							return BAD;
1555 						case BEST:
1556 							stkpop(stkstd);
1557 							return BEST;
1558 						case CUT:
1559 							stkpop(stkstd);
1560 							return CUT;
1561 						case GOOD:
1562 							r = GOOD;
1563 							break;
1564 						}
1565 					stkpop(stkstd);
1566 				}
1567 			}
1568 			else
1569 			{
1570 				if (!mbwide())
1571 				{
1572 					e = s + m;
1573 					if (p = rex->map)
1574 					{
1575 						for (; s < e; s++)
1576 							if (p[*s] != c)
1577 								return r;
1578 						e += n - m;
1579 						for (;;)
1580 						{
1581 							switch (follow(env, rex, cont, s))
1582 							{
1583 							case BAD:
1584 								return BAD;
1585 							case CUT:
1586 								return CUT;
1587 							case BEST:
1588 							case GOOD:
1589 								return BEST;
1590 							}
1591 							if (s >= e || p[*s++] != c)
1592 								break;
1593 						}
1594 					}
1595 					else
1596 					{
1597 						for (; s < e; s++)
1598 							if (*s != c)
1599 								return r;
1600 						e += n - m;
1601 						for (;;)
1602 						{
1603 							switch (follow(env, rex, cont, s))
1604 							{
1605 							case BAD:
1606 								return BAD;
1607 							case CUT:
1608 								return CUT;
1609 							case BEST:
1610 							case GOOD:
1611 								return BEST;
1612 							}
1613 							if (s >= e || *s++ != c)
1614 								break;
1615 						}
1616 					}
1617 				}
1618 				else
1619 				{
1620 					if (!(rex->flags & REG_ICASE))
1621 					{
1622 						for (i = 0; i < m && s < e; i++, s = t)
1623 						{
1624 							t = s;
1625 							if (mbchar(t) != c)
1626 								return r;
1627 						}
1628 						while (i++ <= n)
1629 						{
1630 							switch (follow(env, rex, cont, s))
1631 							{
1632 							case BAD:
1633 								return BAD;
1634 							case CUT:
1635 								return CUT;
1636 							case BEST:
1637 							case GOOD:
1638 								return BEST;
1639 							}
1640 							if (s >= e || mbchar(s) != c)
1641 								break;
1642 						}
1643 					}
1644 					else
1645 					{
1646 						for (i = 0; i < m && s < e; i++, s = t)
1647 						{
1648 							t = s;
1649 							if (towupper(mbchar(t)) != c)
1650 								return r;
1651 						}
1652 						while (i++ <= n)
1653 						{
1654 							switch (follow(env, rex, cont, s))
1655 							{
1656 							case BAD:
1657 								return BAD;
1658 							case CUT:
1659 								return CUT;
1660 							case BEST:
1661 							case GOOD:
1662 								return BEST;
1663 							}
1664 							if (s >= e || towupper(mbchar(s)) != c)
1665 								break;
1666 						}
1667 					}
1668 				}
1669 			}
1670 			return r;
1671 		case REX_REP:
1672 			if (env->stack && pospush(env, rex, s, BEG_REP))
1673 				return BAD;
1674 			r = parserep(env, rex, cont, s, 0);
1675 			if (env->stack)
1676 				pospop(env);
1677 			return r;
1678 		case REX_REP_CATCH:
1679 			DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1680 			if (env->stack && pospush(env, rex, s, END_ANY))
1681 				return BAD;
1682 			if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1683 			{
1684 				/*
1685 				 * optional empty iteration
1686 				 */
1687 
1688 DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1689 				if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1690 					r = NONE;
1691 				else if (pospush(env, rex, s, END_ANY))
1692 					r = BAD;
1693 				else
1694 				{
1695 					r = follow(env, rex, rex->re.rep_catch.cont, s);
1696 					pospop(env);
1697 				}
1698 			}
1699 			else
1700 				r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1701 			if (env->stack)
1702 				pospop(env);
1703 			return r;
1704 		case REX_STRING:
1705 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1706 			if (rex->re.string.size > (env->end - s))
1707 				return NONE;
1708 			t = rex->re.string.base;
1709 			e = t + rex->re.string.size;
1710 			if (!(p = rex->map))
1711 			{
1712 				while (t < e)
1713 					if (*s++ != *t++)
1714 						return NONE;
1715 			}
1716 			else if (!mbwide())
1717 			{
1718 				while (t < e)
1719 					if (p[*s++] != *t++)
1720 						return NONE;
1721 			}
1722 			else
1723 			{
1724 				while (t < e)
1725 				{
1726 					c = mbchar(s);
1727 					d = mbchar(t);
1728 					if (towupper(c) != d)
1729 						return NONE;
1730 				}
1731 			}
1732 			break;
1733 		case REX_TRIE:
1734 			if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1735 				return NONE;
1736 			return parsetrie(env, x, rex, cont, s);
1737 		case REX_EXEC:
1738 			u = 0;
1739 			r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1740 			e = (unsigned char*)u;
1741 			if (e >= s && e <= env->end)
1742 				s = e;
1743 			switch (r)
1744 			{
1745 			case 0:
1746 				break;
1747 			case REG_NOMATCH:
1748 				return NONE;
1749 			default:
1750 				env->error = r;
1751 				return BAD;
1752 			}
1753 			break;
1754 		case REX_WBEG:
1755 			if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1756 				return NONE;
1757 			break;
1758 		case REX_WEND:
1759 			if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1760 				return NONE;
1761 			break;
1762 		case REX_WORD:
1763 			if (s > env->beg && isword(*(s - 1)) == isword(*s))
1764 				return NONE;
1765 			break;
1766 		case REX_WORD_NOT:
1767 			if (s == env->beg || isword(*(s - 1)) != isword(*s))
1768 				return NONE;
1769 			break;
1770 		case REX_BEG_STR:
1771 			if (s != env->beg)
1772 				return NONE;
1773 			break;
1774 		case REX_END_STR:
1775 			for (t = s; t < env->end && *t == '\n'; t++);
1776 			if (t < env->end)
1777 				return NONE;
1778 			break;
1779 		case REX_FIN_STR:
1780 			if (s < env->end)
1781 				return NONE;
1782 			break;
1783 		}
1784 		if (!(rex = rex->next))
1785 		{
1786 			if (!(rex = cont))
1787 				break;
1788 			cont = 0;
1789 		}
1790 	}
1791 	return GOOD;
1792 }
1793 
1794 #if _AST_REGEX_DEBUG
1795 
1796 static void
1797 listnode(Rex_t* e, int level)
1798 {
1799 	int	i;
1800 
1801 	if (e)
1802 	{
1803 		do
1804 		{
1805 			for (i = 0; i < level; i++)
1806 				sfprintf(sfstderr, "  ");
1807 			sfprintf(sfstderr, "%s\n", rexname(e));
1808 			switch (e->type)
1809 			{
1810 			case REX_ALT:
1811 			case REX_CONJ:
1812 				listnode(e->re.group.expr.binary.left, level + 1);
1813 				listnode(e->re.group.expr.binary.right, level + 1);
1814 				break;
1815 			case REX_GROUP:
1816 			case REX_GROUP_AHEAD:
1817 			case REX_GROUP_AHEAD_NOT:
1818 			case REX_GROUP_BEHIND:
1819 			case REX_GROUP_BEHIND_NOT:
1820 			case REX_GROUP_CUT:
1821 			case REX_NEG:
1822 			case REX_REP:
1823 				listnode(e->re.group.expr.rex, level + 1);
1824 				break;
1825 			}
1826 		} while (e = e->next);
1827 	}
1828 }
1829 
1830 static int
1831 list(Env_t* env, Rex_t* rex)
1832 {
1833 	sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1834 	if (rex)
1835 		listnode(rex, 1);
1836 	return 0;
1837 }
1838 
1839 #endif
1840 
1841 /*
1842  * returning REG_BADPAT or REG_ESPACE is not explicitly
1843  * countenanced by the standard
1844  */
1845 
1846 int
1847 regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1848 {
1849 	register int	n;
1850 	register int	i;
1851 	int		j;
1852 	int		k;
1853 	int		m;
1854 	int		advance;
1855 	Env_t*		env;
1856 	Rex_t*		e;
1857 
1858 	DEBUG_INIT();
1859 	DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1860 	if (!p || !(env = p->env))
1861 		return REG_BADPAT;
1862 	if (!s)
1863 		return fatal(env->disc, REG_BADPAT, NiL);
1864 	if (len < env->min)
1865 	{
1866 		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1867 		return REG_NOMATCH;
1868 	}
1869 	env->regex = p;
1870 	env->beg = (unsigned char*)s;
1871 	env->end = env->beg + len;
1872 	stknew(stkstd, &env->stk);
1873 	env->flags &= ~REG_EXEC;
1874 	env->flags |= (flags & REG_EXEC);
1875 	advance = 0;
1876 	if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1877 	{
1878 		n = env->nsub;
1879 		if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1880 		    !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1881 		    !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1882 		{
1883 			k = REG_ESPACE;
1884 			goto done;
1885 		}
1886 		env->pos->cur = env->bestpos->cur = 0;
1887 		env->best = &env->match[n + 1];
1888 		env->best[0].rm_so = 0;
1889 		env->best[0].rm_eo = -1;
1890 		for (i = 0; i <= n; i++)
1891 			env->match[i] = state.nomatch;
1892 		if (flags & REG_ADVANCE)
1893 			advance = 1;
1894 	}
1895 	DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1896 	k = REG_NOMATCH;
1897 	if ((e = env->rex)->type == REX_BM)
1898 	{
1899 		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1900 		if (len < e->re.bm.right)
1901 		{
1902 			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1903 			goto done;
1904 		}
1905 		else if (!(flags & REG_LEFT))
1906 		{
1907 			register unsigned char*	buf = (unsigned char*)s;
1908 			register size_t		index = e->re.bm.left + e->re.bm.size;
1909 			register size_t		mid = len - e->re.bm.right;
1910 			register size_t*	skip = e->re.bm.skip;
1911 			register size_t*	fail = e->re.bm.fail;
1912 			register Bm_mask_t**	mask = e->re.bm.mask;
1913 			Bm_mask_t		m;
1914 			size_t			x;
1915 
1916 			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1917 			for (;;)
1918 			{
1919 				while ((index += skip[buf[index]]) < mid);
1920 				if (index < HIT)
1921 				{
1922 					DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1923 					goto done;
1924 				}
1925 				index -= HIT;
1926 				m = mask[n = e->re.bm.size - 1][buf[index]];
1927 				do
1928 				{
1929 					if (!n--)
1930 					{
1931 						if (e->re.bm.back < 0)
1932 							goto possible;
1933 						if (advance)
1934 						{
1935 							i = index - e->re.bm.back;
1936 							s += i;
1937 							if (env->stack)
1938 								env->best[0].rm_so += i;
1939 							goto possible;
1940 						}
1941 						x = index;
1942 						if (index < e->re.bm.back)
1943 							index = 0;
1944 						else
1945 							index -= e->re.bm.back;
1946 						while (index <= x)
1947 						{
1948 							if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1949 							{
1950 								if (env->stack)
1951 									env->best[0].rm_so = index;
1952 								n = env->nsub;
1953 								goto hit;
1954 							}
1955 							index++;
1956 						}
1957 						index += e->re.bm.size;
1958 						break;
1959 					}
1960 				} while (m &= mask[n][buf[--index]]);
1961 				if ((index += fail[n + 1]) >= len)
1962 					goto done;
1963 			}
1964 		}
1965  possible:
1966 		n = env->nsub;
1967 		e = e->next;
1968 	}
1969 	j = env->once || (flags & REG_LEFT);
1970 	DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1971 	while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1972 	{
1973 		if (j)
1974 			goto done;
1975 		i = MBSIZE(s);
1976 		s += i;
1977 		if ((unsigned char*)s > env->end - env->min)
1978 			goto done;
1979 		if (env->stack)
1980 			env->best[0].rm_so += i;
1981 	}
1982 	if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1983 		goto done;
1984  hit:
1985 	if (k = env->error)
1986 		goto done;
1987 	if (i == CUT)
1988 	{
1989 		k = env->error = REG_NOMATCH;
1990 		goto done;
1991 	}
1992 	if (!(env->flags & REG_NOSUB))
1993 	{
1994 		k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1995 		for (i = j = m = 0; j < nmatch; i++)
1996 			if (!i || !k || (i & 1))
1997 			{
1998 				if (i > n)
1999 					match[j] = state.nomatch;
2000 				else
2001 					match[m = j] = env->best[i];
2002 				j++;
2003 			}
2004 		if (k)
2005 		{
2006 			while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2007 				m--;
2008 			((regex_t*)p)->re_nsub = m;
2009 		}
2010 	}
2011 	k = 0;
2012  done:
2013 	stkold(stkstd, &env->stk);
2014 	env->stk.base = 0;
2015 	if (k > REG_NOMATCH)
2016 		fatal(p->env->disc, k, NiL);
2017 	return k;
2018 }
2019 
2020 void
2021 regfree(regex_t* p)
2022 {
2023 	Env_t*	env;
2024 
2025 	if (p && (env = p->env))
2026 	{
2027 #if _REG_subcomp
2028 		if (env->sub)
2029 		{
2030 			regsubfree(p);
2031 			p->re_sub = 0;
2032 		}
2033 #endif
2034 		p->env = 0;
2035 		if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2036 		{
2037 			drop(env->disc, env->rex);
2038 			if (env->pos)
2039 				vecclose(env->pos);
2040 			if (env->bestpos)
2041 				vecclose(env->bestpos);
2042 			if (env->stk.base)
2043 				stkold(stkstd, &env->stk);
2044 			alloc(env->disc, env, 0);
2045 		}
2046 	}
2047 }
2048