xref: /titanic_50/usr/src/lib/libast/common/regex/regnexec.c (revision bfed486ad8de8b8ebc6345a8e10accae08bf2f45)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2008 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * posix regex executor
26  * single sized-record interface
27  */
28 
29 #include "reglib.h"
30 
31 #if _AST_REGEX_DEBUG
32 
33 #define DEBUG_TEST(f,y,n)	((debug&(debug_flag=f))?(y):(n))
34 #define DEBUG_CODE(f,y,n)	do if(debug&(f)){y}else{n} while(0)
35 #define DEBUG_INIT()		do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
36 
37 static unsigned long	debug;
38 static unsigned long	debug_flag;
39 
40 static const char*	rexnames[] =
41 {
42 	"REX_NULL",
43 	"REX_ALT",
44 	"REX_ALT_CATCH",
45 	"REX_BACK",
46 	"REX_BEG",
47 	"REX_BEG_STR",
48 	"REX_BM",
49 	"REX_CAT",
50 	"REX_CLASS",
51 	"REX_COLL_CLASS",
52 	"REX_CONJ",
53 	"REX_CONJ_LEFT",
54 	"REX_CONJ_RIGHT",
55 	"REX_DONE",
56 	"REX_DOT",
57 	"REX_END",
58 	"REX_END_STR",
59 	"REX_EXEC",
60 	"REX_FIN_STR",
61 	"REX_GROUP",
62 	"REX_GROUP_CATCH",
63 	"REX_GROUP_AHEAD",
64 	"REX_GROUP_AHEAD_CATCH",
65 	"REX_GROUP_AHEAD_NOT",
66 	"REX_GROUP_BEHIND",
67 	"REX_GROUP_BEHIND_CATCH",
68 	"REX_GROUP_BEHIND_NOT",
69 	"REX_GROUP_BEHIND_NOT_CATCH",
70 	"REX_GROUP_COND",
71 	"REX_GROUP_COND_CATCH",
72 	"REX_GROUP_CUT",
73 	"REX_GROUP_CUT_CATCH",
74 	"REX_KMP",
75 	"REX_NEG",
76 	"REX_NEG_CATCH",
77 	"REX_NEST",
78 	"REX_ONECHAR",
79 	"REX_REP",
80 	"REX_REP_CATCH",
81 	"REX_STRING",
82 	"REX_TRIE",
83 	"REX_WBEG",
84 	"REX_WEND",
85 	"REX_WORD",
86 	"REX_WORD_NOT"
87 };
88 
89 static const char* rexname(Rex_t* rex)
90 {
91 	if (!rex)
92 		return "NIL";
93 	if (rex->type >= elementsof(rexnames))
94 		return "ERROR";
95 	return rexnames[rex->type];
96 }
97 
98 #else
99 
100 #define DEBUG_INIT()
101 #define DEBUG_TEST(f,y,n)	(n)
102 #define DEBUG_CODE(f,y,n)	do {n} while(0)
103 
104 #endif
105 
106 #define BEG_ALT		1	/* beginning of an alt			*/
107 #define BEG_ONE		2	/* beginning of one iteration of a rep	*/
108 #define BEG_REP		3	/* beginning of a repetition		*/
109 #define BEG_SUB		4	/* beginning of a subexpression		*/
110 #define END_ANY		5	/* end of any of above			*/
111 
112 /*
113  * returns from parse()
114  */
115 
116 #define NONE		0	/* no parse found			*/
117 #define GOOD		1	/* some parse was found			*/
118 #define CUT		2	/* no match and no backtrack		*/
119 #define BEST		3	/* an unbeatable parse was found	*/
120 #define BAD		4	/* error ocurred			*/
121 
122 /*
123  * REG_SHELL_DOT test
124  */
125 
126 #define LEADING(e,r,s)	(*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
127 
128 /*
129  * Pos_t is for comparing parses. An entry is made in the
130  * array at the beginning and at the end of each Group_t,
131  * each iteration in a Group_t, and each Binary_t.
132  */
133 
134 typedef struct
135 {
136 	unsigned char*	p;		/* where in string		*/
137 	size_t		length;		/* length in string		*/
138 	short		serial;		/* preorder subpattern number	*/
139 	short		be;		/* which end of pair		*/
140 } Pos_t;
141 
142 /* ===== begin library support ===== */
143 
144 #define vector(t,v,i)	(((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
145 
146 static Vector_t*
147 vecopen(int inc, int siz)
148 {
149 	Vector_t*	v;
150 	Stk_t*		sp;
151 
152 	if (inc <= 0)
153 		inc = 16;
154 	if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155 		return 0;
156 	if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
157 	{
158 		stkclose(sp);
159 		return 0;
160 	}
161 	v->stk = sp;
162 	v->vec = (char*)v + sizeof(Vector_t);
163 	v->max = v->inc = inc;
164 	v->siz = siz;
165 	v->cur = 0;
166 	return v;
167 }
168 
169 static void*
170 vecseek(Vector_t** p, int index)
171 {
172 	Vector_t*	v = *p;
173 
174 	if (index >= v->max)
175 	{
176 		while ((v->max += v->inc) <= index);
177 		if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178 			return 0;
179 		*p = v;
180 		v->vec = (char*)v + sizeof(Vector_t);
181 	}
182 	return v->vec + index * v->siz;
183 }
184 
185 static void
186 vecclose(Vector_t* v)
187 {
188 	if (v)
189 		stkclose(v->stk);
190 }
191 
192 typedef struct
193 {
194 	Stk_pos_t	pos;
195 	char		data[1];
196 } Stk_frame_t;
197 
198 #define stknew(s,p)	((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199 #define stkold(s,p)	stkset(s,(p)->base,(p)->offset)
200 
201 #define stkframe(s)	(*((Stk_frame_t**)stktop(s)-1))
202 #define stkdata(s,t)	((t*)stkframe(s)->data)
203 #define stkpop(s)	stkold(s,&(stkframe(s)->pos))
204 
205 static void*
206 stkpush(Stk_t* sp, size_t size)
207 {
208 	Stk_frame_t*	f;
209 	Stk_pos_t	p;
210 
211 	stknew(sp, &p);
212 	size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213 	if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214 		return 0;
215 	f->pos = p;
216 	stkframe(sp) = f;
217 	return f->data;
218 }
219 
220 /* ===== end library support ===== */
221 
222 /*
223  * Match_frame_t is for saving and restoring match records
224  * around alternate attempts, so that fossils will not be
225  * left in the match array.  These are the only entries in
226  * the match array that are not otherwise guaranteed to
227  * have current data in them when they get used.
228  */
229 
230 typedef struct
231 {
232 	size_t			size;
233 	regmatch_t*		match;
234 	regmatch_t		save[1];
235 } Match_frame_t;
236 
237 #define matchframe	stkdata(stkstd,Match_frame_t)
238 #define matchpush(e,x)	((x)->re.group.number?_matchpush(e,x):0)
239 #define matchcopy(e,x)	((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size):(void*)0)
240 #define matchpop(e,x)	((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size),stkpop(stkstd):(void*)0)
241 
242 #define pospop(e)	(--(e)->pos->cur)
243 
244 /*
245  * allocate a frame and push a match onto the stack
246  */
247 
248 static int
249 _matchpush(Env_t* env, Rex_t* rex)
250 {
251 	Match_frame_t*	f;
252 	regmatch_t*	m;
253 	regmatch_t*	e;
254 	regmatch_t*	s;
255 	int		num;
256 
257 	if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
258 		num = 0;
259 	if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
260 	{
261 		env->error = REG_ESPACE;
262 		return 1;
263 	}
264 	f->size = num * sizeof(regmatch_t);
265 	f->match = m = env->match + rex->re.group.number;
266 	e = m + num;
267 	s = f->save;
268 	while (m < e)
269 	{
270 		*s++ = *m;
271 		*m++ = state.nomatch;
272 	}
273 	return 0;
274 }
275 
276 /*
277  * allocate a frame and push a pos onto the stack
278  */
279 
280 static int
281 pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
282 {
283 	Pos_t*	pos;
284 
285 	if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
286 	{
287 		env->error = REG_ESPACE;
288 		return 1;
289 	}
290 	pos->serial = rex->serial;
291 	pos->p = p;
292 	pos->be = be;
293 	env->pos->cur++;
294 	return 0;
295 }
296 
297 /*
298  * two matches are known to have the same length
299  * os is start of old pos array, ns is start of new,
300  * oend and nend are end+1 pointers to ends of arrays.
301  * oe and ne are ends (not end+1) of subarrays.
302  * returns 1 if new is better, -1 if old, else 0.
303  */
304 
305 static int
306 better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
307 {
308 	Pos_t*	oe;
309 	Pos_t*	ne;
310 	int	k;
311 	int	n;
312 
313 	if (env->error)
314 		return -1;
315 	for (;;)
316 	{
317 		DEBUG_CODE(0x0080,{sfprintf(sfstdout, "   %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n   %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
318 		if (ns >= nend)
319 			return DEBUG_TEST(0x8000,(os < oend),(0));
320 		if (os >= oend)
321 			return DEBUG_TEST(0x8000,(-1),(1));
322 		n = os->serial;
323 		if (ns->serial > n)
324 			return -1;
325 		if (n > ns->serial)
326 		{
327 			env->error = REG_PANIC;
328 			return -1;
329 		}
330 		if (ns->p > os->p)
331 			return 1;
332 		if (os->p > ns->p)
333 			return -1;
334 		oe = os;
335 		k = 0;
336 		for (;;)
337 			if ((++oe)->serial == n)
338 			{
339 				if (oe->be != END_ANY)
340 					k++;
341 				else if (k-- <= 0)
342 					break;
343 			}
344 		ne = ns;
345 		k = 0;
346 		for (;;)
347 			if ((++ne)->serial == n)
348 			{
349 				if (ne->be != END_ANY)
350 					k++;
351 				else if (k-- <= 0)
352 					break;
353 			}
354 		if (ne->p > oe->p)
355 			return 1;
356 		if (oe->p > ne->p)
357 			return -1;
358 		if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
359 			return k;
360 		os = oe + 1;
361 		ns = ne + 1;
362 	}
363 }
364 
365 #if _AST_REGEX_DEBUG
366 
367 static void
368 showmatch(regmatch_t* p)
369 {
370 	sfputc(sfstdout, '(');
371 	if (p->rm_so < 0)
372 		sfputc(sfstdout, '?');
373 	else
374 		sfprintf(sfstdout, "%d", p->rm_so);
375 	sfputc(sfstdout, ',');
376 	if (p->rm_eo < 0)
377 		sfputc(sfstdout, '?');
378 	else
379 		sfprintf(sfstdout, "%d", p->rm_eo);
380 	sfputc(sfstdout, ')');
381 }
382 
383 static int
384 _better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
385 {
386 	int	i;
387 
388 	DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n           new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
389 	i = better(env, os, ns, oend, nend, 0);
390 	DEBUG_TEST(0x0040,(sfprintf(sfstdout, "           %s\n", i <= 0 ? "OLD" : "NEW")),(0));
391 	return i;
392 }
393 
394 #define better	_better
395 
396 #endif
397 
398 #define follow(e,r,c,s)	((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
399 
400 static int		parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
401 
402 static int
403 parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
404 {
405 	int	i;
406 	int	r = NONE;
407 	Rex_t	catcher;
408 
409 	DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->lo, n, rex->hi, env->end - s, s)),(0));
410 	if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
411 	{
412 		if (env->stack && pospush(env, rex, s, END_ANY))
413 			return BAD;
414 		i = follow(env, rex, cont, s);
415 		if (env->stack)
416 			pospop(env);
417 		switch (i)
418 		{
419 		case BAD:
420 			return BAD;
421 		case CUT:
422 			return CUT;
423 		case BEST:
424 		case GOOD:
425 			return BEST;
426 		}
427 	}
428 	if (n < rex->hi)
429 	{
430 		catcher.type = REX_REP_CATCH;
431 		catcher.serial = rex->serial;
432 		catcher.re.rep_catch.ref = rex;
433 		catcher.re.rep_catch.cont = cont;
434 		catcher.re.rep_catch.beg = s;
435 		catcher.re.rep_catch.n = n + 1;
436 		catcher.next = rex->next;
437 		if (n == 0)
438 			rex->re.rep_catch.beg = s;
439 		if (env->stack)
440 		{
441 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
442 			if (matchpush(env, rex))
443 				return BAD;
444 			if (pospush(env, rex, s, BEG_ONE))
445 				return BAD;
446 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
447 		}
448 		r = parse(env, rex->re.group.expr.rex, &catcher, s);
449 		DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d `%-.*s'\n", __LINE__, debug_flag, r, env->end - s, s)),(0));
450 		if (env->stack)
451 		{
452 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
453 			pospop(env);
454 			matchpop(env, rex);
455 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP (%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
456 		}
457 		switch (r)
458 		{
459 		case BAD:
460 			return BAD;
461 		case BEST:
462 			return BEST;
463 		case CUT:
464 			r = NONE;
465 			break;
466 		case GOOD:
467 			if (rex->flags & REG_MINIMAL)
468 				return BEST;
469 			r = GOOD;
470 			break;
471 		}
472 	}
473 	if (n < rex->lo)
474 		return r;
475 	if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
476 	{
477 		if (env->stack && pospush(env, rex, s, END_ANY))
478 			return BAD;
479 		i = follow(env, rex, cont, s);
480 		if (env->stack)
481 			pospop(env);
482 		switch (i)
483 		{
484 		case BAD:
485 			r = BAD;
486 			break;
487 		case CUT:
488 			r = CUT;
489 			break;
490 		case BEST:
491 			r = BEST;
492 			break;
493 		case GOOD:
494 			r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
495 			break;
496 		}
497 	}
498 	return r;
499 }
500 
501 static int
502 parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
503 {
504 	unsigned char*	p;
505 	int		r;
506 
507 	if (p = rex->map)
508 	{
509 		for (;;)
510 		{
511 			if (s >= env->end)
512 				return NONE;
513 			while (x->c != p[*s])
514 				if (!(x = x->sib))
515 					return NONE;
516 			if (x->end)
517 				break;
518 			x = x->son;
519 			s++;
520 		}
521 	}
522 	else
523 	{
524 		for (;;)
525 		{
526 			if (s >= env->end)
527 				return NONE;
528 			while (x->c != *s)
529 				if (!(x = x->sib))
530 					return NONE;
531 			if (x->end)
532 				break;
533 			x = x->son;
534 			s++;
535 		}
536 	}
537 	s++;
538 	if (rex->flags & REG_MINIMAL)
539 		switch (follow(env, rex, cont, s))
540 		{
541 		case BAD:
542 			return BAD;
543 		case CUT:
544 			return CUT;
545 		case BEST:
546 		case GOOD:
547 			return BEST;
548 		}
549 	if (x->son)
550 		switch (parsetrie(env, x->son, rex, cont, s))
551 		{
552 		case BAD:
553 			return BAD;
554 		case CUT:
555 			return CUT;
556 		case BEST:
557 			return BEST;
558 		case GOOD:
559 			if (rex->flags & REG_MINIMAL)
560 				return BEST;
561 			r = GOOD;
562 			break;
563 		default:
564 			r = NONE;
565 			break;
566 		}
567 	else
568 		r = NONE;
569 	if (!(rex->flags & REG_MINIMAL))
570 		switch (follow(env, rex, cont, s))
571 		{
572 		case BAD:
573 			return BAD;
574 		case CUT:
575 			return CUT;
576 		case BEST:
577 			return BEST;
578 		case GOOD:
579 			return GOOD;
580 	}
581 	return r;
582 }
583 
584 static int
585 collelt(register Celt_t* ce, char* key, int c, int x)
586 {
587 	Ckey_t	elt;
588 
589 	mbxfrm(elt, key, COLL_KEY_MAX);
590 	for (;; ce++)
591 	{
592 		switch (ce->typ)
593 		{
594 		case COLL_call:
595 			if (!x && (*ce->fun)(c))
596 				return 1;
597 			continue;
598 		case COLL_char:
599 			if (!strcmp((char*)ce->beg, (char*)elt))
600 				return 1;
601 			continue;
602 		case COLL_range:
603 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
604 				return 1;
605 			continue;
606 		case COLL_range_lc:
607 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
608 				return 1;
609 			continue;
610 		case COLL_range_uc:
611 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
612 				return 1;
613 			continue;
614 		}
615 		break;
616 	}
617 	return 0;
618 }
619 
620 static int
621 collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
622 {
623 	if (!x)
624 	{
625 		if (collelt(ce, key, c, x))
626 			return 1;
627 		if (iswlower(c))
628 			c = towupper(c);
629 		else if (iswupper(c))
630 			c = towlower(c);
631 		else
632 			return 0;
633 		x = wctomb(key, c);
634 		key[x] = 0;
635 		return collelt(ce, key, c, 0);
636 	}
637 	while (*nxt)
638 	{
639 		if (collic(ce, key, nxt + 1, c, x))
640 			return 1;
641 		if (islower(*nxt))
642 			*nxt = toupper(*nxt);
643 		else if (isupper(*nxt))
644 			*nxt = tolower(*nxt);
645 		else
646 			return 0;
647 		nxt++;
648 	}
649 	return collelt(ce, key, c, x);
650 }
651 
652 static int
653 collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
654 {
655 	unsigned char*		t;
656 	wchar_t			c;
657 	int			w;
658 	int			r;
659 	int			x;
660 	int			ic;
661 	Ckey_t			key;
662 	Ckey_t			elt;
663 
664 	ic = (rex->flags & REG_ICASE);
665 	if ((w = MBSIZE(s)) > 1)
666 	{
667 		memcpy((char*)key, (char*)s, w);
668 		key[w] = 0;
669 		t = s;
670 		c = mbchar(t);
671 #if !_lib_wctype
672 		c &= 0xff;
673 #endif
674 		x = 0;
675 	}
676 	else
677 	{
678 		c = s[0];
679 		if (ic && isupper(c))
680 			c = tolower(c);
681 		key[0] = c;
682 		key[1] = 0;
683 		if (isalpha(c))
684 		{
685 			x = e - s;
686 			if (x > COLL_KEY_MAX)
687 				x = COLL_KEY_MAX;
688 			while (w < x)
689 			{
690 				c = s[w];
691 				if (!isalpha(c))
692 					break;
693 				r = mbxfrm(elt, key, COLL_KEY_MAX);
694 				if (ic && isupper(c))
695 					c = tolower(c);
696 				key[w] = c;
697 				key[w + 1] = 0;
698 				if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
699 					break;
700 				w++;
701 			}
702 		}
703 		key[w] = 0;
704 		c = key[0];
705 		x = w - 1;
706 	}
707 	r = 1;
708 	for (;;)
709 	{
710 		if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
711 			break;
712 		if (!x)
713 		{
714 			r = 0;
715 			break;
716 		}
717 		w = x--;
718 		key[w] = 0;
719 	}
720 	*p = s + w;
721 	return rex->re.collate.invert ? !r : r;
722 }
723 
724 static unsigned char*
725 nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
726 {
727 	register int	c;
728 	register int	cc;
729 	unsigned int	n;
730 	int		oc;
731 
732 	if (type[co] & (REX_NEST_literal|REX_NEST_quote))
733 	{
734 		n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
735 		while (s < e)
736 		{
737 			c = *s++;
738 			if (c == co)
739 				return s;
740 			else if (type[c] & n)
741 			{
742 				if (s >= e || (type[c] & REX_NEST_terminator))
743 					break;
744 				s++;
745 			}
746 		}
747 	}
748 	else
749 	{
750 		cc = type[co] >> REX_NEST_SHIFT;
751 		oc = type[co] & (REX_NEST_open|REX_NEST_close);
752 		n = 1;
753 		while (s < e)
754 		{
755 			c = *s++;
756 			switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
757 			{
758 			case REX_NEST_delimiter:
759 			case REX_NEST_terminator:
760 				return oc ? 0 : s;
761 			case REX_NEST_separator:
762 				if (!oc)
763 					return s;
764 				break;
765 			case REX_NEST_escape:
766 				if (s >= e)
767 					return 0;
768 				s++;
769 				break;
770 			case REX_NEST_open|REX_NEST_close:
771 				if (c == cc)
772 				{
773 					if (!--n)
774 						return s;
775 				}
776 				/*FALLTHROUGH*/
777 			case REX_NEST_open:
778 				if (c == co)
779 				{
780 					if (!++n)
781 						return 0;
782 				}
783 				else if (!(s = nestmatch(s, e, type, c)))
784 					return 0;
785 				break;
786 			case REX_NEST_close:
787 				if (c != cc)
788 					return 0;
789 				if (!--n)
790 					return s;
791 				break;
792 			}
793 		}
794 		return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
795 	}
796 	return 0;
797 }
798 
799 static int
800 parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
801 {
802 	int		c;
803 	int		d;
804 	int		i;
805 	int		m;
806 	int		n;
807 	int		r;
808 	int*		f;
809 	unsigned char*	p;
810 	unsigned char*	t;
811 	unsigned char*	b;
812 	unsigned char*	e;
813 	char*		u;
814 	regmatch_t*	o;
815 	Trie_node_t*	x;
816 	Rex_t*		q;
817 	Rex_t		catcher;
818 	Rex_t		next;
819 
820 	for (;;)
821 	{
822 DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
823 		switch (rex->type)
824 		{
825 		case REX_ALT:
826 			if (env->stack)
827 			{
828 				if (matchpush(env, rex))
829 					return BAD;
830 				if (pospush(env, rex, s, BEG_ALT))
831 					return BAD;
832 				catcher.type = REX_ALT_CATCH;
833 				catcher.serial = rex->serial;
834 				catcher.re.alt_catch.cont = cont;
835 				catcher.next = rex->next;
836 				r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
837 				if (r < BEST || (rex->flags & REG_MINIMAL))
838 				{
839 					matchcopy(env, rex);
840 					((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
841 					n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
842 					if (n != NONE)
843 						r = n;
844 				}
845 				pospop(env);
846 				matchpop(env, rex);
847 			}
848 			else
849 			{
850 				if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
851 					r = parse(env, rex->re.group.expr.binary.right, cont, s);
852 				if (r == GOOD)
853 					r = BEST;
854 			}
855 			return r;
856 		case REX_ALT_CATCH:
857 			if (pospush(env, rex, s, END_ANY))
858 				return BAD;
859 			r = follow(env, rex, rex->re.alt_catch.cont, s);
860 			pospop(env);
861 			return r;
862 		case REX_BACK:
863 			o = &env->match[rex->lo];
864 			if (o->rm_so < 0)
865 				return NONE;
866 			i = o->rm_eo - o->rm_so;
867 			e = s + i;
868 			if (e > env->end)
869 				return NONE;
870 			t = env->beg + o->rm_so;
871 			if (!(p = rex->map))
872 			{
873 				while (s < e)
874 					if (*s++ != *t++)
875 						return NONE;
876 			}
877 			else if (!mbwide())
878 			{
879 				while (s < e)
880 					if (p[*s++] != p[*t++])
881 						return NONE;
882 			}
883 			else
884 			{
885 				while (s < e)
886 				{
887 					c = mbchar(s);
888 					d = mbchar(t);
889 					if (towupper(c) != towupper(d))
890 						return NONE;
891 				}
892 			}
893 			break;
894 		case REX_BEG:
895 			if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
896 				return NONE;
897 			break;
898 		case REX_CLASS:
899 			if (LEADING(env, rex, s))
900 				return NONE;
901 			n = rex->hi;
902 			if (n > env->end - s)
903 				n = env->end - s;
904 			m = rex->lo;
905 			if (m > n)
906 				return NONE;
907 			r = NONE;
908 			if (!(rex->flags & REG_MINIMAL))
909 			{
910 				for (i = 0; i < n; i++)
911 					if (!settst(rex->re.charclass, s[i]))
912 					{
913 						n = i;
914 						break;
915 					}
916 				for (s += n; n-- >= m; s--)
917 					switch (follow(env, rex, cont, s))
918 					{
919 					case BAD:
920 						return BAD;
921 					case CUT:
922 						return CUT;
923 					case BEST:
924 						return BEST;
925 					case GOOD:
926 						r = GOOD;
927 						break;
928 					}
929 			}
930 			else
931 			{
932 				for (e = s + m; s < e; s++)
933 					if (!settst(rex->re.charclass, *s))
934 						return r;
935 				e += n - m;
936 				for (;;)
937 				{
938 					switch (follow(env, rex, cont, s))
939 					{
940 					case BAD:
941 						return BAD;
942 					case CUT:
943 						return CUT;
944 					case BEST:
945 					case GOOD:
946 						return BEST;
947 					}
948 					if (s >= e || !settst(rex->re.charclass, *s))
949 						break;
950 					s++;
951 				}
952 			}
953 			return r;
954 		case REX_COLL_CLASS:
955 			if (LEADING(env, rex, s))
956 				return NONE;
957 			n = rex->hi;
958 			if (n > env->end - s)
959 				n = env->end - s;
960 			m = rex->lo;
961 			if (m > n)
962 				return NONE;
963 			r = NONE;
964 			e = env->end;
965 			if (!(rex->flags & REG_MINIMAL))
966 			{
967 				if (!(b = (unsigned char*)stkpush(stkstd, n)))
968 				{
969 					env->error = REG_ESPACE;
970 					return BAD;
971 				}
972 				for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
973 				{
974 					b[i] = t - s;
975 					s = t;
976 				}
977 				for (; i-- >= rex->lo; s -= b[i])
978 					switch (follow(env, rex, cont, s))
979 					{
980 					case BAD:
981 						stkpop(stkstd);
982 						return BAD;
983 					case CUT:
984 						stkpop(stkstd);
985 						return CUT;
986 					case BEST:
987 						stkpop(stkstd);
988 						return BEST;
989 					case GOOD:
990 						r = GOOD;
991 						break;
992 					}
993 				stkpop(stkstd);
994 			}
995 			else
996 			{
997 				for (i = 0; i < m && s < e; i++, s = t)
998 					if (!collmatch(rex, s, e, &t))
999 						return r;
1000 				while (i++ <= n)
1001 				{
1002 					switch (follow(env, rex, cont, s))
1003 					{
1004 					case BAD:
1005 						return BAD;
1006 					case CUT:
1007 						return CUT;
1008 					case BEST:
1009 					case GOOD:
1010 						return BEST;
1011 					}
1012 					if (s >= e || !collmatch(rex, s, e, &s))
1013 						break;
1014 				}
1015 			}
1016 			return r;
1017 		case REX_CONJ:
1018 			next.type = REX_CONJ_RIGHT;
1019 			next.re.conj_right.cont = cont;
1020 			next.next = rex->next;
1021 			catcher.type = REX_CONJ_LEFT;
1022 			catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1023 			catcher.re.conj_left.cont = &next;
1024 			catcher.re.conj_left.beg = s;
1025 			catcher.next = 0;
1026 			return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1027 		case REX_CONJ_LEFT:
1028 			rex->re.conj_left.cont->re.conj_right.end = s;
1029 			cont = rex->re.conj_left.cont;
1030 			s = rex->re.conj_left.beg;
1031 			rex = rex->re.conj_left.right;
1032 			continue;
1033 		case REX_CONJ_RIGHT:
1034 			if (rex->re.conj_right.end != s)
1035 				return NONE;
1036 			cont = rex->re.conj_right.cont;
1037 			break;
1038 		case REX_DONE:
1039 			if (!env->stack)
1040 				return BEST;
1041 			n = s - env->beg;
1042 			r = env->nsub;
1043 			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1044 			if ((i = env->best[0].rm_eo) >= 0)
1045 			{
1046 				if (rex->flags & REG_MINIMAL)
1047 				{
1048 					if (n > i)
1049 						return GOOD;
1050 				}
1051 				else
1052 				{
1053 					if (n < i)
1054 						return GOOD;
1055 				}
1056 				if (n == i && better(env,
1057 						     (Pos_t*)env->bestpos->vec,
1058 				   		     (Pos_t*)env->pos->vec,
1059 				   		     (Pos_t*)env->bestpos->vec+env->bestpos->cur,
1060 				   		     (Pos_t*)env->pos->vec+env->pos->cur,
1061 						     0) <= 0)
1062 					return GOOD;
1063 			}
1064 			env->best[0].rm_eo = n;
1065 			memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1066 			n = env->pos->cur;
1067 			if (!vector(Pos_t, env->bestpos, n))
1068 			{
1069 				env->error = REG_ESPACE;
1070 				return BAD;
1071 			}
1072 			env->bestpos->cur = n;
1073 			memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1074 			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1075 			return GOOD;
1076 		case REX_DOT:
1077 			if (LEADING(env, rex, s))
1078 				return NONE;
1079 			n = rex->hi;
1080 			if (n > env->end - s)
1081 				n = env->end - s;
1082 			m = rex->lo;
1083 			if (m > n)
1084 				return NONE;
1085 			if ((c = rex->explicit) >= 0 && !mbwide())
1086 				for (i = 0; i < n; i++)
1087 					if (s[i] == c)
1088 					{
1089 						n = i;
1090 						break;
1091 					}
1092 			r = NONE;
1093 			if (!(rex->flags & REG_MINIMAL))
1094 			{
1095 				if (!mbwide())
1096 				{
1097 					for (s += n; n-- >= m; s--)
1098 						switch (follow(env, rex, cont, s))
1099 						{
1100 						case BAD:
1101 							return BAD;
1102 						case CUT:
1103 							return CUT;
1104 						case BEST:
1105 							return BEST;
1106 						case GOOD:
1107 							r = GOOD;
1108 							break;
1109 						}
1110 				}
1111 				else
1112 				{
1113 					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1114 					{
1115 						env->error = REG_ESPACE;
1116 						return BAD;
1117 					}
1118 					e = env->end;
1119 					for (i = 0; s < e && i < n && *s != c; i++)
1120 						s += b[i] = MBSIZE(s);
1121 					for (; i-- >= m; s -= b[i])
1122 						switch (follow(env, rex, cont, s))
1123 						{
1124 						case BAD:
1125 							stkpop(stkstd);
1126 							return BAD;
1127 						case CUT:
1128 							stkpop(stkstd);
1129 							return CUT;
1130 						case BEST:
1131 							stkpop(stkstd);
1132 							return BEST;
1133 						case GOOD:
1134 							r = GOOD;
1135 							break;
1136 						}
1137 					stkpop(stkstd);
1138 				}
1139 			}
1140 			else
1141 			{
1142 				if (!mbwide())
1143 				{
1144 					e = s + n;
1145 					for (s += m; s <= e; s++)
1146 						switch (follow(env, rex, cont, s))
1147 						{
1148 						case BAD:
1149 							return BAD;
1150 						case CUT:
1151 							return CUT;
1152 						case BEST:
1153 						case GOOD:
1154 							return BEST;
1155 						}
1156 				}
1157 				else
1158 				{
1159 					e = env->end;
1160 					for (i = 0; s < e && i < m && *s != c; i++)
1161 						s += MBSIZE(s);
1162 					if (i >= m)
1163 						for (; s <= e && i <= n; s += MBSIZE(s), i++)
1164 							switch (follow(env, rex, cont, s))
1165 							{
1166 							case BAD:
1167 								return BAD;
1168 							case CUT:
1169 								return CUT;
1170 							case BEST:
1171 							case GOOD:
1172 								return BEST;
1173 							}
1174 				}
1175 			}
1176 			return r;
1177 		case REX_END:
1178 			if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1179 				return NONE;
1180 			break;
1181 		case REX_GROUP:
1182 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1183 			if (env->stack)
1184 			{
1185 				if (rex->re.group.number)
1186 					env->match[rex->re.group.number].rm_so = s - env->beg;
1187 				if (pospush(env, rex, s, BEG_SUB))
1188 					return BAD;
1189 				catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1190 			}
1191 			catcher.type = REX_GROUP_CATCH;
1192 			catcher.serial = rex->serial;
1193 			catcher.re.group_catch.cont = cont;
1194 			catcher.next = rex->next;
1195 			r = parse(env, rex->re.group.expr.rex, &catcher, s);
1196 			if (env->stack)
1197 			{
1198 				pospop(env);
1199 				if (rex->re.group.number)
1200 					env->match[rex->re.group.number].rm_so = -1;
1201 			}
1202 			return r;
1203 		case REX_GROUP_CATCH:
1204 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1205 			if (env->stack)
1206 			{
1207 				if (rex->re.group_catch.eo)
1208 					*rex->re.group_catch.eo = s - env->beg;
1209 				if (pospush(env, rex, s, END_ANY))
1210 					return BAD;
1211 			}
1212 			r = follow(env, rex, rex->re.group_catch.cont, s);
1213 			if (env->stack)
1214 			{
1215 				pospop(env);
1216 				if (rex->re.group_catch.eo)
1217 					*rex->re.group_catch.eo = -1;
1218 			}
1219 			return r;
1220 		case REX_GROUP_AHEAD:
1221 			catcher.type = REX_GROUP_AHEAD_CATCH;
1222 			catcher.flags = rex->flags;
1223 			catcher.serial = rex->serial;
1224 			catcher.re.rep_catch.beg = s;
1225 			catcher.re.rep_catch.cont = cont;
1226 			catcher.next = rex->next;
1227 			return parse(env, rex->re.group.expr.rex, &catcher, s);
1228 		case REX_GROUP_AHEAD_CATCH:
1229 			return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1230 		case REX_GROUP_AHEAD_NOT:
1231 			r = parse(env, rex->re.group.expr.rex, NiL, s);
1232 			if (r == NONE)
1233 				r = follow(env, rex, cont, s);
1234 			else if (r != BAD)
1235 				r = NONE;
1236 			return r;
1237 		case REX_GROUP_BEHIND:
1238 			if ((s - env->beg) < rex->re.group.size)
1239 				return NONE;
1240 			catcher.type = REX_GROUP_BEHIND_CATCH;
1241 			catcher.flags = rex->flags;
1242 			catcher.serial = rex->serial;
1243 			catcher.re.behind_catch.beg = s;
1244 			catcher.re.behind_catch.end = e = env->end;
1245 			catcher.re.behind_catch.cont = cont;
1246 			catcher.next = rex->next;
1247 			for (t = s - rex->re.group.size; t >= env->beg; t--)
1248 			{
1249 				env->end = s;
1250 				r = parse(env, rex->re.group.expr.rex, &catcher, t);
1251 				env->end = e;
1252 				if (r != NONE)
1253 					return r;
1254 			}
1255 			return NONE;
1256 		case REX_GROUP_BEHIND_CATCH:
1257 			if (s != rex->re.behind_catch.beg)
1258 				return NONE;
1259 			env->end = rex->re.behind_catch.end;
1260 			return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1261 		case REX_GROUP_BEHIND_NOT:
1262 			if ((s - env->beg) < rex->re.group.size)
1263 				r = NONE;
1264 			else
1265 			{
1266 				catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1267 				catcher.re.neg_catch.beg = s;
1268 				catcher.next = 0;
1269 				e = env->end;
1270 				env->end = s;
1271 				for (t = s - rex->re.group.size; t >= env->beg; t--)
1272 				{
1273 					r = parse(env, rex->re.group.expr.rex, &catcher, t);
1274 					if (r != NONE)
1275 						break;
1276 				}
1277 				env->end = e;
1278 			}
1279 			if (r == NONE)
1280 				r = follow(env, rex, cont, s);
1281 			else if (r != BAD)
1282 				r = NONE;
1283 			return r;
1284 		case REX_GROUP_BEHIND_NOT_CATCH:
1285 			return s == rex->re.neg_catch.beg ? GOOD : NONE;
1286 		case REX_GROUP_COND:
1287 			if (q = rex->re.group.expr.binary.right)
1288 			{
1289 				catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1290 				catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1291 			}
1292 			else
1293 				catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1294 			if (q = rex->re.group.expr.binary.left)
1295 			{
1296 				catcher.type = REX_GROUP_COND_CATCH;
1297 				catcher.flags = rex->flags;
1298 				catcher.serial = rex->serial;
1299 				catcher.re.cond_catch.yes = 0;
1300 				catcher.re.cond_catch.beg = s;
1301 				catcher.re.cond_catch.cont = cont;
1302 				catcher.next = rex->next;
1303 				r = parse(env, q, &catcher, s);
1304 				if (r == BAD || catcher.re.cond_catch.yes)
1305 					return r;
1306 			}
1307 			else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1308 				r = GOOD;
1309 			else
1310 				r = NONE;
1311 			if (q = catcher.re.cond_catch.next[r != NONE])
1312 			{
1313 				catcher.type = REX_CAT;
1314 				catcher.flags = q->flags;
1315 				catcher.serial = q->serial;
1316 				catcher.re.group_catch.cont = cont;
1317 				catcher.next = rex->next;
1318 				return parse(env, q, &catcher, s);
1319 			}
1320 			return follow(env, rex, cont, s);
1321 		case REX_GROUP_COND_CATCH:
1322 			rex->re.cond_catch.yes = 1;
1323 			catcher.type = REX_CAT;
1324 			catcher.flags = rex->flags;
1325 			catcher.serial = rex->serial;
1326 			catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1327 			catcher.next = rex->next;
1328 			return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1329 		case REX_CAT:
1330 			return follow(env, rex, rex->re.group_catch.cont, s);
1331 		case REX_GROUP_CUT:
1332 			catcher.type = REX_GROUP_CUT_CATCH;
1333 			catcher.flags = rex->flags;
1334 			catcher.serial = rex->serial;
1335 			catcher.re.group_catch.cont = cont;
1336 			catcher.next = rex->next;
1337 			return parse(env, rex->re.group.expr.rex, &catcher, s);
1338 		case REX_GROUP_CUT_CATCH:
1339 			switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1340 			{
1341 			case GOOD:
1342 				r = BEST;
1343 				break;
1344 			case NONE:
1345 				r = CUT;
1346 				break;
1347 			}
1348 			return r;
1349 		case REX_KMP:
1350 			f = rex->re.string.fail;
1351 			b = rex->re.string.base;
1352 			n = rex->re.string.size;
1353 			t = s;
1354 			e = env->end;
1355 			if (p = rex->map)
1356 			{
1357 				while (t + n <= e)
1358 				{
1359 					for (i = -1; t < e; t++)
1360 					{
1361 						while (i >= 0 && b[i+1] != p[*t])
1362 							i = f[i];
1363 						if (b[i+1] == p[*t])
1364 							i++;
1365 						if (i + 1 == n)
1366 						{
1367 							t++;
1368 							if (env->stack)
1369 								env->best[0].rm_so = t - s - n;
1370 							switch (follow(env, rex, cont, t))
1371 							{
1372 							case BAD:
1373 								return BAD;
1374 							case CUT:
1375 								return CUT;
1376 							case BEST:
1377 							case GOOD:
1378 								return BEST;
1379 							}
1380 							t -= n - 1;
1381 							break;
1382 						}
1383 					}
1384 				}
1385 			}
1386 			else
1387 			{
1388 				while (t + n <= e)
1389 				{
1390 					for (i = -1; t < e; t++)
1391 					{
1392 						while (i >= 0 && b[i+1] != *t)
1393 							i = f[i];
1394 						if (b[i+1] == *t)
1395 							i++;
1396 						if (i + 1 == n)
1397 						{
1398 							t++;
1399 							if (env->stack)
1400 								env->best[0].rm_so = t - s - n;
1401 							switch (follow(env, rex, cont, t))
1402 							{
1403 							case BAD:
1404 								return BAD;
1405 							case CUT:
1406 								return CUT;
1407 							case BEST:
1408 							case GOOD:
1409 								return BEST;
1410 							}
1411 							t -= n - 1;
1412 							break;
1413 						}
1414 					}
1415 				}
1416 			}
1417 			return NONE;
1418 		case REX_NEG:
1419 			if (LEADING(env, rex, s))
1420 				return NONE;
1421 			i = env->end - s;
1422 			n = ((i + 7) >> 3) + 1;
1423 			catcher.type = REX_NEG_CATCH;
1424 			catcher.re.neg_catch.beg = s;
1425 			if (!(p = (unsigned char*)stkpush(stkstd, n)))
1426 				return BAD;
1427 			memset(catcher.re.neg_catch.index = p, 0, n);
1428 			catcher.next = rex->next;
1429 			if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1430 				r = BAD;
1431 			else
1432 			{
1433 				r = NONE;
1434 				for (; i >= 0; i--)
1435 					if (!bittst(p, i))
1436 					{
1437 						switch (follow(env, rex, cont, s + i))
1438 						{
1439 						case BAD:
1440 							r = BAD;
1441 							break;
1442 						case BEST:
1443 							r = BEST;
1444 							break;
1445 						case CUT:
1446 							r = CUT;
1447 							break;
1448 						case GOOD:
1449 							r = GOOD;
1450 							/*FALLTHROUGH*/
1451 						default:
1452 							continue;
1453 						}
1454 						break;
1455 					}
1456 			}
1457 			stkpop(stkstd);
1458 			return r;
1459 		case REX_NEG_CATCH:
1460 			bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1461 			return NONE;
1462 		case REX_NEST:
1463 			if (s >= env->end)
1464 				return NONE;
1465 			do
1466 			{
1467 				if ((c = *s++) == rex->re.nest.primary)
1468 				{
1469 					if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1470 						return NONE;
1471 					break;
1472 				}
1473 				if (rex->re.nest.primary >= 0)
1474 					return NONE;
1475 			    	if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1476 					break;
1477 			    	if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1478 					return NONE;
1479 			} while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1480 			break;
1481 		case REX_NULL:
1482 			break;
1483 		case REX_ONECHAR:
1484 			n = rex->hi;
1485 			if (n > env->end - s)
1486 				n = env->end - s;
1487 			m = rex->lo;
1488 			if (m > n)
1489 				return NONE;
1490 			r = NONE;
1491 			c = rex->re.onechar;
1492 			if (!(rex->flags & REG_MINIMAL))
1493 			{
1494 				if (!mbwide())
1495 				{
1496 					if (p = rex->map)
1497 					{
1498 						for (i = 0; i < n; i++, s++)
1499 							if (p[*s] != c)
1500 								break;
1501 					}
1502 					else
1503 					{
1504 						for (i = 0; i < n; i++, s++)
1505 							if (*s != c)
1506 								break;
1507 					}
1508 					for (; i-- >= m; s--)
1509 						switch (follow(env, rex, cont, s))
1510 						{
1511 						case BAD:
1512 							return BAD;
1513 						case BEST:
1514 							return BEST;
1515 						case CUT:
1516 							return CUT;
1517 						case GOOD:
1518 							r = GOOD;
1519 							break;
1520 						}
1521 				}
1522 				else
1523 				{
1524 					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1525 					{
1526 						env->error = REG_ESPACE;
1527 						return BAD;
1528 					}
1529 					e = env->end;
1530 					if (!(rex->flags & REG_ICASE))
1531 					{
1532 						for (i = 0; s < e && i < n; i++, s = t)
1533 						{
1534 							t = s;
1535 							if (mbchar(t) != c)
1536 								break;
1537 							b[i] = t - s;
1538 						}
1539 					}
1540 					else
1541 					{
1542 						for (i = 0; s < e && i < n; i++, s = t)
1543 						{
1544 							t = s;
1545 							if (towupper(mbchar(t)) != c)
1546 								break;
1547 							b[i] = t - s;
1548 						}
1549 					}
1550 					for (; i-- >= m; s -= b[i])
1551 						switch (follow(env, rex, cont, s))
1552 						{
1553 						case BAD:
1554 							stkpop(stkstd);
1555 							return BAD;
1556 						case BEST:
1557 							stkpop(stkstd);
1558 							return BEST;
1559 						case CUT:
1560 							stkpop(stkstd);
1561 							return CUT;
1562 						case GOOD:
1563 							r = GOOD;
1564 							break;
1565 						}
1566 					stkpop(stkstd);
1567 				}
1568 			}
1569 			else
1570 			{
1571 				if (!mbwide())
1572 				{
1573 					e = s + m;
1574 					if (p = rex->map)
1575 					{
1576 						for (; s < e; s++)
1577 							if (p[*s] != c)
1578 								return r;
1579 						e += n - m;
1580 						for (;;)
1581 						{
1582 							switch (follow(env, rex, cont, s))
1583 							{
1584 							case BAD:
1585 								return BAD;
1586 							case CUT:
1587 								return CUT;
1588 							case BEST:
1589 							case GOOD:
1590 								return BEST;
1591 							}
1592 							if (s >= e || p[*s++] != c)
1593 								break;
1594 						}
1595 					}
1596 					else
1597 					{
1598 						for (; s < e; s++)
1599 							if (*s != c)
1600 								return r;
1601 						e += n - m;
1602 						for (;;)
1603 						{
1604 							switch (follow(env, rex, cont, s))
1605 							{
1606 							case BAD:
1607 								return BAD;
1608 							case CUT:
1609 								return CUT;
1610 							case BEST:
1611 							case GOOD:
1612 								return BEST;
1613 							}
1614 							if (s >= e || *s++ != c)
1615 								break;
1616 						}
1617 					}
1618 				}
1619 				else
1620 				{
1621 					if (!(rex->flags & REG_ICASE))
1622 					{
1623 						for (i = 0; i < m && s < e; i++, s = t)
1624 						{
1625 							t = s;
1626 							if (mbchar(t) != c)
1627 								return r;
1628 						}
1629 						while (i++ <= n)
1630 						{
1631 							switch (follow(env, rex, cont, s))
1632 							{
1633 							case BAD:
1634 								return BAD;
1635 							case CUT:
1636 								return CUT;
1637 							case BEST:
1638 							case GOOD:
1639 								return BEST;
1640 							}
1641 							if (s >= e || mbchar(s) != c)
1642 								break;
1643 						}
1644 					}
1645 					else
1646 					{
1647 						for (i = 0; i < m && s < e; i++, s = t)
1648 						{
1649 							t = s;
1650 							if (towupper(mbchar(t)) != c)
1651 								return r;
1652 						}
1653 						while (i++ <= n)
1654 						{
1655 							switch (follow(env, rex, cont, s))
1656 							{
1657 							case BAD:
1658 								return BAD;
1659 							case CUT:
1660 								return CUT;
1661 							case BEST:
1662 							case GOOD:
1663 								return BEST;
1664 							}
1665 							if (s >= e || towupper(mbchar(s)) != c)
1666 								break;
1667 						}
1668 					}
1669 				}
1670 			}
1671 			return r;
1672 		case REX_REP:
1673 			if (env->stack && pospush(env, rex, s, BEG_REP))
1674 				return BAD;
1675 			r = parserep(env, rex, cont, s, 0);
1676 			if (env->stack)
1677 				pospop(env);
1678 			return r;
1679 		case REX_REP_CATCH:
1680 			DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1681 			if (env->stack && pospush(env, rex, s, END_ANY))
1682 				return BAD;
1683 			if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1684 			{
1685 				/*
1686 				 * optional empty iteration
1687 				 */
1688 
1689 DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1690 				if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1691 					r = NONE;
1692 				else if (pospush(env, rex, s, END_ANY))
1693 					r = BAD;
1694 				else
1695 				{
1696 					r = follow(env, rex, rex->re.rep_catch.cont, s);
1697 					pospop(env);
1698 				}
1699 			}
1700 			else
1701 				r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1702 			if (env->stack)
1703 				pospop(env);
1704 			return r;
1705 		case REX_STRING:
1706 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1707 			if (rex->re.string.size > (env->end - s))
1708 				return NONE;
1709 			t = rex->re.string.base;
1710 			e = t + rex->re.string.size;
1711 			if (!(p = rex->map))
1712 			{
1713 				while (t < e)
1714 					if (*s++ != *t++)
1715 						return NONE;
1716 			}
1717 			else if (!mbwide())
1718 			{
1719 				while (t < e)
1720 					if (p[*s++] != *t++)
1721 						return NONE;
1722 			}
1723 			else
1724 			{
1725 				while (t < e)
1726 				{
1727 					c = mbchar(s);
1728 					d = mbchar(t);
1729 					if (towupper(c) != d)
1730 						return NONE;
1731 				}
1732 			}
1733 			break;
1734 		case REX_TRIE:
1735 			if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1736 				return NONE;
1737 			return parsetrie(env, x, rex, cont, s);
1738 		case REX_EXEC:
1739 			u = 0;
1740 			r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1741 			e = (unsigned char*)u;
1742 			if (e >= s && e <= env->end)
1743 				s = e;
1744 			switch (r)
1745 			{
1746 			case 0:
1747 				break;
1748 			case REG_NOMATCH:
1749 				return NONE;
1750 			default:
1751 				env->error = r;
1752 				return BAD;
1753 			}
1754 			break;
1755 		case REX_WBEG:
1756 			if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1757 				return NONE;
1758 			break;
1759 		case REX_WEND:
1760 			if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1761 				return NONE;
1762 			break;
1763 		case REX_WORD:
1764 			if (s > env->beg && isword(*(s - 1)) == isword(*s))
1765 				return NONE;
1766 			break;
1767 		case REX_WORD_NOT:
1768 			if (s == env->beg || isword(*(s - 1)) != isword(*s))
1769 				return NONE;
1770 			break;
1771 		case REX_BEG_STR:
1772 			if (s != env->beg)
1773 				return NONE;
1774 			break;
1775 		case REX_END_STR:
1776 			for (t = s; t < env->end && *t == '\n'; t++);
1777 			if (t < env->end)
1778 				return NONE;
1779 			break;
1780 		case REX_FIN_STR:
1781 			if (s < env->end)
1782 				return NONE;
1783 			break;
1784 		}
1785 		if (!(rex = rex->next))
1786 		{
1787 			if (!(rex = cont))
1788 				break;
1789 			cont = 0;
1790 		}
1791 	}
1792 	return GOOD;
1793 }
1794 
1795 #if _AST_REGEX_DEBUG
1796 
1797 static void
1798 listnode(Rex_t* e, int level)
1799 {
1800 	int	i;
1801 
1802 	if (e)
1803 	{
1804 		do
1805 		{
1806 			for (i = 0; i < level; i++)
1807 				sfprintf(sfstderr, "  ");
1808 			sfprintf(sfstderr, "%s\n", rexname(e));
1809 			switch (e->type)
1810 			{
1811 			case REX_ALT:
1812 			case REX_CONJ:
1813 				listnode(e->re.group.expr.binary.left, level + 1);
1814 				listnode(e->re.group.expr.binary.right, level + 1);
1815 				break;
1816 			case REX_GROUP:
1817 			case REX_GROUP_AHEAD:
1818 			case REX_GROUP_AHEAD_NOT:
1819 			case REX_GROUP_BEHIND:
1820 			case REX_GROUP_BEHIND_NOT:
1821 			case REX_GROUP_CUT:
1822 			case REX_NEG:
1823 			case REX_REP:
1824 				listnode(e->re.group.expr.rex, level + 1);
1825 				break;
1826 			}
1827 		} while (e = e->next);
1828 	}
1829 }
1830 
1831 static int
1832 list(Env_t* env, Rex_t* rex)
1833 {
1834 	sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1835 	if (rex)
1836 		listnode(rex, 1);
1837 	return 0;
1838 }
1839 
1840 #endif
1841 
1842 /*
1843  * returning REG_BADPAT or REG_ESPACE is not explicitly
1844  * countenanced by the standard
1845  */
1846 
1847 int
1848 regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1849 {
1850 	register int	n;
1851 	register int	i;
1852 	int		j;
1853 	int		k;
1854 	int		m;
1855 	int		advance;
1856 	Env_t*		env;
1857 	Rex_t*		e;
1858 
1859 	DEBUG_INIT();
1860 	DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1861 	if (!p || !(env = p->env))
1862 		return REG_BADPAT;
1863 	if (!s)
1864 		return fatal(env->disc, REG_BADPAT, NiL);
1865 	if (len < env->min)
1866 	{
1867 		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1868 		return REG_NOMATCH;
1869 	}
1870 	env->regex = p;
1871 	env->beg = (unsigned char*)s;
1872 	env->end = env->beg + len;
1873 	stknew(stkstd, &env->stk);
1874 	env->flags &= ~REG_EXEC;
1875 	env->flags |= (flags & REG_EXEC);
1876 	advance = 0;
1877 	if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1878 	{
1879 		n = env->nsub;
1880 		if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1881 		    !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1882 		    !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1883 		{
1884 			k = REG_ESPACE;
1885 			goto done;
1886 		}
1887 		env->pos->cur = env->bestpos->cur = 0;
1888 		env->best = &env->match[n + 1];
1889 		env->best[0].rm_so = 0;
1890 		env->best[0].rm_eo = -1;
1891 		for (i = 0; i <= n; i++)
1892 			env->match[i] = state.nomatch;
1893 		if (flags & REG_ADVANCE)
1894 			advance = 1;
1895 	}
1896 	DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1897 	k = REG_NOMATCH;
1898 	if ((e = env->rex)->type == REX_BM)
1899 	{
1900 		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1901 		if (len < e->re.bm.right)
1902 		{
1903 			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1904 			goto done;
1905 		}
1906 		else if (!(flags & REG_LEFT))
1907 		{
1908 			register unsigned char*	buf = (unsigned char*)s;
1909 			register size_t		index = e->re.bm.left + e->re.bm.size;
1910 			register size_t		mid = len - e->re.bm.right;
1911 			register size_t*	skip = e->re.bm.skip;
1912 			register size_t*	fail = e->re.bm.fail;
1913 			register Bm_mask_t**	mask = e->re.bm.mask;
1914 			Bm_mask_t		m;
1915 			size_t			x;
1916 
1917 			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1918 			for (;;)
1919 			{
1920 				while ((index += skip[buf[index]]) < mid);
1921 				if (index < HIT)
1922 				{
1923 					DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1924 					goto done;
1925 				}
1926 				index -= HIT;
1927 				m = mask[n = e->re.bm.size - 1][buf[index]];
1928 				do
1929 				{
1930 					if (!n--)
1931 					{
1932 						if (e->re.bm.back < 0)
1933 							goto possible;
1934 						if (advance)
1935 						{
1936 							i = index - e->re.bm.back;
1937 							s += i;
1938 							if (env->stack)
1939 								env->best[0].rm_so += i;
1940 							goto possible;
1941 						}
1942 						x = index;
1943 						if (index < e->re.bm.back)
1944 							index = 0;
1945 						else
1946 							index -= e->re.bm.back;
1947 						while (index <= x)
1948 						{
1949 							if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1950 							{
1951 								if (env->stack)
1952 									env->best[0].rm_so = index;
1953 								n = env->nsub;
1954 								goto hit;
1955 							}
1956 							index++;
1957 						}
1958 						index += e->re.bm.size;
1959 						break;
1960 					}
1961 				} while (m &= mask[n][buf[--index]]);
1962 				if ((index += fail[n + 1]) >= len)
1963 					goto done;
1964 			}
1965 		}
1966  possible:
1967 		n = env->nsub;
1968 		e = e->next;
1969 	}
1970 	j = env->once || (flags & REG_LEFT);
1971 	DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1972 	while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1973 	{
1974 		if (j)
1975 			goto done;
1976 		i = MBSIZE(s);
1977 		s += i;
1978 		if ((unsigned char*)s > env->end - env->min)
1979 			goto done;
1980 		if (env->stack)
1981 			env->best[0].rm_so += i;
1982 	}
1983 	if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1984 		goto done;
1985  hit:
1986 	if (k = env->error)
1987 		goto done;
1988 	if (i == CUT)
1989 	{
1990 		k = env->error = REG_NOMATCH;
1991 		goto done;
1992 	}
1993 	if (!(env->flags & REG_NOSUB))
1994 	{
1995 		k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1996 		for (i = j = m = 0; j < nmatch; i++)
1997 			if (!i || !k || (i & 1))
1998 			{
1999 				if (i > n)
2000 					match[j] = state.nomatch;
2001 				else
2002 					match[m = j] = env->best[i];
2003 				j++;
2004 			}
2005 		if (k)
2006 		{
2007 			while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2008 				m--;
2009 			((regex_t*)p)->re_nsub = m;
2010 		}
2011 	}
2012 	k = 0;
2013  done:
2014 	stkold(stkstd, &env->stk);
2015 	env->stk.base = 0;
2016 	if (k > REG_NOMATCH)
2017 		fatal(p->env->disc, k, NiL);
2018 	return k;
2019 }
2020 
2021 void
2022 regfree(regex_t* p)
2023 {
2024 	Env_t*	env;
2025 
2026 	if (p && (env = p->env))
2027 	{
2028 #if _REG_subcomp
2029 		if (env->sub)
2030 		{
2031 			regsubfree(p);
2032 			p->re_sub = 0;
2033 		}
2034 #endif
2035 		p->env = 0;
2036 		if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2037 		{
2038 			drop(env->disc, env->rex);
2039 			if (env->pos)
2040 				vecclose(env->pos);
2041 			if (env->bestpos)
2042 				vecclose(env->bestpos);
2043 			if (env->stk.base)
2044 				stkold(stkstd, &env->stk);
2045 			alloc(env->disc, env, 0);
2046 		}
2047 	}
2048 }
2049