xref: /illumos-gate/usr/src/contrib/ast/src/lib/libast/regex/regnexec.c (revision fec047081731fd77caf46ec0471c501b2cb33894)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * posix regex executor
26  * single sized-record interface
27  */
28 
29 #include "reglib.h"
30 
31 #if _AST_REGEX_DEBUG
32 
33 #define DEBUG_TEST(f,y,n)	((debug&(debug_flag=f))?(y):(n))
34 #define DEBUG_CODE(f,y,n)	do if(debug&(f)){y}else{n} while(0)
35 #define DEBUG_INIT()		do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
36 
37 static unsigned long	debug;
38 static unsigned long	debug_flag;
39 
40 static const char*	rexnames[] =
41 {
42 	"REX_NULL",
43 	"REX_ALT",
44 	"REX_ALT_CATCH",
45 	"REX_BACK",
46 	"REX_BEG",
47 	"REX_BEG_STR",
48 	"REX_BM",
49 	"REX_CAT",
50 	"REX_CLASS",
51 	"REX_COLL_CLASS",
52 	"REX_CONJ",
53 	"REX_CONJ_LEFT",
54 	"REX_CONJ_RIGHT",
55 	"REX_DONE",
56 	"REX_DOT",
57 	"REX_END",
58 	"REX_END_STR",
59 	"REX_EXEC",
60 	"REX_FIN_STR",
61 	"REX_GROUP",
62 	"REX_GROUP_CATCH",
63 	"REX_GROUP_AHEAD",
64 	"REX_GROUP_AHEAD_CATCH",
65 	"REX_GROUP_AHEAD_NOT",
66 	"REX_GROUP_BEHIND",
67 	"REX_GROUP_BEHIND_CATCH",
68 	"REX_GROUP_BEHIND_NOT",
69 	"REX_GROUP_BEHIND_NOT_CATCH",
70 	"REX_GROUP_COND",
71 	"REX_GROUP_COND_CATCH",
72 	"REX_GROUP_CUT",
73 	"REX_GROUP_CUT_CATCH",
74 	"REX_KMP",
75 	"REX_NEG",
76 	"REX_NEG_CATCH",
77 	"REX_NEST",
78 	"REX_ONECHAR",
79 	"REX_REP",
80 	"REX_REP_CATCH",
81 	"REX_STRING",
82 	"REX_TRIE",
83 	"REX_WBEG",
84 	"REX_WEND",
85 	"REX_WORD",
86 	"REX_WORD_NOT"
87 };
88 
89 static const char* rexname(Rex_t* rex)
90 {
91 	if (!rex)
92 		return "NIL";
93 	if (rex->type >= elementsof(rexnames))
94 		return "ERROR";
95 	return rexnames[rex->type];
96 }
97 
98 #else
99 
100 #define DEBUG_INIT()
101 #define DEBUG_TEST(f,y,n)	(n)
102 #define DEBUG_CODE(f,y,n)	do {n} while(0)
103 
104 #endif
105 
106 #define BEG_ALT		1	/* beginning of an alt			*/
107 #define BEG_ONE		2	/* beginning of one iteration of a rep	*/
108 #define BEG_REP		3	/* beginning of a repetition		*/
109 #define BEG_SUB		4	/* beginning of a subexpression		*/
110 #define END_ANY		5	/* end of any of above			*/
111 
112 /*
113  * returns from parse()
114  */
115 
116 #define NONE		0	/* no parse found			*/
117 #define GOOD		1	/* some parse was found			*/
118 #define CUT		2	/* no match and no backtrack		*/
119 #define BEST		3	/* an unbeatable parse was found	*/
120 #define BAD		4	/* error ocurred			*/
121 
122 /*
123  * REG_SHELL_DOT test
124  */
125 
126 #define LEADING(e,r,s)	(*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
127 
128 /*
129  * Pos_t is for comparing parses. An entry is made in the
130  * array at the beginning and at the end of each Group_t,
131  * each iteration in a Group_t, and each Binary_t.
132  */
133 
134 typedef struct
135 {
136 	unsigned char*	p;		/* where in string		*/
137 	size_t		length;		/* length in string		*/
138 	short		serial;		/* preorder subpattern number	*/
139 	short		be;		/* which end of pair		*/
140 } Pos_t;
141 
142 /* ===== begin library support ===== */
143 
144 #define vector(t,v,i)	(((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
145 
146 static Vector_t*
147 vecopen(int inc, int siz)
148 {
149 	Vector_t*	v;
150 	Stk_t*		sp;
151 
152 	if (inc <= 0)
153 		inc = 16;
154 	if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155 		return 0;
156 	if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
157 	{
158 		stkclose(sp);
159 		return 0;
160 	}
161 	v->stk = sp;
162 	v->vec = (char*)v + sizeof(Vector_t);
163 	v->max = v->inc = inc;
164 	v->siz = siz;
165 	v->cur = 0;
166 	return v;
167 }
168 
169 static void*
170 vecseek(Vector_t** p, int index)
171 {
172 	Vector_t*	v = *p;
173 
174 	if (index >= v->max)
175 	{
176 		while ((v->max += v->inc) <= index);
177 		if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178 			return 0;
179 		*p = v;
180 		v->vec = (char*)v + sizeof(Vector_t);
181 	}
182 	return v->vec + index * v->siz;
183 }
184 
185 static void
186 vecclose(Vector_t* v)
187 {
188 	if (v)
189 		stkclose(v->stk);
190 }
191 
192 typedef struct
193 {
194 	Stk_pos_t	pos;
195 	char		data[1];
196 } Stk_frame_t;
197 
198 #define stknew(s,p)	((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199 #define stkold(s,p)	stkset(s,(p)->base,(p)->offset)
200 
201 #define stkframe(s)	(*((Stk_frame_t**)stktop(s)-1))
202 #define stkdata(s,t)	((t*)stkframe(s)->data)
203 #define stkpop(s)	stkold(s,&(stkframe(s)->pos))
204 
205 static void*
206 stkpush(Stk_t* sp, size_t size)
207 {
208 	Stk_frame_t*	f;
209 	Stk_pos_t	p;
210 
211 	stknew(sp, &p);
212 	size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213 	if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214 		return 0;
215 	f->pos = p;
216 	stkframe(sp) = f;
217 	return f->data;
218 }
219 
220 /* ===== end library support ===== */
221 
222 /*
223  * Match_frame_t is for saving and restoring match records
224  * around alternate attempts, so that fossils will not be
225  * left in the match array.  These are the only entries in
226  * the match array that are not otherwise guaranteed to
227  * have current data in them when they get used.
228  */
229 
230 typedef struct
231 {
232 	size_t			size;
233 	regmatch_t*		match;
234 	regmatch_t		save[1];
235 } Match_frame_t;
236 
237 #define matchpush(e,x)	((x)->re.group.number?_matchpush(e,x):0)
238 #define matchcopy(e,x)	do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); } while (0)
239 #define matchpop(e,x)	do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); stkpop(stkstd); } while (0)
240 
241 #define pospop(e)	(--(e)->pos->cur)
242 
243 /*
244  * allocate a frame and push a match onto the stack
245  */
246 
247 static int
248 _matchpush(Env_t* env, Rex_t* rex)
249 {
250 	Match_frame_t*	f;
251 	regmatch_t*	m;
252 	regmatch_t*	e;
253 	regmatch_t*	s;
254 	int		num;
255 
256 	if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
257 		num = 0;
258 	if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
259 	{
260 		env->error = REG_ESPACE;
261 		return 1;
262 	}
263 	f->size = num * sizeof(regmatch_t);
264 	f->match = m = env->match + rex->re.group.number;
265 	e = m + num;
266 	s = f->save;
267 	while (m < e)
268 	{
269 		*s++ = *m;
270 		*m++ = state.nomatch;
271 	}
272 	return 0;
273 }
274 
275 /*
276  * allocate a frame and push a pos onto the stack
277  */
278 
279 static int
280 pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
281 {
282 	Pos_t*	pos;
283 
284 	if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
285 	{
286 		env->error = REG_ESPACE;
287 		return 1;
288 	}
289 	pos->serial = rex->serial;
290 	pos->p = p;
291 	pos->be = be;
292 	env->pos->cur++;
293 	return 0;
294 }
295 
296 /*
297  * two matches are known to have the same length
298  * os is start of old pos array, ns is start of new,
299  * oend and nend are end+1 pointers to ends of arrays.
300  * oe and ne are ends (not end+1) of subarrays.
301  * returns 1 if new is better, -1 if old, else 0.
302  */
303 
304 static int
305 better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
306 {
307 	Pos_t*	oe;
308 	Pos_t*	ne;
309 	int	k;
310 	int	n;
311 
312 	if (env->error)
313 		return -1;
314 	for (;;)
315 	{
316 		DEBUG_CODE(0x0080,{sfprintf(sfstdout, "   %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n   %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
317 		if (ns >= nend)
318 			return DEBUG_TEST(0x8000,(os < oend),(0));
319 		if (os >= oend)
320 			return DEBUG_TEST(0x8000,(-1),(1));
321 		n = os->serial;
322 		if (ns->serial > n)
323 			return -1;
324 		if (n > ns->serial)
325 		{
326 			env->error = REG_PANIC;
327 			return -1;
328 		}
329 		if (ns->p > os->p)
330 			return 1;
331 		if (os->p > ns->p)
332 			return -1;
333 		oe = os;
334 		k = 0;
335 		for (;;)
336 			if ((++oe)->serial == n)
337 			{
338 				if (oe->be != END_ANY)
339 					k++;
340 				else if (k-- <= 0)
341 					break;
342 			}
343 		ne = ns;
344 		k = 0;
345 		for (;;)
346 			if ((++ne)->serial == n)
347 			{
348 				if (ne->be != END_ANY)
349 					k++;
350 				else if (k-- <= 0)
351 					break;
352 			}
353 		if (ne->p > oe->p)
354 			return 1;
355 		if (oe->p > ne->p)
356 			return -1;
357 		if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
358 			return k;
359 		os = oe + 1;
360 		ns = ne + 1;
361 	}
362 }
363 
364 #if _AST_REGEX_DEBUG
365 
366 static void
367 showmatch(regmatch_t* p)
368 {
369 	sfputc(sfstdout, '(');
370 	if (p->rm_so < 0)
371 		sfputc(sfstdout, '?');
372 	else
373 		sfprintf(sfstdout, "%z", p->rm_so);
374 	sfputc(sfstdout, ',');
375 	if (p->rm_eo < 0)
376 		sfputc(sfstdout, '?');
377 	else
378 		sfprintf(sfstdout, "%z", p->rm_eo);
379 	sfputc(sfstdout, ')');
380 }
381 
382 static int
383 _better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
384 {
385 	int	i;
386 
387 	DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n           new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
388 	i = better(env, os, ns, oend, nend, 0);
389 	DEBUG_TEST(0x0040,(sfprintf(sfstdout, "           %s\n", i <= 0 ? "OLD" : "NEW")),(0));
390 	return i;
391 }
392 
393 #define better	_better
394 
395 #endif
396 
397 #define follow(e,r,c,s)	((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
398 
399 static int		parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
400 
401 static int
402 parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
403 {
404 	int	i;
405 	int	r = NONE;
406 	Rex_t	catcher;
407 
408 	DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
409 	if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
410 	{
411 		if (env->stack && pospush(env, rex, s, END_ANY))
412 			return BAD;
413 		i = follow(env, rex, cont, s);
414 		if (env->stack)
415 			pospop(env);
416 		switch (i)
417 		{
418 		case BAD:
419 			return BAD;
420 		case CUT:
421 			return CUT;
422 		case BEST:
423 		case GOOD:
424 			return BEST;
425 		}
426 	}
427 	if (n < rex->hi)
428 	{
429 		catcher.type = REX_REP_CATCH;
430 		catcher.serial = rex->serial;
431 		catcher.re.rep_catch.ref = rex;
432 		catcher.re.rep_catch.cont = cont;
433 		catcher.re.rep_catch.beg = s;
434 		catcher.re.rep_catch.n = n + 1;
435 		catcher.next = rex->next;
436 		if (n == 0)
437 			rex->re.rep_catch.beg = s;
438 		if (env->stack)
439 		{
440 			if (matchpush(env, rex))
441 				return BAD;
442 			if (pospush(env, rex, s, BEG_ONE))
443 				return BAD;
444 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d   (%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)(%z,%z)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
445 		}
446 		r = parse(env, rex->re.group.expr.rex, &catcher, s);
447 		DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
448 		if (env->stack)
449 		{
450 			pospop(env);
451 			matchpop(env, rex);
452 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP  %d %d (%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)(%z,%z)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
453 		}
454 		switch (r)
455 		{
456 		case BAD:
457 			return BAD;
458 		case BEST:
459 			return BEST;
460 		case CUT:
461 			r = NONE;
462 			break;
463 		case GOOD:
464 			if (rex->flags & REG_MINIMAL)
465 				return BEST;
466 			r = GOOD;
467 			break;
468 		}
469 	}
470 	if (n < rex->lo)
471 		return r;
472 	if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
473 	{
474 		if (env->stack && pospush(env, rex, s, END_ANY))
475 			return BAD;
476 		i = follow(env, rex, cont, s);
477 		if (env->stack)
478 			pospop(env);
479 		switch (i)
480 		{
481 		case BAD:
482 			r = BAD;
483 			break;
484 		case CUT:
485 			r = CUT;
486 			break;
487 		case BEST:
488 			r = BEST;
489 			break;
490 		case GOOD:
491 			r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
492 			break;
493 		}
494 	}
495 	return r;
496 }
497 
498 static int
499 parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
500 {
501 	unsigned char*	p;
502 	int		r;
503 
504 	if (p = rex->map)
505 	{
506 		for (;;)
507 		{
508 			if (s >= env->end)
509 				return NONE;
510 			while (x->c != p[*s])
511 				if (!(x = x->sib))
512 					return NONE;
513 			if (x->end)
514 				break;
515 			x = x->son;
516 			s++;
517 		}
518 	}
519 	else
520 	{
521 		for (;;)
522 		{
523 			if (s >= env->end)
524 				return NONE;
525 			while (x->c != *s)
526 				if (!(x = x->sib))
527 					return NONE;
528 			if (x->end)
529 				break;
530 			x = x->son;
531 			s++;
532 		}
533 	}
534 	s++;
535 	if (rex->flags & REG_MINIMAL)
536 		switch (follow(env, rex, cont, s))
537 		{
538 		case BAD:
539 			return BAD;
540 		case CUT:
541 			return CUT;
542 		case BEST:
543 		case GOOD:
544 			return BEST;
545 		}
546 	if (x->son)
547 		switch (parsetrie(env, x->son, rex, cont, s))
548 		{
549 		case BAD:
550 			return BAD;
551 		case CUT:
552 			return CUT;
553 		case BEST:
554 			return BEST;
555 		case GOOD:
556 			if (rex->flags & REG_MINIMAL)
557 				return BEST;
558 			r = GOOD;
559 			break;
560 		default:
561 			r = NONE;
562 			break;
563 		}
564 	else
565 		r = NONE;
566 	if (!(rex->flags & REG_MINIMAL))
567 		switch (follow(env, rex, cont, s))
568 		{
569 		case BAD:
570 			return BAD;
571 		case CUT:
572 			return CUT;
573 		case BEST:
574 			return BEST;
575 		case GOOD:
576 			return GOOD;
577 	}
578 	return r;
579 }
580 
581 static int
582 collelt(register Celt_t* ce, char* key, int c, int x)
583 {
584 	Ckey_t	elt;
585 
586 	mbxfrm(elt, key, COLL_KEY_MAX);
587 	for (;; ce++)
588 	{
589 		switch (ce->typ)
590 		{
591 		case COLL_call:
592 			if (!x && (*ce->fun)(c))
593 				return 1;
594 			continue;
595 		case COLL_char:
596 			if (!strcmp((char*)ce->beg, (char*)elt))
597 				return 1;
598 			continue;
599 		case COLL_range:
600 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
601 				return 1;
602 			continue;
603 		case COLL_range_lc:
604 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
605 				return 1;
606 			continue;
607 		case COLL_range_uc:
608 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
609 				return 1;
610 			continue;
611 		}
612 		break;
613 	}
614 	return 0;
615 }
616 
617 static int
618 collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
619 {
620 	if (!x)
621 	{
622 		if (collelt(ce, key, c, x))
623 			return 1;
624 		if (iswlower(c))
625 			c = towupper(c);
626 		else if (iswupper(c))
627 			c = towlower(c);
628 		else
629 			return 0;
630 		x = mbconv(key, c);
631 		key[x] = 0;
632 		return collelt(ce, key, c, 0);
633 	}
634 	while (*nxt)
635 	{
636 		if (collic(ce, key, nxt + 1, c, x))
637 			return 1;
638 		if (islower(*nxt))
639 			*nxt = toupper(*nxt);
640 		else if (isupper(*nxt))
641 			*nxt = tolower(*nxt);
642 		else
643 			return 0;
644 		nxt++;
645 	}
646 	return collelt(ce, key, c, x);
647 }
648 
649 static int
650 collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
651 {
652 	unsigned char*		t;
653 	wchar_t			c;
654 	int			w;
655 	int			r;
656 	int			x;
657 	int			ic;
658 	Ckey_t			key;
659 	Ckey_t			elt;
660 
661 	ic = (rex->flags & REG_ICASE);
662 	if ((w = MBSIZE(s)) > 1)
663 	{
664 		memcpy((char*)key, (char*)s, w);
665 		key[w] = 0;
666 		t = s;
667 		c = mbchar(t);
668 #if !_lib_wctype
669 		c &= 0xff;
670 #endif
671 		x = 0;
672 	}
673 	else
674 	{
675 		c = s[0];
676 		if (ic && isupper(c))
677 			c = tolower(c);
678 		key[0] = c;
679 		key[1] = 0;
680 		if (isalpha(c))
681 		{
682 			x = e - s;
683 			if (x > COLL_KEY_MAX)
684 				x = COLL_KEY_MAX;
685 			while (w < x)
686 			{
687 				c = s[w];
688 				if (!isalpha(c))
689 					break;
690 				r = mbxfrm(elt, key, COLL_KEY_MAX);
691 				if (ic && isupper(c))
692 					c = tolower(c);
693 				key[w] = c;
694 				key[w + 1] = 0;
695 				if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
696 					break;
697 				w++;
698 			}
699 		}
700 		key[w] = 0;
701 		c = key[0];
702 		x = w - 1;
703 	}
704 	r = 1;
705 	for (;;)
706 	{
707 		if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
708 			break;
709 		if (!x)
710 		{
711 			r = 0;
712 			break;
713 		}
714 		w = x--;
715 		key[w] = 0;
716 	}
717 	*p = s + w;
718 	return rex->re.collate.invert ? !r : r;
719 }
720 
721 static unsigned char*
722 nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
723 {
724 	register int	c;
725 	register int	cc;
726 	unsigned int	n;
727 	int		oc;
728 
729 	if (type[co] & (REX_NEST_literal|REX_NEST_quote))
730 	{
731 		n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
732 		while (s < e)
733 		{
734 			c = *s++;
735 			if (c == co)
736 				return s;
737 			else if (type[c] & n)
738 			{
739 				if (s >= e || (type[c] & REX_NEST_terminator))
740 					break;
741 				s++;
742 			}
743 		}
744 	}
745 	else
746 	{
747 		cc = type[co] >> REX_NEST_SHIFT;
748 		oc = type[co] & (REX_NEST_open|REX_NEST_close);
749 		n = 1;
750 		while (s < e)
751 		{
752 			c = *s++;
753 			switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
754 			{
755 			case REX_NEST_delimiter:
756 			case REX_NEST_terminator:
757 				return oc ? 0 : s;
758 			case REX_NEST_separator:
759 				if (!oc)
760 					return s;
761 				break;
762 			case REX_NEST_escape:
763 				if (s >= e)
764 					return 0;
765 				s++;
766 				break;
767 			case REX_NEST_open|REX_NEST_close:
768 				if (c == cc)
769 				{
770 					if (!--n)
771 						return s;
772 				}
773 				/*FALLTHROUGH*/
774 			case REX_NEST_open:
775 				if (c == co)
776 				{
777 					if (!++n)
778 						return 0;
779 				}
780 				else if (!(s = nestmatch(s, e, type, c)))
781 					return 0;
782 				break;
783 			case REX_NEST_close:
784 				if (c != cc)
785 					return 0;
786 				if (!--n)
787 					return s;
788 				break;
789 			}
790 		}
791 		return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
792 	}
793 	return 0;
794 }
795 
796 static int
797 parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
798 {
799 	int		c;
800 	int		d;
801 	int		m;
802 	int		r;
803 	ssize_t		i;
804 	ssize_t		n;
805 	int*		f;
806 	unsigned char*	p;
807 	unsigned char*	t;
808 	unsigned char*	b;
809 	unsigned char*	e;
810 	char*		u;
811 	regmatch_t*	o;
812 	Trie_node_t*	x;
813 	Rex_t*		q;
814 	Rex_t		catcher;
815 	Rex_t		next;
816 
817 	for (;;)
818 	{
819 DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
820 		switch (rex->type)
821 		{
822 		case REX_ALT:
823 			if (env->stack)
824 			{
825 				if (matchpush(env, rex))
826 					return BAD;
827 				if (pospush(env, rex, s, BEG_ALT))
828 					return BAD;
829 				catcher.type = REX_ALT_CATCH;
830 				catcher.serial = rex->serial;
831 				catcher.re.alt_catch.cont = cont;
832 				catcher.next = rex->next;
833 				r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
834 				if (r < BEST || (rex->flags & REG_MINIMAL))
835 				{
836 					matchcopy(env, rex);
837 					((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
838 					n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
839 					if (n != NONE)
840 						r = n;
841 				}
842 				pospop(env);
843 				matchpop(env, rex);
844 			}
845 			else
846 			{
847 				if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
848 					r = parse(env, rex->re.group.expr.binary.right, cont, s);
849 				if (r == GOOD)
850 					r = BEST;
851 			}
852 			return r;
853 		case REX_ALT_CATCH:
854 			if (pospush(env, rex, s, END_ANY))
855 				return BAD;
856 			r = follow(env, rex, rex->re.alt_catch.cont, s);
857 			pospop(env);
858 			return r;
859 		case REX_BACK:
860 			o = &env->match[rex->lo];
861 			if (o->rm_so < 0)
862 				return NONE;
863 			i = o->rm_eo - o->rm_so;
864 			e = s + i;
865 			if (e > env->end)
866 				return NONE;
867 			t = env->beg + o->rm_so;
868 			if (!(p = rex->map))
869 			{
870 				while (s < e)
871 					if (*s++ != *t++)
872 						return NONE;
873 			}
874 			else if (!mbwide())
875 			{
876 				while (s < e)
877 					if (p[*s++] != p[*t++])
878 						return NONE;
879 			}
880 			else
881 			{
882 				while (s < e)
883 				{
884 					c = mbchar(s);
885 					d = mbchar(t);
886 					if (towupper(c) != towupper(d))
887 						return NONE;
888 				}
889 			}
890 			break;
891 		case REX_BEG:
892 			if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
893 				return NONE;
894 			break;
895 		case REX_CLASS:
896 			if (LEADING(env, rex, s))
897 				return NONE;
898 			n = rex->hi;
899 			if (n > env->end - s)
900 				n = env->end - s;
901 			m = rex->lo;
902 			if (m > n)
903 				return NONE;
904 			r = NONE;
905 			if (!(rex->flags & REG_MINIMAL))
906 			{
907 				for (i = 0; i < n; i++)
908 					if (!settst(rex->re.charclass, s[i]))
909 					{
910 						n = i;
911 						break;
912 					}
913 				for (s += n; n-- >= m; s--)
914 					switch (follow(env, rex, cont, s))
915 					{
916 					case BAD:
917 						return BAD;
918 					case CUT:
919 						return CUT;
920 					case BEST:
921 						return BEST;
922 					case GOOD:
923 						r = GOOD;
924 						break;
925 					}
926 			}
927 			else
928 			{
929 				for (e = s + m; s < e; s++)
930 					if (!settst(rex->re.charclass, *s))
931 						return r;
932 				e += n - m;
933 				for (;;)
934 				{
935 					switch (follow(env, rex, cont, s))
936 					{
937 					case BAD:
938 						return BAD;
939 					case CUT:
940 						return CUT;
941 					case BEST:
942 					case GOOD:
943 						return BEST;
944 					}
945 					if (s >= e || !settst(rex->re.charclass, *s))
946 						break;
947 					s++;
948 				}
949 			}
950 			return r;
951 		case REX_COLL_CLASS:
952 			if (LEADING(env, rex, s))
953 				return NONE;
954 			n = rex->hi;
955 			if (n > env->end - s)
956 				n = env->end - s;
957 			m = rex->lo;
958 			if (m > n)
959 				return NONE;
960 			r = NONE;
961 			e = env->end;
962 			if (!(rex->flags & REG_MINIMAL))
963 			{
964 				if (!(b = (unsigned char*)stkpush(stkstd, n)))
965 				{
966 					env->error = REG_ESPACE;
967 					return BAD;
968 				}
969 				for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
970 				{
971 					b[i] = t - s;
972 					s = t;
973 				}
974 				for (; i-- >= rex->lo; s -= b[i])
975 					switch (follow(env, rex, cont, s))
976 					{
977 					case BAD:
978 						stkpop(stkstd);
979 						return BAD;
980 					case CUT:
981 						stkpop(stkstd);
982 						return CUT;
983 					case BEST:
984 						stkpop(stkstd);
985 						return BEST;
986 					case GOOD:
987 						r = GOOD;
988 						break;
989 					}
990 				stkpop(stkstd);
991 			}
992 			else
993 			{
994 				for (i = 0; i < m && s < e; i++, s = t)
995 					if (!collmatch(rex, s, e, &t))
996 						return r;
997 				while (i++ <= n)
998 				{
999 					switch (follow(env, rex, cont, s))
1000 					{
1001 					case BAD:
1002 						return BAD;
1003 					case CUT:
1004 						return CUT;
1005 					case BEST:
1006 					case GOOD:
1007 						return BEST;
1008 					}
1009 					if (s >= e || !collmatch(rex, s, e, &s))
1010 						break;
1011 				}
1012 			}
1013 			return r;
1014 		case REX_CONJ:
1015 			next.type = REX_CONJ_RIGHT;
1016 			next.re.conj_right.cont = cont;
1017 			next.next = rex->next;
1018 			catcher.type = REX_CONJ_LEFT;
1019 			catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1020 			catcher.re.conj_left.cont = &next;
1021 			catcher.re.conj_left.beg = s;
1022 			catcher.next = 0;
1023 			return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1024 		case REX_CONJ_LEFT:
1025 			rex->re.conj_left.cont->re.conj_right.end = s;
1026 			cont = rex->re.conj_left.cont;
1027 			s = rex->re.conj_left.beg;
1028 			rex = rex->re.conj_left.right;
1029 			continue;
1030 		case REX_CONJ_RIGHT:
1031 			if (rex->re.conj_right.end != s)
1032 				return NONE;
1033 			cont = rex->re.conj_right.cont;
1034 			break;
1035 		case REX_DONE:
1036 			if (!env->stack)
1037 				return BEST;
1038 			n = s - env->beg;
1039 			r = env->nsub;
1040 			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%z,%z)(%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1041 			if ((i = env->best[0].rm_eo) >= 0)
1042 			{
1043 				if (rex->flags & REG_MINIMAL)
1044 				{
1045 					if (n > i)
1046 						return GOOD;
1047 				}
1048 				else
1049 				{
1050 					if (n < i)
1051 						return GOOD;
1052 				}
1053 				if (n == i && better(env,
1054 						     (Pos_t*)env->bestpos->vec,
1055 				   		     (Pos_t*)env->pos->vec,
1056 				   		     (Pos_t*)env->bestpos->vec+env->bestpos->cur,
1057 				   		     (Pos_t*)env->pos->vec+env->pos->cur,
1058 						     0) <= 0)
1059 					return GOOD;
1060 			}
1061 			env->best[0].rm_eo = n;
1062 			memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1063 			n = env->pos->cur;
1064 			if (!vector(Pos_t, env->bestpos, n))
1065 			{
1066 				env->error = REG_ESPACE;
1067 				return BAD;
1068 			}
1069 			env->bestpos->cur = n;
1070 			memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1071 			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%z,%z)(%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1072 			return GOOD;
1073 		case REX_DOT:
1074 			if (LEADING(env, rex, s))
1075 				return NONE;
1076 			n = rex->hi;
1077 			if (n > env->end - s)
1078 				n = env->end - s;
1079 			m = rex->lo;
1080 			if (m > n)
1081 				return NONE;
1082 			if ((c = rex->explicit) >= 0 && !mbwide())
1083 				for (i = 0; i < n; i++)
1084 					if (s[i] == c)
1085 					{
1086 						n = i;
1087 						break;
1088 					}
1089 			r = NONE;
1090 			if (!(rex->flags & REG_MINIMAL))
1091 			{
1092 				if (!mbwide())
1093 				{
1094 					for (s += n; n-- >= m; s--)
1095 						switch (follow(env, rex, cont, s))
1096 						{
1097 						case BAD:
1098 							return BAD;
1099 						case CUT:
1100 							return CUT;
1101 						case BEST:
1102 							return BEST;
1103 						case GOOD:
1104 							r = GOOD;
1105 							break;
1106 						}
1107 				}
1108 				else
1109 				{
1110 					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1111 					{
1112 						env->error = REG_ESPACE;
1113 						return BAD;
1114 					}
1115 					e = env->end;
1116 					for (i = 0; s < e && i < n && *s != c; i++)
1117 						s += b[i] = MBSIZE(s);
1118 					for (; i-- >= m; s -= b[i])
1119 						switch (follow(env, rex, cont, s))
1120 						{
1121 						case BAD:
1122 							stkpop(stkstd);
1123 							return BAD;
1124 						case CUT:
1125 							stkpop(stkstd);
1126 							return CUT;
1127 						case BEST:
1128 							stkpop(stkstd);
1129 							return BEST;
1130 						case GOOD:
1131 							r = GOOD;
1132 							break;
1133 						}
1134 					stkpop(stkstd);
1135 				}
1136 			}
1137 			else
1138 			{
1139 				if (!mbwide())
1140 				{
1141 					e = s + n;
1142 					for (s += m; s <= e; s++)
1143 						switch (follow(env, rex, cont, s))
1144 						{
1145 						case BAD:
1146 							return BAD;
1147 						case CUT:
1148 							return CUT;
1149 						case BEST:
1150 						case GOOD:
1151 							return BEST;
1152 						}
1153 				}
1154 				else
1155 				{
1156 					e = env->end;
1157 					for (i = 0; s < e && i < m && *s != c; i++)
1158 						s += MBSIZE(s);
1159 					if (i >= m)
1160 						for (; s <= e && i <= n; s += MBSIZE(s), i++)
1161 							switch (follow(env, rex, cont, s))
1162 							{
1163 							case BAD:
1164 								return BAD;
1165 							case CUT:
1166 								return CUT;
1167 							case BEST:
1168 							case GOOD:
1169 								return BEST;
1170 							}
1171 				}
1172 			}
1173 			return r;
1174 		case REX_END:
1175 			if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1176 				return NONE;
1177 			break;
1178 		case REX_GROUP:
1179 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1180 			if (env->stack)
1181 			{
1182 				if (rex->re.group.number)
1183 					env->match[rex->re.group.number].rm_so = s - env->beg;
1184 				if (pospush(env, rex, s, BEG_SUB))
1185 					return BAD;
1186 				catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1187 			}
1188 			catcher.type = REX_GROUP_CATCH;
1189 			catcher.serial = rex->serial;
1190 			catcher.re.group_catch.cont = cont;
1191 			catcher.next = rex->next;
1192 			r = parse(env, rex->re.group.expr.rex, &catcher, s);
1193 			if (env->stack)
1194 			{
1195 				pospop(env);
1196 				if (rex->re.group.number)
1197 					env->match[rex->re.group.number].rm_so = -1;
1198 			}
1199 			return r;
1200 		case REX_GROUP_CATCH:
1201 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1202 			if (env->stack)
1203 			{
1204 				if (rex->re.group_catch.eo)
1205 					*rex->re.group_catch.eo = s - env->beg;
1206 				if (pospush(env, rex, s, END_ANY))
1207 					return BAD;
1208 			}
1209 			r = follow(env, rex, rex->re.group_catch.cont, s);
1210 			if (env->stack)
1211 			{
1212 				pospop(env);
1213 				if (rex->re.group_catch.eo)
1214 					*rex->re.group_catch.eo = -1;
1215 			}
1216 			return r;
1217 		case REX_GROUP_AHEAD:
1218 			catcher.type = REX_GROUP_AHEAD_CATCH;
1219 			catcher.flags = rex->flags;
1220 			catcher.serial = rex->serial;
1221 			catcher.re.rep_catch.beg = s;
1222 			catcher.re.rep_catch.cont = cont;
1223 			catcher.next = rex->next;
1224 			return parse(env, rex->re.group.expr.rex, &catcher, s);
1225 		case REX_GROUP_AHEAD_CATCH:
1226 			return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1227 		case REX_GROUP_AHEAD_NOT:
1228 			r = parse(env, rex->re.group.expr.rex, NiL, s);
1229 			if (r == NONE)
1230 				r = follow(env, rex, cont, s);
1231 			else if (r != BAD)
1232 				r = NONE;
1233 			return r;
1234 		case REX_GROUP_BEHIND:
1235 			if ((s - env->beg) < rex->re.group.size)
1236 				return NONE;
1237 			catcher.type = REX_GROUP_BEHIND_CATCH;
1238 			catcher.flags = rex->flags;
1239 			catcher.serial = rex->serial;
1240 			catcher.re.behind_catch.beg = s;
1241 			catcher.re.behind_catch.end = e = env->end;
1242 			catcher.re.behind_catch.cont = cont;
1243 			catcher.next = rex->next;
1244 			for (t = s - rex->re.group.size; t >= env->beg; t--)
1245 			{
1246 				env->end = s;
1247 				r = parse(env, rex->re.group.expr.rex, &catcher, t);
1248 				env->end = e;
1249 				if (r != NONE)
1250 					return r;
1251 			}
1252 			return NONE;
1253 		case REX_GROUP_BEHIND_CATCH:
1254 			if (s != rex->re.behind_catch.beg)
1255 				return NONE;
1256 			env->end = rex->re.behind_catch.end;
1257 			return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1258 		case REX_GROUP_BEHIND_NOT:
1259 			if ((s - env->beg) < rex->re.group.size)
1260 				r = NONE;
1261 			else
1262 			{
1263 				catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1264 				catcher.re.neg_catch.beg = s;
1265 				catcher.next = 0;
1266 				e = env->end;
1267 				env->end = s;
1268 				for (t = s - rex->re.group.size; t >= env->beg; t--)
1269 				{
1270 					r = parse(env, rex->re.group.expr.rex, &catcher, t);
1271 					if (r != NONE)
1272 						break;
1273 				}
1274 				env->end = e;
1275 			}
1276 			if (r == NONE)
1277 				r = follow(env, rex, cont, s);
1278 			else if (r != BAD)
1279 				r = NONE;
1280 			return r;
1281 		case REX_GROUP_BEHIND_NOT_CATCH:
1282 			return s == rex->re.neg_catch.beg ? GOOD : NONE;
1283 		case REX_GROUP_COND:
1284 			if (q = rex->re.group.expr.binary.right)
1285 			{
1286 				catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1287 				catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1288 			}
1289 			else
1290 				catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1291 			if (q = rex->re.group.expr.binary.left)
1292 			{
1293 				catcher.type = REX_GROUP_COND_CATCH;
1294 				catcher.flags = rex->flags;
1295 				catcher.serial = rex->serial;
1296 				catcher.re.cond_catch.yes = 0;
1297 				catcher.re.cond_catch.beg = s;
1298 				catcher.re.cond_catch.cont = cont;
1299 				catcher.next = rex->next;
1300 				r = parse(env, q, &catcher, s);
1301 				if (r == BAD || catcher.re.cond_catch.yes)
1302 					return r;
1303 			}
1304 			else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1305 				r = GOOD;
1306 			else
1307 				r = NONE;
1308 			if (q = catcher.re.cond_catch.next[r != NONE])
1309 			{
1310 				catcher.type = REX_CAT;
1311 				catcher.flags = q->flags;
1312 				catcher.serial = q->serial;
1313 				catcher.re.group_catch.cont = cont;
1314 				catcher.next = rex->next;
1315 				return parse(env, q, &catcher, s);
1316 			}
1317 			return follow(env, rex, cont, s);
1318 		case REX_GROUP_COND_CATCH:
1319 			rex->re.cond_catch.yes = 1;
1320 			catcher.type = REX_CAT;
1321 			catcher.flags = rex->flags;
1322 			catcher.serial = rex->serial;
1323 			catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1324 			catcher.next = rex->next;
1325 			return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1326 		case REX_CAT:
1327 			return follow(env, rex, rex->re.group_catch.cont, s);
1328 		case REX_GROUP_CUT:
1329 			catcher.type = REX_GROUP_CUT_CATCH;
1330 			catcher.flags = rex->flags;
1331 			catcher.serial = rex->serial;
1332 			catcher.re.group_catch.cont = cont;
1333 			catcher.next = rex->next;
1334 			return parse(env, rex->re.group.expr.rex, &catcher, s);
1335 		case REX_GROUP_CUT_CATCH:
1336 			switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1337 			{
1338 			case GOOD:
1339 				r = BEST;
1340 				break;
1341 			case NONE:
1342 				r = CUT;
1343 				break;
1344 			}
1345 			return r;
1346 		case REX_KMP:
1347 			f = rex->re.string.fail;
1348 			b = rex->re.string.base;
1349 			n = rex->re.string.size;
1350 			t = s;
1351 			e = env->end;
1352 			if (p = rex->map)
1353 			{
1354 				while (t + n <= e)
1355 				{
1356 					for (i = -1; t < e; t++)
1357 					{
1358 						while (i >= 0 && b[i+1] != p[*t])
1359 							i = f[i];
1360 						if (b[i+1] == p[*t])
1361 							i++;
1362 						if (i + 1 == n)
1363 						{
1364 							t++;
1365 							if (env->stack)
1366 								env->best[0].rm_so = t - s - n;
1367 							switch (follow(env, rex, cont, t))
1368 							{
1369 							case BAD:
1370 								return BAD;
1371 							case CUT:
1372 								return CUT;
1373 							case BEST:
1374 							case GOOD:
1375 								return BEST;
1376 							}
1377 							t -= n - 1;
1378 							break;
1379 						}
1380 					}
1381 				}
1382 			}
1383 			else
1384 			{
1385 				while (t + n <= e)
1386 				{
1387 					for (i = -1; t < e; t++)
1388 					{
1389 						while (i >= 0 && b[i+1] != *t)
1390 							i = f[i];
1391 						if (b[i+1] == *t)
1392 							i++;
1393 						if (i + 1 == n)
1394 						{
1395 							t++;
1396 							if (env->stack)
1397 								env->best[0].rm_so = t - s - n;
1398 							switch (follow(env, rex, cont, t))
1399 							{
1400 							case BAD:
1401 								return BAD;
1402 							case CUT:
1403 								return CUT;
1404 							case BEST:
1405 							case GOOD:
1406 								return BEST;
1407 							}
1408 							t -= n - 1;
1409 							break;
1410 						}
1411 					}
1412 				}
1413 			}
1414 			return NONE;
1415 		case REX_NEG:
1416 			if (LEADING(env, rex, s))
1417 				return NONE;
1418 			i = env->end - s;
1419 			n = ((i + 7) >> 3) + 1;
1420 			catcher.type = REX_NEG_CATCH;
1421 			catcher.re.neg_catch.beg = s;
1422 			if (!(p = (unsigned char*)stkpush(stkstd, n)))
1423 				return BAD;
1424 			memset(catcher.re.neg_catch.index = p, 0, n);
1425 			catcher.next = rex->next;
1426 			if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1427 				r = BAD;
1428 			else
1429 			{
1430 				r = NONE;
1431 				for (; i >= 0; i--)
1432 					if (!bittst(p, i))
1433 					{
1434 						switch (follow(env, rex, cont, s + i))
1435 						{
1436 						case BAD:
1437 							r = BAD;
1438 							break;
1439 						case BEST:
1440 							r = BEST;
1441 							break;
1442 						case CUT:
1443 							r = CUT;
1444 							break;
1445 						case GOOD:
1446 							r = GOOD;
1447 							/*FALLTHROUGH*/
1448 						default:
1449 							continue;
1450 						}
1451 						break;
1452 					}
1453 			}
1454 			stkpop(stkstd);
1455 			return r;
1456 		case REX_NEG_CATCH:
1457 			bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1458 			return NONE;
1459 		case REX_NEST:
1460 			if (s >= env->end)
1461 				return NONE;
1462 			do
1463 			{
1464 				if ((c = *s++) == rex->re.nest.primary)
1465 				{
1466 					if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1467 						return NONE;
1468 					break;
1469 				}
1470 				if (rex->re.nest.primary >= 0)
1471 					return NONE;
1472 			    	if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1473 					break;
1474 			    	if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1475 					return NONE;
1476 			} while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1477 			break;
1478 		case REX_NULL:
1479 			break;
1480 		case REX_ONECHAR:
1481 			n = rex->hi;
1482 			if (n > env->end - s)
1483 				n = env->end - s;
1484 			m = rex->lo;
1485 			if (m > n)
1486 				return NONE;
1487 			r = NONE;
1488 			c = rex->re.onechar;
1489 			if (!(rex->flags & REG_MINIMAL))
1490 			{
1491 				if (!mbwide())
1492 				{
1493 					if (p = rex->map)
1494 					{
1495 						for (i = 0; i < n; i++, s++)
1496 							if (p[*s] != c)
1497 								break;
1498 					}
1499 					else
1500 					{
1501 						for (i = 0; i < n; i++, s++)
1502 							if (*s != c)
1503 								break;
1504 					}
1505 					for (; i-- >= m; s--)
1506 						switch (follow(env, rex, cont, s))
1507 						{
1508 						case BAD:
1509 							return BAD;
1510 						case BEST:
1511 							return BEST;
1512 						case CUT:
1513 							return CUT;
1514 						case GOOD:
1515 							r = GOOD;
1516 							break;
1517 						}
1518 				}
1519 				else
1520 				{
1521 					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1522 					{
1523 						env->error = REG_ESPACE;
1524 						return BAD;
1525 					}
1526 					e = env->end;
1527 					if (!(rex->flags & REG_ICASE))
1528 					{
1529 						for (i = 0; s < e && i < n; i++, s = t)
1530 						{
1531 							t = s;
1532 							if (mbchar(t) != c)
1533 								break;
1534 							b[i] = t - s;
1535 						}
1536 					}
1537 					else
1538 					{
1539 						for (i = 0; s < e && i < n; i++, s = t)
1540 						{
1541 							t = s;
1542 							if (towupper(mbchar(t)) != c)
1543 								break;
1544 							b[i] = t - s;
1545 						}
1546 					}
1547 					for (; i-- >= m; s -= b[i])
1548 						switch (follow(env, rex, cont, s))
1549 						{
1550 						case BAD:
1551 							stkpop(stkstd);
1552 							return BAD;
1553 						case BEST:
1554 							stkpop(stkstd);
1555 							return BEST;
1556 						case CUT:
1557 							stkpop(stkstd);
1558 							return CUT;
1559 						case GOOD:
1560 							r = GOOD;
1561 							break;
1562 						}
1563 					stkpop(stkstd);
1564 				}
1565 			}
1566 			else
1567 			{
1568 				if (!mbwide())
1569 				{
1570 					e = s + m;
1571 					if (p = rex->map)
1572 					{
1573 						for (; s < e; s++)
1574 							if (p[*s] != c)
1575 								return r;
1576 						e += n - m;
1577 						for (;;)
1578 						{
1579 							switch (follow(env, rex, cont, s))
1580 							{
1581 							case BAD:
1582 								return BAD;
1583 							case CUT:
1584 								return CUT;
1585 							case BEST:
1586 							case GOOD:
1587 								return BEST;
1588 							}
1589 							if (s >= e || p[*s++] != c)
1590 								break;
1591 						}
1592 					}
1593 					else
1594 					{
1595 						for (; s < e; s++)
1596 							if (*s != c)
1597 								return r;
1598 						e += n - m;
1599 						for (;;)
1600 						{
1601 							switch (follow(env, rex, cont, s))
1602 							{
1603 							case BAD:
1604 								return BAD;
1605 							case CUT:
1606 								return CUT;
1607 							case BEST:
1608 							case GOOD:
1609 								return BEST;
1610 							}
1611 							if (s >= e || *s++ != c)
1612 								break;
1613 						}
1614 					}
1615 				}
1616 				else
1617 				{
1618 					e = env->end;
1619 					if (!(rex->flags & REG_ICASE))
1620 					{
1621 						for (i = 0; i < m && s < e; i++, s = t)
1622 						{
1623 							t = s;
1624 							if (mbchar(t) != c)
1625 								return r;
1626 						}
1627 						while (i++ <= n)
1628 						{
1629 							switch (follow(env, rex, cont, s))
1630 							{
1631 							case BAD:
1632 								return BAD;
1633 							case CUT:
1634 								return CUT;
1635 							case BEST:
1636 							case GOOD:
1637 								return BEST;
1638 							}
1639 							if (s >= e || mbchar(s) != c)
1640 								break;
1641 						}
1642 					}
1643 					else
1644 					{
1645 						for (i = 0; i < m && s < e; i++, s = t)
1646 						{
1647 							t = s;
1648 							if (towupper(mbchar(t)) != c)
1649 								return r;
1650 						}
1651 						while (i++ <= n)
1652 						{
1653 							switch (follow(env, rex, cont, s))
1654 							{
1655 							case BAD:
1656 								return BAD;
1657 							case CUT:
1658 								return CUT;
1659 							case BEST:
1660 							case GOOD:
1661 								return BEST;
1662 							}
1663 							if (s >= e || towupper(mbchar(s)) != c)
1664 								break;
1665 						}
1666 					}
1667 				}
1668 			}
1669 			return r;
1670 		case REX_REP:
1671 			if (env->stack && pospush(env, rex, s, BEG_REP))
1672 				return BAD;
1673 			r = parserep(env, rex, cont, s, 0);
1674 			if (env->stack)
1675 				pospop(env);
1676 			return r;
1677 		case REX_REP_CATCH:
1678 			DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1679 			if (env->stack && pospush(env, rex, s, END_ANY))
1680 				return BAD;
1681 			if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1682 			{
1683 				/*
1684 				 * optional empty iteration
1685 				 */
1686 
1687 DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1688 				if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1689 					r = NONE;
1690 				else if (pospush(env, rex, s, END_ANY))
1691 					r = BAD;
1692 				else
1693 				{
1694 					r = follow(env, rex, rex->re.rep_catch.cont, s);
1695 					pospop(env);
1696 				}
1697 			}
1698 			else
1699 				r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1700 			if (env->stack)
1701 				pospop(env);
1702 			return r;
1703 		case REX_STRING:
1704 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1705 			if (rex->re.string.size > (env->end - s))
1706 				return NONE;
1707 			t = rex->re.string.base;
1708 			e = t + rex->re.string.size;
1709 			if (!(p = rex->map))
1710 			{
1711 				while (t < e)
1712 					if (*s++ != *t++)
1713 						return NONE;
1714 			}
1715 			else if (!mbwide())
1716 			{
1717 				while (t < e)
1718 					if (p[*s++] != *t++)
1719 						return NONE;
1720 			}
1721 			else
1722 			{
1723 				while (t < e)
1724 				{
1725 					c = mbchar(s);
1726 					d = mbchar(t);
1727 					if (towupper(c) != d)
1728 						return NONE;
1729 				}
1730 			}
1731 			break;
1732 		case REX_TRIE:
1733 			if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1734 				return NONE;
1735 			return parsetrie(env, x, rex, cont, s);
1736 		case REX_EXEC:
1737 			u = 0;
1738 			r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1739 			e = (unsigned char*)u;
1740 			if (e >= s && e <= env->end)
1741 				s = e;
1742 			switch (r)
1743 			{
1744 			case 0:
1745 				break;
1746 			case REG_NOMATCH:
1747 				return NONE;
1748 			default:
1749 				env->error = r;
1750 				return BAD;
1751 			}
1752 			break;
1753 		case REX_WBEG:
1754 			if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1755 				return NONE;
1756 			break;
1757 		case REX_WEND:
1758 			if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1759 				return NONE;
1760 			break;
1761 		case REX_WORD:
1762 			if (s > env->beg && isword(*(s - 1)) == isword(*s))
1763 				return NONE;
1764 			break;
1765 		case REX_WORD_NOT:
1766 			if (s == env->beg || isword(*(s - 1)) != isword(*s))
1767 				return NONE;
1768 			break;
1769 		case REX_BEG_STR:
1770 			if (s != env->beg)
1771 				return NONE;
1772 			break;
1773 		case REX_END_STR:
1774 			for (t = s; t < env->end && *t == '\n'; t++);
1775 			if (t < env->end)
1776 				return NONE;
1777 			break;
1778 		case REX_FIN_STR:
1779 			if (s < env->end)
1780 				return NONE;
1781 			break;
1782 		}
1783 		if (!(rex = rex->next))
1784 		{
1785 			if (!(rex = cont))
1786 				break;
1787 			cont = 0;
1788 		}
1789 	}
1790 	return GOOD;
1791 }
1792 
1793 #if _AST_REGEX_DEBUG
1794 
1795 static void
1796 listnode(Rex_t* e, int level)
1797 {
1798 	int	i;
1799 
1800 	if (e)
1801 	{
1802 		do
1803 		{
1804 			for (i = 0; i < level; i++)
1805 				sfprintf(sfstderr, "  ");
1806 			sfprintf(sfstderr, "%s\n", rexname(e));
1807 			switch (e->type)
1808 			{
1809 			case REX_ALT:
1810 			case REX_CONJ:
1811 				listnode(e->re.group.expr.binary.left, level + 1);
1812 				listnode(e->re.group.expr.binary.right, level + 1);
1813 				break;
1814 			case REX_GROUP:
1815 			case REX_GROUP_AHEAD:
1816 			case REX_GROUP_AHEAD_NOT:
1817 			case REX_GROUP_BEHIND:
1818 			case REX_GROUP_BEHIND_NOT:
1819 			case REX_GROUP_CUT:
1820 			case REX_NEG:
1821 			case REX_REP:
1822 				listnode(e->re.group.expr.rex, level + 1);
1823 				break;
1824 			}
1825 		} while (e = e->next);
1826 	}
1827 }
1828 
1829 static int
1830 list(Env_t* env, Rex_t* rex)
1831 {
1832 	sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1833 	if (rex)
1834 		listnode(rex, 1);
1835 	return 0;
1836 }
1837 
1838 #endif
1839 
1840 /*
1841  * returning REG_BADPAT or REG_ESPACE is not explicitly
1842  * countenanced by the standard
1843  */
1844 
1845 int
1846 regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1847 {
1848 	register ssize_t	n;
1849 	register int		i;
1850 	int			j;
1851 	int			k;
1852 	int			m;
1853 	int			advance;
1854 	Env_t*			env;
1855 	Rex_t*			e;
1856 
1857 	DEBUG_INIT();
1858 	DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1859 	if (!p || !(env = p->env))
1860 		return REG_BADPAT;
1861 	if (!s)
1862 		return fatal(env->disc, REG_BADPAT, NiL);
1863 	if (len < env->min)
1864 	{
1865 		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1866 		return REG_NOMATCH;
1867 	}
1868 	env->regex = p;
1869 	env->beg = (unsigned char*)s;
1870 	env->end = env->beg + len;
1871 	stknew(stkstd, &env->stk);
1872 	env->flags &= ~REG_EXEC;
1873 	env->flags |= (flags & REG_EXEC);
1874 	advance = 0;
1875 	if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1876 	{
1877 		n = env->nsub;
1878 		if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1879 		    !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1880 		    !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1881 		{
1882 			k = REG_ESPACE;
1883 			goto done;
1884 		}
1885 		env->pos->cur = env->bestpos->cur = 0;
1886 		env->best = &env->match[n + 1];
1887 		env->best[0].rm_so = 0;
1888 		env->best[0].rm_eo = -1;
1889 		for (i = 0; i <= n; i++)
1890 			env->match[i] = state.nomatch;
1891 		if (flags & REG_ADVANCE)
1892 			advance = 1;
1893 	}
1894 	DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1895 	k = REG_NOMATCH;
1896 	if ((e = env->rex)->type == REX_BM)
1897 	{
1898 		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1899 		if (len < e->re.bm.right)
1900 		{
1901 			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1902 			goto done;
1903 		}
1904 		else if (!(flags & REG_LEFT))
1905 		{
1906 			register unsigned char*	buf = (unsigned char*)s;
1907 			register size_t		index = e->re.bm.left + e->re.bm.size;
1908 			register size_t		mid = len - e->re.bm.right;
1909 			register size_t*	skip = e->re.bm.skip;
1910 			register size_t*	fail = e->re.bm.fail;
1911 			register Bm_mask_t**	mask = e->re.bm.mask;
1912 			Bm_mask_t		m;
1913 			size_t			x;
1914 
1915 			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1916 			for (;;)
1917 			{
1918 				while ((index += skip[buf[index]]) < mid);
1919 				if (index < HIT)
1920 				{
1921 					DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1922 					goto done;
1923 				}
1924 				index -= HIT;
1925 				m = mask[n = e->re.bm.size - 1][buf[index]];
1926 				do
1927 				{
1928 					if (!n--)
1929 					{
1930 						if (e->re.bm.back < 0)
1931 							goto possible;
1932 						if (advance)
1933 						{
1934 							i = index - e->re.bm.back;
1935 							s += i;
1936 							if (env->stack)
1937 								env->best[0].rm_so += i;
1938 							goto possible;
1939 						}
1940 						x = index;
1941 						if (index < e->re.bm.back)
1942 							index = 0;
1943 						else
1944 							index -= e->re.bm.back;
1945 						while (index <= x)
1946 						{
1947 							if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1948 							{
1949 								if (env->stack)
1950 									env->best[0].rm_so = index;
1951 								n = env->nsub;
1952 								goto hit;
1953 							}
1954 							index++;
1955 						}
1956 						index += e->re.bm.size;
1957 						break;
1958 					}
1959 				} while (m &= mask[n][buf[--index]]);
1960 				if ((index += fail[n + 1]) >= len)
1961 					goto done;
1962 			}
1963 		}
1964  possible:
1965 		n = env->nsub;
1966 		e = e->next;
1967 	}
1968 	j = env->once || (flags & REG_LEFT);
1969 	DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1970 	while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1971 	{
1972 		if (j)
1973 			goto done;
1974 		i = MBSIZE(s);
1975 		s += i;
1976 		if ((unsigned char*)s > env->end - env->min)
1977 			goto done;
1978 		if (env->stack)
1979 			env->best[0].rm_so += i;
1980 	}
1981 	if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1982 		goto done;
1983  hit:
1984 	if (k = env->error)
1985 		goto done;
1986 	if (i == CUT)
1987 	{
1988 		k = env->error = REG_NOMATCH;
1989 		goto done;
1990 	}
1991 	if (!(env->flags & REG_NOSUB))
1992 	{
1993 		k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1994 		for (i = j = m = 0; j < nmatch; i++)
1995 			if (!i || !k || (i & 1))
1996 			{
1997 				if (i > n)
1998 					match[j] = state.nomatch;
1999 				else
2000 					match[m = j] = env->best[i];
2001 				j++;
2002 			}
2003 		if (k)
2004 		{
2005 			while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2006 				m--;
2007 			((regex_t*)p)->re_nsub = m;
2008 		}
2009 	}
2010 	k = 0;
2011  done:
2012 	stkold(stkstd, &env->stk);
2013 	env->stk.base = 0;
2014 	if (k > REG_NOMATCH)
2015 		fatal(p->env->disc, k, NiL);
2016 	return k;
2017 }
2018 
2019 void
2020 regfree(regex_t* p)
2021 {
2022 	Env_t*	env;
2023 
2024 	if (p && (env = p->env))
2025 	{
2026 #if _REG_subcomp
2027 		if (env->sub)
2028 		{
2029 			regsubfree(p);
2030 			p->re_sub = 0;
2031 		}
2032 #endif
2033 		p->env = 0;
2034 		if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2035 		{
2036 			drop(env->disc, env->rex);
2037 			if (env->pos)
2038 				vecclose(env->pos);
2039 			if (env->bestpos)
2040 				vecclose(env->bestpos);
2041 			if (env->stk.base)
2042 				stkold(stkstd, &env->stk);
2043 			alloc(env->disc, env, 0);
2044 		}
2045 	}
2046 }
2047 
2048 /*
2049  * 20120528: regoff_t changed from int to ssize_t
2050  */
2051 
2052 #if defined(__EXPORT__)
2053 #define extern		__EXPORT__
2054 #endif
2055 
2056 #undef	regnexec
2057 #if _map_libc
2058 #define regnexec	_ast_regnexec
2059 #endif
2060 
2061 extern int
2062 regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, oldregmatch_t* oldmatch, regflags_t flags)
2063 {
2064 	if (oldmatch)
2065 	{
2066 		regmatch_t*	match;
2067 		ssize_t		i;
2068 		int		r;
2069 
2070 		if (!(match = oldof(0, regmatch_t, nmatch, 0)))
2071 			return -1;
2072 		if (!(r = regnexec_20120528(p, s, len, nmatch, match, flags)))
2073 			for (i = 0; i < nmatch; i++)
2074 			{
2075 				oldmatch[i].rm_so = match[i].rm_so;
2076 				oldmatch[i].rm_eo = match[i].rm_eo;
2077 			}
2078 		free(match);
2079 		return r;
2080 	}
2081 	return regnexec_20120528(p, s, len, 0, NiL, flags);
2082 }
2083