xref: /titanic_44/usr/src/lib/libast/common/regex/regnexec.c (revision 3e14f97f673e8a630f076077de35afdd43dc1587)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * posix regex executor
26  * single sized-record interface
27  */
28 
29 #include "reglib.h"
30 
31 #if _AST_REGEX_DEBUG
32 
33 #define DEBUG_TEST(f,y,n)	((debug&(debug_flag=f))?(y):(n))
34 #define DEBUG_CODE(f,y,n)	do if(debug&(f)){y}else{n} while(0)
35 #define DEBUG_INIT()		do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
36 
37 static unsigned long	debug;
38 static unsigned long	debug_flag;
39 
40 static const char*	rexnames[] =
41 {
42 	"REX_NULL",
43 	"REX_ALT",
44 	"REX_ALT_CATCH",
45 	"REX_BACK",
46 	"REX_BEG",
47 	"REX_BEG_STR",
48 	"REX_BM",
49 	"REX_CAT",
50 	"REX_CLASS",
51 	"REX_COLL_CLASS",
52 	"REX_CONJ",
53 	"REX_CONJ_LEFT",
54 	"REX_CONJ_RIGHT",
55 	"REX_DONE",
56 	"REX_DOT",
57 	"REX_END",
58 	"REX_END_STR",
59 	"REX_EXEC",
60 	"REX_FIN_STR",
61 	"REX_GROUP",
62 	"REX_GROUP_CATCH",
63 	"REX_GROUP_AHEAD",
64 	"REX_GROUP_AHEAD_CATCH",
65 	"REX_GROUP_AHEAD_NOT",
66 	"REX_GROUP_BEHIND",
67 	"REX_GROUP_BEHIND_CATCH",
68 	"REX_GROUP_BEHIND_NOT",
69 	"REX_GROUP_BEHIND_NOT_CATCH",
70 	"REX_GROUP_COND",
71 	"REX_GROUP_COND_CATCH",
72 	"REX_GROUP_CUT",
73 	"REX_GROUP_CUT_CATCH",
74 	"REX_KMP",
75 	"REX_NEG",
76 	"REX_NEG_CATCH",
77 	"REX_NEST",
78 	"REX_ONECHAR",
79 	"REX_REP",
80 	"REX_REP_CATCH",
81 	"REX_STRING",
82 	"REX_TRIE",
83 	"REX_WBEG",
84 	"REX_WEND",
85 	"REX_WORD",
86 	"REX_WORD_NOT"
87 };
88 
rexname(Rex_t * rex)89 static const char* rexname(Rex_t* rex)
90 {
91 	if (!rex)
92 		return "NIL";
93 	if (rex->type >= elementsof(rexnames))
94 		return "ERROR";
95 	return rexnames[rex->type];
96 }
97 
98 #else
99 
100 #define DEBUG_INIT()
101 #define DEBUG_TEST(f,y,n)	(n)
102 #define DEBUG_CODE(f,y,n)	do {n} while(0)
103 
104 #endif
105 
106 #define BEG_ALT		1	/* beginning of an alt			*/
107 #define BEG_ONE		2	/* beginning of one iteration of a rep	*/
108 #define BEG_REP		3	/* beginning of a repetition		*/
109 #define BEG_SUB		4	/* beginning of a subexpression		*/
110 #define END_ANY		5	/* end of any of above			*/
111 
112 /*
113  * returns from parse()
114  */
115 
116 #define NONE		0	/* no parse found			*/
117 #define GOOD		1	/* some parse was found			*/
118 #define CUT		2	/* no match and no backtrack		*/
119 #define BEST		3	/* an unbeatable parse was found	*/
120 #define BAD		4	/* error ocurred			*/
121 
122 /*
123  * REG_SHELL_DOT test
124  */
125 
126 #define LEADING(e,r,s)	(*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
127 
128 /*
129  * Pos_t is for comparing parses. An entry is made in the
130  * array at the beginning and at the end of each Group_t,
131  * each iteration in a Group_t, and each Binary_t.
132  */
133 
134 typedef struct
135 {
136 	unsigned char*	p;		/* where in string		*/
137 	size_t		length;		/* length in string		*/
138 	short		serial;		/* preorder subpattern number	*/
139 	short		be;		/* which end of pair		*/
140 } Pos_t;
141 
142 /* ===== begin library support ===== */
143 
144 #define vector(t,v,i)	(((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
145 
146 static Vector_t*
vecopen(int inc,int siz)147 vecopen(int inc, int siz)
148 {
149 	Vector_t*	v;
150 	Stk_t*		sp;
151 
152 	if (inc <= 0)
153 		inc = 16;
154 	if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155 		return 0;
156 	if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
157 	{
158 		stkclose(sp);
159 		return 0;
160 	}
161 	v->stk = sp;
162 	v->vec = (char*)v + sizeof(Vector_t);
163 	v->max = v->inc = inc;
164 	v->siz = siz;
165 	v->cur = 0;
166 	return v;
167 }
168 
169 static void*
vecseek(Vector_t ** p,int index)170 vecseek(Vector_t** p, int index)
171 {
172 	Vector_t*	v = *p;
173 
174 	if (index >= v->max)
175 	{
176 		while ((v->max += v->inc) <= index);
177 		if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178 			return 0;
179 		*p = v;
180 		v->vec = (char*)v + sizeof(Vector_t);
181 	}
182 	return v->vec + index * v->siz;
183 }
184 
185 static void
vecclose(Vector_t * v)186 vecclose(Vector_t* v)
187 {
188 	if (v)
189 		stkclose(v->stk);
190 }
191 
192 typedef struct
193 {
194 	Stk_pos_t	pos;
195 	char		data[1];
196 } Stk_frame_t;
197 
198 #define stknew(s,p)	((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199 #define stkold(s,p)	stkset(s,(p)->base,(p)->offset)
200 
201 #define stkframe(s)	(*((Stk_frame_t**)stktop(s)-1))
202 #define stkdata(s,t)	((t*)stkframe(s)->data)
203 #define stkpop(s)	stkold(s,&(stkframe(s)->pos))
204 
205 static void*
stkpush(Stk_t * sp,size_t size)206 stkpush(Stk_t* sp, size_t size)
207 {
208 	Stk_frame_t*	f;
209 	Stk_pos_t	p;
210 
211 	stknew(sp, &p);
212 	size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213 	if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214 		return 0;
215 	f->pos = p;
216 	stkframe(sp) = f;
217 	return f->data;
218 }
219 
220 /* ===== end library support ===== */
221 
222 /*
223  * Match_frame_t is for saving and restoring match records
224  * around alternate attempts, so that fossils will not be
225  * left in the match array.  These are the only entries in
226  * the match array that are not otherwise guaranteed to
227  * have current data in them when they get used.
228  */
229 
230 typedef struct
231 {
232 	size_t			size;
233 	regmatch_t*		match;
234 	regmatch_t		save[1];
235 } Match_frame_t;
236 
237 #define matchframe	stkdata(stkstd,Match_frame_t)
238 #define matchpush(e,x)	((x)->re.group.number?_matchpush(e,x):0)
239 #define matchcopy(e,x)	((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size):(void*)0)
240 #define matchpop(e,x)	((x)->re.group.number?memcpy(matchframe->match,matchframe->save,matchframe->size),stkpop(stkstd):(void*)0)
241 
242 #define pospop(e)	(--(e)->pos->cur)
243 
244 /*
245  * allocate a frame and push a match onto the stack
246  */
247 
248 static int
_matchpush(Env_t * env,Rex_t * rex)249 _matchpush(Env_t* env, Rex_t* rex)
250 {
251 	Match_frame_t*	f;
252 	regmatch_t*	m;
253 	regmatch_t*	e;
254 	regmatch_t*	s;
255 	int		num;
256 
257 	if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
258 		num = 0;
259 	if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
260 	{
261 		env->error = REG_ESPACE;
262 		return 1;
263 	}
264 	f->size = num * sizeof(regmatch_t);
265 	f->match = m = env->match + rex->re.group.number;
266 	e = m + num;
267 	s = f->save;
268 	while (m < e)
269 	{
270 		*s++ = *m;
271 		*m++ = state.nomatch;
272 	}
273 	return 0;
274 }
275 
276 /*
277  * allocate a frame and push a pos onto the stack
278  */
279 
280 static int
pospush(Env_t * env,Rex_t * rex,unsigned char * p,int be)281 pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
282 {
283 	Pos_t*	pos;
284 
285 	if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
286 	{
287 		env->error = REG_ESPACE;
288 		return 1;
289 	}
290 	pos->serial = rex->serial;
291 	pos->p = p;
292 	pos->be = be;
293 	env->pos->cur++;
294 	return 0;
295 }
296 
297 /*
298  * two matches are known to have the same length
299  * os is start of old pos array, ns is start of new,
300  * oend and nend are end+1 pointers to ends of arrays.
301  * oe and ne are ends (not end+1) of subarrays.
302  * returns 1 if new is better, -1 if old, else 0.
303  */
304 
305 static int
better(Env_t * env,Pos_t * os,Pos_t * ns,Pos_t * oend,Pos_t * nend,int level)306 better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
307 {
308 	Pos_t*	oe;
309 	Pos_t*	ne;
310 	int	k;
311 	int	n;
312 
313 	if (env->error)
314 		return -1;
315 	for (;;)
316 	{
317 		DEBUG_CODE(0x0080,{sfprintf(sfstdout, "   %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n   %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
318 		if (ns >= nend)
319 			return DEBUG_TEST(0x8000,(os < oend),(0));
320 		if (os >= oend)
321 			return DEBUG_TEST(0x8000,(-1),(1));
322 		n = os->serial;
323 		if (ns->serial > n)
324 			return -1;
325 		if (n > ns->serial)
326 		{
327 			env->error = REG_PANIC;
328 			return -1;
329 		}
330 		if (ns->p > os->p)
331 			return 1;
332 		if (os->p > ns->p)
333 			return -1;
334 		oe = os;
335 		k = 0;
336 		for (;;)
337 			if ((++oe)->serial == n)
338 			{
339 				if (oe->be != END_ANY)
340 					k++;
341 				else if (k-- <= 0)
342 					break;
343 			}
344 		ne = ns;
345 		k = 0;
346 		for (;;)
347 			if ((++ne)->serial == n)
348 			{
349 				if (ne->be != END_ANY)
350 					k++;
351 				else if (k-- <= 0)
352 					break;
353 			}
354 		if (ne->p > oe->p)
355 			return 1;
356 		if (oe->p > ne->p)
357 			return -1;
358 		if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
359 			return k;
360 		os = oe + 1;
361 		ns = ne + 1;
362 	}
363 }
364 
365 #if _AST_REGEX_DEBUG
366 
367 static void
showmatch(regmatch_t * p)368 showmatch(regmatch_t* p)
369 {
370 	sfputc(sfstdout, '(');
371 	if (p->rm_so < 0)
372 		sfputc(sfstdout, '?');
373 	else
374 		sfprintf(sfstdout, "%d", p->rm_so);
375 	sfputc(sfstdout, ',');
376 	if (p->rm_eo < 0)
377 		sfputc(sfstdout, '?');
378 	else
379 		sfprintf(sfstdout, "%d", p->rm_eo);
380 	sfputc(sfstdout, ')');
381 }
382 
383 static int
_better(Env_t * env,Pos_t * os,Pos_t * ns,Pos_t * oend,Pos_t * nend,int level)384 _better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
385 {
386 	int	i;
387 
388 	DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n           new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
389 	i = better(env, os, ns, oend, nend, 0);
390 	DEBUG_TEST(0x0040,(sfprintf(sfstdout, "           %s\n", i <= 0 ? "OLD" : "NEW")),(0));
391 	return i;
392 }
393 
394 #define better	_better
395 
396 #endif
397 
398 #define follow(e,r,c,s)	((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
399 
400 static int		parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
401 
402 static int
parserep(Env_t * env,Rex_t * rex,Rex_t * cont,unsigned char * s,int n)403 parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
404 {
405 	int	i;
406 	int	r = NONE;
407 	Rex_t	catcher;
408 
409 	DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
410 	if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
411 	{
412 		if (env->stack && pospush(env, rex, s, END_ANY))
413 			return BAD;
414 		i = follow(env, rex, cont, s);
415 		if (env->stack)
416 			pospop(env);
417 		switch (i)
418 		{
419 		case BAD:
420 			return BAD;
421 		case CUT:
422 			return CUT;
423 		case BEST:
424 		case GOOD:
425 			return BEST;
426 		}
427 	}
428 	if (n < rex->hi)
429 	{
430 		catcher.type = REX_REP_CATCH;
431 		catcher.serial = rex->serial;
432 		catcher.re.rep_catch.ref = rex;
433 		catcher.re.rep_catch.cont = cont;
434 		catcher.re.rep_catch.beg = s;
435 		catcher.re.rep_catch.n = n + 1;
436 		catcher.next = rex->next;
437 		if (n == 0)
438 			rex->re.rep_catch.beg = s;
439 		if (env->stack)
440 		{
441 			if (matchpush(env, rex))
442 				return BAD;
443 			if (pospush(env, rex, s, BEG_ONE))
444 				return BAD;
445 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d   (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
446 		}
447 		r = parse(env, rex->re.group.expr.rex, &catcher, s);
448 		DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
449 		if (env->stack)
450 		{
451 			pospop(env);
452 			matchpop(env, rex);
453 DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP  %d %d (%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)(%d,%d)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
454 		}
455 		switch (r)
456 		{
457 		case BAD:
458 			return BAD;
459 		case BEST:
460 			return BEST;
461 		case CUT:
462 			r = NONE;
463 			break;
464 		case GOOD:
465 			if (rex->flags & REG_MINIMAL)
466 				return BEST;
467 			r = GOOD;
468 			break;
469 		}
470 	}
471 	if (n < rex->lo)
472 		return r;
473 	if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
474 	{
475 		if (env->stack && pospush(env, rex, s, END_ANY))
476 			return BAD;
477 		i = follow(env, rex, cont, s);
478 		if (env->stack)
479 			pospop(env);
480 		switch (i)
481 		{
482 		case BAD:
483 			r = BAD;
484 			break;
485 		case CUT:
486 			r = CUT;
487 			break;
488 		case BEST:
489 			r = BEST;
490 			break;
491 		case GOOD:
492 			r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
493 			break;
494 		}
495 	}
496 	return r;
497 }
498 
499 static int
parsetrie(Env_t * env,Trie_node_t * x,Rex_t * rex,Rex_t * cont,unsigned char * s)500 parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
501 {
502 	unsigned char*	p;
503 	int		r;
504 
505 	if (p = rex->map)
506 	{
507 		for (;;)
508 		{
509 			if (s >= env->end)
510 				return NONE;
511 			while (x->c != p[*s])
512 				if (!(x = x->sib))
513 					return NONE;
514 			if (x->end)
515 				break;
516 			x = x->son;
517 			s++;
518 		}
519 	}
520 	else
521 	{
522 		for (;;)
523 		{
524 			if (s >= env->end)
525 				return NONE;
526 			while (x->c != *s)
527 				if (!(x = x->sib))
528 					return NONE;
529 			if (x->end)
530 				break;
531 			x = x->son;
532 			s++;
533 		}
534 	}
535 	s++;
536 	if (rex->flags & REG_MINIMAL)
537 		switch (follow(env, rex, cont, s))
538 		{
539 		case BAD:
540 			return BAD;
541 		case CUT:
542 			return CUT;
543 		case BEST:
544 		case GOOD:
545 			return BEST;
546 		}
547 	if (x->son)
548 		switch (parsetrie(env, x->son, rex, cont, s))
549 		{
550 		case BAD:
551 			return BAD;
552 		case CUT:
553 			return CUT;
554 		case BEST:
555 			return BEST;
556 		case GOOD:
557 			if (rex->flags & REG_MINIMAL)
558 				return BEST;
559 			r = GOOD;
560 			break;
561 		default:
562 			r = NONE;
563 			break;
564 		}
565 	else
566 		r = NONE;
567 	if (!(rex->flags & REG_MINIMAL))
568 		switch (follow(env, rex, cont, s))
569 		{
570 		case BAD:
571 			return BAD;
572 		case CUT:
573 			return CUT;
574 		case BEST:
575 			return BEST;
576 		case GOOD:
577 			return GOOD;
578 	}
579 	return r;
580 }
581 
582 static int
collelt(register Celt_t * ce,char * key,int c,int x)583 collelt(register Celt_t* ce, char* key, int c, int x)
584 {
585 	Ckey_t	elt;
586 
587 	mbxfrm(elt, key, COLL_KEY_MAX);
588 	for (;; ce++)
589 	{
590 		switch (ce->typ)
591 		{
592 		case COLL_call:
593 			if (!x && (*ce->fun)(c))
594 				return 1;
595 			continue;
596 		case COLL_char:
597 			if (!strcmp((char*)ce->beg, (char*)elt))
598 				return 1;
599 			continue;
600 		case COLL_range:
601 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
602 				return 1;
603 			continue;
604 		case COLL_range_lc:
605 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
606 				return 1;
607 			continue;
608 		case COLL_range_uc:
609 			if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
610 				return 1;
611 			continue;
612 		}
613 		break;
614 	}
615 	return 0;
616 }
617 
618 static int
collic(register Celt_t * ce,char * key,register char * nxt,int c,int x)619 collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
620 {
621 	if (!x)
622 	{
623 		if (collelt(ce, key, c, x))
624 			return 1;
625 		if (iswlower(c))
626 			c = towupper(c);
627 		else if (iswupper(c))
628 			c = towlower(c);
629 		else
630 			return 0;
631 		x = mbconv(key, c);
632 		key[x] = 0;
633 		return collelt(ce, key, c, 0);
634 	}
635 	while (*nxt)
636 	{
637 		if (collic(ce, key, nxt + 1, c, x))
638 			return 1;
639 		if (islower(*nxt))
640 			*nxt = toupper(*nxt);
641 		else if (isupper(*nxt))
642 			*nxt = tolower(*nxt);
643 		else
644 			return 0;
645 		nxt++;
646 	}
647 	return collelt(ce, key, c, x);
648 }
649 
650 static int
collmatch(Rex_t * rex,unsigned char * s,unsigned char * e,unsigned char ** p)651 collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
652 {
653 	unsigned char*		t;
654 	wchar_t			c;
655 	int			w;
656 	int			r;
657 	int			x;
658 	int			ic;
659 	Ckey_t			key;
660 	Ckey_t			elt;
661 
662 	ic = (rex->flags & REG_ICASE);
663 	if ((w = MBSIZE(s)) > 1)
664 	{
665 		memcpy((char*)key, (char*)s, w);
666 		key[w] = 0;
667 		t = s;
668 		c = mbchar(t);
669 #if !_lib_wctype
670 		c &= 0xff;
671 #endif
672 		x = 0;
673 	}
674 	else
675 	{
676 		c = s[0];
677 		if (ic && isupper(c))
678 			c = tolower(c);
679 		key[0] = c;
680 		key[1] = 0;
681 		if (isalpha(c))
682 		{
683 			x = e - s;
684 			if (x > COLL_KEY_MAX)
685 				x = COLL_KEY_MAX;
686 			while (w < x)
687 			{
688 				c = s[w];
689 				if (!isalpha(c))
690 					break;
691 				r = mbxfrm(elt, key, COLL_KEY_MAX);
692 				if (ic && isupper(c))
693 					c = tolower(c);
694 				key[w] = c;
695 				key[w + 1] = 0;
696 				if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
697 					break;
698 				w++;
699 			}
700 		}
701 		key[w] = 0;
702 		c = key[0];
703 		x = w - 1;
704 	}
705 	r = 1;
706 	for (;;)
707 	{
708 		if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
709 			break;
710 		if (!x)
711 		{
712 			r = 0;
713 			break;
714 		}
715 		w = x--;
716 		key[w] = 0;
717 	}
718 	*p = s + w;
719 	return rex->re.collate.invert ? !r : r;
720 }
721 
722 static unsigned char*
nestmatch(register unsigned char * s,register unsigned char * e,const unsigned short * type,register int co)723 nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
724 {
725 	register int	c;
726 	register int	cc;
727 	unsigned int	n;
728 	int		oc;
729 
730 	if (type[co] & (REX_NEST_literal|REX_NEST_quote))
731 	{
732 		n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
733 		while (s < e)
734 		{
735 			c = *s++;
736 			if (c == co)
737 				return s;
738 			else if (type[c] & n)
739 			{
740 				if (s >= e || (type[c] & REX_NEST_terminator))
741 					break;
742 				s++;
743 			}
744 		}
745 	}
746 	else
747 	{
748 		cc = type[co] >> REX_NEST_SHIFT;
749 		oc = type[co] & (REX_NEST_open|REX_NEST_close);
750 		n = 1;
751 		while (s < e)
752 		{
753 			c = *s++;
754 			switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
755 			{
756 			case REX_NEST_delimiter:
757 			case REX_NEST_terminator:
758 				return oc ? 0 : s;
759 			case REX_NEST_separator:
760 				if (!oc)
761 					return s;
762 				break;
763 			case REX_NEST_escape:
764 				if (s >= e)
765 					return 0;
766 				s++;
767 				break;
768 			case REX_NEST_open|REX_NEST_close:
769 				if (c == cc)
770 				{
771 					if (!--n)
772 						return s;
773 				}
774 				/*FALLTHROUGH*/
775 			case REX_NEST_open:
776 				if (c == co)
777 				{
778 					if (!++n)
779 						return 0;
780 				}
781 				else if (!(s = nestmatch(s, e, type, c)))
782 					return 0;
783 				break;
784 			case REX_NEST_close:
785 				if (c != cc)
786 					return 0;
787 				if (!--n)
788 					return s;
789 				break;
790 			}
791 		}
792 		return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
793 	}
794 	return 0;
795 }
796 
797 static int
parse(Env_t * env,Rex_t * rex,Rex_t * cont,unsigned char * s)798 parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
799 {
800 	int		c;
801 	int		d;
802 	int		i;
803 	int		m;
804 	int		n;
805 	int		r;
806 	int*		f;
807 	unsigned char*	p;
808 	unsigned char*	t;
809 	unsigned char*	b;
810 	unsigned char*	e;
811 	char*		u;
812 	regmatch_t*	o;
813 	Trie_node_t*	x;
814 	Rex_t*		q;
815 	Rex_t		catcher;
816 	Rex_t		next;
817 
818 	for (;;)
819 	{
820 DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
821 		switch (rex->type)
822 		{
823 		case REX_ALT:
824 			if (env->stack)
825 			{
826 				if (matchpush(env, rex))
827 					return BAD;
828 				if (pospush(env, rex, s, BEG_ALT))
829 					return BAD;
830 				catcher.type = REX_ALT_CATCH;
831 				catcher.serial = rex->serial;
832 				catcher.re.alt_catch.cont = cont;
833 				catcher.next = rex->next;
834 				r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
835 				if (r < BEST || (rex->flags & REG_MINIMAL))
836 				{
837 					matchcopy(env, rex);
838 					((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
839 					n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
840 					if (n != NONE)
841 						r = n;
842 				}
843 				pospop(env);
844 				matchpop(env, rex);
845 			}
846 			else
847 			{
848 				if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
849 					r = parse(env, rex->re.group.expr.binary.right, cont, s);
850 				if (r == GOOD)
851 					r = BEST;
852 			}
853 			return r;
854 		case REX_ALT_CATCH:
855 			if (pospush(env, rex, s, END_ANY))
856 				return BAD;
857 			r = follow(env, rex, rex->re.alt_catch.cont, s);
858 			pospop(env);
859 			return r;
860 		case REX_BACK:
861 			o = &env->match[rex->lo];
862 			if (o->rm_so < 0)
863 				return NONE;
864 			i = o->rm_eo - o->rm_so;
865 			e = s + i;
866 			if (e > env->end)
867 				return NONE;
868 			t = env->beg + o->rm_so;
869 			if (!(p = rex->map))
870 			{
871 				while (s < e)
872 					if (*s++ != *t++)
873 						return NONE;
874 			}
875 			else if (!mbwide())
876 			{
877 				while (s < e)
878 					if (p[*s++] != p[*t++])
879 						return NONE;
880 			}
881 			else
882 			{
883 				while (s < e)
884 				{
885 					c = mbchar(s);
886 					d = mbchar(t);
887 					if (towupper(c) != towupper(d))
888 						return NONE;
889 				}
890 			}
891 			break;
892 		case REX_BEG:
893 			if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
894 				return NONE;
895 			break;
896 		case REX_CLASS:
897 			if (LEADING(env, rex, s))
898 				return NONE;
899 			n = rex->hi;
900 			if (n > env->end - s)
901 				n = env->end - s;
902 			m = rex->lo;
903 			if (m > n)
904 				return NONE;
905 			r = NONE;
906 			if (!(rex->flags & REG_MINIMAL))
907 			{
908 				for (i = 0; i < n; i++)
909 					if (!settst(rex->re.charclass, s[i]))
910 					{
911 						n = i;
912 						break;
913 					}
914 				for (s += n; n-- >= m; s--)
915 					switch (follow(env, rex, cont, s))
916 					{
917 					case BAD:
918 						return BAD;
919 					case CUT:
920 						return CUT;
921 					case BEST:
922 						return BEST;
923 					case GOOD:
924 						r = GOOD;
925 						break;
926 					}
927 			}
928 			else
929 			{
930 				for (e = s + m; s < e; s++)
931 					if (!settst(rex->re.charclass, *s))
932 						return r;
933 				e += n - m;
934 				for (;;)
935 				{
936 					switch (follow(env, rex, cont, s))
937 					{
938 					case BAD:
939 						return BAD;
940 					case CUT:
941 						return CUT;
942 					case BEST:
943 					case GOOD:
944 						return BEST;
945 					}
946 					if (s >= e || !settst(rex->re.charclass, *s))
947 						break;
948 					s++;
949 				}
950 			}
951 			return r;
952 		case REX_COLL_CLASS:
953 			if (LEADING(env, rex, s))
954 				return NONE;
955 			n = rex->hi;
956 			if (n > env->end - s)
957 				n = env->end - s;
958 			m = rex->lo;
959 			if (m > n)
960 				return NONE;
961 			r = NONE;
962 			e = env->end;
963 			if (!(rex->flags & REG_MINIMAL))
964 			{
965 				if (!(b = (unsigned char*)stkpush(stkstd, n)))
966 				{
967 					env->error = REG_ESPACE;
968 					return BAD;
969 				}
970 				for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
971 				{
972 					b[i] = t - s;
973 					s = t;
974 				}
975 				for (; i-- >= rex->lo; s -= b[i])
976 					switch (follow(env, rex, cont, s))
977 					{
978 					case BAD:
979 						stkpop(stkstd);
980 						return BAD;
981 					case CUT:
982 						stkpop(stkstd);
983 						return CUT;
984 					case BEST:
985 						stkpop(stkstd);
986 						return BEST;
987 					case GOOD:
988 						r = GOOD;
989 						break;
990 					}
991 				stkpop(stkstd);
992 			}
993 			else
994 			{
995 				for (i = 0; i < m && s < e; i++, s = t)
996 					if (!collmatch(rex, s, e, &t))
997 						return r;
998 				while (i++ <= n)
999 				{
1000 					switch (follow(env, rex, cont, s))
1001 					{
1002 					case BAD:
1003 						return BAD;
1004 					case CUT:
1005 						return CUT;
1006 					case BEST:
1007 					case GOOD:
1008 						return BEST;
1009 					}
1010 					if (s >= e || !collmatch(rex, s, e, &s))
1011 						break;
1012 				}
1013 			}
1014 			return r;
1015 		case REX_CONJ:
1016 			next.type = REX_CONJ_RIGHT;
1017 			next.re.conj_right.cont = cont;
1018 			next.next = rex->next;
1019 			catcher.type = REX_CONJ_LEFT;
1020 			catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1021 			catcher.re.conj_left.cont = &next;
1022 			catcher.re.conj_left.beg = s;
1023 			catcher.next = 0;
1024 			return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1025 		case REX_CONJ_LEFT:
1026 			rex->re.conj_left.cont->re.conj_right.end = s;
1027 			cont = rex->re.conj_left.cont;
1028 			s = rex->re.conj_left.beg;
1029 			rex = rex->re.conj_left.right;
1030 			continue;
1031 		case REX_CONJ_RIGHT:
1032 			if (rex->re.conj_right.end != s)
1033 				return NONE;
1034 			cont = rex->re.conj_right.cont;
1035 			break;
1036 		case REX_DONE:
1037 			if (!env->stack)
1038 				return BEST;
1039 			n = s - env->beg;
1040 			r = env->nsub;
1041 			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1042 			if ((i = env->best[0].rm_eo) >= 0)
1043 			{
1044 				if (rex->flags & REG_MINIMAL)
1045 				{
1046 					if (n > i)
1047 						return GOOD;
1048 				}
1049 				else
1050 				{
1051 					if (n < i)
1052 						return GOOD;
1053 				}
1054 				if (n == i && better(env,
1055 						     (Pos_t*)env->bestpos->vec,
1056 				   		     (Pos_t*)env->pos->vec,
1057 				   		     (Pos_t*)env->bestpos->vec+env->bestpos->cur,
1058 				   		     (Pos_t*)env->pos->vec+env->pos->cur,
1059 						     0) <= 0)
1060 					return GOOD;
1061 			}
1062 			env->best[0].rm_eo = n;
1063 			memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1064 			n = env->pos->cur;
1065 			if (!vector(Pos_t, env->bestpos, n))
1066 			{
1067 				env->error = REG_ESPACE;
1068 				return BAD;
1069 			}
1070 			env->bestpos->cur = n;
1071 			memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1072 			DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%d,%d)(%d,%d)(%d,%d)(%d,%d) (%d,%d)(%d,%d)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1073 			return GOOD;
1074 		case REX_DOT:
1075 			if (LEADING(env, rex, s))
1076 				return NONE;
1077 			n = rex->hi;
1078 			if (n > env->end - s)
1079 				n = env->end - s;
1080 			m = rex->lo;
1081 			if (m > n)
1082 				return NONE;
1083 			if ((c = rex->explicit) >= 0 && !mbwide())
1084 				for (i = 0; i < n; i++)
1085 					if (s[i] == c)
1086 					{
1087 						n = i;
1088 						break;
1089 					}
1090 			r = NONE;
1091 			if (!(rex->flags & REG_MINIMAL))
1092 			{
1093 				if (!mbwide())
1094 				{
1095 					for (s += n; n-- >= m; s--)
1096 						switch (follow(env, rex, cont, s))
1097 						{
1098 						case BAD:
1099 							return BAD;
1100 						case CUT:
1101 							return CUT;
1102 						case BEST:
1103 							return BEST;
1104 						case GOOD:
1105 							r = GOOD;
1106 							break;
1107 						}
1108 				}
1109 				else
1110 				{
1111 					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1112 					{
1113 						env->error = REG_ESPACE;
1114 						return BAD;
1115 					}
1116 					e = env->end;
1117 					for (i = 0; s < e && i < n && *s != c; i++)
1118 						s += b[i] = MBSIZE(s);
1119 					for (; i-- >= m; s -= b[i])
1120 						switch (follow(env, rex, cont, s))
1121 						{
1122 						case BAD:
1123 							stkpop(stkstd);
1124 							return BAD;
1125 						case CUT:
1126 							stkpop(stkstd);
1127 							return CUT;
1128 						case BEST:
1129 							stkpop(stkstd);
1130 							return BEST;
1131 						case GOOD:
1132 							r = GOOD;
1133 							break;
1134 						}
1135 					stkpop(stkstd);
1136 				}
1137 			}
1138 			else
1139 			{
1140 				if (!mbwide())
1141 				{
1142 					e = s + n;
1143 					for (s += m; s <= e; s++)
1144 						switch (follow(env, rex, cont, s))
1145 						{
1146 						case BAD:
1147 							return BAD;
1148 						case CUT:
1149 							return CUT;
1150 						case BEST:
1151 						case GOOD:
1152 							return BEST;
1153 						}
1154 				}
1155 				else
1156 				{
1157 					e = env->end;
1158 					for (i = 0; s < e && i < m && *s != c; i++)
1159 						s += MBSIZE(s);
1160 					if (i >= m)
1161 						for (; s <= e && i <= n; s += MBSIZE(s), i++)
1162 							switch (follow(env, rex, cont, s))
1163 							{
1164 							case BAD:
1165 								return BAD;
1166 							case CUT:
1167 								return CUT;
1168 							case BEST:
1169 							case GOOD:
1170 								return BEST;
1171 							}
1172 				}
1173 			}
1174 			return r;
1175 		case REX_END:
1176 			if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1177 				return NONE;
1178 			break;
1179 		case REX_GROUP:
1180 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1181 			if (env->stack)
1182 			{
1183 				if (rex->re.group.number)
1184 					env->match[rex->re.group.number].rm_so = s - env->beg;
1185 				if (pospush(env, rex, s, BEG_SUB))
1186 					return BAD;
1187 				catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1188 			}
1189 			catcher.type = REX_GROUP_CATCH;
1190 			catcher.serial = rex->serial;
1191 			catcher.re.group_catch.cont = cont;
1192 			catcher.next = rex->next;
1193 			r = parse(env, rex->re.group.expr.rex, &catcher, s);
1194 			if (env->stack)
1195 			{
1196 				pospop(env);
1197 				if (rex->re.group.number)
1198 					env->match[rex->re.group.number].rm_so = -1;
1199 			}
1200 			return r;
1201 		case REX_GROUP_CATCH:
1202 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1203 			if (env->stack)
1204 			{
1205 				if (rex->re.group_catch.eo)
1206 					*rex->re.group_catch.eo = s - env->beg;
1207 				if (pospush(env, rex, s, END_ANY))
1208 					return BAD;
1209 			}
1210 			r = follow(env, rex, rex->re.group_catch.cont, s);
1211 			if (env->stack)
1212 			{
1213 				pospop(env);
1214 				if (rex->re.group_catch.eo)
1215 					*rex->re.group_catch.eo = -1;
1216 			}
1217 			return r;
1218 		case REX_GROUP_AHEAD:
1219 			catcher.type = REX_GROUP_AHEAD_CATCH;
1220 			catcher.flags = rex->flags;
1221 			catcher.serial = rex->serial;
1222 			catcher.re.rep_catch.beg = s;
1223 			catcher.re.rep_catch.cont = cont;
1224 			catcher.next = rex->next;
1225 			return parse(env, rex->re.group.expr.rex, &catcher, s);
1226 		case REX_GROUP_AHEAD_CATCH:
1227 			return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1228 		case REX_GROUP_AHEAD_NOT:
1229 			r = parse(env, rex->re.group.expr.rex, NiL, s);
1230 			if (r == NONE)
1231 				r = follow(env, rex, cont, s);
1232 			else if (r != BAD)
1233 				r = NONE;
1234 			return r;
1235 		case REX_GROUP_BEHIND:
1236 			if ((s - env->beg) < rex->re.group.size)
1237 				return NONE;
1238 			catcher.type = REX_GROUP_BEHIND_CATCH;
1239 			catcher.flags = rex->flags;
1240 			catcher.serial = rex->serial;
1241 			catcher.re.behind_catch.beg = s;
1242 			catcher.re.behind_catch.end = e = env->end;
1243 			catcher.re.behind_catch.cont = cont;
1244 			catcher.next = rex->next;
1245 			for (t = s - rex->re.group.size; t >= env->beg; t--)
1246 			{
1247 				env->end = s;
1248 				r = parse(env, rex->re.group.expr.rex, &catcher, t);
1249 				env->end = e;
1250 				if (r != NONE)
1251 					return r;
1252 			}
1253 			return NONE;
1254 		case REX_GROUP_BEHIND_CATCH:
1255 			if (s != rex->re.behind_catch.beg)
1256 				return NONE;
1257 			env->end = rex->re.behind_catch.end;
1258 			return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1259 		case REX_GROUP_BEHIND_NOT:
1260 			if ((s - env->beg) < rex->re.group.size)
1261 				r = NONE;
1262 			else
1263 			{
1264 				catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1265 				catcher.re.neg_catch.beg = s;
1266 				catcher.next = 0;
1267 				e = env->end;
1268 				env->end = s;
1269 				for (t = s - rex->re.group.size; t >= env->beg; t--)
1270 				{
1271 					r = parse(env, rex->re.group.expr.rex, &catcher, t);
1272 					if (r != NONE)
1273 						break;
1274 				}
1275 				env->end = e;
1276 			}
1277 			if (r == NONE)
1278 				r = follow(env, rex, cont, s);
1279 			else if (r != BAD)
1280 				r = NONE;
1281 			return r;
1282 		case REX_GROUP_BEHIND_NOT_CATCH:
1283 			return s == rex->re.neg_catch.beg ? GOOD : NONE;
1284 		case REX_GROUP_COND:
1285 			if (q = rex->re.group.expr.binary.right)
1286 			{
1287 				catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1288 				catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1289 			}
1290 			else
1291 				catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1292 			if (q = rex->re.group.expr.binary.left)
1293 			{
1294 				catcher.type = REX_GROUP_COND_CATCH;
1295 				catcher.flags = rex->flags;
1296 				catcher.serial = rex->serial;
1297 				catcher.re.cond_catch.yes = 0;
1298 				catcher.re.cond_catch.beg = s;
1299 				catcher.re.cond_catch.cont = cont;
1300 				catcher.next = rex->next;
1301 				r = parse(env, q, &catcher, s);
1302 				if (r == BAD || catcher.re.cond_catch.yes)
1303 					return r;
1304 			}
1305 			else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1306 				r = GOOD;
1307 			else
1308 				r = NONE;
1309 			if (q = catcher.re.cond_catch.next[r != NONE])
1310 			{
1311 				catcher.type = REX_CAT;
1312 				catcher.flags = q->flags;
1313 				catcher.serial = q->serial;
1314 				catcher.re.group_catch.cont = cont;
1315 				catcher.next = rex->next;
1316 				return parse(env, q, &catcher, s);
1317 			}
1318 			return follow(env, rex, cont, s);
1319 		case REX_GROUP_COND_CATCH:
1320 			rex->re.cond_catch.yes = 1;
1321 			catcher.type = REX_CAT;
1322 			catcher.flags = rex->flags;
1323 			catcher.serial = rex->serial;
1324 			catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1325 			catcher.next = rex->next;
1326 			return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1327 		case REX_CAT:
1328 			return follow(env, rex, rex->re.group_catch.cont, s);
1329 		case REX_GROUP_CUT:
1330 			catcher.type = REX_GROUP_CUT_CATCH;
1331 			catcher.flags = rex->flags;
1332 			catcher.serial = rex->serial;
1333 			catcher.re.group_catch.cont = cont;
1334 			catcher.next = rex->next;
1335 			return parse(env, rex->re.group.expr.rex, &catcher, s);
1336 		case REX_GROUP_CUT_CATCH:
1337 			switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1338 			{
1339 			case GOOD:
1340 				r = BEST;
1341 				break;
1342 			case NONE:
1343 				r = CUT;
1344 				break;
1345 			}
1346 			return r;
1347 		case REX_KMP:
1348 			f = rex->re.string.fail;
1349 			b = rex->re.string.base;
1350 			n = rex->re.string.size;
1351 			t = s;
1352 			e = env->end;
1353 			if (p = rex->map)
1354 			{
1355 				while (t + n <= e)
1356 				{
1357 					for (i = -1; t < e; t++)
1358 					{
1359 						while (i >= 0 && b[i+1] != p[*t])
1360 							i = f[i];
1361 						if (b[i+1] == p[*t])
1362 							i++;
1363 						if (i + 1 == n)
1364 						{
1365 							t++;
1366 							if (env->stack)
1367 								env->best[0].rm_so = t - s - n;
1368 							switch (follow(env, rex, cont, t))
1369 							{
1370 							case BAD:
1371 								return BAD;
1372 							case CUT:
1373 								return CUT;
1374 							case BEST:
1375 							case GOOD:
1376 								return BEST;
1377 							}
1378 							t -= n - 1;
1379 							break;
1380 						}
1381 					}
1382 				}
1383 			}
1384 			else
1385 			{
1386 				while (t + n <= e)
1387 				{
1388 					for (i = -1; t < e; t++)
1389 					{
1390 						while (i >= 0 && b[i+1] != *t)
1391 							i = f[i];
1392 						if (b[i+1] == *t)
1393 							i++;
1394 						if (i + 1 == n)
1395 						{
1396 							t++;
1397 							if (env->stack)
1398 								env->best[0].rm_so = t - s - n;
1399 							switch (follow(env, rex, cont, t))
1400 							{
1401 							case BAD:
1402 								return BAD;
1403 							case CUT:
1404 								return CUT;
1405 							case BEST:
1406 							case GOOD:
1407 								return BEST;
1408 							}
1409 							t -= n - 1;
1410 							break;
1411 						}
1412 					}
1413 				}
1414 			}
1415 			return NONE;
1416 		case REX_NEG:
1417 			if (LEADING(env, rex, s))
1418 				return NONE;
1419 			i = env->end - s;
1420 			n = ((i + 7) >> 3) + 1;
1421 			catcher.type = REX_NEG_CATCH;
1422 			catcher.re.neg_catch.beg = s;
1423 			if (!(p = (unsigned char*)stkpush(stkstd, n)))
1424 				return BAD;
1425 			memset(catcher.re.neg_catch.index = p, 0, n);
1426 			catcher.next = rex->next;
1427 			if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1428 				r = BAD;
1429 			else
1430 			{
1431 				r = NONE;
1432 				for (; i >= 0; i--)
1433 					if (!bittst(p, i))
1434 					{
1435 						switch (follow(env, rex, cont, s + i))
1436 						{
1437 						case BAD:
1438 							r = BAD;
1439 							break;
1440 						case BEST:
1441 							r = BEST;
1442 							break;
1443 						case CUT:
1444 							r = CUT;
1445 							break;
1446 						case GOOD:
1447 							r = GOOD;
1448 							/*FALLTHROUGH*/
1449 						default:
1450 							continue;
1451 						}
1452 						break;
1453 					}
1454 			}
1455 			stkpop(stkstd);
1456 			return r;
1457 		case REX_NEG_CATCH:
1458 			bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1459 			return NONE;
1460 		case REX_NEST:
1461 			if (s >= env->end)
1462 				return NONE;
1463 			do
1464 			{
1465 				if ((c = *s++) == rex->re.nest.primary)
1466 				{
1467 					if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1468 						return NONE;
1469 					break;
1470 				}
1471 				if (rex->re.nest.primary >= 0)
1472 					return NONE;
1473 			    	if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1474 					break;
1475 			    	if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1476 					return NONE;
1477 			} while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1478 			break;
1479 		case REX_NULL:
1480 			break;
1481 		case REX_ONECHAR:
1482 			n = rex->hi;
1483 			if (n > env->end - s)
1484 				n = env->end - s;
1485 			m = rex->lo;
1486 			if (m > n)
1487 				return NONE;
1488 			r = NONE;
1489 			c = rex->re.onechar;
1490 			if (!(rex->flags & REG_MINIMAL))
1491 			{
1492 				if (!mbwide())
1493 				{
1494 					if (p = rex->map)
1495 					{
1496 						for (i = 0; i < n; i++, s++)
1497 							if (p[*s] != c)
1498 								break;
1499 					}
1500 					else
1501 					{
1502 						for (i = 0; i < n; i++, s++)
1503 							if (*s != c)
1504 								break;
1505 					}
1506 					for (; i-- >= m; s--)
1507 						switch (follow(env, rex, cont, s))
1508 						{
1509 						case BAD:
1510 							return BAD;
1511 						case BEST:
1512 							return BEST;
1513 						case CUT:
1514 							return CUT;
1515 						case GOOD:
1516 							r = GOOD;
1517 							break;
1518 						}
1519 				}
1520 				else
1521 				{
1522 					if (!(b = (unsigned char*)stkpush(stkstd, n)))
1523 					{
1524 						env->error = REG_ESPACE;
1525 						return BAD;
1526 					}
1527 					e = env->end;
1528 					if (!(rex->flags & REG_ICASE))
1529 					{
1530 						for (i = 0; s < e && i < n; i++, s = t)
1531 						{
1532 							t = s;
1533 							if (mbchar(t) != c)
1534 								break;
1535 							b[i] = t - s;
1536 						}
1537 					}
1538 					else
1539 					{
1540 						for (i = 0; s < e && i < n; i++, s = t)
1541 						{
1542 							t = s;
1543 							if (towupper(mbchar(t)) != c)
1544 								break;
1545 							b[i] = t - s;
1546 						}
1547 					}
1548 					for (; i-- >= m; s -= b[i])
1549 						switch (follow(env, rex, cont, s))
1550 						{
1551 						case BAD:
1552 							stkpop(stkstd);
1553 							return BAD;
1554 						case BEST:
1555 							stkpop(stkstd);
1556 							return BEST;
1557 						case CUT:
1558 							stkpop(stkstd);
1559 							return CUT;
1560 						case GOOD:
1561 							r = GOOD;
1562 							break;
1563 						}
1564 					stkpop(stkstd);
1565 				}
1566 			}
1567 			else
1568 			{
1569 				if (!mbwide())
1570 				{
1571 					e = s + m;
1572 					if (p = rex->map)
1573 					{
1574 						for (; s < e; s++)
1575 							if (p[*s] != c)
1576 								return r;
1577 						e += n - m;
1578 						for (;;)
1579 						{
1580 							switch (follow(env, rex, cont, s))
1581 							{
1582 							case BAD:
1583 								return BAD;
1584 							case CUT:
1585 								return CUT;
1586 							case BEST:
1587 							case GOOD:
1588 								return BEST;
1589 							}
1590 							if (s >= e || p[*s++] != c)
1591 								break;
1592 						}
1593 					}
1594 					else
1595 					{
1596 						for (; s < e; s++)
1597 							if (*s != c)
1598 								return r;
1599 						e += n - m;
1600 						for (;;)
1601 						{
1602 							switch (follow(env, rex, cont, s))
1603 							{
1604 							case BAD:
1605 								return BAD;
1606 							case CUT:
1607 								return CUT;
1608 							case BEST:
1609 							case GOOD:
1610 								return BEST;
1611 							}
1612 							if (s >= e || *s++ != c)
1613 								break;
1614 						}
1615 					}
1616 				}
1617 				else
1618 				{
1619 					if (!(rex->flags & REG_ICASE))
1620 					{
1621 						for (i = 0; i < m && s < e; i++, s = t)
1622 						{
1623 							t = s;
1624 							if (mbchar(t) != c)
1625 								return r;
1626 						}
1627 						while (i++ <= n)
1628 						{
1629 							switch (follow(env, rex, cont, s))
1630 							{
1631 							case BAD:
1632 								return BAD;
1633 							case CUT:
1634 								return CUT;
1635 							case BEST:
1636 							case GOOD:
1637 								return BEST;
1638 							}
1639 							if (s >= e || mbchar(s) != c)
1640 								break;
1641 						}
1642 					}
1643 					else
1644 					{
1645 						for (i = 0; i < m && s < e; i++, s = t)
1646 						{
1647 							t = s;
1648 							if (towupper(mbchar(t)) != c)
1649 								return r;
1650 						}
1651 						while (i++ <= n)
1652 						{
1653 							switch (follow(env, rex, cont, s))
1654 							{
1655 							case BAD:
1656 								return BAD;
1657 							case CUT:
1658 								return CUT;
1659 							case BEST:
1660 							case GOOD:
1661 								return BEST;
1662 							}
1663 							if (s >= e || towupper(mbchar(s)) != c)
1664 								break;
1665 						}
1666 					}
1667 				}
1668 			}
1669 			return r;
1670 		case REX_REP:
1671 			if (env->stack && pospush(env, rex, s, BEG_REP))
1672 				return BAD;
1673 			r = parserep(env, rex, cont, s, 0);
1674 			if (env->stack)
1675 				pospop(env);
1676 			return r;
1677 		case REX_REP_CATCH:
1678 			DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1679 			if (env->stack && pospush(env, rex, s, END_ANY))
1680 				return BAD;
1681 			if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1682 			{
1683 				/*
1684 				 * optional empty iteration
1685 				 */
1686 
1687 DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1688 				if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1689 					r = NONE;
1690 				else if (pospush(env, rex, s, END_ANY))
1691 					r = BAD;
1692 				else
1693 				{
1694 					r = follow(env, rex, rex->re.rep_catch.cont, s);
1695 					pospop(env);
1696 				}
1697 			}
1698 			else
1699 				r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1700 			if (env->stack)
1701 				pospop(env);
1702 			return r;
1703 		case REX_STRING:
1704 DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1705 			if (rex->re.string.size > (env->end - s))
1706 				return NONE;
1707 			t = rex->re.string.base;
1708 			e = t + rex->re.string.size;
1709 			if (!(p = rex->map))
1710 			{
1711 				while (t < e)
1712 					if (*s++ != *t++)
1713 						return NONE;
1714 			}
1715 			else if (!mbwide())
1716 			{
1717 				while (t < e)
1718 					if (p[*s++] != *t++)
1719 						return NONE;
1720 			}
1721 			else
1722 			{
1723 				while (t < e)
1724 				{
1725 					c = mbchar(s);
1726 					d = mbchar(t);
1727 					if (towupper(c) != d)
1728 						return NONE;
1729 				}
1730 			}
1731 			break;
1732 		case REX_TRIE:
1733 			if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1734 				return NONE;
1735 			return parsetrie(env, x, rex, cont, s);
1736 		case REX_EXEC:
1737 			u = 0;
1738 			r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1739 			e = (unsigned char*)u;
1740 			if (e >= s && e <= env->end)
1741 				s = e;
1742 			switch (r)
1743 			{
1744 			case 0:
1745 				break;
1746 			case REG_NOMATCH:
1747 				return NONE;
1748 			default:
1749 				env->error = r;
1750 				return BAD;
1751 			}
1752 			break;
1753 		case REX_WBEG:
1754 			if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1755 				return NONE;
1756 			break;
1757 		case REX_WEND:
1758 			if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1759 				return NONE;
1760 			break;
1761 		case REX_WORD:
1762 			if (s > env->beg && isword(*(s - 1)) == isword(*s))
1763 				return NONE;
1764 			break;
1765 		case REX_WORD_NOT:
1766 			if (s == env->beg || isword(*(s - 1)) != isword(*s))
1767 				return NONE;
1768 			break;
1769 		case REX_BEG_STR:
1770 			if (s != env->beg)
1771 				return NONE;
1772 			break;
1773 		case REX_END_STR:
1774 			for (t = s; t < env->end && *t == '\n'; t++);
1775 			if (t < env->end)
1776 				return NONE;
1777 			break;
1778 		case REX_FIN_STR:
1779 			if (s < env->end)
1780 				return NONE;
1781 			break;
1782 		}
1783 		if (!(rex = rex->next))
1784 		{
1785 			if (!(rex = cont))
1786 				break;
1787 			cont = 0;
1788 		}
1789 	}
1790 	return GOOD;
1791 }
1792 
1793 #if _AST_REGEX_DEBUG
1794 
1795 static void
listnode(Rex_t * e,int level)1796 listnode(Rex_t* e, int level)
1797 {
1798 	int	i;
1799 
1800 	if (e)
1801 	{
1802 		do
1803 		{
1804 			for (i = 0; i < level; i++)
1805 				sfprintf(sfstderr, "  ");
1806 			sfprintf(sfstderr, "%s\n", rexname(e));
1807 			switch (e->type)
1808 			{
1809 			case REX_ALT:
1810 			case REX_CONJ:
1811 				listnode(e->re.group.expr.binary.left, level + 1);
1812 				listnode(e->re.group.expr.binary.right, level + 1);
1813 				break;
1814 			case REX_GROUP:
1815 			case REX_GROUP_AHEAD:
1816 			case REX_GROUP_AHEAD_NOT:
1817 			case REX_GROUP_BEHIND:
1818 			case REX_GROUP_BEHIND_NOT:
1819 			case REX_GROUP_CUT:
1820 			case REX_NEG:
1821 			case REX_REP:
1822 				listnode(e->re.group.expr.rex, level + 1);
1823 				break;
1824 			}
1825 		} while (e = e->next);
1826 	}
1827 }
1828 
1829 static int
list(Env_t * env,Rex_t * rex)1830 list(Env_t* env, Rex_t* rex)
1831 {
1832 	sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1833 	if (rex)
1834 		listnode(rex, 1);
1835 	return 0;
1836 }
1837 
1838 #endif
1839 
1840 /*
1841  * returning REG_BADPAT or REG_ESPACE is not explicitly
1842  * countenanced by the standard
1843  */
1844 
1845 int
regnexec(const regex_t * p,const char * s,size_t len,size_t nmatch,regmatch_t * match,regflags_t flags)1846 regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1847 {
1848 	register int	n;
1849 	register int	i;
1850 	int		j;
1851 	int		k;
1852 	int		m;
1853 	int		advance;
1854 	Env_t*		env;
1855 	Rex_t*		e;
1856 
1857 	DEBUG_INIT();
1858 	DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1859 	if (!p || !(env = p->env))
1860 		return REG_BADPAT;
1861 	if (!s)
1862 		return fatal(env->disc, REG_BADPAT, NiL);
1863 	if (len < env->min)
1864 	{
1865 		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1866 		return REG_NOMATCH;
1867 	}
1868 	env->regex = p;
1869 	env->beg = (unsigned char*)s;
1870 	env->end = env->beg + len;
1871 	stknew(stkstd, &env->stk);
1872 	env->flags &= ~REG_EXEC;
1873 	env->flags |= (flags & REG_EXEC);
1874 	advance = 0;
1875 	if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1876 	{
1877 		n = env->nsub;
1878 		if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1879 		    !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1880 		    !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1881 		{
1882 			k = REG_ESPACE;
1883 			goto done;
1884 		}
1885 		env->pos->cur = env->bestpos->cur = 0;
1886 		env->best = &env->match[n + 1];
1887 		env->best[0].rm_so = 0;
1888 		env->best[0].rm_eo = -1;
1889 		for (i = 0; i <= n; i++)
1890 			env->match[i] = state.nomatch;
1891 		if (flags & REG_ADVANCE)
1892 			advance = 1;
1893 	}
1894 	DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1895 	k = REG_NOMATCH;
1896 	if ((e = env->rex)->type == REX_BM)
1897 	{
1898 		DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1899 		if (len < e->re.bm.right)
1900 		{
1901 			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1902 			goto done;
1903 		}
1904 		else if (!(flags & REG_LEFT))
1905 		{
1906 			register unsigned char*	buf = (unsigned char*)s;
1907 			register size_t		index = e->re.bm.left + e->re.bm.size;
1908 			register size_t		mid = len - e->re.bm.right;
1909 			register size_t*	skip = e->re.bm.skip;
1910 			register size_t*	fail = e->re.bm.fail;
1911 			register Bm_mask_t**	mask = e->re.bm.mask;
1912 			Bm_mask_t		m;
1913 			size_t			x;
1914 
1915 			DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1916 			for (;;)
1917 			{
1918 				while ((index += skip[buf[index]]) < mid);
1919 				if (index < HIT)
1920 				{
1921 					DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1922 					goto done;
1923 				}
1924 				index -= HIT;
1925 				m = mask[n = e->re.bm.size - 1][buf[index]];
1926 				do
1927 				{
1928 					if (!n--)
1929 					{
1930 						if (e->re.bm.back < 0)
1931 							goto possible;
1932 						if (advance)
1933 						{
1934 							i = index - e->re.bm.back;
1935 							s += i;
1936 							if (env->stack)
1937 								env->best[0].rm_so += i;
1938 							goto possible;
1939 						}
1940 						x = index;
1941 						if (index < e->re.bm.back)
1942 							index = 0;
1943 						else
1944 							index -= e->re.bm.back;
1945 						while (index <= x)
1946 						{
1947 							if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1948 							{
1949 								if (env->stack)
1950 									env->best[0].rm_so = index;
1951 								n = env->nsub;
1952 								goto hit;
1953 							}
1954 							index++;
1955 						}
1956 						index += e->re.bm.size;
1957 						break;
1958 					}
1959 				} while (m &= mask[n][buf[--index]]);
1960 				if ((index += fail[n + 1]) >= len)
1961 					goto done;
1962 			}
1963 		}
1964  possible:
1965 		n = env->nsub;
1966 		e = e->next;
1967 	}
1968 	j = env->once || (flags & REG_LEFT);
1969 	DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1970 	while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1971 	{
1972 		if (j)
1973 			goto done;
1974 		i = MBSIZE(s);
1975 		s += i;
1976 		if ((unsigned char*)s > env->end - env->min)
1977 			goto done;
1978 		if (env->stack)
1979 			env->best[0].rm_so += i;
1980 	}
1981 	if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1982 		goto done;
1983  hit:
1984 	if (k = env->error)
1985 		goto done;
1986 	if (i == CUT)
1987 	{
1988 		k = env->error = REG_NOMATCH;
1989 		goto done;
1990 	}
1991 	if (!(env->flags & REG_NOSUB))
1992 	{
1993 		k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1994 		for (i = j = m = 0; j < nmatch; i++)
1995 			if (!i || !k || (i & 1))
1996 			{
1997 				if (i > n)
1998 					match[j] = state.nomatch;
1999 				else
2000 					match[m = j] = env->best[i];
2001 				j++;
2002 			}
2003 		if (k)
2004 		{
2005 			while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2006 				m--;
2007 			((regex_t*)p)->re_nsub = m;
2008 		}
2009 	}
2010 	k = 0;
2011  done:
2012 	stkold(stkstd, &env->stk);
2013 	env->stk.base = 0;
2014 	if (k > REG_NOMATCH)
2015 		fatal(p->env->disc, k, NiL);
2016 	return k;
2017 }
2018 
2019 void
regfree(regex_t * p)2020 regfree(regex_t* p)
2021 {
2022 	Env_t*	env;
2023 
2024 	if (p && (env = p->env))
2025 	{
2026 #if _REG_subcomp
2027 		if (env->sub)
2028 		{
2029 			regsubfree(p);
2030 			p->re_sub = 0;
2031 		}
2032 #endif
2033 		p->env = 0;
2034 		if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2035 		{
2036 			drop(env->disc, env->rex);
2037 			if (env->pos)
2038 				vecclose(env->pos);
2039 			if (env->bestpos)
2040 				vecclose(env->bestpos);
2041 			if (env->stk.base)
2042 				stkold(stkstd, &env->stk);
2043 			alloc(env->disc, env, 0);
2044 		}
2045 	}
2046 }
2047