xref: /freebsd/contrib/one-true-awk/b.c (revision f4dc9bf43457515e5c88d1400d4f5ff70a82d9c7)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 /* lasciate ogne speranza, voi ch'intrate. */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #define	DEBUG
31 
32 #include <ctype.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include "awk.h"
37 #include "ytab.h"
38 
39 #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
40 				/* NCHARS is 2**n */
41 #define MAXLIN 22
42 
43 #define type(v)		(v)->nobj	/* badly overloaded here */
44 #define info(v)		(v)->ntype	/* badly overloaded here */
45 #define left(v)		(v)->narg[0]
46 #define right(v)	(v)->narg[1]
47 #define parent(v)	(v)->nnext
48 
49 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
50 #define ELEAF	case EMPTYRE:		/* empty string in regexp */
51 #define UNARY	case STAR: case PLUS: case QUEST:
52 
53 /* encoding in tree Nodes:
54 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
55 		left is index, right contains value or pointer to value
56 	unary (STAR, PLUS, QUEST): left is child, right is null
57 	binary (CAT, OR): left and right are children
58 	parent contains pointer to parent
59 */
60 
61 
62 int	*setvec;
63 int	*tmpset;
64 int	maxsetvec = 0;
65 
66 int	rtok;		/* next token in current re */
67 int	rlxval;
68 static uschar	*rlxstr;
69 static uschar	*prestr;	/* current position in current re */
70 static uschar	*lastre;	/* origin of last re */
71 
72 static	int setcnt;
73 static	int poscnt;
74 
75 char	*patbeg;
76 int	patlen;
77 
78 #define	NFA	20	/* cache this many dynamic fa's */
79 fa	*fatab[NFA];
80 int	nfatab	= 0;	/* entries in fatab */
81 
82 fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
83 {
84 	int i, use, nuse;
85 	fa *pfa;
86 	static int now = 1;
87 
88 	if (setvec == NULL) {	/* first time through any RE */
89 		maxsetvec = MAXLIN;
90 		setvec = (int *) malloc(maxsetvec * sizeof(int));
91 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
92 		if (setvec == NULL || tmpset == NULL)
93 			overflo("out of space initializing makedfa");
94 	}
95 
96 	if (compile_time)	/* a constant for sure */
97 		return mkdfa(s, anchor);
98 	for (i = 0; i < nfatab; i++)	/* is it there already? */
99 		if (fatab[i]->anchor == anchor
100 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
101 			fatab[i]->use = now++;
102 			return fatab[i];
103 		}
104 	pfa = mkdfa(s, anchor);
105 	if (nfatab < NFA) {	/* room for another */
106 		fatab[nfatab] = pfa;
107 		fatab[nfatab]->use = now++;
108 		nfatab++;
109 		return pfa;
110 	}
111 	use = fatab[0]->use;	/* replace least-recently used */
112 	nuse = 0;
113 	for (i = 1; i < nfatab; i++)
114 		if (fatab[i]->use < use) {
115 			use = fatab[i]->use;
116 			nuse = i;
117 		}
118 	freefa(fatab[nuse]);
119 	fatab[nuse] = pfa;
120 	pfa->use = now++;
121 	return pfa;
122 }
123 
124 fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
125 				/* anchor = 1 for anchored matches, else 0 */
126 {
127 	Node *p, *p1;
128 	fa *f;
129 
130 	p = reparse(s);
131 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
132 		/* put ALL STAR in front of reg.  exp. */
133 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
134 		/* put FINAL after reg.  exp. */
135 
136 	poscnt = 0;
137 	penter(p1);	/* enter parent pointers and leaf indices */
138 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
139 		overflo("out of space for fa");
140 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
141 	cfoll(f, p1);	/* set up follow sets */
142 	freetr(p1);
143 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
144 			overflo("out of space in makedfa");
145 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
146 		overflo("out of space in makedfa");
147 	*f->posns[1] = 0;
148 	f->initstat = makeinit(f, anchor);
149 	f->anchor = anchor;
150 	f->restr = (uschar *) tostring(s);
151 	return f;
152 }
153 
154 int makeinit(fa *f, int anchor)
155 {
156 	int i, k;
157 
158 	f->curstat = 2;
159 	f->out[2] = 0;
160 	f->reset = 0;
161 	k = *(f->re[0].lfollow);
162 	xfree(f->posns[2]);
163 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
164 		overflo("out of space in makeinit");
165 	for (i=0; i <= k; i++) {
166 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
167 	}
168 	if ((f->posns[2])[1] == f->accept)
169 		f->out[2] = 1;
170 	for (i=0; i < NCHARS; i++)
171 		f->gototab[2][i] = 0;
172 	f->curstat = cgoto(f, 2, HAT);
173 	if (anchor) {
174 		*f->posns[2] = k-1;	/* leave out position 0 */
175 		for (i=0; i < k; i++) {
176 			(f->posns[0])[i] = (f->posns[2])[i];
177 		}
178 
179 		f->out[0] = f->out[2];
180 		if (f->curstat != 2)
181 			--(*f->posns[f->curstat]);
182 	}
183 	return f->curstat;
184 }
185 
186 void penter(Node *p)	/* set up parent pointers and leaf indices */
187 {
188 	switch (type(p)) {
189 	ELEAF
190 	LEAF
191 		info(p) = poscnt;
192 		poscnt++;
193 		break;
194 	UNARY
195 		penter(left(p));
196 		parent(left(p)) = p;
197 		break;
198 	case CAT:
199 	case OR:
200 		penter(left(p));
201 		penter(right(p));
202 		parent(left(p)) = p;
203 		parent(right(p)) = p;
204 		break;
205 	default:	/* can't happen */
206 		FATAL("can't happen: unknown type %d in penter", type(p));
207 		break;
208 	}
209 }
210 
211 void freetr(Node *p)	/* free parse tree */
212 {
213 	switch (type(p)) {
214 	ELEAF
215 	LEAF
216 		xfree(p);
217 		break;
218 	UNARY
219 		freetr(left(p));
220 		xfree(p);
221 		break;
222 	case CAT:
223 	case OR:
224 		freetr(left(p));
225 		freetr(right(p));
226 		xfree(p);
227 		break;
228 	default:	/* can't happen */
229 		FATAL("can't happen: unknown type %d in freetr", type(p));
230 		break;
231 	}
232 }
233 
234 /* in the parsing of regular expressions, metacharacters like . have */
235 /* to be seen literally;  \056 is not a metacharacter. */
236 
237 int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
238 {			/* only pick up one 8-bit byte (2 chars) */
239 	uschar *p;
240 	int n = 0;
241 	int i;
242 
243 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
244 		if (isdigit(*p))
245 			n = 16 * n + *p - '0';
246 		else if (*p >= 'a' && *p <= 'f')
247 			n = 16 * n + *p - 'a' + 10;
248 		else if (*p >= 'A' && *p <= 'F')
249 			n = 16 * n + *p - 'A' + 10;
250 	}
251 	*pp = (uschar *) p;
252 	return n;
253 }
254 
255 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
256 
257 int quoted(uschar **pp)	/* pick up next thing after a \\ */
258 			/* and increment *pp */
259 {
260 	uschar *p = *pp;
261 	int c;
262 
263 	if ((c = *p++) == 't')
264 		c = '\t';
265 	else if (c == 'n')
266 		c = '\n';
267 	else if (c == 'f')
268 		c = '\f';
269 	else if (c == 'r')
270 		c = '\r';
271 	else if (c == 'b')
272 		c = '\b';
273 	else if (c == '\\')
274 		c = '\\';
275 	else if (c == 'x') {	/* hexadecimal goo follows */
276 		c = hexstr(&p);	/* this adds a null if number is invalid */
277 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
278 		int n = c - '0';
279 		if (isoctdigit(*p)) {
280 			n = 8 * n + *p++ - '0';
281 			if (isoctdigit(*p))
282 				n = 8 * n + *p++ - '0';
283 		}
284 		c = n;
285 	} /* else */
286 		/* c = c; */
287 	*pp = p;
288 	return c;
289 }
290 
291 static int collate_range_cmp(int a, int b)
292 {
293 	static char s[2][2];
294 
295 	if ((uschar)a == (uschar)b)
296 		return 0;
297 	s[0][0] = a;
298 	s[1][0] = b;
299 #ifdef __FreeBSD__
300 	return (strcmp(s[0], s[1]));
301 #else
302 	return (strcoll(s[0], s[1]));
303 #endif
304 }
305 
306 char *cclenter(const char *argp)	/* add a character class */
307 {
308 	int i, c, c2;
309 	int j;
310 	uschar *p = (uschar *) argp;
311 	uschar *op, *bp;
312 	static uschar *buf = NULL;
313 	static int bufsz = 100;
314 
315 	op = p;
316 	if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
317 		FATAL("out of space for character class [%.10s...] 1", p);
318 	bp = buf;
319 	for (i = 0; (c = *p++) != 0; ) {
320 		if (c == '\\') {
321 			c = quoted(&p);
322 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
323 			if (*p != 0) {
324 				c = bp[-1];
325 				c2 = *p++;
326 				if (c2 == '\\')
327 					c2 = quoted(&p);
328 				if (collate_range_cmp(c, c2) > 0) {
329 					bp--;
330 					i--;
331 					continue;
332 				}
333 				for (j = 0; j < NCHARS; j++) {
334 					if ((collate_range_cmp(c, j) > 0) ||
335 					    collate_range_cmp(j, c2) > 0)
336 						continue;
337 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
338 						FATAL("out of space for character class [%.10s...] 2", p);
339 					*bp++ = j;
340 					i++;
341 				}
342 				continue;
343 			}
344 		}
345 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
346 			FATAL("out of space for character class [%.10s...] 3", p);
347 		*bp++ = c;
348 		i++;
349 	}
350 	*bp = 0;
351 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
352 	xfree(op);
353 	return (char *) tostring((char *) buf);
354 }
355 
356 void overflo(const char *s)
357 {
358 	FATAL("regular expression too big: %.30s...", s);
359 }
360 
361 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
362 {
363 	int i;
364 	int *p;
365 
366 	switch (type(v)) {
367 	ELEAF
368 	LEAF
369 		f->re[info(v)].ltype = type(v);
370 		f->re[info(v)].lval.np = right(v);
371 		while (f->accept >= maxsetvec) {	/* guessing here! */
372 			maxsetvec *= 4;
373 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
374 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
375 			if (setvec == NULL || tmpset == NULL)
376 				overflo("out of space in cfoll()");
377 		}
378 		for (i = 0; i <= f->accept; i++)
379 			setvec[i] = 0;
380 		setcnt = 0;
381 		follow(v);	/* computes setvec and setcnt */
382 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
383 			overflo("out of space building follow set");
384 		f->re[info(v)].lfollow = p;
385 		*p = setcnt;
386 		for (i = f->accept; i >= 0; i--)
387 			if (setvec[i] == 1)
388 				*++p = i;
389 		break;
390 	UNARY
391 		cfoll(f,left(v));
392 		break;
393 	case CAT:
394 	case OR:
395 		cfoll(f,left(v));
396 		cfoll(f,right(v));
397 		break;
398 	default:	/* can't happen */
399 		FATAL("can't happen: unknown type %d in cfoll", type(v));
400 	}
401 }
402 
403 int first(Node *p)	/* collects initially active leaves of p into setvec */
404 			/* returns 0 if p matches empty string */
405 {
406 	int b, lp;
407 
408 	switch (type(p)) {
409 	ELEAF
410 	LEAF
411 		lp = info(p);	/* look for high-water mark of subscripts */
412 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
413 			maxsetvec *= 4;
414 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
415 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
416 			if (setvec == NULL || tmpset == NULL)
417 				overflo("out of space in first()");
418 		}
419 		if (type(p) == EMPTYRE) {
420 			setvec[lp] = 0;
421 			return(0);
422 		}
423 		if (setvec[lp] != 1) {
424 			setvec[lp] = 1;
425 			setcnt++;
426 		}
427 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
428 			return(0);		/* empty CCL */
429 		else return(1);
430 	case PLUS:
431 		if (first(left(p)) == 0) return(0);
432 		return(1);
433 	case STAR:
434 	case QUEST:
435 		first(left(p));
436 		return(0);
437 	case CAT:
438 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
439 		return(1);
440 	case OR:
441 		b = first(right(p));
442 		if (first(left(p)) == 0 || b == 0) return(0);
443 		return(1);
444 	}
445 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
446 	return(-1);
447 }
448 
449 void follow(Node *v)	/* collects leaves that can follow v into setvec */
450 {
451 	Node *p;
452 
453 	if (type(v) == FINAL)
454 		return;
455 	p = parent(v);
456 	switch (type(p)) {
457 	case STAR:
458 	case PLUS:
459 		first(v);
460 		follow(p);
461 		return;
462 
463 	case OR:
464 	case QUEST:
465 		follow(p);
466 		return;
467 
468 	case CAT:
469 		if (v == left(p)) {	/* v is left child of p */
470 			if (first(right(p)) == 0) {
471 				follow(p);
472 				return;
473 			}
474 		} else		/* v is right child */
475 			follow(p);
476 		return;
477 	}
478 }
479 
480 int member(int c, const char *sarg)	/* is c in s? */
481 {
482 	uschar *s = (uschar *) sarg;
483 
484 	while (*s)
485 		if (c == *s++)
486 			return(1);
487 	return(0);
488 }
489 
490 int match(fa *f, const char *p0)	/* shortest match ? */
491 {
492 	int s, ns;
493 	uschar *p = (uschar *) p0;
494 
495 	s = f->reset ? makeinit(f,0) : f->initstat;
496 	if (f->out[s])
497 		return(1);
498 	do {
499 		/* assert(*p < NCHARS); */
500 		if ((ns = f->gototab[s][*p]) != 0)
501 			s = ns;
502 		else
503 			s = cgoto(f, s, *p);
504 		if (f->out[s])
505 			return(1);
506 	} while (*p++ != 0);
507 	return(0);
508 }
509 
510 int pmatch(fa *f, const char *p0)	/* longest match, for sub */
511 {
512 	int s, ns;
513 	uschar *p = (uschar *) p0;
514 	uschar *q;
515 	int i, k;
516 
517 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
518 	if (f->reset) {
519 		f->initstat = s = makeinit(f,1);
520 	} else {
521 		s = f->initstat;
522 	}
523 	patbeg = (char *) p;
524 	patlen = -1;
525 	do {
526 		q = p;
527 		do {
528 			if (f->out[s])		/* final state */
529 				patlen = q-p;
530 			/* assert(*q < NCHARS); */
531 			if ((ns = f->gototab[s][*q]) != 0)
532 				s = ns;
533 			else
534 				s = cgoto(f, s, *q);
535 			if (s == 1) {	/* no transition */
536 				if (patlen >= 0) {
537 					patbeg = (char *) p;
538 					return(1);
539 				}
540 				else
541 					goto nextin;	/* no match */
542 			}
543 		} while (*q++ != 0);
544 		if (f->out[s])
545 			patlen = q-p-1;	/* don't count $ */
546 		if (patlen >= 0) {
547 			patbeg = (char *) p;
548 			return(1);
549 		}
550 	nextin:
551 		s = 2;
552 		if (f->reset) {
553 			for (i = 2; i <= f->curstat; i++)
554 				xfree(f->posns[i]);
555 			k = *f->posns[0];
556 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
557 				overflo("out of space in pmatch");
558 			for (i = 0; i <= k; i++)
559 				(f->posns[2])[i] = (f->posns[0])[i];
560 			f->initstat = f->curstat = 2;
561 			f->out[2] = f->out[0];
562 			for (i = 0; i < NCHARS; i++)
563 				f->gototab[2][i] = 0;
564 		}
565 	} while (*p++ != 0);
566 	return (0);
567 }
568 
569 int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
570 {
571 	int s, ns;
572 	uschar *p = (uschar *) p0;
573 	uschar *q;
574 	int i, k;
575 
576 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
577 	if (f->reset) {
578 		f->initstat = s = makeinit(f,1);
579 	} else {
580 		s = f->initstat;
581 	}
582 	patlen = -1;
583 	while (*p) {
584 		q = p;
585 		do {
586 			if (f->out[s])		/* final state */
587 				patlen = q-p;
588 			/* assert(*q < NCHARS); */
589 			if ((ns = f->gototab[s][*q]) != 0)
590 				s = ns;
591 			else
592 				s = cgoto(f, s, *q);
593 			if (s == 1) {	/* no transition */
594 				if (patlen > 0) {
595 					patbeg = (char *) p;
596 					return(1);
597 				} else
598 					goto nnextin;	/* no nonempty match */
599 			}
600 		} while (*q++ != 0);
601 		if (f->out[s])
602 			patlen = q-p-1;	/* don't count $ */
603 		if (patlen > 0 ) {
604 			patbeg = (char *) p;
605 			return(1);
606 		}
607 	nnextin:
608 		s = 2;
609 		if (f->reset) {
610 			for (i = 2; i <= f->curstat; i++)
611 				xfree(f->posns[i]);
612 			k = *f->posns[0];
613 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
614 				overflo("out of state space");
615 			for (i = 0; i <= k; i++)
616 				(f->posns[2])[i] = (f->posns[0])[i];
617 			f->initstat = f->curstat = 2;
618 			f->out[2] = f->out[0];
619 			for (i = 0; i < NCHARS; i++)
620 				f->gototab[2][i] = 0;
621 		}
622 		p++;
623 	}
624 	return (0);
625 }
626 
627 Node *reparse(const char *p)	/* parses regular expression pointed to by p */
628 {			/* uses relex() to scan regular expression */
629 	Node *np;
630 
631 	dprintf( ("reparse <%s>\n", p) );
632 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
633 	rtok = relex();
634 	/* GNU compatibility: an empty regexp matches anything */
635 	if (rtok == '\0') {
636 		/* FATAL("empty regular expression"); previous */
637 		return(op2(EMPTYRE, NIL, NIL));
638 	}
639 	np = regexp();
640 	if (rtok != '\0')
641 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
642 	return(np);
643 }
644 
645 Node *regexp(void)	/* top-level parse of reg expr */
646 {
647 	return (alt(concat(primary())));
648 }
649 
650 Node *primary(void)
651 {
652 	Node *np;
653 
654 	switch (rtok) {
655 	case CHAR:
656 		np = op2(CHAR, NIL, itonp(rlxval));
657 		rtok = relex();
658 		return (unary(np));
659 	case ALL:
660 		rtok = relex();
661 		return (unary(op2(ALL, NIL, NIL)));
662 	case EMPTYRE:
663 		rtok = relex();
664 		return (unary(op2(ALL, NIL, NIL)));
665 	case DOT:
666 		rtok = relex();
667 		return (unary(op2(DOT, NIL, NIL)));
668 	case CCL:
669 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
670 		rtok = relex();
671 		return (unary(np));
672 	case NCCL:
673 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
674 		rtok = relex();
675 		return (unary(np));
676 	case '^':
677 		rtok = relex();
678 		return (unary(op2(CHAR, NIL, itonp(HAT))));
679 	case '$':
680 		rtok = relex();
681 		return (unary(op2(CHAR, NIL, NIL)));
682 	case '(':
683 		rtok = relex();
684 		if (rtok == ')') {	/* special pleading for () */
685 			rtok = relex();
686 			return unary(op2(CCL, NIL, (Node *) tostring("")));
687 		}
688 		np = regexp();
689 		if (rtok == ')') {
690 			rtok = relex();
691 			return (unary(np));
692 		}
693 		else
694 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
695 	default:
696 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
697 	}
698 	return 0;	/*NOTREACHED*/
699 }
700 
701 Node *concat(Node *np)
702 {
703 	switch (rtok) {
704 	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
705 		return (concat(op2(CAT, np, primary())));
706 	}
707 	return (np);
708 }
709 
710 Node *alt(Node *np)
711 {
712 	if (rtok == OR) {
713 		rtok = relex();
714 		return (alt(op2(OR, np, concat(primary()))));
715 	}
716 	return (np);
717 }
718 
719 Node *unary(Node *np)
720 {
721 	switch (rtok) {
722 	case STAR:
723 		rtok = relex();
724 		return (unary(op2(STAR, np, NIL)));
725 	case PLUS:
726 		rtok = relex();
727 		return (unary(op2(PLUS, np, NIL)));
728 	case QUEST:
729 		rtok = relex();
730 		return (unary(op2(QUEST, np, NIL)));
731 	default:
732 		return (np);
733 	}
734 }
735 
736 /*
737  * Character class definitions conformant to the POSIX locale as
738  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
739  * and operating character sets are both ASCII (ISO646) or supersets
740  * thereof.
741  *
742  * Note that to avoid overflowing the temporary buffer used in
743  * relex(), the expanded character class (prior to range expansion)
744  * must be less than twice the size of their full name.
745  */
746 
747 /* Because isblank doesn't show up in any of the header files on any
748  * system i use, it's defined here.  if some other locale has a richer
749  * definition of "blank", define HAS_ISBLANK and provide your own
750  * version.
751  * the parentheses here are an attempt to find a path through the maze
752  * of macro definition and/or function and/or version provided.  thanks
753  * to nelson beebe for the suggestion; let's see if it works everywhere.
754  */
755 
756 /* #define HAS_ISBLANK */
757 #ifndef HAS_ISBLANK
758 
759 int (xisblank)(int c)
760 {
761 	return c==' ' || c=='\t';
762 }
763 
764 #endif
765 
766 struct charclass {
767 	const char *cc_name;
768 	int cc_namelen;
769 	int (*cc_func)(int);
770 } charclasses[] = {
771 	{ "alnum",	5,	isalnum },
772 	{ "alpha",	5,	isalpha },
773 #ifndef HAS_ISBLANK
774 	{ "blank",	5,	isspace }, /* was isblank */
775 #else
776 	{ "blank",	5,	isblank },
777 #endif
778 	{ "cntrl",	5,	iscntrl },
779 	{ "digit",	5,	isdigit },
780 	{ "graph",	5,	isgraph },
781 	{ "lower",	5,	islower },
782 	{ "print",	5,	isprint },
783 	{ "punct",	5,	ispunct },
784 	{ "space",	5,	isspace },
785 	{ "upper",	5,	isupper },
786 	{ "xdigit",	6,	isxdigit },
787 	{ NULL,		0,	NULL },
788 };
789 
790 
791 int relex(void)		/* lexical analyzer for reparse */
792 {
793 	int c, n;
794 	int cflag;
795 	static uschar *buf = NULL;
796 	static int bufsz = 100;
797 	uschar *bp;
798 	struct charclass *cc;
799 	int i;
800 
801 	switch (c = *prestr++) {
802 	case '|': return OR;
803 	case '*': return STAR;
804 	case '+': return PLUS;
805 	case '?': return QUEST;
806 	case '.': return DOT;
807 	case '\0': prestr--; return '\0';
808 	case '^':
809 	case '$':
810 	case '(':
811 	case ')':
812 		return c;
813 	case '\\':
814 		rlxval = quoted(&prestr);
815 		return CHAR;
816 	default:
817 		rlxval = c;
818 		return CHAR;
819 	case '[':
820 		if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
821 			FATAL("out of space in reg expr %.10s..", lastre);
822 		bp = buf;
823 		if (*prestr == '^') {
824 			cflag = 1;
825 			prestr++;
826 		}
827 		else
828 			cflag = 0;
829 		n = 2 * strlen((const char *) prestr)+1;
830 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
831 			FATAL("out of space for reg expr %.10s...", lastre);
832 		for (; ; ) {
833 			if ((c = *prestr++) == '\\') {
834 				*bp++ = '\\';
835 				if ((c = *prestr++) == '\0')
836 					FATAL("nonterminated character class %.20s...", lastre);
837 				*bp++ = c;
838 			/* } else if (c == '\n') { */
839 			/* 	FATAL("newline in character class %.20s...", lastre); */
840 			} else if (c == '[' && *prestr == ':') {
841 				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
842 				for (cc = charclasses; cc->cc_name; cc++)
843 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
844 						break;
845 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
846 				    prestr[2 + cc->cc_namelen] == ']') {
847 					prestr += cc->cc_namelen + 3;
848 					for (i = 0; i < NCHARS; i++) {
849 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
850 						    FATAL("out of space for reg expr %.10s...", lastre);
851 						if (cc->cc_func(i)) {
852 							*bp++ = i;
853 							n++;
854 						}
855 					}
856 				} else
857 					*bp++ = c;
858 			} else if (c == '\0') {
859 				FATAL("nonterminated character class %.20s", lastre);
860 			} else if (bp == buf) {	/* 1st char is special */
861 				*bp++ = c;
862 			} else if (c == ']') {
863 				*bp++ = 0;
864 				rlxstr = (uschar *) tostring((char *) buf);
865 				if (cflag == 0)
866 					return CCL;
867 				else
868 					return NCCL;
869 			} else
870 				*bp++ = c;
871 		}
872 	}
873 }
874 
875 int cgoto(fa *f, int s, int c)
876 {
877 	int i, j, k;
878 	int *p, *q;
879 
880 	assert(c == HAT || c < NCHARS);
881 	while (f->accept >= maxsetvec) {	/* guessing here! */
882 		maxsetvec *= 4;
883 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
884 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
885 		if (setvec == NULL || tmpset == NULL)
886 			overflo("out of space in cgoto()");
887 	}
888 	for (i = 0; i <= f->accept; i++)
889 		setvec[i] = 0;
890 	setcnt = 0;
891 	/* compute positions of gototab[s,c] into setvec */
892 	p = f->posns[s];
893 	for (i = 1; i <= *p; i++) {
894 		if ((k = f->re[p[i]].ltype) != FINAL) {
895 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
896 			 || (k == DOT && c != 0 && c != HAT)
897 			 || (k == ALL && c != 0)
898 			 || (k == EMPTYRE && c != 0)
899 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
900 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
901 				q = f->re[p[i]].lfollow;
902 				for (j = 1; j <= *q; j++) {
903 					if (q[j] >= maxsetvec) {
904 						maxsetvec *= 4;
905 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
906 						tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
907 						if (setvec == NULL || tmpset == NULL)
908 							overflo("cgoto overflow");
909 					}
910 					if (setvec[q[j]] == 0) {
911 						setcnt++;
912 						setvec[q[j]] = 1;
913 					}
914 				}
915 			}
916 		}
917 	}
918 	/* determine if setvec is a previous state */
919 	tmpset[0] = setcnt;
920 	j = 1;
921 	for (i = f->accept; i >= 0; i--)
922 		if (setvec[i]) {
923 			tmpset[j++] = i;
924 		}
925 	/* tmpset == previous state? */
926 	for (i = 1; i <= f->curstat; i++) {
927 		p = f->posns[i];
928 		if ((k = tmpset[0]) != p[0])
929 			goto different;
930 		for (j = 1; j <= k; j++)
931 			if (tmpset[j] != p[j])
932 				goto different;
933 		/* setvec is state i */
934 		f->gototab[s][c] = i;
935 		return i;
936 	  different:;
937 	}
938 
939 	/* add tmpset to current set of states */
940 	if (f->curstat >= NSTATES-1) {
941 		f->curstat = 2;
942 		f->reset = 1;
943 		for (i = 2; i < NSTATES; i++)
944 			xfree(f->posns[i]);
945 	} else
946 		++(f->curstat);
947 	for (i = 0; i < NCHARS; i++)
948 		f->gototab[f->curstat][i] = 0;
949 	xfree(f->posns[f->curstat]);
950 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
951 		overflo("out of space in cgoto");
952 
953 	f->posns[f->curstat] = p;
954 	f->gototab[s][c] = f->curstat;
955 	for (i = 0; i <= setcnt; i++)
956 		p[i] = tmpset[i];
957 	if (setvec[f->accept])
958 		f->out[f->curstat] = 1;
959 	else
960 		f->out[f->curstat] = 0;
961 	return f->curstat;
962 }
963 
964 
965 void freefa(fa *f)	/* free a finite automaton */
966 {
967 	int i;
968 
969 	if (f == NULL)
970 		return;
971 	for (i = 0; i <= f->curstat; i++)
972 		xfree(f->posns[i]);
973 	for (i = 0; i <= f->accept; i++) {
974 		xfree(f->re[i].lfollow);
975 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
976 			xfree((f->re[i].lval.np));
977 	}
978 	xfree(f->restr);
979 	xfree(f);
980 }
981