xref: /freebsd/contrib/one-true-awk/b.c (revision b52b9d56d4e96089873a75f9e29062eec19fabba)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 /* lasciate ogne speranza, voi ch'entrate. */
26 
27 #define	DEBUG
28 
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include "awk.h"
34 #include "ytab.h"
35 
36 #define	HAT	(NCHARS-2)	/* matches ^ in regular expr */
37 				/* NCHARS is 2**n */
38 #define MAXLIN 22
39 
40 #define type(v)		(v)->nobj	/* badly overloaded here */
41 #define info(v)		(v)->ntype	/* badly overloaded here */
42 #define left(v)		(v)->narg[0]
43 #define right(v)	(v)->narg[1]
44 #define parent(v)	(v)->nnext
45 
46 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47 #define UNARY	case STAR: case PLUS: case QUEST:
48 
49 /* encoding in tree Nodes:
50 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51 		left is index, right contains value or pointer to value
52 	unary (STAR, PLUS, QUEST): left is child, right is null
53 	binary (CAT, OR): left and right are children
54 	parent contains pointer to parent
55 */
56 
57 
58 int	*setvec;
59 int	*tmpset;
60 int	maxsetvec = 0;
61 
62 int	rtok;		/* next token in current re */
63 int	rlxval;
64 static uschar	*rlxstr;
65 static uschar	*prestr;	/* current position in current re */
66 static uschar	*lastre;	/* origin of last re */
67 
68 static	int setcnt;
69 static	int poscnt;
70 
71 char	*patbeg;
72 int	patlen;
73 
74 #define	NFA	20	/* cache this many dynamic fa's */
75 fa	*fatab[NFA];
76 int	nfatab	= 0;	/* entries in fatab */
77 
78 fa *makedfa(char *s, int anchor)	/* returns dfa for reg expr s */
79 {
80 	int i, use, nuse;
81 	fa *pfa;
82 	static int now = 1;
83 
84 	if (setvec == 0) {	/* first time through any RE */
85 		maxsetvec = MAXLIN;
86 		setvec = (int *) malloc(maxsetvec * sizeof(int));
87 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
88 		if (setvec == 0 || tmpset == 0)
89 			overflo("out of space initializing makedfa");
90 	}
91 
92 	if (compile_time)	/* a constant for sure */
93 		return mkdfa(s, anchor);
94 	for (i = 0; i < nfatab; i++)	/* is it there already? */
95 		if (fatab[i]->anchor == anchor
96 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
97 			fatab[i]->use = now++;
98 			return fatab[i];
99 		}
100 	pfa = mkdfa(s, anchor);
101 	if (nfatab < NFA) {	/* room for another */
102 		fatab[nfatab] = pfa;
103 		fatab[nfatab]->use = now++;
104 		nfatab++;
105 		return pfa;
106 	}
107 	use = fatab[0]->use;	/* replace least-recently used */
108 	nuse = 0;
109 	for (i = 1; i < nfatab; i++)
110 		if (fatab[i]->use < use) {
111 			use = fatab[i]->use;
112 			nuse = i;
113 		}
114 	freefa(fatab[nuse]);
115 	fatab[nuse] = pfa;
116 	pfa->use = now++;
117 	return pfa;
118 }
119 
120 fa *mkdfa(char *s, int anchor)	/* does the real work of making a dfa */
121 				/* anchor = 1 for anchored matches, else 0 */
122 {
123 	Node *p, *p1;
124 	fa *f;
125 
126 	p = reparse(s);
127 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128 		/* put ALL STAR in front of reg.  exp. */
129 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130 		/* put FINAL after reg.  exp. */
131 
132 	poscnt = 0;
133 	penter(p1);	/* enter parent pointers and leaf indices */
134 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135 		overflo("out of space for fa");
136 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
137 	cfoll(f, p1);	/* set up follow sets */
138 	freetr(p1);
139 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140 			overflo("out of space in makedfa");
141 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142 		overflo("out of space in makedfa");
143 	*f->posns[1] = 0;
144 	f->initstat = makeinit(f, anchor);
145 	f->anchor = anchor;
146 	f->restr = (uschar *) tostring(s);
147 	return f;
148 }
149 
150 int makeinit(fa *f, int anchor)
151 {
152 	int i, k;
153 
154 	f->curstat = 2;
155 	f->out[2] = 0;
156 	f->reset = 0;
157 	k = *(f->re[0].lfollow);
158 	xfree(f->posns[2]);
159 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160 		overflo("out of space in makeinit");
161 	for (i=0; i <= k; i++) {
162 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
163 	}
164 	if ((f->posns[2])[1] == f->accept)
165 		f->out[2] = 1;
166 	for (i=0; i < NCHARS; i++)
167 		f->gototab[2][i] = 0;
168 	f->curstat = cgoto(f, 2, HAT);
169 	if (anchor) {
170 		*f->posns[2] = k-1;	/* leave out position 0 */
171 		for (i=0; i < k; i++) {
172 			(f->posns[0])[i] = (f->posns[2])[i];
173 		}
174 
175 		f->out[0] = f->out[2];
176 		if (f->curstat != 2)
177 			--(*f->posns[f->curstat]);
178 	}
179 	return f->curstat;
180 }
181 
182 void penter(Node *p)	/* set up parent pointers and leaf indices */
183 {
184 	switch (type(p)) {
185 	LEAF
186 		info(p) = poscnt;
187 		poscnt++;
188 		break;
189 	UNARY
190 		penter(left(p));
191 		parent(left(p)) = p;
192 		break;
193 	case CAT:
194 	case OR:
195 		penter(left(p));
196 		penter(right(p));
197 		parent(left(p)) = p;
198 		parent(right(p)) = p;
199 		break;
200 	default:	/* can't happen */
201 		FATAL("can't happen: unknown type %d in penter", type(p));
202 		break;
203 	}
204 }
205 
206 void freetr(Node *p)	/* free parse tree */
207 {
208 	switch (type(p)) {
209 	LEAF
210 		xfree(p);
211 		break;
212 	UNARY
213 		freetr(left(p));
214 		xfree(p);
215 		break;
216 	case CAT:
217 	case OR:
218 		freetr(left(p));
219 		freetr(right(p));
220 		xfree(p);
221 		break;
222 	default:	/* can't happen */
223 		FATAL("can't happen: unknown type %d in freetr", type(p));
224 		break;
225 	}
226 }
227 
228 /* in the parsing of regular expressions, metacharacters like . have */
229 /* to be seen literally;  \056 is not a metacharacter. */
230 
231 int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
232 {			/* only pick up one 8-bit byte (2 chars) */
233 	uschar *p;
234 	int n = 0;
235 	int i;
236 
237 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238 		if (isdigit(*p))
239 			n = 16 * n + *p - '0';
240 		else if (*p >= 'a' && *p <= 'f')
241 			n = 16 * n + *p - 'a' + 10;
242 		else if (*p >= 'A' && *p <= 'F')
243 			n = 16 * n + *p - 'A' + 10;
244 	}
245 	*pp = (char *) p;
246 	return n;
247 }
248 
249 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
250 
251 int quoted(char **pp)	/* pick up next thing after a \\ */
252 			/* and increment *pp */
253 {
254 	char *p = *pp;
255 	int c;
256 
257 	if ((c = *p++) == 't')
258 		c = '\t';
259 	else if (c == 'n')
260 		c = '\n';
261 	else if (c == 'f')
262 		c = '\f';
263 	else if (c == 'r')
264 		c = '\r';
265 	else if (c == 'b')
266 		c = '\b';
267 	else if (c == '\\')
268 		c = '\\';
269 	else if (c == 'x') {	/* hexadecimal goo follows */
270 		c = hexstr(&p);	/* this adds a null if number is invalid */
271 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
272 		int n = c - '0';
273 		if (isoctdigit(*p)) {
274 			n = 8 * n + *p++ - '0';
275 			if (isoctdigit(*p))
276 				n = 8 * n + *p++ - '0';
277 		}
278 		c = n;
279 	} /* else */
280 		/* c = c; */
281 	*pp = p;
282 	return c;
283 }
284 
285 char *cclenter(char *argp)	/* add a character class */
286 {
287 	int i, c, c2;
288 	uschar *p = (uschar *) argp;
289 	uschar *op, *bp;
290 	static uschar *buf = 0;
291 	static int bufsz = 100;
292 
293 	op = p;
294 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
295 		FATAL("out of space for character class [%.10s...] 1", p);
296 	bp = buf;
297 	for (i = 0; (c = *p++) != 0; ) {
298 		if (c == '\\') {
299 			c = quoted((char **) &p);
300 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
301 			if (*p != 0) {
302 				c = bp[-1];
303 				c2 = *p++;
304 				if (c2 == '\\')
305 					c2 = quoted((char **) &p);
306 				if (c > c2) {	/* empty; ignore */
307 					bp--;
308 					i--;
309 					continue;
310 				}
311 				while (c < c2) {
312 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
313 						FATAL("out of space for character class [%.10s...] 2", p);
314 					*bp++ = ++c;
315 					i++;
316 				}
317 				continue;
318 			}
319 		}
320 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
321 			FATAL("out of space for character class [%.10s...] 3", p);
322 		*bp++ = c;
323 		i++;
324 	}
325 	*bp = 0;
326 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327 	xfree(op);
328 	return (char *) tostring((char *) buf);
329 }
330 
331 void overflo(char *s)
332 {
333 	FATAL("regular expression too big: %.30s...", s);
334 }
335 
336 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
337 {
338 	int i;
339 	int *p;
340 
341 	switch (type(v)) {
342 	LEAF
343 		f->re[info(v)].ltype = type(v);
344 		f->re[info(v)].lval.np = right(v);
345 		while (f->accept >= maxsetvec) {	/* guessing here! */
346 			maxsetvec *= 4;
347 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349 			if (setvec == 0 || tmpset == 0)
350 				overflo("out of space in cfoll()");
351 		}
352 		for (i = 0; i <= f->accept; i++)
353 			setvec[i] = 0;
354 		setcnt = 0;
355 		follow(v);	/* computes setvec and setcnt */
356 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
357 			overflo("out of space building follow set");
358 		f->re[info(v)].lfollow = p;
359 		*p = setcnt;
360 		for (i = f->accept; i >= 0; i--)
361 			if (setvec[i] == 1)
362 				*++p = i;
363 		break;
364 	UNARY
365 		cfoll(f,left(v));
366 		break;
367 	case CAT:
368 	case OR:
369 		cfoll(f,left(v));
370 		cfoll(f,right(v));
371 		break;
372 	default:	/* can't happen */
373 		FATAL("can't happen: unknown type %d in cfoll", type(v));
374 	}
375 }
376 
377 int first(Node *p)	/* collects initially active leaves of p into setvec */
378 			/* returns 1 if p matches empty string */
379 {
380 	int b, lp;
381 
382 	switch (type(p)) {
383 	LEAF
384 		lp = info(p);	/* look for high-water mark of subscripts */
385 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
386 			maxsetvec *= 4;
387 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389 			if (setvec == 0 || tmpset == 0)
390 				overflo("out of space in first()");
391 		}
392 		if (setvec[lp] != 1) {
393 			setvec[lp] = 1;
394 			setcnt++;
395 		}
396 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
397 			return(0);		/* empty CCL */
398 		else return(1);
399 	case PLUS:
400 		if (first(left(p)) == 0) return(0);
401 		return(1);
402 	case STAR:
403 	case QUEST:
404 		first(left(p));
405 		return(0);
406 	case CAT:
407 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408 		return(1);
409 	case OR:
410 		b = first(right(p));
411 		if (first(left(p)) == 0 || b == 0) return(0);
412 		return(1);
413 	}
414 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
415 	return(-1);
416 }
417 
418 void follow(Node *v)	/* collects leaves that can follow v into setvec */
419 {
420 	Node *p;
421 
422 	if (type(v) == FINAL)
423 		return;
424 	p = parent(v);
425 	switch (type(p)) {
426 	case STAR:
427 	case PLUS:
428 		first(v);
429 		follow(p);
430 		return;
431 
432 	case OR:
433 	case QUEST:
434 		follow(p);
435 		return;
436 
437 	case CAT:
438 		if (v == left(p)) {	/* v is left child of p */
439 			if (first(right(p)) == 0) {
440 				follow(p);
441 				return;
442 			}
443 		} else		/* v is right child */
444 			follow(p);
445 		return;
446 	}
447 }
448 
449 int member(int c, char *sarg)	/* is c in s? */
450 {
451 	uschar *s = (uschar *) sarg;
452 
453 	while (*s)
454 		if (c == *s++)
455 			return(1);
456 	return(0);
457 }
458 
459 int match(fa *f, char *p0)	/* shortest match ? */
460 {
461 	int s, ns;
462 	uschar *p = (uschar *) p0;
463 
464 	s = f->reset ? makeinit(f,0) : f->initstat;
465 	if (f->out[s])
466 		return(1);
467 	do {
468 		if ((ns = f->gototab[s][*p]) != 0)
469 			s = ns;
470 		else
471 			s = cgoto(f, s, *p);
472 		if (f->out[s])
473 			return(1);
474 	} while (*p++ != 0);
475 	return(0);
476 }
477 
478 int pmatch(fa *f, char *p0)	/* longest match, for sub */
479 {
480 	int s, ns;
481 	uschar *p = (uschar *) p0;
482 	uschar *q;
483 	int i, k;
484 
485 	s = f->reset ? makeinit(f,1) : f->initstat;
486 	patbeg = (char *) p;
487 	patlen = -1;
488 	do {
489 		q = p;
490 		do {
491 			if (f->out[s])		/* final state */
492 				patlen = q-p;
493 			if ((ns = f->gototab[s][*q]) != 0)
494 				s = ns;
495 			else
496 				s = cgoto(f, s, *q);
497 			if (s == 1) {	/* no transition */
498 				if (patlen >= 0) {
499 					patbeg = (char *) p;
500 					return(1);
501 				}
502 				else
503 					goto nextin;	/* no match */
504 			}
505 		} while (*q++ != 0);
506 		if (f->out[s])
507 			patlen = q-p-1;	/* don't count $ */
508 		if (patlen >= 0) {
509 			patbeg = (char *) p;
510 			return(1);
511 		}
512 	nextin:
513 		s = 2;
514 		if (f->reset) {
515 			for (i = 2; i <= f->curstat; i++)
516 				xfree(f->posns[i]);
517 			k = *f->posns[0];
518 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
519 				overflo("out of space in pmatch");
520 			for (i = 0; i <= k; i++)
521 				(f->posns[2])[i] = (f->posns[0])[i];
522 			f->initstat = f->curstat = 2;
523 			f->out[2] = f->out[0];
524 			for (i = 0; i < NCHARS; i++)
525 				f->gototab[2][i] = 0;
526 		}
527 	} while (*p++ != 0);
528 	return (0);
529 }
530 
531 int nematch(fa *f, char *p0)	/* non-empty match, for sub */
532 {
533 	int s, ns;
534 	uschar *p = (uschar *) p0;
535 	uschar *q;
536 	int i, k;
537 
538 	s = f->reset ? makeinit(f,1) : f->initstat;
539 	patlen = -1;
540 	while (*p) {
541 		q = p;
542 		do {
543 			if (f->out[s])		/* final state */
544 				patlen = q-p;
545 			if ((ns = f->gototab[s][*q]) != 0)
546 				s = ns;
547 			else
548 				s = cgoto(f, s, *q);
549 			if (s == 1) {	/* no transition */
550 				if (patlen > 0) {
551 					patbeg = (char *) p;
552 					return(1);
553 				} else
554 					goto nnextin;	/* no nonempty match */
555 			}
556 		} while (*q++ != 0);
557 		if (f->out[s])
558 			patlen = q-p-1;	/* don't count $ */
559 		if (patlen > 0 ) {
560 			patbeg = (char *) p;
561 			return(1);
562 		}
563 	nnextin:
564 		s = 2;
565 		if (f->reset) {
566 			for (i = 2; i <= f->curstat; i++)
567 				xfree(f->posns[i]);
568 			k = *f->posns[0];
569 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
570 				overflo("out of state space");
571 			for (i = 0; i <= k; i++)
572 				(f->posns[2])[i] = (f->posns[0])[i];
573 			f->initstat = f->curstat = 2;
574 			f->out[2] = f->out[0];
575 			for (i = 0; i < NCHARS; i++)
576 				f->gototab[2][i] = 0;
577 		}
578 		p++;
579 	}
580 	return (0);
581 }
582 
583 Node *reparse(char *p)	/* parses regular expression pointed to by p */
584 {			/* uses relex() to scan regular expression */
585 	Node *np;
586 
587 	dprintf( ("reparse <%s>\n", p) );
588 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
589 	rtok = relex();
590 	if (rtok == '\0')
591 		FATAL("empty regular expression");
592 	np = regexp();
593 	if (rtok != '\0')
594 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
595 	return(np);
596 }
597 
598 Node *regexp(void)	/* top-level parse of reg expr */
599 {
600 	return (alt(concat(primary())));
601 }
602 
603 Node *primary(void)
604 {
605 	Node *np;
606 
607 	switch (rtok) {
608 	case CHAR:
609 		np = op2(CHAR, NIL, itonp(rlxval));
610 		rtok = relex();
611 		return (unary(np));
612 	case ALL:
613 		rtok = relex();
614 		return (unary(op2(ALL, NIL, NIL)));
615 	case DOT:
616 		rtok = relex();
617 		return (unary(op2(DOT, NIL, NIL)));
618 	case CCL:
619 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
620 		rtok = relex();
621 		return (unary(np));
622 	case NCCL:
623 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
624 		rtok = relex();
625 		return (unary(np));
626 	case '^':
627 		rtok = relex();
628 		return (unary(op2(CHAR, NIL, itonp(HAT))));
629 	case '$':
630 		rtok = relex();
631 		return (unary(op2(CHAR, NIL, NIL)));
632 	case '(':
633 		rtok = relex();
634 		if (rtok == ')') {	/* special pleading for () */
635 			rtok = relex();
636 			return unary(op2(CCL, NIL, (Node *) tostring("")));
637 		}
638 		np = regexp();
639 		if (rtok == ')') {
640 			rtok = relex();
641 			return (unary(np));
642 		}
643 		else
644 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
645 	default:
646 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
647 	}
648 	return 0;	/*NOTREACHED*/
649 }
650 
651 Node *concat(Node *np)
652 {
653 	switch (rtok) {
654 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
655 		return (concat(op2(CAT, np, primary())));
656 	}
657 	return (np);
658 }
659 
660 Node *alt(Node *np)
661 {
662 	if (rtok == OR) {
663 		rtok = relex();
664 		return (alt(op2(OR, np, concat(primary()))));
665 	}
666 	return (np);
667 }
668 
669 Node *unary(Node *np)
670 {
671 	switch (rtok) {
672 	case STAR:
673 		rtok = relex();
674 		return (unary(op2(STAR, np, NIL)));
675 	case PLUS:
676 		rtok = relex();
677 		return (unary(op2(PLUS, np, NIL)));
678 	case QUEST:
679 		rtok = relex();
680 		return (unary(op2(QUEST, np, NIL)));
681 	default:
682 		return (np);
683 	}
684 }
685 
686 /*
687  * Character class definitions conformant to the POSIX locale as
688  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
689  * and operating character sets are both ASCII (ISO646) or supersets
690  * thereof.
691  *
692  * Note that to avoid overflowing the temporary buffer used in
693  * relex(), the expanded character class (prior to range expansion)
694  * must be less than twice the size of their full name.
695  */
696 struct charclass {
697 	const char *cc_name;
698 	int cc_namelen;
699 	const char *cc_expand;
700 } charclasses[] = {
701 	{ "alnum",	5,	"0-9A-Za-z" },
702 	{ "alpha",	5,	"A-Za-z" },
703 	{ "blank",	5,	" \t" },
704 	{ "cntrl",	5,	"\000-\037\177" },
705 	{ "digit",	5,	"0-9" },
706 	{ "graph",	5,	"\041-\176" },
707 	{ "lower",	5,	"a-z" },
708 	{ "print",	5,	" \041-\176" },
709 	{ "punct",	5,	"\041-\057\072-\100\133-\140\173-\176" },
710 	{ "space",	5,	" \f\n\r\t\v" },
711 	{ "upper",	5,	"A-Z" },
712 	{ "xdigit",	6,	"0-9A-Fa-f" },
713 	{ NULL,		0,	NULL },
714 };
715 
716 
717 int relex(void)		/* lexical analyzer for reparse */
718 {
719 	int c, n;
720 	int cflag;
721 	static uschar *buf = 0;
722 	static int bufsz = 100;
723 	uschar *bp;
724 	struct charclass *cc;
725 	const uschar *p;
726 
727 	switch (c = *prestr++) {
728 	case '|': return OR;
729 	case '*': return STAR;
730 	case '+': return PLUS;
731 	case '?': return QUEST;
732 	case '.': return DOT;
733 	case '\0': prestr--; return '\0';
734 	case '^':
735 	case '$':
736 	case '(':
737 	case ')':
738 		return c;
739 	case '\\':
740 		rlxval = quoted((char **) &prestr);
741 		return CHAR;
742 	default:
743 		rlxval = c;
744 		return CHAR;
745 	case '[':
746 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
747 			FATAL("out of space in reg expr %.10s..", lastre);
748 		bp = buf;
749 		if (*prestr == '^') {
750 			cflag = 1;
751 			prestr++;
752 		}
753 		else
754 			cflag = 0;
755 		n = 2 * strlen((const char *) prestr)+1;
756 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
757 			FATAL("out of space for reg expr %.10s...", lastre);
758 		for (; ; ) {
759 			if ((c = *prestr++) == '\\') {
760 				*bp++ = '\\';
761 				if ((c = *prestr++) == '\0')
762 					FATAL("nonterminated character class %.20s...", lastre);
763 				*bp++ = c;
764 			/* } else if (c == '\n') { */
765 			/* 	FATAL("newline in character class %.20s...", lastre); */
766 			} else if (c == '[' && *prestr == ':') {
767 				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
768 				for (cc = charclasses; cc->cc_name; cc++)
769 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
770 						break;
771 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
772 				    prestr[2 + cc->cc_namelen] == ']') {
773 					prestr += cc->cc_namelen + 3;
774 					for (p = (const uschar *) cc->cc_expand; *p; p++)
775 						*bp++ = *p;
776 				} else
777 					*bp++ = c;
778 			} else if (c == '\0') {
779 				FATAL("nonterminated character class %.20s", lastre);
780 			} else if (bp == buf) {	/* 1st char is special */
781 				*bp++ = c;
782 			} else if (c == ']') {
783 				*bp++ = 0;
784 				rlxstr = (uschar *) tostring((char *) buf);
785 				if (cflag == 0)
786 					return CCL;
787 				else
788 					return NCCL;
789 			} else
790 				*bp++ = c;
791 		}
792 	}
793 }
794 
795 int cgoto(fa *f, int s, int c)
796 {
797 	int i, j, k;
798 	int *p, *q;
799 
800 	if (c < 0 || c > 255)
801 		FATAL("can't happen: neg char %d in cgoto", c);
802 	while (f->accept >= maxsetvec) {	/* guessing here! */
803 		maxsetvec *= 4;
804 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
805 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
806 		if (setvec == 0 || tmpset == 0)
807 			overflo("out of space in cgoto()");
808 	}
809 	for (i = 0; i <= f->accept; i++)
810 		setvec[i] = 0;
811 	setcnt = 0;
812 	/* compute positions of gototab[s,c] into setvec */
813 	p = f->posns[s];
814 	for (i = 1; i <= *p; i++) {
815 		if ((k = f->re[p[i]].ltype) != FINAL) {
816 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
817 			 || (k == DOT && c != 0 && c != HAT)
818 			 || (k == ALL && c != 0)
819 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
820 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
821 				q = f->re[p[i]].lfollow;
822 				for (j = 1; j <= *q; j++) {
823 					if (q[j] >= maxsetvec) {
824 						maxsetvec *= 4;
825 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
826 						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
827 						if (setvec == 0 || tmpset == 0)
828 							overflo("cgoto overflow");
829 					}
830 					if (setvec[q[j]] == 0) {
831 						setcnt++;
832 						setvec[q[j]] = 1;
833 					}
834 				}
835 			}
836 		}
837 	}
838 	/* determine if setvec is a previous state */
839 	tmpset[0] = setcnt;
840 	j = 1;
841 	for (i = f->accept; i >= 0; i--)
842 		if (setvec[i]) {
843 			tmpset[j++] = i;
844 		}
845 	/* tmpset == previous state? */
846 	for (i = 1; i <= f->curstat; i++) {
847 		p = f->posns[i];
848 		if ((k = tmpset[0]) != p[0])
849 			goto different;
850 		for (j = 1; j <= k; j++)
851 			if (tmpset[j] != p[j])
852 				goto different;
853 		/* setvec is state i */
854 		f->gototab[s][c] = i;
855 		return i;
856 	  different:;
857 	}
858 
859 	/* add tmpset to current set of states */
860 	if (f->curstat >= NSTATES-1) {
861 		f->curstat = 2;
862 		f->reset = 1;
863 		for (i = 2; i < NSTATES; i++)
864 			xfree(f->posns[i]);
865 	} else
866 		++(f->curstat);
867 	for (i = 0; i < NCHARS; i++)
868 		f->gototab[f->curstat][i] = 0;
869 	xfree(f->posns[f->curstat]);
870 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
871 		overflo("out of space in cgoto");
872 
873 	f->posns[f->curstat] = p;
874 	f->gototab[s][c] = f->curstat;
875 	for (i = 0; i <= setcnt; i++)
876 		p[i] = tmpset[i];
877 	if (setvec[f->accept])
878 		f->out[f->curstat] = 1;
879 	else
880 		f->out[f->curstat] = 0;
881 	return f->curstat;
882 }
883 
884 
885 void freefa(fa *f)	/* free a finite automaton */
886 {
887 	int i;
888 
889 	if (f == NULL)
890 		return;
891 	for (i = 0; i <= f->curstat; i++)
892 		xfree(f->posns[i]);
893 	for (i = 0; i <= f->accept; i++) {
894 		xfree(f->re[i].lfollow);
895 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
896 			xfree((f->re[i].lval.np));
897 	}
898 	xfree(f->restr);
899 	xfree(f);
900 }
901