xref: /illumos-gate/usr/src/cmd/awk/b.c (revision bc1f688b4872ace323eaddbb1a6365d054e7bf56)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 #define	DEBUG
32 
33 #include "awk.h"
34 #include "y.tab.h"
35 
36 #define	HAT	(NCHARS-1)	/* matches ^ in regular expr */
37 				/* NCHARS is 2**n */
38 #define	MAXLIN (3 * LINE_MAX)
39 
40 #define	type(v)		(v)->nobj
41 #define	left(v)		(v)->narg[0]
42 #define	right(v)	(v)->narg[1]
43 #define	parent(v)	(v)->nnext
44 
45 #define	LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
46 #define	UNARY	case STAR: case PLUS: case QUEST:
47 
48 /*
49  * encoding in tree Nodes:
50  *	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51  *		left is index, right contains value or pointer to value
52  *	unary (STAR, PLUS, QUEST): left is child, right is null
53  *	binary (CAT, OR): left and right are children
54  *	parent contains pointer to parent
55  */
56 
57 int	setvec[MAXLIN];
58 int	tmpset[MAXLIN];
59 Node	*point[MAXLIN];
60 
61 int	rtok;		/* next token in current re */
62 int	rlxval;
63 uchar	*rlxstr;
64 uchar	*prestr;	/* current position in current re */
65 uchar	*lastre;	/* origin of last re */
66 
67 static	int setcnt;
68 static	int poscnt;
69 
70 uchar	*patbeg;
71 int	patlen;
72 
73 #define	NFA	20	/* cache this many dynamic fa's */
74 fa	*fatab[NFA];
75 int	nfatab	= 0;	/* entries in fatab */
76 
77 static fa	*mkdfa(uchar *, int);
78 static int	makeinit(fa *, int);
79 static void	penter(Node *);
80 static void	freetr(Node *);
81 static void	overflo(char *);
82 static void	cfoll(fa *, Node *);
83 static void	follow(Node *);
84 static Node	*reparse(uchar *);
85 static int	relex(void);
86 static void	freefa(fa *);
87 static int	cgoto(fa *, int, int);
88 
89 fa *
90 makedfa(uchar *s, int anchor)	/* returns dfa for reg expr s */
91 {
92 	int i, use, nuse;
93 	fa *pfa;
94 
95 	if (compile_time)	/* a constant for sure */
96 		return (mkdfa(s, anchor));
97 	for (i = 0; i < nfatab; i++) {	/* is it there already? */
98 		if (fatab[i]->anchor == anchor &&
99 		    strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
100 			fatab[i]->use++;
101 			return (fatab[i]);
102 		}
103 	}
104 	pfa = mkdfa(s, anchor);
105 	if (nfatab < NFA) {	/* room for another */
106 		fatab[nfatab] = pfa;
107 		fatab[nfatab]->use = 1;
108 		nfatab++;
109 		return (pfa);
110 	}
111 	use = fatab[0]->use;	/* replace least-recently used */
112 	nuse = 0;
113 	for (i = 1; i < nfatab; i++)
114 		if (fatab[i]->use < use) {
115 			use = fatab[i]->use;
116 			nuse = i;
117 		}
118 	freefa(fatab[nuse]);
119 	fatab[nuse] = pfa;
120 	pfa->use = 1;
121 	return (pfa);
122 }
123 
124 fa *
125 mkdfa(uchar *s, int anchor)	/* does the real work of making a dfa */
126 	/* anchor = 1 for anchored matches, else 0 */
127 {
128 	Node *p, *p1;
129 	fa *f;
130 
131 	p = reparse(s);
132 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
133 		/* put ALL STAR in front of reg.  exp. */
134 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
135 		/* put FINAL after reg.  exp. */
136 
137 	poscnt = 0;
138 	penter(p1);	/* enter parent pointers and leaf indices */
139 	if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
140 		overflo("no room for fa");
141 	/* penter has computed number of positions in re */
142 	f->accept = poscnt-1;
143 	cfoll(f, p1);	/* set up follow sets */
144 	freetr(p1);
145 	if ((f->posns[0] =
146 	    (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
147 			overflo("out of space in makedfa");
148 	}
149 	if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
150 		overflo("out of space in makedfa");
151 	*f->posns[1] = 0;
152 	f->initstat = makeinit(f, anchor);
153 	f->anchor = anchor;
154 	f->restr = tostring(s);
155 	return (f);
156 }
157 
158 static int
159 makeinit(fa *f, int anchor)
160 {
161 	register int i, k;
162 
163 	f->curstat = 2;
164 	f->out[2] = 0;
165 	f->reset = 0;
166 	k = *(f->re[0].lfollow);
167 	xfree(f->posns[2]);
168 	if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
169 		overflo("out of space in makeinit");
170 	for (i = 0; i <= k; i++) {
171 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
172 	}
173 	if ((f->posns[2])[1] == f->accept)
174 		f->out[2] = 1;
175 	for (i = 0; i < NCHARS; i++)
176 		f->gototab[2][i] = 0;
177 	f->curstat = cgoto(f, 2, HAT);
178 	if (anchor) {
179 		*f->posns[2] = k-1;	/* leave out position 0 */
180 		for (i = 0; i < k; i++) {
181 			(f->posns[0])[i] = (f->posns[2])[i];
182 		}
183 
184 		f->out[0] = f->out[2];
185 		if (f->curstat != 2)
186 			--(*f->posns[f->curstat]);
187 	}
188 	return (f->curstat);
189 }
190 
191 void
192 penter(Node *p)	/* set up parent pointers and leaf indices */
193 {
194 	switch (type(p)) {
195 	LEAF
196 		left(p) = (Node *) poscnt;
197 		point[poscnt++] = p;
198 		break;
199 	UNARY
200 		penter(left(p));
201 		parent(left(p)) = p;
202 		break;
203 	case CAT:
204 	case OR:
205 		penter(left(p));
206 		penter(right(p));
207 		parent(left(p)) = p;
208 		parent(right(p)) = p;
209 		break;
210 	default:
211 		ERROR "unknown type %d in penter", type(p) FATAL;
212 		break;
213 	}
214 }
215 
216 static void
217 freetr(Node *p)	/* free parse tree */
218 {
219 	switch (type(p)) {
220 	LEAF
221 		xfree(p);
222 		break;
223 	UNARY
224 		freetr(left(p));
225 		xfree(p);
226 		break;
227 	case CAT:
228 	case OR:
229 		freetr(left(p));
230 		freetr(right(p));
231 		xfree(p);
232 		break;
233 	default:
234 		ERROR "unknown type %d in freetr", type(p) FATAL;
235 		break;
236 	}
237 }
238 
239 uchar *
240 cclenter(uchar *p)
241 {
242 	register int i, c;
243 	uchar *op, *chars, *ret;
244 	size_t	bsize;
245 
246 	init_buf(&chars, &bsize, LINE_INCR);
247 	op = p;
248 	i = 0;
249 	while ((c = *p++) != 0) {
250 		if (c == '\\') {
251 			if ((c = *p++) == 't')
252 				c = '\t';
253 			else if (c == 'n')
254 				c = '\n';
255 			else if (c == 'f')
256 				c = '\f';
257 			else if (c == 'r')
258 				c = '\r';
259 			else if (c == 'b')
260 				c = '\b';
261 			else if (c == '\\')
262 				c = '\\';
263 			else if (isdigit(c)) {
264 				int n = c - '0';
265 				if (isdigit(*p)) {
266 					n = 8 * n + *p++ - '0';
267 					if (isdigit(*p))
268 						n = 8 * n + *p++ - '0';
269 				}
270 				c = n;
271 			} /* else */
272 				/* c = c; */
273 		} else if (c == '-' && i > 0 && chars[i-1] != 0) {
274 			if (*p != 0) {
275 				c = chars[i-1];
276 				while ((uchar)c < *p) {	/* fails if *p is \\ */
277 					expand_buf(&chars, &bsize, i);
278 					chars[i++] = ++c;
279 				}
280 				p++;
281 				continue;
282 			}
283 		}
284 		expand_buf(&chars, &bsize, i);
285 		chars[i++] = c;
286 	}
287 	chars[i++] = '\0';
288 	dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
289 	xfree(op);
290 	ret = tostring(chars);
291 	free(chars);
292 	return (ret);
293 }
294 
295 static void
296 overflo(char *s)
297 {
298 	ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
299 }
300 
301 /* enter follow set of each leaf of vertex v into lfollow[leaf] */
302 static void
303 cfoll(fa *f, Node *v)
304 {
305 	register int i;
306 	register int *p;
307 
308 	switch (type(v)) {
309 	LEAF
310 		f->re[(int)left(v)].ltype = type(v);
311 		f->re[(int)left(v)].lval = (int)right(v);
312 		for (i = 0; i <= f->accept; i++)
313 			setvec[i] = 0;
314 		setcnt = 0;
315 		follow(v);	/* computes setvec and setcnt */
316 		if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
317 			overflo("follow set overflow");
318 		f->re[(int)left(v)].lfollow = p;
319 		*p = setcnt;
320 		for (i = f->accept; i >= 0; i--) {
321 			if (setvec[i] == 1)
322 				*++p = i;
323 		}
324 		break;
325 	UNARY
326 		cfoll(f, left(v));
327 		break;
328 	case CAT:
329 	case OR:
330 		cfoll(f, left(v));
331 		cfoll(f, right(v));
332 		break;
333 	default:
334 		ERROR "unknown type %d in cfoll", type(v) FATAL;
335 	}
336 }
337 
338 /*
339  * collects initially active leaves of p into setvec
340  * returns 0 or 1 depending on whether p matches empty string
341  */
342 static int
343 first(Node *p)
344 {
345 	register int b;
346 
347 	switch (type(p)) {
348 	LEAF
349 		if (setvec[(int)left(p)] != 1) {
350 			setvec[(int)left(p)] = 1;
351 			setcnt++;
352 		}
353 		if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
354 			return (0);		/* empty CCL */
355 		else
356 			return (1);
357 	case PLUS:
358 		if (first(left(p)) == 0)
359 			return (0);
360 		return (1);
361 	case STAR:
362 	case QUEST:
363 		(void) first(left(p));
364 		return (0);
365 	case CAT:
366 		if (first(left(p)) == 0 && first(right(p)) == 0)
367 			return (0);
368 		return (1);
369 	case OR:
370 		b = first(right(p));
371 		if (first(left(p)) == 0 || b == 0)
372 			return (0);
373 		return (1);
374 	}
375 	ERROR "unknown type %d in first", type(p) FATAL;
376 	return (-1);
377 }
378 
379 /* collects leaves that can follow v into setvec */
380 static void
381 follow(Node *v)
382 {
383 	Node *p;
384 
385 	if (type(v) == FINAL)
386 		return;
387 	p = parent(v);
388 	switch (type(p)) {
389 	case STAR:
390 	case PLUS:
391 		(void) first(v);
392 		follow(p);
393 		return;
394 
395 	case OR:
396 	case QUEST:
397 		follow(p);
398 		return;
399 
400 	case CAT:
401 		if (v == left(p)) {	/* v is left child of p */
402 			if (first(right(p)) == 0) {
403 				follow(p);
404 				return;
405 			}
406 		} else		/* v is right child */
407 			follow(p);
408 		return;
409 	default:
410 		ERROR "unknown type %d in follow", type(p) FATAL;
411 		break;
412 	}
413 }
414 
415 static int
416 member(uchar c, uchar *s)	/* is c in s? */
417 {
418 	while (*s)
419 		if (c == *s++)
420 			return (1);
421 	return (0);
422 }
423 
424 
425 int
426 match(fa *f, uchar *p)
427 {
428 	register int s, ns;
429 
430 	s = f->reset ? makeinit(f, 0) : f->initstat;
431 	if (f->out[s])
432 		return (1);
433 	do {
434 		if ((ns = f->gototab[s][*p]) != 0)
435 			s = ns;
436 		else
437 			s = cgoto(f, s, *p);
438 		if (f->out[s])
439 			return (1);
440 	} while (*p++ != 0);
441 	return (0);
442 }
443 
444 int
445 pmatch(fa *f, uchar *p)
446 {
447 	register int s, ns;
448 	register uchar *q;
449 	int i, k;
450 
451 	if (f->reset) {
452 		f->initstat = s = makeinit(f, 1);
453 	} else {
454 		s = f->initstat;
455 	}
456 	patbeg = p;
457 	patlen = -1;
458 	do {
459 		q = p;
460 		do {
461 			if (f->out[s])		/* final state */
462 				patlen = q-p;
463 			if ((ns = f->gototab[s][*q]) != 0)
464 				s = ns;
465 			else
466 				s = cgoto(f, s, *q);
467 			if (s == 1) {	/* no transition */
468 				if (patlen >= 0) {
469 					patbeg = p;
470 					return (1);
471 				} else
472 					goto nextin;	/* no match */
473 			}
474 		} while (*q++ != 0);
475 		if (f->out[s])
476 			patlen = q - p - 1;	/* don't count $ */
477 		if (patlen >= 0) {
478 			patbeg = p;
479 			return (1);
480 		}
481 	nextin:
482 		s = 2;
483 		if (f->reset) {
484 			for (i = 2; i <= f->curstat; i++)
485 				xfree(f->posns[i]);
486 			k = *f->posns[0];
487 			if ((f->posns[2] =
488 			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
489 				overflo("out of space in pmatch");
490 			}
491 			for (i = 0; i <= k; i++)
492 				(f->posns[2])[i] = (f->posns[0])[i];
493 			f->initstat = f->curstat = 2;
494 			f->out[2] = f->out[0];
495 			for (i = 0; i < NCHARS; i++)
496 				f->gototab[2][i] = 0;
497 		}
498 	} while (*p++ != 0);
499 	return (0);
500 }
501 
502 int
503 nematch(fa *f, uchar *p)
504 {
505 	register int s, ns;
506 	register uchar *q;
507 	int i, k;
508 
509 	if (f->reset) {
510 		f->initstat = s = makeinit(f, 1);
511 	} else {
512 		s = f->initstat;
513 	}
514 	patlen = -1;
515 	while (*p) {
516 		q = p;
517 		do {
518 			if (f->out[s])		/* final state */
519 				patlen = q-p;
520 			if ((ns = f->gototab[s][*q]) != 0)
521 				s = ns;
522 			else
523 				s = cgoto(f, s, *q);
524 			if (s == 1) {	/* no transition */
525 				if (patlen > 0) {
526 					patbeg = p;
527 					return (1);
528 				} else
529 					goto nnextin;	/* no nonempty match */
530 			}
531 		} while (*q++ != 0);
532 		if (f->out[s])
533 			patlen = q - p - 1;	/* don't count $ */
534 		if (patlen > 0) {
535 			patbeg = p;
536 			return (1);
537 		}
538 	nnextin:
539 		s = 2;
540 		if (f->reset) {
541 			for (i = 2; i <= f->curstat; i++)
542 				xfree(f->posns[i]);
543 			k = *f->posns[0];
544 			if ((f->posns[2] =
545 			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
546 				overflo("out of state space");
547 			}
548 			for (i = 0; i <= k; i++)
549 				(f->posns[2])[i] = (f->posns[0])[i];
550 			f->initstat = f->curstat = 2;
551 			f->out[2] = f->out[0];
552 			for (i = 0; i < NCHARS; i++)
553 				f->gototab[2][i] = 0;
554 		}
555 		p++;
556 	}
557 	return (0);
558 }
559 
560 static Node *regexp(void), *primary(void), *concat(Node *);
561 static Node *alt(Node *), *unary(Node *);
562 
563 static Node *
564 reparse(uchar *p)
565 {
566 	/* parses regular expression pointed to by p */
567 	/* uses relex() to scan regular expression */
568 	Node *np;
569 
570 	dprintf(("reparse <%s>\n", p));
571 	lastre = prestr = p;	/* prestr points to string to be parsed */
572 	rtok = relex();
573 	if (rtok == '\0')
574 		ERROR "empty regular expression" FATAL;
575 	np = regexp();
576 	if (rtok == '\0') {
577 		return (np);
578 	} else {
579 		ERROR "syntax error in regular expression %s at %s",
580 		    lastre, prestr FATAL;
581 	}
582 	/*NOTREACHED*/
583 	return (NULL);
584 }
585 
586 static Node *
587 regexp(void)
588 {
589 	return (alt(concat(primary())));
590 }
591 
592 static Node *
593 primary(void)
594 {
595 	Node *np;
596 
597 	switch (rtok) {
598 	case CHAR:
599 		np = op2(CHAR, NIL, (Node *)rlxval);
600 		rtok = relex();
601 		return (unary(np));
602 	case ALL:
603 		rtok = relex();
604 		return (unary(op2(ALL, NIL, NIL)));
605 	case DOT:
606 		rtok = relex();
607 		return (unary(op2(DOT, NIL, NIL)));
608 	case CCL:
609 		/*LINTED align*/
610 		np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
611 		rtok = relex();
612 		return (unary(np));
613 	case NCCL:
614 		/*LINTED align*/
615 		np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
616 		rtok = relex();
617 		return (unary(np));
618 	case '^':
619 		rtok = relex();
620 		return (unary(op2(CHAR, NIL, (Node *)HAT)));
621 	case '$':
622 		rtok = relex();
623 		return (unary(op2(CHAR, NIL, NIL)));
624 	case '(':
625 		rtok = relex();
626 		if (rtok == ')') {	/* special pleading for () */
627 			rtok = relex();
628 			return (unary(op2(CCL, NIL,
629 			    /*LINTED align*/
630 			    (Node *)tostring((uchar *)""))));
631 		}
632 		np = regexp();
633 		if (rtok == ')') {
634 			rtok = relex();
635 			return (unary(np));
636 		} else {
637 			ERROR "syntax error in regular expression %s at %s",
638 			    lastre, prestr FATAL;
639 		}
640 		/* FALLTHROUGH */
641 	default:
642 		ERROR "illegal primary in regular expression %s at %s",
643 		    lastre, prestr FATAL;
644 	}
645 	/*NOTREACHED*/
646 	return (NULL);
647 }
648 
649 static Node *
650 concat(Node *np)
651 {
652 	switch (rtok) {
653 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
654 		return (concat(op2(CAT, np, primary())));
655 	default:
656 		return (np);
657 	}
658 }
659 
660 static Node *
661 alt(Node *np)
662 {
663 	if (rtok == OR) {
664 		rtok = relex();
665 		return (alt(op2(OR, np, concat(primary()))));
666 	}
667 	return (np);
668 }
669 
670 static Node *
671 unary(Node *np)
672 {
673 	switch (rtok) {
674 	case STAR:
675 		rtok = relex();
676 		return (unary(op2(STAR, np, NIL)));
677 	case PLUS:
678 		rtok = relex();
679 		return (unary(op2(PLUS, np, NIL)));
680 	case QUEST:
681 		rtok = relex();
682 		return (unary(op2(QUEST, np, NIL)));
683 	default:
684 		return (np);
685 	}
686 }
687 
688 static int
689 relex(void)		/* lexical analyzer for reparse */
690 {
691 	register int c;
692 	uchar *cbuf;
693 	int clen, cflag;
694 
695 	switch (c = *prestr++) {
696 	case '|': return OR;
697 	case '*': return STAR;
698 	case '+': return PLUS;
699 	case '?': return QUEST;
700 	case '.': return DOT;
701 	case '\0': prestr--; return '\0';
702 	case '^':
703 	case '$':
704 	case '(':
705 	case ')':
706 		return (c);
707 	case '\\':
708 		if ((c = *prestr++) == 't')
709 			c = '\t';
710 		else if (c == 'n')
711 			c = '\n';
712 		else if (c == 'f')
713 			c = '\f';
714 		else if (c == 'r')
715 			c = '\r';
716 		else if (c == 'b')
717 			c = '\b';
718 		else if (c == '\\')
719 			c = '\\';
720 		else if (isdigit(c)) {
721 			int n = c - '0';
722 			if (isdigit(*prestr)) {
723 				n = 8 * n + *prestr++ - '0';
724 				if (isdigit(*prestr))
725 					n = 8 * n + *prestr++ - '0';
726 			}
727 			c = n;
728 		} /* else it's now in c */
729 		rlxval = c;
730 		return (CHAR);
731 	default:
732 		rlxval = c;
733 		return (CHAR);
734 	case '[':
735 		clen = 0;
736 		if (*prestr == '^') {
737 			cflag = 1;
738 			prestr++;
739 		} else
740 			cflag = 0;
741 		init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
742 		for (;;) {
743 			if ((c = *prestr++) == '\\') {
744 				cbuf[clen++] = '\\';
745 				if ((c = *prestr++) == '\0') {
746 					ERROR
747 			"nonterminated character class %s", lastre FATAL;
748 				}
749 				cbuf[clen++] = c;
750 			} else if (c == ']') {
751 				cbuf[clen] = 0;
752 				rlxstr = tostring(cbuf);
753 				free(cbuf);
754 				if (cflag == 0)
755 					return (CCL);
756 				else
757 					return (NCCL);
758 			} else if (c == '\n') {
759 				ERROR "newline in character class %s...",
760 				    lastre FATAL;
761 			} else if (c == '\0') {
762 				ERROR "nonterminated character class %s",
763 				    lastre FATAL;
764 			} else
765 				cbuf[clen++] = c;
766 		}
767 		/*NOTREACHED*/
768 	}
769 	return (0);
770 }
771 
772 static int
773 cgoto(fa *f, int s, int c)
774 {
775 	register int i, j, k;
776 	register int *p, *q;
777 
778 	for (i = 0; i <= f->accept; i++)
779 		setvec[i] = 0;
780 	setcnt = 0;
781 	/* compute positions of gototab[s,c] into setvec */
782 	p = f->posns[s];
783 	for (i = 1; i <= *p; i++) {
784 		if ((k = f->re[p[i]].ltype) != FINAL) {
785 			if (k == CHAR && c == f->re[p[i]].lval ||
786 			    k == DOT && c != 0 && c != HAT ||
787 			    k == ALL && c != 0 ||
788 			    k == CCL &&
789 			    member(c, (uchar *)f->re[p[i]].lval) ||
790 			    k == NCCL &&
791 			    !member(c, (uchar *)f->re[p[i]].lval) &&
792 			    c != 0 && c != HAT) {
793 				q = f->re[p[i]].lfollow;
794 				for (j = 1; j <= *q; j++) {
795 					if (setvec[q[j]] == 0) {
796 						setcnt++;
797 						setvec[q[j]] = 1;
798 					}
799 				}
800 			}
801 		}
802 	}
803 	/* determine if setvec is a previous state */
804 	tmpset[0] = setcnt;
805 	j = 1;
806 	for (i = f->accept; i >= 0; i--)
807 		if (setvec[i]) {
808 			tmpset[j++] = i;
809 		}
810 	/* tmpset == previous state? */
811 	for (i = 1; i <= f->curstat; i++) {
812 		p = f->posns[i];
813 		if ((k = tmpset[0]) != p[0])
814 			goto different;
815 		for (j = 1; j <= k; j++)
816 			if (tmpset[j] != p[j])
817 				goto different;
818 		/* setvec is state i */
819 		f->gototab[s][c] = i;
820 		return (i);
821 	different:;
822 	}
823 
824 	/* add tmpset to current set of states */
825 	if (f->curstat >= NSTATES-1) {
826 		f->curstat = 2;
827 		f->reset = 1;
828 		for (i = 2; i < NSTATES; i++)
829 			xfree(f->posns[i]);
830 	} else
831 		++(f->curstat);
832 	for (i = 0; i < NCHARS; i++)
833 		f->gototab[f->curstat][i] = 0;
834 	xfree(f->posns[f->curstat]);
835 	if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
836 		overflo("out of space in cgoto");
837 
838 	f->posns[f->curstat] = p;
839 	f->gototab[s][c] = f->curstat;
840 	for (i = 0; i <= setcnt; i++)
841 		p[i] = tmpset[i];
842 	if (setvec[f->accept])
843 		f->out[f->curstat] = 1;
844 	else
845 		f->out[f->curstat] = 0;
846 	return (f->curstat);
847 }
848 
849 static void
850 freefa(fa *f)
851 {
852 
853 	register int i;
854 
855 	if (f == NULL)
856 		return;
857 	for (i = 0; i <= f->curstat; i++)
858 		xfree(f->posns[i]);
859 	for (i = 0; i <= f->accept; i++)
860 		xfree(f->re[i].lfollow);
861 	xfree(f->restr);
862 	xfree(f);
863 }
864