xref: /illumos-gate/usr/src/cmd/awk/b.c (revision 5e989a96186a37eb528fb7bb4d28a150874ec799)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #define	DEBUG
34 
35 #include "awk.h"
36 #include "y.tab.h"
37 
38 #define	HAT	(NCHARS-1)	/* matches ^ in regular expr */
39 				/* NCHARS is 2**n */
40 #define	MAXLIN (3 * LINE_MAX)
41 
42 #define	type(v)		(v)->nobj
43 #define	left(v)		(v)->narg[0]
44 #define	right(v)	(v)->narg[1]
45 #define	parent(v)	(v)->nnext
46 
47 #define	LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48 #define	UNARY	case STAR: case PLUS: case QUEST:
49 
50 /*
51  * encoding in tree Nodes:
52  *	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
53  *		left is index, right contains value or pointer to value
54  *	unary (STAR, PLUS, QUEST): left is child, right is null
55  *	binary (CAT, OR): left and right are children
56  *	parent contains pointer to parent
57  */
58 
59 int	setvec[MAXLIN];
60 int	tmpset[MAXLIN];
61 Node	*point[MAXLIN];
62 
63 int	rtok;		/* next token in current re */
64 int	rlxval;
65 uchar	*rlxstr;
66 uchar	*prestr;	/* current position in current re */
67 uchar	*lastre;	/* origin of last re */
68 
69 static	int setcnt;
70 static	int poscnt;
71 
72 uchar	*patbeg;
73 int	patlen;
74 
75 #define	NFA	20	/* cache this many dynamic fa's */
76 fa	*fatab[NFA];
77 int	nfatab	= 0;	/* entries in fatab */
78 
79 static fa	*mkdfa(uchar *, int);
80 static int	makeinit(fa *, int);
81 static void	penter(Node *);
82 static void	freetr(Node *);
83 static void	overflo(char *);
84 static void	cfoll(fa *, Node *);
85 static void	follow(Node *);
86 static Node	*reparse(uchar *);
87 static int	relex(void);
88 static void	freefa(fa *);
89 static int	cgoto(fa *, int, int);
90 
91 fa *
92 makedfa(uchar *s, int anchor)	/* returns dfa for reg expr s */
93 {
94 	int i, use, nuse;
95 	fa *pfa;
96 
97 	if (compile_time)	/* a constant for sure */
98 		return (mkdfa(s, anchor));
99 	for (i = 0; i < nfatab; i++) {	/* is it there already? */
100 		if (fatab[i]->anchor == anchor &&
101 		    strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
102 			fatab[i]->use++;
103 			return (fatab[i]);
104 		}
105 	}
106 	pfa = mkdfa(s, anchor);
107 	if (nfatab < NFA) {	/* room for another */
108 		fatab[nfatab] = pfa;
109 		fatab[nfatab]->use = 1;
110 		nfatab++;
111 		return (pfa);
112 	}
113 	use = fatab[0]->use;	/* replace least-recently used */
114 	nuse = 0;
115 	for (i = 1; i < nfatab; i++)
116 		if (fatab[i]->use < use) {
117 			use = fatab[i]->use;
118 			nuse = i;
119 		}
120 	freefa(fatab[nuse]);
121 	fatab[nuse] = pfa;
122 	pfa->use = 1;
123 	return (pfa);
124 }
125 
126 fa *
127 mkdfa(uchar *s, int anchor)	/* does the real work of making a dfa */
128 	/* anchor = 1 for anchored matches, else 0 */
129 {
130 	Node *p, *p1;
131 	fa *f;
132 
133 	p = reparse(s);
134 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
135 		/* put ALL STAR in front of reg.  exp. */
136 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
137 		/* put FINAL after reg.  exp. */
138 
139 	poscnt = 0;
140 	penter(p1);	/* enter parent pointers and leaf indices */
141 	if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
142 		overflo("no room for fa");
143 	/* penter has computed number of positions in re */
144 	f->accept = poscnt-1;
145 	cfoll(f, p1);	/* set up follow sets */
146 	freetr(p1);
147 	if ((f->posns[0] =
148 	    (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
149 			overflo("out of space in makedfa");
150 	}
151 	if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
152 		overflo("out of space in makedfa");
153 	*f->posns[1] = 0;
154 	f->initstat = makeinit(f, anchor);
155 	f->anchor = anchor;
156 	f->restr = tostring(s);
157 	return (f);
158 }
159 
160 static int
161 makeinit(fa *f, int anchor)
162 {
163 	register int i, k;
164 
165 	f->curstat = 2;
166 	f->out[2] = 0;
167 	f->reset = 0;
168 	k = *(f->re[0].lfollow);
169 	xfree(f->posns[2]);
170 	if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
171 		overflo("out of space in makeinit");
172 	for (i = 0; i <= k; i++) {
173 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
174 	}
175 	if ((f->posns[2])[1] == f->accept)
176 		f->out[2] = 1;
177 	for (i = 0; i < NCHARS; i++)
178 		f->gototab[2][i] = 0;
179 	f->curstat = cgoto(f, 2, HAT);
180 	if (anchor) {
181 		*f->posns[2] = k-1;	/* leave out position 0 */
182 		for (i = 0; i < k; i++) {
183 			(f->posns[0])[i] = (f->posns[2])[i];
184 		}
185 
186 		f->out[0] = f->out[2];
187 		if (f->curstat != 2)
188 			--(*f->posns[f->curstat]);
189 	}
190 	return (f->curstat);
191 }
192 
193 void
194 penter(Node *p)	/* set up parent pointers and leaf indices */
195 {
196 	switch (type(p)) {
197 	LEAF
198 		left(p) = (Node *) poscnt;
199 		point[poscnt++] = p;
200 		break;
201 	UNARY
202 		penter(left(p));
203 		parent(left(p)) = p;
204 		break;
205 	case CAT:
206 	case OR:
207 		penter(left(p));
208 		penter(right(p));
209 		parent(left(p)) = p;
210 		parent(right(p)) = p;
211 		break;
212 	default:
213 		ERROR "unknown type %d in penter", type(p) FATAL;
214 		break;
215 	}
216 }
217 
218 static void
219 freetr(Node *p)	/* free parse tree */
220 {
221 	switch (type(p)) {
222 	LEAF
223 		xfree(p);
224 		break;
225 	UNARY
226 		freetr(left(p));
227 		xfree(p);
228 		break;
229 	case CAT:
230 	case OR:
231 		freetr(left(p));
232 		freetr(right(p));
233 		xfree(p);
234 		break;
235 	default:
236 		ERROR "unknown type %d in freetr", type(p) FATAL;
237 		break;
238 	}
239 }
240 
241 uchar *
242 cclenter(uchar *p)
243 {
244 	register int i, c;
245 	uchar *op, *chars, *ret;
246 	size_t	bsize;
247 
248 	init_buf(&chars, &bsize, LINE_INCR);
249 	op = p;
250 	i = 0;
251 	while ((c = *p++) != 0) {
252 		if (c == '\\') {
253 			if ((c = *p++) == 't')
254 				c = '\t';
255 			else if (c == 'n')
256 				c = '\n';
257 			else if (c == 'f')
258 				c = '\f';
259 			else if (c == 'r')
260 				c = '\r';
261 			else if (c == 'b')
262 				c = '\b';
263 			else if (c == '\\')
264 				c = '\\';
265 			else if (isdigit(c)) {
266 				int n = c - '0';
267 				if (isdigit(*p)) {
268 					n = 8 * n + *p++ - '0';
269 					if (isdigit(*p))
270 						n = 8 * n + *p++ - '0';
271 				}
272 				c = n;
273 			} /* else */
274 				/* c = c; */
275 		} else if (c == '-' && i > 0 && chars[i-1] != 0) {
276 			if (*p != 0) {
277 				c = chars[i-1];
278 				while ((uchar)c < *p) {	/* fails if *p is \\ */
279 					expand_buf(&chars, &bsize, i);
280 					chars[i++] = ++c;
281 				}
282 				p++;
283 				continue;
284 			}
285 		}
286 		expand_buf(&chars, &bsize, i);
287 		chars[i++] = c;
288 	}
289 	chars[i++] = '\0';
290 	dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
291 	xfree(op);
292 	ret = tostring(chars);
293 	free(chars);
294 	return (ret);
295 }
296 
297 static void
298 overflo(char *s)
299 {
300 	ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
301 }
302 
303 /* enter follow set of each leaf of vertex v into lfollow[leaf] */
304 static void
305 cfoll(fa *f, Node *v)
306 {
307 	register int i;
308 	register int *p;
309 
310 	switch (type(v)) {
311 	LEAF
312 		f->re[(int)left(v)].ltype = type(v);
313 		f->re[(int)left(v)].lval = (int)right(v);
314 		for (i = 0; i <= f->accept; i++)
315 			setvec[i] = 0;
316 		setcnt = 0;
317 		follow(v);	/* computes setvec and setcnt */
318 		if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
319 			overflo("follow set overflow");
320 		f->re[(int)left(v)].lfollow = p;
321 		*p = setcnt;
322 		for (i = f->accept; i >= 0; i--) {
323 			if (setvec[i] == 1)
324 				*++p = i;
325 		}
326 		break;
327 	UNARY
328 		cfoll(f, left(v));
329 		break;
330 	case CAT:
331 	case OR:
332 		cfoll(f, left(v));
333 		cfoll(f, right(v));
334 		break;
335 	default:
336 		ERROR "unknown type %d in cfoll", type(v) FATAL;
337 	}
338 }
339 
340 /*
341  * collects initially active leaves of p into setvec
342  * returns 0 or 1 depending on whether p matches empty string
343  */
344 static int
345 first(Node *p)
346 {
347 	register int b;
348 
349 	switch (type(p)) {
350 	LEAF
351 		if (setvec[(int)left(p)] != 1) {
352 			setvec[(int)left(p)] = 1;
353 			setcnt++;
354 		}
355 		if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
356 			return (0);		/* empty CCL */
357 		else
358 			return (1);
359 	case PLUS:
360 		if (first(left(p)) == 0)
361 			return (0);
362 		return (1);
363 	case STAR:
364 	case QUEST:
365 		(void) first(left(p));
366 		return (0);
367 	case CAT:
368 		if (first(left(p)) == 0 && first(right(p)) == 0)
369 			return (0);
370 		return (1);
371 	case OR:
372 		b = first(right(p));
373 		if (first(left(p)) == 0 || b == 0)
374 			return (0);
375 		return (1);
376 	}
377 	ERROR "unknown type %d in first", type(p) FATAL;
378 	return (-1);
379 }
380 
381 /* collects leaves that can follow v into setvec */
382 static void
383 follow(Node *v)
384 {
385 	Node *p;
386 
387 	if (type(v) == FINAL)
388 		return;
389 	p = parent(v);
390 	switch (type(p)) {
391 	case STAR:
392 	case PLUS:
393 		(void) first(v);
394 		follow(p);
395 		return;
396 
397 	case OR:
398 	case QUEST:
399 		follow(p);
400 		return;
401 
402 	case CAT:
403 		if (v == left(p)) {	/* v is left child of p */
404 			if (first(right(p)) == 0) {
405 				follow(p);
406 				return;
407 			}
408 		} else		/* v is right child */
409 			follow(p);
410 		return;
411 	default:
412 		ERROR "unknown type %d in follow", type(p) FATAL;
413 		break;
414 	}
415 }
416 
417 static int
418 member(uchar c, uchar *s)	/* is c in s? */
419 {
420 	while (*s)
421 		if (c == *s++)
422 			return (1);
423 	return (0);
424 }
425 
426 
427 int
428 match(fa *f, uchar *p)
429 {
430 	register int s, ns;
431 
432 	s = f->reset ? makeinit(f, 0) : f->initstat;
433 	if (f->out[s])
434 		return (1);
435 	do {
436 		if ((ns = f->gototab[s][*p]) != 0)
437 			s = ns;
438 		else
439 			s = cgoto(f, s, *p);
440 		if (f->out[s])
441 			return (1);
442 	} while (*p++ != 0);
443 	return (0);
444 }
445 
446 int
447 pmatch(fa *f, uchar *p)
448 {
449 	register int s, ns;
450 	register uchar *q;
451 	int i, k;
452 
453 	if (f->reset) {
454 		f->initstat = s = makeinit(f, 1);
455 	} else {
456 		s = f->initstat;
457 	}
458 	patbeg = p;
459 	patlen = -1;
460 	do {
461 		q = p;
462 		do {
463 			if (f->out[s])		/* final state */
464 				patlen = q-p;
465 			if ((ns = f->gototab[s][*q]) != 0)
466 				s = ns;
467 			else
468 				s = cgoto(f, s, *q);
469 			if (s == 1) {	/* no transition */
470 				if (patlen >= 0) {
471 					patbeg = p;
472 					return (1);
473 				} else
474 					goto nextin;	/* no match */
475 			}
476 		} while (*q++ != 0);
477 		if (f->out[s])
478 			patlen = q - p - 1;	/* don't count $ */
479 		if (patlen >= 0) {
480 			patbeg = p;
481 			return (1);
482 		}
483 	nextin:
484 		s = 2;
485 		if (f->reset) {
486 			for (i = 2; i <= f->curstat; i++)
487 				xfree(f->posns[i]);
488 			k = *f->posns[0];
489 			if ((f->posns[2] =
490 			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
491 				overflo("out of space in pmatch");
492 			}
493 			for (i = 0; i <= k; i++)
494 				(f->posns[2])[i] = (f->posns[0])[i];
495 			f->initstat = f->curstat = 2;
496 			f->out[2] = f->out[0];
497 			for (i = 0; i < NCHARS; i++)
498 				f->gototab[2][i] = 0;
499 		}
500 	} while (*p++ != 0);
501 	return (0);
502 }
503 
504 int
505 nematch(fa *f, uchar *p)
506 {
507 	register int s, ns;
508 	register uchar *q;
509 	int i, k;
510 
511 	if (f->reset) {
512 		f->initstat = s = makeinit(f, 1);
513 	} else {
514 		s = f->initstat;
515 	}
516 	patlen = -1;
517 	while (*p) {
518 		q = p;
519 		do {
520 			if (f->out[s])		/* final state */
521 				patlen = q-p;
522 			if ((ns = f->gototab[s][*q]) != 0)
523 				s = ns;
524 			else
525 				s = cgoto(f, s, *q);
526 			if (s == 1) {	/* no transition */
527 				if (patlen > 0) {
528 					patbeg = p;
529 					return (1);
530 				} else
531 					goto nnextin;	/* no nonempty match */
532 			}
533 		} while (*q++ != 0);
534 		if (f->out[s])
535 			patlen = q - p - 1;	/* don't count $ */
536 		if (patlen > 0) {
537 			patbeg = p;
538 			return (1);
539 		}
540 	nnextin:
541 		s = 2;
542 		if (f->reset) {
543 			for (i = 2; i <= f->curstat; i++)
544 				xfree(f->posns[i]);
545 			k = *f->posns[0];
546 			if ((f->posns[2] =
547 			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
548 				overflo("out of state space");
549 			}
550 			for (i = 0; i <= k; i++)
551 				(f->posns[2])[i] = (f->posns[0])[i];
552 			f->initstat = f->curstat = 2;
553 			f->out[2] = f->out[0];
554 			for (i = 0; i < NCHARS; i++)
555 				f->gototab[2][i] = 0;
556 		}
557 		p++;
558 	}
559 	return (0);
560 }
561 
562 static Node *regexp(void), *primary(void), *concat(Node *);
563 static Node *alt(Node *), *unary(Node *);
564 
565 static Node *
566 reparse(uchar *p)
567 {
568 	/* parses regular expression pointed to by p */
569 	/* uses relex() to scan regular expression */
570 	Node *np;
571 
572 	dprintf(("reparse <%s>\n", p));
573 	lastre = prestr = p;	/* prestr points to string to be parsed */
574 	rtok = relex();
575 	if (rtok == '\0')
576 		ERROR "empty regular expression" FATAL;
577 	np = regexp();
578 	if (rtok == '\0') {
579 		return (np);
580 	} else {
581 		ERROR "syntax error in regular expression %s at %s",
582 		    lastre, prestr FATAL;
583 	}
584 	/*NOTREACHED*/
585 	return (NULL);
586 }
587 
588 static Node *
589 regexp(void)
590 {
591 	return (alt(concat(primary())));
592 }
593 
594 static Node *
595 primary(void)
596 {
597 	Node *np;
598 
599 	switch (rtok) {
600 	case CHAR:
601 		np = op2(CHAR, NIL, (Node *)rlxval);
602 		rtok = relex();
603 		return (unary(np));
604 	case ALL:
605 		rtok = relex();
606 		return (unary(op2(ALL, NIL, NIL)));
607 	case DOT:
608 		rtok = relex();
609 		return (unary(op2(DOT, NIL, NIL)));
610 	case CCL:
611 		/*LINTED align*/
612 		np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
613 		rtok = relex();
614 		return (unary(np));
615 	case NCCL:
616 		/*LINTED align*/
617 		np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
618 		rtok = relex();
619 		return (unary(np));
620 	case '^':
621 		rtok = relex();
622 		return (unary(op2(CHAR, NIL, (Node *)HAT)));
623 	case '$':
624 		rtok = relex();
625 		return (unary(op2(CHAR, NIL, NIL)));
626 	case '(':
627 		rtok = relex();
628 		if (rtok == ')') {	/* special pleading for () */
629 			rtok = relex();
630 			return (unary(op2(CCL, NIL,
631 			    /*LINTED align*/
632 			    (Node *)tostring((uchar *)""))));
633 		}
634 		np = regexp();
635 		if (rtok == ')') {
636 			rtok = relex();
637 			return (unary(np));
638 		} else {
639 			ERROR "syntax error in regular expression %s at %s",
640 			    lastre, prestr FATAL;
641 		}
642 	default:
643 		ERROR "illegal primary in regular expression %s at %s",
644 		    lastre, prestr FATAL;
645 	}
646 	/*NOTREACHED*/
647 	return (NULL);
648 }
649 
650 static Node *
651 concat(Node *np)
652 {
653 	switch (rtok) {
654 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
655 		return (concat(op2(CAT, np, primary())));
656 	default:
657 		return (np);
658 	}
659 }
660 
661 static Node *
662 alt(Node *np)
663 {
664 	if (rtok == OR) {
665 		rtok = relex();
666 		return (alt(op2(OR, np, concat(primary()))));
667 	}
668 	return (np);
669 }
670 
671 static Node *
672 unary(Node *np)
673 {
674 	switch (rtok) {
675 	case STAR:
676 		rtok = relex();
677 		return (unary(op2(STAR, np, NIL)));
678 	case PLUS:
679 		rtok = relex();
680 		return (unary(op2(PLUS, np, NIL)));
681 	case QUEST:
682 		rtok = relex();
683 		return (unary(op2(QUEST, np, NIL)));
684 	default:
685 		return (np);
686 	}
687 }
688 
689 static int
690 relex(void)		/* lexical analyzer for reparse */
691 {
692 	register int c;
693 	uchar *cbuf;
694 	int clen, cflag;
695 
696 	switch (c = *prestr++) {
697 	case '|': return OR;
698 	case '*': return STAR;
699 	case '+': return PLUS;
700 	case '?': return QUEST;
701 	case '.': return DOT;
702 	case '\0': prestr--; return '\0';
703 	case '^':
704 	case '$':
705 	case '(':
706 	case ')':
707 		return (c);
708 	case '\\':
709 		if ((c = *prestr++) == 't')
710 			c = '\t';
711 		else if (c == 'n')
712 			c = '\n';
713 		else if (c == 'f')
714 			c = '\f';
715 		else if (c == 'r')
716 			c = '\r';
717 		else if (c == 'b')
718 			c = '\b';
719 		else if (c == '\\')
720 			c = '\\';
721 		else if (isdigit(c)) {
722 			int n = c - '0';
723 			if (isdigit(*prestr)) {
724 				n = 8 * n + *prestr++ - '0';
725 				if (isdigit(*prestr))
726 					n = 8 * n + *prestr++ - '0';
727 			}
728 			c = n;
729 		} /* else it's now in c */
730 		rlxval = c;
731 		return (CHAR);
732 	default:
733 		rlxval = c;
734 		return (CHAR);
735 	case '[':
736 		clen = 0;
737 		if (*prestr == '^') {
738 			cflag = 1;
739 			prestr++;
740 		} else
741 			cflag = 0;
742 		init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
743 		for (;;) {
744 			if ((c = *prestr++) == '\\') {
745 				cbuf[clen++] = '\\';
746 				if ((c = *prestr++) == '\0') {
747 					ERROR
748 			"nonterminated character class %s", lastre FATAL;
749 				}
750 				cbuf[clen++] = c;
751 			} else if (c == ']') {
752 				cbuf[clen] = 0;
753 				rlxstr = tostring(cbuf);
754 				free(cbuf);
755 				if (cflag == 0)
756 					return (CCL);
757 				else
758 					return (NCCL);
759 			} else if (c == '\n') {
760 				ERROR "newline in character class %s...",
761 				    lastre FATAL;
762 			} else if (c == '\0') {
763 				ERROR "nonterminated character class %s",
764 				    lastre FATAL;
765 			} else
766 				cbuf[clen++] = c;
767 		}
768 		/*NOTREACHED*/
769 	}
770 	return (0);
771 }
772 
773 static int
774 cgoto(fa *f, int s, int c)
775 {
776 	register int i, j, k;
777 	register int *p, *q;
778 
779 	for (i = 0; i <= f->accept; i++)
780 		setvec[i] = 0;
781 	setcnt = 0;
782 	/* compute positions of gototab[s,c] into setvec */
783 	p = f->posns[s];
784 	for (i = 1; i <= *p; i++) {
785 		if ((k = f->re[p[i]].ltype) != FINAL) {
786 			if (k == CHAR && c == f->re[p[i]].lval ||
787 			    k == DOT && c != 0 && c != HAT ||
788 			    k == ALL && c != 0 ||
789 			    k == CCL &&
790 			    member(c, (uchar *)f->re[p[i]].lval) ||
791 			    k == NCCL &&
792 			    !member(c, (uchar *)f->re[p[i]].lval) &&
793 			    c != 0 && c != HAT) {
794 				q = f->re[p[i]].lfollow;
795 				for (j = 1; j <= *q; j++) {
796 					if (setvec[q[j]] == 0) {
797 						setcnt++;
798 						setvec[q[j]] = 1;
799 					}
800 				}
801 			}
802 		}
803 	}
804 	/* determine if setvec is a previous state */
805 	tmpset[0] = setcnt;
806 	j = 1;
807 	for (i = f->accept; i >= 0; i--)
808 		if (setvec[i]) {
809 			tmpset[j++] = i;
810 		}
811 	/* tmpset == previous state? */
812 	for (i = 1; i <= f->curstat; i++) {
813 		p = f->posns[i];
814 		if ((k = tmpset[0]) != p[0])
815 			goto different;
816 		for (j = 1; j <= k; j++)
817 			if (tmpset[j] != p[j])
818 				goto different;
819 		/* setvec is state i */
820 		f->gototab[s][c] = i;
821 		return (i);
822 	different:;
823 	}
824 
825 	/* add tmpset to current set of states */
826 	if (f->curstat >= NSTATES-1) {
827 		f->curstat = 2;
828 		f->reset = 1;
829 		for (i = 2; i < NSTATES; i++)
830 			xfree(f->posns[i]);
831 	} else
832 		++(f->curstat);
833 	for (i = 0; i < NCHARS; i++)
834 		f->gototab[f->curstat][i] = 0;
835 	xfree(f->posns[f->curstat]);
836 	if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
837 		overflo("out of space in cgoto");
838 
839 	f->posns[f->curstat] = p;
840 	f->gototab[s][c] = f->curstat;
841 	for (i = 0; i <= setcnt; i++)
842 		p[i] = tmpset[i];
843 	if (setvec[f->accept])
844 		f->out[f->curstat] = 1;
845 	else
846 		f->out[f->curstat] = 0;
847 	return (f->curstat);
848 }
849 
850 static void
851 freefa(fa *f)
852 {
853 
854 	register int i;
855 
856 	if (f == NULL)
857 		return;
858 	for (i = 0; i <= f->curstat; i++)
859 		xfree(f->posns[i]);
860 	for (i = 0; i <= f->accept; i++)
861 		xfree(f->re[i].lfollow);
862 	xfree(f->restr);
863 	xfree(f);
864 }
865