xref: /titanic_44/usr/src/cmd/oawk/b.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include "awk.def"
34 #include "stdio.h"
35 #include "awk.h"
36 
37 
38 extern NODE *op2();
39 extern struct fa *cgotofn();
40 #define	MAXLIN 256
41 #define	NCHARS 128
42 #define	NSTATES 256
43 
44 
45 #define	type(v)	v->nobj
46 #define	left(v)	v->narg[0]
47 #define	right(v)	v->narg[1]
48 #define	parent(v)	v->nnext
49 
50 
51 #define	LEAF	case CCL: case NCCL: case CHAR: case DOT:
52 #define	UNARY	case FINAL: case STAR: case PLUS: case QUEST:
53 
54 
55 /*
56  * encoding in tree NODEs:
57  * leaf (CCL, NCCL, CHAR, DOT): left is index,
58  * right contains value or pointer to value
59  * unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
60  * binary (CAT, OR): left and right are children
61  * parent contains pointer to parent
62  */
63 
64 
65 struct fa {
66 union {
67 		ccl_chars_t s;
68 		int h;
69 	} cc;
70 #define	MLCMPLT(m1, l1, m2, l2) ((m1 != m2 &&\
71 				(int)m1 < (int)m2) ||\
72 				(m1 == m2 && (int)l1 < (int)l2))
73 #define	MLCMPLE(m1, l1, m2, l2) ((m1 != m2 &&\
74 				(int)m1 <= (int)m2) ||\
75 				(m1 == m2 && (int)l1 <= (int)l2))
76 #define	MLCMPGT(m1, l1, m2, l2) ((m1 != m2 &&\
77 				(int)m1 > (int)m2) ||\
78 				(m1 == m2 && (int)l1 > (int)l2))
79 #define	MAX_CODESET	3
80 	struct fa *st;
81 };
82 
83 
84 int	*state[NSTATES];
85 int	*foll[MAXLIN];
86 int	setvec[MAXLIN];
87 NODE	*point[MAXLIN];
88 
89 
90 int	setcnt;
91 int	line;
92 
93 
94 static int	ccln_member();
95 static int	insert_table();
96 static int	delete_table();
97 #ifdef DEBUG
98 #define	ddump_table(t, s)	dump_table(t, s)
99 #else
100 #define	ddump_table(t, s)
101 #endif
102 
103 
104 struct fa *
105 makedfa(p)	/* returns dfa for tree pointed to by p */
106 NODE *p;
107 {
108 	NODE *p1;
109 	struct fa *fap;
110 	p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0,
111 		(NODE *) 0), (NODE *) 0), p);
112 		/* put DOT STAR in front of reg. exp. */
113 	p1 = op2(FINAL, p1, (NODE *) 0);	/* install FINAL NODE */
114 
115 
116 	line = 0;
117 	penter(p1);	/* enter parent pointers and leaf indices */
118 	point[line] = p1;	/* FINAL NODE */
119 	setvec[0] = 1;		/* for initial DOT STAR */
120 	cfoll(p1);	/* set up follow sets */
121 	fap = cgotofn();
122 	freetr(p1);	/* add this when alloc works */
123 	return (fap);
124 }
125 
126 
127 penter(p)	/* set up parent pointers and leaf indices */
128 NODE *p;
129 {
130 	switch (type(p)) {
131 		LEAF
132 			left(p) = (NODE *)line;
133 			point[line++] = p;
134 			break;
135 		UNARY
136 			penter(left(p));
137 			parent(left(p)) = p;
138 			break;
139 		case CAT:
140 		case OR:
141 			penter(left(p));
142 			penter(right(p));
143 			parent(left(p)) = p;
144 			parent(right(p)) = p;
145 			break;
146 		default:
147 			error(FATAL, "unknown type %d in penter\n", type(p));
148 			break;
149 	}
150 }
151 
152 
153 freetr(p)	/* free parse tree and follow sets */
154 NODE *p;
155 {
156 	switch (type(p)) {
157 		LEAF
158 			xfree(foll[(int)left(p)]);
159 			xfree(p);
160 			break;
161 		UNARY
162 			freetr(left(p));
163 			xfree(p);
164 			break;
165 		case CAT:
166 		case OR:
167 			freetr(left(p));
168 			freetr(right(p));
169 			xfree(p);
170 			break;
171 		default:
172 			error(FATAL, "unknown type %d in freetr", type(p));
173 			break;
174 	}
175 }
176 ccl_chars_t *
177 cclenter(p)
178 register wchar_t *p;
179 {
180 	register int		i, cn;
181 	register wchar_t	c, pc;
182 	register wchar_t	*op;
183 	register ccl_chars_t	*new;
184 	ccl_chars_t		chars[MAXLIN];
185 
186 
187 	op = p;
188 	i = 0;
189 	while ((c = *p++) != 0) {
190 		if (c == '-' && i > 0)  {
191 			if (*p != 0) {
192 				/*
193 				 * If there are not in same code set,  the
194 				 * class should be ignore (make two independent
195 				 * characters)!
196 				 */
197 				c = *p++;
198 				cn = wcsetno(pc);
199 				if (cn != wcsetno(c) || pc > c)
200 					goto char_array;
201 				i = insert_table(chars, i, cn, pc, cn, c);
202 				continue;
203 			}
204 		}
205 char_array:
206 		if (i >= MAXLIN)
207 			overflo();
208 		cn = wcsetno(c);
209 		i = insert_table(chars, i, cn, c, cn, c);
210 		pc = c;
211 	}
212 	dprintf("cclenter: in = |%ws|, ", op, NULL, NULL);
213 	xfree(op);
214 	i = (i + 1) * sizeof (ccl_chars_t);
215 	if ((new = (ccl_chars_t *)malloc(i)) == NULL)
216 		error(FATAL, "out of space in cclenter on %s", op);
217 	(void) memcpy((char *)new, (char *)chars, i);
218 	ddump_table(chars, i / 4);
219 
220 
221 	return (new);
222 }
223 
224 
225 overflo()
226 {
227 	error(FATAL, "regular expression too long\n");
228 }
229 
230 
231 cfoll(v)	/* enter follow set of each leaf of vertex v into foll[leaf] */
232 register NODE *v;
233 {
234 	register i;
235 	int prev;
236 	int *add();
237 
238 
239 	switch (type(v)) {
240 		LEAF
241 			setcnt = 0;
242 			for (i = 1; i <= line; i++)
243 				setvec[i] = 0;
244 			follow(v);
245 			foll[(int)left(v)] = add(setcnt);
246 			break;
247 		UNARY
248 			cfoll(left(v));
249 			break;
250 		case CAT:
251 		case OR:
252 			cfoll(left(v));
253 			cfoll(right(v));
254 			break;
255 		default:
256 			error(FATAL, "unknown type %d in cfoll", type(v));
257 	}
258 }
259 
260 
261 first(p)		/* collects initially active leaves of p into setvec */
262 register NODE *p;
263 	/* returns 0 or 1 depending on whether p matches empty string */
264 {
265 	register b;
266 
267 
268 	switch (type(p)) {
269 		LEAF
270 			if (setvec[(int)left(p)] != 1) {
271 				setvec[(int)left(p)] = 1;
272 				setcnt++;
273 			}
274 			if (type(p) == CCL &&
275 			(*(ccl_chars_t *)right(p)).cc_cs == (wchar_t)0x0)
276 				return (0);		/* empty CCL */
277 			else return (1);
278 		case FINAL:
279 		case PLUS:
280 			if (first(left(p)) == 0)
281 				return (0);
282 			return (1);
283 		case STAR:
284 		case QUEST:
285 			first(left(p));
286 			return (0);
287 		case CAT:
288 			if (first(left(p)) == 0 && first(right(p)) == 0)
289 				return (0);
290 			return (1);
291 		case OR:
292 			b = first(right(p));
293 			if (first(left(p)) == 0 || b == 0)
294 				return (0);
295 			return (1);
296 	}
297 	error(FATAL, "unknown type %d in first\n", type(p));
298 	return (-1);
299 }
300 
301 
302 follow(v)
303 NODE *v;		/* collects leaves that can follow v into setvec */
304 {
305 	NODE *p;
306 
307 
308 	if (type(v) == FINAL)
309 		return;
310 	p = parent(v);
311 	switch (type(p)) {
312 		case STAR:
313 		case PLUS:	first(v);
314 				follow(p);
315 				return;
316 
317 
318 		case OR:
319 		case QUEST:	follow(p);
320 				return;
321 
322 
323 		case CAT:	if (v == left(p)) { /* v is left child of p */
324 					if (first(right(p)) == 0) {
325 						follow(p);
326 						return;
327 					}
328 				} else		/* v is right child */
329 					follow(p);
330 				return;
331 		case FINAL:	if (setvec[line] != 1) {
332 					setvec[line] = 1;
333 					setcnt++;
334 				}
335 				return;
336 	}
337 }
338 
339 
340 /*
341  * There are three type of functions for checking member ship.  Because I have
342  * been changed structure of CCL tables.  And some CCL tables end up with NULLs
343  * but someone has length and will includes NULLs in table as one of data.
344  * Please note, CCL table which has a length data and data will include NULLs,
345  * it only used within a this source file("b.c").
346  */
347 
348 
349 ccl_member(ns, cs, ne, ce, s)	/* is cs thru ce in s? */
350 register int		ns;
351 register wchar_t	cs;
352 register int		ne;
353 register wchar_t	ce;
354 register ccl_chars_t	*s;
355 {
356 	/*
357 	 * The specified range(cs, ce) must be beside the range between
358 	 * s->cc_start and s->cc_end to determine member.
359 	 */
360 	while (s->cc_cs || s->cc_ce) {
361 		if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
362 				MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
363 			return (1);
364 		s++;
365 	}
366 	return (0);
367 }
368 
369 
370 static
371 ccln_member(ns, cs, ne, ce, s, n)	/* is cs thru ce in s? */
372 register int ns;
373 register wchar_t cs;
374 register int ne;
375 register wchar_t ce;
376 register ccl_chars_t *s;
377 register int n;
378 {
379 	/*
380 	 * The specified range(cs, ce) must be beside the range between
381 	 * s->cc_start and s->cc_end to determine member.
382 	 */
383 	while (n-- > 0) {
384 		if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
385 				MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
386 			return (1);
387 		s++;
388 	}
389 	return (0);
390 }
391 
392 
393 member(c, s)	/* is c in s? */
394 register wchar_t c, *s;
395 {
396 	while (*s)
397 		if (c == *s++)
398 			return (1);
399 	return (0);
400 }
401 
402 
403 notin(array, n, prev)		/* is setvec in array[0] thru array[n]? */
404 int **array;
405 int *prev; {
406 	register i, j;
407 	int *ptr;
408 	for (i = 0; i <= n; i++) {
409 		ptr = array[i];
410 		if (*ptr == setcnt) {
411 			for (j = 0; j < setcnt; j++)
412 				if (setvec[*(++ptr)] != 1) goto nxt;
413 			*prev = i;
414 			return (0);
415 		}
416 		nxt: /* dummy */;
417 	}
418 	return (1);
419 }
420 
421 
422 int *add(n) {		/* remember setvec */
423 	int *ptr, *p;
424 	register i;
425 	if ((p = ptr = (int *)malloc((n+1)*sizeof (int))) == NULL)
426 		overflo();
427 	*ptr = n;
428 	dprintf("add(%d)\n", n, NULL, NULL);
429 	for (i = 1; i <= line; i++)
430 		if (setvec[i] == 1) {
431 			*(++ptr) = i;
432 		dprintf("  ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
433 		}
434 	dprintf("\n", NULL, NULL, NULL);
435 	return (p);
436 }
437 
438 
439 struct fa *
440 cgotofn()
441 {
442 	register i, k;
443 	register int *ptr;
444 	register int ns, ne;
445 	register wchar_t cs, ce;
446 	register ccl_chars_t *p;
447 	register NODE *cp;
448 	int j, n, s, ind, numtrans;
449 	int finflg;
450 	int curpos, num, prev;
451 	struct fa *where[NSTATES];
452 
453 
454 	struct {
455 		ccl_chars_t	cc;
456 		int		n;
457 	} fatab[257];
458 	register struct fa *pfa;
459 
460 
461 	char index[MAXLIN];
462 	char iposns[MAXLIN];
463 	int sposns[MAXLIN];
464 	int spmax, spinit;
465 	ccl_chars_t symbol[NCHARS];
466 	ccl_chars_t isyms[NCHARS];
467 	ccl_chars_t ssyms[NCHARS];
468 	int ssmax, symax, ismax, ssinit;
469 
470 
471 	wchar_t hat;
472 	int hatcn;
473 
474 
475 	for (i = 0; i <= line; i++) index[i] = iposns[i] = setvec[i] = 0;
476 	isyms[0].cc_cs = isyms[0].cc_ce = (wchar_t)0x0;
477 	for (i = 0; i < NCHARS; i++)
478 		isyms[i] = symbol[i] = ssyms[i] = isyms[0];
479 	symax = 0;
480 	setcnt = 0;
481 	/* compute initial positions and symbols of state 0 */
482 	ismax = 0;
483 	ssmax = 0;
484 	ptr = state[0] = foll[0];
485 	spinit = *ptr;
486 	hat = HAT;
487 	hatcn = wcsetno(hat);
488 	for (i = 0; i < spinit; i++) {
489 		curpos = *(++ptr);
490 		sposns[i] = curpos;
491 		iposns[curpos] = 1;
492 		cp = point[curpos];
493 		dprintf("i= %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
494 		switch (type(cp)) {
495 			case CHAR:
496 				k = (int)right(cp);
497 				ns = wcsetno(k);
498 				if (! ccln_member(ns, k, ns, k,
499 							isyms, ismax)) {
500 					ismax = insert_table(isyms, ismax,
501 								ns, k, ns, k);
502 				}
503 				ssyms[ssmax].cc_ns = ns;
504 				ssyms[ssmax].cc_cs = k;
505 				ssyms[ssmax].cc_ne = ns;
506 				ssyms[ssmax++].cc_ce = k;
507 				break;
508 			case DOT:
509 				cs = WC_VERY_SMALL;
510 				ns = 0;
511 				ce = HAT - 1;
512 				ne = hatcn;
513 				if (! ccln_member(ns, cs, ne, ce,
514 							isyms, ismax)) {
515 					ismax = insert_table(isyms, ismax,
516 								ns, cs, ne, ce);
517 				}
518 				ssyms[ssmax].cc_cs = cs;
519 				ssyms[ssmax].cc_ns = ns;
520 				ssyms[ssmax].cc_ce = ce;
521 				ssyms[ssmax++].cc_ne = ne;
522 				cs = HAT + 1;
523 				ns = hatcn;
524 				ce = WC_VERY_LARGE;
525 				ne = MAX_CODESET;
526 				if (! ccln_member(ns, cs, ne, ce,
527 							isyms, ismax)) {
528 					ismax = insert_table(isyms, ismax,
529 								ns, cs, ne, ce);
530 				}
531 				ssyms[ssmax].cc_cs = cs;
532 				ssyms[ssmax].cc_ns = ns;
533 				ssyms[ssmax].cc_ce = ce;
534 				ssyms[ssmax++].cc_ne = ne;
535 				break;
536 			case CCL:
537 				cs = HAT;
538 				ns = hatcn;
539 				for (p = (ccl_chars_t *)right(cp);
540 					p->cc_cs; p++) {
541 					if ((p->cc_ns != ns ||\
542 					p->cc_cs != cs) &&\
543 				!ccln_member(p->cc_ns, p->cc_cs,
544 				p->cc_ne, p->cc_ce, isyms, ismax)) {
545 						ismax = insert_table(isyms,
546 				ismax, p->cc_ns, p->cc_cs, p->cc_ne, p->cc_ce);
547 					}
548 					ssyms[ssmax++] = *p;
549 				}
550 				break;
551 			case NCCL:
552 				ns = 0;
553 				cs = WC_VERY_SMALL;
554 				for (p = (ccl_chars_t *)right(cp);
555 					p->cc_cs; p++) {
556 					if ((ns != hatcn || p->cc_cs != HAT) &&
557 						! ccln_member(ns, cs,
558 							p->cc_ns, p->cc_cs-1,
559 								isyms, ismax)) {
560 						ismax = insert_table(isyms,
561 								ismax,
562 								ns, cs,
563 								p->cc_ns,
564 								p->cc_cs-1);
565 					}
566 					ssyms[ssmax].cc_ns = ns;
567 					ssyms[ssmax].cc_cs = cs;
568 					ssyms[ssmax].cc_ne = p->cc_ns;
569 					ssyms[ssmax++].cc_ce = p->cc_cs-1;
570 					if (p->cc_ce == (wchar_t)0x0) {
571 						ns = p->cc_ns;
572 						cs = p->cc_cs + 1;
573 
574 					} else {
575 						ns = p->cc_ne;
576 						cs = p->cc_ce + 1;
577 					}
578 				}
579 				if ((ns != hatcn || cs != HAT) &&
580 					! ccln_member(ns, cs,
581 						MAX_CODESET, WC_VERY_LARGE,
582 							isyms, ismax)) {
583 					ismax = insert_table(isyms, ismax,
584 							ns, cs, MAX_CODESET,
585 							WC_VERY_LARGE);
586 				}
587 				ssyms[ssmax].cc_ns = ns;
588 				ssyms[ssmax].cc_cs = cs;
589 				ssyms[ssmax].cc_ne = MAX_CODESET;
590 				ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
591 				break;
592 		}
593 	}
594 	ssinit = ssmax;
595 	symax = 0;
596 	n = 0;
597 	for (s = 0; s <= n; s++)  {
598 		dprintf("s = %d\n", s, NULL, NULL);
599 		ind = 0;
600 		numtrans = 0;
601 		finflg = 0;
602 		if (*(state[s] + *state[s]) == line) {		/* s final? */
603 			finflg = 1;
604 			goto tenter;
605 		}
606 		spmax = spinit;
607 		ssmax = ssinit;
608 		ptr = state[s];
609 		num = *ptr;
610 		for (i = 0; i < num; i++) {
611 			curpos = *(++ptr);
612 			if (iposns[curpos] != 1 && index[curpos] != 1) {
613 				index[curpos] = 1;
614 				sposns[spmax++] = curpos;
615 			}
616 			cp = point[curpos];
617 			switch (type(cp)) {
618 				case CHAR:
619 					k = (int)right(cp);
620 					ns = wcsetno(k);
621 					if (! ccln_member(ns, k, ns, k,
622 							isyms, ismax) &&
623 						! ccln_member(ns, k, ns, k,
624 							symbol, symax)) {
625 						symax = insert_table(symbol,
626 									symax,
627 									ns, k,
628 									ns, k);
629 					}
630 					ssyms[ssmax].cc_ns = ns;
631 					ssyms[ssmax].cc_cs = k;
632 					ssyms[ssmax].cc_ne = ns;
633 					ssyms[ssmax++].cc_ce = k;
634 					break;
635 				case DOT:
636 					cs = WC_VERY_SMALL;
637 					ns = 0;
638 					ce = HAT - 1;
639 					ne = hatcn;
640 					if (! ccln_member(ns, cs, ne, ce,
641 							isyms, ismax) &&
642 						! ccln_member(ns, cs, ne, ce,
643 							symbol, symax)) {
644 						symax = insert_table(symbol,
645 									symax,
646 									ns, cs,
647 									ne, ce);
648 					}
649 					ssyms[ssmax].cc_cs = cs;
650 					ssyms[ssmax].cc_ns = ns;
651 					ssyms[ssmax].cc_ce = ce;
652 					ssyms[ssmax++].cc_ne = ne;
653 					cs = HAT + 1;
654 					ns = hatcn;
655 					ce = WC_VERY_LARGE;
656 					ne = MAX_CODESET;
657 					if (! ccln_member(ns, cs, ne, ce,
658 								isyms, ismax) &&
659 						! ccln_member(ns, cs, ne, ce,
660 							symbol, symax)) {
661 						symax = insert_table(symbol,
662 									symax,
663 									ns, cs,
664 									ne, ce);
665 					}
666 					ssyms[ssmax].cc_cs = cs;
667 					ssyms[ssmax].cc_ns = ns;
668 					ssyms[ssmax].cc_ce = ce;
669 					ssyms[ssmax++].cc_ne = ne;
670 					break;
671 				case CCL:
672 					cs = HAT;
673 					ns = hatcn;
674 					for (p = (ccl_chars_t *)right(cp);
675 						p->cc_cs; p++) {
676 						if ((p->cc_ns != ns ||
677 							p->cc_cs != cs) &&
678 							! ccln_member(p->cc_ns,
679 							p->cc_cs, p->cc_ne,
680 						p->cc_ce, isyms, ismax) &&
681 						!ccln_member(p->cc_ns, p->cc_cs,
682 						p->cc_ne, p->cc_ce, symbol,
683 						symax)) {
684 							symax = insert_table(
685 						symbol, symax, p->cc_ns,
686 						p->cc_cs, p->cc_ne, p->cc_ce);
687 						}
688 						ssyms[ssmax++] = *p;
689 					}
690 					break;
691 				case NCCL:
692 					ns = 0;
693 					cs = WC_VERY_SMALL;
694 		for (p = (ccl_chars_t *)right(cp); p->cc_cs; p++) {
695 			if ((p->cc_ns != hatcn || p->cc_cs != HAT) &&
696 					! ccln_member(ns, cs, p->cc_ns,
697 					p->cc_cs-1, isyms, ismax) &&
698 					! ccln_member(ns, cs, p->cc_ns,
699 					p->cc_cs-1, symbol, symax)) {
700 				symax = insert_table(symbol,
701 					symax, ns, cs, p->cc_ns, p->cc_cs-1);
702 						}
703 						ssyms[ssmax].cc_ns = ns;
704 						ssyms[ssmax].cc_cs = cs;
705 						ssyms[ssmax].cc_ne = p->cc_ns;
706 						ssyms[ssmax++].cc_ce
707 								= p->cc_cs-1;
708 						if (p->cc_ce == (wchar_t)0x0) {
709 							ns = p->cc_ns;
710 							cs = p->cc_cs + 1;
711 
712 						} else {
713 							ns = p->cc_ne;
714 							cs = p->cc_ce + 1;
715 						}
716 					}
717 		if ((ns != hatcn || cs != HAT) && ! ccln_member(ns, cs,
718 				MAX_CODESET, WC_VERY_LARGE, isyms, ismax) &&
719 				! ccln_member(ns, cs, MAX_CODESET,
720 					WC_VERY_LARGE, symbol, symax)) {
721 			symax = insert_table(symbol, symax, ns, cs,
722 								MAX_CODESET,
723 								WC_VERY_LARGE);
724 					}
725 					ssyms[ssmax].cc_ns = ns;
726 					ssyms[ssmax].cc_cs = cs;
727 					ssyms[ssmax].cc_ne = MAX_CODESET;
728 					ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
729 					break;
730 			}
731 		}
732 		for (j = 0; j < ssmax; j++) {	/* nextstate(s, ssyms[j]) */
733 			ns = ssyms[j].cc_ns;
734 			cs = ssyms[j].cc_cs;
735 			ne = ssyms[j].cc_ne;
736 			ce = ssyms[j].cc_ce;
737 dprintf("j = %d, cs = %o, ce = %o\n", j, cs, ce);
738 			symax = delete_table(symbol, symax, ns, cs, ne, ce);
739 			setcnt = 0;
740 			for (k = 0; k <= line; k++) setvec[k] = 0;
741 			for (i = 0; i < spmax; i++) {
742 				index[sposns[i]] = 0;
743 				cp = point[sposns[i]];
744 				if ((k = type(cp)) != FINAL) {
745 					if (k == CHAR && ns == ne && cs == ce &&
746 						cs == (int)right(cp) ||
747 						k == DOT || k == CCL &&
748 						ccl_member(ns, cs, ne, ce,
749 						(ccl_chars_t *)right(cp)) ||
750 						k == NCCL &&
751 						!ccl_member(ns, cs, ne, ce,
752 						(ccl_chars_t *)right(cp))) {
753 						ptr = foll[sposns[i]];
754 						num = *ptr;
755 						for (k = 0; k < num; k++) {
756 						if (setvec[*(++ptr)] != 1 &&
757 							iposns[*ptr] != 1) {
758 							setvec[*ptr] = 1;
759 								setcnt++;
760 							}
761 						}
762 					}
763 				}
764 			} /* end nextstate */
765 			if (notin(state, n, &prev)) {
766 				if (n >= NSTATES - 1) {
767 		printf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
768 					overflo();
769 				}
770 				state[++n] = add(setcnt);
771 				dprintf("	delta(%d,[%o,%o])",
772 					s, cs, ce);
773 				dprintf(" = %d, ind = %d\n", n, ind+1, NULL);
774 				fatab[++ind].cc.cc_ns = ns;
775 				fatab[ind].cc.cc_cs = cs;
776 				fatab[ind].cc.cc_ne = ne;
777 				fatab[ind].cc.cc_ce = ce;
778 				fatab[ind].n = n;
779 				numtrans++;
780 			} else {
781 				if (prev != 0) {
782 					dprintf("	delta(%d,[%o,%o])",
783 						s, cs, ce);
784 					dprintf("= %d, ind = %d\n",
785 						prev, ind+1, NULL);
786 					fatab[++ind].cc.cc_ns = ns;
787 					fatab[ind].cc.cc_cs = cs;
788 					fatab[ind].cc.cc_ne = ne;
789 					fatab[ind].cc.cc_ce = ce;
790 					fatab[ind].n = prev;
791 					numtrans++;
792 				}
793 			}
794 		}
795 	tenter:
796 		if ((pfa = (struct fa *)malloc((numtrans + 1)
797 						* sizeof (struct fa))) == NULL)
798 			overflo();
799 		where[s] = pfa;
800 		if (finflg)
801 			pfa->cc.h = -1;		/* s is a final state */
802 		else
803 			pfa->cc.h = numtrans;
804 		pfa->st = 0;
805 		for (i = 1, pfa += 1; i <= numtrans; i++, pfa++) {
806 			pfa->cc.s = fatab[i].cc;
807 			pfa->st = (struct fa *)fatab[i].n;
808 		}
809 	}
810 	for (i = 0; i <= n; i++) {
811 		if (i != 0)	/* state[0] is freed later in freetr() */
812 			xfree(state[i]);	/* free state[i] */
813 		pfa = where[i];
814 		pfa->st = where[0];
815 		dprintf("state %d: (%o)\n", i, pfa, NULL);
816 		dprintf("	numtrans = %d,	default = %o\n",
817 			pfa->cc.h, pfa->st, NULL);
818 		for (k = 1; k <= pfa->cc.h; k++) {
819 			(pfa+k)->st = where[(int)(pfa+k)->st];
820 			dprintf("	char = [%o,%o],	nextstate = %o\n",
821 				(pfa+k)->cc.s.cc_cs, (pfa+k)->cc.s.cc_ce,
822 				(pfa+k)->st);
823 		}
824 	}
825 	pfa = where[0];
826 	if ((num = pfa->cc.h) < 0)
827 		return (where[0]);
828 	for (pfa += num; num; num--, pfa--)
829 		if (pfa->cc.s.cc_ns == hatcn && pfa->cc.s.cc_cs == HAT) {
830 			return (pfa->st);
831 		}
832 	return (where[0]);
833 }
834 
835 
836 /*
837  * Insert CCL entry to CCL table with maintain optimized order.
838  */
839 static int
840 insert_table(table_base, table_size, ns, cs, ne, ce)
841 ccl_chars_t		*table_base;
842 register int		table_size;
843 register int		ns, ne;
844 register wchar_t	cs, ce;
845 {
846 	register int		i;
847 	register int		tns, tne;
848 	register wchar_t	tcs, tce;
849 	register ccl_chars_t	*table;
850 	register ccl_chars_t	*saved_table;
851 	int			saved_i;
852 
853 
854 
855 
856 	dprintf("Inserting {%o, %o} to table %o\n", cs, ce, table_base);
857 	/*
858 	 * Searching the table to find out where should put the new item.
859 	 */
860 	for (i = 0, table = table_base; i < table_size; i++, table++) {
861 		tns = table->cc_ns;
862 		tcs = table->cc_cs;
863 		tne = table->cc_ne;
864 		tce = table->cc_ce;
865 		if (MLCMPLT(ne, ce, tns, (tcs - 1))) {
866 			/*
867 			 * Quick! insert to font of current table entries.
868 			 */
869 qinsert:
870 			table_size++;
871 			for (; i < table_size; i++, table++) {
872 				tns = table->cc_ns;
873 				tcs = table->cc_cs;
874 				tne = table->cc_ne;
875 				tce = table->cc_ce;
876 				table->cc_ns = ns;
877 				table->cc_cs = cs;
878 				table->cc_ne = ne;
879 				table->cc_ce = ce;
880 				ns = tns;
881 				cs = tcs;
882 				ne = tne;
883 				ce = tce;
884 			}
885 			goto add_null;
886 		} else if (MLCMPLE(tns, (tcs - 1), ns, cs) &&
887 				MLCMPLE(ns, cs, tne, (tce + 1))) {
888 			/*
889 			 * Starting point is within the current entry.
890 			 */
891 			if (MLCMPGT(tns, tcs, ns, cs)) {
892 				table->cc_ns = ns;
893 				table->cc_cs = cs;
894 			}
895 			if (MLCMPLE(ne, ce, tne, tce)) {
896 				return (table_size);
897 			}
898 			goto combine;
899 		}
900 	}
901 
902 
903 	/*
904 	 * Adding new one to end of table.
905 	 */
906 	table->cc_ns = ns;
907 	table->cc_cs = cs;
908 	table->cc_ne = ne;
909 	table->cc_ce = ce;
910 
911 
912 	table_size++;
913 	goto add_null;
914 
915 
916 
917 
918 	combine:
919 	/*
920 	 * Check and try to combine the new entry with rest of entries.
921 	 */
922 	if ((i + 1) >= table_size) {
923 		table->cc_ne = ne;
924 		table->cc_ce = ce;
925 		return (table_size);
926 	}
927 
928 
929 	saved_table = table++;
930 	saved_i = i++;
931 
932 
933 	/*
934 	 * Finding the spot where we should put the end point.
935 	 */
936 	for (; i < table_size; i++, table++) {
937 		if (MLCMPLT(ne, ce, table->cc_ns, (table->cc_cs - 1))) {
938 			break;
939 		} else
940 		if (MLCMPLE(table->cc_ns, (table->cc_cs - 1), ne, ce) &&
941 			MLCMPLE(ne, ce, table->cc_ne, (table->cc_ce + 1))) {
942 			/*
943 			 * Tack with this table.
944 			 */
945 			if (MLCMPLT(ne, ce, table->cc_ne, table->cc_ce)) {
946 				ne = table->cc_ne;
947 				ce = table->cc_ce;
948 			}
949 			table++;
950 			i++;
951 			break;
952 		}
953 	}
954 
955 
956 	saved_table->cc_ne = ne;
957 	saved_table->cc_ce = ce;
958 	saved_i = table_size - (i - saved_i - 1);
959 
960 
961 	/*
962 	 * Moving the rest of entries.
963 	 */
964 	for (; i < table_size; i++, table++)
965 		*(++saved_table) = *table;
966 	table_size = saved_i;
967 
968 
969 add_null:
970 	table_base[table_size].cc_cs = (wchar_t)0x0;
971 	table_base[table_size].cc_ce = (wchar_t)0x0;
972 
973 
974 	return (table_size);
975 }
976 
977 
978 
979 
980 static int
981 delete_table(table_base, table_size, ns, cs, ne, ce)
982 ccl_chars_t		*table_base;
983 register int		table_size;
984 register int		ns, ne;
985 register wchar_t	cs, ce;
986 {
987 	register int		i;
988 	int			saved_i;
989 	register ccl_chars_t	*table;
990 	register ccl_chars_t	*saved_table;
991 	register int		tns;
992 	register wchar_t	tcs;
993 	register int		tne;
994 	register wchar_t	tce;
995 
996 
997 
998 
999 	for (i = 0, table = table_base; i < table_size; i++, table++) {
1000 		tns = table->cc_ns;
1001 		tcs = table->cc_cs;
1002 		tne = table->cc_ne;
1003 		tce = table->cc_ce;
1004 		if (MLCMPLT(ne, ce, tns, tcs))
1005 			return (table_size);
1006 		else if (MLCMPLT(ne, ce, tne, tce)) {
1007 			if (MLCMPLE(ns, cs, tns, tcs)) {
1008 				/*
1009 				 * Shrink type 1.
1010 				 */
1011 				table->cc_ns = ne;
1012 				table->cc_cs = ce + 1;
1013 				return (table_size);
1014 
1015 			} else {
1016 				/*
1017 				 * Spliting !!
1018 				 */
1019 				table->cc_ns = ne;
1020 				table->cc_cs = ce + 1;
1021 				tne = ns;
1022 				tce = cs - 1;
1023 				table_size++;
1024 				for (; i < table_size; i++, table++) {
1025 					ns = table->cc_ns;
1026 					cs = table->cc_cs;
1027 					ne = table->cc_ne;
1028 					ce = table->cc_ce;
1029 					table->cc_ns = tns;
1030 					table->cc_cs = tcs;
1031 					table->cc_ne = tne;
1032 					table->cc_ce = tce;
1033 					tns = ns;
1034 					tcs = cs;
1035 					tne = ne;
1036 					tce = ce;
1037 				}
1038 				return (table_size);
1039 			}
1040 
1041 		} else if (MLCMPLE(ns, cs, tne, tce)) {
1042 			if (MLCMPGT(ns, cs, tns, tcs)) {
1043 				/*
1044 				 * Shrink current table(type 2).
1045 				 */
1046 				table->cc_ne = ns;
1047 				table->cc_ce = cs - 1;
1048 				table++;
1049 				i++;
1050 			}
1051 			/*
1052 			 * Search for the end point.
1053 			 */
1054 			saved_i = i;
1055 			saved_table = table;
1056 			for (; i < table_size; i++, table++) {
1057 				if (MLCMPLT(ne, ce,
1058 						table->cc_ns, table->cc_cs)) {
1059 					/*
1060 					 * Easy point, no shrinks!
1061 					 */
1062 					break;
1063 
1064 				} else if (MLCMPGT(table->cc_ne, table->cc_ce,
1065 						ne, ce)) {
1066 					/*
1067 					 * Shrinking...
1068 					 */
1069 					table->cc_ns = ne;
1070 					table->cc_cs = ce + 1;
1071 					break;
1072 				}
1073 
1074 
1075 			}
1076 			/*
1077 			 * Moving(removing) backword.
1078 			 */
1079 			saved_i = table_size - (i - saved_i);
1080 			for (; i < table_size; i++)
1081 				*saved_table++ = *table++;
1082 			return (saved_i);
1083 		}
1084 	}
1085 	return (table_size);
1086 }
1087 
1088 
1089 #ifdef DEBUG
1090 dump_table(table, size)
1091 register ccl_chars_t	*table;
1092 register int		size;
1093 {
1094 	register int	i;
1095 
1096 
1097 
1098 
1099 	if (! dbg)
1100 		return;
1101 
1102 
1103 	printf("Duming table %o with size %d\n", table, size);
1104 	size++;	/* To watch out NULL */
1105 	for (i = 0; i < size; i++, table++)
1106 	{
1107 		printf("{%3o, %3o}, ", table->cc_cs, table->cc_ce);
1108 	}
1109 	printf("\n");
1110 }
1111 #endif /* DEBUG */
1112 
1113 
1114 
1115 
1116 match(pfa, p)
1117 register struct fa *pfa;
1118 register wchar_t *p;
1119 {
1120 	register count;
1121 	register int n, ns, ne;
1122 	register wchar_t c, cs, ce;
1123 
1124 
1125 	if (p == 0)
1126 		return (0);
1127 	if (pfa->cc.h == 1) { /* fast test for first character, if possible */
1128 		ns = (++pfa)->cc.s.cc_ns;
1129 		cs = (pfa)->cc.s.cc_cs;
1130 		ne = (pfa)->cc.s.cc_ne;
1131 		ce = (pfa)->cc.s.cc_ce;
1132 		do {
1133 			c = *p;
1134 			n = wcsetno(c);
1135 			if (MLCMPLE(ns, cs, n, c) &&
1136 				MLCMPLE(n, c, ne, ce)) {
1137 				p++;
1138 				pfa = pfa->st;
1139 				goto adv;
1140 			}
1141 		} while (*p++ != 0);
1142 		return (0);
1143 	}
1144 	adv: if ((count = pfa->cc.h) < 0)
1145 		return (1);
1146 	do {
1147 		c = *p;
1148 		n = wcsetno(c);
1149 		for (pfa += count; count; count--, pfa--) {
1150 			ns = (pfa)->cc.s.cc_ns;
1151 			cs = (pfa)->cc.s.cc_cs;
1152 			ne = (pfa)->cc.s.cc_ne;
1153 			ce = (pfa)->cc.s.cc_ce;
1154 			if (MLCMPLE(ns, cs, n, c) && MLCMPLE(n, c, ne, ce))
1155 				break;
1156 		}
1157 		pfa = pfa->st;
1158 		if ((count = pfa->cc.h) < 0)
1159 			return (1);
1160 	} while (*p++ != 0);
1161 	return (0);
1162 }
1163