xref: /illumos-gate/usr/src/cmd/oawk/b.c (revision 533affcbc7fc4d0c8132976ea454aaa715fe2307)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 #include "awk.def"
32 #include "stdio.h"
33 #include "awk.h"
34 #include <stdlib.h>
35 
36 
37 extern NODE *op2();
38 extern struct fa *cgotofn();
39 #define	MAXLIN 256
40 #define	NCHARS 128
41 #define	NSTATES 256
42 
43 
44 #define	type(v)	v->nobj
45 #define	left(v)	v->narg[0]
46 #define	right(v)	v->narg[1]
47 #define	parent(v)	v->nnext
48 
49 
50 #define	LEAF	case CCL: case NCCL: case CHAR: case DOT:
51 #define	UNARY	case FINAL: case STAR: case PLUS: case QUEST:
52 
53 
54 /*
55  * encoding in tree NODEs:
56  * leaf (CCL, NCCL, CHAR, DOT): left is index,
57  * right contains value or pointer to value
58  * unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
59  * binary (CAT, OR): left and right are children
60  * parent contains pointer to parent
61  */
62 
63 
64 struct fa {
65 union {
66 		ccl_chars_t s;
67 		int h;
68 	} cc;
69 #define	MLCMPLT(m1, l1, m2, l2) ((m1 != m2 &&\
70 				(int)m1 < (int)m2) ||\
71 				(m1 == m2 && (int)l1 < (int)l2))
72 #define	MLCMPLE(m1, l1, m2, l2) ((m1 != m2 &&\
73 				(int)m1 <= (int)m2) ||\
74 				(m1 == m2 && (int)l1 <= (int)l2))
75 #define	MLCMPGT(m1, l1, m2, l2) ((m1 != m2 &&\
76 				(int)m1 > (int)m2) ||\
77 				(m1 == m2 && (int)l1 > (int)l2))
78 #define	MAX_CODESET	3
79 	struct fa *st;
80 };
81 
82 
83 int	*state[NSTATES];
84 int	*foll[MAXLIN];
85 int	setvec[MAXLIN];
86 NODE	*point[MAXLIN];
87 
88 
89 int	setcnt;
90 int	line;
91 
92 
93 static int	ccln_member();
94 static int	insert_table();
95 static int	delete_table();
96 static void	penter(NODE *p);
97 static void	follow(NODE *v);
98 static void	overflo(void);
99 static void	cfoll(NODE *v);
100 static void	freetr(NODE *p);
101 #ifdef DEBUG
102 #define	ddump_table(t, s)	dump_table(t, s)
103 #else
104 #define	ddump_table(t, s)
105 #endif
106 
107 struct fa *
108 makedfa(p)	/* returns dfa for tree pointed to by p */
109 NODE *p;
110 {
111 	NODE *p1;
112 	struct fa *fap;
113 	p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0,
114 		(NODE *) 0), (NODE *) 0), p);
115 		/* put DOT STAR in front of reg. exp. */
116 	p1 = op2(FINAL, p1, (NODE *) 0);	/* install FINAL NODE */
117 
118 
119 	line = 0;
120 	penter(p1);	/* enter parent pointers and leaf indices */
121 	point[line] = p1;	/* FINAL NODE */
122 	setvec[0] = 1;		/* for initial DOT STAR */
123 	cfoll(p1);	/* set up follow sets */
124 	fap = cgotofn();
125 	freetr(p1);	/* add this when alloc works */
126 	return (fap);
127 }
128 
129 static void
130 penter(NODE *p)	/* set up parent pointers and leaf indices */
131 {
132 	switch (type(p)) {
133 		LEAF
134 			left(p) = (NODE *)line;
135 			point[line++] = p;
136 			break;
137 		UNARY
138 			penter(left(p));
139 			parent(left(p)) = p;
140 			break;
141 		case CAT:
142 		case OR:
143 			penter(left(p));
144 			penter(right(p));
145 			parent(left(p)) = p;
146 			parent(right(p)) = p;
147 			break;
148 		default:
149 			error(FATAL, "unknown type %d in penter\n", type(p));
150 			break;
151 	}
152 }
153 
154 static void
155 freetr(NODE *p)	/* free parse tree and follow sets */
156 {
157 	switch (type(p)) {
158 		LEAF
159 			xfree(foll[(int)left(p)]);
160 			xfree(p);
161 			break;
162 		UNARY
163 			freetr(left(p));
164 			xfree(p);
165 			break;
166 		case CAT:
167 		case OR:
168 			freetr(left(p));
169 			freetr(right(p));
170 			xfree(p);
171 			break;
172 		default:
173 			error(FATAL, "unknown type %d in freetr", type(p));
174 			break;
175 	}
176 }
177 ccl_chars_t *
178 cclenter(wchar_t *p)
179 {
180 	int 		i, cn;
181 	wchar_t		c, pc;
182 	wchar_t		*op;
183 	ccl_chars_t	*new;
184 	ccl_chars_t	chars[MAXLIN];
185 
186 	op = p;
187 	i = 0;
188 	while ((c = *p++) != 0) {
189 		if (c == '-' && i > 0)  {
190 			if (*p != 0) {
191 				/*
192 				 * If there are not in same code set,  the
193 				 * class should be ignore (make two independent
194 				 * characters)!
195 				 */
196 				c = *p++;
197 				cn = wcsetno(pc);
198 				if (cn != wcsetno(c) || pc > c)
199 					goto char_array;
200 				i = insert_table(chars, i, cn, pc, cn, c);
201 				continue;
202 			}
203 		}
204 char_array:
205 		if (i >= MAXLIN)
206 			overflo();
207 		cn = wcsetno(c);
208 		i = insert_table(chars, i, cn, c, cn, c);
209 		pc = c;
210 	}
211 	dprintf("cclenter: in = |%ws|, ", op, NULL, NULL);
212 	xfree(op);
213 	i = (i + 1) * sizeof (ccl_chars_t);
214 	if ((new = (ccl_chars_t *)malloc(i)) == NULL)
215 		error(FATAL, "out of space in cclenter on %s", op);
216 	(void) memcpy((char *)new, (char *)chars, i);
217 	ddump_table(chars, i / 4);
218 
219 
220 	return (new);
221 }
222 
223 static void
224 overflo(void)
225 {
226 	error(FATAL, "regular expression too long\n");
227 }
228 
229 static void
230 cfoll(NODE *v)	/* enter follow set of each leaf of vertex v into foll[leaf] */
231 {
232 	int i;
233 	int prev;
234 	int *add();
235 
236 
237 	switch (type(v)) {
238 		LEAF
239 			setcnt = 0;
240 			for (i = 1; i <= line; i++)
241 				setvec[i] = 0;
242 			follow(v);
243 			foll[(int)left(v)] = add(setcnt);
244 			break;
245 		UNARY
246 			cfoll(left(v));
247 			break;
248 		case CAT:
249 		case OR:
250 			cfoll(left(v));
251 			cfoll(right(v));
252 			break;
253 		default:
254 			error(FATAL, "unknown type %d in cfoll", type(v));
255 	}
256 }
257 
258 int
259 first(NODE *p)		/* collects initially active leaves of p into setvec */
260 	/* returns 0 or 1 depending on whether p matches empty string */
261 {
262 	int b;
263 
264 
265 	switch (type(p)) {
266 		LEAF
267 			if (setvec[(int)left(p)] != 1) {
268 				setvec[(int)left(p)] = 1;
269 				setcnt++;
270 			}
271 			if (type(p) == CCL &&
272 			(*(ccl_chars_t *)right(p)).cc_cs == (wchar_t)0x0)
273 				return (0);		/* empty CCL */
274 			else return (1);
275 		case FINAL:
276 		case PLUS:
277 			if (first(left(p)) == 0)
278 				return (0);
279 			return (1);
280 		case STAR:
281 		case QUEST:
282 			first(left(p));
283 			return (0);
284 		case CAT:
285 			if (first(left(p)) == 0 && first(right(p)) == 0)
286 				return (0);
287 			return (1);
288 		case OR:
289 			b = first(right(p));
290 			if (first(left(p)) == 0 || b == 0)
291 				return (0);
292 			return (1);
293 	}
294 	error(FATAL, "unknown type %d in first\n", type(p));
295 	return (-1);
296 }
297 
298 static void
299 follow(NODE *v)
300 		/* collects leaves that can follow v into setvec */
301 {
302 	NODE *p;
303 
304 
305 	if (type(v) == FINAL)
306 		return;
307 	p = parent(v);
308 	switch (type(p)) {
309 		case STAR:
310 		case PLUS:	first(v);
311 				follow(p);
312 				return;
313 
314 
315 		case OR:
316 		case QUEST:	follow(p);
317 				return;
318 
319 
320 		case CAT:	if (v == left(p)) { /* v is left child of p */
321 					if (first(right(p)) == 0) {
322 						follow(p);
323 						return;
324 					}
325 				} else		/* v is right child */
326 					follow(p);
327 				return;
328 		case FINAL:	if (setvec[line] != 1) {
329 					setvec[line] = 1;
330 					setcnt++;
331 				}
332 				return;
333 	}
334 }
335 
336 
337 /*
338  * There are three type of functions for checking member ship.  Because I have
339  * been changed structure of CCL tables.  And some CCL tables end up with NULLs
340  * but someone has length and will includes NULLs in table as one of data.
341  * Please note, CCL table which has a length data and data will include NULLs,
342  * it only used within a this source file("b.c").
343  */
344 
345 int				/* is cs thru ce in s? */
346 ccl_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s)
347 {
348 	/*
349 	 * The specified range(cs, ce) must be beside the range between
350 	 * s->cc_start and s->cc_end to determine member.
351 	 */
352 	while (s->cc_cs || s->cc_ce) {
353 		if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
354 				MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
355 			return (1);
356 		s++;
357 	}
358 	return (0);
359 }
360 
361 
362 static int			/* is cs thru ce in s? */
363 ccln_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s, int n)
364 {
365 	/*
366 	 * The specified range(cs, ce) must be beside the range between
367 	 * s->cc_start and s->cc_end to determine member.
368 	 */
369 	while (n-- > 0) {
370 		if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
371 				MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
372 			return (1);
373 		s++;
374 	}
375 	return (0);
376 }
377 
378 
379 int
380 member(wchar_t c, wchar_t *s)	/* is c in s? */
381 {
382 	while (*s)
383 		if (c == *s++)
384 			return (1);
385 	return (0);
386 }
387 
388 int
389 notin(int **array, int n, int *prev) /* is setvec in array[0] thru array[n]? */
390 {
391 	int i, j;
392 	int *ptr;
393 	for (i = 0; i <= n; i++) {
394 		ptr = array[i];
395 		if (*ptr == setcnt) {
396 			for (j = 0; j < setcnt; j++)
397 				if (setvec[*(++ptr)] != 1) goto nxt;
398 			*prev = i;
399 			return (0);
400 		}
401 		nxt: /* dummy */;
402 	}
403 	return (1);
404 }
405 
406 
407 int *
408 add(int n)
409 {		/* remember setvec */
410 	int *ptr, *p;
411 	int i;
412 	if ((p = ptr = (int *)malloc((n+1)*sizeof (int))) == NULL)
413 		overflo();
414 	*ptr = n;
415 	dprintf("add(%d)\n", n, NULL, NULL);
416 	for (i = 1; i <= line; i++)
417 		if (setvec[i] == 1) {
418 			*(++ptr) = i;
419 		dprintf("  ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
420 		}
421 	dprintf("\n", NULL, NULL, NULL);
422 	return (p);
423 }
424 
425 
426 struct fa *
427 cgotofn()
428 {
429 	int i, k;
430 	int *ptr;
431 	int ns, ne;
432 	wchar_t cs, ce;
433 	ccl_chars_t *p;
434 	NODE *cp;
435 	int j, n, s, ind, numtrans;
436 	int finflg;
437 	int curpos, num, prev;
438 	struct fa *where[NSTATES];
439 
440 
441 	struct {
442 		ccl_chars_t	cc;
443 		int		n;
444 	} fatab[257];
445 	struct fa *pfa;
446 
447 
448 	char index[MAXLIN];
449 	char iposns[MAXLIN];
450 	int sposns[MAXLIN];
451 	int spmax, spinit;
452 	ccl_chars_t symbol[NCHARS];
453 	ccl_chars_t isyms[NCHARS];
454 	ccl_chars_t ssyms[NCHARS];
455 	int ssmax, symax, ismax, ssinit;
456 
457 
458 	wchar_t hat;
459 	int hatcn;
460 
461 
462 	for (i = 0; i <= line; i++) index[i] = iposns[i] = setvec[i] = 0;
463 	isyms[0].cc_cs = isyms[0].cc_ce = (wchar_t)0x0;
464 	for (i = 0; i < NCHARS; i++)
465 		isyms[i] = symbol[i] = ssyms[i] = isyms[0];
466 	symax = 0;
467 	setcnt = 0;
468 	/* compute initial positions and symbols of state 0 */
469 	ismax = 0;
470 	ssmax = 0;
471 	ptr = state[0] = foll[0];
472 	spinit = *ptr;
473 	hat = HAT;
474 	hatcn = wcsetno(hat);
475 	for (i = 0; i < spinit; i++) {
476 		curpos = *(++ptr);
477 		sposns[i] = curpos;
478 		iposns[curpos] = 1;
479 		cp = point[curpos];
480 		dprintf("i= %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
481 		switch (type(cp)) {
482 			case CHAR:
483 				k = (int)right(cp);
484 				ns = wcsetno(k);
485 				if (! ccln_member(ns, k, ns, k,
486 							isyms, ismax)) {
487 					ismax = insert_table(isyms, ismax,
488 								ns, k, ns, k);
489 				}
490 				ssyms[ssmax].cc_ns = ns;
491 				ssyms[ssmax].cc_cs = k;
492 				ssyms[ssmax].cc_ne = ns;
493 				ssyms[ssmax++].cc_ce = k;
494 				break;
495 			case DOT:
496 				cs = WC_VERY_SMALL;
497 				ns = 0;
498 				ce = HAT - 1;
499 				ne = hatcn;
500 				if (! ccln_member(ns, cs, ne, ce,
501 							isyms, ismax)) {
502 					ismax = insert_table(isyms, ismax,
503 								ns, cs, ne, ce);
504 				}
505 				ssyms[ssmax].cc_cs = cs;
506 				ssyms[ssmax].cc_ns = ns;
507 				ssyms[ssmax].cc_ce = ce;
508 				ssyms[ssmax++].cc_ne = ne;
509 				cs = HAT + 1;
510 				ns = hatcn;
511 				ce = WC_VERY_LARGE;
512 				ne = MAX_CODESET;
513 				if (! ccln_member(ns, cs, ne, ce,
514 							isyms, ismax)) {
515 					ismax = insert_table(isyms, ismax,
516 								ns, cs, ne, ce);
517 				}
518 				ssyms[ssmax].cc_cs = cs;
519 				ssyms[ssmax].cc_ns = ns;
520 				ssyms[ssmax].cc_ce = ce;
521 				ssyms[ssmax++].cc_ne = ne;
522 				break;
523 			case CCL:
524 				cs = HAT;
525 				ns = hatcn;
526 				for (p = (ccl_chars_t *)right(cp);
527 					p->cc_cs; p++) {
528 					if ((p->cc_ns != ns ||\
529 					p->cc_cs != cs) &&\
530 				!ccln_member(p->cc_ns, p->cc_cs,
531 				p->cc_ne, p->cc_ce, isyms, ismax)) {
532 						ismax = insert_table(isyms,
533 				ismax, p->cc_ns, p->cc_cs, p->cc_ne, p->cc_ce);
534 					}
535 					ssyms[ssmax++] = *p;
536 				}
537 				break;
538 			case NCCL:
539 				ns = 0;
540 				cs = WC_VERY_SMALL;
541 				for (p = (ccl_chars_t *)right(cp);
542 					p->cc_cs; p++) {
543 					if ((ns != hatcn || p->cc_cs != HAT) &&
544 						! ccln_member(ns, cs,
545 							p->cc_ns, p->cc_cs-1,
546 								isyms, ismax)) {
547 						ismax = insert_table(isyms,
548 								ismax,
549 								ns, cs,
550 								p->cc_ns,
551 								p->cc_cs-1);
552 					}
553 					ssyms[ssmax].cc_ns = ns;
554 					ssyms[ssmax].cc_cs = cs;
555 					ssyms[ssmax].cc_ne = p->cc_ns;
556 					ssyms[ssmax++].cc_ce = p->cc_cs-1;
557 					if (p->cc_ce == (wchar_t)0x0) {
558 						ns = p->cc_ns;
559 						cs = p->cc_cs + 1;
560 
561 					} else {
562 						ns = p->cc_ne;
563 						cs = p->cc_ce + 1;
564 					}
565 				}
566 				if ((ns != hatcn || cs != HAT) &&
567 					! ccln_member(ns, cs,
568 						MAX_CODESET, WC_VERY_LARGE,
569 							isyms, ismax)) {
570 					ismax = insert_table(isyms, ismax,
571 							ns, cs, MAX_CODESET,
572 							WC_VERY_LARGE);
573 				}
574 				ssyms[ssmax].cc_ns = ns;
575 				ssyms[ssmax].cc_cs = cs;
576 				ssyms[ssmax].cc_ne = MAX_CODESET;
577 				ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
578 				break;
579 		}
580 	}
581 	ssinit = ssmax;
582 	symax = 0;
583 	n = 0;
584 	for (s = 0; s <= n; s++)  {
585 		dprintf("s = %d\n", s, NULL, NULL);
586 		ind = 0;
587 		numtrans = 0;
588 		finflg = 0;
589 		if (*(state[s] + *state[s]) == line) {		/* s final? */
590 			finflg = 1;
591 			goto tenter;
592 		}
593 		spmax = spinit;
594 		ssmax = ssinit;
595 		ptr = state[s];
596 		num = *ptr;
597 		for (i = 0; i < num; i++) {
598 			curpos = *(++ptr);
599 			if (iposns[curpos] != 1 && index[curpos] != 1) {
600 				index[curpos] = 1;
601 				sposns[spmax++] = curpos;
602 			}
603 			cp = point[curpos];
604 			switch (type(cp)) {
605 				case CHAR:
606 					k = (int)right(cp);
607 					ns = wcsetno(k);
608 					if (! ccln_member(ns, k, ns, k,
609 							isyms, ismax) &&
610 						! ccln_member(ns, k, ns, k,
611 							symbol, symax)) {
612 						symax = insert_table(symbol,
613 									symax,
614 									ns, k,
615 									ns, k);
616 					}
617 					ssyms[ssmax].cc_ns = ns;
618 					ssyms[ssmax].cc_cs = k;
619 					ssyms[ssmax].cc_ne = ns;
620 					ssyms[ssmax++].cc_ce = k;
621 					break;
622 				case DOT:
623 					cs = WC_VERY_SMALL;
624 					ns = 0;
625 					ce = HAT - 1;
626 					ne = hatcn;
627 					if (! ccln_member(ns, cs, ne, ce,
628 							isyms, ismax) &&
629 						! ccln_member(ns, cs, ne, ce,
630 							symbol, symax)) {
631 						symax = insert_table(symbol,
632 									symax,
633 									ns, cs,
634 									ne, ce);
635 					}
636 					ssyms[ssmax].cc_cs = cs;
637 					ssyms[ssmax].cc_ns = ns;
638 					ssyms[ssmax].cc_ce = ce;
639 					ssyms[ssmax++].cc_ne = ne;
640 					cs = HAT + 1;
641 					ns = hatcn;
642 					ce = WC_VERY_LARGE;
643 					ne = MAX_CODESET;
644 					if (! ccln_member(ns, cs, ne, ce,
645 								isyms, ismax) &&
646 						! ccln_member(ns, cs, ne, ce,
647 							symbol, symax)) {
648 						symax = insert_table(symbol,
649 									symax,
650 									ns, cs,
651 									ne, ce);
652 					}
653 					ssyms[ssmax].cc_cs = cs;
654 					ssyms[ssmax].cc_ns = ns;
655 					ssyms[ssmax].cc_ce = ce;
656 					ssyms[ssmax++].cc_ne = ne;
657 					break;
658 				case CCL:
659 					cs = HAT;
660 					ns = hatcn;
661 					for (p = (ccl_chars_t *)right(cp);
662 						p->cc_cs; p++) {
663 						if ((p->cc_ns != ns ||
664 							p->cc_cs != cs) &&
665 							! ccln_member(p->cc_ns,
666 							p->cc_cs, p->cc_ne,
667 						p->cc_ce, isyms, ismax) &&
668 						!ccln_member(p->cc_ns, p->cc_cs,
669 						p->cc_ne, p->cc_ce, symbol,
670 						symax)) {
671 							symax = insert_table(
672 						symbol, symax, p->cc_ns,
673 						p->cc_cs, p->cc_ne, p->cc_ce);
674 						}
675 						ssyms[ssmax++] = *p;
676 					}
677 					break;
678 				case NCCL:
679 					ns = 0;
680 					cs = WC_VERY_SMALL;
681 		for (p = (ccl_chars_t *)right(cp); p->cc_cs; p++) {
682 			if ((p->cc_ns != hatcn || p->cc_cs != HAT) &&
683 					! ccln_member(ns, cs, p->cc_ns,
684 					p->cc_cs-1, isyms, ismax) &&
685 					! ccln_member(ns, cs, p->cc_ns,
686 					p->cc_cs-1, symbol, symax)) {
687 				symax = insert_table(symbol,
688 					symax, ns, cs, p->cc_ns, p->cc_cs-1);
689 						}
690 						ssyms[ssmax].cc_ns = ns;
691 						ssyms[ssmax].cc_cs = cs;
692 						ssyms[ssmax].cc_ne = p->cc_ns;
693 						ssyms[ssmax++].cc_ce
694 								= p->cc_cs-1;
695 						if (p->cc_ce == (wchar_t)0x0) {
696 							ns = p->cc_ns;
697 							cs = p->cc_cs + 1;
698 
699 						} else {
700 							ns = p->cc_ne;
701 							cs = p->cc_ce + 1;
702 						}
703 					}
704 		if ((ns != hatcn || cs != HAT) && ! ccln_member(ns, cs,
705 				MAX_CODESET, WC_VERY_LARGE, isyms, ismax) &&
706 				! ccln_member(ns, cs, MAX_CODESET,
707 					WC_VERY_LARGE, symbol, symax)) {
708 			symax = insert_table(symbol, symax, ns, cs,
709 								MAX_CODESET,
710 								WC_VERY_LARGE);
711 					}
712 					ssyms[ssmax].cc_ns = ns;
713 					ssyms[ssmax].cc_cs = cs;
714 					ssyms[ssmax].cc_ne = MAX_CODESET;
715 					ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
716 					break;
717 			}
718 		}
719 		for (j = 0; j < ssmax; j++) {	/* nextstate(s, ssyms[j]) */
720 			ns = ssyms[j].cc_ns;
721 			cs = ssyms[j].cc_cs;
722 			ne = ssyms[j].cc_ne;
723 			ce = ssyms[j].cc_ce;
724 dprintf("j = %d, cs = %o, ce = %o\n", j, cs, ce);
725 			symax = delete_table(symbol, symax, ns, cs, ne, ce);
726 			setcnt = 0;
727 			for (k = 0; k <= line; k++) setvec[k] = 0;
728 			for (i = 0; i < spmax; i++) {
729 				index[sposns[i]] = 0;
730 				cp = point[sposns[i]];
731 				if ((k = type(cp)) != FINAL) {
732 					if (k == CHAR && ns == ne && cs == ce &&
733 						cs == (int)right(cp) ||
734 						k == DOT || k == CCL &&
735 						ccl_member(ns, cs, ne, ce,
736 						(ccl_chars_t *)right(cp)) ||
737 						k == NCCL &&
738 						!ccl_member(ns, cs, ne, ce,
739 						(ccl_chars_t *)right(cp))) {
740 						ptr = foll[sposns[i]];
741 						num = *ptr;
742 						for (k = 0; k < num; k++) {
743 						if (setvec[*(++ptr)] != 1 &&
744 							iposns[*ptr] != 1) {
745 							setvec[*ptr] = 1;
746 								setcnt++;
747 							}
748 						}
749 					}
750 				}
751 			} /* end nextstate */
752 			if (notin(state, n, &prev)) {
753 				if (n >= NSTATES - 1) {
754 		printf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
755 					overflo();
756 				}
757 				state[++n] = add(setcnt);
758 				dprintf("	delta(%d,[%o,%o])",
759 					s, cs, ce);
760 				dprintf(" = %d, ind = %d\n", n, ind+1, NULL);
761 				fatab[++ind].cc.cc_ns = ns;
762 				fatab[ind].cc.cc_cs = cs;
763 				fatab[ind].cc.cc_ne = ne;
764 				fatab[ind].cc.cc_ce = ce;
765 				fatab[ind].n = n;
766 				numtrans++;
767 			} else {
768 				if (prev != 0) {
769 					dprintf("	delta(%d,[%o,%o])",
770 						s, cs, ce);
771 					dprintf("= %d, ind = %d\n",
772 						prev, ind+1, NULL);
773 					fatab[++ind].cc.cc_ns = ns;
774 					fatab[ind].cc.cc_cs = cs;
775 					fatab[ind].cc.cc_ne = ne;
776 					fatab[ind].cc.cc_ce = ce;
777 					fatab[ind].n = prev;
778 					numtrans++;
779 				}
780 			}
781 		}
782 	tenter:
783 		if ((pfa = (struct fa *)malloc((numtrans + 1)
784 						* sizeof (struct fa))) == NULL)
785 			overflo();
786 		where[s] = pfa;
787 		if (finflg)
788 			pfa->cc.h = -1;		/* s is a final state */
789 		else
790 			pfa->cc.h = numtrans;
791 		pfa->st = 0;
792 		for (i = 1, pfa += 1; i <= numtrans; i++, pfa++) {
793 			pfa->cc.s = fatab[i].cc;
794 			pfa->st = (struct fa *)fatab[i].n;
795 		}
796 	}
797 	for (i = 0; i <= n; i++) {
798 		if (i != 0)	/* state[0] is freed later in freetr() */
799 			xfree(state[i]);	/* free state[i] */
800 		pfa = where[i];
801 		pfa->st = where[0];
802 		dprintf("state %d: (%o)\n", i, pfa, NULL);
803 		dprintf("	numtrans = %d,	default = %o\n",
804 			pfa->cc.h, pfa->st, NULL);
805 		for (k = 1; k <= pfa->cc.h; k++) {
806 			(pfa+k)->st = where[(int)(pfa+k)->st];
807 			dprintf("	char = [%o,%o],	nextstate = %o\n",
808 				(pfa+k)->cc.s.cc_cs, (pfa+k)->cc.s.cc_ce,
809 				(pfa+k)->st);
810 		}
811 	}
812 	pfa = where[0];
813 	if ((num = pfa->cc.h) < 0)
814 		return (where[0]);
815 	for (pfa += num; num; num--, pfa--)
816 		if (pfa->cc.s.cc_ns == hatcn && pfa->cc.s.cc_cs == HAT) {
817 			return (pfa->st);
818 		}
819 	return (where[0]);
820 }
821 
822 
823 /*
824  * Insert CCL entry to CCL table with maintain optimized order.
825  */
826 static int
827 insert_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
828 	int ne, wchar_t ce)
829 {
830 	int		i;
831 	int		tns, tne;
832 	wchar_t		tcs, tce;
833 	ccl_chars_t	*table;
834 	ccl_chars_t	*saved_table;
835 	int		saved_i;
836 
837 
838 
839 
840 	dprintf("Inserting {%o, %o} to table %o\n", cs, ce, table_base);
841 	/*
842 	 * Searching the table to find out where should put the new item.
843 	 */
844 	for (i = 0, table = table_base; i < table_size; i++, table++) {
845 		tns = table->cc_ns;
846 		tcs = table->cc_cs;
847 		tne = table->cc_ne;
848 		tce = table->cc_ce;
849 		if (MLCMPLT(ne, ce, tns, (tcs - 1))) {
850 			/*
851 			 * Quick! insert to font of current table entries.
852 			 */
853 qinsert:
854 			table_size++;
855 			for (; i < table_size; i++, table++) {
856 				tns = table->cc_ns;
857 				tcs = table->cc_cs;
858 				tne = table->cc_ne;
859 				tce = table->cc_ce;
860 				table->cc_ns = ns;
861 				table->cc_cs = cs;
862 				table->cc_ne = ne;
863 				table->cc_ce = ce;
864 				ns = tns;
865 				cs = tcs;
866 				ne = tne;
867 				ce = tce;
868 			}
869 			goto add_null;
870 		} else if (MLCMPLE(tns, (tcs - 1), ns, cs) &&
871 				MLCMPLE(ns, cs, tne, (tce + 1))) {
872 			/*
873 			 * Starting point is within the current entry.
874 			 */
875 			if (MLCMPGT(tns, tcs, ns, cs)) {
876 				table->cc_ns = ns;
877 				table->cc_cs = cs;
878 			}
879 			if (MLCMPLE(ne, ce, tne, tce)) {
880 				return (table_size);
881 			}
882 			goto combine;
883 		}
884 	}
885 
886 
887 	/*
888 	 * Adding new one to end of table.
889 	 */
890 	table->cc_ns = ns;
891 	table->cc_cs = cs;
892 	table->cc_ne = ne;
893 	table->cc_ce = ce;
894 
895 
896 	table_size++;
897 	goto add_null;
898 
899 
900 
901 
902 	combine:
903 	/*
904 	 * Check and try to combine the new entry with rest of entries.
905 	 */
906 	if ((i + 1) >= table_size) {
907 		table->cc_ne = ne;
908 		table->cc_ce = ce;
909 		return (table_size);
910 	}
911 
912 
913 	saved_table = table++;
914 	saved_i = i++;
915 
916 
917 	/*
918 	 * Finding the spot where we should put the end point.
919 	 */
920 	for (; i < table_size; i++, table++) {
921 		if (MLCMPLT(ne, ce, table->cc_ns, (table->cc_cs - 1))) {
922 			break;
923 		} else
924 		if (MLCMPLE(table->cc_ns, (table->cc_cs - 1), ne, ce) &&
925 			MLCMPLE(ne, ce, table->cc_ne, (table->cc_ce + 1))) {
926 			/*
927 			 * Tack with this table.
928 			 */
929 			if (MLCMPLT(ne, ce, table->cc_ne, table->cc_ce)) {
930 				ne = table->cc_ne;
931 				ce = table->cc_ce;
932 			}
933 			table++;
934 			i++;
935 			break;
936 		}
937 	}
938 
939 
940 	saved_table->cc_ne = ne;
941 	saved_table->cc_ce = ce;
942 	saved_i = table_size - (i - saved_i - 1);
943 
944 
945 	/*
946 	 * Moving the rest of entries.
947 	 */
948 	for (; i < table_size; i++, table++)
949 		*(++saved_table) = *table;
950 	table_size = saved_i;
951 
952 
953 add_null:
954 	table_base[table_size].cc_cs = (wchar_t)0x0;
955 	table_base[table_size].cc_ce = (wchar_t)0x0;
956 
957 
958 	return (table_size);
959 }
960 
961 
962 
963 
964 static int
965 delete_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
966 		int ne, wchar_t ce)
967 {
968 	int		i;
969 	int		saved_i;
970 	ccl_chars_t	*table;
971 	ccl_chars_t	*saved_table;
972 	int		tns;
973 	wchar_t		tcs;
974 	int		tne;
975 	wchar_t		tce;
976 
977 
978 
979 
980 	for (i = 0, table = table_base; i < table_size; i++, table++) {
981 		tns = table->cc_ns;
982 		tcs = table->cc_cs;
983 		tne = table->cc_ne;
984 		tce = table->cc_ce;
985 		if (MLCMPLT(ne, ce, tns, tcs))
986 			return (table_size);
987 		else if (MLCMPLT(ne, ce, tne, tce)) {
988 			if (MLCMPLE(ns, cs, tns, tcs)) {
989 				/*
990 				 * Shrink type 1.
991 				 */
992 				table->cc_ns = ne;
993 				table->cc_cs = ce + 1;
994 				return (table_size);
995 
996 			} else {
997 				/*
998 				 * Spliting !!
999 				 */
1000 				table->cc_ns = ne;
1001 				table->cc_cs = ce + 1;
1002 				tne = ns;
1003 				tce = cs - 1;
1004 				table_size++;
1005 				for (; i < table_size; i++, table++) {
1006 					ns = table->cc_ns;
1007 					cs = table->cc_cs;
1008 					ne = table->cc_ne;
1009 					ce = table->cc_ce;
1010 					table->cc_ns = tns;
1011 					table->cc_cs = tcs;
1012 					table->cc_ne = tne;
1013 					table->cc_ce = tce;
1014 					tns = ns;
1015 					tcs = cs;
1016 					tne = ne;
1017 					tce = ce;
1018 				}
1019 				return (table_size);
1020 			}
1021 
1022 		} else if (MLCMPLE(ns, cs, tne, tce)) {
1023 			if (MLCMPGT(ns, cs, tns, tcs)) {
1024 				/*
1025 				 * Shrink current table(type 2).
1026 				 */
1027 				table->cc_ne = ns;
1028 				table->cc_ce = cs - 1;
1029 				table++;
1030 				i++;
1031 			}
1032 			/*
1033 			 * Search for the end point.
1034 			 */
1035 			saved_i = i;
1036 			saved_table = table;
1037 			for (; i < table_size; i++, table++) {
1038 				if (MLCMPLT(ne, ce,
1039 						table->cc_ns, table->cc_cs)) {
1040 					/*
1041 					 * Easy point, no shrinks!
1042 					 */
1043 					break;
1044 
1045 				} else if (MLCMPGT(table->cc_ne, table->cc_ce,
1046 						ne, ce)) {
1047 					/*
1048 					 * Shrinking...
1049 					 */
1050 					table->cc_ns = ne;
1051 					table->cc_cs = ce + 1;
1052 					break;
1053 				}
1054 
1055 
1056 			}
1057 			/*
1058 			 * Moving(removing) backword.
1059 			 */
1060 			saved_i = table_size - (i - saved_i);
1061 			for (; i < table_size; i++)
1062 				*saved_table++ = *table++;
1063 			return (saved_i);
1064 		}
1065 	}
1066 	return (table_size);
1067 }
1068 
1069 
1070 #ifdef DEBUG
1071 dump_table(ccl_chars_t *table, int size)
1072 {
1073 	int	i;
1074 
1075 
1076 
1077 
1078 	if (! dbg)
1079 		return;
1080 
1081 
1082 	printf("Duming table %o with size %d\n", table, size);
1083 	size++;	/* To watch out NULL */
1084 	for (i = 0; i < size; i++, table++) {
1085 		printf("{%3o, %3o}, ", table->cc_cs, table->cc_ce);
1086 	}
1087 	printf("\n");
1088 }
1089 #endif /* DEBUG */
1090 
1091 
1092 
1093 int
1094 match(struct fa *pfa, wchar_t *p)
1095 {
1096 	int count;
1097 	int n, ns, ne;
1098 	wchar_t c, cs, ce;
1099 
1100 
1101 	if (p == 0)
1102 		return (0);
1103 	if (pfa->cc.h == 1) { /* fast test for first character, if possible */
1104 		ns = (++pfa)->cc.s.cc_ns;
1105 		cs = (pfa)->cc.s.cc_cs;
1106 		ne = (pfa)->cc.s.cc_ne;
1107 		ce = (pfa)->cc.s.cc_ce;
1108 		do {
1109 			c = *p;
1110 			n = wcsetno(c);
1111 			if (MLCMPLE(ns, cs, n, c) &&
1112 				MLCMPLE(n, c, ne, ce)) {
1113 				p++;
1114 				pfa = pfa->st;
1115 				goto adv;
1116 			}
1117 		} while (*p++ != 0);
1118 		return (0);
1119 	}
1120 	adv: if ((count = pfa->cc.h) < 0)
1121 		return (1);
1122 	do {
1123 		c = *p;
1124 		n = wcsetno(c);
1125 		for (pfa += count; count; count--, pfa--) {
1126 			ns = (pfa)->cc.s.cc_ns;
1127 			cs = (pfa)->cc.s.cc_cs;
1128 			ne = (pfa)->cc.s.cc_ne;
1129 			ce = (pfa)->cc.s.cc_ce;
1130 			if (MLCMPLE(ns, cs, n, c) && MLCMPLE(n, c, ne, ce))
1131 				break;
1132 		}
1133 		pfa = pfa->st;
1134 		if ((count = pfa->cc.h) < 0)
1135 			return (1);
1136 	} while (*p++ != 0);
1137 	return (0);
1138 }
1139