xref: /titanic_50/usr/src/cmd/oawk/b.c (revision 45effca3dd1ba645d6bd3955c8ecd0c7bb85298a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include "awk.def"
34 #include "stdio.h"
35 #include "awk.h"
36 #include <stdlib.h>
37 
38 
39 extern NODE *op2();
40 extern struct fa *cgotofn();
41 #define	MAXLIN 256
42 #define	NCHARS 128
43 #define	NSTATES 256
44 
45 
46 #define	type(v)	v->nobj
47 #define	left(v)	v->narg[0]
48 #define	right(v)	v->narg[1]
49 #define	parent(v)	v->nnext
50 
51 
52 #define	LEAF	case CCL: case NCCL: case CHAR: case DOT:
53 #define	UNARY	case FINAL: case STAR: case PLUS: case QUEST:
54 
55 
56 /*
57  * encoding in tree NODEs:
58  * leaf (CCL, NCCL, CHAR, DOT): left is index,
59  * right contains value or pointer to value
60  * unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
61  * binary (CAT, OR): left and right are children
62  * parent contains pointer to parent
63  */
64 
65 
66 struct fa {
67 union {
68 		ccl_chars_t s;
69 		int h;
70 	} cc;
71 #define	MLCMPLT(m1, l1, m2, l2) ((m1 != m2 &&\
72 				(int)m1 < (int)m2) ||\
73 				(m1 == m2 && (int)l1 < (int)l2))
74 #define	MLCMPLE(m1, l1, m2, l2) ((m1 != m2 &&\
75 				(int)m1 <= (int)m2) ||\
76 				(m1 == m2 && (int)l1 <= (int)l2))
77 #define	MLCMPGT(m1, l1, m2, l2) ((m1 != m2 &&\
78 				(int)m1 > (int)m2) ||\
79 				(m1 == m2 && (int)l1 > (int)l2))
80 #define	MAX_CODESET	3
81 	struct fa *st;
82 };
83 
84 
85 int	*state[NSTATES];
86 int	*foll[MAXLIN];
87 int	setvec[MAXLIN];
88 NODE	*point[MAXLIN];
89 
90 
91 int	setcnt;
92 int	line;
93 
94 
95 static int	ccln_member();
96 static int	insert_table();
97 static int	delete_table();
98 static void	penter(NODE *p);
99 static void	follow(NODE *v);
100 static void	overflo(void);
101 static void	cfoll(NODE *v);
102 static void	freetr(NODE *p);
103 #ifdef DEBUG
104 #define	ddump_table(t, s)	dump_table(t, s)
105 #else
106 #define	ddump_table(t, s)
107 #endif
108 
109 struct fa *
makedfa(p)110 makedfa(p)	/* returns dfa for tree pointed to by p */
111 NODE *p;
112 {
113 	NODE *p1;
114 	struct fa *fap;
115 	p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0,
116 		(NODE *) 0), (NODE *) 0), p);
117 		/* put DOT STAR in front of reg. exp. */
118 	p1 = op2(FINAL, p1, (NODE *) 0);	/* install FINAL NODE */
119 
120 
121 	line = 0;
122 	penter(p1);	/* enter parent pointers and leaf indices */
123 	point[line] = p1;	/* FINAL NODE */
124 	setvec[0] = 1;		/* for initial DOT STAR */
125 	cfoll(p1);	/* set up follow sets */
126 	fap = cgotofn();
127 	freetr(p1);	/* add this when alloc works */
128 	return (fap);
129 }
130 
131 static void
penter(NODE * p)132 penter(NODE *p)	/* set up parent pointers and leaf indices */
133 {
134 	switch (type(p)) {
135 		LEAF
136 			left(p) = (NODE *)line;
137 			point[line++] = p;
138 			break;
139 		UNARY
140 			penter(left(p));
141 			parent(left(p)) = p;
142 			break;
143 		case CAT:
144 		case OR:
145 			penter(left(p));
146 			penter(right(p));
147 			parent(left(p)) = p;
148 			parent(right(p)) = p;
149 			break;
150 		default:
151 			error(FATAL, "unknown type %d in penter\n", type(p));
152 			break;
153 	}
154 }
155 
156 static void
freetr(NODE * p)157 freetr(NODE *p)	/* free parse tree and follow sets */
158 {
159 	switch (type(p)) {
160 		LEAF
161 			xfree(foll[(int)left(p)]);
162 			xfree(p);
163 			break;
164 		UNARY
165 			freetr(left(p));
166 			xfree(p);
167 			break;
168 		case CAT:
169 		case OR:
170 			freetr(left(p));
171 			freetr(right(p));
172 			xfree(p);
173 			break;
174 		default:
175 			error(FATAL, "unknown type %d in freetr", type(p));
176 			break;
177 	}
178 }
179 ccl_chars_t *
cclenter(wchar_t * p)180 cclenter(wchar_t *p)
181 {
182 	int 		i, cn;
183 	wchar_t		c, pc;
184 	wchar_t		*op;
185 	ccl_chars_t	*new;
186 	ccl_chars_t	chars[MAXLIN];
187 
188 	op = p;
189 	i = 0;
190 	while ((c = *p++) != 0) {
191 		if (c == '-' && i > 0)  {
192 			if (*p != 0) {
193 				/*
194 				 * If there are not in same code set,  the
195 				 * class should be ignore (make two independent
196 				 * characters)!
197 				 */
198 				c = *p++;
199 				cn = wcsetno(pc);
200 				if (cn != wcsetno(c) || pc > c)
201 					goto char_array;
202 				i = insert_table(chars, i, cn, pc, cn, c);
203 				continue;
204 			}
205 		}
206 char_array:
207 		if (i >= MAXLIN)
208 			overflo();
209 		cn = wcsetno(c);
210 		i = insert_table(chars, i, cn, c, cn, c);
211 		pc = c;
212 	}
213 	dprintf("cclenter: in = |%ws|, ", op, NULL, NULL);
214 	xfree(op);
215 	i = (i + 1) * sizeof (ccl_chars_t);
216 	if ((new = (ccl_chars_t *)malloc(i)) == NULL)
217 		error(FATAL, "out of space in cclenter on %s", op);
218 	(void) memcpy((char *)new, (char *)chars, i);
219 	ddump_table(chars, i / 4);
220 
221 
222 	return (new);
223 }
224 
225 static void
overflo(void)226 overflo(void)
227 {
228 	error(FATAL, "regular expression too long\n");
229 }
230 
231 static void
cfoll(NODE * v)232 cfoll(NODE *v)	/* enter follow set of each leaf of vertex v into foll[leaf] */
233 {
234 	int i;
235 	int prev;
236 	int *add();
237 
238 
239 	switch (type(v)) {
240 		LEAF
241 			setcnt = 0;
242 			for (i = 1; i <= line; i++)
243 				setvec[i] = 0;
244 			follow(v);
245 			foll[(int)left(v)] = add(setcnt);
246 			break;
247 		UNARY
248 			cfoll(left(v));
249 			break;
250 		case CAT:
251 		case OR:
252 			cfoll(left(v));
253 			cfoll(right(v));
254 			break;
255 		default:
256 			error(FATAL, "unknown type %d in cfoll", type(v));
257 	}
258 }
259 
260 int
first(NODE * p)261 first(NODE *p)		/* collects initially active leaves of p into setvec */
262 	/* returns 0 or 1 depending on whether p matches empty string */
263 {
264 	int b;
265 
266 
267 	switch (type(p)) {
268 		LEAF
269 			if (setvec[(int)left(p)] != 1) {
270 				setvec[(int)left(p)] = 1;
271 				setcnt++;
272 			}
273 			if (type(p) == CCL &&
274 			(*(ccl_chars_t *)right(p)).cc_cs == (wchar_t)0x0)
275 				return (0);		/* empty CCL */
276 			else return (1);
277 		case FINAL:
278 		case PLUS:
279 			if (first(left(p)) == 0)
280 				return (0);
281 			return (1);
282 		case STAR:
283 		case QUEST:
284 			first(left(p));
285 			return (0);
286 		case CAT:
287 			if (first(left(p)) == 0 && first(right(p)) == 0)
288 				return (0);
289 			return (1);
290 		case OR:
291 			b = first(right(p));
292 			if (first(left(p)) == 0 || b == 0)
293 				return (0);
294 			return (1);
295 	}
296 	error(FATAL, "unknown type %d in first\n", type(p));
297 	return (-1);
298 }
299 
300 static void
follow(NODE * v)301 follow(NODE *v)
302 		/* collects leaves that can follow v into setvec */
303 {
304 	NODE *p;
305 
306 
307 	if (type(v) == FINAL)
308 		return;
309 	p = parent(v);
310 	switch (type(p)) {
311 		case STAR:
312 		case PLUS:	first(v);
313 				follow(p);
314 				return;
315 
316 
317 		case OR:
318 		case QUEST:	follow(p);
319 				return;
320 
321 
322 		case CAT:	if (v == left(p)) { /* v is left child of p */
323 					if (first(right(p)) == 0) {
324 						follow(p);
325 						return;
326 					}
327 				} else		/* v is right child */
328 					follow(p);
329 				return;
330 		case FINAL:	if (setvec[line] != 1) {
331 					setvec[line] = 1;
332 					setcnt++;
333 				}
334 				return;
335 	}
336 }
337 
338 
339 /*
340  * There are three type of functions for checking member ship.  Because I have
341  * been changed structure of CCL tables.  And some CCL tables end up with NULLs
342  * but someone has length and will includes NULLs in table as one of data.
343  * Please note, CCL table which has a length data and data will include NULLs,
344  * it only used within a this source file("b.c").
345  */
346 
347 int				/* is cs thru ce in s? */
ccl_member(int ns,wchar_t cs,int ne,wchar_t ce,ccl_chars_t * s)348 ccl_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s)
349 {
350 	/*
351 	 * The specified range(cs, ce) must be beside the range between
352 	 * s->cc_start and s->cc_end to determine member.
353 	 */
354 	while (s->cc_cs || s->cc_ce) {
355 		if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
356 				MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
357 			return (1);
358 		s++;
359 	}
360 	return (0);
361 }
362 
363 
364 static int			/* is cs thru ce in s? */
ccln_member(int ns,wchar_t cs,int ne,wchar_t ce,ccl_chars_t * s,int n)365 ccln_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s, int n)
366 {
367 	/*
368 	 * The specified range(cs, ce) must be beside the range between
369 	 * s->cc_start and s->cc_end to determine member.
370 	 */
371 	while (n-- > 0) {
372 		if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
373 				MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
374 			return (1);
375 		s++;
376 	}
377 	return (0);
378 }
379 
380 
381 int
member(wchar_t c,wchar_t * s)382 member(wchar_t c, wchar_t *s)	/* is c in s? */
383 {
384 	while (*s)
385 		if (c == *s++)
386 			return (1);
387 	return (0);
388 }
389 
390 int
notin(int ** array,int n,int * prev)391 notin(int **array, int n, int *prev) /* is setvec in array[0] thru array[n]? */
392 {
393 	int i, j;
394 	int *ptr;
395 	for (i = 0; i <= n; i++) {
396 		ptr = array[i];
397 		if (*ptr == setcnt) {
398 			for (j = 0; j < setcnt; j++)
399 				if (setvec[*(++ptr)] != 1) goto nxt;
400 			*prev = i;
401 			return (0);
402 		}
403 		nxt: /* dummy */;
404 	}
405 	return (1);
406 }
407 
408 
409 int *
add(int n)410 add(int n)
411 {		/* remember setvec */
412 	int *ptr, *p;
413 	int i;
414 	if ((p = ptr = (int *)malloc((n+1)*sizeof (int))) == NULL)
415 		overflo();
416 	*ptr = n;
417 	dprintf("add(%d)\n", n, NULL, NULL);
418 	for (i = 1; i <= line; i++)
419 		if (setvec[i] == 1) {
420 			*(++ptr) = i;
421 		dprintf("  ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
422 		}
423 	dprintf("\n", NULL, NULL, NULL);
424 	return (p);
425 }
426 
427 
428 struct fa *
cgotofn()429 cgotofn()
430 {
431 	int i, k;
432 	int *ptr;
433 	int ns, ne;
434 	wchar_t cs, ce;
435 	ccl_chars_t *p;
436 	NODE *cp;
437 	int j, n, s, ind, numtrans;
438 	int finflg;
439 	int curpos, num, prev;
440 	struct fa *where[NSTATES];
441 
442 
443 	struct {
444 		ccl_chars_t	cc;
445 		int		n;
446 	} fatab[257];
447 	struct fa *pfa;
448 
449 
450 	char index[MAXLIN];
451 	char iposns[MAXLIN];
452 	int sposns[MAXLIN];
453 	int spmax, spinit;
454 	ccl_chars_t symbol[NCHARS];
455 	ccl_chars_t isyms[NCHARS];
456 	ccl_chars_t ssyms[NCHARS];
457 	int ssmax, symax, ismax, ssinit;
458 
459 
460 	wchar_t hat;
461 	int hatcn;
462 
463 
464 	for (i = 0; i <= line; i++) index[i] = iposns[i] = setvec[i] = 0;
465 	isyms[0].cc_cs = isyms[0].cc_ce = (wchar_t)0x0;
466 	for (i = 0; i < NCHARS; i++)
467 		isyms[i] = symbol[i] = ssyms[i] = isyms[0];
468 	symax = 0;
469 	setcnt = 0;
470 	/* compute initial positions and symbols of state 0 */
471 	ismax = 0;
472 	ssmax = 0;
473 	ptr = state[0] = foll[0];
474 	spinit = *ptr;
475 	hat = HAT;
476 	hatcn = wcsetno(hat);
477 	for (i = 0; i < spinit; i++) {
478 		curpos = *(++ptr);
479 		sposns[i] = curpos;
480 		iposns[curpos] = 1;
481 		cp = point[curpos];
482 		dprintf("i= %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
483 		switch (type(cp)) {
484 			case CHAR:
485 				k = (int)right(cp);
486 				ns = wcsetno(k);
487 				if (! ccln_member(ns, k, ns, k,
488 							isyms, ismax)) {
489 					ismax = insert_table(isyms, ismax,
490 								ns, k, ns, k);
491 				}
492 				ssyms[ssmax].cc_ns = ns;
493 				ssyms[ssmax].cc_cs = k;
494 				ssyms[ssmax].cc_ne = ns;
495 				ssyms[ssmax++].cc_ce = k;
496 				break;
497 			case DOT:
498 				cs = WC_VERY_SMALL;
499 				ns = 0;
500 				ce = HAT - 1;
501 				ne = hatcn;
502 				if (! ccln_member(ns, cs, ne, ce,
503 							isyms, ismax)) {
504 					ismax = insert_table(isyms, ismax,
505 								ns, cs, ne, ce);
506 				}
507 				ssyms[ssmax].cc_cs = cs;
508 				ssyms[ssmax].cc_ns = ns;
509 				ssyms[ssmax].cc_ce = ce;
510 				ssyms[ssmax++].cc_ne = ne;
511 				cs = HAT + 1;
512 				ns = hatcn;
513 				ce = WC_VERY_LARGE;
514 				ne = MAX_CODESET;
515 				if (! ccln_member(ns, cs, ne, ce,
516 							isyms, ismax)) {
517 					ismax = insert_table(isyms, ismax,
518 								ns, cs, ne, ce);
519 				}
520 				ssyms[ssmax].cc_cs = cs;
521 				ssyms[ssmax].cc_ns = ns;
522 				ssyms[ssmax].cc_ce = ce;
523 				ssyms[ssmax++].cc_ne = ne;
524 				break;
525 			case CCL:
526 				cs = HAT;
527 				ns = hatcn;
528 				for (p = (ccl_chars_t *)right(cp);
529 					p->cc_cs; p++) {
530 					if ((p->cc_ns != ns ||\
531 					p->cc_cs != cs) &&\
532 				!ccln_member(p->cc_ns, p->cc_cs,
533 				p->cc_ne, p->cc_ce, isyms, ismax)) {
534 						ismax = insert_table(isyms,
535 				ismax, p->cc_ns, p->cc_cs, p->cc_ne, p->cc_ce);
536 					}
537 					ssyms[ssmax++] = *p;
538 				}
539 				break;
540 			case NCCL:
541 				ns = 0;
542 				cs = WC_VERY_SMALL;
543 				for (p = (ccl_chars_t *)right(cp);
544 					p->cc_cs; p++) {
545 					if ((ns != hatcn || p->cc_cs != HAT) &&
546 						! ccln_member(ns, cs,
547 							p->cc_ns, p->cc_cs-1,
548 								isyms, ismax)) {
549 						ismax = insert_table(isyms,
550 								ismax,
551 								ns, cs,
552 								p->cc_ns,
553 								p->cc_cs-1);
554 					}
555 					ssyms[ssmax].cc_ns = ns;
556 					ssyms[ssmax].cc_cs = cs;
557 					ssyms[ssmax].cc_ne = p->cc_ns;
558 					ssyms[ssmax++].cc_ce = p->cc_cs-1;
559 					if (p->cc_ce == (wchar_t)0x0) {
560 						ns = p->cc_ns;
561 						cs = p->cc_cs + 1;
562 
563 					} else {
564 						ns = p->cc_ne;
565 						cs = p->cc_ce + 1;
566 					}
567 				}
568 				if ((ns != hatcn || cs != HAT) &&
569 					! ccln_member(ns, cs,
570 						MAX_CODESET, WC_VERY_LARGE,
571 							isyms, ismax)) {
572 					ismax = insert_table(isyms, ismax,
573 							ns, cs, MAX_CODESET,
574 							WC_VERY_LARGE);
575 				}
576 				ssyms[ssmax].cc_ns = ns;
577 				ssyms[ssmax].cc_cs = cs;
578 				ssyms[ssmax].cc_ne = MAX_CODESET;
579 				ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
580 				break;
581 		}
582 	}
583 	ssinit = ssmax;
584 	symax = 0;
585 	n = 0;
586 	for (s = 0; s <= n; s++)  {
587 		dprintf("s = %d\n", s, NULL, NULL);
588 		ind = 0;
589 		numtrans = 0;
590 		finflg = 0;
591 		if (*(state[s] + *state[s]) == line) {		/* s final? */
592 			finflg = 1;
593 			goto tenter;
594 		}
595 		spmax = spinit;
596 		ssmax = ssinit;
597 		ptr = state[s];
598 		num = *ptr;
599 		for (i = 0; i < num; i++) {
600 			curpos = *(++ptr);
601 			if (iposns[curpos] != 1 && index[curpos] != 1) {
602 				index[curpos] = 1;
603 				sposns[spmax++] = curpos;
604 			}
605 			cp = point[curpos];
606 			switch (type(cp)) {
607 				case CHAR:
608 					k = (int)right(cp);
609 					ns = wcsetno(k);
610 					if (! ccln_member(ns, k, ns, k,
611 							isyms, ismax) &&
612 						! ccln_member(ns, k, ns, k,
613 							symbol, symax)) {
614 						symax = insert_table(symbol,
615 									symax,
616 									ns, k,
617 									ns, k);
618 					}
619 					ssyms[ssmax].cc_ns = ns;
620 					ssyms[ssmax].cc_cs = k;
621 					ssyms[ssmax].cc_ne = ns;
622 					ssyms[ssmax++].cc_ce = k;
623 					break;
624 				case DOT:
625 					cs = WC_VERY_SMALL;
626 					ns = 0;
627 					ce = HAT - 1;
628 					ne = hatcn;
629 					if (! ccln_member(ns, cs, ne, ce,
630 							isyms, ismax) &&
631 						! ccln_member(ns, cs, ne, ce,
632 							symbol, symax)) {
633 						symax = insert_table(symbol,
634 									symax,
635 									ns, cs,
636 									ne, ce);
637 					}
638 					ssyms[ssmax].cc_cs = cs;
639 					ssyms[ssmax].cc_ns = ns;
640 					ssyms[ssmax].cc_ce = ce;
641 					ssyms[ssmax++].cc_ne = ne;
642 					cs = HAT + 1;
643 					ns = hatcn;
644 					ce = WC_VERY_LARGE;
645 					ne = MAX_CODESET;
646 					if (! ccln_member(ns, cs, ne, ce,
647 								isyms, ismax) &&
648 						! ccln_member(ns, cs, ne, ce,
649 							symbol, symax)) {
650 						symax = insert_table(symbol,
651 									symax,
652 									ns, cs,
653 									ne, ce);
654 					}
655 					ssyms[ssmax].cc_cs = cs;
656 					ssyms[ssmax].cc_ns = ns;
657 					ssyms[ssmax].cc_ce = ce;
658 					ssyms[ssmax++].cc_ne = ne;
659 					break;
660 				case CCL:
661 					cs = HAT;
662 					ns = hatcn;
663 					for (p = (ccl_chars_t *)right(cp);
664 						p->cc_cs; p++) {
665 						if ((p->cc_ns != ns ||
666 							p->cc_cs != cs) &&
667 							! ccln_member(p->cc_ns,
668 							p->cc_cs, p->cc_ne,
669 						p->cc_ce, isyms, ismax) &&
670 						!ccln_member(p->cc_ns, p->cc_cs,
671 						p->cc_ne, p->cc_ce, symbol,
672 						symax)) {
673 							symax = insert_table(
674 						symbol, symax, p->cc_ns,
675 						p->cc_cs, p->cc_ne, p->cc_ce);
676 						}
677 						ssyms[ssmax++] = *p;
678 					}
679 					break;
680 				case NCCL:
681 					ns = 0;
682 					cs = WC_VERY_SMALL;
683 		for (p = (ccl_chars_t *)right(cp); p->cc_cs; p++) {
684 			if ((p->cc_ns != hatcn || p->cc_cs != HAT) &&
685 					! ccln_member(ns, cs, p->cc_ns,
686 					p->cc_cs-1, isyms, ismax) &&
687 					! ccln_member(ns, cs, p->cc_ns,
688 					p->cc_cs-1, symbol, symax)) {
689 				symax = insert_table(symbol,
690 					symax, ns, cs, p->cc_ns, p->cc_cs-1);
691 						}
692 						ssyms[ssmax].cc_ns = ns;
693 						ssyms[ssmax].cc_cs = cs;
694 						ssyms[ssmax].cc_ne = p->cc_ns;
695 						ssyms[ssmax++].cc_ce
696 								= p->cc_cs-1;
697 						if (p->cc_ce == (wchar_t)0x0) {
698 							ns = p->cc_ns;
699 							cs = p->cc_cs + 1;
700 
701 						} else {
702 							ns = p->cc_ne;
703 							cs = p->cc_ce + 1;
704 						}
705 					}
706 		if ((ns != hatcn || cs != HAT) && ! ccln_member(ns, cs,
707 				MAX_CODESET, WC_VERY_LARGE, isyms, ismax) &&
708 				! ccln_member(ns, cs, MAX_CODESET,
709 					WC_VERY_LARGE, symbol, symax)) {
710 			symax = insert_table(symbol, symax, ns, cs,
711 								MAX_CODESET,
712 								WC_VERY_LARGE);
713 					}
714 					ssyms[ssmax].cc_ns = ns;
715 					ssyms[ssmax].cc_cs = cs;
716 					ssyms[ssmax].cc_ne = MAX_CODESET;
717 					ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
718 					break;
719 			}
720 		}
721 		for (j = 0; j < ssmax; j++) {	/* nextstate(s, ssyms[j]) */
722 			ns = ssyms[j].cc_ns;
723 			cs = ssyms[j].cc_cs;
724 			ne = ssyms[j].cc_ne;
725 			ce = ssyms[j].cc_ce;
726 dprintf("j = %d, cs = %o, ce = %o\n", j, cs, ce);
727 			symax = delete_table(symbol, symax, ns, cs, ne, ce);
728 			setcnt = 0;
729 			for (k = 0; k <= line; k++) setvec[k] = 0;
730 			for (i = 0; i < spmax; i++) {
731 				index[sposns[i]] = 0;
732 				cp = point[sposns[i]];
733 				if ((k = type(cp)) != FINAL) {
734 					if (k == CHAR && ns == ne && cs == ce &&
735 						cs == (int)right(cp) ||
736 						k == DOT || k == CCL &&
737 						ccl_member(ns, cs, ne, ce,
738 						(ccl_chars_t *)right(cp)) ||
739 						k == NCCL &&
740 						!ccl_member(ns, cs, ne, ce,
741 						(ccl_chars_t *)right(cp))) {
742 						ptr = foll[sposns[i]];
743 						num = *ptr;
744 						for (k = 0; k < num; k++) {
745 						if (setvec[*(++ptr)] != 1 &&
746 							iposns[*ptr] != 1) {
747 							setvec[*ptr] = 1;
748 								setcnt++;
749 							}
750 						}
751 					}
752 				}
753 			} /* end nextstate */
754 			if (notin(state, n, &prev)) {
755 				if (n >= NSTATES - 1) {
756 		printf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
757 					overflo();
758 				}
759 				state[++n] = add(setcnt);
760 				dprintf("	delta(%d,[%o,%o])",
761 					s, cs, ce);
762 				dprintf(" = %d, ind = %d\n", n, ind+1, NULL);
763 				fatab[++ind].cc.cc_ns = ns;
764 				fatab[ind].cc.cc_cs = cs;
765 				fatab[ind].cc.cc_ne = ne;
766 				fatab[ind].cc.cc_ce = ce;
767 				fatab[ind].n = n;
768 				numtrans++;
769 			} else {
770 				if (prev != 0) {
771 					dprintf("	delta(%d,[%o,%o])",
772 						s, cs, ce);
773 					dprintf("= %d, ind = %d\n",
774 						prev, ind+1, NULL);
775 					fatab[++ind].cc.cc_ns = ns;
776 					fatab[ind].cc.cc_cs = cs;
777 					fatab[ind].cc.cc_ne = ne;
778 					fatab[ind].cc.cc_ce = ce;
779 					fatab[ind].n = prev;
780 					numtrans++;
781 				}
782 			}
783 		}
784 	tenter:
785 		if ((pfa = (struct fa *)malloc((numtrans + 1)
786 						* sizeof (struct fa))) == NULL)
787 			overflo();
788 		where[s] = pfa;
789 		if (finflg)
790 			pfa->cc.h = -1;		/* s is a final state */
791 		else
792 			pfa->cc.h = numtrans;
793 		pfa->st = 0;
794 		for (i = 1, pfa += 1; i <= numtrans; i++, pfa++) {
795 			pfa->cc.s = fatab[i].cc;
796 			pfa->st = (struct fa *)fatab[i].n;
797 		}
798 	}
799 	for (i = 0; i <= n; i++) {
800 		if (i != 0)	/* state[0] is freed later in freetr() */
801 			xfree(state[i]);	/* free state[i] */
802 		pfa = where[i];
803 		pfa->st = where[0];
804 		dprintf("state %d: (%o)\n", i, pfa, NULL);
805 		dprintf("	numtrans = %d,	default = %o\n",
806 			pfa->cc.h, pfa->st, NULL);
807 		for (k = 1; k <= pfa->cc.h; k++) {
808 			(pfa+k)->st = where[(int)(pfa+k)->st];
809 			dprintf("	char = [%o,%o],	nextstate = %o\n",
810 				(pfa+k)->cc.s.cc_cs, (pfa+k)->cc.s.cc_ce,
811 				(pfa+k)->st);
812 		}
813 	}
814 	pfa = where[0];
815 	if ((num = pfa->cc.h) < 0)
816 		return (where[0]);
817 	for (pfa += num; num; num--, pfa--)
818 		if (pfa->cc.s.cc_ns == hatcn && pfa->cc.s.cc_cs == HAT) {
819 			return (pfa->st);
820 		}
821 	return (where[0]);
822 }
823 
824 
825 /*
826  * Insert CCL entry to CCL table with maintain optimized order.
827  */
828 static int
insert_table(ccl_chars_t * table_base,int table_size,int ns,wchar_t cs,int ne,wchar_t ce)829 insert_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
830 	int ne, wchar_t ce)
831 {
832 	int		i;
833 	int		tns, tne;
834 	wchar_t		tcs, tce;
835 	ccl_chars_t	*table;
836 	ccl_chars_t	*saved_table;
837 	int		saved_i;
838 
839 
840 
841 
842 	dprintf("Inserting {%o, %o} to table %o\n", cs, ce, table_base);
843 	/*
844 	 * Searching the table to find out where should put the new item.
845 	 */
846 	for (i = 0, table = table_base; i < table_size; i++, table++) {
847 		tns = table->cc_ns;
848 		tcs = table->cc_cs;
849 		tne = table->cc_ne;
850 		tce = table->cc_ce;
851 		if (MLCMPLT(ne, ce, tns, (tcs - 1))) {
852 			/*
853 			 * Quick! insert to font of current table entries.
854 			 */
855 qinsert:
856 			table_size++;
857 			for (; i < table_size; i++, table++) {
858 				tns = table->cc_ns;
859 				tcs = table->cc_cs;
860 				tne = table->cc_ne;
861 				tce = table->cc_ce;
862 				table->cc_ns = ns;
863 				table->cc_cs = cs;
864 				table->cc_ne = ne;
865 				table->cc_ce = ce;
866 				ns = tns;
867 				cs = tcs;
868 				ne = tne;
869 				ce = tce;
870 			}
871 			goto add_null;
872 		} else if (MLCMPLE(tns, (tcs - 1), ns, cs) &&
873 				MLCMPLE(ns, cs, tne, (tce + 1))) {
874 			/*
875 			 * Starting point is within the current entry.
876 			 */
877 			if (MLCMPGT(tns, tcs, ns, cs)) {
878 				table->cc_ns = ns;
879 				table->cc_cs = cs;
880 			}
881 			if (MLCMPLE(ne, ce, tne, tce)) {
882 				return (table_size);
883 			}
884 			goto combine;
885 		}
886 	}
887 
888 
889 	/*
890 	 * Adding new one to end of table.
891 	 */
892 	table->cc_ns = ns;
893 	table->cc_cs = cs;
894 	table->cc_ne = ne;
895 	table->cc_ce = ce;
896 
897 
898 	table_size++;
899 	goto add_null;
900 
901 
902 
903 
904 	combine:
905 	/*
906 	 * Check and try to combine the new entry with rest of entries.
907 	 */
908 	if ((i + 1) >= table_size) {
909 		table->cc_ne = ne;
910 		table->cc_ce = ce;
911 		return (table_size);
912 	}
913 
914 
915 	saved_table = table++;
916 	saved_i = i++;
917 
918 
919 	/*
920 	 * Finding the spot where we should put the end point.
921 	 */
922 	for (; i < table_size; i++, table++) {
923 		if (MLCMPLT(ne, ce, table->cc_ns, (table->cc_cs - 1))) {
924 			break;
925 		} else
926 		if (MLCMPLE(table->cc_ns, (table->cc_cs - 1), ne, ce) &&
927 			MLCMPLE(ne, ce, table->cc_ne, (table->cc_ce + 1))) {
928 			/*
929 			 * Tack with this table.
930 			 */
931 			if (MLCMPLT(ne, ce, table->cc_ne, table->cc_ce)) {
932 				ne = table->cc_ne;
933 				ce = table->cc_ce;
934 			}
935 			table++;
936 			i++;
937 			break;
938 		}
939 	}
940 
941 
942 	saved_table->cc_ne = ne;
943 	saved_table->cc_ce = ce;
944 	saved_i = table_size - (i - saved_i - 1);
945 
946 
947 	/*
948 	 * Moving the rest of entries.
949 	 */
950 	for (; i < table_size; i++, table++)
951 		*(++saved_table) = *table;
952 	table_size = saved_i;
953 
954 
955 add_null:
956 	table_base[table_size].cc_cs = (wchar_t)0x0;
957 	table_base[table_size].cc_ce = (wchar_t)0x0;
958 
959 
960 	return (table_size);
961 }
962 
963 
964 
965 
966 static int
delete_table(ccl_chars_t * table_base,int table_size,int ns,wchar_t cs,int ne,wchar_t ce)967 delete_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
968 		int ne, wchar_t ce)
969 {
970 	int		i;
971 	int		saved_i;
972 	ccl_chars_t	*table;
973 	ccl_chars_t	*saved_table;
974 	int		tns;
975 	wchar_t		tcs;
976 	int		tne;
977 	wchar_t		tce;
978 
979 
980 
981 
982 	for (i = 0, table = table_base; i < table_size; i++, table++) {
983 		tns = table->cc_ns;
984 		tcs = table->cc_cs;
985 		tne = table->cc_ne;
986 		tce = table->cc_ce;
987 		if (MLCMPLT(ne, ce, tns, tcs))
988 			return (table_size);
989 		else if (MLCMPLT(ne, ce, tne, tce)) {
990 			if (MLCMPLE(ns, cs, tns, tcs)) {
991 				/*
992 				 * Shrink type 1.
993 				 */
994 				table->cc_ns = ne;
995 				table->cc_cs = ce + 1;
996 				return (table_size);
997 
998 			} else {
999 				/*
1000 				 * Spliting !!
1001 				 */
1002 				table->cc_ns = ne;
1003 				table->cc_cs = ce + 1;
1004 				tne = ns;
1005 				tce = cs - 1;
1006 				table_size++;
1007 				for (; i < table_size; i++, table++) {
1008 					ns = table->cc_ns;
1009 					cs = table->cc_cs;
1010 					ne = table->cc_ne;
1011 					ce = table->cc_ce;
1012 					table->cc_ns = tns;
1013 					table->cc_cs = tcs;
1014 					table->cc_ne = tne;
1015 					table->cc_ce = tce;
1016 					tns = ns;
1017 					tcs = cs;
1018 					tne = ne;
1019 					tce = ce;
1020 				}
1021 				return (table_size);
1022 			}
1023 
1024 		} else if (MLCMPLE(ns, cs, tne, tce)) {
1025 			if (MLCMPGT(ns, cs, tns, tcs)) {
1026 				/*
1027 				 * Shrink current table(type 2).
1028 				 */
1029 				table->cc_ne = ns;
1030 				table->cc_ce = cs - 1;
1031 				table++;
1032 				i++;
1033 			}
1034 			/*
1035 			 * Search for the end point.
1036 			 */
1037 			saved_i = i;
1038 			saved_table = table;
1039 			for (; i < table_size; i++, table++) {
1040 				if (MLCMPLT(ne, ce,
1041 						table->cc_ns, table->cc_cs)) {
1042 					/*
1043 					 * Easy point, no shrinks!
1044 					 */
1045 					break;
1046 
1047 				} else if (MLCMPGT(table->cc_ne, table->cc_ce,
1048 						ne, ce)) {
1049 					/*
1050 					 * Shrinking...
1051 					 */
1052 					table->cc_ns = ne;
1053 					table->cc_cs = ce + 1;
1054 					break;
1055 				}
1056 
1057 
1058 			}
1059 			/*
1060 			 * Moving(removing) backword.
1061 			 */
1062 			saved_i = table_size - (i - saved_i);
1063 			for (; i < table_size; i++)
1064 				*saved_table++ = *table++;
1065 			return (saved_i);
1066 		}
1067 	}
1068 	return (table_size);
1069 }
1070 
1071 
1072 #ifdef DEBUG
dump_table(ccl_chars_t * table,int size)1073 dump_table(ccl_chars_t *table, int size)
1074 {
1075 	int	i;
1076 
1077 
1078 
1079 
1080 	if (! dbg)
1081 		return;
1082 
1083 
1084 	printf("Duming table %o with size %d\n", table, size);
1085 	size++;	/* To watch out NULL */
1086 	for (i = 0; i < size; i++, table++) {
1087 		printf("{%3o, %3o}, ", table->cc_cs, table->cc_ce);
1088 	}
1089 	printf("\n");
1090 }
1091 #endif /* DEBUG */
1092 
1093 
1094 
1095 int
match(struct fa * pfa,wchar_t * p)1096 match(struct fa *pfa, wchar_t *p)
1097 {
1098 	int count;
1099 	int n, ns, ne;
1100 	wchar_t c, cs, ce;
1101 
1102 
1103 	if (p == 0)
1104 		return (0);
1105 	if (pfa->cc.h == 1) { /* fast test for first character, if possible */
1106 		ns = (++pfa)->cc.s.cc_ns;
1107 		cs = (pfa)->cc.s.cc_cs;
1108 		ne = (pfa)->cc.s.cc_ne;
1109 		ce = (pfa)->cc.s.cc_ce;
1110 		do {
1111 			c = *p;
1112 			n = wcsetno(c);
1113 			if (MLCMPLE(ns, cs, n, c) &&
1114 				MLCMPLE(n, c, ne, ce)) {
1115 				p++;
1116 				pfa = pfa->st;
1117 				goto adv;
1118 			}
1119 		} while (*p++ != 0);
1120 		return (0);
1121 	}
1122 	adv: if ((count = pfa->cc.h) < 0)
1123 		return (1);
1124 	do {
1125 		c = *p;
1126 		n = wcsetno(c);
1127 		for (pfa += count; count; count--, pfa--) {
1128 			ns = (pfa)->cc.s.cc_ns;
1129 			cs = (pfa)->cc.s.cc_cs;
1130 			ne = (pfa)->cc.s.cc_ne;
1131 			ce = (pfa)->cc.s.cc_ce;
1132 			if (MLCMPLE(ns, cs, n, c) && MLCMPLE(n, c, ne, ce))
1133 				break;
1134 		}
1135 		pfa = pfa->st;
1136 		if ((count = pfa->cc.h) < 0)
1137 			return (1);
1138 	} while (*p++ != 0);
1139 	return (0);
1140 }
1141