1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
26 */
27
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 #pragma ident "%Z%%M% %I% %E% SMI"
32
33 #include "awk.def"
34 #include "stdio.h"
35 #include "awk.h"
36 #include <stdlib.h>
37
38
39 extern NODE *op2();
40 extern struct fa *cgotofn();
41 #define MAXLIN 256
42 #define NCHARS 128
43 #define NSTATES 256
44
45
46 #define type(v) v->nobj
47 #define left(v) v->narg[0]
48 #define right(v) v->narg[1]
49 #define parent(v) v->nnext
50
51
52 #define LEAF case CCL: case NCCL: case CHAR: case DOT:
53 #define UNARY case FINAL: case STAR: case PLUS: case QUEST:
54
55
56 /*
57 * encoding in tree NODEs:
58 * leaf (CCL, NCCL, CHAR, DOT): left is index,
59 * right contains value or pointer to value
60 * unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
61 * binary (CAT, OR): left and right are children
62 * parent contains pointer to parent
63 */
64
65
66 struct fa {
67 union {
68 ccl_chars_t s;
69 int h;
70 } cc;
71 #define MLCMPLT(m1, l1, m2, l2) ((m1 != m2 &&\
72 (int)m1 < (int)m2) ||\
73 (m1 == m2 && (int)l1 < (int)l2))
74 #define MLCMPLE(m1, l1, m2, l2) ((m1 != m2 &&\
75 (int)m1 <= (int)m2) ||\
76 (m1 == m2 && (int)l1 <= (int)l2))
77 #define MLCMPGT(m1, l1, m2, l2) ((m1 != m2 &&\
78 (int)m1 > (int)m2) ||\
79 (m1 == m2 && (int)l1 > (int)l2))
80 #define MAX_CODESET 3
81 struct fa *st;
82 };
83
84
85 int *state[NSTATES];
86 int *foll[MAXLIN];
87 int setvec[MAXLIN];
88 NODE *point[MAXLIN];
89
90
91 int setcnt;
92 int line;
93
94
95 static int ccln_member();
96 static int insert_table();
97 static int delete_table();
98 static void penter(NODE *p);
99 static void follow(NODE *v);
100 static void overflo(void);
101 static void cfoll(NODE *v);
102 static void freetr(NODE *p);
103 #ifdef DEBUG
104 #define ddump_table(t, s) dump_table(t, s)
105 #else
106 #define ddump_table(t, s)
107 #endif
108
109 struct fa *
makedfa(p)110 makedfa(p) /* returns dfa for tree pointed to by p */
111 NODE *p;
112 {
113 NODE *p1;
114 struct fa *fap;
115 p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0,
116 (NODE *) 0), (NODE *) 0), p);
117 /* put DOT STAR in front of reg. exp. */
118 p1 = op2(FINAL, p1, (NODE *) 0); /* install FINAL NODE */
119
120
121 line = 0;
122 penter(p1); /* enter parent pointers and leaf indices */
123 point[line] = p1; /* FINAL NODE */
124 setvec[0] = 1; /* for initial DOT STAR */
125 cfoll(p1); /* set up follow sets */
126 fap = cgotofn();
127 freetr(p1); /* add this when alloc works */
128 return (fap);
129 }
130
131 static void
penter(NODE * p)132 penter(NODE *p) /* set up parent pointers and leaf indices */
133 {
134 switch (type(p)) {
135 LEAF
136 left(p) = (NODE *)line;
137 point[line++] = p;
138 break;
139 UNARY
140 penter(left(p));
141 parent(left(p)) = p;
142 break;
143 case CAT:
144 case OR:
145 penter(left(p));
146 penter(right(p));
147 parent(left(p)) = p;
148 parent(right(p)) = p;
149 break;
150 default:
151 error(FATAL, "unknown type %d in penter\n", type(p));
152 break;
153 }
154 }
155
156 static void
freetr(NODE * p)157 freetr(NODE *p) /* free parse tree and follow sets */
158 {
159 switch (type(p)) {
160 LEAF
161 xfree(foll[(int)left(p)]);
162 xfree(p);
163 break;
164 UNARY
165 freetr(left(p));
166 xfree(p);
167 break;
168 case CAT:
169 case OR:
170 freetr(left(p));
171 freetr(right(p));
172 xfree(p);
173 break;
174 default:
175 error(FATAL, "unknown type %d in freetr", type(p));
176 break;
177 }
178 }
179 ccl_chars_t *
cclenter(wchar_t * p)180 cclenter(wchar_t *p)
181 {
182 int i, cn;
183 wchar_t c, pc;
184 wchar_t *op;
185 ccl_chars_t *new;
186 ccl_chars_t chars[MAXLIN];
187
188 op = p;
189 i = 0;
190 while ((c = *p++) != 0) {
191 if (c == '-' && i > 0) {
192 if (*p != 0) {
193 /*
194 * If there are not in same code set, the
195 * class should be ignore (make two independent
196 * characters)!
197 */
198 c = *p++;
199 cn = wcsetno(pc);
200 if (cn != wcsetno(c) || pc > c)
201 goto char_array;
202 i = insert_table(chars, i, cn, pc, cn, c);
203 continue;
204 }
205 }
206 char_array:
207 if (i >= MAXLIN)
208 overflo();
209 cn = wcsetno(c);
210 i = insert_table(chars, i, cn, c, cn, c);
211 pc = c;
212 }
213 dprintf("cclenter: in = |%ws|, ", op, NULL, NULL);
214 xfree(op);
215 i = (i + 1) * sizeof (ccl_chars_t);
216 if ((new = (ccl_chars_t *)malloc(i)) == NULL)
217 error(FATAL, "out of space in cclenter on %s", op);
218 (void) memcpy((char *)new, (char *)chars, i);
219 ddump_table(chars, i / 4);
220
221
222 return (new);
223 }
224
225 static void
overflo(void)226 overflo(void)
227 {
228 error(FATAL, "regular expression too long\n");
229 }
230
231 static void
cfoll(NODE * v)232 cfoll(NODE *v) /* enter follow set of each leaf of vertex v into foll[leaf] */
233 {
234 int i;
235 int prev;
236 int *add();
237
238
239 switch (type(v)) {
240 LEAF
241 setcnt = 0;
242 for (i = 1; i <= line; i++)
243 setvec[i] = 0;
244 follow(v);
245 foll[(int)left(v)] = add(setcnt);
246 break;
247 UNARY
248 cfoll(left(v));
249 break;
250 case CAT:
251 case OR:
252 cfoll(left(v));
253 cfoll(right(v));
254 break;
255 default:
256 error(FATAL, "unknown type %d in cfoll", type(v));
257 }
258 }
259
260 int
first(NODE * p)261 first(NODE *p) /* collects initially active leaves of p into setvec */
262 /* returns 0 or 1 depending on whether p matches empty string */
263 {
264 int b;
265
266
267 switch (type(p)) {
268 LEAF
269 if (setvec[(int)left(p)] != 1) {
270 setvec[(int)left(p)] = 1;
271 setcnt++;
272 }
273 if (type(p) == CCL &&
274 (*(ccl_chars_t *)right(p)).cc_cs == (wchar_t)0x0)
275 return (0); /* empty CCL */
276 else return (1);
277 case FINAL:
278 case PLUS:
279 if (first(left(p)) == 0)
280 return (0);
281 return (1);
282 case STAR:
283 case QUEST:
284 first(left(p));
285 return (0);
286 case CAT:
287 if (first(left(p)) == 0 && first(right(p)) == 0)
288 return (0);
289 return (1);
290 case OR:
291 b = first(right(p));
292 if (first(left(p)) == 0 || b == 0)
293 return (0);
294 return (1);
295 }
296 error(FATAL, "unknown type %d in first\n", type(p));
297 return (-1);
298 }
299
300 static void
follow(NODE * v)301 follow(NODE *v)
302 /* collects leaves that can follow v into setvec */
303 {
304 NODE *p;
305
306
307 if (type(v) == FINAL)
308 return;
309 p = parent(v);
310 switch (type(p)) {
311 case STAR:
312 case PLUS: first(v);
313 follow(p);
314 return;
315
316
317 case OR:
318 case QUEST: follow(p);
319 return;
320
321
322 case CAT: if (v == left(p)) { /* v is left child of p */
323 if (first(right(p)) == 0) {
324 follow(p);
325 return;
326 }
327 } else /* v is right child */
328 follow(p);
329 return;
330 case FINAL: if (setvec[line] != 1) {
331 setvec[line] = 1;
332 setcnt++;
333 }
334 return;
335 }
336 }
337
338
339 /*
340 * There are three type of functions for checking member ship. Because I have
341 * been changed structure of CCL tables. And some CCL tables end up with NULLs
342 * but someone has length and will includes NULLs in table as one of data.
343 * Please note, CCL table which has a length data and data will include NULLs,
344 * it only used within a this source file("b.c").
345 */
346
347 int /* is cs thru ce in s? */
ccl_member(int ns,wchar_t cs,int ne,wchar_t ce,ccl_chars_t * s)348 ccl_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s)
349 {
350 /*
351 * The specified range(cs, ce) must be beside the range between
352 * s->cc_start and s->cc_end to determine member.
353 */
354 while (s->cc_cs || s->cc_ce) {
355 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
356 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
357 return (1);
358 s++;
359 }
360 return (0);
361 }
362
363
364 static int /* is cs thru ce in s? */
ccln_member(int ns,wchar_t cs,int ne,wchar_t ce,ccl_chars_t * s,int n)365 ccln_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s, int n)
366 {
367 /*
368 * The specified range(cs, ce) must be beside the range between
369 * s->cc_start and s->cc_end to determine member.
370 */
371 while (n-- > 0) {
372 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
373 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
374 return (1);
375 s++;
376 }
377 return (0);
378 }
379
380
381 int
member(wchar_t c,wchar_t * s)382 member(wchar_t c, wchar_t *s) /* is c in s? */
383 {
384 while (*s)
385 if (c == *s++)
386 return (1);
387 return (0);
388 }
389
390 int
notin(int ** array,int n,int * prev)391 notin(int **array, int n, int *prev) /* is setvec in array[0] thru array[n]? */
392 {
393 int i, j;
394 int *ptr;
395 for (i = 0; i <= n; i++) {
396 ptr = array[i];
397 if (*ptr == setcnt) {
398 for (j = 0; j < setcnt; j++)
399 if (setvec[*(++ptr)] != 1) goto nxt;
400 *prev = i;
401 return (0);
402 }
403 nxt: /* dummy */;
404 }
405 return (1);
406 }
407
408
409 int *
add(int n)410 add(int n)
411 { /* remember setvec */
412 int *ptr, *p;
413 int i;
414 if ((p = ptr = (int *)malloc((n+1)*sizeof (int))) == NULL)
415 overflo();
416 *ptr = n;
417 dprintf("add(%d)\n", n, NULL, NULL);
418 for (i = 1; i <= line; i++)
419 if (setvec[i] == 1) {
420 *(++ptr) = i;
421 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
422 }
423 dprintf("\n", NULL, NULL, NULL);
424 return (p);
425 }
426
427
428 struct fa *
cgotofn()429 cgotofn()
430 {
431 int i, k;
432 int *ptr;
433 int ns, ne;
434 wchar_t cs, ce;
435 ccl_chars_t *p;
436 NODE *cp;
437 int j, n, s, ind, numtrans;
438 int finflg;
439 int curpos, num, prev;
440 struct fa *where[NSTATES];
441
442
443 struct {
444 ccl_chars_t cc;
445 int n;
446 } fatab[257];
447 struct fa *pfa;
448
449
450 char index[MAXLIN];
451 char iposns[MAXLIN];
452 int sposns[MAXLIN];
453 int spmax, spinit;
454 ccl_chars_t symbol[NCHARS];
455 ccl_chars_t isyms[NCHARS];
456 ccl_chars_t ssyms[NCHARS];
457 int ssmax, symax, ismax, ssinit;
458
459
460 wchar_t hat;
461 int hatcn;
462
463
464 for (i = 0; i <= line; i++) index[i] = iposns[i] = setvec[i] = 0;
465 isyms[0].cc_cs = isyms[0].cc_ce = (wchar_t)0x0;
466 for (i = 0; i < NCHARS; i++)
467 isyms[i] = symbol[i] = ssyms[i] = isyms[0];
468 symax = 0;
469 setcnt = 0;
470 /* compute initial positions and symbols of state 0 */
471 ismax = 0;
472 ssmax = 0;
473 ptr = state[0] = foll[0];
474 spinit = *ptr;
475 hat = HAT;
476 hatcn = wcsetno(hat);
477 for (i = 0; i < spinit; i++) {
478 curpos = *(++ptr);
479 sposns[i] = curpos;
480 iposns[curpos] = 1;
481 cp = point[curpos];
482 dprintf("i= %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
483 switch (type(cp)) {
484 case CHAR:
485 k = (int)right(cp);
486 ns = wcsetno(k);
487 if (! ccln_member(ns, k, ns, k,
488 isyms, ismax)) {
489 ismax = insert_table(isyms, ismax,
490 ns, k, ns, k);
491 }
492 ssyms[ssmax].cc_ns = ns;
493 ssyms[ssmax].cc_cs = k;
494 ssyms[ssmax].cc_ne = ns;
495 ssyms[ssmax++].cc_ce = k;
496 break;
497 case DOT:
498 cs = WC_VERY_SMALL;
499 ns = 0;
500 ce = HAT - 1;
501 ne = hatcn;
502 if (! ccln_member(ns, cs, ne, ce,
503 isyms, ismax)) {
504 ismax = insert_table(isyms, ismax,
505 ns, cs, ne, ce);
506 }
507 ssyms[ssmax].cc_cs = cs;
508 ssyms[ssmax].cc_ns = ns;
509 ssyms[ssmax].cc_ce = ce;
510 ssyms[ssmax++].cc_ne = ne;
511 cs = HAT + 1;
512 ns = hatcn;
513 ce = WC_VERY_LARGE;
514 ne = MAX_CODESET;
515 if (! ccln_member(ns, cs, ne, ce,
516 isyms, ismax)) {
517 ismax = insert_table(isyms, ismax,
518 ns, cs, ne, ce);
519 }
520 ssyms[ssmax].cc_cs = cs;
521 ssyms[ssmax].cc_ns = ns;
522 ssyms[ssmax].cc_ce = ce;
523 ssyms[ssmax++].cc_ne = ne;
524 break;
525 case CCL:
526 cs = HAT;
527 ns = hatcn;
528 for (p = (ccl_chars_t *)right(cp);
529 p->cc_cs; p++) {
530 if ((p->cc_ns != ns ||\
531 p->cc_cs != cs) &&\
532 !ccln_member(p->cc_ns, p->cc_cs,
533 p->cc_ne, p->cc_ce, isyms, ismax)) {
534 ismax = insert_table(isyms,
535 ismax, p->cc_ns, p->cc_cs, p->cc_ne, p->cc_ce);
536 }
537 ssyms[ssmax++] = *p;
538 }
539 break;
540 case NCCL:
541 ns = 0;
542 cs = WC_VERY_SMALL;
543 for (p = (ccl_chars_t *)right(cp);
544 p->cc_cs; p++) {
545 if ((ns != hatcn || p->cc_cs != HAT) &&
546 ! ccln_member(ns, cs,
547 p->cc_ns, p->cc_cs-1,
548 isyms, ismax)) {
549 ismax = insert_table(isyms,
550 ismax,
551 ns, cs,
552 p->cc_ns,
553 p->cc_cs-1);
554 }
555 ssyms[ssmax].cc_ns = ns;
556 ssyms[ssmax].cc_cs = cs;
557 ssyms[ssmax].cc_ne = p->cc_ns;
558 ssyms[ssmax++].cc_ce = p->cc_cs-1;
559 if (p->cc_ce == (wchar_t)0x0) {
560 ns = p->cc_ns;
561 cs = p->cc_cs + 1;
562
563 } else {
564 ns = p->cc_ne;
565 cs = p->cc_ce + 1;
566 }
567 }
568 if ((ns != hatcn || cs != HAT) &&
569 ! ccln_member(ns, cs,
570 MAX_CODESET, WC_VERY_LARGE,
571 isyms, ismax)) {
572 ismax = insert_table(isyms, ismax,
573 ns, cs, MAX_CODESET,
574 WC_VERY_LARGE);
575 }
576 ssyms[ssmax].cc_ns = ns;
577 ssyms[ssmax].cc_cs = cs;
578 ssyms[ssmax].cc_ne = MAX_CODESET;
579 ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
580 break;
581 }
582 }
583 ssinit = ssmax;
584 symax = 0;
585 n = 0;
586 for (s = 0; s <= n; s++) {
587 dprintf("s = %d\n", s, NULL, NULL);
588 ind = 0;
589 numtrans = 0;
590 finflg = 0;
591 if (*(state[s] + *state[s]) == line) { /* s final? */
592 finflg = 1;
593 goto tenter;
594 }
595 spmax = spinit;
596 ssmax = ssinit;
597 ptr = state[s];
598 num = *ptr;
599 for (i = 0; i < num; i++) {
600 curpos = *(++ptr);
601 if (iposns[curpos] != 1 && index[curpos] != 1) {
602 index[curpos] = 1;
603 sposns[spmax++] = curpos;
604 }
605 cp = point[curpos];
606 switch (type(cp)) {
607 case CHAR:
608 k = (int)right(cp);
609 ns = wcsetno(k);
610 if (! ccln_member(ns, k, ns, k,
611 isyms, ismax) &&
612 ! ccln_member(ns, k, ns, k,
613 symbol, symax)) {
614 symax = insert_table(symbol,
615 symax,
616 ns, k,
617 ns, k);
618 }
619 ssyms[ssmax].cc_ns = ns;
620 ssyms[ssmax].cc_cs = k;
621 ssyms[ssmax].cc_ne = ns;
622 ssyms[ssmax++].cc_ce = k;
623 break;
624 case DOT:
625 cs = WC_VERY_SMALL;
626 ns = 0;
627 ce = HAT - 1;
628 ne = hatcn;
629 if (! ccln_member(ns, cs, ne, ce,
630 isyms, ismax) &&
631 ! ccln_member(ns, cs, ne, ce,
632 symbol, symax)) {
633 symax = insert_table(symbol,
634 symax,
635 ns, cs,
636 ne, ce);
637 }
638 ssyms[ssmax].cc_cs = cs;
639 ssyms[ssmax].cc_ns = ns;
640 ssyms[ssmax].cc_ce = ce;
641 ssyms[ssmax++].cc_ne = ne;
642 cs = HAT + 1;
643 ns = hatcn;
644 ce = WC_VERY_LARGE;
645 ne = MAX_CODESET;
646 if (! ccln_member(ns, cs, ne, ce,
647 isyms, ismax) &&
648 ! ccln_member(ns, cs, ne, ce,
649 symbol, symax)) {
650 symax = insert_table(symbol,
651 symax,
652 ns, cs,
653 ne, ce);
654 }
655 ssyms[ssmax].cc_cs = cs;
656 ssyms[ssmax].cc_ns = ns;
657 ssyms[ssmax].cc_ce = ce;
658 ssyms[ssmax++].cc_ne = ne;
659 break;
660 case CCL:
661 cs = HAT;
662 ns = hatcn;
663 for (p = (ccl_chars_t *)right(cp);
664 p->cc_cs; p++) {
665 if ((p->cc_ns != ns ||
666 p->cc_cs != cs) &&
667 ! ccln_member(p->cc_ns,
668 p->cc_cs, p->cc_ne,
669 p->cc_ce, isyms, ismax) &&
670 !ccln_member(p->cc_ns, p->cc_cs,
671 p->cc_ne, p->cc_ce, symbol,
672 symax)) {
673 symax = insert_table(
674 symbol, symax, p->cc_ns,
675 p->cc_cs, p->cc_ne, p->cc_ce);
676 }
677 ssyms[ssmax++] = *p;
678 }
679 break;
680 case NCCL:
681 ns = 0;
682 cs = WC_VERY_SMALL;
683 for (p = (ccl_chars_t *)right(cp); p->cc_cs; p++) {
684 if ((p->cc_ns != hatcn || p->cc_cs != HAT) &&
685 ! ccln_member(ns, cs, p->cc_ns,
686 p->cc_cs-1, isyms, ismax) &&
687 ! ccln_member(ns, cs, p->cc_ns,
688 p->cc_cs-1, symbol, symax)) {
689 symax = insert_table(symbol,
690 symax, ns, cs, p->cc_ns, p->cc_cs-1);
691 }
692 ssyms[ssmax].cc_ns = ns;
693 ssyms[ssmax].cc_cs = cs;
694 ssyms[ssmax].cc_ne = p->cc_ns;
695 ssyms[ssmax++].cc_ce
696 = p->cc_cs-1;
697 if (p->cc_ce == (wchar_t)0x0) {
698 ns = p->cc_ns;
699 cs = p->cc_cs + 1;
700
701 } else {
702 ns = p->cc_ne;
703 cs = p->cc_ce + 1;
704 }
705 }
706 if ((ns != hatcn || cs != HAT) && ! ccln_member(ns, cs,
707 MAX_CODESET, WC_VERY_LARGE, isyms, ismax) &&
708 ! ccln_member(ns, cs, MAX_CODESET,
709 WC_VERY_LARGE, symbol, symax)) {
710 symax = insert_table(symbol, symax, ns, cs,
711 MAX_CODESET,
712 WC_VERY_LARGE);
713 }
714 ssyms[ssmax].cc_ns = ns;
715 ssyms[ssmax].cc_cs = cs;
716 ssyms[ssmax].cc_ne = MAX_CODESET;
717 ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
718 break;
719 }
720 }
721 for (j = 0; j < ssmax; j++) { /* nextstate(s, ssyms[j]) */
722 ns = ssyms[j].cc_ns;
723 cs = ssyms[j].cc_cs;
724 ne = ssyms[j].cc_ne;
725 ce = ssyms[j].cc_ce;
726 dprintf("j = %d, cs = %o, ce = %o\n", j, cs, ce);
727 symax = delete_table(symbol, symax, ns, cs, ne, ce);
728 setcnt = 0;
729 for (k = 0; k <= line; k++) setvec[k] = 0;
730 for (i = 0; i < spmax; i++) {
731 index[sposns[i]] = 0;
732 cp = point[sposns[i]];
733 if ((k = type(cp)) != FINAL) {
734 if (k == CHAR && ns == ne && cs == ce &&
735 cs == (int)right(cp) ||
736 k == DOT || k == CCL &&
737 ccl_member(ns, cs, ne, ce,
738 (ccl_chars_t *)right(cp)) ||
739 k == NCCL &&
740 !ccl_member(ns, cs, ne, ce,
741 (ccl_chars_t *)right(cp))) {
742 ptr = foll[sposns[i]];
743 num = *ptr;
744 for (k = 0; k < num; k++) {
745 if (setvec[*(++ptr)] != 1 &&
746 iposns[*ptr] != 1) {
747 setvec[*ptr] = 1;
748 setcnt++;
749 }
750 }
751 }
752 }
753 } /* end nextstate */
754 if (notin(state, n, &prev)) {
755 if (n >= NSTATES - 1) {
756 printf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
757 overflo();
758 }
759 state[++n] = add(setcnt);
760 dprintf(" delta(%d,[%o,%o])",
761 s, cs, ce);
762 dprintf(" = %d, ind = %d\n", n, ind+1, NULL);
763 fatab[++ind].cc.cc_ns = ns;
764 fatab[ind].cc.cc_cs = cs;
765 fatab[ind].cc.cc_ne = ne;
766 fatab[ind].cc.cc_ce = ce;
767 fatab[ind].n = n;
768 numtrans++;
769 } else {
770 if (prev != 0) {
771 dprintf(" delta(%d,[%o,%o])",
772 s, cs, ce);
773 dprintf("= %d, ind = %d\n",
774 prev, ind+1, NULL);
775 fatab[++ind].cc.cc_ns = ns;
776 fatab[ind].cc.cc_cs = cs;
777 fatab[ind].cc.cc_ne = ne;
778 fatab[ind].cc.cc_ce = ce;
779 fatab[ind].n = prev;
780 numtrans++;
781 }
782 }
783 }
784 tenter:
785 if ((pfa = (struct fa *)malloc((numtrans + 1)
786 * sizeof (struct fa))) == NULL)
787 overflo();
788 where[s] = pfa;
789 if (finflg)
790 pfa->cc.h = -1; /* s is a final state */
791 else
792 pfa->cc.h = numtrans;
793 pfa->st = 0;
794 for (i = 1, pfa += 1; i <= numtrans; i++, pfa++) {
795 pfa->cc.s = fatab[i].cc;
796 pfa->st = (struct fa *)fatab[i].n;
797 }
798 }
799 for (i = 0; i <= n; i++) {
800 if (i != 0) /* state[0] is freed later in freetr() */
801 xfree(state[i]); /* free state[i] */
802 pfa = where[i];
803 pfa->st = where[0];
804 dprintf("state %d: (%o)\n", i, pfa, NULL);
805 dprintf(" numtrans = %d, default = %o\n",
806 pfa->cc.h, pfa->st, NULL);
807 for (k = 1; k <= pfa->cc.h; k++) {
808 (pfa+k)->st = where[(int)(pfa+k)->st];
809 dprintf(" char = [%o,%o], nextstate = %o\n",
810 (pfa+k)->cc.s.cc_cs, (pfa+k)->cc.s.cc_ce,
811 (pfa+k)->st);
812 }
813 }
814 pfa = where[0];
815 if ((num = pfa->cc.h) < 0)
816 return (where[0]);
817 for (pfa += num; num; num--, pfa--)
818 if (pfa->cc.s.cc_ns == hatcn && pfa->cc.s.cc_cs == HAT) {
819 return (pfa->st);
820 }
821 return (where[0]);
822 }
823
824
825 /*
826 * Insert CCL entry to CCL table with maintain optimized order.
827 */
828 static int
insert_table(ccl_chars_t * table_base,int table_size,int ns,wchar_t cs,int ne,wchar_t ce)829 insert_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
830 int ne, wchar_t ce)
831 {
832 int i;
833 int tns, tne;
834 wchar_t tcs, tce;
835 ccl_chars_t *table;
836 ccl_chars_t *saved_table;
837 int saved_i;
838
839
840
841
842 dprintf("Inserting {%o, %o} to table %o\n", cs, ce, table_base);
843 /*
844 * Searching the table to find out where should put the new item.
845 */
846 for (i = 0, table = table_base; i < table_size; i++, table++) {
847 tns = table->cc_ns;
848 tcs = table->cc_cs;
849 tne = table->cc_ne;
850 tce = table->cc_ce;
851 if (MLCMPLT(ne, ce, tns, (tcs - 1))) {
852 /*
853 * Quick! insert to font of current table entries.
854 */
855 qinsert:
856 table_size++;
857 for (; i < table_size; i++, table++) {
858 tns = table->cc_ns;
859 tcs = table->cc_cs;
860 tne = table->cc_ne;
861 tce = table->cc_ce;
862 table->cc_ns = ns;
863 table->cc_cs = cs;
864 table->cc_ne = ne;
865 table->cc_ce = ce;
866 ns = tns;
867 cs = tcs;
868 ne = tne;
869 ce = tce;
870 }
871 goto add_null;
872 } else if (MLCMPLE(tns, (tcs - 1), ns, cs) &&
873 MLCMPLE(ns, cs, tne, (tce + 1))) {
874 /*
875 * Starting point is within the current entry.
876 */
877 if (MLCMPGT(tns, tcs, ns, cs)) {
878 table->cc_ns = ns;
879 table->cc_cs = cs;
880 }
881 if (MLCMPLE(ne, ce, tne, tce)) {
882 return (table_size);
883 }
884 goto combine;
885 }
886 }
887
888
889 /*
890 * Adding new one to end of table.
891 */
892 table->cc_ns = ns;
893 table->cc_cs = cs;
894 table->cc_ne = ne;
895 table->cc_ce = ce;
896
897
898 table_size++;
899 goto add_null;
900
901
902
903
904 combine:
905 /*
906 * Check and try to combine the new entry with rest of entries.
907 */
908 if ((i + 1) >= table_size) {
909 table->cc_ne = ne;
910 table->cc_ce = ce;
911 return (table_size);
912 }
913
914
915 saved_table = table++;
916 saved_i = i++;
917
918
919 /*
920 * Finding the spot where we should put the end point.
921 */
922 for (; i < table_size; i++, table++) {
923 if (MLCMPLT(ne, ce, table->cc_ns, (table->cc_cs - 1))) {
924 break;
925 } else
926 if (MLCMPLE(table->cc_ns, (table->cc_cs - 1), ne, ce) &&
927 MLCMPLE(ne, ce, table->cc_ne, (table->cc_ce + 1))) {
928 /*
929 * Tack with this table.
930 */
931 if (MLCMPLT(ne, ce, table->cc_ne, table->cc_ce)) {
932 ne = table->cc_ne;
933 ce = table->cc_ce;
934 }
935 table++;
936 i++;
937 break;
938 }
939 }
940
941
942 saved_table->cc_ne = ne;
943 saved_table->cc_ce = ce;
944 saved_i = table_size - (i - saved_i - 1);
945
946
947 /*
948 * Moving the rest of entries.
949 */
950 for (; i < table_size; i++, table++)
951 *(++saved_table) = *table;
952 table_size = saved_i;
953
954
955 add_null:
956 table_base[table_size].cc_cs = (wchar_t)0x0;
957 table_base[table_size].cc_ce = (wchar_t)0x0;
958
959
960 return (table_size);
961 }
962
963
964
965
966 static int
delete_table(ccl_chars_t * table_base,int table_size,int ns,wchar_t cs,int ne,wchar_t ce)967 delete_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
968 int ne, wchar_t ce)
969 {
970 int i;
971 int saved_i;
972 ccl_chars_t *table;
973 ccl_chars_t *saved_table;
974 int tns;
975 wchar_t tcs;
976 int tne;
977 wchar_t tce;
978
979
980
981
982 for (i = 0, table = table_base; i < table_size; i++, table++) {
983 tns = table->cc_ns;
984 tcs = table->cc_cs;
985 tne = table->cc_ne;
986 tce = table->cc_ce;
987 if (MLCMPLT(ne, ce, tns, tcs))
988 return (table_size);
989 else if (MLCMPLT(ne, ce, tne, tce)) {
990 if (MLCMPLE(ns, cs, tns, tcs)) {
991 /*
992 * Shrink type 1.
993 */
994 table->cc_ns = ne;
995 table->cc_cs = ce + 1;
996 return (table_size);
997
998 } else {
999 /*
1000 * Spliting !!
1001 */
1002 table->cc_ns = ne;
1003 table->cc_cs = ce + 1;
1004 tne = ns;
1005 tce = cs - 1;
1006 table_size++;
1007 for (; i < table_size; i++, table++) {
1008 ns = table->cc_ns;
1009 cs = table->cc_cs;
1010 ne = table->cc_ne;
1011 ce = table->cc_ce;
1012 table->cc_ns = tns;
1013 table->cc_cs = tcs;
1014 table->cc_ne = tne;
1015 table->cc_ce = tce;
1016 tns = ns;
1017 tcs = cs;
1018 tne = ne;
1019 tce = ce;
1020 }
1021 return (table_size);
1022 }
1023
1024 } else if (MLCMPLE(ns, cs, tne, tce)) {
1025 if (MLCMPGT(ns, cs, tns, tcs)) {
1026 /*
1027 * Shrink current table(type 2).
1028 */
1029 table->cc_ne = ns;
1030 table->cc_ce = cs - 1;
1031 table++;
1032 i++;
1033 }
1034 /*
1035 * Search for the end point.
1036 */
1037 saved_i = i;
1038 saved_table = table;
1039 for (; i < table_size; i++, table++) {
1040 if (MLCMPLT(ne, ce,
1041 table->cc_ns, table->cc_cs)) {
1042 /*
1043 * Easy point, no shrinks!
1044 */
1045 break;
1046
1047 } else if (MLCMPGT(table->cc_ne, table->cc_ce,
1048 ne, ce)) {
1049 /*
1050 * Shrinking...
1051 */
1052 table->cc_ns = ne;
1053 table->cc_cs = ce + 1;
1054 break;
1055 }
1056
1057
1058 }
1059 /*
1060 * Moving(removing) backword.
1061 */
1062 saved_i = table_size - (i - saved_i);
1063 for (; i < table_size; i++)
1064 *saved_table++ = *table++;
1065 return (saved_i);
1066 }
1067 }
1068 return (table_size);
1069 }
1070
1071
1072 #ifdef DEBUG
dump_table(ccl_chars_t * table,int size)1073 dump_table(ccl_chars_t *table, int size)
1074 {
1075 int i;
1076
1077
1078
1079
1080 if (! dbg)
1081 return;
1082
1083
1084 printf("Duming table %o with size %d\n", table, size);
1085 size++; /* To watch out NULL */
1086 for (i = 0; i < size; i++, table++) {
1087 printf("{%3o, %3o}, ", table->cc_cs, table->cc_ce);
1088 }
1089 printf("\n");
1090 }
1091 #endif /* DEBUG */
1092
1093
1094
1095 int
match(struct fa * pfa,wchar_t * p)1096 match(struct fa *pfa, wchar_t *p)
1097 {
1098 int count;
1099 int n, ns, ne;
1100 wchar_t c, cs, ce;
1101
1102
1103 if (p == 0)
1104 return (0);
1105 if (pfa->cc.h == 1) { /* fast test for first character, if possible */
1106 ns = (++pfa)->cc.s.cc_ns;
1107 cs = (pfa)->cc.s.cc_cs;
1108 ne = (pfa)->cc.s.cc_ne;
1109 ce = (pfa)->cc.s.cc_ce;
1110 do {
1111 c = *p;
1112 n = wcsetno(c);
1113 if (MLCMPLE(ns, cs, n, c) &&
1114 MLCMPLE(n, c, ne, ce)) {
1115 p++;
1116 pfa = pfa->st;
1117 goto adv;
1118 }
1119 } while (*p++ != 0);
1120 return (0);
1121 }
1122 adv: if ((count = pfa->cc.h) < 0)
1123 return (1);
1124 do {
1125 c = *p;
1126 n = wcsetno(c);
1127 for (pfa += count; count; count--, pfa--) {
1128 ns = (pfa)->cc.s.cc_ns;
1129 cs = (pfa)->cc.s.cc_cs;
1130 ne = (pfa)->cc.s.cc_ne;
1131 ce = (pfa)->cc.s.cc_ce;
1132 if (MLCMPLE(ns, cs, n, c) && MLCMPLE(n, c, ne, ce))
1133 break;
1134 }
1135 pfa = pfa->st;
1136 if ((count = pfa->cc.h) < 0)
1137 return (1);
1138 } while (*p++ != 0);
1139 return (0);
1140 }
1141