1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
26 */
27
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 #include "awk.def"
32 #include "stdio.h"
33 #include "awk.h"
34 #include <stdlib.h>
35
36
37 extern NODE *op2();
38 extern struct fa *cgotofn();
39 #define MAXLIN 256
40 #define NCHARS 128
41 #define NSTATES 256
42
43
44 #define type(v) v->nobj
45 #define left(v) v->narg[0]
46 #define right(v) v->narg[1]
47 #define parent(v) v->nnext
48
49
50 #define LEAF case CCL: case NCCL: case CHAR: case DOT:
51 #define UNARY case FINAL: case STAR: case PLUS: case QUEST:
52
53
54 /*
55 * encoding in tree NODEs:
56 * leaf (CCL, NCCL, CHAR, DOT): left is index,
57 * right contains value or pointer to value
58 * unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
59 * binary (CAT, OR): left and right are children
60 * parent contains pointer to parent
61 */
62
63
64 struct fa {
65 union {
66 ccl_chars_t s;
67 int h;
68 } cc;
69 #define MLCMPLT(m1, l1, m2, l2) ((m1 != m2 &&\
70 (int)m1 < (int)m2) ||\
71 (m1 == m2 && (int)l1 < (int)l2))
72 #define MLCMPLE(m1, l1, m2, l2) ((m1 != m2 &&\
73 (int)m1 <= (int)m2) ||\
74 (m1 == m2 && (int)l1 <= (int)l2))
75 #define MLCMPGT(m1, l1, m2, l2) ((m1 != m2 &&\
76 (int)m1 > (int)m2) ||\
77 (m1 == m2 && (int)l1 > (int)l2))
78 #define MAX_CODESET 3
79 struct fa *st;
80 };
81
82
83 int *state[NSTATES];
84 int *foll[MAXLIN];
85 int setvec[MAXLIN];
86 NODE *point[MAXLIN];
87
88
89 int setcnt;
90 int line;
91
92
93 static int ccln_member();
94 static int insert_table();
95 static int delete_table();
96 static void penter(NODE *p);
97 static void follow(NODE *v);
98 static void overflo(void);
99 static void cfoll(NODE *v);
100 static void freetr(NODE *p);
101 #ifdef DEBUG
102 #define ddump_table(t, s) dump_table(t, s)
103 #else
104 #define ddump_table(t, s)
105 #endif
106
107 struct fa *
makedfa(p)108 makedfa(p) /* returns dfa for tree pointed to by p */
109 NODE *p;
110 {
111 NODE *p1;
112 struct fa *fap;
113 p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0,
114 (NODE *) 0), (NODE *) 0), p);
115 /* put DOT STAR in front of reg. exp. */
116 p1 = op2(FINAL, p1, (NODE *) 0); /* install FINAL NODE */
117
118
119 line = 0;
120 penter(p1); /* enter parent pointers and leaf indices */
121 point[line] = p1; /* FINAL NODE */
122 setvec[0] = 1; /* for initial DOT STAR */
123 cfoll(p1); /* set up follow sets */
124 fap = cgotofn();
125 freetr(p1); /* add this when alloc works */
126 return (fap);
127 }
128
129 static void
penter(NODE * p)130 penter(NODE *p) /* set up parent pointers and leaf indices */
131 {
132 switch (type(p)) {
133 LEAF
134 left(p) = (NODE *)line;
135 point[line++] = p;
136 break;
137 UNARY
138 penter(left(p));
139 parent(left(p)) = p;
140 break;
141 case CAT:
142 case OR:
143 penter(left(p));
144 penter(right(p));
145 parent(left(p)) = p;
146 parent(right(p)) = p;
147 break;
148 default:
149 error(FATAL, "unknown type %d in penter\n", type(p));
150 break;
151 }
152 }
153
154 static void
freetr(NODE * p)155 freetr(NODE *p) /* free parse tree and follow sets */
156 {
157 switch (type(p)) {
158 LEAF
159 xfree(foll[(int)left(p)]);
160 xfree(p);
161 break;
162 UNARY
163 freetr(left(p));
164 xfree(p);
165 break;
166 case CAT:
167 case OR:
168 freetr(left(p));
169 freetr(right(p));
170 xfree(p);
171 break;
172 default:
173 error(FATAL, "unknown type %d in freetr", type(p));
174 break;
175 }
176 }
177 ccl_chars_t *
cclenter(wchar_t * p)178 cclenter(wchar_t *p)
179 {
180 int i, cn;
181 wchar_t c, pc;
182 wchar_t *op;
183 ccl_chars_t *new;
184 ccl_chars_t chars[MAXLIN];
185
186 op = p;
187 i = 0;
188 while ((c = *p++) != 0) {
189 if (c == '-' && i > 0) {
190 if (*p != 0) {
191 /*
192 * If there are not in same code set, the
193 * class should be ignore (make two independent
194 * characters)!
195 */
196 c = *p++;
197 cn = wcsetno(pc);
198 if (cn != wcsetno(c) || pc > c)
199 goto char_array;
200 i = insert_table(chars, i, cn, pc, cn, c);
201 continue;
202 }
203 }
204 char_array:
205 if (i >= MAXLIN)
206 overflo();
207 cn = wcsetno(c);
208 i = insert_table(chars, i, cn, c, cn, c);
209 pc = c;
210 }
211 dprintf("cclenter: in = |%ws|, ", op, NULL, NULL);
212 xfree(op);
213 i = (i + 1) * sizeof (ccl_chars_t);
214 if ((new = (ccl_chars_t *)malloc(i)) == NULL)
215 error(FATAL, "out of space in cclenter on %s", op);
216 (void) memcpy((char *)new, (char *)chars, i);
217 ddump_table(chars, i / 4);
218
219
220 return (new);
221 }
222
223 static void
overflo(void)224 overflo(void)
225 {
226 error(FATAL, "regular expression too long\n");
227 }
228
229 static void
cfoll(NODE * v)230 cfoll(NODE *v) /* enter follow set of each leaf of vertex v into foll[leaf] */
231 {
232 int i;
233 int prev;
234 int *add();
235
236
237 switch (type(v)) {
238 LEAF
239 setcnt = 0;
240 for (i = 1; i <= line; i++)
241 setvec[i] = 0;
242 follow(v);
243 foll[(int)left(v)] = add(setcnt);
244 break;
245 UNARY
246 cfoll(left(v));
247 break;
248 case CAT:
249 case OR:
250 cfoll(left(v));
251 cfoll(right(v));
252 break;
253 default:
254 error(FATAL, "unknown type %d in cfoll", type(v));
255 }
256 }
257
258 int
first(NODE * p)259 first(NODE *p) /* collects initially active leaves of p into setvec */
260 /* returns 0 or 1 depending on whether p matches empty string */
261 {
262 int b;
263
264
265 switch (type(p)) {
266 LEAF
267 if (setvec[(int)left(p)] != 1) {
268 setvec[(int)left(p)] = 1;
269 setcnt++;
270 }
271 if (type(p) == CCL &&
272 (*(ccl_chars_t *)right(p)).cc_cs == (wchar_t)0x0)
273 return (0); /* empty CCL */
274 else return (1);
275 case FINAL:
276 case PLUS:
277 if (first(left(p)) == 0)
278 return (0);
279 return (1);
280 case STAR:
281 case QUEST:
282 first(left(p));
283 return (0);
284 case CAT:
285 if (first(left(p)) == 0 && first(right(p)) == 0)
286 return (0);
287 return (1);
288 case OR:
289 b = first(right(p));
290 if (first(left(p)) == 0 || b == 0)
291 return (0);
292 return (1);
293 }
294 error(FATAL, "unknown type %d in first\n", type(p));
295 return (-1);
296 }
297
298 static void
follow(NODE * v)299 follow(NODE *v)
300 /* collects leaves that can follow v into setvec */
301 {
302 NODE *p;
303
304
305 if (type(v) == FINAL)
306 return;
307 p = parent(v);
308 switch (type(p)) {
309 case STAR:
310 case PLUS: first(v);
311 follow(p);
312 return;
313
314
315 case OR:
316 case QUEST: follow(p);
317 return;
318
319
320 case CAT: if (v == left(p)) { /* v is left child of p */
321 if (first(right(p)) == 0) {
322 follow(p);
323 return;
324 }
325 } else /* v is right child */
326 follow(p);
327 return;
328 case FINAL: if (setvec[line] != 1) {
329 setvec[line] = 1;
330 setcnt++;
331 }
332 return;
333 }
334 }
335
336
337 /*
338 * There are three type of functions for checking member ship. Because I have
339 * been changed structure of CCL tables. And some CCL tables end up with NULLs
340 * but someone has length and will includes NULLs in table as one of data.
341 * Please note, CCL table which has a length data and data will include NULLs,
342 * it only used within a this source file("b.c").
343 */
344
345 int /* is cs thru ce in s? */
ccl_member(int ns,wchar_t cs,int ne,wchar_t ce,ccl_chars_t * s)346 ccl_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s)
347 {
348 /*
349 * The specified range(cs, ce) must be beside the range between
350 * s->cc_start and s->cc_end to determine member.
351 */
352 while (s->cc_cs || s->cc_ce) {
353 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
354 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
355 return (1);
356 s++;
357 }
358 return (0);
359 }
360
361
362 static int /* is cs thru ce in s? */
ccln_member(int ns,wchar_t cs,int ne,wchar_t ce,ccl_chars_t * s,int n)363 ccln_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s, int n)
364 {
365 /*
366 * The specified range(cs, ce) must be beside the range between
367 * s->cc_start and s->cc_end to determine member.
368 */
369 while (n-- > 0) {
370 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) &&
371 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce))
372 return (1);
373 s++;
374 }
375 return (0);
376 }
377
378
379 int
member(wchar_t c,wchar_t * s)380 member(wchar_t c, wchar_t *s) /* is c in s? */
381 {
382 while (*s)
383 if (c == *s++)
384 return (1);
385 return (0);
386 }
387
388 int
notin(int ** array,int n,int * prev)389 notin(int **array, int n, int *prev) /* is setvec in array[0] thru array[n]? */
390 {
391 int i, j;
392 int *ptr;
393 for (i = 0; i <= n; i++) {
394 ptr = array[i];
395 if (*ptr == setcnt) {
396 for (j = 0; j < setcnt; j++)
397 if (setvec[*(++ptr)] != 1) goto nxt;
398 *prev = i;
399 return (0);
400 }
401 nxt: /* dummy */;
402 }
403 return (1);
404 }
405
406
407 int *
add(int n)408 add(int n)
409 { /* remember setvec */
410 int *ptr, *p;
411 int i;
412 if ((p = ptr = (int *)malloc((n+1)*sizeof (int))) == NULL)
413 overflo();
414 *ptr = n;
415 dprintf("add(%d)\n", n, NULL, NULL);
416 for (i = 1; i <= line; i++)
417 if (setvec[i] == 1) {
418 *(++ptr) = i;
419 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
420 }
421 dprintf("\n", NULL, NULL, NULL);
422 return (p);
423 }
424
425
426 struct fa *
cgotofn()427 cgotofn()
428 {
429 int i, k;
430 int *ptr;
431 int ns, ne;
432 wchar_t cs, ce;
433 ccl_chars_t *p;
434 NODE *cp;
435 int j, n, s, ind, numtrans;
436 int finflg;
437 int curpos, num, prev;
438 struct fa *where[NSTATES];
439
440
441 struct {
442 ccl_chars_t cc;
443 int n;
444 } fatab[257];
445 struct fa *pfa;
446
447
448 char index[MAXLIN];
449 char iposns[MAXLIN];
450 int sposns[MAXLIN];
451 int spmax, spinit;
452 ccl_chars_t symbol[NCHARS];
453 ccl_chars_t isyms[NCHARS];
454 ccl_chars_t ssyms[NCHARS];
455 int ssmax, symax, ismax, ssinit;
456
457
458 wchar_t hat;
459 int hatcn;
460
461
462 for (i = 0; i <= line; i++) index[i] = iposns[i] = setvec[i] = 0;
463 isyms[0].cc_cs = isyms[0].cc_ce = (wchar_t)0x0;
464 for (i = 0; i < NCHARS; i++)
465 isyms[i] = symbol[i] = ssyms[i] = isyms[0];
466 symax = 0;
467 setcnt = 0;
468 /* compute initial positions and symbols of state 0 */
469 ismax = 0;
470 ssmax = 0;
471 ptr = state[0] = foll[0];
472 spinit = *ptr;
473 hat = HAT;
474 hatcn = wcsetno(hat);
475 for (i = 0; i < spinit; i++) {
476 curpos = *(++ptr);
477 sposns[i] = curpos;
478 iposns[curpos] = 1;
479 cp = point[curpos];
480 dprintf("i= %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
481 switch (type(cp)) {
482 case CHAR:
483 k = (int)right(cp);
484 ns = wcsetno(k);
485 if (! ccln_member(ns, k, ns, k,
486 isyms, ismax)) {
487 ismax = insert_table(isyms, ismax,
488 ns, k, ns, k);
489 }
490 ssyms[ssmax].cc_ns = ns;
491 ssyms[ssmax].cc_cs = k;
492 ssyms[ssmax].cc_ne = ns;
493 ssyms[ssmax++].cc_ce = k;
494 break;
495 case DOT:
496 cs = WC_VERY_SMALL;
497 ns = 0;
498 ce = HAT - 1;
499 ne = hatcn;
500 if (! ccln_member(ns, cs, ne, ce,
501 isyms, ismax)) {
502 ismax = insert_table(isyms, ismax,
503 ns, cs, ne, ce);
504 }
505 ssyms[ssmax].cc_cs = cs;
506 ssyms[ssmax].cc_ns = ns;
507 ssyms[ssmax].cc_ce = ce;
508 ssyms[ssmax++].cc_ne = ne;
509 cs = HAT + 1;
510 ns = hatcn;
511 ce = WC_VERY_LARGE;
512 ne = MAX_CODESET;
513 if (! ccln_member(ns, cs, ne, ce,
514 isyms, ismax)) {
515 ismax = insert_table(isyms, ismax,
516 ns, cs, ne, ce);
517 }
518 ssyms[ssmax].cc_cs = cs;
519 ssyms[ssmax].cc_ns = ns;
520 ssyms[ssmax].cc_ce = ce;
521 ssyms[ssmax++].cc_ne = ne;
522 break;
523 case CCL:
524 cs = HAT;
525 ns = hatcn;
526 for (p = (ccl_chars_t *)right(cp);
527 p->cc_cs; p++) {
528 if ((p->cc_ns != ns ||\
529 p->cc_cs != cs) &&\
530 !ccln_member(p->cc_ns, p->cc_cs,
531 p->cc_ne, p->cc_ce, isyms, ismax)) {
532 ismax = insert_table(isyms,
533 ismax, p->cc_ns, p->cc_cs, p->cc_ne, p->cc_ce);
534 }
535 ssyms[ssmax++] = *p;
536 }
537 break;
538 case NCCL:
539 ns = 0;
540 cs = WC_VERY_SMALL;
541 for (p = (ccl_chars_t *)right(cp);
542 p->cc_cs; p++) {
543 if ((ns != hatcn || p->cc_cs != HAT) &&
544 ! ccln_member(ns, cs,
545 p->cc_ns, p->cc_cs-1,
546 isyms, ismax)) {
547 ismax = insert_table(isyms,
548 ismax,
549 ns, cs,
550 p->cc_ns,
551 p->cc_cs-1);
552 }
553 ssyms[ssmax].cc_ns = ns;
554 ssyms[ssmax].cc_cs = cs;
555 ssyms[ssmax].cc_ne = p->cc_ns;
556 ssyms[ssmax++].cc_ce = p->cc_cs-1;
557 if (p->cc_ce == (wchar_t)0x0) {
558 ns = p->cc_ns;
559 cs = p->cc_cs + 1;
560
561 } else {
562 ns = p->cc_ne;
563 cs = p->cc_ce + 1;
564 }
565 }
566 if ((ns != hatcn || cs != HAT) &&
567 ! ccln_member(ns, cs,
568 MAX_CODESET, WC_VERY_LARGE,
569 isyms, ismax)) {
570 ismax = insert_table(isyms, ismax,
571 ns, cs, MAX_CODESET,
572 WC_VERY_LARGE);
573 }
574 ssyms[ssmax].cc_ns = ns;
575 ssyms[ssmax].cc_cs = cs;
576 ssyms[ssmax].cc_ne = MAX_CODESET;
577 ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
578 break;
579 }
580 }
581 ssinit = ssmax;
582 symax = 0;
583 n = 0;
584 for (s = 0; s <= n; s++) {
585 dprintf("s = %d\n", s, NULL, NULL);
586 ind = 0;
587 numtrans = 0;
588 finflg = 0;
589 if (*(state[s] + *state[s]) == line) { /* s final? */
590 finflg = 1;
591 goto tenter;
592 }
593 spmax = spinit;
594 ssmax = ssinit;
595 ptr = state[s];
596 num = *ptr;
597 for (i = 0; i < num; i++) {
598 curpos = *(++ptr);
599 if (iposns[curpos] != 1 && index[curpos] != 1) {
600 index[curpos] = 1;
601 sposns[spmax++] = curpos;
602 }
603 cp = point[curpos];
604 switch (type(cp)) {
605 case CHAR:
606 k = (int)right(cp);
607 ns = wcsetno(k);
608 if (! ccln_member(ns, k, ns, k,
609 isyms, ismax) &&
610 ! ccln_member(ns, k, ns, k,
611 symbol, symax)) {
612 symax = insert_table(symbol,
613 symax,
614 ns, k,
615 ns, k);
616 }
617 ssyms[ssmax].cc_ns = ns;
618 ssyms[ssmax].cc_cs = k;
619 ssyms[ssmax].cc_ne = ns;
620 ssyms[ssmax++].cc_ce = k;
621 break;
622 case DOT:
623 cs = WC_VERY_SMALL;
624 ns = 0;
625 ce = HAT - 1;
626 ne = hatcn;
627 if (! ccln_member(ns, cs, ne, ce,
628 isyms, ismax) &&
629 ! ccln_member(ns, cs, ne, ce,
630 symbol, symax)) {
631 symax = insert_table(symbol,
632 symax,
633 ns, cs,
634 ne, ce);
635 }
636 ssyms[ssmax].cc_cs = cs;
637 ssyms[ssmax].cc_ns = ns;
638 ssyms[ssmax].cc_ce = ce;
639 ssyms[ssmax++].cc_ne = ne;
640 cs = HAT + 1;
641 ns = hatcn;
642 ce = WC_VERY_LARGE;
643 ne = MAX_CODESET;
644 if (! ccln_member(ns, cs, ne, ce,
645 isyms, ismax) &&
646 ! ccln_member(ns, cs, ne, ce,
647 symbol, symax)) {
648 symax = insert_table(symbol,
649 symax,
650 ns, cs,
651 ne, ce);
652 }
653 ssyms[ssmax].cc_cs = cs;
654 ssyms[ssmax].cc_ns = ns;
655 ssyms[ssmax].cc_ce = ce;
656 ssyms[ssmax++].cc_ne = ne;
657 break;
658 case CCL:
659 cs = HAT;
660 ns = hatcn;
661 for (p = (ccl_chars_t *)right(cp);
662 p->cc_cs; p++) {
663 if ((p->cc_ns != ns ||
664 p->cc_cs != cs) &&
665 ! ccln_member(p->cc_ns,
666 p->cc_cs, p->cc_ne,
667 p->cc_ce, isyms, ismax) &&
668 !ccln_member(p->cc_ns, p->cc_cs,
669 p->cc_ne, p->cc_ce, symbol,
670 symax)) {
671 symax = insert_table(
672 symbol, symax, p->cc_ns,
673 p->cc_cs, p->cc_ne, p->cc_ce);
674 }
675 ssyms[ssmax++] = *p;
676 }
677 break;
678 case NCCL:
679 ns = 0;
680 cs = WC_VERY_SMALL;
681 for (p = (ccl_chars_t *)right(cp); p->cc_cs; p++) {
682 if ((p->cc_ns != hatcn || p->cc_cs != HAT) &&
683 ! ccln_member(ns, cs, p->cc_ns,
684 p->cc_cs-1, isyms, ismax) &&
685 ! ccln_member(ns, cs, p->cc_ns,
686 p->cc_cs-1, symbol, symax)) {
687 symax = insert_table(symbol,
688 symax, ns, cs, p->cc_ns, p->cc_cs-1);
689 }
690 ssyms[ssmax].cc_ns = ns;
691 ssyms[ssmax].cc_cs = cs;
692 ssyms[ssmax].cc_ne = p->cc_ns;
693 ssyms[ssmax++].cc_ce
694 = p->cc_cs-1;
695 if (p->cc_ce == (wchar_t)0x0) {
696 ns = p->cc_ns;
697 cs = p->cc_cs + 1;
698
699 } else {
700 ns = p->cc_ne;
701 cs = p->cc_ce + 1;
702 }
703 }
704 if ((ns != hatcn || cs != HAT) && ! ccln_member(ns, cs,
705 MAX_CODESET, WC_VERY_LARGE, isyms, ismax) &&
706 ! ccln_member(ns, cs, MAX_CODESET,
707 WC_VERY_LARGE, symbol, symax)) {
708 symax = insert_table(symbol, symax, ns, cs,
709 MAX_CODESET,
710 WC_VERY_LARGE);
711 }
712 ssyms[ssmax].cc_ns = ns;
713 ssyms[ssmax].cc_cs = cs;
714 ssyms[ssmax].cc_ne = MAX_CODESET;
715 ssyms[ssmax++].cc_ce = WC_VERY_LARGE;
716 break;
717 }
718 }
719 for (j = 0; j < ssmax; j++) { /* nextstate(s, ssyms[j]) */
720 ns = ssyms[j].cc_ns;
721 cs = ssyms[j].cc_cs;
722 ne = ssyms[j].cc_ne;
723 ce = ssyms[j].cc_ce;
724 dprintf("j = %d, cs = %o, ce = %o\n", j, cs, ce);
725 symax = delete_table(symbol, symax, ns, cs, ne, ce);
726 setcnt = 0;
727 for (k = 0; k <= line; k++) setvec[k] = 0;
728 for (i = 0; i < spmax; i++) {
729 index[sposns[i]] = 0;
730 cp = point[sposns[i]];
731 if ((k = type(cp)) != FINAL) {
732 if (k == CHAR && ns == ne && cs == ce &&
733 cs == (int)right(cp) ||
734 k == DOT || k == CCL &&
735 ccl_member(ns, cs, ne, ce,
736 (ccl_chars_t *)right(cp)) ||
737 k == NCCL &&
738 !ccl_member(ns, cs, ne, ce,
739 (ccl_chars_t *)right(cp))) {
740 ptr = foll[sposns[i]];
741 num = *ptr;
742 for (k = 0; k < num; k++) {
743 if (setvec[*(++ptr)] != 1 &&
744 iposns[*ptr] != 1) {
745 setvec[*ptr] = 1;
746 setcnt++;
747 }
748 }
749 }
750 }
751 } /* end nextstate */
752 if (notin(state, n, &prev)) {
753 if (n >= NSTATES - 1) {
754 printf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
755 overflo();
756 }
757 state[++n] = add(setcnt);
758 dprintf(" delta(%d,[%o,%o])",
759 s, cs, ce);
760 dprintf(" = %d, ind = %d\n", n, ind+1, NULL);
761 fatab[++ind].cc.cc_ns = ns;
762 fatab[ind].cc.cc_cs = cs;
763 fatab[ind].cc.cc_ne = ne;
764 fatab[ind].cc.cc_ce = ce;
765 fatab[ind].n = n;
766 numtrans++;
767 } else {
768 if (prev != 0) {
769 dprintf(" delta(%d,[%o,%o])",
770 s, cs, ce);
771 dprintf("= %d, ind = %d\n",
772 prev, ind+1, NULL);
773 fatab[++ind].cc.cc_ns = ns;
774 fatab[ind].cc.cc_cs = cs;
775 fatab[ind].cc.cc_ne = ne;
776 fatab[ind].cc.cc_ce = ce;
777 fatab[ind].n = prev;
778 numtrans++;
779 }
780 }
781 }
782 tenter:
783 if ((pfa = (struct fa *)malloc((numtrans + 1)
784 * sizeof (struct fa))) == NULL)
785 overflo();
786 where[s] = pfa;
787 if (finflg)
788 pfa->cc.h = -1; /* s is a final state */
789 else
790 pfa->cc.h = numtrans;
791 pfa->st = 0;
792 for (i = 1, pfa += 1; i <= numtrans; i++, pfa++) {
793 pfa->cc.s = fatab[i].cc;
794 pfa->st = (struct fa *)fatab[i].n;
795 }
796 }
797 for (i = 0; i <= n; i++) {
798 if (i != 0) /* state[0] is freed later in freetr() */
799 xfree(state[i]); /* free state[i] */
800 pfa = where[i];
801 pfa->st = where[0];
802 dprintf("state %d: (%o)\n", i, pfa, NULL);
803 dprintf(" numtrans = %d, default = %o\n",
804 pfa->cc.h, pfa->st, NULL);
805 for (k = 1; k <= pfa->cc.h; k++) {
806 (pfa+k)->st = where[(int)(pfa+k)->st];
807 dprintf(" char = [%o,%o], nextstate = %o\n",
808 (pfa+k)->cc.s.cc_cs, (pfa+k)->cc.s.cc_ce,
809 (pfa+k)->st);
810 }
811 }
812 pfa = where[0];
813 if ((num = pfa->cc.h) < 0)
814 return (where[0]);
815 for (pfa += num; num; num--, pfa--)
816 if (pfa->cc.s.cc_ns == hatcn && pfa->cc.s.cc_cs == HAT) {
817 return (pfa->st);
818 }
819 return (where[0]);
820 }
821
822
823 /*
824 * Insert CCL entry to CCL table with maintain optimized order.
825 */
826 static int
insert_table(ccl_chars_t * table_base,int table_size,int ns,wchar_t cs,int ne,wchar_t ce)827 insert_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
828 int ne, wchar_t ce)
829 {
830 int i;
831 int tns, tne;
832 wchar_t tcs, tce;
833 ccl_chars_t *table;
834 ccl_chars_t *saved_table;
835 int saved_i;
836
837
838
839
840 dprintf("Inserting {%o, %o} to table %o\n", cs, ce, table_base);
841 /*
842 * Searching the table to find out where should put the new item.
843 */
844 for (i = 0, table = table_base; i < table_size; i++, table++) {
845 tns = table->cc_ns;
846 tcs = table->cc_cs;
847 tne = table->cc_ne;
848 tce = table->cc_ce;
849 if (MLCMPLT(ne, ce, tns, (tcs - 1))) {
850 /*
851 * Quick! insert to font of current table entries.
852 */
853 table_size++;
854 for (; i < table_size; i++, table++) {
855 tns = table->cc_ns;
856 tcs = table->cc_cs;
857 tne = table->cc_ne;
858 tce = table->cc_ce;
859 table->cc_ns = ns;
860 table->cc_cs = cs;
861 table->cc_ne = ne;
862 table->cc_ce = ce;
863 ns = tns;
864 cs = tcs;
865 ne = tne;
866 ce = tce;
867 }
868 goto add_null;
869 } else if (MLCMPLE(tns, (tcs - 1), ns, cs) &&
870 MLCMPLE(ns, cs, tne, (tce + 1))) {
871 /*
872 * Starting point is within the current entry.
873 */
874 if (MLCMPGT(tns, tcs, ns, cs)) {
875 table->cc_ns = ns;
876 table->cc_cs = cs;
877 }
878 if (MLCMPLE(ne, ce, tne, tce)) {
879 return (table_size);
880 }
881 goto combine;
882 }
883 }
884
885
886 /*
887 * Adding new one to end of table.
888 */
889 table->cc_ns = ns;
890 table->cc_cs = cs;
891 table->cc_ne = ne;
892 table->cc_ce = ce;
893
894
895 table_size++;
896 goto add_null;
897
898
899
900
901 combine:
902 /*
903 * Check and try to combine the new entry with rest of entries.
904 */
905 if ((i + 1) >= table_size) {
906 table->cc_ne = ne;
907 table->cc_ce = ce;
908 return (table_size);
909 }
910
911
912 saved_table = table++;
913 saved_i = i++;
914
915
916 /*
917 * Finding the spot where we should put the end point.
918 */
919 for (; i < table_size; i++, table++) {
920 if (MLCMPLT(ne, ce, table->cc_ns, (table->cc_cs - 1))) {
921 break;
922 } else
923 if (MLCMPLE(table->cc_ns, (table->cc_cs - 1), ne, ce) &&
924 MLCMPLE(ne, ce, table->cc_ne, (table->cc_ce + 1))) {
925 /*
926 * Tack with this table.
927 */
928 if (MLCMPLT(ne, ce, table->cc_ne, table->cc_ce)) {
929 ne = table->cc_ne;
930 ce = table->cc_ce;
931 }
932 table++;
933 i++;
934 break;
935 }
936 }
937
938
939 saved_table->cc_ne = ne;
940 saved_table->cc_ce = ce;
941 saved_i = table_size - (i - saved_i - 1);
942
943
944 /*
945 * Moving the rest of entries.
946 */
947 for (; i < table_size; i++, table++)
948 *(++saved_table) = *table;
949 table_size = saved_i;
950
951
952 add_null:
953 table_base[table_size].cc_cs = (wchar_t)0x0;
954 table_base[table_size].cc_ce = (wchar_t)0x0;
955
956
957 return (table_size);
958 }
959
960
961
962
963 static int
delete_table(ccl_chars_t * table_base,int table_size,int ns,wchar_t cs,int ne,wchar_t ce)964 delete_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs,
965 int ne, wchar_t ce)
966 {
967 int i;
968 int saved_i;
969 ccl_chars_t *table;
970 ccl_chars_t *saved_table;
971 int tns;
972 wchar_t tcs;
973 int tne;
974 wchar_t tce;
975
976
977
978
979 for (i = 0, table = table_base; i < table_size; i++, table++) {
980 tns = table->cc_ns;
981 tcs = table->cc_cs;
982 tne = table->cc_ne;
983 tce = table->cc_ce;
984 if (MLCMPLT(ne, ce, tns, tcs))
985 return (table_size);
986 else if (MLCMPLT(ne, ce, tne, tce)) {
987 if (MLCMPLE(ns, cs, tns, tcs)) {
988 /*
989 * Shrink type 1.
990 */
991 table->cc_ns = ne;
992 table->cc_cs = ce + 1;
993 return (table_size);
994
995 } else {
996 /*
997 * Spliting !!
998 */
999 table->cc_ns = ne;
1000 table->cc_cs = ce + 1;
1001 tne = ns;
1002 tce = cs - 1;
1003 table_size++;
1004 for (; i < table_size; i++, table++) {
1005 ns = table->cc_ns;
1006 cs = table->cc_cs;
1007 ne = table->cc_ne;
1008 ce = table->cc_ce;
1009 table->cc_ns = tns;
1010 table->cc_cs = tcs;
1011 table->cc_ne = tne;
1012 table->cc_ce = tce;
1013 tns = ns;
1014 tcs = cs;
1015 tne = ne;
1016 tce = ce;
1017 }
1018 return (table_size);
1019 }
1020
1021 } else if (MLCMPLE(ns, cs, tne, tce)) {
1022 if (MLCMPGT(ns, cs, tns, tcs)) {
1023 /*
1024 * Shrink current table(type 2).
1025 */
1026 table->cc_ne = ns;
1027 table->cc_ce = cs - 1;
1028 table++;
1029 i++;
1030 }
1031 /*
1032 * Search for the end point.
1033 */
1034 saved_i = i;
1035 saved_table = table;
1036 for (; i < table_size; i++, table++) {
1037 if (MLCMPLT(ne, ce,
1038 table->cc_ns, table->cc_cs)) {
1039 /*
1040 * Easy point, no shrinks!
1041 */
1042 break;
1043
1044 } else if (MLCMPGT(table->cc_ne, table->cc_ce,
1045 ne, ce)) {
1046 /*
1047 * Shrinking...
1048 */
1049 table->cc_ns = ne;
1050 table->cc_cs = ce + 1;
1051 break;
1052 }
1053
1054
1055 }
1056 /*
1057 * Moving(removing) backword.
1058 */
1059 saved_i = table_size - (i - saved_i);
1060 for (; i < table_size; i++)
1061 *saved_table++ = *table++;
1062 return (saved_i);
1063 }
1064 }
1065 return (table_size);
1066 }
1067
1068
1069 #ifdef DEBUG
dump_table(ccl_chars_t * table,int size)1070 dump_table(ccl_chars_t *table, int size)
1071 {
1072 int i;
1073
1074
1075
1076
1077 if (! dbg)
1078 return;
1079
1080
1081 printf("Duming table %o with size %d\n", table, size);
1082 size++; /* To watch out NULL */
1083 for (i = 0; i < size; i++, table++) {
1084 printf("{%3o, %3o}, ", table->cc_cs, table->cc_ce);
1085 }
1086 printf("\n");
1087 }
1088 #endif /* DEBUG */
1089
1090
1091
1092 int
match(struct fa * pfa,wchar_t * p)1093 match(struct fa *pfa, wchar_t *p)
1094 {
1095 int count;
1096 int n, ns, ne;
1097 wchar_t c, cs, ce;
1098
1099
1100 if (p == 0)
1101 return (0);
1102 if (pfa->cc.h == 1) { /* fast test for first character, if possible */
1103 ns = (++pfa)->cc.s.cc_ns;
1104 cs = (pfa)->cc.s.cc_cs;
1105 ne = (pfa)->cc.s.cc_ne;
1106 ce = (pfa)->cc.s.cc_ce;
1107 do {
1108 c = *p;
1109 n = wcsetno(c);
1110 if (MLCMPLE(ns, cs, n, c) &&
1111 MLCMPLE(n, c, ne, ce)) {
1112 p++;
1113 pfa = pfa->st;
1114 goto adv;
1115 }
1116 } while (*p++ != 0);
1117 return (0);
1118 }
1119 adv: if ((count = pfa->cc.h) < 0)
1120 return (1);
1121 do {
1122 c = *p;
1123 n = wcsetno(c);
1124 for (pfa += count; count; count--, pfa--) {
1125 ns = (pfa)->cc.s.cc_ns;
1126 cs = (pfa)->cc.s.cc_cs;
1127 ne = (pfa)->cc.s.cc_ne;
1128 ce = (pfa)->cc.s.cc_ce;
1129 if (MLCMPLE(ns, cs, n, c) && MLCMPLE(n, c, ne, ce))
1130 break;
1131 }
1132 pfa = pfa->st;
1133 if ((count = pfa->cc.h) < 0)
1134 return (1);
1135 } while (*p++ != 0);
1136 return (0);
1137 }
1138