1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 23 /* All Rights Reserved */ 24 25 26 /* 27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include "awk.def" 34 #include "stdio.h" 35 #include "awk.h" 36 37 38 extern NODE *op2(); 39 extern struct fa *cgotofn(); 40 #define MAXLIN 256 41 #define NCHARS 128 42 #define NSTATES 256 43 44 45 #define type(v) v->nobj 46 #define left(v) v->narg[0] 47 #define right(v) v->narg[1] 48 #define parent(v) v->nnext 49 50 51 #define LEAF case CCL: case NCCL: case CHAR: case DOT: 52 #define UNARY case FINAL: case STAR: case PLUS: case QUEST: 53 54 55 /* 56 * encoding in tree NODEs: 57 * leaf (CCL, NCCL, CHAR, DOT): left is index, 58 * right contains value or pointer to value 59 * unary (FINAL, STAR, PLUS, QUEST): left is child, right is null 60 * binary (CAT, OR): left and right are children 61 * parent contains pointer to parent 62 */ 63 64 65 struct fa { 66 union { 67 ccl_chars_t s; 68 int h; 69 } cc; 70 #define MLCMPLT(m1, l1, m2, l2) ((m1 != m2 &&\ 71 (int)m1 < (int)m2) ||\ 72 (m1 == m2 && (int)l1 < (int)l2)) 73 #define MLCMPLE(m1, l1, m2, l2) ((m1 != m2 &&\ 74 (int)m1 <= (int)m2) ||\ 75 (m1 == m2 && (int)l1 <= (int)l2)) 76 #define MLCMPGT(m1, l1, m2, l2) ((m1 != m2 &&\ 77 (int)m1 > (int)m2) ||\ 78 (m1 == m2 && (int)l1 > (int)l2)) 79 #define MAX_CODESET 3 80 struct fa *st; 81 }; 82 83 84 int *state[NSTATES]; 85 int *foll[MAXLIN]; 86 int setvec[MAXLIN]; 87 NODE *point[MAXLIN]; 88 89 90 int setcnt; 91 int line; 92 93 94 static int ccln_member(); 95 static int insert_table(); 96 static int delete_table(); 97 #ifdef DEBUG 98 #define ddump_table(t, s) dump_table(t, s) 99 #else 100 #define ddump_table(t, s) 101 #endif 102 103 104 struct fa * 105 makedfa(p) /* returns dfa for tree pointed to by p */ 106 NODE *p; 107 { 108 NODE *p1; 109 struct fa *fap; 110 p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0, 111 (NODE *) 0), (NODE *) 0), p); 112 /* put DOT STAR in front of reg. exp. */ 113 p1 = op2(FINAL, p1, (NODE *) 0); /* install FINAL NODE */ 114 115 116 line = 0; 117 penter(p1); /* enter parent pointers and leaf indices */ 118 point[line] = p1; /* FINAL NODE */ 119 setvec[0] = 1; /* for initial DOT STAR */ 120 cfoll(p1); /* set up follow sets */ 121 fap = cgotofn(); 122 freetr(p1); /* add this when alloc works */ 123 return (fap); 124 } 125 126 127 penter(p) /* set up parent pointers and leaf indices */ 128 NODE *p; 129 { 130 switch (type(p)) { 131 LEAF 132 left(p) = (NODE *)line; 133 point[line++] = p; 134 break; 135 UNARY 136 penter(left(p)); 137 parent(left(p)) = p; 138 break; 139 case CAT: 140 case OR: 141 penter(left(p)); 142 penter(right(p)); 143 parent(left(p)) = p; 144 parent(right(p)) = p; 145 break; 146 default: 147 error(FATAL, "unknown type %d in penter\n", type(p)); 148 break; 149 } 150 } 151 152 153 freetr(p) /* free parse tree and follow sets */ 154 NODE *p; 155 { 156 switch (type(p)) { 157 LEAF 158 xfree(foll[(int)left(p)]); 159 xfree(p); 160 break; 161 UNARY 162 freetr(left(p)); 163 xfree(p); 164 break; 165 case CAT: 166 case OR: 167 freetr(left(p)); 168 freetr(right(p)); 169 xfree(p); 170 break; 171 default: 172 error(FATAL, "unknown type %d in freetr", type(p)); 173 break; 174 } 175 } 176 ccl_chars_t * 177 cclenter(p) 178 register wchar_t *p; 179 { 180 register int i, cn; 181 register wchar_t c, pc; 182 register wchar_t *op; 183 register ccl_chars_t *new; 184 ccl_chars_t chars[MAXLIN]; 185 186 187 op = p; 188 i = 0; 189 while ((c = *p++) != 0) { 190 if (c == '-' && i > 0) { 191 if (*p != 0) { 192 /* 193 * If there are not in same code set, the 194 * class should be ignore (make two independent 195 * characters)! 196 */ 197 c = *p++; 198 cn = wcsetno(pc); 199 if (cn != wcsetno(c) || pc > c) 200 goto char_array; 201 i = insert_table(chars, i, cn, pc, cn, c); 202 continue; 203 } 204 } 205 char_array: 206 if (i >= MAXLIN) 207 overflo(); 208 cn = wcsetno(c); 209 i = insert_table(chars, i, cn, c, cn, c); 210 pc = c; 211 } 212 dprintf("cclenter: in = |%ws|, ", op, NULL, NULL); 213 xfree(op); 214 i = (i + 1) * sizeof (ccl_chars_t); 215 if ((new = (ccl_chars_t *)malloc(i)) == NULL) 216 error(FATAL, "out of space in cclenter on %s", op); 217 (void) memcpy((char *)new, (char *)chars, i); 218 ddump_table(chars, i / 4); 219 220 221 return (new); 222 } 223 224 225 overflo() 226 { 227 error(FATAL, "regular expression too long\n"); 228 } 229 230 231 cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ 232 register NODE *v; 233 { 234 register i; 235 int prev; 236 int *add(); 237 238 239 switch (type(v)) { 240 LEAF 241 setcnt = 0; 242 for (i = 1; i <= line; i++) 243 setvec[i] = 0; 244 follow(v); 245 foll[(int)left(v)] = add(setcnt); 246 break; 247 UNARY 248 cfoll(left(v)); 249 break; 250 case CAT: 251 case OR: 252 cfoll(left(v)); 253 cfoll(right(v)); 254 break; 255 default: 256 error(FATAL, "unknown type %d in cfoll", type(v)); 257 } 258 } 259 260 261 first(p) /* collects initially active leaves of p into setvec */ 262 register NODE *p; 263 /* returns 0 or 1 depending on whether p matches empty string */ 264 { 265 register b; 266 267 268 switch (type(p)) { 269 LEAF 270 if (setvec[(int)left(p)] != 1) { 271 setvec[(int)left(p)] = 1; 272 setcnt++; 273 } 274 if (type(p) == CCL && 275 (*(ccl_chars_t *)right(p)).cc_cs == (wchar_t)0x0) 276 return (0); /* empty CCL */ 277 else return (1); 278 case FINAL: 279 case PLUS: 280 if (first(left(p)) == 0) 281 return (0); 282 return (1); 283 case STAR: 284 case QUEST: 285 first(left(p)); 286 return (0); 287 case CAT: 288 if (first(left(p)) == 0 && first(right(p)) == 0) 289 return (0); 290 return (1); 291 case OR: 292 b = first(right(p)); 293 if (first(left(p)) == 0 || b == 0) 294 return (0); 295 return (1); 296 } 297 error(FATAL, "unknown type %d in first\n", type(p)); 298 return (-1); 299 } 300 301 302 follow(v) 303 NODE *v; /* collects leaves that can follow v into setvec */ 304 { 305 NODE *p; 306 307 308 if (type(v) == FINAL) 309 return; 310 p = parent(v); 311 switch (type(p)) { 312 case STAR: 313 case PLUS: first(v); 314 follow(p); 315 return; 316 317 318 case OR: 319 case QUEST: follow(p); 320 return; 321 322 323 case CAT: if (v == left(p)) { /* v is left child of p */ 324 if (first(right(p)) == 0) { 325 follow(p); 326 return; 327 } 328 } else /* v is right child */ 329 follow(p); 330 return; 331 case FINAL: if (setvec[line] != 1) { 332 setvec[line] = 1; 333 setcnt++; 334 } 335 return; 336 } 337 } 338 339 340 /* 341 * There are three type of functions for checking member ship. Because I have 342 * been changed structure of CCL tables. And some CCL tables end up with NULLs 343 * but someone has length and will includes NULLs in table as one of data. 344 * Please note, CCL table which has a length data and data will include NULLs, 345 * it only used within a this source file("b.c"). 346 */ 347 348 349 ccl_member(ns, cs, ne, ce, s) /* is cs thru ce in s? */ 350 register int ns; 351 register wchar_t cs; 352 register int ne; 353 register wchar_t ce; 354 register ccl_chars_t *s; 355 { 356 /* 357 * The specified range(cs, ce) must be beside the range between 358 * s->cc_start and s->cc_end to determine member. 359 */ 360 while (s->cc_cs || s->cc_ce) { 361 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) && 362 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce)) 363 return (1); 364 s++; 365 } 366 return (0); 367 } 368 369 370 static 371 ccln_member(ns, cs, ne, ce, s, n) /* is cs thru ce in s? */ 372 register int ns; 373 register wchar_t cs; 374 register int ne; 375 register wchar_t ce; 376 register ccl_chars_t *s; 377 register int n; 378 { 379 /* 380 * The specified range(cs, ce) must be beside the range between 381 * s->cc_start and s->cc_end to determine member. 382 */ 383 while (n-- > 0) { 384 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) && 385 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce)) 386 return (1); 387 s++; 388 } 389 return (0); 390 } 391 392 393 member(c, s) /* is c in s? */ 394 register wchar_t c, *s; 395 { 396 while (*s) 397 if (c == *s++) 398 return (1); 399 return (0); 400 } 401 402 403 notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ 404 int **array; 405 int *prev; { 406 register i, j; 407 int *ptr; 408 for (i = 0; i <= n; i++) { 409 ptr = array[i]; 410 if (*ptr == setcnt) { 411 for (j = 0; j < setcnt; j++) 412 if (setvec[*(++ptr)] != 1) goto nxt; 413 *prev = i; 414 return (0); 415 } 416 nxt: /* dummy */; 417 } 418 return (1); 419 } 420 421 422 int *add(n) { /* remember setvec */ 423 int *ptr, *p; 424 register i; 425 if ((p = ptr = (int *)malloc((n+1)*sizeof (int))) == NULL) 426 overflo(); 427 *ptr = n; 428 dprintf("add(%d)\n", n, NULL, NULL); 429 for (i = 1; i <= line; i++) 430 if (setvec[i] == 1) { 431 *(++ptr) = i; 432 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); 433 } 434 dprintf("\n", NULL, NULL, NULL); 435 return (p); 436 } 437 438 439 struct fa * 440 cgotofn() 441 { 442 register i, k; 443 register int *ptr; 444 register int ns, ne; 445 register wchar_t cs, ce; 446 register ccl_chars_t *p; 447 register NODE *cp; 448 int j, n, s, ind, numtrans; 449 int finflg; 450 int curpos, num, prev; 451 struct fa *where[NSTATES]; 452 453 454 struct { 455 ccl_chars_t cc; 456 int n; 457 } fatab[257]; 458 register struct fa *pfa; 459 460 461 char index[MAXLIN]; 462 char iposns[MAXLIN]; 463 int sposns[MAXLIN]; 464 int spmax, spinit; 465 ccl_chars_t symbol[NCHARS]; 466 ccl_chars_t isyms[NCHARS]; 467 ccl_chars_t ssyms[NCHARS]; 468 int ssmax, symax, ismax, ssinit; 469 470 471 wchar_t hat; 472 int hatcn; 473 474 475 for (i = 0; i <= line; i++) index[i] = iposns[i] = setvec[i] = 0; 476 isyms[0].cc_cs = isyms[0].cc_ce = (wchar_t)0x0; 477 for (i = 0; i < NCHARS; i++) 478 isyms[i] = symbol[i] = ssyms[i] = isyms[0]; 479 symax = 0; 480 setcnt = 0; 481 /* compute initial positions and symbols of state 0 */ 482 ismax = 0; 483 ssmax = 0; 484 ptr = state[0] = foll[0]; 485 spinit = *ptr; 486 hat = HAT; 487 hatcn = wcsetno(hat); 488 for (i = 0; i < spinit; i++) { 489 curpos = *(++ptr); 490 sposns[i] = curpos; 491 iposns[curpos] = 1; 492 cp = point[curpos]; 493 dprintf("i= %d, spinit = %d, curpos = %d\n", i, spinit, curpos); 494 switch (type(cp)) { 495 case CHAR: 496 k = (int)right(cp); 497 ns = wcsetno(k); 498 if (! ccln_member(ns, k, ns, k, 499 isyms, ismax)) { 500 ismax = insert_table(isyms, ismax, 501 ns, k, ns, k); 502 } 503 ssyms[ssmax].cc_ns = ns; 504 ssyms[ssmax].cc_cs = k; 505 ssyms[ssmax].cc_ne = ns; 506 ssyms[ssmax++].cc_ce = k; 507 break; 508 case DOT: 509 cs = WC_VERY_SMALL; 510 ns = 0; 511 ce = HAT - 1; 512 ne = hatcn; 513 if (! ccln_member(ns, cs, ne, ce, 514 isyms, ismax)) { 515 ismax = insert_table(isyms, ismax, 516 ns, cs, ne, ce); 517 } 518 ssyms[ssmax].cc_cs = cs; 519 ssyms[ssmax].cc_ns = ns; 520 ssyms[ssmax].cc_ce = ce; 521 ssyms[ssmax++].cc_ne = ne; 522 cs = HAT + 1; 523 ns = hatcn; 524 ce = WC_VERY_LARGE; 525 ne = MAX_CODESET; 526 if (! ccln_member(ns, cs, ne, ce, 527 isyms, ismax)) { 528 ismax = insert_table(isyms, ismax, 529 ns, cs, ne, ce); 530 } 531 ssyms[ssmax].cc_cs = cs; 532 ssyms[ssmax].cc_ns = ns; 533 ssyms[ssmax].cc_ce = ce; 534 ssyms[ssmax++].cc_ne = ne; 535 break; 536 case CCL: 537 cs = HAT; 538 ns = hatcn; 539 for (p = (ccl_chars_t *)right(cp); 540 p->cc_cs; p++) { 541 if ((p->cc_ns != ns ||\ 542 p->cc_cs != cs) &&\ 543 !ccln_member(p->cc_ns, p->cc_cs, 544 p->cc_ne, p->cc_ce, isyms, ismax)) { 545 ismax = insert_table(isyms, 546 ismax, p->cc_ns, p->cc_cs, p->cc_ne, p->cc_ce); 547 } 548 ssyms[ssmax++] = *p; 549 } 550 break; 551 case NCCL: 552 ns = 0; 553 cs = WC_VERY_SMALL; 554 for (p = (ccl_chars_t *)right(cp); 555 p->cc_cs; p++) { 556 if ((ns != hatcn || p->cc_cs != HAT) && 557 ! ccln_member(ns, cs, 558 p->cc_ns, p->cc_cs-1, 559 isyms, ismax)) { 560 ismax = insert_table(isyms, 561 ismax, 562 ns, cs, 563 p->cc_ns, 564 p->cc_cs-1); 565 } 566 ssyms[ssmax].cc_ns = ns; 567 ssyms[ssmax].cc_cs = cs; 568 ssyms[ssmax].cc_ne = p->cc_ns; 569 ssyms[ssmax++].cc_ce = p->cc_cs-1; 570 if (p->cc_ce == (wchar_t)0x0) { 571 ns = p->cc_ns; 572 cs = p->cc_cs + 1; 573 574 } else { 575 ns = p->cc_ne; 576 cs = p->cc_ce + 1; 577 } 578 } 579 if ((ns != hatcn || cs != HAT) && 580 ! ccln_member(ns, cs, 581 MAX_CODESET, WC_VERY_LARGE, 582 isyms, ismax)) { 583 ismax = insert_table(isyms, ismax, 584 ns, cs, MAX_CODESET, 585 WC_VERY_LARGE); 586 } 587 ssyms[ssmax].cc_ns = ns; 588 ssyms[ssmax].cc_cs = cs; 589 ssyms[ssmax].cc_ne = MAX_CODESET; 590 ssyms[ssmax++].cc_ce = WC_VERY_LARGE; 591 break; 592 } 593 } 594 ssinit = ssmax; 595 symax = 0; 596 n = 0; 597 for (s = 0; s <= n; s++) { 598 dprintf("s = %d\n", s, NULL, NULL); 599 ind = 0; 600 numtrans = 0; 601 finflg = 0; 602 if (*(state[s] + *state[s]) == line) { /* s final? */ 603 finflg = 1; 604 goto tenter; 605 } 606 spmax = spinit; 607 ssmax = ssinit; 608 ptr = state[s]; 609 num = *ptr; 610 for (i = 0; i < num; i++) { 611 curpos = *(++ptr); 612 if (iposns[curpos] != 1 && index[curpos] != 1) { 613 index[curpos] = 1; 614 sposns[spmax++] = curpos; 615 } 616 cp = point[curpos]; 617 switch (type(cp)) { 618 case CHAR: 619 k = (int)right(cp); 620 ns = wcsetno(k); 621 if (! ccln_member(ns, k, ns, k, 622 isyms, ismax) && 623 ! ccln_member(ns, k, ns, k, 624 symbol, symax)) { 625 symax = insert_table(symbol, 626 symax, 627 ns, k, 628 ns, k); 629 } 630 ssyms[ssmax].cc_ns = ns; 631 ssyms[ssmax].cc_cs = k; 632 ssyms[ssmax].cc_ne = ns; 633 ssyms[ssmax++].cc_ce = k; 634 break; 635 case DOT: 636 cs = WC_VERY_SMALL; 637 ns = 0; 638 ce = HAT - 1; 639 ne = hatcn; 640 if (! ccln_member(ns, cs, ne, ce, 641 isyms, ismax) && 642 ! ccln_member(ns, cs, ne, ce, 643 symbol, symax)) { 644 symax = insert_table(symbol, 645 symax, 646 ns, cs, 647 ne, ce); 648 } 649 ssyms[ssmax].cc_cs = cs; 650 ssyms[ssmax].cc_ns = ns; 651 ssyms[ssmax].cc_ce = ce; 652 ssyms[ssmax++].cc_ne = ne; 653 cs = HAT + 1; 654 ns = hatcn; 655 ce = WC_VERY_LARGE; 656 ne = MAX_CODESET; 657 if (! ccln_member(ns, cs, ne, ce, 658 isyms, ismax) && 659 ! ccln_member(ns, cs, ne, ce, 660 symbol, symax)) { 661 symax = insert_table(symbol, 662 symax, 663 ns, cs, 664 ne, ce); 665 } 666 ssyms[ssmax].cc_cs = cs; 667 ssyms[ssmax].cc_ns = ns; 668 ssyms[ssmax].cc_ce = ce; 669 ssyms[ssmax++].cc_ne = ne; 670 break; 671 case CCL: 672 cs = HAT; 673 ns = hatcn; 674 for (p = (ccl_chars_t *)right(cp); 675 p->cc_cs; p++) { 676 if ((p->cc_ns != ns || 677 p->cc_cs != cs) && 678 ! ccln_member(p->cc_ns, 679 p->cc_cs, p->cc_ne, 680 p->cc_ce, isyms, ismax) && 681 !ccln_member(p->cc_ns, p->cc_cs, 682 p->cc_ne, p->cc_ce, symbol, 683 symax)) { 684 symax = insert_table( 685 symbol, symax, p->cc_ns, 686 p->cc_cs, p->cc_ne, p->cc_ce); 687 } 688 ssyms[ssmax++] = *p; 689 } 690 break; 691 case NCCL: 692 ns = 0; 693 cs = WC_VERY_SMALL; 694 for (p = (ccl_chars_t *)right(cp); p->cc_cs; p++) { 695 if ((p->cc_ns != hatcn || p->cc_cs != HAT) && 696 ! ccln_member(ns, cs, p->cc_ns, 697 p->cc_cs-1, isyms, ismax) && 698 ! ccln_member(ns, cs, p->cc_ns, 699 p->cc_cs-1, symbol, symax)) { 700 symax = insert_table(symbol, 701 symax, ns, cs, p->cc_ns, p->cc_cs-1); 702 } 703 ssyms[ssmax].cc_ns = ns; 704 ssyms[ssmax].cc_cs = cs; 705 ssyms[ssmax].cc_ne = p->cc_ns; 706 ssyms[ssmax++].cc_ce 707 = p->cc_cs-1; 708 if (p->cc_ce == (wchar_t)0x0) { 709 ns = p->cc_ns; 710 cs = p->cc_cs + 1; 711 712 } else { 713 ns = p->cc_ne; 714 cs = p->cc_ce + 1; 715 } 716 } 717 if ((ns != hatcn || cs != HAT) && ! ccln_member(ns, cs, 718 MAX_CODESET, WC_VERY_LARGE, isyms, ismax) && 719 ! ccln_member(ns, cs, MAX_CODESET, 720 WC_VERY_LARGE, symbol, symax)) { 721 symax = insert_table(symbol, symax, ns, cs, 722 MAX_CODESET, 723 WC_VERY_LARGE); 724 } 725 ssyms[ssmax].cc_ns = ns; 726 ssyms[ssmax].cc_cs = cs; 727 ssyms[ssmax].cc_ne = MAX_CODESET; 728 ssyms[ssmax++].cc_ce = WC_VERY_LARGE; 729 break; 730 } 731 } 732 for (j = 0; j < ssmax; j++) { /* nextstate(s, ssyms[j]) */ 733 ns = ssyms[j].cc_ns; 734 cs = ssyms[j].cc_cs; 735 ne = ssyms[j].cc_ne; 736 ce = ssyms[j].cc_ce; 737 dprintf("j = %d, cs = %o, ce = %o\n", j, cs, ce); 738 symax = delete_table(symbol, symax, ns, cs, ne, ce); 739 setcnt = 0; 740 for (k = 0; k <= line; k++) setvec[k] = 0; 741 for (i = 0; i < spmax; i++) { 742 index[sposns[i]] = 0; 743 cp = point[sposns[i]]; 744 if ((k = type(cp)) != FINAL) { 745 if (k == CHAR && ns == ne && cs == ce && 746 cs == (int)right(cp) || 747 k == DOT || k == CCL && 748 ccl_member(ns, cs, ne, ce, 749 (ccl_chars_t *)right(cp)) || 750 k == NCCL && 751 !ccl_member(ns, cs, ne, ce, 752 (ccl_chars_t *)right(cp))) { 753 ptr = foll[sposns[i]]; 754 num = *ptr; 755 for (k = 0; k < num; k++) { 756 if (setvec[*(++ptr)] != 1 && 757 iposns[*ptr] != 1) { 758 setvec[*ptr] = 1; 759 setcnt++; 760 } 761 } 762 } 763 } 764 } /* end nextstate */ 765 if (notin(state, n, &prev)) { 766 if (n >= NSTATES - 1) { 767 printf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); 768 overflo(); 769 } 770 state[++n] = add(setcnt); 771 dprintf(" delta(%d,[%o,%o])", 772 s, cs, ce); 773 dprintf(" = %d, ind = %d\n", n, ind+1, NULL); 774 fatab[++ind].cc.cc_ns = ns; 775 fatab[ind].cc.cc_cs = cs; 776 fatab[ind].cc.cc_ne = ne; 777 fatab[ind].cc.cc_ce = ce; 778 fatab[ind].n = n; 779 numtrans++; 780 } else { 781 if (prev != 0) { 782 dprintf(" delta(%d,[%o,%o])", 783 s, cs, ce); 784 dprintf("= %d, ind = %d\n", 785 prev, ind+1, NULL); 786 fatab[++ind].cc.cc_ns = ns; 787 fatab[ind].cc.cc_cs = cs; 788 fatab[ind].cc.cc_ne = ne; 789 fatab[ind].cc.cc_ce = ce; 790 fatab[ind].n = prev; 791 numtrans++; 792 } 793 } 794 } 795 tenter: 796 if ((pfa = (struct fa *)malloc((numtrans + 1) 797 * sizeof (struct fa))) == NULL) 798 overflo(); 799 where[s] = pfa; 800 if (finflg) 801 pfa->cc.h = -1; /* s is a final state */ 802 else 803 pfa->cc.h = numtrans; 804 pfa->st = 0; 805 for (i = 1, pfa += 1; i <= numtrans; i++, pfa++) { 806 pfa->cc.s = fatab[i].cc; 807 pfa->st = (struct fa *)fatab[i].n; 808 } 809 } 810 for (i = 0; i <= n; i++) { 811 if (i != 0) /* state[0] is freed later in freetr() */ 812 xfree(state[i]); /* free state[i] */ 813 pfa = where[i]; 814 pfa->st = where[0]; 815 dprintf("state %d: (%o)\n", i, pfa, NULL); 816 dprintf(" numtrans = %d, default = %o\n", 817 pfa->cc.h, pfa->st, NULL); 818 for (k = 1; k <= pfa->cc.h; k++) { 819 (pfa+k)->st = where[(int)(pfa+k)->st]; 820 dprintf(" char = [%o,%o], nextstate = %o\n", 821 (pfa+k)->cc.s.cc_cs, (pfa+k)->cc.s.cc_ce, 822 (pfa+k)->st); 823 } 824 } 825 pfa = where[0]; 826 if ((num = pfa->cc.h) < 0) 827 return (where[0]); 828 for (pfa += num; num; num--, pfa--) 829 if (pfa->cc.s.cc_ns == hatcn && pfa->cc.s.cc_cs == HAT) { 830 return (pfa->st); 831 } 832 return (where[0]); 833 } 834 835 836 /* 837 * Insert CCL entry to CCL table with maintain optimized order. 838 */ 839 static int 840 insert_table(table_base, table_size, ns, cs, ne, ce) 841 ccl_chars_t *table_base; 842 register int table_size; 843 register int ns, ne; 844 register wchar_t cs, ce; 845 { 846 register int i; 847 register int tns, tne; 848 register wchar_t tcs, tce; 849 register ccl_chars_t *table; 850 register ccl_chars_t *saved_table; 851 int saved_i; 852 853 854 855 856 dprintf("Inserting {%o, %o} to table %o\n", cs, ce, table_base); 857 /* 858 * Searching the table to find out where should put the new item. 859 */ 860 for (i = 0, table = table_base; i < table_size; i++, table++) { 861 tns = table->cc_ns; 862 tcs = table->cc_cs; 863 tne = table->cc_ne; 864 tce = table->cc_ce; 865 if (MLCMPLT(ne, ce, tns, (tcs - 1))) { 866 /* 867 * Quick! insert to font of current table entries. 868 */ 869 qinsert: 870 table_size++; 871 for (; i < table_size; i++, table++) { 872 tns = table->cc_ns; 873 tcs = table->cc_cs; 874 tne = table->cc_ne; 875 tce = table->cc_ce; 876 table->cc_ns = ns; 877 table->cc_cs = cs; 878 table->cc_ne = ne; 879 table->cc_ce = ce; 880 ns = tns; 881 cs = tcs; 882 ne = tne; 883 ce = tce; 884 } 885 goto add_null; 886 } else if (MLCMPLE(tns, (tcs - 1), ns, cs) && 887 MLCMPLE(ns, cs, tne, (tce + 1))) { 888 /* 889 * Starting point is within the current entry. 890 */ 891 if (MLCMPGT(tns, tcs, ns, cs)) { 892 table->cc_ns = ns; 893 table->cc_cs = cs; 894 } 895 if (MLCMPLE(ne, ce, tne, tce)) { 896 return (table_size); 897 } 898 goto combine; 899 } 900 } 901 902 903 /* 904 * Adding new one to end of table. 905 */ 906 table->cc_ns = ns; 907 table->cc_cs = cs; 908 table->cc_ne = ne; 909 table->cc_ce = ce; 910 911 912 table_size++; 913 goto add_null; 914 915 916 917 918 combine: 919 /* 920 * Check and try to combine the new entry with rest of entries. 921 */ 922 if ((i + 1) >= table_size) { 923 table->cc_ne = ne; 924 table->cc_ce = ce; 925 return (table_size); 926 } 927 928 929 saved_table = table++; 930 saved_i = i++; 931 932 933 /* 934 * Finding the spot where we should put the end point. 935 */ 936 for (; i < table_size; i++, table++) { 937 if (MLCMPLT(ne, ce, table->cc_ns, (table->cc_cs - 1))) { 938 break; 939 } else 940 if (MLCMPLE(table->cc_ns, (table->cc_cs - 1), ne, ce) && 941 MLCMPLE(ne, ce, table->cc_ne, (table->cc_ce + 1))) { 942 /* 943 * Tack with this table. 944 */ 945 if (MLCMPLT(ne, ce, table->cc_ne, table->cc_ce)) { 946 ne = table->cc_ne; 947 ce = table->cc_ce; 948 } 949 table++; 950 i++; 951 break; 952 } 953 } 954 955 956 saved_table->cc_ne = ne; 957 saved_table->cc_ce = ce; 958 saved_i = table_size - (i - saved_i - 1); 959 960 961 /* 962 * Moving the rest of entries. 963 */ 964 for (; i < table_size; i++, table++) 965 *(++saved_table) = *table; 966 table_size = saved_i; 967 968 969 add_null: 970 table_base[table_size].cc_cs = (wchar_t)0x0; 971 table_base[table_size].cc_ce = (wchar_t)0x0; 972 973 974 return (table_size); 975 } 976 977 978 979 980 static int 981 delete_table(table_base, table_size, ns, cs, ne, ce) 982 ccl_chars_t *table_base; 983 register int table_size; 984 register int ns, ne; 985 register wchar_t cs, ce; 986 { 987 register int i; 988 int saved_i; 989 register ccl_chars_t *table; 990 register ccl_chars_t *saved_table; 991 register int tns; 992 register wchar_t tcs; 993 register int tne; 994 register wchar_t tce; 995 996 997 998 999 for (i = 0, table = table_base; i < table_size; i++, table++) { 1000 tns = table->cc_ns; 1001 tcs = table->cc_cs; 1002 tne = table->cc_ne; 1003 tce = table->cc_ce; 1004 if (MLCMPLT(ne, ce, tns, tcs)) 1005 return (table_size); 1006 else if (MLCMPLT(ne, ce, tne, tce)) { 1007 if (MLCMPLE(ns, cs, tns, tcs)) { 1008 /* 1009 * Shrink type 1. 1010 */ 1011 table->cc_ns = ne; 1012 table->cc_cs = ce + 1; 1013 return (table_size); 1014 1015 } else { 1016 /* 1017 * Spliting !! 1018 */ 1019 table->cc_ns = ne; 1020 table->cc_cs = ce + 1; 1021 tne = ns; 1022 tce = cs - 1; 1023 table_size++; 1024 for (; i < table_size; i++, table++) { 1025 ns = table->cc_ns; 1026 cs = table->cc_cs; 1027 ne = table->cc_ne; 1028 ce = table->cc_ce; 1029 table->cc_ns = tns; 1030 table->cc_cs = tcs; 1031 table->cc_ne = tne; 1032 table->cc_ce = tce; 1033 tns = ns; 1034 tcs = cs; 1035 tne = ne; 1036 tce = ce; 1037 } 1038 return (table_size); 1039 } 1040 1041 } else if (MLCMPLE(ns, cs, tne, tce)) { 1042 if (MLCMPGT(ns, cs, tns, tcs)) { 1043 /* 1044 * Shrink current table(type 2). 1045 */ 1046 table->cc_ne = ns; 1047 table->cc_ce = cs - 1; 1048 table++; 1049 i++; 1050 } 1051 /* 1052 * Search for the end point. 1053 */ 1054 saved_i = i; 1055 saved_table = table; 1056 for (; i < table_size; i++, table++) { 1057 if (MLCMPLT(ne, ce, 1058 table->cc_ns, table->cc_cs)) { 1059 /* 1060 * Easy point, no shrinks! 1061 */ 1062 break; 1063 1064 } else if (MLCMPGT(table->cc_ne, table->cc_ce, 1065 ne, ce)) { 1066 /* 1067 * Shrinking... 1068 */ 1069 table->cc_ns = ne; 1070 table->cc_cs = ce + 1; 1071 break; 1072 } 1073 1074 1075 } 1076 /* 1077 * Moving(removing) backword. 1078 */ 1079 saved_i = table_size - (i - saved_i); 1080 for (; i < table_size; i++) 1081 *saved_table++ = *table++; 1082 return (saved_i); 1083 } 1084 } 1085 return (table_size); 1086 } 1087 1088 1089 #ifdef DEBUG 1090 dump_table(table, size) 1091 register ccl_chars_t *table; 1092 register int size; 1093 { 1094 register int i; 1095 1096 1097 1098 1099 if (! dbg) 1100 return; 1101 1102 1103 printf("Duming table %o with size %d\n", table, size); 1104 size++; /* To watch out NULL */ 1105 for (i = 0; i < size; i++, table++) 1106 { 1107 printf("{%3o, %3o}, ", table->cc_cs, table->cc_ce); 1108 } 1109 printf("\n"); 1110 } 1111 #endif /* DEBUG */ 1112 1113 1114 1115 1116 match(pfa, p) 1117 register struct fa *pfa; 1118 register wchar_t *p; 1119 { 1120 register count; 1121 register int n, ns, ne; 1122 register wchar_t c, cs, ce; 1123 1124 1125 if (p == 0) 1126 return (0); 1127 if (pfa->cc.h == 1) { /* fast test for first character, if possible */ 1128 ns = (++pfa)->cc.s.cc_ns; 1129 cs = (pfa)->cc.s.cc_cs; 1130 ne = (pfa)->cc.s.cc_ne; 1131 ce = (pfa)->cc.s.cc_ce; 1132 do { 1133 c = *p; 1134 n = wcsetno(c); 1135 if (MLCMPLE(ns, cs, n, c) && 1136 MLCMPLE(n, c, ne, ce)) { 1137 p++; 1138 pfa = pfa->st; 1139 goto adv; 1140 } 1141 } while (*p++ != 0); 1142 return (0); 1143 } 1144 adv: if ((count = pfa->cc.h) < 0) 1145 return (1); 1146 do { 1147 c = *p; 1148 n = wcsetno(c); 1149 for (pfa += count; count; count--, pfa--) { 1150 ns = (pfa)->cc.s.cc_ns; 1151 cs = (pfa)->cc.s.cc_cs; 1152 ne = (pfa)->cc.s.cc_ne; 1153 ce = (pfa)->cc.s.cc_ce; 1154 if (MLCMPLE(ns, cs, n, c) && MLCMPLE(n, c, ne, ce)) 1155 break; 1156 } 1157 pfa = pfa->st; 1158 if ((count = pfa->cc.h) < 0) 1159 return (1); 1160 } while (*p++ != 0); 1161 return (0); 1162 } 1163