1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 #include "awk.def" 32 #include "stdio.h" 33 #include "awk.h" 34 #include <stdlib.h> 35 36 37 extern NODE *op2(); 38 extern struct fa *cgotofn(); 39 #define MAXLIN 256 40 #define NCHARS 128 41 #define NSTATES 256 42 43 44 #define type(v) v->nobj 45 #define left(v) v->narg[0] 46 #define right(v) v->narg[1] 47 #define parent(v) v->nnext 48 49 50 #define LEAF case CCL: case NCCL: case CHAR: case DOT: 51 #define UNARY case FINAL: case STAR: case PLUS: case QUEST: 52 53 54 /* 55 * encoding in tree NODEs: 56 * leaf (CCL, NCCL, CHAR, DOT): left is index, 57 * right contains value or pointer to value 58 * unary (FINAL, STAR, PLUS, QUEST): left is child, right is null 59 * binary (CAT, OR): left and right are children 60 * parent contains pointer to parent 61 */ 62 63 64 struct fa { 65 union { 66 ccl_chars_t s; 67 int h; 68 } cc; 69 #define MLCMPLT(m1, l1, m2, l2) ((m1 != m2 &&\ 70 (int)m1 < (int)m2) ||\ 71 (m1 == m2 && (int)l1 < (int)l2)) 72 #define MLCMPLE(m1, l1, m2, l2) ((m1 != m2 &&\ 73 (int)m1 <= (int)m2) ||\ 74 (m1 == m2 && (int)l1 <= (int)l2)) 75 #define MLCMPGT(m1, l1, m2, l2) ((m1 != m2 &&\ 76 (int)m1 > (int)m2) ||\ 77 (m1 == m2 && (int)l1 > (int)l2)) 78 #define MAX_CODESET 3 79 struct fa *st; 80 }; 81 82 83 int *state[NSTATES]; 84 int *foll[MAXLIN]; 85 int setvec[MAXLIN]; 86 NODE *point[MAXLIN]; 87 88 89 int setcnt; 90 int line; 91 92 93 static int ccln_member(); 94 static int insert_table(); 95 static int delete_table(); 96 static void penter(NODE *p); 97 static void follow(NODE *v); 98 static void overflo(void); 99 static void cfoll(NODE *v); 100 static void freetr(NODE *p); 101 #ifdef DEBUG 102 #define ddump_table(t, s) dump_table(t, s) 103 #else 104 #define ddump_table(t, s) 105 #endif 106 107 struct fa * 108 makedfa(p) /* returns dfa for tree pointed to by p */ 109 NODE *p; 110 { 111 NODE *p1; 112 struct fa *fap; 113 p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0, 114 (NODE *) 0), (NODE *) 0), p); 115 /* put DOT STAR in front of reg. exp. */ 116 p1 = op2(FINAL, p1, (NODE *) 0); /* install FINAL NODE */ 117 118 119 line = 0; 120 penter(p1); /* enter parent pointers and leaf indices */ 121 point[line] = p1; /* FINAL NODE */ 122 setvec[0] = 1; /* for initial DOT STAR */ 123 cfoll(p1); /* set up follow sets */ 124 fap = cgotofn(); 125 freetr(p1); /* add this when alloc works */ 126 return (fap); 127 } 128 129 static void 130 penter(NODE *p) /* set up parent pointers and leaf indices */ 131 { 132 switch (type(p)) { 133 LEAF 134 left(p) = (NODE *)line; 135 point[line++] = p; 136 break; 137 UNARY 138 penter(left(p)); 139 parent(left(p)) = p; 140 break; 141 case CAT: 142 case OR: 143 penter(left(p)); 144 penter(right(p)); 145 parent(left(p)) = p; 146 parent(right(p)) = p; 147 break; 148 default: 149 error(FATAL, "unknown type %d in penter\n", type(p)); 150 break; 151 } 152 } 153 154 static void 155 freetr(NODE *p) /* free parse tree and follow sets */ 156 { 157 switch (type(p)) { 158 LEAF 159 xfree(foll[(int)left(p)]); 160 xfree(p); 161 break; 162 UNARY 163 freetr(left(p)); 164 xfree(p); 165 break; 166 case CAT: 167 case OR: 168 freetr(left(p)); 169 freetr(right(p)); 170 xfree(p); 171 break; 172 default: 173 error(FATAL, "unknown type %d in freetr", type(p)); 174 break; 175 } 176 } 177 ccl_chars_t * 178 cclenter(wchar_t *p) 179 { 180 int i, cn; 181 wchar_t c, pc; 182 wchar_t *op; 183 ccl_chars_t *new; 184 ccl_chars_t chars[MAXLIN]; 185 186 op = p; 187 i = 0; 188 while ((c = *p++) != 0) { 189 if (c == '-' && i > 0) { 190 if (*p != 0) { 191 /* 192 * If there are not in same code set, the 193 * class should be ignore (make two independent 194 * characters)! 195 */ 196 c = *p++; 197 cn = wcsetno(pc); 198 if (cn != wcsetno(c) || pc > c) 199 goto char_array; 200 i = insert_table(chars, i, cn, pc, cn, c); 201 continue; 202 } 203 } 204 char_array: 205 if (i >= MAXLIN) 206 overflo(); 207 cn = wcsetno(c); 208 i = insert_table(chars, i, cn, c, cn, c); 209 pc = c; 210 } 211 dprintf("cclenter: in = |%ws|, ", op, NULL, NULL); 212 xfree(op); 213 i = (i + 1) * sizeof (ccl_chars_t); 214 if ((new = (ccl_chars_t *)malloc(i)) == NULL) 215 error(FATAL, "out of space in cclenter on %s", op); 216 (void) memcpy((char *)new, (char *)chars, i); 217 ddump_table(chars, i / 4); 218 219 220 return (new); 221 } 222 223 static void 224 overflo(void) 225 { 226 error(FATAL, "regular expression too long\n"); 227 } 228 229 static void 230 cfoll(NODE *v) /* enter follow set of each leaf of vertex v into foll[leaf] */ 231 { 232 int i; 233 int prev; 234 int *add(); 235 236 237 switch (type(v)) { 238 LEAF 239 setcnt = 0; 240 for (i = 1; i <= line; i++) 241 setvec[i] = 0; 242 follow(v); 243 foll[(int)left(v)] = add(setcnt); 244 break; 245 UNARY 246 cfoll(left(v)); 247 break; 248 case CAT: 249 case OR: 250 cfoll(left(v)); 251 cfoll(right(v)); 252 break; 253 default: 254 error(FATAL, "unknown type %d in cfoll", type(v)); 255 } 256 } 257 258 int 259 first(NODE *p) /* collects initially active leaves of p into setvec */ 260 /* returns 0 or 1 depending on whether p matches empty string */ 261 { 262 int b; 263 264 265 switch (type(p)) { 266 LEAF 267 if (setvec[(int)left(p)] != 1) { 268 setvec[(int)left(p)] = 1; 269 setcnt++; 270 } 271 if (type(p) == CCL && 272 (*(ccl_chars_t *)right(p)).cc_cs == (wchar_t)0x0) 273 return (0); /* empty CCL */ 274 else return (1); 275 case FINAL: 276 case PLUS: 277 if (first(left(p)) == 0) 278 return (0); 279 return (1); 280 case STAR: 281 case QUEST: 282 first(left(p)); 283 return (0); 284 case CAT: 285 if (first(left(p)) == 0 && first(right(p)) == 0) 286 return (0); 287 return (1); 288 case OR: 289 b = first(right(p)); 290 if (first(left(p)) == 0 || b == 0) 291 return (0); 292 return (1); 293 } 294 error(FATAL, "unknown type %d in first\n", type(p)); 295 return (-1); 296 } 297 298 static void 299 follow(NODE *v) 300 /* collects leaves that can follow v into setvec */ 301 { 302 NODE *p; 303 304 305 if (type(v) == FINAL) 306 return; 307 p = parent(v); 308 switch (type(p)) { 309 case STAR: 310 case PLUS: first(v); 311 follow(p); 312 return; 313 314 315 case OR: 316 case QUEST: follow(p); 317 return; 318 319 320 case CAT: if (v == left(p)) { /* v is left child of p */ 321 if (first(right(p)) == 0) { 322 follow(p); 323 return; 324 } 325 } else /* v is right child */ 326 follow(p); 327 return; 328 case FINAL: if (setvec[line] != 1) { 329 setvec[line] = 1; 330 setcnt++; 331 } 332 return; 333 } 334 } 335 336 337 /* 338 * There are three type of functions for checking member ship. Because I have 339 * been changed structure of CCL tables. And some CCL tables end up with NULLs 340 * but someone has length and will includes NULLs in table as one of data. 341 * Please note, CCL table which has a length data and data will include NULLs, 342 * it only used within a this source file("b.c"). 343 */ 344 345 int /* is cs thru ce in s? */ 346 ccl_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s) 347 { 348 /* 349 * The specified range(cs, ce) must be beside the range between 350 * s->cc_start and s->cc_end to determine member. 351 */ 352 while (s->cc_cs || s->cc_ce) { 353 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) && 354 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce)) 355 return (1); 356 s++; 357 } 358 return (0); 359 } 360 361 362 static int /* is cs thru ce in s? */ 363 ccln_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s, int n) 364 { 365 /* 366 * The specified range(cs, ce) must be beside the range between 367 * s->cc_start and s->cc_end to determine member. 368 */ 369 while (n-- > 0) { 370 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) && 371 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce)) 372 return (1); 373 s++; 374 } 375 return (0); 376 } 377 378 379 int 380 member(wchar_t c, wchar_t *s) /* is c in s? */ 381 { 382 while (*s) 383 if (c == *s++) 384 return (1); 385 return (0); 386 } 387 388 int 389 notin(int **array, int n, int *prev) /* is setvec in array[0] thru array[n]? */ 390 { 391 int i, j; 392 int *ptr; 393 for (i = 0; i <= n; i++) { 394 ptr = array[i]; 395 if (*ptr == setcnt) { 396 for (j = 0; j < setcnt; j++) 397 if (setvec[*(++ptr)] != 1) goto nxt; 398 *prev = i; 399 return (0); 400 } 401 nxt: /* dummy */; 402 } 403 return (1); 404 } 405 406 407 int * 408 add(int n) 409 { /* remember setvec */ 410 int *ptr, *p; 411 int i; 412 if ((p = ptr = (int *)malloc((n+1)*sizeof (int))) == NULL) 413 overflo(); 414 *ptr = n; 415 dprintf("add(%d)\n", n, NULL, NULL); 416 for (i = 1; i <= line; i++) 417 if (setvec[i] == 1) { 418 *(++ptr) = i; 419 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); 420 } 421 dprintf("\n", NULL, NULL, NULL); 422 return (p); 423 } 424 425 426 struct fa * 427 cgotofn() 428 { 429 int i, k; 430 int *ptr; 431 int ns, ne; 432 wchar_t cs, ce; 433 ccl_chars_t *p; 434 NODE *cp; 435 int j, n, s, ind, numtrans; 436 int finflg; 437 int curpos, num, prev; 438 struct fa *where[NSTATES]; 439 440 441 struct { 442 ccl_chars_t cc; 443 int n; 444 } fatab[257]; 445 struct fa *pfa; 446 447 448 char index[MAXLIN]; 449 char iposns[MAXLIN]; 450 int sposns[MAXLIN]; 451 int spmax, spinit; 452 ccl_chars_t symbol[NCHARS]; 453 ccl_chars_t isyms[NCHARS]; 454 ccl_chars_t ssyms[NCHARS]; 455 int ssmax, symax, ismax, ssinit; 456 457 458 wchar_t hat; 459 int hatcn; 460 461 462 for (i = 0; i <= line; i++) index[i] = iposns[i] = setvec[i] = 0; 463 isyms[0].cc_cs = isyms[0].cc_ce = (wchar_t)0x0; 464 for (i = 0; i < NCHARS; i++) 465 isyms[i] = symbol[i] = ssyms[i] = isyms[0]; 466 symax = 0; 467 setcnt = 0; 468 /* compute initial positions and symbols of state 0 */ 469 ismax = 0; 470 ssmax = 0; 471 ptr = state[0] = foll[0]; 472 spinit = *ptr; 473 hat = HAT; 474 hatcn = wcsetno(hat); 475 for (i = 0; i < spinit; i++) { 476 curpos = *(++ptr); 477 sposns[i] = curpos; 478 iposns[curpos] = 1; 479 cp = point[curpos]; 480 dprintf("i= %d, spinit = %d, curpos = %d\n", i, spinit, curpos); 481 switch (type(cp)) { 482 case CHAR: 483 k = (int)right(cp); 484 ns = wcsetno(k); 485 if (! ccln_member(ns, k, ns, k, 486 isyms, ismax)) { 487 ismax = insert_table(isyms, ismax, 488 ns, k, ns, k); 489 } 490 ssyms[ssmax].cc_ns = ns; 491 ssyms[ssmax].cc_cs = k; 492 ssyms[ssmax].cc_ne = ns; 493 ssyms[ssmax++].cc_ce = k; 494 break; 495 case DOT: 496 cs = WC_VERY_SMALL; 497 ns = 0; 498 ce = HAT - 1; 499 ne = hatcn; 500 if (! ccln_member(ns, cs, ne, ce, 501 isyms, ismax)) { 502 ismax = insert_table(isyms, ismax, 503 ns, cs, ne, ce); 504 } 505 ssyms[ssmax].cc_cs = cs; 506 ssyms[ssmax].cc_ns = ns; 507 ssyms[ssmax].cc_ce = ce; 508 ssyms[ssmax++].cc_ne = ne; 509 cs = HAT + 1; 510 ns = hatcn; 511 ce = WC_VERY_LARGE; 512 ne = MAX_CODESET; 513 if (! ccln_member(ns, cs, ne, ce, 514 isyms, ismax)) { 515 ismax = insert_table(isyms, ismax, 516 ns, cs, ne, ce); 517 } 518 ssyms[ssmax].cc_cs = cs; 519 ssyms[ssmax].cc_ns = ns; 520 ssyms[ssmax].cc_ce = ce; 521 ssyms[ssmax++].cc_ne = ne; 522 break; 523 case CCL: 524 cs = HAT; 525 ns = hatcn; 526 for (p = (ccl_chars_t *)right(cp); 527 p->cc_cs; p++) { 528 if ((p->cc_ns != ns ||\ 529 p->cc_cs != cs) &&\ 530 !ccln_member(p->cc_ns, p->cc_cs, 531 p->cc_ne, p->cc_ce, isyms, ismax)) { 532 ismax = insert_table(isyms, 533 ismax, p->cc_ns, p->cc_cs, p->cc_ne, p->cc_ce); 534 } 535 ssyms[ssmax++] = *p; 536 } 537 break; 538 case NCCL: 539 ns = 0; 540 cs = WC_VERY_SMALL; 541 for (p = (ccl_chars_t *)right(cp); 542 p->cc_cs; p++) { 543 if ((ns != hatcn || p->cc_cs != HAT) && 544 ! ccln_member(ns, cs, 545 p->cc_ns, p->cc_cs-1, 546 isyms, ismax)) { 547 ismax = insert_table(isyms, 548 ismax, 549 ns, cs, 550 p->cc_ns, 551 p->cc_cs-1); 552 } 553 ssyms[ssmax].cc_ns = ns; 554 ssyms[ssmax].cc_cs = cs; 555 ssyms[ssmax].cc_ne = p->cc_ns; 556 ssyms[ssmax++].cc_ce = p->cc_cs-1; 557 if (p->cc_ce == (wchar_t)0x0) { 558 ns = p->cc_ns; 559 cs = p->cc_cs + 1; 560 561 } else { 562 ns = p->cc_ne; 563 cs = p->cc_ce + 1; 564 } 565 } 566 if ((ns != hatcn || cs != HAT) && 567 ! ccln_member(ns, cs, 568 MAX_CODESET, WC_VERY_LARGE, 569 isyms, ismax)) { 570 ismax = insert_table(isyms, ismax, 571 ns, cs, MAX_CODESET, 572 WC_VERY_LARGE); 573 } 574 ssyms[ssmax].cc_ns = ns; 575 ssyms[ssmax].cc_cs = cs; 576 ssyms[ssmax].cc_ne = MAX_CODESET; 577 ssyms[ssmax++].cc_ce = WC_VERY_LARGE; 578 break; 579 } 580 } 581 ssinit = ssmax; 582 symax = 0; 583 n = 0; 584 for (s = 0; s <= n; s++) { 585 dprintf("s = %d\n", s, NULL, NULL); 586 ind = 0; 587 numtrans = 0; 588 finflg = 0; 589 if (*(state[s] + *state[s]) == line) { /* s final? */ 590 finflg = 1; 591 goto tenter; 592 } 593 spmax = spinit; 594 ssmax = ssinit; 595 ptr = state[s]; 596 num = *ptr; 597 for (i = 0; i < num; i++) { 598 curpos = *(++ptr); 599 if (iposns[curpos] != 1 && index[curpos] != 1) { 600 index[curpos] = 1; 601 sposns[spmax++] = curpos; 602 } 603 cp = point[curpos]; 604 switch (type(cp)) { 605 case CHAR: 606 k = (int)right(cp); 607 ns = wcsetno(k); 608 if (! ccln_member(ns, k, ns, k, 609 isyms, ismax) && 610 ! ccln_member(ns, k, ns, k, 611 symbol, symax)) { 612 symax = insert_table(symbol, 613 symax, 614 ns, k, 615 ns, k); 616 } 617 ssyms[ssmax].cc_ns = ns; 618 ssyms[ssmax].cc_cs = k; 619 ssyms[ssmax].cc_ne = ns; 620 ssyms[ssmax++].cc_ce = k; 621 break; 622 case DOT: 623 cs = WC_VERY_SMALL; 624 ns = 0; 625 ce = HAT - 1; 626 ne = hatcn; 627 if (! ccln_member(ns, cs, ne, ce, 628 isyms, ismax) && 629 ! ccln_member(ns, cs, ne, ce, 630 symbol, symax)) { 631 symax = insert_table(symbol, 632 symax, 633 ns, cs, 634 ne, ce); 635 } 636 ssyms[ssmax].cc_cs = cs; 637 ssyms[ssmax].cc_ns = ns; 638 ssyms[ssmax].cc_ce = ce; 639 ssyms[ssmax++].cc_ne = ne; 640 cs = HAT + 1; 641 ns = hatcn; 642 ce = WC_VERY_LARGE; 643 ne = MAX_CODESET; 644 if (! ccln_member(ns, cs, ne, ce, 645 isyms, ismax) && 646 ! ccln_member(ns, cs, ne, ce, 647 symbol, symax)) { 648 symax = insert_table(symbol, 649 symax, 650 ns, cs, 651 ne, ce); 652 } 653 ssyms[ssmax].cc_cs = cs; 654 ssyms[ssmax].cc_ns = ns; 655 ssyms[ssmax].cc_ce = ce; 656 ssyms[ssmax++].cc_ne = ne; 657 break; 658 case CCL: 659 cs = HAT; 660 ns = hatcn; 661 for (p = (ccl_chars_t *)right(cp); 662 p->cc_cs; p++) { 663 if ((p->cc_ns != ns || 664 p->cc_cs != cs) && 665 ! ccln_member(p->cc_ns, 666 p->cc_cs, p->cc_ne, 667 p->cc_ce, isyms, ismax) && 668 !ccln_member(p->cc_ns, p->cc_cs, 669 p->cc_ne, p->cc_ce, symbol, 670 symax)) { 671 symax = insert_table( 672 symbol, symax, p->cc_ns, 673 p->cc_cs, p->cc_ne, p->cc_ce); 674 } 675 ssyms[ssmax++] = *p; 676 } 677 break; 678 case NCCL: 679 ns = 0; 680 cs = WC_VERY_SMALL; 681 for (p = (ccl_chars_t *)right(cp); p->cc_cs; p++) { 682 if ((p->cc_ns != hatcn || p->cc_cs != HAT) && 683 ! ccln_member(ns, cs, p->cc_ns, 684 p->cc_cs-1, isyms, ismax) && 685 ! ccln_member(ns, cs, p->cc_ns, 686 p->cc_cs-1, symbol, symax)) { 687 symax = insert_table(symbol, 688 symax, ns, cs, p->cc_ns, p->cc_cs-1); 689 } 690 ssyms[ssmax].cc_ns = ns; 691 ssyms[ssmax].cc_cs = cs; 692 ssyms[ssmax].cc_ne = p->cc_ns; 693 ssyms[ssmax++].cc_ce 694 = p->cc_cs-1; 695 if (p->cc_ce == (wchar_t)0x0) { 696 ns = p->cc_ns; 697 cs = p->cc_cs + 1; 698 699 } else { 700 ns = p->cc_ne; 701 cs = p->cc_ce + 1; 702 } 703 } 704 if ((ns != hatcn || cs != HAT) && ! ccln_member(ns, cs, 705 MAX_CODESET, WC_VERY_LARGE, isyms, ismax) && 706 ! ccln_member(ns, cs, MAX_CODESET, 707 WC_VERY_LARGE, symbol, symax)) { 708 symax = insert_table(symbol, symax, ns, cs, 709 MAX_CODESET, 710 WC_VERY_LARGE); 711 } 712 ssyms[ssmax].cc_ns = ns; 713 ssyms[ssmax].cc_cs = cs; 714 ssyms[ssmax].cc_ne = MAX_CODESET; 715 ssyms[ssmax++].cc_ce = WC_VERY_LARGE; 716 break; 717 } 718 } 719 for (j = 0; j < ssmax; j++) { /* nextstate(s, ssyms[j]) */ 720 ns = ssyms[j].cc_ns; 721 cs = ssyms[j].cc_cs; 722 ne = ssyms[j].cc_ne; 723 ce = ssyms[j].cc_ce; 724 dprintf("j = %d, cs = %o, ce = %o\n", j, cs, ce); 725 symax = delete_table(symbol, symax, ns, cs, ne, ce); 726 setcnt = 0; 727 for (k = 0; k <= line; k++) setvec[k] = 0; 728 for (i = 0; i < spmax; i++) { 729 index[sposns[i]] = 0; 730 cp = point[sposns[i]]; 731 if ((k = type(cp)) != FINAL) { 732 if (k == CHAR && ns == ne && cs == ce && 733 cs == (int)right(cp) || 734 k == DOT || k == CCL && 735 ccl_member(ns, cs, ne, ce, 736 (ccl_chars_t *)right(cp)) || 737 k == NCCL && 738 !ccl_member(ns, cs, ne, ce, 739 (ccl_chars_t *)right(cp))) { 740 ptr = foll[sposns[i]]; 741 num = *ptr; 742 for (k = 0; k < num; k++) { 743 if (setvec[*(++ptr)] != 1 && 744 iposns[*ptr] != 1) { 745 setvec[*ptr] = 1; 746 setcnt++; 747 } 748 } 749 } 750 } 751 } /* end nextstate */ 752 if (notin(state, n, &prev)) { 753 if (n >= NSTATES - 1) { 754 printf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); 755 overflo(); 756 } 757 state[++n] = add(setcnt); 758 dprintf(" delta(%d,[%o,%o])", 759 s, cs, ce); 760 dprintf(" = %d, ind = %d\n", n, ind+1, NULL); 761 fatab[++ind].cc.cc_ns = ns; 762 fatab[ind].cc.cc_cs = cs; 763 fatab[ind].cc.cc_ne = ne; 764 fatab[ind].cc.cc_ce = ce; 765 fatab[ind].n = n; 766 numtrans++; 767 } else { 768 if (prev != 0) { 769 dprintf(" delta(%d,[%o,%o])", 770 s, cs, ce); 771 dprintf("= %d, ind = %d\n", 772 prev, ind+1, NULL); 773 fatab[++ind].cc.cc_ns = ns; 774 fatab[ind].cc.cc_cs = cs; 775 fatab[ind].cc.cc_ne = ne; 776 fatab[ind].cc.cc_ce = ce; 777 fatab[ind].n = prev; 778 numtrans++; 779 } 780 } 781 } 782 tenter: 783 if ((pfa = (struct fa *)malloc((numtrans + 1) 784 * sizeof (struct fa))) == NULL) 785 overflo(); 786 where[s] = pfa; 787 if (finflg) 788 pfa->cc.h = -1; /* s is a final state */ 789 else 790 pfa->cc.h = numtrans; 791 pfa->st = 0; 792 for (i = 1, pfa += 1; i <= numtrans; i++, pfa++) { 793 pfa->cc.s = fatab[i].cc; 794 pfa->st = (struct fa *)fatab[i].n; 795 } 796 } 797 for (i = 0; i <= n; i++) { 798 if (i != 0) /* state[0] is freed later in freetr() */ 799 xfree(state[i]); /* free state[i] */ 800 pfa = where[i]; 801 pfa->st = where[0]; 802 dprintf("state %d: (%o)\n", i, pfa, NULL); 803 dprintf(" numtrans = %d, default = %o\n", 804 pfa->cc.h, pfa->st, NULL); 805 for (k = 1; k <= pfa->cc.h; k++) { 806 (pfa+k)->st = where[(int)(pfa+k)->st]; 807 dprintf(" char = [%o,%o], nextstate = %o\n", 808 (pfa+k)->cc.s.cc_cs, (pfa+k)->cc.s.cc_ce, 809 (pfa+k)->st); 810 } 811 } 812 pfa = where[0]; 813 if ((num = pfa->cc.h) < 0) 814 return (where[0]); 815 for (pfa += num; num; num--, pfa--) 816 if (pfa->cc.s.cc_ns == hatcn && pfa->cc.s.cc_cs == HAT) { 817 return (pfa->st); 818 } 819 return (where[0]); 820 } 821 822 823 /* 824 * Insert CCL entry to CCL table with maintain optimized order. 825 */ 826 static int 827 insert_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs, 828 int ne, wchar_t ce) 829 { 830 int i; 831 int tns, tne; 832 wchar_t tcs, tce; 833 ccl_chars_t *table; 834 ccl_chars_t *saved_table; 835 int saved_i; 836 837 838 839 840 dprintf("Inserting {%o, %o} to table %o\n", cs, ce, table_base); 841 /* 842 * Searching the table to find out where should put the new item. 843 */ 844 for (i = 0, table = table_base; i < table_size; i++, table++) { 845 tns = table->cc_ns; 846 tcs = table->cc_cs; 847 tne = table->cc_ne; 848 tce = table->cc_ce; 849 if (MLCMPLT(ne, ce, tns, (tcs - 1))) { 850 /* 851 * Quick! insert to font of current table entries. 852 */ 853 qinsert: 854 table_size++; 855 for (; i < table_size; i++, table++) { 856 tns = table->cc_ns; 857 tcs = table->cc_cs; 858 tne = table->cc_ne; 859 tce = table->cc_ce; 860 table->cc_ns = ns; 861 table->cc_cs = cs; 862 table->cc_ne = ne; 863 table->cc_ce = ce; 864 ns = tns; 865 cs = tcs; 866 ne = tne; 867 ce = tce; 868 } 869 goto add_null; 870 } else if (MLCMPLE(tns, (tcs - 1), ns, cs) && 871 MLCMPLE(ns, cs, tne, (tce + 1))) { 872 /* 873 * Starting point is within the current entry. 874 */ 875 if (MLCMPGT(tns, tcs, ns, cs)) { 876 table->cc_ns = ns; 877 table->cc_cs = cs; 878 } 879 if (MLCMPLE(ne, ce, tne, tce)) { 880 return (table_size); 881 } 882 goto combine; 883 } 884 } 885 886 887 /* 888 * Adding new one to end of table. 889 */ 890 table->cc_ns = ns; 891 table->cc_cs = cs; 892 table->cc_ne = ne; 893 table->cc_ce = ce; 894 895 896 table_size++; 897 goto add_null; 898 899 900 901 902 combine: 903 /* 904 * Check and try to combine the new entry with rest of entries. 905 */ 906 if ((i + 1) >= table_size) { 907 table->cc_ne = ne; 908 table->cc_ce = ce; 909 return (table_size); 910 } 911 912 913 saved_table = table++; 914 saved_i = i++; 915 916 917 /* 918 * Finding the spot where we should put the end point. 919 */ 920 for (; i < table_size; i++, table++) { 921 if (MLCMPLT(ne, ce, table->cc_ns, (table->cc_cs - 1))) { 922 break; 923 } else 924 if (MLCMPLE(table->cc_ns, (table->cc_cs - 1), ne, ce) && 925 MLCMPLE(ne, ce, table->cc_ne, (table->cc_ce + 1))) { 926 /* 927 * Tack with this table. 928 */ 929 if (MLCMPLT(ne, ce, table->cc_ne, table->cc_ce)) { 930 ne = table->cc_ne; 931 ce = table->cc_ce; 932 } 933 table++; 934 i++; 935 break; 936 } 937 } 938 939 940 saved_table->cc_ne = ne; 941 saved_table->cc_ce = ce; 942 saved_i = table_size - (i - saved_i - 1); 943 944 945 /* 946 * Moving the rest of entries. 947 */ 948 for (; i < table_size; i++, table++) 949 *(++saved_table) = *table; 950 table_size = saved_i; 951 952 953 add_null: 954 table_base[table_size].cc_cs = (wchar_t)0x0; 955 table_base[table_size].cc_ce = (wchar_t)0x0; 956 957 958 return (table_size); 959 } 960 961 962 963 964 static int 965 delete_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs, 966 int ne, wchar_t ce) 967 { 968 int i; 969 int saved_i; 970 ccl_chars_t *table; 971 ccl_chars_t *saved_table; 972 int tns; 973 wchar_t tcs; 974 int tne; 975 wchar_t tce; 976 977 978 979 980 for (i = 0, table = table_base; i < table_size; i++, table++) { 981 tns = table->cc_ns; 982 tcs = table->cc_cs; 983 tne = table->cc_ne; 984 tce = table->cc_ce; 985 if (MLCMPLT(ne, ce, tns, tcs)) 986 return (table_size); 987 else if (MLCMPLT(ne, ce, tne, tce)) { 988 if (MLCMPLE(ns, cs, tns, tcs)) { 989 /* 990 * Shrink type 1. 991 */ 992 table->cc_ns = ne; 993 table->cc_cs = ce + 1; 994 return (table_size); 995 996 } else { 997 /* 998 * Spliting !! 999 */ 1000 table->cc_ns = ne; 1001 table->cc_cs = ce + 1; 1002 tne = ns; 1003 tce = cs - 1; 1004 table_size++; 1005 for (; i < table_size; i++, table++) { 1006 ns = table->cc_ns; 1007 cs = table->cc_cs; 1008 ne = table->cc_ne; 1009 ce = table->cc_ce; 1010 table->cc_ns = tns; 1011 table->cc_cs = tcs; 1012 table->cc_ne = tne; 1013 table->cc_ce = tce; 1014 tns = ns; 1015 tcs = cs; 1016 tne = ne; 1017 tce = ce; 1018 } 1019 return (table_size); 1020 } 1021 1022 } else if (MLCMPLE(ns, cs, tne, tce)) { 1023 if (MLCMPGT(ns, cs, tns, tcs)) { 1024 /* 1025 * Shrink current table(type 2). 1026 */ 1027 table->cc_ne = ns; 1028 table->cc_ce = cs - 1; 1029 table++; 1030 i++; 1031 } 1032 /* 1033 * Search for the end point. 1034 */ 1035 saved_i = i; 1036 saved_table = table; 1037 for (; i < table_size; i++, table++) { 1038 if (MLCMPLT(ne, ce, 1039 table->cc_ns, table->cc_cs)) { 1040 /* 1041 * Easy point, no shrinks! 1042 */ 1043 break; 1044 1045 } else if (MLCMPGT(table->cc_ne, table->cc_ce, 1046 ne, ce)) { 1047 /* 1048 * Shrinking... 1049 */ 1050 table->cc_ns = ne; 1051 table->cc_cs = ce + 1; 1052 break; 1053 } 1054 1055 1056 } 1057 /* 1058 * Moving(removing) backword. 1059 */ 1060 saved_i = table_size - (i - saved_i); 1061 for (; i < table_size; i++) 1062 *saved_table++ = *table++; 1063 return (saved_i); 1064 } 1065 } 1066 return (table_size); 1067 } 1068 1069 1070 #ifdef DEBUG 1071 dump_table(ccl_chars_t *table, int size) 1072 { 1073 int i; 1074 1075 1076 1077 1078 if (! dbg) 1079 return; 1080 1081 1082 printf("Duming table %o with size %d\n", table, size); 1083 size++; /* To watch out NULL */ 1084 for (i = 0; i < size; i++, table++) { 1085 printf("{%3o, %3o}, ", table->cc_cs, table->cc_ce); 1086 } 1087 printf("\n"); 1088 } 1089 #endif /* DEBUG */ 1090 1091 1092 1093 int 1094 match(struct fa *pfa, wchar_t *p) 1095 { 1096 int count; 1097 int n, ns, ne; 1098 wchar_t c, cs, ce; 1099 1100 1101 if (p == 0) 1102 return (0); 1103 if (pfa->cc.h == 1) { /* fast test for first character, if possible */ 1104 ns = (++pfa)->cc.s.cc_ns; 1105 cs = (pfa)->cc.s.cc_cs; 1106 ne = (pfa)->cc.s.cc_ne; 1107 ce = (pfa)->cc.s.cc_ce; 1108 do { 1109 c = *p; 1110 n = wcsetno(c); 1111 if (MLCMPLE(ns, cs, n, c) && 1112 MLCMPLE(n, c, ne, ce)) { 1113 p++; 1114 pfa = pfa->st; 1115 goto adv; 1116 } 1117 } while (*p++ != 0); 1118 return (0); 1119 } 1120 adv: if ((count = pfa->cc.h) < 0) 1121 return (1); 1122 do { 1123 c = *p; 1124 n = wcsetno(c); 1125 for (pfa += count; count; count--, pfa--) { 1126 ns = (pfa)->cc.s.cc_ns; 1127 cs = (pfa)->cc.s.cc_cs; 1128 ne = (pfa)->cc.s.cc_ne; 1129 ce = (pfa)->cc.s.cc_ce; 1130 if (MLCMPLE(ns, cs, n, c) && MLCMPLE(n, c, ne, ce)) 1131 break; 1132 } 1133 pfa = pfa->st; 1134 if ((count = pfa->cc.h) < 0) 1135 return (1); 1136 } while (*p++ != 0); 1137 return (0); 1138 } 1139