1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include "awk.def" 34 #include "stdio.h" 35 #include "awk.h" 36 #include <stdlib.h> 37 38 39 extern NODE *op2(); 40 extern struct fa *cgotofn(); 41 #define MAXLIN 256 42 #define NCHARS 128 43 #define NSTATES 256 44 45 46 #define type(v) v->nobj 47 #define left(v) v->narg[0] 48 #define right(v) v->narg[1] 49 #define parent(v) v->nnext 50 51 52 #define LEAF case CCL: case NCCL: case CHAR: case DOT: 53 #define UNARY case FINAL: case STAR: case PLUS: case QUEST: 54 55 56 /* 57 * encoding in tree NODEs: 58 * leaf (CCL, NCCL, CHAR, DOT): left is index, 59 * right contains value or pointer to value 60 * unary (FINAL, STAR, PLUS, QUEST): left is child, right is null 61 * binary (CAT, OR): left and right are children 62 * parent contains pointer to parent 63 */ 64 65 66 struct fa { 67 union { 68 ccl_chars_t s; 69 int h; 70 } cc; 71 #define MLCMPLT(m1, l1, m2, l2) ((m1 != m2 &&\ 72 (int)m1 < (int)m2) ||\ 73 (m1 == m2 && (int)l1 < (int)l2)) 74 #define MLCMPLE(m1, l1, m2, l2) ((m1 != m2 &&\ 75 (int)m1 <= (int)m2) ||\ 76 (m1 == m2 && (int)l1 <= (int)l2)) 77 #define MLCMPGT(m1, l1, m2, l2) ((m1 != m2 &&\ 78 (int)m1 > (int)m2) ||\ 79 (m1 == m2 && (int)l1 > (int)l2)) 80 #define MAX_CODESET 3 81 struct fa *st; 82 }; 83 84 85 int *state[NSTATES]; 86 int *foll[MAXLIN]; 87 int setvec[MAXLIN]; 88 NODE *point[MAXLIN]; 89 90 91 int setcnt; 92 int line; 93 94 95 static int ccln_member(); 96 static int insert_table(); 97 static int delete_table(); 98 static void penter(NODE *p); 99 static void follow(NODE *v); 100 static void overflo(void); 101 static void cfoll(NODE *v); 102 static void freetr(NODE *p); 103 #ifdef DEBUG 104 #define ddump_table(t, s) dump_table(t, s) 105 #else 106 #define ddump_table(t, s) 107 #endif 108 109 struct fa * 110 makedfa(p) /* returns dfa for tree pointed to by p */ 111 NODE *p; 112 { 113 NODE *p1; 114 struct fa *fap; 115 p1 = op2(CAT, op2(STAR, op2(DOT, (NODE *) 0, 116 (NODE *) 0), (NODE *) 0), p); 117 /* put DOT STAR in front of reg. exp. */ 118 p1 = op2(FINAL, p1, (NODE *) 0); /* install FINAL NODE */ 119 120 121 line = 0; 122 penter(p1); /* enter parent pointers and leaf indices */ 123 point[line] = p1; /* FINAL NODE */ 124 setvec[0] = 1; /* for initial DOT STAR */ 125 cfoll(p1); /* set up follow sets */ 126 fap = cgotofn(); 127 freetr(p1); /* add this when alloc works */ 128 return (fap); 129 } 130 131 static void 132 penter(NODE *p) /* set up parent pointers and leaf indices */ 133 { 134 switch (type(p)) { 135 LEAF 136 left(p) = (NODE *)line; 137 point[line++] = p; 138 break; 139 UNARY 140 penter(left(p)); 141 parent(left(p)) = p; 142 break; 143 case CAT: 144 case OR: 145 penter(left(p)); 146 penter(right(p)); 147 parent(left(p)) = p; 148 parent(right(p)) = p; 149 break; 150 default: 151 error(FATAL, "unknown type %d in penter\n", type(p)); 152 break; 153 } 154 } 155 156 static void 157 freetr(NODE *p) /* free parse tree and follow sets */ 158 { 159 switch (type(p)) { 160 LEAF 161 xfree(foll[(int)left(p)]); 162 xfree(p); 163 break; 164 UNARY 165 freetr(left(p)); 166 xfree(p); 167 break; 168 case CAT: 169 case OR: 170 freetr(left(p)); 171 freetr(right(p)); 172 xfree(p); 173 break; 174 default: 175 error(FATAL, "unknown type %d in freetr", type(p)); 176 break; 177 } 178 } 179 ccl_chars_t * 180 cclenter(wchar_t *p) 181 { 182 int i, cn; 183 wchar_t c, pc; 184 wchar_t *op; 185 ccl_chars_t *new; 186 ccl_chars_t chars[MAXLIN]; 187 188 op = p; 189 i = 0; 190 while ((c = *p++) != 0) { 191 if (c == '-' && i > 0) { 192 if (*p != 0) { 193 /* 194 * If there are not in same code set, the 195 * class should be ignore (make two independent 196 * characters)! 197 */ 198 c = *p++; 199 cn = wcsetno(pc); 200 if (cn != wcsetno(c) || pc > c) 201 goto char_array; 202 i = insert_table(chars, i, cn, pc, cn, c); 203 continue; 204 } 205 } 206 char_array: 207 if (i >= MAXLIN) 208 overflo(); 209 cn = wcsetno(c); 210 i = insert_table(chars, i, cn, c, cn, c); 211 pc = c; 212 } 213 dprintf("cclenter: in = |%ws|, ", op, NULL, NULL); 214 xfree(op); 215 i = (i + 1) * sizeof (ccl_chars_t); 216 if ((new = (ccl_chars_t *)malloc(i)) == NULL) 217 error(FATAL, "out of space in cclenter on %s", op); 218 (void) memcpy((char *)new, (char *)chars, i); 219 ddump_table(chars, i / 4); 220 221 222 return (new); 223 } 224 225 static void 226 overflo(void) 227 { 228 error(FATAL, "regular expression too long\n"); 229 } 230 231 static void 232 cfoll(NODE *v) /* enter follow set of each leaf of vertex v into foll[leaf] */ 233 { 234 int i; 235 int prev; 236 int *add(); 237 238 239 switch (type(v)) { 240 LEAF 241 setcnt = 0; 242 for (i = 1; i <= line; i++) 243 setvec[i] = 0; 244 follow(v); 245 foll[(int)left(v)] = add(setcnt); 246 break; 247 UNARY 248 cfoll(left(v)); 249 break; 250 case CAT: 251 case OR: 252 cfoll(left(v)); 253 cfoll(right(v)); 254 break; 255 default: 256 error(FATAL, "unknown type %d in cfoll", type(v)); 257 } 258 } 259 260 int 261 first(NODE *p) /* collects initially active leaves of p into setvec */ 262 /* returns 0 or 1 depending on whether p matches empty string */ 263 { 264 int b; 265 266 267 switch (type(p)) { 268 LEAF 269 if (setvec[(int)left(p)] != 1) { 270 setvec[(int)left(p)] = 1; 271 setcnt++; 272 } 273 if (type(p) == CCL && 274 (*(ccl_chars_t *)right(p)).cc_cs == (wchar_t)0x0) 275 return (0); /* empty CCL */ 276 else return (1); 277 case FINAL: 278 case PLUS: 279 if (first(left(p)) == 0) 280 return (0); 281 return (1); 282 case STAR: 283 case QUEST: 284 first(left(p)); 285 return (0); 286 case CAT: 287 if (first(left(p)) == 0 && first(right(p)) == 0) 288 return (0); 289 return (1); 290 case OR: 291 b = first(right(p)); 292 if (first(left(p)) == 0 || b == 0) 293 return (0); 294 return (1); 295 } 296 error(FATAL, "unknown type %d in first\n", type(p)); 297 return (-1); 298 } 299 300 static void 301 follow(NODE *v) 302 /* collects leaves that can follow v into setvec */ 303 { 304 NODE *p; 305 306 307 if (type(v) == FINAL) 308 return; 309 p = parent(v); 310 switch (type(p)) { 311 case STAR: 312 case PLUS: first(v); 313 follow(p); 314 return; 315 316 317 case OR: 318 case QUEST: follow(p); 319 return; 320 321 322 case CAT: if (v == left(p)) { /* v is left child of p */ 323 if (first(right(p)) == 0) { 324 follow(p); 325 return; 326 } 327 } else /* v is right child */ 328 follow(p); 329 return; 330 case FINAL: if (setvec[line] != 1) { 331 setvec[line] = 1; 332 setcnt++; 333 } 334 return; 335 } 336 } 337 338 339 /* 340 * There are three type of functions for checking member ship. Because I have 341 * been changed structure of CCL tables. And some CCL tables end up with NULLs 342 * but someone has length and will includes NULLs in table as one of data. 343 * Please note, CCL table which has a length data and data will include NULLs, 344 * it only used within a this source file("b.c"). 345 */ 346 347 int /* is cs thru ce in s? */ 348 ccl_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s) 349 { 350 /* 351 * The specified range(cs, ce) must be beside the range between 352 * s->cc_start and s->cc_end to determine member. 353 */ 354 while (s->cc_cs || s->cc_ce) { 355 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) && 356 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce)) 357 return (1); 358 s++; 359 } 360 return (0); 361 } 362 363 364 static int /* is cs thru ce in s? */ 365 ccln_member(int ns, wchar_t cs, int ne, wchar_t ce, ccl_chars_t *s, int n) 366 { 367 /* 368 * The specified range(cs, ce) must be beside the range between 369 * s->cc_start and s->cc_end to determine member. 370 */ 371 while (n-- > 0) { 372 if (MLCMPLE(s->cc_ns, s->cc_cs, ns, cs) && 373 MLCMPLE(ne, ce, s->cc_ne, s->cc_ce)) 374 return (1); 375 s++; 376 } 377 return (0); 378 } 379 380 381 int 382 member(wchar_t c, wchar_t *s) /* is c in s? */ 383 { 384 while (*s) 385 if (c == *s++) 386 return (1); 387 return (0); 388 } 389 390 int 391 notin(int **array, int n, int *prev) /* is setvec in array[0] thru array[n]? */ 392 { 393 int i, j; 394 int *ptr; 395 for (i = 0; i <= n; i++) { 396 ptr = array[i]; 397 if (*ptr == setcnt) { 398 for (j = 0; j < setcnt; j++) 399 if (setvec[*(++ptr)] != 1) goto nxt; 400 *prev = i; 401 return (0); 402 } 403 nxt: /* dummy */; 404 } 405 return (1); 406 } 407 408 409 int * 410 add(int n) 411 { /* remember setvec */ 412 int *ptr, *p; 413 int i; 414 if ((p = ptr = (int *)malloc((n+1)*sizeof (int))) == NULL) 415 overflo(); 416 *ptr = n; 417 dprintf("add(%d)\n", n, NULL, NULL); 418 for (i = 1; i <= line; i++) 419 if (setvec[i] == 1) { 420 *(++ptr) = i; 421 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); 422 } 423 dprintf("\n", NULL, NULL, NULL); 424 return (p); 425 } 426 427 428 struct fa * 429 cgotofn() 430 { 431 int i, k; 432 int *ptr; 433 int ns, ne; 434 wchar_t cs, ce; 435 ccl_chars_t *p; 436 NODE *cp; 437 int j, n, s, ind, numtrans; 438 int finflg; 439 int curpos, num, prev; 440 struct fa *where[NSTATES]; 441 442 443 struct { 444 ccl_chars_t cc; 445 int n; 446 } fatab[257]; 447 struct fa *pfa; 448 449 450 char index[MAXLIN]; 451 char iposns[MAXLIN]; 452 int sposns[MAXLIN]; 453 int spmax, spinit; 454 ccl_chars_t symbol[NCHARS]; 455 ccl_chars_t isyms[NCHARS]; 456 ccl_chars_t ssyms[NCHARS]; 457 int ssmax, symax, ismax, ssinit; 458 459 460 wchar_t hat; 461 int hatcn; 462 463 464 for (i = 0; i <= line; i++) index[i] = iposns[i] = setvec[i] = 0; 465 isyms[0].cc_cs = isyms[0].cc_ce = (wchar_t)0x0; 466 for (i = 0; i < NCHARS; i++) 467 isyms[i] = symbol[i] = ssyms[i] = isyms[0]; 468 symax = 0; 469 setcnt = 0; 470 /* compute initial positions and symbols of state 0 */ 471 ismax = 0; 472 ssmax = 0; 473 ptr = state[0] = foll[0]; 474 spinit = *ptr; 475 hat = HAT; 476 hatcn = wcsetno(hat); 477 for (i = 0; i < spinit; i++) { 478 curpos = *(++ptr); 479 sposns[i] = curpos; 480 iposns[curpos] = 1; 481 cp = point[curpos]; 482 dprintf("i= %d, spinit = %d, curpos = %d\n", i, spinit, curpos); 483 switch (type(cp)) { 484 case CHAR: 485 k = (int)right(cp); 486 ns = wcsetno(k); 487 if (! ccln_member(ns, k, ns, k, 488 isyms, ismax)) { 489 ismax = insert_table(isyms, ismax, 490 ns, k, ns, k); 491 } 492 ssyms[ssmax].cc_ns = ns; 493 ssyms[ssmax].cc_cs = k; 494 ssyms[ssmax].cc_ne = ns; 495 ssyms[ssmax++].cc_ce = k; 496 break; 497 case DOT: 498 cs = WC_VERY_SMALL; 499 ns = 0; 500 ce = HAT - 1; 501 ne = hatcn; 502 if (! ccln_member(ns, cs, ne, ce, 503 isyms, ismax)) { 504 ismax = insert_table(isyms, ismax, 505 ns, cs, ne, ce); 506 } 507 ssyms[ssmax].cc_cs = cs; 508 ssyms[ssmax].cc_ns = ns; 509 ssyms[ssmax].cc_ce = ce; 510 ssyms[ssmax++].cc_ne = ne; 511 cs = HAT + 1; 512 ns = hatcn; 513 ce = WC_VERY_LARGE; 514 ne = MAX_CODESET; 515 if (! ccln_member(ns, cs, ne, ce, 516 isyms, ismax)) { 517 ismax = insert_table(isyms, ismax, 518 ns, cs, ne, ce); 519 } 520 ssyms[ssmax].cc_cs = cs; 521 ssyms[ssmax].cc_ns = ns; 522 ssyms[ssmax].cc_ce = ce; 523 ssyms[ssmax++].cc_ne = ne; 524 break; 525 case CCL: 526 cs = HAT; 527 ns = hatcn; 528 for (p = (ccl_chars_t *)right(cp); 529 p->cc_cs; p++) { 530 if ((p->cc_ns != ns ||\ 531 p->cc_cs != cs) &&\ 532 !ccln_member(p->cc_ns, p->cc_cs, 533 p->cc_ne, p->cc_ce, isyms, ismax)) { 534 ismax = insert_table(isyms, 535 ismax, p->cc_ns, p->cc_cs, p->cc_ne, p->cc_ce); 536 } 537 ssyms[ssmax++] = *p; 538 } 539 break; 540 case NCCL: 541 ns = 0; 542 cs = WC_VERY_SMALL; 543 for (p = (ccl_chars_t *)right(cp); 544 p->cc_cs; p++) { 545 if ((ns != hatcn || p->cc_cs != HAT) && 546 ! ccln_member(ns, cs, 547 p->cc_ns, p->cc_cs-1, 548 isyms, ismax)) { 549 ismax = insert_table(isyms, 550 ismax, 551 ns, cs, 552 p->cc_ns, 553 p->cc_cs-1); 554 } 555 ssyms[ssmax].cc_ns = ns; 556 ssyms[ssmax].cc_cs = cs; 557 ssyms[ssmax].cc_ne = p->cc_ns; 558 ssyms[ssmax++].cc_ce = p->cc_cs-1; 559 if (p->cc_ce == (wchar_t)0x0) { 560 ns = p->cc_ns; 561 cs = p->cc_cs + 1; 562 563 } else { 564 ns = p->cc_ne; 565 cs = p->cc_ce + 1; 566 } 567 } 568 if ((ns != hatcn || cs != HAT) && 569 ! ccln_member(ns, cs, 570 MAX_CODESET, WC_VERY_LARGE, 571 isyms, ismax)) { 572 ismax = insert_table(isyms, ismax, 573 ns, cs, MAX_CODESET, 574 WC_VERY_LARGE); 575 } 576 ssyms[ssmax].cc_ns = ns; 577 ssyms[ssmax].cc_cs = cs; 578 ssyms[ssmax].cc_ne = MAX_CODESET; 579 ssyms[ssmax++].cc_ce = WC_VERY_LARGE; 580 break; 581 } 582 } 583 ssinit = ssmax; 584 symax = 0; 585 n = 0; 586 for (s = 0; s <= n; s++) { 587 dprintf("s = %d\n", s, NULL, NULL); 588 ind = 0; 589 numtrans = 0; 590 finflg = 0; 591 if (*(state[s] + *state[s]) == line) { /* s final? */ 592 finflg = 1; 593 goto tenter; 594 } 595 spmax = spinit; 596 ssmax = ssinit; 597 ptr = state[s]; 598 num = *ptr; 599 for (i = 0; i < num; i++) { 600 curpos = *(++ptr); 601 if (iposns[curpos] != 1 && index[curpos] != 1) { 602 index[curpos] = 1; 603 sposns[spmax++] = curpos; 604 } 605 cp = point[curpos]; 606 switch (type(cp)) { 607 case CHAR: 608 k = (int)right(cp); 609 ns = wcsetno(k); 610 if (! ccln_member(ns, k, ns, k, 611 isyms, ismax) && 612 ! ccln_member(ns, k, ns, k, 613 symbol, symax)) { 614 symax = insert_table(symbol, 615 symax, 616 ns, k, 617 ns, k); 618 } 619 ssyms[ssmax].cc_ns = ns; 620 ssyms[ssmax].cc_cs = k; 621 ssyms[ssmax].cc_ne = ns; 622 ssyms[ssmax++].cc_ce = k; 623 break; 624 case DOT: 625 cs = WC_VERY_SMALL; 626 ns = 0; 627 ce = HAT - 1; 628 ne = hatcn; 629 if (! ccln_member(ns, cs, ne, ce, 630 isyms, ismax) && 631 ! ccln_member(ns, cs, ne, ce, 632 symbol, symax)) { 633 symax = insert_table(symbol, 634 symax, 635 ns, cs, 636 ne, ce); 637 } 638 ssyms[ssmax].cc_cs = cs; 639 ssyms[ssmax].cc_ns = ns; 640 ssyms[ssmax].cc_ce = ce; 641 ssyms[ssmax++].cc_ne = ne; 642 cs = HAT + 1; 643 ns = hatcn; 644 ce = WC_VERY_LARGE; 645 ne = MAX_CODESET; 646 if (! ccln_member(ns, cs, ne, ce, 647 isyms, ismax) && 648 ! ccln_member(ns, cs, ne, ce, 649 symbol, symax)) { 650 symax = insert_table(symbol, 651 symax, 652 ns, cs, 653 ne, ce); 654 } 655 ssyms[ssmax].cc_cs = cs; 656 ssyms[ssmax].cc_ns = ns; 657 ssyms[ssmax].cc_ce = ce; 658 ssyms[ssmax++].cc_ne = ne; 659 break; 660 case CCL: 661 cs = HAT; 662 ns = hatcn; 663 for (p = (ccl_chars_t *)right(cp); 664 p->cc_cs; p++) { 665 if ((p->cc_ns != ns || 666 p->cc_cs != cs) && 667 ! ccln_member(p->cc_ns, 668 p->cc_cs, p->cc_ne, 669 p->cc_ce, isyms, ismax) && 670 !ccln_member(p->cc_ns, p->cc_cs, 671 p->cc_ne, p->cc_ce, symbol, 672 symax)) { 673 symax = insert_table( 674 symbol, symax, p->cc_ns, 675 p->cc_cs, p->cc_ne, p->cc_ce); 676 } 677 ssyms[ssmax++] = *p; 678 } 679 break; 680 case NCCL: 681 ns = 0; 682 cs = WC_VERY_SMALL; 683 for (p = (ccl_chars_t *)right(cp); p->cc_cs; p++) { 684 if ((p->cc_ns != hatcn || p->cc_cs != HAT) && 685 ! ccln_member(ns, cs, p->cc_ns, 686 p->cc_cs-1, isyms, ismax) && 687 ! ccln_member(ns, cs, p->cc_ns, 688 p->cc_cs-1, symbol, symax)) { 689 symax = insert_table(symbol, 690 symax, ns, cs, p->cc_ns, p->cc_cs-1); 691 } 692 ssyms[ssmax].cc_ns = ns; 693 ssyms[ssmax].cc_cs = cs; 694 ssyms[ssmax].cc_ne = p->cc_ns; 695 ssyms[ssmax++].cc_ce 696 = p->cc_cs-1; 697 if (p->cc_ce == (wchar_t)0x0) { 698 ns = p->cc_ns; 699 cs = p->cc_cs + 1; 700 701 } else { 702 ns = p->cc_ne; 703 cs = p->cc_ce + 1; 704 } 705 } 706 if ((ns != hatcn || cs != HAT) && ! ccln_member(ns, cs, 707 MAX_CODESET, WC_VERY_LARGE, isyms, ismax) && 708 ! ccln_member(ns, cs, MAX_CODESET, 709 WC_VERY_LARGE, symbol, symax)) { 710 symax = insert_table(symbol, symax, ns, cs, 711 MAX_CODESET, 712 WC_VERY_LARGE); 713 } 714 ssyms[ssmax].cc_ns = ns; 715 ssyms[ssmax].cc_cs = cs; 716 ssyms[ssmax].cc_ne = MAX_CODESET; 717 ssyms[ssmax++].cc_ce = WC_VERY_LARGE; 718 break; 719 } 720 } 721 for (j = 0; j < ssmax; j++) { /* nextstate(s, ssyms[j]) */ 722 ns = ssyms[j].cc_ns; 723 cs = ssyms[j].cc_cs; 724 ne = ssyms[j].cc_ne; 725 ce = ssyms[j].cc_ce; 726 dprintf("j = %d, cs = %o, ce = %o\n", j, cs, ce); 727 symax = delete_table(symbol, symax, ns, cs, ne, ce); 728 setcnt = 0; 729 for (k = 0; k <= line; k++) setvec[k] = 0; 730 for (i = 0; i < spmax; i++) { 731 index[sposns[i]] = 0; 732 cp = point[sposns[i]]; 733 if ((k = type(cp)) != FINAL) { 734 if (k == CHAR && ns == ne && cs == ce && 735 cs == (int)right(cp) || 736 k == DOT || k == CCL && 737 ccl_member(ns, cs, ne, ce, 738 (ccl_chars_t *)right(cp)) || 739 k == NCCL && 740 !ccl_member(ns, cs, ne, ce, 741 (ccl_chars_t *)right(cp))) { 742 ptr = foll[sposns[i]]; 743 num = *ptr; 744 for (k = 0; k < num; k++) { 745 if (setvec[*(++ptr)] != 1 && 746 iposns[*ptr] != 1) { 747 setvec[*ptr] = 1; 748 setcnt++; 749 } 750 } 751 } 752 } 753 } /* end nextstate */ 754 if (notin(state, n, &prev)) { 755 if (n >= NSTATES - 1) { 756 printf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); 757 overflo(); 758 } 759 state[++n] = add(setcnt); 760 dprintf(" delta(%d,[%o,%o])", 761 s, cs, ce); 762 dprintf(" = %d, ind = %d\n", n, ind+1, NULL); 763 fatab[++ind].cc.cc_ns = ns; 764 fatab[ind].cc.cc_cs = cs; 765 fatab[ind].cc.cc_ne = ne; 766 fatab[ind].cc.cc_ce = ce; 767 fatab[ind].n = n; 768 numtrans++; 769 } else { 770 if (prev != 0) { 771 dprintf(" delta(%d,[%o,%o])", 772 s, cs, ce); 773 dprintf("= %d, ind = %d\n", 774 prev, ind+1, NULL); 775 fatab[++ind].cc.cc_ns = ns; 776 fatab[ind].cc.cc_cs = cs; 777 fatab[ind].cc.cc_ne = ne; 778 fatab[ind].cc.cc_ce = ce; 779 fatab[ind].n = prev; 780 numtrans++; 781 } 782 } 783 } 784 tenter: 785 if ((pfa = (struct fa *)malloc((numtrans + 1) 786 * sizeof (struct fa))) == NULL) 787 overflo(); 788 where[s] = pfa; 789 if (finflg) 790 pfa->cc.h = -1; /* s is a final state */ 791 else 792 pfa->cc.h = numtrans; 793 pfa->st = 0; 794 for (i = 1, pfa += 1; i <= numtrans; i++, pfa++) { 795 pfa->cc.s = fatab[i].cc; 796 pfa->st = (struct fa *)fatab[i].n; 797 } 798 } 799 for (i = 0; i <= n; i++) { 800 if (i != 0) /* state[0] is freed later in freetr() */ 801 xfree(state[i]); /* free state[i] */ 802 pfa = where[i]; 803 pfa->st = where[0]; 804 dprintf("state %d: (%o)\n", i, pfa, NULL); 805 dprintf(" numtrans = %d, default = %o\n", 806 pfa->cc.h, pfa->st, NULL); 807 for (k = 1; k <= pfa->cc.h; k++) { 808 (pfa+k)->st = where[(int)(pfa+k)->st]; 809 dprintf(" char = [%o,%o], nextstate = %o\n", 810 (pfa+k)->cc.s.cc_cs, (pfa+k)->cc.s.cc_ce, 811 (pfa+k)->st); 812 } 813 } 814 pfa = where[0]; 815 if ((num = pfa->cc.h) < 0) 816 return (where[0]); 817 for (pfa += num; num; num--, pfa--) 818 if (pfa->cc.s.cc_ns == hatcn && pfa->cc.s.cc_cs == HAT) { 819 return (pfa->st); 820 } 821 return (where[0]); 822 } 823 824 825 /* 826 * Insert CCL entry to CCL table with maintain optimized order. 827 */ 828 static int 829 insert_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs, 830 int ne, wchar_t ce) 831 { 832 int i; 833 int tns, tne; 834 wchar_t tcs, tce; 835 ccl_chars_t *table; 836 ccl_chars_t *saved_table; 837 int saved_i; 838 839 840 841 842 dprintf("Inserting {%o, %o} to table %o\n", cs, ce, table_base); 843 /* 844 * Searching the table to find out where should put the new item. 845 */ 846 for (i = 0, table = table_base; i < table_size; i++, table++) { 847 tns = table->cc_ns; 848 tcs = table->cc_cs; 849 tne = table->cc_ne; 850 tce = table->cc_ce; 851 if (MLCMPLT(ne, ce, tns, (tcs - 1))) { 852 /* 853 * Quick! insert to font of current table entries. 854 */ 855 qinsert: 856 table_size++; 857 for (; i < table_size; i++, table++) { 858 tns = table->cc_ns; 859 tcs = table->cc_cs; 860 tne = table->cc_ne; 861 tce = table->cc_ce; 862 table->cc_ns = ns; 863 table->cc_cs = cs; 864 table->cc_ne = ne; 865 table->cc_ce = ce; 866 ns = tns; 867 cs = tcs; 868 ne = tne; 869 ce = tce; 870 } 871 goto add_null; 872 } else if (MLCMPLE(tns, (tcs - 1), ns, cs) && 873 MLCMPLE(ns, cs, tne, (tce + 1))) { 874 /* 875 * Starting point is within the current entry. 876 */ 877 if (MLCMPGT(tns, tcs, ns, cs)) { 878 table->cc_ns = ns; 879 table->cc_cs = cs; 880 } 881 if (MLCMPLE(ne, ce, tne, tce)) { 882 return (table_size); 883 } 884 goto combine; 885 } 886 } 887 888 889 /* 890 * Adding new one to end of table. 891 */ 892 table->cc_ns = ns; 893 table->cc_cs = cs; 894 table->cc_ne = ne; 895 table->cc_ce = ce; 896 897 898 table_size++; 899 goto add_null; 900 901 902 903 904 combine: 905 /* 906 * Check and try to combine the new entry with rest of entries. 907 */ 908 if ((i + 1) >= table_size) { 909 table->cc_ne = ne; 910 table->cc_ce = ce; 911 return (table_size); 912 } 913 914 915 saved_table = table++; 916 saved_i = i++; 917 918 919 /* 920 * Finding the spot where we should put the end point. 921 */ 922 for (; i < table_size; i++, table++) { 923 if (MLCMPLT(ne, ce, table->cc_ns, (table->cc_cs - 1))) { 924 break; 925 } else 926 if (MLCMPLE(table->cc_ns, (table->cc_cs - 1), ne, ce) && 927 MLCMPLE(ne, ce, table->cc_ne, (table->cc_ce + 1))) { 928 /* 929 * Tack with this table. 930 */ 931 if (MLCMPLT(ne, ce, table->cc_ne, table->cc_ce)) { 932 ne = table->cc_ne; 933 ce = table->cc_ce; 934 } 935 table++; 936 i++; 937 break; 938 } 939 } 940 941 942 saved_table->cc_ne = ne; 943 saved_table->cc_ce = ce; 944 saved_i = table_size - (i - saved_i - 1); 945 946 947 /* 948 * Moving the rest of entries. 949 */ 950 for (; i < table_size; i++, table++) 951 *(++saved_table) = *table; 952 table_size = saved_i; 953 954 955 add_null: 956 table_base[table_size].cc_cs = (wchar_t)0x0; 957 table_base[table_size].cc_ce = (wchar_t)0x0; 958 959 960 return (table_size); 961 } 962 963 964 965 966 static int 967 delete_table(ccl_chars_t *table_base, int table_size, int ns, wchar_t cs, 968 int ne, wchar_t ce) 969 { 970 int i; 971 int saved_i; 972 ccl_chars_t *table; 973 ccl_chars_t *saved_table; 974 int tns; 975 wchar_t tcs; 976 int tne; 977 wchar_t tce; 978 979 980 981 982 for (i = 0, table = table_base; i < table_size; i++, table++) { 983 tns = table->cc_ns; 984 tcs = table->cc_cs; 985 tne = table->cc_ne; 986 tce = table->cc_ce; 987 if (MLCMPLT(ne, ce, tns, tcs)) 988 return (table_size); 989 else if (MLCMPLT(ne, ce, tne, tce)) { 990 if (MLCMPLE(ns, cs, tns, tcs)) { 991 /* 992 * Shrink type 1. 993 */ 994 table->cc_ns = ne; 995 table->cc_cs = ce + 1; 996 return (table_size); 997 998 } else { 999 /* 1000 * Spliting !! 1001 */ 1002 table->cc_ns = ne; 1003 table->cc_cs = ce + 1; 1004 tne = ns; 1005 tce = cs - 1; 1006 table_size++; 1007 for (; i < table_size; i++, table++) { 1008 ns = table->cc_ns; 1009 cs = table->cc_cs; 1010 ne = table->cc_ne; 1011 ce = table->cc_ce; 1012 table->cc_ns = tns; 1013 table->cc_cs = tcs; 1014 table->cc_ne = tne; 1015 table->cc_ce = tce; 1016 tns = ns; 1017 tcs = cs; 1018 tne = ne; 1019 tce = ce; 1020 } 1021 return (table_size); 1022 } 1023 1024 } else if (MLCMPLE(ns, cs, tne, tce)) { 1025 if (MLCMPGT(ns, cs, tns, tcs)) { 1026 /* 1027 * Shrink current table(type 2). 1028 */ 1029 table->cc_ne = ns; 1030 table->cc_ce = cs - 1; 1031 table++; 1032 i++; 1033 } 1034 /* 1035 * Search for the end point. 1036 */ 1037 saved_i = i; 1038 saved_table = table; 1039 for (; i < table_size; i++, table++) { 1040 if (MLCMPLT(ne, ce, 1041 table->cc_ns, table->cc_cs)) { 1042 /* 1043 * Easy point, no shrinks! 1044 */ 1045 break; 1046 1047 } else if (MLCMPGT(table->cc_ne, table->cc_ce, 1048 ne, ce)) { 1049 /* 1050 * Shrinking... 1051 */ 1052 table->cc_ns = ne; 1053 table->cc_cs = ce + 1; 1054 break; 1055 } 1056 1057 1058 } 1059 /* 1060 * Moving(removing) backword. 1061 */ 1062 saved_i = table_size - (i - saved_i); 1063 for (; i < table_size; i++) 1064 *saved_table++ = *table++; 1065 return (saved_i); 1066 } 1067 } 1068 return (table_size); 1069 } 1070 1071 1072 #ifdef DEBUG 1073 dump_table(ccl_chars_t *table, int size) 1074 { 1075 int i; 1076 1077 1078 1079 1080 if (! dbg) 1081 return; 1082 1083 1084 printf("Duming table %o with size %d\n", table, size); 1085 size++; /* To watch out NULL */ 1086 for (i = 0; i < size; i++, table++) { 1087 printf("{%3o, %3o}, ", table->cc_cs, table->cc_ce); 1088 } 1089 printf("\n"); 1090 } 1091 #endif /* DEBUG */ 1092 1093 1094 1095 int 1096 match(struct fa *pfa, wchar_t *p) 1097 { 1098 int count; 1099 int n, ns, ne; 1100 wchar_t c, cs, ce; 1101 1102 1103 if (p == 0) 1104 return (0); 1105 if (pfa->cc.h == 1) { /* fast test for first character, if possible */ 1106 ns = (++pfa)->cc.s.cc_ns; 1107 cs = (pfa)->cc.s.cc_cs; 1108 ne = (pfa)->cc.s.cc_ne; 1109 ce = (pfa)->cc.s.cc_ce; 1110 do { 1111 c = *p; 1112 n = wcsetno(c); 1113 if (MLCMPLE(ns, cs, n, c) && 1114 MLCMPLE(n, c, ne, ce)) { 1115 p++; 1116 pfa = pfa->st; 1117 goto adv; 1118 } 1119 } while (*p++ != 0); 1120 return (0); 1121 } 1122 adv: if ((count = pfa->cc.h) < 0) 1123 return (1); 1124 do { 1125 c = *p; 1126 n = wcsetno(c); 1127 for (pfa += count; count; count--, pfa--) { 1128 ns = (pfa)->cc.s.cc_ns; 1129 cs = (pfa)->cc.s.cc_cs; 1130 ne = (pfa)->cc.s.cc_ne; 1131 ce = (pfa)->cc.s.cc_ce; 1132 if (MLCMPLE(ns, cs, n, c) && MLCMPLE(n, c, ne, ce)) 1133 break; 1134 } 1135 pfa = pfa->st; 1136 if ((count = pfa->cc.h) < 0) 1137 return (1); 1138 } while (*p++ != 0); 1139 return (0); 1140 } 1141