1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 /* lasciate ogne speranza, voi ch'entrate. */ 26 27 #define DEBUG 28 29 #include <ctype.h> 30 #include <stdio.h> 31 #include <string.h> 32 #include <stdlib.h> 33 #include "awk.h" 34 #include "ytab.h" 35 36 #define HAT (NCHARS+2) /* matches ^ in regular expr */ 37 /* NCHARS is 2**n */ 38 #define MAXLIN 22 39 40 #define type(v) (v)->nobj /* badly overloaded here */ 41 #define info(v) (v)->ntype /* badly overloaded here */ 42 #define left(v) (v)->narg[0] 43 #define right(v) (v)->narg[1] 44 #define parent(v) (v)->nnext 45 46 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 47 #define UNARY case STAR: case PLUS: case QUEST: 48 49 /* encoding in tree Nodes: 50 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 51 left is index, right contains value or pointer to value 52 unary (STAR, PLUS, QUEST): left is child, right is null 53 binary (CAT, OR): left and right are children 54 parent contains pointer to parent 55 */ 56 57 58 int *setvec; 59 int *tmpset; 60 int maxsetvec = 0; 61 62 int rtok; /* next token in current re */ 63 int rlxval; 64 static uschar *rlxstr; 65 static uschar *prestr; /* current position in current re */ 66 static uschar *lastre; /* origin of last re */ 67 68 static int setcnt; 69 static int poscnt; 70 71 char *patbeg; 72 int patlen; 73 74 #define NFA 20 /* cache this many dynamic fa's */ 75 fa *fatab[NFA]; 76 int nfatab = 0; /* entries in fatab */ 77 78 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ 79 { 80 int i, use, nuse; 81 fa *pfa; 82 static int now = 1; 83 84 if (setvec == 0) { /* first time through any RE */ 85 maxsetvec = MAXLIN; 86 setvec = (int *) malloc(maxsetvec * sizeof(int)); 87 tmpset = (int *) malloc(maxsetvec * sizeof(int)); 88 if (setvec == 0 || tmpset == 0) 89 overflo("out of space initializing makedfa"); 90 } 91 92 if (compile_time) /* a constant for sure */ 93 return mkdfa(s, anchor); 94 for (i = 0; i < nfatab; i++) /* is it there already? */ 95 if (fatab[i]->anchor == anchor 96 && strcmp((const char *) fatab[i]->restr, s) == 0) { 97 fatab[i]->use = now++; 98 return fatab[i]; 99 } 100 pfa = mkdfa(s, anchor); 101 if (nfatab < NFA) { /* room for another */ 102 fatab[nfatab] = pfa; 103 fatab[nfatab]->use = now++; 104 nfatab++; 105 return pfa; 106 } 107 use = fatab[0]->use; /* replace least-recently used */ 108 nuse = 0; 109 for (i = 1; i < nfatab; i++) 110 if (fatab[i]->use < use) { 111 use = fatab[i]->use; 112 nuse = i; 113 } 114 freefa(fatab[nuse]); 115 fatab[nuse] = pfa; 116 pfa->use = now++; 117 return pfa; 118 } 119 120 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ 121 /* anchor = 1 for anchored matches, else 0 */ 122 { 123 Node *p, *p1; 124 fa *f; 125 126 p = reparse(s); 127 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 128 /* put ALL STAR in front of reg. exp. */ 129 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 130 /* put FINAL after reg. exp. */ 131 132 poscnt = 0; 133 penter(p1); /* enter parent pointers and leaf indices */ 134 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 135 overflo("out of space for fa"); 136 f->accept = poscnt-1; /* penter has computed number of positions in re */ 137 cfoll(f, p1); /* set up follow sets */ 138 freetr(p1); 139 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) 140 overflo("out of space in makedfa"); 141 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 142 overflo("out of space in makedfa"); 143 *f->posns[1] = 0; 144 f->initstat = makeinit(f, anchor); 145 f->anchor = anchor; 146 f->restr = (uschar *) tostring(s); 147 return f; 148 } 149 150 int makeinit(fa *f, int anchor) 151 { 152 int i, k; 153 154 f->curstat = 2; 155 f->out[2] = 0; 156 f->reset = 0; 157 k = *(f->re[0].lfollow); 158 xfree(f->posns[2]); 159 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 160 overflo("out of space in makeinit"); 161 for (i=0; i <= k; i++) { 162 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 163 } 164 if ((f->posns[2])[1] == f->accept) 165 f->out[2] = 1; 166 for (i=0; i < NCHARS; i++) 167 f->gototab[2][i] = 0; 168 f->curstat = cgoto(f, 2, HAT); 169 if (anchor) { 170 *f->posns[2] = k-1; /* leave out position 0 */ 171 for (i=0; i < k; i++) { 172 (f->posns[0])[i] = (f->posns[2])[i]; 173 } 174 175 f->out[0] = f->out[2]; 176 if (f->curstat != 2) 177 --(*f->posns[f->curstat]); 178 } 179 return f->curstat; 180 } 181 182 void penter(Node *p) /* set up parent pointers and leaf indices */ 183 { 184 switch (type(p)) { 185 LEAF 186 info(p) = poscnt; 187 poscnt++; 188 break; 189 UNARY 190 penter(left(p)); 191 parent(left(p)) = p; 192 break; 193 case CAT: 194 case OR: 195 penter(left(p)); 196 penter(right(p)); 197 parent(left(p)) = p; 198 parent(right(p)) = p; 199 break; 200 default: /* can't happen */ 201 FATAL("can't happen: unknown type %d in penter", type(p)); 202 break; 203 } 204 } 205 206 void freetr(Node *p) /* free parse tree */ 207 { 208 switch (type(p)) { 209 LEAF 210 xfree(p); 211 break; 212 UNARY 213 freetr(left(p)); 214 xfree(p); 215 break; 216 case CAT: 217 case OR: 218 freetr(left(p)); 219 freetr(right(p)); 220 xfree(p); 221 break; 222 default: /* can't happen */ 223 FATAL("can't happen: unknown type %d in freetr", type(p)); 224 break; 225 } 226 } 227 228 /* in the parsing of regular expressions, metacharacters like . have */ 229 /* to be seen literally; \056 is not a metacharacter. */ 230 231 int hexstr(char **pp) /* find and eval hex string at pp, return new p */ 232 { /* only pick up one 8-bit byte (2 chars) */ 233 uschar *p; 234 int n = 0; 235 int i; 236 237 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 238 if (isdigit(*p)) 239 n = 16 * n + *p - '0'; 240 else if (*p >= 'a' && *p <= 'f') 241 n = 16 * n + *p - 'a' + 10; 242 else if (*p >= 'A' && *p <= 'F') 243 n = 16 * n + *p - 'A' + 10; 244 } 245 *pp = (char *) p; 246 return n; 247 } 248 249 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 250 251 int quoted(char **pp) /* pick up next thing after a \\ */ 252 /* and increment *pp */ 253 { 254 char *p = *pp; 255 int c; 256 257 if ((c = *p++) == 't') 258 c = '\t'; 259 else if (c == 'n') 260 c = '\n'; 261 else if (c == 'f') 262 c = '\f'; 263 else if (c == 'r') 264 c = '\r'; 265 else if (c == 'b') 266 c = '\b'; 267 else if (c == '\\') 268 c = '\\'; 269 else if (c == 'x') { /* hexadecimal goo follows */ 270 c = hexstr(&p); /* this adds a null if number is invalid */ 271 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 272 int n = c - '0'; 273 if (isoctdigit(*p)) { 274 n = 8 * n + *p++ - '0'; 275 if (isoctdigit(*p)) 276 n = 8 * n + *p++ - '0'; 277 } 278 c = n; 279 } /* else */ 280 /* c = c; */ 281 *pp = p; 282 return c; 283 } 284 285 char *cclenter(const char *argp) /* add a character class */ 286 { 287 int i, c, c2; 288 uschar *p = (uschar *) argp; 289 uschar *op, *bp; 290 static uschar *buf = 0; 291 static int bufsz = 100; 292 293 op = p; 294 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 295 FATAL("out of space for character class [%.10s...] 1", p); 296 bp = buf; 297 for (i = 0; (c = *p++) != 0; ) { 298 if (c == '\\') { 299 c = quoted((char **) &p); 300 } else if (c == '-' && i > 0 && bp[-1] != 0) { 301 if (*p != 0) { 302 c = bp[-1]; 303 c2 = *p++; 304 if (c2 == '\\') 305 c2 = quoted((char **) &p); 306 if (c > c2) { /* empty; ignore */ 307 bp--; 308 i--; 309 continue; 310 } 311 while (c < c2) { 312 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 313 FATAL("out of space for character class [%.10s...] 2", p); 314 *bp++ = ++c; 315 i++; 316 } 317 continue; 318 } 319 } 320 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 321 FATAL("out of space for character class [%.10s...] 3", p); 322 *bp++ = c; 323 i++; 324 } 325 *bp = 0; 326 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 327 xfree(op); 328 return (char *) tostring((char *) buf); 329 } 330 331 void overflo(const char *s) 332 { 333 FATAL("regular expression too big: %.30s...", s); 334 } 335 336 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 337 { 338 int i; 339 int *p; 340 341 switch (type(v)) { 342 LEAF 343 f->re[info(v)].ltype = type(v); 344 f->re[info(v)].lval.np = right(v); 345 while (f->accept >= maxsetvec) { /* guessing here! */ 346 maxsetvec *= 4; 347 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 348 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 349 if (setvec == 0 || tmpset == 0) 350 overflo("out of space in cfoll()"); 351 } 352 for (i = 0; i <= f->accept; i++) 353 setvec[i] = 0; 354 setcnt = 0; 355 follow(v); /* computes setvec and setcnt */ 356 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 357 overflo("out of space building follow set"); 358 f->re[info(v)].lfollow = p; 359 *p = setcnt; 360 for (i = f->accept; i >= 0; i--) 361 if (setvec[i] == 1) 362 *++p = i; 363 break; 364 UNARY 365 cfoll(f,left(v)); 366 break; 367 case CAT: 368 case OR: 369 cfoll(f,left(v)); 370 cfoll(f,right(v)); 371 break; 372 default: /* can't happen */ 373 FATAL("can't happen: unknown type %d in cfoll", type(v)); 374 } 375 } 376 377 int first(Node *p) /* collects initially active leaves of p into setvec */ 378 /* returns 1 if p matches empty string */ 379 { 380 int b, lp; 381 382 switch (type(p)) { 383 LEAF 384 lp = info(p); /* look for high-water mark of subscripts */ 385 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 386 maxsetvec *= 4; 387 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 388 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 389 if (setvec == 0 || tmpset == 0) 390 overflo("out of space in first()"); 391 } 392 if (setvec[lp] != 1) { 393 setvec[lp] = 1; 394 setcnt++; 395 } 396 if (type(p) == CCL && (*(char *) right(p)) == '\0') 397 return(0); /* empty CCL */ 398 else return(1); 399 case PLUS: 400 if (first(left(p)) == 0) return(0); 401 return(1); 402 case STAR: 403 case QUEST: 404 first(left(p)); 405 return(0); 406 case CAT: 407 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 408 return(1); 409 case OR: 410 b = first(right(p)); 411 if (first(left(p)) == 0 || b == 0) return(0); 412 return(1); 413 } 414 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 415 return(-1); 416 } 417 418 void follow(Node *v) /* collects leaves that can follow v into setvec */ 419 { 420 Node *p; 421 422 if (type(v) == FINAL) 423 return; 424 p = parent(v); 425 switch (type(p)) { 426 case STAR: 427 case PLUS: 428 first(v); 429 follow(p); 430 return; 431 432 case OR: 433 case QUEST: 434 follow(p); 435 return; 436 437 case CAT: 438 if (v == left(p)) { /* v is left child of p */ 439 if (first(right(p)) == 0) { 440 follow(p); 441 return; 442 } 443 } else /* v is right child */ 444 follow(p); 445 return; 446 } 447 } 448 449 int member(int c, const char *sarg) /* is c in s? */ 450 { 451 uschar *s = (uschar *) sarg; 452 453 while (*s) 454 if (c == *s++) 455 return(1); 456 return(0); 457 } 458 459 int match(fa *f, const char *p0) /* shortest match ? */ 460 { 461 int s, ns; 462 uschar *p = (uschar *) p0; 463 464 s = f->reset ? makeinit(f,0) : f->initstat; 465 if (f->out[s]) 466 return(1); 467 do { 468 assert(*p < NCHARS); 469 if ((ns = f->gototab[s][*p]) != 0) 470 s = ns; 471 else 472 s = cgoto(f, s, *p); 473 if (f->out[s]) 474 return(1); 475 } while (*p++ != 0); 476 return(0); 477 } 478 479 int pmatch(fa *f, const char *p0) /* longest match, for sub */ 480 { 481 int s, ns; 482 uschar *p = (uschar *) p0; 483 uschar *q; 484 int i, k; 485 486 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 487 if (f->reset) { 488 f->initstat = s = makeinit(f,1); 489 } else { 490 s = f->initstat; 491 } 492 patbeg = (char *) p; 493 patlen = -1; 494 do { 495 q = p; 496 do { 497 if (f->out[s]) /* final state */ 498 patlen = q-p; 499 assert(*q < NCHARS); 500 if ((ns = f->gototab[s][*q]) != 0) 501 s = ns; 502 else 503 s = cgoto(f, s, *q); 504 if (s == 1) { /* no transition */ 505 if (patlen >= 0) { 506 patbeg = (char *) p; 507 return(1); 508 } 509 else 510 goto nextin; /* no match */ 511 } 512 } while (*q++ != 0); 513 if (f->out[s]) 514 patlen = q-p-1; /* don't count $ */ 515 if (patlen >= 0) { 516 patbeg = (char *) p; 517 return(1); 518 } 519 nextin: 520 s = 2; 521 if (f->reset) { 522 for (i = 2; i <= f->curstat; i++) 523 xfree(f->posns[i]); 524 k = *f->posns[0]; 525 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 526 overflo("out of space in pmatch"); 527 for (i = 0; i <= k; i++) 528 (f->posns[2])[i] = (f->posns[0])[i]; 529 f->initstat = f->curstat = 2; 530 f->out[2] = f->out[0]; 531 for (i = 0; i < NCHARS; i++) 532 f->gototab[2][i] = 0; 533 } 534 } while (*p++ != 0); 535 return (0); 536 } 537 538 int nematch(fa *f, const char *p0) /* non-empty match, for sub */ 539 { 540 int s, ns; 541 uschar *p = (uschar *) p0; 542 uschar *q; 543 int i, k; 544 545 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 546 if (f->reset) { 547 f->initstat = s = makeinit(f,1); 548 } else { 549 s = f->initstat; 550 } 551 patlen = -1; 552 while (*p) { 553 q = p; 554 do { 555 if (f->out[s]) /* final state */ 556 patlen = q-p; 557 assert(*q < NCHARS); 558 if ((ns = f->gototab[s][*q]) != 0) 559 s = ns; 560 else 561 s = cgoto(f, s, *q); 562 if (s == 1) { /* no transition */ 563 if (patlen > 0) { 564 patbeg = (char *) p; 565 return(1); 566 } else 567 goto nnextin; /* no nonempty match */ 568 } 569 } while (*q++ != 0); 570 if (f->out[s]) 571 patlen = q-p-1; /* don't count $ */ 572 if (patlen > 0 ) { 573 patbeg = (char *) p; 574 return(1); 575 } 576 nnextin: 577 s = 2; 578 if (f->reset) { 579 for (i = 2; i <= f->curstat; i++) 580 xfree(f->posns[i]); 581 k = *f->posns[0]; 582 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 583 overflo("out of state space"); 584 for (i = 0; i <= k; i++) 585 (f->posns[2])[i] = (f->posns[0])[i]; 586 f->initstat = f->curstat = 2; 587 f->out[2] = f->out[0]; 588 for (i = 0; i < NCHARS; i++) 589 f->gototab[2][i] = 0; 590 } 591 p++; 592 } 593 return (0); 594 } 595 596 Node *reparse(const char *p) /* parses regular expression pointed to by p */ 597 { /* uses relex() to scan regular expression */ 598 Node *np; 599 600 dprintf( ("reparse <%s>\n", p) ); 601 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 602 rtok = relex(); 603 /* GNU compatibility: an empty regexp matches anything */ 604 if (rtok == '\0') 605 /* FATAL("empty regular expression"); previous */ 606 return(op2(ALL, NIL, NIL)); 607 np = regexp(); 608 if (rtok != '\0') 609 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 610 return(np); 611 } 612 613 Node *regexp(void) /* top-level parse of reg expr */ 614 { 615 return (alt(concat(primary()))); 616 } 617 618 Node *primary(void) 619 { 620 Node *np; 621 622 switch (rtok) { 623 case CHAR: 624 np = op2(CHAR, NIL, itonp(rlxval)); 625 rtok = relex(); 626 return (unary(np)); 627 case ALL: 628 rtok = relex(); 629 return (unary(op2(ALL, NIL, NIL))); 630 case DOT: 631 rtok = relex(); 632 return (unary(op2(DOT, NIL, NIL))); 633 case CCL: 634 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 635 rtok = relex(); 636 return (unary(np)); 637 case NCCL: 638 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 639 rtok = relex(); 640 return (unary(np)); 641 case '^': 642 rtok = relex(); 643 return (unary(op2(CHAR, NIL, itonp(HAT)))); 644 case '$': 645 rtok = relex(); 646 return (unary(op2(CHAR, NIL, NIL))); 647 case '(': 648 rtok = relex(); 649 if (rtok == ')') { /* special pleading for () */ 650 rtok = relex(); 651 return unary(op2(CCL, NIL, (Node *) tostring(""))); 652 } 653 np = regexp(); 654 if (rtok == ')') { 655 rtok = relex(); 656 return (unary(np)); 657 } 658 else 659 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 660 default: 661 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 662 } 663 return 0; /*NOTREACHED*/ 664 } 665 666 Node *concat(Node *np) 667 { 668 switch (rtok) { 669 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 670 return (concat(op2(CAT, np, primary()))); 671 } 672 return (np); 673 } 674 675 Node *alt(Node *np) 676 { 677 if (rtok == OR) { 678 rtok = relex(); 679 return (alt(op2(OR, np, concat(primary())))); 680 } 681 return (np); 682 } 683 684 Node *unary(Node *np) 685 { 686 switch (rtok) { 687 case STAR: 688 rtok = relex(); 689 return (unary(op2(STAR, np, NIL))); 690 case PLUS: 691 rtok = relex(); 692 return (unary(op2(PLUS, np, NIL))); 693 case QUEST: 694 rtok = relex(); 695 return (unary(op2(QUEST, np, NIL))); 696 default: 697 return (np); 698 } 699 } 700 701 /* 702 * Character class definitions conformant to the POSIX locale as 703 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source 704 * and operating character sets are both ASCII (ISO646) or supersets 705 * thereof. 706 * 707 * Note that to avoid overflowing the temporary buffer used in 708 * relex(), the expanded character class (prior to range expansion) 709 * must be less than twice the size of their full name. 710 */ 711 712 /* Because isblank doesn't show up in any of the header files on any 713 * system i use, it's defined here. if some other locale has a richer 714 * definition of "blank", define HAS_ISBLANK and provide your own 715 * version. 716 * the parentheses here are an attempt to find a path through the maze 717 * of macro definition and/or function and/or version provided. thanks 718 * to nelson beebe for the suggestion; let's see if it works everywhere. 719 */ 720 721 #ifndef HAS_ISBLANK 722 723 int (isblank)(int c) 724 { 725 return c==' ' || c=='\t'; 726 } 727 728 #endif 729 730 struct charclass { 731 const char *cc_name; 732 int cc_namelen; 733 int (*cc_func)(int); 734 } charclasses[] = { 735 { "alnum", 5, isalnum }, 736 { "alpha", 5, isalpha }, 737 { "blank", 5, isblank }, 738 { "cntrl", 5, iscntrl }, 739 { "digit", 5, isdigit }, 740 { "graph", 5, isgraph }, 741 { "lower", 5, islower }, 742 { "print", 5, isprint }, 743 { "punct", 5, ispunct }, 744 { "space", 5, isspace }, 745 { "upper", 5, isupper }, 746 { "xdigit", 6, isxdigit }, 747 { NULL, 0, NULL }, 748 }; 749 750 751 int relex(void) /* lexical analyzer for reparse */ 752 { 753 int c, n; 754 int cflag; 755 static uschar *buf = 0; 756 static int bufsz = 100; 757 uschar *bp; 758 struct charclass *cc; 759 int i; 760 761 switch (c = *prestr++) { 762 case '|': return OR; 763 case '*': return STAR; 764 case '+': return PLUS; 765 case '?': return QUEST; 766 case '.': return DOT; 767 case '\0': prestr--; return '\0'; 768 case '^': 769 case '$': 770 case '(': 771 case ')': 772 return c; 773 case '\\': 774 rlxval = quoted((char **) &prestr); 775 return CHAR; 776 default: 777 rlxval = c; 778 return CHAR; 779 case '[': 780 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 781 FATAL("out of space in reg expr %.10s..", lastre); 782 bp = buf; 783 if (*prestr == '^') { 784 cflag = 1; 785 prestr++; 786 } 787 else 788 cflag = 0; 789 n = 2 * strlen((const char *) prestr)+1; 790 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) 791 FATAL("out of space for reg expr %.10s...", lastre); 792 for (; ; ) { 793 if ((c = *prestr++) == '\\') { 794 *bp++ = '\\'; 795 if ((c = *prestr++) == '\0') 796 FATAL("nonterminated character class %.20s...", lastre); 797 *bp++ = c; 798 /* } else if (c == '\n') { */ 799 /* FATAL("newline in character class %.20s...", lastre); */ 800 } else if (c == '[' && *prestr == ':') { 801 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ 802 for (cc = charclasses; cc->cc_name; cc++) 803 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) 804 break; 805 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && 806 prestr[2 + cc->cc_namelen] == ']') { 807 prestr += cc->cc_namelen + 3; 808 for (i = 0; i < NCHARS; i++) { 809 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0)) 810 FATAL("out of space for reg expr %.10s...", lastre); 811 if (cc->cc_func(i)) { 812 *bp++ = i; 813 n++; 814 } 815 } 816 } else 817 *bp++ = c; 818 } else if (c == '\0') { 819 FATAL("nonterminated character class %.20s", lastre); 820 } else if (bp == buf) { /* 1st char is special */ 821 *bp++ = c; 822 } else if (c == ']') { 823 *bp++ = 0; 824 rlxstr = (uschar *) tostring((char *) buf); 825 if (cflag == 0) 826 return CCL; 827 else 828 return NCCL; 829 } else 830 *bp++ = c; 831 } 832 } 833 } 834 835 int cgoto(fa *f, int s, int c) 836 { 837 int i, j, k; 838 int *p, *q; 839 840 assert(c == HAT || c < NCHARS); 841 while (f->accept >= maxsetvec) { /* guessing here! */ 842 maxsetvec *= 4; 843 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 844 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 845 if (setvec == 0 || tmpset == 0) 846 overflo("out of space in cgoto()"); 847 } 848 for (i = 0; i <= f->accept; i++) 849 setvec[i] = 0; 850 setcnt = 0; 851 /* compute positions of gototab[s,c] into setvec */ 852 p = f->posns[s]; 853 for (i = 1; i <= *p; i++) { 854 if ((k = f->re[p[i]].ltype) != FINAL) { 855 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 856 || (k == DOT && c != 0 && c != HAT) 857 || (k == ALL && c != 0) 858 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 859 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 860 q = f->re[p[i]].lfollow; 861 for (j = 1; j <= *q; j++) { 862 if (q[j] >= maxsetvec) { 863 maxsetvec *= 4; 864 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 865 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); 866 if (setvec == 0 || tmpset == 0) 867 overflo("cgoto overflow"); 868 } 869 if (setvec[q[j]] == 0) { 870 setcnt++; 871 setvec[q[j]] = 1; 872 } 873 } 874 } 875 } 876 } 877 /* determine if setvec is a previous state */ 878 tmpset[0] = setcnt; 879 j = 1; 880 for (i = f->accept; i >= 0; i--) 881 if (setvec[i]) { 882 tmpset[j++] = i; 883 } 884 /* tmpset == previous state? */ 885 for (i = 1; i <= f->curstat; i++) { 886 p = f->posns[i]; 887 if ((k = tmpset[0]) != p[0]) 888 goto different; 889 for (j = 1; j <= k; j++) 890 if (tmpset[j] != p[j]) 891 goto different; 892 /* setvec is state i */ 893 f->gototab[s][c] = i; 894 return i; 895 different:; 896 } 897 898 /* add tmpset to current set of states */ 899 if (f->curstat >= NSTATES-1) { 900 f->curstat = 2; 901 f->reset = 1; 902 for (i = 2; i < NSTATES; i++) 903 xfree(f->posns[i]); 904 } else 905 ++(f->curstat); 906 for (i = 0; i < NCHARS; i++) 907 f->gototab[f->curstat][i] = 0; 908 xfree(f->posns[f->curstat]); 909 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 910 overflo("out of space in cgoto"); 911 912 f->posns[f->curstat] = p; 913 f->gototab[s][c] = f->curstat; 914 for (i = 0; i <= setcnt; i++) 915 p[i] = tmpset[i]; 916 if (setvec[f->accept]) 917 f->out[f->curstat] = 1; 918 else 919 f->out[f->curstat] = 0; 920 return f->curstat; 921 } 922 923 924 void freefa(fa *f) /* free a finite automaton */ 925 { 926 int i; 927 928 if (f == NULL) 929 return; 930 for (i = 0; i <= f->curstat; i++) 931 xfree(f->posns[i]); 932 for (i = 0; i <= f->accept; i++) { 933 xfree(f->re[i].lfollow); 934 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 935 xfree((f->re[i].lval.np)); 936 } 937 xfree(f->restr); 938 xfree(f); 939 } 940