1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 /* lasciate ogne speranza, voi ch'entrate. */ 26 27 #define DEBUG 28 29 #include <ctype.h> 30 #include <stdio.h> 31 #include <string.h> 32 #include <stdlib.h> 33 #include "awk.h" 34 #include "ytab.h" 35 36 #define HAT (NCHARS+2) /* matches ^ in regular expr */ 37 /* NCHARS is 2**n */ 38 #define MAXLIN 22 39 40 #define type(v) (v)->nobj /* badly overloaded here */ 41 #define info(v) (v)->ntype /* badly overloaded here */ 42 #define left(v) (v)->narg[0] 43 #define right(v) (v)->narg[1] 44 #define parent(v) (v)->nnext 45 46 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 47 #define UNARY case STAR: case PLUS: case QUEST: 48 49 /* encoding in tree Nodes: 50 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 51 left is index, right contains value or pointer to value 52 unary (STAR, PLUS, QUEST): left is child, right is null 53 binary (CAT, OR): left and right are children 54 parent contains pointer to parent 55 */ 56 57 58 int *setvec; 59 int *tmpset; 60 int maxsetvec = 0; 61 62 int rtok; /* next token in current re */ 63 int rlxval; 64 static uschar *rlxstr; 65 static uschar *prestr; /* current position in current re */ 66 static uschar *lastre; /* origin of last re */ 67 68 static int setcnt; 69 static int poscnt; 70 71 char *patbeg; 72 int patlen; 73 74 #define NFA 20 /* cache this many dynamic fa's */ 75 fa *fatab[NFA]; 76 int nfatab = 0; /* entries in fatab */ 77 78 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ 79 { 80 int i, use, nuse; 81 fa *pfa; 82 static int now = 1; 83 84 if (setvec == 0) { /* first time through any RE */ 85 maxsetvec = MAXLIN; 86 setvec = (int *) malloc(maxsetvec * sizeof(int)); 87 tmpset = (int *) malloc(maxsetvec * sizeof(int)); 88 if (setvec == 0 || tmpset == 0) 89 overflo("out of space initializing makedfa"); 90 } 91 92 if (compile_time) /* a constant for sure */ 93 return mkdfa(s, anchor); 94 for (i = 0; i < nfatab; i++) /* is it there already? */ 95 if (fatab[i]->anchor == anchor 96 && strcmp((const char *) fatab[i]->restr, s) == 0) { 97 fatab[i]->use = now++; 98 return fatab[i]; 99 } 100 pfa = mkdfa(s, anchor); 101 if (nfatab < NFA) { /* room for another */ 102 fatab[nfatab] = pfa; 103 fatab[nfatab]->use = now++; 104 nfatab++; 105 return pfa; 106 } 107 use = fatab[0]->use; /* replace least-recently used */ 108 nuse = 0; 109 for (i = 1; i < nfatab; i++) 110 if (fatab[i]->use < use) { 111 use = fatab[i]->use; 112 nuse = i; 113 } 114 freefa(fatab[nuse]); 115 fatab[nuse] = pfa; 116 pfa->use = now++; 117 return pfa; 118 } 119 120 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ 121 /* anchor = 1 for anchored matches, else 0 */ 122 { 123 Node *p, *p1; 124 fa *f; 125 126 p = reparse(s); 127 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 128 /* put ALL STAR in front of reg. exp. */ 129 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 130 /* put FINAL after reg. exp. */ 131 132 poscnt = 0; 133 penter(p1); /* enter parent pointers and leaf indices */ 134 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 135 overflo("out of space for fa"); 136 f->accept = poscnt-1; /* penter has computed number of positions in re */ 137 cfoll(f, p1); /* set up follow sets */ 138 freetr(p1); 139 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) 140 overflo("out of space in makedfa"); 141 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 142 overflo("out of space in makedfa"); 143 *f->posns[1] = 0; 144 f->initstat = makeinit(f, anchor); 145 f->anchor = anchor; 146 f->restr = (uschar *) tostring(s); 147 return f; 148 } 149 150 int makeinit(fa *f, int anchor) 151 { 152 int i, k; 153 154 f->curstat = 2; 155 f->out[2] = 0; 156 f->reset = 0; 157 k = *(f->re[0].lfollow); 158 xfree(f->posns[2]); 159 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 160 overflo("out of space in makeinit"); 161 for (i=0; i <= k; i++) { 162 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 163 } 164 if ((f->posns[2])[1] == f->accept) 165 f->out[2] = 1; 166 for (i=0; i < NCHARS; i++) 167 f->gototab[2][i] = 0; 168 f->curstat = cgoto(f, 2, HAT); 169 if (anchor) { 170 *f->posns[2] = k-1; /* leave out position 0 */ 171 for (i=0; i < k; i++) { 172 (f->posns[0])[i] = (f->posns[2])[i]; 173 } 174 175 f->out[0] = f->out[2]; 176 if (f->curstat != 2) 177 --(*f->posns[f->curstat]); 178 } 179 return f->curstat; 180 } 181 182 void penter(Node *p) /* set up parent pointers and leaf indices */ 183 { 184 switch (type(p)) { 185 LEAF 186 info(p) = poscnt; 187 poscnt++; 188 break; 189 UNARY 190 penter(left(p)); 191 parent(left(p)) = p; 192 break; 193 case CAT: 194 case OR: 195 penter(left(p)); 196 penter(right(p)); 197 parent(left(p)) = p; 198 parent(right(p)) = p; 199 break; 200 default: /* can't happen */ 201 FATAL("can't happen: unknown type %d in penter", type(p)); 202 break; 203 } 204 } 205 206 void freetr(Node *p) /* free parse tree */ 207 { 208 switch (type(p)) { 209 LEAF 210 xfree(p); 211 break; 212 UNARY 213 freetr(left(p)); 214 xfree(p); 215 break; 216 case CAT: 217 case OR: 218 freetr(left(p)); 219 freetr(right(p)); 220 xfree(p); 221 break; 222 default: /* can't happen */ 223 FATAL("can't happen: unknown type %d in freetr", type(p)); 224 break; 225 } 226 } 227 228 /* in the parsing of regular expressions, metacharacters like . have */ 229 /* to be seen literally; \056 is not a metacharacter. */ 230 231 int hexstr(char **pp) /* find and eval hex string at pp, return new p */ 232 { /* only pick up one 8-bit byte (2 chars) */ 233 uschar *p; 234 int n = 0; 235 int i; 236 237 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 238 if (isdigit(*p)) 239 n = 16 * n + *p - '0'; 240 else if (*p >= 'a' && *p <= 'f') 241 n = 16 * n + *p - 'a' + 10; 242 else if (*p >= 'A' && *p <= 'F') 243 n = 16 * n + *p - 'A' + 10; 244 } 245 *pp = (char *) p; 246 return n; 247 } 248 249 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 250 251 int quoted(char **pp) /* pick up next thing after a \\ */ 252 /* and increment *pp */ 253 { 254 char *p = *pp; 255 int c; 256 257 if ((c = *p++) == 't') 258 c = '\t'; 259 else if (c == 'n') 260 c = '\n'; 261 else if (c == 'f') 262 c = '\f'; 263 else if (c == 'r') 264 c = '\r'; 265 else if (c == 'b') 266 c = '\b'; 267 else if (c == '\\') 268 c = '\\'; 269 else if (c == 'x') { /* hexadecimal goo follows */ 270 c = hexstr(&p); /* this adds a null if number is invalid */ 271 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 272 int n = c - '0'; 273 if (isoctdigit(*p)) { 274 n = 8 * n + *p++ - '0'; 275 if (isoctdigit(*p)) 276 n = 8 * n + *p++ - '0'; 277 } 278 c = n; 279 } /* else */ 280 /* c = c; */ 281 *pp = p; 282 return c; 283 } 284 285 char *cclenter(const char *argp) /* add a character class */ 286 { 287 int i, c, c2; 288 uschar *p = (uschar *) argp; 289 uschar *op, *bp; 290 static uschar *buf = 0; 291 static int bufsz = 100; 292 293 op = p; 294 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 295 FATAL("out of space for character class [%.10s...] 1", p); 296 bp = buf; 297 for (i = 0; (c = *p++) != 0; ) { 298 if (c == '\\') { 299 c = quoted((char **) &p); 300 } else if (c == '-' && i > 0 && bp[-1] != 0) { 301 if (*p != 0) { 302 c = bp[-1]; 303 c2 = *p++; 304 if (c2 == '\\') 305 c2 = quoted((char **) &p); 306 if (c > c2) { /* empty; ignore */ 307 bp--; 308 i--; 309 continue; 310 } 311 while (c < c2) { 312 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 313 FATAL("out of space for character class [%.10s...] 2", p); 314 *bp++ = ++c; 315 i++; 316 } 317 continue; 318 } 319 } 320 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) 321 FATAL("out of space for character class [%.10s...] 3", p); 322 *bp++ = c; 323 i++; 324 } 325 *bp = 0; 326 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 327 xfree(op); 328 return (char *) tostring((char *) buf); 329 } 330 331 void overflo(const char *s) 332 { 333 FATAL("regular expression too big: %.30s...", s); 334 } 335 336 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 337 { 338 int i; 339 int *p; 340 341 switch (type(v)) { 342 LEAF 343 f->re[info(v)].ltype = type(v); 344 f->re[info(v)].lval.np = right(v); 345 while (f->accept >= maxsetvec) { /* guessing here! */ 346 maxsetvec *= 4; 347 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 348 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 349 if (setvec == 0 || tmpset == 0) 350 overflo("out of space in cfoll()"); 351 } 352 for (i = 0; i <= f->accept; i++) 353 setvec[i] = 0; 354 setcnt = 0; 355 follow(v); /* computes setvec and setcnt */ 356 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 357 overflo("out of space building follow set"); 358 f->re[info(v)].lfollow = p; 359 *p = setcnt; 360 for (i = f->accept; i >= 0; i--) 361 if (setvec[i] == 1) 362 *++p = i; 363 break; 364 UNARY 365 cfoll(f,left(v)); 366 break; 367 case CAT: 368 case OR: 369 cfoll(f,left(v)); 370 cfoll(f,right(v)); 371 break; 372 default: /* can't happen */ 373 FATAL("can't happen: unknown type %d in cfoll", type(v)); 374 } 375 } 376 377 int first(Node *p) /* collects initially active leaves of p into setvec */ 378 /* returns 1 if p matches empty string */ 379 { 380 int b, lp; 381 382 switch (type(p)) { 383 LEAF 384 lp = info(p); /* look for high-water mark of subscripts */ 385 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 386 maxsetvec *= 4; 387 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 388 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 389 if (setvec == 0 || tmpset == 0) 390 overflo("out of space in first()"); 391 } 392 if (setvec[lp] != 1) { 393 setvec[lp] = 1; 394 setcnt++; 395 } 396 if (type(p) == CCL && (*(char *) right(p)) == '\0') 397 return(0); /* empty CCL */ 398 else return(1); 399 case PLUS: 400 if (first(left(p)) == 0) return(0); 401 return(1); 402 case STAR: 403 case QUEST: 404 first(left(p)); 405 return(0); 406 case CAT: 407 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 408 return(1); 409 case OR: 410 b = first(right(p)); 411 if (first(left(p)) == 0 || b == 0) return(0); 412 return(1); 413 } 414 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 415 return(-1); 416 } 417 418 void follow(Node *v) /* collects leaves that can follow v into setvec */ 419 { 420 Node *p; 421 422 if (type(v) == FINAL) 423 return; 424 p = parent(v); 425 switch (type(p)) { 426 case STAR: 427 case PLUS: 428 first(v); 429 follow(p); 430 return; 431 432 case OR: 433 case QUEST: 434 follow(p); 435 return; 436 437 case CAT: 438 if (v == left(p)) { /* v is left child of p */ 439 if (first(right(p)) == 0) { 440 follow(p); 441 return; 442 } 443 } else /* v is right child */ 444 follow(p); 445 return; 446 } 447 } 448 449 int member(int c, const char *sarg) /* is c in s? */ 450 { 451 uschar *s = (uschar *) sarg; 452 453 while (*s) 454 if (c == *s++) 455 return(1); 456 return(0); 457 } 458 459 int match(fa *f, const char *p0) /* shortest match ? */ 460 { 461 int s, ns; 462 uschar *p = (uschar *) p0; 463 464 s = f->reset ? makeinit(f,0) : f->initstat; 465 if (f->out[s]) 466 return(1); 467 do { 468 if ((ns = f->gototab[s][*p]) != 0) 469 s = ns; 470 else 471 s = cgoto(f, s, *p); 472 if (f->out[s]) 473 return(1); 474 } while (*p++ != 0); 475 return(0); 476 } 477 478 int pmatch(fa *f, const char *p0) /* longest match, for sub */ 479 { 480 int s, ns; 481 uschar *p = (uschar *) p0; 482 uschar *q; 483 int i, k; 484 485 s = f->reset ? makeinit(f,1) : f->initstat; 486 patbeg = (char *) p; 487 patlen = -1; 488 do { 489 q = p; 490 do { 491 if (f->out[s]) /* final state */ 492 patlen = q-p; 493 if ((ns = f->gototab[s][*q]) != 0) 494 s = ns; 495 else 496 s = cgoto(f, s, *q); 497 if (s == 1) { /* no transition */ 498 if (patlen >= 0) { 499 patbeg = (char *) p; 500 return(1); 501 } 502 else 503 goto nextin; /* no match */ 504 } 505 } while (*q++ != 0); 506 if (f->out[s]) 507 patlen = q-p-1; /* don't count $ */ 508 if (patlen >= 0) { 509 patbeg = (char *) p; 510 return(1); 511 } 512 nextin: 513 s = 2; 514 if (f->reset) { 515 for (i = 2; i <= f->curstat; i++) 516 xfree(f->posns[i]); 517 k = *f->posns[0]; 518 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 519 overflo("out of space in pmatch"); 520 for (i = 0; i <= k; i++) 521 (f->posns[2])[i] = (f->posns[0])[i]; 522 f->initstat = f->curstat = 2; 523 f->out[2] = f->out[0]; 524 for (i = 0; i < NCHARS; i++) 525 f->gototab[2][i] = 0; 526 } 527 } while (*p++ != 0); 528 return (0); 529 } 530 531 int nematch(fa *f, const char *p0) /* non-empty match, for sub */ 532 { 533 int s, ns; 534 uschar *p = (uschar *) p0; 535 uschar *q; 536 int i, k; 537 538 s = f->reset ? makeinit(f,1) : f->initstat; 539 patlen = -1; 540 while (*p) { 541 q = p; 542 do { 543 if (f->out[s]) /* final state */ 544 patlen = q-p; 545 if ((ns = f->gototab[s][*q]) != 0) 546 s = ns; 547 else 548 s = cgoto(f, s, *q); 549 if (s == 1) { /* no transition */ 550 if (patlen > 0) { 551 patbeg = (char *) p; 552 return(1); 553 } else 554 goto nnextin; /* no nonempty match */ 555 } 556 } while (*q++ != 0); 557 if (f->out[s]) 558 patlen = q-p-1; /* don't count $ */ 559 if (patlen > 0 ) { 560 patbeg = (char *) p; 561 return(1); 562 } 563 nnextin: 564 s = 2; 565 if (f->reset) { 566 for (i = 2; i <= f->curstat; i++) 567 xfree(f->posns[i]); 568 k = *f->posns[0]; 569 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 570 overflo("out of state space"); 571 for (i = 0; i <= k; i++) 572 (f->posns[2])[i] = (f->posns[0])[i]; 573 f->initstat = f->curstat = 2; 574 f->out[2] = f->out[0]; 575 for (i = 0; i < NCHARS; i++) 576 f->gototab[2][i] = 0; 577 } 578 p++; 579 } 580 return (0); 581 } 582 583 Node *reparse(const char *p) /* parses regular expression pointed to by p */ 584 { /* uses relex() to scan regular expression */ 585 Node *np; 586 587 dprintf( ("reparse <%s>\n", p) ); 588 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 589 rtok = relex(); 590 /* GNU compatibility: an empty regexp matches anything */ 591 if (rtok == '\0') 592 /* FATAL("empty regular expression"); previous */ 593 return(op2(ALL, NIL, NIL)); 594 np = regexp(); 595 if (rtok != '\0') 596 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 597 return(np); 598 } 599 600 Node *regexp(void) /* top-level parse of reg expr */ 601 { 602 return (alt(concat(primary()))); 603 } 604 605 Node *primary(void) 606 { 607 Node *np; 608 609 switch (rtok) { 610 case CHAR: 611 np = op2(CHAR, NIL, itonp(rlxval)); 612 rtok = relex(); 613 return (unary(np)); 614 case ALL: 615 rtok = relex(); 616 return (unary(op2(ALL, NIL, NIL))); 617 case DOT: 618 rtok = relex(); 619 return (unary(op2(DOT, NIL, NIL))); 620 case CCL: 621 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 622 rtok = relex(); 623 return (unary(np)); 624 case NCCL: 625 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 626 rtok = relex(); 627 return (unary(np)); 628 case '^': 629 rtok = relex(); 630 return (unary(op2(CHAR, NIL, itonp(HAT)))); 631 case '$': 632 rtok = relex(); 633 return (unary(op2(CHAR, NIL, NIL))); 634 case '(': 635 rtok = relex(); 636 if (rtok == ')') { /* special pleading for () */ 637 rtok = relex(); 638 return unary(op2(CCL, NIL, (Node *) tostring(""))); 639 } 640 np = regexp(); 641 if (rtok == ')') { 642 rtok = relex(); 643 return (unary(np)); 644 } 645 else 646 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 647 default: 648 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 649 } 650 return 0; /*NOTREACHED*/ 651 } 652 653 Node *concat(Node *np) 654 { 655 switch (rtok) { 656 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 657 return (concat(op2(CAT, np, primary()))); 658 } 659 return (np); 660 } 661 662 Node *alt(Node *np) 663 { 664 if (rtok == OR) { 665 rtok = relex(); 666 return (alt(op2(OR, np, concat(primary())))); 667 } 668 return (np); 669 } 670 671 Node *unary(Node *np) 672 { 673 switch (rtok) { 674 case STAR: 675 rtok = relex(); 676 return (unary(op2(STAR, np, NIL))); 677 case PLUS: 678 rtok = relex(); 679 return (unary(op2(PLUS, np, NIL))); 680 case QUEST: 681 rtok = relex(); 682 return (unary(op2(QUEST, np, NIL))); 683 default: 684 return (np); 685 } 686 } 687 688 /* 689 * Character class definitions conformant to the POSIX locale as 690 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source 691 * and operating character sets are both ASCII (ISO646) or supersets 692 * thereof. 693 * 694 * Note that to avoid overflowing the temporary buffer used in 695 * relex(), the expanded character class (prior to range expansion) 696 * must be less than twice the size of their full name. 697 */ 698 699 /* Because isblank doesn't show up in any of the header files on any 700 * system i use, it's defined here. if some other locale has a richer 701 * definition of "blank", define HAS_ISBLANK and provide your own 702 * version. 703 * the parentheses here are an attempt to find a path through the maze 704 * of macro definition and/or function and/or version provided. thanks 705 * to nelson beebe for the suggestion; let's see if it works everywhere. 706 */ 707 708 #ifndef HAS_ISBLANK 709 710 int (isblank)(int c) 711 { 712 return c==' ' || c=='\t'; 713 } 714 715 #endif 716 717 struct charclass { 718 const char *cc_name; 719 int cc_namelen; 720 int (*cc_func)(int); 721 } charclasses[] = { 722 { "alnum", 5, isalnum }, 723 { "alpha", 5, isalpha }, 724 { "blank", 5, isblank }, 725 { "cntrl", 5, iscntrl }, 726 { "digit", 5, isdigit }, 727 { "graph", 5, isgraph }, 728 { "lower", 5, islower }, 729 { "print", 5, isprint }, 730 { "punct", 5, ispunct }, 731 { "space", 5, isspace }, 732 { "upper", 5, isupper }, 733 { "xdigit", 6, isxdigit }, 734 { NULL, 0, NULL }, 735 }; 736 737 738 int relex(void) /* lexical analyzer for reparse */ 739 { 740 int c, n; 741 int cflag; 742 static uschar *buf = 0; 743 static int bufsz = 100; 744 uschar *bp; 745 struct charclass *cc; 746 int i; 747 748 switch (c = *prestr++) { 749 case '|': return OR; 750 case '*': return STAR; 751 case '+': return PLUS; 752 case '?': return QUEST; 753 case '.': return DOT; 754 case '\0': prestr--; return '\0'; 755 case '^': 756 case '$': 757 case '(': 758 case ')': 759 return c; 760 case '\\': 761 rlxval = quoted((char **) &prestr); 762 return CHAR; 763 default: 764 rlxval = c; 765 return CHAR; 766 case '[': 767 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) 768 FATAL("out of space in reg expr %.10s..", lastre); 769 bp = buf; 770 if (*prestr == '^') { 771 cflag = 1; 772 prestr++; 773 } 774 else 775 cflag = 0; 776 n = 2 * strlen((const char *) prestr)+1; 777 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) 778 FATAL("out of space for reg expr %.10s...", lastre); 779 for (; ; ) { 780 if ((c = *prestr++) == '\\') { 781 *bp++ = '\\'; 782 if ((c = *prestr++) == '\0') 783 FATAL("nonterminated character class %.20s...", lastre); 784 *bp++ = c; 785 /* } else if (c == '\n') { */ 786 /* FATAL("newline in character class %.20s...", lastre); */ 787 } else if (c == '[' && *prestr == ':') { 788 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ 789 for (cc = charclasses; cc->cc_name; cc++) 790 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) 791 break; 792 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && 793 prestr[2 + cc->cc_namelen] == ']') { 794 prestr += cc->cc_namelen + 3; 795 for (i = 0; i < NCHARS; i++) { 796 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0)) 797 FATAL("out of space for reg expr %.10s...", lastre); 798 if (cc->cc_func(i)) { 799 *bp++ = i; 800 n++; 801 } 802 } 803 } else 804 *bp++ = c; 805 } else if (c == '\0') { 806 FATAL("nonterminated character class %.20s", lastre); 807 } else if (bp == buf) { /* 1st char is special */ 808 *bp++ = c; 809 } else if (c == ']') { 810 *bp++ = 0; 811 rlxstr = (uschar *) tostring((char *) buf); 812 if (cflag == 0) 813 return CCL; 814 else 815 return NCCL; 816 } else 817 *bp++ = c; 818 } 819 } 820 } 821 822 int cgoto(fa *f, int s, int c) 823 { 824 int i, j, k; 825 int *p, *q; 826 827 while (f->accept >= maxsetvec) { /* guessing here! */ 828 maxsetvec *= 4; 829 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 830 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 831 if (setvec == 0 || tmpset == 0) 832 overflo("out of space in cgoto()"); 833 } 834 for (i = 0; i <= f->accept; i++) 835 setvec[i] = 0; 836 setcnt = 0; 837 /* compute positions of gototab[s,c] into setvec */ 838 p = f->posns[s]; 839 for (i = 1; i <= *p; i++) { 840 if ((k = f->re[p[i]].ltype) != FINAL) { 841 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 842 || (k == DOT && c != 0 && c != HAT) 843 || (k == ALL && c != 0) 844 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 845 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 846 q = f->re[p[i]].lfollow; 847 for (j = 1; j <= *q; j++) { 848 if (q[j] >= maxsetvec) { 849 maxsetvec *= 4; 850 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 851 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); 852 if (setvec == 0 || tmpset == 0) 853 overflo("cgoto overflow"); 854 } 855 if (setvec[q[j]] == 0) { 856 setcnt++; 857 setvec[q[j]] = 1; 858 } 859 } 860 } 861 } 862 } 863 /* determine if setvec is a previous state */ 864 tmpset[0] = setcnt; 865 j = 1; 866 for (i = f->accept; i >= 0; i--) 867 if (setvec[i]) { 868 tmpset[j++] = i; 869 } 870 /* tmpset == previous state? */ 871 for (i = 1; i <= f->curstat; i++) { 872 p = f->posns[i]; 873 if ((k = tmpset[0]) != p[0]) 874 goto different; 875 for (j = 1; j <= k; j++) 876 if (tmpset[j] != p[j]) 877 goto different; 878 /* setvec is state i */ 879 f->gototab[s][c] = i; 880 return i; 881 different:; 882 } 883 884 /* add tmpset to current set of states */ 885 if (f->curstat >= NSTATES-1) { 886 f->curstat = 2; 887 f->reset = 1; 888 for (i = 2; i < NSTATES; i++) 889 xfree(f->posns[i]); 890 } else 891 ++(f->curstat); 892 for (i = 0; i < NCHARS; i++) 893 f->gototab[f->curstat][i] = 0; 894 xfree(f->posns[f->curstat]); 895 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 896 overflo("out of space in cgoto"); 897 898 f->posns[f->curstat] = p; 899 f->gototab[s][c] = f->curstat; 900 for (i = 0; i <= setcnt; i++) 901 p[i] = tmpset[i]; 902 if (setvec[f->accept]) 903 f->out[f->curstat] = 1; 904 else 905 f->out[f->curstat] = 0; 906 return f->curstat; 907 } 908 909 910 void freefa(fa *f) /* free a finite automaton */ 911 { 912 int i; 913 914 if (f == NULL) 915 return; 916 for (i = 0; i <= f->curstat; i++) 917 xfree(f->posns[i]); 918 for (i = 0; i <= f->accept; i++) { 919 xfree(f->re[i].lfollow); 920 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 921 xfree((f->re[i].lval.np)); 922 } 923 xfree(f->restr); 924 xfree(f); 925 } 926