1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 /* lasciate ogne speranza, voi ch'intrate. */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #define DEBUG 31 32 #include <ctype.h> 33 #include <stdio.h> 34 #include <string.h> 35 #include <stdlib.h> 36 #include "awk.h" 37 #include "ytab.h" 38 39 #define HAT (NCHARS+2) /* matches ^ in regular expr */ 40 /* NCHARS is 2**n */ 41 #define MAXLIN 22 42 43 #define type(v) (v)->nobj /* badly overloaded here */ 44 #define info(v) (v)->ntype /* badly overloaded here */ 45 #define left(v) (v)->narg[0] 46 #define right(v) (v)->narg[1] 47 #define parent(v) (v)->nnext 48 49 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 50 #define ELEAF case EMPTYRE: /* empty string in regexp */ 51 #define UNARY case STAR: case PLUS: case QUEST: 52 53 /* encoding in tree Nodes: 54 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE): 55 left is index, right contains value or pointer to value 56 unary (STAR, PLUS, QUEST): left is child, right is null 57 binary (CAT, OR): left and right are children 58 parent contains pointer to parent 59 */ 60 61 62 int *setvec; 63 int *tmpset; 64 int maxsetvec = 0; 65 66 int rtok; /* next token in current re */ 67 int rlxval; 68 static uschar *rlxstr; 69 static uschar *prestr; /* current position in current re */ 70 static uschar *lastre; /* origin of last re */ 71 72 static int setcnt; 73 static int poscnt; 74 75 char *patbeg; 76 int patlen; 77 78 #define NFA 20 /* cache this many dynamic fa's */ 79 fa *fatab[NFA]; 80 int nfatab = 0; /* entries in fatab */ 81 82 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ 83 { 84 int i, use, nuse; 85 fa *pfa; 86 static int now = 1; 87 88 if (setvec == NULL) { /* first time through any RE */ 89 maxsetvec = MAXLIN; 90 setvec = (int *) malloc(maxsetvec * sizeof(int)); 91 tmpset = (int *) malloc(maxsetvec * sizeof(int)); 92 if (setvec == NULL || tmpset == NULL) 93 overflo("out of space initializing makedfa"); 94 } 95 96 if (compile_time) /* a constant for sure */ 97 return mkdfa(s, anchor); 98 for (i = 0; i < nfatab; i++) /* is it there already? */ 99 if (fatab[i]->anchor == anchor 100 && strcmp((const char *) fatab[i]->restr, s) == 0) { 101 fatab[i]->use = now++; 102 return fatab[i]; 103 } 104 pfa = mkdfa(s, anchor); 105 if (nfatab < NFA) { /* room for another */ 106 fatab[nfatab] = pfa; 107 fatab[nfatab]->use = now++; 108 nfatab++; 109 return pfa; 110 } 111 use = fatab[0]->use; /* replace least-recently used */ 112 nuse = 0; 113 for (i = 1; i < nfatab; i++) 114 if (fatab[i]->use < use) { 115 use = fatab[i]->use; 116 nuse = i; 117 } 118 freefa(fatab[nuse]); 119 fatab[nuse] = pfa; 120 pfa->use = now++; 121 return pfa; 122 } 123 124 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ 125 /* anchor = 1 for anchored matches, else 0 */ 126 { 127 Node *p, *p1; 128 fa *f; 129 130 p = reparse(s); 131 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 132 /* put ALL STAR in front of reg. exp. */ 133 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 134 /* put FINAL after reg. exp. */ 135 136 poscnt = 0; 137 penter(p1); /* enter parent pointers and leaf indices */ 138 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 139 overflo("out of space for fa"); 140 f->accept = poscnt-1; /* penter has computed number of positions in re */ 141 cfoll(f, p1); /* set up follow sets */ 142 freetr(p1); 143 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) 144 overflo("out of space in makedfa"); 145 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 146 overflo("out of space in makedfa"); 147 *f->posns[1] = 0; 148 f->initstat = makeinit(f, anchor); 149 f->anchor = anchor; 150 f->restr = (uschar *) tostring(s); 151 return f; 152 } 153 154 int makeinit(fa *f, int anchor) 155 { 156 int i, k; 157 158 f->curstat = 2; 159 f->out[2] = 0; 160 f->reset = 0; 161 k = *(f->re[0].lfollow); 162 xfree(f->posns[2]); 163 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 164 overflo("out of space in makeinit"); 165 for (i=0; i <= k; i++) { 166 (f->posns[2])[i] = (f->re[0].lfollow)[i]; 167 } 168 if ((f->posns[2])[1] == f->accept) 169 f->out[2] = 1; 170 for (i=0; i < NCHARS; i++) 171 f->gototab[2][i] = 0; 172 f->curstat = cgoto(f, 2, HAT); 173 if (anchor) { 174 *f->posns[2] = k-1; /* leave out position 0 */ 175 for (i=0; i < k; i++) { 176 (f->posns[0])[i] = (f->posns[2])[i]; 177 } 178 179 f->out[0] = f->out[2]; 180 if (f->curstat != 2) 181 --(*f->posns[f->curstat]); 182 } 183 return f->curstat; 184 } 185 186 void penter(Node *p) /* set up parent pointers and leaf indices */ 187 { 188 switch (type(p)) { 189 ELEAF 190 LEAF 191 info(p) = poscnt; 192 poscnt++; 193 break; 194 UNARY 195 penter(left(p)); 196 parent(left(p)) = p; 197 break; 198 case CAT: 199 case OR: 200 penter(left(p)); 201 penter(right(p)); 202 parent(left(p)) = p; 203 parent(right(p)) = p; 204 break; 205 default: /* can't happen */ 206 FATAL("can't happen: unknown type %d in penter", type(p)); 207 break; 208 } 209 } 210 211 void freetr(Node *p) /* free parse tree */ 212 { 213 switch (type(p)) { 214 ELEAF 215 LEAF 216 xfree(p); 217 break; 218 UNARY 219 freetr(left(p)); 220 xfree(p); 221 break; 222 case CAT: 223 case OR: 224 freetr(left(p)); 225 freetr(right(p)); 226 xfree(p); 227 break; 228 default: /* can't happen */ 229 FATAL("can't happen: unknown type %d in freetr", type(p)); 230 break; 231 } 232 } 233 234 /* in the parsing of regular expressions, metacharacters like . have */ 235 /* to be seen literally; \056 is not a metacharacter. */ 236 237 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */ 238 { /* only pick up one 8-bit byte (2 chars) */ 239 uschar *p; 240 int n = 0; 241 int i; 242 243 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { 244 if (isdigit(*p)) 245 n = 16 * n + *p - '0'; 246 else if (*p >= 'a' && *p <= 'f') 247 n = 16 * n + *p - 'a' + 10; 248 else if (*p >= 'A' && *p <= 'F') 249 n = 16 * n + *p - 'A' + 10; 250 } 251 *pp = (uschar *) p; 252 return n; 253 } 254 255 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ 256 257 int quoted(uschar **pp) /* pick up next thing after a \\ */ 258 /* and increment *pp */ 259 { 260 uschar *p = *pp; 261 int c; 262 263 if ((c = *p++) == 't') 264 c = '\t'; 265 else if (c == 'n') 266 c = '\n'; 267 else if (c == 'f') 268 c = '\f'; 269 else if (c == 'r') 270 c = '\r'; 271 else if (c == 'b') 272 c = '\b'; 273 else if (c == '\\') 274 c = '\\'; 275 else if (c == 'x') { /* hexadecimal goo follows */ 276 c = hexstr(&p); /* this adds a null if number is invalid */ 277 } else if (isoctdigit(c)) { /* \d \dd \ddd */ 278 int n = c - '0'; 279 if (isoctdigit(*p)) { 280 n = 8 * n + *p++ - '0'; 281 if (isoctdigit(*p)) 282 n = 8 * n + *p++ - '0'; 283 } 284 c = n; 285 } /* else */ 286 /* c = c; */ 287 *pp = p; 288 return c; 289 } 290 291 static int collate_range_cmp(int a, int b) 292 { 293 static char s[2][2]; 294 295 if ((uschar)a == (uschar)b) 296 return 0; 297 s[0][0] = a; 298 s[1][0] = b; 299 #ifdef __FreeBSD__ 300 return (strcmp(s[0], s[1])); 301 #else 302 return (strcoll(s[0], s[1])); 303 #endif 304 } 305 306 char *cclenter(const char *argp) /* add a character class */ 307 { 308 int i, c, c2; 309 int j; 310 uschar *p = (uschar *) argp; 311 uschar *op, *bp; 312 static uschar *buf = NULL; 313 static int bufsz = 100; 314 315 op = p; 316 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) 317 FATAL("out of space for character class [%.10s...] 1", p); 318 bp = buf; 319 for (i = 0; (c = *p++) != 0; ) { 320 if (c == '\\') { 321 c = quoted(&p); 322 } else if (c == '-' && i > 0 && bp[-1] != 0) { 323 if (*p != 0) { 324 c = bp[-1]; 325 c2 = *p++; 326 if (c2 == '\\') 327 c2 = quoted(&p); 328 if (collate_range_cmp(c, c2) > 0) { 329 bp--; 330 i--; 331 continue; 332 } 333 for (j = 0; j < NCHARS; j++) { 334 if ((collate_range_cmp(c, j) > 0) || 335 collate_range_cmp(j, c2) > 0) 336 continue; 337 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1")) 338 FATAL("out of space for character class [%.10s...] 2", p); 339 *bp++ = j; 340 i++; 341 } 342 continue; 343 } 344 } 345 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2")) 346 FATAL("out of space for character class [%.10s...] 3", p); 347 *bp++ = c; 348 i++; 349 } 350 *bp = 0; 351 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); 352 xfree(op); 353 return (char *) tostring((char *) buf); 354 } 355 356 void overflo(const char *s) 357 { 358 FATAL("regular expression too big: %.30s...", s); 359 } 360 361 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 362 { 363 int i; 364 int *p; 365 366 switch (type(v)) { 367 ELEAF 368 LEAF 369 f->re[info(v)].ltype = type(v); 370 f->re[info(v)].lval.np = right(v); 371 while (f->accept >= maxsetvec) { /* guessing here! */ 372 maxsetvec *= 4; 373 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 374 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 375 if (setvec == NULL || tmpset == NULL) 376 overflo("out of space in cfoll()"); 377 } 378 for (i = 0; i <= f->accept; i++) 379 setvec[i] = 0; 380 setcnt = 0; 381 follow(v); /* computes setvec and setcnt */ 382 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 383 overflo("out of space building follow set"); 384 f->re[info(v)].lfollow = p; 385 *p = setcnt; 386 for (i = f->accept; i >= 0; i--) 387 if (setvec[i] == 1) 388 *++p = i; 389 break; 390 UNARY 391 cfoll(f,left(v)); 392 break; 393 case CAT: 394 case OR: 395 cfoll(f,left(v)); 396 cfoll(f,right(v)); 397 break; 398 default: /* can't happen */ 399 FATAL("can't happen: unknown type %d in cfoll", type(v)); 400 } 401 } 402 403 int first(Node *p) /* collects initially active leaves of p into setvec */ 404 /* returns 0 if p matches empty string */ 405 { 406 int b, lp; 407 408 switch (type(p)) { 409 ELEAF 410 LEAF 411 lp = info(p); /* look for high-water mark of subscripts */ 412 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */ 413 maxsetvec *= 4; 414 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 415 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 416 if (setvec == NULL || tmpset == NULL) 417 overflo("out of space in first()"); 418 } 419 if (type(p) == EMPTYRE) { 420 setvec[lp] = 0; 421 return(0); 422 } 423 if (setvec[lp] != 1) { 424 setvec[lp] = 1; 425 setcnt++; 426 } 427 if (type(p) == CCL && (*(char *) right(p)) == '\0') 428 return(0); /* empty CCL */ 429 else return(1); 430 case PLUS: 431 if (first(left(p)) == 0) return(0); 432 return(1); 433 case STAR: 434 case QUEST: 435 first(left(p)); 436 return(0); 437 case CAT: 438 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 439 return(1); 440 case OR: 441 b = first(right(p)); 442 if (first(left(p)) == 0 || b == 0) return(0); 443 return(1); 444 } 445 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ 446 return(-1); 447 } 448 449 void follow(Node *v) /* collects leaves that can follow v into setvec */ 450 { 451 Node *p; 452 453 if (type(v) == FINAL) 454 return; 455 p = parent(v); 456 switch (type(p)) { 457 case STAR: 458 case PLUS: 459 first(v); 460 follow(p); 461 return; 462 463 case OR: 464 case QUEST: 465 follow(p); 466 return; 467 468 case CAT: 469 if (v == left(p)) { /* v is left child of p */ 470 if (first(right(p)) == 0) { 471 follow(p); 472 return; 473 } 474 } else /* v is right child */ 475 follow(p); 476 return; 477 } 478 } 479 480 int member(int c, const char *sarg) /* is c in s? */ 481 { 482 uschar *s = (uschar *) sarg; 483 484 while (*s) 485 if (c == *s++) 486 return(1); 487 return(0); 488 } 489 490 int match(fa *f, const char *p0) /* shortest match ? */ 491 { 492 int s, ns; 493 uschar *p = (uschar *) p0; 494 495 s = f->reset ? makeinit(f,0) : f->initstat; 496 if (f->out[s]) 497 return(1); 498 do { 499 /* assert(*p < NCHARS); */ 500 if ((ns = f->gototab[s][*p]) != 0) 501 s = ns; 502 else 503 s = cgoto(f, s, *p); 504 if (f->out[s]) 505 return(1); 506 } while (*p++ != 0); 507 return(0); 508 } 509 510 int pmatch(fa *f, const char *p0) /* longest match, for sub */ 511 { 512 int s, ns; 513 uschar *p = (uschar *) p0; 514 uschar *q; 515 int i, k; 516 517 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 518 if (f->reset) { 519 f->initstat = s = makeinit(f,1); 520 } else { 521 s = f->initstat; 522 } 523 patbeg = (char *) p; 524 patlen = -1; 525 do { 526 q = p; 527 do { 528 if (f->out[s]) /* final state */ 529 patlen = q-p; 530 /* assert(*q < NCHARS); */ 531 if ((ns = f->gototab[s][*q]) != 0) 532 s = ns; 533 else 534 s = cgoto(f, s, *q); 535 if (s == 1) { /* no transition */ 536 if (patlen >= 0) { 537 patbeg = (char *) p; 538 return(1); 539 } 540 else 541 goto nextin; /* no match */ 542 } 543 } while (*q++ != 0); 544 if (f->out[s]) 545 patlen = q-p-1; /* don't count $ */ 546 if (patlen >= 0) { 547 patbeg = (char *) p; 548 return(1); 549 } 550 nextin: 551 s = 2; 552 if (f->reset) { 553 for (i = 2; i <= f->curstat; i++) 554 xfree(f->posns[i]); 555 k = *f->posns[0]; 556 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 557 overflo("out of space in pmatch"); 558 for (i = 0; i <= k; i++) 559 (f->posns[2])[i] = (f->posns[0])[i]; 560 f->initstat = f->curstat = 2; 561 f->out[2] = f->out[0]; 562 for (i = 0; i < NCHARS; i++) 563 f->gototab[2][i] = 0; 564 } 565 } while (*p++ != 0); 566 return (0); 567 } 568 569 int nematch(fa *f, const char *p0) /* non-empty match, for sub */ 570 { 571 int s, ns; 572 uschar *p = (uschar *) p0; 573 uschar *q; 574 int i, k; 575 576 /* s = f->reset ? makeinit(f,1) : f->initstat; */ 577 if (f->reset) { 578 f->initstat = s = makeinit(f,1); 579 } else { 580 s = f->initstat; 581 } 582 patlen = -1; 583 while (*p) { 584 q = p; 585 do { 586 if (f->out[s]) /* final state */ 587 patlen = q-p; 588 /* assert(*q < NCHARS); */ 589 if ((ns = f->gototab[s][*q]) != 0) 590 s = ns; 591 else 592 s = cgoto(f, s, *q); 593 if (s == 1) { /* no transition */ 594 if (patlen > 0) { 595 patbeg = (char *) p; 596 return(1); 597 } else 598 goto nnextin; /* no nonempty match */ 599 } 600 } while (*q++ != 0); 601 if (f->out[s]) 602 patlen = q-p-1; /* don't count $ */ 603 if (patlen > 0 ) { 604 patbeg = (char *) p; 605 return(1); 606 } 607 nnextin: 608 s = 2; 609 if (f->reset) { 610 for (i = 2; i <= f->curstat; i++) 611 xfree(f->posns[i]); 612 k = *f->posns[0]; 613 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 614 overflo("out of state space"); 615 for (i = 0; i <= k; i++) 616 (f->posns[2])[i] = (f->posns[0])[i]; 617 f->initstat = f->curstat = 2; 618 f->out[2] = f->out[0]; 619 for (i = 0; i < NCHARS; i++) 620 f->gototab[2][i] = 0; 621 } 622 p++; 623 } 624 return (0); 625 } 626 627 Node *reparse(const char *p) /* parses regular expression pointed to by p */ 628 { /* uses relex() to scan regular expression */ 629 Node *np; 630 631 dprintf( ("reparse <%s>\n", p) ); 632 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ 633 rtok = relex(); 634 /* GNU compatibility: an empty regexp matches anything */ 635 if (rtok == '\0') { 636 /* FATAL("empty regular expression"); previous */ 637 return(op2(EMPTYRE, NIL, NIL)); 638 } 639 np = regexp(); 640 if (rtok != '\0') 641 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 642 return(np); 643 } 644 645 Node *regexp(void) /* top-level parse of reg expr */ 646 { 647 return (alt(concat(primary()))); 648 } 649 650 Node *primary(void) 651 { 652 Node *np; 653 654 switch (rtok) { 655 case CHAR: 656 np = op2(CHAR, NIL, itonp(rlxval)); 657 rtok = relex(); 658 return (unary(np)); 659 case ALL: 660 rtok = relex(); 661 return (unary(op2(ALL, NIL, NIL))); 662 case EMPTYRE: 663 rtok = relex(); 664 return (unary(op2(ALL, NIL, NIL))); 665 case DOT: 666 rtok = relex(); 667 return (unary(op2(DOT, NIL, NIL))); 668 case CCL: 669 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); 670 rtok = relex(); 671 return (unary(np)); 672 case NCCL: 673 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); 674 rtok = relex(); 675 return (unary(np)); 676 case '^': 677 rtok = relex(); 678 return (unary(op2(CHAR, NIL, itonp(HAT)))); 679 case '$': 680 rtok = relex(); 681 return (unary(op2(CHAR, NIL, NIL))); 682 case '(': 683 rtok = relex(); 684 if (rtok == ')') { /* special pleading for () */ 685 rtok = relex(); 686 return unary(op2(CCL, NIL, (Node *) tostring(""))); 687 } 688 np = regexp(); 689 if (rtok == ')') { 690 rtok = relex(); 691 return (unary(np)); 692 } 693 else 694 FATAL("syntax error in regular expression %s at %s", lastre, prestr); 695 default: 696 FATAL("illegal primary in regular expression %s at %s", lastre, prestr); 697 } 698 return 0; /*NOTREACHED*/ 699 } 700 701 Node *concat(Node *np) 702 { 703 switch (rtok) { 704 case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(': 705 return (concat(op2(CAT, np, primary()))); 706 } 707 return (np); 708 } 709 710 Node *alt(Node *np) 711 { 712 if (rtok == OR) { 713 rtok = relex(); 714 return (alt(op2(OR, np, concat(primary())))); 715 } 716 return (np); 717 } 718 719 Node *unary(Node *np) 720 { 721 switch (rtok) { 722 case STAR: 723 rtok = relex(); 724 return (unary(op2(STAR, np, NIL))); 725 case PLUS: 726 rtok = relex(); 727 return (unary(op2(PLUS, np, NIL))); 728 case QUEST: 729 rtok = relex(); 730 return (unary(op2(QUEST, np, NIL))); 731 default: 732 return (np); 733 } 734 } 735 736 /* 737 * Character class definitions conformant to the POSIX locale as 738 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source 739 * and operating character sets are both ASCII (ISO646) or supersets 740 * thereof. 741 * 742 * Note that to avoid overflowing the temporary buffer used in 743 * relex(), the expanded character class (prior to range expansion) 744 * must be less than twice the size of their full name. 745 */ 746 747 /* Because isblank doesn't show up in any of the header files on any 748 * system i use, it's defined here. if some other locale has a richer 749 * definition of "blank", define HAS_ISBLANK and provide your own 750 * version. 751 * the parentheses here are an attempt to find a path through the maze 752 * of macro definition and/or function and/or version provided. thanks 753 * to nelson beebe for the suggestion; let's see if it works everywhere. 754 */ 755 756 /* #define HAS_ISBLANK */ 757 #ifndef HAS_ISBLANK 758 759 int (xisblank)(int c) 760 { 761 return c==' ' || c=='\t'; 762 } 763 764 #endif 765 766 struct charclass { 767 const char *cc_name; 768 int cc_namelen; 769 int (*cc_func)(int); 770 } charclasses[] = { 771 { "alnum", 5, isalnum }, 772 { "alpha", 5, isalpha }, 773 #ifndef HAS_ISBLANK 774 { "blank", 5, isspace }, /* was isblank */ 775 #else 776 { "blank", 5, isblank }, 777 #endif 778 { "cntrl", 5, iscntrl }, 779 { "digit", 5, isdigit }, 780 { "graph", 5, isgraph }, 781 { "lower", 5, islower }, 782 { "print", 5, isprint }, 783 { "punct", 5, ispunct }, 784 { "space", 5, isspace }, 785 { "upper", 5, isupper }, 786 { "xdigit", 6, isxdigit }, 787 { NULL, 0, NULL }, 788 }; 789 790 791 int relex(void) /* lexical analyzer for reparse */ 792 { 793 int c, n; 794 int cflag; 795 static uschar *buf = NULL; 796 static int bufsz = 100; 797 uschar *bp; 798 struct charclass *cc; 799 int i; 800 801 switch (c = *prestr++) { 802 case '|': return OR; 803 case '*': return STAR; 804 case '+': return PLUS; 805 case '?': return QUEST; 806 case '.': return DOT; 807 case '\0': prestr--; return '\0'; 808 case '^': 809 case '$': 810 case '(': 811 case ')': 812 return c; 813 case '\\': 814 rlxval = quoted(&prestr); 815 return CHAR; 816 default: 817 rlxval = c; 818 return CHAR; 819 case '[': 820 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) 821 FATAL("out of space in reg expr %.10s..", lastre); 822 bp = buf; 823 if (*prestr == '^') { 824 cflag = 1; 825 prestr++; 826 } 827 else 828 cflag = 0; 829 n = 2 * strlen((const char *) prestr)+1; 830 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1")) 831 FATAL("out of space for reg expr %.10s...", lastre); 832 for (; ; ) { 833 if ((c = *prestr++) == '\\') { 834 *bp++ = '\\'; 835 if ((c = *prestr++) == '\0') 836 FATAL("nonterminated character class %.20s...", lastre); 837 *bp++ = c; 838 /* } else if (c == '\n') { */ 839 /* FATAL("newline in character class %.20s...", lastre); */ 840 } else if (c == '[' && *prestr == ':') { 841 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ 842 for (cc = charclasses; cc->cc_name; cc++) 843 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) 844 break; 845 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && 846 prestr[2 + cc->cc_namelen] == ']') { 847 prestr += cc->cc_namelen + 3; 848 for (i = 0; i < NCHARS; i++) { 849 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) 850 FATAL("out of space for reg expr %.10s...", lastre); 851 if (cc->cc_func(i)) { 852 *bp++ = i; 853 n++; 854 } 855 } 856 } else 857 *bp++ = c; 858 } else if (c == '\0') { 859 FATAL("nonterminated character class %.20s", lastre); 860 } else if (bp == buf) { /* 1st char is special */ 861 *bp++ = c; 862 } else if (c == ']') { 863 *bp++ = 0; 864 rlxstr = (uschar *) tostring((char *) buf); 865 if (cflag == 0) 866 return CCL; 867 else 868 return NCCL; 869 } else 870 *bp++ = c; 871 } 872 } 873 } 874 875 int cgoto(fa *f, int s, int c) 876 { 877 int i, j, k; 878 int *p, *q; 879 880 assert(c == HAT || c < NCHARS); 881 while (f->accept >= maxsetvec) { /* guessing here! */ 882 maxsetvec *= 4; 883 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 884 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 885 if (setvec == NULL || tmpset == NULL) 886 overflo("out of space in cgoto()"); 887 } 888 for (i = 0; i <= f->accept; i++) 889 setvec[i] = 0; 890 setcnt = 0; 891 /* compute positions of gototab[s,c] into setvec */ 892 p = f->posns[s]; 893 for (i = 1; i <= *p; i++) { 894 if ((k = f->re[p[i]].ltype) != FINAL) { 895 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) 896 || (k == DOT && c != 0 && c != HAT) 897 || (k == ALL && c != 0) 898 || (k == EMPTYRE && c != 0) 899 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) 900 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { 901 q = f->re[p[i]].lfollow; 902 for (j = 1; j <= *q; j++) { 903 if (q[j] >= maxsetvec) { 904 maxsetvec *= 4; 905 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); 906 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); 907 if (setvec == NULL || tmpset == NULL) 908 overflo("cgoto overflow"); 909 } 910 if (setvec[q[j]] == 0) { 911 setcnt++; 912 setvec[q[j]] = 1; 913 } 914 } 915 } 916 } 917 } 918 /* determine if setvec is a previous state */ 919 tmpset[0] = setcnt; 920 j = 1; 921 for (i = f->accept; i >= 0; i--) 922 if (setvec[i]) { 923 tmpset[j++] = i; 924 } 925 /* tmpset == previous state? */ 926 for (i = 1; i <= f->curstat; i++) { 927 p = f->posns[i]; 928 if ((k = tmpset[0]) != p[0]) 929 goto different; 930 for (j = 1; j <= k; j++) 931 if (tmpset[j] != p[j]) 932 goto different; 933 /* setvec is state i */ 934 f->gototab[s][c] = i; 935 return i; 936 different:; 937 } 938 939 /* add tmpset to current set of states */ 940 if (f->curstat >= NSTATES-1) { 941 f->curstat = 2; 942 f->reset = 1; 943 for (i = 2; i < NSTATES; i++) 944 xfree(f->posns[i]); 945 } else 946 ++(f->curstat); 947 for (i = 0; i < NCHARS; i++) 948 f->gototab[f->curstat][i] = 0; 949 xfree(f->posns[f->curstat]); 950 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 951 overflo("out of space in cgoto"); 952 953 f->posns[f->curstat] = p; 954 f->gototab[s][c] = f->curstat; 955 for (i = 0; i <= setcnt; i++) 956 p[i] = tmpset[i]; 957 if (setvec[f->accept]) 958 f->out[f->curstat] = 1; 959 else 960 f->out[f->curstat] = 0; 961 return f->curstat; 962 } 963 964 965 void freefa(fa *f) /* free a finite automaton */ 966 { 967 int i; 968 969 if (f == NULL) 970 return; 971 for (i = 0; i <= f->curstat; i++) 972 xfree(f->posns[i]); 973 for (i = 0; i <= f->accept; i++) { 974 xfree(f->re[i].lfollow); 975 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) 976 xfree((f->re[i].lval.np)); 977 } 978 xfree(f->restr); 979 xfree(f); 980 } 981