1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* Copyright (c) 1990 AT&T */ 23 /* All Rights Reserved */ 24 25 /* 26 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 /* 31 * cscope - interactive C symbol or text cross-reference 32 * 33 * text searching functions 34 */ 35 36 #include <fcntl.h> 37 #include <setjmp.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <memory.h> 41 #include <ctype.h> 42 #include <unistd.h> 43 #include "library.h" 44 45 typedef enum { 46 NO = 0, 47 YES = 1 48 } BOOL; 49 50 typedef struct re_bm { 51 int delta0[256]; 52 int *delta2; 53 uchar_t *cmap; 54 uchar_t *bmpat; 55 int patlen; 56 } re_bm; 57 58 typedef struct Link { 59 uchar_t lit; 60 struct Node *node; 61 struct Link *next; 62 } Link; 63 64 typedef struct Node { 65 short out; 66 short d; 67 short shift1; 68 short shift2; 69 long id; 70 struct Node **tab; 71 Link *alts; 72 struct Node *fail; 73 } Node; 74 75 typedef struct re_cw { 76 int maxdepth, mindepth; 77 long nodeid; 78 int step[256]; 79 uchar_t *cmap; 80 struct Node *root; 81 } re_cw; 82 83 typedef enum { 84 /* lit expression types */ 85 Literal, Dot, Charclass, EOP, 86 87 /* non-lit expression types */ 88 Cat, Alternate, Star, Plus, Quest, 89 90 /* not really expression types, just helping */ 91 Lpar, Rpar, Backslash 92 93 } Exprtype; 94 95 typedef int ID; 96 97 typedef struct Expr { 98 Exprtype type; /* Type of expression (in grammar) */ 99 ID id; /* unique ID of lit expression */ 100 int lit; /* Literal character or tag */ 101 int flen; /* Number of following lit expressions */ 102 ID *follow; /* Array of IDs of following lit expressions */ 103 struct Expr *l; /* pointer to Left child (or ccl count) */ 104 struct Expr *r; /* pointer to Right child (or ccl mask) */ 105 struct Expr *parent; /* pointer to Parent */ 106 } Expr; 107 108 typedef struct State { 109 struct State *tab[256]; 110 int cnt; /* number of matched chars on full match, -1 otherwise */ 111 int npos; /* number of IDs in position set for this state. */ 112 int pos; /* index into posbase for beginning of IDs */ 113 } State; 114 115 typedef struct FID { 116 ID id; /* Lit Expression id */ 117 int fcount; /* Number of Lit exp matches before this one. */ 118 } FID; 119 120 typedef struct Positionset { 121 int count; /* Number of lit exps in position set */ 122 ID last; /* ID of last lit exp in position set */ 123 FID *base; /* array of MAXID FIDS */ 124 /* 0 means not in position set */ 125 /* -1 means first in position set */ 126 /* n (>0) is ID of prev member of position set. */ 127 } Positionset; 128 129 typedef struct re_re { 130 FID *posbase; /* Array of IDs from all states */ 131 int nposalloc; /* Allocated size of posbase */ 132 int posnext; /* Index into free space in posbase */ 133 int posreset; /* Index into end of IDs for initial state in posbase */ 134 int maxid; /* Number of (also maximum ID of) lit expressions */ 135 Expr *root; /* Pointer to root (EOP) expression */ 136 Expr **ptr; /* Pointer to array of ptrs to lit expressions. */ 137 uchar_t *cmap; /* Character mapping array */ 138 Positionset firstpos; 139 Positionset tmp; 140 int nstates; /* Number of current states defined */ 141 int statelim; /* Limit on number of states before flushing */ 142 State *states; /* Array of states */ 143 State istate; /* Initial state */ 144 } re_re; 145 146 typedef struct { 147 uchar_t *cmap; 148 re_re *re_ptr; 149 re_bm *bm_ptr; 150 re_cw *cw_ptr; 151 BOOL fullmatch; 152 BOOL (*procfn)(); 153 BOOL (*succfn)(); 154 uchar_t *loc1; 155 uchar_t *loc2; 156 uchar_t *expression; 157 } PATTERN; 158 159 typedef enum { 160 BEGIN, /* File is not yet in buffer at all */ 161 MORE, /* File is partly in buffer */ 162 NO_MORE /* File has been completely read into buffer */ 163 } FILE_STAT; 164 165 typedef struct { 166 uchar_t *prntbuf; /* current line of input from data file */ 167 uchar_t *newline; /* end of line (real or sentinel \n) */ 168 long ln; /* line number */ 169 } LINE; 170 171 #define NL '\n' 172 173 #define CCL_SIZ 32 174 #define CCL_SET(a, c) ((a)[(c) >> 3] |= bittab[(c) & 07]) 175 #define CCL_CLR(a, c) ((a)[(c) >> 3] &= ~bittab[(c) & 07]) 176 #define CCL_CHK(a, c) ((a)[(c) >> 3] & bittab[(c) & 07]) 177 178 #define ESIZE (BUFSIZ) 179 #define MAXBUFSIZE (64*BUFSIZ) 180 181 #define MAXMALLOCS 1024 182 #define MAXLIT 256 /* is plenty big enough */ 183 #define LARGE MAXBUFSIZE+ESIZE+2 184 185 #define CLEAR(r, rps) (void) memset((char *)(rps)->base, 0, \ 186 (int)((r)->maxid * sizeof (FID))), \ 187 (rps)->count = 0, (rps)->last = -1 188 #define SET(rps, n, cnt) { \ 189 if ((rps)->base[n].id == 0) {\ 190 (rps)->count++;\ 191 (rps)->base[n].fcount = (cnt);\ 192 (rps)->base[n].id = (rps)->last;\ 193 (rps)->last = (n);\ 194 } else if ((cnt) > (rps)->base[n].fcount) {\ 195 (rps)->base[n].fcount = (cnt);\ 196 }} 197 198 #define START { _p = tmp; } 199 #define ADDL(c) { if (_p >= &tmp[MAXLIT]) _p--; *_p++ = c; } 200 #define FINISH { ADDL(0) if ((_p-tmp) > bestlen) \ 201 (void) memmove(best, tmp, bestlen = _p-tmp); } 202 203 204 #define NEW(N) (froot?(t = froot, froot = froot->next, t->next = NULL, \ 205 t->node = N, t): newlink(0, N)) 206 #define ADD(N) if (qtail) qtail = qtail->next = NEW(N); \ 207 else qtail = qhead = NEW(N) 208 #define DEL() { Link *_l = qhead; if ((qhead = qhead->next) == NULL) \ 209 { qtail = NULL; } _l->next = froot; froot = _l; } 210 211 static uchar_t *buffer; 212 static uchar_t *bufend; 213 static FILE *output; 214 static char *format; 215 static char *file; 216 static int file_desc; 217 static FILE_STAT file_stat; 218 static PATTERN match_pattern; 219 static uchar_t char_map[2][256]; 220 static int iflag; 221 static Exprtype toktype; 222 static int toklit, parno, maxid; 223 static uchar_t tmp[MAXLIT], best[MAXLIT]; 224 static uchar_t *_p; 225 static int bestlen; 226 static Node *next_node; 227 static Link *froot, *next_link; 228 static jmp_buf env; 229 230 static int nmalloc; 231 static uchar_t *mallocs[MAXMALLOCS]; 232 233 static uchar_t bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; 234 235 #ifdef DEBUG 236 #define TRACE(n) (n < debug) 237 #define EPRINTSIZE 50000 238 static void spr(int c, int *p, uchar_t *buf); 239 void epr(Expr *e, uchar_t *res); 240 static int debug = 12; 241 #endif 242 243 static void init_file(LINE *cur_ptr); 244 static void get_line(LINE *cur_ptr, uchar_t *s); 245 static void get_ncount(LINE *cur_ptr, uchar_t *s); 246 static int execute(void); 247 static State *startstate(re_re *r); 248 static State *stateof(re_re *r, Positionset *ps); 249 static State *nextstate(re_re *r, State *s, int a); 250 static State *addstate(re_re *r, Positionset *ps, int cnt); 251 static BOOL match(Expr *e, int a); 252 static BOOL first_lit(Positionset *fpos, Expr *e); 253 static void eptr(re_re *r, Expr *e); 254 static void efollow(re_re *r, Positionset *fpos, Expr *e); 255 static void follow(Positionset *fpos, Expr *e); 256 static void followstate(re_re *r, State *s, int a, Positionset *fpos); 257 static Expr *eall(re_re *r, PATTERN *pat); 258 static Expr *d0(re_re *r, PATTERN *pat); 259 static Expr *d1(re_re *r, PATTERN *pat); 260 static Expr *d2(re_re *r, PATTERN *pat); 261 static Expr *d3(re_re *r, PATTERN *pat); 262 static Expr *newexpr(Exprtype t, int lit, Expr *left, Expr *right); 263 static void lex(re_re *r, PATTERN *pat); 264 static int re_lit(PATTERN *pat, uchar_t **b, uchar_t **e); 265 static void traverse(PATTERN *pat, Expr *e); 266 static int ccl(PATTERN *pat, uchar_t *tab); 267 static BOOL altlist(), word(); 268 static BOOL altlist(Expr *e, uchar_t *buf, re_cw *pat); 269 static Node *newnode(re_cw *c, int d); 270 static Link *newlink(uchar_t lit, Node *n); 271 static void fail(Node *root); 272 static void zeroroot(Node *root, Node *n); 273 static void shift(re_cw *c); 274 static void shifttab(Node *n); 275 static void shiftprop(re_cw *c, Node *n); 276 static void delta_2(re_bm *b); 277 static int getstate(re_re *r, Positionset *ps); 278 static void savestate(re_re *r); 279 static void stateinit(re_re *r); 280 static re_bm *re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap); 281 static re_cw *re_cwinit(uchar_t *cmap); 282 static re_cw *re_recw(re_re *r, uchar_t *map); 283 static re_re *egprep(PATTERN *pat); 284 static void re_cwadd(re_cw *c, uchar_t *s, uchar_t *e); 285 static void re_cwcomp(re_cw *c); 286 static void eginit(re_re *r); 287 static BOOL re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb, 288 uchar_t **me); 289 static BOOL re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb, 290 uchar_t **me); 291 static BOOL re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, 292 uchar_t **me); 293 static uchar_t *egmalloc(size_t n); 294 static void fgetfile(LINE *cur_ptr); 295 static void dogre(PATTERN *pat); 296 static BOOL pattern_match(PATTERN *pat, LINE *lptr); 297 static BOOL fixloc(uchar_t **mb, uchar_t **me); 298 static BOOL grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me); 299 static void err(char *s); 300 301 static char *message; 302 303 void 304 egrepcaseless(int i) 305 { 306 iflag = i; /* simulate "egrep -i" */ 307 } 308 309 char * 310 egrepinit(char *expression) 311 { 312 static int firsttime = 1; 313 int i; 314 315 if (firsttime) { 316 firsttime = 0; 317 for (i = 0; i < 256; i++) { 318 char_map[0][i] = (uchar_t)i; 319 char_map[1][i] = tolower(i); 320 } 321 } 322 for (i = 0; i < nmalloc; i ++) 323 free(mallocs[i]); 324 nmalloc = 0; 325 message = NULL; 326 327 match_pattern.expression = (uchar_t *)expression; 328 match_pattern.cmap = char_map[iflag]; 329 if (setjmp(env) == 0) { 330 dogre(&match_pattern); 331 #ifdef DEBUG 332 { 333 PATTERN *p = match_pattern; 334 if (p->procfn == re_bmexec) 335 if (!p->fullmatch) 336 if (p->succfn == re_reexec) 337 printf("PARTIAL BOYER_MOORE\n"); 338 else 339 printf("PARTIAL B_M with GREP\n"); 340 else 341 printf("FULL BOYER_MOORE\n"); 342 else if (p->procfn == re_cwexec) 343 printf("C_W\n"); 344 else 345 printf("GENERAL\n"); 346 } 347 #endif 348 } 349 return (message); 350 } 351 352 static void 353 dogre(PATTERN *pat) 354 { 355 uchar_t *lb, *le; 356 357 #ifdef DEBUG 358 printf("PATTERN %s\n", pat->expression); 359 #endif 360 pat->re_ptr = egprep(pat); 361 bestlen = re_lit(pat, &lb, &le); 362 363 if (bestlen && pat->fullmatch) { /* Full Boyer Moore */ 364 #ifdef DEBUG 365 printf("BESTLEN %d\n", bestlen); 366 { 367 uchar_t *p; 368 for (p = lb; p < le; p++) printf("%c", *p); 369 printf("\n"); 370 } 371 #endif 372 pat->bm_ptr = re_bmcomp(lb, le, pat->cmap); 373 pat->procfn = re_bmexec; 374 return; 375 } 376 if (bestlen > 1) { 377 /* Partial Boyer Moore */ 378 pat->bm_ptr = re_bmcomp(lb, le, pat->cmap); 379 pat->procfn = re_bmexec; 380 pat->fullmatch = NO; 381 } else { 382 pat->fullmatch = YES; 383 if ((pat->cw_ptr = re_recw(pat->re_ptr, pat->cmap)) != NULL) { 384 pat->procfn = re_cwexec; /* CW */ 385 return; 386 } 387 } 388 /* general egrep regular expression */ 389 pat->succfn = re_reexec; 390 391 if (pat->fullmatch) { 392 pat->procfn = pat->succfn; 393 pat->succfn = NULL; 394 } 395 } 396 397 static BOOL 398 fixloc(uchar_t **mb, uchar_t **me) 399 { 400 /* Handle match to null string */ 401 402 while (*me <= *mb) 403 (*me)++; 404 405 if (*(*me - 1) != NL) 406 while (**me != NL) 407 (*me)++; 408 409 /* Handle match to new-line only */ 410 411 if (*mb == *me - 1 && **mb == NL) { 412 (*me)++; 413 } 414 415 /* Handle match including beginning or ending new-line */ 416 417 if (**mb == NL) 418 (*mb)++; 419 if (*(*me - 1) == NL) 420 (*me)--; 421 return (YES); 422 } 423 424 static BOOL 425 grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me) 426 { 427 uchar_t *s, *f; 428 429 if (pat->fullmatch) 430 return (fixloc(mb, me)); 431 432 for (f = *me - 1; *f != NL; f++) { 433 } 434 f++; 435 for (s = *mb; *s != NL; s--) { 436 } 437 438 if ((*pat->succfn)(pat, s, f, mb, me)) { 439 return (YES); 440 } else { 441 *mb = f; 442 return (NO); 443 } 444 } 445 446 static void 447 eginit(re_re *r) 448 { 449 unsigned int n; 450 451 r->ptr = (Expr **)egmalloc(r->maxid * sizeof (Expr *)); 452 eptr(r, r->root); 453 n = r->maxid * sizeof (FID); 454 r->firstpos.base = (FID *)egmalloc(n); 455 r->tmp.base = (FID *)egmalloc(n); 456 CLEAR(r, &r->firstpos); 457 if (!first_lit(&r->firstpos, r->root->l)) { 458 /* 459 * This expression matches the null string!!!! 460 * Add EOP to beginning position set. 461 */ 462 SET(&r->firstpos, r->root->id, 0) 463 /* (void) printf("first of root->l == 0, b=%s\n", b); */ 464 } 465 stateinit(r); 466 (void) addstate(r, &r->firstpos, 0); 467 savestate(r); 468 } 469 470 static void 471 eptr(re_re *r, Expr *e) 472 { 473 if ((e->id < 0) || (e->id >= r->maxid)) { 474 err("internal error"); 475 } 476 r->ptr[e->id] = e; 477 if (e->type != Charclass) { 478 if (e->l) eptr(r, e->l); 479 if (e->r) eptr(r, e->r); 480 } 481 } 482 483 static BOOL 484 re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, uchar_t **me) 485 { 486 re_re *r = pat->re_ptr; 487 State *s, *t; 488 489 #ifdef DEBUG 490 if (TRACE(10)) { 491 uchar_t buf[EPRINTSIZE]; 492 epr(r->root, buf); 493 (void) printf("expr='%s'\n", buf); 494 } 495 #endif 496 s = startstate(r); 497 498 for (;;) { 499 uchar_t c; 500 501 if (s->cnt >= 0) { 502 #ifdef DEBUG 503 if (TRACE(6)) 504 (void) printf("match at input '%s'\n", b); 505 #endif 506 *mb = b - s->cnt; 507 *me = b; 508 if (fixloc(mb, me)) 509 return (YES); 510 } 511 512 if (b >= e) break; 513 c = pat->cmap[*b]; 514 #ifdef DEBUG 515 if (TRACE(4)) 516 (void) printf("state %d: char '%c'\n", s-r->states, *b); 517 #endif 518 if ((t = s->tab[c]) != NULL) s = t; 519 else s = nextstate(r, s, (int)c); 520 b++; 521 } 522 #ifdef DEBUG 523 if (TRACE(3)) { 524 uchar_t buf[EPRINTSIZE]; 525 526 epr(r->root, buf); 527 (void) printf("pat = %s\n", buf); 528 } 529 #endif 530 return (NO); 531 } 532 533 static BOOL 534 match(Expr *e, int a) 535 { 536 switch (e->type) { 537 case Dot: return ((BOOL)(a != NL)); 538 case Literal: return ((BOOL)(a == e->lit)); 539 case Charclass: return ((BOOL)(CCL_CHK((uchar_t *)e->r, a))); 540 default: return (NO); 541 } 542 } 543 544 /* 545 * generates the followset for a node in fpos 546 */ 547 static void 548 follow(Positionset *fpos, Expr *e) 549 { 550 Expr *p; 551 552 if (e->type == EOP) 553 return; 554 else 555 p = e->parent; 556 switch (p->type) { 557 case EOP: 558 SET(fpos, p->id, 0) 559 break; 560 case Plus: 561 case Star: 562 (void) first_lit(fpos, e); 563 follow(fpos, p); 564 break; 565 case Quest: 566 case Alternate: 567 follow(fpos, p); 568 break; 569 case Cat: 570 if (e == p->r || !first_lit(fpos, p->r)) 571 follow(fpos, p); 572 break; 573 default: 574 break; 575 } 576 } 577 578 /* 579 * first_lit returns NO if e is nullable and in the process, 580 * ets up fpos. 581 */ 582 static BOOL 583 first_lit(Positionset *fpos, Expr *e) 584 { 585 BOOL k; 586 587 switch (e->type) { 588 case Literal: 589 case Dot: 590 case Charclass: 591 SET(fpos, e->id, 0) 592 return (YES); 593 case EOP: 594 case Star: 595 case Quest: 596 (void) first_lit(fpos, e->l); 597 return (NO); 598 case Plus: 599 return (first_lit(fpos, e->l)); 600 case Cat: 601 return ((BOOL)(first_lit(fpos, e->l) || first_lit(fpos, e->r))); 602 case Alternate: 603 k = first_lit(fpos, e->r); 604 return ((BOOL)(first_lit(fpos, e->l) && k)); 605 default: 606 err("internal error"); 607 } 608 return (NO); 609 } 610 611 static void 612 efollow(re_re *r, Positionset *fpos, Expr *e) 613 { 614 ID i, *p; 615 616 CLEAR(r, fpos); 617 follow(fpos, e); 618 e->flen = fpos->count; 619 e->follow = (ID *)egmalloc(e->flen * sizeof (ID)); 620 p = e->follow; 621 #ifdef DEBUG 622 printf("ID = %d LIT %c FLEN = %d\n", e->id, e->lit, e->flen); 623 #endif 624 for (i = fpos->last; i > 0; i = fpos->base[i].id) { 625 *p++ = i; 626 #ifdef DEBUG 627 printf("FOLLOW ID = %d LIT %c\n", r->ptr[i]->id, r->ptr[i]->lit); 628 #endif 629 } 630 if (p != e->follow + e->flen) { 631 err("internal error"); 632 } 633 } 634 635 static State * 636 addstate(re_re *r, Positionset *ps, int cnt) 637 { 638 ID j; 639 FID *p, *q; 640 State *s; 641 642 if (cnt) { 643 s = r->states + getstate(r, ps); 644 (void) memset((char *)s->tab, 0, sizeof (s->tab)); 645 s->cnt = r->istate.cnt; 646 } else { 647 s = &r->istate; 648 s->cnt = -1; 649 } 650 s->pos = r->posnext; 651 r->posnext += ps->count; 652 s->npos = ps->count; 653 p = r->posbase + s->pos; 654 for (j = ps->last; j > 0; p++, j = q->id) { 655 q = &ps->base[j]; 656 p->id = j; 657 p->fcount = q->fcount; 658 if (p->id == r->root->id && s->cnt < p->fcount) 659 s->cnt = p->fcount; 660 } 661 #ifdef DEBUG 662 if (TRACE(3)) { 663 uchar_t buf[2000]; 664 spr(s->npos, s->pos+r->posbase, buf); 665 (void) printf("new state[%d] %s%s\n", s-r->states, buf, 666 s->cnt?" cnt":""); 667 } 668 #endif 669 return (s); 670 } 671 672 static State * 673 nextstate(re_re *r, State *s, int a) 674 { 675 State *news; 676 677 CLEAR(r, &r->tmp); 678 followstate(r, s, a, &r->tmp); 679 if (s != &r->istate) followstate(r, &r->istate, a, &r->tmp); 680 681 #ifdef DEBUG 682 if (TRACE(5)) { 683 uchar_t buf[2000]; 684 ppr(&r->tmp, buf); 685 (void) printf("nextstate(%d, '%c'): found %s\n", s-r->states, 686 a, buf); 687 } 688 #endif 689 if (r->tmp.count == 0) 690 news = &r->istate; 691 else if ((news = stateof(r, &r->tmp)) == NULL) 692 news = addstate(r, &r->tmp, 1); 693 s->tab[a] = news; 694 #ifdef DEBUG 695 if (TRACE(5)) { 696 (void) printf("nextstate(%d, '%c'): returning %ld\n", 697 s-r->states, a, news); 698 } 699 #endif 700 return (news); 701 } 702 703 static void 704 followstate(re_re *r, State *s, int a, Positionset *fpos) 705 { 706 int j; 707 ID *q, *eq; 708 Expr *e; 709 710 for (j = s->pos; j < (s->pos + s->npos); j++) { 711 e = r->ptr[r->posbase[j].id]; 712 if (e->type == EOP) continue; 713 if (match(e, a)) { 714 if (e->follow == NULL) efollow(r, &r->firstpos, e); 715 for (q = e->follow, eq = q + e->flen; q < eq; q++) { 716 SET(fpos, *q, r->posbase[j].fcount + 1) 717 #ifdef DEBUG 718 printf("CHAR %c FC %c COUNT %d\n", a, 719 r->ptr[*q]->lit, r->posbase[j].fcount+1); 720 #endif 721 } 722 } 723 } 724 } 725 726 static uchar_t * 727 egmalloc(size_t n) 728 { 729 uchar_t *x; 730 731 x = (uchar_t *)mymalloc(n); 732 mallocs[nmalloc++] = x; 733 if (nmalloc >= MAXMALLOCS) 734 nmalloc = MAXMALLOCS - 1; 735 return (x); 736 } 737 738 #ifdef DEBUG 739 void 740 ppr(Positionse *ps, char *p) 741 { 742 ID n; 743 744 if (ps->count < 1) { 745 (void) sprintf(p, "{}"); 746 return; 747 } 748 *p++ = '{'; 749 for (n = ps->last; n > 0; n = ps->base[n].id) { 750 (void) sprintf(p, "%d,", n); 751 p = strchr(p, 0); 752 } 753 p[-1] = '}'; 754 } 755 756 void 757 epr(Expr *e, uchar_t *res) 758 { 759 uchar_t r1[EPRINTSIZE], r2[EPRINTSIZE]; 760 int i; 761 762 if (e == NULL) { 763 (void) sprintf(res, "!0!"); 764 return; 765 } 766 switch (e->type) { 767 case Dot: 768 case Literal: 769 spr(e->flen, e->follow, r1); 770 (void) sprintf(res, "%c%s", e->lit, r1); 771 break; 772 case Charclass: 773 *res++ = '['; 774 for (i = 0; i < 256; i++) 775 if (CCL_CHK((uchar_t *)e->r, i)) { 776 *res++ = i; 777 } 778 *res++ = ']'; 779 *res = '\0'; 780 break; 781 case Cat: 782 epr(e->l, r1); 783 epr(e->r, r2); 784 (void) sprintf(res, "%s%s", r1, r2); 785 break; 786 case Alternate: 787 epr(e->l, r1); 788 epr(e->r, r2); 789 (void) sprintf(res, "(%s|%s)", r1, r2); 790 break; 791 case Star: 792 epr(e->l, r1); 793 (void) sprintf(res, "(%s)*", r1); 794 break; 795 case Plus: 796 epr(e->l, r1); 797 (void) sprintf(res, "(%s)+", r1); 798 break; 799 case Quest: 800 epr(e->l, r1); 801 (void) sprintf(res, "(%s)?", r1); 802 break; 803 case EOP: 804 epr(e->l, r1); 805 (void) sprintf(res, "%s<EOP>", r1); 806 break; 807 default: 808 (void) sprintf(res, "<undef type %d>", e->type); 809 err(res); 810 break; 811 } 812 } 813 814 static void 815 spr(int c, int *p, uchar_t *buf) 816 { 817 if (c > 0) { 818 *buf++ = '{'; 819 *buf = '\0'; 820 while (--c > 0) { 821 (void) sprintf(buf, "%d,", *p++); 822 buf = strchr(buf, 0); 823 } 824 (void) sprintf(buf, "%d}", *p); 825 } else 826 (void) sprintf(buf, "{}"); 827 } 828 #endif 829 830 static void 831 stateinit(re_re *r) 832 { 833 /* CONSTANTCONDITION */ 834 r->statelim = (sizeof (int) < 4 ? 32 : 128); 835 r->states = (State *)egmalloc(r->statelim * sizeof (State)); 836 837 /* CONSTANTCONDITION */ 838 r->nposalloc = (sizeof (int) < 4 ? 2048 : 8192); 839 r->posbase = (FID *)egmalloc(r->nposalloc * sizeof (FID)); 840 r->nstates = r->posnext = 0; 841 } 842 843 static void 844 clrstates(re_re *r) 845 { 846 r->nstates = 0; /* reclaim space for states and positions */ 847 r->posnext = r->posreset; 848 (void) memset((char *)r->istate.tab, 0, sizeof (r->istate.tab)); 849 } 850 851 static void 852 savestate(re_re *r) 853 { 854 r->posreset = r->posnext; /* save for reset */ 855 } 856 857 static State * 858 startstate(re_re *r) 859 { 860 return (&r->istate); 861 } 862 863 static int 864 getstate(re_re *r, Positionset *ps) 865 { 866 if (r->nstates >= r->statelim || 867 r->posnext + ps->count >= r->nposalloc) { 868 clrstates(r); 869 #ifdef DEBUG 870 printf("%d STATES FLUSHED\n", r->statelim); 871 #endif 872 } 873 return (r->nstates++); 874 } 875 876 static State * 877 stateof(re_re *r, Positionset *ps) 878 { 879 State *s; 880 int i; 881 FID *p, *e; 882 883 for (i = 0, s = r->states; i < r->nstates; i++, s++) { 884 if (s->npos == ps->count) { 885 for (p = s->pos+r->posbase, e = p+s->npos; p < e; p++) 886 if (ps->base[p->id].id == 0 || 887 ps->base[p->id].fcount != p->fcount) 888 goto next; 889 return (s); 890 } 891 next:; 892 } 893 return (NULL); 894 } 895 896 static re_re * 897 egprep(PATTERN *pat) 898 { 899 re_re *r; 900 901 r = (re_re *)egmalloc(sizeof (re_re)); 902 (void) memset((char *)r, 0, sizeof (re_re)); 903 904 pat->loc1 = pat->expression; 905 pat->loc2 = pat->expression + strlen((char *)pat->expression); 906 907 parno = 0; 908 maxid = 1; 909 r->cmap = pat->cmap; 910 lex(r, pat); 911 r->root = newexpr(EOP, '#', eall(r, pat), (Expr *)NULL); 912 r->maxid = maxid; 913 914 eginit(r); 915 return (r); 916 } 917 918 static Expr * 919 newexpr(Exprtype t, int lit, Expr *left, Expr *right) 920 { 921 Expr *e = (Expr *)egmalloc(sizeof (Expr)); 922 923 e->type = t; 924 e->parent = NULL; 925 e->lit = lit; 926 927 if (e->lit) e->id = maxid++; 928 else e->id = 0; 929 930 if ((e->l = left) != NULL) { 931 left->parent = e; 932 } 933 if ((e->r = right) != NULL) { 934 right->parent = e; 935 } 936 e->follow = NULL; 937 e->flen = 0; 938 return (e); 939 } 940 941 static void 942 lex(re_re *r, PATTERN *pat) 943 { 944 if (pat->loc1 == pat->loc2) { 945 toktype = EOP; 946 toklit = -1; 947 } else switch (toklit = *pat->loc1++) { 948 case '.': toktype = Dot; break; 949 case '*': toktype = Star; break; 950 case '+': toktype = Plus; break; 951 case '?': toktype = Quest; break; 952 case '[': toktype = Charclass; break; 953 case '|': toktype = Alternate; break; 954 case '(': toktype = Lpar; break; 955 case ')': toktype = Rpar; break; 956 case '\\': toktype = Backslash; 957 if (pat->loc1 == pat->loc2) { 958 err("syntax error - missing character " 959 "after \\"); 960 } else { 961 toklit = r->cmap[*pat->loc1++]; 962 } 963 break; 964 case '^': case '$': toktype = Literal; toklit = NL; break; 965 default: toktype = Literal; toklit = r->cmap[toklit]; break; 966 } 967 } 968 969 static int 970 ccl(PATTERN *pat, uchar_t *tab) 971 { 972 int i; 973 int range = 0; 974 int lastc = -1; 975 int count = 0; 976 BOOL comp = NO; 977 978 (void) memset((char *)tab, 0, CCL_SIZ * sizeof (uchar_t)); 979 if (*pat->loc1 == '^') { 980 pat->loc1++; 981 comp = YES; 982 } 983 if (*pat->loc1 == ']') { 984 uchar_t c = pat->cmap[*pat->loc1]; 985 CCL_SET(tab, c); 986 lastc = *pat->loc1++; 987 } 988 /* scan for chars */ 989 for (; (pat->loc1 < pat->loc2) && (*pat->loc1 != ']'); pat->loc1++) { 990 if (*pat->loc1 == '-') { 991 if (lastc < 0) CCL_SET(tab, pat->cmap['-']); 992 else range = 1; 993 continue; 994 } 995 if (range) { 996 for (i = *pat->loc1; i >= lastc; i--) { 997 CCL_SET(tab, pat->cmap[i]); 998 } 999 } else { 1000 uchar_t c = pat->cmap[*pat->loc1]; 1001 CCL_SET(tab, c); 1002 } 1003 1004 range = 0; 1005 1006 lastc = *pat->loc1; 1007 } 1008 if (range) CCL_SET(tab, pat->cmap['-']); 1009 1010 if (pat->loc1 < pat->loc2) pat->loc1++; 1011 else err("syntax error - missing ]"); 1012 1013 if (comp) { 1014 CCL_SET(tab, pat->cmap[NL]); 1015 for (i = 0; i < CCL_SIZ; i++) tab[i] ^= 0xff; 1016 } 1017 for (i = 0; i < 256; i++) { 1018 if (pat->cmap[i] != i) CCL_CLR(tab, i); 1019 if (CCL_CHK(tab, i)) { 1020 lastc = i; 1021 count++; 1022 } 1023 } 1024 if (count == 1) 1025 *tab = (char)lastc; 1026 return (count); 1027 } 1028 1029 /* 1030 * egrep patterns: 1031 * 1032 * Alternation: d0: d1 { '|' d1 }* 1033 * Concatenation: d1: d2 { d2 }* 1034 * Repetition: d2: d3 { '*' | '?' | '+' } 1035 * Literal: d3: lit | '.' | '[]' | '(' d0 ')' 1036 */ 1037 1038 static Expr * 1039 d3(re_re *r, PATTERN *pat) 1040 { 1041 Expr *e; 1042 uchar_t *tab; 1043 int count; 1044 1045 switch (toktype) { 1046 case Backslash: 1047 case Literal: 1048 e = newexpr(Literal, toklit, (Expr *)NULL, (Expr *)NULL); 1049 lex(r, pat); 1050 break; 1051 case Dot: 1052 e = newexpr(Dot, '.', (Expr *)NULL, (Expr *)NULL); 1053 lex(r, pat); 1054 break; 1055 case Charclass: 1056 tab = egmalloc(CCL_SIZ * sizeof (uchar_t)); 1057 count = ccl(pat, tab); 1058 if (count == 1) { 1059 toklit = *tab; 1060 e = newexpr(Literal, toklit, (Expr *)NULL, 1061 (Expr *)NULL); 1062 } else { 1063 e = newexpr(Charclass, '[', (Expr *)NULL, (Expr *)NULL); 1064 e->l = (Expr *)count; /* number of chars */ 1065 e->r = (Expr *)tab; /* bitmap of chars */ 1066 } 1067 lex(r, pat); 1068 break; 1069 case Lpar: 1070 lex(r, pat); 1071 count = ++parno; 1072 e = d0(r, pat); 1073 if (toktype == Rpar) 1074 lex(r, pat); 1075 else 1076 err("syntax error - missing )"); 1077 return (e); 1078 default: 1079 err("syntax error"); 1080 e = NULL; 1081 } 1082 return (e); 1083 } 1084 1085 static Expr * 1086 d2(re_re *r, PATTERN *pat) 1087 { 1088 Expr *e; 1089 Exprtype t; 1090 1091 e = d3(r, pat); 1092 while ((toktype == Star) || (toktype == Plus) || (toktype == Quest)) { 1093 t = toktype; 1094 lex(r, pat); 1095 e = newexpr(t, 0, e, (Expr *)NULL); 1096 } 1097 return (e); 1098 } 1099 1100 static Expr * 1101 d1(re_re *r, PATTERN *pat) 1102 { 1103 Expr *e, *f; 1104 1105 e = d2(r, pat); 1106 while ((toktype == Literal) || (toktype == Dot) || (toktype == Lpar) || 1107 (toktype == Backslash) || (toktype == Charclass)) { 1108 f = d2(r, pat); 1109 e = newexpr(Cat, 0, e, f); 1110 } 1111 return (e); 1112 } 1113 1114 static Expr * 1115 d0(re_re *r, PATTERN *pat) 1116 { 1117 Expr *e, *f; 1118 1119 e = d1(r, pat); 1120 while (toktype == Alternate) { 1121 lex(r, pat); 1122 if (toktype == EOP) 1123 continue; 1124 f = d1(r, pat); 1125 e = newexpr(Alternate, 0, e, f); 1126 } 1127 return (e); 1128 } 1129 1130 static Expr * 1131 eall(re_re *r, PATTERN *pat) 1132 { 1133 Expr *e; 1134 1135 while (toktype == Alternate) /* bogus but user-friendly */ 1136 lex(r, pat); 1137 e = d0(r, pat); 1138 while (toktype == Alternate) /* bogus but user-friendly */ 1139 lex(r, pat); 1140 if (toktype != EOP) 1141 err("syntax error"); 1142 return (e); 1143 } 1144 1145 static void 1146 err(char *s) 1147 { 1148 message = s; 1149 longjmp(env, 1); 1150 } 1151 1152 static int 1153 re_lit(PATTERN *pat, uchar_t **b, uchar_t **e) 1154 { 1155 bestlen = 0; 1156 pat->fullmatch = YES; 1157 START 1158 traverse(pat, pat->re_ptr->root->l); 1159 FINISH 1160 if (bestlen < 2) 1161 return (0); 1162 *b = egmalloc(bestlen * sizeof (uchar_t)); 1163 (void) memmove(*b, best, bestlen); 1164 *e = *b + bestlen - 1; 1165 return (bestlen - 1); 1166 } 1167 1168 static void 1169 traverse(PATTERN *pat, Expr *e) 1170 { 1171 switch (e->type) { 1172 case Literal: 1173 ADDL(e->lit) 1174 break; 1175 case Cat: 1176 traverse(pat, e->l); 1177 traverse(pat, e->r); 1178 break; 1179 case Plus: 1180 traverse(pat, e->l); 1181 FINISH /* can't go on past a + */ 1182 pat->fullmatch = NO; 1183 START /* but we can start with one! */ 1184 traverse(pat, e->l); 1185 break; 1186 default: 1187 FINISH 1188 pat->fullmatch = NO; 1189 START 1190 break; 1191 } 1192 } 1193 1194 static re_bm * 1195 re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap) 1196 { 1197 int j; 1198 int delta[256]; 1199 re_bm *b; 1200 1201 b = (re_bm *)egmalloc(sizeof (*b)); 1202 1203 b->patlen = pe - pb; 1204 b->cmap = cmap; 1205 b->bmpat = pb; 1206 1207 delta_2(b); 1208 1209 for (j = 0; j < 256; j++) 1210 delta[j] = b->patlen; 1211 1212 for (pe--; pb < pe; pb++) 1213 delta[b->cmap[*pb]] = pe - pb; 1214 1215 delta[b->cmap[*pb]] = LARGE; 1216 1217 for (j = 0; j < 256; j++) 1218 b->delta0[j] = delta[b->cmap[j]]; 1219 return (b); 1220 } 1221 1222 static void 1223 delta_2(re_bm *b) 1224 { 1225 int m = b->patlen; 1226 int i, k, j; 1227 1228 b->delta2 = (int *)egmalloc(m * sizeof (int)); 1229 1230 for (j = 0; j < m; j++) { 1231 k = 1; 1232 again: 1233 while (k <= j && b->bmpat[j-k] == b->bmpat[j]) k++; 1234 1235 for (i = j + 1; i < m; i++) { 1236 if (k <= i && b->bmpat[i-k] != b->bmpat[i]) { 1237 k++; 1238 goto again; 1239 } 1240 } 1241 b->delta2[j] = k + m - j - 1; 1242 } 1243 } 1244 1245 static BOOL 1246 re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb, uchar_t **me) 1247 { 1248 re_bm *b = pat->bm_ptr; 1249 uchar_t *sp; 1250 int k; 1251 1252 s += b->patlen - 1; 1253 while ((unsigned long)s < (unsigned long)e) { 1254 while ((unsigned long)(s += b->delta0[*s]) < (unsigned long)e) 1255 ; 1256 if ((unsigned long)s < (unsigned long)(e + b->patlen)) 1257 return (NO); /* no match */ 1258 s -= LARGE; 1259 for (k = b->patlen-2, sp = s-1; k >= 0; k--) { 1260 if (b->cmap[*sp--] != b->bmpat[k]) break; 1261 } 1262 if (k < 0) { 1263 *mb = ++sp; 1264 *me = s+1; 1265 if (grepmatch(pat, mb, me)) 1266 return (YES); 1267 s = *mb; 1268 } else if (k < 0) { 1269 s++; 1270 } else { 1271 int j; 1272 j = b->delta2[k]; 1273 k = b->delta0[*++sp]; 1274 if ((j > k) || (k == (int)LARGE)) 1275 k = j; 1276 s = sp + k; 1277 } 1278 } 1279 return (NO); 1280 } 1281 1282 static re_cw * 1283 re_recw(re_re *r, uchar_t *map) 1284 { 1285 Expr *e, *root = r->root; 1286 re_cw *pat; 1287 uchar_t *buf; 1288 1289 if (root->type != EOP) 1290 return (NULL); 1291 e = root->l; 1292 pat = re_cwinit(map); 1293 buf = (uchar_t *)egmalloc(20000 * sizeof (uchar_t)); 1294 if (!altlist(e, buf, pat)) { 1295 return (NULL); 1296 } 1297 re_cwcomp(pat); 1298 return (pat); 1299 } 1300 1301 static BOOL 1302 altlist(Expr *e, uchar_t *buf, re_cw *pat) 1303 { 1304 if (e->type == Alternate) 1305 return ((BOOL)(altlist(e->l, buf, pat) && 1306 altlist(e->r, buf, pat))); 1307 return (word(e, buf, pat)); 1308 } 1309 1310 static BOOL 1311 word(Expr *e, uchar_t *buf, re_cw *pat) 1312 { 1313 static uchar_t *p; 1314 1315 if (buf) p = buf; 1316 1317 if (e->type == Cat) { 1318 if (!word(e->l, (uchar_t *)NULL, pat)) 1319 return (NO); 1320 if (!word(e->r, (uchar_t *)NULL, pat)) 1321 return (NO); 1322 } else if (e->type == Literal) 1323 *p++ = e->lit; 1324 else 1325 return (NO); 1326 1327 if (buf) 1328 re_cwadd(pat, buf, p); 1329 return (YES); 1330 } 1331 1332 static re_cw * 1333 re_cwinit(uchar_t *cmap) 1334 { 1335 re_cw *c; 1336 1337 froot = NULL; 1338 next_node = NULL; 1339 next_link = NULL; 1340 c = (re_cw *)egmalloc(sizeof (*c)); 1341 c->nodeid = 0; 1342 c->maxdepth = 0; 1343 c->mindepth = 10000; 1344 c->root = newnode(c, 0); 1345 c->cmap = cmap; 1346 return (c); 1347 } 1348 1349 static void 1350 re_cwadd(re_cw *c, uchar_t *s, uchar_t *e) 1351 { 1352 Node *p, *state; 1353 Link *l; 1354 int depth; 1355 1356 state = c->root; 1357 while (s <= --e) { 1358 for (l = state->alts; l; l = l->next) 1359 if (l->lit == *e) 1360 break; 1361 if (l == NULL) 1362 break; 1363 else 1364 state = l->node; 1365 } 1366 if (s <= e) { 1367 depth = state->d+1; 1368 l = newlink(*e--, p = newnode(c, depth++)); 1369 l->next = state->alts; 1370 state->alts = l; 1371 state = p; 1372 while (s <= e) { 1373 state->alts = newlink(*e--, p = newnode(c, depth++)); 1374 state = p; 1375 } 1376 if (c->mindepth >= depth) c->mindepth = depth-1; 1377 } 1378 state->out = 1; 1379 } 1380 1381 #ifdef DEBUG 1382 static 1383 pr(Node *n) 1384 { 1385 Link *l; 1386 1387 printf("%d[%d,%d]: ", n->id, n->shift1, n->shift2); 1388 for (l = n->alts; l; l = l->next) { 1389 printf("edge from \"%d\" to \"%d\" label {\"%c\"};", 1390 n->id, l->node->id, l->lit); 1391 if (l->node->out) { 1392 printf(" draw \"%d\" as Doublecircle;", l->node->id); 1393 } 1394 if (l->node->fail) { 1395 printf(" edge from \"%d\" to \"%d\" dashed;", 1396 l->node->id, l->node->fail->id); 1397 } 1398 printf("\n"); 1399 pr(l->node); 1400 } 1401 } 1402 #endif 1403 1404 static void 1405 fail(Node *root) 1406 { 1407 Link *qhead = NULL, *qtail = NULL; 1408 Link *l, *ll; 1409 Link *t; 1410 Node *state, *r, *s; 1411 int a; 1412 1413 for (l = root->alts; l; l = l->next) { 1414 ADD(l->node); 1415 l->node->fail = root; 1416 } 1417 while (qhead) { 1418 r = qhead->node; 1419 DEL(); 1420 for (l = r->alts; l; l = l->next) { 1421 s = l->node; 1422 a = l->lit; 1423 ADD(s); 1424 state = r->fail; 1425 while (state) { 1426 for (ll = state->alts; ll; ll = ll->next) 1427 if (ll->lit == a) 1428 break; 1429 if (ll || (state == root)) { 1430 if (ll) 1431 state = ll->node; 1432 /* 1433 * do it here as only other exit is 1434 * state 0 1435 */ 1436 if (state->out) { 1437 s->out = 1; 1438 } 1439 break; 1440 } else 1441 state = state->fail; 1442 } 1443 s->fail = state; 1444 } 1445 } 1446 zeroroot(root, root); 1447 } 1448 1449 static void 1450 zeroroot(Node *root, Node *n) 1451 { 1452 Link *l; 1453 1454 if (n->fail == root) 1455 n->fail = NULL; 1456 for (l = n->alts; l; l = l->next) 1457 zeroroot(root, l->node); 1458 } 1459 1460 static void 1461 shift(re_cw *c) 1462 { 1463 Link *qhead = NULL, *qtail = NULL; 1464 Link *l; 1465 Link *t; 1466 Node *state, *r, *s; 1467 int k; 1468 1469 for (k = 0; k < 256; k++) 1470 c->step[k] = c->mindepth+1; 1471 c->root->shift1 = 1; 1472 c->root->shift2 = c->mindepth; 1473 for (l = c->root->alts; l; l = l->next) { 1474 l->node->shift2 = c->root->shift2; 1475 ADD(l->node); 1476 l->node->fail = NULL; 1477 } 1478 while (qhead) { 1479 r = qhead->node; 1480 DEL(); 1481 r->shift1 = c->mindepth; 1482 r->shift2 = c->mindepth; 1483 if ((state = r->fail) != NULL) { 1484 do { 1485 k = r->d - state->d; 1486 if (k < state->shift1) { 1487 state->shift1 = k; 1488 } 1489 if (r->out && (k < state->shift2)) { 1490 state->shift2 = k; 1491 } 1492 } while ((state = state->fail) != NULL); 1493 } 1494 for (l = r->alts; l; l = l->next) { 1495 s = l->node; 1496 ADD(s); 1497 } 1498 } 1499 shiftprop(c, c->root); 1500 shifttab(c->root); 1501 c->step[0] = 1; 1502 } 1503 1504 static void 1505 shifttab(Node *n) 1506 { 1507 Link *l; 1508 Node **nn; 1509 1510 n->tab = nn = (Node **)egmalloc(256 * sizeof (Node *)); 1511 (void) memset((char *)n->tab, 0, 256 * sizeof (Node *)); 1512 for (l = n->alts; l; l = l->next) 1513 nn[l->lit] = l->node; 1514 } 1515 1516 static void 1517 shiftprop(re_cw *c, Node *n) 1518 { 1519 Link *l; 1520 Node *nn; 1521 1522 for (l = n->alts; l; l = l->next) { 1523 if (c->step[l->lit] > l->node->d) 1524 c->step[l->lit] = l->node->d; 1525 nn = l->node; 1526 if (n->shift2 < nn->shift2) 1527 nn->shift2 = n->shift2; 1528 shiftprop(c, nn); 1529 } 1530 } 1531 1532 static void 1533 re_cwcomp(re_cw *c) 1534 { 1535 fail(c->root); 1536 shift(c); 1537 } 1538 1539 static BOOL 1540 re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb, uchar_t **me) 1541 { 1542 Node *state; 1543 Link *l; 1544 uchar_t *sp; 1545 uchar_t *s; 1546 uchar_t *e; 1547 Node *ostate; 1548 int k; 1549 re_cw *c = pat->cw_ptr; 1550 uchar_t fake[1]; 1551 uchar_t mappedsp; 1552 1553 fake[0] = 0; 1554 state = c->root; 1555 1556 s = rs+c->mindepth-1; 1557 e = re; 1558 while (s < e) { 1559 /* scan */ 1560 for (sp = s; (ostate = state) != NULL; ) { 1561 mappedsp = c->cmap[*sp]; 1562 if (state->tab) { 1563 if ((state = state->tab[mappedsp]) == NULL) 1564 goto nomatch; 1565 } else { 1566 for (l = state->alts; ; l = l->next) { 1567 if (l == NULL) 1568 goto nomatch; 1569 if (l->lit == mappedsp) { 1570 state = l->node; 1571 break; 1572 } 1573 } 1574 } 1575 if (state->out) { 1576 *mb = sp; 1577 *me = s+1; 1578 if (fixloc(mb, me)) 1579 return (YES); 1580 } 1581 if (--sp < rs) { 1582 sp = fake; 1583 break; 1584 } 1585 } 1586 nomatch: 1587 k = c->step[c->cmap[*sp]] - ostate->d - 1; 1588 if (k < ostate->shift1) 1589 k = ostate->shift1; 1590 if (k > ostate->shift2) 1591 k = ostate->shift2; 1592 s += k; 1593 state = c->root; 1594 } 1595 return (NO); 1596 } 1597 1598 static Node * 1599 newnode(re_cw *c, int d) 1600 { 1601 static Node *lim = NULL; 1602 static int incr = 256; 1603 1604 if (!next_node) lim = NULL; 1605 if (next_node == lim) { 1606 next_node = (Node *)egmalloc(incr * sizeof (Node)); 1607 lim = next_node + incr; 1608 } 1609 next_node->d = d; 1610 if (d > c->maxdepth) c->maxdepth = d; 1611 next_node->id = c->nodeid++; 1612 next_node->alts = NULL; 1613 next_node->tab = NULL; 1614 next_node->out = 0; 1615 return (next_node++); 1616 } 1617 1618 static Link * 1619 newlink(uchar_t lit, Node *n) 1620 { 1621 static Link *lim = NULL; 1622 static int incr = 256; 1623 1624 if (!next_link) lim = NULL; 1625 if (next_link == lim) { 1626 next_link = (Link *)egmalloc(incr * sizeof (Link)); 1627 lim = next_link + incr; 1628 } 1629 next_link->lit = lit; 1630 next_link->node = n; 1631 next_link->next = NULL; 1632 return (next_link++); 1633 } 1634 1635 int 1636 egrep(char *f, FILE *o, char *fo) 1637 { 1638 uchar_t buff[MAXBUFSIZE + ESIZE]; 1639 1640 buffer = buff; 1641 *buffer++ = NL; /* New line precedes buffer to prevent runover */ 1642 file = f; 1643 output = o; 1644 format = fo; 1645 return (execute()); 1646 } 1647 1648 static int 1649 execute(void) 1650 { 1651 LINE current; 1652 BOOL matched; 1653 BOOL all_searched; /* file being matched until end */ 1654 1655 if ((file_desc = open(file, O_RDONLY)) < 0) { 1656 return (-1); 1657 } 1658 init_file(¤t); 1659 /* while there is more get more text from the data stream */ 1660 for (;;) { 1661 all_searched = NO; 1662 1663 /* 1664 * Find next new-line in buffer. 1665 * Begin after previous new line character 1666 */ 1667 1668 current.prntbuf = current.newline + 1; 1669 current.ln++; /* increment line number */ 1670 1671 if (current.prntbuf < bufend) { 1672 /* There is more text in buffer */ 1673 1674 /* 1675 * Take our next 1676 * "line" as the entire remaining buffer. 1677 * However, if there is more of the file yet 1678 * to be read in, exclude any incomplete 1679 * line at end. 1680 */ 1681 if (file_stat == NO_MORE) { 1682 all_searched = YES; 1683 current.newline = bufend - 1; 1684 if (*current.newline != NL) { 1685 current.newline = bufend; 1686 } 1687 } else { 1688 /* 1689 * Find end of the last 1690 * complete line in the buffer. 1691 */ 1692 current.newline = bufend; 1693 while (*--current.newline != NL) { 1694 } 1695 if (current.newline < current.prntbuf) { 1696 /* end not found */ 1697 current.newline = bufend; 1698 } 1699 } 1700 } else { 1701 /* There is no more text in the buffer. */ 1702 current.newline = bufend; 1703 } 1704 if (current.newline >= bufend) { 1705 /* 1706 * There is no more text in the buffer, 1707 * or no new line was found. 1708 */ 1709 switch (file_stat) { 1710 case MORE: /* file partly unread */ 1711 case BEGIN: 1712 fgetfile(¤t); 1713 1714 current.ln--; 1715 continue; /* with while loop */ 1716 case NO_MORE: 1717 break; 1718 } 1719 /* Nothing more to read in for this file. */ 1720 if (current.newline <= current.prntbuf) { 1721 /* Nothing in the buffer, either */ 1722 /* We are done with the file. */ 1723 current.ln--; 1724 break; /* out of while loop */ 1725 } 1726 /* There is no NL at the end of the file */ 1727 } 1728 1729 matched = pattern_match(&match_pattern, ¤t); 1730 if (matched) { 1731 int nc; 1732 1733 get_ncount(¤t, match_pattern.loc1); 1734 get_line(¤t, match_pattern.loc2); 1735 1736 nc = current.newline + 1 - current.prntbuf; 1737 (void) fprintf(output, format, file, current.ln); 1738 (void) fwrite((char *)current.prntbuf, 1, nc, output); 1739 } else { 1740 if (all_searched) 1741 break; /* out of while loop */ 1742 1743 get_ncount(¤t, current.newline + 1); 1744 } 1745 } 1746 1747 (void) close(file_desc); 1748 return (0); 1749 } 1750 1751 static void 1752 init_file(LINE *cur_ptr) 1753 { 1754 file_stat = BEGIN; 1755 cur_ptr->ln = 0; 1756 bufend = buffer; 1757 cur_ptr->newline = buffer - 1; 1758 } 1759 1760 static BOOL 1761 pattern_match(PATTERN *pat, LINE *lptr) 1762 { 1763 if ((*pat->procfn)(pat, lptr->prntbuf - 1, lptr->newline + 1, 1764 &pat->loc1, &pat->loc2)) { 1765 return (YES); 1766 } else { 1767 pat->loc1 = lptr->prntbuf; 1768 pat->loc2 = lptr->newline - 1; 1769 return (NO); 1770 } 1771 } /* end of function pattern_match() */ 1772 1773 static void 1774 fgetfile(LINE *cur_ptr) 1775 { 1776 /* 1777 * This function reads as much of the current file into the buffer 1778 * as will fit. 1779 */ 1780 int bytes; /* bytes read in current buffer */ 1781 int bufsize = MAXBUFSIZE; /* free space in data buffer */ 1782 int save_current; 1783 uchar_t *begin = cur_ptr->prntbuf; 1784 1785 /* 1786 * Bytes of current incomplete line, if any, save_current in buffer. 1787 * These must be saved. 1788 */ 1789 save_current = (int)(bufend - begin); 1790 1791 if (file_stat == MORE) { 1792 /* 1793 * A portion of the file fills the buffer. We must clear 1794 * out the dead wood to make room for more of the file. 1795 */ 1796 1797 int k = 0; 1798 1799 k = begin - buffer; 1800 if (!k) { 1801 /* 1802 * We have one humungous current line, 1803 * which fills the whole buffer. 1804 * Toss it. 1805 */ 1806 begin = bufend; 1807 k = begin - buffer; 1808 save_current = 0; 1809 } 1810 1811 begin = buffer; 1812 bufend = begin + save_current; 1813 1814 bufsize = MAXBUFSIZE - save_current; 1815 1816 if (save_current > 0) { 1817 /* 1818 * Must save portion of current line. 1819 * Copy to beginning of buffer. 1820 */ 1821 (void) memmove(buffer, buffer + k, save_current); 1822 } 1823 } 1824 1825 /* Now read in the file. */ 1826 1827 do { 1828 bytes = read(file_desc, (char *)bufend, (unsigned int)bufsize); 1829 if (bytes < 0) { 1830 /* can't read any more of file */ 1831 bytes = 0; 1832 } 1833 bufend += bytes; 1834 bufsize -= bytes; 1835 } while (bytes > 0 && bufsize > 0); 1836 1837 1838 if (begin >= bufend) { 1839 /* No new lines or incomplete line in buffer */ 1840 file_stat = NO_MORE; 1841 } else if (bufsize) { 1842 /* Still space in the buffer, so we have read entire file */ 1843 file_stat = NO_MORE; 1844 } else { 1845 /* We filled entire buffer, so there may be more yet to read */ 1846 file_stat = MORE; 1847 } 1848 /* Note: bufend is 1 past last good char */ 1849 *bufend = NL; /* Sentinel new-line character */ 1850 /* Set newline to character preceding the current line */ 1851 cur_ptr->newline = begin - 1; 1852 } 1853 1854 static void 1855 get_line(LINE *cur_ptr, uchar_t *s) 1856 { 1857 while (*s++ != NL) { 1858 } 1859 cur_ptr->newline = --s; 1860 cur_ptr->ln++; 1861 } 1862 1863 static void 1864 get_ncount(LINE *cur_ptr, uchar_t *s) 1865 { 1866 uchar_t *p = cur_ptr->prntbuf; 1867 1868 while (*--s != NL) { 1869 } 1870 cur_ptr->newline = s++; 1871 while ((s > p) && 1872 (p = (uchar_t *)memchr((char *)p, NL, s - p)) != NULL) { 1873 cur_ptr->ln++; 1874 ++p; 1875 } 1876 cur_ptr->ln--; 1877 cur_ptr->prntbuf = s; 1878 } 1879