1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* Copyright (c) 1990 AT&T */ 23 /* All Rights Reserved */ 24 25 26 /* 27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 /* 34 * cscope - interactive C symbol or text cross-reference 35 * 36 * text searching functions 37 */ 38 39 #include <fcntl.h> 40 #include <setjmp.h> 41 #include <stdio.h> 42 #include <string.h> 43 #include <memory.h> 44 #include <ctype.h> 45 #include <unistd.h> 46 #include "library.h" 47 48 typedef enum { 49 NO = 0, 50 YES = 1 51 } BOOL; 52 53 typedef struct re_bm { 54 int delta0[256]; 55 int *delta2; 56 uchar_t *cmap; 57 uchar_t *bmpat; 58 int patlen; 59 } re_bm; 60 61 typedef struct Link { 62 uchar_t lit; 63 struct Node *node; 64 struct Link *next; 65 } Link; 66 67 typedef struct Node { 68 short out; 69 short d; 70 short shift1; 71 short shift2; 72 long id; 73 struct Node **tab; 74 Link *alts; 75 struct Node *fail; 76 } Node; 77 78 typedef struct re_cw { 79 int maxdepth, mindepth; 80 long nodeid; 81 int step[256]; 82 uchar_t *cmap; 83 struct Node *root; 84 } re_cw; 85 86 typedef enum { 87 /* lit expression types */ 88 Literal, Dot, Charclass, EOP, 89 90 /* non-lit expression types */ 91 Cat, Alternate, Star, Plus, Quest, 92 93 /* not really expression types, just helping */ 94 Lpar, Rpar, Backslash 95 96 } Exprtype; 97 98 typedef int ID; 99 100 typedef struct Expr { 101 Exprtype type; /* Type of expression (in grammar) */ 102 ID id; /* unique ID of lit expression */ 103 int lit; /* Literal character or tag */ 104 int flen; /* Number of following lit expressions */ 105 ID *follow; /* Array of IDs of following lit expressions */ 106 struct Expr *l; /* pointer to Left child (or ccl count) */ 107 struct Expr *r; /* pointer to Right child (or ccl mask) */ 108 struct Expr *parent; /* pointer to Parent */ 109 } Expr; 110 111 typedef struct State { 112 struct State *tab[256]; 113 int cnt; /* number of matched chars on full match, -1 otherwise */ 114 int npos; /* number of IDs in position set for this state. */ 115 int pos; /* index into posbase for beginning of IDs */ 116 } State; 117 118 typedef struct FID { 119 ID id; /* Lit Expression id */ 120 int fcount; /* Number of Lit exp matches before this one. */ 121 } FID; 122 123 typedef struct Positionset { 124 int count; /* Number of lit exps in position set */ 125 ID last; /* ID of last lit exp in position set */ 126 FID *base; /* array of MAXID FIDS */ 127 /* 0 means not in position set */ 128 /* -1 means first in position set */ 129 /* n (>0) is ID of prev member of position set. */ 130 } Positionset; 131 132 typedef struct re_re { 133 FID *posbase; /* Array of IDs from all states */ 134 int nposalloc; /* Allocated size of posbase */ 135 int posnext; /* Index into free space in posbase */ 136 int posreset; /* Index into end of IDs for initial state in posbase */ 137 int maxid; /* Number of (also maximum ID of) lit expressions */ 138 Expr *root; /* Pointer to root (EOP) expression */ 139 Expr **ptr; /* Pointer to array of ptrs to lit expressions. */ 140 uchar_t *cmap; /* Character mapping array */ 141 Positionset firstpos; 142 Positionset tmp; 143 int nstates; /* Number of current states defined */ 144 int statelim; /* Limit on number of states before flushing */ 145 State *states; /* Array of states */ 146 State istate; /* Initial state */ 147 } re_re; 148 149 typedef struct { 150 uchar_t *cmap; 151 re_re *re_ptr; 152 re_bm *bm_ptr; 153 re_cw *cw_ptr; 154 BOOL fullmatch; 155 BOOL (*procfn)(); 156 BOOL (*succfn)(); 157 uchar_t *loc1; 158 uchar_t *loc2; 159 uchar_t *expression; 160 } PATTERN; 161 162 typedef enum { 163 BEGIN, /* File is not yet in buffer at all */ 164 MORE, /* File is partly in buffer */ 165 NO_MORE /* File has been completely read into buffer */ 166 } FILE_STAT; 167 168 typedef struct { 169 uchar_t *prntbuf; /* current line of input from data file */ 170 uchar_t *newline; /* end of line (real or sentinel \n) */ 171 long ln; /* line number */ 172 } LINE; 173 174 #define NL '\n' 175 176 #define CCL_SIZ 32 177 #define CCL_SET(a, c) ((a)[(c) >> 3] |= bittab[(c) & 07]) 178 #define CCL_CLR(a, c) ((a)[(c) >> 3] &= ~bittab[(c) & 07]) 179 #define CCL_CHK(a, c) ((a)[(c) >> 3] & bittab[(c) & 07]) 180 181 #define ESIZE (BUFSIZ) 182 #define MAXBUFSIZE (64*BUFSIZ) 183 184 #define MAXMALLOCS 1024 185 #define MAXLIT 256 /* is plenty big enough */ 186 #define LARGE MAXBUFSIZE+ESIZE+2 187 188 #define CLEAR(r, rps) (void) memset((char *)(rps)->base, 0, \ 189 (int)((r)->maxid * sizeof (FID))), \ 190 (rps)->count = 0, (rps)->last = -1 191 #define SET(rps, n, cnt) { \ 192 if ((rps)->base[n].id == 0) {\ 193 (rps)->count++;\ 194 (rps)->base[n].fcount = (cnt);\ 195 (rps)->base[n].id = (rps)->last;\ 196 (rps)->last = (n);\ 197 } else if ((cnt) > (rps)->base[n].fcount) {\ 198 (rps)->base[n].fcount = (cnt);\ 199 }} 200 201 #define START { _p = tmp; } 202 #define ADDL(c) { if (_p >= &tmp[MAXLIT]) _p--; *_p++ = c; } 203 #define FINISH { ADDL(0) if ((_p-tmp) > bestlen) \ 204 (void) memmove(best, tmp, bestlen = _p-tmp); } 205 206 207 #define NEW(N) (froot?(t = froot, froot = froot->next, t->next = NULL, \ 208 t->node = N, t): newlink(0, N)) 209 #define ADD(N) if (qtail) qtail = qtail->next = NEW(N); \ 210 else qtail = qhead = NEW(N) 211 #define DEL() { Link *_l = qhead; if ((qhead = qhead->next) == NULL) \ 212 qtail = NULL; _l->next = froot; froot = _l; } 213 214 static uchar_t *buffer; 215 static uchar_t *bufend; 216 static FILE *output; 217 static char *format; 218 static char *file; 219 static int file_desc; 220 static FILE_STAT file_stat; 221 static PATTERN match_pattern; 222 static uchar_t char_map[2][256]; 223 static int iflag; 224 static Exprtype toktype; 225 static int toklit, parno, maxid; 226 static uchar_t tmp[MAXLIT], best[MAXLIT]; 227 static uchar_t *_p; 228 static int bestlen; 229 static Node *next_node; 230 static Link *froot, *next_link; 231 static jmp_buf env; 232 233 static int nmalloc; 234 static uchar_t *mallocs[MAXMALLOCS]; 235 236 static uchar_t bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; 237 238 #ifdef DEBUG 239 #define TRACE(n) (n < debug) 240 #define EPRINTSIZE 50000 241 static void spr(int c, int *p, uchar_t *buf); 242 void epr(Expr *e, uchar_t *res); 243 static int debug = 12; 244 #endif 245 246 static void init_file(LINE *cur_ptr); 247 static void get_line(LINE *cur_ptr, uchar_t *s); 248 static void get_ncount(LINE *cur_ptr, uchar_t *s); 249 static int execute(void); 250 static State *startstate(re_re *r); 251 static State *stateof(re_re *r, Positionset *ps); 252 static State *nextstate(re_re *r, State *s, int a); 253 static State *addstate(re_re *r, Positionset *ps, int cnt); 254 static BOOL match(Expr *e, int a); 255 static BOOL first_lit(Positionset *fpos, Expr *e); 256 static void eptr(re_re *r, Expr *e); 257 static void efollow(re_re *r, Positionset *fpos, Expr *e); 258 static void follow(Positionset *fpos, Expr *e); 259 static void followstate(re_re *r, State *s, int a, Positionset *fpos); 260 static Expr *eall(re_re *r, PATTERN *pat); 261 static Expr *d0(re_re *r, PATTERN *pat); 262 static Expr *d1(re_re *r, PATTERN *pat); 263 static Expr *d2(re_re *r, PATTERN *pat); 264 static Expr *d3(re_re *r, PATTERN *pat); 265 static Expr *newexpr(Exprtype t, int lit, Expr *left, Expr *right); 266 static void lex(re_re *r, PATTERN *pat); 267 static int re_lit(PATTERN *pat, uchar_t **b, uchar_t **e); 268 static void traverse(PATTERN *pat, Expr *e); 269 static int ccl(PATTERN *pat, uchar_t *tab); 270 static BOOL altlist(), word(); 271 static BOOL altlist(Expr *e, uchar_t *buf, re_cw *pat); 272 static Node *newnode(re_cw *c, int d); 273 static Link *newlink(uchar_t lit, Node *n); 274 static void fail(Node *root); 275 static void zeroroot(Node *root, Node *n); 276 static void shift(re_cw *c); 277 static void shifttab(Node *n); 278 static void shiftprop(re_cw *c, Node *n); 279 static void delta_2(re_bm *b); 280 static int getstate(re_re *r, Positionset *ps); 281 static void savestate(re_re *r); 282 static void stateinit(re_re *r); 283 static re_bm *re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap); 284 static re_cw *re_cwinit(uchar_t *cmap); 285 static re_cw *re_recw(re_re *r, uchar_t *map); 286 static re_re *egprep(PATTERN *pat); 287 static void re_cwadd(re_cw *c, uchar_t *s, uchar_t *e); 288 static void re_cwcomp(re_cw *c); 289 static void eginit(re_re *r); 290 static BOOL re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb, 291 uchar_t **me); 292 static BOOL re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb, 293 uchar_t **me); 294 static BOOL re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, 295 uchar_t **me); 296 static uchar_t *egmalloc(size_t n); 297 static void fgetfile(LINE *cur_ptr); 298 static void dogre(PATTERN *pat); 299 static BOOL pattern_match(PATTERN *pat, LINE *lptr); 300 static BOOL fixloc(uchar_t **mb, uchar_t **me); 301 static BOOL grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me); 302 static void err(char *s); 303 304 static char *message; 305 306 void 307 egrepcaseless(int i) 308 { 309 iflag = i; /* simulate "egrep -i" */ 310 } 311 312 char * 313 egrepinit(char *expression) 314 { 315 static int firsttime = 1; 316 int i; 317 318 if (firsttime) { 319 firsttime = 0; 320 for (i = 0; i < 256; i++) { 321 char_map[0][i] = (uchar_t)i; 322 char_map[1][i] = tolower(i); 323 } 324 } 325 for (i = 0; i < nmalloc; i ++) 326 free(mallocs[i]); 327 nmalloc = 0; 328 message = NULL; 329 330 match_pattern.expression = (uchar_t *)expression; 331 match_pattern.cmap = char_map[iflag]; 332 if (setjmp(env) == 0) { 333 dogre(&match_pattern); 334 #ifdef DEBUG 335 { 336 PATTERN *p = match_pattern; 337 if (p->procfn == re_bmexec) 338 if (!p->fullmatch) 339 if (p->succfn == re_reexec) 340 printf("PARTIAL BOYER_MOORE\n"); 341 else 342 printf("PARTIAL B_M with GREP\n"); 343 else 344 printf("FULL BOYER_MOORE\n"); 345 else if (p->procfn == re_cwexec) 346 printf("C_W\n"); 347 else 348 printf("GENERAL\n"); 349 } 350 #endif 351 } 352 return (message); 353 } 354 355 static void 356 dogre(PATTERN *pat) 357 { 358 uchar_t *lb, *le; 359 360 #ifdef DEBUG 361 printf("PATTERN %s\n", pat->expression); 362 #endif 363 pat->re_ptr = egprep(pat); 364 bestlen = re_lit(pat, &lb, &le); 365 366 if (bestlen && pat->fullmatch) { /* Full Boyer Moore */ 367 #ifdef DEBUG 368 printf("BESTLEN %d\n", bestlen); 369 { 370 uchar_t *p; 371 for (p = lb; p < le; p++) printf("%c", *p); 372 printf("\n"); 373 } 374 #endif 375 pat->bm_ptr = re_bmcomp(lb, le, pat->cmap); 376 pat->procfn = re_bmexec; 377 return; 378 } 379 if (bestlen > 1) { 380 /* Partial Boyer Moore */ 381 pat->bm_ptr = re_bmcomp(lb, le, pat->cmap); 382 pat->procfn = re_bmexec; 383 pat->fullmatch = NO; 384 } else { 385 pat->fullmatch = YES; 386 if ((pat->cw_ptr = re_recw(pat->re_ptr, pat->cmap)) != NULL) { 387 pat->procfn = re_cwexec; /* CW */ 388 return; 389 } 390 } 391 /* general egrep regular expression */ 392 pat->succfn = re_reexec; 393 394 if (pat->fullmatch) { 395 pat->procfn = pat->succfn; 396 pat->succfn = NULL; 397 } 398 } 399 400 static BOOL 401 fixloc(uchar_t **mb, uchar_t **me) 402 { 403 /* Handle match to null string */ 404 405 while (*me <= *mb) 406 (*me)++; 407 408 if (*(*me - 1) != NL) 409 while (**me != NL) 410 (*me)++; 411 412 /* Handle match to new-line only */ 413 414 if (*mb == *me - 1 && **mb == NL) { 415 (*me)++; 416 } 417 418 /* Handle match including beginning or ending new-line */ 419 420 if (**mb == NL) 421 (*mb)++; 422 if (*(*me - 1) == NL) 423 (*me)--; 424 return (YES); 425 } 426 427 static BOOL 428 grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me) 429 { 430 uchar_t *s, *f; 431 432 if (pat->fullmatch) 433 return (fixloc(mb, me)); 434 435 for (f = *me - 1; *f != NL; f++) { 436 } 437 f++; 438 for (s = *mb; *s != NL; s--) { 439 } 440 441 if ((*pat->succfn)(pat, s, f, mb, me)) { 442 return (YES); 443 } else { 444 *mb = f; 445 return (NO); 446 } 447 } 448 449 static void 450 eginit(re_re *r) 451 { 452 unsigned int n; 453 454 r->ptr = (Expr **)egmalloc(r->maxid * sizeof (Expr *)); 455 eptr(r, r->root); 456 n = r->maxid * sizeof (FID); 457 r->firstpos.base = (FID *)egmalloc(n); 458 r->tmp.base = (FID *)egmalloc(n); 459 CLEAR(r, &r->firstpos); 460 if (!first_lit(&r->firstpos, r->root->l)) { 461 /* 462 * This expression matches the null string!!!! 463 * Add EOP to beginning position set. 464 */ 465 SET(&r->firstpos, r->root->id, 0) 466 /* (void) printf("first of root->l == 0, b=%s\n", b); */ 467 } 468 stateinit(r); 469 (void) addstate(r, &r->firstpos, 0); 470 savestate(r); 471 } 472 473 static void 474 eptr(re_re *r, Expr *e) 475 { 476 if ((e->id < 0) || (e->id >= r->maxid)) { 477 err("internal error"); 478 } 479 r->ptr[e->id] = e; 480 if (e->type != Charclass) { 481 if (e->l) eptr(r, e->l); 482 if (e->r) eptr(r, e->r); 483 } 484 } 485 486 static BOOL 487 re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, uchar_t **me) 488 { 489 re_re *r = pat->re_ptr; 490 State *s, *t; 491 492 #ifdef DEBUG 493 if (TRACE(10)) { 494 uchar_t buf[EPRINTSIZE]; 495 epr(r->root, buf); 496 (void) printf("expr='%s'\n", buf); 497 } 498 #endif 499 s = startstate(r); 500 501 for (;;) { 502 uchar_t c; 503 504 if (s->cnt >= 0) { 505 #ifdef DEBUG 506 if (TRACE(6)) 507 (void) printf("match at input '%s'\n", b); 508 #endif 509 *mb = b - s->cnt; 510 *me = b; 511 if (fixloc(mb, me)) 512 return (YES); 513 } 514 515 if (b >= e) break; 516 c = pat->cmap[*b]; 517 #ifdef DEBUG 518 if (TRACE(4)) 519 (void) printf("state %d: char '%c'\n", s-r->states, *b); 520 #endif 521 if ((t = s->tab[c]) != NULL) s = t; 522 else s = nextstate(r, s, (int)c); 523 b++; 524 } 525 #ifdef DEBUG 526 if (TRACE(3)) { 527 uchar_t buf[EPRINTSIZE]; 528 529 epr(r->root, buf); 530 (void) printf("pat = %s\n", buf); 531 } 532 #endif 533 return (NO); 534 } 535 536 static BOOL 537 match(Expr *e, int a) 538 { 539 switch (e->type) { 540 case Dot: return ((BOOL)(a != NL)); 541 case Literal: return ((BOOL)(a == e->lit)); 542 case Charclass: return ((BOOL)(CCL_CHK((uchar_t *)e->r, a))); 543 default: return (NO); 544 } 545 } 546 547 /* 548 * generates the followset for a node in fpos 549 */ 550 static void 551 follow(Positionset *fpos, Expr *e) 552 { 553 Expr *p; 554 555 if (e->type == EOP) 556 return; 557 else 558 p = e->parent; 559 switch (p->type) { 560 case EOP: 561 SET(fpos, p->id, 0) 562 break; 563 case Plus: 564 case Star: 565 (void) first_lit(fpos, e); 566 follow(fpos, p); 567 break; 568 case Quest: 569 case Alternate: 570 follow(fpos, p); 571 break; 572 case Cat: 573 if (e == p->r || !first_lit(fpos, p->r)) 574 follow(fpos, p); 575 break; 576 default: 577 break; 578 } 579 } 580 581 /* 582 * first_lit returns NO if e is nullable and in the process, 583 * ets up fpos. 584 */ 585 static BOOL 586 first_lit(Positionset *fpos, Expr *e) 587 { 588 BOOL k; 589 590 switch (e->type) { 591 case Literal: 592 case Dot: 593 case Charclass: 594 SET(fpos, e->id, 0) 595 return (YES); 596 case EOP: 597 case Star: 598 case Quest: 599 (void) first_lit(fpos, e->l); 600 return (NO); 601 case Plus: 602 return (first_lit(fpos, e->l)); 603 case Cat: 604 return ((BOOL)(first_lit(fpos, e->l) || first_lit(fpos, e->r))); 605 case Alternate: 606 k = first_lit(fpos, e->r); 607 return ((BOOL)(first_lit(fpos, e->l) && k)); 608 default: 609 err("internal error"); 610 } 611 return (NO); 612 } 613 614 static void 615 efollow(re_re *r, Positionset *fpos, Expr *e) 616 { 617 ID i, *p; 618 619 CLEAR(r, fpos); 620 follow(fpos, e); 621 e->flen = fpos->count; 622 e->follow = (ID *)egmalloc(e->flen * sizeof (ID)); 623 p = e->follow; 624 #ifdef DEBUG 625 printf("ID = %d LIT %c FLEN = %d\n", e->id, e->lit, e->flen); 626 #endif 627 for (i = fpos->last; i > 0; i = fpos->base[i].id) { 628 *p++ = i; 629 #ifdef DEBUG 630 printf("FOLLOW ID = %d LIT %c\n", r->ptr[i]->id, r->ptr[i]->lit); 631 #endif 632 } 633 if (p != e->follow + e->flen) { 634 err("internal error"); 635 } 636 } 637 638 static State * 639 addstate(re_re *r, Positionset *ps, int cnt) 640 { 641 ID j; 642 FID *p, *q; 643 State *s; 644 645 if (cnt) { 646 s = r->states + getstate(r, ps); 647 (void) memset((char *)s->tab, 0, sizeof (s->tab)); 648 s->cnt = r->istate.cnt; 649 } else { 650 s = &r->istate; 651 s->cnt = -1; 652 } 653 s->pos = r->posnext; 654 r->posnext += ps->count; 655 s->npos = ps->count; 656 p = r->posbase + s->pos; 657 for (j = ps->last; j > 0; p++, j = q->id) { 658 q = &ps->base[j]; 659 p->id = j; 660 p->fcount = q->fcount; 661 if (p->id == r->root->id && s->cnt < p->fcount) 662 s->cnt = p->fcount; 663 } 664 #ifdef DEBUG 665 if (TRACE(3)) { 666 uchar_t buf[2000]; 667 spr(s->npos, s->pos+r->posbase, buf); 668 (void) printf("new state[%d] %s%s\n", s-r->states, buf, 669 s->cnt?" cnt":""); 670 } 671 #endif 672 return (s); 673 } 674 675 static State * 676 nextstate(re_re *r, State *s, int a) 677 { 678 State *news; 679 680 CLEAR(r, &r->tmp); 681 followstate(r, s, a, &r->tmp); 682 if (s != &r->istate) followstate(r, &r->istate, a, &r->tmp); 683 684 #ifdef DEBUG 685 if (TRACE(5)) { 686 uchar_t buf[2000]; 687 ppr(&r->tmp, buf); 688 (void) printf("nextstate(%d, '%c'): found %s\n", s-r->states, 689 a, buf); 690 } 691 #endif 692 if (r->tmp.count == 0) 693 news = &r->istate; 694 else if ((news = stateof(r, &r->tmp)) == NULL) 695 news = addstate(r, &r->tmp, 1); 696 s->tab[a] = news; 697 #ifdef DEBUG 698 if (TRACE(5)) { 699 (void) printf("nextstate(%d, '%c'): returning %ld\n", 700 s-r->states, a, news); 701 } 702 #endif 703 return (news); 704 } 705 706 static void 707 followstate(re_re *r, State *s, int a, Positionset *fpos) 708 { 709 int j; 710 ID *q, *eq; 711 Expr *e; 712 713 for (j = s->pos; j < (s->pos + s->npos); j++) { 714 e = r->ptr[r->posbase[j].id]; 715 if (e->type == EOP) continue; 716 if (match(e, a)) { 717 if (e->follow == NULL) efollow(r, &r->firstpos, e); 718 for (q = e->follow, eq = q + e->flen; q < eq; q++) { 719 SET(fpos, *q, r->posbase[j].fcount + 1) 720 #ifdef DEBUG 721 printf("CHAR %c FC %c COUNT %d\n", 722 a, 723 r->ptr[*q]->lit, 724 r->posbase[j].fcount+1); 725 #endif 726 } 727 } 728 } 729 } 730 731 static uchar_t * 732 egmalloc(size_t n) 733 { 734 uchar_t *x; 735 736 x = (uchar_t *)mymalloc(n); 737 mallocs[nmalloc++] = x; 738 if (nmalloc >= MAXMALLOCS) 739 nmalloc = MAXMALLOCS - 1; 740 return (x); 741 } 742 743 #ifdef DEBUG 744 void 745 ppr(Positionse *ps, char *p) 746 { 747 ID n; 748 749 if (ps->count < 1) { 750 (void) sprintf(p, "{}"); 751 return; 752 } 753 *p++ = '{'; 754 for (n = ps->last; n > 0; n = ps->base[n].id) { 755 (void) sprintf(p, "%d,", n); 756 p = strchr(p, 0); 757 } 758 p[-1] = '}'; 759 } 760 761 void 762 epr(Expr *e, uchar_t *res) 763 { 764 uchar_t r1[EPRINTSIZE], r2[EPRINTSIZE]; 765 int i; 766 767 if (e == NULL) { 768 (void) sprintf(res, "!0!"); 769 return; 770 } 771 switch (e->type) { 772 case Dot: 773 case Literal: 774 spr(e->flen, e->follow, r1); 775 (void) sprintf(res, "%c%s", e->lit, r1); 776 break; 777 case Charclass: 778 *res++ = '['; 779 for (i = 0; i < 256; i++) 780 if (CCL_CHK((uchar_t *)e->r, i)) { 781 *res++ = i; 782 } 783 *res++ = ']'; 784 *res = '\0'; 785 break; 786 case Cat: 787 epr(e->l, r1); 788 epr(e->r, r2); 789 (void) sprintf(res, "%s%s", r1, r2); 790 break; 791 case Alternate: 792 epr(e->l, r1); 793 epr(e->r, r2); 794 (void) sprintf(res, "(%s|%s)", r1, r2); 795 break; 796 case Star: 797 epr(e->l, r1); 798 (void) sprintf(res, "(%s)*", r1); 799 break; 800 case Plus: 801 epr(e->l, r1); 802 (void) sprintf(res, "(%s)+", r1); 803 break; 804 case Quest: 805 epr(e->l, r1); 806 (void) sprintf(res, "(%s)?", r1); 807 break; 808 case EOP: 809 epr(e->l, r1); 810 (void) sprintf(res, "%s<EOP>", r1); 811 break; 812 default: 813 (void) sprintf(res, "<undef type %d>", e->type); 814 err(res); 815 break; 816 } 817 } 818 819 static void 820 spr(int c, int *p, uchar_t *buf) 821 { 822 if (c > 0) { 823 *buf++ = '{'; 824 *buf = '\0'; 825 while (--c > 0) { 826 (void) sprintf(buf, "%d,", *p++); 827 buf = strchr(buf, 0); 828 } 829 (void) sprintf(buf, "%d}", *p); 830 } else 831 (void) sprintf(buf, "{}"); 832 } 833 #endif 834 835 static void 836 stateinit(re_re *r) 837 { 838 /* CONSTANTCONDITION */ 839 r->statelim = (sizeof (int) < 4 ? 32 : 128); 840 r->states = (State *)egmalloc(r->statelim * sizeof (State)); 841 842 /* CONSTANTCONDITION */ 843 r->nposalloc = (sizeof (int) < 4 ? 2048 : 8192); 844 r->posbase = (FID *)egmalloc(r->nposalloc * sizeof (FID)); 845 r->nstates = r->posnext = 0; 846 } 847 848 static void 849 clrstates(re_re *r) 850 { 851 r->nstates = 0; /* reclaim space for states and positions */ 852 r->posnext = r->posreset; 853 (void) memset((char *)r->istate.tab, 0, sizeof (r->istate.tab)); 854 } 855 856 static void 857 savestate(re_re *r) 858 { 859 r->posreset = r->posnext; /* save for reset */ 860 } 861 862 static State * 863 startstate(re_re *r) 864 { 865 return (&r->istate); 866 } 867 868 static int 869 getstate(re_re *r, Positionset *ps) 870 { 871 if (r->nstates >= r->statelim || 872 r->posnext + ps->count >= r->nposalloc) { 873 clrstates(r); 874 #ifdef DEBUG 875 printf("%d STATES FLUSHED\n", r->statelim); 876 #endif 877 } 878 return (r->nstates++); 879 } 880 881 static State * 882 stateof(re_re *r, Positionset *ps) 883 { 884 State *s; 885 int i; 886 FID *p, *e; 887 888 for (i = 0, s = r->states; i < r->nstates; i++, s++) { 889 if (s->npos == ps->count) { 890 for (p = s->pos+r->posbase, e = p+s->npos; p < e; p++) 891 if (ps->base[p->id].id == 0 || 892 ps->base[p->id].fcount != p->fcount) 893 goto next; 894 return (s); 895 } 896 next:; 897 } 898 return (NULL); 899 } 900 901 static re_re * 902 egprep(PATTERN *pat) 903 { 904 re_re *r; 905 906 r = (re_re *)egmalloc(sizeof (re_re)); 907 (void) memset((char *)r, 0, sizeof (re_re)); 908 909 pat->loc1 = pat->expression; 910 pat->loc2 = pat->expression + strlen((char *)pat->expression); 911 912 parno = 0; 913 maxid = 1; 914 r->cmap = pat->cmap; 915 lex(r, pat); 916 r->root = newexpr(EOP, '#', eall(r, pat), (Expr *)NULL); 917 r->maxid = maxid; 918 919 eginit(r); 920 return (r); 921 } 922 923 static Expr * 924 newexpr(Exprtype t, int lit, Expr *left, Expr *right) 925 { 926 Expr *e = (Expr *)egmalloc(sizeof (Expr)); 927 928 e->type = t; 929 e->parent = NULL; 930 e->lit = lit; 931 932 if (e->lit) e->id = maxid++; 933 else e->id = 0; 934 935 if ((e->l = left) != NULL) { 936 left->parent = e; 937 } 938 if ((e->r = right) != NULL) { 939 right->parent = e; 940 } 941 e->follow = NULL; 942 e->flen = 0; 943 return (e); 944 } 945 946 static void 947 lex(re_re *r, PATTERN *pat) 948 { 949 if (pat->loc1 == pat->loc2) { 950 toktype = EOP; 951 toklit = -1; 952 } else switch (toklit = *pat->loc1++) { 953 case '.': toktype = Dot; break; 954 case '*': toktype = Star; break; 955 case '+': toktype = Plus; break; 956 case '?': toktype = Quest; break; 957 case '[': toktype = Charclass; break; 958 case '|': toktype = Alternate; break; 959 case '(': toktype = Lpar; break; 960 case ')': toktype = Rpar; break; 961 case '\\': toktype = Backslash; 962 if (pat->loc1 == pat->loc2) { 963 err("syntax error - missing character " 964 "after \\"); 965 } else 966 toklit = r->cmap[*pat->loc1++]; 967 break; 968 case '^': case '$': toktype = Literal; toklit = NL; break; 969 default: toktype = Literal; toklit = r->cmap[toklit]; break; 970 } 971 } 972 973 static int 974 ccl(PATTERN *pat, uchar_t *tab) 975 { 976 int i; 977 int range = 0; 978 int lastc = -1; 979 int count = 0; 980 BOOL comp = NO; 981 982 (void) memset((char *)tab, 0, CCL_SIZ * sizeof (uchar_t)); 983 if (*pat->loc1 == '^') { 984 pat->loc1++; 985 comp = YES; 986 } 987 if (*pat->loc1 == ']') { 988 uchar_t c = pat->cmap[*pat->loc1]; 989 CCL_SET(tab, c); 990 lastc = *pat->loc1++; 991 } 992 /* scan for chars */ 993 for (; (pat->loc1 < pat->loc2) && (*pat->loc1 != ']'); 994 pat->loc1++) { 995 if (*pat->loc1 == '-') { 996 if (lastc < 0) CCL_SET(tab, pat->cmap['-']); 997 else range = 1; 998 continue; 999 } 1000 if (range) { 1001 for (i = *pat->loc1; i >= lastc; i--) { 1002 CCL_SET(tab, pat->cmap[i]); 1003 } 1004 } else { 1005 uchar_t c = pat->cmap[*pat->loc1]; 1006 CCL_SET(tab, c); 1007 } 1008 1009 range = 0; 1010 1011 lastc = *pat->loc1; 1012 } 1013 if (range) CCL_SET(tab, pat->cmap['-']); 1014 1015 if (pat->loc1 < pat->loc2) pat->loc1++; 1016 else err("syntax error - missing ]"); 1017 1018 if (comp) { 1019 CCL_SET(tab, pat->cmap[NL]); 1020 for (i = 0; i < CCL_SIZ; i++) tab[i] ^= 0xff; 1021 } 1022 for (i = 0; i < 256; i++) { 1023 if (pat->cmap[i] != i) CCL_CLR(tab, i); 1024 if (CCL_CHK(tab, i)) { 1025 lastc = i; 1026 count++; 1027 } 1028 } 1029 if (count == 1) 1030 *tab = (char)lastc; 1031 return (count); 1032 } 1033 1034 /* 1035 * egrep patterns: 1036 * 1037 * Alternation: d0: d1 { '|' d1 }* 1038 * Concatenation: d1: d2 { d2 }* 1039 * Repetition: d2: d3 { '*' | '?' | '+' } 1040 * Literal: d3: lit | '.' | '[]' | '(' d0 ')' 1041 */ 1042 1043 static Expr * 1044 d3(re_re *r, PATTERN *pat) 1045 { 1046 Expr *e; 1047 uchar_t *tab; 1048 int count; 1049 1050 switch (toktype) { 1051 case Backslash: 1052 case Literal: 1053 e = newexpr(Literal, toklit, (Expr *)NULL, (Expr *)NULL); 1054 lex(r, pat); 1055 break; 1056 case Dot: 1057 e = newexpr(Dot, '.', (Expr *)NULL, (Expr *)NULL); 1058 lex(r, pat); 1059 break; 1060 case Charclass: 1061 tab = egmalloc(CCL_SIZ * sizeof (uchar_t)); 1062 count = ccl(pat, tab); 1063 if (count == 1) { 1064 toklit = *tab; 1065 e = newexpr(Literal, toklit, (Expr *)NULL, 1066 (Expr *)NULL); 1067 } else { 1068 e = newexpr(Charclass, '[', (Expr *)NULL, (Expr *)NULL); 1069 e->l = (Expr *)count; /* number of chars */ 1070 e->r = (Expr *)tab; /* bitmap of chars */ 1071 } 1072 lex(r, pat); 1073 break; 1074 case Lpar: 1075 lex(r, pat); 1076 count = ++parno; 1077 e = d0(r, pat); 1078 if (toktype == Rpar) 1079 lex(r, pat); 1080 else 1081 err("syntax error - missing )"); 1082 return (e); 1083 default: 1084 err("syntax error"); 1085 e = NULL; 1086 } 1087 return (e); 1088 } 1089 1090 static Expr * 1091 d2(re_re *r, PATTERN *pat) 1092 { 1093 Expr *e; 1094 Exprtype t; 1095 1096 e = d3(r, pat); 1097 while ((toktype == Star) || (toktype == Plus) || (toktype == Quest)) { 1098 t = toktype; 1099 lex(r, pat); 1100 e = newexpr(t, 0, e, (Expr *)NULL); 1101 } 1102 return (e); 1103 } 1104 1105 static Expr * 1106 d1(re_re *r, PATTERN *pat) 1107 { 1108 Expr *e, *f; 1109 1110 e = d2(r, pat); 1111 while ((toktype == Literal) || (toktype == Dot) || (toktype == Lpar) || 1112 (toktype == Backslash) || (toktype == Charclass)) { 1113 f = d2(r, pat); 1114 e = newexpr(Cat, 0, e, f); 1115 } 1116 return (e); 1117 } 1118 1119 static Expr * 1120 d0(re_re *r, PATTERN *pat) 1121 { 1122 Expr *e, *f; 1123 1124 e = d1(r, pat); 1125 while (toktype == Alternate) { 1126 lex(r, pat); 1127 if (toktype == EOP) 1128 continue; 1129 f = d1(r, pat); 1130 e = newexpr(Alternate, 0, e, f); 1131 } 1132 return (e); 1133 } 1134 1135 static Expr * 1136 eall(re_re *r, PATTERN *pat) 1137 { 1138 Expr *e; 1139 1140 while (toktype == Alternate) /* bogus but user-friendly */ 1141 lex(r, pat); 1142 e = d0(r, pat); 1143 while (toktype == Alternate) /* bogus but user-friendly */ 1144 lex(r, pat); 1145 if (toktype != EOP) 1146 err("syntax error"); 1147 return (e); 1148 } 1149 1150 static void 1151 err(char *s) 1152 { 1153 message = s; 1154 longjmp(env, 1); 1155 } 1156 1157 static int 1158 re_lit(PATTERN *pat, uchar_t **b, uchar_t **e) 1159 { 1160 bestlen = 0; 1161 pat->fullmatch = YES; 1162 START 1163 traverse(pat, pat->re_ptr->root->l); 1164 FINISH 1165 if (bestlen < 2) 1166 return (0); 1167 *b = egmalloc(bestlen * sizeof (uchar_t)); 1168 (void) memmove(*b, best, bestlen); 1169 *e = *b + bestlen - 1; 1170 return (bestlen - 1); 1171 } 1172 1173 static void 1174 traverse(PATTERN *pat, Expr *e) 1175 { 1176 switch (e->type) { 1177 case Literal: 1178 ADDL(e->lit) 1179 break; 1180 case Cat: 1181 traverse(pat, e->l); 1182 traverse(pat, e->r); 1183 break; 1184 case Plus: 1185 traverse(pat, e->l); 1186 FINISH /* can't go on past a + */ 1187 pat->fullmatch = NO; 1188 START /* but we can start with one! */ 1189 traverse(pat, e->l); 1190 break; 1191 default: 1192 FINISH 1193 pat->fullmatch = NO; 1194 START 1195 break; 1196 } 1197 } 1198 1199 static re_bm * 1200 re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap) 1201 { 1202 int j; 1203 int delta[256]; 1204 re_bm *b; 1205 1206 b = (re_bm *)egmalloc(sizeof (*b)); 1207 1208 b->patlen = pe - pb; 1209 b->cmap = cmap; 1210 b->bmpat = pb; 1211 1212 delta_2(b); 1213 1214 for (j = 0; j < 256; j++) 1215 delta[j] = b->patlen; 1216 1217 for (pe--; pb < pe; pb++) 1218 delta[b->cmap[*pb]] = pe - pb; 1219 1220 delta[b->cmap[*pb]] = LARGE; 1221 1222 for (j = 0; j < 256; j++) 1223 b->delta0[j] = delta[b->cmap[j]]; 1224 return (b); 1225 } 1226 1227 static void 1228 delta_2(re_bm *b) 1229 { 1230 int m = b->patlen; 1231 int i, k, j; 1232 1233 b->delta2 = (int *)egmalloc(m * sizeof (int)); 1234 1235 for (j = 0; j < m; j++) { 1236 k = 1; 1237 again: 1238 while (k <= j && b->bmpat[j-k] == b->bmpat[j]) k++; 1239 1240 for (i = j + 1; i < m; i++) { 1241 if (k <= i && b->bmpat[i-k] != b->bmpat[i]) { 1242 k++; 1243 goto again; 1244 } 1245 } 1246 b->delta2[j] = k + m - j - 1; 1247 } 1248 } 1249 1250 static BOOL 1251 re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb, uchar_t **me) 1252 { 1253 re_bm *b = pat->bm_ptr; 1254 uchar_t *sp; 1255 int k; 1256 1257 s += b->patlen - 1; 1258 while ((unsigned long)s < (unsigned long)e) { 1259 while ((unsigned long)(s += b->delta0[*s]) < (unsigned long)e) 1260 ; 1261 if ((unsigned long)s < (unsigned long)(e + b->patlen)) 1262 return (NO); /* no match */ 1263 s -= LARGE; 1264 for (k = b->patlen-2, sp = s-1; k >= 0; k--) { 1265 if (b->cmap[*sp--] != b->bmpat[k]) break; 1266 } 1267 if (k < 0) { 1268 *mb = ++sp; 1269 *me = s+1; 1270 if (grepmatch(pat, mb, me)) 1271 return (YES); 1272 s = *mb; 1273 } else if (k < 0) { 1274 s++; 1275 } else { 1276 int j; 1277 j = b->delta2[k]; 1278 k = b->delta0[*++sp]; 1279 if ((j > k) || (k == (int)LARGE)) 1280 k = j; 1281 s = sp + k; 1282 } 1283 } 1284 return (NO); 1285 } 1286 1287 static re_cw * 1288 re_recw(re_re *r, uchar_t *map) 1289 { 1290 Expr *e, *root = r->root; 1291 re_cw *pat; 1292 uchar_t *buf; 1293 1294 if (root->type != EOP) 1295 return (NULL); 1296 e = root->l; 1297 pat = re_cwinit(map); 1298 buf = (uchar_t *)egmalloc(20000 * sizeof (uchar_t)); 1299 if (!altlist(e, buf, pat)) { 1300 return (NULL); 1301 } 1302 re_cwcomp(pat); 1303 return (pat); 1304 } 1305 1306 static BOOL 1307 altlist(Expr *e, uchar_t *buf, re_cw *pat) 1308 { 1309 if (e->type == Alternate) 1310 return ((BOOL)(altlist(e->l, buf, pat) && 1311 altlist(e->r, buf, pat))); 1312 return (word(e, buf, pat)); 1313 } 1314 1315 static BOOL 1316 word(Expr *e, uchar_t *buf, re_cw *pat) 1317 { 1318 static uchar_t *p; 1319 1320 if (buf) p = buf; 1321 1322 if (e->type == Cat) { 1323 if (!word(e->l, (uchar_t *)NULL, pat)) 1324 return (NO); 1325 if (!word(e->r, (uchar_t *)NULL, pat)) 1326 return (NO); 1327 } else if (e->type == Literal) 1328 *p++ = e->lit; 1329 else 1330 return (NO); 1331 1332 if (buf) 1333 re_cwadd(pat, buf, p); 1334 return (YES); 1335 } 1336 1337 static re_cw * 1338 re_cwinit(uchar_t *cmap) 1339 { 1340 re_cw *c; 1341 1342 froot = NULL; 1343 next_node = NULL; 1344 next_link = NULL; 1345 c = (re_cw *)egmalloc(sizeof (*c)); 1346 c->nodeid = 0; 1347 c->maxdepth = 0; 1348 c->mindepth = 10000; 1349 c->root = newnode(c, 0); 1350 c->cmap = cmap; 1351 return (c); 1352 } 1353 1354 static void 1355 re_cwadd(re_cw *c, uchar_t *s, uchar_t *e) 1356 { 1357 Node *p, *state; 1358 Link *l; 1359 int depth; 1360 1361 state = c->root; 1362 while (s <= --e) { 1363 for (l = state->alts; l; l = l->next) 1364 if (l->lit == *e) 1365 break; 1366 if (l == NULL) 1367 break; 1368 else 1369 state = l->node; 1370 } 1371 if (s <= e) { 1372 depth = state->d+1; 1373 l = newlink(*e--, p = newnode(c, depth++)); 1374 l->next = state->alts; 1375 state->alts = l; 1376 state = p; 1377 while (s <= e) { 1378 state->alts = newlink(*e--, p = newnode(c, depth++)); 1379 state = p; 1380 } 1381 if (c->mindepth >= depth) c->mindepth = depth-1; 1382 } 1383 state->out = 1; 1384 } 1385 1386 #ifdef DEBUG 1387 static 1388 pr(Node *n) 1389 { 1390 Link *l; 1391 1392 printf("%d[%d,%d]: ", n->id, n->shift1, n->shift2); 1393 for (l = n->alts; l; l = l->next) { 1394 printf("edge from \"%d\" to \"%d\" label {\"%c\"};", 1395 n->id, l->node->id, l->lit); 1396 if (l->node->out) { 1397 printf(" draw \"%d\" as Doublecircle;", l->node->id); 1398 } 1399 if (l->node->fail) { 1400 printf(" edge from \"%d\" to \"%d\" dashed;", 1401 l->node->id, l->node->fail->id); 1402 } 1403 printf("\n"); 1404 pr(l->node); 1405 } 1406 } 1407 #endif 1408 1409 static void 1410 fail(Node *root) 1411 { 1412 Link *qhead = NULL, *qtail = NULL; 1413 Link *l, *ll; 1414 Link *t; 1415 Node *state, *r, *s; 1416 int a; 1417 1418 for (l = root->alts; l; l = l->next) { 1419 ADD(l->node); 1420 l->node->fail = root; 1421 } 1422 while (qhead) { 1423 r = qhead->node; 1424 DEL(); 1425 for (l = r->alts; l; l = l->next) { 1426 s = l->node; 1427 a = l->lit; 1428 ADD(s); 1429 state = r->fail; 1430 while (state) { 1431 for (ll = state->alts; ll; ll = ll->next) 1432 if (ll->lit == a) 1433 break; 1434 if (ll || (state == root)) { 1435 if (ll) 1436 state = ll->node; 1437 /* 1438 * do it here as only other exit is 1439 * state 0 1440 */ 1441 if (state->out) { 1442 s->out = 1; 1443 } 1444 break; 1445 } else 1446 state = state->fail; 1447 } 1448 s->fail = state; 1449 } 1450 } 1451 zeroroot(root, root); 1452 } 1453 1454 static void 1455 zeroroot(Node *root, Node *n) 1456 { 1457 Link *l; 1458 1459 if (n->fail == root) 1460 n->fail = NULL; 1461 for (l = n->alts; l; l = l->next) 1462 zeroroot(root, l->node); 1463 } 1464 1465 static void 1466 shift(re_cw *c) 1467 { 1468 Link *qhead = NULL, *qtail = NULL; 1469 Link *l; 1470 Link *t; 1471 Node *state, *r, *s; 1472 int k; 1473 1474 for (k = 0; k < 256; k++) 1475 c->step[k] = c->mindepth+1; 1476 c->root->shift1 = 1; 1477 c->root->shift2 = c->mindepth; 1478 for (l = c->root->alts; l; l = l->next) { 1479 l->node->shift2 = c->root->shift2; 1480 ADD(l->node); 1481 l->node->fail = NULL; 1482 } 1483 while (qhead) { 1484 r = qhead->node; 1485 DEL(); 1486 r->shift1 = c->mindepth; 1487 r->shift2 = c->mindepth; 1488 if ((state = r->fail) != NULL) { 1489 do { 1490 k = r->d - state->d; 1491 if (k < state->shift1) { 1492 state->shift1 = k; 1493 } 1494 if (r->out && (k < state->shift2)) { 1495 state->shift2 = k; 1496 } 1497 } while ((state = state->fail) != NULL); 1498 } 1499 for (l = r->alts; l; l = l->next) { 1500 s = l->node; 1501 ADD(s); 1502 } 1503 } 1504 shiftprop(c, c->root); 1505 shifttab(c->root); 1506 c->step[0] = 1; 1507 } 1508 1509 static void 1510 shifttab(Node *n) 1511 { 1512 Link *l; 1513 Node **nn; 1514 1515 n->tab = nn = (Node **)egmalloc(256 * sizeof (Node *)); 1516 (void) memset((char *)n->tab, 0, 256 * sizeof (Node *)); 1517 for (l = n->alts; l; l = l->next) 1518 nn[l->lit] = l->node; 1519 } 1520 1521 static void 1522 shiftprop(re_cw *c, Node *n) 1523 { 1524 Link *l; 1525 Node *nn; 1526 1527 for (l = n->alts; l; l = l->next) { 1528 if (c->step[l->lit] > l->node->d) 1529 c->step[l->lit] = l->node->d; 1530 nn = l->node; 1531 if (n->shift2 < nn->shift2) 1532 nn->shift2 = n->shift2; 1533 shiftprop(c, nn); 1534 } 1535 } 1536 1537 static void 1538 re_cwcomp(re_cw *c) 1539 { 1540 fail(c->root); 1541 shift(c); 1542 } 1543 1544 static BOOL 1545 re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb, uchar_t **me) 1546 { 1547 Node *state; 1548 Link *l; 1549 uchar_t *sp; 1550 uchar_t *s; 1551 uchar_t *e; 1552 Node *ostate; 1553 int k; 1554 re_cw *c = pat->cw_ptr; 1555 uchar_t fake[1]; 1556 uchar_t mappedsp; 1557 1558 fake[0] = 0; 1559 state = c->root; 1560 1561 s = rs+c->mindepth-1; 1562 e = re; 1563 while (s < e) { 1564 /* scan */ 1565 for (sp = s; (ostate = state) != NULL; ) { 1566 mappedsp = c->cmap[*sp]; 1567 if (state->tab) { 1568 if ((state = state->tab[mappedsp]) == NULL) 1569 goto nomatch; 1570 } else { 1571 for (l = state->alts; ; l = l->next) { 1572 if (l == NULL) 1573 goto nomatch; 1574 if (l->lit == mappedsp) { 1575 state = l->node; 1576 break; 1577 } 1578 } 1579 } 1580 if (state->out) { 1581 *mb = sp; 1582 *me = s+1; 1583 if (fixloc(mb, me)) 1584 return (YES); 1585 } 1586 if (--sp < rs) { 1587 sp = fake; 1588 break; 1589 } 1590 } 1591 nomatch: 1592 k = c->step[c->cmap[*sp]] - ostate->d - 1; 1593 if (k < ostate->shift1) 1594 k = ostate->shift1; 1595 if (k > ostate->shift2) 1596 k = ostate->shift2; 1597 s += k; 1598 state = c->root; 1599 } 1600 return (NO); 1601 } 1602 1603 static Node * 1604 newnode(re_cw *c, int d) 1605 { 1606 static Node *lim = NULL; 1607 static int incr = 256; 1608 1609 if (!next_node) lim = NULL; 1610 if (next_node == lim) { 1611 next_node = (Node *)egmalloc(incr * sizeof (Node)); 1612 lim = next_node + incr; 1613 } 1614 next_node->d = d; 1615 if (d > c->maxdepth) c->maxdepth = d; 1616 next_node->id = c->nodeid++; 1617 next_node->alts = NULL; 1618 next_node->tab = NULL; 1619 next_node->out = 0; 1620 return (next_node++); 1621 } 1622 1623 static Link * 1624 newlink(uchar_t lit, Node *n) 1625 { 1626 static Link *lim = NULL; 1627 static int incr = 256; 1628 1629 if (!next_link) lim = NULL; 1630 if (next_link == lim) { 1631 next_link = (Link *)egmalloc(incr * sizeof (Link)); 1632 lim = next_link + incr; 1633 } 1634 next_link->lit = lit; 1635 next_link->node = n; 1636 next_link->next = NULL; 1637 return (next_link++); 1638 } 1639 1640 int 1641 egrep(char *f, FILE *o, char *fo) 1642 { 1643 uchar_t buff[MAXBUFSIZE + ESIZE]; 1644 1645 buffer = buff; 1646 *buffer++ = NL; /* New line precedes buffer to prevent runover */ 1647 file = f; 1648 output = o; 1649 format = fo; 1650 return (execute()); 1651 } 1652 1653 static int 1654 execute(void) 1655 { 1656 LINE current; 1657 BOOL matched; 1658 BOOL all_searched; /* file being matched until end */ 1659 1660 if ((file_desc = open(file, O_RDONLY)) < 0) { 1661 return (-1); 1662 } 1663 init_file(¤t); 1664 /* while there is more get more text from the data stream */ 1665 for (;;) { 1666 all_searched = NO; 1667 1668 /* 1669 * Find next new-line in buffer. 1670 * Begin after previous new line character 1671 */ 1672 1673 current.prntbuf = current.newline + 1; 1674 current.ln++; /* increment line number */ 1675 1676 if (current.prntbuf < bufend) { 1677 /* There is more text in buffer */ 1678 1679 /* 1680 * Take our next 1681 * "line" as the entire remaining buffer. 1682 * However, if there is more of the file yet 1683 * to be read in, exclude any incomplete 1684 * line at end. 1685 */ 1686 if (file_stat == NO_MORE) { 1687 all_searched = YES; 1688 current.newline = bufend - 1; 1689 if (*current.newline != NL) { 1690 current.newline = bufend; 1691 } 1692 } else { 1693 /* 1694 * Find end of the last 1695 * complete line in the buffer. 1696 */ 1697 current.newline = bufend; 1698 while (*--current.newline != NL) { 1699 } 1700 if (current.newline < current.prntbuf) { 1701 /* end not found */ 1702 current.newline = bufend; 1703 } 1704 } 1705 } else { 1706 /* There is no more text in the buffer. */ 1707 current.newline = bufend; 1708 } 1709 if (current.newline >= bufend) { 1710 /* 1711 * There is no more text in the buffer, 1712 * or no new line was found. 1713 */ 1714 switch (file_stat) { 1715 case MORE: /* file partly unread */ 1716 case BEGIN: 1717 fgetfile(¤t); 1718 1719 current.ln--; 1720 continue; /* with while loop */ 1721 case NO_MORE: 1722 break; 1723 } 1724 /* Nothing more to read in for this file. */ 1725 if (current.newline <= current.prntbuf) { 1726 /* Nothing in the buffer, either */ 1727 /* We are done with the file. */ 1728 current.ln--; 1729 break; /* out of while loop */ 1730 } 1731 /* There is no NL at the end of the file */ 1732 } 1733 1734 matched = pattern_match(&match_pattern, ¤t); 1735 if (matched) { 1736 int nc; 1737 1738 get_ncount(¤t, match_pattern.loc1); 1739 get_line(¤t, match_pattern.loc2); 1740 1741 nc = current.newline + 1 - current.prntbuf; 1742 (void) fprintf(output, format, file, current.ln); 1743 (void) fwrite((char *)current.prntbuf, 1, nc, output); 1744 } else { 1745 if (all_searched) 1746 break; /* out of while loop */ 1747 1748 get_ncount(¤t, current.newline + 1); 1749 } 1750 } 1751 1752 (void) close(file_desc); 1753 return (0); 1754 } 1755 1756 static void 1757 init_file(LINE *cur_ptr) 1758 { 1759 file_stat = BEGIN; 1760 cur_ptr->ln = 0; 1761 bufend = buffer; 1762 cur_ptr->newline = buffer - 1; 1763 } 1764 1765 static BOOL 1766 pattern_match(PATTERN *pat, LINE *lptr) 1767 { 1768 if ((*pat->procfn)(pat, lptr->prntbuf - 1, lptr->newline + 1, 1769 &pat->loc1, &pat->loc2)) { 1770 return (YES); 1771 } else { 1772 pat->loc1 = lptr->prntbuf; 1773 pat->loc2 = lptr->newline - 1; 1774 return (NO); 1775 } 1776 } /* end of function pattern_match() */ 1777 1778 static void 1779 fgetfile(LINE *cur_ptr) 1780 { 1781 /* 1782 * This function reads as much of the current file into the buffer 1783 * as will fit. 1784 */ 1785 int bytes; /* bytes read in current buffer */ 1786 int bufsize = MAXBUFSIZE; /* free space in data buffer */ 1787 int save_current; 1788 uchar_t *begin = cur_ptr->prntbuf; 1789 1790 /* 1791 * Bytes of current incomplete line, if any, save_current in buffer. 1792 * These must be saved. 1793 */ 1794 save_current = (int)(bufend - begin); 1795 1796 if (file_stat == MORE) { 1797 /* 1798 * A portion of the file fills the buffer. We must clear 1799 * out the dead wood to make room for more of the file. 1800 */ 1801 1802 int k = 0; 1803 1804 k = begin - buffer; 1805 if (!k) { 1806 /* 1807 * We have one humungous current line, 1808 * which fills the whole buffer. 1809 * Toss it. 1810 */ 1811 begin = bufend; 1812 k = begin - buffer; 1813 save_current = 0; 1814 } 1815 1816 begin = buffer; 1817 bufend = begin + save_current; 1818 1819 bufsize = MAXBUFSIZE - save_current; 1820 1821 if (save_current > 0) { 1822 /* 1823 * Must save portion of current line. 1824 * Copy to beginning of buffer. 1825 */ 1826 (void) memmove(buffer, buffer + k, save_current); 1827 } 1828 } 1829 1830 /* Now read in the file. */ 1831 1832 do { 1833 bytes = read(file_desc, (char *)bufend, (unsigned int)bufsize); 1834 if (bytes < 0) { 1835 /* can't read any more of file */ 1836 bytes = 0; 1837 } 1838 bufend += bytes; 1839 bufsize -= bytes; 1840 } while (bytes > 0 && bufsize > 0); 1841 1842 1843 if (begin >= bufend) { 1844 /* No new lines or incomplete line in buffer */ 1845 file_stat = NO_MORE; 1846 } else if (bufsize) { 1847 /* Still space in the buffer, so we have read entire file */ 1848 file_stat = NO_MORE; 1849 } else { 1850 /* We filled entire buffer, so there may be more yet to read */ 1851 file_stat = MORE; 1852 } 1853 /* Note: bufend is 1 past last good char */ 1854 *bufend = NL; /* Sentinel new-line character */ 1855 /* Set newline to character preceding the current line */ 1856 cur_ptr->newline = begin - 1; 1857 } 1858 1859 static void 1860 get_line(LINE *cur_ptr, uchar_t *s) 1861 { 1862 while (*s++ != NL) { 1863 } 1864 cur_ptr->newline = --s; 1865 cur_ptr->ln++; 1866 } 1867 1868 static void 1869 get_ncount(LINE *cur_ptr, uchar_t *s) 1870 { 1871 uchar_t *p = cur_ptr->prntbuf; 1872 1873 while (*--s != NL) { 1874 } 1875 cur_ptr->newline = s++; 1876 while ((s > p) && 1877 (p = (uchar_t *)memchr((char *)p, NL, s - p)) != NULL) { 1878 cur_ptr->ln++; 1879 ++p; 1880 } 1881 cur_ptr->ln--; 1882 cur_ptr->prntbuf = s; 1883 } 1884