1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /* Copyright (c) 1990 AT&T */
23 /* All Rights Reserved */
24
25 /*
26 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 /*
31 * cscope - interactive C symbol or text cross-reference
32 *
33 * text searching functions
34 */
35
36 #include <fcntl.h>
37 #include <setjmp.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <memory.h>
41 #include <ctype.h>
42 #include <unistd.h>
43 #include "library.h"
44
45 typedef enum {
46 NO = 0,
47 YES = 1
48 } BOOL;
49
50 typedef struct re_bm {
51 int delta0[256];
52 int *delta2;
53 uchar_t *cmap;
54 uchar_t *bmpat;
55 int patlen;
56 } re_bm;
57
58 typedef struct Link {
59 uchar_t lit;
60 struct Node *node;
61 struct Link *next;
62 } Link;
63
64 typedef struct Node {
65 short out;
66 short d;
67 short shift1;
68 short shift2;
69 long id;
70 struct Node **tab;
71 Link *alts;
72 struct Node *fail;
73 } Node;
74
75 typedef struct re_cw {
76 int maxdepth, mindepth;
77 long nodeid;
78 int step[256];
79 uchar_t *cmap;
80 struct Node *root;
81 } re_cw;
82
83 typedef enum {
84 /* lit expression types */
85 Literal, Dot, Charclass, EOP,
86
87 /* non-lit expression types */
88 Cat, Alternate, Star, Plus, Quest,
89
90 /* not really expression types, just helping */
91 Lpar, Rpar, Backslash
92
93 } Exprtype;
94
95 typedef int ID;
96
97 typedef struct Expr {
98 Exprtype type; /* Type of expression (in grammar) */
99 ID id; /* unique ID of lit expression */
100 int lit; /* Literal character or tag */
101 int flen; /* Number of following lit expressions */
102 ID *follow; /* Array of IDs of following lit expressions */
103 struct Expr *l; /* pointer to Left child (or ccl count) */
104 struct Expr *r; /* pointer to Right child (or ccl mask) */
105 struct Expr *parent; /* pointer to Parent */
106 } Expr;
107
108 typedef struct State {
109 struct State *tab[256];
110 int cnt; /* number of matched chars on full match, -1 otherwise */
111 int npos; /* number of IDs in position set for this state. */
112 int pos; /* index into posbase for beginning of IDs */
113 } State;
114
115 typedef struct FID {
116 ID id; /* Lit Expression id */
117 int fcount; /* Number of Lit exp matches before this one. */
118 } FID;
119
120 typedef struct Positionset {
121 int count; /* Number of lit exps in position set */
122 ID last; /* ID of last lit exp in position set */
123 FID *base; /* array of MAXID FIDS */
124 /* 0 means not in position set */
125 /* -1 means first in position set */
126 /* n (>0) is ID of prev member of position set. */
127 } Positionset;
128
129 typedef struct re_re {
130 FID *posbase; /* Array of IDs from all states */
131 int nposalloc; /* Allocated size of posbase */
132 int posnext; /* Index into free space in posbase */
133 int posreset; /* Index into end of IDs for initial state in posbase */
134 int maxid; /* Number of (also maximum ID of) lit expressions */
135 Expr *root; /* Pointer to root (EOP) expression */
136 Expr **ptr; /* Pointer to array of ptrs to lit expressions. */
137 uchar_t *cmap; /* Character mapping array */
138 Positionset firstpos;
139 Positionset tmp;
140 int nstates; /* Number of current states defined */
141 int statelim; /* Limit on number of states before flushing */
142 State *states; /* Array of states */
143 State istate; /* Initial state */
144 } re_re;
145
146 typedef struct {
147 uchar_t *cmap;
148 re_re *re_ptr;
149 re_bm *bm_ptr;
150 re_cw *cw_ptr;
151 BOOL fullmatch;
152 BOOL (*procfn)();
153 BOOL (*succfn)();
154 uchar_t *loc1;
155 uchar_t *loc2;
156 uchar_t *expression;
157 } PATTERN;
158
159 typedef enum {
160 BEGIN, /* File is not yet in buffer at all */
161 MORE, /* File is partly in buffer */
162 NO_MORE /* File has been completely read into buffer */
163 } FILE_STAT;
164
165 typedef struct {
166 uchar_t *prntbuf; /* current line of input from data file */
167 uchar_t *newline; /* end of line (real or sentinel \n) */
168 long ln; /* line number */
169 } LINE;
170
171 #define NL '\n'
172
173 #define CCL_SIZ 32
174 #define CCL_SET(a, c) ((a)[(c) >> 3] |= bittab[(c) & 07])
175 #define CCL_CLR(a, c) ((a)[(c) >> 3] &= ~bittab[(c) & 07])
176 #define CCL_CHK(a, c) ((a)[(c) >> 3] & bittab[(c) & 07])
177
178 #define ESIZE (BUFSIZ)
179 #define MAXBUFSIZE (64*BUFSIZ)
180
181 #define MAXMALLOCS 1024
182 #define MAXLIT 256 /* is plenty big enough */
183 #define LARGE MAXBUFSIZE+ESIZE+2
184
185 #define CLEAR(r, rps) (void) memset((char *)(rps)->base, 0, \
186 (int)((r)->maxid * sizeof (FID))), \
187 (rps)->count = 0, (rps)->last = -1
188 #define SET(rps, n, cnt) { \
189 if ((rps)->base[n].id == 0) {\
190 (rps)->count++;\
191 (rps)->base[n].fcount = (cnt);\
192 (rps)->base[n].id = (rps)->last;\
193 (rps)->last = (n);\
194 } else if ((cnt) > (rps)->base[n].fcount) {\
195 (rps)->base[n].fcount = (cnt);\
196 }}
197
198 #define START { _p = tmp; }
199 #define ADDL(c) { if (_p >= &tmp[MAXLIT]) _p--; *_p++ = c; }
200 #define FINISH { ADDL(0) if ((_p-tmp) > bestlen) \
201 (void) memmove(best, tmp, bestlen = _p-tmp); }
202
203
204 #define NEW(N) (froot?(t = froot, froot = froot->next, t->next = NULL, \
205 t->node = N, t): newlink(0, N))
206 #define ADD(N) if (qtail) qtail = qtail->next = NEW(N); \
207 else qtail = qhead = NEW(N)
208 #define DEL() { Link *_l = qhead; if ((qhead = qhead->next) == NULL) \
209 { qtail = NULL; } _l->next = froot; froot = _l; }
210
211 static uchar_t *buffer;
212 static uchar_t *bufend;
213 static FILE *output;
214 static char *format;
215 static char *file;
216 static int file_desc;
217 static FILE_STAT file_stat;
218 static PATTERN match_pattern;
219 static uchar_t char_map[2][256];
220 static int iflag;
221 static Exprtype toktype;
222 static int toklit, parno, maxid;
223 static uchar_t tmp[MAXLIT], best[MAXLIT];
224 static uchar_t *_p;
225 static int bestlen;
226 static Node *next_node;
227 static Link *froot, *next_link;
228 static jmp_buf env;
229
230 static int nmalloc;
231 static uchar_t *mallocs[MAXMALLOCS];
232
233 static uchar_t bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
234
235 #ifdef DEBUG
236 #define TRACE(n) (n < debug)
237 #define EPRINTSIZE 50000
238 static void spr(int c, int *p, uchar_t *buf);
239 void epr(Expr *e, uchar_t *res);
240 static int debug = 12;
241 #endif
242
243 static void init_file(LINE *cur_ptr);
244 static void get_line(LINE *cur_ptr, uchar_t *s);
245 static void get_ncount(LINE *cur_ptr, uchar_t *s);
246 static int execute(void);
247 static State *startstate(re_re *r);
248 static State *stateof(re_re *r, Positionset *ps);
249 static State *nextstate(re_re *r, State *s, int a);
250 static State *addstate(re_re *r, Positionset *ps, int cnt);
251 static BOOL match(Expr *e, int a);
252 static BOOL first_lit(Positionset *fpos, Expr *e);
253 static void eptr(re_re *r, Expr *e);
254 static void efollow(re_re *r, Positionset *fpos, Expr *e);
255 static void follow(Positionset *fpos, Expr *e);
256 static void followstate(re_re *r, State *s, int a, Positionset *fpos);
257 static Expr *eall(re_re *r, PATTERN *pat);
258 static Expr *d0(re_re *r, PATTERN *pat);
259 static Expr *d1(re_re *r, PATTERN *pat);
260 static Expr *d2(re_re *r, PATTERN *pat);
261 static Expr *d3(re_re *r, PATTERN *pat);
262 static Expr *newexpr(Exprtype t, int lit, Expr *left, Expr *right);
263 static void lex(re_re *r, PATTERN *pat);
264 static int re_lit(PATTERN *pat, uchar_t **b, uchar_t **e);
265 static void traverse(PATTERN *pat, Expr *e);
266 static int ccl(PATTERN *pat, uchar_t *tab);
267 static BOOL altlist(), word();
268 static BOOL altlist(Expr *e, uchar_t *buf, re_cw *pat);
269 static Node *newnode(re_cw *c, int d);
270 static Link *newlink(uchar_t lit, Node *n);
271 static void fail(Node *root);
272 static void zeroroot(Node *root, Node *n);
273 static void shift(re_cw *c);
274 static void shifttab(Node *n);
275 static void shiftprop(re_cw *c, Node *n);
276 static void delta_2(re_bm *b);
277 static int getstate(re_re *r, Positionset *ps);
278 static void savestate(re_re *r);
279 static void stateinit(re_re *r);
280 static re_bm *re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap);
281 static re_cw *re_cwinit(uchar_t *cmap);
282 static re_cw *re_recw(re_re *r, uchar_t *map);
283 static re_re *egprep(PATTERN *pat);
284 static void re_cwadd(re_cw *c, uchar_t *s, uchar_t *e);
285 static void re_cwcomp(re_cw *c);
286 static void eginit(re_re *r);
287 static BOOL re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb,
288 uchar_t **me);
289 static BOOL re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb,
290 uchar_t **me);
291 static BOOL re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb,
292 uchar_t **me);
293 static uchar_t *egmalloc(size_t n);
294 static void fgetfile(LINE *cur_ptr);
295 static void dogre(PATTERN *pat);
296 static BOOL pattern_match(PATTERN *pat, LINE *lptr);
297 static BOOL fixloc(uchar_t **mb, uchar_t **me);
298 static BOOL grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me);
299 static void err(char *s);
300
301 static char *message;
302
303 void
egrepcaseless(int i)304 egrepcaseless(int i)
305 {
306 iflag = i; /* simulate "egrep -i" */
307 }
308
309 char *
egrepinit(char * expression)310 egrepinit(char *expression)
311 {
312 static int firsttime = 1;
313 int i;
314
315 if (firsttime) {
316 firsttime = 0;
317 for (i = 0; i < 256; i++) {
318 char_map[0][i] = (uchar_t)i;
319 char_map[1][i] = tolower(i);
320 }
321 }
322 for (i = 0; i < nmalloc; i ++)
323 free(mallocs[i]);
324 nmalloc = 0;
325 message = NULL;
326
327 match_pattern.expression = (uchar_t *)expression;
328 match_pattern.cmap = char_map[iflag];
329 if (setjmp(env) == 0) {
330 dogre(&match_pattern);
331 #ifdef DEBUG
332 {
333 PATTERN *p = match_pattern;
334 if (p->procfn == re_bmexec)
335 if (!p->fullmatch)
336 if (p->succfn == re_reexec)
337 printf("PARTIAL BOYER_MOORE\n");
338 else
339 printf("PARTIAL B_M with GREP\n");
340 else
341 printf("FULL BOYER_MOORE\n");
342 else if (p->procfn == re_cwexec)
343 printf("C_W\n");
344 else
345 printf("GENERAL\n");
346 }
347 #endif
348 }
349 return (message);
350 }
351
352 static void
dogre(PATTERN * pat)353 dogre(PATTERN *pat)
354 {
355 uchar_t *lb, *le;
356
357 #ifdef DEBUG
358 printf("PATTERN %s\n", pat->expression);
359 #endif
360 pat->re_ptr = egprep(pat);
361 bestlen = re_lit(pat, &lb, &le);
362
363 if (bestlen && pat->fullmatch) { /* Full Boyer Moore */
364 #ifdef DEBUG
365 printf("BESTLEN %d\n", bestlen);
366 {
367 uchar_t *p;
368 for (p = lb; p < le; p++) printf("%c", *p);
369 printf("\n");
370 }
371 #endif
372 pat->bm_ptr = re_bmcomp(lb, le, pat->cmap);
373 pat->procfn = re_bmexec;
374 return;
375 }
376 if (bestlen > 1) {
377 /* Partial Boyer Moore */
378 pat->bm_ptr = re_bmcomp(lb, le, pat->cmap);
379 pat->procfn = re_bmexec;
380 pat->fullmatch = NO;
381 } else {
382 pat->fullmatch = YES;
383 if ((pat->cw_ptr = re_recw(pat->re_ptr, pat->cmap)) != NULL) {
384 pat->procfn = re_cwexec; /* CW */
385 return;
386 }
387 }
388 /* general egrep regular expression */
389 pat->succfn = re_reexec;
390
391 if (pat->fullmatch) {
392 pat->procfn = pat->succfn;
393 pat->succfn = NULL;
394 }
395 }
396
397 static BOOL
fixloc(uchar_t ** mb,uchar_t ** me)398 fixloc(uchar_t **mb, uchar_t **me)
399 {
400 /* Handle match to null string */
401
402 while (*me <= *mb)
403 (*me)++;
404
405 if (*(*me - 1) != NL)
406 while (**me != NL)
407 (*me)++;
408
409 /* Handle match to new-line only */
410
411 if (*mb == *me - 1 && **mb == NL) {
412 (*me)++;
413 }
414
415 /* Handle match including beginning or ending new-line */
416
417 if (**mb == NL)
418 (*mb)++;
419 if (*(*me - 1) == NL)
420 (*me)--;
421 return (YES);
422 }
423
424 static BOOL
grepmatch(PATTERN * pat,uchar_t ** mb,uchar_t ** me)425 grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me)
426 {
427 uchar_t *s, *f;
428
429 if (pat->fullmatch)
430 return (fixloc(mb, me));
431
432 for (f = *me - 1; *f != NL; f++) {
433 }
434 f++;
435 for (s = *mb; *s != NL; s--) {
436 }
437
438 if ((*pat->succfn)(pat, s, f, mb, me)) {
439 return (YES);
440 } else {
441 *mb = f;
442 return (NO);
443 }
444 }
445
446 static void
eginit(re_re * r)447 eginit(re_re *r)
448 {
449 unsigned int n;
450
451 r->ptr = (Expr **)egmalloc(r->maxid * sizeof (Expr *));
452 eptr(r, r->root);
453 n = r->maxid * sizeof (FID);
454 r->firstpos.base = (FID *)egmalloc(n);
455 r->tmp.base = (FID *)egmalloc(n);
456 CLEAR(r, &r->firstpos);
457 if (!first_lit(&r->firstpos, r->root->l)) {
458 /*
459 * This expression matches the null string!!!!
460 * Add EOP to beginning position set.
461 */
462 SET(&r->firstpos, r->root->id, 0)
463 /* (void) printf("first of root->l == 0, b=%s\n", b); */
464 }
465 stateinit(r);
466 (void) addstate(r, &r->firstpos, 0);
467 savestate(r);
468 }
469
470 static void
eptr(re_re * r,Expr * e)471 eptr(re_re *r, Expr *e)
472 {
473 if ((e->id < 0) || (e->id >= r->maxid)) {
474 err("internal error");
475 }
476 r->ptr[e->id] = e;
477 if (e->type != Charclass) {
478 if (e->l) eptr(r, e->l);
479 if (e->r) eptr(r, e->r);
480 }
481 }
482
483 static BOOL
re_reexec(PATTERN * pat,uchar_t * b,uchar_t * e,uchar_t ** mb,uchar_t ** me)484 re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, uchar_t **me)
485 {
486 re_re *r = pat->re_ptr;
487 State *s, *t;
488
489 #ifdef DEBUG
490 if (TRACE(10)) {
491 uchar_t buf[EPRINTSIZE];
492 epr(r->root, buf);
493 (void) printf("expr='%s'\n", buf);
494 }
495 #endif
496 s = startstate(r);
497
498 for (;;) {
499 uchar_t c;
500
501 if (s->cnt >= 0) {
502 #ifdef DEBUG
503 if (TRACE(6))
504 (void) printf("match at input '%s'\n", b);
505 #endif
506 *mb = b - s->cnt;
507 *me = b;
508 if (fixloc(mb, me))
509 return (YES);
510 }
511
512 if (b >= e) break;
513 c = pat->cmap[*b];
514 #ifdef DEBUG
515 if (TRACE(4))
516 (void) printf("state %d: char '%c'\n", s-r->states, *b);
517 #endif
518 if ((t = s->tab[c]) != NULL) s = t;
519 else s = nextstate(r, s, (int)c);
520 b++;
521 }
522 #ifdef DEBUG
523 if (TRACE(3)) {
524 uchar_t buf[EPRINTSIZE];
525
526 epr(r->root, buf);
527 (void) printf("pat = %s\n", buf);
528 }
529 #endif
530 return (NO);
531 }
532
533 static BOOL
match(Expr * e,int a)534 match(Expr *e, int a)
535 {
536 switch (e->type) {
537 case Dot: return ((BOOL)(a != NL));
538 case Literal: return ((BOOL)(a == e->lit));
539 case Charclass: return ((BOOL)(CCL_CHK((uchar_t *)e->r, a)));
540 default: return (NO);
541 }
542 }
543
544 /*
545 * generates the followset for a node in fpos
546 */
547 static void
follow(Positionset * fpos,Expr * e)548 follow(Positionset *fpos, Expr *e)
549 {
550 Expr *p;
551
552 if (e->type == EOP)
553 return;
554 else
555 p = e->parent;
556 switch (p->type) {
557 case EOP:
558 SET(fpos, p->id, 0)
559 break;
560 case Plus:
561 case Star:
562 (void) first_lit(fpos, e);
563 follow(fpos, p);
564 break;
565 case Quest:
566 case Alternate:
567 follow(fpos, p);
568 break;
569 case Cat:
570 if (e == p->r || !first_lit(fpos, p->r))
571 follow(fpos, p);
572 break;
573 default:
574 break;
575 }
576 }
577
578 /*
579 * first_lit returns NO if e is nullable and in the process,
580 * ets up fpos.
581 */
582 static BOOL
first_lit(Positionset * fpos,Expr * e)583 first_lit(Positionset *fpos, Expr *e)
584 {
585 BOOL k;
586
587 switch (e->type) {
588 case Literal:
589 case Dot:
590 case Charclass:
591 SET(fpos, e->id, 0)
592 return (YES);
593 case EOP:
594 case Star:
595 case Quest:
596 (void) first_lit(fpos, e->l);
597 return (NO);
598 case Plus:
599 return (first_lit(fpos, e->l));
600 case Cat:
601 return ((BOOL)(first_lit(fpos, e->l) || first_lit(fpos, e->r)));
602 case Alternate:
603 k = first_lit(fpos, e->r);
604 return ((BOOL)(first_lit(fpos, e->l) && k));
605 default:
606 err("internal error");
607 }
608 return (NO);
609 }
610
611 static void
efollow(re_re * r,Positionset * fpos,Expr * e)612 efollow(re_re *r, Positionset *fpos, Expr *e)
613 {
614 ID i, *p;
615
616 CLEAR(r, fpos);
617 follow(fpos, e);
618 e->flen = fpos->count;
619 e->follow = (ID *)egmalloc(e->flen * sizeof (ID));
620 p = e->follow;
621 #ifdef DEBUG
622 printf("ID = %d LIT %c FLEN = %d\n", e->id, e->lit, e->flen);
623 #endif
624 for (i = fpos->last; i > 0; i = fpos->base[i].id) {
625 *p++ = i;
626 #ifdef DEBUG
627 printf("FOLLOW ID = %d LIT %c\n", r->ptr[i]->id, r->ptr[i]->lit);
628 #endif
629 }
630 if (p != e->follow + e->flen) {
631 err("internal error");
632 }
633 }
634
635 static State *
addstate(re_re * r,Positionset * ps,int cnt)636 addstate(re_re *r, Positionset *ps, int cnt)
637 {
638 ID j;
639 FID *p, *q;
640 State *s;
641
642 if (cnt) {
643 s = r->states + getstate(r, ps);
644 (void) memset((char *)s->tab, 0, sizeof (s->tab));
645 s->cnt = r->istate.cnt;
646 } else {
647 s = &r->istate;
648 s->cnt = -1;
649 }
650 s->pos = r->posnext;
651 r->posnext += ps->count;
652 s->npos = ps->count;
653 p = r->posbase + s->pos;
654 for (j = ps->last; j > 0; p++, j = q->id) {
655 q = &ps->base[j];
656 p->id = j;
657 p->fcount = q->fcount;
658 if (p->id == r->root->id && s->cnt < p->fcount)
659 s->cnt = p->fcount;
660 }
661 #ifdef DEBUG
662 if (TRACE(3)) {
663 uchar_t buf[2000];
664 spr(s->npos, s->pos+r->posbase, buf);
665 (void) printf("new state[%d] %s%s\n", s-r->states, buf,
666 s->cnt?" cnt":"");
667 }
668 #endif
669 return (s);
670 }
671
672 static State *
nextstate(re_re * r,State * s,int a)673 nextstate(re_re *r, State *s, int a)
674 {
675 State *news;
676
677 CLEAR(r, &r->tmp);
678 followstate(r, s, a, &r->tmp);
679 if (s != &r->istate) followstate(r, &r->istate, a, &r->tmp);
680
681 #ifdef DEBUG
682 if (TRACE(5)) {
683 uchar_t buf[2000];
684 ppr(&r->tmp, buf);
685 (void) printf("nextstate(%d, '%c'): found %s\n", s-r->states,
686 a, buf);
687 }
688 #endif
689 if (r->tmp.count == 0)
690 news = &r->istate;
691 else if ((news = stateof(r, &r->tmp)) == NULL)
692 news = addstate(r, &r->tmp, 1);
693 s->tab[a] = news;
694 #ifdef DEBUG
695 if (TRACE(5)) {
696 (void) printf("nextstate(%d, '%c'): returning %ld\n",
697 s-r->states, a, news);
698 }
699 #endif
700 return (news);
701 }
702
703 static void
followstate(re_re * r,State * s,int a,Positionset * fpos)704 followstate(re_re *r, State *s, int a, Positionset *fpos)
705 {
706 int j;
707 ID *q, *eq;
708 Expr *e;
709
710 for (j = s->pos; j < (s->pos + s->npos); j++) {
711 e = r->ptr[r->posbase[j].id];
712 if (e->type == EOP) continue;
713 if (match(e, a)) {
714 if (e->follow == NULL) efollow(r, &r->firstpos, e);
715 for (q = e->follow, eq = q + e->flen; q < eq; q++) {
716 SET(fpos, *q, r->posbase[j].fcount + 1)
717 #ifdef DEBUG
718 printf("CHAR %c FC %c COUNT %d\n", a,
719 r->ptr[*q]->lit, r->posbase[j].fcount+1);
720 #endif
721 }
722 }
723 }
724 }
725
726 static uchar_t *
egmalloc(size_t n)727 egmalloc(size_t n)
728 {
729 uchar_t *x;
730
731 x = (uchar_t *)mymalloc(n);
732 mallocs[nmalloc++] = x;
733 if (nmalloc >= MAXMALLOCS)
734 nmalloc = MAXMALLOCS - 1;
735 return (x);
736 }
737
738 #ifdef DEBUG
739 void
ppr(Positionse * ps,char * p)740 ppr(Positionse *ps, char *p)
741 {
742 ID n;
743
744 if (ps->count < 1) {
745 (void) sprintf(p, "{}");
746 return;
747 }
748 *p++ = '{';
749 for (n = ps->last; n > 0; n = ps->base[n].id) {
750 (void) sprintf(p, "%d,", n);
751 p = strchr(p, 0);
752 }
753 p[-1] = '}';
754 }
755
756 void
epr(Expr * e,uchar_t * res)757 epr(Expr *e, uchar_t *res)
758 {
759 uchar_t r1[EPRINTSIZE], r2[EPRINTSIZE];
760 int i;
761
762 if (e == NULL) {
763 (void) sprintf(res, "!0!");
764 return;
765 }
766 switch (e->type) {
767 case Dot:
768 case Literal:
769 spr(e->flen, e->follow, r1);
770 (void) sprintf(res, "%c%s", e->lit, r1);
771 break;
772 case Charclass:
773 *res++ = '[';
774 for (i = 0; i < 256; i++)
775 if (CCL_CHK((uchar_t *)e->r, i)) {
776 *res++ = i;
777 }
778 *res++ = ']';
779 *res = '\0';
780 break;
781 case Cat:
782 epr(e->l, r1);
783 epr(e->r, r2);
784 (void) sprintf(res, "%s%s", r1, r2);
785 break;
786 case Alternate:
787 epr(e->l, r1);
788 epr(e->r, r2);
789 (void) sprintf(res, "(%s|%s)", r1, r2);
790 break;
791 case Star:
792 epr(e->l, r1);
793 (void) sprintf(res, "(%s)*", r1);
794 break;
795 case Plus:
796 epr(e->l, r1);
797 (void) sprintf(res, "(%s)+", r1);
798 break;
799 case Quest:
800 epr(e->l, r1);
801 (void) sprintf(res, "(%s)?", r1);
802 break;
803 case EOP:
804 epr(e->l, r1);
805 (void) sprintf(res, "%s<EOP>", r1);
806 break;
807 default:
808 (void) sprintf(res, "<undef type %d>", e->type);
809 err(res);
810 break;
811 }
812 }
813
814 static void
spr(int c,int * p,uchar_t * buf)815 spr(int c, int *p, uchar_t *buf)
816 {
817 if (c > 0) {
818 *buf++ = '{';
819 *buf = '\0';
820 while (--c > 0) {
821 (void) sprintf(buf, "%d,", *p++);
822 buf = strchr(buf, 0);
823 }
824 (void) sprintf(buf, "%d}", *p);
825 } else
826 (void) sprintf(buf, "{}");
827 }
828 #endif
829
830 static void
stateinit(re_re * r)831 stateinit(re_re *r)
832 {
833 /* CONSTANTCONDITION */
834 r->statelim = (sizeof (int) < 4 ? 32 : 128);
835 r->states = (State *)egmalloc(r->statelim * sizeof (State));
836
837 /* CONSTANTCONDITION */
838 r->nposalloc = (sizeof (int) < 4 ? 2048 : 8192);
839 r->posbase = (FID *)egmalloc(r->nposalloc * sizeof (FID));
840 r->nstates = r->posnext = 0;
841 }
842
843 static void
clrstates(re_re * r)844 clrstates(re_re *r)
845 {
846 r->nstates = 0; /* reclaim space for states and positions */
847 r->posnext = r->posreset;
848 (void) memset((char *)r->istate.tab, 0, sizeof (r->istate.tab));
849 }
850
851 static void
savestate(re_re * r)852 savestate(re_re *r)
853 {
854 r->posreset = r->posnext; /* save for reset */
855 }
856
857 static State *
startstate(re_re * r)858 startstate(re_re *r)
859 {
860 return (&r->istate);
861 }
862
863 static int
getstate(re_re * r,Positionset * ps)864 getstate(re_re *r, Positionset *ps)
865 {
866 if (r->nstates >= r->statelim ||
867 r->posnext + ps->count >= r->nposalloc) {
868 clrstates(r);
869 #ifdef DEBUG
870 printf("%d STATES FLUSHED\n", r->statelim);
871 #endif
872 }
873 return (r->nstates++);
874 }
875
876 static State *
stateof(re_re * r,Positionset * ps)877 stateof(re_re *r, Positionset *ps)
878 {
879 State *s;
880 int i;
881 FID *p, *e;
882
883 for (i = 0, s = r->states; i < r->nstates; i++, s++) {
884 if (s->npos == ps->count) {
885 for (p = s->pos+r->posbase, e = p+s->npos; p < e; p++)
886 if (ps->base[p->id].id == 0 ||
887 ps->base[p->id].fcount != p->fcount)
888 goto next;
889 return (s);
890 }
891 next:;
892 }
893 return (NULL);
894 }
895
896 static re_re *
egprep(PATTERN * pat)897 egprep(PATTERN *pat)
898 {
899 re_re *r;
900
901 r = (re_re *)egmalloc(sizeof (re_re));
902 (void) memset((char *)r, 0, sizeof (re_re));
903
904 pat->loc1 = pat->expression;
905 pat->loc2 = pat->expression + strlen((char *)pat->expression);
906
907 parno = 0;
908 maxid = 1;
909 r->cmap = pat->cmap;
910 lex(r, pat);
911 r->root = newexpr(EOP, '#', eall(r, pat), (Expr *)NULL);
912 r->maxid = maxid;
913
914 eginit(r);
915 return (r);
916 }
917
918 static Expr *
newexpr(Exprtype t,int lit,Expr * left,Expr * right)919 newexpr(Exprtype t, int lit, Expr *left, Expr *right)
920 {
921 Expr *e = (Expr *)egmalloc(sizeof (Expr));
922
923 e->type = t;
924 e->parent = NULL;
925 e->lit = lit;
926
927 if (e->lit) e->id = maxid++;
928 else e->id = 0;
929
930 if ((e->l = left) != NULL) {
931 left->parent = e;
932 }
933 if ((e->r = right) != NULL) {
934 right->parent = e;
935 }
936 e->follow = NULL;
937 e->flen = 0;
938 return (e);
939 }
940
941 static void
lex(re_re * r,PATTERN * pat)942 lex(re_re *r, PATTERN *pat)
943 {
944 if (pat->loc1 == pat->loc2) {
945 toktype = EOP;
946 toklit = -1;
947 } else switch (toklit = *pat->loc1++) {
948 case '.': toktype = Dot; break;
949 case '*': toktype = Star; break;
950 case '+': toktype = Plus; break;
951 case '?': toktype = Quest; break;
952 case '[': toktype = Charclass; break;
953 case '|': toktype = Alternate; break;
954 case '(': toktype = Lpar; break;
955 case ')': toktype = Rpar; break;
956 case '\\': toktype = Backslash;
957 if (pat->loc1 == pat->loc2) {
958 err("syntax error - missing character "
959 "after \\");
960 } else {
961 toklit = r->cmap[*pat->loc1++];
962 }
963 break;
964 case '^': case '$': toktype = Literal; toklit = NL; break;
965 default: toktype = Literal; toklit = r->cmap[toklit]; break;
966 }
967 }
968
969 static int
ccl(PATTERN * pat,uchar_t * tab)970 ccl(PATTERN *pat, uchar_t *tab)
971 {
972 int i;
973 int range = 0;
974 int lastc = -1;
975 int count = 0;
976 BOOL comp = NO;
977
978 (void) memset((char *)tab, 0, CCL_SIZ * sizeof (uchar_t));
979 if (*pat->loc1 == '^') {
980 pat->loc1++;
981 comp = YES;
982 }
983 if (*pat->loc1 == ']') {
984 uchar_t c = pat->cmap[*pat->loc1];
985 CCL_SET(tab, c);
986 lastc = *pat->loc1++;
987 }
988 /* scan for chars */
989 for (; (pat->loc1 < pat->loc2) && (*pat->loc1 != ']'); pat->loc1++) {
990 if (*pat->loc1 == '-') {
991 if (lastc < 0) CCL_SET(tab, pat->cmap['-']);
992 else range = 1;
993 continue;
994 }
995 if (range) {
996 for (i = *pat->loc1; i >= lastc; i--) {
997 CCL_SET(tab, pat->cmap[i]);
998 }
999 } else {
1000 uchar_t c = pat->cmap[*pat->loc1];
1001 CCL_SET(tab, c);
1002 }
1003
1004 range = 0;
1005
1006 lastc = *pat->loc1;
1007 }
1008 if (range) CCL_SET(tab, pat->cmap['-']);
1009
1010 if (pat->loc1 < pat->loc2) pat->loc1++;
1011 else err("syntax error - missing ]");
1012
1013 if (comp) {
1014 CCL_SET(tab, pat->cmap[NL]);
1015 for (i = 0; i < CCL_SIZ; i++) tab[i] ^= 0xff;
1016 }
1017 for (i = 0; i < 256; i++) {
1018 if (pat->cmap[i] != i) CCL_CLR(tab, i);
1019 if (CCL_CHK(tab, i)) {
1020 lastc = i;
1021 count++;
1022 }
1023 }
1024 if (count == 1)
1025 *tab = (char)lastc;
1026 return (count);
1027 }
1028
1029 /*
1030 * egrep patterns:
1031 *
1032 * Alternation: d0: d1 { '|' d1 }*
1033 * Concatenation: d1: d2 { d2 }*
1034 * Repetition: d2: d3 { '*' | '?' | '+' }
1035 * Literal: d3: lit | '.' | '[]' | '(' d0 ')'
1036 */
1037
1038 static Expr *
d3(re_re * r,PATTERN * pat)1039 d3(re_re *r, PATTERN *pat)
1040 {
1041 Expr *e;
1042 uchar_t *tab;
1043 int count;
1044
1045 switch (toktype) {
1046 case Backslash:
1047 case Literal:
1048 e = newexpr(Literal, toklit, (Expr *)NULL, (Expr *)NULL);
1049 lex(r, pat);
1050 break;
1051 case Dot:
1052 e = newexpr(Dot, '.', (Expr *)NULL, (Expr *)NULL);
1053 lex(r, pat);
1054 break;
1055 case Charclass:
1056 tab = egmalloc(CCL_SIZ * sizeof (uchar_t));
1057 count = ccl(pat, tab);
1058 if (count == 1) {
1059 toklit = *tab;
1060 e = newexpr(Literal, toklit, (Expr *)NULL,
1061 (Expr *)NULL);
1062 } else {
1063 e = newexpr(Charclass, '[', (Expr *)NULL, (Expr *)NULL);
1064 e->l = (Expr *)count; /* number of chars */
1065 e->r = (Expr *)tab; /* bitmap of chars */
1066 }
1067 lex(r, pat);
1068 break;
1069 case Lpar:
1070 lex(r, pat);
1071 count = ++parno;
1072 e = d0(r, pat);
1073 if (toktype == Rpar)
1074 lex(r, pat);
1075 else
1076 err("syntax error - missing )");
1077 return (e);
1078 default:
1079 err("syntax error");
1080 e = NULL;
1081 }
1082 return (e);
1083 }
1084
1085 static Expr *
d2(re_re * r,PATTERN * pat)1086 d2(re_re *r, PATTERN *pat)
1087 {
1088 Expr *e;
1089 Exprtype t;
1090
1091 e = d3(r, pat);
1092 while ((toktype == Star) || (toktype == Plus) || (toktype == Quest)) {
1093 t = toktype;
1094 lex(r, pat);
1095 e = newexpr(t, 0, e, (Expr *)NULL);
1096 }
1097 return (e);
1098 }
1099
1100 static Expr *
d1(re_re * r,PATTERN * pat)1101 d1(re_re *r, PATTERN *pat)
1102 {
1103 Expr *e, *f;
1104
1105 e = d2(r, pat);
1106 while ((toktype == Literal) || (toktype == Dot) || (toktype == Lpar) ||
1107 (toktype == Backslash) || (toktype == Charclass)) {
1108 f = d2(r, pat);
1109 e = newexpr(Cat, 0, e, f);
1110 }
1111 return (e);
1112 }
1113
1114 static Expr *
d0(re_re * r,PATTERN * pat)1115 d0(re_re *r, PATTERN *pat)
1116 {
1117 Expr *e, *f;
1118
1119 e = d1(r, pat);
1120 while (toktype == Alternate) {
1121 lex(r, pat);
1122 if (toktype == EOP)
1123 continue;
1124 f = d1(r, pat);
1125 e = newexpr(Alternate, 0, e, f);
1126 }
1127 return (e);
1128 }
1129
1130 static Expr *
eall(re_re * r,PATTERN * pat)1131 eall(re_re *r, PATTERN *pat)
1132 {
1133 Expr *e;
1134
1135 while (toktype == Alternate) /* bogus but user-friendly */
1136 lex(r, pat);
1137 e = d0(r, pat);
1138 while (toktype == Alternate) /* bogus but user-friendly */
1139 lex(r, pat);
1140 if (toktype != EOP)
1141 err("syntax error");
1142 return (e);
1143 }
1144
1145 static void
err(char * s)1146 err(char *s)
1147 {
1148 message = s;
1149 longjmp(env, 1);
1150 }
1151
1152 static int
re_lit(PATTERN * pat,uchar_t ** b,uchar_t ** e)1153 re_lit(PATTERN *pat, uchar_t **b, uchar_t **e)
1154 {
1155 bestlen = 0;
1156 pat->fullmatch = YES;
1157 START
1158 traverse(pat, pat->re_ptr->root->l);
1159 FINISH
1160 if (bestlen < 2)
1161 return (0);
1162 *b = egmalloc(bestlen * sizeof (uchar_t));
1163 (void) memmove(*b, best, bestlen);
1164 *e = *b + bestlen - 1;
1165 return (bestlen - 1);
1166 }
1167
1168 static void
traverse(PATTERN * pat,Expr * e)1169 traverse(PATTERN *pat, Expr *e)
1170 {
1171 switch (e->type) {
1172 case Literal:
1173 ADDL(e->lit)
1174 break;
1175 case Cat:
1176 traverse(pat, e->l);
1177 traverse(pat, e->r);
1178 break;
1179 case Plus:
1180 traverse(pat, e->l);
1181 FINISH /* can't go on past a + */
1182 pat->fullmatch = NO;
1183 START /* but we can start with one! */
1184 traverse(pat, e->l);
1185 break;
1186 default:
1187 FINISH
1188 pat->fullmatch = NO;
1189 START
1190 break;
1191 }
1192 }
1193
1194 static re_bm *
re_bmcomp(uchar_t * pb,uchar_t * pe,uchar_t * cmap)1195 re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap)
1196 {
1197 int j;
1198 int delta[256];
1199 re_bm *b;
1200
1201 b = (re_bm *)egmalloc(sizeof (*b));
1202
1203 b->patlen = pe - pb;
1204 b->cmap = cmap;
1205 b->bmpat = pb;
1206
1207 delta_2(b);
1208
1209 for (j = 0; j < 256; j++)
1210 delta[j] = b->patlen;
1211
1212 for (pe--; pb < pe; pb++)
1213 delta[b->cmap[*pb]] = pe - pb;
1214
1215 delta[b->cmap[*pb]] = LARGE;
1216
1217 for (j = 0; j < 256; j++)
1218 b->delta0[j] = delta[b->cmap[j]];
1219 return (b);
1220 }
1221
1222 static void
delta_2(re_bm * b)1223 delta_2(re_bm *b)
1224 {
1225 int m = b->patlen;
1226 int i, k, j;
1227
1228 b->delta2 = (int *)egmalloc(m * sizeof (int));
1229
1230 for (j = 0; j < m; j++) {
1231 k = 1;
1232 again:
1233 while (k <= j && b->bmpat[j-k] == b->bmpat[j]) k++;
1234
1235 for (i = j + 1; i < m; i++) {
1236 if (k <= i && b->bmpat[i-k] != b->bmpat[i]) {
1237 k++;
1238 goto again;
1239 }
1240 }
1241 b->delta2[j] = k + m - j - 1;
1242 }
1243 }
1244
1245 static BOOL
re_bmexec(PATTERN * pat,uchar_t * s,uchar_t * e,uchar_t ** mb,uchar_t ** me)1246 re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb, uchar_t **me)
1247 {
1248 re_bm *b = pat->bm_ptr;
1249 uchar_t *sp;
1250 int k;
1251
1252 s += b->patlen - 1;
1253 while ((unsigned long)s < (unsigned long)e) {
1254 while ((unsigned long)(s += b->delta0[*s]) < (unsigned long)e)
1255 ;
1256 if ((unsigned long)s < (unsigned long)(e + b->patlen))
1257 return (NO); /* no match */
1258 s -= LARGE;
1259 for (k = b->patlen-2, sp = s-1; k >= 0; k--) {
1260 if (b->cmap[*sp--] != b->bmpat[k]) break;
1261 }
1262 if (k < 0) {
1263 *mb = ++sp;
1264 *me = s+1;
1265 if (grepmatch(pat, mb, me))
1266 return (YES);
1267 s = *mb;
1268 } else if (k < 0) {
1269 s++;
1270 } else {
1271 int j;
1272 j = b->delta2[k];
1273 k = b->delta0[*++sp];
1274 if ((j > k) || (k == (int)LARGE))
1275 k = j;
1276 s = sp + k;
1277 }
1278 }
1279 return (NO);
1280 }
1281
1282 static re_cw *
re_recw(re_re * r,uchar_t * map)1283 re_recw(re_re *r, uchar_t *map)
1284 {
1285 Expr *e, *root = r->root;
1286 re_cw *pat;
1287 uchar_t *buf;
1288
1289 if (root->type != EOP)
1290 return (NULL);
1291 e = root->l;
1292 pat = re_cwinit(map);
1293 buf = (uchar_t *)egmalloc(20000 * sizeof (uchar_t));
1294 if (!altlist(e, buf, pat)) {
1295 return (NULL);
1296 }
1297 re_cwcomp(pat);
1298 return (pat);
1299 }
1300
1301 static BOOL
altlist(Expr * e,uchar_t * buf,re_cw * pat)1302 altlist(Expr *e, uchar_t *buf, re_cw *pat)
1303 {
1304 if (e->type == Alternate)
1305 return ((BOOL)(altlist(e->l, buf, pat) &&
1306 altlist(e->r, buf, pat)));
1307 return (word(e, buf, pat));
1308 }
1309
1310 static BOOL
word(Expr * e,uchar_t * buf,re_cw * pat)1311 word(Expr *e, uchar_t *buf, re_cw *pat)
1312 {
1313 static uchar_t *p;
1314
1315 if (buf) p = buf;
1316
1317 if (e->type == Cat) {
1318 if (!word(e->l, (uchar_t *)NULL, pat))
1319 return (NO);
1320 if (!word(e->r, (uchar_t *)NULL, pat))
1321 return (NO);
1322 } else if (e->type == Literal)
1323 *p++ = e->lit;
1324 else
1325 return (NO);
1326
1327 if (buf)
1328 re_cwadd(pat, buf, p);
1329 return (YES);
1330 }
1331
1332 static re_cw *
re_cwinit(uchar_t * cmap)1333 re_cwinit(uchar_t *cmap)
1334 {
1335 re_cw *c;
1336
1337 froot = NULL;
1338 next_node = NULL;
1339 next_link = NULL;
1340 c = (re_cw *)egmalloc(sizeof (*c));
1341 c->nodeid = 0;
1342 c->maxdepth = 0;
1343 c->mindepth = 10000;
1344 c->root = newnode(c, 0);
1345 c->cmap = cmap;
1346 return (c);
1347 }
1348
1349 static void
re_cwadd(re_cw * c,uchar_t * s,uchar_t * e)1350 re_cwadd(re_cw *c, uchar_t *s, uchar_t *e)
1351 {
1352 Node *p, *state;
1353 Link *l;
1354 int depth;
1355
1356 state = c->root;
1357 while (s <= --e) {
1358 for (l = state->alts; l; l = l->next)
1359 if (l->lit == *e)
1360 break;
1361 if (l == NULL)
1362 break;
1363 else
1364 state = l->node;
1365 }
1366 if (s <= e) {
1367 depth = state->d+1;
1368 l = newlink(*e--, p = newnode(c, depth++));
1369 l->next = state->alts;
1370 state->alts = l;
1371 state = p;
1372 while (s <= e) {
1373 state->alts = newlink(*e--, p = newnode(c, depth++));
1374 state = p;
1375 }
1376 if (c->mindepth >= depth) c->mindepth = depth-1;
1377 }
1378 state->out = 1;
1379 }
1380
1381 #ifdef DEBUG
1382 static
pr(Node * n)1383 pr(Node *n)
1384 {
1385 Link *l;
1386
1387 printf("%d[%d,%d]: ", n->id, n->shift1, n->shift2);
1388 for (l = n->alts; l; l = l->next) {
1389 printf("edge from \"%d\" to \"%d\" label {\"%c\"};",
1390 n->id, l->node->id, l->lit);
1391 if (l->node->out) {
1392 printf(" draw \"%d\" as Doublecircle;", l->node->id);
1393 }
1394 if (l->node->fail) {
1395 printf(" edge from \"%d\" to \"%d\" dashed;",
1396 l->node->id, l->node->fail->id);
1397 }
1398 printf("\n");
1399 pr(l->node);
1400 }
1401 }
1402 #endif
1403
1404 static void
fail(Node * root)1405 fail(Node *root)
1406 {
1407 Link *qhead = NULL, *qtail = NULL;
1408 Link *l, *ll;
1409 Link *t;
1410 Node *state, *r, *s;
1411 int a;
1412
1413 for (l = root->alts; l; l = l->next) {
1414 ADD(l->node);
1415 l->node->fail = root;
1416 }
1417 while (qhead) {
1418 r = qhead->node;
1419 DEL();
1420 for (l = r->alts; l; l = l->next) {
1421 s = l->node;
1422 a = l->lit;
1423 ADD(s);
1424 state = r->fail;
1425 while (state) {
1426 for (ll = state->alts; ll; ll = ll->next)
1427 if (ll->lit == a)
1428 break;
1429 if (ll || (state == root)) {
1430 if (ll)
1431 state = ll->node;
1432 /*
1433 * do it here as only other exit is
1434 * state 0
1435 */
1436 if (state->out) {
1437 s->out = 1;
1438 }
1439 break;
1440 } else
1441 state = state->fail;
1442 }
1443 s->fail = state;
1444 }
1445 }
1446 zeroroot(root, root);
1447 }
1448
1449 static void
zeroroot(Node * root,Node * n)1450 zeroroot(Node *root, Node *n)
1451 {
1452 Link *l;
1453
1454 if (n->fail == root)
1455 n->fail = NULL;
1456 for (l = n->alts; l; l = l->next)
1457 zeroroot(root, l->node);
1458 }
1459
1460 static void
shift(re_cw * c)1461 shift(re_cw *c)
1462 {
1463 Link *qhead = NULL, *qtail = NULL;
1464 Link *l;
1465 Link *t;
1466 Node *state, *r, *s;
1467 int k;
1468
1469 for (k = 0; k < 256; k++)
1470 c->step[k] = c->mindepth+1;
1471 c->root->shift1 = 1;
1472 c->root->shift2 = c->mindepth;
1473 for (l = c->root->alts; l; l = l->next) {
1474 l->node->shift2 = c->root->shift2;
1475 ADD(l->node);
1476 l->node->fail = NULL;
1477 }
1478 while (qhead) {
1479 r = qhead->node;
1480 DEL();
1481 r->shift1 = c->mindepth;
1482 r->shift2 = c->mindepth;
1483 if ((state = r->fail) != NULL) {
1484 do {
1485 k = r->d - state->d;
1486 if (k < state->shift1) {
1487 state->shift1 = k;
1488 }
1489 if (r->out && (k < state->shift2)) {
1490 state->shift2 = k;
1491 }
1492 } while ((state = state->fail) != NULL);
1493 }
1494 for (l = r->alts; l; l = l->next) {
1495 s = l->node;
1496 ADD(s);
1497 }
1498 }
1499 shiftprop(c, c->root);
1500 shifttab(c->root);
1501 c->step[0] = 1;
1502 }
1503
1504 static void
shifttab(Node * n)1505 shifttab(Node *n)
1506 {
1507 Link *l;
1508 Node **nn;
1509
1510 n->tab = nn = (Node **)egmalloc(256 * sizeof (Node *));
1511 (void) memset((char *)n->tab, 0, 256 * sizeof (Node *));
1512 for (l = n->alts; l; l = l->next)
1513 nn[l->lit] = l->node;
1514 }
1515
1516 static void
shiftprop(re_cw * c,Node * n)1517 shiftprop(re_cw *c, Node *n)
1518 {
1519 Link *l;
1520 Node *nn;
1521
1522 for (l = n->alts; l; l = l->next) {
1523 if (c->step[l->lit] > l->node->d)
1524 c->step[l->lit] = l->node->d;
1525 nn = l->node;
1526 if (n->shift2 < nn->shift2)
1527 nn->shift2 = n->shift2;
1528 shiftprop(c, nn);
1529 }
1530 }
1531
1532 static void
re_cwcomp(re_cw * c)1533 re_cwcomp(re_cw *c)
1534 {
1535 fail(c->root);
1536 shift(c);
1537 }
1538
1539 static BOOL
re_cwexec(PATTERN * pat,uchar_t * rs,uchar_t * re,uchar_t ** mb,uchar_t ** me)1540 re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb, uchar_t **me)
1541 {
1542 Node *state;
1543 Link *l;
1544 uchar_t *sp;
1545 uchar_t *s;
1546 uchar_t *e;
1547 Node *ostate;
1548 int k;
1549 re_cw *c = pat->cw_ptr;
1550 uchar_t fake[1];
1551 uchar_t mappedsp;
1552
1553 fake[0] = 0;
1554 state = c->root;
1555
1556 s = rs+c->mindepth-1;
1557 e = re;
1558 while (s < e) {
1559 /* scan */
1560 for (sp = s; (ostate = state) != NULL; ) {
1561 mappedsp = c->cmap[*sp];
1562 if (state->tab) {
1563 if ((state = state->tab[mappedsp]) == NULL)
1564 goto nomatch;
1565 } else {
1566 for (l = state->alts; ; l = l->next) {
1567 if (l == NULL)
1568 goto nomatch;
1569 if (l->lit == mappedsp) {
1570 state = l->node;
1571 break;
1572 }
1573 }
1574 }
1575 if (state->out) {
1576 *mb = sp;
1577 *me = s+1;
1578 if (fixloc(mb, me))
1579 return (YES);
1580 }
1581 if (--sp < rs) {
1582 sp = fake;
1583 break;
1584 }
1585 }
1586 nomatch:
1587 k = c->step[c->cmap[*sp]] - ostate->d - 1;
1588 if (k < ostate->shift1)
1589 k = ostate->shift1;
1590 if (k > ostate->shift2)
1591 k = ostate->shift2;
1592 s += k;
1593 state = c->root;
1594 }
1595 return (NO);
1596 }
1597
1598 static Node *
newnode(re_cw * c,int d)1599 newnode(re_cw *c, int d)
1600 {
1601 static Node *lim = NULL;
1602 static int incr = 256;
1603
1604 if (!next_node) lim = NULL;
1605 if (next_node == lim) {
1606 next_node = (Node *)egmalloc(incr * sizeof (Node));
1607 lim = next_node + incr;
1608 }
1609 next_node->d = d;
1610 if (d > c->maxdepth) c->maxdepth = d;
1611 next_node->id = c->nodeid++;
1612 next_node->alts = NULL;
1613 next_node->tab = NULL;
1614 next_node->out = 0;
1615 return (next_node++);
1616 }
1617
1618 static Link *
newlink(uchar_t lit,Node * n)1619 newlink(uchar_t lit, Node *n)
1620 {
1621 static Link *lim = NULL;
1622 static int incr = 256;
1623
1624 if (!next_link) lim = NULL;
1625 if (next_link == lim) {
1626 next_link = (Link *)egmalloc(incr * sizeof (Link));
1627 lim = next_link + incr;
1628 }
1629 next_link->lit = lit;
1630 next_link->node = n;
1631 next_link->next = NULL;
1632 return (next_link++);
1633 }
1634
1635 int
egrep(char * f,FILE * o,char * fo)1636 egrep(char *f, FILE *o, char *fo)
1637 {
1638 uchar_t buff[MAXBUFSIZE + ESIZE];
1639
1640 buffer = buff;
1641 *buffer++ = NL; /* New line precedes buffer to prevent runover */
1642 file = f;
1643 output = o;
1644 format = fo;
1645 return (execute());
1646 }
1647
1648 static int
execute(void)1649 execute(void)
1650 {
1651 LINE current;
1652 BOOL matched;
1653 BOOL all_searched; /* file being matched until end */
1654
1655 if ((file_desc = open(file, O_RDONLY)) < 0) {
1656 return (-1);
1657 }
1658 init_file(¤t);
1659 /* while there is more get more text from the data stream */
1660 for (;;) {
1661 all_searched = NO;
1662
1663 /*
1664 * Find next new-line in buffer.
1665 * Begin after previous new line character
1666 */
1667
1668 current.prntbuf = current.newline + 1;
1669 current.ln++; /* increment line number */
1670
1671 if (current.prntbuf < bufend) {
1672 /* There is more text in buffer */
1673
1674 /*
1675 * Take our next
1676 * "line" as the entire remaining buffer.
1677 * However, if there is more of the file yet
1678 * to be read in, exclude any incomplete
1679 * line at end.
1680 */
1681 if (file_stat == NO_MORE) {
1682 all_searched = YES;
1683 current.newline = bufend - 1;
1684 if (*current.newline != NL) {
1685 current.newline = bufend;
1686 }
1687 } else {
1688 /*
1689 * Find end of the last
1690 * complete line in the buffer.
1691 */
1692 current.newline = bufend;
1693 while (*--current.newline != NL) {
1694 }
1695 if (current.newline < current.prntbuf) {
1696 /* end not found */
1697 current.newline = bufend;
1698 }
1699 }
1700 } else {
1701 /* There is no more text in the buffer. */
1702 current.newline = bufend;
1703 }
1704 if (current.newline >= bufend) {
1705 /*
1706 * There is no more text in the buffer,
1707 * or no new line was found.
1708 */
1709 switch (file_stat) {
1710 case MORE: /* file partly unread */
1711 case BEGIN:
1712 fgetfile(¤t);
1713
1714 current.ln--;
1715 continue; /* with while loop */
1716 case NO_MORE:
1717 break;
1718 }
1719 /* Nothing more to read in for this file. */
1720 if (current.newline <= current.prntbuf) {
1721 /* Nothing in the buffer, either */
1722 /* We are done with the file. */
1723 current.ln--;
1724 break; /* out of while loop */
1725 }
1726 /* There is no NL at the end of the file */
1727 }
1728
1729 matched = pattern_match(&match_pattern, ¤t);
1730 if (matched) {
1731 int nc;
1732
1733 get_ncount(¤t, match_pattern.loc1);
1734 get_line(¤t, match_pattern.loc2);
1735
1736 nc = current.newline + 1 - current.prntbuf;
1737 (void) fprintf(output, format, file, current.ln);
1738 (void) fwrite((char *)current.prntbuf, 1, nc, output);
1739 } else {
1740 if (all_searched)
1741 break; /* out of while loop */
1742
1743 get_ncount(¤t, current.newline + 1);
1744 }
1745 }
1746
1747 (void) close(file_desc);
1748 return (0);
1749 }
1750
1751 static void
init_file(LINE * cur_ptr)1752 init_file(LINE *cur_ptr)
1753 {
1754 file_stat = BEGIN;
1755 cur_ptr->ln = 0;
1756 bufend = buffer;
1757 cur_ptr->newline = buffer - 1;
1758 }
1759
1760 static BOOL
pattern_match(PATTERN * pat,LINE * lptr)1761 pattern_match(PATTERN *pat, LINE *lptr)
1762 {
1763 if ((*pat->procfn)(pat, lptr->prntbuf - 1, lptr->newline + 1,
1764 &pat->loc1, &pat->loc2)) {
1765 return (YES);
1766 } else {
1767 pat->loc1 = lptr->prntbuf;
1768 pat->loc2 = lptr->newline - 1;
1769 return (NO);
1770 }
1771 } /* end of function pattern_match() */
1772
1773 static void
fgetfile(LINE * cur_ptr)1774 fgetfile(LINE *cur_ptr)
1775 {
1776 /*
1777 * This function reads as much of the current file into the buffer
1778 * as will fit.
1779 */
1780 int bytes; /* bytes read in current buffer */
1781 int bufsize = MAXBUFSIZE; /* free space in data buffer */
1782 int save_current;
1783 uchar_t *begin = cur_ptr->prntbuf;
1784
1785 /*
1786 * Bytes of current incomplete line, if any, save_current in buffer.
1787 * These must be saved.
1788 */
1789 save_current = (int)(bufend - begin);
1790
1791 if (file_stat == MORE) {
1792 /*
1793 * A portion of the file fills the buffer. We must clear
1794 * out the dead wood to make room for more of the file.
1795 */
1796
1797 int k = 0;
1798
1799 k = begin - buffer;
1800 if (!k) {
1801 /*
1802 * We have one humungous current line,
1803 * which fills the whole buffer.
1804 * Toss it.
1805 */
1806 begin = bufend;
1807 k = begin - buffer;
1808 save_current = 0;
1809 }
1810
1811 begin = buffer;
1812 bufend = begin + save_current;
1813
1814 bufsize = MAXBUFSIZE - save_current;
1815
1816 if (save_current > 0) {
1817 /*
1818 * Must save portion of current line.
1819 * Copy to beginning of buffer.
1820 */
1821 (void) memmove(buffer, buffer + k, save_current);
1822 }
1823 }
1824
1825 /* Now read in the file. */
1826
1827 do {
1828 bytes = read(file_desc, (char *)bufend, (unsigned int)bufsize);
1829 if (bytes < 0) {
1830 /* can't read any more of file */
1831 bytes = 0;
1832 }
1833 bufend += bytes;
1834 bufsize -= bytes;
1835 } while (bytes > 0 && bufsize > 0);
1836
1837
1838 if (begin >= bufend) {
1839 /* No new lines or incomplete line in buffer */
1840 file_stat = NO_MORE;
1841 } else if (bufsize) {
1842 /* Still space in the buffer, so we have read entire file */
1843 file_stat = NO_MORE;
1844 } else {
1845 /* We filled entire buffer, so there may be more yet to read */
1846 file_stat = MORE;
1847 }
1848 /* Note: bufend is 1 past last good char */
1849 *bufend = NL; /* Sentinel new-line character */
1850 /* Set newline to character preceding the current line */
1851 cur_ptr->newline = begin - 1;
1852 }
1853
1854 static void
get_line(LINE * cur_ptr,uchar_t * s)1855 get_line(LINE *cur_ptr, uchar_t *s)
1856 {
1857 while (*s++ != NL) {
1858 }
1859 cur_ptr->newline = --s;
1860 cur_ptr->ln++;
1861 }
1862
1863 static void
get_ncount(LINE * cur_ptr,uchar_t * s)1864 get_ncount(LINE *cur_ptr, uchar_t *s)
1865 {
1866 uchar_t *p = cur_ptr->prntbuf;
1867
1868 while (*--s != NL) {
1869 }
1870 cur_ptr->newline = s++;
1871 while ((s > p) &&
1872 (p = (uchar_t *)memchr((char *)p, NL, s - p)) != NULL) {
1873 cur_ptr->ln++;
1874 ++p;
1875 }
1876 cur_ptr->ln--;
1877 cur_ptr->prntbuf = s;
1878 }
1879