1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* Copyright (c) 1990 AT&T */ 23*7c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 24*7c478bd9Sstevel@tonic-gate 25*7c478bd9Sstevel@tonic-gate 26*7c478bd9Sstevel@tonic-gate /* 27*7c478bd9Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 28*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 29*7c478bd9Sstevel@tonic-gate */ 30*7c478bd9Sstevel@tonic-gate 31*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 32*7c478bd9Sstevel@tonic-gate 33*7c478bd9Sstevel@tonic-gate /* 34*7c478bd9Sstevel@tonic-gate * cscope - interactive C symbol or text cross-reference 35*7c478bd9Sstevel@tonic-gate * 36*7c478bd9Sstevel@tonic-gate * text searching functions 37*7c478bd9Sstevel@tonic-gate */ 38*7c478bd9Sstevel@tonic-gate 39*7c478bd9Sstevel@tonic-gate #include <fcntl.h> 40*7c478bd9Sstevel@tonic-gate #include <setjmp.h> 41*7c478bd9Sstevel@tonic-gate #include <stdio.h> 42*7c478bd9Sstevel@tonic-gate #include <string.h> 43*7c478bd9Sstevel@tonic-gate #include <memory.h> 44*7c478bd9Sstevel@tonic-gate #include <ctype.h> 45*7c478bd9Sstevel@tonic-gate #include <unistd.h> 46*7c478bd9Sstevel@tonic-gate #include "library.h" 47*7c478bd9Sstevel@tonic-gate 48*7c478bd9Sstevel@tonic-gate typedef enum { 49*7c478bd9Sstevel@tonic-gate NO = 0, 50*7c478bd9Sstevel@tonic-gate YES = 1 51*7c478bd9Sstevel@tonic-gate } BOOL; 52*7c478bd9Sstevel@tonic-gate 53*7c478bd9Sstevel@tonic-gate typedef struct re_bm { 54*7c478bd9Sstevel@tonic-gate int delta0[256]; 55*7c478bd9Sstevel@tonic-gate int *delta2; 56*7c478bd9Sstevel@tonic-gate uchar_t *cmap; 57*7c478bd9Sstevel@tonic-gate uchar_t *bmpat; 58*7c478bd9Sstevel@tonic-gate int patlen; 59*7c478bd9Sstevel@tonic-gate } re_bm; 60*7c478bd9Sstevel@tonic-gate 61*7c478bd9Sstevel@tonic-gate typedef struct Link { 62*7c478bd9Sstevel@tonic-gate uchar_t lit; 63*7c478bd9Sstevel@tonic-gate struct Node *node; 64*7c478bd9Sstevel@tonic-gate struct Link *next; 65*7c478bd9Sstevel@tonic-gate } Link; 66*7c478bd9Sstevel@tonic-gate 67*7c478bd9Sstevel@tonic-gate typedef struct Node { 68*7c478bd9Sstevel@tonic-gate short out; 69*7c478bd9Sstevel@tonic-gate short d; 70*7c478bd9Sstevel@tonic-gate short shift1; 71*7c478bd9Sstevel@tonic-gate short shift2; 72*7c478bd9Sstevel@tonic-gate long id; 73*7c478bd9Sstevel@tonic-gate struct Node **tab; 74*7c478bd9Sstevel@tonic-gate Link *alts; 75*7c478bd9Sstevel@tonic-gate struct Node *fail; 76*7c478bd9Sstevel@tonic-gate } Node; 77*7c478bd9Sstevel@tonic-gate 78*7c478bd9Sstevel@tonic-gate typedef struct re_cw { 79*7c478bd9Sstevel@tonic-gate int maxdepth, mindepth; 80*7c478bd9Sstevel@tonic-gate long nodeid; 81*7c478bd9Sstevel@tonic-gate int step[256]; 82*7c478bd9Sstevel@tonic-gate uchar_t *cmap; 83*7c478bd9Sstevel@tonic-gate struct Node *root; 84*7c478bd9Sstevel@tonic-gate } re_cw; 85*7c478bd9Sstevel@tonic-gate 86*7c478bd9Sstevel@tonic-gate typedef enum { 87*7c478bd9Sstevel@tonic-gate /* lit expression types */ 88*7c478bd9Sstevel@tonic-gate Literal, Dot, Charclass, EOP, 89*7c478bd9Sstevel@tonic-gate 90*7c478bd9Sstevel@tonic-gate /* non-lit expression types */ 91*7c478bd9Sstevel@tonic-gate Cat, Alternate, Star, Plus, Quest, 92*7c478bd9Sstevel@tonic-gate 93*7c478bd9Sstevel@tonic-gate /* not really expression types, just helping */ 94*7c478bd9Sstevel@tonic-gate Lpar, Rpar, Backslash 95*7c478bd9Sstevel@tonic-gate 96*7c478bd9Sstevel@tonic-gate } Exprtype; 97*7c478bd9Sstevel@tonic-gate 98*7c478bd9Sstevel@tonic-gate typedef int ID; 99*7c478bd9Sstevel@tonic-gate 100*7c478bd9Sstevel@tonic-gate typedef struct Expr { 101*7c478bd9Sstevel@tonic-gate Exprtype type; /* Type of expression (in grammar) */ 102*7c478bd9Sstevel@tonic-gate ID id; /* unique ID of lit expression */ 103*7c478bd9Sstevel@tonic-gate int lit; /* Literal character or tag */ 104*7c478bd9Sstevel@tonic-gate int flen; /* Number of following lit expressions */ 105*7c478bd9Sstevel@tonic-gate ID *follow; /* Array of IDs of following lit expressions */ 106*7c478bd9Sstevel@tonic-gate struct Expr *l; /* pointer to Left child (or ccl count) */ 107*7c478bd9Sstevel@tonic-gate struct Expr *r; /* pointer to Right child (or ccl mask) */ 108*7c478bd9Sstevel@tonic-gate struct Expr *parent; /* pointer to Parent */ 109*7c478bd9Sstevel@tonic-gate } Expr; 110*7c478bd9Sstevel@tonic-gate 111*7c478bd9Sstevel@tonic-gate typedef struct State { 112*7c478bd9Sstevel@tonic-gate struct State *tab[256]; 113*7c478bd9Sstevel@tonic-gate int cnt; /* number of matched chars on full match, -1 otherwise */ 114*7c478bd9Sstevel@tonic-gate int npos; /* number of IDs in position set for this state. */ 115*7c478bd9Sstevel@tonic-gate int pos; /* index into posbase for beginning of IDs */ 116*7c478bd9Sstevel@tonic-gate } State; 117*7c478bd9Sstevel@tonic-gate 118*7c478bd9Sstevel@tonic-gate typedef struct FID { 119*7c478bd9Sstevel@tonic-gate ID id; /* Lit Expression id */ 120*7c478bd9Sstevel@tonic-gate int fcount; /* Number of Lit exp matches before this one. */ 121*7c478bd9Sstevel@tonic-gate } FID; 122*7c478bd9Sstevel@tonic-gate 123*7c478bd9Sstevel@tonic-gate typedef struct Positionset { 124*7c478bd9Sstevel@tonic-gate int count; /* Number of lit exps in position set */ 125*7c478bd9Sstevel@tonic-gate ID last; /* ID of last lit exp in position set */ 126*7c478bd9Sstevel@tonic-gate FID *base; /* array of MAXID FIDS */ 127*7c478bd9Sstevel@tonic-gate /* 0 means not in position set */ 128*7c478bd9Sstevel@tonic-gate /* -1 means first in position set */ 129*7c478bd9Sstevel@tonic-gate /* n (>0) is ID of prev member of position set. */ 130*7c478bd9Sstevel@tonic-gate } Positionset; 131*7c478bd9Sstevel@tonic-gate 132*7c478bd9Sstevel@tonic-gate typedef struct re_re { 133*7c478bd9Sstevel@tonic-gate FID *posbase; /* Array of IDs from all states */ 134*7c478bd9Sstevel@tonic-gate int nposalloc; /* Allocated size of posbase */ 135*7c478bd9Sstevel@tonic-gate int posnext; /* Index into free space in posbase */ 136*7c478bd9Sstevel@tonic-gate int posreset; /* Index into end of IDs for initial state in posbase */ 137*7c478bd9Sstevel@tonic-gate int maxid; /* Number of (also maximum ID of) lit expressions */ 138*7c478bd9Sstevel@tonic-gate Expr *root; /* Pointer to root (EOP) expression */ 139*7c478bd9Sstevel@tonic-gate Expr **ptr; /* Pointer to array of ptrs to lit expressions. */ 140*7c478bd9Sstevel@tonic-gate uchar_t *cmap; /* Character mapping array */ 141*7c478bd9Sstevel@tonic-gate Positionset firstpos; 142*7c478bd9Sstevel@tonic-gate Positionset tmp; 143*7c478bd9Sstevel@tonic-gate int nstates; /* Number of current states defined */ 144*7c478bd9Sstevel@tonic-gate int statelim; /* Limit on number of states before flushing */ 145*7c478bd9Sstevel@tonic-gate State *states; /* Array of states */ 146*7c478bd9Sstevel@tonic-gate State istate; /* Initial state */ 147*7c478bd9Sstevel@tonic-gate } re_re; 148*7c478bd9Sstevel@tonic-gate 149*7c478bd9Sstevel@tonic-gate typedef struct { 150*7c478bd9Sstevel@tonic-gate uchar_t *cmap; 151*7c478bd9Sstevel@tonic-gate re_re *re_ptr; 152*7c478bd9Sstevel@tonic-gate re_bm *bm_ptr; 153*7c478bd9Sstevel@tonic-gate re_cw *cw_ptr; 154*7c478bd9Sstevel@tonic-gate BOOL fullmatch; 155*7c478bd9Sstevel@tonic-gate BOOL (*procfn)(); 156*7c478bd9Sstevel@tonic-gate BOOL (*succfn)(); 157*7c478bd9Sstevel@tonic-gate uchar_t *loc1; 158*7c478bd9Sstevel@tonic-gate uchar_t *loc2; 159*7c478bd9Sstevel@tonic-gate uchar_t *expression; 160*7c478bd9Sstevel@tonic-gate } PATTERN; 161*7c478bd9Sstevel@tonic-gate 162*7c478bd9Sstevel@tonic-gate typedef enum { 163*7c478bd9Sstevel@tonic-gate BEGIN, /* File is not yet in buffer at all */ 164*7c478bd9Sstevel@tonic-gate MORE, /* File is partly in buffer */ 165*7c478bd9Sstevel@tonic-gate NO_MORE /* File has been completely read into buffer */ 166*7c478bd9Sstevel@tonic-gate } FILE_STAT; 167*7c478bd9Sstevel@tonic-gate 168*7c478bd9Sstevel@tonic-gate typedef struct { 169*7c478bd9Sstevel@tonic-gate uchar_t *prntbuf; /* current line of input from data file */ 170*7c478bd9Sstevel@tonic-gate uchar_t *newline; /* end of line (real or sentinel \n) */ 171*7c478bd9Sstevel@tonic-gate long ln; /* line number */ 172*7c478bd9Sstevel@tonic-gate } LINE; 173*7c478bd9Sstevel@tonic-gate 174*7c478bd9Sstevel@tonic-gate #define NL '\n' 175*7c478bd9Sstevel@tonic-gate 176*7c478bd9Sstevel@tonic-gate #define CCL_SIZ 32 177*7c478bd9Sstevel@tonic-gate #define CCL_SET(a, c) ((a)[(c) >> 3] |= bittab[(c) & 07]) 178*7c478bd9Sstevel@tonic-gate #define CCL_CLR(a, c) ((a)[(c) >> 3] &= ~bittab[(c) & 07]) 179*7c478bd9Sstevel@tonic-gate #define CCL_CHK(a, c) ((a)[(c) >> 3] & bittab[(c) & 07]) 180*7c478bd9Sstevel@tonic-gate 181*7c478bd9Sstevel@tonic-gate #define ESIZE (BUFSIZ) 182*7c478bd9Sstevel@tonic-gate #define MAXBUFSIZE (64*BUFSIZ) 183*7c478bd9Sstevel@tonic-gate 184*7c478bd9Sstevel@tonic-gate #define MAXMALLOCS 1024 185*7c478bd9Sstevel@tonic-gate #define MAXLIT 256 /* is plenty big enough */ 186*7c478bd9Sstevel@tonic-gate #define LARGE MAXBUFSIZE+ESIZE+2 187*7c478bd9Sstevel@tonic-gate 188*7c478bd9Sstevel@tonic-gate #define CLEAR(r, rps) (void) memset((char *)(rps)->base, 0, \ 189*7c478bd9Sstevel@tonic-gate (int)((r)->maxid * sizeof (FID))), \ 190*7c478bd9Sstevel@tonic-gate (rps)->count = 0, (rps)->last = -1 191*7c478bd9Sstevel@tonic-gate #define SET(rps, n, cnt) { \ 192*7c478bd9Sstevel@tonic-gate if ((rps)->base[n].id == 0) {\ 193*7c478bd9Sstevel@tonic-gate (rps)->count++;\ 194*7c478bd9Sstevel@tonic-gate (rps)->base[n].fcount = (cnt);\ 195*7c478bd9Sstevel@tonic-gate (rps)->base[n].id = (rps)->last;\ 196*7c478bd9Sstevel@tonic-gate (rps)->last = (n);\ 197*7c478bd9Sstevel@tonic-gate } else if ((cnt) > (rps)->base[n].fcount) {\ 198*7c478bd9Sstevel@tonic-gate (rps)->base[n].fcount = (cnt);\ 199*7c478bd9Sstevel@tonic-gate }} 200*7c478bd9Sstevel@tonic-gate 201*7c478bd9Sstevel@tonic-gate #define START { _p = tmp; } 202*7c478bd9Sstevel@tonic-gate #define ADDL(c) { if (_p >= &tmp[MAXLIT]) _p--; *_p++ = c; } 203*7c478bd9Sstevel@tonic-gate #define FINISH { ADDL(0) if ((_p-tmp) > bestlen) \ 204*7c478bd9Sstevel@tonic-gate (void) memmove(best, tmp, bestlen = _p-tmp); } 205*7c478bd9Sstevel@tonic-gate 206*7c478bd9Sstevel@tonic-gate 207*7c478bd9Sstevel@tonic-gate #define NEW(N) (froot?(t = froot, froot = froot->next, t->next = NULL, \ 208*7c478bd9Sstevel@tonic-gate t->node = N, t): newlink(0, N)) 209*7c478bd9Sstevel@tonic-gate #define ADD(N) if (qtail) qtail = qtail->next = NEW(N); \ 210*7c478bd9Sstevel@tonic-gate else qtail = qhead = NEW(N) 211*7c478bd9Sstevel@tonic-gate #define DEL() { Link *_l = qhead; if ((qhead = qhead->next) == NULL) \ 212*7c478bd9Sstevel@tonic-gate qtail = NULL; _l->next = froot; froot = _l; } 213*7c478bd9Sstevel@tonic-gate 214*7c478bd9Sstevel@tonic-gate static uchar_t *buffer; 215*7c478bd9Sstevel@tonic-gate static uchar_t *bufend; 216*7c478bd9Sstevel@tonic-gate static FILE *output; 217*7c478bd9Sstevel@tonic-gate static char *format; 218*7c478bd9Sstevel@tonic-gate static char *file; 219*7c478bd9Sstevel@tonic-gate static int file_desc; 220*7c478bd9Sstevel@tonic-gate static FILE_STAT file_stat; 221*7c478bd9Sstevel@tonic-gate static PATTERN match_pattern; 222*7c478bd9Sstevel@tonic-gate static uchar_t char_map[2][256]; 223*7c478bd9Sstevel@tonic-gate static int iflag; 224*7c478bd9Sstevel@tonic-gate static Exprtype toktype; 225*7c478bd9Sstevel@tonic-gate static int toklit, parno, maxid; 226*7c478bd9Sstevel@tonic-gate static uchar_t tmp[MAXLIT], best[MAXLIT]; 227*7c478bd9Sstevel@tonic-gate static uchar_t *_p; 228*7c478bd9Sstevel@tonic-gate static int bestlen; 229*7c478bd9Sstevel@tonic-gate static Node *next_node; 230*7c478bd9Sstevel@tonic-gate static Link *froot, *next_link; 231*7c478bd9Sstevel@tonic-gate static jmp_buf env; 232*7c478bd9Sstevel@tonic-gate 233*7c478bd9Sstevel@tonic-gate static int nmalloc; 234*7c478bd9Sstevel@tonic-gate static uchar_t *mallocs[MAXMALLOCS]; 235*7c478bd9Sstevel@tonic-gate 236*7c478bd9Sstevel@tonic-gate static uchar_t bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; 237*7c478bd9Sstevel@tonic-gate 238*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 239*7c478bd9Sstevel@tonic-gate #define TRACE(n) (n < debug) 240*7c478bd9Sstevel@tonic-gate #define EPRINTSIZE 50000 241*7c478bd9Sstevel@tonic-gate static void spr(int c, int *p, uchar_t *buf); 242*7c478bd9Sstevel@tonic-gate void epr(Expr *e, uchar_t *res); 243*7c478bd9Sstevel@tonic-gate static int debug = 12; 244*7c478bd9Sstevel@tonic-gate #endif 245*7c478bd9Sstevel@tonic-gate 246*7c478bd9Sstevel@tonic-gate static void init_file(LINE *cur_ptr); 247*7c478bd9Sstevel@tonic-gate static void get_line(LINE *cur_ptr, uchar_t *s); 248*7c478bd9Sstevel@tonic-gate static void get_ncount(LINE *cur_ptr, uchar_t *s); 249*7c478bd9Sstevel@tonic-gate static int execute(void); 250*7c478bd9Sstevel@tonic-gate static State *startstate(re_re *r); 251*7c478bd9Sstevel@tonic-gate static State *stateof(re_re *r, Positionset *ps); 252*7c478bd9Sstevel@tonic-gate static State *nextstate(re_re *r, State *s, int a); 253*7c478bd9Sstevel@tonic-gate static State *addstate(re_re *r, Positionset *ps, int cnt); 254*7c478bd9Sstevel@tonic-gate static BOOL match(Expr *e, int a); 255*7c478bd9Sstevel@tonic-gate static BOOL first_lit(Positionset *fpos, Expr *e); 256*7c478bd9Sstevel@tonic-gate static void eptr(re_re *r, Expr *e); 257*7c478bd9Sstevel@tonic-gate static void efollow(re_re *r, Positionset *fpos, Expr *e); 258*7c478bd9Sstevel@tonic-gate static void follow(Positionset *fpos, Expr *e); 259*7c478bd9Sstevel@tonic-gate static void followstate(re_re *r, State *s, int a, Positionset *fpos); 260*7c478bd9Sstevel@tonic-gate static Expr *eall(re_re *r, PATTERN *pat); 261*7c478bd9Sstevel@tonic-gate static Expr *d0(re_re *r, PATTERN *pat); 262*7c478bd9Sstevel@tonic-gate static Expr *d1(re_re *r, PATTERN *pat); 263*7c478bd9Sstevel@tonic-gate static Expr *d2(re_re *r, PATTERN *pat); 264*7c478bd9Sstevel@tonic-gate static Expr *d3(re_re *r, PATTERN *pat); 265*7c478bd9Sstevel@tonic-gate static Expr *newexpr(Exprtype t, int lit, Expr *left, Expr *right); 266*7c478bd9Sstevel@tonic-gate static void lex(re_re *r, PATTERN *pat); 267*7c478bd9Sstevel@tonic-gate static int re_lit(PATTERN *pat, uchar_t **b, uchar_t **e); 268*7c478bd9Sstevel@tonic-gate static void traverse(PATTERN *pat, Expr *e); 269*7c478bd9Sstevel@tonic-gate static int ccl(PATTERN *pat, uchar_t *tab); 270*7c478bd9Sstevel@tonic-gate static BOOL altlist(), word(); 271*7c478bd9Sstevel@tonic-gate static BOOL altlist(Expr *e, uchar_t *buf, re_cw *pat); 272*7c478bd9Sstevel@tonic-gate static Node *newnode(re_cw *c, int d); 273*7c478bd9Sstevel@tonic-gate static Link *newlink(uchar_t lit, Node *n); 274*7c478bd9Sstevel@tonic-gate static void fail(Node *root); 275*7c478bd9Sstevel@tonic-gate static void zeroroot(Node *root, Node *n); 276*7c478bd9Sstevel@tonic-gate static void shift(re_cw *c); 277*7c478bd9Sstevel@tonic-gate static void shifttab(Node *n); 278*7c478bd9Sstevel@tonic-gate static void shiftprop(re_cw *c, Node *n); 279*7c478bd9Sstevel@tonic-gate static void delta_2(re_bm *b); 280*7c478bd9Sstevel@tonic-gate static int getstate(re_re *r, Positionset *ps); 281*7c478bd9Sstevel@tonic-gate static void savestate(re_re *r); 282*7c478bd9Sstevel@tonic-gate static void stateinit(re_re *r); 283*7c478bd9Sstevel@tonic-gate static re_bm *re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap); 284*7c478bd9Sstevel@tonic-gate static re_cw *re_cwinit(uchar_t *cmap); 285*7c478bd9Sstevel@tonic-gate static re_cw *re_recw(re_re *r, uchar_t *map); 286*7c478bd9Sstevel@tonic-gate static re_re *egprep(PATTERN *pat); 287*7c478bd9Sstevel@tonic-gate static void re_cwadd(re_cw *c, uchar_t *s, uchar_t *e); 288*7c478bd9Sstevel@tonic-gate static void re_cwcomp(re_cw *c); 289*7c478bd9Sstevel@tonic-gate static void eginit(re_re *r); 290*7c478bd9Sstevel@tonic-gate static BOOL re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb, 291*7c478bd9Sstevel@tonic-gate uchar_t **me); 292*7c478bd9Sstevel@tonic-gate static BOOL re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb, 293*7c478bd9Sstevel@tonic-gate uchar_t **me); 294*7c478bd9Sstevel@tonic-gate static BOOL re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, 295*7c478bd9Sstevel@tonic-gate uchar_t **me); 296*7c478bd9Sstevel@tonic-gate static uchar_t *egmalloc(size_t n); 297*7c478bd9Sstevel@tonic-gate static void fgetfile(LINE *cur_ptr); 298*7c478bd9Sstevel@tonic-gate static void dogre(PATTERN *pat); 299*7c478bd9Sstevel@tonic-gate static BOOL pattern_match(PATTERN *pat, LINE *lptr); 300*7c478bd9Sstevel@tonic-gate static BOOL fixloc(uchar_t **mb, uchar_t **me); 301*7c478bd9Sstevel@tonic-gate static BOOL grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me); 302*7c478bd9Sstevel@tonic-gate static void err(char *s); 303*7c478bd9Sstevel@tonic-gate 304*7c478bd9Sstevel@tonic-gate static char *message; 305*7c478bd9Sstevel@tonic-gate 306*7c478bd9Sstevel@tonic-gate void 307*7c478bd9Sstevel@tonic-gate egrepcaseless(int i) 308*7c478bd9Sstevel@tonic-gate { 309*7c478bd9Sstevel@tonic-gate iflag = i; /* simulate "egrep -i" */ 310*7c478bd9Sstevel@tonic-gate } 311*7c478bd9Sstevel@tonic-gate 312*7c478bd9Sstevel@tonic-gate char * 313*7c478bd9Sstevel@tonic-gate egrepinit(char *expression) 314*7c478bd9Sstevel@tonic-gate { 315*7c478bd9Sstevel@tonic-gate static int firsttime = 1; 316*7c478bd9Sstevel@tonic-gate int i; 317*7c478bd9Sstevel@tonic-gate 318*7c478bd9Sstevel@tonic-gate if (firsttime) { 319*7c478bd9Sstevel@tonic-gate firsttime = 0; 320*7c478bd9Sstevel@tonic-gate for (i = 0; i < 256; i++) { 321*7c478bd9Sstevel@tonic-gate char_map[0][i] = (uchar_t)i; 322*7c478bd9Sstevel@tonic-gate char_map[1][i] = tolower(i); 323*7c478bd9Sstevel@tonic-gate } 324*7c478bd9Sstevel@tonic-gate } 325*7c478bd9Sstevel@tonic-gate for (i = 0; i < nmalloc; i ++) 326*7c478bd9Sstevel@tonic-gate free(mallocs[i]); 327*7c478bd9Sstevel@tonic-gate nmalloc = 0; 328*7c478bd9Sstevel@tonic-gate message = NULL; 329*7c478bd9Sstevel@tonic-gate 330*7c478bd9Sstevel@tonic-gate match_pattern.expression = (uchar_t *)expression; 331*7c478bd9Sstevel@tonic-gate match_pattern.cmap = char_map[iflag]; 332*7c478bd9Sstevel@tonic-gate if (setjmp(env) == 0) { 333*7c478bd9Sstevel@tonic-gate dogre(&match_pattern); 334*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 335*7c478bd9Sstevel@tonic-gate { 336*7c478bd9Sstevel@tonic-gate PATTERN *p = match_pattern; 337*7c478bd9Sstevel@tonic-gate if (p->procfn == re_bmexec) 338*7c478bd9Sstevel@tonic-gate if (!p->fullmatch) 339*7c478bd9Sstevel@tonic-gate if (p->succfn == re_reexec) 340*7c478bd9Sstevel@tonic-gate printf("PARTIAL BOYER_MOORE\n"); 341*7c478bd9Sstevel@tonic-gate else 342*7c478bd9Sstevel@tonic-gate printf("PARTIAL B_M with GREP\n"); 343*7c478bd9Sstevel@tonic-gate else 344*7c478bd9Sstevel@tonic-gate printf("FULL BOYER_MOORE\n"); 345*7c478bd9Sstevel@tonic-gate else if (p->procfn == re_cwexec) 346*7c478bd9Sstevel@tonic-gate printf("C_W\n"); 347*7c478bd9Sstevel@tonic-gate else 348*7c478bd9Sstevel@tonic-gate printf("GENERAL\n"); 349*7c478bd9Sstevel@tonic-gate } 350*7c478bd9Sstevel@tonic-gate #endif 351*7c478bd9Sstevel@tonic-gate } 352*7c478bd9Sstevel@tonic-gate return (message); 353*7c478bd9Sstevel@tonic-gate } 354*7c478bd9Sstevel@tonic-gate 355*7c478bd9Sstevel@tonic-gate static void 356*7c478bd9Sstevel@tonic-gate dogre(PATTERN *pat) 357*7c478bd9Sstevel@tonic-gate { 358*7c478bd9Sstevel@tonic-gate uchar_t *lb, *le; 359*7c478bd9Sstevel@tonic-gate 360*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 361*7c478bd9Sstevel@tonic-gate printf("PATTERN %s\n", pat->expression); 362*7c478bd9Sstevel@tonic-gate #endif 363*7c478bd9Sstevel@tonic-gate pat->re_ptr = egprep(pat); 364*7c478bd9Sstevel@tonic-gate bestlen = re_lit(pat, &lb, &le); 365*7c478bd9Sstevel@tonic-gate 366*7c478bd9Sstevel@tonic-gate if (bestlen && pat->fullmatch) { /* Full Boyer Moore */ 367*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 368*7c478bd9Sstevel@tonic-gate printf("BESTLEN %d\n", bestlen); 369*7c478bd9Sstevel@tonic-gate { 370*7c478bd9Sstevel@tonic-gate uchar_t *p; 371*7c478bd9Sstevel@tonic-gate for (p = lb; p < le; p++) printf("%c", *p); 372*7c478bd9Sstevel@tonic-gate printf("\n"); 373*7c478bd9Sstevel@tonic-gate } 374*7c478bd9Sstevel@tonic-gate #endif 375*7c478bd9Sstevel@tonic-gate pat->bm_ptr = re_bmcomp(lb, le, pat->cmap); 376*7c478bd9Sstevel@tonic-gate pat->procfn = re_bmexec; 377*7c478bd9Sstevel@tonic-gate return; 378*7c478bd9Sstevel@tonic-gate } 379*7c478bd9Sstevel@tonic-gate if (bestlen > 1) { 380*7c478bd9Sstevel@tonic-gate /* Partial Boyer Moore */ 381*7c478bd9Sstevel@tonic-gate pat->bm_ptr = re_bmcomp(lb, le, pat->cmap); 382*7c478bd9Sstevel@tonic-gate pat->procfn = re_bmexec; 383*7c478bd9Sstevel@tonic-gate pat->fullmatch = NO; 384*7c478bd9Sstevel@tonic-gate } else { 385*7c478bd9Sstevel@tonic-gate pat->fullmatch = YES; 386*7c478bd9Sstevel@tonic-gate if ((pat->cw_ptr = re_recw(pat->re_ptr, pat->cmap)) != NULL) { 387*7c478bd9Sstevel@tonic-gate pat->procfn = re_cwexec; /* CW */ 388*7c478bd9Sstevel@tonic-gate return; 389*7c478bd9Sstevel@tonic-gate } 390*7c478bd9Sstevel@tonic-gate } 391*7c478bd9Sstevel@tonic-gate /* general egrep regular expression */ 392*7c478bd9Sstevel@tonic-gate pat->succfn = re_reexec; 393*7c478bd9Sstevel@tonic-gate 394*7c478bd9Sstevel@tonic-gate if (pat->fullmatch) { 395*7c478bd9Sstevel@tonic-gate pat->procfn = pat->succfn; 396*7c478bd9Sstevel@tonic-gate pat->succfn = NULL; 397*7c478bd9Sstevel@tonic-gate } 398*7c478bd9Sstevel@tonic-gate } 399*7c478bd9Sstevel@tonic-gate 400*7c478bd9Sstevel@tonic-gate static BOOL 401*7c478bd9Sstevel@tonic-gate fixloc(uchar_t **mb, uchar_t **me) 402*7c478bd9Sstevel@tonic-gate { 403*7c478bd9Sstevel@tonic-gate /* Handle match to null string */ 404*7c478bd9Sstevel@tonic-gate 405*7c478bd9Sstevel@tonic-gate while (*me <= *mb) 406*7c478bd9Sstevel@tonic-gate (*me)++; 407*7c478bd9Sstevel@tonic-gate 408*7c478bd9Sstevel@tonic-gate if (*(*me - 1) != NL) 409*7c478bd9Sstevel@tonic-gate while (**me != NL) 410*7c478bd9Sstevel@tonic-gate (*me)++; 411*7c478bd9Sstevel@tonic-gate 412*7c478bd9Sstevel@tonic-gate /* Handle match to new-line only */ 413*7c478bd9Sstevel@tonic-gate 414*7c478bd9Sstevel@tonic-gate if (*mb == *me - 1 && **mb == NL) { 415*7c478bd9Sstevel@tonic-gate (*me)++; 416*7c478bd9Sstevel@tonic-gate } 417*7c478bd9Sstevel@tonic-gate 418*7c478bd9Sstevel@tonic-gate /* Handle match including beginning or ending new-line */ 419*7c478bd9Sstevel@tonic-gate 420*7c478bd9Sstevel@tonic-gate if (**mb == NL) 421*7c478bd9Sstevel@tonic-gate (*mb)++; 422*7c478bd9Sstevel@tonic-gate if (*(*me - 1) == NL) 423*7c478bd9Sstevel@tonic-gate (*me)--; 424*7c478bd9Sstevel@tonic-gate return (YES); 425*7c478bd9Sstevel@tonic-gate } 426*7c478bd9Sstevel@tonic-gate 427*7c478bd9Sstevel@tonic-gate static BOOL 428*7c478bd9Sstevel@tonic-gate grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me) 429*7c478bd9Sstevel@tonic-gate { 430*7c478bd9Sstevel@tonic-gate uchar_t *s, *f; 431*7c478bd9Sstevel@tonic-gate 432*7c478bd9Sstevel@tonic-gate if (pat->fullmatch) 433*7c478bd9Sstevel@tonic-gate return (fixloc(mb, me)); 434*7c478bd9Sstevel@tonic-gate 435*7c478bd9Sstevel@tonic-gate for (f = *me - 1; *f != NL; f++) { 436*7c478bd9Sstevel@tonic-gate } 437*7c478bd9Sstevel@tonic-gate f++; 438*7c478bd9Sstevel@tonic-gate for (s = *mb; *s != NL; s--) { 439*7c478bd9Sstevel@tonic-gate } 440*7c478bd9Sstevel@tonic-gate 441*7c478bd9Sstevel@tonic-gate if ((*pat->succfn)(pat, s, f, mb, me)) { 442*7c478bd9Sstevel@tonic-gate return (YES); 443*7c478bd9Sstevel@tonic-gate } else { 444*7c478bd9Sstevel@tonic-gate *mb = f; 445*7c478bd9Sstevel@tonic-gate return (NO); 446*7c478bd9Sstevel@tonic-gate } 447*7c478bd9Sstevel@tonic-gate } 448*7c478bd9Sstevel@tonic-gate 449*7c478bd9Sstevel@tonic-gate static void 450*7c478bd9Sstevel@tonic-gate eginit(re_re *r) 451*7c478bd9Sstevel@tonic-gate { 452*7c478bd9Sstevel@tonic-gate unsigned int n; 453*7c478bd9Sstevel@tonic-gate 454*7c478bd9Sstevel@tonic-gate r->ptr = (Expr **)egmalloc(r->maxid * sizeof (Expr *)); 455*7c478bd9Sstevel@tonic-gate eptr(r, r->root); 456*7c478bd9Sstevel@tonic-gate n = r->maxid * sizeof (FID); 457*7c478bd9Sstevel@tonic-gate r->firstpos.base = (FID *)egmalloc(n); 458*7c478bd9Sstevel@tonic-gate r->tmp.base = (FID *)egmalloc(n); 459*7c478bd9Sstevel@tonic-gate CLEAR(r, &r->firstpos); 460*7c478bd9Sstevel@tonic-gate if (!first_lit(&r->firstpos, r->root->l)) { 461*7c478bd9Sstevel@tonic-gate /* 462*7c478bd9Sstevel@tonic-gate * This expression matches the null string!!!! 463*7c478bd9Sstevel@tonic-gate * Add EOP to beginning position set. 464*7c478bd9Sstevel@tonic-gate */ 465*7c478bd9Sstevel@tonic-gate SET(&r->firstpos, r->root->id, 0) 466*7c478bd9Sstevel@tonic-gate /* (void) printf("first of root->l == 0, b=%s\n", b); */ 467*7c478bd9Sstevel@tonic-gate } 468*7c478bd9Sstevel@tonic-gate stateinit(r); 469*7c478bd9Sstevel@tonic-gate (void) addstate(r, &r->firstpos, 0); 470*7c478bd9Sstevel@tonic-gate savestate(r); 471*7c478bd9Sstevel@tonic-gate } 472*7c478bd9Sstevel@tonic-gate 473*7c478bd9Sstevel@tonic-gate static void 474*7c478bd9Sstevel@tonic-gate eptr(re_re *r, Expr *e) 475*7c478bd9Sstevel@tonic-gate { 476*7c478bd9Sstevel@tonic-gate if ((e->id < 0) || (e->id >= r->maxid)) { 477*7c478bd9Sstevel@tonic-gate err("internal error"); 478*7c478bd9Sstevel@tonic-gate } 479*7c478bd9Sstevel@tonic-gate r->ptr[e->id] = e; 480*7c478bd9Sstevel@tonic-gate if (e->type != Charclass) { 481*7c478bd9Sstevel@tonic-gate if (e->l) eptr(r, e->l); 482*7c478bd9Sstevel@tonic-gate if (e->r) eptr(r, e->r); 483*7c478bd9Sstevel@tonic-gate } 484*7c478bd9Sstevel@tonic-gate } 485*7c478bd9Sstevel@tonic-gate 486*7c478bd9Sstevel@tonic-gate static BOOL 487*7c478bd9Sstevel@tonic-gate re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, uchar_t **me) 488*7c478bd9Sstevel@tonic-gate { 489*7c478bd9Sstevel@tonic-gate re_re *r = pat->re_ptr; 490*7c478bd9Sstevel@tonic-gate State *s, *t; 491*7c478bd9Sstevel@tonic-gate 492*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 493*7c478bd9Sstevel@tonic-gate if (TRACE(10)) { 494*7c478bd9Sstevel@tonic-gate uchar_t buf[EPRINTSIZE]; 495*7c478bd9Sstevel@tonic-gate epr(r->root, buf); 496*7c478bd9Sstevel@tonic-gate (void) printf("expr='%s'\n", buf); 497*7c478bd9Sstevel@tonic-gate } 498*7c478bd9Sstevel@tonic-gate #endif 499*7c478bd9Sstevel@tonic-gate s = startstate(r); 500*7c478bd9Sstevel@tonic-gate 501*7c478bd9Sstevel@tonic-gate for (;;) { 502*7c478bd9Sstevel@tonic-gate uchar_t c; 503*7c478bd9Sstevel@tonic-gate 504*7c478bd9Sstevel@tonic-gate if (s->cnt >= 0) { 505*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 506*7c478bd9Sstevel@tonic-gate if (TRACE(6)) 507*7c478bd9Sstevel@tonic-gate (void) printf("match at input '%s'\n", b); 508*7c478bd9Sstevel@tonic-gate #endif 509*7c478bd9Sstevel@tonic-gate *mb = b - s->cnt; 510*7c478bd9Sstevel@tonic-gate *me = b; 511*7c478bd9Sstevel@tonic-gate if (fixloc(mb, me)) 512*7c478bd9Sstevel@tonic-gate return (YES); 513*7c478bd9Sstevel@tonic-gate } 514*7c478bd9Sstevel@tonic-gate 515*7c478bd9Sstevel@tonic-gate if (b >= e) break; 516*7c478bd9Sstevel@tonic-gate c = pat->cmap[*b]; 517*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 518*7c478bd9Sstevel@tonic-gate if (TRACE(4)) 519*7c478bd9Sstevel@tonic-gate (void) printf("state %d: char '%c'\n", s-r->states, *b); 520*7c478bd9Sstevel@tonic-gate #endif 521*7c478bd9Sstevel@tonic-gate if ((t = s->tab[c]) != NULL) s = t; 522*7c478bd9Sstevel@tonic-gate else s = nextstate(r, s, (int)c); 523*7c478bd9Sstevel@tonic-gate b++; 524*7c478bd9Sstevel@tonic-gate } 525*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 526*7c478bd9Sstevel@tonic-gate if (TRACE(3)) { 527*7c478bd9Sstevel@tonic-gate uchar_t buf[EPRINTSIZE]; 528*7c478bd9Sstevel@tonic-gate 529*7c478bd9Sstevel@tonic-gate epr(r->root, buf); 530*7c478bd9Sstevel@tonic-gate (void) printf("pat = %s\n", buf); 531*7c478bd9Sstevel@tonic-gate } 532*7c478bd9Sstevel@tonic-gate #endif 533*7c478bd9Sstevel@tonic-gate return (NO); 534*7c478bd9Sstevel@tonic-gate } 535*7c478bd9Sstevel@tonic-gate 536*7c478bd9Sstevel@tonic-gate static BOOL 537*7c478bd9Sstevel@tonic-gate match(Expr *e, int a) 538*7c478bd9Sstevel@tonic-gate { 539*7c478bd9Sstevel@tonic-gate switch (e->type) { 540*7c478bd9Sstevel@tonic-gate case Dot: return ((BOOL)(a != NL)); 541*7c478bd9Sstevel@tonic-gate case Literal: return ((BOOL)(a == e->lit)); 542*7c478bd9Sstevel@tonic-gate case Charclass: return ((BOOL)(CCL_CHK((uchar_t *)e->r, a))); 543*7c478bd9Sstevel@tonic-gate default: return (NO); 544*7c478bd9Sstevel@tonic-gate } 545*7c478bd9Sstevel@tonic-gate } 546*7c478bd9Sstevel@tonic-gate 547*7c478bd9Sstevel@tonic-gate /* 548*7c478bd9Sstevel@tonic-gate * generates the followset for a node in fpos 549*7c478bd9Sstevel@tonic-gate */ 550*7c478bd9Sstevel@tonic-gate static void 551*7c478bd9Sstevel@tonic-gate follow(Positionset *fpos, Expr *e) 552*7c478bd9Sstevel@tonic-gate { 553*7c478bd9Sstevel@tonic-gate Expr *p; 554*7c478bd9Sstevel@tonic-gate 555*7c478bd9Sstevel@tonic-gate if (e->type == EOP) 556*7c478bd9Sstevel@tonic-gate return; 557*7c478bd9Sstevel@tonic-gate else 558*7c478bd9Sstevel@tonic-gate p = e->parent; 559*7c478bd9Sstevel@tonic-gate switch (p->type) { 560*7c478bd9Sstevel@tonic-gate case EOP: 561*7c478bd9Sstevel@tonic-gate SET(fpos, p->id, 0) 562*7c478bd9Sstevel@tonic-gate break; 563*7c478bd9Sstevel@tonic-gate case Plus: 564*7c478bd9Sstevel@tonic-gate case Star: 565*7c478bd9Sstevel@tonic-gate (void) first_lit(fpos, e); 566*7c478bd9Sstevel@tonic-gate follow(fpos, p); 567*7c478bd9Sstevel@tonic-gate break; 568*7c478bd9Sstevel@tonic-gate case Quest: 569*7c478bd9Sstevel@tonic-gate case Alternate: 570*7c478bd9Sstevel@tonic-gate follow(fpos, p); 571*7c478bd9Sstevel@tonic-gate break; 572*7c478bd9Sstevel@tonic-gate case Cat: 573*7c478bd9Sstevel@tonic-gate if (e == p->r || !first_lit(fpos, p->r)) 574*7c478bd9Sstevel@tonic-gate follow(fpos, p); 575*7c478bd9Sstevel@tonic-gate break; 576*7c478bd9Sstevel@tonic-gate default: 577*7c478bd9Sstevel@tonic-gate break; 578*7c478bd9Sstevel@tonic-gate } 579*7c478bd9Sstevel@tonic-gate } 580*7c478bd9Sstevel@tonic-gate 581*7c478bd9Sstevel@tonic-gate /* 582*7c478bd9Sstevel@tonic-gate * first_lit returns NO if e is nullable and in the process, 583*7c478bd9Sstevel@tonic-gate * ets up fpos. 584*7c478bd9Sstevel@tonic-gate */ 585*7c478bd9Sstevel@tonic-gate static BOOL 586*7c478bd9Sstevel@tonic-gate first_lit(Positionset *fpos, Expr *e) 587*7c478bd9Sstevel@tonic-gate { 588*7c478bd9Sstevel@tonic-gate BOOL k; 589*7c478bd9Sstevel@tonic-gate 590*7c478bd9Sstevel@tonic-gate switch (e->type) { 591*7c478bd9Sstevel@tonic-gate case Literal: 592*7c478bd9Sstevel@tonic-gate case Dot: 593*7c478bd9Sstevel@tonic-gate case Charclass: 594*7c478bd9Sstevel@tonic-gate SET(fpos, e->id, 0) 595*7c478bd9Sstevel@tonic-gate return (YES); 596*7c478bd9Sstevel@tonic-gate case EOP: 597*7c478bd9Sstevel@tonic-gate case Star: 598*7c478bd9Sstevel@tonic-gate case Quest: 599*7c478bd9Sstevel@tonic-gate (void) first_lit(fpos, e->l); 600*7c478bd9Sstevel@tonic-gate return (NO); 601*7c478bd9Sstevel@tonic-gate case Plus: 602*7c478bd9Sstevel@tonic-gate return (first_lit(fpos, e->l)); 603*7c478bd9Sstevel@tonic-gate case Cat: 604*7c478bd9Sstevel@tonic-gate return ((BOOL)(first_lit(fpos, e->l) || first_lit(fpos, e->r))); 605*7c478bd9Sstevel@tonic-gate case Alternate: 606*7c478bd9Sstevel@tonic-gate k = first_lit(fpos, e->r); 607*7c478bd9Sstevel@tonic-gate return ((BOOL)(first_lit(fpos, e->l) && k)); 608*7c478bd9Sstevel@tonic-gate default: 609*7c478bd9Sstevel@tonic-gate err("internal error"); 610*7c478bd9Sstevel@tonic-gate } 611*7c478bd9Sstevel@tonic-gate return (NO); 612*7c478bd9Sstevel@tonic-gate } 613*7c478bd9Sstevel@tonic-gate 614*7c478bd9Sstevel@tonic-gate static void 615*7c478bd9Sstevel@tonic-gate efollow(re_re *r, Positionset *fpos, Expr *e) 616*7c478bd9Sstevel@tonic-gate { 617*7c478bd9Sstevel@tonic-gate ID i, *p; 618*7c478bd9Sstevel@tonic-gate 619*7c478bd9Sstevel@tonic-gate CLEAR(r, fpos); 620*7c478bd9Sstevel@tonic-gate follow(fpos, e); 621*7c478bd9Sstevel@tonic-gate e->flen = fpos->count; 622*7c478bd9Sstevel@tonic-gate e->follow = (ID *)egmalloc(e->flen * sizeof (ID)); 623*7c478bd9Sstevel@tonic-gate p = e->follow; 624*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 625*7c478bd9Sstevel@tonic-gate printf("ID = %d LIT %c FLEN = %d\n", e->id, e->lit, e->flen); 626*7c478bd9Sstevel@tonic-gate #endif 627*7c478bd9Sstevel@tonic-gate for (i = fpos->last; i > 0; i = fpos->base[i].id) { 628*7c478bd9Sstevel@tonic-gate *p++ = i; 629*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 630*7c478bd9Sstevel@tonic-gate printf("FOLLOW ID = %d LIT %c\n", r->ptr[i]->id, r->ptr[i]->lit); 631*7c478bd9Sstevel@tonic-gate #endif 632*7c478bd9Sstevel@tonic-gate } 633*7c478bd9Sstevel@tonic-gate if (p != e->follow + e->flen) { 634*7c478bd9Sstevel@tonic-gate err("internal error"); 635*7c478bd9Sstevel@tonic-gate } 636*7c478bd9Sstevel@tonic-gate } 637*7c478bd9Sstevel@tonic-gate 638*7c478bd9Sstevel@tonic-gate static State * 639*7c478bd9Sstevel@tonic-gate addstate(re_re *r, Positionset *ps, int cnt) 640*7c478bd9Sstevel@tonic-gate { 641*7c478bd9Sstevel@tonic-gate ID j; 642*7c478bd9Sstevel@tonic-gate FID *p, *q; 643*7c478bd9Sstevel@tonic-gate State *s; 644*7c478bd9Sstevel@tonic-gate 645*7c478bd9Sstevel@tonic-gate if (cnt) { 646*7c478bd9Sstevel@tonic-gate s = r->states + getstate(r, ps); 647*7c478bd9Sstevel@tonic-gate (void) memset((char *)s->tab, 0, sizeof (s->tab)); 648*7c478bd9Sstevel@tonic-gate s->cnt = r->istate.cnt; 649*7c478bd9Sstevel@tonic-gate } else { 650*7c478bd9Sstevel@tonic-gate s = &r->istate; 651*7c478bd9Sstevel@tonic-gate s->cnt = -1; 652*7c478bd9Sstevel@tonic-gate } 653*7c478bd9Sstevel@tonic-gate s->pos = r->posnext; 654*7c478bd9Sstevel@tonic-gate r->posnext += ps->count; 655*7c478bd9Sstevel@tonic-gate s->npos = ps->count; 656*7c478bd9Sstevel@tonic-gate p = r->posbase + s->pos; 657*7c478bd9Sstevel@tonic-gate for (j = ps->last; j > 0; p++, j = q->id) { 658*7c478bd9Sstevel@tonic-gate q = &ps->base[j]; 659*7c478bd9Sstevel@tonic-gate p->id = j; 660*7c478bd9Sstevel@tonic-gate p->fcount = q->fcount; 661*7c478bd9Sstevel@tonic-gate if (p->id == r->root->id && s->cnt < p->fcount) 662*7c478bd9Sstevel@tonic-gate s->cnt = p->fcount; 663*7c478bd9Sstevel@tonic-gate } 664*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 665*7c478bd9Sstevel@tonic-gate if (TRACE(3)) { 666*7c478bd9Sstevel@tonic-gate uchar_t buf[2000]; 667*7c478bd9Sstevel@tonic-gate spr(s->npos, s->pos+r->posbase, buf); 668*7c478bd9Sstevel@tonic-gate (void) printf("new state[%d] %s%s\n", s-r->states, buf, 669*7c478bd9Sstevel@tonic-gate s->cnt?" cnt":""); 670*7c478bd9Sstevel@tonic-gate } 671*7c478bd9Sstevel@tonic-gate #endif 672*7c478bd9Sstevel@tonic-gate return (s); 673*7c478bd9Sstevel@tonic-gate } 674*7c478bd9Sstevel@tonic-gate 675*7c478bd9Sstevel@tonic-gate static State * 676*7c478bd9Sstevel@tonic-gate nextstate(re_re *r, State *s, int a) 677*7c478bd9Sstevel@tonic-gate { 678*7c478bd9Sstevel@tonic-gate State *news; 679*7c478bd9Sstevel@tonic-gate 680*7c478bd9Sstevel@tonic-gate CLEAR(r, &r->tmp); 681*7c478bd9Sstevel@tonic-gate followstate(r, s, a, &r->tmp); 682*7c478bd9Sstevel@tonic-gate if (s != &r->istate) followstate(r, &r->istate, a, &r->tmp); 683*7c478bd9Sstevel@tonic-gate 684*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 685*7c478bd9Sstevel@tonic-gate if (TRACE(5)) { 686*7c478bd9Sstevel@tonic-gate uchar_t buf[2000]; 687*7c478bd9Sstevel@tonic-gate ppr(&r->tmp, buf); 688*7c478bd9Sstevel@tonic-gate (void) printf("nextstate(%d, '%c'): found %s\n", s-r->states, 689*7c478bd9Sstevel@tonic-gate a, buf); 690*7c478bd9Sstevel@tonic-gate } 691*7c478bd9Sstevel@tonic-gate #endif 692*7c478bd9Sstevel@tonic-gate if (r->tmp.count == 0) 693*7c478bd9Sstevel@tonic-gate news = &r->istate; 694*7c478bd9Sstevel@tonic-gate else if ((news = stateof(r, &r->tmp)) == NULL) 695*7c478bd9Sstevel@tonic-gate news = addstate(r, &r->tmp, 1); 696*7c478bd9Sstevel@tonic-gate s->tab[a] = news; 697*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 698*7c478bd9Sstevel@tonic-gate if (TRACE(5)) { 699*7c478bd9Sstevel@tonic-gate (void) printf("nextstate(%d, '%c'): returning %ld\n", 700*7c478bd9Sstevel@tonic-gate s-r->states, a, news); 701*7c478bd9Sstevel@tonic-gate } 702*7c478bd9Sstevel@tonic-gate #endif 703*7c478bd9Sstevel@tonic-gate return (news); 704*7c478bd9Sstevel@tonic-gate } 705*7c478bd9Sstevel@tonic-gate 706*7c478bd9Sstevel@tonic-gate static void 707*7c478bd9Sstevel@tonic-gate followstate(re_re *r, State *s, int a, Positionset *fpos) 708*7c478bd9Sstevel@tonic-gate { 709*7c478bd9Sstevel@tonic-gate int j; 710*7c478bd9Sstevel@tonic-gate ID *q, *eq; 711*7c478bd9Sstevel@tonic-gate Expr *e; 712*7c478bd9Sstevel@tonic-gate 713*7c478bd9Sstevel@tonic-gate for (j = s->pos; j < (s->pos + s->npos); j++) { 714*7c478bd9Sstevel@tonic-gate e = r->ptr[r->posbase[j].id]; 715*7c478bd9Sstevel@tonic-gate if (e->type == EOP) continue; 716*7c478bd9Sstevel@tonic-gate if (match(e, a)) { 717*7c478bd9Sstevel@tonic-gate if (e->follow == NULL) efollow(r, &r->firstpos, e); 718*7c478bd9Sstevel@tonic-gate for (q = e->follow, eq = q + e->flen; q < eq; q++) { 719*7c478bd9Sstevel@tonic-gate SET(fpos, *q, r->posbase[j].fcount + 1) 720*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 721*7c478bd9Sstevel@tonic-gate printf("CHAR %c FC %c COUNT %d\n", 722*7c478bd9Sstevel@tonic-gate a, 723*7c478bd9Sstevel@tonic-gate r->ptr[*q]->lit, 724*7c478bd9Sstevel@tonic-gate r->posbase[j].fcount+1); 725*7c478bd9Sstevel@tonic-gate #endif 726*7c478bd9Sstevel@tonic-gate } 727*7c478bd9Sstevel@tonic-gate } 728*7c478bd9Sstevel@tonic-gate } 729*7c478bd9Sstevel@tonic-gate } 730*7c478bd9Sstevel@tonic-gate 731*7c478bd9Sstevel@tonic-gate static uchar_t * 732*7c478bd9Sstevel@tonic-gate egmalloc(size_t n) 733*7c478bd9Sstevel@tonic-gate { 734*7c478bd9Sstevel@tonic-gate uchar_t *x; 735*7c478bd9Sstevel@tonic-gate 736*7c478bd9Sstevel@tonic-gate x = (uchar_t *)mymalloc(n); 737*7c478bd9Sstevel@tonic-gate mallocs[nmalloc++] = x; 738*7c478bd9Sstevel@tonic-gate if (nmalloc >= MAXMALLOCS) 739*7c478bd9Sstevel@tonic-gate nmalloc = MAXMALLOCS - 1; 740*7c478bd9Sstevel@tonic-gate return (x); 741*7c478bd9Sstevel@tonic-gate } 742*7c478bd9Sstevel@tonic-gate 743*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 744*7c478bd9Sstevel@tonic-gate void 745*7c478bd9Sstevel@tonic-gate ppr(Positionse *ps, char *p) 746*7c478bd9Sstevel@tonic-gate { 747*7c478bd9Sstevel@tonic-gate ID n; 748*7c478bd9Sstevel@tonic-gate 749*7c478bd9Sstevel@tonic-gate if (ps->count < 1) { 750*7c478bd9Sstevel@tonic-gate (void) sprintf(p, "{}"); 751*7c478bd9Sstevel@tonic-gate return; 752*7c478bd9Sstevel@tonic-gate } 753*7c478bd9Sstevel@tonic-gate *p++ = '{'; 754*7c478bd9Sstevel@tonic-gate for (n = ps->last; n > 0; n = ps->base[n].id) { 755*7c478bd9Sstevel@tonic-gate (void) sprintf(p, "%d,", n); 756*7c478bd9Sstevel@tonic-gate p = strchr(p, 0); 757*7c478bd9Sstevel@tonic-gate } 758*7c478bd9Sstevel@tonic-gate p[-1] = '}'; 759*7c478bd9Sstevel@tonic-gate } 760*7c478bd9Sstevel@tonic-gate 761*7c478bd9Sstevel@tonic-gate void 762*7c478bd9Sstevel@tonic-gate epr(Expr *e, uchar_t *res) 763*7c478bd9Sstevel@tonic-gate { 764*7c478bd9Sstevel@tonic-gate uchar_t r1[EPRINTSIZE], r2[EPRINTSIZE]; 765*7c478bd9Sstevel@tonic-gate int i; 766*7c478bd9Sstevel@tonic-gate 767*7c478bd9Sstevel@tonic-gate if (e == NULL) { 768*7c478bd9Sstevel@tonic-gate (void) sprintf(res, "!0!"); 769*7c478bd9Sstevel@tonic-gate return; 770*7c478bd9Sstevel@tonic-gate } 771*7c478bd9Sstevel@tonic-gate switch (e->type) { 772*7c478bd9Sstevel@tonic-gate case Dot: 773*7c478bd9Sstevel@tonic-gate case Literal: 774*7c478bd9Sstevel@tonic-gate spr(e->flen, e->follow, r1); 775*7c478bd9Sstevel@tonic-gate (void) sprintf(res, "%c%s", e->lit, r1); 776*7c478bd9Sstevel@tonic-gate break; 777*7c478bd9Sstevel@tonic-gate case Charclass: 778*7c478bd9Sstevel@tonic-gate *res++ = '['; 779*7c478bd9Sstevel@tonic-gate for (i = 0; i < 256; i++) 780*7c478bd9Sstevel@tonic-gate if (CCL_CHK((uchar_t *)e->r, i)) { 781*7c478bd9Sstevel@tonic-gate *res++ = i; 782*7c478bd9Sstevel@tonic-gate } 783*7c478bd9Sstevel@tonic-gate *res++ = ']'; 784*7c478bd9Sstevel@tonic-gate *res = '\0'; 785*7c478bd9Sstevel@tonic-gate break; 786*7c478bd9Sstevel@tonic-gate case Cat: 787*7c478bd9Sstevel@tonic-gate epr(e->l, r1); 788*7c478bd9Sstevel@tonic-gate epr(e->r, r2); 789*7c478bd9Sstevel@tonic-gate (void) sprintf(res, "%s%s", r1, r2); 790*7c478bd9Sstevel@tonic-gate break; 791*7c478bd9Sstevel@tonic-gate case Alternate: 792*7c478bd9Sstevel@tonic-gate epr(e->l, r1); 793*7c478bd9Sstevel@tonic-gate epr(e->r, r2); 794*7c478bd9Sstevel@tonic-gate (void) sprintf(res, "(%s|%s)", r1, r2); 795*7c478bd9Sstevel@tonic-gate break; 796*7c478bd9Sstevel@tonic-gate case Star: 797*7c478bd9Sstevel@tonic-gate epr(e->l, r1); 798*7c478bd9Sstevel@tonic-gate (void) sprintf(res, "(%s)*", r1); 799*7c478bd9Sstevel@tonic-gate break; 800*7c478bd9Sstevel@tonic-gate case Plus: 801*7c478bd9Sstevel@tonic-gate epr(e->l, r1); 802*7c478bd9Sstevel@tonic-gate (void) sprintf(res, "(%s)+", r1); 803*7c478bd9Sstevel@tonic-gate break; 804*7c478bd9Sstevel@tonic-gate case Quest: 805*7c478bd9Sstevel@tonic-gate epr(e->l, r1); 806*7c478bd9Sstevel@tonic-gate (void) sprintf(res, "(%s)?", r1); 807*7c478bd9Sstevel@tonic-gate break; 808*7c478bd9Sstevel@tonic-gate case EOP: 809*7c478bd9Sstevel@tonic-gate epr(e->l, r1); 810*7c478bd9Sstevel@tonic-gate (void) sprintf(res, "%s<EOP>", r1); 811*7c478bd9Sstevel@tonic-gate break; 812*7c478bd9Sstevel@tonic-gate default: 813*7c478bd9Sstevel@tonic-gate (void) sprintf(res, "<undef type %d>", e->type); 814*7c478bd9Sstevel@tonic-gate err(res); 815*7c478bd9Sstevel@tonic-gate break; 816*7c478bd9Sstevel@tonic-gate } 817*7c478bd9Sstevel@tonic-gate } 818*7c478bd9Sstevel@tonic-gate 819*7c478bd9Sstevel@tonic-gate static void 820*7c478bd9Sstevel@tonic-gate spr(int c, int *p, uchar_t *buf) 821*7c478bd9Sstevel@tonic-gate { 822*7c478bd9Sstevel@tonic-gate if (c > 0) { 823*7c478bd9Sstevel@tonic-gate *buf++ = '{'; 824*7c478bd9Sstevel@tonic-gate *buf = '\0'; 825*7c478bd9Sstevel@tonic-gate while (--c > 0) { 826*7c478bd9Sstevel@tonic-gate (void) sprintf(buf, "%d,", *p++); 827*7c478bd9Sstevel@tonic-gate buf = strchr(buf, 0); 828*7c478bd9Sstevel@tonic-gate } 829*7c478bd9Sstevel@tonic-gate (void) sprintf(buf, "%d}", *p); 830*7c478bd9Sstevel@tonic-gate } else 831*7c478bd9Sstevel@tonic-gate (void) sprintf(buf, "{}"); 832*7c478bd9Sstevel@tonic-gate } 833*7c478bd9Sstevel@tonic-gate #endif 834*7c478bd9Sstevel@tonic-gate 835*7c478bd9Sstevel@tonic-gate static void 836*7c478bd9Sstevel@tonic-gate stateinit(re_re *r) 837*7c478bd9Sstevel@tonic-gate { 838*7c478bd9Sstevel@tonic-gate /* CONSTANTCONDITION */ 839*7c478bd9Sstevel@tonic-gate r->statelim = (sizeof (int) < 4 ? 32 : 128); 840*7c478bd9Sstevel@tonic-gate r->states = (State *)egmalloc(r->statelim * sizeof (State)); 841*7c478bd9Sstevel@tonic-gate 842*7c478bd9Sstevel@tonic-gate /* CONSTANTCONDITION */ 843*7c478bd9Sstevel@tonic-gate r->nposalloc = (sizeof (int) < 4 ? 2048 : 8192); 844*7c478bd9Sstevel@tonic-gate r->posbase = (FID *)egmalloc(r->nposalloc * sizeof (FID)); 845*7c478bd9Sstevel@tonic-gate r->nstates = r->posnext = 0; 846*7c478bd9Sstevel@tonic-gate } 847*7c478bd9Sstevel@tonic-gate 848*7c478bd9Sstevel@tonic-gate static void 849*7c478bd9Sstevel@tonic-gate clrstates(re_re *r) 850*7c478bd9Sstevel@tonic-gate { 851*7c478bd9Sstevel@tonic-gate r->nstates = 0; /* reclaim space for states and positions */ 852*7c478bd9Sstevel@tonic-gate r->posnext = r->posreset; 853*7c478bd9Sstevel@tonic-gate (void) memset((char *)r->istate.tab, 0, sizeof (r->istate.tab)); 854*7c478bd9Sstevel@tonic-gate } 855*7c478bd9Sstevel@tonic-gate 856*7c478bd9Sstevel@tonic-gate static void 857*7c478bd9Sstevel@tonic-gate savestate(re_re *r) 858*7c478bd9Sstevel@tonic-gate { 859*7c478bd9Sstevel@tonic-gate r->posreset = r->posnext; /* save for reset */ 860*7c478bd9Sstevel@tonic-gate } 861*7c478bd9Sstevel@tonic-gate 862*7c478bd9Sstevel@tonic-gate static State * 863*7c478bd9Sstevel@tonic-gate startstate(re_re *r) 864*7c478bd9Sstevel@tonic-gate { 865*7c478bd9Sstevel@tonic-gate return (&r->istate); 866*7c478bd9Sstevel@tonic-gate } 867*7c478bd9Sstevel@tonic-gate 868*7c478bd9Sstevel@tonic-gate static int 869*7c478bd9Sstevel@tonic-gate getstate(re_re *r, Positionset *ps) 870*7c478bd9Sstevel@tonic-gate { 871*7c478bd9Sstevel@tonic-gate if (r->nstates >= r->statelim || 872*7c478bd9Sstevel@tonic-gate r->posnext + ps->count >= r->nposalloc) { 873*7c478bd9Sstevel@tonic-gate clrstates(r); 874*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 875*7c478bd9Sstevel@tonic-gate printf("%d STATES FLUSHED\n", r->statelim); 876*7c478bd9Sstevel@tonic-gate #endif 877*7c478bd9Sstevel@tonic-gate } 878*7c478bd9Sstevel@tonic-gate return (r->nstates++); 879*7c478bd9Sstevel@tonic-gate } 880*7c478bd9Sstevel@tonic-gate 881*7c478bd9Sstevel@tonic-gate static State * 882*7c478bd9Sstevel@tonic-gate stateof(re_re *r, Positionset *ps) 883*7c478bd9Sstevel@tonic-gate { 884*7c478bd9Sstevel@tonic-gate State *s; 885*7c478bd9Sstevel@tonic-gate int i; 886*7c478bd9Sstevel@tonic-gate FID *p, *e; 887*7c478bd9Sstevel@tonic-gate 888*7c478bd9Sstevel@tonic-gate for (i = 0, s = r->states; i < r->nstates; i++, s++) { 889*7c478bd9Sstevel@tonic-gate if (s->npos == ps->count) { 890*7c478bd9Sstevel@tonic-gate for (p = s->pos+r->posbase, e = p+s->npos; p < e; p++) 891*7c478bd9Sstevel@tonic-gate if (ps->base[p->id].id == 0 || 892*7c478bd9Sstevel@tonic-gate ps->base[p->id].fcount != p->fcount) 893*7c478bd9Sstevel@tonic-gate goto next; 894*7c478bd9Sstevel@tonic-gate return (s); 895*7c478bd9Sstevel@tonic-gate } 896*7c478bd9Sstevel@tonic-gate next:; 897*7c478bd9Sstevel@tonic-gate } 898*7c478bd9Sstevel@tonic-gate return (NULL); 899*7c478bd9Sstevel@tonic-gate } 900*7c478bd9Sstevel@tonic-gate 901*7c478bd9Sstevel@tonic-gate static re_re * 902*7c478bd9Sstevel@tonic-gate egprep(PATTERN *pat) 903*7c478bd9Sstevel@tonic-gate { 904*7c478bd9Sstevel@tonic-gate re_re *r; 905*7c478bd9Sstevel@tonic-gate 906*7c478bd9Sstevel@tonic-gate r = (re_re *)egmalloc(sizeof (re_re)); 907*7c478bd9Sstevel@tonic-gate (void) memset((char *)r, 0, sizeof (re_re)); 908*7c478bd9Sstevel@tonic-gate 909*7c478bd9Sstevel@tonic-gate pat->loc1 = pat->expression; 910*7c478bd9Sstevel@tonic-gate pat->loc2 = pat->expression + strlen((char *)pat->expression); 911*7c478bd9Sstevel@tonic-gate 912*7c478bd9Sstevel@tonic-gate parno = 0; 913*7c478bd9Sstevel@tonic-gate maxid = 1; 914*7c478bd9Sstevel@tonic-gate r->cmap = pat->cmap; 915*7c478bd9Sstevel@tonic-gate lex(r, pat); 916*7c478bd9Sstevel@tonic-gate r->root = newexpr(EOP, '#', eall(r, pat), (Expr *)NULL); 917*7c478bd9Sstevel@tonic-gate r->maxid = maxid; 918*7c478bd9Sstevel@tonic-gate 919*7c478bd9Sstevel@tonic-gate eginit(r); 920*7c478bd9Sstevel@tonic-gate return (r); 921*7c478bd9Sstevel@tonic-gate } 922*7c478bd9Sstevel@tonic-gate 923*7c478bd9Sstevel@tonic-gate static Expr * 924*7c478bd9Sstevel@tonic-gate newexpr(Exprtype t, int lit, Expr *left, Expr *right) 925*7c478bd9Sstevel@tonic-gate { 926*7c478bd9Sstevel@tonic-gate Expr *e = (Expr *)egmalloc(sizeof (Expr)); 927*7c478bd9Sstevel@tonic-gate 928*7c478bd9Sstevel@tonic-gate e->type = t; 929*7c478bd9Sstevel@tonic-gate e->parent = NULL; 930*7c478bd9Sstevel@tonic-gate e->lit = lit; 931*7c478bd9Sstevel@tonic-gate 932*7c478bd9Sstevel@tonic-gate if (e->lit) e->id = maxid++; 933*7c478bd9Sstevel@tonic-gate else e->id = 0; 934*7c478bd9Sstevel@tonic-gate 935*7c478bd9Sstevel@tonic-gate if ((e->l = left) != NULL) { 936*7c478bd9Sstevel@tonic-gate left->parent = e; 937*7c478bd9Sstevel@tonic-gate } 938*7c478bd9Sstevel@tonic-gate if ((e->r = right) != NULL) { 939*7c478bd9Sstevel@tonic-gate right->parent = e; 940*7c478bd9Sstevel@tonic-gate } 941*7c478bd9Sstevel@tonic-gate e->follow = NULL; 942*7c478bd9Sstevel@tonic-gate e->flen = 0; 943*7c478bd9Sstevel@tonic-gate return (e); 944*7c478bd9Sstevel@tonic-gate } 945*7c478bd9Sstevel@tonic-gate 946*7c478bd9Sstevel@tonic-gate static void 947*7c478bd9Sstevel@tonic-gate lex(re_re *r, PATTERN *pat) 948*7c478bd9Sstevel@tonic-gate { 949*7c478bd9Sstevel@tonic-gate if (pat->loc1 == pat->loc2) { 950*7c478bd9Sstevel@tonic-gate toktype = EOP; 951*7c478bd9Sstevel@tonic-gate toklit = -1; 952*7c478bd9Sstevel@tonic-gate } else switch (toklit = *pat->loc1++) { 953*7c478bd9Sstevel@tonic-gate case '.': toktype = Dot; break; 954*7c478bd9Sstevel@tonic-gate case '*': toktype = Star; break; 955*7c478bd9Sstevel@tonic-gate case '+': toktype = Plus; break; 956*7c478bd9Sstevel@tonic-gate case '?': toktype = Quest; break; 957*7c478bd9Sstevel@tonic-gate case '[': toktype = Charclass; break; 958*7c478bd9Sstevel@tonic-gate case '|': toktype = Alternate; break; 959*7c478bd9Sstevel@tonic-gate case '(': toktype = Lpar; break; 960*7c478bd9Sstevel@tonic-gate case ')': toktype = Rpar; break; 961*7c478bd9Sstevel@tonic-gate case '\\': toktype = Backslash; 962*7c478bd9Sstevel@tonic-gate if (pat->loc1 == pat->loc2) { 963*7c478bd9Sstevel@tonic-gate err("syntax error - missing character " 964*7c478bd9Sstevel@tonic-gate "after \\"); 965*7c478bd9Sstevel@tonic-gate } else 966*7c478bd9Sstevel@tonic-gate toklit = r->cmap[*pat->loc1++]; 967*7c478bd9Sstevel@tonic-gate break; 968*7c478bd9Sstevel@tonic-gate case '^': case '$': toktype = Literal; toklit = NL; break; 969*7c478bd9Sstevel@tonic-gate default: toktype = Literal; toklit = r->cmap[toklit]; break; 970*7c478bd9Sstevel@tonic-gate } 971*7c478bd9Sstevel@tonic-gate } 972*7c478bd9Sstevel@tonic-gate 973*7c478bd9Sstevel@tonic-gate static int 974*7c478bd9Sstevel@tonic-gate ccl(PATTERN *pat, uchar_t *tab) 975*7c478bd9Sstevel@tonic-gate { 976*7c478bd9Sstevel@tonic-gate int i; 977*7c478bd9Sstevel@tonic-gate int range = 0; 978*7c478bd9Sstevel@tonic-gate int lastc = -1; 979*7c478bd9Sstevel@tonic-gate int count = 0; 980*7c478bd9Sstevel@tonic-gate BOOL comp = NO; 981*7c478bd9Sstevel@tonic-gate 982*7c478bd9Sstevel@tonic-gate (void) memset((char *)tab, 0, CCL_SIZ * sizeof (uchar_t)); 983*7c478bd9Sstevel@tonic-gate if (*pat->loc1 == '^') { 984*7c478bd9Sstevel@tonic-gate pat->loc1++; 985*7c478bd9Sstevel@tonic-gate comp = YES; 986*7c478bd9Sstevel@tonic-gate } 987*7c478bd9Sstevel@tonic-gate if (*pat->loc1 == ']') { 988*7c478bd9Sstevel@tonic-gate uchar_t c = pat->cmap[*pat->loc1]; 989*7c478bd9Sstevel@tonic-gate CCL_SET(tab, c); 990*7c478bd9Sstevel@tonic-gate lastc = *pat->loc1++; 991*7c478bd9Sstevel@tonic-gate } 992*7c478bd9Sstevel@tonic-gate /* scan for chars */ 993*7c478bd9Sstevel@tonic-gate for (; (pat->loc1 < pat->loc2) && (*pat->loc1 != ']'); 994*7c478bd9Sstevel@tonic-gate pat->loc1++) { 995*7c478bd9Sstevel@tonic-gate if (*pat->loc1 == '-') { 996*7c478bd9Sstevel@tonic-gate if (lastc < 0) CCL_SET(tab, pat->cmap['-']); 997*7c478bd9Sstevel@tonic-gate else range = 1; 998*7c478bd9Sstevel@tonic-gate continue; 999*7c478bd9Sstevel@tonic-gate } 1000*7c478bd9Sstevel@tonic-gate if (range) { 1001*7c478bd9Sstevel@tonic-gate for (i = *pat->loc1; i >= lastc; i--) { 1002*7c478bd9Sstevel@tonic-gate CCL_SET(tab, pat->cmap[i]); 1003*7c478bd9Sstevel@tonic-gate } 1004*7c478bd9Sstevel@tonic-gate } else { 1005*7c478bd9Sstevel@tonic-gate uchar_t c = pat->cmap[*pat->loc1]; 1006*7c478bd9Sstevel@tonic-gate CCL_SET(tab, c); 1007*7c478bd9Sstevel@tonic-gate } 1008*7c478bd9Sstevel@tonic-gate 1009*7c478bd9Sstevel@tonic-gate range = 0; 1010*7c478bd9Sstevel@tonic-gate 1011*7c478bd9Sstevel@tonic-gate lastc = *pat->loc1; 1012*7c478bd9Sstevel@tonic-gate } 1013*7c478bd9Sstevel@tonic-gate if (range) CCL_SET(tab, pat->cmap['-']); 1014*7c478bd9Sstevel@tonic-gate 1015*7c478bd9Sstevel@tonic-gate if (pat->loc1 < pat->loc2) pat->loc1++; 1016*7c478bd9Sstevel@tonic-gate else err("syntax error - missing ]"); 1017*7c478bd9Sstevel@tonic-gate 1018*7c478bd9Sstevel@tonic-gate if (comp) { 1019*7c478bd9Sstevel@tonic-gate CCL_SET(tab, pat->cmap[NL]); 1020*7c478bd9Sstevel@tonic-gate for (i = 0; i < CCL_SIZ; i++) tab[i] ^= 0xff; 1021*7c478bd9Sstevel@tonic-gate } 1022*7c478bd9Sstevel@tonic-gate for (i = 0; i < 256; i++) { 1023*7c478bd9Sstevel@tonic-gate if (pat->cmap[i] != i) CCL_CLR(tab, i); 1024*7c478bd9Sstevel@tonic-gate if (CCL_CHK(tab, i)) { 1025*7c478bd9Sstevel@tonic-gate lastc = i; 1026*7c478bd9Sstevel@tonic-gate count++; 1027*7c478bd9Sstevel@tonic-gate } 1028*7c478bd9Sstevel@tonic-gate } 1029*7c478bd9Sstevel@tonic-gate if (count == 1) 1030*7c478bd9Sstevel@tonic-gate *tab = (char)lastc; 1031*7c478bd9Sstevel@tonic-gate return (count); 1032*7c478bd9Sstevel@tonic-gate } 1033*7c478bd9Sstevel@tonic-gate 1034*7c478bd9Sstevel@tonic-gate /* 1035*7c478bd9Sstevel@tonic-gate * egrep patterns: 1036*7c478bd9Sstevel@tonic-gate * 1037*7c478bd9Sstevel@tonic-gate * Alternation: d0: d1 { '|' d1 }* 1038*7c478bd9Sstevel@tonic-gate * Concatenation: d1: d2 { d2 }* 1039*7c478bd9Sstevel@tonic-gate * Repetition: d2: d3 { '*' | '?' | '+' } 1040*7c478bd9Sstevel@tonic-gate * Literal: d3: lit | '.' | '[]' | '(' d0 ')' 1041*7c478bd9Sstevel@tonic-gate */ 1042*7c478bd9Sstevel@tonic-gate 1043*7c478bd9Sstevel@tonic-gate static Expr * 1044*7c478bd9Sstevel@tonic-gate d3(re_re *r, PATTERN *pat) 1045*7c478bd9Sstevel@tonic-gate { 1046*7c478bd9Sstevel@tonic-gate Expr *e; 1047*7c478bd9Sstevel@tonic-gate uchar_t *tab; 1048*7c478bd9Sstevel@tonic-gate int count; 1049*7c478bd9Sstevel@tonic-gate 1050*7c478bd9Sstevel@tonic-gate switch (toktype) { 1051*7c478bd9Sstevel@tonic-gate case Backslash: 1052*7c478bd9Sstevel@tonic-gate case Literal: 1053*7c478bd9Sstevel@tonic-gate e = newexpr(Literal, toklit, (Expr *)NULL, (Expr *)NULL); 1054*7c478bd9Sstevel@tonic-gate lex(r, pat); 1055*7c478bd9Sstevel@tonic-gate break; 1056*7c478bd9Sstevel@tonic-gate case Dot: 1057*7c478bd9Sstevel@tonic-gate e = newexpr(Dot, '.', (Expr *)NULL, (Expr *)NULL); 1058*7c478bd9Sstevel@tonic-gate lex(r, pat); 1059*7c478bd9Sstevel@tonic-gate break; 1060*7c478bd9Sstevel@tonic-gate case Charclass: 1061*7c478bd9Sstevel@tonic-gate tab = egmalloc(CCL_SIZ * sizeof (uchar_t)); 1062*7c478bd9Sstevel@tonic-gate count = ccl(pat, tab); 1063*7c478bd9Sstevel@tonic-gate if (count == 1) { 1064*7c478bd9Sstevel@tonic-gate toklit = *tab; 1065*7c478bd9Sstevel@tonic-gate e = newexpr(Literal, toklit, (Expr *)NULL, 1066*7c478bd9Sstevel@tonic-gate (Expr *)NULL); 1067*7c478bd9Sstevel@tonic-gate } else { 1068*7c478bd9Sstevel@tonic-gate e = newexpr(Charclass, '[', (Expr *)NULL, (Expr *)NULL); 1069*7c478bd9Sstevel@tonic-gate e->l = (Expr *)count; /* number of chars */ 1070*7c478bd9Sstevel@tonic-gate e->r = (Expr *)tab; /* bitmap of chars */ 1071*7c478bd9Sstevel@tonic-gate } 1072*7c478bd9Sstevel@tonic-gate lex(r, pat); 1073*7c478bd9Sstevel@tonic-gate break; 1074*7c478bd9Sstevel@tonic-gate case Lpar: 1075*7c478bd9Sstevel@tonic-gate lex(r, pat); 1076*7c478bd9Sstevel@tonic-gate count = ++parno; 1077*7c478bd9Sstevel@tonic-gate e = d0(r, pat); 1078*7c478bd9Sstevel@tonic-gate if (toktype == Rpar) 1079*7c478bd9Sstevel@tonic-gate lex(r, pat); 1080*7c478bd9Sstevel@tonic-gate else 1081*7c478bd9Sstevel@tonic-gate err("syntax error - missing )"); 1082*7c478bd9Sstevel@tonic-gate return (e); 1083*7c478bd9Sstevel@tonic-gate default: 1084*7c478bd9Sstevel@tonic-gate err("syntax error"); 1085*7c478bd9Sstevel@tonic-gate e = NULL; 1086*7c478bd9Sstevel@tonic-gate } 1087*7c478bd9Sstevel@tonic-gate return (e); 1088*7c478bd9Sstevel@tonic-gate } 1089*7c478bd9Sstevel@tonic-gate 1090*7c478bd9Sstevel@tonic-gate static Expr * 1091*7c478bd9Sstevel@tonic-gate d2(re_re *r, PATTERN *pat) 1092*7c478bd9Sstevel@tonic-gate { 1093*7c478bd9Sstevel@tonic-gate Expr *e; 1094*7c478bd9Sstevel@tonic-gate Exprtype t; 1095*7c478bd9Sstevel@tonic-gate 1096*7c478bd9Sstevel@tonic-gate e = d3(r, pat); 1097*7c478bd9Sstevel@tonic-gate while ((toktype == Star) || (toktype == Plus) || (toktype == Quest)) { 1098*7c478bd9Sstevel@tonic-gate t = toktype; 1099*7c478bd9Sstevel@tonic-gate lex(r, pat); 1100*7c478bd9Sstevel@tonic-gate e = newexpr(t, 0, e, (Expr *)NULL); 1101*7c478bd9Sstevel@tonic-gate } 1102*7c478bd9Sstevel@tonic-gate return (e); 1103*7c478bd9Sstevel@tonic-gate } 1104*7c478bd9Sstevel@tonic-gate 1105*7c478bd9Sstevel@tonic-gate static Expr * 1106*7c478bd9Sstevel@tonic-gate d1(re_re *r, PATTERN *pat) 1107*7c478bd9Sstevel@tonic-gate { 1108*7c478bd9Sstevel@tonic-gate Expr *e, *f; 1109*7c478bd9Sstevel@tonic-gate 1110*7c478bd9Sstevel@tonic-gate e = d2(r, pat); 1111*7c478bd9Sstevel@tonic-gate while ((toktype == Literal) || (toktype == Dot) || (toktype == Lpar) || 1112*7c478bd9Sstevel@tonic-gate (toktype == Backslash) || (toktype == Charclass)) { 1113*7c478bd9Sstevel@tonic-gate f = d2(r, pat); 1114*7c478bd9Sstevel@tonic-gate e = newexpr(Cat, 0, e, f); 1115*7c478bd9Sstevel@tonic-gate } 1116*7c478bd9Sstevel@tonic-gate return (e); 1117*7c478bd9Sstevel@tonic-gate } 1118*7c478bd9Sstevel@tonic-gate 1119*7c478bd9Sstevel@tonic-gate static Expr * 1120*7c478bd9Sstevel@tonic-gate d0(re_re *r, PATTERN *pat) 1121*7c478bd9Sstevel@tonic-gate { 1122*7c478bd9Sstevel@tonic-gate Expr *e, *f; 1123*7c478bd9Sstevel@tonic-gate 1124*7c478bd9Sstevel@tonic-gate e = d1(r, pat); 1125*7c478bd9Sstevel@tonic-gate while (toktype == Alternate) { 1126*7c478bd9Sstevel@tonic-gate lex(r, pat); 1127*7c478bd9Sstevel@tonic-gate if (toktype == EOP) 1128*7c478bd9Sstevel@tonic-gate continue; 1129*7c478bd9Sstevel@tonic-gate f = d1(r, pat); 1130*7c478bd9Sstevel@tonic-gate e = newexpr(Alternate, 0, e, f); 1131*7c478bd9Sstevel@tonic-gate } 1132*7c478bd9Sstevel@tonic-gate return (e); 1133*7c478bd9Sstevel@tonic-gate } 1134*7c478bd9Sstevel@tonic-gate 1135*7c478bd9Sstevel@tonic-gate static Expr * 1136*7c478bd9Sstevel@tonic-gate eall(re_re *r, PATTERN *pat) 1137*7c478bd9Sstevel@tonic-gate { 1138*7c478bd9Sstevel@tonic-gate Expr *e; 1139*7c478bd9Sstevel@tonic-gate 1140*7c478bd9Sstevel@tonic-gate while (toktype == Alternate) /* bogus but user-friendly */ 1141*7c478bd9Sstevel@tonic-gate lex(r, pat); 1142*7c478bd9Sstevel@tonic-gate e = d0(r, pat); 1143*7c478bd9Sstevel@tonic-gate while (toktype == Alternate) /* bogus but user-friendly */ 1144*7c478bd9Sstevel@tonic-gate lex(r, pat); 1145*7c478bd9Sstevel@tonic-gate if (toktype != EOP) 1146*7c478bd9Sstevel@tonic-gate err("syntax error"); 1147*7c478bd9Sstevel@tonic-gate return (e); 1148*7c478bd9Sstevel@tonic-gate } 1149*7c478bd9Sstevel@tonic-gate 1150*7c478bd9Sstevel@tonic-gate static void 1151*7c478bd9Sstevel@tonic-gate err(char *s) 1152*7c478bd9Sstevel@tonic-gate { 1153*7c478bd9Sstevel@tonic-gate message = s; 1154*7c478bd9Sstevel@tonic-gate longjmp(env, 1); 1155*7c478bd9Sstevel@tonic-gate } 1156*7c478bd9Sstevel@tonic-gate 1157*7c478bd9Sstevel@tonic-gate static int 1158*7c478bd9Sstevel@tonic-gate re_lit(PATTERN *pat, uchar_t **b, uchar_t **e) 1159*7c478bd9Sstevel@tonic-gate { 1160*7c478bd9Sstevel@tonic-gate bestlen = 0; 1161*7c478bd9Sstevel@tonic-gate pat->fullmatch = YES; 1162*7c478bd9Sstevel@tonic-gate START 1163*7c478bd9Sstevel@tonic-gate traverse(pat, pat->re_ptr->root->l); 1164*7c478bd9Sstevel@tonic-gate FINISH 1165*7c478bd9Sstevel@tonic-gate if (bestlen < 2) 1166*7c478bd9Sstevel@tonic-gate return (0); 1167*7c478bd9Sstevel@tonic-gate *b = egmalloc(bestlen * sizeof (uchar_t)); 1168*7c478bd9Sstevel@tonic-gate (void) memmove(*b, best, bestlen); 1169*7c478bd9Sstevel@tonic-gate *e = *b + bestlen - 1; 1170*7c478bd9Sstevel@tonic-gate return (bestlen - 1); 1171*7c478bd9Sstevel@tonic-gate } 1172*7c478bd9Sstevel@tonic-gate 1173*7c478bd9Sstevel@tonic-gate static void 1174*7c478bd9Sstevel@tonic-gate traverse(PATTERN *pat, Expr *e) 1175*7c478bd9Sstevel@tonic-gate { 1176*7c478bd9Sstevel@tonic-gate switch (e->type) { 1177*7c478bd9Sstevel@tonic-gate case Literal: 1178*7c478bd9Sstevel@tonic-gate ADDL(e->lit) 1179*7c478bd9Sstevel@tonic-gate break; 1180*7c478bd9Sstevel@tonic-gate case Cat: 1181*7c478bd9Sstevel@tonic-gate traverse(pat, e->l); 1182*7c478bd9Sstevel@tonic-gate traverse(pat, e->r); 1183*7c478bd9Sstevel@tonic-gate break; 1184*7c478bd9Sstevel@tonic-gate case Plus: 1185*7c478bd9Sstevel@tonic-gate traverse(pat, e->l); 1186*7c478bd9Sstevel@tonic-gate FINISH /* can't go on past a + */ 1187*7c478bd9Sstevel@tonic-gate pat->fullmatch = NO; 1188*7c478bd9Sstevel@tonic-gate START /* but we can start with one! */ 1189*7c478bd9Sstevel@tonic-gate traverse(pat, e->l); 1190*7c478bd9Sstevel@tonic-gate break; 1191*7c478bd9Sstevel@tonic-gate default: 1192*7c478bd9Sstevel@tonic-gate FINISH 1193*7c478bd9Sstevel@tonic-gate pat->fullmatch = NO; 1194*7c478bd9Sstevel@tonic-gate START 1195*7c478bd9Sstevel@tonic-gate break; 1196*7c478bd9Sstevel@tonic-gate } 1197*7c478bd9Sstevel@tonic-gate } 1198*7c478bd9Sstevel@tonic-gate 1199*7c478bd9Sstevel@tonic-gate static re_bm * 1200*7c478bd9Sstevel@tonic-gate re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap) 1201*7c478bd9Sstevel@tonic-gate { 1202*7c478bd9Sstevel@tonic-gate int j; 1203*7c478bd9Sstevel@tonic-gate int delta[256]; 1204*7c478bd9Sstevel@tonic-gate re_bm *b; 1205*7c478bd9Sstevel@tonic-gate 1206*7c478bd9Sstevel@tonic-gate b = (re_bm *)egmalloc(sizeof (*b)); 1207*7c478bd9Sstevel@tonic-gate 1208*7c478bd9Sstevel@tonic-gate b->patlen = pe - pb; 1209*7c478bd9Sstevel@tonic-gate b->cmap = cmap; 1210*7c478bd9Sstevel@tonic-gate b->bmpat = pb; 1211*7c478bd9Sstevel@tonic-gate 1212*7c478bd9Sstevel@tonic-gate delta_2(b); 1213*7c478bd9Sstevel@tonic-gate 1214*7c478bd9Sstevel@tonic-gate for (j = 0; j < 256; j++) 1215*7c478bd9Sstevel@tonic-gate delta[j] = b->patlen; 1216*7c478bd9Sstevel@tonic-gate 1217*7c478bd9Sstevel@tonic-gate for (pe--; pb < pe; pb++) 1218*7c478bd9Sstevel@tonic-gate delta[b->cmap[*pb]] = pe - pb; 1219*7c478bd9Sstevel@tonic-gate 1220*7c478bd9Sstevel@tonic-gate delta[b->cmap[*pb]] = LARGE; 1221*7c478bd9Sstevel@tonic-gate 1222*7c478bd9Sstevel@tonic-gate for (j = 0; j < 256; j++) 1223*7c478bd9Sstevel@tonic-gate b->delta0[j] = delta[b->cmap[j]]; 1224*7c478bd9Sstevel@tonic-gate return (b); 1225*7c478bd9Sstevel@tonic-gate } 1226*7c478bd9Sstevel@tonic-gate 1227*7c478bd9Sstevel@tonic-gate static void 1228*7c478bd9Sstevel@tonic-gate delta_2(re_bm *b) 1229*7c478bd9Sstevel@tonic-gate { 1230*7c478bd9Sstevel@tonic-gate int m = b->patlen; 1231*7c478bd9Sstevel@tonic-gate int i, k, j; 1232*7c478bd9Sstevel@tonic-gate 1233*7c478bd9Sstevel@tonic-gate b->delta2 = (int *)egmalloc(m * sizeof (int)); 1234*7c478bd9Sstevel@tonic-gate 1235*7c478bd9Sstevel@tonic-gate for (j = 0; j < m; j++) { 1236*7c478bd9Sstevel@tonic-gate k = 1; 1237*7c478bd9Sstevel@tonic-gate again: 1238*7c478bd9Sstevel@tonic-gate while (k <= j && b->bmpat[j-k] == b->bmpat[j]) k++; 1239*7c478bd9Sstevel@tonic-gate 1240*7c478bd9Sstevel@tonic-gate for (i = j + 1; i < m; i++) { 1241*7c478bd9Sstevel@tonic-gate if (k <= i && b->bmpat[i-k] != b->bmpat[i]) { 1242*7c478bd9Sstevel@tonic-gate k++; 1243*7c478bd9Sstevel@tonic-gate goto again; 1244*7c478bd9Sstevel@tonic-gate } 1245*7c478bd9Sstevel@tonic-gate } 1246*7c478bd9Sstevel@tonic-gate b->delta2[j] = k + m - j - 1; 1247*7c478bd9Sstevel@tonic-gate } 1248*7c478bd9Sstevel@tonic-gate } 1249*7c478bd9Sstevel@tonic-gate 1250*7c478bd9Sstevel@tonic-gate static BOOL 1251*7c478bd9Sstevel@tonic-gate re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb, uchar_t **me) 1252*7c478bd9Sstevel@tonic-gate { 1253*7c478bd9Sstevel@tonic-gate re_bm *b = pat->bm_ptr; 1254*7c478bd9Sstevel@tonic-gate uchar_t *sp; 1255*7c478bd9Sstevel@tonic-gate int k; 1256*7c478bd9Sstevel@tonic-gate 1257*7c478bd9Sstevel@tonic-gate s += b->patlen - 1; 1258*7c478bd9Sstevel@tonic-gate while ((unsigned long)s < (unsigned long)e) { 1259*7c478bd9Sstevel@tonic-gate while ((unsigned long)(s += b->delta0[*s]) < (unsigned long)e) 1260*7c478bd9Sstevel@tonic-gate ; 1261*7c478bd9Sstevel@tonic-gate if ((unsigned long)s < (unsigned long)(e + b->patlen)) 1262*7c478bd9Sstevel@tonic-gate return (NO); /* no match */ 1263*7c478bd9Sstevel@tonic-gate s -= LARGE; 1264*7c478bd9Sstevel@tonic-gate for (k = b->patlen-2, sp = s-1; k >= 0; k--) { 1265*7c478bd9Sstevel@tonic-gate if (b->cmap[*sp--] != b->bmpat[k]) break; 1266*7c478bd9Sstevel@tonic-gate } 1267*7c478bd9Sstevel@tonic-gate if (k < 0) { 1268*7c478bd9Sstevel@tonic-gate *mb = ++sp; 1269*7c478bd9Sstevel@tonic-gate *me = s+1; 1270*7c478bd9Sstevel@tonic-gate if (grepmatch(pat, mb, me)) 1271*7c478bd9Sstevel@tonic-gate return (YES); 1272*7c478bd9Sstevel@tonic-gate s = *mb; 1273*7c478bd9Sstevel@tonic-gate } else if (k < 0) { 1274*7c478bd9Sstevel@tonic-gate s++; 1275*7c478bd9Sstevel@tonic-gate } else { 1276*7c478bd9Sstevel@tonic-gate int j; 1277*7c478bd9Sstevel@tonic-gate j = b->delta2[k]; 1278*7c478bd9Sstevel@tonic-gate k = b->delta0[*++sp]; 1279*7c478bd9Sstevel@tonic-gate if ((j > k) || (k == (int)LARGE)) 1280*7c478bd9Sstevel@tonic-gate k = j; 1281*7c478bd9Sstevel@tonic-gate s = sp + k; 1282*7c478bd9Sstevel@tonic-gate } 1283*7c478bd9Sstevel@tonic-gate } 1284*7c478bd9Sstevel@tonic-gate return (NO); 1285*7c478bd9Sstevel@tonic-gate } 1286*7c478bd9Sstevel@tonic-gate 1287*7c478bd9Sstevel@tonic-gate static re_cw * 1288*7c478bd9Sstevel@tonic-gate re_recw(re_re *r, uchar_t *map) 1289*7c478bd9Sstevel@tonic-gate { 1290*7c478bd9Sstevel@tonic-gate Expr *e, *root = r->root; 1291*7c478bd9Sstevel@tonic-gate re_cw *pat; 1292*7c478bd9Sstevel@tonic-gate uchar_t *buf; 1293*7c478bd9Sstevel@tonic-gate 1294*7c478bd9Sstevel@tonic-gate if (root->type != EOP) 1295*7c478bd9Sstevel@tonic-gate return (NULL); 1296*7c478bd9Sstevel@tonic-gate e = root->l; 1297*7c478bd9Sstevel@tonic-gate pat = re_cwinit(map); 1298*7c478bd9Sstevel@tonic-gate buf = (uchar_t *)egmalloc(20000 * sizeof (uchar_t)); 1299*7c478bd9Sstevel@tonic-gate if (!altlist(e, buf, pat)) { 1300*7c478bd9Sstevel@tonic-gate return (NULL); 1301*7c478bd9Sstevel@tonic-gate } 1302*7c478bd9Sstevel@tonic-gate re_cwcomp(pat); 1303*7c478bd9Sstevel@tonic-gate return (pat); 1304*7c478bd9Sstevel@tonic-gate } 1305*7c478bd9Sstevel@tonic-gate 1306*7c478bd9Sstevel@tonic-gate static BOOL 1307*7c478bd9Sstevel@tonic-gate altlist(Expr *e, uchar_t *buf, re_cw *pat) 1308*7c478bd9Sstevel@tonic-gate { 1309*7c478bd9Sstevel@tonic-gate if (e->type == Alternate) 1310*7c478bd9Sstevel@tonic-gate return ((BOOL)(altlist(e->l, buf, pat) && 1311*7c478bd9Sstevel@tonic-gate altlist(e->r, buf, pat))); 1312*7c478bd9Sstevel@tonic-gate return (word(e, buf, pat)); 1313*7c478bd9Sstevel@tonic-gate } 1314*7c478bd9Sstevel@tonic-gate 1315*7c478bd9Sstevel@tonic-gate static BOOL 1316*7c478bd9Sstevel@tonic-gate word(Expr *e, uchar_t *buf, re_cw *pat) 1317*7c478bd9Sstevel@tonic-gate { 1318*7c478bd9Sstevel@tonic-gate static uchar_t *p; 1319*7c478bd9Sstevel@tonic-gate 1320*7c478bd9Sstevel@tonic-gate if (buf) p = buf; 1321*7c478bd9Sstevel@tonic-gate 1322*7c478bd9Sstevel@tonic-gate if (e->type == Cat) { 1323*7c478bd9Sstevel@tonic-gate if (!word(e->l, (uchar_t *)NULL, pat)) 1324*7c478bd9Sstevel@tonic-gate return (NO); 1325*7c478bd9Sstevel@tonic-gate if (!word(e->r, (uchar_t *)NULL, pat)) 1326*7c478bd9Sstevel@tonic-gate return (NO); 1327*7c478bd9Sstevel@tonic-gate } else if (e->type == Literal) 1328*7c478bd9Sstevel@tonic-gate *p++ = e->lit; 1329*7c478bd9Sstevel@tonic-gate else 1330*7c478bd9Sstevel@tonic-gate return (NO); 1331*7c478bd9Sstevel@tonic-gate 1332*7c478bd9Sstevel@tonic-gate if (buf) 1333*7c478bd9Sstevel@tonic-gate re_cwadd(pat, buf, p); 1334*7c478bd9Sstevel@tonic-gate return (YES); 1335*7c478bd9Sstevel@tonic-gate } 1336*7c478bd9Sstevel@tonic-gate 1337*7c478bd9Sstevel@tonic-gate static re_cw * 1338*7c478bd9Sstevel@tonic-gate re_cwinit(uchar_t *cmap) 1339*7c478bd9Sstevel@tonic-gate { 1340*7c478bd9Sstevel@tonic-gate re_cw *c; 1341*7c478bd9Sstevel@tonic-gate 1342*7c478bd9Sstevel@tonic-gate froot = NULL; 1343*7c478bd9Sstevel@tonic-gate next_node = NULL; 1344*7c478bd9Sstevel@tonic-gate next_link = NULL; 1345*7c478bd9Sstevel@tonic-gate c = (re_cw *)egmalloc(sizeof (*c)); 1346*7c478bd9Sstevel@tonic-gate c->nodeid = 0; 1347*7c478bd9Sstevel@tonic-gate c->maxdepth = 0; 1348*7c478bd9Sstevel@tonic-gate c->mindepth = 10000; 1349*7c478bd9Sstevel@tonic-gate c->root = newnode(c, 0); 1350*7c478bd9Sstevel@tonic-gate c->cmap = cmap; 1351*7c478bd9Sstevel@tonic-gate return (c); 1352*7c478bd9Sstevel@tonic-gate } 1353*7c478bd9Sstevel@tonic-gate 1354*7c478bd9Sstevel@tonic-gate static void 1355*7c478bd9Sstevel@tonic-gate re_cwadd(re_cw *c, uchar_t *s, uchar_t *e) 1356*7c478bd9Sstevel@tonic-gate { 1357*7c478bd9Sstevel@tonic-gate Node *p, *state; 1358*7c478bd9Sstevel@tonic-gate Link *l; 1359*7c478bd9Sstevel@tonic-gate int depth; 1360*7c478bd9Sstevel@tonic-gate 1361*7c478bd9Sstevel@tonic-gate state = c->root; 1362*7c478bd9Sstevel@tonic-gate while (s <= --e) { 1363*7c478bd9Sstevel@tonic-gate for (l = state->alts; l; l = l->next) 1364*7c478bd9Sstevel@tonic-gate if (l->lit == *e) 1365*7c478bd9Sstevel@tonic-gate break; 1366*7c478bd9Sstevel@tonic-gate if (l == NULL) 1367*7c478bd9Sstevel@tonic-gate break; 1368*7c478bd9Sstevel@tonic-gate else 1369*7c478bd9Sstevel@tonic-gate state = l->node; 1370*7c478bd9Sstevel@tonic-gate } 1371*7c478bd9Sstevel@tonic-gate if (s <= e) { 1372*7c478bd9Sstevel@tonic-gate depth = state->d+1; 1373*7c478bd9Sstevel@tonic-gate l = newlink(*e--, p = newnode(c, depth++)); 1374*7c478bd9Sstevel@tonic-gate l->next = state->alts; 1375*7c478bd9Sstevel@tonic-gate state->alts = l; 1376*7c478bd9Sstevel@tonic-gate state = p; 1377*7c478bd9Sstevel@tonic-gate while (s <= e) { 1378*7c478bd9Sstevel@tonic-gate state->alts = newlink(*e--, p = newnode(c, depth++)); 1379*7c478bd9Sstevel@tonic-gate state = p; 1380*7c478bd9Sstevel@tonic-gate } 1381*7c478bd9Sstevel@tonic-gate if (c->mindepth >= depth) c->mindepth = depth-1; 1382*7c478bd9Sstevel@tonic-gate } 1383*7c478bd9Sstevel@tonic-gate state->out = 1; 1384*7c478bd9Sstevel@tonic-gate } 1385*7c478bd9Sstevel@tonic-gate 1386*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 1387*7c478bd9Sstevel@tonic-gate static 1388*7c478bd9Sstevel@tonic-gate pr(Node *n) 1389*7c478bd9Sstevel@tonic-gate { 1390*7c478bd9Sstevel@tonic-gate Link *l; 1391*7c478bd9Sstevel@tonic-gate 1392*7c478bd9Sstevel@tonic-gate printf("%d[%d,%d]: ", n->id, n->shift1, n->shift2); 1393*7c478bd9Sstevel@tonic-gate for (l = n->alts; l; l = l->next) { 1394*7c478bd9Sstevel@tonic-gate printf("edge from \"%d\" to \"%d\" label {\"%c\"};", 1395*7c478bd9Sstevel@tonic-gate n->id, l->node->id, l->lit); 1396*7c478bd9Sstevel@tonic-gate if (l->node->out) { 1397*7c478bd9Sstevel@tonic-gate printf(" draw \"%d\" as Doublecircle;", l->node->id); 1398*7c478bd9Sstevel@tonic-gate } 1399*7c478bd9Sstevel@tonic-gate if (l->node->fail) { 1400*7c478bd9Sstevel@tonic-gate printf(" edge from \"%d\" to \"%d\" dashed;", 1401*7c478bd9Sstevel@tonic-gate l->node->id, l->node->fail->id); 1402*7c478bd9Sstevel@tonic-gate } 1403*7c478bd9Sstevel@tonic-gate printf("\n"); 1404*7c478bd9Sstevel@tonic-gate pr(l->node); 1405*7c478bd9Sstevel@tonic-gate } 1406*7c478bd9Sstevel@tonic-gate } 1407*7c478bd9Sstevel@tonic-gate #endif 1408*7c478bd9Sstevel@tonic-gate 1409*7c478bd9Sstevel@tonic-gate static void 1410*7c478bd9Sstevel@tonic-gate fail(Node *root) 1411*7c478bd9Sstevel@tonic-gate { 1412*7c478bd9Sstevel@tonic-gate Link *qhead = NULL, *qtail = NULL; 1413*7c478bd9Sstevel@tonic-gate Link *l, *ll; 1414*7c478bd9Sstevel@tonic-gate Link *t; 1415*7c478bd9Sstevel@tonic-gate Node *state, *r, *s; 1416*7c478bd9Sstevel@tonic-gate int a; 1417*7c478bd9Sstevel@tonic-gate 1418*7c478bd9Sstevel@tonic-gate for (l = root->alts; l; l = l->next) { 1419*7c478bd9Sstevel@tonic-gate ADD(l->node); 1420*7c478bd9Sstevel@tonic-gate l->node->fail = root; 1421*7c478bd9Sstevel@tonic-gate } 1422*7c478bd9Sstevel@tonic-gate while (qhead) { 1423*7c478bd9Sstevel@tonic-gate r = qhead->node; 1424*7c478bd9Sstevel@tonic-gate DEL(); 1425*7c478bd9Sstevel@tonic-gate for (l = r->alts; l; l = l->next) { 1426*7c478bd9Sstevel@tonic-gate s = l->node; 1427*7c478bd9Sstevel@tonic-gate a = l->lit; 1428*7c478bd9Sstevel@tonic-gate ADD(s); 1429*7c478bd9Sstevel@tonic-gate state = r->fail; 1430*7c478bd9Sstevel@tonic-gate while (state) { 1431*7c478bd9Sstevel@tonic-gate for (ll = state->alts; ll; ll = ll->next) 1432*7c478bd9Sstevel@tonic-gate if (ll->lit == a) 1433*7c478bd9Sstevel@tonic-gate break; 1434*7c478bd9Sstevel@tonic-gate if (ll || (state == root)) { 1435*7c478bd9Sstevel@tonic-gate if (ll) 1436*7c478bd9Sstevel@tonic-gate state = ll->node; 1437*7c478bd9Sstevel@tonic-gate /* 1438*7c478bd9Sstevel@tonic-gate * do it here as only other exit is 1439*7c478bd9Sstevel@tonic-gate * state 0 1440*7c478bd9Sstevel@tonic-gate */ 1441*7c478bd9Sstevel@tonic-gate if (state->out) { 1442*7c478bd9Sstevel@tonic-gate s->out = 1; 1443*7c478bd9Sstevel@tonic-gate } 1444*7c478bd9Sstevel@tonic-gate break; 1445*7c478bd9Sstevel@tonic-gate } else 1446*7c478bd9Sstevel@tonic-gate state = state->fail; 1447*7c478bd9Sstevel@tonic-gate } 1448*7c478bd9Sstevel@tonic-gate s->fail = state; 1449*7c478bd9Sstevel@tonic-gate } 1450*7c478bd9Sstevel@tonic-gate } 1451*7c478bd9Sstevel@tonic-gate zeroroot(root, root); 1452*7c478bd9Sstevel@tonic-gate } 1453*7c478bd9Sstevel@tonic-gate 1454*7c478bd9Sstevel@tonic-gate static void 1455*7c478bd9Sstevel@tonic-gate zeroroot(Node *root, Node *n) 1456*7c478bd9Sstevel@tonic-gate { 1457*7c478bd9Sstevel@tonic-gate Link *l; 1458*7c478bd9Sstevel@tonic-gate 1459*7c478bd9Sstevel@tonic-gate if (n->fail == root) 1460*7c478bd9Sstevel@tonic-gate n->fail = NULL; 1461*7c478bd9Sstevel@tonic-gate for (l = n->alts; l; l = l->next) 1462*7c478bd9Sstevel@tonic-gate zeroroot(root, l->node); 1463*7c478bd9Sstevel@tonic-gate } 1464*7c478bd9Sstevel@tonic-gate 1465*7c478bd9Sstevel@tonic-gate static void 1466*7c478bd9Sstevel@tonic-gate shift(re_cw *c) 1467*7c478bd9Sstevel@tonic-gate { 1468*7c478bd9Sstevel@tonic-gate Link *qhead = NULL, *qtail = NULL; 1469*7c478bd9Sstevel@tonic-gate Link *l; 1470*7c478bd9Sstevel@tonic-gate Link *t; 1471*7c478bd9Sstevel@tonic-gate Node *state, *r, *s; 1472*7c478bd9Sstevel@tonic-gate int k; 1473*7c478bd9Sstevel@tonic-gate 1474*7c478bd9Sstevel@tonic-gate for (k = 0; k < 256; k++) 1475*7c478bd9Sstevel@tonic-gate c->step[k] = c->mindepth+1; 1476*7c478bd9Sstevel@tonic-gate c->root->shift1 = 1; 1477*7c478bd9Sstevel@tonic-gate c->root->shift2 = c->mindepth; 1478*7c478bd9Sstevel@tonic-gate for (l = c->root->alts; l; l = l->next) { 1479*7c478bd9Sstevel@tonic-gate l->node->shift2 = c->root->shift2; 1480*7c478bd9Sstevel@tonic-gate ADD(l->node); 1481*7c478bd9Sstevel@tonic-gate l->node->fail = NULL; 1482*7c478bd9Sstevel@tonic-gate } 1483*7c478bd9Sstevel@tonic-gate while (qhead) { 1484*7c478bd9Sstevel@tonic-gate r = qhead->node; 1485*7c478bd9Sstevel@tonic-gate DEL(); 1486*7c478bd9Sstevel@tonic-gate r->shift1 = c->mindepth; 1487*7c478bd9Sstevel@tonic-gate r->shift2 = c->mindepth; 1488*7c478bd9Sstevel@tonic-gate if ((state = r->fail) != NULL) { 1489*7c478bd9Sstevel@tonic-gate do { 1490*7c478bd9Sstevel@tonic-gate k = r->d - state->d; 1491*7c478bd9Sstevel@tonic-gate if (k < state->shift1) { 1492*7c478bd9Sstevel@tonic-gate state->shift1 = k; 1493*7c478bd9Sstevel@tonic-gate } 1494*7c478bd9Sstevel@tonic-gate if (r->out && (k < state->shift2)) { 1495*7c478bd9Sstevel@tonic-gate state->shift2 = k; 1496*7c478bd9Sstevel@tonic-gate } 1497*7c478bd9Sstevel@tonic-gate } while ((state = state->fail) != NULL); 1498*7c478bd9Sstevel@tonic-gate } 1499*7c478bd9Sstevel@tonic-gate for (l = r->alts; l; l = l->next) { 1500*7c478bd9Sstevel@tonic-gate s = l->node; 1501*7c478bd9Sstevel@tonic-gate ADD(s); 1502*7c478bd9Sstevel@tonic-gate } 1503*7c478bd9Sstevel@tonic-gate } 1504*7c478bd9Sstevel@tonic-gate shiftprop(c, c->root); 1505*7c478bd9Sstevel@tonic-gate shifttab(c->root); 1506*7c478bd9Sstevel@tonic-gate c->step[0] = 1; 1507*7c478bd9Sstevel@tonic-gate } 1508*7c478bd9Sstevel@tonic-gate 1509*7c478bd9Sstevel@tonic-gate static void 1510*7c478bd9Sstevel@tonic-gate shifttab(Node *n) 1511*7c478bd9Sstevel@tonic-gate { 1512*7c478bd9Sstevel@tonic-gate Link *l; 1513*7c478bd9Sstevel@tonic-gate Node **nn; 1514*7c478bd9Sstevel@tonic-gate 1515*7c478bd9Sstevel@tonic-gate n->tab = nn = (Node **)egmalloc(256 * sizeof (Node *)); 1516*7c478bd9Sstevel@tonic-gate (void) memset((char *)n->tab, 0, 256 * sizeof (Node *)); 1517*7c478bd9Sstevel@tonic-gate for (l = n->alts; l; l = l->next) 1518*7c478bd9Sstevel@tonic-gate nn[l->lit] = l->node; 1519*7c478bd9Sstevel@tonic-gate } 1520*7c478bd9Sstevel@tonic-gate 1521*7c478bd9Sstevel@tonic-gate static void 1522*7c478bd9Sstevel@tonic-gate shiftprop(re_cw *c, Node *n) 1523*7c478bd9Sstevel@tonic-gate { 1524*7c478bd9Sstevel@tonic-gate Link *l; 1525*7c478bd9Sstevel@tonic-gate Node *nn; 1526*7c478bd9Sstevel@tonic-gate 1527*7c478bd9Sstevel@tonic-gate for (l = n->alts; l; l = l->next) { 1528*7c478bd9Sstevel@tonic-gate if (c->step[l->lit] > l->node->d) 1529*7c478bd9Sstevel@tonic-gate c->step[l->lit] = l->node->d; 1530*7c478bd9Sstevel@tonic-gate nn = l->node; 1531*7c478bd9Sstevel@tonic-gate if (n->shift2 < nn->shift2) 1532*7c478bd9Sstevel@tonic-gate nn->shift2 = n->shift2; 1533*7c478bd9Sstevel@tonic-gate shiftprop(c, nn); 1534*7c478bd9Sstevel@tonic-gate } 1535*7c478bd9Sstevel@tonic-gate } 1536*7c478bd9Sstevel@tonic-gate 1537*7c478bd9Sstevel@tonic-gate static void 1538*7c478bd9Sstevel@tonic-gate re_cwcomp(re_cw *c) 1539*7c478bd9Sstevel@tonic-gate { 1540*7c478bd9Sstevel@tonic-gate fail(c->root); 1541*7c478bd9Sstevel@tonic-gate shift(c); 1542*7c478bd9Sstevel@tonic-gate } 1543*7c478bd9Sstevel@tonic-gate 1544*7c478bd9Sstevel@tonic-gate static BOOL 1545*7c478bd9Sstevel@tonic-gate re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb, uchar_t **me) 1546*7c478bd9Sstevel@tonic-gate { 1547*7c478bd9Sstevel@tonic-gate Node *state; 1548*7c478bd9Sstevel@tonic-gate Link *l; 1549*7c478bd9Sstevel@tonic-gate uchar_t *sp; 1550*7c478bd9Sstevel@tonic-gate uchar_t *s; 1551*7c478bd9Sstevel@tonic-gate uchar_t *e; 1552*7c478bd9Sstevel@tonic-gate Node *ostate; 1553*7c478bd9Sstevel@tonic-gate int k; 1554*7c478bd9Sstevel@tonic-gate re_cw *c = pat->cw_ptr; 1555*7c478bd9Sstevel@tonic-gate uchar_t fake[1]; 1556*7c478bd9Sstevel@tonic-gate uchar_t mappedsp; 1557*7c478bd9Sstevel@tonic-gate 1558*7c478bd9Sstevel@tonic-gate fake[0] = 0; 1559*7c478bd9Sstevel@tonic-gate state = c->root; 1560*7c478bd9Sstevel@tonic-gate 1561*7c478bd9Sstevel@tonic-gate s = rs+c->mindepth-1; 1562*7c478bd9Sstevel@tonic-gate e = re; 1563*7c478bd9Sstevel@tonic-gate while (s < e) { 1564*7c478bd9Sstevel@tonic-gate /* scan */ 1565*7c478bd9Sstevel@tonic-gate for (sp = s; (ostate = state) != NULL; ) { 1566*7c478bd9Sstevel@tonic-gate mappedsp = c->cmap[*sp]; 1567*7c478bd9Sstevel@tonic-gate if (state->tab) { 1568*7c478bd9Sstevel@tonic-gate if ((state = state->tab[mappedsp]) == NULL) 1569*7c478bd9Sstevel@tonic-gate goto nomatch; 1570*7c478bd9Sstevel@tonic-gate } else { 1571*7c478bd9Sstevel@tonic-gate for (l = state->alts; ; l = l->next) { 1572*7c478bd9Sstevel@tonic-gate if (l == NULL) 1573*7c478bd9Sstevel@tonic-gate goto nomatch; 1574*7c478bd9Sstevel@tonic-gate if (l->lit == mappedsp) { 1575*7c478bd9Sstevel@tonic-gate state = l->node; 1576*7c478bd9Sstevel@tonic-gate break; 1577*7c478bd9Sstevel@tonic-gate } 1578*7c478bd9Sstevel@tonic-gate } 1579*7c478bd9Sstevel@tonic-gate } 1580*7c478bd9Sstevel@tonic-gate if (state->out) { 1581*7c478bd9Sstevel@tonic-gate *mb = sp; 1582*7c478bd9Sstevel@tonic-gate *me = s+1; 1583*7c478bd9Sstevel@tonic-gate if (fixloc(mb, me)) 1584*7c478bd9Sstevel@tonic-gate return (YES); 1585*7c478bd9Sstevel@tonic-gate } 1586*7c478bd9Sstevel@tonic-gate if (--sp < rs) { 1587*7c478bd9Sstevel@tonic-gate sp = fake; 1588*7c478bd9Sstevel@tonic-gate break; 1589*7c478bd9Sstevel@tonic-gate } 1590*7c478bd9Sstevel@tonic-gate } 1591*7c478bd9Sstevel@tonic-gate nomatch: 1592*7c478bd9Sstevel@tonic-gate k = c->step[c->cmap[*sp]] - ostate->d - 1; 1593*7c478bd9Sstevel@tonic-gate if (k < ostate->shift1) 1594*7c478bd9Sstevel@tonic-gate k = ostate->shift1; 1595*7c478bd9Sstevel@tonic-gate if (k > ostate->shift2) 1596*7c478bd9Sstevel@tonic-gate k = ostate->shift2; 1597*7c478bd9Sstevel@tonic-gate s += k; 1598*7c478bd9Sstevel@tonic-gate state = c->root; 1599*7c478bd9Sstevel@tonic-gate } 1600*7c478bd9Sstevel@tonic-gate return (NO); 1601*7c478bd9Sstevel@tonic-gate } 1602*7c478bd9Sstevel@tonic-gate 1603*7c478bd9Sstevel@tonic-gate static Node * 1604*7c478bd9Sstevel@tonic-gate newnode(re_cw *c, int d) 1605*7c478bd9Sstevel@tonic-gate { 1606*7c478bd9Sstevel@tonic-gate static Node *lim = NULL; 1607*7c478bd9Sstevel@tonic-gate static int incr = 256; 1608*7c478bd9Sstevel@tonic-gate 1609*7c478bd9Sstevel@tonic-gate if (!next_node) lim = NULL; 1610*7c478bd9Sstevel@tonic-gate if (next_node == lim) { 1611*7c478bd9Sstevel@tonic-gate next_node = (Node *)egmalloc(incr * sizeof (Node)); 1612*7c478bd9Sstevel@tonic-gate lim = next_node + incr; 1613*7c478bd9Sstevel@tonic-gate } 1614*7c478bd9Sstevel@tonic-gate next_node->d = d; 1615*7c478bd9Sstevel@tonic-gate if (d > c->maxdepth) c->maxdepth = d; 1616*7c478bd9Sstevel@tonic-gate next_node->id = c->nodeid++; 1617*7c478bd9Sstevel@tonic-gate next_node->alts = NULL; 1618*7c478bd9Sstevel@tonic-gate next_node->tab = NULL; 1619*7c478bd9Sstevel@tonic-gate next_node->out = 0; 1620*7c478bd9Sstevel@tonic-gate return (next_node++); 1621*7c478bd9Sstevel@tonic-gate } 1622*7c478bd9Sstevel@tonic-gate 1623*7c478bd9Sstevel@tonic-gate static Link * 1624*7c478bd9Sstevel@tonic-gate newlink(uchar_t lit, Node *n) 1625*7c478bd9Sstevel@tonic-gate { 1626*7c478bd9Sstevel@tonic-gate static Link *lim = NULL; 1627*7c478bd9Sstevel@tonic-gate static int incr = 256; 1628*7c478bd9Sstevel@tonic-gate 1629*7c478bd9Sstevel@tonic-gate if (!next_link) lim = NULL; 1630*7c478bd9Sstevel@tonic-gate if (next_link == lim) { 1631*7c478bd9Sstevel@tonic-gate next_link = (Link *)egmalloc(incr * sizeof (Link)); 1632*7c478bd9Sstevel@tonic-gate lim = next_link + incr; 1633*7c478bd9Sstevel@tonic-gate } 1634*7c478bd9Sstevel@tonic-gate next_link->lit = lit; 1635*7c478bd9Sstevel@tonic-gate next_link->node = n; 1636*7c478bd9Sstevel@tonic-gate next_link->next = NULL; 1637*7c478bd9Sstevel@tonic-gate return (next_link++); 1638*7c478bd9Sstevel@tonic-gate } 1639*7c478bd9Sstevel@tonic-gate 1640*7c478bd9Sstevel@tonic-gate int 1641*7c478bd9Sstevel@tonic-gate egrep(char *f, FILE *o, char *fo) 1642*7c478bd9Sstevel@tonic-gate { 1643*7c478bd9Sstevel@tonic-gate uchar_t buff[MAXBUFSIZE + ESIZE]; 1644*7c478bd9Sstevel@tonic-gate 1645*7c478bd9Sstevel@tonic-gate buffer = buff; 1646*7c478bd9Sstevel@tonic-gate *buffer++ = NL; /* New line precedes buffer to prevent runover */ 1647*7c478bd9Sstevel@tonic-gate file = f; 1648*7c478bd9Sstevel@tonic-gate output = o; 1649*7c478bd9Sstevel@tonic-gate format = fo; 1650*7c478bd9Sstevel@tonic-gate return (execute()); 1651*7c478bd9Sstevel@tonic-gate } 1652*7c478bd9Sstevel@tonic-gate 1653*7c478bd9Sstevel@tonic-gate static int 1654*7c478bd9Sstevel@tonic-gate execute(void) 1655*7c478bd9Sstevel@tonic-gate { 1656*7c478bd9Sstevel@tonic-gate LINE current; 1657*7c478bd9Sstevel@tonic-gate BOOL matched; 1658*7c478bd9Sstevel@tonic-gate BOOL all_searched; /* file being matched until end */ 1659*7c478bd9Sstevel@tonic-gate 1660*7c478bd9Sstevel@tonic-gate if ((file_desc = open(file, O_RDONLY)) < 0) { 1661*7c478bd9Sstevel@tonic-gate return (-1); 1662*7c478bd9Sstevel@tonic-gate } 1663*7c478bd9Sstevel@tonic-gate init_file(¤t); 1664*7c478bd9Sstevel@tonic-gate /* while there is more get more text from the data stream */ 1665*7c478bd9Sstevel@tonic-gate for (;;) { 1666*7c478bd9Sstevel@tonic-gate all_searched = NO; 1667*7c478bd9Sstevel@tonic-gate 1668*7c478bd9Sstevel@tonic-gate /* 1669*7c478bd9Sstevel@tonic-gate * Find next new-line in buffer. 1670*7c478bd9Sstevel@tonic-gate * Begin after previous new line character 1671*7c478bd9Sstevel@tonic-gate */ 1672*7c478bd9Sstevel@tonic-gate 1673*7c478bd9Sstevel@tonic-gate current.prntbuf = current.newline + 1; 1674*7c478bd9Sstevel@tonic-gate current.ln++; /* increment line number */ 1675*7c478bd9Sstevel@tonic-gate 1676*7c478bd9Sstevel@tonic-gate if (current.prntbuf < bufend) { 1677*7c478bd9Sstevel@tonic-gate /* There is more text in buffer */ 1678*7c478bd9Sstevel@tonic-gate 1679*7c478bd9Sstevel@tonic-gate /* 1680*7c478bd9Sstevel@tonic-gate * Take our next 1681*7c478bd9Sstevel@tonic-gate * "line" as the entire remaining buffer. 1682*7c478bd9Sstevel@tonic-gate * However, if there is more of the file yet 1683*7c478bd9Sstevel@tonic-gate * to be read in, exclude any incomplete 1684*7c478bd9Sstevel@tonic-gate * line at end. 1685*7c478bd9Sstevel@tonic-gate */ 1686*7c478bd9Sstevel@tonic-gate if (file_stat == NO_MORE) { 1687*7c478bd9Sstevel@tonic-gate all_searched = YES; 1688*7c478bd9Sstevel@tonic-gate current.newline = bufend - 1; 1689*7c478bd9Sstevel@tonic-gate if (*current.newline != NL) { 1690*7c478bd9Sstevel@tonic-gate current.newline = bufend; 1691*7c478bd9Sstevel@tonic-gate } 1692*7c478bd9Sstevel@tonic-gate } else { 1693*7c478bd9Sstevel@tonic-gate /* 1694*7c478bd9Sstevel@tonic-gate * Find end of the last 1695*7c478bd9Sstevel@tonic-gate * complete line in the buffer. 1696*7c478bd9Sstevel@tonic-gate */ 1697*7c478bd9Sstevel@tonic-gate current.newline = bufend; 1698*7c478bd9Sstevel@tonic-gate while (*--current.newline != NL) { 1699*7c478bd9Sstevel@tonic-gate } 1700*7c478bd9Sstevel@tonic-gate if (current.newline < current.prntbuf) { 1701*7c478bd9Sstevel@tonic-gate /* end not found */ 1702*7c478bd9Sstevel@tonic-gate current.newline = bufend; 1703*7c478bd9Sstevel@tonic-gate } 1704*7c478bd9Sstevel@tonic-gate } 1705*7c478bd9Sstevel@tonic-gate } else { 1706*7c478bd9Sstevel@tonic-gate /* There is no more text in the buffer. */ 1707*7c478bd9Sstevel@tonic-gate current.newline = bufend; 1708*7c478bd9Sstevel@tonic-gate } 1709*7c478bd9Sstevel@tonic-gate if (current.newline >= bufend) { 1710*7c478bd9Sstevel@tonic-gate /* 1711*7c478bd9Sstevel@tonic-gate * There is no more text in the buffer, 1712*7c478bd9Sstevel@tonic-gate * or no new line was found. 1713*7c478bd9Sstevel@tonic-gate */ 1714*7c478bd9Sstevel@tonic-gate switch (file_stat) { 1715*7c478bd9Sstevel@tonic-gate case MORE: /* file partly unread */ 1716*7c478bd9Sstevel@tonic-gate case BEGIN: 1717*7c478bd9Sstevel@tonic-gate fgetfile(¤t); 1718*7c478bd9Sstevel@tonic-gate 1719*7c478bd9Sstevel@tonic-gate current.ln--; 1720*7c478bd9Sstevel@tonic-gate continue; /* with while loop */ 1721*7c478bd9Sstevel@tonic-gate case NO_MORE: 1722*7c478bd9Sstevel@tonic-gate break; 1723*7c478bd9Sstevel@tonic-gate } 1724*7c478bd9Sstevel@tonic-gate /* Nothing more to read in for this file. */ 1725*7c478bd9Sstevel@tonic-gate if (current.newline <= current.prntbuf) { 1726*7c478bd9Sstevel@tonic-gate /* Nothing in the buffer, either */ 1727*7c478bd9Sstevel@tonic-gate /* We are done with the file. */ 1728*7c478bd9Sstevel@tonic-gate current.ln--; 1729*7c478bd9Sstevel@tonic-gate break; /* out of while loop */ 1730*7c478bd9Sstevel@tonic-gate } 1731*7c478bd9Sstevel@tonic-gate /* There is no NL at the end of the file */ 1732*7c478bd9Sstevel@tonic-gate } 1733*7c478bd9Sstevel@tonic-gate 1734*7c478bd9Sstevel@tonic-gate matched = pattern_match(&match_pattern, ¤t); 1735*7c478bd9Sstevel@tonic-gate if (matched) { 1736*7c478bd9Sstevel@tonic-gate int nc; 1737*7c478bd9Sstevel@tonic-gate 1738*7c478bd9Sstevel@tonic-gate get_ncount(¤t, match_pattern.loc1); 1739*7c478bd9Sstevel@tonic-gate get_line(¤t, match_pattern.loc2); 1740*7c478bd9Sstevel@tonic-gate 1741*7c478bd9Sstevel@tonic-gate nc = current.newline + 1 - current.prntbuf; 1742*7c478bd9Sstevel@tonic-gate (void) fprintf(output, format, file, current.ln); 1743*7c478bd9Sstevel@tonic-gate (void) fwrite((char *)current.prntbuf, 1, nc, output); 1744*7c478bd9Sstevel@tonic-gate } else { 1745*7c478bd9Sstevel@tonic-gate if (all_searched) 1746*7c478bd9Sstevel@tonic-gate break; /* out of while loop */ 1747*7c478bd9Sstevel@tonic-gate 1748*7c478bd9Sstevel@tonic-gate get_ncount(¤t, current.newline + 1); 1749*7c478bd9Sstevel@tonic-gate } 1750*7c478bd9Sstevel@tonic-gate } 1751*7c478bd9Sstevel@tonic-gate 1752*7c478bd9Sstevel@tonic-gate (void) close(file_desc); 1753*7c478bd9Sstevel@tonic-gate return (0); 1754*7c478bd9Sstevel@tonic-gate } 1755*7c478bd9Sstevel@tonic-gate 1756*7c478bd9Sstevel@tonic-gate static void 1757*7c478bd9Sstevel@tonic-gate init_file(LINE *cur_ptr) 1758*7c478bd9Sstevel@tonic-gate { 1759*7c478bd9Sstevel@tonic-gate file_stat = BEGIN; 1760*7c478bd9Sstevel@tonic-gate cur_ptr->ln = 0; 1761*7c478bd9Sstevel@tonic-gate bufend = buffer; 1762*7c478bd9Sstevel@tonic-gate cur_ptr->newline = buffer - 1; 1763*7c478bd9Sstevel@tonic-gate } 1764*7c478bd9Sstevel@tonic-gate 1765*7c478bd9Sstevel@tonic-gate static BOOL 1766*7c478bd9Sstevel@tonic-gate pattern_match(PATTERN *pat, LINE *lptr) 1767*7c478bd9Sstevel@tonic-gate { 1768*7c478bd9Sstevel@tonic-gate if ((*pat->procfn)(pat, lptr->prntbuf - 1, lptr->newline + 1, 1769*7c478bd9Sstevel@tonic-gate &pat->loc1, &pat->loc2)) { 1770*7c478bd9Sstevel@tonic-gate return (YES); 1771*7c478bd9Sstevel@tonic-gate } else { 1772*7c478bd9Sstevel@tonic-gate pat->loc1 = lptr->prntbuf; 1773*7c478bd9Sstevel@tonic-gate pat->loc2 = lptr->newline - 1; 1774*7c478bd9Sstevel@tonic-gate return (NO); 1775*7c478bd9Sstevel@tonic-gate } 1776*7c478bd9Sstevel@tonic-gate } /* end of function pattern_match() */ 1777*7c478bd9Sstevel@tonic-gate 1778*7c478bd9Sstevel@tonic-gate static void 1779*7c478bd9Sstevel@tonic-gate fgetfile(LINE *cur_ptr) 1780*7c478bd9Sstevel@tonic-gate { 1781*7c478bd9Sstevel@tonic-gate /* 1782*7c478bd9Sstevel@tonic-gate * This function reads as much of the current file into the buffer 1783*7c478bd9Sstevel@tonic-gate * as will fit. 1784*7c478bd9Sstevel@tonic-gate */ 1785*7c478bd9Sstevel@tonic-gate int bytes; /* bytes read in current buffer */ 1786*7c478bd9Sstevel@tonic-gate int bufsize = MAXBUFSIZE; /* free space in data buffer */ 1787*7c478bd9Sstevel@tonic-gate int save_current; 1788*7c478bd9Sstevel@tonic-gate uchar_t *begin = cur_ptr->prntbuf; 1789*7c478bd9Sstevel@tonic-gate 1790*7c478bd9Sstevel@tonic-gate /* 1791*7c478bd9Sstevel@tonic-gate * Bytes of current incomplete line, if any, save_current in buffer. 1792*7c478bd9Sstevel@tonic-gate * These must be saved. 1793*7c478bd9Sstevel@tonic-gate */ 1794*7c478bd9Sstevel@tonic-gate save_current = (int)(bufend - begin); 1795*7c478bd9Sstevel@tonic-gate 1796*7c478bd9Sstevel@tonic-gate if (file_stat == MORE) { 1797*7c478bd9Sstevel@tonic-gate /* 1798*7c478bd9Sstevel@tonic-gate * A portion of the file fills the buffer. We must clear 1799*7c478bd9Sstevel@tonic-gate * out the dead wood to make room for more of the file. 1800*7c478bd9Sstevel@tonic-gate */ 1801*7c478bd9Sstevel@tonic-gate 1802*7c478bd9Sstevel@tonic-gate int k = 0; 1803*7c478bd9Sstevel@tonic-gate 1804*7c478bd9Sstevel@tonic-gate k = begin - buffer; 1805*7c478bd9Sstevel@tonic-gate if (!k) { 1806*7c478bd9Sstevel@tonic-gate /* 1807*7c478bd9Sstevel@tonic-gate * We have one humungous current line, 1808*7c478bd9Sstevel@tonic-gate * which fills the whole buffer. 1809*7c478bd9Sstevel@tonic-gate * Toss it. 1810*7c478bd9Sstevel@tonic-gate */ 1811*7c478bd9Sstevel@tonic-gate begin = bufend; 1812*7c478bd9Sstevel@tonic-gate k = begin - buffer; 1813*7c478bd9Sstevel@tonic-gate save_current = 0; 1814*7c478bd9Sstevel@tonic-gate } 1815*7c478bd9Sstevel@tonic-gate 1816*7c478bd9Sstevel@tonic-gate begin = buffer; 1817*7c478bd9Sstevel@tonic-gate bufend = begin + save_current; 1818*7c478bd9Sstevel@tonic-gate 1819*7c478bd9Sstevel@tonic-gate bufsize = MAXBUFSIZE - save_current; 1820*7c478bd9Sstevel@tonic-gate 1821*7c478bd9Sstevel@tonic-gate if (save_current > 0) { 1822*7c478bd9Sstevel@tonic-gate /* 1823*7c478bd9Sstevel@tonic-gate * Must save portion of current line. 1824*7c478bd9Sstevel@tonic-gate * Copy to beginning of buffer. 1825*7c478bd9Sstevel@tonic-gate */ 1826*7c478bd9Sstevel@tonic-gate (void) memmove(buffer, buffer + k, save_current); 1827*7c478bd9Sstevel@tonic-gate } 1828*7c478bd9Sstevel@tonic-gate } 1829*7c478bd9Sstevel@tonic-gate 1830*7c478bd9Sstevel@tonic-gate /* Now read in the file. */ 1831*7c478bd9Sstevel@tonic-gate 1832*7c478bd9Sstevel@tonic-gate do { 1833*7c478bd9Sstevel@tonic-gate bytes = read(file_desc, (char *)bufend, (unsigned int)bufsize); 1834*7c478bd9Sstevel@tonic-gate if (bytes < 0) { 1835*7c478bd9Sstevel@tonic-gate /* can't read any more of file */ 1836*7c478bd9Sstevel@tonic-gate bytes = 0; 1837*7c478bd9Sstevel@tonic-gate } 1838*7c478bd9Sstevel@tonic-gate bufend += bytes; 1839*7c478bd9Sstevel@tonic-gate bufsize -= bytes; 1840*7c478bd9Sstevel@tonic-gate } while (bytes > 0 && bufsize > 0); 1841*7c478bd9Sstevel@tonic-gate 1842*7c478bd9Sstevel@tonic-gate 1843*7c478bd9Sstevel@tonic-gate if (begin >= bufend) { 1844*7c478bd9Sstevel@tonic-gate /* No new lines or incomplete line in buffer */ 1845*7c478bd9Sstevel@tonic-gate file_stat = NO_MORE; 1846*7c478bd9Sstevel@tonic-gate } else if (bufsize) { 1847*7c478bd9Sstevel@tonic-gate /* Still space in the buffer, so we have read entire file */ 1848*7c478bd9Sstevel@tonic-gate file_stat = NO_MORE; 1849*7c478bd9Sstevel@tonic-gate } else { 1850*7c478bd9Sstevel@tonic-gate /* We filled entire buffer, so there may be more yet to read */ 1851*7c478bd9Sstevel@tonic-gate file_stat = MORE; 1852*7c478bd9Sstevel@tonic-gate } 1853*7c478bd9Sstevel@tonic-gate /* Note: bufend is 1 past last good char */ 1854*7c478bd9Sstevel@tonic-gate *bufend = NL; /* Sentinel new-line character */ 1855*7c478bd9Sstevel@tonic-gate /* Set newline to character preceding the current line */ 1856*7c478bd9Sstevel@tonic-gate cur_ptr->newline = begin - 1; 1857*7c478bd9Sstevel@tonic-gate } 1858*7c478bd9Sstevel@tonic-gate 1859*7c478bd9Sstevel@tonic-gate static void 1860*7c478bd9Sstevel@tonic-gate get_line(LINE *cur_ptr, uchar_t *s) 1861*7c478bd9Sstevel@tonic-gate { 1862*7c478bd9Sstevel@tonic-gate while (*s++ != NL) { 1863*7c478bd9Sstevel@tonic-gate } 1864*7c478bd9Sstevel@tonic-gate cur_ptr->newline = --s; 1865*7c478bd9Sstevel@tonic-gate cur_ptr->ln++; 1866*7c478bd9Sstevel@tonic-gate } 1867*7c478bd9Sstevel@tonic-gate 1868*7c478bd9Sstevel@tonic-gate static void 1869*7c478bd9Sstevel@tonic-gate get_ncount(LINE *cur_ptr, uchar_t *s) 1870*7c478bd9Sstevel@tonic-gate { 1871*7c478bd9Sstevel@tonic-gate uchar_t *p = cur_ptr->prntbuf; 1872*7c478bd9Sstevel@tonic-gate 1873*7c478bd9Sstevel@tonic-gate while (*--s != NL) { 1874*7c478bd9Sstevel@tonic-gate } 1875*7c478bd9Sstevel@tonic-gate cur_ptr->newline = s++; 1876*7c478bd9Sstevel@tonic-gate while ((s > p) && 1877*7c478bd9Sstevel@tonic-gate (p = (uchar_t *)memchr((char *)p, NL, s - p)) != NULL) { 1878*7c478bd9Sstevel@tonic-gate cur_ptr->ln++; 1879*7c478bd9Sstevel@tonic-gate ++p; 1880*7c478bd9Sstevel@tonic-gate } 1881*7c478bd9Sstevel@tonic-gate cur_ptr->ln--; 1882*7c478bd9Sstevel@tonic-gate cur_ptr->prntbuf = s; 1883*7c478bd9Sstevel@tonic-gate } 1884