1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 23*7c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 24*7c478bd9Sstevel@tonic-gate 25*7c478bd9Sstevel@tonic-gate 26*7c478bd9Sstevel@tonic-gate /* 27*7c478bd9Sstevel@tonic-gate * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 28*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 29*7c478bd9Sstevel@tonic-gate */ 30*7c478bd9Sstevel@tonic-gate 31*7c478bd9Sstevel@tonic-gate #ident "%Z%%M% %I% %E% SMI" /* SVr4.0 2.11 */ 32*7c478bd9Sstevel@tonic-gate 33*7c478bd9Sstevel@tonic-gate #define DEBUG 34*7c478bd9Sstevel@tonic-gate 35*7c478bd9Sstevel@tonic-gate #include "awk.h" 36*7c478bd9Sstevel@tonic-gate #include <ctype.h> 37*7c478bd9Sstevel@tonic-gate #include <stdio.h> 38*7c478bd9Sstevel@tonic-gate #include "y.tab.h" 39*7c478bd9Sstevel@tonic-gate 40*7c478bd9Sstevel@tonic-gate #define HAT (NCHARS-1) /* matches ^ in regular expr */ 41*7c478bd9Sstevel@tonic-gate /* NCHARS is 2**n */ 42*7c478bd9Sstevel@tonic-gate 43*7c478bd9Sstevel@tonic-gate #define type(v) (v)->nobj 44*7c478bd9Sstevel@tonic-gate #define left(v) (v)->narg[0] 45*7c478bd9Sstevel@tonic-gate #define right(v) (v)->narg[1] 46*7c478bd9Sstevel@tonic-gate #define parent(v) (v)->nnext 47*7c478bd9Sstevel@tonic-gate 48*7c478bd9Sstevel@tonic-gate #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 49*7c478bd9Sstevel@tonic-gate #define UNARY case STAR: case PLUS: case QUEST: 50*7c478bd9Sstevel@tonic-gate 51*7c478bd9Sstevel@tonic-gate /* encoding in tree Nodes: 52*7c478bd9Sstevel@tonic-gate leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 53*7c478bd9Sstevel@tonic-gate left is index, right contains value or pointer to value 54*7c478bd9Sstevel@tonic-gate unary (STAR, PLUS, QUEST): left is child, right is null 55*7c478bd9Sstevel@tonic-gate binary (CAT, OR): left and right are children 56*7c478bd9Sstevel@tonic-gate parent contains pointer to parent 57*7c478bd9Sstevel@tonic-gate */ 58*7c478bd9Sstevel@tonic-gate 59*7c478bd9Sstevel@tonic-gate 60*7c478bd9Sstevel@tonic-gate uchar chars[RECSIZE]; 61*7c478bd9Sstevel@tonic-gate int setvec[RECSIZE]; 62*7c478bd9Sstevel@tonic-gate int tmpset[RECSIZE]; 63*7c478bd9Sstevel@tonic-gate Node *point[RECSIZE]; 64*7c478bd9Sstevel@tonic-gate 65*7c478bd9Sstevel@tonic-gate int rtok; /* next token in current re */ 66*7c478bd9Sstevel@tonic-gate int rlxval; 67*7c478bd9Sstevel@tonic-gate uchar *rlxstr; 68*7c478bd9Sstevel@tonic-gate uchar *prestr; /* current position in current re */ 69*7c478bd9Sstevel@tonic-gate uchar *lastre; /* origin of last re */ 70*7c478bd9Sstevel@tonic-gate 71*7c478bd9Sstevel@tonic-gate static int setcnt; 72*7c478bd9Sstevel@tonic-gate static int poscnt; 73*7c478bd9Sstevel@tonic-gate 74*7c478bd9Sstevel@tonic-gate uchar *patbeg; 75*7c478bd9Sstevel@tonic-gate int patlen; 76*7c478bd9Sstevel@tonic-gate 77*7c478bd9Sstevel@tonic-gate #define NFA 20 /* cache this many dynamic fa's */ 78*7c478bd9Sstevel@tonic-gate fa *fatab[NFA]; 79*7c478bd9Sstevel@tonic-gate int nfatab = 0; /* entries in fatab */ 80*7c478bd9Sstevel@tonic-gate fa *mkdfa(); 81*7c478bd9Sstevel@tonic-gate 82*7c478bd9Sstevel@tonic-gate fa *makedfa(s, anchor) /* returns dfa for reg expr s */ 83*7c478bd9Sstevel@tonic-gate uchar *s; 84*7c478bd9Sstevel@tonic-gate int anchor; 85*7c478bd9Sstevel@tonic-gate { 86*7c478bd9Sstevel@tonic-gate int i, use, nuse; 87*7c478bd9Sstevel@tonic-gate fa *pfa; 88*7c478bd9Sstevel@tonic-gate 89*7c478bd9Sstevel@tonic-gate if (compile_time) /* a constant for sure */ 90*7c478bd9Sstevel@tonic-gate return mkdfa(s, anchor); 91*7c478bd9Sstevel@tonic-gate for (i = 0; i < nfatab; i++) /* is it there already? */ 92*7c478bd9Sstevel@tonic-gate if (fatab[i]->anchor == anchor && strcmp(fatab[i]->restr,s) == 0) { 93*7c478bd9Sstevel@tonic-gate fatab[i]->use++; 94*7c478bd9Sstevel@tonic-gate return fatab[i]; 95*7c478bd9Sstevel@tonic-gate } 96*7c478bd9Sstevel@tonic-gate pfa = mkdfa(s, anchor); 97*7c478bd9Sstevel@tonic-gate if (nfatab < NFA) { /* room for another */ 98*7c478bd9Sstevel@tonic-gate fatab[nfatab] = pfa; 99*7c478bd9Sstevel@tonic-gate fatab[nfatab]->use = 1; 100*7c478bd9Sstevel@tonic-gate nfatab++; 101*7c478bd9Sstevel@tonic-gate return pfa; 102*7c478bd9Sstevel@tonic-gate } 103*7c478bd9Sstevel@tonic-gate use = fatab[0]->use; /* replace least-recently used */ 104*7c478bd9Sstevel@tonic-gate nuse = 0; 105*7c478bd9Sstevel@tonic-gate for (i = 1; i < nfatab; i++) 106*7c478bd9Sstevel@tonic-gate if (fatab[i]->use < use) { 107*7c478bd9Sstevel@tonic-gate use = fatab[i]->use; 108*7c478bd9Sstevel@tonic-gate nuse = i; 109*7c478bd9Sstevel@tonic-gate } 110*7c478bd9Sstevel@tonic-gate freefa(fatab[nuse]); 111*7c478bd9Sstevel@tonic-gate fatab[nuse] = pfa; 112*7c478bd9Sstevel@tonic-gate pfa->use = 1; 113*7c478bd9Sstevel@tonic-gate return pfa; 114*7c478bd9Sstevel@tonic-gate } 115*7c478bd9Sstevel@tonic-gate 116*7c478bd9Sstevel@tonic-gate fa *mkdfa(s, anchor) /* does the real work of making a dfa */ 117*7c478bd9Sstevel@tonic-gate uchar *s; 118*7c478bd9Sstevel@tonic-gate int anchor; /* anchor = 1 for anchored matches, else 0 */ 119*7c478bd9Sstevel@tonic-gate { 120*7c478bd9Sstevel@tonic-gate Node *p, *p1, *reparse(); 121*7c478bd9Sstevel@tonic-gate fa *f; 122*7c478bd9Sstevel@tonic-gate 123*7c478bd9Sstevel@tonic-gate p = reparse(s); 124*7c478bd9Sstevel@tonic-gate p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 125*7c478bd9Sstevel@tonic-gate /* put ALL STAR in front of reg. exp. */ 126*7c478bd9Sstevel@tonic-gate p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 127*7c478bd9Sstevel@tonic-gate /* put FINAL after reg. exp. */ 128*7c478bd9Sstevel@tonic-gate 129*7c478bd9Sstevel@tonic-gate poscnt = 0; 130*7c478bd9Sstevel@tonic-gate penter(p1); /* enter parent pointers and leaf indices */ 131*7c478bd9Sstevel@tonic-gate if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) 132*7c478bd9Sstevel@tonic-gate overflo("no room for fa"); 133*7c478bd9Sstevel@tonic-gate f->accept = poscnt-1; /* penter has computed number of positions in re */ 134*7c478bd9Sstevel@tonic-gate cfoll(f, p1); /* set up follow sets */ 135*7c478bd9Sstevel@tonic-gate freetr(p1); 136*7c478bd9Sstevel@tonic-gate if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) 137*7c478bd9Sstevel@tonic-gate overflo("out of space in makedfa"); 138*7c478bd9Sstevel@tonic-gate if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) 139*7c478bd9Sstevel@tonic-gate overflo("out of space in makedfa"); 140*7c478bd9Sstevel@tonic-gate *f->posns[1] = 0; 141*7c478bd9Sstevel@tonic-gate f->initstat = makeinit(f, anchor); 142*7c478bd9Sstevel@tonic-gate f->anchor = anchor; 143*7c478bd9Sstevel@tonic-gate f->restr = tostring(s); 144*7c478bd9Sstevel@tonic-gate return f; 145*7c478bd9Sstevel@tonic-gate } 146*7c478bd9Sstevel@tonic-gate 147*7c478bd9Sstevel@tonic-gate int makeinit(f, anchor) 148*7c478bd9Sstevel@tonic-gate fa *f; 149*7c478bd9Sstevel@tonic-gate int anchor; 150*7c478bd9Sstevel@tonic-gate { 151*7c478bd9Sstevel@tonic-gate register int i, k; 152*7c478bd9Sstevel@tonic-gate 153*7c478bd9Sstevel@tonic-gate f->curstat = 2; 154*7c478bd9Sstevel@tonic-gate f->out[2] = 0; 155*7c478bd9Sstevel@tonic-gate f->reset = 0; 156*7c478bd9Sstevel@tonic-gate k = *(f->re[0].lfollow); 157*7c478bd9Sstevel@tonic-gate xfree(f->posns[2]); 158*7c478bd9Sstevel@tonic-gate if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 159*7c478bd9Sstevel@tonic-gate overflo("out of space in makeinit"); 160*7c478bd9Sstevel@tonic-gate for (i=0; i<=k; i++) { 161*7c478bd9Sstevel@tonic-gate (f->posns[2])[i] = (f->re[0].lfollow)[i]; 162*7c478bd9Sstevel@tonic-gate } 163*7c478bd9Sstevel@tonic-gate if ((f->posns[2])[1] == f->accept) 164*7c478bd9Sstevel@tonic-gate f->out[2] = 1; 165*7c478bd9Sstevel@tonic-gate for (i=0; i<NCHARS; i++) 166*7c478bd9Sstevel@tonic-gate f->gototab[2][i] = 0; 167*7c478bd9Sstevel@tonic-gate f->curstat = cgoto(f, 2, HAT); 168*7c478bd9Sstevel@tonic-gate if (anchor) { 169*7c478bd9Sstevel@tonic-gate *f->posns[2] = k-1; /* leave out position 0 */ 170*7c478bd9Sstevel@tonic-gate for (i=0; i<k; i++) { 171*7c478bd9Sstevel@tonic-gate (f->posns[0])[i] = (f->posns[2])[i]; 172*7c478bd9Sstevel@tonic-gate } 173*7c478bd9Sstevel@tonic-gate 174*7c478bd9Sstevel@tonic-gate f->out[0] = f->out[2]; 175*7c478bd9Sstevel@tonic-gate if (f->curstat != 2) 176*7c478bd9Sstevel@tonic-gate --(*f->posns[f->curstat]); 177*7c478bd9Sstevel@tonic-gate } 178*7c478bd9Sstevel@tonic-gate return f->curstat; 179*7c478bd9Sstevel@tonic-gate } 180*7c478bd9Sstevel@tonic-gate 181*7c478bd9Sstevel@tonic-gate penter(p) /* set up parent pointers and leaf indices */ 182*7c478bd9Sstevel@tonic-gate Node *p; 183*7c478bd9Sstevel@tonic-gate { 184*7c478bd9Sstevel@tonic-gate switch(type(p)) { 185*7c478bd9Sstevel@tonic-gate LEAF 186*7c478bd9Sstevel@tonic-gate left(p) = (Node *) poscnt; 187*7c478bd9Sstevel@tonic-gate point[poscnt++] = p; 188*7c478bd9Sstevel@tonic-gate break; 189*7c478bd9Sstevel@tonic-gate UNARY 190*7c478bd9Sstevel@tonic-gate penter(left(p)); 191*7c478bd9Sstevel@tonic-gate parent(left(p)) = p; 192*7c478bd9Sstevel@tonic-gate break; 193*7c478bd9Sstevel@tonic-gate case CAT: 194*7c478bd9Sstevel@tonic-gate case OR: 195*7c478bd9Sstevel@tonic-gate penter(left(p)); 196*7c478bd9Sstevel@tonic-gate penter(right(p)); 197*7c478bd9Sstevel@tonic-gate parent(left(p)) = p; 198*7c478bd9Sstevel@tonic-gate parent(right(p)) = p; 199*7c478bd9Sstevel@tonic-gate break; 200*7c478bd9Sstevel@tonic-gate default: 201*7c478bd9Sstevel@tonic-gate ERROR "unknown type %d in penter", type(p) FATAL; 202*7c478bd9Sstevel@tonic-gate break; 203*7c478bd9Sstevel@tonic-gate } 204*7c478bd9Sstevel@tonic-gate } 205*7c478bd9Sstevel@tonic-gate 206*7c478bd9Sstevel@tonic-gate freetr(p) /* free parse tree */ 207*7c478bd9Sstevel@tonic-gate Node *p; 208*7c478bd9Sstevel@tonic-gate { 209*7c478bd9Sstevel@tonic-gate switch (type(p)) { 210*7c478bd9Sstevel@tonic-gate LEAF 211*7c478bd9Sstevel@tonic-gate xfree(p); 212*7c478bd9Sstevel@tonic-gate break; 213*7c478bd9Sstevel@tonic-gate UNARY 214*7c478bd9Sstevel@tonic-gate freetr(left(p)); 215*7c478bd9Sstevel@tonic-gate xfree(p); 216*7c478bd9Sstevel@tonic-gate break; 217*7c478bd9Sstevel@tonic-gate case CAT: 218*7c478bd9Sstevel@tonic-gate case OR: 219*7c478bd9Sstevel@tonic-gate freetr(left(p)); 220*7c478bd9Sstevel@tonic-gate freetr(right(p)); 221*7c478bd9Sstevel@tonic-gate xfree(p); 222*7c478bd9Sstevel@tonic-gate break; 223*7c478bd9Sstevel@tonic-gate default: 224*7c478bd9Sstevel@tonic-gate ERROR "unknown type %d in freetr", type(p) FATAL; 225*7c478bd9Sstevel@tonic-gate break; 226*7c478bd9Sstevel@tonic-gate } 227*7c478bd9Sstevel@tonic-gate } 228*7c478bd9Sstevel@tonic-gate 229*7c478bd9Sstevel@tonic-gate uchar *cclenter(p) 230*7c478bd9Sstevel@tonic-gate register uchar *p; 231*7c478bd9Sstevel@tonic-gate { 232*7c478bd9Sstevel@tonic-gate register int i, c; 233*7c478bd9Sstevel@tonic-gate uchar *op; 234*7c478bd9Sstevel@tonic-gate 235*7c478bd9Sstevel@tonic-gate op = p; 236*7c478bd9Sstevel@tonic-gate i = 0; 237*7c478bd9Sstevel@tonic-gate while ((c = *p++) != 0) { 238*7c478bd9Sstevel@tonic-gate if (c == '\\') { 239*7c478bd9Sstevel@tonic-gate if ((c = *p++) == 't') 240*7c478bd9Sstevel@tonic-gate c = '\t'; 241*7c478bd9Sstevel@tonic-gate else if (c == 'n') 242*7c478bd9Sstevel@tonic-gate c = '\n'; 243*7c478bd9Sstevel@tonic-gate else if (c == 'f') 244*7c478bd9Sstevel@tonic-gate c = '\f'; 245*7c478bd9Sstevel@tonic-gate else if (c == 'r') 246*7c478bd9Sstevel@tonic-gate c = '\r'; 247*7c478bd9Sstevel@tonic-gate else if (c == 'b') 248*7c478bd9Sstevel@tonic-gate c = '\b'; 249*7c478bd9Sstevel@tonic-gate else if (c == '\\') 250*7c478bd9Sstevel@tonic-gate c = '\\'; 251*7c478bd9Sstevel@tonic-gate else if (isdigit(c)) { 252*7c478bd9Sstevel@tonic-gate int n = c - '0'; 253*7c478bd9Sstevel@tonic-gate if (isdigit(*p)) { 254*7c478bd9Sstevel@tonic-gate n = 8 * n + *p++ - '0'; 255*7c478bd9Sstevel@tonic-gate if (isdigit(*p)) 256*7c478bd9Sstevel@tonic-gate n = 8 * n + *p++ - '0'; 257*7c478bd9Sstevel@tonic-gate } 258*7c478bd9Sstevel@tonic-gate c = n; 259*7c478bd9Sstevel@tonic-gate } /* else */ 260*7c478bd9Sstevel@tonic-gate /* c = c; */ 261*7c478bd9Sstevel@tonic-gate } else if (c == '-' && i > 0 && chars[i-1] != 0) { 262*7c478bd9Sstevel@tonic-gate if (*p != 0) { 263*7c478bd9Sstevel@tonic-gate c = chars[i-1]; 264*7c478bd9Sstevel@tonic-gate while ((uchar) c < *p) { /* fails if *p is \\ */ 265*7c478bd9Sstevel@tonic-gate if (i >= RECSIZE-1) 266*7c478bd9Sstevel@tonic-gate overflo("character class too big"); 267*7c478bd9Sstevel@tonic-gate chars[i++] = ++c; 268*7c478bd9Sstevel@tonic-gate } 269*7c478bd9Sstevel@tonic-gate p++; 270*7c478bd9Sstevel@tonic-gate continue; 271*7c478bd9Sstevel@tonic-gate } 272*7c478bd9Sstevel@tonic-gate } 273*7c478bd9Sstevel@tonic-gate if (i >= RECSIZE-1) 274*7c478bd9Sstevel@tonic-gate overflo("character class too big"); 275*7c478bd9Sstevel@tonic-gate chars[i++] = c; 276*7c478bd9Sstevel@tonic-gate } 277*7c478bd9Sstevel@tonic-gate chars[i++] = '\0'; 278*7c478bd9Sstevel@tonic-gate dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, chars) ); 279*7c478bd9Sstevel@tonic-gate xfree(op); 280*7c478bd9Sstevel@tonic-gate return(tostring(chars)); 281*7c478bd9Sstevel@tonic-gate } 282*7c478bd9Sstevel@tonic-gate 283*7c478bd9Sstevel@tonic-gate overflo(s) 284*7c478bd9Sstevel@tonic-gate uchar *s; 285*7c478bd9Sstevel@tonic-gate { 286*7c478bd9Sstevel@tonic-gate ERROR "regular expression too big: %s", gettext((char *) s) FATAL; 287*7c478bd9Sstevel@tonic-gate } 288*7c478bd9Sstevel@tonic-gate 289*7c478bd9Sstevel@tonic-gate cfoll(f, v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 290*7c478bd9Sstevel@tonic-gate fa *f; 291*7c478bd9Sstevel@tonic-gate register Node *v; 292*7c478bd9Sstevel@tonic-gate { 293*7c478bd9Sstevel@tonic-gate register int i; 294*7c478bd9Sstevel@tonic-gate register int *p; 295*7c478bd9Sstevel@tonic-gate 296*7c478bd9Sstevel@tonic-gate switch(type(v)) { 297*7c478bd9Sstevel@tonic-gate LEAF 298*7c478bd9Sstevel@tonic-gate f->re[(int) left(v)].ltype = type(v); 299*7c478bd9Sstevel@tonic-gate f->re[(int) left(v)].lval = (int) right(v); 300*7c478bd9Sstevel@tonic-gate for (i=0; i<=f->accept; i++) 301*7c478bd9Sstevel@tonic-gate setvec[i] = 0; 302*7c478bd9Sstevel@tonic-gate setcnt = 0; 303*7c478bd9Sstevel@tonic-gate follow(v); /* computes setvec and setcnt */ 304*7c478bd9Sstevel@tonic-gate if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 305*7c478bd9Sstevel@tonic-gate overflo("follow set overflow"); 306*7c478bd9Sstevel@tonic-gate f->re[(int) left(v)].lfollow = p; 307*7c478bd9Sstevel@tonic-gate *p = setcnt; 308*7c478bd9Sstevel@tonic-gate for (i = f->accept; i >= 0; i--) 309*7c478bd9Sstevel@tonic-gate if (setvec[i] == 1) *++p = i; 310*7c478bd9Sstevel@tonic-gate break; 311*7c478bd9Sstevel@tonic-gate UNARY 312*7c478bd9Sstevel@tonic-gate cfoll(f,left(v)); 313*7c478bd9Sstevel@tonic-gate break; 314*7c478bd9Sstevel@tonic-gate case CAT: 315*7c478bd9Sstevel@tonic-gate case OR: 316*7c478bd9Sstevel@tonic-gate cfoll(f,left(v)); 317*7c478bd9Sstevel@tonic-gate cfoll(f,right(v)); 318*7c478bd9Sstevel@tonic-gate break; 319*7c478bd9Sstevel@tonic-gate default: 320*7c478bd9Sstevel@tonic-gate ERROR "unknown type %d in cfoll", type(v) FATAL; 321*7c478bd9Sstevel@tonic-gate } 322*7c478bd9Sstevel@tonic-gate } 323*7c478bd9Sstevel@tonic-gate 324*7c478bd9Sstevel@tonic-gate first(p) /* collects initially active leaves of p into setvec */ 325*7c478bd9Sstevel@tonic-gate register Node *p; /* returns 0 or 1 depending on whether p matches empty string */ 326*7c478bd9Sstevel@tonic-gate { 327*7c478bd9Sstevel@tonic-gate register int b; 328*7c478bd9Sstevel@tonic-gate 329*7c478bd9Sstevel@tonic-gate switch(type(p)) { 330*7c478bd9Sstevel@tonic-gate LEAF 331*7c478bd9Sstevel@tonic-gate if (setvec[(int) left(p)] != 1) { 332*7c478bd9Sstevel@tonic-gate setvec[(int) left(p)] = 1; 333*7c478bd9Sstevel@tonic-gate setcnt++; 334*7c478bd9Sstevel@tonic-gate } 335*7c478bd9Sstevel@tonic-gate if (type(p) == CCL && (*(uchar *) right(p)) == '\0') 336*7c478bd9Sstevel@tonic-gate return(0); /* empty CCL */ 337*7c478bd9Sstevel@tonic-gate else return(1); 338*7c478bd9Sstevel@tonic-gate case PLUS: 339*7c478bd9Sstevel@tonic-gate if (first(left(p)) == 0) return(0); 340*7c478bd9Sstevel@tonic-gate return(1); 341*7c478bd9Sstevel@tonic-gate case STAR: 342*7c478bd9Sstevel@tonic-gate case QUEST: 343*7c478bd9Sstevel@tonic-gate first(left(p)); 344*7c478bd9Sstevel@tonic-gate return(0); 345*7c478bd9Sstevel@tonic-gate case CAT: 346*7c478bd9Sstevel@tonic-gate if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 347*7c478bd9Sstevel@tonic-gate return(1); 348*7c478bd9Sstevel@tonic-gate case OR: 349*7c478bd9Sstevel@tonic-gate b = first(right(p)); 350*7c478bd9Sstevel@tonic-gate if (first(left(p)) == 0 || b == 0) return(0); 351*7c478bd9Sstevel@tonic-gate return(1); 352*7c478bd9Sstevel@tonic-gate } 353*7c478bd9Sstevel@tonic-gate ERROR "unknown type %d in first", type(p) FATAL; 354*7c478bd9Sstevel@tonic-gate return(-1); 355*7c478bd9Sstevel@tonic-gate } 356*7c478bd9Sstevel@tonic-gate 357*7c478bd9Sstevel@tonic-gate follow(v) 358*7c478bd9Sstevel@tonic-gate Node *v; /* collects leaves that can follow v into setvec */ 359*7c478bd9Sstevel@tonic-gate { 360*7c478bd9Sstevel@tonic-gate Node *p; 361*7c478bd9Sstevel@tonic-gate 362*7c478bd9Sstevel@tonic-gate if (type(v) == FINAL) 363*7c478bd9Sstevel@tonic-gate return; 364*7c478bd9Sstevel@tonic-gate p = parent(v); 365*7c478bd9Sstevel@tonic-gate switch (type(p)) { 366*7c478bd9Sstevel@tonic-gate case STAR: 367*7c478bd9Sstevel@tonic-gate case PLUS: 368*7c478bd9Sstevel@tonic-gate first(v); 369*7c478bd9Sstevel@tonic-gate follow(p); 370*7c478bd9Sstevel@tonic-gate return; 371*7c478bd9Sstevel@tonic-gate 372*7c478bd9Sstevel@tonic-gate case OR: 373*7c478bd9Sstevel@tonic-gate case QUEST: 374*7c478bd9Sstevel@tonic-gate follow(p); 375*7c478bd9Sstevel@tonic-gate return; 376*7c478bd9Sstevel@tonic-gate 377*7c478bd9Sstevel@tonic-gate case CAT: 378*7c478bd9Sstevel@tonic-gate if (v == left(p)) { /* v is left child of p */ 379*7c478bd9Sstevel@tonic-gate if (first(right(p)) == 0) { 380*7c478bd9Sstevel@tonic-gate follow(p); 381*7c478bd9Sstevel@tonic-gate return; 382*7c478bd9Sstevel@tonic-gate } 383*7c478bd9Sstevel@tonic-gate } 384*7c478bd9Sstevel@tonic-gate else /* v is right child */ 385*7c478bd9Sstevel@tonic-gate follow(p); 386*7c478bd9Sstevel@tonic-gate return; 387*7c478bd9Sstevel@tonic-gate default: 388*7c478bd9Sstevel@tonic-gate ERROR "unknown type %d in follow", type(p) FATAL; 389*7c478bd9Sstevel@tonic-gate break; 390*7c478bd9Sstevel@tonic-gate } 391*7c478bd9Sstevel@tonic-gate } 392*7c478bd9Sstevel@tonic-gate 393*7c478bd9Sstevel@tonic-gate member(c, s) /* is c in s? */ 394*7c478bd9Sstevel@tonic-gate register uchar c, *s; 395*7c478bd9Sstevel@tonic-gate { 396*7c478bd9Sstevel@tonic-gate while (*s) 397*7c478bd9Sstevel@tonic-gate if (c == *s++) 398*7c478bd9Sstevel@tonic-gate return(1); 399*7c478bd9Sstevel@tonic-gate return(0); 400*7c478bd9Sstevel@tonic-gate } 401*7c478bd9Sstevel@tonic-gate 402*7c478bd9Sstevel@tonic-gate 403*7c478bd9Sstevel@tonic-gate match(f, p) 404*7c478bd9Sstevel@tonic-gate register fa *f; 405*7c478bd9Sstevel@tonic-gate register uchar *p; 406*7c478bd9Sstevel@tonic-gate { 407*7c478bd9Sstevel@tonic-gate register int s, ns; 408*7c478bd9Sstevel@tonic-gate 409*7c478bd9Sstevel@tonic-gate s = f->reset?makeinit(f,0):f->initstat; 410*7c478bd9Sstevel@tonic-gate if (f->out[s]) 411*7c478bd9Sstevel@tonic-gate return(1); 412*7c478bd9Sstevel@tonic-gate do { 413*7c478bd9Sstevel@tonic-gate if (ns=f->gototab[s][*p]) 414*7c478bd9Sstevel@tonic-gate s=ns; 415*7c478bd9Sstevel@tonic-gate else 416*7c478bd9Sstevel@tonic-gate s=cgoto(f,s,*p); 417*7c478bd9Sstevel@tonic-gate if (f->out[s]) 418*7c478bd9Sstevel@tonic-gate return(1); 419*7c478bd9Sstevel@tonic-gate } while (*p++ != 0); 420*7c478bd9Sstevel@tonic-gate return(0); 421*7c478bd9Sstevel@tonic-gate } 422*7c478bd9Sstevel@tonic-gate 423*7c478bd9Sstevel@tonic-gate pmatch(f, p) 424*7c478bd9Sstevel@tonic-gate register fa *f; 425*7c478bd9Sstevel@tonic-gate register uchar *p; 426*7c478bd9Sstevel@tonic-gate { 427*7c478bd9Sstevel@tonic-gate register int s, ns; 428*7c478bd9Sstevel@tonic-gate register uchar *q; 429*7c478bd9Sstevel@tonic-gate int i, k; 430*7c478bd9Sstevel@tonic-gate 431*7c478bd9Sstevel@tonic-gate if (f->reset) { 432*7c478bd9Sstevel@tonic-gate f->initstat = s = makeinit(f,1); 433*7c478bd9Sstevel@tonic-gate } else { 434*7c478bd9Sstevel@tonic-gate s = f->initstat; 435*7c478bd9Sstevel@tonic-gate } 436*7c478bd9Sstevel@tonic-gate patbeg = p; 437*7c478bd9Sstevel@tonic-gate patlen = -1; 438*7c478bd9Sstevel@tonic-gate do { 439*7c478bd9Sstevel@tonic-gate q = p; 440*7c478bd9Sstevel@tonic-gate do { 441*7c478bd9Sstevel@tonic-gate if (f->out[s]) /* final state */ 442*7c478bd9Sstevel@tonic-gate patlen = q-p; 443*7c478bd9Sstevel@tonic-gate if (ns=f->gototab[s][*q]) 444*7c478bd9Sstevel@tonic-gate s=ns; 445*7c478bd9Sstevel@tonic-gate else 446*7c478bd9Sstevel@tonic-gate s=cgoto(f,s,*q); 447*7c478bd9Sstevel@tonic-gate if (s==1) /* no transition */ 448*7c478bd9Sstevel@tonic-gate if (patlen >= 0) { 449*7c478bd9Sstevel@tonic-gate patbeg = p; 450*7c478bd9Sstevel@tonic-gate return(1); 451*7c478bd9Sstevel@tonic-gate } 452*7c478bd9Sstevel@tonic-gate else 453*7c478bd9Sstevel@tonic-gate goto nextin; /* no match */ 454*7c478bd9Sstevel@tonic-gate } while (*q++ != 0); 455*7c478bd9Sstevel@tonic-gate if (f->out[s]) 456*7c478bd9Sstevel@tonic-gate patlen = q-p-1; /* don't count $ */ 457*7c478bd9Sstevel@tonic-gate if (patlen >= 0) { 458*7c478bd9Sstevel@tonic-gate patbeg = p; 459*7c478bd9Sstevel@tonic-gate return(1); 460*7c478bd9Sstevel@tonic-gate } 461*7c478bd9Sstevel@tonic-gate nextin: 462*7c478bd9Sstevel@tonic-gate s = 2; 463*7c478bd9Sstevel@tonic-gate if (f->reset) { 464*7c478bd9Sstevel@tonic-gate for (i=2; i<=f->curstat; i++) 465*7c478bd9Sstevel@tonic-gate xfree(f->posns[i]); 466*7c478bd9Sstevel@tonic-gate k = *f->posns[0]; 467*7c478bd9Sstevel@tonic-gate if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 468*7c478bd9Sstevel@tonic-gate overflo("out of space in pmatch"); 469*7c478bd9Sstevel@tonic-gate for (i=0; i<=k; i++) 470*7c478bd9Sstevel@tonic-gate (f->posns[2])[i] = (f->posns[0])[i]; 471*7c478bd9Sstevel@tonic-gate f->initstat = f->curstat = 2; 472*7c478bd9Sstevel@tonic-gate f->out[2] = f->out[0]; 473*7c478bd9Sstevel@tonic-gate for (i=0; i<NCHARS; i++) 474*7c478bd9Sstevel@tonic-gate f->gototab[2][i] = 0; 475*7c478bd9Sstevel@tonic-gate } 476*7c478bd9Sstevel@tonic-gate } while (*p++ != 0); 477*7c478bd9Sstevel@tonic-gate return (0); 478*7c478bd9Sstevel@tonic-gate } 479*7c478bd9Sstevel@tonic-gate 480*7c478bd9Sstevel@tonic-gate nematch(f, p) 481*7c478bd9Sstevel@tonic-gate register fa *f; 482*7c478bd9Sstevel@tonic-gate register uchar *p; 483*7c478bd9Sstevel@tonic-gate { 484*7c478bd9Sstevel@tonic-gate register int s, ns; 485*7c478bd9Sstevel@tonic-gate register uchar *q; 486*7c478bd9Sstevel@tonic-gate int i, k; 487*7c478bd9Sstevel@tonic-gate 488*7c478bd9Sstevel@tonic-gate if (f->reset) { 489*7c478bd9Sstevel@tonic-gate f->initstat = s = makeinit(f,1); 490*7c478bd9Sstevel@tonic-gate } else { 491*7c478bd9Sstevel@tonic-gate s = f->initstat; 492*7c478bd9Sstevel@tonic-gate } 493*7c478bd9Sstevel@tonic-gate patlen = -1; 494*7c478bd9Sstevel@tonic-gate while (*p) { 495*7c478bd9Sstevel@tonic-gate q = p; 496*7c478bd9Sstevel@tonic-gate do { 497*7c478bd9Sstevel@tonic-gate if (f->out[s]) /* final state */ 498*7c478bd9Sstevel@tonic-gate patlen = q-p; 499*7c478bd9Sstevel@tonic-gate if (ns=f->gototab[s][*q]) 500*7c478bd9Sstevel@tonic-gate s=ns; 501*7c478bd9Sstevel@tonic-gate else 502*7c478bd9Sstevel@tonic-gate s=cgoto(f,s,*q); 503*7c478bd9Sstevel@tonic-gate if (s==1) /* no transition */ 504*7c478bd9Sstevel@tonic-gate if (patlen > 0) { 505*7c478bd9Sstevel@tonic-gate patbeg = p; 506*7c478bd9Sstevel@tonic-gate return(1); 507*7c478bd9Sstevel@tonic-gate } 508*7c478bd9Sstevel@tonic-gate else 509*7c478bd9Sstevel@tonic-gate goto nnextin; /* no nonempty match */ 510*7c478bd9Sstevel@tonic-gate } while (*q++ != 0); 511*7c478bd9Sstevel@tonic-gate if (f->out[s]) 512*7c478bd9Sstevel@tonic-gate patlen = q-p-1; /* don't count $ */ 513*7c478bd9Sstevel@tonic-gate if (patlen > 0 ) { 514*7c478bd9Sstevel@tonic-gate patbeg = p; 515*7c478bd9Sstevel@tonic-gate return(1); 516*7c478bd9Sstevel@tonic-gate } 517*7c478bd9Sstevel@tonic-gate nnextin: 518*7c478bd9Sstevel@tonic-gate s = 2; 519*7c478bd9Sstevel@tonic-gate if (f->reset) { 520*7c478bd9Sstevel@tonic-gate for (i=2; i<=f->curstat; i++) 521*7c478bd9Sstevel@tonic-gate xfree(f->posns[i]); 522*7c478bd9Sstevel@tonic-gate k = *f->posns[0]; 523*7c478bd9Sstevel@tonic-gate if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) 524*7c478bd9Sstevel@tonic-gate overflo("out of state space"); 525*7c478bd9Sstevel@tonic-gate for (i=0; i<=k; i++) 526*7c478bd9Sstevel@tonic-gate (f->posns[2])[i] = (f->posns[0])[i]; 527*7c478bd9Sstevel@tonic-gate f->initstat = f->curstat = 2; 528*7c478bd9Sstevel@tonic-gate f->out[2] = f->out[0]; 529*7c478bd9Sstevel@tonic-gate for (i=0; i<NCHARS; i++) 530*7c478bd9Sstevel@tonic-gate f->gototab[2][i] = 0; 531*7c478bd9Sstevel@tonic-gate } 532*7c478bd9Sstevel@tonic-gate p++; 533*7c478bd9Sstevel@tonic-gate } 534*7c478bd9Sstevel@tonic-gate return (0); 535*7c478bd9Sstevel@tonic-gate } 536*7c478bd9Sstevel@tonic-gate 537*7c478bd9Sstevel@tonic-gate Node *regexp(), *primary(), *concat(), *alt(), *unary(); 538*7c478bd9Sstevel@tonic-gate 539*7c478bd9Sstevel@tonic-gate Node *reparse(p) 540*7c478bd9Sstevel@tonic-gate uchar *p; 541*7c478bd9Sstevel@tonic-gate { 542*7c478bd9Sstevel@tonic-gate /* parses regular expression pointed to by p */ 543*7c478bd9Sstevel@tonic-gate /* uses relex() to scan regular expression */ 544*7c478bd9Sstevel@tonic-gate Node *np; 545*7c478bd9Sstevel@tonic-gate 546*7c478bd9Sstevel@tonic-gate dprintf( ("reparse <%s>\n", p) ); 547*7c478bd9Sstevel@tonic-gate lastre = prestr = p; /* prestr points to string to be parsed */ 548*7c478bd9Sstevel@tonic-gate rtok = relex(); 549*7c478bd9Sstevel@tonic-gate if (rtok == '\0') 550*7c478bd9Sstevel@tonic-gate ERROR "empty regular expression" FATAL; 551*7c478bd9Sstevel@tonic-gate np = regexp(); 552*7c478bd9Sstevel@tonic-gate if (rtok == '\0') 553*7c478bd9Sstevel@tonic-gate return(np); 554*7c478bd9Sstevel@tonic-gate else 555*7c478bd9Sstevel@tonic-gate ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL; 556*7c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 557*7c478bd9Sstevel@tonic-gate } 558*7c478bd9Sstevel@tonic-gate 559*7c478bd9Sstevel@tonic-gate Node *regexp() 560*7c478bd9Sstevel@tonic-gate { 561*7c478bd9Sstevel@tonic-gate return (alt(concat(primary()))); 562*7c478bd9Sstevel@tonic-gate } 563*7c478bd9Sstevel@tonic-gate 564*7c478bd9Sstevel@tonic-gate Node *primary() 565*7c478bd9Sstevel@tonic-gate { 566*7c478bd9Sstevel@tonic-gate Node *np; 567*7c478bd9Sstevel@tonic-gate 568*7c478bd9Sstevel@tonic-gate switch (rtok) { 569*7c478bd9Sstevel@tonic-gate case CHAR: 570*7c478bd9Sstevel@tonic-gate np = op2(CHAR, NIL, (Node *) rlxval); 571*7c478bd9Sstevel@tonic-gate rtok = relex(); 572*7c478bd9Sstevel@tonic-gate return (unary(np)); 573*7c478bd9Sstevel@tonic-gate case ALL: 574*7c478bd9Sstevel@tonic-gate rtok = relex(); 575*7c478bd9Sstevel@tonic-gate return (unary(op2(ALL, NIL, NIL))); 576*7c478bd9Sstevel@tonic-gate case DOT: 577*7c478bd9Sstevel@tonic-gate rtok = relex(); 578*7c478bd9Sstevel@tonic-gate return (unary(op2(DOT, NIL, NIL))); 579*7c478bd9Sstevel@tonic-gate case CCL: 580*7c478bd9Sstevel@tonic-gate np = op2(CCL, NIL, (Node*) cclenter(rlxstr)); 581*7c478bd9Sstevel@tonic-gate rtok = relex(); 582*7c478bd9Sstevel@tonic-gate return (unary(np)); 583*7c478bd9Sstevel@tonic-gate case NCCL: 584*7c478bd9Sstevel@tonic-gate np = op2(NCCL, NIL, (Node *) cclenter(rlxstr)); 585*7c478bd9Sstevel@tonic-gate rtok = relex(); 586*7c478bd9Sstevel@tonic-gate return (unary(np)); 587*7c478bd9Sstevel@tonic-gate case '^': 588*7c478bd9Sstevel@tonic-gate rtok = relex(); 589*7c478bd9Sstevel@tonic-gate return (unary(op2(CHAR, NIL, (Node *) HAT))); 590*7c478bd9Sstevel@tonic-gate case '$': 591*7c478bd9Sstevel@tonic-gate rtok = relex(); 592*7c478bd9Sstevel@tonic-gate return (unary(op2(CHAR, NIL, NIL))); 593*7c478bd9Sstevel@tonic-gate case '(': 594*7c478bd9Sstevel@tonic-gate rtok = relex(); 595*7c478bd9Sstevel@tonic-gate if (rtok == ')') { /* special pleading for () */ 596*7c478bd9Sstevel@tonic-gate rtok = relex(); 597*7c478bd9Sstevel@tonic-gate return unary(op2(CCL, NIL, (Node *) tostring(""))); 598*7c478bd9Sstevel@tonic-gate } 599*7c478bd9Sstevel@tonic-gate np = regexp(); 600*7c478bd9Sstevel@tonic-gate if (rtok == ')') { 601*7c478bd9Sstevel@tonic-gate rtok = relex(); 602*7c478bd9Sstevel@tonic-gate return (unary(np)); 603*7c478bd9Sstevel@tonic-gate } 604*7c478bd9Sstevel@tonic-gate else 605*7c478bd9Sstevel@tonic-gate ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL; 606*7c478bd9Sstevel@tonic-gate default: 607*7c478bd9Sstevel@tonic-gate ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL; 608*7c478bd9Sstevel@tonic-gate } 609*7c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 610*7c478bd9Sstevel@tonic-gate } 611*7c478bd9Sstevel@tonic-gate 612*7c478bd9Sstevel@tonic-gate Node *concat(np) 613*7c478bd9Sstevel@tonic-gate Node *np; 614*7c478bd9Sstevel@tonic-gate { 615*7c478bd9Sstevel@tonic-gate switch (rtok) { 616*7c478bd9Sstevel@tonic-gate case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 617*7c478bd9Sstevel@tonic-gate return (concat(op2(CAT, np, primary()))); 618*7c478bd9Sstevel@tonic-gate default: 619*7c478bd9Sstevel@tonic-gate return (np); 620*7c478bd9Sstevel@tonic-gate } 621*7c478bd9Sstevel@tonic-gate } 622*7c478bd9Sstevel@tonic-gate 623*7c478bd9Sstevel@tonic-gate Node *alt(np) 624*7c478bd9Sstevel@tonic-gate Node *np; 625*7c478bd9Sstevel@tonic-gate { 626*7c478bd9Sstevel@tonic-gate if (rtok == OR) { 627*7c478bd9Sstevel@tonic-gate rtok = relex(); 628*7c478bd9Sstevel@tonic-gate return (alt(op2(OR, np, concat(primary())))); 629*7c478bd9Sstevel@tonic-gate } 630*7c478bd9Sstevel@tonic-gate return (np); 631*7c478bd9Sstevel@tonic-gate } 632*7c478bd9Sstevel@tonic-gate 633*7c478bd9Sstevel@tonic-gate Node *unary(np) 634*7c478bd9Sstevel@tonic-gate Node *np; 635*7c478bd9Sstevel@tonic-gate { 636*7c478bd9Sstevel@tonic-gate switch (rtok) { 637*7c478bd9Sstevel@tonic-gate case STAR: 638*7c478bd9Sstevel@tonic-gate rtok = relex(); 639*7c478bd9Sstevel@tonic-gate return (unary(op2(STAR, np, NIL))); 640*7c478bd9Sstevel@tonic-gate case PLUS: 641*7c478bd9Sstevel@tonic-gate rtok = relex(); 642*7c478bd9Sstevel@tonic-gate return (unary(op2(PLUS, np, NIL))); 643*7c478bd9Sstevel@tonic-gate case QUEST: 644*7c478bd9Sstevel@tonic-gate rtok = relex(); 645*7c478bd9Sstevel@tonic-gate return (unary(op2(QUEST, np, NIL))); 646*7c478bd9Sstevel@tonic-gate default: 647*7c478bd9Sstevel@tonic-gate return (np); 648*7c478bd9Sstevel@tonic-gate } 649*7c478bd9Sstevel@tonic-gate } 650*7c478bd9Sstevel@tonic-gate 651*7c478bd9Sstevel@tonic-gate relex() /* lexical analyzer for reparse */ 652*7c478bd9Sstevel@tonic-gate { 653*7c478bd9Sstevel@tonic-gate register int c; 654*7c478bd9Sstevel@tonic-gate uchar cbuf[RECSIZE]; 655*7c478bd9Sstevel@tonic-gate int clen, cflag; 656*7c478bd9Sstevel@tonic-gate 657*7c478bd9Sstevel@tonic-gate switch (c = *prestr++) { 658*7c478bd9Sstevel@tonic-gate case '|': return OR; 659*7c478bd9Sstevel@tonic-gate case '*': return STAR; 660*7c478bd9Sstevel@tonic-gate case '+': return PLUS; 661*7c478bd9Sstevel@tonic-gate case '?': return QUEST; 662*7c478bd9Sstevel@tonic-gate case '.': return DOT; 663*7c478bd9Sstevel@tonic-gate case '\0': prestr--; return '\0'; 664*7c478bd9Sstevel@tonic-gate case '^': 665*7c478bd9Sstevel@tonic-gate case '$': 666*7c478bd9Sstevel@tonic-gate case '(': 667*7c478bd9Sstevel@tonic-gate case ')': 668*7c478bd9Sstevel@tonic-gate return c; 669*7c478bd9Sstevel@tonic-gate case '\\': 670*7c478bd9Sstevel@tonic-gate if ((c = *prestr++) == 't') 671*7c478bd9Sstevel@tonic-gate c = '\t'; 672*7c478bd9Sstevel@tonic-gate else if (c == 'n') 673*7c478bd9Sstevel@tonic-gate c = '\n'; 674*7c478bd9Sstevel@tonic-gate else if (c == 'f') 675*7c478bd9Sstevel@tonic-gate c = '\f'; 676*7c478bd9Sstevel@tonic-gate else if (c == 'r') 677*7c478bd9Sstevel@tonic-gate c = '\r'; 678*7c478bd9Sstevel@tonic-gate else if (c == 'b') 679*7c478bd9Sstevel@tonic-gate c = '\b'; 680*7c478bd9Sstevel@tonic-gate else if (c == '\\') 681*7c478bd9Sstevel@tonic-gate c = '\\'; 682*7c478bd9Sstevel@tonic-gate else if (isdigit(c)) { 683*7c478bd9Sstevel@tonic-gate int n = c - '0'; 684*7c478bd9Sstevel@tonic-gate if (isdigit(*prestr)) { 685*7c478bd9Sstevel@tonic-gate n = 8 * n + *prestr++ - '0'; 686*7c478bd9Sstevel@tonic-gate if (isdigit(*prestr)) 687*7c478bd9Sstevel@tonic-gate n = 8 * n + *prestr++ - '0'; 688*7c478bd9Sstevel@tonic-gate } 689*7c478bd9Sstevel@tonic-gate c = n; 690*7c478bd9Sstevel@tonic-gate } /* else it's now in c */ 691*7c478bd9Sstevel@tonic-gate rlxval = c; 692*7c478bd9Sstevel@tonic-gate return CHAR; 693*7c478bd9Sstevel@tonic-gate default: 694*7c478bd9Sstevel@tonic-gate rlxval = c; 695*7c478bd9Sstevel@tonic-gate return CHAR; 696*7c478bd9Sstevel@tonic-gate case '[': 697*7c478bd9Sstevel@tonic-gate clen = 0; 698*7c478bd9Sstevel@tonic-gate if (*prestr == '^') { 699*7c478bd9Sstevel@tonic-gate cflag = 1; 700*7c478bd9Sstevel@tonic-gate prestr++; 701*7c478bd9Sstevel@tonic-gate } 702*7c478bd9Sstevel@tonic-gate else 703*7c478bd9Sstevel@tonic-gate cflag = 0; 704*7c478bd9Sstevel@tonic-gate for (;;) { 705*7c478bd9Sstevel@tonic-gate if ((c = *prestr++) == '\\') { 706*7c478bd9Sstevel@tonic-gate cbuf[clen++] = '\\'; 707*7c478bd9Sstevel@tonic-gate if ((c = *prestr++) == '\0') 708*7c478bd9Sstevel@tonic-gate ERROR "nonterminated character class %s", lastre FATAL; 709*7c478bd9Sstevel@tonic-gate cbuf[clen++] = c; 710*7c478bd9Sstevel@tonic-gate } else if (c == ']') { 711*7c478bd9Sstevel@tonic-gate cbuf[clen] = 0; 712*7c478bd9Sstevel@tonic-gate rlxstr = tostring(cbuf); 713*7c478bd9Sstevel@tonic-gate if (cflag == 0) 714*7c478bd9Sstevel@tonic-gate return CCL; 715*7c478bd9Sstevel@tonic-gate else 716*7c478bd9Sstevel@tonic-gate return NCCL; 717*7c478bd9Sstevel@tonic-gate } else if (c == '\n') { 718*7c478bd9Sstevel@tonic-gate ERROR "newline in character class %s...", lastre FATAL; 719*7c478bd9Sstevel@tonic-gate } else if (c == '\0') { 720*7c478bd9Sstevel@tonic-gate ERROR "nonterminated character class %s", lastre FATAL; 721*7c478bd9Sstevel@tonic-gate } else 722*7c478bd9Sstevel@tonic-gate cbuf[clen++] = c; 723*7c478bd9Sstevel@tonic-gate } 724*7c478bd9Sstevel@tonic-gate } 725*7c478bd9Sstevel@tonic-gate } 726*7c478bd9Sstevel@tonic-gate 727*7c478bd9Sstevel@tonic-gate int cgoto(f, s, c) 728*7c478bd9Sstevel@tonic-gate fa *f; 729*7c478bd9Sstevel@tonic-gate int s, c; 730*7c478bd9Sstevel@tonic-gate { 731*7c478bd9Sstevel@tonic-gate register int i, j, k; 732*7c478bd9Sstevel@tonic-gate register int *p, *q; 733*7c478bd9Sstevel@tonic-gate 734*7c478bd9Sstevel@tonic-gate for (i=0; i<=f->accept; i++) 735*7c478bd9Sstevel@tonic-gate setvec[i] = 0; 736*7c478bd9Sstevel@tonic-gate setcnt = 0; 737*7c478bd9Sstevel@tonic-gate /* compute positions of gototab[s,c] into setvec */ 738*7c478bd9Sstevel@tonic-gate p = f->posns[s]; 739*7c478bd9Sstevel@tonic-gate for (i=1; i<=*p; i++) { 740*7c478bd9Sstevel@tonic-gate if ((k = f->re[p[i]].ltype) != FINAL) { 741*7c478bd9Sstevel@tonic-gate if (k == CHAR && c == f->re[p[i]].lval 742*7c478bd9Sstevel@tonic-gate || k == DOT && c != 0 && c != HAT 743*7c478bd9Sstevel@tonic-gate || k == ALL && c != 0 744*7c478bd9Sstevel@tonic-gate || k == CCL && member(c, (uchar *) f->re[p[i]].lval) 745*7c478bd9Sstevel@tonic-gate || k == NCCL && !member(c, (uchar *) f->re[p[i]].lval) && c != 0 && c != HAT) { 746*7c478bd9Sstevel@tonic-gate q = f->re[p[i]].lfollow; 747*7c478bd9Sstevel@tonic-gate for (j=1; j<=*q; j++) { 748*7c478bd9Sstevel@tonic-gate if (setvec[q[j]] == 0) { 749*7c478bd9Sstevel@tonic-gate setcnt++; 750*7c478bd9Sstevel@tonic-gate setvec[q[j]] = 1; 751*7c478bd9Sstevel@tonic-gate } 752*7c478bd9Sstevel@tonic-gate } 753*7c478bd9Sstevel@tonic-gate } 754*7c478bd9Sstevel@tonic-gate } 755*7c478bd9Sstevel@tonic-gate } 756*7c478bd9Sstevel@tonic-gate /* determine if setvec is a previous state */ 757*7c478bd9Sstevel@tonic-gate tmpset[0] = setcnt; 758*7c478bd9Sstevel@tonic-gate j = 1; 759*7c478bd9Sstevel@tonic-gate for (i = f->accept; i >= 0; i--) 760*7c478bd9Sstevel@tonic-gate if (setvec[i]) { 761*7c478bd9Sstevel@tonic-gate tmpset[j++] = i; 762*7c478bd9Sstevel@tonic-gate } 763*7c478bd9Sstevel@tonic-gate /* tmpset == previous state? */ 764*7c478bd9Sstevel@tonic-gate for (i=1; i<= f->curstat; i++) { 765*7c478bd9Sstevel@tonic-gate p = f->posns[i]; 766*7c478bd9Sstevel@tonic-gate if ((k = tmpset[0]) != p[0]) 767*7c478bd9Sstevel@tonic-gate goto different; 768*7c478bd9Sstevel@tonic-gate for (j = 1; j <= k; j++) 769*7c478bd9Sstevel@tonic-gate if (tmpset[j] != p[j]) 770*7c478bd9Sstevel@tonic-gate goto different; 771*7c478bd9Sstevel@tonic-gate /* setvec is state i */ 772*7c478bd9Sstevel@tonic-gate f->gototab[s][c] = i; 773*7c478bd9Sstevel@tonic-gate return i; 774*7c478bd9Sstevel@tonic-gate different:; 775*7c478bd9Sstevel@tonic-gate } 776*7c478bd9Sstevel@tonic-gate 777*7c478bd9Sstevel@tonic-gate /* add tmpset to current set of states */ 778*7c478bd9Sstevel@tonic-gate if (f->curstat >= NSTATES-1) { 779*7c478bd9Sstevel@tonic-gate f->curstat = 2; 780*7c478bd9Sstevel@tonic-gate f->reset = 1; 781*7c478bd9Sstevel@tonic-gate for (i=2; i<NSTATES; i++) 782*7c478bd9Sstevel@tonic-gate xfree(f->posns[i]); 783*7c478bd9Sstevel@tonic-gate } 784*7c478bd9Sstevel@tonic-gate else 785*7c478bd9Sstevel@tonic-gate ++(f->curstat); 786*7c478bd9Sstevel@tonic-gate for (i=0; i<NCHARS; i++) 787*7c478bd9Sstevel@tonic-gate f->gototab[f->curstat][i] = 0; 788*7c478bd9Sstevel@tonic-gate xfree(f->posns[f->curstat]); 789*7c478bd9Sstevel@tonic-gate if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) 790*7c478bd9Sstevel@tonic-gate overflo("out of space in cgoto"); 791*7c478bd9Sstevel@tonic-gate 792*7c478bd9Sstevel@tonic-gate f->posns[f->curstat] = p; 793*7c478bd9Sstevel@tonic-gate f->gototab[s][c] = f->curstat; 794*7c478bd9Sstevel@tonic-gate for (i = 0; i <= setcnt; i++) 795*7c478bd9Sstevel@tonic-gate p[i] = tmpset[i]; 796*7c478bd9Sstevel@tonic-gate if (setvec[f->accept]) 797*7c478bd9Sstevel@tonic-gate f->out[f->curstat] = 1; 798*7c478bd9Sstevel@tonic-gate else 799*7c478bd9Sstevel@tonic-gate f->out[f->curstat] = 0; 800*7c478bd9Sstevel@tonic-gate return f->curstat; 801*7c478bd9Sstevel@tonic-gate } 802*7c478bd9Sstevel@tonic-gate 803*7c478bd9Sstevel@tonic-gate 804*7c478bd9Sstevel@tonic-gate freefa(f) 805*7c478bd9Sstevel@tonic-gate struct fa *f; 806*7c478bd9Sstevel@tonic-gate { 807*7c478bd9Sstevel@tonic-gate 808*7c478bd9Sstevel@tonic-gate register int i; 809*7c478bd9Sstevel@tonic-gate 810*7c478bd9Sstevel@tonic-gate if (f == NULL) 811*7c478bd9Sstevel@tonic-gate return; 812*7c478bd9Sstevel@tonic-gate for (i=0; i<=f->curstat; i++) 813*7c478bd9Sstevel@tonic-gate xfree(f->posns[i]); 814*7c478bd9Sstevel@tonic-gate for (i=0; i<=f->accept; i++) 815*7c478bd9Sstevel@tonic-gate xfree(f->re[i].lfollow); 816*7c478bd9Sstevel@tonic-gate xfree(f->restr); 817*7c478bd9Sstevel@tonic-gate xfree(f); 818*7c478bd9Sstevel@tonic-gate } 819