17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 77c478bd9Sstevel@tonic-gate * with the License. 87c478bd9Sstevel@tonic-gate * 97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 127c478bd9Sstevel@tonic-gate * and limitations under the License. 137c478bd9Sstevel@tonic-gate * 147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 197c478bd9Sstevel@tonic-gate * 207c478bd9Sstevel@tonic-gate * CDDL HEADER END 217c478bd9Sstevel@tonic-gate */ 227c478bd9Sstevel@tonic-gate 237c478bd9Sstevel@tonic-gate /* 24*1ee2e5faSnakanon * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 257c478bd9Sstevel@tonic-gate * Use is subject to license terms. 267c478bd9Sstevel@tonic-gate */ 277c478bd9Sstevel@tonic-gate 28*1ee2e5faSnakanon /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29*1ee2e5faSnakanon /* All Rights Reserved */ 30*1ee2e5faSnakanon 31*1ee2e5faSnakanon #pragma ident "%Z%%M% %I% %E% SMI" 327c478bd9Sstevel@tonic-gate 337c478bd9Sstevel@tonic-gate #define DEBUG 347c478bd9Sstevel@tonic-gate 357c478bd9Sstevel@tonic-gate #include "awk.h" 367c478bd9Sstevel@tonic-gate #include "y.tab.h" 377c478bd9Sstevel@tonic-gate 387c478bd9Sstevel@tonic-gate #define HAT (NCHARS-1) /* matches ^ in regular expr */ 397c478bd9Sstevel@tonic-gate /* NCHARS is 2**n */ 40*1ee2e5faSnakanon #define MAXLIN (3 * LINE_MAX) 417c478bd9Sstevel@tonic-gate 427c478bd9Sstevel@tonic-gate #define type(v) (v)->nobj 437c478bd9Sstevel@tonic-gate #define left(v) (v)->narg[0] 447c478bd9Sstevel@tonic-gate #define right(v) (v)->narg[1] 457c478bd9Sstevel@tonic-gate #define parent(v) (v)->nnext 467c478bd9Sstevel@tonic-gate 477c478bd9Sstevel@tonic-gate #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: 487c478bd9Sstevel@tonic-gate #define UNARY case STAR: case PLUS: case QUEST: 497c478bd9Sstevel@tonic-gate 50*1ee2e5faSnakanon /* 51*1ee2e5faSnakanon * encoding in tree Nodes: 52*1ee2e5faSnakanon * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): 53*1ee2e5faSnakanon * left is index, right contains value or pointer to value 54*1ee2e5faSnakanon * unary (STAR, PLUS, QUEST): left is child, right is null 55*1ee2e5faSnakanon * binary (CAT, OR): left and right are children 56*1ee2e5faSnakanon * parent contains pointer to parent 577c478bd9Sstevel@tonic-gate */ 587c478bd9Sstevel@tonic-gate 59*1ee2e5faSnakanon int setvec[MAXLIN]; 60*1ee2e5faSnakanon int tmpset[MAXLIN]; 61*1ee2e5faSnakanon Node *point[MAXLIN]; 627c478bd9Sstevel@tonic-gate 637c478bd9Sstevel@tonic-gate int rtok; /* next token in current re */ 647c478bd9Sstevel@tonic-gate int rlxval; 657c478bd9Sstevel@tonic-gate uchar *rlxstr; 667c478bd9Sstevel@tonic-gate uchar *prestr; /* current position in current re */ 677c478bd9Sstevel@tonic-gate uchar *lastre; /* origin of last re */ 687c478bd9Sstevel@tonic-gate 697c478bd9Sstevel@tonic-gate static int setcnt; 707c478bd9Sstevel@tonic-gate static int poscnt; 717c478bd9Sstevel@tonic-gate 727c478bd9Sstevel@tonic-gate uchar *patbeg; 737c478bd9Sstevel@tonic-gate int patlen; 747c478bd9Sstevel@tonic-gate 757c478bd9Sstevel@tonic-gate #define NFA 20 /* cache this many dynamic fa's */ 767c478bd9Sstevel@tonic-gate fa *fatab[NFA]; 777c478bd9Sstevel@tonic-gate int nfatab = 0; /* entries in fatab */ 787c478bd9Sstevel@tonic-gate 79*1ee2e5faSnakanon static fa *mkdfa(uchar *, int); 80*1ee2e5faSnakanon static int makeinit(fa *, int); 81*1ee2e5faSnakanon static void penter(Node *); 82*1ee2e5faSnakanon static void freetr(Node *); 83*1ee2e5faSnakanon static void overflo(char *); 84*1ee2e5faSnakanon static void cfoll(fa *, Node *); 85*1ee2e5faSnakanon static void follow(Node *); 86*1ee2e5faSnakanon static Node *reparse(uchar *); 87*1ee2e5faSnakanon static int relex(void); 88*1ee2e5faSnakanon static void freefa(fa *); 89*1ee2e5faSnakanon static int cgoto(fa *, int, int); 90*1ee2e5faSnakanon 91*1ee2e5faSnakanon fa * 92*1ee2e5faSnakanon makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */ 937c478bd9Sstevel@tonic-gate { 947c478bd9Sstevel@tonic-gate int i, use, nuse; 957c478bd9Sstevel@tonic-gate fa *pfa; 967c478bd9Sstevel@tonic-gate 977c478bd9Sstevel@tonic-gate if (compile_time) /* a constant for sure */ 98*1ee2e5faSnakanon return (mkdfa(s, anchor)); 99*1ee2e5faSnakanon for (i = 0; i < nfatab; i++) { /* is it there already? */ 100*1ee2e5faSnakanon if (fatab[i]->anchor == anchor && 101*1ee2e5faSnakanon strcmp((char *)fatab[i]->restr, (char *)s) == 0) { 1027c478bd9Sstevel@tonic-gate fatab[i]->use++; 103*1ee2e5faSnakanon return (fatab[i]); 104*1ee2e5faSnakanon } 1057c478bd9Sstevel@tonic-gate } 1067c478bd9Sstevel@tonic-gate pfa = mkdfa(s, anchor); 1077c478bd9Sstevel@tonic-gate if (nfatab < NFA) { /* room for another */ 1087c478bd9Sstevel@tonic-gate fatab[nfatab] = pfa; 1097c478bd9Sstevel@tonic-gate fatab[nfatab]->use = 1; 1107c478bd9Sstevel@tonic-gate nfatab++; 111*1ee2e5faSnakanon return (pfa); 1127c478bd9Sstevel@tonic-gate } 1137c478bd9Sstevel@tonic-gate use = fatab[0]->use; /* replace least-recently used */ 1147c478bd9Sstevel@tonic-gate nuse = 0; 1157c478bd9Sstevel@tonic-gate for (i = 1; i < nfatab; i++) 1167c478bd9Sstevel@tonic-gate if (fatab[i]->use < use) { 1177c478bd9Sstevel@tonic-gate use = fatab[i]->use; 1187c478bd9Sstevel@tonic-gate nuse = i; 1197c478bd9Sstevel@tonic-gate } 1207c478bd9Sstevel@tonic-gate freefa(fatab[nuse]); 1217c478bd9Sstevel@tonic-gate fatab[nuse] = pfa; 1227c478bd9Sstevel@tonic-gate pfa->use = 1; 123*1ee2e5faSnakanon return (pfa); 1247c478bd9Sstevel@tonic-gate } 1257c478bd9Sstevel@tonic-gate 126*1ee2e5faSnakanon fa * 127*1ee2e5faSnakanon mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ 128*1ee2e5faSnakanon /* anchor = 1 for anchored matches, else 0 */ 1297c478bd9Sstevel@tonic-gate { 130*1ee2e5faSnakanon Node *p, *p1; 1317c478bd9Sstevel@tonic-gate fa *f; 1327c478bd9Sstevel@tonic-gate 1337c478bd9Sstevel@tonic-gate p = reparse(s); 1347c478bd9Sstevel@tonic-gate p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); 1357c478bd9Sstevel@tonic-gate /* put ALL STAR in front of reg. exp. */ 1367c478bd9Sstevel@tonic-gate p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); 1377c478bd9Sstevel@tonic-gate /* put FINAL after reg. exp. */ 1387c478bd9Sstevel@tonic-gate 1397c478bd9Sstevel@tonic-gate poscnt = 0; 1407c478bd9Sstevel@tonic-gate penter(p1); /* enter parent pointers and leaf indices */ 1417c478bd9Sstevel@tonic-gate if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL) 1427c478bd9Sstevel@tonic-gate overflo("no room for fa"); 143*1ee2e5faSnakanon /* penter has computed number of positions in re */ 144*1ee2e5faSnakanon f->accept = poscnt-1; 1457c478bd9Sstevel@tonic-gate cfoll(f, p1); /* set up follow sets */ 1467c478bd9Sstevel@tonic-gate freetr(p1); 147*1ee2e5faSnakanon if ((f->posns[0] = 148*1ee2e5faSnakanon (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) { 1497c478bd9Sstevel@tonic-gate overflo("out of space in makedfa"); 150*1ee2e5faSnakanon } 1517c478bd9Sstevel@tonic-gate if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL) 1527c478bd9Sstevel@tonic-gate overflo("out of space in makedfa"); 1537c478bd9Sstevel@tonic-gate *f->posns[1] = 0; 1547c478bd9Sstevel@tonic-gate f->initstat = makeinit(f, anchor); 1557c478bd9Sstevel@tonic-gate f->anchor = anchor; 1567c478bd9Sstevel@tonic-gate f->restr = tostring(s); 157*1ee2e5faSnakanon return (f); 1587c478bd9Sstevel@tonic-gate } 1597c478bd9Sstevel@tonic-gate 160*1ee2e5faSnakanon static int 161*1ee2e5faSnakanon makeinit(fa *f, int anchor) 1627c478bd9Sstevel@tonic-gate { 1637c478bd9Sstevel@tonic-gate register int i, k; 1647c478bd9Sstevel@tonic-gate 1657c478bd9Sstevel@tonic-gate f->curstat = 2; 1667c478bd9Sstevel@tonic-gate f->out[2] = 0; 1677c478bd9Sstevel@tonic-gate f->reset = 0; 1687c478bd9Sstevel@tonic-gate k = *(f->re[0].lfollow); 1697c478bd9Sstevel@tonic-gate xfree(f->posns[2]); 1707c478bd9Sstevel@tonic-gate if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL) 1717c478bd9Sstevel@tonic-gate overflo("out of space in makeinit"); 1727c478bd9Sstevel@tonic-gate for (i = 0; i <= k; i++) { 1737c478bd9Sstevel@tonic-gate (f->posns[2])[i] = (f->re[0].lfollow)[i]; 1747c478bd9Sstevel@tonic-gate } 1757c478bd9Sstevel@tonic-gate if ((f->posns[2])[1] == f->accept) 1767c478bd9Sstevel@tonic-gate f->out[2] = 1; 1777c478bd9Sstevel@tonic-gate for (i = 0; i < NCHARS; i++) 1787c478bd9Sstevel@tonic-gate f->gototab[2][i] = 0; 1797c478bd9Sstevel@tonic-gate f->curstat = cgoto(f, 2, HAT); 1807c478bd9Sstevel@tonic-gate if (anchor) { 1817c478bd9Sstevel@tonic-gate *f->posns[2] = k-1; /* leave out position 0 */ 1827c478bd9Sstevel@tonic-gate for (i = 0; i < k; i++) { 1837c478bd9Sstevel@tonic-gate (f->posns[0])[i] = (f->posns[2])[i]; 1847c478bd9Sstevel@tonic-gate } 1857c478bd9Sstevel@tonic-gate 1867c478bd9Sstevel@tonic-gate f->out[0] = f->out[2]; 1877c478bd9Sstevel@tonic-gate if (f->curstat != 2) 1887c478bd9Sstevel@tonic-gate --(*f->posns[f->curstat]); 1897c478bd9Sstevel@tonic-gate } 190*1ee2e5faSnakanon return (f->curstat); 1917c478bd9Sstevel@tonic-gate } 1927c478bd9Sstevel@tonic-gate 193*1ee2e5faSnakanon void 194*1ee2e5faSnakanon penter(Node *p) /* set up parent pointers and leaf indices */ 1957c478bd9Sstevel@tonic-gate { 1967c478bd9Sstevel@tonic-gate switch (type(p)) { 1977c478bd9Sstevel@tonic-gate LEAF 1987c478bd9Sstevel@tonic-gate left(p) = (Node *) poscnt; 1997c478bd9Sstevel@tonic-gate point[poscnt++] = p; 2007c478bd9Sstevel@tonic-gate break; 2017c478bd9Sstevel@tonic-gate UNARY 2027c478bd9Sstevel@tonic-gate penter(left(p)); 2037c478bd9Sstevel@tonic-gate parent(left(p)) = p; 2047c478bd9Sstevel@tonic-gate break; 2057c478bd9Sstevel@tonic-gate case CAT: 2067c478bd9Sstevel@tonic-gate case OR: 2077c478bd9Sstevel@tonic-gate penter(left(p)); 2087c478bd9Sstevel@tonic-gate penter(right(p)); 2097c478bd9Sstevel@tonic-gate parent(left(p)) = p; 2107c478bd9Sstevel@tonic-gate parent(right(p)) = p; 2117c478bd9Sstevel@tonic-gate break; 2127c478bd9Sstevel@tonic-gate default: 2137c478bd9Sstevel@tonic-gate ERROR "unknown type %d in penter", type(p) FATAL; 2147c478bd9Sstevel@tonic-gate break; 2157c478bd9Sstevel@tonic-gate } 2167c478bd9Sstevel@tonic-gate } 2177c478bd9Sstevel@tonic-gate 218*1ee2e5faSnakanon static void 219*1ee2e5faSnakanon freetr(Node *p) /* free parse tree */ 2207c478bd9Sstevel@tonic-gate { 2217c478bd9Sstevel@tonic-gate switch (type(p)) { 2227c478bd9Sstevel@tonic-gate LEAF 2237c478bd9Sstevel@tonic-gate xfree(p); 2247c478bd9Sstevel@tonic-gate break; 2257c478bd9Sstevel@tonic-gate UNARY 2267c478bd9Sstevel@tonic-gate freetr(left(p)); 2277c478bd9Sstevel@tonic-gate xfree(p); 2287c478bd9Sstevel@tonic-gate break; 2297c478bd9Sstevel@tonic-gate case CAT: 2307c478bd9Sstevel@tonic-gate case OR: 2317c478bd9Sstevel@tonic-gate freetr(left(p)); 2327c478bd9Sstevel@tonic-gate freetr(right(p)); 2337c478bd9Sstevel@tonic-gate xfree(p); 2347c478bd9Sstevel@tonic-gate break; 2357c478bd9Sstevel@tonic-gate default: 2367c478bd9Sstevel@tonic-gate ERROR "unknown type %d in freetr", type(p) FATAL; 2377c478bd9Sstevel@tonic-gate break; 2387c478bd9Sstevel@tonic-gate } 2397c478bd9Sstevel@tonic-gate } 2407c478bd9Sstevel@tonic-gate 241*1ee2e5faSnakanon uchar * 242*1ee2e5faSnakanon cclenter(uchar *p) 2437c478bd9Sstevel@tonic-gate { 2447c478bd9Sstevel@tonic-gate register int i, c; 245*1ee2e5faSnakanon uchar *op, *chars, *ret; 246*1ee2e5faSnakanon size_t bsize; 2477c478bd9Sstevel@tonic-gate 248*1ee2e5faSnakanon init_buf(&chars, &bsize, LINE_INCR); 2497c478bd9Sstevel@tonic-gate op = p; 2507c478bd9Sstevel@tonic-gate i = 0; 2517c478bd9Sstevel@tonic-gate while ((c = *p++) != 0) { 2527c478bd9Sstevel@tonic-gate if (c == '\\') { 2537c478bd9Sstevel@tonic-gate if ((c = *p++) == 't') 2547c478bd9Sstevel@tonic-gate c = '\t'; 2557c478bd9Sstevel@tonic-gate else if (c == 'n') 2567c478bd9Sstevel@tonic-gate c = '\n'; 2577c478bd9Sstevel@tonic-gate else if (c == 'f') 2587c478bd9Sstevel@tonic-gate c = '\f'; 2597c478bd9Sstevel@tonic-gate else if (c == 'r') 2607c478bd9Sstevel@tonic-gate c = '\r'; 2617c478bd9Sstevel@tonic-gate else if (c == 'b') 2627c478bd9Sstevel@tonic-gate c = '\b'; 2637c478bd9Sstevel@tonic-gate else if (c == '\\') 2647c478bd9Sstevel@tonic-gate c = '\\'; 2657c478bd9Sstevel@tonic-gate else if (isdigit(c)) { 2667c478bd9Sstevel@tonic-gate int n = c - '0'; 2677c478bd9Sstevel@tonic-gate if (isdigit(*p)) { 2687c478bd9Sstevel@tonic-gate n = 8 * n + *p++ - '0'; 2697c478bd9Sstevel@tonic-gate if (isdigit(*p)) 2707c478bd9Sstevel@tonic-gate n = 8 * n + *p++ - '0'; 2717c478bd9Sstevel@tonic-gate } 2727c478bd9Sstevel@tonic-gate c = n; 2737c478bd9Sstevel@tonic-gate } /* else */ 2747c478bd9Sstevel@tonic-gate /* c = c; */ 2757c478bd9Sstevel@tonic-gate } else if (c == '-' && i > 0 && chars[i-1] != 0) { 2767c478bd9Sstevel@tonic-gate if (*p != 0) { 2777c478bd9Sstevel@tonic-gate c = chars[i-1]; 2787c478bd9Sstevel@tonic-gate while ((uchar)c < *p) { /* fails if *p is \\ */ 279*1ee2e5faSnakanon expand_buf(&chars, &bsize, i); 2807c478bd9Sstevel@tonic-gate chars[i++] = ++c; 2817c478bd9Sstevel@tonic-gate } 2827c478bd9Sstevel@tonic-gate p++; 2837c478bd9Sstevel@tonic-gate continue; 2847c478bd9Sstevel@tonic-gate } 2857c478bd9Sstevel@tonic-gate } 286*1ee2e5faSnakanon expand_buf(&chars, &bsize, i); 2877c478bd9Sstevel@tonic-gate chars[i++] = c; 2887c478bd9Sstevel@tonic-gate } 2897c478bd9Sstevel@tonic-gate chars[i++] = '\0'; 2907c478bd9Sstevel@tonic-gate dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars)); 2917c478bd9Sstevel@tonic-gate xfree(op); 292*1ee2e5faSnakanon ret = tostring(chars); 293*1ee2e5faSnakanon free(chars); 294*1ee2e5faSnakanon return (ret); 2957c478bd9Sstevel@tonic-gate } 2967c478bd9Sstevel@tonic-gate 297*1ee2e5faSnakanon static void 298*1ee2e5faSnakanon overflo(char *s) 2997c478bd9Sstevel@tonic-gate { 3007c478bd9Sstevel@tonic-gate ERROR "regular expression too big: %s", gettext((char *)s) FATAL; 3017c478bd9Sstevel@tonic-gate } 3027c478bd9Sstevel@tonic-gate 303*1ee2e5faSnakanon /* enter follow set of each leaf of vertex v into lfollow[leaf] */ 304*1ee2e5faSnakanon static void 305*1ee2e5faSnakanon cfoll(fa *f, Node *v) 3067c478bd9Sstevel@tonic-gate { 3077c478bd9Sstevel@tonic-gate register int i; 3087c478bd9Sstevel@tonic-gate register int *p; 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate switch (type(v)) { 3117c478bd9Sstevel@tonic-gate LEAF 3127c478bd9Sstevel@tonic-gate f->re[(int)left(v)].ltype = type(v); 3137c478bd9Sstevel@tonic-gate f->re[(int)left(v)].lval = (int)right(v); 3147c478bd9Sstevel@tonic-gate for (i = 0; i <= f->accept; i++) 3157c478bd9Sstevel@tonic-gate setvec[i] = 0; 3167c478bd9Sstevel@tonic-gate setcnt = 0; 3177c478bd9Sstevel@tonic-gate follow(v); /* computes setvec and setcnt */ 3187c478bd9Sstevel@tonic-gate if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL) 3197c478bd9Sstevel@tonic-gate overflo("follow set overflow"); 3207c478bd9Sstevel@tonic-gate f->re[(int)left(v)].lfollow = p; 3217c478bd9Sstevel@tonic-gate *p = setcnt; 322*1ee2e5faSnakanon for (i = f->accept; i >= 0; i--) { 323*1ee2e5faSnakanon if (setvec[i] == 1) 324*1ee2e5faSnakanon *++p = i; 325*1ee2e5faSnakanon } 3267c478bd9Sstevel@tonic-gate break; 3277c478bd9Sstevel@tonic-gate UNARY 3287c478bd9Sstevel@tonic-gate cfoll(f, left(v)); 3297c478bd9Sstevel@tonic-gate break; 3307c478bd9Sstevel@tonic-gate case CAT: 3317c478bd9Sstevel@tonic-gate case OR: 3327c478bd9Sstevel@tonic-gate cfoll(f, left(v)); 3337c478bd9Sstevel@tonic-gate cfoll(f, right(v)); 3347c478bd9Sstevel@tonic-gate break; 3357c478bd9Sstevel@tonic-gate default: 3367c478bd9Sstevel@tonic-gate ERROR "unknown type %d in cfoll", type(v) FATAL; 3377c478bd9Sstevel@tonic-gate } 3387c478bd9Sstevel@tonic-gate } 3397c478bd9Sstevel@tonic-gate 340*1ee2e5faSnakanon /* 341*1ee2e5faSnakanon * collects initially active leaves of p into setvec 342*1ee2e5faSnakanon * returns 0 or 1 depending on whether p matches empty string 343*1ee2e5faSnakanon */ 344*1ee2e5faSnakanon static int 345*1ee2e5faSnakanon first(Node *p) 3467c478bd9Sstevel@tonic-gate { 3477c478bd9Sstevel@tonic-gate register int b; 3487c478bd9Sstevel@tonic-gate 3497c478bd9Sstevel@tonic-gate switch (type(p)) { 3507c478bd9Sstevel@tonic-gate LEAF 3517c478bd9Sstevel@tonic-gate if (setvec[(int)left(p)] != 1) { 3527c478bd9Sstevel@tonic-gate setvec[(int)left(p)] = 1; 3537c478bd9Sstevel@tonic-gate setcnt++; 3547c478bd9Sstevel@tonic-gate } 3557c478bd9Sstevel@tonic-gate if (type(p) == CCL && (*(uchar *)right(p)) == '\0') 3567c478bd9Sstevel@tonic-gate return (0); /* empty CCL */ 357*1ee2e5faSnakanon else 358*1ee2e5faSnakanon return (1); 3597c478bd9Sstevel@tonic-gate case PLUS: 360*1ee2e5faSnakanon if (first(left(p)) == 0) 361*1ee2e5faSnakanon return (0); 3627c478bd9Sstevel@tonic-gate return (1); 3637c478bd9Sstevel@tonic-gate case STAR: 3647c478bd9Sstevel@tonic-gate case QUEST: 365*1ee2e5faSnakanon (void) first(left(p)); 3667c478bd9Sstevel@tonic-gate return (0); 3677c478bd9Sstevel@tonic-gate case CAT: 368*1ee2e5faSnakanon if (first(left(p)) == 0 && first(right(p)) == 0) 369*1ee2e5faSnakanon return (0); 3707c478bd9Sstevel@tonic-gate return (1); 3717c478bd9Sstevel@tonic-gate case OR: 3727c478bd9Sstevel@tonic-gate b = first(right(p)); 373*1ee2e5faSnakanon if (first(left(p)) == 0 || b == 0) 374*1ee2e5faSnakanon return (0); 3757c478bd9Sstevel@tonic-gate return (1); 3767c478bd9Sstevel@tonic-gate } 3777c478bd9Sstevel@tonic-gate ERROR "unknown type %d in first", type(p) FATAL; 3787c478bd9Sstevel@tonic-gate return (-1); 3797c478bd9Sstevel@tonic-gate } 3807c478bd9Sstevel@tonic-gate 381*1ee2e5faSnakanon /* collects leaves that can follow v into setvec */ 382*1ee2e5faSnakanon static void 383*1ee2e5faSnakanon follow(Node *v) 3847c478bd9Sstevel@tonic-gate { 3857c478bd9Sstevel@tonic-gate Node *p; 3867c478bd9Sstevel@tonic-gate 3877c478bd9Sstevel@tonic-gate if (type(v) == FINAL) 3887c478bd9Sstevel@tonic-gate return; 3897c478bd9Sstevel@tonic-gate p = parent(v); 3907c478bd9Sstevel@tonic-gate switch (type(p)) { 3917c478bd9Sstevel@tonic-gate case STAR: 3927c478bd9Sstevel@tonic-gate case PLUS: 393*1ee2e5faSnakanon (void) first(v); 3947c478bd9Sstevel@tonic-gate follow(p); 3957c478bd9Sstevel@tonic-gate return; 3967c478bd9Sstevel@tonic-gate 3977c478bd9Sstevel@tonic-gate case OR: 3987c478bd9Sstevel@tonic-gate case QUEST: 3997c478bd9Sstevel@tonic-gate follow(p); 4007c478bd9Sstevel@tonic-gate return; 4017c478bd9Sstevel@tonic-gate 4027c478bd9Sstevel@tonic-gate case CAT: 4037c478bd9Sstevel@tonic-gate if (v == left(p)) { /* v is left child of p */ 4047c478bd9Sstevel@tonic-gate if (first(right(p)) == 0) { 4057c478bd9Sstevel@tonic-gate follow(p); 4067c478bd9Sstevel@tonic-gate return; 4077c478bd9Sstevel@tonic-gate } 408*1ee2e5faSnakanon } else /* v is right child */ 4097c478bd9Sstevel@tonic-gate follow(p); 4107c478bd9Sstevel@tonic-gate return; 4117c478bd9Sstevel@tonic-gate default: 4127c478bd9Sstevel@tonic-gate ERROR "unknown type %d in follow", type(p) FATAL; 4137c478bd9Sstevel@tonic-gate break; 4147c478bd9Sstevel@tonic-gate } 4157c478bd9Sstevel@tonic-gate } 4167c478bd9Sstevel@tonic-gate 417*1ee2e5faSnakanon static int 418*1ee2e5faSnakanon member(uchar c, uchar *s) /* is c in s? */ 4197c478bd9Sstevel@tonic-gate { 4207c478bd9Sstevel@tonic-gate while (*s) 4217c478bd9Sstevel@tonic-gate if (c == *s++) 4227c478bd9Sstevel@tonic-gate return (1); 4237c478bd9Sstevel@tonic-gate return (0); 4247c478bd9Sstevel@tonic-gate } 4257c478bd9Sstevel@tonic-gate 4267c478bd9Sstevel@tonic-gate 427*1ee2e5faSnakanon int 428*1ee2e5faSnakanon match(fa *f, uchar *p) 4297c478bd9Sstevel@tonic-gate { 4307c478bd9Sstevel@tonic-gate register int s, ns; 4317c478bd9Sstevel@tonic-gate 4327c478bd9Sstevel@tonic-gate s = f->reset ? makeinit(f, 0) : f->initstat; 4337c478bd9Sstevel@tonic-gate if (f->out[s]) 4347c478bd9Sstevel@tonic-gate return (1); 4357c478bd9Sstevel@tonic-gate do { 436*1ee2e5faSnakanon if ((ns = f->gototab[s][*p]) != 0) 4377c478bd9Sstevel@tonic-gate s = ns; 4387c478bd9Sstevel@tonic-gate else 4397c478bd9Sstevel@tonic-gate s = cgoto(f, s, *p); 4407c478bd9Sstevel@tonic-gate if (f->out[s]) 4417c478bd9Sstevel@tonic-gate return (1); 4427c478bd9Sstevel@tonic-gate } while (*p++ != 0); 4437c478bd9Sstevel@tonic-gate return (0); 4447c478bd9Sstevel@tonic-gate } 4457c478bd9Sstevel@tonic-gate 446*1ee2e5faSnakanon int 447*1ee2e5faSnakanon pmatch(fa *f, uchar *p) 4487c478bd9Sstevel@tonic-gate { 4497c478bd9Sstevel@tonic-gate register int s, ns; 4507c478bd9Sstevel@tonic-gate register uchar *q; 4517c478bd9Sstevel@tonic-gate int i, k; 4527c478bd9Sstevel@tonic-gate 4537c478bd9Sstevel@tonic-gate if (f->reset) { 4547c478bd9Sstevel@tonic-gate f->initstat = s = makeinit(f, 1); 4557c478bd9Sstevel@tonic-gate } else { 4567c478bd9Sstevel@tonic-gate s = f->initstat; 4577c478bd9Sstevel@tonic-gate } 4587c478bd9Sstevel@tonic-gate patbeg = p; 4597c478bd9Sstevel@tonic-gate patlen = -1; 4607c478bd9Sstevel@tonic-gate do { 4617c478bd9Sstevel@tonic-gate q = p; 4627c478bd9Sstevel@tonic-gate do { 4637c478bd9Sstevel@tonic-gate if (f->out[s]) /* final state */ 4647c478bd9Sstevel@tonic-gate patlen = q-p; 465*1ee2e5faSnakanon if ((ns = f->gototab[s][*q]) != 0) 4667c478bd9Sstevel@tonic-gate s = ns; 4677c478bd9Sstevel@tonic-gate else 4687c478bd9Sstevel@tonic-gate s = cgoto(f, s, *q); 469*1ee2e5faSnakanon if (s == 1) { /* no transition */ 4707c478bd9Sstevel@tonic-gate if (patlen >= 0) { 4717c478bd9Sstevel@tonic-gate patbeg = p; 4727c478bd9Sstevel@tonic-gate return (1); 473*1ee2e5faSnakanon } else 4747c478bd9Sstevel@tonic-gate goto nextin; /* no match */ 475*1ee2e5faSnakanon } 4767c478bd9Sstevel@tonic-gate } while (*q++ != 0); 4777c478bd9Sstevel@tonic-gate if (f->out[s]) 4787c478bd9Sstevel@tonic-gate patlen = q - p - 1; /* don't count $ */ 4797c478bd9Sstevel@tonic-gate if (patlen >= 0) { 4807c478bd9Sstevel@tonic-gate patbeg = p; 4817c478bd9Sstevel@tonic-gate return (1); 4827c478bd9Sstevel@tonic-gate } 4837c478bd9Sstevel@tonic-gate nextin: 4847c478bd9Sstevel@tonic-gate s = 2; 4857c478bd9Sstevel@tonic-gate if (f->reset) { 4867c478bd9Sstevel@tonic-gate for (i = 2; i <= f->curstat; i++) 4877c478bd9Sstevel@tonic-gate xfree(f->posns[i]); 4887c478bd9Sstevel@tonic-gate k = *f->posns[0]; 489*1ee2e5faSnakanon if ((f->posns[2] = 490*1ee2e5faSnakanon (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) { 4917c478bd9Sstevel@tonic-gate overflo("out of space in pmatch"); 492*1ee2e5faSnakanon } 4937c478bd9Sstevel@tonic-gate for (i = 0; i <= k; i++) 4947c478bd9Sstevel@tonic-gate (f->posns[2])[i] = (f->posns[0])[i]; 4957c478bd9Sstevel@tonic-gate f->initstat = f->curstat = 2; 4967c478bd9Sstevel@tonic-gate f->out[2] = f->out[0]; 4977c478bd9Sstevel@tonic-gate for (i = 0; i < NCHARS; i++) 4987c478bd9Sstevel@tonic-gate f->gototab[2][i] = 0; 4997c478bd9Sstevel@tonic-gate } 5007c478bd9Sstevel@tonic-gate } while (*p++ != 0); 5017c478bd9Sstevel@tonic-gate return (0); 5027c478bd9Sstevel@tonic-gate } 5037c478bd9Sstevel@tonic-gate 504*1ee2e5faSnakanon int 505*1ee2e5faSnakanon nematch(fa *f, uchar *p) 5067c478bd9Sstevel@tonic-gate { 5077c478bd9Sstevel@tonic-gate register int s, ns; 5087c478bd9Sstevel@tonic-gate register uchar *q; 5097c478bd9Sstevel@tonic-gate int i, k; 5107c478bd9Sstevel@tonic-gate 5117c478bd9Sstevel@tonic-gate if (f->reset) { 5127c478bd9Sstevel@tonic-gate f->initstat = s = makeinit(f, 1); 5137c478bd9Sstevel@tonic-gate } else { 5147c478bd9Sstevel@tonic-gate s = f->initstat; 5157c478bd9Sstevel@tonic-gate } 5167c478bd9Sstevel@tonic-gate patlen = -1; 5177c478bd9Sstevel@tonic-gate while (*p) { 5187c478bd9Sstevel@tonic-gate q = p; 5197c478bd9Sstevel@tonic-gate do { 5207c478bd9Sstevel@tonic-gate if (f->out[s]) /* final state */ 5217c478bd9Sstevel@tonic-gate patlen = q-p; 522*1ee2e5faSnakanon if ((ns = f->gototab[s][*q]) != 0) 5237c478bd9Sstevel@tonic-gate s = ns; 5247c478bd9Sstevel@tonic-gate else 5257c478bd9Sstevel@tonic-gate s = cgoto(f, s, *q); 526*1ee2e5faSnakanon if (s == 1) { /* no transition */ 5277c478bd9Sstevel@tonic-gate if (patlen > 0) { 5287c478bd9Sstevel@tonic-gate patbeg = p; 5297c478bd9Sstevel@tonic-gate return (1); 530*1ee2e5faSnakanon } else 5317c478bd9Sstevel@tonic-gate goto nnextin; /* no nonempty match */ 532*1ee2e5faSnakanon } 5337c478bd9Sstevel@tonic-gate } while (*q++ != 0); 5347c478bd9Sstevel@tonic-gate if (f->out[s]) 5357c478bd9Sstevel@tonic-gate patlen = q - p - 1; /* don't count $ */ 5367c478bd9Sstevel@tonic-gate if (patlen > 0) { 5377c478bd9Sstevel@tonic-gate patbeg = p; 5387c478bd9Sstevel@tonic-gate return (1); 5397c478bd9Sstevel@tonic-gate } 5407c478bd9Sstevel@tonic-gate nnextin: 5417c478bd9Sstevel@tonic-gate s = 2; 5427c478bd9Sstevel@tonic-gate if (f->reset) { 5437c478bd9Sstevel@tonic-gate for (i = 2; i <= f->curstat; i++) 5447c478bd9Sstevel@tonic-gate xfree(f->posns[i]); 5457c478bd9Sstevel@tonic-gate k = *f->posns[0]; 546*1ee2e5faSnakanon if ((f->posns[2] = 547*1ee2e5faSnakanon (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) { 5487c478bd9Sstevel@tonic-gate overflo("out of state space"); 549*1ee2e5faSnakanon } 5507c478bd9Sstevel@tonic-gate for (i = 0; i <= k; i++) 5517c478bd9Sstevel@tonic-gate (f->posns[2])[i] = (f->posns[0])[i]; 5527c478bd9Sstevel@tonic-gate f->initstat = f->curstat = 2; 5537c478bd9Sstevel@tonic-gate f->out[2] = f->out[0]; 5547c478bd9Sstevel@tonic-gate for (i = 0; i < NCHARS; i++) 5557c478bd9Sstevel@tonic-gate f->gototab[2][i] = 0; 5567c478bd9Sstevel@tonic-gate } 5577c478bd9Sstevel@tonic-gate p++; 5587c478bd9Sstevel@tonic-gate } 5597c478bd9Sstevel@tonic-gate return (0); 5607c478bd9Sstevel@tonic-gate } 5617c478bd9Sstevel@tonic-gate 562*1ee2e5faSnakanon static Node *regexp(void), *primary(void), *concat(Node *); 563*1ee2e5faSnakanon static Node *alt(Node *), *unary(Node *); 5647c478bd9Sstevel@tonic-gate 565*1ee2e5faSnakanon static Node * 566*1ee2e5faSnakanon reparse(uchar *p) 5677c478bd9Sstevel@tonic-gate { 5687c478bd9Sstevel@tonic-gate /* parses regular expression pointed to by p */ 5697c478bd9Sstevel@tonic-gate /* uses relex() to scan regular expression */ 5707c478bd9Sstevel@tonic-gate Node *np; 5717c478bd9Sstevel@tonic-gate 5727c478bd9Sstevel@tonic-gate dprintf(("reparse <%s>\n", p)); 5737c478bd9Sstevel@tonic-gate lastre = prestr = p; /* prestr points to string to be parsed */ 5747c478bd9Sstevel@tonic-gate rtok = relex(); 5757c478bd9Sstevel@tonic-gate if (rtok == '\0') 5767c478bd9Sstevel@tonic-gate ERROR "empty regular expression" FATAL; 5777c478bd9Sstevel@tonic-gate np = regexp(); 578*1ee2e5faSnakanon if (rtok == '\0') { 5797c478bd9Sstevel@tonic-gate return (np); 580*1ee2e5faSnakanon } else { 581*1ee2e5faSnakanon ERROR "syntax error in regular expression %s at %s", 582*1ee2e5faSnakanon lastre, prestr FATAL; 583*1ee2e5faSnakanon } 5847c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 585*1ee2e5faSnakanon return (NULL); 5867c478bd9Sstevel@tonic-gate } 5877c478bd9Sstevel@tonic-gate 588*1ee2e5faSnakanon static Node * 589*1ee2e5faSnakanon regexp(void) 5907c478bd9Sstevel@tonic-gate { 5917c478bd9Sstevel@tonic-gate return (alt(concat(primary()))); 5927c478bd9Sstevel@tonic-gate } 5937c478bd9Sstevel@tonic-gate 594*1ee2e5faSnakanon static Node * 595*1ee2e5faSnakanon primary(void) 5967c478bd9Sstevel@tonic-gate { 5977c478bd9Sstevel@tonic-gate Node *np; 5987c478bd9Sstevel@tonic-gate 5997c478bd9Sstevel@tonic-gate switch (rtok) { 6007c478bd9Sstevel@tonic-gate case CHAR: 6017c478bd9Sstevel@tonic-gate np = op2(CHAR, NIL, (Node *)rlxval); 6027c478bd9Sstevel@tonic-gate rtok = relex(); 6037c478bd9Sstevel@tonic-gate return (unary(np)); 6047c478bd9Sstevel@tonic-gate case ALL: 6057c478bd9Sstevel@tonic-gate rtok = relex(); 6067c478bd9Sstevel@tonic-gate return (unary(op2(ALL, NIL, NIL))); 6077c478bd9Sstevel@tonic-gate case DOT: 6087c478bd9Sstevel@tonic-gate rtok = relex(); 6097c478bd9Sstevel@tonic-gate return (unary(op2(DOT, NIL, NIL))); 6107c478bd9Sstevel@tonic-gate case CCL: 611*1ee2e5faSnakanon /*LINTED align*/ 6127c478bd9Sstevel@tonic-gate np = op2(CCL, NIL, (Node *)cclenter(rlxstr)); 6137c478bd9Sstevel@tonic-gate rtok = relex(); 6147c478bd9Sstevel@tonic-gate return (unary(np)); 6157c478bd9Sstevel@tonic-gate case NCCL: 616*1ee2e5faSnakanon /*LINTED align*/ 6177c478bd9Sstevel@tonic-gate np = op2(NCCL, NIL, (Node *)cclenter(rlxstr)); 6187c478bd9Sstevel@tonic-gate rtok = relex(); 6197c478bd9Sstevel@tonic-gate return (unary(np)); 6207c478bd9Sstevel@tonic-gate case '^': 6217c478bd9Sstevel@tonic-gate rtok = relex(); 6227c478bd9Sstevel@tonic-gate return (unary(op2(CHAR, NIL, (Node *)HAT))); 6237c478bd9Sstevel@tonic-gate case '$': 6247c478bd9Sstevel@tonic-gate rtok = relex(); 6257c478bd9Sstevel@tonic-gate return (unary(op2(CHAR, NIL, NIL))); 6267c478bd9Sstevel@tonic-gate case '(': 6277c478bd9Sstevel@tonic-gate rtok = relex(); 6287c478bd9Sstevel@tonic-gate if (rtok == ')') { /* special pleading for () */ 6297c478bd9Sstevel@tonic-gate rtok = relex(); 630*1ee2e5faSnakanon return (unary(op2(CCL, NIL, 631*1ee2e5faSnakanon /*LINTED align*/ 632*1ee2e5faSnakanon (Node *)tostring((uchar *)"")))); 6337c478bd9Sstevel@tonic-gate } 6347c478bd9Sstevel@tonic-gate np = regexp(); 6357c478bd9Sstevel@tonic-gate if (rtok == ')') { 6367c478bd9Sstevel@tonic-gate rtok = relex(); 6377c478bd9Sstevel@tonic-gate return (unary(np)); 638*1ee2e5faSnakanon } else { 639*1ee2e5faSnakanon ERROR "syntax error in regular expression %s at %s", 640*1ee2e5faSnakanon lastre, prestr FATAL; 6417c478bd9Sstevel@tonic-gate } 6427c478bd9Sstevel@tonic-gate default: 643*1ee2e5faSnakanon ERROR "illegal primary in regular expression %s at %s", 644*1ee2e5faSnakanon lastre, prestr FATAL; 6457c478bd9Sstevel@tonic-gate } 6467c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 647*1ee2e5faSnakanon return (NULL); 6487c478bd9Sstevel@tonic-gate } 6497c478bd9Sstevel@tonic-gate 650*1ee2e5faSnakanon static Node * 651*1ee2e5faSnakanon concat(Node *np) 6527c478bd9Sstevel@tonic-gate { 6537c478bd9Sstevel@tonic-gate switch (rtok) { 6547c478bd9Sstevel@tonic-gate case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': 6557c478bd9Sstevel@tonic-gate return (concat(op2(CAT, np, primary()))); 6567c478bd9Sstevel@tonic-gate default: 6577c478bd9Sstevel@tonic-gate return (np); 6587c478bd9Sstevel@tonic-gate } 6597c478bd9Sstevel@tonic-gate } 6607c478bd9Sstevel@tonic-gate 661*1ee2e5faSnakanon static Node * 662*1ee2e5faSnakanon alt(Node *np) 6637c478bd9Sstevel@tonic-gate { 6647c478bd9Sstevel@tonic-gate if (rtok == OR) { 6657c478bd9Sstevel@tonic-gate rtok = relex(); 6667c478bd9Sstevel@tonic-gate return (alt(op2(OR, np, concat(primary())))); 6677c478bd9Sstevel@tonic-gate } 6687c478bd9Sstevel@tonic-gate return (np); 6697c478bd9Sstevel@tonic-gate } 6707c478bd9Sstevel@tonic-gate 671*1ee2e5faSnakanon static Node * 672*1ee2e5faSnakanon unary(Node *np) 6737c478bd9Sstevel@tonic-gate { 6747c478bd9Sstevel@tonic-gate switch (rtok) { 6757c478bd9Sstevel@tonic-gate case STAR: 6767c478bd9Sstevel@tonic-gate rtok = relex(); 6777c478bd9Sstevel@tonic-gate return (unary(op2(STAR, np, NIL))); 6787c478bd9Sstevel@tonic-gate case PLUS: 6797c478bd9Sstevel@tonic-gate rtok = relex(); 6807c478bd9Sstevel@tonic-gate return (unary(op2(PLUS, np, NIL))); 6817c478bd9Sstevel@tonic-gate case QUEST: 6827c478bd9Sstevel@tonic-gate rtok = relex(); 6837c478bd9Sstevel@tonic-gate return (unary(op2(QUEST, np, NIL))); 6847c478bd9Sstevel@tonic-gate default: 6857c478bd9Sstevel@tonic-gate return (np); 6867c478bd9Sstevel@tonic-gate } 6877c478bd9Sstevel@tonic-gate } 6887c478bd9Sstevel@tonic-gate 689*1ee2e5faSnakanon static int 690*1ee2e5faSnakanon relex(void) /* lexical analyzer for reparse */ 6917c478bd9Sstevel@tonic-gate { 6927c478bd9Sstevel@tonic-gate register int c; 693*1ee2e5faSnakanon uchar *cbuf; 6947c478bd9Sstevel@tonic-gate int clen, cflag; 6957c478bd9Sstevel@tonic-gate 6967c478bd9Sstevel@tonic-gate switch (c = *prestr++) { 6977c478bd9Sstevel@tonic-gate case '|': return OR; 6987c478bd9Sstevel@tonic-gate case '*': return STAR; 6997c478bd9Sstevel@tonic-gate case '+': return PLUS; 7007c478bd9Sstevel@tonic-gate case '?': return QUEST; 7017c478bd9Sstevel@tonic-gate case '.': return DOT; 7027c478bd9Sstevel@tonic-gate case '\0': prestr--; return '\0'; 7037c478bd9Sstevel@tonic-gate case '^': 7047c478bd9Sstevel@tonic-gate case '$': 7057c478bd9Sstevel@tonic-gate case '(': 7067c478bd9Sstevel@tonic-gate case ')': 707*1ee2e5faSnakanon return (c); 7087c478bd9Sstevel@tonic-gate case '\\': 7097c478bd9Sstevel@tonic-gate if ((c = *prestr++) == 't') 7107c478bd9Sstevel@tonic-gate c = '\t'; 7117c478bd9Sstevel@tonic-gate else if (c == 'n') 7127c478bd9Sstevel@tonic-gate c = '\n'; 7137c478bd9Sstevel@tonic-gate else if (c == 'f') 7147c478bd9Sstevel@tonic-gate c = '\f'; 7157c478bd9Sstevel@tonic-gate else if (c == 'r') 7167c478bd9Sstevel@tonic-gate c = '\r'; 7177c478bd9Sstevel@tonic-gate else if (c == 'b') 7187c478bd9Sstevel@tonic-gate c = '\b'; 7197c478bd9Sstevel@tonic-gate else if (c == '\\') 7207c478bd9Sstevel@tonic-gate c = '\\'; 7217c478bd9Sstevel@tonic-gate else if (isdigit(c)) { 7227c478bd9Sstevel@tonic-gate int n = c - '0'; 7237c478bd9Sstevel@tonic-gate if (isdigit(*prestr)) { 7247c478bd9Sstevel@tonic-gate n = 8 * n + *prestr++ - '0'; 7257c478bd9Sstevel@tonic-gate if (isdigit(*prestr)) 7267c478bd9Sstevel@tonic-gate n = 8 * n + *prestr++ - '0'; 7277c478bd9Sstevel@tonic-gate } 7287c478bd9Sstevel@tonic-gate c = n; 7297c478bd9Sstevel@tonic-gate } /* else it's now in c */ 7307c478bd9Sstevel@tonic-gate rlxval = c; 731*1ee2e5faSnakanon return (CHAR); 7327c478bd9Sstevel@tonic-gate default: 7337c478bd9Sstevel@tonic-gate rlxval = c; 734*1ee2e5faSnakanon return (CHAR); 7357c478bd9Sstevel@tonic-gate case '[': 7367c478bd9Sstevel@tonic-gate clen = 0; 7377c478bd9Sstevel@tonic-gate if (*prestr == '^') { 7387c478bd9Sstevel@tonic-gate cflag = 1; 7397c478bd9Sstevel@tonic-gate prestr++; 740*1ee2e5faSnakanon } else 7417c478bd9Sstevel@tonic-gate cflag = 0; 742*1ee2e5faSnakanon init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1); 7437c478bd9Sstevel@tonic-gate for (;;) { 7447c478bd9Sstevel@tonic-gate if ((c = *prestr++) == '\\') { 7457c478bd9Sstevel@tonic-gate cbuf[clen++] = '\\'; 746*1ee2e5faSnakanon if ((c = *prestr++) == '\0') { 747*1ee2e5faSnakanon ERROR 748*1ee2e5faSnakanon "nonterminated character class %s", lastre FATAL; 749*1ee2e5faSnakanon } 7507c478bd9Sstevel@tonic-gate cbuf[clen++] = c; 7517c478bd9Sstevel@tonic-gate } else if (c == ']') { 7527c478bd9Sstevel@tonic-gate cbuf[clen] = 0; 7537c478bd9Sstevel@tonic-gate rlxstr = tostring(cbuf); 754*1ee2e5faSnakanon free(cbuf); 7557c478bd9Sstevel@tonic-gate if (cflag == 0) 756*1ee2e5faSnakanon return (CCL); 7577c478bd9Sstevel@tonic-gate else 758*1ee2e5faSnakanon return (NCCL); 7597c478bd9Sstevel@tonic-gate } else if (c == '\n') { 760*1ee2e5faSnakanon ERROR "newline in character class %s...", 761*1ee2e5faSnakanon lastre FATAL; 7627c478bd9Sstevel@tonic-gate } else if (c == '\0') { 763*1ee2e5faSnakanon ERROR "nonterminated character class %s", 764*1ee2e5faSnakanon lastre FATAL; 7657c478bd9Sstevel@tonic-gate } else 7667c478bd9Sstevel@tonic-gate cbuf[clen++] = c; 7677c478bd9Sstevel@tonic-gate } 768*1ee2e5faSnakanon /*NOTREACHED*/ 7697c478bd9Sstevel@tonic-gate } 770*1ee2e5faSnakanon return (0); 7717c478bd9Sstevel@tonic-gate } 7727c478bd9Sstevel@tonic-gate 773*1ee2e5faSnakanon static int 774*1ee2e5faSnakanon cgoto(fa *f, int s, int c) 7757c478bd9Sstevel@tonic-gate { 7767c478bd9Sstevel@tonic-gate register int i, j, k; 7777c478bd9Sstevel@tonic-gate register int *p, *q; 7787c478bd9Sstevel@tonic-gate 7797c478bd9Sstevel@tonic-gate for (i = 0; i <= f->accept; i++) 7807c478bd9Sstevel@tonic-gate setvec[i] = 0; 7817c478bd9Sstevel@tonic-gate setcnt = 0; 7827c478bd9Sstevel@tonic-gate /* compute positions of gototab[s,c] into setvec */ 7837c478bd9Sstevel@tonic-gate p = f->posns[s]; 7847c478bd9Sstevel@tonic-gate for (i = 1; i <= *p; i++) { 7857c478bd9Sstevel@tonic-gate if ((k = f->re[p[i]].ltype) != FINAL) { 786*1ee2e5faSnakanon if (k == CHAR && c == f->re[p[i]].lval || 787*1ee2e5faSnakanon k == DOT && c != 0 && c != HAT || 788*1ee2e5faSnakanon k == ALL && c != 0 || 789*1ee2e5faSnakanon k == CCL && 790*1ee2e5faSnakanon member(c, (uchar *)f->re[p[i]].lval) || 791*1ee2e5faSnakanon k == NCCL && 792*1ee2e5faSnakanon !member(c, (uchar *)f->re[p[i]].lval) && 793*1ee2e5faSnakanon c != 0 && c != HAT) { 7947c478bd9Sstevel@tonic-gate q = f->re[p[i]].lfollow; 7957c478bd9Sstevel@tonic-gate for (j = 1; j <= *q; j++) { 7967c478bd9Sstevel@tonic-gate if (setvec[q[j]] == 0) { 7977c478bd9Sstevel@tonic-gate setcnt++; 7987c478bd9Sstevel@tonic-gate setvec[q[j]] = 1; 7997c478bd9Sstevel@tonic-gate } 8007c478bd9Sstevel@tonic-gate } 8017c478bd9Sstevel@tonic-gate } 8027c478bd9Sstevel@tonic-gate } 8037c478bd9Sstevel@tonic-gate } 8047c478bd9Sstevel@tonic-gate /* determine if setvec is a previous state */ 8057c478bd9Sstevel@tonic-gate tmpset[0] = setcnt; 8067c478bd9Sstevel@tonic-gate j = 1; 8077c478bd9Sstevel@tonic-gate for (i = f->accept; i >= 0; i--) 8087c478bd9Sstevel@tonic-gate if (setvec[i]) { 8097c478bd9Sstevel@tonic-gate tmpset[j++] = i; 8107c478bd9Sstevel@tonic-gate } 8117c478bd9Sstevel@tonic-gate /* tmpset == previous state? */ 8127c478bd9Sstevel@tonic-gate for (i = 1; i <= f->curstat; i++) { 8137c478bd9Sstevel@tonic-gate p = f->posns[i]; 8147c478bd9Sstevel@tonic-gate if ((k = tmpset[0]) != p[0]) 8157c478bd9Sstevel@tonic-gate goto different; 8167c478bd9Sstevel@tonic-gate for (j = 1; j <= k; j++) 8177c478bd9Sstevel@tonic-gate if (tmpset[j] != p[j]) 8187c478bd9Sstevel@tonic-gate goto different; 8197c478bd9Sstevel@tonic-gate /* setvec is state i */ 8207c478bd9Sstevel@tonic-gate f->gototab[s][c] = i; 821*1ee2e5faSnakanon return (i); 8227c478bd9Sstevel@tonic-gate different:; 8237c478bd9Sstevel@tonic-gate } 8247c478bd9Sstevel@tonic-gate 8257c478bd9Sstevel@tonic-gate /* add tmpset to current set of states */ 8267c478bd9Sstevel@tonic-gate if (f->curstat >= NSTATES-1) { 8277c478bd9Sstevel@tonic-gate f->curstat = 2; 8287c478bd9Sstevel@tonic-gate f->reset = 1; 8297c478bd9Sstevel@tonic-gate for (i = 2; i < NSTATES; i++) 8307c478bd9Sstevel@tonic-gate xfree(f->posns[i]); 831*1ee2e5faSnakanon } else 8327c478bd9Sstevel@tonic-gate ++(f->curstat); 8337c478bd9Sstevel@tonic-gate for (i = 0; i < NCHARS; i++) 8347c478bd9Sstevel@tonic-gate f->gototab[f->curstat][i] = 0; 8357c478bd9Sstevel@tonic-gate xfree(f->posns[f->curstat]); 8367c478bd9Sstevel@tonic-gate if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL) 8377c478bd9Sstevel@tonic-gate overflo("out of space in cgoto"); 8387c478bd9Sstevel@tonic-gate 8397c478bd9Sstevel@tonic-gate f->posns[f->curstat] = p; 8407c478bd9Sstevel@tonic-gate f->gototab[s][c] = f->curstat; 8417c478bd9Sstevel@tonic-gate for (i = 0; i <= setcnt; i++) 8427c478bd9Sstevel@tonic-gate p[i] = tmpset[i]; 8437c478bd9Sstevel@tonic-gate if (setvec[f->accept]) 8447c478bd9Sstevel@tonic-gate f->out[f->curstat] = 1; 8457c478bd9Sstevel@tonic-gate else 8467c478bd9Sstevel@tonic-gate f->out[f->curstat] = 0; 847*1ee2e5faSnakanon return (f->curstat); 8487c478bd9Sstevel@tonic-gate } 8497c478bd9Sstevel@tonic-gate 850*1ee2e5faSnakanon static void 851*1ee2e5faSnakanon freefa(fa *f) 8527c478bd9Sstevel@tonic-gate { 8537c478bd9Sstevel@tonic-gate 8547c478bd9Sstevel@tonic-gate register int i; 8557c478bd9Sstevel@tonic-gate 8567c478bd9Sstevel@tonic-gate if (f == NULL) 8577c478bd9Sstevel@tonic-gate return; 8587c478bd9Sstevel@tonic-gate for (i = 0; i <= f->curstat; i++) 8597c478bd9Sstevel@tonic-gate xfree(f->posns[i]); 8607c478bd9Sstevel@tonic-gate for (i = 0; i <= f->accept; i++) 8617c478bd9Sstevel@tonic-gate xfree(f->re[i].lfollow); 8627c478bd9Sstevel@tonic-gate xfree(f->restr); 8637c478bd9Sstevel@tonic-gate xfree(f); 8647c478bd9Sstevel@tonic-gate } 865