17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate * with the License.
87c478bd9Sstevel@tonic-gate *
97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate * and limitations under the License.
137c478bd9Sstevel@tonic-gate *
147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate *
207c478bd9Sstevel@tonic-gate * CDDL HEADER END
217c478bd9Sstevel@tonic-gate */
227c478bd9Sstevel@tonic-gate
237c478bd9Sstevel@tonic-gate /*
24*1ee2e5faSnakanon * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
257c478bd9Sstevel@tonic-gate * Use is subject to license terms.
267c478bd9Sstevel@tonic-gate */
277c478bd9Sstevel@tonic-gate
28*1ee2e5faSnakanon /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29*1ee2e5faSnakanon /* All Rights Reserved */
30*1ee2e5faSnakanon
31*1ee2e5faSnakanon #pragma ident "%Z%%M% %I% %E% SMI"
327c478bd9Sstevel@tonic-gate
337c478bd9Sstevel@tonic-gate #define DEBUG
347c478bd9Sstevel@tonic-gate
357c478bd9Sstevel@tonic-gate #include "awk.h"
367c478bd9Sstevel@tonic-gate #include "y.tab.h"
377c478bd9Sstevel@tonic-gate
387c478bd9Sstevel@tonic-gate #define HAT (NCHARS-1) /* matches ^ in regular expr */
397c478bd9Sstevel@tonic-gate /* NCHARS is 2**n */
40*1ee2e5faSnakanon #define MAXLIN (3 * LINE_MAX)
417c478bd9Sstevel@tonic-gate
427c478bd9Sstevel@tonic-gate #define type(v) (v)->nobj
437c478bd9Sstevel@tonic-gate #define left(v) (v)->narg[0]
447c478bd9Sstevel@tonic-gate #define right(v) (v)->narg[1]
457c478bd9Sstevel@tonic-gate #define parent(v) (v)->nnext
467c478bd9Sstevel@tonic-gate
477c478bd9Sstevel@tonic-gate #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
487c478bd9Sstevel@tonic-gate #define UNARY case STAR: case PLUS: case QUEST:
497c478bd9Sstevel@tonic-gate
50*1ee2e5faSnakanon /*
51*1ee2e5faSnakanon * encoding in tree Nodes:
52*1ee2e5faSnakanon * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
53*1ee2e5faSnakanon * left is index, right contains value or pointer to value
54*1ee2e5faSnakanon * unary (STAR, PLUS, QUEST): left is child, right is null
55*1ee2e5faSnakanon * binary (CAT, OR): left and right are children
56*1ee2e5faSnakanon * parent contains pointer to parent
577c478bd9Sstevel@tonic-gate */
587c478bd9Sstevel@tonic-gate
59*1ee2e5faSnakanon int setvec[MAXLIN];
60*1ee2e5faSnakanon int tmpset[MAXLIN];
61*1ee2e5faSnakanon Node *point[MAXLIN];
627c478bd9Sstevel@tonic-gate
637c478bd9Sstevel@tonic-gate int rtok; /* next token in current re */
647c478bd9Sstevel@tonic-gate int rlxval;
657c478bd9Sstevel@tonic-gate uchar *rlxstr;
667c478bd9Sstevel@tonic-gate uchar *prestr; /* current position in current re */
677c478bd9Sstevel@tonic-gate uchar *lastre; /* origin of last re */
687c478bd9Sstevel@tonic-gate
697c478bd9Sstevel@tonic-gate static int setcnt;
707c478bd9Sstevel@tonic-gate static int poscnt;
717c478bd9Sstevel@tonic-gate
727c478bd9Sstevel@tonic-gate uchar *patbeg;
737c478bd9Sstevel@tonic-gate int patlen;
747c478bd9Sstevel@tonic-gate
757c478bd9Sstevel@tonic-gate #define NFA 20 /* cache this many dynamic fa's */
767c478bd9Sstevel@tonic-gate fa *fatab[NFA];
777c478bd9Sstevel@tonic-gate int nfatab = 0; /* entries in fatab */
787c478bd9Sstevel@tonic-gate
79*1ee2e5faSnakanon static fa *mkdfa(uchar *, int);
80*1ee2e5faSnakanon static int makeinit(fa *, int);
81*1ee2e5faSnakanon static void penter(Node *);
82*1ee2e5faSnakanon static void freetr(Node *);
83*1ee2e5faSnakanon static void overflo(char *);
84*1ee2e5faSnakanon static void cfoll(fa *, Node *);
85*1ee2e5faSnakanon static void follow(Node *);
86*1ee2e5faSnakanon static Node *reparse(uchar *);
87*1ee2e5faSnakanon static int relex(void);
88*1ee2e5faSnakanon static void freefa(fa *);
89*1ee2e5faSnakanon static int cgoto(fa *, int, int);
90*1ee2e5faSnakanon
91*1ee2e5faSnakanon fa *
makedfa(uchar * s,int anchor)92*1ee2e5faSnakanon makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
937c478bd9Sstevel@tonic-gate {
947c478bd9Sstevel@tonic-gate int i, use, nuse;
957c478bd9Sstevel@tonic-gate fa *pfa;
967c478bd9Sstevel@tonic-gate
977c478bd9Sstevel@tonic-gate if (compile_time) /* a constant for sure */
98*1ee2e5faSnakanon return (mkdfa(s, anchor));
99*1ee2e5faSnakanon for (i = 0; i < nfatab; i++) { /* is it there already? */
100*1ee2e5faSnakanon if (fatab[i]->anchor == anchor &&
101*1ee2e5faSnakanon strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
1027c478bd9Sstevel@tonic-gate fatab[i]->use++;
103*1ee2e5faSnakanon return (fatab[i]);
104*1ee2e5faSnakanon }
1057c478bd9Sstevel@tonic-gate }
1067c478bd9Sstevel@tonic-gate pfa = mkdfa(s, anchor);
1077c478bd9Sstevel@tonic-gate if (nfatab < NFA) { /* room for another */
1087c478bd9Sstevel@tonic-gate fatab[nfatab] = pfa;
1097c478bd9Sstevel@tonic-gate fatab[nfatab]->use = 1;
1107c478bd9Sstevel@tonic-gate nfatab++;
111*1ee2e5faSnakanon return (pfa);
1127c478bd9Sstevel@tonic-gate }
1137c478bd9Sstevel@tonic-gate use = fatab[0]->use; /* replace least-recently used */
1147c478bd9Sstevel@tonic-gate nuse = 0;
1157c478bd9Sstevel@tonic-gate for (i = 1; i < nfatab; i++)
1167c478bd9Sstevel@tonic-gate if (fatab[i]->use < use) {
1177c478bd9Sstevel@tonic-gate use = fatab[i]->use;
1187c478bd9Sstevel@tonic-gate nuse = i;
1197c478bd9Sstevel@tonic-gate }
1207c478bd9Sstevel@tonic-gate freefa(fatab[nuse]);
1217c478bd9Sstevel@tonic-gate fatab[nuse] = pfa;
1227c478bd9Sstevel@tonic-gate pfa->use = 1;
123*1ee2e5faSnakanon return (pfa);
1247c478bd9Sstevel@tonic-gate }
1257c478bd9Sstevel@tonic-gate
126*1ee2e5faSnakanon fa *
mkdfa(uchar * s,int anchor)127*1ee2e5faSnakanon mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
128*1ee2e5faSnakanon /* anchor = 1 for anchored matches, else 0 */
1297c478bd9Sstevel@tonic-gate {
130*1ee2e5faSnakanon Node *p, *p1;
1317c478bd9Sstevel@tonic-gate fa *f;
1327c478bd9Sstevel@tonic-gate
1337c478bd9Sstevel@tonic-gate p = reparse(s);
1347c478bd9Sstevel@tonic-gate p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
1357c478bd9Sstevel@tonic-gate /* put ALL STAR in front of reg. exp. */
1367c478bd9Sstevel@tonic-gate p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
1377c478bd9Sstevel@tonic-gate /* put FINAL after reg. exp. */
1387c478bd9Sstevel@tonic-gate
1397c478bd9Sstevel@tonic-gate poscnt = 0;
1407c478bd9Sstevel@tonic-gate penter(p1); /* enter parent pointers and leaf indices */
1417c478bd9Sstevel@tonic-gate if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
1427c478bd9Sstevel@tonic-gate overflo("no room for fa");
143*1ee2e5faSnakanon /* penter has computed number of positions in re */
144*1ee2e5faSnakanon f->accept = poscnt-1;
1457c478bd9Sstevel@tonic-gate cfoll(f, p1); /* set up follow sets */
1467c478bd9Sstevel@tonic-gate freetr(p1);
147*1ee2e5faSnakanon if ((f->posns[0] =
148*1ee2e5faSnakanon (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
1497c478bd9Sstevel@tonic-gate overflo("out of space in makedfa");
150*1ee2e5faSnakanon }
1517c478bd9Sstevel@tonic-gate if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
1527c478bd9Sstevel@tonic-gate overflo("out of space in makedfa");
1537c478bd9Sstevel@tonic-gate *f->posns[1] = 0;
1547c478bd9Sstevel@tonic-gate f->initstat = makeinit(f, anchor);
1557c478bd9Sstevel@tonic-gate f->anchor = anchor;
1567c478bd9Sstevel@tonic-gate f->restr = tostring(s);
157*1ee2e5faSnakanon return (f);
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate
160*1ee2e5faSnakanon static int
makeinit(fa * f,int anchor)161*1ee2e5faSnakanon makeinit(fa *f, int anchor)
1627c478bd9Sstevel@tonic-gate {
1637c478bd9Sstevel@tonic-gate register int i, k;
1647c478bd9Sstevel@tonic-gate
1657c478bd9Sstevel@tonic-gate f->curstat = 2;
1667c478bd9Sstevel@tonic-gate f->out[2] = 0;
1677c478bd9Sstevel@tonic-gate f->reset = 0;
1687c478bd9Sstevel@tonic-gate k = *(f->re[0].lfollow);
1697c478bd9Sstevel@tonic-gate xfree(f->posns[2]);
1707c478bd9Sstevel@tonic-gate if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
1717c478bd9Sstevel@tonic-gate overflo("out of space in makeinit");
1727c478bd9Sstevel@tonic-gate for (i = 0; i <= k; i++) {
1737c478bd9Sstevel@tonic-gate (f->posns[2])[i] = (f->re[0].lfollow)[i];
1747c478bd9Sstevel@tonic-gate }
1757c478bd9Sstevel@tonic-gate if ((f->posns[2])[1] == f->accept)
1767c478bd9Sstevel@tonic-gate f->out[2] = 1;
1777c478bd9Sstevel@tonic-gate for (i = 0; i < NCHARS; i++)
1787c478bd9Sstevel@tonic-gate f->gototab[2][i] = 0;
1797c478bd9Sstevel@tonic-gate f->curstat = cgoto(f, 2, HAT);
1807c478bd9Sstevel@tonic-gate if (anchor) {
1817c478bd9Sstevel@tonic-gate *f->posns[2] = k-1; /* leave out position 0 */
1827c478bd9Sstevel@tonic-gate for (i = 0; i < k; i++) {
1837c478bd9Sstevel@tonic-gate (f->posns[0])[i] = (f->posns[2])[i];
1847c478bd9Sstevel@tonic-gate }
1857c478bd9Sstevel@tonic-gate
1867c478bd9Sstevel@tonic-gate f->out[0] = f->out[2];
1877c478bd9Sstevel@tonic-gate if (f->curstat != 2)
1887c478bd9Sstevel@tonic-gate --(*f->posns[f->curstat]);
1897c478bd9Sstevel@tonic-gate }
190*1ee2e5faSnakanon return (f->curstat);
1917c478bd9Sstevel@tonic-gate }
1927c478bd9Sstevel@tonic-gate
193*1ee2e5faSnakanon void
penter(Node * p)194*1ee2e5faSnakanon penter(Node *p) /* set up parent pointers and leaf indices */
1957c478bd9Sstevel@tonic-gate {
1967c478bd9Sstevel@tonic-gate switch (type(p)) {
1977c478bd9Sstevel@tonic-gate LEAF
1987c478bd9Sstevel@tonic-gate left(p) = (Node *) poscnt;
1997c478bd9Sstevel@tonic-gate point[poscnt++] = p;
2007c478bd9Sstevel@tonic-gate break;
2017c478bd9Sstevel@tonic-gate UNARY
2027c478bd9Sstevel@tonic-gate penter(left(p));
2037c478bd9Sstevel@tonic-gate parent(left(p)) = p;
2047c478bd9Sstevel@tonic-gate break;
2057c478bd9Sstevel@tonic-gate case CAT:
2067c478bd9Sstevel@tonic-gate case OR:
2077c478bd9Sstevel@tonic-gate penter(left(p));
2087c478bd9Sstevel@tonic-gate penter(right(p));
2097c478bd9Sstevel@tonic-gate parent(left(p)) = p;
2107c478bd9Sstevel@tonic-gate parent(right(p)) = p;
2117c478bd9Sstevel@tonic-gate break;
2127c478bd9Sstevel@tonic-gate default:
2137c478bd9Sstevel@tonic-gate ERROR "unknown type %d in penter", type(p) FATAL;
2147c478bd9Sstevel@tonic-gate break;
2157c478bd9Sstevel@tonic-gate }
2167c478bd9Sstevel@tonic-gate }
2177c478bd9Sstevel@tonic-gate
218*1ee2e5faSnakanon static void
freetr(Node * p)219*1ee2e5faSnakanon freetr(Node *p) /* free parse tree */
2207c478bd9Sstevel@tonic-gate {
2217c478bd9Sstevel@tonic-gate switch (type(p)) {
2227c478bd9Sstevel@tonic-gate LEAF
2237c478bd9Sstevel@tonic-gate xfree(p);
2247c478bd9Sstevel@tonic-gate break;
2257c478bd9Sstevel@tonic-gate UNARY
2267c478bd9Sstevel@tonic-gate freetr(left(p));
2277c478bd9Sstevel@tonic-gate xfree(p);
2287c478bd9Sstevel@tonic-gate break;
2297c478bd9Sstevel@tonic-gate case CAT:
2307c478bd9Sstevel@tonic-gate case OR:
2317c478bd9Sstevel@tonic-gate freetr(left(p));
2327c478bd9Sstevel@tonic-gate freetr(right(p));
2337c478bd9Sstevel@tonic-gate xfree(p);
2347c478bd9Sstevel@tonic-gate break;
2357c478bd9Sstevel@tonic-gate default:
2367c478bd9Sstevel@tonic-gate ERROR "unknown type %d in freetr", type(p) FATAL;
2377c478bd9Sstevel@tonic-gate break;
2387c478bd9Sstevel@tonic-gate }
2397c478bd9Sstevel@tonic-gate }
2407c478bd9Sstevel@tonic-gate
241*1ee2e5faSnakanon uchar *
cclenter(uchar * p)242*1ee2e5faSnakanon cclenter(uchar *p)
2437c478bd9Sstevel@tonic-gate {
2447c478bd9Sstevel@tonic-gate register int i, c;
245*1ee2e5faSnakanon uchar *op, *chars, *ret;
246*1ee2e5faSnakanon size_t bsize;
2477c478bd9Sstevel@tonic-gate
248*1ee2e5faSnakanon init_buf(&chars, &bsize, LINE_INCR);
2497c478bd9Sstevel@tonic-gate op = p;
2507c478bd9Sstevel@tonic-gate i = 0;
2517c478bd9Sstevel@tonic-gate while ((c = *p++) != 0) {
2527c478bd9Sstevel@tonic-gate if (c == '\\') {
2537c478bd9Sstevel@tonic-gate if ((c = *p++) == 't')
2547c478bd9Sstevel@tonic-gate c = '\t';
2557c478bd9Sstevel@tonic-gate else if (c == 'n')
2567c478bd9Sstevel@tonic-gate c = '\n';
2577c478bd9Sstevel@tonic-gate else if (c == 'f')
2587c478bd9Sstevel@tonic-gate c = '\f';
2597c478bd9Sstevel@tonic-gate else if (c == 'r')
2607c478bd9Sstevel@tonic-gate c = '\r';
2617c478bd9Sstevel@tonic-gate else if (c == 'b')
2627c478bd9Sstevel@tonic-gate c = '\b';
2637c478bd9Sstevel@tonic-gate else if (c == '\\')
2647c478bd9Sstevel@tonic-gate c = '\\';
2657c478bd9Sstevel@tonic-gate else if (isdigit(c)) {
2667c478bd9Sstevel@tonic-gate int n = c - '0';
2677c478bd9Sstevel@tonic-gate if (isdigit(*p)) {
2687c478bd9Sstevel@tonic-gate n = 8 * n + *p++ - '0';
2697c478bd9Sstevel@tonic-gate if (isdigit(*p))
2707c478bd9Sstevel@tonic-gate n = 8 * n + *p++ - '0';
2717c478bd9Sstevel@tonic-gate }
2727c478bd9Sstevel@tonic-gate c = n;
2737c478bd9Sstevel@tonic-gate } /* else */
2747c478bd9Sstevel@tonic-gate /* c = c; */
2757c478bd9Sstevel@tonic-gate } else if (c == '-' && i > 0 && chars[i-1] != 0) {
2767c478bd9Sstevel@tonic-gate if (*p != 0) {
2777c478bd9Sstevel@tonic-gate c = chars[i-1];
2787c478bd9Sstevel@tonic-gate while ((uchar)c < *p) { /* fails if *p is \\ */
279*1ee2e5faSnakanon expand_buf(&chars, &bsize, i);
2807c478bd9Sstevel@tonic-gate chars[i++] = ++c;
2817c478bd9Sstevel@tonic-gate }
2827c478bd9Sstevel@tonic-gate p++;
2837c478bd9Sstevel@tonic-gate continue;
2847c478bd9Sstevel@tonic-gate }
2857c478bd9Sstevel@tonic-gate }
286*1ee2e5faSnakanon expand_buf(&chars, &bsize, i);
2877c478bd9Sstevel@tonic-gate chars[i++] = c;
2887c478bd9Sstevel@tonic-gate }
2897c478bd9Sstevel@tonic-gate chars[i++] = '\0';
2907c478bd9Sstevel@tonic-gate dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
2917c478bd9Sstevel@tonic-gate xfree(op);
292*1ee2e5faSnakanon ret = tostring(chars);
293*1ee2e5faSnakanon free(chars);
294*1ee2e5faSnakanon return (ret);
2957c478bd9Sstevel@tonic-gate }
2967c478bd9Sstevel@tonic-gate
297*1ee2e5faSnakanon static void
overflo(char * s)298*1ee2e5faSnakanon overflo(char *s)
2997c478bd9Sstevel@tonic-gate {
3007c478bd9Sstevel@tonic-gate ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
3017c478bd9Sstevel@tonic-gate }
3027c478bd9Sstevel@tonic-gate
303*1ee2e5faSnakanon /* enter follow set of each leaf of vertex v into lfollow[leaf] */
304*1ee2e5faSnakanon static void
cfoll(fa * f,Node * v)305*1ee2e5faSnakanon cfoll(fa *f, Node *v)
3067c478bd9Sstevel@tonic-gate {
3077c478bd9Sstevel@tonic-gate register int i;
3087c478bd9Sstevel@tonic-gate register int *p;
3097c478bd9Sstevel@tonic-gate
3107c478bd9Sstevel@tonic-gate switch (type(v)) {
3117c478bd9Sstevel@tonic-gate LEAF
3127c478bd9Sstevel@tonic-gate f->re[(int)left(v)].ltype = type(v);
3137c478bd9Sstevel@tonic-gate f->re[(int)left(v)].lval = (int)right(v);
3147c478bd9Sstevel@tonic-gate for (i = 0; i <= f->accept; i++)
3157c478bd9Sstevel@tonic-gate setvec[i] = 0;
3167c478bd9Sstevel@tonic-gate setcnt = 0;
3177c478bd9Sstevel@tonic-gate follow(v); /* computes setvec and setcnt */
3187c478bd9Sstevel@tonic-gate if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
3197c478bd9Sstevel@tonic-gate overflo("follow set overflow");
3207c478bd9Sstevel@tonic-gate f->re[(int)left(v)].lfollow = p;
3217c478bd9Sstevel@tonic-gate *p = setcnt;
322*1ee2e5faSnakanon for (i = f->accept; i >= 0; i--) {
323*1ee2e5faSnakanon if (setvec[i] == 1)
324*1ee2e5faSnakanon *++p = i;
325*1ee2e5faSnakanon }
3267c478bd9Sstevel@tonic-gate break;
3277c478bd9Sstevel@tonic-gate UNARY
3287c478bd9Sstevel@tonic-gate cfoll(f, left(v));
3297c478bd9Sstevel@tonic-gate break;
3307c478bd9Sstevel@tonic-gate case CAT:
3317c478bd9Sstevel@tonic-gate case OR:
3327c478bd9Sstevel@tonic-gate cfoll(f, left(v));
3337c478bd9Sstevel@tonic-gate cfoll(f, right(v));
3347c478bd9Sstevel@tonic-gate break;
3357c478bd9Sstevel@tonic-gate default:
3367c478bd9Sstevel@tonic-gate ERROR "unknown type %d in cfoll", type(v) FATAL;
3377c478bd9Sstevel@tonic-gate }
3387c478bd9Sstevel@tonic-gate }
3397c478bd9Sstevel@tonic-gate
340*1ee2e5faSnakanon /*
341*1ee2e5faSnakanon * collects initially active leaves of p into setvec
342*1ee2e5faSnakanon * returns 0 or 1 depending on whether p matches empty string
343*1ee2e5faSnakanon */
344*1ee2e5faSnakanon static int
first(Node * p)345*1ee2e5faSnakanon first(Node *p)
3467c478bd9Sstevel@tonic-gate {
3477c478bd9Sstevel@tonic-gate register int b;
3487c478bd9Sstevel@tonic-gate
3497c478bd9Sstevel@tonic-gate switch (type(p)) {
3507c478bd9Sstevel@tonic-gate LEAF
3517c478bd9Sstevel@tonic-gate if (setvec[(int)left(p)] != 1) {
3527c478bd9Sstevel@tonic-gate setvec[(int)left(p)] = 1;
3537c478bd9Sstevel@tonic-gate setcnt++;
3547c478bd9Sstevel@tonic-gate }
3557c478bd9Sstevel@tonic-gate if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
3567c478bd9Sstevel@tonic-gate return (0); /* empty CCL */
357*1ee2e5faSnakanon else
358*1ee2e5faSnakanon return (1);
3597c478bd9Sstevel@tonic-gate case PLUS:
360*1ee2e5faSnakanon if (first(left(p)) == 0)
361*1ee2e5faSnakanon return (0);
3627c478bd9Sstevel@tonic-gate return (1);
3637c478bd9Sstevel@tonic-gate case STAR:
3647c478bd9Sstevel@tonic-gate case QUEST:
365*1ee2e5faSnakanon (void) first(left(p));
3667c478bd9Sstevel@tonic-gate return (0);
3677c478bd9Sstevel@tonic-gate case CAT:
368*1ee2e5faSnakanon if (first(left(p)) == 0 && first(right(p)) == 0)
369*1ee2e5faSnakanon return (0);
3707c478bd9Sstevel@tonic-gate return (1);
3717c478bd9Sstevel@tonic-gate case OR:
3727c478bd9Sstevel@tonic-gate b = first(right(p));
373*1ee2e5faSnakanon if (first(left(p)) == 0 || b == 0)
374*1ee2e5faSnakanon return (0);
3757c478bd9Sstevel@tonic-gate return (1);
3767c478bd9Sstevel@tonic-gate }
3777c478bd9Sstevel@tonic-gate ERROR "unknown type %d in first", type(p) FATAL;
3787c478bd9Sstevel@tonic-gate return (-1);
3797c478bd9Sstevel@tonic-gate }
3807c478bd9Sstevel@tonic-gate
381*1ee2e5faSnakanon /* collects leaves that can follow v into setvec */
382*1ee2e5faSnakanon static void
follow(Node * v)383*1ee2e5faSnakanon follow(Node *v)
3847c478bd9Sstevel@tonic-gate {
3857c478bd9Sstevel@tonic-gate Node *p;
3867c478bd9Sstevel@tonic-gate
3877c478bd9Sstevel@tonic-gate if (type(v) == FINAL)
3887c478bd9Sstevel@tonic-gate return;
3897c478bd9Sstevel@tonic-gate p = parent(v);
3907c478bd9Sstevel@tonic-gate switch (type(p)) {
3917c478bd9Sstevel@tonic-gate case STAR:
3927c478bd9Sstevel@tonic-gate case PLUS:
393*1ee2e5faSnakanon (void) first(v);
3947c478bd9Sstevel@tonic-gate follow(p);
3957c478bd9Sstevel@tonic-gate return;
3967c478bd9Sstevel@tonic-gate
3977c478bd9Sstevel@tonic-gate case OR:
3987c478bd9Sstevel@tonic-gate case QUEST:
3997c478bd9Sstevel@tonic-gate follow(p);
4007c478bd9Sstevel@tonic-gate return;
4017c478bd9Sstevel@tonic-gate
4027c478bd9Sstevel@tonic-gate case CAT:
4037c478bd9Sstevel@tonic-gate if (v == left(p)) { /* v is left child of p */
4047c478bd9Sstevel@tonic-gate if (first(right(p)) == 0) {
4057c478bd9Sstevel@tonic-gate follow(p);
4067c478bd9Sstevel@tonic-gate return;
4077c478bd9Sstevel@tonic-gate }
408*1ee2e5faSnakanon } else /* v is right child */
4097c478bd9Sstevel@tonic-gate follow(p);
4107c478bd9Sstevel@tonic-gate return;
4117c478bd9Sstevel@tonic-gate default:
4127c478bd9Sstevel@tonic-gate ERROR "unknown type %d in follow", type(p) FATAL;
4137c478bd9Sstevel@tonic-gate break;
4147c478bd9Sstevel@tonic-gate }
4157c478bd9Sstevel@tonic-gate }
4167c478bd9Sstevel@tonic-gate
417*1ee2e5faSnakanon static int
member(uchar c,uchar * s)418*1ee2e5faSnakanon member(uchar c, uchar *s) /* is c in s? */
4197c478bd9Sstevel@tonic-gate {
4207c478bd9Sstevel@tonic-gate while (*s)
4217c478bd9Sstevel@tonic-gate if (c == *s++)
4227c478bd9Sstevel@tonic-gate return (1);
4237c478bd9Sstevel@tonic-gate return (0);
4247c478bd9Sstevel@tonic-gate }
4257c478bd9Sstevel@tonic-gate
4267c478bd9Sstevel@tonic-gate
427*1ee2e5faSnakanon int
match(fa * f,uchar * p)428*1ee2e5faSnakanon match(fa *f, uchar *p)
4297c478bd9Sstevel@tonic-gate {
4307c478bd9Sstevel@tonic-gate register int s, ns;
4317c478bd9Sstevel@tonic-gate
4327c478bd9Sstevel@tonic-gate s = f->reset ? makeinit(f, 0) : f->initstat;
4337c478bd9Sstevel@tonic-gate if (f->out[s])
4347c478bd9Sstevel@tonic-gate return (1);
4357c478bd9Sstevel@tonic-gate do {
436*1ee2e5faSnakanon if ((ns = f->gototab[s][*p]) != 0)
4377c478bd9Sstevel@tonic-gate s = ns;
4387c478bd9Sstevel@tonic-gate else
4397c478bd9Sstevel@tonic-gate s = cgoto(f, s, *p);
4407c478bd9Sstevel@tonic-gate if (f->out[s])
4417c478bd9Sstevel@tonic-gate return (1);
4427c478bd9Sstevel@tonic-gate } while (*p++ != 0);
4437c478bd9Sstevel@tonic-gate return (0);
4447c478bd9Sstevel@tonic-gate }
4457c478bd9Sstevel@tonic-gate
446*1ee2e5faSnakanon int
pmatch(fa * f,uchar * p)447*1ee2e5faSnakanon pmatch(fa *f, uchar *p)
4487c478bd9Sstevel@tonic-gate {
4497c478bd9Sstevel@tonic-gate register int s, ns;
4507c478bd9Sstevel@tonic-gate register uchar *q;
4517c478bd9Sstevel@tonic-gate int i, k;
4527c478bd9Sstevel@tonic-gate
4537c478bd9Sstevel@tonic-gate if (f->reset) {
4547c478bd9Sstevel@tonic-gate f->initstat = s = makeinit(f, 1);
4557c478bd9Sstevel@tonic-gate } else {
4567c478bd9Sstevel@tonic-gate s = f->initstat;
4577c478bd9Sstevel@tonic-gate }
4587c478bd9Sstevel@tonic-gate patbeg = p;
4597c478bd9Sstevel@tonic-gate patlen = -1;
4607c478bd9Sstevel@tonic-gate do {
4617c478bd9Sstevel@tonic-gate q = p;
4627c478bd9Sstevel@tonic-gate do {
4637c478bd9Sstevel@tonic-gate if (f->out[s]) /* final state */
4647c478bd9Sstevel@tonic-gate patlen = q-p;
465*1ee2e5faSnakanon if ((ns = f->gototab[s][*q]) != 0)
4667c478bd9Sstevel@tonic-gate s = ns;
4677c478bd9Sstevel@tonic-gate else
4687c478bd9Sstevel@tonic-gate s = cgoto(f, s, *q);
469*1ee2e5faSnakanon if (s == 1) { /* no transition */
4707c478bd9Sstevel@tonic-gate if (patlen >= 0) {
4717c478bd9Sstevel@tonic-gate patbeg = p;
4727c478bd9Sstevel@tonic-gate return (1);
473*1ee2e5faSnakanon } else
4747c478bd9Sstevel@tonic-gate goto nextin; /* no match */
475*1ee2e5faSnakanon }
4767c478bd9Sstevel@tonic-gate } while (*q++ != 0);
4777c478bd9Sstevel@tonic-gate if (f->out[s])
4787c478bd9Sstevel@tonic-gate patlen = q - p - 1; /* don't count $ */
4797c478bd9Sstevel@tonic-gate if (patlen >= 0) {
4807c478bd9Sstevel@tonic-gate patbeg = p;
4817c478bd9Sstevel@tonic-gate return (1);
4827c478bd9Sstevel@tonic-gate }
4837c478bd9Sstevel@tonic-gate nextin:
4847c478bd9Sstevel@tonic-gate s = 2;
4857c478bd9Sstevel@tonic-gate if (f->reset) {
4867c478bd9Sstevel@tonic-gate for (i = 2; i <= f->curstat; i++)
4877c478bd9Sstevel@tonic-gate xfree(f->posns[i]);
4887c478bd9Sstevel@tonic-gate k = *f->posns[0];
489*1ee2e5faSnakanon if ((f->posns[2] =
490*1ee2e5faSnakanon (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
4917c478bd9Sstevel@tonic-gate overflo("out of space in pmatch");
492*1ee2e5faSnakanon }
4937c478bd9Sstevel@tonic-gate for (i = 0; i <= k; i++)
4947c478bd9Sstevel@tonic-gate (f->posns[2])[i] = (f->posns[0])[i];
4957c478bd9Sstevel@tonic-gate f->initstat = f->curstat = 2;
4967c478bd9Sstevel@tonic-gate f->out[2] = f->out[0];
4977c478bd9Sstevel@tonic-gate for (i = 0; i < NCHARS; i++)
4987c478bd9Sstevel@tonic-gate f->gototab[2][i] = 0;
4997c478bd9Sstevel@tonic-gate }
5007c478bd9Sstevel@tonic-gate } while (*p++ != 0);
5017c478bd9Sstevel@tonic-gate return (0);
5027c478bd9Sstevel@tonic-gate }
5037c478bd9Sstevel@tonic-gate
504*1ee2e5faSnakanon int
nematch(fa * f,uchar * p)505*1ee2e5faSnakanon nematch(fa *f, uchar *p)
5067c478bd9Sstevel@tonic-gate {
5077c478bd9Sstevel@tonic-gate register int s, ns;
5087c478bd9Sstevel@tonic-gate register uchar *q;
5097c478bd9Sstevel@tonic-gate int i, k;
5107c478bd9Sstevel@tonic-gate
5117c478bd9Sstevel@tonic-gate if (f->reset) {
5127c478bd9Sstevel@tonic-gate f->initstat = s = makeinit(f, 1);
5137c478bd9Sstevel@tonic-gate } else {
5147c478bd9Sstevel@tonic-gate s = f->initstat;
5157c478bd9Sstevel@tonic-gate }
5167c478bd9Sstevel@tonic-gate patlen = -1;
5177c478bd9Sstevel@tonic-gate while (*p) {
5187c478bd9Sstevel@tonic-gate q = p;
5197c478bd9Sstevel@tonic-gate do {
5207c478bd9Sstevel@tonic-gate if (f->out[s]) /* final state */
5217c478bd9Sstevel@tonic-gate patlen = q-p;
522*1ee2e5faSnakanon if ((ns = f->gototab[s][*q]) != 0)
5237c478bd9Sstevel@tonic-gate s = ns;
5247c478bd9Sstevel@tonic-gate else
5257c478bd9Sstevel@tonic-gate s = cgoto(f, s, *q);
526*1ee2e5faSnakanon if (s == 1) { /* no transition */
5277c478bd9Sstevel@tonic-gate if (patlen > 0) {
5287c478bd9Sstevel@tonic-gate patbeg = p;
5297c478bd9Sstevel@tonic-gate return (1);
530*1ee2e5faSnakanon } else
5317c478bd9Sstevel@tonic-gate goto nnextin; /* no nonempty match */
532*1ee2e5faSnakanon }
5337c478bd9Sstevel@tonic-gate } while (*q++ != 0);
5347c478bd9Sstevel@tonic-gate if (f->out[s])
5357c478bd9Sstevel@tonic-gate patlen = q - p - 1; /* don't count $ */
5367c478bd9Sstevel@tonic-gate if (patlen > 0) {
5377c478bd9Sstevel@tonic-gate patbeg = p;
5387c478bd9Sstevel@tonic-gate return (1);
5397c478bd9Sstevel@tonic-gate }
5407c478bd9Sstevel@tonic-gate nnextin:
5417c478bd9Sstevel@tonic-gate s = 2;
5427c478bd9Sstevel@tonic-gate if (f->reset) {
5437c478bd9Sstevel@tonic-gate for (i = 2; i <= f->curstat; i++)
5447c478bd9Sstevel@tonic-gate xfree(f->posns[i]);
5457c478bd9Sstevel@tonic-gate k = *f->posns[0];
546*1ee2e5faSnakanon if ((f->posns[2] =
547*1ee2e5faSnakanon (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
5487c478bd9Sstevel@tonic-gate overflo("out of state space");
549*1ee2e5faSnakanon }
5507c478bd9Sstevel@tonic-gate for (i = 0; i <= k; i++)
5517c478bd9Sstevel@tonic-gate (f->posns[2])[i] = (f->posns[0])[i];
5527c478bd9Sstevel@tonic-gate f->initstat = f->curstat = 2;
5537c478bd9Sstevel@tonic-gate f->out[2] = f->out[0];
5547c478bd9Sstevel@tonic-gate for (i = 0; i < NCHARS; i++)
5557c478bd9Sstevel@tonic-gate f->gototab[2][i] = 0;
5567c478bd9Sstevel@tonic-gate }
5577c478bd9Sstevel@tonic-gate p++;
5587c478bd9Sstevel@tonic-gate }
5597c478bd9Sstevel@tonic-gate return (0);
5607c478bd9Sstevel@tonic-gate }
5617c478bd9Sstevel@tonic-gate
562*1ee2e5faSnakanon static Node *regexp(void), *primary(void), *concat(Node *);
563*1ee2e5faSnakanon static Node *alt(Node *), *unary(Node *);
5647c478bd9Sstevel@tonic-gate
565*1ee2e5faSnakanon static Node *
reparse(uchar * p)566*1ee2e5faSnakanon reparse(uchar *p)
5677c478bd9Sstevel@tonic-gate {
5687c478bd9Sstevel@tonic-gate /* parses regular expression pointed to by p */
5697c478bd9Sstevel@tonic-gate /* uses relex() to scan regular expression */
5707c478bd9Sstevel@tonic-gate Node *np;
5717c478bd9Sstevel@tonic-gate
5727c478bd9Sstevel@tonic-gate dprintf(("reparse <%s>\n", p));
5737c478bd9Sstevel@tonic-gate lastre = prestr = p; /* prestr points to string to be parsed */
5747c478bd9Sstevel@tonic-gate rtok = relex();
5757c478bd9Sstevel@tonic-gate if (rtok == '\0')
5767c478bd9Sstevel@tonic-gate ERROR "empty regular expression" FATAL;
5777c478bd9Sstevel@tonic-gate np = regexp();
578*1ee2e5faSnakanon if (rtok == '\0') {
5797c478bd9Sstevel@tonic-gate return (np);
580*1ee2e5faSnakanon } else {
581*1ee2e5faSnakanon ERROR "syntax error in regular expression %s at %s",
582*1ee2e5faSnakanon lastre, prestr FATAL;
583*1ee2e5faSnakanon }
5847c478bd9Sstevel@tonic-gate /*NOTREACHED*/
585*1ee2e5faSnakanon return (NULL);
5867c478bd9Sstevel@tonic-gate }
5877c478bd9Sstevel@tonic-gate
588*1ee2e5faSnakanon static Node *
regexp(void)589*1ee2e5faSnakanon regexp(void)
5907c478bd9Sstevel@tonic-gate {
5917c478bd9Sstevel@tonic-gate return (alt(concat(primary())));
5927c478bd9Sstevel@tonic-gate }
5937c478bd9Sstevel@tonic-gate
594*1ee2e5faSnakanon static Node *
primary(void)595*1ee2e5faSnakanon primary(void)
5967c478bd9Sstevel@tonic-gate {
5977c478bd9Sstevel@tonic-gate Node *np;
5987c478bd9Sstevel@tonic-gate
5997c478bd9Sstevel@tonic-gate switch (rtok) {
6007c478bd9Sstevel@tonic-gate case CHAR:
6017c478bd9Sstevel@tonic-gate np = op2(CHAR, NIL, (Node *)rlxval);
6027c478bd9Sstevel@tonic-gate rtok = relex();
6037c478bd9Sstevel@tonic-gate return (unary(np));
6047c478bd9Sstevel@tonic-gate case ALL:
6057c478bd9Sstevel@tonic-gate rtok = relex();
6067c478bd9Sstevel@tonic-gate return (unary(op2(ALL, NIL, NIL)));
6077c478bd9Sstevel@tonic-gate case DOT:
6087c478bd9Sstevel@tonic-gate rtok = relex();
6097c478bd9Sstevel@tonic-gate return (unary(op2(DOT, NIL, NIL)));
6107c478bd9Sstevel@tonic-gate case CCL:
611*1ee2e5faSnakanon /*LINTED align*/
6127c478bd9Sstevel@tonic-gate np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
6137c478bd9Sstevel@tonic-gate rtok = relex();
6147c478bd9Sstevel@tonic-gate return (unary(np));
6157c478bd9Sstevel@tonic-gate case NCCL:
616*1ee2e5faSnakanon /*LINTED align*/
6177c478bd9Sstevel@tonic-gate np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
6187c478bd9Sstevel@tonic-gate rtok = relex();
6197c478bd9Sstevel@tonic-gate return (unary(np));
6207c478bd9Sstevel@tonic-gate case '^':
6217c478bd9Sstevel@tonic-gate rtok = relex();
6227c478bd9Sstevel@tonic-gate return (unary(op2(CHAR, NIL, (Node *)HAT)));
6237c478bd9Sstevel@tonic-gate case '$':
6247c478bd9Sstevel@tonic-gate rtok = relex();
6257c478bd9Sstevel@tonic-gate return (unary(op2(CHAR, NIL, NIL)));
6267c478bd9Sstevel@tonic-gate case '(':
6277c478bd9Sstevel@tonic-gate rtok = relex();
6287c478bd9Sstevel@tonic-gate if (rtok == ')') { /* special pleading for () */
6297c478bd9Sstevel@tonic-gate rtok = relex();
630*1ee2e5faSnakanon return (unary(op2(CCL, NIL,
631*1ee2e5faSnakanon /*LINTED align*/
632*1ee2e5faSnakanon (Node *)tostring((uchar *)""))));
6337c478bd9Sstevel@tonic-gate }
6347c478bd9Sstevel@tonic-gate np = regexp();
6357c478bd9Sstevel@tonic-gate if (rtok == ')') {
6367c478bd9Sstevel@tonic-gate rtok = relex();
6377c478bd9Sstevel@tonic-gate return (unary(np));
638*1ee2e5faSnakanon } else {
639*1ee2e5faSnakanon ERROR "syntax error in regular expression %s at %s",
640*1ee2e5faSnakanon lastre, prestr FATAL;
6417c478bd9Sstevel@tonic-gate }
6427c478bd9Sstevel@tonic-gate default:
643*1ee2e5faSnakanon ERROR "illegal primary in regular expression %s at %s",
644*1ee2e5faSnakanon lastre, prestr FATAL;
6457c478bd9Sstevel@tonic-gate }
6467c478bd9Sstevel@tonic-gate /*NOTREACHED*/
647*1ee2e5faSnakanon return (NULL);
6487c478bd9Sstevel@tonic-gate }
6497c478bd9Sstevel@tonic-gate
650*1ee2e5faSnakanon static Node *
concat(Node * np)651*1ee2e5faSnakanon concat(Node *np)
6527c478bd9Sstevel@tonic-gate {
6537c478bd9Sstevel@tonic-gate switch (rtok) {
6547c478bd9Sstevel@tonic-gate case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
6557c478bd9Sstevel@tonic-gate return (concat(op2(CAT, np, primary())));
6567c478bd9Sstevel@tonic-gate default:
6577c478bd9Sstevel@tonic-gate return (np);
6587c478bd9Sstevel@tonic-gate }
6597c478bd9Sstevel@tonic-gate }
6607c478bd9Sstevel@tonic-gate
661*1ee2e5faSnakanon static Node *
alt(Node * np)662*1ee2e5faSnakanon alt(Node *np)
6637c478bd9Sstevel@tonic-gate {
6647c478bd9Sstevel@tonic-gate if (rtok == OR) {
6657c478bd9Sstevel@tonic-gate rtok = relex();
6667c478bd9Sstevel@tonic-gate return (alt(op2(OR, np, concat(primary()))));
6677c478bd9Sstevel@tonic-gate }
6687c478bd9Sstevel@tonic-gate return (np);
6697c478bd9Sstevel@tonic-gate }
6707c478bd9Sstevel@tonic-gate
671*1ee2e5faSnakanon static Node *
unary(Node * np)672*1ee2e5faSnakanon unary(Node *np)
6737c478bd9Sstevel@tonic-gate {
6747c478bd9Sstevel@tonic-gate switch (rtok) {
6757c478bd9Sstevel@tonic-gate case STAR:
6767c478bd9Sstevel@tonic-gate rtok = relex();
6777c478bd9Sstevel@tonic-gate return (unary(op2(STAR, np, NIL)));
6787c478bd9Sstevel@tonic-gate case PLUS:
6797c478bd9Sstevel@tonic-gate rtok = relex();
6807c478bd9Sstevel@tonic-gate return (unary(op2(PLUS, np, NIL)));
6817c478bd9Sstevel@tonic-gate case QUEST:
6827c478bd9Sstevel@tonic-gate rtok = relex();
6837c478bd9Sstevel@tonic-gate return (unary(op2(QUEST, np, NIL)));
6847c478bd9Sstevel@tonic-gate default:
6857c478bd9Sstevel@tonic-gate return (np);
6867c478bd9Sstevel@tonic-gate }
6877c478bd9Sstevel@tonic-gate }
6887c478bd9Sstevel@tonic-gate
689*1ee2e5faSnakanon static int
relex(void)690*1ee2e5faSnakanon relex(void) /* lexical analyzer for reparse */
6917c478bd9Sstevel@tonic-gate {
6927c478bd9Sstevel@tonic-gate register int c;
693*1ee2e5faSnakanon uchar *cbuf;
6947c478bd9Sstevel@tonic-gate int clen, cflag;
6957c478bd9Sstevel@tonic-gate
6967c478bd9Sstevel@tonic-gate switch (c = *prestr++) {
6977c478bd9Sstevel@tonic-gate case '|': return OR;
6987c478bd9Sstevel@tonic-gate case '*': return STAR;
6997c478bd9Sstevel@tonic-gate case '+': return PLUS;
7007c478bd9Sstevel@tonic-gate case '?': return QUEST;
7017c478bd9Sstevel@tonic-gate case '.': return DOT;
7027c478bd9Sstevel@tonic-gate case '\0': prestr--; return '\0';
7037c478bd9Sstevel@tonic-gate case '^':
7047c478bd9Sstevel@tonic-gate case '$':
7057c478bd9Sstevel@tonic-gate case '(':
7067c478bd9Sstevel@tonic-gate case ')':
707*1ee2e5faSnakanon return (c);
7087c478bd9Sstevel@tonic-gate case '\\':
7097c478bd9Sstevel@tonic-gate if ((c = *prestr++) == 't')
7107c478bd9Sstevel@tonic-gate c = '\t';
7117c478bd9Sstevel@tonic-gate else if (c == 'n')
7127c478bd9Sstevel@tonic-gate c = '\n';
7137c478bd9Sstevel@tonic-gate else if (c == 'f')
7147c478bd9Sstevel@tonic-gate c = '\f';
7157c478bd9Sstevel@tonic-gate else if (c == 'r')
7167c478bd9Sstevel@tonic-gate c = '\r';
7177c478bd9Sstevel@tonic-gate else if (c == 'b')
7187c478bd9Sstevel@tonic-gate c = '\b';
7197c478bd9Sstevel@tonic-gate else if (c == '\\')
7207c478bd9Sstevel@tonic-gate c = '\\';
7217c478bd9Sstevel@tonic-gate else if (isdigit(c)) {
7227c478bd9Sstevel@tonic-gate int n = c - '0';
7237c478bd9Sstevel@tonic-gate if (isdigit(*prestr)) {
7247c478bd9Sstevel@tonic-gate n = 8 * n + *prestr++ - '0';
7257c478bd9Sstevel@tonic-gate if (isdigit(*prestr))
7267c478bd9Sstevel@tonic-gate n = 8 * n + *prestr++ - '0';
7277c478bd9Sstevel@tonic-gate }
7287c478bd9Sstevel@tonic-gate c = n;
7297c478bd9Sstevel@tonic-gate } /* else it's now in c */
7307c478bd9Sstevel@tonic-gate rlxval = c;
731*1ee2e5faSnakanon return (CHAR);
7327c478bd9Sstevel@tonic-gate default:
7337c478bd9Sstevel@tonic-gate rlxval = c;
734*1ee2e5faSnakanon return (CHAR);
7357c478bd9Sstevel@tonic-gate case '[':
7367c478bd9Sstevel@tonic-gate clen = 0;
7377c478bd9Sstevel@tonic-gate if (*prestr == '^') {
7387c478bd9Sstevel@tonic-gate cflag = 1;
7397c478bd9Sstevel@tonic-gate prestr++;
740*1ee2e5faSnakanon } else
7417c478bd9Sstevel@tonic-gate cflag = 0;
742*1ee2e5faSnakanon init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
7437c478bd9Sstevel@tonic-gate for (;;) {
7447c478bd9Sstevel@tonic-gate if ((c = *prestr++) == '\\') {
7457c478bd9Sstevel@tonic-gate cbuf[clen++] = '\\';
746*1ee2e5faSnakanon if ((c = *prestr++) == '\0') {
747*1ee2e5faSnakanon ERROR
748*1ee2e5faSnakanon "nonterminated character class %s", lastre FATAL;
749*1ee2e5faSnakanon }
7507c478bd9Sstevel@tonic-gate cbuf[clen++] = c;
7517c478bd9Sstevel@tonic-gate } else if (c == ']') {
7527c478bd9Sstevel@tonic-gate cbuf[clen] = 0;
7537c478bd9Sstevel@tonic-gate rlxstr = tostring(cbuf);
754*1ee2e5faSnakanon free(cbuf);
7557c478bd9Sstevel@tonic-gate if (cflag == 0)
756*1ee2e5faSnakanon return (CCL);
7577c478bd9Sstevel@tonic-gate else
758*1ee2e5faSnakanon return (NCCL);
7597c478bd9Sstevel@tonic-gate } else if (c == '\n') {
760*1ee2e5faSnakanon ERROR "newline in character class %s...",
761*1ee2e5faSnakanon lastre FATAL;
7627c478bd9Sstevel@tonic-gate } else if (c == '\0') {
763*1ee2e5faSnakanon ERROR "nonterminated character class %s",
764*1ee2e5faSnakanon lastre FATAL;
7657c478bd9Sstevel@tonic-gate } else
7667c478bd9Sstevel@tonic-gate cbuf[clen++] = c;
7677c478bd9Sstevel@tonic-gate }
768*1ee2e5faSnakanon /*NOTREACHED*/
7697c478bd9Sstevel@tonic-gate }
770*1ee2e5faSnakanon return (0);
7717c478bd9Sstevel@tonic-gate }
7727c478bd9Sstevel@tonic-gate
773*1ee2e5faSnakanon static int
cgoto(fa * f,int s,int c)774*1ee2e5faSnakanon cgoto(fa *f, int s, int c)
7757c478bd9Sstevel@tonic-gate {
7767c478bd9Sstevel@tonic-gate register int i, j, k;
7777c478bd9Sstevel@tonic-gate register int *p, *q;
7787c478bd9Sstevel@tonic-gate
7797c478bd9Sstevel@tonic-gate for (i = 0; i <= f->accept; i++)
7807c478bd9Sstevel@tonic-gate setvec[i] = 0;
7817c478bd9Sstevel@tonic-gate setcnt = 0;
7827c478bd9Sstevel@tonic-gate /* compute positions of gototab[s,c] into setvec */
7837c478bd9Sstevel@tonic-gate p = f->posns[s];
7847c478bd9Sstevel@tonic-gate for (i = 1; i <= *p; i++) {
7857c478bd9Sstevel@tonic-gate if ((k = f->re[p[i]].ltype) != FINAL) {
786*1ee2e5faSnakanon if (k == CHAR && c == f->re[p[i]].lval ||
787*1ee2e5faSnakanon k == DOT && c != 0 && c != HAT ||
788*1ee2e5faSnakanon k == ALL && c != 0 ||
789*1ee2e5faSnakanon k == CCL &&
790*1ee2e5faSnakanon member(c, (uchar *)f->re[p[i]].lval) ||
791*1ee2e5faSnakanon k == NCCL &&
792*1ee2e5faSnakanon !member(c, (uchar *)f->re[p[i]].lval) &&
793*1ee2e5faSnakanon c != 0 && c != HAT) {
7947c478bd9Sstevel@tonic-gate q = f->re[p[i]].lfollow;
7957c478bd9Sstevel@tonic-gate for (j = 1; j <= *q; j++) {
7967c478bd9Sstevel@tonic-gate if (setvec[q[j]] == 0) {
7977c478bd9Sstevel@tonic-gate setcnt++;
7987c478bd9Sstevel@tonic-gate setvec[q[j]] = 1;
7997c478bd9Sstevel@tonic-gate }
8007c478bd9Sstevel@tonic-gate }
8017c478bd9Sstevel@tonic-gate }
8027c478bd9Sstevel@tonic-gate }
8037c478bd9Sstevel@tonic-gate }
8047c478bd9Sstevel@tonic-gate /* determine if setvec is a previous state */
8057c478bd9Sstevel@tonic-gate tmpset[0] = setcnt;
8067c478bd9Sstevel@tonic-gate j = 1;
8077c478bd9Sstevel@tonic-gate for (i = f->accept; i >= 0; i--)
8087c478bd9Sstevel@tonic-gate if (setvec[i]) {
8097c478bd9Sstevel@tonic-gate tmpset[j++] = i;
8107c478bd9Sstevel@tonic-gate }
8117c478bd9Sstevel@tonic-gate /* tmpset == previous state? */
8127c478bd9Sstevel@tonic-gate for (i = 1; i <= f->curstat; i++) {
8137c478bd9Sstevel@tonic-gate p = f->posns[i];
8147c478bd9Sstevel@tonic-gate if ((k = tmpset[0]) != p[0])
8157c478bd9Sstevel@tonic-gate goto different;
8167c478bd9Sstevel@tonic-gate for (j = 1; j <= k; j++)
8177c478bd9Sstevel@tonic-gate if (tmpset[j] != p[j])
8187c478bd9Sstevel@tonic-gate goto different;
8197c478bd9Sstevel@tonic-gate /* setvec is state i */
8207c478bd9Sstevel@tonic-gate f->gototab[s][c] = i;
821*1ee2e5faSnakanon return (i);
8227c478bd9Sstevel@tonic-gate different:;
8237c478bd9Sstevel@tonic-gate }
8247c478bd9Sstevel@tonic-gate
8257c478bd9Sstevel@tonic-gate /* add tmpset to current set of states */
8267c478bd9Sstevel@tonic-gate if (f->curstat >= NSTATES-1) {
8277c478bd9Sstevel@tonic-gate f->curstat = 2;
8287c478bd9Sstevel@tonic-gate f->reset = 1;
8297c478bd9Sstevel@tonic-gate for (i = 2; i < NSTATES; i++)
8307c478bd9Sstevel@tonic-gate xfree(f->posns[i]);
831*1ee2e5faSnakanon } else
8327c478bd9Sstevel@tonic-gate ++(f->curstat);
8337c478bd9Sstevel@tonic-gate for (i = 0; i < NCHARS; i++)
8347c478bd9Sstevel@tonic-gate f->gototab[f->curstat][i] = 0;
8357c478bd9Sstevel@tonic-gate xfree(f->posns[f->curstat]);
8367c478bd9Sstevel@tonic-gate if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
8377c478bd9Sstevel@tonic-gate overflo("out of space in cgoto");
8387c478bd9Sstevel@tonic-gate
8397c478bd9Sstevel@tonic-gate f->posns[f->curstat] = p;
8407c478bd9Sstevel@tonic-gate f->gototab[s][c] = f->curstat;
8417c478bd9Sstevel@tonic-gate for (i = 0; i <= setcnt; i++)
8427c478bd9Sstevel@tonic-gate p[i] = tmpset[i];
8437c478bd9Sstevel@tonic-gate if (setvec[f->accept])
8447c478bd9Sstevel@tonic-gate f->out[f->curstat] = 1;
8457c478bd9Sstevel@tonic-gate else
8467c478bd9Sstevel@tonic-gate f->out[f->curstat] = 0;
847*1ee2e5faSnakanon return (f->curstat);
8487c478bd9Sstevel@tonic-gate }
8497c478bd9Sstevel@tonic-gate
850*1ee2e5faSnakanon static void
freefa(fa * f)851*1ee2e5faSnakanon freefa(fa *f)
8527c478bd9Sstevel@tonic-gate {
8537c478bd9Sstevel@tonic-gate
8547c478bd9Sstevel@tonic-gate register int i;
8557c478bd9Sstevel@tonic-gate
8567c478bd9Sstevel@tonic-gate if (f == NULL)
8577c478bd9Sstevel@tonic-gate return;
8587c478bd9Sstevel@tonic-gate for (i = 0; i <= f->curstat; i++)
8597c478bd9Sstevel@tonic-gate xfree(f->posns[i]);
8607c478bd9Sstevel@tonic-gate for (i = 0; i <= f->accept; i++)
8617c478bd9Sstevel@tonic-gate xfree(f->re[i].lfollow);
8627c478bd9Sstevel@tonic-gate xfree(f->restr);
8637c478bd9Sstevel@tonic-gate xfree(f);
8647c478bd9Sstevel@tonic-gate }
865