1 /*
2 * Copyright (C) Lucent Technologies 1997
3 * All Rights Reserved
4 *
5 * Permission to use, copy, modify, and distribute this software and
6 * its documentation for any purpose and without fee is hereby
7 * granted, provided that the above copyright notice appear in all
8 * copies and that both that the copyright notice and this
9 * permission notice and warranty disclaimer appear in supporting
10 * documentation, and that the name Lucent Technologies or any of
11 * its entities not be used in advertising or publicity pertaining
12 * to distribution of the software without specific, written prior
13 * permission.
14 *
15 * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 * THIS SOFTWARE.
23 */
24
25 /*
26 * CDDL HEADER START
27 *
28 * The contents of this file are subject to the terms of the
29 * Common Development and Distribution License, Version 1.0 only
30 * (the "License"). You may not use this file except in compliance
31 * with the License.
32 *
33 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
34 * or http://www.opensolaris.org/os/licensing.
35 * See the License for the specific language governing permissions
36 * and limitations under the License.
37 *
38 * When distributing Covered Code, include this CDDL HEADER in each
39 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
40 * If applicable, add the following below this CDDL HEADER, with the
41 * fields enclosed by brackets "[]" replaced with your own identifying
42 * information: Portions Copyright [yyyy] [name of copyright owner]
43 *
44 * CDDL HEADER END
45 */
46
47 /*
48 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
49 * Use is subject to license terms.
50 */
51
52 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
53 /* All Rights Reserved */
54
55 /* lasciate ogne speranza, voi ch'intrate. */
56
57 #define DEBUG
58
59 #include "awk.h"
60 #include "y.tab.h"
61
62 #define HAT (NCHARS-1) /* matches ^ in regular expr */
63 /* NCHARS is 2**n */
64 #define MAXLIN (3 * LINE_MAX)
65
66 #define type(v) (v)->nobj /* badly overloaded here */
67 #define info(v) (v)->ntype /* badly overloaded here */
68 #define left(v) (v)->narg[0]
69 #define right(v) (v)->narg[1]
70 #define parent(v) (v)->nnext
71
72 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
73 #define ELEAF case EMPTYRE: /* empty string in regexp */
74 #define UNARY case STAR: case PLUS: case QUEST:
75
76 /*
77 * encoding in tree Nodes:
78 * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
79 * left is index, right contains value or pointer to value
80 * unary (STAR, PLUS, QUEST): left is child, right is null
81 * binary (CAT, OR): left and right are children
82 * parent contains pointer to parent
83 */
84
85 int *setvec;
86 int *tmpset;
87 int maxsetvec = 0;
88
89 int rtok; /* next token in current re */
90 int rlxval;
91 static uschar *rlxstr;
92 static uschar *prestr; /* current position in current re */
93 static uschar *lastre; /* origin of last re */
94
95 static int setcnt;
96 static int poscnt;
97
98 char *patbeg;
99 int patlen;
100
101 #define NFA 20 /* cache this many dynamic fa's */
102 fa *fatab[NFA];
103 int nfatab = 0; /* entries in fatab */
104
105 static fa *mkdfa(const char *, int);
106 static int makeinit(fa *, int);
107 static void penter(Node *);
108 static void freetr(Node *);
109 static void overflo(const char *);
110 static void growvec(const char *);
111 static void cfoll(fa *, Node *);
112 static void follow(Node *);
113 static Node *reparse(const char *);
114 static int relex(void);
115 static void freefa(fa *);
116 static int cgoto(fa *, int, int);
117
118 fa *
makedfa(const char * s,int anchor)119 makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
120 {
121 int i, use, nuse;
122 fa *pfa;
123 static int now = 1;
124
125 if (setvec == NULL) { /* first time through any RE */
126 maxsetvec = MAXLIN;
127 setvec = (int *)malloc(maxsetvec * sizeof (int));
128 tmpset = (int *)malloc(maxsetvec * sizeof (int));
129 if (setvec == NULL || tmpset == NULL)
130 overflo("out of space initializing makedfa");
131 }
132
133 if (compile_time) /* a constant for sure */
134 return (mkdfa(s, anchor));
135 for (i = 0; i < nfatab; i++) { /* is it there already? */
136 if (fatab[i]->anchor == anchor &&
137 strcmp((const char *)fatab[i]->restr, s) == 0) {
138 fatab[i]->use = now++;
139 return (fatab[i]);
140 }
141 }
142 pfa = mkdfa(s, anchor);
143 if (nfatab < NFA) { /* room for another */
144 fatab[nfatab] = pfa;
145 fatab[nfatab]->use = now++;
146 nfatab++;
147 return (pfa);
148 }
149 use = fatab[0]->use; /* replace least-recently used */
150 nuse = 0;
151 for (i = 1; i < nfatab; i++)
152 if (fatab[i]->use < use) {
153 use = fatab[i]->use;
154 nuse = i;
155 }
156 freefa(fatab[nuse]);
157 fatab[nuse] = pfa;
158 pfa->use = now++;
159 return (pfa);
160 }
161
162 /*
163 * does the real work of making a dfa
164 * anchor = 1 for anchored matches, else 0
165 */
166 fa *
mkdfa(const char * s,int anchor)167 mkdfa(const char *s, int anchor)
168 {
169 Node *p, *p1;
170 fa *f;
171
172 p = reparse(s);
173 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
174 /* put ALL STAR in front of reg. exp. */
175 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
176 /* put FINAL after reg. exp. */
177
178 poscnt = 0;
179 penter(p1); /* enter parent pointers and leaf indices */
180 if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
181 overflo("out of space for fa");
182 /* penter has computed number of positions in re */
183 f->accept = poscnt-1;
184 cfoll(f, p1); /* set up follow sets */
185 freetr(p1);
186 if ((f->posns[0] =
187 (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
188 overflo("out of space in makedfa");
189 }
190 if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
191 overflo("out of space in makedfa");
192 *f->posns[1] = 0;
193 f->initstat = makeinit(f, anchor);
194 f->anchor = anchor;
195 f->restr = (uschar *)tostring(s);
196 return (f);
197 }
198
199 static int
makeinit(fa * f,int anchor)200 makeinit(fa *f, int anchor)
201 {
202 int i, k;
203
204 f->curstat = 2;
205 f->out[2] = 0;
206 f->reset = 0;
207 k = *(f->re[0].lfollow);
208 xfree(f->posns[2]);
209 if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
210 overflo("out of space in makeinit");
211 for (i = 0; i <= k; i++) {
212 (f->posns[2])[i] = (f->re[0].lfollow)[i];
213 }
214 if ((f->posns[2])[1] == f->accept)
215 f->out[2] = 1;
216 for (i = 0; i < NCHARS; i++)
217 f->gototab[2][i] = 0;
218 f->curstat = cgoto(f, 2, HAT);
219 if (anchor) {
220 *f->posns[2] = k-1; /* leave out position 0 */
221 for (i = 0; i < k; i++) {
222 (f->posns[0])[i] = (f->posns[2])[i];
223 }
224
225 f->out[0] = f->out[2];
226 if (f->curstat != 2)
227 --(*f->posns[f->curstat]);
228 }
229 return (f->curstat);
230 }
231
232 void
penter(Node * p)233 penter(Node *p) /* set up parent pointers and leaf indices */
234 {
235 switch (type(p)) {
236 ELEAF
237 LEAF
238 info(p) = poscnt;
239 poscnt++;
240 break;
241 UNARY
242 penter(left(p));
243 parent(left(p)) = p;
244 break;
245 case CAT:
246 case OR:
247 penter(left(p));
248 penter(right(p));
249 parent(left(p)) = p;
250 parent(right(p)) = p;
251 break;
252 default: /* can't happen */
253 FATAL("can't happen: unknown type %d in penter", type(p));
254 break;
255 }
256 }
257
258 static void
freetr(Node * p)259 freetr(Node *p) /* free parse tree */
260 {
261 switch (type(p)) {
262 ELEAF
263 LEAF
264 xfree(p);
265 break;
266 UNARY
267 freetr(left(p));
268 xfree(p);
269 break;
270 case CAT:
271 case OR:
272 freetr(left(p));
273 freetr(right(p));
274 xfree(p);
275 break;
276 default: /* can't happen */
277 FATAL("can't happen: unknown type %d in freetr", type(p));
278 break;
279 }
280 }
281
282 static void
growvec(const char * msg)283 growvec(const char *msg)
284 {
285 maxsetvec *= 4;
286 setvec = (int *)realloc(setvec, maxsetvec * sizeof (int));
287 tmpset = (int *)realloc(tmpset, maxsetvec * sizeof (int));
288 if (setvec == NULL || tmpset == NULL)
289 overflo(msg);
290 }
291
292 /*
293 * in the parsing of regular expressions, metacharacters like . have
294 * to be seen literally; \056 is not a metacharacter.
295 */
296
297 /*
298 * find and eval hex string at pp, return new p; only pick up one 8-bit
299 * byte (2 chars).
300 */
301 int
hexstr(uschar ** pp)302 hexstr(uschar **pp)
303 {
304 uschar *p;
305 int n = 0;
306 int i;
307
308 for (i = 0, p = (uschar *)*pp; i < 2 && isxdigit(*p); i++, p++) {
309 if (isdigit(*p))
310 n = 16 * n + *p - '0';
311 else if (*p >= 'a' && *p <= 'f')
312 n = 16 * n + *p - 'a' + 10;
313 else if (*p >= 'A' && *p <= 'F')
314 n = 16 * n + *p - 'A' + 10;
315 }
316 *pp = (uschar *)p;
317 return (n);
318 }
319
320 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')
321
322 /* pick up next thing after a \\ and increment *pp */
323 int
quoted(uschar ** pp)324 quoted(uschar **pp)
325 {
326 uschar *p = *pp;
327 int c;
328
329 if ((c = *p++) == 't')
330 c = '\t';
331 else if (c == 'n')
332 c = '\n';
333 else if (c == 'f')
334 c = '\f';
335 else if (c == 'r')
336 c = '\r';
337 else if (c == 'b')
338 c = '\b';
339 else if (c == '\\')
340 c = '\\';
341 else if (c == 'x') { /* hexadecimal goo follows */
342 c = hexstr(&p); /* this adds a null if number is invalid */
343 } else if (isoctdigit(c)) { /* \d \dd \ddd */
344 int n = c - '0';
345 if (isoctdigit(*p)) {
346 n = 8 * n + *p++ - '0';
347 if (isoctdigit(*p))
348 n = 8 * n + *p++ - '0';
349 }
350 c = n;
351 } /* else */
352 /* c = c; */
353 *pp = p;
354 return (c);
355 }
356
357 char *
cclenter(const char * argp)358 cclenter(const char *argp) /* add a character class */
359 {
360 int i, c, c2;
361 uschar *p = (uschar *)argp;
362 uschar *op, *bp;
363 static uschar *buf = NULL;
364 static size_t bufsz = 100;
365
366 op = p;
367 if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL)
368 FATAL("out of space for character class [%.10s...] 1", p);
369 bp = buf;
370 for (i = 0; (c = *p++) != 0; ) {
371 if (c == '\\') {
372 c = quoted(&p);
373 } else if (c == '-' && i > 0 && bp[-1] != 0) {
374 if (*p != 0) {
375 c = bp[-1];
376 c2 = *p++;
377 if (c2 == '\\')
378 c2 = quoted(&p);
379 if (c > c2) { /* empty; ignore */
380 bp--;
381 i--;
382 continue;
383 }
384 while (c < c2) {
385 if (!adjbuf((char **)&buf, &bufsz,
386 bp-buf+2, 100, (char **)&bp,
387 "cclenter1")) {
388 FATAL(
389 "out of space for character class [%.10s...] 2", p);
390 }
391 *bp++ = ++c;
392 i++;
393 }
394 continue;
395 }
396 }
397 if (!adjbuf((char **)&buf, &bufsz, bp-buf+2, 100, (char **)&bp,
398 "cclenter2"))
399 FATAL(
400 "out of space for character class [%.10s...] 3", p);
401 *bp++ = c;
402 i++;
403 }
404 *bp = '\0';
405 dprintf(("cclenter: in = |%s|, out = |%s|\n", op, buf));
406 xfree(op);
407 return ((char *)tostring((char *)buf));
408 }
409
410 static void
overflo(const char * s)411 overflo(const char *s)
412 {
413 FATAL("regular expression too big: %.30s...", gettext((char *)s));
414 }
415
416 /* enter follow set of each leaf of vertex v into lfollow[leaf] */
417 static void
cfoll(fa * f,Node * v)418 cfoll(fa *f, Node *v)
419 {
420 int i;
421 int *p;
422
423 switch (type(v)) {
424 ELEAF
425 LEAF
426 f->re[info(v)].ltype = type(v);
427 f->re[info(v)].lval.np = right(v);
428 while (f->accept >= maxsetvec) { /* guessing here! */
429 growvec("out of space in cfoll()");
430 }
431 for (i = 0; i <= f->accept; i++)
432 setvec[i] = 0;
433 setcnt = 0;
434 follow(v); /* computes setvec and setcnt */
435 if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
436 overflo("out of space building follow set");
437 f->re[info(v)].lfollow = p;
438 *p = setcnt;
439 for (i = f->accept; i >= 0; i--) {
440 if (setvec[i] == 1)
441 *++p = i;
442 }
443 break;
444 UNARY
445 cfoll(f, left(v));
446 break;
447 case CAT:
448 case OR:
449 cfoll(f, left(v));
450 cfoll(f, right(v));
451 break;
452 default: /* can't happen */
453 FATAL("can't happen: unknown type %d in cfoll", type(v));
454 }
455 }
456
457 /*
458 * collects initially active leaves of p into setvec
459 * returns 0 or 1 depending on whether p matches empty string
460 */
461 static int
first(Node * p)462 first(Node *p)
463 {
464 int b, lp;
465
466 switch (type(p)) {
467 ELEAF
468 LEAF
469 lp = info(p); /* look for high-water mark of subscripts */
470 while (setcnt >= maxsetvec || lp >= maxsetvec) {
471 /* guessing here! */
472 growvec("out of space in first()");
473 }
474 if (type(p) == EMPTYRE) {
475 setvec[lp] = 0;
476 return (0);
477 }
478 if (setvec[lp] != 1) {
479 setvec[lp] = 1;
480 setcnt++;
481 }
482 if (type(p) == CCL && (*(char *)right(p)) == '\0')
483 return (0); /* empty CCL */
484 else
485 return (1);
486 case PLUS:
487 if (first(left(p)) == 0)
488 return (0);
489 return (1);
490 case STAR:
491 case QUEST:
492 (void) first(left(p));
493 return (0);
494 case CAT:
495 if (first(left(p)) == 0 && first(right(p)) == 0)
496 return (0);
497 return (1);
498 case OR:
499 b = first(right(p));
500 if (first(left(p)) == 0 || b == 0)
501 return (0);
502 return (1);
503 }
504 FATAL("can't happen: unknown type %d in first", type(p));
505 }
506
507 /* collects leaves that can follow v into setvec */
508 static void
follow(Node * v)509 follow(Node *v)
510 {
511 Node *p;
512
513 if (type(v) == FINAL)
514 return;
515 p = parent(v);
516 switch (type(p)) {
517 case STAR:
518 case PLUS:
519 (void) first(v);
520 follow(p);
521 return;
522
523 case OR:
524 case QUEST:
525 follow(p);
526 return;
527
528 case CAT:
529 if (v == left(p)) { /* v is left child of p */
530 if (first(right(p)) == 0) {
531 follow(p);
532 return;
533 }
534 } else /* v is right child */
535 follow(p);
536 return;
537 default:
538 FATAL("unknown type %d in follow", type(p));
539 break;
540 }
541 }
542
543 static int
member(int c,const char * sarg)544 member(int c, const char *sarg) /* is c in s? */
545 {
546 uschar *s = (uschar *)sarg;
547
548 while (*s)
549 if (c == *s++)
550 return (1);
551 return (0);
552 }
553
554
555 int
match(fa * f,const char * p0)556 match(fa *f, const char *p0) /* shortest match ? */
557 {
558 int s, ns;
559 uschar *p = (uschar *)p0;
560
561 s = f->reset ? makeinit(f, 0) : f->initstat;
562 if (f->out[s])
563 return (1);
564 do {
565 if ((ns = f->gototab[s][*p]) != 0)
566 s = ns;
567 else
568 s = cgoto(f, s, *p);
569 if (f->out[s])
570 return (1);
571 } while (*p++ != 0);
572 return (0);
573 }
574
575 int
pmatch(fa * f,const char * p0)576 pmatch(fa *f, const char *p0) /* longest match, for sub */
577 {
578 int s, ns;
579 uschar *p = (uschar *)p0;
580 uschar *q;
581 int i, k;
582
583 if (f->reset) {
584 f->initstat = s = makeinit(f, 1);
585 } else {
586 s = f->initstat;
587 }
588 patbeg = (char *)p;
589 patlen = -1;
590 do {
591 q = p;
592 do {
593 if (f->out[s]) /* final state */
594 patlen = q-p;
595 if ((ns = f->gototab[s][*q]) != 0)
596 s = ns;
597 else
598 s = cgoto(f, s, *q);
599 if (s == 1) { /* no transition */
600 if (patlen >= 0) {
601 patbeg = (char *)p;
602 return (1);
603 } else {
604 goto nextin; /* no match */
605 }
606 }
607 } while (*q++ != 0);
608 if (f->out[s])
609 patlen = q - p - 1; /* don't count $ */
610 if (patlen >= 0) {
611 patbeg = (char *)p;
612 return (1);
613 }
614 nextin:
615 s = 2;
616 if (f->reset) {
617 for (i = 2; i <= f->curstat; i++)
618 xfree(f->posns[i]);
619 k = *f->posns[0];
620 if ((f->posns[2] =
621 (int *)calloc(k + 1, sizeof (int))) == NULL) {
622 overflo("out of space in pmatch");
623 }
624 for (i = 0; i <= k; i++)
625 (f->posns[2])[i] = (f->posns[0])[i];
626 f->initstat = f->curstat = 2;
627 f->out[2] = f->out[0];
628 for (i = 0; i < NCHARS; i++)
629 f->gototab[2][i] = 0;
630 }
631 } while (*p++ != 0);
632 return (0);
633 }
634
635 int
nematch(fa * f,const char * p0)636 nematch(fa *f, const char *p0) /* non-empty match, for sub */
637 {
638 int s, ns;
639 uschar *p = (uschar *)p0;
640 uschar *q;
641 int i, k;
642
643 if (f->reset) {
644 f->initstat = s = makeinit(f, 1);
645 } else {
646 s = f->initstat;
647 }
648 patlen = -1;
649 while (*p) {
650 q = p;
651 do {
652 if (f->out[s]) /* final state */
653 patlen = q-p;
654 if ((ns = f->gototab[s][*q]) != 0)
655 s = ns;
656 else
657 s = cgoto(f, s, *q);
658 if (s == 1) { /* no transition */
659 if (patlen > 0) {
660 patbeg = (char *)p;
661 return (1);
662 } else
663 goto nnextin; /* no nonempty match */
664 }
665 } while (*q++ != 0);
666 if (f->out[s])
667 patlen = q - p - 1; /* don't count $ */
668 if (patlen > 0) {
669 patbeg = (char *)p;
670 return (1);
671 }
672 nnextin:
673 s = 2;
674 if (f->reset) {
675 for (i = 2; i <= f->curstat; i++)
676 xfree(f->posns[i]);
677 k = *f->posns[0];
678 if ((f->posns[2] =
679 (int *)calloc(k + 1, sizeof (int))) == NULL) {
680 overflo("out of state space");
681 }
682 for (i = 0; i <= k; i++)
683 (f->posns[2])[i] = (f->posns[0])[i];
684 f->initstat = f->curstat = 2;
685 f->out[2] = f->out[0];
686 for (i = 0; i < NCHARS; i++)
687 f->gototab[2][i] = 0;
688 }
689 p++;
690 }
691 return (0);
692 }
693
694 static Node *regexp(void), *primary(void), *concat(Node *);
695 static Node *alt(Node *), *unary(Node *);
696
697 /* parses regular expression pointed to by p */
698 /* uses relex() to scan regular expression */
699 static Node *
reparse(const char * p)700 reparse(const char *p)
701 {
702 Node *np;
703
704 dprintf(("reparse <%s>\n", p));
705
706 /* prestr points to string to be parsed */
707 lastre = prestr = (uschar *)p;
708 rtok = relex();
709 /* GNU compatibility: an empty regexp matches anything */
710 if (rtok == '\0') {
711 return (op2(EMPTYRE, NIL, NIL));
712 }
713 np = regexp();
714 if (rtok != '\0')
715 FATAL("syntax error in regular expression %s at %s",
716 lastre, prestr);
717 return (np);
718 }
719
720 static Node *
regexp(void)721 regexp(void) /* top-level parse of reg expr */
722 {
723 return (alt(concat(primary())));
724 }
725
726 static Node *
primary(void)727 primary(void)
728 {
729 Node *np;
730
731 switch (rtok) {
732 case CHAR:
733 np = op2(CHAR, NIL, itonp(rlxval));
734 rtok = relex();
735 return (unary(np));
736 case ALL:
737 rtok = relex();
738 return (unary(op2(ALL, NIL, NIL)));
739 case EMPTYRE:
740 rtok = relex();
741 return (unary(op2(ALL, NIL, NIL)));
742 case DOT:
743 rtok = relex();
744 return (unary(op2(DOT, NIL, NIL)));
745 case CCL:
746 /*LINTED align*/
747 np = op2(CCL, NIL, (Node *)cclenter((char *)rlxstr));
748 rtok = relex();
749 return (unary(np));
750 case NCCL:
751 /*LINTED align*/
752 np = op2(NCCL, NIL, (Node *)cclenter((char *)rlxstr));
753 rtok = relex();
754 return (unary(np));
755 case '^':
756 rtok = relex();
757 return (unary(op2(CHAR, NIL, itonp(HAT))));
758 case '$':
759 rtok = relex();
760 return (unary(op2(CHAR, NIL, NIL)));
761 case '(':
762 rtok = relex();
763 if (rtok == ')') { /* special pleading for () */
764 rtok = relex();
765 return (unary(op2(CCL, NIL,
766 /*LINTED align*/
767 (Node *)tostring(""))));
768 }
769 np = regexp();
770 if (rtok == ')') {
771 rtok = relex();
772 return (unary(np));
773 } else {
774 FATAL("syntax error in regular expression %s at %s",
775 lastre, prestr);
776 }
777 /* FALLTHROUGH */
778 default:
779 FATAL("illegal primary in regular expression %s at %s",
780 lastre, prestr);
781 }
782 /*NOTREACHED*/
783 return (NULL);
784 }
785
786 static Node *
concat(Node * np)787 concat(Node *np)
788 {
789 switch (rtok) {
790 case EMPTYRE:
791 case CHAR:
792 case DOT:
793 case ALL:
794 case CCL:
795 case NCCL:
796 case '$':
797 case '(':
798 return (concat(op2(CAT, np, primary())));
799 default:
800 return (np);
801 }
802 }
803
804 static Node *
alt(Node * np)805 alt(Node *np)
806 {
807 if (rtok == OR) {
808 rtok = relex();
809 return (alt(op2(OR, np, concat(primary()))));
810 }
811 return (np);
812 }
813
814 static Node *
unary(Node * np)815 unary(Node *np)
816 {
817 switch (rtok) {
818 case STAR:
819 rtok = relex();
820 return (unary(op2(STAR, np, NIL)));
821 case PLUS:
822 rtok = relex();
823 return (unary(op2(PLUS, np, NIL)));
824 case QUEST:
825 rtok = relex();
826 return (unary(op2(QUEST, np, NIL)));
827 default:
828 return (np);
829 }
830 }
831
832 /*
833 * Character class definitions conformant to the POSIX locale as
834 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
835 * and operating character sets are both ASCII (ISO646) or supersets
836 * thereof.
837 *
838 * Note that to avoid overflowing the temporary buffer used in
839 * relex(), the expanded character class (prior to range expansion)
840 * must be less than twice the size of their full name.
841 */
842
843 struct charclass {
844 const char *cc_name;
845 int cc_namelen;
846 int (*cc_func)(int);
847 } charclasses[] = {
848 { "alnum", 5, isalnum },
849 { "alpha", 5, isalpha },
850 { "blank", 5, isblank },
851 { "cntrl", 5, iscntrl },
852 { "digit", 5, isdigit },
853 { "graph", 5, isgraph },
854 { "lower", 5, islower },
855 { "print", 5, isprint },
856 { "punct", 5, ispunct },
857 { "space", 5, isspace },
858 { "upper", 5, isupper },
859 { "xdigit", 6, isxdigit },
860 { NULL, 0, NULL },
861 };
862
863
864 static int
relex(void)865 relex(void) /* lexical analyzer for reparse */
866 {
867 int c, n;
868 int cflag;
869 static uschar *buf = 0;
870 static size_t bufsz = 100;
871 uschar *bp;
872 struct charclass *cc;
873 int i;
874
875 switch (c = *prestr++) {
876 case '|': return OR;
877 case '*': return STAR;
878 case '+': return PLUS;
879 case '?': return QUEST;
880 case '.': return DOT;
881 case '\0': prestr--; return '\0';
882 case '^':
883 case '$':
884 case '(':
885 case ')':
886 return (c);
887 case '\\':
888 rlxval = quoted(&prestr);
889 return (CHAR);
890 default:
891 rlxval = c;
892 return (CHAR);
893 case '[':
894 if (buf == NULL && (buf = (uschar *)malloc(bufsz)) == NULL)
895 FATAL("out of space in reg expr %.10s..", lastre);
896 bp = buf;
897 if (*prestr == '^') {
898 cflag = 1;
899 prestr++;
900 } else
901 cflag = 0;
902 n = 2 * strlen((const char *)prestr) + 1;
903 if (!adjbuf((char **)&buf, &bufsz, n, n, (char **)&bp,
904 "relex1"))
905 FATAL("out of space for reg expr %.10s...", lastre);
906 for (;;) {
907 if ((c = *prestr++) == '\\') {
908 *bp++ = '\\';
909 if ((c = *prestr++) == '\0') {
910 FATAL("nonterminated character class "
911 "%.20s...", lastre);
912 }
913 *bp++ = c;
914 } else if (c == '[' && *prestr == ':') {
915 /*
916 * Handle POSIX character class names.
917 * Dag-Erling Smorgrav, des@ofug.org
918 */
919 for (cc = charclasses; cc->cc_name; cc++)
920 if (strncmp((const char *)prestr + 1,
921 (const char *)cc->cc_name,
922 cc->cc_namelen) == 0)
923 break;
924
925 if (cc->cc_name == NULL ||
926 prestr[1 + cc->cc_namelen] != ':' ||
927 prestr[2 + cc->cc_namelen] != ']') {
928 *bp++ = c;
929 continue;
930 }
931
932 prestr += cc->cc_namelen + 3;
933 /*
934 * BUG: We begin at 1, instead of 0, since we
935 * would otherwise prematurely terminate the
936 * string for classes like [[:cntrl:]]. This
937 * means that we can't match the NUL character,
938 * not without first adapting the entire
939 * program to track each string's length.
940 */
941 for (i = 1; i < NCHARS; i++) {
942 (void) adjbuf((char **)&buf, &bufsz,
943 bp - buf + 1, 100, (char **)&bp,
944 "relex2");
945 if (cc->cc_func(i)) {
946 *bp++ = i;
947 n++;
948 }
949 }
950 } else if (c == '\0') {
951 FATAL("nonterminated character class %.20s",
952 lastre);
953 } else if (bp == buf) { /* 1st char is special */
954 *bp++ = c;
955 } else if (c == ']') {
956 *bp++ = '\0';
957 rlxstr = (uschar *)tostring((char *)buf);
958 if (cflag == 0)
959 return (CCL);
960 else
961 return (NCCL);
962 } else
963 *bp++ = c;
964 }
965 /*NOTREACHED*/
966 }
967 return (0);
968 }
969
970 static int
cgoto(fa * f,int s,int c)971 cgoto(fa *f, int s, int c)
972 {
973 int i, j, k;
974 int *p, *q;
975
976 assert(c == HAT || c < NCHARS);
977 while (f->accept >= maxsetvec) { /* guessing here! */
978 growvec("out of space in cgoto()");
979 }
980 for (i = 0; i <= f->accept; i++)
981 setvec[i] = 0;
982 setcnt = 0;
983 /* compute positions of gototab[s,c] into setvec */
984 p = f->posns[s];
985 for (i = 1; i <= *p; i++) {
986 if ((k = f->re[p[i]].ltype) != FINAL) {
987 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) ||
988 (k == DOT && c != 0 && c != HAT) ||
989 (k == ALL && c != 0) ||
990 (k == EMPTYRE && c != 0) ||
991 (k == CCL &&
992 member(c, (char *)f->re[p[i]].lval.up)) ||
993 (k == NCCL &&
994 !member(c, (char *)f->re[p[i]].lval.up) &&
995 c != 0 && c != HAT)) {
996 q = f->re[p[i]].lfollow;
997 for (j = 1; j <= *q; j++) {
998 if (q[j] >= maxsetvec) {
999 growvec("cgoto overflow");
1000 }
1001 if (setvec[q[j]] == 0) {
1002 setcnt++;
1003 setvec[q[j]] = 1;
1004 }
1005 }
1006 }
1007 }
1008 }
1009 /* determine if setvec is a previous state */
1010 tmpset[0] = setcnt;
1011 j = 1;
1012 for (i = f->accept; i >= 0; i--)
1013 if (setvec[i]) {
1014 tmpset[j++] = i;
1015 }
1016 /* tmpset == previous state? */
1017 for (i = 1; i <= f->curstat; i++) {
1018 p = f->posns[i];
1019 if ((k = tmpset[0]) != p[0])
1020 goto different;
1021 for (j = 1; j <= k; j++)
1022 if (tmpset[j] != p[j])
1023 goto different;
1024 /* setvec is state i */
1025 f->gototab[s][c] = i;
1026 return (i);
1027 different:;
1028 }
1029
1030 /* add tmpset to current set of states */
1031 if (f->curstat >= NSTATES-1) {
1032 f->curstat = 2;
1033 f->reset = 1;
1034 for (i = 2; i < NSTATES; i++)
1035 xfree(f->posns[i]);
1036 } else
1037 ++(f->curstat);
1038 for (i = 0; i < NCHARS; i++)
1039 f->gototab[f->curstat][i] = 0;
1040 xfree(f->posns[f->curstat]);
1041 if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
1042 overflo("out of space in cgoto");
1043
1044 f->posns[f->curstat] = p;
1045 f->gototab[s][c] = f->curstat;
1046 for (i = 0; i <= setcnt; i++)
1047 p[i] = tmpset[i];
1048 if (setvec[f->accept])
1049 f->out[f->curstat] = 1;
1050 else
1051 f->out[f->curstat] = 0;
1052 return (f->curstat);
1053 }
1054
1055 static void
freefa(fa * f)1056 freefa(fa *f) /* free a finite automaton */
1057 {
1058 int i;
1059
1060 if (f == NULL)
1061 return;
1062 for (i = 0; i <= f->curstat; i++)
1063 xfree(f->posns[i]);
1064 for (i = 0; i <= f->accept; i++) {
1065 xfree(f->re[i].lfollow);
1066 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
1067 xfree((f->re[i].lval.np));
1068 }
1069 xfree(f->restr);
1070 xfree(f);
1071 }
1072