/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ /* * Copyright 2014 Garrett D'Amore * * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _REGEXP_H #define _REGEXP_H #include #ifdef __cplusplus extern "C" { #endif #define CBRA 2 #define CCHR 4 #define CDOT 8 #define CCL 12 #define CXCL 16 #define CDOL 20 #define CCEOF 22 #define CKET 24 #define CBACK 36 #define NCCL 40 #define STAR 01 #define RNGE 03 #define NBRA 9 #define PLACE(c) ep[c >> 3] |= bittab[c & 07] #define ISTHERE(c) (ep[c >> 3] & bittab[c & 07]) #define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0) static char *braslist[NBRA]; static char *braelist[NBRA]; int sed, nbra; char *loc1, *loc2, *locs; static int nodelim; int circf; static int low; static int size; static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; int advance(const char *lp, const char *ep); static void getrnge(const char *str); char * compile(char *instring, char *ep, const char *endbuf, int seof) { INIT /* Dependent declarations and initializations */ register int c; register int eof = seof; char *lastep; int cclcnt; char bracket[NBRA], *bracketp; int closed; int neg; int lc; int i, cflg; int iflag; /* used for non-ascii characters in brackets */ #ifdef __lint /* make lint happy */ c = nodelim; #endif lastep = NULL; if ((c = GETC()) == eof || c == '\n') { if (c == '\n') { UNGETC(c); nodelim = 1; } if (*ep == 0 && !sed) ERROR(41); RETURN(ep); } bracketp = bracket; circf = closed = nbra = 0; if (c == '^') circf++; else UNGETC(c); for (;;) { if (ep >= endbuf) ERROR(50); c = GETC(); if (c != '*' && ((c != '\\') || (PEEKC() != '{'))) lastep = ep; if (c == eof) { *ep++ = CCEOF; if (bracketp != bracket) ERROR(42); RETURN(ep); } switch (c) { case '.': *ep++ = CDOT; continue; case '\n': if (!sed) { UNGETC(c); *ep++ = CCEOF; nodelim = 1; if (bracketp != bracket) ERROR(42); RETURN(ep); } else ERROR(36); case '*': if (lastep == NULL || *lastep == CBRA || *lastep == CKET) goto defchar; *lastep |= STAR; continue; case '$': if (PEEKC() != eof && PEEKC() != '\n') goto defchar; *ep++ = CDOL; continue; case '[': if (&ep[17] >= endbuf) ERROR(50); *ep++ = CCL; lc = 0; for (i = 0; i < 16; i++) ep[i] = 0; neg = 0; if ((c = GETC()) == '^') { neg = 1; c = GETC(); } iflag = 1; do { c &= 0377; if (c == '\0' || c == '\n') ERROR(49); if ((c & 0200) && iflag) { iflag = 0; if (&ep[32] >= endbuf) ERROR(50); ep[-1] = CXCL; for (i = 16; i < 32; i++) ep[i] = 0; } if (c == '-' && lc != 0) { if ((c = GETC()) == ']') { PLACE('-'); break; } if ((c & 0200) && iflag) { iflag = 0; if (&ep[32] >= endbuf) ERROR(50); ep[-1] = CXCL; for (i = 16; i < 32; i++) ep[i] = 0; } while (lc < c) { PLACE(lc); lc++; } } lc = c; PLACE(c); } while ((c = GETC()) != ']'); if (iflag) iflag = 16; else iflag = 32; if (neg) { if (iflag == 32) { for (cclcnt = 0; cclcnt < iflag; cclcnt++) ep[cclcnt] ^= 0377; ep[0] &= 0376; } else { ep[-1] = NCCL; /* make nulls match so test fails */ ep[0] |= 01; } } ep += iflag; continue; case '\\': switch (c = GETC()) { case '(': if (nbra >= NBRA) ERROR(43); *bracketp++ = (char)nbra; *ep++ = CBRA; *ep++ = (char)nbra++; continue; case ')': if (bracketp <= bracket) ERROR(42); *ep++ = CKET; *ep++ = *--bracketp; closed++; continue; case '{': if (lastep == NULL) goto defchar; *lastep |= RNGE; cflg = 0; nlim: c = GETC(); i = 0; do { if ('0' <= c && c <= '9') i = 10 * i + c - '0'; else ERROR(16); } while (((c = GETC()) != '\\') && (c != ',')); if (i >= 255) ERROR(11); *ep++ = (char)i; if (c == ',') { if (cflg++) ERROR(44); if ((c = GETC()) == '\\') *ep++ = (char)255; else { UNGETC(c); goto nlim; /* get 2'nd number */ } } if (GETC() != '}') ERROR(45); if (!cflg) /* one number */ *ep++ = (char)i; else if ((ep[-1] & 0377) < (ep[-2] & 0377)) ERROR(46); continue; case '\n': ERROR(36); case 'n': c = '\n'; goto defchar; default: if (c >= '1' && c <= '9') { if ((c -= '1') >= closed) ERROR(25); *ep++ = CBACK; *ep++ = (char)c; continue; } } /* Drop through to default to use \ to turn off special chars */ defchar: default: lastep = ep; *ep++ = CCHR; *ep++ = (char)c; } } /*NOTREACHED*/ } int step(const char *p1, const char *p2) { char c; if (circf) { loc1 = (char *)p1; return (advance(p1, p2)); } /* fast check for first character */ if (*p2 == CCHR) { c = p2[1]; do { if (*p1 != c) continue; if (advance(p1, p2)) { loc1 = (char *)p1; return (1); } } while (*p1++); return (0); } /* regular algorithm */ do { if (advance(p1, p2)) { loc1 = (char *)p1; return (1); } } while (*p1++); return (0); } int advance(const char *lp, const char *ep) { const char *curlp; int c; char *bbeg; register char neg; size_t ct; for (;;) { neg = 0; switch (*ep++) { case CCHR: if (*ep++ == *lp++) continue; return (0); /*FALLTHRU*/ case CDOT: if (*lp++) continue; return (0); /*FALLTHRU*/ case CDOL: if (*lp == 0) continue; return (0); /*FALLTHRU*/ case CCEOF: loc2 = (char *)lp; return (1); /*FALLTHRU*/ case CXCL: c = (unsigned char)*lp++; if (ISTHERE(c)) { ep += 32; continue; } return (0); /*FALLTHRU*/ case NCCL: neg = 1; /*FALLTHRU*/ case CCL: c = *lp++; if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) { ep += 16; continue; } return (0); /*FALLTHRU*/ case CBRA: braslist[*ep++] = (char *)lp; continue; /*FALLTHRU*/ case CKET: braelist[*ep++] = (char *)lp; continue; /*FALLTHRU*/ case CCHR | RNGE: c = *ep++; getrnge(ep); while (low--) if (*lp++ != c) return (0); curlp = lp; while (size--) if (*lp++ != c) break; if (size < 0) lp++; ep += 2; goto star; /*FALLTHRU*/ case CDOT | RNGE: getrnge(ep); while (low--) if (*lp++ == '\0') return (0); curlp = lp; while (size--) if (*lp++ == '\0') break; if (size < 0) lp++; ep += 2; goto star; /*FALLTHRU*/ case CXCL | RNGE: getrnge(ep + 32); while (low--) { c = (unsigned char)*lp++; if (!ISTHERE(c)) return (0); } curlp = lp; while (size--) { c = (unsigned char)*lp++; if (!ISTHERE(c)) break; } if (size < 0) lp++; ep += 34; /* 32 + 2 */ goto star; /*FALLTHRU*/ case NCCL | RNGE: neg = 1; /*FALLTHRU*/ case CCL | RNGE: getrnge(ep + 16); while (low--) { c = *lp++; if (((c & 0200) || !ISTHERE(c)) ^ neg) return (0); } curlp = lp; while (size--) { c = *lp++; if (((c & 0200) || !ISTHERE(c)) ^ neg) break; } if (size < 0) lp++; ep += 18; /* 16 + 2 */ goto star; /*FALLTHRU*/ case CBACK: bbeg = braslist[*ep]; ct = braelist[*ep++] - bbeg; if (ecmp(bbeg, lp, ct)) { lp += ct; continue; } return (0); /*FALLTHRU*/ case CBACK | STAR: bbeg = braslist[*ep]; ct = braelist[*ep++] - bbeg; curlp = lp; while (ecmp(bbeg, lp, ct)) lp += ct; while (lp >= curlp) { if (advance(lp, ep)) return (1); lp -= ct; } return (0); /*FALLTHRU*/ case CDOT | STAR: curlp = lp; while (*lp++); goto star; /*FALLTHRU*/ case CCHR | STAR: curlp = lp; while (*lp++ == *ep); ep++; goto star; /*FALLTHRU*/ case CXCL | STAR: curlp = lp; do { c = (unsigned char)*lp++; } while (ISTHERE(c)); ep += 32; goto star; /*FALLTHRU*/ case NCCL | STAR: neg = 1; /*FALLTHRU*/ case CCL | STAR: curlp = lp; do { c = *lp++; } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg); ep += 16; goto star; /*FALLTHRU*/ star: do { if (--lp == locs) break; if (advance(lp, ep)) return (1); } while (lp > curlp); return (0); } } /*NOTREACHED*/ } static void getrnge(const char *str) { low = *str++ & 0377; size = ((*str & 0377) == 255)? 20000: (*str &0377) - low; } #ifdef __cplusplus } #endif #endif /* _REGEXP_H */