14297a3b0SGarrett D'Amore /* 2*6b5e5868SGarrett D'Amore * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 34297a3b0SGarrett D'Amore * Copyright (c) 1992, 1993, 1994 Henry Spencer. 44297a3b0SGarrett D'Amore * Copyright (c) 1992, 1993, 1994 54297a3b0SGarrett D'Amore * The Regents of the University of California. All rights reserved. 64297a3b0SGarrett D'Amore * 74297a3b0SGarrett D'Amore * This code is derived from software contributed to Berkeley by 84297a3b0SGarrett D'Amore * Henry Spencer. 94297a3b0SGarrett D'Amore * 104297a3b0SGarrett D'Amore * Redistribution and use in source and binary forms, with or without 114297a3b0SGarrett D'Amore * modification, are permitted provided that the following conditions 124297a3b0SGarrett D'Amore * are met: 134297a3b0SGarrett D'Amore * 1. Redistributions of source code must retain the above copyright 144297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer. 154297a3b0SGarrett D'Amore * 2. Redistributions in binary form must reproduce the above copyright 164297a3b0SGarrett D'Amore * notice, this list of conditions and the following disclaimer in the 174297a3b0SGarrett D'Amore * documentation and/or other materials provided with the distribution. 184297a3b0SGarrett D'Amore * 4. Neither the name of the University nor the names of its contributors 194297a3b0SGarrett D'Amore * may be used to endorse or promote products derived from this software 204297a3b0SGarrett D'Amore * without specific prior written permission. 214297a3b0SGarrett D'Amore * 224297a3b0SGarrett D'Amore * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 234297a3b0SGarrett D'Amore * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 244297a3b0SGarrett D'Amore * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 254297a3b0SGarrett D'Amore * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 264297a3b0SGarrett D'Amore * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 274297a3b0SGarrett D'Amore * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 284297a3b0SGarrett D'Amore * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 294297a3b0SGarrett D'Amore * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 304297a3b0SGarrett D'Amore * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 314297a3b0SGarrett D'Amore * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 324297a3b0SGarrett D'Amore * SUCH DAMAGE. 334297a3b0SGarrett D'Amore */ 344297a3b0SGarrett D'Amore 354297a3b0SGarrett D'Amore /* 364297a3b0SGarrett D'Amore * the outer shell of regexec() 374297a3b0SGarrett D'Amore * 384297a3b0SGarrett D'Amore * This file includes engine.c three times, after muchos fiddling with the 394297a3b0SGarrett D'Amore * macros that code uses. This lets the same code operate on two different 404297a3b0SGarrett D'Amore * representations for state sets and characters. 414297a3b0SGarrett D'Amore */ 424297a3b0SGarrett D'Amore #include "lint.h" 434297a3b0SGarrett D'Amore #include "file64.h" 444297a3b0SGarrett D'Amore #include <sys/types.h> 454297a3b0SGarrett D'Amore #include <stdio.h> 464297a3b0SGarrett D'Amore #include <stdlib.h> 474297a3b0SGarrett D'Amore #include <string.h> 484297a3b0SGarrett D'Amore #include <limits.h> 494297a3b0SGarrett D'Amore #include <ctype.h> 504297a3b0SGarrett D'Amore #include <regex.h> 514297a3b0SGarrett D'Amore #include <wchar.h> 524297a3b0SGarrett D'Amore #include <wctype.h> 534297a3b0SGarrett D'Amore #include <note.h> 544297a3b0SGarrett D'Amore #include <assert.h> 554297a3b0SGarrett D'Amore 564297a3b0SGarrett D'Amore #include "utils.h" 574297a3b0SGarrett D'Amore #include "regex2.h" 584297a3b0SGarrett D'Amore 594297a3b0SGarrett D'Amore /* we want _NOTE, but not NOTE (which collides with our own use) */ 604297a3b0SGarrett D'Amore #undef NOTE 614297a3b0SGarrett D'Amore 624297a3b0SGarrett D'Amore static size_t 634297a3b0SGarrett D'Amore xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy) 644297a3b0SGarrett D'Amore { 654297a3b0SGarrett D'Amore size_t nr; 664297a3b0SGarrett D'Amore wchar_t wc; 674297a3b0SGarrett D'Amore 684297a3b0SGarrett D'Amore nr = mbrtowc(&wc, s, n, mbs); 694297a3b0SGarrett D'Amore if (wi != NULL) 704297a3b0SGarrett D'Amore *wi = wc; 714297a3b0SGarrett D'Amore if (nr == 0) 724297a3b0SGarrett D'Amore return (1); 734297a3b0SGarrett D'Amore else if (nr == (size_t)-1 || nr == (size_t)-2) { 744297a3b0SGarrett D'Amore (void) memset(mbs, 0, sizeof (*mbs)); 754297a3b0SGarrett D'Amore if (wi != NULL) 764297a3b0SGarrett D'Amore *wi = dummy; 774297a3b0SGarrett D'Amore return (1); 784297a3b0SGarrett D'Amore } else 794297a3b0SGarrett D'Amore return (nr); 804297a3b0SGarrett D'Amore } 814297a3b0SGarrett D'Amore 824297a3b0SGarrett D'Amore static size_t 834297a3b0SGarrett D'Amore xmbrtowc_dummy(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, 844297a3b0SGarrett D'Amore wint_t dummy) 854297a3b0SGarrett D'Amore { 864297a3b0SGarrett D'Amore _NOTE(ARGUNUSED(n)); 874297a3b0SGarrett D'Amore _NOTE(ARGUNUSED(mbs)); 884297a3b0SGarrett D'Amore _NOTE(ARGUNUSED(dummy)); 894297a3b0SGarrett D'Amore 904297a3b0SGarrett D'Amore if (wi != NULL) 914297a3b0SGarrett D'Amore *wi = (unsigned char)*s; 924297a3b0SGarrett D'Amore return (1); 934297a3b0SGarrett D'Amore } 944297a3b0SGarrett D'Amore 954297a3b0SGarrett D'Amore /* macros for manipulating states, small version */ 964297a3b0SGarrett D'Amore #define states long 974297a3b0SGarrett D'Amore #define states1 states /* for later use in regexec() decision */ 984297a3b0SGarrett D'Amore #define CLEAR(v) ((v) = 0) 994297a3b0SGarrett D'Amore #define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) 1004297a3b0SGarrett D'Amore #define SET1(v, n) ((v) |= (unsigned long)1 << (n)) 1014297a3b0SGarrett D'Amore #define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0) 1024297a3b0SGarrett D'Amore #define ASSIGN(d, s) ((d) = (s)) 1034297a3b0SGarrett D'Amore #define EQ(a, b) ((a) == (b)) 1044297a3b0SGarrett D'Amore #define STATEVARS long dummy /* dummy version */ 1054297a3b0SGarrett D'Amore #define STATESETUP(m, n) /* nothing */ 1064297a3b0SGarrett D'Amore #define STATETEARDOWN(m) /* nothing */ 1074297a3b0SGarrett D'Amore #define SETUP(v) ((v) = 0) 1084297a3b0SGarrett D'Amore #define onestate long 1094297a3b0SGarrett D'Amore #define INIT(o, n) ((o) = (unsigned long)1 << (n)) 1104297a3b0SGarrett D'Amore #define INC(o) ((o) <<= 1) 1114297a3b0SGarrett D'Amore #define ISSTATEIN(v, o) (((v) & (o)) != 0) 1124297a3b0SGarrett D'Amore /* some abbreviations; note that some of these know variable names! */ 1134297a3b0SGarrett D'Amore /* do "if I'm here, I can also be there" etc without branches */ 1144297a3b0SGarrett D'Amore #define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) 1154297a3b0SGarrett D'Amore #define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) 1164297a3b0SGarrett D'Amore #define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) 1174297a3b0SGarrett D'Amore /* no multibyte support */ 1184297a3b0SGarrett D'Amore #define XMBRTOWC xmbrtowc_dummy 1194297a3b0SGarrett D'Amore #define ZAPSTATE(mbs) ((void)(mbs)) 1204297a3b0SGarrett D'Amore /* function names */ 1214297a3b0SGarrett D'Amore #define SNAMES /* engine.c looks after details */ 1224297a3b0SGarrett D'Amore 1234297a3b0SGarrett D'Amore #include "engine.c" 1244297a3b0SGarrett D'Amore 1254297a3b0SGarrett D'Amore /* now undo things */ 1264297a3b0SGarrett D'Amore #undef states 1274297a3b0SGarrett D'Amore #undef CLEAR 1284297a3b0SGarrett D'Amore #undef SET0 1294297a3b0SGarrett D'Amore #undef SET1 1304297a3b0SGarrett D'Amore #undef ISSET 1314297a3b0SGarrett D'Amore #undef ASSIGN 1324297a3b0SGarrett D'Amore #undef EQ 1334297a3b0SGarrett D'Amore #undef STATEVARS 1344297a3b0SGarrett D'Amore #undef STATESETUP 1354297a3b0SGarrett D'Amore #undef STATETEARDOWN 1364297a3b0SGarrett D'Amore #undef SETUP 1374297a3b0SGarrett D'Amore #undef onestate 1384297a3b0SGarrett D'Amore #undef INIT 1394297a3b0SGarrett D'Amore #undef INC 1404297a3b0SGarrett D'Amore #undef ISSTATEIN 1414297a3b0SGarrett D'Amore #undef FWD 1424297a3b0SGarrett D'Amore #undef BACK 1434297a3b0SGarrett D'Amore #undef ISSETBACK 1444297a3b0SGarrett D'Amore #undef SNAMES 1454297a3b0SGarrett D'Amore #undef XMBRTOWC 1464297a3b0SGarrett D'Amore #undef ZAPSTATE 1474297a3b0SGarrett D'Amore 1484297a3b0SGarrett D'Amore /* macros for manipulating states, large version */ 1494297a3b0SGarrett D'Amore #define states char * 1504297a3b0SGarrett D'Amore #define CLEAR(v) (void) memset(v, 0, m->g->nstates) 1514297a3b0SGarrett D'Amore #define SET0(v, n) ((v)[n] = 0) 1524297a3b0SGarrett D'Amore #define SET1(v, n) ((v)[n] = 1) 1534297a3b0SGarrett D'Amore #define ISSET(v, n) ((v)[n]) 1544297a3b0SGarrett D'Amore #define ASSIGN(d, s) (void) memcpy(d, s, m->g->nstates) 1554297a3b0SGarrett D'Amore #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) 1564297a3b0SGarrett D'Amore #define STATEVARS long vn; char *space 1574297a3b0SGarrett D'Amore #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ 1584297a3b0SGarrett D'Amore if ((m)->space == NULL) \ 1594297a3b0SGarrett D'Amore return (REG_ESPACE); \ 1604297a3b0SGarrett D'Amore (m)->vn = 0; } 1614297a3b0SGarrett D'Amore #define STATETEARDOWN(m) { free((m)->space); } 1624297a3b0SGarrett D'Amore #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) 1634297a3b0SGarrett D'Amore #define onestate long 1644297a3b0SGarrett D'Amore #define INIT(o, n) ((o) = (n)) 1654297a3b0SGarrett D'Amore #define INC(o) ((o)++) 1664297a3b0SGarrett D'Amore #define ISSTATEIN(v, o) ((v)[o]) 1674297a3b0SGarrett D'Amore /* some abbreviations; note that some of these know variable names! */ 1684297a3b0SGarrett D'Amore /* do "if I'm here, I can also be there" etc without branches */ 1694297a3b0SGarrett D'Amore #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) 1704297a3b0SGarrett D'Amore #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) 1714297a3b0SGarrett D'Amore #define ISSETBACK(v, n) ((v)[here - (n)]) 1724297a3b0SGarrett D'Amore /* no multibyte support */ 1734297a3b0SGarrett D'Amore #define XMBRTOWC xmbrtowc_dummy 1744297a3b0SGarrett D'Amore #define ZAPSTATE(mbs) ((void)(mbs)) 1754297a3b0SGarrett D'Amore /* function names */ 1764297a3b0SGarrett D'Amore #define LNAMES /* flag */ 1774297a3b0SGarrett D'Amore 1784297a3b0SGarrett D'Amore #include "engine.c" 1794297a3b0SGarrett D'Amore 1804297a3b0SGarrett D'Amore /* multibyte character & large states version */ 1814297a3b0SGarrett D'Amore #undef LNAMES 1824297a3b0SGarrett D'Amore #undef XMBRTOWC 1834297a3b0SGarrett D'Amore #undef ZAPSTATE 1844297a3b0SGarrett D'Amore #define XMBRTOWC xmbrtowc 1854297a3b0SGarrett D'Amore #define ZAPSTATE(mbs) (void) memset((mbs), 0, sizeof (*(mbs))) 1864297a3b0SGarrett D'Amore #define MNAMES 1874297a3b0SGarrett D'Amore 1884297a3b0SGarrett D'Amore #include "engine.c" 1894297a3b0SGarrett D'Amore 1904297a3b0SGarrett D'Amore /* 1914297a3b0SGarrett D'Amore * regexec - interface for matching 1924297a3b0SGarrett D'Amore * 1934297a3b0SGarrett D'Amore * We put this here so we can exploit knowledge of the state representation 1944297a3b0SGarrett D'Amore * when choosing which matcher to call. Also, by this point the matchers 1954297a3b0SGarrett D'Amore * have been prototyped. 1964297a3b0SGarrett D'Amore */ 1974297a3b0SGarrett D'Amore int /* 0 success, REG_NOMATCH failure */ 1984297a3b0SGarrett D'Amore regexec(const regex_t *_RESTRICT_KYWD preg, 1994297a3b0SGarrett D'Amore const char *_RESTRICT_KYWD string, size_t nmatch, 2004297a3b0SGarrett D'Amore regmatch_t pmatch[_RESTRICT_KYWD], int eflags) 2014297a3b0SGarrett D'Amore { 2024297a3b0SGarrett D'Amore struct re_guts *g = preg->re_g; 2034297a3b0SGarrett D'Amore #ifdef REDEBUG 2044297a3b0SGarrett D'Amore #define GOODFLAGS(f) (f) 2054297a3b0SGarrett D'Amore #else 2064297a3b0SGarrett D'Amore #ifdef REG_STARTEND 2074297a3b0SGarrett D'Amore #define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) 2084297a3b0SGarrett D'Amore #else 2094297a3b0SGarrett D'Amore #define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL)) 2104297a3b0SGarrett D'Amore #endif 2114297a3b0SGarrett D'Amore #endif 2124297a3b0SGarrett D'Amore 2134297a3b0SGarrett D'Amore if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) 2144297a3b0SGarrett D'Amore return (REG_BADPAT); 2154297a3b0SGarrett D'Amore assert(!(g->iflags&BAD)); 2164297a3b0SGarrett D'Amore if (g->iflags&BAD) /* backstop for no-debug case */ 2174297a3b0SGarrett D'Amore return (REG_BADPAT); 2184297a3b0SGarrett D'Amore eflags = GOODFLAGS(eflags); 2194297a3b0SGarrett D'Amore 2204297a3b0SGarrett D'Amore if (MB_CUR_MAX > 1) 2214297a3b0SGarrett D'Amore return (mmatcher(g, (char *)string, nmatch, pmatch, eflags)); 2224297a3b0SGarrett D'Amore #ifdef REG_LARGE 2234297a3b0SGarrett D'Amore else if (g->nstates <= CHAR_BIT*sizeof (states1) && !(eflags®_LARGE)) 2244297a3b0SGarrett D'Amore #else 2254297a3b0SGarrett D'Amore else if (g->nstates <= CHAR_BIT*sizeof (states1)) 2264297a3b0SGarrett D'Amore #endif 2274297a3b0SGarrett D'Amore return (smatcher(g, (char *)string, nmatch, pmatch, eflags)); 2284297a3b0SGarrett D'Amore else 2294297a3b0SGarrett D'Amore return (lmatcher(g, (char *)string, nmatch, pmatch, eflags)); 2304297a3b0SGarrett D'Amore } 231