158f0484fSRodney W. Grimes /*-
28a16b7a1SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause
38a16b7a1SPedro F. Giffuni *
458f0484fSRodney W. Grimes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
558f0484fSRodney W. Grimes * Copyright (c) 1992, 1993, 1994
658f0484fSRodney W. Grimes * The Regents of the University of California. All rights reserved.
758f0484fSRodney W. Grimes *
858f0484fSRodney W. Grimes * This code is derived from software contributed to Berkeley by
958f0484fSRodney W. Grimes * Henry Spencer.
1058f0484fSRodney W. Grimes *
1158f0484fSRodney W. Grimes * Redistribution and use in source and binary forms, with or without
1258f0484fSRodney W. Grimes * modification, are permitted provided that the following conditions
1358f0484fSRodney W. Grimes * are met:
1458f0484fSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright
1558f0484fSRodney W. Grimes * notice, this list of conditions and the following disclaimer.
1658f0484fSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright
1758f0484fSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the
1858f0484fSRodney W. Grimes * documentation and/or other materials provided with the distribution.
19fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors
2058f0484fSRodney W. Grimes * may be used to endorse or promote products derived from this software
2158f0484fSRodney W. Grimes * without specific prior written permission.
2258f0484fSRodney W. Grimes *
2358f0484fSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2458f0484fSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2558f0484fSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2658f0484fSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2758f0484fSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2858f0484fSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2958f0484fSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3058f0484fSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3158f0484fSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3258f0484fSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3358f0484fSRodney W. Grimes * SUCH DAMAGE.
3458f0484fSRodney W. Grimes */
3558f0484fSRodney W. Grimes
3658f0484fSRodney W. Grimes /*
3758f0484fSRodney W. Grimes * the outer shell of regexec()
3858f0484fSRodney W. Grimes *
39e5996857STim J. Robbins * This file includes engine.c three times, after muchos fiddling with the
4058f0484fSRodney W. Grimes * macros that code uses. This lets the same code operate on two different
41e5996857STim J. Robbins * representations for state sets and characters.
4258f0484fSRodney W. Grimes */
4358f0484fSRodney W. Grimes #include <sys/types.h>
4458f0484fSRodney W. Grimes #include <stdio.h>
4558f0484fSRodney W. Grimes #include <stdlib.h>
4658f0484fSRodney W. Grimes #include <string.h>
4758f0484fSRodney W. Grimes #include <limits.h>
4858f0484fSRodney W. Grimes #include <ctype.h>
4958f0484fSRodney W. Grimes #include <regex.h>
50e5996857STim J. Robbins #include <wchar.h>
51e5996857STim J. Robbins #include <wctype.h>
5258f0484fSRodney W. Grimes
5358f0484fSRodney W. Grimes #include "utils.h"
5458f0484fSRodney W. Grimes #include "regex2.h"
5558f0484fSRodney W. Grimes
/*
 * Always-zero flag for use in assert() expressions; it exists only to keep
 * lint quiet.  Marked __unused because not every configuration of the
 * included code references it.
 */
static int nope __unused = 0;	/* for use in asserts; shuts lint up */
5758f0484fSRodney W. Grimes
/*
 * xmbrtowc - mbrtowc() wrapper that always consumes at least one byte
 *
 * Converts the multibyte character starting at s (looking at no more than
 * n bytes) using the conversion state *mbs.  When wi is non-NULL the
 * resulting character is stored through it.
 *
 * Returns the number of bytes consumed, which is never 0:
 *  - a decoded NUL (mbrtowc() returned 0) counts as one byte;
 *  - an invalid or incomplete sequence (mbrtowc() returned (size_t)-1 or
 *    (size_t)-2) zeroes *mbs, stores `dummy' through wi instead of a
 *    decoded character, and counts as one byte.
 */
static __inline size_t
xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
{
	size_t nr;
	wchar_t wc;

	nr = mbrtowc(&wc, s, n, mbs);
	if (nr == (size_t)-1 || nr == (size_t)-2) {
		/* mbrtowc() did not write wc on this path; never read it. */
		memset(mbs, 0, sizeof(*mbs));
		if (wi != NULL)
			*wi = dummy;
		return (1);
	}
	if (wi != NULL)
		*wi = wc;
	/* A NUL character still occupies one byte of input. */
	return (nr == 0 ? 1 : nr);
}
77e5996857STim J. Robbins
/*
 * xmbrtowc_dummy - single-byte stand-in with the same signature as xmbrtowc()
 *
 * Treats the byte at s as one complete character: when wi is non-NULL the
 * byte, taken as an unsigned char, is stored through it.  The length,
 * conversion state and dummy arguments are ignored.  Always returns 1.
 */
static __inline size_t
xmbrtowc_dummy(wint_t *wi,
		const char *s,
		size_t n __unused,
		mbstate_t *mbs __unused,
		wint_t dummy __unused)
{

	if (wi != NULL)
		*wi = (unsigned char)*s;
	return (1);
}
90e5996857STim J. Robbins
/*
 * macros for manipulating states, small version
 *
 * A state set is a single long used as a bit mask: state n is bit n.
 * A "onestate" is likewise a long holding exactly one set bit.
 */
#define	states1	long		/* for later use in regexec() decision */
#define	states	states1
#define	CLEAR(v)	((v) = 0)
#define	SET0(v, n)	((v) &= ~((unsigned long)1 << (n)))
#define	SET1(v, n)	((v) |= (unsigned long)1 << (n))
#define	ISSET(v, n)	(((v) & ((unsigned long)1 << (n))) != 0)
#define	ASSIGN(d, s)	((d) = (s))
#define	EQ(a, b)	((a) == (b))
#define	STATEVARS	long dummy	/* dummy version */
#define	STATESETUP(m, n)	/* nothing */
#define	STATETEARDOWN(m)	/* nothing */
#define	SETUP(v)	((v) = 0)
#define	onestate	long
#define	INIT(o, n)	((o) = (unsigned long)1 << (n))
#define	INC(o)	((o) <<= 1)
#define	ISSTATEIN(v, o)	(((v) & (o)) != 0)
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
/* (FWD, BACK and ISSETBACK expand to a use of a variable named `here') */
#define	FWD(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) << (n))
#define	BACK(dst, src, n)	((dst) |= ((unsigned long)(src)&(here)) >> (n))
#define	ISSETBACK(v, n)	(((v) & ((unsigned long)here >> (n))) != 0)
/* no multibyte support: every byte is one character, no state to reset */
#define	XMBRTOWC	xmbrtowc_dummy
#define	ZAPSTATE(mbs)	((void)(mbs))
/* function names */
#define SNAMES			/* engine.c looks after details */
11858f0484fSRodney W. Grimes
11958f0484fSRodney W. Grimes #include "engine.c"
12058f0484fSRodney W. Grimes
/*
 * now undo things
 *
 * Every macro of the small version is removed so it can be redefined
 * below.  states1 is deliberately left defined: regexec() needs its size
 * when choosing a matcher.
 */
#undef	states
#undef	CLEAR
#undef	SET0
#undef	SET1
#undef	ISSET
#undef	ASSIGN
#undef	EQ
#undef	STATEVARS
#undef	STATESETUP
#undef	STATETEARDOWN
#undef	SETUP
#undef	onestate
#undef	INIT
#undef	INC
#undef	ISSTATEIN
#undef	FWD
#undef	BACK
#undef	ISSETBACK
#undef	SNAMES
#undef	XMBRTOWC
#undef	ZAPSTATE
14358f0484fSRodney W. Grimes
/*
 * macros for manipulating states, large version
 *
 * A state set is an array of m->g->nstates chars, one per state (nonzero
 * means the state is in the set).  A "onestate" is a state index.
 * CLEAR, ASSIGN, EQ and SETUP expand to uses of a variable named `m'.
 */
#define	states	char *
#define	CLEAR(v)	memset(v, 0, m->g->nstates)
#define	SET0(v, n)	((v)[n] = 0)
#define	SET1(v, n)	((v)[n] = 1)
#define	ISSET(v, n)	((v)[n])
#define	ASSIGN(d, s)	memcpy(d, s, m->g->nstates)
#define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
#define	STATEVARS	long vn; char *space
/* one malloc'd block holds all nv state sets; fails with REG_ESPACE */
#define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
				if ((m)->space == NULL) return(REG_ESPACE); \
				(m)->vn = 0; }
#define	STATETEARDOWN(m)	{ free((m)->space); }
/* hand out the next nstates-sized slice of the block */
#define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
#define	onestate	long
#define	INIT(o, n)	((o) = (n))
#define	INC(o)	((o)++)
#define	ISSTATEIN(v, o)	((v)[o])
/* some abbreviations; note that some of these know variable names! */
/* do "if I'm here, I can also be there" etc without branches */
/* (FWD, BACK and ISSETBACK expand to a use of a variable named `here') */
#define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
#define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
#define	ISSETBACK(v, n)	((v)[here - (n)])
/* no multibyte support: every byte is one character, no state to reset */
#define	XMBRTOWC	xmbrtowc_dummy
#define	ZAPSTATE(mbs)	((void)(mbs))
/* function names */
#define	LNAMES		/* flag */
17258f0484fSRodney W. Grimes
17358f0484fSRodney W. Grimes #include "engine.c"
17458f0484fSRodney W. Grimes
/*
 * multibyte character & large states version
 *
 * The large-version state macros stay in force; only the character
 * decoding hooks and the function-name flag are swapped.
 */
#undef	LNAMES
#undef	XMBRTOWC
#undef	ZAPSTATE
#define	XMBRTOWC	xmbrtowc
/* reset a conversion state by zero-filling it */
#define	ZAPSTATE(mbs)	memset((mbs), 0, sizeof(*(mbs)))
#define	MNAMES
182e5996857STim J. Robbins
183e5996857STim J. Robbins #include "engine.c"
184e5996857STim J. Robbins
18558f0484fSRodney W. Grimes /*
18658f0484fSRodney W. Grimes - regexec - interface for matching
18758f0484fSRodney W. Grimes = extern int regexec(const regex_t *, const char *, size_t, \
18858f0484fSRodney W. Grimes = regmatch_t [], int);
18958f0484fSRodney W. Grimes = #define REG_NOTBOL 00001
19058f0484fSRodney W. Grimes = #define REG_NOTEOL 00002
19158f0484fSRodney W. Grimes = #define REG_STARTEND 00004
19258f0484fSRodney W. Grimes = #define REG_TRACE 00400 // tracing of execution
19358f0484fSRodney W. Grimes = #define REG_LARGE 01000 // force large representation
19458f0484fSRodney W. Grimes = #define REG_BACKR 02000 // force use of backref code
19558f0484fSRodney W. Grimes *
19658f0484fSRodney W. Grimes * We put this here so we can exploit knowledge of the state representation
19758f0484fSRodney W. Grimes * when choosing which matcher to call. Also, by this point the matchers
19858f0484fSRodney W. Grimes * have been prototyped.
19958f0484fSRodney W. Grimes */
20058f0484fSRodney W. Grimes int /* 0 success, REG_NOMATCH failure */
regexec(const regex_t * __restrict preg,const char * __restrict string,size_t nmatch,regmatch_t pmatch[__restrict],int eflags)20154a648d1SXin LI regexec(const regex_t * __restrict preg,
20254a648d1SXin LI const char * __restrict string,
20354a648d1SXin LI size_t nmatch,
20454a648d1SXin LI regmatch_t pmatch[__restrict],
20554a648d1SXin LI int eflags)
20658f0484fSRodney W. Grimes {
2078fb3f3f6SDavid E. O'Brien struct re_guts *g = preg->re_g;
20858f0484fSRodney W. Grimes #ifdef REDEBUG
20958f0484fSRodney W. Grimes # define GOODFLAGS(f) (f)
21058f0484fSRodney W. Grimes #else
21158f0484fSRodney W. Grimes # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
21258f0484fSRodney W. Grimes #endif
21358f0484fSRodney W. Grimes
21458f0484fSRodney W. Grimes if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
21558f0484fSRodney W. Grimes return(REG_BADPAT);
21658f0484fSRodney W. Grimes assert(!(g->iflags&BAD));
21758f0484fSRodney W. Grimes if (g->iflags&BAD) /* backstop for no-debug case */
21858f0484fSRodney W. Grimes return(REG_BADPAT);
21958f0484fSRodney W. Grimes eflags = GOODFLAGS(eflags);
22058f0484fSRodney W. Grimes
221e5996857STim J. Robbins if (MB_CUR_MAX > 1)
2228d0f9a93SPedro F. Giffuni return(mmatcher(g, string, nmatch, pmatch, eflags));
223e5996857STim J. Robbins else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE))
2248d0f9a93SPedro F. Giffuni return(smatcher(g, string, nmatch, pmatch, eflags));
22558f0484fSRodney W. Grimes else
2268d0f9a93SPedro F. Giffuni return(lmatcher(g, string, nmatch, pmatch, eflags));
22758f0484fSRodney W. Grimes }
228