xref: /freebsd/crypto/krb5/src/util/support/regex.cpp (revision f1c4c3daccbaf3820f0e2224de53df12fc952fcc)
1*f1c4c3daSCy Schubert /* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2*f1c4c3daSCy Schubert /* regex.cpp - Glue routines to std::regex functions */
3*f1c4c3daSCy Schubert 
4*f1c4c3daSCy Schubert /*
5*f1c4c3daSCy Schubert  * Copyright (C) 2024 United States Government as represented by the
6*f1c4c3daSCy Schubert  * Secretary of the Navy.
7*f1c4c3daSCy Schubert  * All rights reserved.
8*f1c4c3daSCy Schubert  *
9*f1c4c3daSCy Schubert  * Redistribution and use in source and binary forms, with or without
10*f1c4c3daSCy Schubert  * modification, are permitted provided that the following conditions
11*f1c4c3daSCy Schubert  * are met:
12*f1c4c3daSCy Schubert  *
13*f1c4c3daSCy Schubert  * * Redistributions of source code must retain the above copyright
14*f1c4c3daSCy Schubert  *   notice, this list of conditions and the following disclaimer.
15*f1c4c3daSCy Schubert  *
16*f1c4c3daSCy Schubert  * * Redistributions in binary form must reproduce the above copyright
17*f1c4c3daSCy Schubert  *   notice, this list of conditions and the following disclaimer in
18*f1c4c3daSCy Schubert  *   the documentation and/or other materials provided with the
19*f1c4c3daSCy Schubert  *   distribution.
20*f1c4c3daSCy Schubert  *
21*f1c4c3daSCy Schubert  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22*f1c4c3daSCy Schubert  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23*f1c4c3daSCy Schubert  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24*f1c4c3daSCy Schubert  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25*f1c4c3daSCy Schubert  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
26*f1c4c3daSCy Schubert  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27*f1c4c3daSCy Schubert  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28*f1c4c3daSCy Schubert  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29*f1c4c3daSCy Schubert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
30*f1c4c3daSCy Schubert  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31*f1c4c3daSCy Schubert  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
32*f1c4c3daSCy Schubert  * OF THE POSSIBILITY OF SUCH DAMAGE.
33*f1c4c3daSCy Schubert  */
34*f1c4c3daSCy Schubert 
35*f1c4c3daSCy Schubert /*
36*f1c4c3daSCy Schubert  * These functions provide a mostly-complete POSIX regex(3)
37*f1c4c3daSCy Schubert  * implementation using C++ std::regex.  Deficiencies are noted below.
38*f1c4c3daSCy Schubert  */
39*f1c4c3daSCy Schubert 
40*f1c4c3daSCy Schubert #include "k5-platform.h"
41*f1c4c3daSCy Schubert #include "k5-regex.h"
42*f1c4c3daSCy Schubert 
43*f1c4c3daSCy Schubert #include <regex>
44*f1c4c3daSCy Schubert 
45*f1c4c3daSCy Schubert /*
46*f1c4c3daSCy Schubert  * Our implementation of regcomp() which calls into std::regex.  We implement
47*f1c4c3daSCy Schubert  * the standard flags, but not the non-portable extensions present on some
48*f1c4c3daSCy Schubert  * platforms.
49*f1c4c3daSCy Schubert  */
50*f1c4c3daSCy Schubert extern "C" int
k5_regcomp(regex_t * preg,const char * pattern,int cflags)51*f1c4c3daSCy Schubert k5_regcomp(regex_t *preg, const char *pattern, int cflags)
52*f1c4c3daSCy Schubert {
53*f1c4c3daSCy Schubert     std::regex *r;
54*f1c4c3daSCy Schubert     std::regex_constants::syntax_option_type flags;
55*f1c4c3daSCy Schubert 
56*f1c4c3daSCy Schubert     memset(preg, 0, sizeof(*preg));
57*f1c4c3daSCy Schubert 
58*f1c4c3daSCy Schubert     flags = (cflags & REG_EXTENDED) ? std::regex::extended : std::regex::basic;
59*f1c4c3daSCy Schubert     if (cflags & REG_ICASE)
60*f1c4c3daSCy Schubert         flags |= std::regex::icase;
61*f1c4c3daSCy Schubert     if (cflags & REG_NOSUB)
62*f1c4c3daSCy Schubert         flags |= std::regex::nosubs;
63*f1c4c3daSCy Schubert 
64*f1c4c3daSCy Schubert     try {
65*f1c4c3daSCy Schubert         r = new std::regex(pattern, flags);
66*f1c4c3daSCy Schubert         preg->regex = r;
67*f1c4c3daSCy Schubert         preg->re_nsub = r->mark_count();
68*f1c4c3daSCy Schubert     } catch (std::regex_error& e) {
69*f1c4c3daSCy Schubert         /* Save the error message in errmsg.  We don't actually use the
70*f1c4c3daSCy Schubert          * error code for anything; return REG_BADPAT for everything. */
71*f1c4c3daSCy Schubert         strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg));
72*f1c4c3daSCy Schubert         return REG_BADPAT;
73*f1c4c3daSCy Schubert     }
74*f1c4c3daSCy Schubert 
75*f1c4c3daSCy Schubert     return 0;
76*f1c4c3daSCy Schubert }
77*f1c4c3daSCy Schubert 
78*f1c4c3daSCy Schubert extern "C" int
k5_regexec(regex_t * preg,const char * string,size_t nmatch,regmatch_t pmatch[],int eflags)79*f1c4c3daSCy Schubert k5_regexec(regex_t *preg, const char *string, size_t nmatch,
80*f1c4c3daSCy Schubert            regmatch_t pmatch[], int eflags)
81*f1c4c3daSCy Schubert {
82*f1c4c3daSCy Schubert     size_t i;
83*f1c4c3daSCy Schubert     std::cmatch cm;
84*f1c4c3daSCy Schubert     std::regex_constants::match_flag_type flags;
85*f1c4c3daSCy Schubert     std::regex *r = static_cast<std::regex *>(preg->regex);
86*f1c4c3daSCy Schubert 
87*f1c4c3daSCy Schubert     flags = std::regex_constants::match_default;
88*f1c4c3daSCy Schubert     if (eflags & REG_NOTBOL)
89*f1c4c3daSCy Schubert         flags |= std::regex_constants::match_not_bol;
90*f1c4c3daSCy Schubert     if (eflags & REG_NOTEOL)
91*f1c4c3daSCy Schubert         flags |= std::regex_constants::match_not_eol;
92*f1c4c3daSCy Schubert 
93*f1c4c3daSCy Schubert     try {
94*f1c4c3daSCy Schubert         if (!std::regex_search(string, cm, *r, flags))
95*f1c4c3daSCy Schubert             return REG_NOMATCH;
96*f1c4c3daSCy Schubert 
97*f1c4c3daSCy Schubert         /*
98*f1c4c3daSCy Schubert          * If given, fill in pmatch with the full match string and any
99*f1c4c3daSCy Schubert          * sub-matches.  If we set nosub previously we shouldn't have any
100*f1c4c3daSCy Schubert          * submatches, but should still have the first element which refers to
101*f1c4c3daSCy Schubert          * the whole match string.
102*f1c4c3daSCy Schubert          */
103*f1c4c3daSCy Schubert 
104*f1c4c3daSCy Schubert         for (i = 0; i < nmatch; i++) {
105*f1c4c3daSCy Schubert             /*
106*f1c4c3daSCy Schubert              * If we're past the end of the match list (cm.size()) or
107*f1c4c3daSCy Schubert              * this sub-match didn't match (!cm[i].matched()) then
108*f1c4c3daSCy Schubert              * return -1 for those array members.
109*f1c4c3daSCy Schubert              */
110*f1c4c3daSCy Schubert             if (i >= cm.size() || !cm[i].matched) {
111*f1c4c3daSCy Schubert                 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
112*f1c4c3daSCy Schubert             } else {
113*f1c4c3daSCy Schubert                 pmatch[i].rm_so = cm.position(i);
114*f1c4c3daSCy Schubert                 pmatch[i].rm_eo = cm.position(i) + cm.length(i);
115*f1c4c3daSCy Schubert             }
116*f1c4c3daSCy Schubert         }
117*f1c4c3daSCy Schubert     } catch (std::regex_error& e) {
118*f1c4c3daSCy Schubert         /* See above. */
119*f1c4c3daSCy Schubert         strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg));
120*f1c4c3daSCy Schubert         return REG_BADPAT;
121*f1c4c3daSCy Schubert     }
122*f1c4c3daSCy Schubert 
123*f1c4c3daSCy Schubert     return 0;
124*f1c4c3daSCy Schubert }
125*f1c4c3daSCy Schubert 
126*f1c4c3daSCy Schubert /*
127*f1c4c3daSCy Schubert  * Report back an error string.  We don't use the errcode for anything, just
128*f1c4c3daSCy Schubert  * the error string stored in regex_t.  If we don't have an error string,
129*f1c4c3daSCy Schubert  * return an "unknown error" message.
130*f1c4c3daSCy Schubert  */
131*f1c4c3daSCy Schubert extern "C" size_t
k5_regerror(int errcode,const regex_t * preg,char * errbuf,size_t errbuf_size)132*f1c4c3daSCy Schubert k5_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
133*f1c4c3daSCy Schubert {
134*f1c4c3daSCy Schubert     const char *err;
135*f1c4c3daSCy Schubert     size_t errlen;
136*f1c4c3daSCy Schubert 
137*f1c4c3daSCy Schubert     err = preg->errmsg;
138*f1c4c3daSCy Schubert     if (*err == '\0')
139*f1c4c3daSCy Schubert         err = "Unknown regular expression error";
140*f1c4c3daSCy Schubert 
141*f1c4c3daSCy Schubert     if (errbuf != NULL && errbuf_size > 0)
142*f1c4c3daSCy Schubert         strlcpy(errbuf, err, errbuf_size);
143*f1c4c3daSCy Schubert     return strlen(err);
144*f1c4c3daSCy Schubert }
145*f1c4c3daSCy Schubert 
146*f1c4c3daSCy Schubert extern "C" void
k5_regfree(regex_t * preg)147*f1c4c3daSCy Schubert k5_regfree(regex_t *preg)
148*f1c4c3daSCy Schubert {
149*f1c4c3daSCy Schubert     if (preg->regex == NULL)
150*f1c4c3daSCy Schubert         return;
151*f1c4c3daSCy Schubert     delete static_cast<std::regex *>(preg->regex);
152*f1c4c3daSCy Schubert     memset(preg, 0, sizeof(*preg));
153*f1c4c3daSCy Schubert }
154