xref: /freebsd/crypto/krb5/src/util/support/regex.cpp (revision f1c4c3daccbaf3820f0e2224de53df12fc952fcc)
1 /* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* regex.cpp - Glue routines to std::regex functions */
3 
4 /*
5  * Copyright (C) 2024 United States Government as represented by the
6  * Secretary of the Navy.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * * Redistributions of source code must retain the above copyright
14  *   notice, this list of conditions and the following disclaimer.
15  *
16  * * Redistributions in binary form must reproduce the above copyright
17  *   notice, this list of conditions and the following disclaimer in
18  *   the documentation and/or other materials provided with the
19  *   distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
26  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
30  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
32  * OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * These functions provide a mostly-complete POSIX regex(3)
37  * implementation using C++ std::regex.  Deficiencies are noted below.
38  */
39 
40 #include "k5-platform.h"
41 #include "k5-regex.h"
42 
43 #include <regex>
44 
45 /*
46  * Our implementation of regcomp() which calls into std::regex.  We implement
47  * the standard flags, but not the non-portable extensions present on some
48  * platforms.
49  */
50 extern "C" int
k5_regcomp(regex_t * preg,const char * pattern,int cflags)51 k5_regcomp(regex_t *preg, const char *pattern, int cflags)
52 {
53     std::regex *r;
54     std::regex_constants::syntax_option_type flags;
55 
56     memset(preg, 0, sizeof(*preg));
57 
58     flags = (cflags & REG_EXTENDED) ? std::regex::extended : std::regex::basic;
59     if (cflags & REG_ICASE)
60         flags |= std::regex::icase;
61     if (cflags & REG_NOSUB)
62         flags |= std::regex::nosubs;
63 
64     try {
65         r = new std::regex(pattern, flags);
66         preg->regex = r;
67         preg->re_nsub = r->mark_count();
68     } catch (std::regex_error& e) {
69         /* Save the error message in errmsg.  We don't actually use the
70          * error code for anything; return REG_BADPAT for everything. */
71         strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg));
72         return REG_BADPAT;
73     }
74 
75     return 0;
76 }
77 
78 extern "C" int
k5_regexec(regex_t * preg,const char * string,size_t nmatch,regmatch_t pmatch[],int eflags)79 k5_regexec(regex_t *preg, const char *string, size_t nmatch,
80            regmatch_t pmatch[], int eflags)
81 {
82     size_t i;
83     std::cmatch cm;
84     std::regex_constants::match_flag_type flags;
85     std::regex *r = static_cast<std::regex *>(preg->regex);
86 
87     flags = std::regex_constants::match_default;
88     if (eflags & REG_NOTBOL)
89         flags |= std::regex_constants::match_not_bol;
90     if (eflags & REG_NOTEOL)
91         flags |= std::regex_constants::match_not_eol;
92 
93     try {
94         if (!std::regex_search(string, cm, *r, flags))
95             return REG_NOMATCH;
96 
97         /*
98          * If given, fill in pmatch with the full match string and any
99          * sub-matches.  If we set nosub previously we shouldn't have any
100          * submatches, but should still have the first element which refers to
101          * the whole match string.
102          */
103 
104         for (i = 0; i < nmatch; i++) {
105             /*
106              * If we're past the end of the match list (cm.size()) or
107              * this sub-match didn't match (!cm[i].matched()) then
108              * return -1 for those array members.
109              */
110             if (i >= cm.size() || !cm[i].matched) {
111                 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
112             } else {
113                 pmatch[i].rm_so = cm.position(i);
114                 pmatch[i].rm_eo = cm.position(i) + cm.length(i);
115             }
116         }
117     } catch (std::regex_error& e) {
118         /* See above. */
119         strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg));
120         return REG_BADPAT;
121     }
122 
123     return 0;
124 }
125 
126 /*
127  * Report back an error string.  We don't use the errcode for anything, just
128  * the error string stored in regex_t.  If we don't have an error string,
129  * return an "unknown error" message.
130  */
131 extern "C" size_t
k5_regerror(int errcode,const regex_t * preg,char * errbuf,size_t errbuf_size)132 k5_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
133 {
134     const char *err;
135     size_t errlen;
136 
137     err = preg->errmsg;
138     if (*err == '\0')
139         err = "Unknown regular expression error";
140 
141     if (errbuf != NULL && errbuf_size > 0)
142         strlcpy(errbuf, err, errbuf_size);
143     return strlen(err);
144 }
145 
146 extern "C" void
k5_regfree(regex_t * preg)147 k5_regfree(regex_t *preg)
148 {
149     if (preg->regex == NULL)
150         return;
151     delete static_cast<std::regex *>(preg->regex);
152     memset(preg, 0, sizeof(*preg));
153 }
154