1*f1c4c3daSCy Schubert /* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2*f1c4c3daSCy Schubert /* regex.cpp - Glue routines to std::regex functions */
3*f1c4c3daSCy Schubert
4*f1c4c3daSCy Schubert /*
5*f1c4c3daSCy Schubert * Copyright (C) 2024 United States Government as represented by the
6*f1c4c3daSCy Schubert * Secretary of the Navy.
7*f1c4c3daSCy Schubert * All rights reserved.
8*f1c4c3daSCy Schubert *
9*f1c4c3daSCy Schubert * Redistribution and use in source and binary forms, with or without
10*f1c4c3daSCy Schubert * modification, are permitted provided that the following conditions
11*f1c4c3daSCy Schubert * are met:
12*f1c4c3daSCy Schubert *
13*f1c4c3daSCy Schubert * * Redistributions of source code must retain the above copyright
14*f1c4c3daSCy Schubert * notice, this list of conditions and the following disclaimer.
15*f1c4c3daSCy Schubert *
16*f1c4c3daSCy Schubert * * Redistributions in binary form must reproduce the above copyright
17*f1c4c3daSCy Schubert * notice, this list of conditions and the following disclaimer in
18*f1c4c3daSCy Schubert * the documentation and/or other materials provided with the
19*f1c4c3daSCy Schubert * distribution.
20*f1c4c3daSCy Schubert *
21*f1c4c3daSCy Schubert * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22*f1c4c3daSCy Schubert * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23*f1c4c3daSCy Schubert * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24*f1c4c3daSCy Schubert * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25*f1c4c3daSCy Schubert * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
26*f1c4c3daSCy Schubert * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27*f1c4c3daSCy Schubert * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28*f1c4c3daSCy Schubert * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29*f1c4c3daSCy Schubert * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
30*f1c4c3daSCy Schubert * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31*f1c4c3daSCy Schubert * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
32*f1c4c3daSCy Schubert * OF THE POSSIBILITY OF SUCH DAMAGE.
33*f1c4c3daSCy Schubert */
34*f1c4c3daSCy Schubert
35*f1c4c3daSCy Schubert /*
36*f1c4c3daSCy Schubert * These functions provide a mostly-complete POSIX regex(3)
37*f1c4c3daSCy Schubert * implementation using C++ std::regex. Deficiencies are noted below.
38*f1c4c3daSCy Schubert */
39*f1c4c3daSCy Schubert
#include "k5-platform.h"
#include "k5-regex.h"

#include <exception>
#include <regex>
44*f1c4c3daSCy Schubert
45*f1c4c3daSCy Schubert /*
46*f1c4c3daSCy Schubert * Our implementation of regcomp() which calls into std::regex. We implement
47*f1c4c3daSCy Schubert * the standard flags, but not the non-portable extensions present on some
48*f1c4c3daSCy Schubert * platforms.
49*f1c4c3daSCy Schubert */
50*f1c4c3daSCy Schubert extern "C" int
k5_regcomp(regex_t * preg,const char * pattern,int cflags)51*f1c4c3daSCy Schubert k5_regcomp(regex_t *preg, const char *pattern, int cflags)
52*f1c4c3daSCy Schubert {
53*f1c4c3daSCy Schubert std::regex *r;
54*f1c4c3daSCy Schubert std::regex_constants::syntax_option_type flags;
55*f1c4c3daSCy Schubert
56*f1c4c3daSCy Schubert memset(preg, 0, sizeof(*preg));
57*f1c4c3daSCy Schubert
58*f1c4c3daSCy Schubert flags = (cflags & REG_EXTENDED) ? std::regex::extended : std::regex::basic;
59*f1c4c3daSCy Schubert if (cflags & REG_ICASE)
60*f1c4c3daSCy Schubert flags |= std::regex::icase;
61*f1c4c3daSCy Schubert if (cflags & REG_NOSUB)
62*f1c4c3daSCy Schubert flags |= std::regex::nosubs;
63*f1c4c3daSCy Schubert
64*f1c4c3daSCy Schubert try {
65*f1c4c3daSCy Schubert r = new std::regex(pattern, flags);
66*f1c4c3daSCy Schubert preg->regex = r;
67*f1c4c3daSCy Schubert preg->re_nsub = r->mark_count();
68*f1c4c3daSCy Schubert } catch (std::regex_error& e) {
69*f1c4c3daSCy Schubert /* Save the error message in errmsg. We don't actually use the
70*f1c4c3daSCy Schubert * error code for anything; return REG_BADPAT for everything. */
71*f1c4c3daSCy Schubert strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg));
72*f1c4c3daSCy Schubert return REG_BADPAT;
73*f1c4c3daSCy Schubert }
74*f1c4c3daSCy Schubert
75*f1c4c3daSCy Schubert return 0;
76*f1c4c3daSCy Schubert }
77*f1c4c3daSCy Schubert
78*f1c4c3daSCy Schubert extern "C" int
k5_regexec(regex_t * preg,const char * string,size_t nmatch,regmatch_t pmatch[],int eflags)79*f1c4c3daSCy Schubert k5_regexec(regex_t *preg, const char *string, size_t nmatch,
80*f1c4c3daSCy Schubert regmatch_t pmatch[], int eflags)
81*f1c4c3daSCy Schubert {
82*f1c4c3daSCy Schubert size_t i;
83*f1c4c3daSCy Schubert std::cmatch cm;
84*f1c4c3daSCy Schubert std::regex_constants::match_flag_type flags;
85*f1c4c3daSCy Schubert std::regex *r = static_cast<std::regex *>(preg->regex);
86*f1c4c3daSCy Schubert
87*f1c4c3daSCy Schubert flags = std::regex_constants::match_default;
88*f1c4c3daSCy Schubert if (eflags & REG_NOTBOL)
89*f1c4c3daSCy Schubert flags |= std::regex_constants::match_not_bol;
90*f1c4c3daSCy Schubert if (eflags & REG_NOTEOL)
91*f1c4c3daSCy Schubert flags |= std::regex_constants::match_not_eol;
92*f1c4c3daSCy Schubert
93*f1c4c3daSCy Schubert try {
94*f1c4c3daSCy Schubert if (!std::regex_search(string, cm, *r, flags))
95*f1c4c3daSCy Schubert return REG_NOMATCH;
96*f1c4c3daSCy Schubert
97*f1c4c3daSCy Schubert /*
98*f1c4c3daSCy Schubert * If given, fill in pmatch with the full match string and any
99*f1c4c3daSCy Schubert * sub-matches. If we set nosub previously we shouldn't have any
100*f1c4c3daSCy Schubert * submatches, but should still have the first element which refers to
101*f1c4c3daSCy Schubert * the whole match string.
102*f1c4c3daSCy Schubert */
103*f1c4c3daSCy Schubert
104*f1c4c3daSCy Schubert for (i = 0; i < nmatch; i++) {
105*f1c4c3daSCy Schubert /*
106*f1c4c3daSCy Schubert * If we're past the end of the match list (cm.size()) or
107*f1c4c3daSCy Schubert * this sub-match didn't match (!cm[i].matched()) then
108*f1c4c3daSCy Schubert * return -1 for those array members.
109*f1c4c3daSCy Schubert */
110*f1c4c3daSCy Schubert if (i >= cm.size() || !cm[i].matched) {
111*f1c4c3daSCy Schubert pmatch[i].rm_so = pmatch[i].rm_eo = -1;
112*f1c4c3daSCy Schubert } else {
113*f1c4c3daSCy Schubert pmatch[i].rm_so = cm.position(i);
114*f1c4c3daSCy Schubert pmatch[i].rm_eo = cm.position(i) + cm.length(i);
115*f1c4c3daSCy Schubert }
116*f1c4c3daSCy Schubert }
117*f1c4c3daSCy Schubert } catch (std::regex_error& e) {
118*f1c4c3daSCy Schubert /* See above. */
119*f1c4c3daSCy Schubert strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg));
120*f1c4c3daSCy Schubert return REG_BADPAT;
121*f1c4c3daSCy Schubert }
122*f1c4c3daSCy Schubert
123*f1c4c3daSCy Schubert return 0;
124*f1c4c3daSCy Schubert }
125*f1c4c3daSCy Schubert
126*f1c4c3daSCy Schubert /*
127*f1c4c3daSCy Schubert * Report back an error string. We don't use the errcode for anything, just
128*f1c4c3daSCy Schubert * the error string stored in regex_t. If we don't have an error string,
129*f1c4c3daSCy Schubert * return an "unknown error" message.
130*f1c4c3daSCy Schubert */
131*f1c4c3daSCy Schubert extern "C" size_t
k5_regerror(int errcode,const regex_t * preg,char * errbuf,size_t errbuf_size)132*f1c4c3daSCy Schubert k5_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
133*f1c4c3daSCy Schubert {
134*f1c4c3daSCy Schubert const char *err;
135*f1c4c3daSCy Schubert size_t errlen;
136*f1c4c3daSCy Schubert
137*f1c4c3daSCy Schubert err = preg->errmsg;
138*f1c4c3daSCy Schubert if (*err == '\0')
139*f1c4c3daSCy Schubert err = "Unknown regular expression error";
140*f1c4c3daSCy Schubert
141*f1c4c3daSCy Schubert if (errbuf != NULL && errbuf_size > 0)
142*f1c4c3daSCy Schubert strlcpy(errbuf, err, errbuf_size);
143*f1c4c3daSCy Schubert return strlen(err);
144*f1c4c3daSCy Schubert }
145*f1c4c3daSCy Schubert
146*f1c4c3daSCy Schubert extern "C" void
k5_regfree(regex_t * preg)147*f1c4c3daSCy Schubert k5_regfree(regex_t *preg)
148*f1c4c3daSCy Schubert {
149*f1c4c3daSCy Schubert if (preg->regex == NULL)
150*f1c4c3daSCy Schubert return;
151*f1c4c3daSCy Schubert delete static_cast<std::regex *>(preg->regex);
152*f1c4c3daSCy Schubert memset(preg, 0, sizeof(*preg));
153*f1c4c3daSCy Schubert }
154