1 /* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* regex.cpp - Glue routines to std::regex functions */
3
4 /*
5 * Copyright (C) 2024 United States Government as represented by the
6 * Secretary of the Navy.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * * Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * * Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
26 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
30 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
32 * OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*
36 * These functions provide a mostly-complete POSIX regex(3)
37 * implementation using C++ std::regex. Deficiencies are noted below.
38 */
39
40 #include "k5-platform.h"
41 #include "k5-regex.h"
42
43 #include <regex>
44
45 /*
46 * Our implementation of regcomp() which calls into std::regex. We implement
47 * the standard flags, but not the non-portable extensions present on some
48 * platforms.
49 */
50 extern "C" int
k5_regcomp(regex_t * preg,const char * pattern,int cflags)51 k5_regcomp(regex_t *preg, const char *pattern, int cflags)
52 {
53 std::regex *r;
54 std::regex_constants::syntax_option_type flags;
55
56 memset(preg, 0, sizeof(*preg));
57
58 flags = (cflags & REG_EXTENDED) ? std::regex::extended : std::regex::basic;
59 if (cflags & REG_ICASE)
60 flags |= std::regex::icase;
61 if (cflags & REG_NOSUB)
62 flags |= std::regex::nosubs;
63
64 try {
65 r = new std::regex(pattern, flags);
66 preg->regex = r;
67 preg->re_nsub = r->mark_count();
68 } catch (std::regex_error& e) {
69 /* Save the error message in errmsg. We don't actually use the
70 * error code for anything; return REG_BADPAT for everything. */
71 strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg));
72 return REG_BADPAT;
73 }
74
75 return 0;
76 }
77
78 extern "C" int
k5_regexec(regex_t * preg,const char * string,size_t nmatch,regmatch_t pmatch[],int eflags)79 k5_regexec(regex_t *preg, const char *string, size_t nmatch,
80 regmatch_t pmatch[], int eflags)
81 {
82 size_t i;
83 std::cmatch cm;
84 std::regex_constants::match_flag_type flags;
85 std::regex *r = static_cast<std::regex *>(preg->regex);
86
87 flags = std::regex_constants::match_default;
88 if (eflags & REG_NOTBOL)
89 flags |= std::regex_constants::match_not_bol;
90 if (eflags & REG_NOTEOL)
91 flags |= std::regex_constants::match_not_eol;
92
93 try {
94 if (!std::regex_search(string, cm, *r, flags))
95 return REG_NOMATCH;
96
97 /*
98 * If given, fill in pmatch with the full match string and any
99 * sub-matches. If we set nosub previously we shouldn't have any
100 * submatches, but should still have the first element which refers to
101 * the whole match string.
102 */
103
104 for (i = 0; i < nmatch; i++) {
105 /*
106 * If we're past the end of the match list (cm.size()) or
107 * this sub-match didn't match (!cm[i].matched()) then
108 * return -1 for those array members.
109 */
110 if (i >= cm.size() || !cm[i].matched) {
111 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
112 } else {
113 pmatch[i].rm_so = cm.position(i);
114 pmatch[i].rm_eo = cm.position(i) + cm.length(i);
115 }
116 }
117 } catch (std::regex_error& e) {
118 /* See above. */
119 strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg));
120 return REG_BADPAT;
121 }
122
123 return 0;
124 }
125
126 /*
127 * Report back an error string. We don't use the errcode for anything, just
128 * the error string stored in regex_t. If we don't have an error string,
129 * return an "unknown error" message.
130 */
131 extern "C" size_t
k5_regerror(int errcode,const regex_t * preg,char * errbuf,size_t errbuf_size)132 k5_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
133 {
134 const char *err;
135 size_t errlen;
136
137 err = preg->errmsg;
138 if (*err == '\0')
139 err = "Unknown regular expression error";
140
141 if (errbuf != NULL && errbuf_size > 0)
142 strlcpy(errbuf, err, errbuf_size);
143 return strlen(err);
144 }
145
146 extern "C" void
k5_regfree(regex_t * preg)147 k5_regfree(regex_t *preg)
148 {
149 if (preg->regex == NULL)
150 return;
151 delete static_cast<std::regex *>(preg->regex);
152 memset(preg, 0, sizeof(*preg));
153 }
154