1 /* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 /* regex.cpp - Glue routines to std::regex functions */ 3 4 /* 5 * Copyright (C) 2024 United States Government as represented by the 6 * Secretary of the Navy. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * * Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 16 * * Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in 18 * the documentation and/or other materials provided with the 19 * distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 26 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 27 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 30 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 32 * OF THE POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * These functions provide a mostly-complete POSIX regex(3) 37 * implementation using C++ std::regex. Deficiencies are noted below. 38 */ 39 40 #include "k5-platform.h" 41 #include "k5-regex.h" 42 43 #include <regex> 44 45 /* 46 * Our implementation of regcomp() which calls into std::regex. We implement 47 * the standard flags, but not the non-portable extensions present on some 48 * platforms. 49 */ 50 extern "C" int 51 k5_regcomp(regex_t *preg, const char *pattern, int cflags) 52 { 53 std::regex *r; 54 std::regex_constants::syntax_option_type flags; 55 56 memset(preg, 0, sizeof(*preg)); 57 58 flags = (cflags & REG_EXTENDED) ? std::regex::extended : std::regex::basic; 59 if (cflags & REG_ICASE) 60 flags |= std::regex::icase; 61 if (cflags & REG_NOSUB) 62 flags |= std::regex::nosubs; 63 64 try { 65 r = new std::regex(pattern, flags); 66 preg->regex = r; 67 preg->re_nsub = r->mark_count(); 68 } catch (std::regex_error& e) { 69 /* Save the error message in errmsg. We don't actually use the 70 * error code for anything; return REG_BADPAT for everything. */ 71 strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg)); 72 return REG_BADPAT; 73 } 74 75 return 0; 76 } 77 78 extern "C" int 79 k5_regexec(regex_t *preg, const char *string, size_t nmatch, 80 regmatch_t pmatch[], int eflags) 81 { 82 size_t i; 83 std::cmatch cm; 84 std::regex_constants::match_flag_type flags; 85 std::regex *r = static_cast<std::regex *>(preg->regex); 86 87 flags = std::regex_constants::match_default; 88 if (eflags & REG_NOTBOL) 89 flags |= std::regex_constants::match_not_bol; 90 if (eflags & REG_NOTEOL) 91 flags |= std::regex_constants::match_not_eol; 92 93 try { 94 if (!std::regex_search(string, cm, *r, flags)) 95 return REG_NOMATCH; 96 97 /* 98 * If given, fill in pmatch with the full match string and any 99 * sub-matches. If we set nosub previously we shouldn't have any 100 * submatches, but should still have the first element which refers to 101 * the whole match string. 102 */ 103 104 for (i = 0; i < nmatch; i++) { 105 /* 106 * If we're past the end of the match list (cm.size()) or 107 * this sub-match didn't match (!cm[i].matched()) then 108 * return -1 for those array members. 109 */ 110 if (i >= cm.size() || !cm[i].matched) { 111 pmatch[i].rm_so = pmatch[i].rm_eo = -1; 112 } else { 113 pmatch[i].rm_so = cm.position(i); 114 pmatch[i].rm_eo = cm.position(i) + cm.length(i); 115 } 116 } 117 } catch (std::regex_error& e) { 118 /* See above. */ 119 strlcpy(preg->errmsg, e.what(), sizeof(preg->errmsg)); 120 return REG_BADPAT; 121 } 122 123 return 0; 124 } 125 126 /* 127 * Report back an error string. We don't use the errcode for anything, just 128 * the error string stored in regex_t. If we don't have an error string, 129 * return an "unknown error" message. 130 */ 131 extern "C" size_t 132 k5_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) 133 { 134 const char *err; 135 size_t errlen; 136 137 err = preg->errmsg; 138 if (*err == '\0') 139 err = "Unknown regular expression error"; 140 141 if (errbuf != NULL && errbuf_size > 0) 142 strlcpy(errbuf, err, errbuf_size); 143 return strlen(err); 144 } 145 146 extern "C" void 147 k5_regfree(regex_t *preg) 148 { 149 if (preg->regex == NULL) 150 return; 151 delete static_cast<std::regex *>(preg->regex); 152 memset(preg, 0, sizeof(*preg)); 153 } 154