1 //===-------------------------- regex.cpp ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "regex" 10 #include "algorithm" 11 #include "iterator" 12 13 _LIBCPP_BEGIN_NAMESPACE_STD 14 15 static 16 const char* 17 make_error_type_string(regex_constants::error_type ecode) 18 { 19 switch (ecode) 20 { 21 case regex_constants::error_collate: 22 return "The expression contained an invalid collating element name."; 23 case regex_constants::error_ctype: 24 return "The expression contained an invalid character class name."; 25 case regex_constants::error_escape: 26 return "The expression contained an invalid escaped character, or a " 27 "trailing escape."; 28 case regex_constants::error_backref: 29 return "The expression contained an invalid back reference."; 30 case regex_constants::error_brack: 31 return "The expression contained mismatched [ and ]."; 32 case regex_constants::error_paren: 33 return "The expression contained mismatched ( and )."; 34 case regex_constants::error_brace: 35 return "The expression contained mismatched { and }."; 36 case regex_constants::error_badbrace: 37 return "The expression contained an invalid range in a {} expression."; 38 case regex_constants::error_range: 39 return "The expression contained an invalid character range, " 40 "such as [b-a] in most encodings."; 41 case regex_constants::error_space: 42 return "There was insufficient memory to convert the expression into " 43 "a finite state machine."; 44 case regex_constants::error_badrepeat: 45 return "One of *?+{ was not preceded by a valid regular expression."; 46 case regex_constants::error_complexity: 47 return "The complexity of an attempted match against a regular " 48 "expression exceeded a pre-set level."; 49 case regex_constants::error_stack: 50 return "There was insufficient memory to determine whether the regular " 51 "expression could match the specified character sequence."; 52 case regex_constants::__re_err_grammar: 53 return "An invalid regex grammar has been requested."; 54 case regex_constants::__re_err_empty: 55 return "An empty regex is not allowed in the POSIX grammar."; 56 default: 57 break; 58 } 59 return "Unknown error type"; 60 } 61 62 regex_error::regex_error(regex_constants::error_type ecode) 63 : runtime_error(make_error_type_string(ecode)), 64 __code_(ecode) 65 {} 66 67 regex_error::~regex_error() throw() {} 68 69 namespace { 70 71 struct collationnames 72 { 73 const char* elem_; 74 char char_; 75 }; 76 77 const collationnames collatenames[] = 78 { 79 {"A", 0x41}, 80 {"B", 0x42}, 81 {"C", 0x43}, 82 {"D", 0x44}, 83 {"E", 0x45}, 84 {"F", 0x46}, 85 {"G", 0x47}, 86 {"H", 0x48}, 87 {"I", 0x49}, 88 {"J", 0x4a}, 89 {"K", 0x4b}, 90 {"L", 0x4c}, 91 {"M", 0x4d}, 92 {"N", 0x4e}, 93 {"NUL", 0x00}, 94 {"O", 0x4f}, 95 {"P", 0x50}, 96 {"Q", 0x51}, 97 {"R", 0x52}, 98 {"S", 0x53}, 99 {"T", 0x54}, 100 {"U", 0x55}, 101 {"V", 0x56}, 102 {"W", 0x57}, 103 {"X", 0x58}, 104 {"Y", 0x59}, 105 {"Z", 0x5a}, 106 {"a", 0x61}, 107 {"alert", 0x07}, 108 {"ampersand", 0x26}, 109 {"apostrophe", 0x27}, 110 {"asterisk", 0x2a}, 111 {"b", 0x62}, 112 {"backslash", 0x5c}, 113 {"backspace", 0x08}, 114 {"c", 0x63}, 115 {"carriage-return", 0x0d}, 116 {"circumflex", 0x5e}, 117 {"circumflex-accent", 0x5e}, 118 {"colon", 0x3a}, 119 {"comma", 0x2c}, 120 {"commercial-at", 0x40}, 121 {"d", 0x64}, 122 {"dollar-sign", 0x24}, 123 {"e", 0x65}, 124 {"eight", 0x38}, 125 {"equals-sign", 0x3d}, 126 {"exclamation-mark", 0x21}, 127 {"f", 0x66}, 128 {"five", 0x35}, 129 {"form-feed", 0x0c}, 130 {"four", 0x34}, 131 {"full-stop", 0x2e}, 132 {"g", 0x67}, 133 {"grave-accent", 0x60}, 134 {"greater-than-sign", 0x3e}, 135 {"h", 0x68}, 136 {"hyphen", 0x2d}, 137 {"hyphen-minus", 0x2d}, 138 {"i", 0x69}, 139 {"j", 0x6a}, 140 {"k", 0x6b}, 141 {"l", 0x6c}, 142 {"left-brace", 0x7b}, 143 {"left-curly-bracket", 0x7b}, 144 {"left-parenthesis", 0x28}, 145 {"left-square-bracket", 0x5b}, 146 {"less-than-sign", 0x3c}, 147 {"low-line", 0x5f}, 148 {"m", 0x6d}, 149 {"n", 0x6e}, 150 {"newline", 0x0a}, 151 {"nine", 0x39}, 152 {"number-sign", 0x23}, 153 {"o", 0x6f}, 154 {"one", 0x31}, 155 {"p", 0x70}, 156 {"percent-sign", 0x25}, 157 {"period", 0x2e}, 158 {"plus-sign", 0x2b}, 159 {"q", 0x71}, 160 {"question-mark", 0x3f}, 161 {"quotation-mark", 0x22}, 162 {"r", 0x72}, 163 {"reverse-solidus", 0x5c}, 164 {"right-brace", 0x7d}, 165 {"right-curly-bracket", 0x7d}, 166 {"right-parenthesis", 0x29}, 167 {"right-square-bracket", 0x5d}, 168 {"s", 0x73}, 169 {"semicolon", 0x3b}, 170 {"seven", 0x37}, 171 {"six", 0x36}, 172 {"slash", 0x2f}, 173 {"solidus", 0x2f}, 174 {"space", 0x20}, 175 {"t", 0x74}, 176 {"tab", 0x09}, 177 {"three", 0x33}, 178 {"tilde", 0x7e}, 179 {"two", 0x32}, 180 {"u", 0x75}, 181 {"underscore", 0x5f}, 182 {"v", 0x76}, 183 {"vertical-line", 0x7c}, 184 {"vertical-tab", 0x0b}, 185 {"w", 0x77}, 186 {"x", 0x78}, 187 {"y", 0x79}, 188 {"z", 0x7a}, 189 {"zero", 0x30} 190 }; 191 192 struct classnames 193 { 194 const char* elem_; 195 regex_traits<char>::char_class_type mask_; 196 }; 197 198 const classnames ClassNames[] = 199 { 200 {"alnum", ctype_base::alnum}, 201 {"alpha", ctype_base::alpha}, 202 {"blank", ctype_base::blank}, 203 {"cntrl", ctype_base::cntrl}, 204 {"d", ctype_base::digit}, 205 {"digit", ctype_base::digit}, 206 {"graph", ctype_base::graph}, 207 {"lower", ctype_base::lower}, 208 {"print", ctype_base::print}, 209 {"punct", ctype_base::punct}, 210 {"s", ctype_base::space}, 211 {"space", ctype_base::space}, 212 {"upper", ctype_base::upper}, 213 {"w", regex_traits<char>::__regex_word}, 214 {"xdigit", ctype_base::xdigit} 215 }; 216 217 struct use_strcmp 218 { 219 bool operator()(const collationnames& x, const char* y) 220 {return strcmp(x.elem_, y) < 0;} 221 bool operator()(const classnames& x, const char* y) 222 {return strcmp(x.elem_, y) < 0;} 223 }; 224 225 } 226 227 string 228 __get_collation_name(const char* s) 229 { 230 const collationnames* i = 231 _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); 232 string r; 233 if (i != end(collatenames) && strcmp(s, i->elem_) == 0) 234 r = char(i->char_); 235 return r; 236 } 237 238 regex_traits<char>::char_class_type 239 __get_classname(const char* s, bool __icase) 240 { 241 const classnames* i = 242 _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); 243 regex_traits<char>::char_class_type r = 0; 244 if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) 245 { 246 r = i->mask_; 247 if (r == regex_traits<char>::__regex_word) 248 r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; 249 else if (__icase) 250 { 251 if (r & (ctype_base::lower | ctype_base::upper)) 252 r |= ctype_base::alpha; 253 } 254 } 255 return r; 256 } 257 258 template <> 259 void 260 __match_any_but_newline<char>::__exec(__state& __s) const 261 { 262 if (__s.__current_ != __s.__last_) 263 { 264 switch (*__s.__current_) 265 { 266 case '\r': 267 case '\n': 268 __s.__do_ = __state::__reject; 269 __s.__node_ = nullptr; 270 break; 271 default: 272 __s.__do_ = __state::__accept_and_consume; 273 ++__s.__current_; 274 __s.__node_ = this->first(); 275 break; 276 } 277 } 278 else 279 { 280 __s.__do_ = __state::__reject; 281 __s.__node_ = nullptr; 282 } 283 } 284 285 template <> 286 void 287 __match_any_but_newline<wchar_t>::__exec(__state& __s) const 288 { 289 if (__s.__current_ != __s.__last_) 290 { 291 switch (*__s.__current_) 292 { 293 case '\r': 294 case '\n': 295 case 0x2028: 296 case 0x2029: 297 __s.__do_ = __state::__reject; 298 __s.__node_ = nullptr; 299 break; 300 default: 301 __s.__do_ = __state::__accept_and_consume; 302 ++__s.__current_; 303 __s.__node_ = this->first(); 304 break; 305 } 306 } 307 else 308 { 309 __s.__do_ = __state::__reject; 310 __s.__node_ = nullptr; 311 } 312 } 313 314 _LIBCPP_END_NAMESPACE_STD 315