1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include <algorithm> 10 #include <iterator> 11 #include <regex> 12 13 _LIBCPP_BEGIN_NAMESPACE_STD 14 15 static const char* make_error_type_string(regex_constants::error_type ecode) { 16 switch (ecode) { 17 case regex_constants::error_collate: 18 return "The expression contained an invalid collating element name."; 19 case regex_constants::error_ctype: 20 return "The expression contained an invalid character class name."; 21 case regex_constants::error_escape: 22 return "The expression contained an invalid escaped character, or a " 23 "trailing escape."; 24 case regex_constants::error_backref: 25 return "The expression contained an invalid back reference."; 26 case regex_constants::error_brack: 27 return "The expression contained mismatched [ and ]."; 28 case regex_constants::error_paren: 29 return "The expression contained mismatched ( and )."; 30 case regex_constants::error_brace: 31 return "The expression contained mismatched { and }."; 32 case regex_constants::error_badbrace: 33 return "The expression contained an invalid range in a {} expression."; 34 case regex_constants::error_range: 35 return "The expression contained an invalid character range, " 36 "such as [b-a] in most encodings."; 37 case regex_constants::error_space: 38 return "There was insufficient memory to convert the expression into " 39 "a finite state machine."; 40 case regex_constants::error_badrepeat: 41 return "One of *?+{ was not preceded by a valid regular expression."; 42 case regex_constants::error_complexity: 43 return "The complexity of an attempted match against a regular " 44 "expression exceeded a pre-set level."; 45 case regex_constants::error_stack: 46 return "There was insufficient memory to determine whether the regular " 47 "expression could match the specified character sequence."; 48 case regex_constants::__re_err_grammar: 49 return "An invalid regex grammar has been requested."; 50 case regex_constants::__re_err_empty: 51 return "An empty regex is not allowed in the POSIX grammar."; 52 case regex_constants::__re_err_parse: 53 return "The parser did not consume the entire regular expression."; 54 default: 55 break; 56 } 57 return "Unknown error type"; 58 } 59 60 regex_error::regex_error(regex_constants::error_type ecode) 61 : runtime_error(make_error_type_string(ecode)), __code_(ecode) {} 62 63 regex_error::~regex_error() throw() {} 64 65 namespace { 66 67 struct collationnames { 68 const char* elem_; 69 char char_; 70 }; 71 72 #if defined(__MVS__) && !defined(__NATIVE_ASCII_F) 73 // EBCDIC IBM-1047 74 // Sorted via the EBCDIC collating sequence 75 const collationnames collatenames[] = { 76 {"a", 0x81}, 77 {"alert", 0x2f}, 78 {"ampersand", 0x50}, 79 {"apostrophe", 0x7d}, 80 {"asterisk", 0x5c}, 81 {"b", 0x82}, 82 {"backslash", 0xe0}, 83 {"backspace", 0x16}, 84 {"c", 0x83}, 85 {"carriage-return", 0xd}, 86 {"circumflex", 0x5f}, 87 {"circumflex-accent", 0x5f}, 88 {"colon", 0x7a}, 89 {"comma", 0x6b}, 90 {"commercial-at", 0x7c}, 91 {"d", 0x84}, 92 {"dollar-sign", 0x5b}, 93 {"e", 0x85}, 94 {"eight", 0xf8}, 95 {"equals-sign", 0x7e}, 96 {"exclamation-mark", 0x5a}, 97 {"f", 0x86}, 98 {"five", 0xf5}, 99 {"form-feed", 0xc}, 100 {"four", 0xf4}, 101 {"full-stop", 0x4b}, 102 {"g", 0x87}, 103 {"grave-accent", 0x79}, 104 {"greater-than-sign", 0x6e}, 105 {"h", 0x88}, 106 {"hyphen", 0x60}, 107 {"hyphen-minus", 0x60}, 108 {"i", 0x89}, 109 {"j", 0x91}, 110 {"k", 0x92}, 111 {"l", 0x93}, 112 {"left-brace", 0xc0}, 113 {"left-curly-bracket", 0xc0}, 114 {"left-parenthesis", 0x4d}, 115 {"left-square-bracket", 0xad}, 116 {"less-than-sign", 0x4c}, 117 {"low-line", 0x6d}, 118 {"m", 0x94}, 119 {"n", 0x95}, 120 {"newline", 0x15}, 121 {"nine", 0xf9}, 122 {"number-sign", 0x7b}, 123 {"o", 0x96}, 124 {"one", 0xf1}, 125 {"p", 0x97}, 126 {"percent-sign", 0x6c}, 127 {"period", 0x4b}, 128 {"plus-sign", 0x4e}, 129 {"q", 0x98}, 130 {"question-mark", 0x6f}, 131 {"quotation-mark", 0x7f}, 132 {"r", 0x99}, 133 {"reverse-solidus", 0xe0}, 134 {"right-brace", 0xd0}, 135 {"right-curly-bracket", 0xd0}, 136 {"right-parenthesis", 0x5d}, 137 {"right-square-bracket", 0xbd}, 138 {"s", 0xa2}, 139 {"semicolon", 0x5e}, 140 {"seven", 0xf7}, 141 {"six", 0xf6}, 142 {"slash", 0x61}, 143 {"solidus", 0x61}, 144 {"space", 0x40}, 145 {"t", 0xa3}, 146 {"tab", 0x5}, 147 {"three", 0xf3}, 148 {"tilde", 0xa1}, 149 {"two", 0xf2}, 150 {"u", 0xa4}, 151 {"underscore", 0x6d}, 152 {"v", 0xa5}, 153 {"vertical-line", 0x4f}, 154 {"vertical-tab", 0xb}, 155 {"w", 0xa6}, 156 {"x", 0xa7}, 157 {"y", 0xa8}, 158 {"z", 0xa9}, 159 {"zero", 0xf0}, 160 {"A", 0xc1}, 161 {"B", 0xc2}, 162 {"C", 0xc3}, 163 {"D", 0xc4}, 164 {"E", 0xc5}, 165 {"F", 0xc6}, 166 {"G", 0xc7}, 167 {"H", 0xc8}, 168 {"I", 0xc9}, 169 {"J", 0xd1}, 170 {"K", 0xd2}, 171 {"L", 0xd3}, 172 {"M", 0xd4}, 173 {"N", 0xd5}, 174 {"NUL", 0}, 175 {"O", 0xd6}, 176 {"P", 0xd7}, 177 {"Q", 0xd8}, 178 {"R", 0xd9}, 179 {"S", 0xe2}, 180 {"T", 0xe3}, 181 {"U", 0xe4}, 182 {"V", 0xe5}, 183 {"W", 0xe6}, 184 {"X", 0xe7}, 185 {"Y", 0xe8}, 186 {"Z", 0xe9}}; 187 #else 188 // ASCII 189 const collationnames collatenames[] = { 190 {"A", 0x41}, 191 {"B", 0x42}, 192 {"C", 0x43}, 193 {"D", 0x44}, 194 {"E", 0x45}, 195 {"F", 0x46}, 196 {"G", 0x47}, 197 {"H", 0x48}, 198 {"I", 0x49}, 199 {"J", 0x4a}, 200 {"K", 0x4b}, 201 {"L", 0x4c}, 202 {"M", 0x4d}, 203 {"N", 0x4e}, 204 {"NUL", 0x00}, 205 {"O", 0x4f}, 206 {"P", 0x50}, 207 {"Q", 0x51}, 208 {"R", 0x52}, 209 {"S", 0x53}, 210 {"T", 0x54}, 211 {"U", 0x55}, 212 {"V", 0x56}, 213 {"W", 0x57}, 214 {"X", 0x58}, 215 {"Y", 0x59}, 216 {"Z", 0x5a}, 217 {"a", 0x61}, 218 {"alert", 0x07}, 219 {"ampersand", 0x26}, 220 {"apostrophe", 0x27}, 221 {"asterisk", 0x2a}, 222 {"b", 0x62}, 223 {"backslash", 0x5c}, 224 {"backspace", 0x08}, 225 {"c", 0x63}, 226 {"carriage-return", 0x0d}, 227 {"circumflex", 0x5e}, 228 {"circumflex-accent", 0x5e}, 229 {"colon", 0x3a}, 230 {"comma", 0x2c}, 231 {"commercial-at", 0x40}, 232 {"d", 0x64}, 233 {"dollar-sign", 0x24}, 234 {"e", 0x65}, 235 {"eight", 0x38}, 236 {"equals-sign", 0x3d}, 237 {"exclamation-mark", 0x21}, 238 {"f", 0x66}, 239 {"five", 0x35}, 240 {"form-feed", 0x0c}, 241 {"four", 0x34}, 242 {"full-stop", 0x2e}, 243 {"g", 0x67}, 244 {"grave-accent", 0x60}, 245 {"greater-than-sign", 0x3e}, 246 {"h", 0x68}, 247 {"hyphen", 0x2d}, 248 {"hyphen-minus", 0x2d}, 249 {"i", 0x69}, 250 {"j", 0x6a}, 251 {"k", 0x6b}, 252 {"l", 0x6c}, 253 {"left-brace", 0x7b}, 254 {"left-curly-bracket", 0x7b}, 255 {"left-parenthesis", 0x28}, 256 {"left-square-bracket", 0x5b}, 257 {"less-than-sign", 0x3c}, 258 {"low-line", 0x5f}, 259 {"m", 0x6d}, 260 {"n", 0x6e}, 261 {"newline", 0x0a}, 262 {"nine", 0x39}, 263 {"number-sign", 0x23}, 264 {"o", 0x6f}, 265 {"one", 0x31}, 266 {"p", 0x70}, 267 {"percent-sign", 0x25}, 268 {"period", 0x2e}, 269 {"plus-sign", 0x2b}, 270 {"q", 0x71}, 271 {"question-mark", 0x3f}, 272 {"quotation-mark", 0x22}, 273 {"r", 0x72}, 274 {"reverse-solidus", 0x5c}, 275 {"right-brace", 0x7d}, 276 {"right-curly-bracket", 0x7d}, 277 {"right-parenthesis", 0x29}, 278 {"right-square-bracket", 0x5d}, 279 {"s", 0x73}, 280 {"semicolon", 0x3b}, 281 {"seven", 0x37}, 282 {"six", 0x36}, 283 {"slash", 0x2f}, 284 {"solidus", 0x2f}, 285 {"space", 0x20}, 286 {"t", 0x74}, 287 {"tab", 0x09}, 288 {"three", 0x33}, 289 {"tilde", 0x7e}, 290 {"two", 0x32}, 291 {"u", 0x75}, 292 {"underscore", 0x5f}, 293 {"v", 0x76}, 294 {"vertical-line", 0x7c}, 295 {"vertical-tab", 0x0b}, 296 {"w", 0x77}, 297 {"x", 0x78}, 298 {"y", 0x79}, 299 {"z", 0x7a}, 300 {"zero", 0x30}}; 301 #endif 302 303 struct classnames { 304 const char* elem_; 305 regex_traits<char>::char_class_type mask_; 306 }; 307 308 const classnames ClassNames[] = { 309 {"alnum", ctype_base::alnum}, 310 {"alpha", ctype_base::alpha}, 311 {"blank", ctype_base::blank}, 312 {"cntrl", ctype_base::cntrl}, 313 {"d", ctype_base::digit}, 314 {"digit", ctype_base::digit}, 315 {"graph", ctype_base::graph}, 316 {"lower", ctype_base::lower}, 317 {"print", ctype_base::print}, 318 {"punct", ctype_base::punct}, 319 {"s", ctype_base::space}, 320 {"space", ctype_base::space}, 321 {"upper", ctype_base::upper}, 322 {"w", regex_traits<char>::__regex_word}, 323 {"xdigit", ctype_base::xdigit}}; 324 325 struct use_strcmp { 326 bool operator()(const collationnames& x, const char* y) { return strcmp(x.elem_, y) < 0; } 327 bool operator()(const classnames& x, const char* y) { return strcmp(x.elem_, y) < 0; } 328 }; 329 330 } // namespace 331 332 string __get_collation_name(const char* s) { 333 const collationnames* i = std::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); 334 string r; 335 if (i != end(collatenames) && strcmp(s, i->elem_) == 0) 336 r = char(i->char_); 337 return r; 338 } 339 340 regex_traits<char>::char_class_type __get_classname(const char* s, bool __icase) { 341 const classnames* i = std::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); 342 regex_traits<char>::char_class_type r = 0; 343 if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) { 344 r = i->mask_; 345 if (r == regex_traits<char>::__regex_word) 346 r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; 347 else if (__icase) { 348 if (r & (ctype_base::lower | ctype_base::upper)) 349 r |= ctype_base::alpha; 350 } 351 } 352 return r; 353 } 354 355 template <> 356 void __match_any_but_newline<char>::__exec(__state& __s) const { 357 if (__s.__current_ != __s.__last_) { 358 switch (*__s.__current_) { 359 case '\r': 360 case '\n': 361 __s.__do_ = __state::__reject; 362 __s.__node_ = nullptr; 363 break; 364 default: 365 __s.__do_ = __state::__accept_and_consume; 366 ++__s.__current_; 367 __s.__node_ = this->first(); 368 break; 369 } 370 } else { 371 __s.__do_ = __state::__reject; 372 __s.__node_ = nullptr; 373 } 374 } 375 376 template <> 377 void __match_any_but_newline<wchar_t>::__exec(__state& __s) const { 378 if (__s.__current_ != __s.__last_) { 379 switch (*__s.__current_) { 380 case '\r': 381 case '\n': 382 case 0x2028: 383 case 0x2029: 384 __s.__do_ = __state::__reject; 385 __s.__node_ = nullptr; 386 break; 387 default: 388 __s.__do_ = __state::__accept_and_consume; 389 ++__s.__current_; 390 __s.__node_ = this->first(); 391 break; 392 } 393 } else { 394 __s.__do_ = __state::__reject; 395 __s.__node_ = nullptr; 396 } 397 } 398 399 _LIBCPP_END_NAMESPACE_STD 400