1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * IMPORTANT NOTE: 32 * 33 * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. 34 * IT IS **NOT** CHARACTER SET INDEPENDENT. 35 * 36 */ 37 38 #pragma weak _regex = regex 39 40 #include "lint.h" 41 /* CONSTANTS SHARED WITH regcmp() */ 42 #include "regex.h" 43 #include "mtlib.h" 44 #include <limits.h> 45 #include <stdarg.h> 46 #include <stdlib.h> 47 #include <thread.h> 48 #include <widec.h> 49 #include "tsd.h" 50 51 52 /* PRIVATE CONSTANTS */ 53 54 #define ADD_256_TO_GROUP_LENGTH 0x1 55 #define ADD_512_TO_GROUP_LENGTH 0x2 56 #define ADD_768_TO_GROUP_LENGTH 0x3 57 #define ADDED_LENGTH_BITS 0x3 58 #define SINGLE_BYTE_MASK 0xff 59 #define STRINGP_STACK_SIZE 50 60 61 62 /* PRIVATE TYPE DEFINITIONS */ 63 64 typedef enum { 65 NOT_IN_CLASS = 0, 66 IN_CLASS 67 } char_test_condition_t; 68 69 typedef enum { 70 TESTING_CHAR = 0, 71 CONDITION_TRUE, 72 CONDITION_FALSE, 73 CHAR_TEST_ERROR 74 } char_test_result_t; 75 76 77 /* PRIVATE GLOBAL VARIABLES */ 78 79 static mutex_t regex_lock = DEFAULTMUTEX; 80 static int return_arg_number[NSUBSTRINGS]; 81 static const char *substring_endp[NSUBSTRINGS]; 82 static const char *substring_startp[NSUBSTRINGS]; 83 static const char *stringp_stack[STRINGP_STACK_SIZE]; 84 static const char **stringp_stackp; 85 86 87 /* DECLARATIONS OF PRIVATE FUNCTIONS */ 88 89 static int 90 get_wchar(wchar_t *wcharp, 91 const char *stringp); 92 93 static void 94 get_match_counts(int *nmust_matchp, 95 int *nextra_matches_allowedp, 96 const char *count_stringp); 97 98 static boolean_t 99 in_wchar_range(wchar_t test_char, 100 wchar_t lower_char, 101 wchar_t upper_char); 102 103 static const char * 104 pop_stringp(void); 105 106 static const char * 107 previous_charp(const char *current_charp); 108 109 static const char * 110 push_stringp(const char *stringp); 111 112 static char_test_result_t 113 test_char_against_ascii_class(char test_char, 114 const char *classp, 115 char_test_condition_t test_condition); 116 117 static char_test_result_t 118 test_char_against_multibyte_class(wchar_t test_char, 119 const char *classp, 120 char_test_condition_t test_condition); 121 122 123 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 124 125 static char_test_result_t 126 test_char_against_old_ascii_class(char test_char, 127 const char *classp, 128 char_test_condition_t test_condition); 129 130 static const char * 131 test_repeated_ascii_char(const char *repeat_startp, 132 const char *stringp, 133 const char *regexp); 134 135 static const char * 136 test_repeated_multibyte_char(const char *repeat_startp, 137 const char *stringp, 138 const char *regexp); 139 140 static const char * 141 test_repeated_group(const char *repeat_startp, 142 const char *stringp, 143 const char *regexp); 144 145 static const char * 146 test_string(const char *stringp, 147 const char *regexp); 148 149 150 /* DEFINITIONS OF PUBLIC VARIABLES */ 151 152 char *__loc1; 153 154 /* 155 * reserve thread-specific storage for __loc1 156 */ 157 char ** 158 ____loc1(void) 159 { 160 if (thr_main()) 161 return (&__loc1); 162 return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL)); 163 } 164 165 #define __loc1 (*(____loc1())) 166 167 /* DEFINITION OF regex() */ 168 169 extern char * 170 regex(const char *regexp, const char *stringp, ...) 171 { 172 va_list arg_listp; 173 int char_size; 174 const char *end_of_matchp; 175 wchar_t regex_wchar; 176 char *return_argp[NSUBSTRINGS]; 177 char *returned_substringp; 178 int substringn; 179 const char *substringp; 180 wchar_t string_wchar; 181 182 if (____loc1() == (char **)0) { 183 return ((char *)0); 184 } else { 185 lmutex_lock(®ex_lock); 186 __loc1 = (char *)0; 187 } 188 189 if ((stringp == (char *)0) || (regexp == (char *)0)) { 190 lmutex_unlock(®ex_lock); 191 return ((char *)0); 192 } 193 194 195 /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ 196 197 substringn = 0; 198 va_start(arg_listp, stringp); 199 while (substringn < NSUBSTRINGS) { 200 return_argp[substringn] = va_arg(arg_listp, char *); 201 substring_startp[substringn] = (char *)0; 202 return_arg_number[substringn] = -1; 203 substringn++; 204 } 205 va_end(arg_listp); 206 207 208 /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ 209 210 end_of_matchp = (char *)0; 211 stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; 212 213 if ((int)*regexp == (int)START_OF_STRING_MARK) { 214 215 /* 216 * the match must start at the beginning of the string 217 */ 218 219 __loc1 = (char *)stringp; 220 regexp++; 221 end_of_matchp = test_string(stringp, regexp); 222 223 } else if ((int)*regexp == (int)ASCII_CHAR) { 224 225 /* 226 * test a string against a regular expression 227 * that starts with a single ASCII character: 228 * 229 * move to each character in the string that matches 230 * the first character in the regular expression 231 * and test the remaining string 232 */ 233 234 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 235 stringp++; 236 } 237 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 238 end_of_matchp = test_string(stringp, regexp); 239 if (end_of_matchp != (char *)0) { 240 __loc1 = (char *)stringp; 241 } else { 242 stringp++; 243 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 244 stringp++; 245 } 246 } 247 } 248 249 } else if (!multibyte) { 250 251 /* 252 * if the value of the "multibyte" macro defined in <euc.h> 253 * is false, regex() is running in an ASCII locale; 254 * test an ASCII string against an ASCII regular expression 255 * that doesn't start with a single ASCII character: 256 * 257 * move forward in the string one byte at a time, testing 258 * the remaining string against the regular expression 259 */ 260 261 end_of_matchp = test_string(stringp, regexp); 262 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 263 stringp++; 264 end_of_matchp = test_string(stringp, regexp); 265 } 266 if (end_of_matchp != (char *)0) { 267 __loc1 = (char *)stringp; 268 } 269 270 } else if ((int)*regexp == (int)MULTIBYTE_CHAR) { 271 272 /* 273 * test a multibyte string against a multibyte regular expression 274 * that starts with a single multibyte character: 275 * 276 * move to each character in the string that matches 277 * the first character in the regular expression 278 * and test the remaining string 279 */ 280 281 (void) get_wchar(®ex_wchar, regexp + 1); 282 char_size = get_wchar(&string_wchar, stringp); 283 while ((string_wchar != regex_wchar) && (char_size > 0)) { 284 stringp += char_size; 285 char_size = get_wchar(&string_wchar, stringp); 286 } 287 while ((end_of_matchp == (char *)0) && (char_size > 0)) { 288 end_of_matchp = test_string(stringp, regexp); 289 if (end_of_matchp != (char *)0) { 290 __loc1 = (char *)stringp; 291 } else { 292 stringp += char_size; 293 char_size = get_wchar(&string_wchar, stringp); 294 while ((string_wchar != regex_wchar) && (char_size > 0)) { 295 stringp += char_size; 296 char_size = get_wchar(&string_wchar, stringp); 297 } 298 } 299 } 300 301 } else { 302 303 /* 304 * test a multibyte string against a multibyte regular expression 305 * that doesn't start with a single multibyte character 306 * 307 * move forward in the string one multibyte character at a time, 308 * testing the remaining string against the regular expression 309 */ 310 311 end_of_matchp = test_string(stringp, regexp); 312 char_size = get_wchar(&string_wchar, stringp); 313 while ((end_of_matchp == (char *)0) && (char_size > 0)) { 314 stringp += char_size; 315 end_of_matchp = test_string(stringp, regexp); 316 char_size = get_wchar(&string_wchar, stringp); 317 } 318 if (end_of_matchp != (char *)0) { 319 __loc1 = (char *)stringp; 320 } 321 } 322 323 /* 324 * Return substrings that matched subexpressions for which 325 * matching substrings are to be returned. 326 * 327 * NOTE: 328 * 329 * According to manual page regcmp(3C), regex() returns substrings 330 * that match subexpressions even when no substring matches the 331 * entire regular expression. 332 */ 333 334 substringn = 0; 335 while (substringn < NSUBSTRINGS) { 336 substringp = substring_startp[substringn]; 337 if ((substringp != (char *)0) && 338 (return_arg_number[substringn] >= 0)) { 339 returned_substringp = 340 return_argp[return_arg_number[substringn]]; 341 if (returned_substringp != (char *)0) { 342 while (substringp < substring_endp[substringn]) { 343 *returned_substringp = (char)*substringp; 344 returned_substringp++; 345 substringp++; 346 } 347 *returned_substringp = '\0'; 348 } 349 } 350 substringn++; 351 } 352 lmutex_unlock(®ex_lock); 353 return ((char *)end_of_matchp); 354 } /* regex() */ 355 356 357 /* DEFINITIONS OF PRIVATE FUNCTIONS */ 358 359 static int 360 get_wchar(wchar_t *wcharp, 361 const char *stringp) 362 { 363 int char_size; 364 365 if (stringp == (char *)0) { 366 char_size = 0; 367 *wcharp = (wchar_t)((unsigned int)'\0'); 368 } else if (*stringp == '\0') { 369 char_size = 0; 370 *wcharp = (wchar_t)((unsigned int)*stringp); 371 } else if ((unsigned char)*stringp <= (unsigned char)0x7f) { 372 char_size = 1; 373 *wcharp = (wchar_t)((unsigned int)*stringp); 374 } else { 375 char_size = mbtowc(wcharp, stringp, MB_LEN_MAX); 376 } 377 return (char_size); 378 } 379 380 static void 381 get_match_counts(int *nmust_matchp, 382 int *nextra_matches_allowedp, 383 const char *count_stringp) 384 { 385 int minimum_match_count; 386 int maximum_match_count; 387 388 minimum_match_count = 389 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 390 *nmust_matchp = minimum_match_count; 391 392 count_stringp++; 393 maximum_match_count = 394 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 395 if (maximum_match_count == (int)UNLIMITED) { 396 *nextra_matches_allowedp = (int)UNLIMITED; 397 } else { 398 *nextra_matches_allowedp = 399 maximum_match_count - minimum_match_count; 400 } 401 return; 402 403 } /* get_match_counts() */ 404 405 static boolean_t 406 in_wchar_range(wchar_t test_char, 407 wchar_t lower_char, 408 wchar_t upper_char) 409 { 410 return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && 411 (lower_char <= test_char) && (test_char <= upper_char)) || 412 (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) && 413 ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) && 414 (lower_char <= test_char) && (test_char <= upper_char))); 415 416 } /* in_wchar_range() */ 417 418 static const char * 419 pop_stringp(void) 420 { 421 const char *stringp; 422 423 if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) { 424 return ((char *)0); 425 } else { 426 stringp = *stringp_stackp; 427 stringp_stackp++; 428 return (stringp); 429 } 430 } 431 432 433 static const char * 434 previous_charp(const char *current_charp) 435 { 436 /* 437 * returns the pointer to the previous character in 438 * a string of multibyte characters 439 */ 440 441 const char *prev_cs0 = current_charp - 1; 442 const char *prev_cs1 = current_charp - eucw1; 443 const char *prev_cs2 = current_charp - eucw2 - 1; 444 const char *prev_cs3 = current_charp - eucw3 - 1; 445 const char *prev_charp; 446 447 if ((unsigned char)*prev_cs0 <= 0x7f) { 448 prev_charp = prev_cs0; 449 } else if ((unsigned char)*prev_cs2 == SS2) { 450 prev_charp = prev_cs2; 451 } else if ((unsigned char)*prev_cs3 == SS3) { 452 prev_charp = prev_cs3; 453 } else { 454 prev_charp = prev_cs1; 455 } 456 return (prev_charp); 457 458 } /* previous_charp() */ 459 460 static const char * 461 push_stringp(const char *stringp) 462 { 463 if (stringp_stackp <= &stringp_stack[0]) { 464 return ((char *)0); 465 } else { 466 stringp_stackp--; 467 *stringp_stackp = stringp; 468 return (stringp); 469 } 470 } 471 472 473 static char_test_result_t 474 test_char_against_ascii_class(char test_char, 475 const char *classp, 476 char_test_condition_t test_condition) 477 { 478 /* 479 * tests a character for membership in an ASCII character class compiled 480 * by the internationalized version of regcmp(); 481 * 482 * NOTE: The internationalized version of regcmp() compiles 483 * the range a-z in an ASCII character class to aTHRUz. 484 */ 485 486 int nbytes_to_check; 487 488 nbytes_to_check = (int)*classp; 489 classp++; 490 nbytes_to_check--; 491 492 while (nbytes_to_check > 0) { 493 if (test_char == *classp) { 494 if (test_condition == IN_CLASS) 495 return (CONDITION_TRUE); 496 else 497 return (CONDITION_FALSE); 498 } else if (*classp == THRU) { 499 if ((*(classp - 1) <= test_char) && 500 (test_char <= *(classp + 1))) { 501 if (test_condition == IN_CLASS) 502 return (CONDITION_TRUE); 503 else 504 return (CONDITION_FALSE); 505 } else { 506 classp += 2; 507 nbytes_to_check -= 2; 508 } 509 } else { 510 classp++; 511 nbytes_to_check--; 512 } 513 } 514 if (test_condition == NOT_IN_CLASS) { 515 return (CONDITION_TRUE); 516 } else { 517 return (CONDITION_FALSE); 518 } 519 } /* test_char_against_ascii_class() */ 520 521 static char_test_result_t 522 test_char_against_multibyte_class(wchar_t test_char, 523 const char *classp, 524 char_test_condition_t test_condition) 525 { 526 /* 527 * tests a character for membership in a multibyte character class; 528 * 529 * NOTE: The range a-z in a multibyte character class compiles to 530 * aTHRUz. 531 */ 532 533 int char_size; 534 wchar_t current_char; 535 int nbytes_to_check; 536 wchar_t previous_char; 537 538 nbytes_to_check = (int)*classp; 539 classp++; 540 nbytes_to_check--; 541 542 char_size = get_wchar(¤t_char, classp); 543 if (char_size <= 0) { 544 return (CHAR_TEST_ERROR); 545 } else if (test_char == current_char) { 546 if (test_condition == IN_CLASS) { 547 return (CONDITION_TRUE); 548 } else { 549 return (CONDITION_FALSE); 550 } 551 } else { 552 classp += char_size; 553 nbytes_to_check -= char_size; 554 } 555 556 while (nbytes_to_check > 0) { 557 previous_char = current_char; 558 char_size = get_wchar(¤t_char, classp); 559 if (char_size <= 0) { 560 return (CHAR_TEST_ERROR); 561 } else if (test_char == current_char) { 562 if (test_condition == IN_CLASS) { 563 return (CONDITION_TRUE); 564 } else { 565 return (CONDITION_FALSE); 566 } 567 } else if (current_char == THRU) { 568 classp += char_size; 569 nbytes_to_check -= char_size; 570 char_size = get_wchar(¤t_char, classp); 571 if (char_size <= 0) { 572 return (CHAR_TEST_ERROR); 573 } else if (in_wchar_range(test_char, previous_char, 574 current_char)) { 575 if (test_condition == IN_CLASS) { 576 return (CONDITION_TRUE); 577 } else { 578 return (CONDITION_FALSE); 579 } 580 } else { 581 classp += char_size; 582 nbytes_to_check -= char_size; 583 } 584 } else { 585 classp += char_size; 586 nbytes_to_check -= char_size; 587 } 588 } 589 if (test_condition == NOT_IN_CLASS) { 590 return (CONDITION_TRUE); 591 } else { 592 return (CONDITION_FALSE); 593 } 594 } /* test_char_against_multibyte_class() */ 595 596 597 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 598 599 static char_test_result_t 600 test_char_against_old_ascii_class(char test_char, 601 const char *classp, 602 char_test_condition_t test_condition) 603 { 604 /* 605 * tests a character for membership in an ASCII character class compiled 606 * by the ASCII version of regcmp(); 607 * 608 * NOTE: ASCII versions of regcmp() compile the range a-z in an 609 * ASCII character class to THRUaz. The internationalized 610 * version compiles the same range to aTHRUz. 611 */ 612 613 int nbytes_to_check; 614 615 nbytes_to_check = (int)*classp; 616 classp++; 617 nbytes_to_check--; 618 619 while (nbytes_to_check > 0) { 620 if (test_char == *classp) { 621 if (test_condition == IN_CLASS) { 622 return (CONDITION_TRUE); 623 } else { 624 return (CONDITION_FALSE); 625 } 626 } else if (*classp == THRU) { 627 if ((*(classp + 1) <= test_char) && 628 (test_char <= *(classp + 2))) { 629 if (test_condition == IN_CLASS) { 630 return (CONDITION_TRUE); 631 } else { 632 return (CONDITION_FALSE); 633 } 634 } else { 635 classp += 3; 636 nbytes_to_check -= 3; 637 } 638 } else { 639 classp++; 640 nbytes_to_check--; 641 } 642 } 643 if (test_condition == NOT_IN_CLASS) { 644 return (CONDITION_TRUE); 645 } else { 646 return (CONDITION_FALSE); 647 } 648 } /* test_char_against_old_ascii_class() */ 649 650 static const char * 651 test_repeated_ascii_char(const char *repeat_startp, 652 const char *stringp, 653 const char *regexp) 654 { 655 const char *end_of_matchp; 656 657 end_of_matchp = test_string(stringp, regexp); 658 while ((end_of_matchp == (char *)0) && 659 (stringp > repeat_startp)) { 660 stringp--; 661 end_of_matchp = test_string(stringp, regexp); 662 } 663 return (end_of_matchp); 664 } 665 666 static const char * 667 test_repeated_multibyte_char(const char *repeat_startp, 668 const char *stringp, 669 const char *regexp) 670 { 671 const char *end_of_matchp; 672 673 end_of_matchp = test_string(stringp, regexp); 674 while ((end_of_matchp == (char *)0) && 675 (stringp > repeat_startp)) { 676 stringp = previous_charp(stringp); 677 end_of_matchp = test_string(stringp, regexp); 678 } 679 return (end_of_matchp); 680 } 681 682 static const char * 683 test_repeated_group(const char *repeat_startp, 684 const char *stringp, 685 const char *regexp) 686 { 687 const char *end_of_matchp; 688 689 end_of_matchp = test_string(stringp, regexp); 690 while ((end_of_matchp == (char *)0) && 691 (stringp > repeat_startp)) { 692 stringp = pop_stringp(); 693 if (stringp == (char *)0) { 694 return ((char *)0); 695 } 696 end_of_matchp = test_string(stringp, regexp); 697 } 698 return (end_of_matchp); 699 } 700 701 static const char * 702 test_string(const char *stringp, 703 const char *regexp) 704 { 705 /* 706 * returns a pointer to the first character following the first 707 * substring of the string addressed by stringp that matches 708 * the compiled regular expression addressed by regexp 709 */ 710 711 unsigned int group_length; 712 int nextra_matches_allowed; 713 int nmust_match; 714 wchar_t regex_wchar; 715 int regex_char_size; 716 const char *repeat_startp; 717 unsigned int return_argn; 718 wchar_t string_wchar; 719 int string_char_size; 720 unsigned int substringn; 721 char_test_condition_t test_condition; 722 const char *test_stringp; 723 724 for (;;) { 725 726 /* 727 * Exit the loop via a return whenever there's a match 728 * or it's clear that there can be no match. 729 */ 730 731 switch ((int)*regexp) { 732 733 /* 734 * No fall-through. 735 * Each case ends with either a return or with stringp 736 * addressing the next character to be tested and regexp 737 * addressing the next compiled regular expression 738 * 739 * NOTE: The comments for each case give the meaning 740 * of the compiled regular expression decoded by the case 741 * and the character string that the compiled regular 742 * expression uses to encode the case. Each single 743 * character encoded in the compiled regular expression 744 * is shown enclosed in angle brackets (<>). Each 745 * compiled regular expression begins with a marker 746 * character which is shown as a named constant 747 * (e.g. <ASCII_CHAR>). Character constants are shown 748 * enclosed in single quotes (e.g. <'$'>). All other 749 * single characters encoded in the compiled regular 750 * expression are shown as lower case variable names 751 * (e.g. <ascii_char> or <multibyte_char>). Multicharacter 752 * strings encoded in the compiled regular expression 753 * are shown as variable names followed by elipses 754 * (e.g. <compiled_regex...>). 755 */ 756 757 case ASCII_CHAR: /* single ASCII char */ 758 759 /* encoded as <ASCII_CHAR><ascii_char> */ 760 761 regexp++; 762 if (*regexp == *stringp) { 763 regexp++; 764 stringp++; 765 } else { 766 return ((char *)0); 767 } 768 break; /* end case ASCII_CHAR */ 769 770 case MULTIBYTE_CHAR: /* single multibyte char */ 771 772 /* encoded as <MULTIBYTE_CHAR><multibyte_char> */ 773 774 regexp++; 775 regex_char_size = get_wchar(®ex_wchar, regexp); 776 string_char_size = get_wchar(&string_wchar, stringp); 777 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 778 return ((char *)0); 779 } else { 780 regexp += regex_char_size; 781 stringp += string_char_size; 782 } 783 break; /* end case MULTIBYTE_CHAR */ 784 785 case ANY_CHAR: /* any single ASCII or multibyte char */ 786 787 /* encoded as <ANY_CHAR> */ 788 789 if (!multibyte) { 790 if (*stringp == '\0') { 791 return ((char *)0); 792 } else { 793 regexp++; 794 stringp++; 795 } 796 } else { 797 string_char_size = get_wchar(&string_wchar, stringp); 798 if (string_char_size <= 0) { 799 return ((char *)0); 800 } else { 801 regexp++; 802 stringp += string_char_size; 803 } 804 } 805 break; /* end case ANY_CHAR */ 806 807 case IN_ASCII_CHAR_CLASS: /* [.....] */ 808 case NOT_IN_ASCII_CHAR_CLASS: 809 810 /* 811 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...> 812 * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...> 813 * 814 * NOTE: <class_length> includes the <class_length> byte 815 */ 816 817 if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) { 818 test_condition = IN_CLASS; 819 } else { 820 test_condition = NOT_IN_CLASS; 821 } 822 regexp++; /* point to the <class_length> byte */ 823 824 if ((*stringp != '\0') && 825 (test_char_against_ascii_class(*stringp, regexp, 826 test_condition) == CONDITION_TRUE)) { 827 regexp += (int)*regexp; /* add the class length to regexp */ 828 stringp++; 829 } else { 830 return ((char *)0); 831 } 832 break; /* end case IN_ASCII_CHAR_CLASS */ 833 834 case IN_MULTIBYTE_CHAR_CLASS: /* [....] */ 835 case NOT_IN_MULTIBYTE_CHAR_CLASS: 836 837 /* 838 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 839 * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 840 * 841 * NOTE: <class_length> includes the <class_length> byte 842 */ 843 844 if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) { 845 test_condition = IN_CLASS; 846 } else { 847 test_condition = NOT_IN_CLASS; 848 } 849 regexp++; /* point to the <class_length> byte */ 850 851 string_char_size = get_wchar(&string_wchar, stringp); 852 if ((string_char_size > 0) && 853 (test_char_against_multibyte_class(string_wchar, regexp, 854 test_condition) == CONDITION_TRUE)) { 855 regexp += (int)*regexp; /* add the class length to regexp */ 856 stringp += string_char_size; 857 } else { 858 return ((char *)0); 859 } 860 break; /* end case IN_MULTIBYTE_CHAR_CLASS */ 861 862 case IN_OLD_ASCII_CHAR_CLASS: /* [...] */ 863 case NOT_IN_OLD_ASCII_CHAR_CLASS: 864 865 /* 866 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 867 * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 868 * 869 * NOTE: <class_length> includes the <class_length> byte 870 */ 871 872 if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) { 873 test_condition = IN_CLASS; 874 } else { 875 test_condition = NOT_IN_CLASS; 876 } 877 regexp++; /* point to the <class_length> byte */ 878 879 if ((*stringp != '\0') && 880 (test_char_against_old_ascii_class(*stringp, regexp, 881 test_condition) == CONDITION_TRUE)) { 882 regexp += (int)*regexp; /* add the class length to regexp */ 883 stringp++; 884 } else { 885 return ((char *)0); 886 } 887 break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ 888 889 case SIMPLE_GROUP: /* (.....) */ 890 891 /* encoded as <SIMPLE_GROUP><group_length> */ 892 893 regexp += 2; 894 break; /* end case SIMPLE_GROUP */ 895 896 case END_GROUP: /* (.....) */ 897 898 /* encoded as <END_GROUP><groupn> */ 899 900 regexp += 2; 901 break; /* end case END_GROUP */ 902 903 case SAVED_GROUP: /* (.....)$0-9 */ 904 905 /* encoded as <SAVED_GROUP><substringn> */ 906 907 regexp++; 908 substringn = (unsigned int)*regexp; 909 if (substringn >= NSUBSTRINGS) 910 return ((char *)0); 911 substring_startp[substringn] = stringp; 912 regexp++; 913 break; /* end case SAVED_GROUP */ 914 915 case END_SAVED_GROUP: /* (.....)$0-9 */ 916 917 /* 918 * encoded as <END_SAVED_GROUP><substringn>\ 919 * <return_arg_number[substringn]> 920 */ 921 922 regexp++; 923 substringn = (unsigned int)*regexp; 924 if (substringn >= NSUBSTRINGS) 925 return ((char *)0); 926 substring_endp[substringn] = stringp; 927 regexp++; 928 return_argn = (unsigned int)*regexp; 929 if (return_argn >= NSUBSTRINGS) 930 return ((char *)0); 931 return_arg_number[substringn] = return_argn; 932 regexp++; 933 break; /* end case END_SAVED_GROUP */ 934 935 case ASCII_CHAR|ZERO_OR_MORE: /* char* */ 936 937 /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */ 938 939 regexp++; 940 repeat_startp = stringp; 941 while (*stringp == *regexp) { 942 stringp++; 943 } 944 regexp++; 945 return (test_repeated_ascii_char(repeat_startp, 946 stringp, regexp)); 947 948 /* end case ASCII_CHAR|ZERO_OR_MORE */ 949 950 case ASCII_CHAR|ONE_OR_MORE: /* char+ */ 951 952 /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */ 953 954 regexp++; 955 if (*stringp != *regexp) { 956 return ((char *)0); 957 } else { 958 stringp++; 959 repeat_startp = stringp; 960 while (*stringp == *regexp) { 961 stringp++; 962 } 963 regexp++; 964 return (test_repeated_ascii_char(repeat_startp, stringp, 965 regexp)); 966 } 967 /* end case ASCII_CHAR|ONE_OR_MORE */ 968 969 case ASCII_CHAR|COUNT: /* char{min_count,max_count} */ 970 971 /* 972 * encoded as <ASCII_CHAR|COUNT><ascii_char>\ 973 * <minimum_match_count><maximum_match_count> 974 */ 975 976 regexp++; 977 get_match_counts(&nmust_match, &nextra_matches_allowed, 978 regexp + 1); 979 while ((*stringp == *regexp) && (nmust_match > 0)) { 980 nmust_match--; 981 stringp++; 982 } 983 if (nmust_match > 0) { 984 return ((char *)0); 985 } else if (nextra_matches_allowed == UNLIMITED) { 986 repeat_startp = stringp; 987 while (*stringp == *regexp) { 988 stringp++; 989 } 990 regexp += 3; 991 return (test_repeated_ascii_char(repeat_startp, stringp, 992 regexp)); 993 } else { 994 repeat_startp = stringp; 995 while ((*stringp == *regexp) && 996 (nextra_matches_allowed > 0)) { 997 nextra_matches_allowed--; 998 stringp++; 999 } 1000 regexp += 3; 1001 return (test_repeated_ascii_char(repeat_startp, stringp, 1002 regexp)); 1003 } 1004 /* end case ASCII_CHAR|COUNT */ 1005 1006 case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */ 1007 1008 /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */ 1009 1010 regexp++; 1011 regex_char_size = get_wchar(®ex_wchar, regexp); 1012 repeat_startp = stringp; 1013 string_char_size = get_wchar(&string_wchar, stringp); 1014 while ((string_char_size > 0) && 1015 (string_wchar == regex_wchar)) { 1016 stringp += string_char_size; 1017 string_char_size = get_wchar(&string_wchar, stringp); 1018 } 1019 regexp += regex_char_size; 1020 return (test_repeated_multibyte_char(repeat_startp, stringp, 1021 regexp)); 1022 1023 /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ 1024 1025 case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */ 1026 1027 /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */ 1028 1029 regexp++; 1030 regex_char_size = get_wchar(®ex_wchar, regexp); 1031 string_char_size = get_wchar(&string_wchar, stringp); 1032 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 1033 return ((char *)0); 1034 } else { 1035 stringp += string_char_size; 1036 repeat_startp = stringp; 1037 string_char_size = get_wchar(&string_wchar, stringp); 1038 while ((string_char_size > 0) && 1039 (string_wchar == regex_wchar)) { 1040 stringp += string_char_size; 1041 string_char_size = get_wchar(&string_wchar, stringp); 1042 } 1043 regexp += regex_char_size; 1044 return (test_repeated_multibyte_char(repeat_startp, stringp, 1045 regexp)); 1046 } 1047 /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ 1048 1049 case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */ 1050 1051 /* 1052 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\ 1053 * <minimum_match_count><maximum_match_count> 1054 */ 1055 1056 regexp++; 1057 regex_char_size = get_wchar(®ex_wchar, regexp); 1058 get_match_counts(&nmust_match, &nextra_matches_allowed, 1059 regexp + regex_char_size); 1060 string_char_size = get_wchar(&string_wchar, stringp); 1061 while ((string_char_size > 0) && 1062 (string_wchar == regex_wchar) && 1063 (nmust_match > 0)) { 1064 1065 nmust_match--; 1066 stringp += string_char_size; 1067 string_char_size = get_wchar(&string_wchar, stringp); 1068 } 1069 if (nmust_match > 0) { 1070 return ((char *)0); 1071 } else if (nextra_matches_allowed == UNLIMITED) { 1072 repeat_startp = stringp; 1073 while ((string_char_size > 0) && 1074 (string_wchar == regex_wchar)) { 1075 stringp += string_char_size; 1076 string_char_size = get_wchar(&string_wchar, stringp); 1077 } 1078 regexp += regex_char_size + 2; 1079 return (test_repeated_multibyte_char(repeat_startp, stringp, 1080 regexp)); 1081 } else { 1082 repeat_startp = stringp; 1083 while ((string_char_size > 0) && 1084 (string_wchar == regex_wchar) && 1085 (nextra_matches_allowed > 0)) { 1086 nextra_matches_allowed--; 1087 stringp += string_char_size; 1088 string_char_size = get_wchar(&string_wchar, stringp); 1089 } 1090 regexp += regex_char_size + 2; 1091 return (test_repeated_multibyte_char(repeat_startp, stringp, 1092 regexp)); 1093 } 1094 /* end case MULTIBYTE_CHAR|COUNT */ 1095 1096 case ANY_CHAR|ZERO_OR_MORE: /* .* */ 1097 1098 /* encoded as <ANY_CHAR|ZERO_OR_MORE> */ 1099 1100 repeat_startp = stringp; 1101 if (!multibyte) { 1102 while (*stringp != '\0') { 1103 stringp++; 1104 } 1105 regexp++; 1106 return (test_repeated_ascii_char(repeat_startp, stringp, 1107 regexp)); 1108 } else { 1109 string_char_size = get_wchar(&string_wchar, stringp); 1110 while (string_char_size > 0) { 1111 stringp += string_char_size; 1112 string_char_size = get_wchar(&string_wchar, stringp); 1113 } 1114 regexp++; 1115 return (test_repeated_multibyte_char(repeat_startp, stringp, 1116 regexp)); 1117 } 1118 /* end case <ANY_CHAR|ZERO_OR_MORE> */ 1119 1120 case ANY_CHAR|ONE_OR_MORE: /* .+ */ 1121 1122 /* encoded as <ANY_CHAR|ONE_OR_MORE> */ 1123 1124 if (!multibyte) { 1125 if (*stringp == '\0') { 1126 return ((char *)0); 1127 } else { 1128 stringp++; 1129 repeat_startp = stringp; 1130 while (*stringp != '\0') { 1131 stringp++; 1132 } 1133 regexp++; 1134 return (test_repeated_ascii_char(repeat_startp, stringp, 1135 regexp)); 1136 } 1137 } else { 1138 string_char_size = get_wchar(&string_wchar, stringp); 1139 if (string_char_size <= 0) { 1140 return ((char *)0); 1141 } else { 1142 stringp += string_char_size; 1143 repeat_startp = stringp; 1144 string_char_size = get_wchar(&string_wchar, stringp); 1145 while (string_char_size > 0) { 1146 stringp += string_char_size; 1147 string_char_size = 1148 get_wchar(&string_wchar, stringp); 1149 } 1150 regexp++; 1151 return (test_repeated_multibyte_char(repeat_startp, 1152 stringp, regexp)); 1153 } 1154 } 1155 /* end case <ANY_CHAR|ONE_OR_MORE> */ 1156 1157 case ANY_CHAR|COUNT: /* .{min_count,max_count} */ 1158 1159 /* 1160 * encoded as <ANY_CHAR|COUNT>\ 1161 * <minimum_match_count><maximum_match_count> 1162 */ 1163 1164 get_match_counts(&nmust_match, &nextra_matches_allowed, 1165 regexp + 1); 1166 if (!multibyte) { 1167 while ((*stringp != '\0') && (nmust_match > 0)) { 1168 nmust_match--; 1169 stringp++; 1170 } 1171 if (nmust_match > 0) { 1172 return ((char *)0); 1173 } else if (nextra_matches_allowed == UNLIMITED) { 1174 repeat_startp = stringp; 1175 while (*stringp != '\0') { 1176 stringp++; 1177 } 1178 regexp += 3; 1179 return (test_repeated_ascii_char(repeat_startp, stringp, 1180 regexp)); 1181 } else { 1182 repeat_startp = stringp; 1183 while ((*stringp != '\0') && 1184 (nextra_matches_allowed > 0)) { 1185 nextra_matches_allowed--; 1186 stringp++; 1187 } 1188 regexp += 3; 1189 return (test_repeated_ascii_char(repeat_startp, stringp, 1190 regexp)); 1191 } 1192 } else { /* multibyte character */ 1193 1194 string_char_size = get_wchar(&string_wchar, stringp); 1195 while ((string_char_size > 0) && (nmust_match > 0)) { 1196 nmust_match--; 1197 stringp += string_char_size; 1198 string_char_size = get_wchar(&string_wchar, stringp); 1199 } 1200 if (nmust_match > 0) { 1201 return ((char *)0); 1202 } else if (nextra_matches_allowed == UNLIMITED) { 1203 repeat_startp = stringp; 1204 while (string_char_size > 0) { 1205 stringp += string_char_size; 1206 string_char_size = 1207 get_wchar(&string_wchar, stringp); 1208 } 1209 regexp += 3; 1210 return (test_repeated_multibyte_char(repeat_startp, 1211 stringp, regexp)); 1212 } else { 1213 repeat_startp = stringp; 1214 while ((string_char_size > 0) && 1215 (nextra_matches_allowed > 0)) { 1216 nextra_matches_allowed--; 1217 stringp += string_char_size; 1218 string_char_size = 1219 get_wchar(&string_wchar, stringp); 1220 } 1221 regexp += 3; 1222 return (test_repeated_multibyte_char(repeat_startp, 1223 stringp, regexp)); 1224 } 1225 } /* end case ANY_CHAR|COUNT */ 1226 1227 case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1228 case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1229 1230 /* 1231 * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1232 * <class_length><class ...> 1233 * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1234 * <class_length><class ...> 1235 * 1236 * NOTE: <class_length> includes the <class_length> byte 1237 */ 1238 1239 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1240 test_condition = IN_CLASS; 1241 } else { 1242 test_condition = NOT_IN_CLASS; 1243 } 1244 regexp++; /* point to the <class_length> byte */ 1245 1246 repeat_startp = stringp; 1247 while ((*stringp != '\0') && 1248 (test_char_against_ascii_class(*stringp, regexp, 1249 test_condition) == CONDITION_TRUE)) { 1250 stringp++; 1251 } 1252 regexp += (int)*regexp; /* add the class length to regexp */ 1253 return (test_repeated_ascii_char(repeat_startp, stringp, 1254 regexp)); 1255 1256 /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1257 1258 case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1259 case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE: 1260 1261 /* 1262 * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1263 * <class_length><class ...> 1264 * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1265 * <class_length><class ...> 1266 * 1267 * NOTE: <class_length> includes the <class_length> byte 1268 */ 1269 1270 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1271 test_condition = IN_CLASS; 1272 } else { 1273 test_condition = NOT_IN_CLASS; 1274 } 1275 regexp++; /* point to the <class_length> byte */ 1276 1277 if ((*stringp == '\0') || 1278 (test_char_against_ascii_class(*stringp, regexp, 1279 test_condition) != CONDITION_TRUE)) { 1280 return ((char *)0); 1281 } else { 1282 stringp++; 1283 repeat_startp = stringp; 1284 while ((*stringp != '\0') && 1285 (test_char_against_ascii_class(*stringp, regexp, 1286 test_condition) == CONDITION_TRUE)) { 1287 stringp++; 1288 } 1289 regexp += (int)*regexp; /* add the class length to regexp */ 1290 return (test_repeated_ascii_char(repeat_startp, stringp, 1291 regexp)); 1292 } 1293 /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ 1294 1295 case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */ 1296 case NOT_IN_ASCII_CHAR_CLASS | COUNT: 1297 1298 /* 1299 * endoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1300 * <class ...><minimum_match_count>\ 1301 * <maximum_match_count> 1302 * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1303 * <class ...><minimum_match_count>\ 1304 * <maximum_match_count> 1305 * 1306 * NOTE: <class_length> includes the <class_length> byte, 1307 * but not the <minimum_match_count> or 1308 * <maximum_match_count> bytes 1309 */ 1310 1311 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) { 1312 test_condition = IN_CLASS; 1313 } else { 1314 test_condition = NOT_IN_CLASS; 1315 } 1316 regexp++; /* point to the <class_length> byte */ 1317 1318 get_match_counts(&nmust_match, &nextra_matches_allowed, 1319 regexp + (int)*regexp); 1320 while ((*stringp != '\0') && 1321 (test_char_against_ascii_class(*stringp, regexp, 1322 test_condition) == CONDITION_TRUE) && 1323 (nmust_match > 0)) { 1324 nmust_match--; 1325 stringp++; 1326 } 1327 if (nmust_match > 0) { 1328 return ((char *)0); 1329 } else if (nextra_matches_allowed == UNLIMITED) { 1330 repeat_startp = stringp; 1331 while ((*stringp != '\0') && 1332 (test_char_against_ascii_class(*stringp, regexp, 1333 test_condition) == CONDITION_TRUE)) { 1334 stringp++; 1335 } 1336 regexp += (int)*regexp + 2; 1337 return (test_repeated_ascii_char(repeat_startp, stringp, 1338 regexp)); 1339 } else { 1340 repeat_startp = stringp; 1341 while ((*stringp != '\0') && 1342 (test_char_against_ascii_class(*stringp, regexp, 1343 test_condition) == CONDITION_TRUE) && 1344 (nextra_matches_allowed > 0)) { 1345 nextra_matches_allowed--; 1346 stringp++; 1347 } 1348 regexp += (int)*regexp + 2; 1349 return (test_repeated_ascii_char(repeat_startp, stringp, 1350 regexp)); 1351 } 1352 /* end case IN_ASCII_CHAR_CLASS|COUNT */ 1353 1354 case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1355 case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: 1356 1357 /* 1358 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1359 * <class_length><class ...> 1360 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1361 * <class_length><class ...> 1362 * 1363 * NOTE: <class_length> includes the <class_length> byte 1364 */ 1365 1366 if ((int)*regexp == 1367 (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) { 1368 test_condition = IN_CLASS; 1369 } else { 1370 test_condition = NOT_IN_CLASS; 1371 } 1372 regexp++; /* point to the <class_length> byte */ 1373 1374 repeat_startp = stringp; 1375 string_char_size = get_wchar(&string_wchar, stringp); 1376 while ((string_char_size > 0) && 1377 (test_char_against_multibyte_class(string_wchar, regexp, 1378 test_condition) == CONDITION_TRUE)) { 1379 stringp += string_char_size; 1380 string_char_size = get_wchar(&string_wchar, stringp); 1381 } 1382 regexp += (int)*regexp; /* add the class length to regexp */ 1383 return (test_repeated_multibyte_char(repeat_startp, stringp, 1384 regexp)); 1385 1386 /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ 1387 1388 case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1389 case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: 1390 1391 /* 1392 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1393 * <class_length><class ...> 1394 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1395 * <class_length><class ...> 1396 * 1397 * NOTE: <class_length> includes the <class_length> byte 1398 */ 1399 1400 if ((int)*regexp == 1401 (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) { 1402 test_condition = IN_CLASS; 1403 } else { 1404 test_condition = NOT_IN_CLASS; 1405 } 1406 regexp++; /* point to the <class_length> byte */ 1407 1408 string_char_size = get_wchar(&string_wchar, stringp); 1409 if ((string_char_size <= 0) || 1410 (test_char_against_multibyte_class(string_wchar, regexp, 1411 test_condition) != CONDITION_TRUE)) { 1412 return ((char *)0); 1413 } else { 1414 stringp += string_char_size; 1415 repeat_startp = stringp; 1416 string_char_size = get_wchar(&string_wchar, stringp); 1417 while ((string_char_size > 0) && 1418 (test_char_against_multibyte_class(string_wchar, 1419 regexp, test_condition) == CONDITION_TRUE)) { 1420 stringp += string_char_size; 1421 string_char_size = get_wchar(&string_wchar, stringp); 1422 } 1423 regexp += (int)*regexp; /* add the class length to regexp */ 1424 return (test_repeated_multibyte_char(repeat_startp, stringp, 1425 regexp)); 1426 } 1427 /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ 1428 1429 case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1430 case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT: 1431 1432 /* 1433 * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1434 * <class_length><class ...><min_count><max_count> 1435 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1436 * <class_length><class ...><min_count><max_count> 1437 * 1438 * NOTE: <class_length> includes the <class_length> byte 1439 * but not the <minimum_match_count> or 1440 * <maximum_match_count> bytes 1441 */ 1442 1443 if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) { 1444 test_condition = IN_CLASS; 1445 } else { 1446 test_condition = NOT_IN_CLASS; 1447 } 1448 regexp++; /* point to the <class_length> byte */ 1449 1450 get_match_counts(&nmust_match, &nextra_matches_allowed, 1451 regexp + (int)*regexp); 1452 string_char_size = get_wchar(&string_wchar, stringp); 1453 while ((string_char_size > 0) && 1454 (test_char_against_multibyte_class(string_wchar, regexp, 1455 test_condition) == CONDITION_TRUE) && 1456 (nmust_match > 0)) { 1457 nmust_match--; 1458 stringp += string_char_size; 1459 string_char_size = get_wchar(&string_wchar, stringp); 1460 } 1461 if (nmust_match > 0) { 1462 return ((char *)0); 1463 } else if (nextra_matches_allowed == UNLIMITED) { 1464 repeat_startp = stringp; 1465 while ((string_char_size > 0) && 1466 (test_char_against_multibyte_class(string_wchar, 1467 regexp, test_condition) == CONDITION_TRUE)) { 1468 stringp += string_char_size; 1469 string_char_size = get_wchar(&string_wchar, stringp); 1470 } 1471 regexp += (int)*regexp + 2; 1472 return (test_repeated_multibyte_char(repeat_startp, stringp, 1473 regexp)); 1474 } else { 1475 repeat_startp = stringp; 1476 while ((string_char_size > 0) && 1477 (test_char_against_multibyte_class(string_wchar, 1478 regexp, test_condition) == CONDITION_TRUE) && 1479 (nextra_matches_allowed > 0)) { 1480 nextra_matches_allowed--; 1481 stringp += string_char_size; 1482 string_char_size = get_wchar(&string_wchar, stringp); 1483 } 1484 regexp += (int)*regexp + 2; 1485 return (test_repeated_multibyte_char(repeat_startp, stringp, 1486 regexp)); 1487 } 1488 /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ 1489 1490 case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1491 case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1492 1493 /* 1494 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1495 * <class_length><class ...> 1496 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1497 * <class_length><class ...> 1498 * 1499 * NOTE: <class_length> includes the <class_length> byte 1500 */ 1501 1502 if ((int)*regexp == 1503 (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1504 test_condition = IN_CLASS; 1505 } else { 1506 test_condition = NOT_IN_CLASS; 1507 } 1508 regexp++; /* point to the <class_length> byte */ 1509 1510 repeat_startp = stringp; 1511 while ((*stringp != '\0') && 1512 (test_char_against_old_ascii_class(*stringp, regexp, 1513 test_condition) == CONDITION_TRUE)) { 1514 stringp++; 1515 } 1516 regexp += (int)*regexp; /* add the class length to regexp */ 1517 return (test_repeated_ascii_char(repeat_startp, stringp, 1518 regexp)); 1519 1520 /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1521 1522 case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1523 case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: 1524 1525 /* 1526 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1527 * <class_length><class ...> 1528 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1529 * <class_length><class ...> 1530 * 1531 * NOTE: <class length> includes the <class_length> byte 1532 */ 1533 1534 if ((int)*regexp == 1535 (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1536 test_condition = IN_CLASS; 1537 } else { 1538 test_condition = NOT_IN_CLASS; 1539 } 1540 regexp++; /* point to the <class_length> byte */ 1541 1542 if ((*stringp == '\0') || 1543 (test_char_against_old_ascii_class(*stringp, regexp, 1544 test_condition) != CONDITION_TRUE)) { 1545 return ((char *)0); 1546 } else { 1547 stringp++; 1548 repeat_startp = stringp; 1549 while ((*stringp != '\0') && 1550 (test_char_against_old_ascii_class(*stringp, regexp, 1551 test_condition) == CONDITION_TRUE)) { 1552 stringp++; 1553 } 1554 regexp += (int)*regexp; /* add the class length to regexp */ 1555 return (test_repeated_ascii_char(repeat_startp, stringp, 1556 regexp)); 1557 } 1558 /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ 1559 1560 case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1561 case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT: 1562 1563 /* 1564 * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\ 1565 * <class ...><minimum_match_count>\ 1566 * <maximum_match_count> 1567 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\ 1568 * <class_length><class ...><minimum_match_count>\ 1569 * <maximum_match_count> 1570 * 1571 * NOTE: <class_length> includes the <class_length> byte 1572 * but not the <minimum_match_count> or 1573 * <maximum_match_count> bytes 1574 */ 1575 1576 if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) { 1577 test_condition = IN_CLASS; 1578 } else { 1579 test_condition = NOT_IN_CLASS; 1580 } 1581 regexp++; /* point to the <class_length> byte */ 1582 1583 get_match_counts(&nmust_match, &nextra_matches_allowed, 1584 regexp + (int)*regexp); 1585 while ((*stringp != '\0') && 1586 (test_char_against_old_ascii_class(*stringp, regexp, 1587 test_condition) == CONDITION_TRUE) && 1588 (nmust_match > 0)) { 1589 nmust_match--; 1590 stringp++; 1591 } 1592 if (nmust_match > 0) { 1593 return ((char *)0); 1594 } else if (nextra_matches_allowed == UNLIMITED) { 1595 repeat_startp = stringp; 1596 while ((*stringp != '\0') && 1597 (test_char_against_old_ascii_class(*stringp, regexp, 1598 test_condition) == CONDITION_TRUE)) { 1599 stringp++; 1600 } 1601 regexp += (int)*regexp + 2; 1602 return (test_repeated_ascii_char(repeat_startp, stringp, 1603 regexp)); 1604 } else { 1605 repeat_startp = stringp; 1606 while ((*stringp != '\0') && 1607 (test_char_against_old_ascii_class(*stringp, regexp, 1608 test_condition) == CONDITION_TRUE) && 1609 (nextra_matches_allowed > 0)) { 1610 nextra_matches_allowed--; 1611 stringp++; 1612 } 1613 regexp += (int)*regexp + 2; 1614 return (test_repeated_ascii_char(repeat_startp, stringp, 1615 regexp)); 1616 } 1617 /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ 1618 1619 case ZERO_OR_MORE_GROUP: /* (.....)* */ 1620 case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1621 case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1622 case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1623 1624 /* 1625 * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1626 * <group_length><compiled_regex...>\ 1627 * <END_GROUP|ZERO_OR_MORE><groupn> 1628 * 1629 * NOTE: 1630 * 1631 * group_length + (256 * ADDED_LENGTH_BITS) == 1632 * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\ 1633 * <groupn>) 1634 * 1635 */ 1636 1637 group_length = 1638 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1639 TIMES_256_SHIFT); 1640 regexp++; 1641 group_length += (unsigned int)*regexp; 1642 regexp++; 1643 repeat_startp = stringp; 1644 test_stringp = test_string(stringp, regexp); 1645 while (test_stringp != (char *)0) { 1646 if (push_stringp(stringp) == (char *)0) 1647 return ((char *)0); 1648 stringp = test_stringp; 1649 test_stringp = test_string(stringp, regexp); 1650 } 1651 regexp += group_length; 1652 return (test_repeated_group(repeat_startp, stringp, regexp)); 1653 1654 /* end case ZERO_OR_MORE_GROUP */ 1655 1656 case END_GROUP|ZERO_OR_MORE: /* (.....)* */ 1657 1658 /* encoded as <END_GROUP|ZERO_OR_MORE> */ 1659 1660 /* return from recursive call to test_string() */ 1661 1662 return ((char *)stringp); 1663 1664 /* end case END_GROUP|ZERO_OR_MORE */ 1665 1666 case ONE_OR_MORE_GROUP: /* (.....)+ */ 1667 case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1668 case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1669 case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1670 1671 /* 1672 * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1673 * <group_length><compiled_regex...>\ 1674 * <END_GROUP|ONE_OR_MORE><groupn> 1675 * 1676 * NOTE: 1677 * 1678 * group_length + (256 * ADDED_LENGTH_BITS) == 1679 * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\ 1680 * <groupn>) 1681 */ 1682 1683 group_length = 1684 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1685 TIMES_256_SHIFT); 1686 regexp++; 1687 group_length += (unsigned int)*regexp; 1688 regexp++; 1689 stringp = test_string(stringp, regexp); 1690 if (stringp == (char *)0) 1691 return ((char *)0); 1692 repeat_startp = stringp; 1693 test_stringp = test_string(stringp, regexp); 1694 while (test_stringp != (char *)0) { 1695 if (push_stringp(stringp) == (char *)0) 1696 return ((char *)0); 1697 stringp = test_stringp; 1698 test_stringp = test_string(stringp, regexp); 1699 } 1700 regexp += group_length; 1701 return (test_repeated_group(repeat_startp, stringp, regexp)); 1702 1703 /* end case ONE_OR_MORE_GROUP */ 1704 1705 case END_GROUP|ONE_OR_MORE: /* (.....)+ */ 1706 1707 /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */ 1708 1709 /* return from recursive call to test_string() */ 1710 1711 return ((char *)stringp); 1712 1713 /* end case END_GROUP|ONE_OR_MORE */ 1714 1715 case COUNTED_GROUP: /* (.....){max_count,min_count} */ 1716 case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH: 1717 case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH: 1718 case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH: 1719 1720 /* 1721 * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ 1722 * <compiled_regex...>\<END_GROUP|COUNT><groupn>\ 1723 * <minimum_match_count><maximum_match_count> 1724 * 1725 * NOTE: 1726 * 1727 * group_length + (256 * ADDED_LENGTH_BITS) == 1728 * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>) 1729 * 1730 * but does not include the <minimum_match_count> or 1731 * <maximum_match_count> bytes 1732 */ 1733 1734 group_length = 1735 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1736 TIMES_256_SHIFT); 1737 regexp++; 1738 group_length += (unsigned int)*regexp; 1739 regexp++; 1740 get_match_counts(&nmust_match, &nextra_matches_allowed, 1741 regexp + group_length); 1742 test_stringp = test_string(stringp, regexp); 1743 while ((test_stringp != (char *)0) && (nmust_match > 0)) { 1744 stringp = test_stringp; 1745 nmust_match--; 1746 test_stringp = test_string(stringp, regexp); 1747 } 1748 if (nmust_match > 0) { 1749 return ((char *)0); 1750 } else if (nextra_matches_allowed == UNLIMITED) { 1751 repeat_startp = stringp; 1752 while (test_stringp != (char *)0) { 1753 if (push_stringp(stringp) == (char *)0) 1754 return ((char *)0); 1755 stringp = test_stringp; 1756 test_stringp = test_string(stringp, regexp); 1757 } 1758 regexp += group_length + 2; 1759 return (test_repeated_group(repeat_startp, stringp, 1760 regexp)); 1761 } else { 1762 repeat_startp = stringp; 1763 while ((test_stringp != (char *)0) && 1764 (nextra_matches_allowed > 0)) { 1765 nextra_matches_allowed--; 1766 if (push_stringp(stringp) == (char *)0) 1767 return ((char *)0); 1768 stringp = test_stringp; 1769 test_stringp = test_string(stringp, regexp); 1770 } 1771 regexp += group_length + 2; 1772 return (test_repeated_group(repeat_startp, stringp, 1773 regexp)); 1774 } 1775 /* end case COUNTED_GROUP */ 1776 1777 case END_GROUP|COUNT: /* (.....){max_count,min_count} */ 1778 1779 /* encoded as <END_GROUP|COUNT> */ 1780 1781 /* return from recursive call to test_string() */ 1782 1783 return (stringp); 1784 1785 /* end case END_GROUP|COUNT */ 1786 1787 case END_OF_STRING_MARK: 1788 1789 /* encoded as <END_OF_STRING_MARK><END_REGEX> */ 1790 1791 if (*stringp == '\0') { 1792 regexp++; 1793 } else { 1794 return ((char *)0); 1795 } 1796 break; /* end case END_OF_STRING_MARK */ 1797 1798 case END_REGEX: /* end of the compiled regular expression */ 1799 1800 /* encoded as <END_REGEX> */ 1801 1802 return (stringp); 1803 1804 /* end case END_REGEX */ 1805 1806 default: 1807 1808 return ((char *)0); 1809 1810 } /* end switch (*regexp) */ 1811 1812 } /* end for (;;) */ 1813 1814 } /* test_string() */ 1815