1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * IMPORTANT NOTE: 34 * 35 * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. 36 * IT IS **NOT** CHARACTER SET INDEPENDENT. 37 * 38 */ 39 40 #pragma weak _regex = regex 41 42 #include "lint.h" 43 /* CONSTANTS SHARED WITH regcmp() */ 44 #include "regex.h" 45 #include "mtlib.h" 46 #include <limits.h> 47 #include <stdarg.h> 48 #include <stdlib.h> 49 #include <thread.h> 50 #include <widec.h> 51 #include "tsd.h" 52 53 54 /* PRIVATE CONSTANTS */ 55 56 #define ADD_256_TO_GROUP_LENGTH 0x1 57 #define ADD_512_TO_GROUP_LENGTH 0x2 58 #define ADD_768_TO_GROUP_LENGTH 0x3 59 #define ADDED_LENGTH_BITS 0x3 60 #define SINGLE_BYTE_MASK 0xff 61 #define STRINGP_STACK_SIZE 50 62 63 64 /* PRIVATE TYPE DEFINITIONS */ 65 66 typedef enum { 67 NOT_IN_CLASS = 0, 68 IN_CLASS 69 } char_test_condition_t; 70 71 typedef enum { 72 TESTING_CHAR = 0, 73 CONDITION_TRUE, 74 CONDITION_FALSE, 75 CHAR_TEST_ERROR 76 } char_test_result_t; 77 78 79 /* PRIVATE GLOBAL VARIABLES */ 80 81 static mutex_t regex_lock = DEFAULTMUTEX; 82 static int return_arg_number[NSUBSTRINGS]; 83 static const char *substring_endp[NSUBSTRINGS]; 84 static const char *substring_startp[NSUBSTRINGS]; 85 static const char *stringp_stack[STRINGP_STACK_SIZE]; 86 static const char **stringp_stackp; 87 88 89 /* DECLARATIONS OF PRIVATE FUNCTIONS */ 90 91 static int 92 get_wchar(wchar_t *wcharp, 93 const char *stringp); 94 95 static void 96 get_match_counts(int *nmust_matchp, 97 int *nextra_matches_allowedp, 98 const char *count_stringp); 99 100 static boolean_t 101 in_wchar_range(wchar_t test_char, 102 wchar_t lower_char, 103 wchar_t upper_char); 104 105 static const char * 106 pop_stringp(void); 107 108 static const char * 109 previous_charp(const char *current_charp); 110 111 static const char * 112 push_stringp(const char *stringp); 113 114 static char_test_result_t 115 test_char_against_ascii_class(char test_char, 116 const char *classp, 117 char_test_condition_t test_condition); 118 119 static char_test_result_t 120 test_char_against_multibyte_class(wchar_t test_char, 121 const char *classp, 122 char_test_condition_t test_condition); 123 124 125 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 126 127 static char_test_result_t 128 test_char_against_old_ascii_class(char test_char, 129 const char *classp, 130 char_test_condition_t test_condition); 131 132 static const char * 133 test_repeated_ascii_char(const char *repeat_startp, 134 const char *stringp, 135 const char *regexp); 136 137 static const char * 138 test_repeated_multibyte_char(const char *repeat_startp, 139 const char *stringp, 140 const char *regexp); 141 142 static const char * 143 test_repeated_group(const char *repeat_startp, 144 const char *stringp, 145 const char *regexp); 146 147 static const char * 148 test_string(const char *stringp, 149 const char *regexp); 150 151 152 /* DEFINITIONS OF PUBLIC VARIABLES */ 153 154 char *__loc1; 155 156 /* 157 * reserve thread-specific storage for __loc1 158 */ 159 char ** 160 ____loc1(void) 161 { 162 if (thr_main()) 163 return (&__loc1); 164 return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL)); 165 } 166 167 #define __loc1 (*(____loc1())) 168 169 /* DEFINITION OF regex() */ 170 171 extern char * 172 regex(const char *regexp, const char *stringp, ...) 173 { 174 va_list arg_listp; 175 int char_size; 176 const char *end_of_matchp; 177 wchar_t regex_wchar; 178 char *return_argp[NSUBSTRINGS]; 179 char *returned_substringp; 180 int substringn; 181 const char *substringp; 182 wchar_t string_wchar; 183 184 if (____loc1() == (char **)0) { 185 return ((char *)0); 186 } else { 187 lmutex_lock(®ex_lock); 188 __loc1 = (char *)0; 189 } 190 191 if ((stringp == (char *)0) || (regexp == (char *)0)) { 192 lmutex_unlock(®ex_lock); 193 return ((char *)0); 194 } 195 196 197 /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ 198 199 substringn = 0; 200 va_start(arg_listp, stringp); 201 while (substringn < NSUBSTRINGS) { 202 return_argp[substringn] = va_arg(arg_listp, char *); 203 substring_startp[substringn] = (char *)0; 204 return_arg_number[substringn] = -1; 205 substringn++; 206 } 207 va_end(arg_listp); 208 209 210 /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ 211 212 end_of_matchp = (char *)0; 213 stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; 214 215 if ((int)*regexp == (int)START_OF_STRING_MARK) { 216 217 /* 218 * the match must start at the beginning of the string 219 */ 220 221 __loc1 = (char *)stringp; 222 regexp++; 223 end_of_matchp = test_string(stringp, regexp); 224 225 } else if ((int)*regexp == (int)ASCII_CHAR) { 226 227 /* 228 * test a string against a regular expression 229 * that starts with a single ASCII character: 230 * 231 * move to each character in the string that matches 232 * the first character in the regular expression 233 * and test the remaining string 234 */ 235 236 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 237 stringp++; 238 } 239 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 240 end_of_matchp = test_string(stringp, regexp); 241 if (end_of_matchp != (char *)0) { 242 __loc1 = (char *)stringp; 243 } else { 244 stringp++; 245 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 246 stringp++; 247 } 248 } 249 } 250 251 } else if (!multibyte) { 252 253 /* 254 * if the value of the "multibyte" macro defined in <euc.h> 255 * is false, regex() is running in an ASCII locale; 256 * test an ASCII string against an ASCII regular expression 257 * that doesn't start with a single ASCII character: 258 * 259 * move forward in the string one byte at a time, testing 260 * the remaining string against the regular expression 261 */ 262 263 end_of_matchp = test_string(stringp, regexp); 264 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 265 stringp++; 266 end_of_matchp = test_string(stringp, regexp); 267 } 268 if (end_of_matchp != (char *)0) { 269 __loc1 = (char *)stringp; 270 } 271 272 } else if ((int)*regexp == (int)MULTIBYTE_CHAR) { 273 274 /* 275 * test a multibyte string against a multibyte regular expression 276 * that starts with a single multibyte character: 277 * 278 * move to each character in the string that matches 279 * the first character in the regular expression 280 * and test the remaining string 281 */ 282 283 (void) get_wchar(®ex_wchar, regexp + 1); 284 char_size = get_wchar(&string_wchar, stringp); 285 while ((string_wchar != regex_wchar) && (char_size > 0)) { 286 stringp += char_size; 287 char_size = get_wchar(&string_wchar, stringp); 288 } 289 while ((end_of_matchp == (char *)0) && (char_size > 0)) { 290 end_of_matchp = test_string(stringp, regexp); 291 if (end_of_matchp != (char *)0) { 292 __loc1 = (char *)stringp; 293 } else { 294 stringp += char_size; 295 char_size = get_wchar(&string_wchar, stringp); 296 while ((string_wchar != regex_wchar) && (char_size > 0)) { 297 stringp += char_size; 298 char_size = get_wchar(&string_wchar, stringp); 299 } 300 } 301 } 302 303 } else { 304 305 /* 306 * test a multibyte string against a multibyte regular expression 307 * that doesn't start with a single multibyte character 308 * 309 * move forward in the string one multibyte character at a time, 310 * testing the remaining string against the regular expression 311 */ 312 313 end_of_matchp = test_string(stringp, regexp); 314 char_size = get_wchar(&string_wchar, stringp); 315 while ((end_of_matchp == (char *)0) && (char_size > 0)) { 316 stringp += char_size; 317 end_of_matchp = test_string(stringp, regexp); 318 char_size = get_wchar(&string_wchar, stringp); 319 } 320 if (end_of_matchp != (char *)0) { 321 __loc1 = (char *)stringp; 322 } 323 } 324 325 /* 326 * Return substrings that matched subexpressions for which 327 * matching substrings are to be returned. 328 * 329 * NOTE: 330 * 331 * According to manual page regcmp(3G), regex() returns substrings 332 * that match subexpressions even when no substring matches the 333 * entire regular expression. 334 */ 335 336 substringn = 0; 337 while (substringn < NSUBSTRINGS) { 338 substringp = substring_startp[substringn]; 339 if ((substringp != (char *)0) && 340 (return_arg_number[substringn] >= 0)) { 341 returned_substringp = 342 return_argp[return_arg_number[substringn]]; 343 if (returned_substringp != (char *)0) { 344 while (substringp < substring_endp[substringn]) { 345 *returned_substringp = (char)*substringp; 346 returned_substringp++; 347 substringp++; 348 } 349 *returned_substringp = '\0'; 350 } 351 } 352 substringn++; 353 } 354 lmutex_unlock(®ex_lock); 355 return ((char *)end_of_matchp); 356 } /* regex() */ 357 358 359 /* DEFINITIONS OF PRIVATE FUNCTIONS */ 360 361 static int 362 get_wchar(wchar_t *wcharp, 363 const char *stringp) 364 { 365 int char_size; 366 367 if (stringp == (char *)0) { 368 char_size = 0; 369 *wcharp = (wchar_t)((unsigned int)'\0'); 370 } else if (*stringp == '\0') { 371 char_size = 0; 372 *wcharp = (wchar_t)((unsigned int)*stringp); 373 } else if ((unsigned char)*stringp <= (unsigned char)0x7f) { 374 char_size = 1; 375 *wcharp = (wchar_t)((unsigned int)*stringp); 376 } else { 377 char_size = mbtowc(wcharp, stringp, MB_LEN_MAX); 378 } 379 return (char_size); 380 } 381 382 static void 383 get_match_counts(int *nmust_matchp, 384 int *nextra_matches_allowedp, 385 const char *count_stringp) 386 { 387 int minimum_match_count; 388 int maximum_match_count; 389 390 minimum_match_count = 391 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 392 *nmust_matchp = minimum_match_count; 393 394 count_stringp++; 395 maximum_match_count = 396 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 397 if (maximum_match_count == (int)UNLIMITED) { 398 *nextra_matches_allowedp = (int)UNLIMITED; 399 } else { 400 *nextra_matches_allowedp = 401 maximum_match_count - minimum_match_count; 402 } 403 return; 404 405 } /* get_match_counts() */ 406 407 static boolean_t 408 in_wchar_range(wchar_t test_char, 409 wchar_t lower_char, 410 wchar_t upper_char) 411 { 412 return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && 413 (lower_char <= test_char) && (test_char <= upper_char)) || 414 (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) && 415 ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) && 416 (lower_char <= test_char) && (test_char <= upper_char))); 417 418 } /* in_wchar_range() */ 419 420 static const char * 421 pop_stringp(void) 422 { 423 const char *stringp; 424 425 if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) { 426 return ((char *)0); 427 } else { 428 stringp = *stringp_stackp; 429 stringp_stackp++; 430 return (stringp); 431 } 432 } 433 434 435 static const char * 436 previous_charp(const char *current_charp) 437 { 438 /* 439 * returns the pointer to the previous character in 440 * a string of multibyte characters 441 */ 442 443 const char *prev_cs0 = current_charp - 1; 444 const char *prev_cs1 = current_charp - eucw1; 445 const char *prev_cs2 = current_charp - eucw2 - 1; 446 const char *prev_cs3 = current_charp - eucw3 - 1; 447 const char *prev_charp; 448 449 if ((unsigned char)*prev_cs0 <= 0x7f) { 450 prev_charp = prev_cs0; 451 } else if ((unsigned char)*prev_cs2 == SS2) { 452 prev_charp = prev_cs2; 453 } else if ((unsigned char)*prev_cs3 == SS3) { 454 prev_charp = prev_cs3; 455 } else { 456 prev_charp = prev_cs1; 457 } 458 return (prev_charp); 459 460 } /* previous_charp() */ 461 462 static const char * 463 push_stringp(const char *stringp) 464 { 465 if (stringp_stackp <= &stringp_stack[0]) { 466 return ((char *)0); 467 } else { 468 stringp_stackp--; 469 *stringp_stackp = stringp; 470 return (stringp); 471 } 472 } 473 474 475 static char_test_result_t 476 test_char_against_ascii_class(char test_char, 477 const char *classp, 478 char_test_condition_t test_condition) 479 { 480 /* 481 * tests a character for membership in an ASCII character class compiled 482 * by the internationalized version of regcmp(); 483 * 484 * NOTE: The internationalized version of regcmp() compiles 485 * the range a-z in an ASCII character class to aTHRUz. 486 */ 487 488 int nbytes_to_check; 489 490 nbytes_to_check = (int)*classp; 491 classp++; 492 nbytes_to_check--; 493 494 while (nbytes_to_check > 0) { 495 if (test_char == *classp) { 496 if (test_condition == IN_CLASS) 497 return (CONDITION_TRUE); 498 else 499 return (CONDITION_FALSE); 500 } else if (*classp == THRU) { 501 if ((*(classp - 1) <= test_char) && 502 (test_char <= *(classp + 1))) { 503 if (test_condition == IN_CLASS) 504 return (CONDITION_TRUE); 505 else 506 return (CONDITION_FALSE); 507 } else { 508 classp += 2; 509 nbytes_to_check -= 2; 510 } 511 } else { 512 classp++; 513 nbytes_to_check--; 514 } 515 } 516 if (test_condition == NOT_IN_CLASS) { 517 return (CONDITION_TRUE); 518 } else { 519 return (CONDITION_FALSE); 520 } 521 } /* test_char_against_ascii_class() */ 522 523 static char_test_result_t 524 test_char_against_multibyte_class(wchar_t test_char, 525 const char *classp, 526 char_test_condition_t test_condition) 527 { 528 /* 529 * tests a character for membership in a multibyte character class; 530 * 531 * NOTE: The range a-z in a multibyte character class compiles to 532 * aTHRUz. 533 */ 534 535 int char_size; 536 wchar_t current_char; 537 int nbytes_to_check; 538 wchar_t previous_char; 539 540 nbytes_to_check = (int)*classp; 541 classp++; 542 nbytes_to_check--; 543 544 char_size = get_wchar(¤t_char, classp); 545 if (char_size <= 0) { 546 return (CHAR_TEST_ERROR); 547 } else if (test_char == current_char) { 548 if (test_condition == IN_CLASS) { 549 return (CONDITION_TRUE); 550 } else { 551 return (CONDITION_FALSE); 552 } 553 } else { 554 classp += char_size; 555 nbytes_to_check -= char_size; 556 } 557 558 while (nbytes_to_check > 0) { 559 previous_char = current_char; 560 char_size = get_wchar(¤t_char, classp); 561 if (char_size <= 0) { 562 return (CHAR_TEST_ERROR); 563 } else if (test_char == current_char) { 564 if (test_condition == IN_CLASS) { 565 return (CONDITION_TRUE); 566 } else { 567 return (CONDITION_FALSE); 568 } 569 } else if (current_char == THRU) { 570 classp += char_size; 571 nbytes_to_check -= char_size; 572 char_size = get_wchar(¤t_char, classp); 573 if (char_size <= 0) { 574 return (CHAR_TEST_ERROR); 575 } else if (in_wchar_range(test_char, previous_char, 576 current_char)) { 577 if (test_condition == IN_CLASS) { 578 return (CONDITION_TRUE); 579 } else { 580 return (CONDITION_FALSE); 581 } 582 } else { 583 classp += char_size; 584 nbytes_to_check -= char_size; 585 } 586 } else { 587 classp += char_size; 588 nbytes_to_check -= char_size; 589 } 590 } 591 if (test_condition == NOT_IN_CLASS) { 592 return (CONDITION_TRUE); 593 } else { 594 return (CONDITION_FALSE); 595 } 596 } /* test_char_against_multibyte_class() */ 597 598 599 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 600 601 static char_test_result_t 602 test_char_against_old_ascii_class(char test_char, 603 const char *classp, 604 char_test_condition_t test_condition) 605 { 606 /* 607 * tests a character for membership in an ASCII character class compiled 608 * by the ASCII version of regcmp(); 609 * 610 * NOTE: ASCII versions of regcmp() compile the range a-z in an 611 * ASCII character class to THRUaz. The internationalized 612 * version compiles the same range to aTHRUz. 613 */ 614 615 int nbytes_to_check; 616 617 nbytes_to_check = (int)*classp; 618 classp++; 619 nbytes_to_check--; 620 621 while (nbytes_to_check > 0) { 622 if (test_char == *classp) { 623 if (test_condition == IN_CLASS) { 624 return (CONDITION_TRUE); 625 } else { 626 return (CONDITION_FALSE); 627 } 628 } else if (*classp == THRU) { 629 if ((*(classp + 1) <= test_char) && 630 (test_char <= *(classp + 2))) { 631 if (test_condition == IN_CLASS) { 632 return (CONDITION_TRUE); 633 } else { 634 return (CONDITION_FALSE); 635 } 636 } else { 637 classp += 3; 638 nbytes_to_check -= 3; 639 } 640 } else { 641 classp++; 642 nbytes_to_check--; 643 } 644 } 645 if (test_condition == NOT_IN_CLASS) { 646 return (CONDITION_TRUE); 647 } else { 648 return (CONDITION_FALSE); 649 } 650 } /* test_char_against_old_ascii_class() */ 651 652 static const char * 653 test_repeated_ascii_char(const char *repeat_startp, 654 const char *stringp, 655 const char *regexp) 656 { 657 const char *end_of_matchp; 658 659 end_of_matchp = test_string(stringp, regexp); 660 while ((end_of_matchp == (char *)0) && 661 (stringp > repeat_startp)) { 662 stringp--; 663 end_of_matchp = test_string(stringp, regexp); 664 } 665 return (end_of_matchp); 666 } 667 668 static const char * 669 test_repeated_multibyte_char(const char *repeat_startp, 670 const char *stringp, 671 const char *regexp) 672 { 673 const char *end_of_matchp; 674 675 end_of_matchp = test_string(stringp, regexp); 676 while ((end_of_matchp == (char *)0) && 677 (stringp > repeat_startp)) { 678 stringp = previous_charp(stringp); 679 end_of_matchp = test_string(stringp, regexp); 680 } 681 return (end_of_matchp); 682 } 683 684 static const char * 685 test_repeated_group(const char *repeat_startp, 686 const char *stringp, 687 const char *regexp) 688 { 689 const char *end_of_matchp; 690 691 end_of_matchp = test_string(stringp, regexp); 692 while ((end_of_matchp == (char *)0) && 693 (stringp > repeat_startp)) { 694 stringp = pop_stringp(); 695 if (stringp == (char *)0) { 696 return ((char *)0); 697 } 698 end_of_matchp = test_string(stringp, regexp); 699 } 700 return (end_of_matchp); 701 } 702 703 static const char * 704 test_string(const char *stringp, 705 const char *regexp) 706 { 707 /* 708 * returns a pointer to the first character following the first 709 * substring of the string addressed by stringp that matches 710 * the compiled regular expression addressed by regexp 711 */ 712 713 unsigned int group_length; 714 int nextra_matches_allowed; 715 int nmust_match; 716 wchar_t regex_wchar; 717 int regex_char_size; 718 const char *repeat_startp; 719 unsigned int return_argn; 720 wchar_t string_wchar; 721 int string_char_size; 722 unsigned int substringn; 723 char_test_condition_t test_condition; 724 const char *test_stringp; 725 726 for (;;) { 727 728 /* 729 * Exit the loop via a return whenever there's a match 730 * or it's clear that there can be no match. 731 */ 732 733 switch ((int)*regexp) { 734 735 /* 736 * No fall-through. 737 * Each case ends with either a return or with stringp 738 * addressing the next character to be tested and regexp 739 * addressing the next compiled regular expression 740 * 741 * NOTE: The comments for each case give the meaning 742 * of the compiled regular expression decoded by the case 743 * and the character string that the compiled regular 744 * expression uses to encode the case. Each single 745 * character encoded in the compiled regular expression 746 * is shown enclosed in angle brackets (<>). Each 747 * compiled regular expression begins with a marker 748 * character which is shown as a named constant 749 * (e.g. <ASCII_CHAR>). Character constants are shown 750 * enclosed in single quotes (e.g. <'$'>). All other 751 * single characters encoded in the compiled regular 752 * expression are shown as lower case variable names 753 * (e.g. <ascii_char> or <multibyte_char>). Multicharacter 754 * strings encoded in the compiled regular expression 755 * are shown as variable names followed by elipses 756 * (e.g. <compiled_regex...>). 757 */ 758 759 case ASCII_CHAR: /* single ASCII char */ 760 761 /* encoded as <ASCII_CHAR><ascii_char> */ 762 763 regexp++; 764 if (*regexp == *stringp) { 765 regexp++; 766 stringp++; 767 } else { 768 return ((char *)0); 769 } 770 break; /* end case ASCII_CHAR */ 771 772 case MULTIBYTE_CHAR: /* single multibyte char */ 773 774 /* encoded as <MULTIBYTE_CHAR><multibyte_char> */ 775 776 regexp++; 777 regex_char_size = get_wchar(®ex_wchar, regexp); 778 string_char_size = get_wchar(&string_wchar, stringp); 779 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 780 return ((char *)0); 781 } else { 782 regexp += regex_char_size; 783 stringp += string_char_size; 784 } 785 break; /* end case MULTIBYTE_CHAR */ 786 787 case ANY_CHAR: /* any single ASCII or multibyte char */ 788 789 /* encoded as <ANY_CHAR> */ 790 791 if (!multibyte) { 792 if (*stringp == '\0') { 793 return ((char *)0); 794 } else { 795 regexp++; 796 stringp++; 797 } 798 } else { 799 string_char_size = get_wchar(&string_wchar, stringp); 800 if (string_char_size <= 0) { 801 return ((char *)0); 802 } else { 803 regexp++; 804 stringp += string_char_size; 805 } 806 } 807 break; /* end case ANY_CHAR */ 808 809 case IN_ASCII_CHAR_CLASS: /* [.....] */ 810 case NOT_IN_ASCII_CHAR_CLASS: 811 812 /* 813 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...> 814 * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...> 815 * 816 * NOTE: <class_length> includes the <class_length> byte 817 */ 818 819 if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) { 820 test_condition = IN_CLASS; 821 } else { 822 test_condition = NOT_IN_CLASS; 823 } 824 regexp++; /* point to the <class_length> byte */ 825 826 if ((*stringp != '\0') && 827 (test_char_against_ascii_class(*stringp, regexp, 828 test_condition) == CONDITION_TRUE)) { 829 regexp += (int)*regexp; /* add the class length to regexp */ 830 stringp++; 831 } else { 832 return ((char *)0); 833 } 834 break; /* end case IN_ASCII_CHAR_CLASS */ 835 836 case IN_MULTIBYTE_CHAR_CLASS: /* [....] */ 837 case NOT_IN_MULTIBYTE_CHAR_CLASS: 838 839 /* 840 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 841 * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 842 * 843 * NOTE: <class_length> includes the <class_length> byte 844 */ 845 846 if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) { 847 test_condition = IN_CLASS; 848 } else { 849 test_condition = NOT_IN_CLASS; 850 } 851 regexp++; /* point to the <class_length> byte */ 852 853 string_char_size = get_wchar(&string_wchar, stringp); 854 if ((string_char_size > 0) && 855 (test_char_against_multibyte_class(string_wchar, regexp, 856 test_condition) == CONDITION_TRUE)) { 857 regexp += (int)*regexp; /* add the class length to regexp */ 858 stringp += string_char_size; 859 } else { 860 return ((char *)0); 861 } 862 break; /* end case IN_MULTIBYTE_CHAR_CLASS */ 863 864 case IN_OLD_ASCII_CHAR_CLASS: /* [...] */ 865 case NOT_IN_OLD_ASCII_CHAR_CLASS: 866 867 /* 868 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 869 * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 870 * 871 * NOTE: <class_length> includes the <class_length> byte 872 */ 873 874 if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) { 875 test_condition = IN_CLASS; 876 } else { 877 test_condition = NOT_IN_CLASS; 878 } 879 regexp++; /* point to the <class_length> byte */ 880 881 if ((*stringp != '\0') && 882 (test_char_against_old_ascii_class(*stringp, regexp, 883 test_condition) == CONDITION_TRUE)) { 884 regexp += (int)*regexp; /* add the class length to regexp */ 885 stringp++; 886 } else { 887 return ((char *)0); 888 } 889 break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ 890 891 case SIMPLE_GROUP: /* (.....) */ 892 893 /* encoded as <SIMPLE_GROUP><group_length> */ 894 895 regexp += 2; 896 break; /* end case SIMPLE_GROUP */ 897 898 case END_GROUP: /* (.....) */ 899 900 /* encoded as <END_GROUP><groupn> */ 901 902 regexp += 2; 903 break; /* end case END_GROUP */ 904 905 case SAVED_GROUP: /* (.....)$0-9 */ 906 907 /* encoded as <SAVED_GROUP><substringn> */ 908 909 regexp++; 910 substringn = (unsigned int)*regexp; 911 if (substringn >= NSUBSTRINGS) 912 return ((char *)0); 913 substring_startp[substringn] = stringp; 914 regexp++; 915 break; /* end case SAVED_GROUP */ 916 917 case END_SAVED_GROUP: /* (.....)$0-9 */ 918 919 /* 920 * encoded as <END_SAVED_GROUP><substringn>\ 921 * <return_arg_number[substringn]> 922 */ 923 924 regexp++; 925 substringn = (unsigned int)*regexp; 926 if (substringn >= NSUBSTRINGS) 927 return ((char *)0); 928 substring_endp[substringn] = stringp; 929 regexp++; 930 return_argn = (unsigned int)*regexp; 931 if (return_argn >= NSUBSTRINGS) 932 return ((char *)0); 933 return_arg_number[substringn] = return_argn; 934 regexp++; 935 break; /* end case END_SAVED_GROUP */ 936 937 case ASCII_CHAR|ZERO_OR_MORE: /* char* */ 938 939 /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */ 940 941 regexp++; 942 repeat_startp = stringp; 943 while (*stringp == *regexp) { 944 stringp++; 945 } 946 regexp++; 947 return (test_repeated_ascii_char(repeat_startp, 948 stringp, regexp)); 949 950 /* end case ASCII_CHAR|ZERO_OR_MORE */ 951 952 case ASCII_CHAR|ONE_OR_MORE: /* char+ */ 953 954 /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */ 955 956 regexp++; 957 if (*stringp != *regexp) { 958 return ((char *)0); 959 } else { 960 stringp++; 961 repeat_startp = stringp; 962 while (*stringp == *regexp) { 963 stringp++; 964 } 965 regexp++; 966 return (test_repeated_ascii_char(repeat_startp, stringp, 967 regexp)); 968 } 969 /* end case ASCII_CHAR|ONE_OR_MORE */ 970 971 case ASCII_CHAR|COUNT: /* char{min_count,max_count} */ 972 973 /* 974 * encoded as <ASCII_CHAR|COUNT><ascii_char>\ 975 * <minimum_match_count><maximum_match_count> 976 */ 977 978 regexp++; 979 get_match_counts(&nmust_match, &nextra_matches_allowed, 980 regexp + 1); 981 while ((*stringp == *regexp) && (nmust_match > 0)) { 982 nmust_match--; 983 stringp++; 984 } 985 if (nmust_match > 0) { 986 return ((char *)0); 987 } else if (nextra_matches_allowed == UNLIMITED) { 988 repeat_startp = stringp; 989 while (*stringp == *regexp) { 990 stringp++; 991 } 992 regexp += 3; 993 return (test_repeated_ascii_char(repeat_startp, stringp, 994 regexp)); 995 } else { 996 repeat_startp = stringp; 997 while ((*stringp == *regexp) && 998 (nextra_matches_allowed > 0)) { 999 nextra_matches_allowed--; 1000 stringp++; 1001 } 1002 regexp += 3; 1003 return (test_repeated_ascii_char(repeat_startp, stringp, 1004 regexp)); 1005 } 1006 /* end case ASCII_CHAR|COUNT */ 1007 1008 case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */ 1009 1010 /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */ 1011 1012 regexp++; 1013 regex_char_size = get_wchar(®ex_wchar, regexp); 1014 repeat_startp = stringp; 1015 string_char_size = get_wchar(&string_wchar, stringp); 1016 while ((string_char_size > 0) && 1017 (string_wchar == regex_wchar)) { 1018 stringp += string_char_size; 1019 string_char_size = get_wchar(&string_wchar, stringp); 1020 } 1021 regexp += regex_char_size; 1022 return (test_repeated_multibyte_char(repeat_startp, stringp, 1023 regexp)); 1024 1025 /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ 1026 1027 case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */ 1028 1029 /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */ 1030 1031 regexp++; 1032 regex_char_size = get_wchar(®ex_wchar, regexp); 1033 string_char_size = get_wchar(&string_wchar, stringp); 1034 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 1035 return ((char *)0); 1036 } else { 1037 stringp += string_char_size; 1038 repeat_startp = stringp; 1039 string_char_size = get_wchar(&string_wchar, stringp); 1040 while ((string_char_size > 0) && 1041 (string_wchar == regex_wchar)) { 1042 stringp += string_char_size; 1043 string_char_size = get_wchar(&string_wchar, stringp); 1044 } 1045 regexp += regex_char_size; 1046 return (test_repeated_multibyte_char(repeat_startp, stringp, 1047 regexp)); 1048 } 1049 /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ 1050 1051 case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */ 1052 1053 /* 1054 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\ 1055 * <minimum_match_count><maximum_match_count> 1056 */ 1057 1058 regexp++; 1059 regex_char_size = get_wchar(®ex_wchar, regexp); 1060 get_match_counts(&nmust_match, &nextra_matches_allowed, 1061 regexp + regex_char_size); 1062 string_char_size = get_wchar(&string_wchar, stringp); 1063 while ((string_char_size > 0) && 1064 (string_wchar == regex_wchar) && 1065 (nmust_match > 0)) { 1066 1067 nmust_match--; 1068 stringp += string_char_size; 1069 string_char_size = get_wchar(&string_wchar, stringp); 1070 } 1071 if (nmust_match > 0) { 1072 return ((char *)0); 1073 } else if (nextra_matches_allowed == UNLIMITED) { 1074 repeat_startp = stringp; 1075 while ((string_char_size > 0) && 1076 (string_wchar == regex_wchar)) { 1077 stringp += string_char_size; 1078 string_char_size = get_wchar(&string_wchar, stringp); 1079 } 1080 regexp += regex_char_size + 2; 1081 return (test_repeated_multibyte_char(repeat_startp, stringp, 1082 regexp)); 1083 } else { 1084 repeat_startp = stringp; 1085 while ((string_char_size > 0) && 1086 (string_wchar == regex_wchar) && 1087 (nextra_matches_allowed > 0)) { 1088 nextra_matches_allowed--; 1089 stringp += string_char_size; 1090 string_char_size = get_wchar(&string_wchar, stringp); 1091 } 1092 regexp += regex_char_size + 2; 1093 return (test_repeated_multibyte_char(repeat_startp, stringp, 1094 regexp)); 1095 } 1096 /* end case MULTIBYTE_CHAR|COUNT */ 1097 1098 case ANY_CHAR|ZERO_OR_MORE: /* .* */ 1099 1100 /* encoded as <ANY_CHAR|ZERO_OR_MORE> */ 1101 1102 repeat_startp = stringp; 1103 if (!multibyte) { 1104 while (*stringp != '\0') { 1105 stringp++; 1106 } 1107 regexp++; 1108 return (test_repeated_ascii_char(repeat_startp, stringp, 1109 regexp)); 1110 } else { 1111 string_char_size = get_wchar(&string_wchar, stringp); 1112 while (string_char_size > 0) { 1113 stringp += string_char_size; 1114 string_char_size = get_wchar(&string_wchar, stringp); 1115 } 1116 regexp++; 1117 return (test_repeated_multibyte_char(repeat_startp, stringp, 1118 regexp)); 1119 } 1120 /* end case <ANY_CHAR|ZERO_OR_MORE> */ 1121 1122 case ANY_CHAR|ONE_OR_MORE: /* .+ */ 1123 1124 /* encoded as <ANY_CHAR|ONE_OR_MORE> */ 1125 1126 if (!multibyte) { 1127 if (*stringp == '\0') { 1128 return ((char *)0); 1129 } else { 1130 stringp++; 1131 repeat_startp = stringp; 1132 while (*stringp != '\0') { 1133 stringp++; 1134 } 1135 regexp++; 1136 return (test_repeated_ascii_char(repeat_startp, stringp, 1137 regexp)); 1138 } 1139 } else { 1140 string_char_size = get_wchar(&string_wchar, stringp); 1141 if (string_char_size <= 0) { 1142 return ((char *)0); 1143 } else { 1144 stringp += string_char_size; 1145 repeat_startp = stringp; 1146 string_char_size = get_wchar(&string_wchar, stringp); 1147 while (string_char_size > 0) { 1148 stringp += string_char_size; 1149 string_char_size = 1150 get_wchar(&string_wchar, stringp); 1151 } 1152 regexp++; 1153 return (test_repeated_multibyte_char(repeat_startp, 1154 stringp, regexp)); 1155 } 1156 } 1157 /* end case <ANY_CHAR|ONE_OR_MORE> */ 1158 1159 case ANY_CHAR|COUNT: /* .{min_count,max_count} */ 1160 1161 /* 1162 * encoded as <ANY_CHAR|COUNT>\ 1163 * <minimum_match_count><maximum_match_count> 1164 */ 1165 1166 get_match_counts(&nmust_match, &nextra_matches_allowed, 1167 regexp + 1); 1168 if (!multibyte) { 1169 while ((*stringp != '\0') && (nmust_match > 0)) { 1170 nmust_match--; 1171 stringp++; 1172 } 1173 if (nmust_match > 0) { 1174 return ((char *)0); 1175 } else if (nextra_matches_allowed == UNLIMITED) { 1176 repeat_startp = stringp; 1177 while (*stringp != '\0') { 1178 stringp++; 1179 } 1180 regexp += 3; 1181 return (test_repeated_ascii_char(repeat_startp, stringp, 1182 regexp)); 1183 } else { 1184 repeat_startp = stringp; 1185 while ((*stringp != '\0') && 1186 (nextra_matches_allowed > 0)) { 1187 nextra_matches_allowed--; 1188 stringp++; 1189 } 1190 regexp += 3; 1191 return (test_repeated_ascii_char(repeat_startp, stringp, 1192 regexp)); 1193 } 1194 } else { /* multibyte character */ 1195 1196 string_char_size = get_wchar(&string_wchar, stringp); 1197 while ((string_char_size > 0) && (nmust_match > 0)) { 1198 nmust_match--; 1199 stringp += string_char_size; 1200 string_char_size = get_wchar(&string_wchar, stringp); 1201 } 1202 if (nmust_match > 0) { 1203 return ((char *)0); 1204 } else if (nextra_matches_allowed == UNLIMITED) { 1205 repeat_startp = stringp; 1206 while (string_char_size > 0) { 1207 stringp += string_char_size; 1208 string_char_size = 1209 get_wchar(&string_wchar, stringp); 1210 } 1211 regexp += 3; 1212 return (test_repeated_multibyte_char(repeat_startp, 1213 stringp, regexp)); 1214 } else { 1215 repeat_startp = stringp; 1216 while ((string_char_size > 0) && 1217 (nextra_matches_allowed > 0)) { 1218 nextra_matches_allowed--; 1219 stringp += string_char_size; 1220 string_char_size = 1221 get_wchar(&string_wchar, stringp); 1222 } 1223 regexp += 3; 1224 return (test_repeated_multibyte_char(repeat_startp, 1225 stringp, regexp)); 1226 } 1227 } /* end case ANY_CHAR|COUNT */ 1228 1229 case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1230 case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1231 1232 /* 1233 * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1234 * <class_length><class ...> 1235 * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1236 * <class_length><class ...> 1237 * 1238 * NOTE: <class_length> includes the <class_length> byte 1239 */ 1240 1241 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1242 test_condition = IN_CLASS; 1243 } else { 1244 test_condition = NOT_IN_CLASS; 1245 } 1246 regexp++; /* point to the <class_length> byte */ 1247 1248 repeat_startp = stringp; 1249 while ((*stringp != '\0') && 1250 (test_char_against_ascii_class(*stringp, regexp, 1251 test_condition) == CONDITION_TRUE)) { 1252 stringp++; 1253 } 1254 regexp += (int)*regexp; /* add the class length to regexp */ 1255 return (test_repeated_ascii_char(repeat_startp, stringp, 1256 regexp)); 1257 1258 /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1259 1260 case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1261 case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE: 1262 1263 /* 1264 * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1265 * <class_length><class ...> 1266 * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1267 * <class_length><class ...> 1268 * 1269 * NOTE: <class_length> includes the <class_length> byte 1270 */ 1271 1272 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1273 test_condition = IN_CLASS; 1274 } else { 1275 test_condition = NOT_IN_CLASS; 1276 } 1277 regexp++; /* point to the <class_length> byte */ 1278 1279 if ((*stringp == '\0') || 1280 (test_char_against_ascii_class(*stringp, regexp, 1281 test_condition) != CONDITION_TRUE)) { 1282 return ((char *)0); 1283 } else { 1284 stringp++; 1285 repeat_startp = stringp; 1286 while ((*stringp != '\0') && 1287 (test_char_against_ascii_class(*stringp, regexp, 1288 test_condition) == CONDITION_TRUE)) { 1289 stringp++; 1290 } 1291 regexp += (int)*regexp; /* add the class length to regexp */ 1292 return (test_repeated_ascii_char(repeat_startp, stringp, 1293 regexp)); 1294 } 1295 /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ 1296 1297 case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */ 1298 case NOT_IN_ASCII_CHAR_CLASS | COUNT: 1299 1300 /* 1301 * endoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1302 * <class ...><minimum_match_count>\ 1303 * <maximum_match_count> 1304 * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1305 * <class ...><minimum_match_count>\ 1306 * <maximum_match_count> 1307 * 1308 * NOTE: <class_length> includes the <class_length> byte, 1309 * but not the <minimum_match_count> or 1310 * <maximum_match_count> bytes 1311 */ 1312 1313 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) { 1314 test_condition = IN_CLASS; 1315 } else { 1316 test_condition = NOT_IN_CLASS; 1317 } 1318 regexp++; /* point to the <class_length> byte */ 1319 1320 get_match_counts(&nmust_match, &nextra_matches_allowed, 1321 regexp + (int)*regexp); 1322 while ((*stringp != '\0') && 1323 (test_char_against_ascii_class(*stringp, regexp, 1324 test_condition) == CONDITION_TRUE) && 1325 (nmust_match > 0)) { 1326 nmust_match--; 1327 stringp++; 1328 } 1329 if (nmust_match > 0) { 1330 return ((char *)0); 1331 } else if (nextra_matches_allowed == UNLIMITED) { 1332 repeat_startp = stringp; 1333 while ((*stringp != '\0') && 1334 (test_char_against_ascii_class(*stringp, regexp, 1335 test_condition) == CONDITION_TRUE)) { 1336 stringp++; 1337 } 1338 regexp += (int)*regexp + 2; 1339 return (test_repeated_ascii_char(repeat_startp, stringp, 1340 regexp)); 1341 } else { 1342 repeat_startp = stringp; 1343 while ((*stringp != '\0') && 1344 (test_char_against_ascii_class(*stringp, regexp, 1345 test_condition) == CONDITION_TRUE) && 1346 (nextra_matches_allowed > 0)) { 1347 nextra_matches_allowed--; 1348 stringp++; 1349 } 1350 regexp += (int)*regexp + 2; 1351 return (test_repeated_ascii_char(repeat_startp, stringp, 1352 regexp)); 1353 } 1354 /* end case IN_ASCII_CHAR_CLASS|COUNT */ 1355 1356 case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1357 case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: 1358 1359 /* 1360 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1361 * <class_length><class ...> 1362 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1363 * <class_length><class ...> 1364 * 1365 * NOTE: <class_length> includes the <class_length> byte 1366 */ 1367 1368 if ((int)*regexp == 1369 (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) { 1370 test_condition = IN_CLASS; 1371 } else { 1372 test_condition = NOT_IN_CLASS; 1373 } 1374 regexp++; /* point to the <class_length> byte */ 1375 1376 repeat_startp = stringp; 1377 string_char_size = get_wchar(&string_wchar, stringp); 1378 while ((string_char_size > 0) && 1379 (test_char_against_multibyte_class(string_wchar, regexp, 1380 test_condition) == CONDITION_TRUE)) { 1381 stringp += string_char_size; 1382 string_char_size = get_wchar(&string_wchar, stringp); 1383 } 1384 regexp += (int)*regexp; /* add the class length to regexp */ 1385 return (test_repeated_multibyte_char(repeat_startp, stringp, 1386 regexp)); 1387 1388 /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ 1389 1390 case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1391 case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: 1392 1393 /* 1394 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1395 * <class_length><class ...> 1396 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1397 * <class_length><class ...> 1398 * 1399 * NOTE: <class_length> includes the <class_length> byte 1400 */ 1401 1402 if ((int)*regexp == 1403 (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) { 1404 test_condition = IN_CLASS; 1405 } else { 1406 test_condition = NOT_IN_CLASS; 1407 } 1408 regexp++; /* point to the <class_length> byte */ 1409 1410 string_char_size = get_wchar(&string_wchar, stringp); 1411 if ((string_char_size <= 0) || 1412 (test_char_against_multibyte_class(string_wchar, regexp, 1413 test_condition) != CONDITION_TRUE)) { 1414 return ((char *)0); 1415 } else { 1416 stringp += string_char_size; 1417 repeat_startp = stringp; 1418 string_char_size = get_wchar(&string_wchar, stringp); 1419 while ((string_char_size > 0) && 1420 (test_char_against_multibyte_class(string_wchar, 1421 regexp, test_condition) == CONDITION_TRUE)) { 1422 stringp += string_char_size; 1423 string_char_size = get_wchar(&string_wchar, stringp); 1424 } 1425 regexp += (int)*regexp; /* add the class length to regexp */ 1426 return (test_repeated_multibyte_char(repeat_startp, stringp, 1427 regexp)); 1428 } 1429 /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ 1430 1431 case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1432 case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT: 1433 1434 /* 1435 * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1436 * <class_length><class ...><min_count><max_count> 1437 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1438 * <class_length><class ...><min_count><max_count> 1439 * 1440 * NOTE: <class_length> includes the <class_length> byte 1441 * but not the <minimum_match_count> or 1442 * <maximum_match_count> bytes 1443 */ 1444 1445 if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) { 1446 test_condition = IN_CLASS; 1447 } else { 1448 test_condition = NOT_IN_CLASS; 1449 } 1450 regexp++; /* point to the <class_length> byte */ 1451 1452 get_match_counts(&nmust_match, &nextra_matches_allowed, 1453 regexp + (int)*regexp); 1454 string_char_size = get_wchar(&string_wchar, stringp); 1455 while ((string_char_size > 0) && 1456 (test_char_against_multibyte_class(string_wchar, regexp, 1457 test_condition) == CONDITION_TRUE) && 1458 (nmust_match > 0)) { 1459 nmust_match--; 1460 stringp += string_char_size; 1461 string_char_size = get_wchar(&string_wchar, stringp); 1462 } 1463 if (nmust_match > 0) { 1464 return ((char *)0); 1465 } else if (nextra_matches_allowed == UNLIMITED) { 1466 repeat_startp = stringp; 1467 while ((string_char_size > 0) && 1468 (test_char_against_multibyte_class(string_wchar, 1469 regexp, test_condition) == CONDITION_TRUE)) { 1470 stringp += string_char_size; 1471 string_char_size = get_wchar(&string_wchar, stringp); 1472 } 1473 regexp += (int)*regexp + 2; 1474 return (test_repeated_multibyte_char(repeat_startp, stringp, 1475 regexp)); 1476 } else { 1477 repeat_startp = stringp; 1478 while ((string_char_size > 0) && 1479 (test_char_against_multibyte_class(string_wchar, 1480 regexp, test_condition) == CONDITION_TRUE) && 1481 (nextra_matches_allowed > 0)) { 1482 nextra_matches_allowed--; 1483 stringp += string_char_size; 1484 string_char_size = get_wchar(&string_wchar, stringp); 1485 } 1486 regexp += (int)*regexp + 2; 1487 return (test_repeated_multibyte_char(repeat_startp, stringp, 1488 regexp)); 1489 } 1490 /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ 1491 1492 case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1493 case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1494 1495 /* 1496 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1497 * <class_length><class ...> 1498 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1499 * <class_length><class ...> 1500 * 1501 * NOTE: <class_length> includes the <class_length> byte 1502 */ 1503 1504 if ((int)*regexp == 1505 (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1506 test_condition = IN_CLASS; 1507 } else { 1508 test_condition = NOT_IN_CLASS; 1509 } 1510 regexp++; /* point to the <class_length> byte */ 1511 1512 repeat_startp = stringp; 1513 while ((*stringp != '\0') && 1514 (test_char_against_old_ascii_class(*stringp, regexp, 1515 test_condition) == CONDITION_TRUE)) { 1516 stringp++; 1517 } 1518 regexp += (int)*regexp; /* add the class length to regexp */ 1519 return (test_repeated_ascii_char(repeat_startp, stringp, 1520 regexp)); 1521 1522 /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1523 1524 case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1525 case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: 1526 1527 /* 1528 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1529 * <class_length><class ...> 1530 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1531 * <class_length><class ...> 1532 * 1533 * NOTE: <class length> includes the <class_length> byte 1534 */ 1535 1536 if ((int)*regexp == 1537 (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1538 test_condition = IN_CLASS; 1539 } else { 1540 test_condition = NOT_IN_CLASS; 1541 } 1542 regexp++; /* point to the <class_length> byte */ 1543 1544 if ((*stringp == '\0') || 1545 (test_char_against_old_ascii_class(*stringp, regexp, 1546 test_condition) != CONDITION_TRUE)) { 1547 return ((char *)0); 1548 } else { 1549 stringp++; 1550 repeat_startp = stringp; 1551 while ((*stringp != '\0') && 1552 (test_char_against_old_ascii_class(*stringp, regexp, 1553 test_condition) == CONDITION_TRUE)) { 1554 stringp++; 1555 } 1556 regexp += (int)*regexp; /* add the class length to regexp */ 1557 return (test_repeated_ascii_char(repeat_startp, stringp, 1558 regexp)); 1559 } 1560 /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ 1561 1562 case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1563 case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT: 1564 1565 /* 1566 * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\ 1567 * <class ...><minimum_match_count>\ 1568 * <maximum_match_count> 1569 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\ 1570 * <class_length><class ...><minimum_match_count>\ 1571 * <maximum_match_count> 1572 * 1573 * NOTE: <class_length> includes the <class_length> byte 1574 * but not the <minimum_match_count> or 1575 * <maximum_match_count> bytes 1576 */ 1577 1578 if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) { 1579 test_condition = IN_CLASS; 1580 } else { 1581 test_condition = NOT_IN_CLASS; 1582 } 1583 regexp++; /* point to the <class_length> byte */ 1584 1585 get_match_counts(&nmust_match, &nextra_matches_allowed, 1586 regexp + (int)*regexp); 1587 while ((*stringp != '\0') && 1588 (test_char_against_old_ascii_class(*stringp, regexp, 1589 test_condition) == CONDITION_TRUE) && 1590 (nmust_match > 0)) { 1591 nmust_match--; 1592 stringp++; 1593 } 1594 if (nmust_match > 0) { 1595 return ((char *)0); 1596 } else if (nextra_matches_allowed == UNLIMITED) { 1597 repeat_startp = stringp; 1598 while ((*stringp != '\0') && 1599 (test_char_against_old_ascii_class(*stringp, regexp, 1600 test_condition) == CONDITION_TRUE)) { 1601 stringp++; 1602 } 1603 regexp += (int)*regexp + 2; 1604 return (test_repeated_ascii_char(repeat_startp, stringp, 1605 regexp)); 1606 } else { 1607 repeat_startp = stringp; 1608 while ((*stringp != '\0') && 1609 (test_char_against_old_ascii_class(*stringp, regexp, 1610 test_condition) == CONDITION_TRUE) && 1611 (nextra_matches_allowed > 0)) { 1612 nextra_matches_allowed--; 1613 stringp++; 1614 } 1615 regexp += (int)*regexp + 2; 1616 return (test_repeated_ascii_char(repeat_startp, stringp, 1617 regexp)); 1618 } 1619 /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ 1620 1621 case ZERO_OR_MORE_GROUP: /* (.....)* */ 1622 case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1623 case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1624 case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1625 1626 /* 1627 * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1628 * <group_length><compiled_regex...>\ 1629 * <END_GROUP|ZERO_OR_MORE><groupn> 1630 * 1631 * NOTE: 1632 * 1633 * group_length + (256 * ADDED_LENGTH_BITS) == 1634 * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\ 1635 * <groupn>) 1636 * 1637 */ 1638 1639 group_length = 1640 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1641 TIMES_256_SHIFT); 1642 regexp++; 1643 group_length += (unsigned int)*regexp; 1644 regexp++; 1645 repeat_startp = stringp; 1646 test_stringp = test_string(stringp, regexp); 1647 while (test_stringp != (char *)0) { 1648 if (push_stringp(stringp) == (char *)0) 1649 return ((char *)0); 1650 stringp = test_stringp; 1651 test_stringp = test_string(stringp, regexp); 1652 } 1653 regexp += group_length; 1654 return (test_repeated_group(repeat_startp, stringp, regexp)); 1655 1656 /* end case ZERO_OR_MORE_GROUP */ 1657 1658 case END_GROUP|ZERO_OR_MORE: /* (.....)* */ 1659 1660 /* encoded as <END_GROUP|ZERO_OR_MORE> */ 1661 1662 /* return from recursive call to test_string() */ 1663 1664 return ((char *)stringp); 1665 1666 /* end case END_GROUP|ZERO_OR_MORE */ 1667 1668 case ONE_OR_MORE_GROUP: /* (.....)+ */ 1669 case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1670 case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1671 case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1672 1673 /* 1674 * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1675 * <group_length><compiled_regex...>\ 1676 * <END_GROUP|ONE_OR_MORE><groupn> 1677 * 1678 * NOTE: 1679 * 1680 * group_length + (256 * ADDED_LENGTH_BITS) == 1681 * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\ 1682 * <groupn>) 1683 */ 1684 1685 group_length = 1686 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1687 TIMES_256_SHIFT); 1688 regexp++; 1689 group_length += (unsigned int)*regexp; 1690 regexp++; 1691 stringp = test_string(stringp, regexp); 1692 if (stringp == (char *)0) 1693 return ((char *)0); 1694 repeat_startp = stringp; 1695 test_stringp = test_string(stringp, regexp); 1696 while (test_stringp != (char *)0) { 1697 if (push_stringp(stringp) == (char *)0) 1698 return ((char *)0); 1699 stringp = test_stringp; 1700 test_stringp = test_string(stringp, regexp); 1701 } 1702 regexp += group_length; 1703 return (test_repeated_group(repeat_startp, stringp, regexp)); 1704 1705 /* end case ONE_OR_MORE_GROUP */ 1706 1707 case END_GROUP|ONE_OR_MORE: /* (.....)+ */ 1708 1709 /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */ 1710 1711 /* return from recursive call to test_string() */ 1712 1713 return ((char *)stringp); 1714 1715 /* end case END_GROUP|ONE_OR_MORE */ 1716 1717 case COUNTED_GROUP: /* (.....){max_count,min_count} */ 1718 case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH: 1719 case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH: 1720 case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH: 1721 1722 /* 1723 * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ 1724 * <compiled_regex...>\<END_GROUP|COUNT><groupn>\ 1725 * <minimum_match_count><maximum_match_count> 1726 * 1727 * NOTE: 1728 * 1729 * group_length + (256 * ADDED_LENGTH_BITS) == 1730 * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>) 1731 * 1732 * but does not include the <minimum_match_count> or 1733 * <maximum_match_count> bytes 1734 */ 1735 1736 group_length = 1737 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1738 TIMES_256_SHIFT); 1739 regexp++; 1740 group_length += (unsigned int)*regexp; 1741 regexp++; 1742 get_match_counts(&nmust_match, &nextra_matches_allowed, 1743 regexp + group_length); 1744 test_stringp = test_string(stringp, regexp); 1745 while ((test_stringp != (char *)0) && (nmust_match > 0)) { 1746 stringp = test_stringp; 1747 nmust_match--; 1748 test_stringp = test_string(stringp, regexp); 1749 } 1750 if (nmust_match > 0) { 1751 return ((char *)0); 1752 } else if (nextra_matches_allowed == UNLIMITED) { 1753 repeat_startp = stringp; 1754 while (test_stringp != (char *)0) { 1755 if (push_stringp(stringp) == (char *)0) 1756 return ((char *)0); 1757 stringp = test_stringp; 1758 test_stringp = test_string(stringp, regexp); 1759 } 1760 regexp += group_length + 2; 1761 return (test_repeated_group(repeat_startp, stringp, 1762 regexp)); 1763 } else { 1764 repeat_startp = stringp; 1765 while ((test_stringp != (char *)0) && 1766 (nextra_matches_allowed > 0)) { 1767 nextra_matches_allowed--; 1768 if (push_stringp(stringp) == (char *)0) 1769 return ((char *)0); 1770 stringp = test_stringp; 1771 test_stringp = test_string(stringp, regexp); 1772 } 1773 regexp += group_length + 2; 1774 return (test_repeated_group(repeat_startp, stringp, 1775 regexp)); 1776 } 1777 /* end case COUNTED_GROUP */ 1778 1779 case END_GROUP|COUNT: /* (.....){max_count,min_count} */ 1780 1781 /* encoded as <END_GROUP|COUNT> */ 1782 1783 /* return from recursive call to test_string() */ 1784 1785 return (stringp); 1786 1787 /* end case END_GROUP|COUNT */ 1788 1789 case END_OF_STRING_MARK: 1790 1791 /* encoded as <END_OF_STRING_MARK><END_REGEX> */ 1792 1793 if (*stringp == '\0') { 1794 regexp++; 1795 } else { 1796 return ((char *)0); 1797 } 1798 break; /* end case END_OF_STRING_MARK */ 1799 1800 case END_REGEX: /* end of the compiled regular expression */ 1801 1802 /* encoded as <END_REGEX> */ 1803 1804 return (stringp); 1805 1806 /* end case END_REGEX */ 1807 1808 default: 1809 1810 return ((char *)0); 1811 1812 } /* end switch (*regexp) */ 1813 1814 } /* end for (;;) */ 1815 1816 } /* test_string() */ 1817