1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 25 * Use is subject to license terms. 26 */ 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 /* 34 * IMPORTANT NOTE: 35 * 36 * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. 37 * IT IS **NOT** CHARACTER SET INDEPENDENT. 38 * 39 */ 40 41 #pragma weak regex = _regex 42 43 /* CONSTANTS SHARED WITH regcmp() */ 44 #include "regex.h" 45 46 #include "synonyms.h" 47 #include "mtlib.h" 48 #include <limits.h> 49 #include <stdarg.h> 50 #include <stdlib.h> 51 #include <thread.h> 52 #include <widec.h> 53 #include "tsd.h" 54 55 56 /* PRIVATE CONSTANTS */ 57 58 #define ADD_256_TO_GROUP_LENGTH 0x1 59 #define ADD_512_TO_GROUP_LENGTH 0x2 60 #define ADD_768_TO_GROUP_LENGTH 0x3 61 #define ADDED_LENGTH_BITS 0x3 62 #define SINGLE_BYTE_MASK 0xff 63 #define STRINGP_STACK_SIZE 50 64 65 66 /* PRIVATE TYPE DEFINITIONS */ 67 68 typedef enum { 69 NOT_IN_CLASS = 0, 70 IN_CLASS 71 } char_test_condition_t; 72 73 typedef enum { 74 TESTING_CHAR = 0, 75 CONDITION_TRUE, 76 CONDITION_FALSE, 77 CHAR_TEST_ERROR 78 } char_test_result_t; 79 80 81 /* PRIVATE GLOBAL VARIABLES */ 82 83 static mutex_t regex_lock = DEFAULTMUTEX; 84 static int return_arg_number[NSUBSTRINGS]; 85 static const char *substring_endp[NSUBSTRINGS]; 86 static const char *substring_startp[NSUBSTRINGS]; 87 static const char *stringp_stack[STRINGP_STACK_SIZE]; 88 static const char **stringp_stackp; 89 90 91 /* DECLARATIONS OF PRIVATE FUNCTIONS */ 92 93 static int 94 get_wchar(wchar_t *wcharp, 95 const char *stringp); 96 97 static void 98 get_match_counts(int *nmust_matchp, 99 int *nextra_matches_allowedp, 100 const char *count_stringp); 101 102 static boolean_t 103 in_wchar_range(wchar_t test_char, 104 wchar_t lower_char, 105 wchar_t upper_char); 106 107 static const char * 108 pop_stringp(void); 109 110 static const char * 111 previous_charp(const char *current_charp); 112 113 static const char * 114 push_stringp(const char *stringp); 115 116 static char_test_result_t 117 test_char_against_ascii_class(char test_char, 118 const char *classp, 119 char_test_condition_t test_condition); 120 121 static char_test_result_t 122 test_char_against_multibyte_class(wchar_t test_char, 123 const char *classp, 124 char_test_condition_t test_condition); 125 126 127 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 128 129 static char_test_result_t 130 test_char_against_old_ascii_class(char test_char, 131 const char *classp, 132 char_test_condition_t test_condition); 133 134 static const char * 135 test_repeated_ascii_char(const char *repeat_startp, 136 const char *stringp, 137 const char *regexp); 138 139 static const char * 140 test_repeated_multibyte_char(const char *repeat_startp, 141 const char *stringp, 142 const char *regexp); 143 144 static const char * 145 test_repeated_group(const char *repeat_startp, 146 const char *stringp, 147 const char *regexp); 148 149 static const char * 150 test_string(const char *stringp, 151 const char *regexp); 152 153 154 /* DEFINITIONS OF PUBLIC VARIABLES */ 155 156 char *__loc1; 157 158 /* 159 * reserve thread-specific storage for __loc1 160 */ 161 char ** 162 ____loc1(void) 163 { 164 if (_thr_main()) 165 return (&__loc1); 166 return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL)); 167 } 168 169 #define __loc1 (*(____loc1())) 170 171 /* DEFINITION OF regex() */ 172 173 extern char * 174 _regex(const char *regexp, 175 const char *stringp, ...) 176 { 177 va_list arg_listp; 178 int char_size; 179 const char *end_of_matchp; 180 wchar_t regex_wchar; 181 char *return_argp[NSUBSTRINGS]; 182 char *returned_substringp; 183 int substringn; 184 const char *substringp; 185 wchar_t string_wchar; 186 187 if (____loc1() == (char **)0) { 188 return ((char *)0); 189 } else { 190 lmutex_lock(®ex_lock); 191 __loc1 = (char *)0; 192 } 193 194 if ((stringp == (char *)0) || (regexp == (char *)0)) { 195 lmutex_unlock(®ex_lock); 196 return ((char *)0); 197 } 198 199 200 /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ 201 202 substringn = 0; 203 va_start(arg_listp, stringp); 204 while (substringn < NSUBSTRINGS) { 205 return_argp[substringn] = va_arg(arg_listp, char *); 206 substring_startp[substringn] = (char *)0; 207 return_arg_number[substringn] = -1; 208 substringn++; 209 } 210 va_end(arg_listp); 211 212 213 /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ 214 215 end_of_matchp = (char *)0; 216 stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; 217 218 if ((int)*regexp == (int)START_OF_STRING_MARK) { 219 220 /* 221 * the match must start at the beginning of the string 222 */ 223 224 __loc1 = (char *)stringp; 225 regexp++; 226 end_of_matchp = test_string(stringp, regexp); 227 228 } else if ((int)*regexp == (int)ASCII_CHAR) { 229 230 /* 231 * test a string against a regular expression 232 * that starts with a single ASCII character: 233 * 234 * move to each character in the string that matches 235 * the first character in the regular expression 236 * and test the remaining string 237 */ 238 239 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 240 stringp++; 241 } 242 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 243 end_of_matchp = test_string(stringp, regexp); 244 if (end_of_matchp != (char *)0) { 245 __loc1 = (char *)stringp; 246 } else { 247 stringp++; 248 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 249 stringp++; 250 } 251 } 252 } 253 254 } else if (!multibyte) { 255 256 /* 257 * if the value of the "multibyte" macro defined in <euc.h> 258 * is false, regex() is running in an ASCII locale; 259 * test an ASCII string against an ASCII regular expression 260 * that doesn't start with a single ASCII character: 261 * 262 * move forward in the string one byte at a time, testing 263 * the remaining string against the regular expression 264 */ 265 266 end_of_matchp = test_string(stringp, regexp); 267 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 268 stringp++; 269 end_of_matchp = test_string(stringp, regexp); 270 } 271 if (end_of_matchp != (char *)0) { 272 __loc1 = (char *)stringp; 273 } 274 275 } else if ((int)*regexp == (int)MULTIBYTE_CHAR) { 276 277 /* 278 * test a multibyte string against a multibyte regular expression 279 * that starts with a single multibyte character: 280 * 281 * move to each character in the string that matches 282 * the first character in the regular expression 283 * and test the remaining string 284 */ 285 286 (void) get_wchar(®ex_wchar, regexp + 1); 287 char_size = get_wchar(&string_wchar, stringp); 288 while ((string_wchar != regex_wchar) && (char_size > 0)) { 289 stringp += char_size; 290 char_size = get_wchar(&string_wchar, stringp); 291 } 292 while ((end_of_matchp == (char *)0) && (char_size > 0)) { 293 end_of_matchp = test_string(stringp, regexp); 294 if (end_of_matchp != (char *)0) { 295 __loc1 = (char *)stringp; 296 } else { 297 stringp += char_size; 298 char_size = get_wchar(&string_wchar, stringp); 299 while ((string_wchar != regex_wchar) && (char_size > 0)) { 300 stringp += char_size; 301 char_size = get_wchar(&string_wchar, stringp); 302 } 303 } 304 } 305 306 } else { 307 308 /* 309 * test a multibyte string against a multibyte regular expression 310 * that doesn't start with a single multibyte character 311 * 312 * move forward in the string one multibyte character at a time, 313 * testing the remaining string against the regular expression 314 */ 315 316 end_of_matchp = test_string(stringp, regexp); 317 char_size = get_wchar(&string_wchar, stringp); 318 while ((end_of_matchp == (char *)0) && (char_size > 0)) { 319 stringp += char_size; 320 end_of_matchp = test_string(stringp, regexp); 321 char_size = get_wchar(&string_wchar, stringp); 322 } 323 if (end_of_matchp != (char *)0) { 324 __loc1 = (char *)stringp; 325 } 326 } 327 328 /* 329 * Return substrings that matched subexpressions for which 330 * matching substrings are to be returned. 331 * 332 * NOTE: 333 * 334 * According to manual page regcmp(3G), regex() returns substrings 335 * that match subexpressions even when no substring matches the 336 * entire regular expression. 337 */ 338 339 substringn = 0; 340 while (substringn < NSUBSTRINGS) { 341 substringp = substring_startp[substringn]; 342 if ((substringp != (char *)0) && 343 (return_arg_number[substringn] >= 0)) { 344 returned_substringp = 345 return_argp[return_arg_number[substringn]]; 346 if (returned_substringp != (char *)0) { 347 while (substringp < substring_endp[substringn]) { 348 *returned_substringp = (char)*substringp; 349 returned_substringp++; 350 substringp++; 351 } 352 *returned_substringp = '\0'; 353 } 354 } 355 substringn++; 356 } 357 lmutex_unlock(®ex_lock); 358 return ((char *)end_of_matchp); 359 } /* regex() */ 360 361 362 /* DEFINITIONS OF PRIVATE FUNCTIONS */ 363 364 static int 365 get_wchar(wchar_t *wcharp, 366 const char *stringp) 367 { 368 int char_size; 369 370 if (stringp == (char *)0) { 371 char_size = 0; 372 *wcharp = (wchar_t)((unsigned int)'\0'); 373 } else if (*stringp == '\0') { 374 char_size = 0; 375 *wcharp = (wchar_t)((unsigned int)*stringp); 376 } else if ((unsigned char)*stringp <= (unsigned char)0x7f) { 377 char_size = 1; 378 *wcharp = (wchar_t)((unsigned int)*stringp); 379 } else { 380 char_size = mbtowc(wcharp, stringp, MB_LEN_MAX); 381 } 382 return (char_size); 383 } 384 385 static void 386 get_match_counts(int *nmust_matchp, 387 int *nextra_matches_allowedp, 388 const char *count_stringp) 389 { 390 int minimum_match_count; 391 int maximum_match_count; 392 393 minimum_match_count = 394 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 395 *nmust_matchp = minimum_match_count; 396 397 count_stringp++; 398 maximum_match_count = 399 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 400 if (maximum_match_count == (int)UNLIMITED) { 401 *nextra_matches_allowedp = (int)UNLIMITED; 402 } else { 403 *nextra_matches_allowedp = 404 maximum_match_count - minimum_match_count; 405 } 406 return; 407 408 } /* get_match_counts() */ 409 410 static boolean_t 411 in_wchar_range(wchar_t test_char, 412 wchar_t lower_char, 413 wchar_t upper_char) 414 { 415 return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && 416 (lower_char <= test_char) && (test_char <= upper_char)) || 417 (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) && 418 ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) && 419 (lower_char <= test_char) && (test_char <= upper_char))); 420 421 } /* in_wchar_range() */ 422 423 static const char * 424 pop_stringp(void) 425 { 426 const char *stringp; 427 428 if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) { 429 return ((char *)0); 430 } else { 431 stringp = *stringp_stackp; 432 stringp_stackp++; 433 return (stringp); 434 } 435 } 436 437 438 static const char * 439 previous_charp(const char *current_charp) 440 { 441 /* 442 * returns the pointer to the previous character in 443 * a string of multibyte characters 444 */ 445 446 const char *prev_cs0 = current_charp - 1; 447 const char *prev_cs1 = current_charp - eucw1; 448 const char *prev_cs2 = current_charp - eucw2 - 1; 449 const char *prev_cs3 = current_charp - eucw3 - 1; 450 const char *prev_charp; 451 452 if ((unsigned char)*prev_cs0 <= 0x7f) { 453 prev_charp = prev_cs0; 454 } else if ((unsigned char)*prev_cs2 == SS2) { 455 prev_charp = prev_cs2; 456 } else if ((unsigned char)*prev_cs3 == SS3) { 457 prev_charp = prev_cs3; 458 } else { 459 prev_charp = prev_cs1; 460 } 461 return (prev_charp); 462 463 } /* previous_charp() */ 464 465 static const char * 466 push_stringp(const char *stringp) 467 { 468 if (stringp_stackp <= &stringp_stack[0]) { 469 return ((char *)0); 470 } else { 471 stringp_stackp--; 472 *stringp_stackp = stringp; 473 return (stringp); 474 } 475 } 476 477 478 static char_test_result_t 479 test_char_against_ascii_class(char test_char, 480 const char *classp, 481 char_test_condition_t test_condition) 482 { 483 /* 484 * tests a character for membership in an ASCII character class compiled 485 * by the internationalized version of regcmp(); 486 * 487 * NOTE: The internationalized version of regcmp() compiles 488 * the range a-z in an ASCII character class to aTHRUz. 489 */ 490 491 int nbytes_to_check; 492 493 nbytes_to_check = (int)*classp; 494 classp++; 495 nbytes_to_check--; 496 497 while (nbytes_to_check > 0) { 498 if (test_char == *classp) { 499 if (test_condition == IN_CLASS) 500 return (CONDITION_TRUE); 501 else 502 return (CONDITION_FALSE); 503 } else if (*classp == THRU) { 504 if ((*(classp - 1) <= test_char) && 505 (test_char <= *(classp + 1))) { 506 if (test_condition == IN_CLASS) 507 return (CONDITION_TRUE); 508 else 509 return (CONDITION_FALSE); 510 } else { 511 classp += 2; 512 nbytes_to_check -= 2; 513 } 514 } else { 515 classp++; 516 nbytes_to_check--; 517 } 518 } 519 if (test_condition == NOT_IN_CLASS) { 520 return (CONDITION_TRUE); 521 } else { 522 return (CONDITION_FALSE); 523 } 524 } /* test_char_against_ascii_class() */ 525 526 static char_test_result_t 527 test_char_against_multibyte_class(wchar_t test_char, 528 const char *classp, 529 char_test_condition_t test_condition) 530 { 531 /* 532 * tests a character for membership in a multibyte character class; 533 * 534 * NOTE: The range a-z in a multibyte character class compiles to 535 * aTHRUz. 536 */ 537 538 int char_size; 539 wchar_t current_char; 540 int nbytes_to_check; 541 wchar_t previous_char; 542 543 nbytes_to_check = (int)*classp; 544 classp++; 545 nbytes_to_check--; 546 547 char_size = get_wchar(¤t_char, classp); 548 if (char_size <= 0) { 549 return (CHAR_TEST_ERROR); 550 } else if (test_char == current_char) { 551 if (test_condition == IN_CLASS) { 552 return (CONDITION_TRUE); 553 } else { 554 return (CONDITION_FALSE); 555 } 556 } else { 557 classp += char_size; 558 nbytes_to_check -= char_size; 559 } 560 561 while (nbytes_to_check > 0) { 562 previous_char = current_char; 563 char_size = get_wchar(¤t_char, classp); 564 if (char_size <= 0) { 565 return (CHAR_TEST_ERROR); 566 } else if (test_char == current_char) { 567 if (test_condition == IN_CLASS) { 568 return (CONDITION_TRUE); 569 } else { 570 return (CONDITION_FALSE); 571 } 572 } else if (current_char == THRU) { 573 classp += char_size; 574 nbytes_to_check -= char_size; 575 char_size = get_wchar(¤t_char, classp); 576 if (char_size <= 0) { 577 return (CHAR_TEST_ERROR); 578 } else if (in_wchar_range(test_char, previous_char, 579 current_char)) { 580 if (test_condition == IN_CLASS) { 581 return (CONDITION_TRUE); 582 } else { 583 return (CONDITION_FALSE); 584 } 585 } else { 586 classp += char_size; 587 nbytes_to_check -= char_size; 588 } 589 } else { 590 classp += char_size; 591 nbytes_to_check -= char_size; 592 } 593 } 594 if (test_condition == NOT_IN_CLASS) { 595 return (CONDITION_TRUE); 596 } else { 597 return (CONDITION_FALSE); 598 } 599 } /* test_char_against_multibyte_class() */ 600 601 602 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 603 604 static char_test_result_t 605 test_char_against_old_ascii_class(char test_char, 606 const char *classp, 607 char_test_condition_t test_condition) 608 { 609 /* 610 * tests a character for membership in an ASCII character class compiled 611 * by the ASCII version of regcmp(); 612 * 613 * NOTE: ASCII versions of regcmp() compile the range a-z in an 614 * ASCII character class to THRUaz. The internationalized 615 * version compiles the same range to aTHRUz. 616 */ 617 618 int nbytes_to_check; 619 620 nbytes_to_check = (int)*classp; 621 classp++; 622 nbytes_to_check--; 623 624 while (nbytes_to_check > 0) { 625 if (test_char == *classp) { 626 if (test_condition == IN_CLASS) { 627 return (CONDITION_TRUE); 628 } else { 629 return (CONDITION_FALSE); 630 } 631 } else if (*classp == THRU) { 632 if ((*(classp + 1) <= test_char) && 633 (test_char <= *(classp + 2))) { 634 if (test_condition == IN_CLASS) { 635 return (CONDITION_TRUE); 636 } else { 637 return (CONDITION_FALSE); 638 } 639 } else { 640 classp += 3; 641 nbytes_to_check -= 3; 642 } 643 } else { 644 classp++; 645 nbytes_to_check--; 646 } 647 } 648 if (test_condition == NOT_IN_CLASS) { 649 return (CONDITION_TRUE); 650 } else { 651 return (CONDITION_FALSE); 652 } 653 } /* test_char_against_old_ascii_class() */ 654 655 static const char * 656 test_repeated_ascii_char(const char *repeat_startp, 657 const char *stringp, 658 const char *regexp) 659 { 660 const char *end_of_matchp; 661 662 end_of_matchp = test_string(stringp, regexp); 663 while ((end_of_matchp == (char *)0) && 664 (stringp > repeat_startp)) { 665 stringp--; 666 end_of_matchp = test_string(stringp, regexp); 667 } 668 return (end_of_matchp); 669 } 670 671 static const char * 672 test_repeated_multibyte_char(const char *repeat_startp, 673 const char *stringp, 674 const char *regexp) 675 { 676 const char *end_of_matchp; 677 678 end_of_matchp = test_string(stringp, regexp); 679 while ((end_of_matchp == (char *)0) && 680 (stringp > repeat_startp)) { 681 stringp = previous_charp(stringp); 682 end_of_matchp = test_string(stringp, regexp); 683 } 684 return (end_of_matchp); 685 } 686 687 static const char * 688 test_repeated_group(const char *repeat_startp, 689 const char *stringp, 690 const char *regexp) 691 { 692 const char *end_of_matchp; 693 694 end_of_matchp = test_string(stringp, regexp); 695 while ((end_of_matchp == (char *)0) && 696 (stringp > repeat_startp)) { 697 stringp = pop_stringp(); 698 if (stringp == (char *)0) { 699 return ((char *)0); 700 } 701 end_of_matchp = test_string(stringp, regexp); 702 } 703 return (end_of_matchp); 704 } 705 706 static const char * 707 test_string(const char *stringp, 708 const char *regexp) 709 { 710 /* 711 * returns a pointer to the first character following the first 712 * substring of the string addressed by stringp that matches 713 * the compiled regular expression addressed by regexp 714 */ 715 716 unsigned int group_length; 717 int nextra_matches_allowed; 718 int nmust_match; 719 wchar_t regex_wchar; 720 int regex_char_size; 721 const char *repeat_startp; 722 unsigned int return_argn; 723 wchar_t string_wchar; 724 int string_char_size; 725 unsigned int substringn; 726 char_test_condition_t test_condition; 727 const char *test_stringp; 728 729 for (;;) { 730 731 /* 732 * Exit the loop via a return whenever there's a match 733 * or it's clear that there can be no match. 734 */ 735 736 switch ((int)*regexp) { 737 738 /* 739 * No fall-through. 740 * Each case ends with either a return or with stringp 741 * addressing the next character to be tested and regexp 742 * addressing the next compiled regular expression 743 * 744 * NOTE: The comments for each case give the meaning 745 * of the compiled regular expression decoded by the case 746 * and the character string that the compiled regular 747 * expression uses to encode the case. Each single 748 * character encoded in the compiled regular expression 749 * is shown enclosed in angle brackets (<>). Each 750 * compiled regular expression begins with a marker 751 * character which is shown as a named constant 752 * (e.g. <ASCII_CHAR>). Character constants are shown 753 * enclosed in single quotes (e.g. <'$'>). All other 754 * single characters encoded in the compiled regular 755 * expression are shown as lower case variable names 756 * (e.g. <ascii_char> or <multibyte_char>). Multicharacter 757 * strings encoded in the compiled regular expression 758 * are shown as variable names followed by elipses 759 * (e.g. <compiled_regex...>). 760 */ 761 762 case ASCII_CHAR: /* single ASCII char */ 763 764 /* encoded as <ASCII_CHAR><ascii_char> */ 765 766 regexp++; 767 if (*regexp == *stringp) { 768 regexp++; 769 stringp++; 770 } else { 771 return ((char *)0); 772 } 773 break; /* end case ASCII_CHAR */ 774 775 case MULTIBYTE_CHAR: /* single multibyte char */ 776 777 /* encoded as <MULTIBYTE_CHAR><multibyte_char> */ 778 779 regexp++; 780 regex_char_size = get_wchar(®ex_wchar, regexp); 781 string_char_size = get_wchar(&string_wchar, stringp); 782 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 783 return ((char *)0); 784 } else { 785 regexp += regex_char_size; 786 stringp += string_char_size; 787 } 788 break; /* end case MULTIBYTE_CHAR */ 789 790 case ANY_CHAR: /* any single ASCII or multibyte char */ 791 792 /* encoded as <ANY_CHAR> */ 793 794 if (!multibyte) { 795 if (*stringp == '\0') { 796 return ((char *)0); 797 } else { 798 regexp++; 799 stringp++; 800 } 801 } else { 802 string_char_size = get_wchar(&string_wchar, stringp); 803 if (string_char_size <= 0) { 804 return ((char *)0); 805 } else { 806 regexp++; 807 stringp += string_char_size; 808 } 809 } 810 break; /* end case ANY_CHAR */ 811 812 case IN_ASCII_CHAR_CLASS: /* [.....] */ 813 case NOT_IN_ASCII_CHAR_CLASS: 814 815 /* 816 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...> 817 * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...> 818 * 819 * NOTE: <class_length> includes the <class_length> byte 820 */ 821 822 if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) { 823 test_condition = IN_CLASS; 824 } else { 825 test_condition = NOT_IN_CLASS; 826 } 827 regexp++; /* point to the <class_length> byte */ 828 829 if ((*stringp != '\0') && 830 (test_char_against_ascii_class(*stringp, regexp, 831 test_condition) == CONDITION_TRUE)) { 832 regexp += (int)*regexp; /* add the class length to regexp */ 833 stringp++; 834 } else { 835 return ((char *)0); 836 } 837 break; /* end case IN_ASCII_CHAR_CLASS */ 838 839 case IN_MULTIBYTE_CHAR_CLASS: /* [....] */ 840 case NOT_IN_MULTIBYTE_CHAR_CLASS: 841 842 /* 843 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 844 * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 845 * 846 * NOTE: <class_length> includes the <class_length> byte 847 */ 848 849 if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) { 850 test_condition = IN_CLASS; 851 } else { 852 test_condition = NOT_IN_CLASS; 853 } 854 regexp++; /* point to the <class_length> byte */ 855 856 string_char_size = get_wchar(&string_wchar, stringp); 857 if ((string_char_size > 0) && 858 (test_char_against_multibyte_class(string_wchar, regexp, 859 test_condition) == CONDITION_TRUE)) { 860 regexp += (int)*regexp; /* add the class length to regexp */ 861 stringp += string_char_size; 862 } else { 863 return ((char *)0); 864 } 865 break; /* end case IN_MULTIBYTE_CHAR_CLASS */ 866 867 case IN_OLD_ASCII_CHAR_CLASS: /* [...] */ 868 case NOT_IN_OLD_ASCII_CHAR_CLASS: 869 870 /* 871 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 872 * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 873 * 874 * NOTE: <class_length> includes the <class_length> byte 875 */ 876 877 if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) { 878 test_condition = IN_CLASS; 879 } else { 880 test_condition = NOT_IN_CLASS; 881 } 882 regexp++; /* point to the <class_length> byte */ 883 884 if ((*stringp != '\0') && 885 (test_char_against_old_ascii_class(*stringp, regexp, 886 test_condition) == CONDITION_TRUE)) { 887 regexp += (int)*regexp; /* add the class length to regexp */ 888 stringp++; 889 } else { 890 return ((char *)0); 891 } 892 break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ 893 894 case SIMPLE_GROUP: /* (.....) */ 895 896 /* encoded as <SIMPLE_GROUP><group_length> */ 897 898 regexp += 2; 899 break; /* end case SIMPLE_GROUP */ 900 901 case END_GROUP: /* (.....) */ 902 903 /* encoded as <END_GROUP><groupn> */ 904 905 regexp += 2; 906 break; /* end case END_GROUP */ 907 908 case SAVED_GROUP: /* (.....)$0-9 */ 909 910 /* encoded as <SAVED_GROUP><substringn> */ 911 912 regexp++; 913 substringn = (unsigned int)*regexp; 914 if (substringn >= NSUBSTRINGS) 915 return ((char *)0); 916 substring_startp[substringn] = stringp; 917 regexp++; 918 break; /* end case SAVED_GROUP */ 919 920 case END_SAVED_GROUP: /* (.....)$0-9 */ 921 922 /* 923 * encoded as <END_SAVED_GROUP><substringn>\ 924 * <return_arg_number[substringn]> 925 */ 926 927 regexp++; 928 substringn = (unsigned int)*regexp; 929 if (substringn >= NSUBSTRINGS) 930 return ((char *)0); 931 substring_endp[substringn] = stringp; 932 regexp++; 933 return_argn = (unsigned int)*regexp; 934 if (return_argn >= NSUBSTRINGS) 935 return ((char *)0); 936 return_arg_number[substringn] = return_argn; 937 regexp++; 938 break; /* end case END_SAVED_GROUP */ 939 940 case ASCII_CHAR|ZERO_OR_MORE: /* char* */ 941 942 /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */ 943 944 regexp++; 945 repeat_startp = stringp; 946 while (*stringp == *regexp) { 947 stringp++; 948 } 949 regexp++; 950 return (test_repeated_ascii_char(repeat_startp, 951 stringp, regexp)); 952 953 /* end case ASCII_CHAR|ZERO_OR_MORE */ 954 955 case ASCII_CHAR|ONE_OR_MORE: /* char+ */ 956 957 /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */ 958 959 regexp++; 960 if (*stringp != *regexp) { 961 return ((char *)0); 962 } else { 963 stringp++; 964 repeat_startp = stringp; 965 while (*stringp == *regexp) { 966 stringp++; 967 } 968 regexp++; 969 return (test_repeated_ascii_char(repeat_startp, stringp, 970 regexp)); 971 } 972 /* end case ASCII_CHAR|ONE_OR_MORE */ 973 974 case ASCII_CHAR|COUNT: /* char{min_count,max_count} */ 975 976 /* 977 * encoded as <ASCII_CHAR|COUNT><ascii_char>\ 978 * <minimum_match_count><maximum_match_count> 979 */ 980 981 regexp++; 982 get_match_counts(&nmust_match, &nextra_matches_allowed, 983 regexp + 1); 984 while ((*stringp == *regexp) && (nmust_match > 0)) { 985 nmust_match--; 986 stringp++; 987 } 988 if (nmust_match > 0) { 989 return ((char *)0); 990 } else if (nextra_matches_allowed == UNLIMITED) { 991 repeat_startp = stringp; 992 while (*stringp == *regexp) { 993 stringp++; 994 } 995 regexp += 3; 996 return (test_repeated_ascii_char(repeat_startp, stringp, 997 regexp)); 998 } else { 999 repeat_startp = stringp; 1000 while ((*stringp == *regexp) && 1001 (nextra_matches_allowed > 0)) { 1002 nextra_matches_allowed--; 1003 stringp++; 1004 } 1005 regexp += 3; 1006 return (test_repeated_ascii_char(repeat_startp, stringp, 1007 regexp)); 1008 } 1009 /* end case ASCII_CHAR|COUNT */ 1010 1011 case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */ 1012 1013 /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */ 1014 1015 regexp++; 1016 regex_char_size = get_wchar(®ex_wchar, regexp); 1017 repeat_startp = stringp; 1018 string_char_size = get_wchar(&string_wchar, stringp); 1019 while ((string_char_size > 0) && 1020 (string_wchar == regex_wchar)) { 1021 stringp += string_char_size; 1022 string_char_size = get_wchar(&string_wchar, stringp); 1023 } 1024 regexp += regex_char_size; 1025 return (test_repeated_multibyte_char(repeat_startp, stringp, 1026 regexp)); 1027 1028 /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ 1029 1030 case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */ 1031 1032 /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */ 1033 1034 regexp++; 1035 regex_char_size = get_wchar(®ex_wchar, regexp); 1036 string_char_size = get_wchar(&string_wchar, stringp); 1037 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 1038 return ((char *)0); 1039 } else { 1040 stringp += string_char_size; 1041 repeat_startp = stringp; 1042 string_char_size = get_wchar(&string_wchar, stringp); 1043 while ((string_char_size > 0) && 1044 (string_wchar == regex_wchar)) { 1045 stringp += string_char_size; 1046 string_char_size = get_wchar(&string_wchar, stringp); 1047 } 1048 regexp += regex_char_size; 1049 return (test_repeated_multibyte_char(repeat_startp, stringp, 1050 regexp)); 1051 } 1052 /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ 1053 1054 case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */ 1055 1056 /* 1057 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\ 1058 * <minimum_match_count><maximum_match_count> 1059 */ 1060 1061 regexp++; 1062 regex_char_size = get_wchar(®ex_wchar, regexp); 1063 get_match_counts(&nmust_match, &nextra_matches_allowed, 1064 regexp + regex_char_size); 1065 string_char_size = get_wchar(&string_wchar, stringp); 1066 while ((string_char_size > 0) && 1067 (string_wchar == regex_wchar) && 1068 (nmust_match > 0)) { 1069 1070 nmust_match--; 1071 stringp += string_char_size; 1072 string_char_size = get_wchar(&string_wchar, stringp); 1073 } 1074 if (nmust_match > 0) { 1075 return ((char *)0); 1076 } else if (nextra_matches_allowed == UNLIMITED) { 1077 repeat_startp = stringp; 1078 while ((string_char_size > 0) && 1079 (string_wchar == regex_wchar)) { 1080 stringp += string_char_size; 1081 string_char_size = get_wchar(&string_wchar, stringp); 1082 } 1083 regexp += regex_char_size + 2; 1084 return (test_repeated_multibyte_char(repeat_startp, stringp, 1085 regexp)); 1086 } else { 1087 repeat_startp = stringp; 1088 while ((string_char_size > 0) && 1089 (string_wchar == regex_wchar) && 1090 (nextra_matches_allowed > 0)) { 1091 nextra_matches_allowed--; 1092 stringp += string_char_size; 1093 string_char_size = get_wchar(&string_wchar, stringp); 1094 } 1095 regexp += regex_char_size + 2; 1096 return (test_repeated_multibyte_char(repeat_startp, stringp, 1097 regexp)); 1098 } 1099 /* end case MULTIBYTE_CHAR|COUNT */ 1100 1101 case ANY_CHAR|ZERO_OR_MORE: /* .* */ 1102 1103 /* encoded as <ANY_CHAR|ZERO_OR_MORE> */ 1104 1105 repeat_startp = stringp; 1106 if (!multibyte) { 1107 while (*stringp != '\0') { 1108 stringp++; 1109 } 1110 regexp++; 1111 return (test_repeated_ascii_char(repeat_startp, stringp, 1112 regexp)); 1113 } else { 1114 string_char_size = get_wchar(&string_wchar, stringp); 1115 while (string_char_size > 0) { 1116 stringp += string_char_size; 1117 string_char_size = get_wchar(&string_wchar, stringp); 1118 } 1119 regexp++; 1120 return (test_repeated_multibyte_char(repeat_startp, stringp, 1121 regexp)); 1122 } 1123 /* end case <ANY_CHAR|ZERO_OR_MORE> */ 1124 1125 case ANY_CHAR|ONE_OR_MORE: /* .+ */ 1126 1127 /* encoded as <ANY_CHAR|ONE_OR_MORE> */ 1128 1129 if (!multibyte) { 1130 if (*stringp == '\0') { 1131 return ((char *)0); 1132 } else { 1133 stringp++; 1134 repeat_startp = stringp; 1135 while (*stringp != '\0') { 1136 stringp++; 1137 } 1138 regexp++; 1139 return (test_repeated_ascii_char(repeat_startp, stringp, 1140 regexp)); 1141 } 1142 } else { 1143 string_char_size = get_wchar(&string_wchar, stringp); 1144 if (string_char_size <= 0) { 1145 return ((char *)0); 1146 } else { 1147 stringp += string_char_size; 1148 repeat_startp = stringp; 1149 string_char_size = get_wchar(&string_wchar, stringp); 1150 while (string_char_size > 0) { 1151 stringp += string_char_size; 1152 string_char_size = 1153 get_wchar(&string_wchar, stringp); 1154 } 1155 regexp++; 1156 return (test_repeated_multibyte_char(repeat_startp, 1157 stringp, regexp)); 1158 } 1159 } 1160 /* end case <ANY_CHAR|ONE_OR_MORE> */ 1161 1162 case ANY_CHAR|COUNT: /* .{min_count,max_count} */ 1163 1164 /* 1165 * encoded as <ANY_CHAR|COUNT>\ 1166 * <minimum_match_count><maximum_match_count> 1167 */ 1168 1169 get_match_counts(&nmust_match, &nextra_matches_allowed, 1170 regexp + 1); 1171 if (!multibyte) { 1172 while ((*stringp != '\0') && (nmust_match > 0)) { 1173 nmust_match--; 1174 stringp++; 1175 } 1176 if (nmust_match > 0) { 1177 return ((char *)0); 1178 } else if (nextra_matches_allowed == UNLIMITED) { 1179 repeat_startp = stringp; 1180 while (*stringp != '\0') { 1181 stringp++; 1182 } 1183 regexp += 3; 1184 return (test_repeated_ascii_char(repeat_startp, stringp, 1185 regexp)); 1186 } else { 1187 repeat_startp = stringp; 1188 while ((*stringp != '\0') && 1189 (nextra_matches_allowed > 0)) { 1190 nextra_matches_allowed--; 1191 stringp++; 1192 } 1193 regexp += 3; 1194 return (test_repeated_ascii_char(repeat_startp, stringp, 1195 regexp)); 1196 } 1197 } else { /* multibyte character */ 1198 1199 string_char_size = get_wchar(&string_wchar, stringp); 1200 while ((string_char_size > 0) && (nmust_match > 0)) { 1201 nmust_match--; 1202 stringp += string_char_size; 1203 string_char_size = get_wchar(&string_wchar, stringp); 1204 } 1205 if (nmust_match > 0) { 1206 return ((char *)0); 1207 } else if (nextra_matches_allowed == UNLIMITED) { 1208 repeat_startp = stringp; 1209 while (string_char_size > 0) { 1210 stringp += string_char_size; 1211 string_char_size = 1212 get_wchar(&string_wchar, stringp); 1213 } 1214 regexp += 3; 1215 return (test_repeated_multibyte_char(repeat_startp, 1216 stringp, regexp)); 1217 } else { 1218 repeat_startp = stringp; 1219 while ((string_char_size > 0) && 1220 (nextra_matches_allowed > 0)) { 1221 nextra_matches_allowed--; 1222 stringp += string_char_size; 1223 string_char_size = 1224 get_wchar(&string_wchar, stringp); 1225 } 1226 regexp += 3; 1227 return (test_repeated_multibyte_char(repeat_startp, 1228 stringp, regexp)); 1229 } 1230 } /* end case ANY_CHAR|COUNT */ 1231 1232 case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1233 case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1234 1235 /* 1236 * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1237 * <class_length><class ...> 1238 * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1239 * <class_length><class ...> 1240 * 1241 * NOTE: <class_length> includes the <class_length> byte 1242 */ 1243 1244 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1245 test_condition = IN_CLASS; 1246 } else { 1247 test_condition = NOT_IN_CLASS; 1248 } 1249 regexp++; /* point to the <class_length> byte */ 1250 1251 repeat_startp = stringp; 1252 while ((*stringp != '\0') && 1253 (test_char_against_ascii_class(*stringp, regexp, 1254 test_condition) == CONDITION_TRUE)) { 1255 stringp++; 1256 } 1257 regexp += (int)*regexp; /* add the class length to regexp */ 1258 return (test_repeated_ascii_char(repeat_startp, stringp, 1259 regexp)); 1260 1261 /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1262 1263 case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1264 case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE: 1265 1266 /* 1267 * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1268 * <class_length><class ...> 1269 * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1270 * <class_length><class ...> 1271 * 1272 * NOTE: <class_length> includes the <class_length> byte 1273 */ 1274 1275 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1276 test_condition = IN_CLASS; 1277 } else { 1278 test_condition = NOT_IN_CLASS; 1279 } 1280 regexp++; /* point to the <class_length> byte */ 1281 1282 if ((*stringp == '\0') || 1283 (test_char_against_ascii_class(*stringp, regexp, 1284 test_condition) != CONDITION_TRUE)) { 1285 return ((char *)0); 1286 } else { 1287 stringp++; 1288 repeat_startp = stringp; 1289 while ((*stringp != '\0') && 1290 (test_char_against_ascii_class(*stringp, regexp, 1291 test_condition) == CONDITION_TRUE)) { 1292 stringp++; 1293 } 1294 regexp += (int)*regexp; /* add the class length to regexp */ 1295 return (test_repeated_ascii_char(repeat_startp, stringp, 1296 regexp)); 1297 } 1298 /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ 1299 1300 case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */ 1301 case NOT_IN_ASCII_CHAR_CLASS | COUNT: 1302 1303 /* 1304 * endoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1305 * <class ...><minimum_match_count>\ 1306 * <maximum_match_count> 1307 * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1308 * <class ...><minimum_match_count>\ 1309 * <maximum_match_count> 1310 * 1311 * NOTE: <class_length> includes the <class_length> byte, 1312 * but not the <minimum_match_count> or 1313 * <maximum_match_count> bytes 1314 */ 1315 1316 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) { 1317 test_condition = IN_CLASS; 1318 } else { 1319 test_condition = NOT_IN_CLASS; 1320 } 1321 regexp++; /* point to the <class_length> byte */ 1322 1323 get_match_counts(&nmust_match, &nextra_matches_allowed, 1324 regexp + (int)*regexp); 1325 while ((*stringp != '\0') && 1326 (test_char_against_ascii_class(*stringp, regexp, 1327 test_condition) == CONDITION_TRUE) && 1328 (nmust_match > 0)) { 1329 nmust_match--; 1330 stringp++; 1331 } 1332 if (nmust_match > 0) { 1333 return ((char *)0); 1334 } else if (nextra_matches_allowed == UNLIMITED) { 1335 repeat_startp = stringp; 1336 while ((*stringp != '\0') && 1337 (test_char_against_ascii_class(*stringp, regexp, 1338 test_condition) == CONDITION_TRUE)) { 1339 stringp++; 1340 } 1341 regexp += (int)*regexp + 2; 1342 return (test_repeated_ascii_char(repeat_startp, stringp, 1343 regexp)); 1344 } else { 1345 repeat_startp = stringp; 1346 while ((*stringp != '\0') && 1347 (test_char_against_ascii_class(*stringp, regexp, 1348 test_condition) == CONDITION_TRUE) && 1349 (nextra_matches_allowed > 0)) { 1350 nextra_matches_allowed--; 1351 stringp++; 1352 } 1353 regexp += (int)*regexp + 2; 1354 return (test_repeated_ascii_char(repeat_startp, stringp, 1355 regexp)); 1356 } 1357 /* end case IN_ASCII_CHAR_CLASS|COUNT */ 1358 1359 case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1360 case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: 1361 1362 /* 1363 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1364 * <class_length><class ...> 1365 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1366 * <class_length><class ...> 1367 * 1368 * NOTE: <class_length> includes the <class_length> byte 1369 */ 1370 1371 if ((int)*regexp == 1372 (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) { 1373 test_condition = IN_CLASS; 1374 } else { 1375 test_condition = NOT_IN_CLASS; 1376 } 1377 regexp++; /* point to the <class_length> byte */ 1378 1379 repeat_startp = stringp; 1380 string_char_size = get_wchar(&string_wchar, stringp); 1381 while ((string_char_size > 0) && 1382 (test_char_against_multibyte_class(string_wchar, regexp, 1383 test_condition) == CONDITION_TRUE)) { 1384 stringp += string_char_size; 1385 string_char_size = get_wchar(&string_wchar, stringp); 1386 } 1387 regexp += (int)*regexp; /* add the class length to regexp */ 1388 return (test_repeated_multibyte_char(repeat_startp, stringp, 1389 regexp)); 1390 1391 /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ 1392 1393 case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1394 case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: 1395 1396 /* 1397 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1398 * <class_length><class ...> 1399 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1400 * <class_length><class ...> 1401 * 1402 * NOTE: <class_length> includes the <class_length> byte 1403 */ 1404 1405 if ((int)*regexp == 1406 (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) { 1407 test_condition = IN_CLASS; 1408 } else { 1409 test_condition = NOT_IN_CLASS; 1410 } 1411 regexp++; /* point to the <class_length> byte */ 1412 1413 string_char_size = get_wchar(&string_wchar, stringp); 1414 if ((string_char_size <= 0) || 1415 (test_char_against_multibyte_class(string_wchar, regexp, 1416 test_condition) != CONDITION_TRUE)) { 1417 return ((char *)0); 1418 } else { 1419 stringp += string_char_size; 1420 repeat_startp = stringp; 1421 string_char_size = get_wchar(&string_wchar, stringp); 1422 while ((string_char_size > 0) && 1423 (test_char_against_multibyte_class(string_wchar, 1424 regexp, test_condition) == CONDITION_TRUE)) { 1425 stringp += string_char_size; 1426 string_char_size = get_wchar(&string_wchar, stringp); 1427 } 1428 regexp += (int)*regexp; /* add the class length to regexp */ 1429 return (test_repeated_multibyte_char(repeat_startp, stringp, 1430 regexp)); 1431 } 1432 /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ 1433 1434 case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1435 case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT: 1436 1437 /* 1438 * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1439 * <class_length><class ...><min_count><max_count> 1440 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1441 * <class_length><class ...><min_count><max_count> 1442 * 1443 * NOTE: <class_length> includes the <class_length> byte 1444 * but not the <minimum_match_count> or 1445 * <maximum_match_count> bytes 1446 */ 1447 1448 if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) { 1449 test_condition = IN_CLASS; 1450 } else { 1451 test_condition = NOT_IN_CLASS; 1452 } 1453 regexp++; /* point to the <class_length> byte */ 1454 1455 get_match_counts(&nmust_match, &nextra_matches_allowed, 1456 regexp + (int)*regexp); 1457 string_char_size = get_wchar(&string_wchar, stringp); 1458 while ((string_char_size > 0) && 1459 (test_char_against_multibyte_class(string_wchar, regexp, 1460 test_condition) == CONDITION_TRUE) && 1461 (nmust_match > 0)) { 1462 nmust_match--; 1463 stringp += string_char_size; 1464 string_char_size = get_wchar(&string_wchar, stringp); 1465 } 1466 if (nmust_match > 0) { 1467 return ((char *)0); 1468 } else if (nextra_matches_allowed == UNLIMITED) { 1469 repeat_startp = stringp; 1470 while ((string_char_size > 0) && 1471 (test_char_against_multibyte_class(string_wchar, 1472 regexp, test_condition) == CONDITION_TRUE)) { 1473 stringp += string_char_size; 1474 string_char_size = get_wchar(&string_wchar, stringp); 1475 } 1476 regexp += (int)*regexp + 2; 1477 return (test_repeated_multibyte_char(repeat_startp, stringp, 1478 regexp)); 1479 } else { 1480 repeat_startp = stringp; 1481 while ((string_char_size > 0) && 1482 (test_char_against_multibyte_class(string_wchar, 1483 regexp, test_condition) == CONDITION_TRUE) && 1484 (nextra_matches_allowed > 0)) { 1485 nextra_matches_allowed--; 1486 stringp += string_char_size; 1487 string_char_size = get_wchar(&string_wchar, stringp); 1488 } 1489 regexp += (int)*regexp + 2; 1490 return (test_repeated_multibyte_char(repeat_startp, stringp, 1491 regexp)); 1492 } 1493 /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ 1494 1495 case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1496 case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1497 1498 /* 1499 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1500 * <class_length><class ...> 1501 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1502 * <class_length><class ...> 1503 * 1504 * NOTE: <class_length> includes the <class_length> byte 1505 */ 1506 1507 if ((int)*regexp == 1508 (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1509 test_condition = IN_CLASS; 1510 } else { 1511 test_condition = NOT_IN_CLASS; 1512 } 1513 regexp++; /* point to the <class_length> byte */ 1514 1515 repeat_startp = stringp; 1516 while ((*stringp != '\0') && 1517 (test_char_against_old_ascii_class(*stringp, regexp, 1518 test_condition) == CONDITION_TRUE)) { 1519 stringp++; 1520 } 1521 regexp += (int)*regexp; /* add the class length to regexp */ 1522 return (test_repeated_ascii_char(repeat_startp, stringp, 1523 regexp)); 1524 1525 /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1526 1527 case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1528 case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: 1529 1530 /* 1531 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1532 * <class_length><class ...> 1533 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1534 * <class_length><class ...> 1535 * 1536 * NOTE: <class length> includes the <class_length> byte 1537 */ 1538 1539 if ((int)*regexp == 1540 (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1541 test_condition = IN_CLASS; 1542 } else { 1543 test_condition = NOT_IN_CLASS; 1544 } 1545 regexp++; /* point to the <class_length> byte */ 1546 1547 if ((*stringp == '\0') || 1548 (test_char_against_old_ascii_class(*stringp, regexp, 1549 test_condition) != CONDITION_TRUE)) { 1550 return ((char *)0); 1551 } else { 1552 stringp++; 1553 repeat_startp = stringp; 1554 while ((*stringp != '\0') && 1555 (test_char_against_old_ascii_class(*stringp, regexp, 1556 test_condition) == CONDITION_TRUE)) { 1557 stringp++; 1558 } 1559 regexp += (int)*regexp; /* add the class length to regexp */ 1560 return (test_repeated_ascii_char(repeat_startp, stringp, 1561 regexp)); 1562 } 1563 /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ 1564 1565 case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1566 case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT: 1567 1568 /* 1569 * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\ 1570 * <class ...><minimum_match_count>\ 1571 * <maximum_match_count> 1572 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\ 1573 * <class_length><class ...><minimum_match_count>\ 1574 * <maximum_match_count> 1575 * 1576 * NOTE: <class_length> includes the <class_length> byte 1577 * but not the <minimum_match_count> or 1578 * <maximum_match_count> bytes 1579 */ 1580 1581 if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) { 1582 test_condition = IN_CLASS; 1583 } else { 1584 test_condition = NOT_IN_CLASS; 1585 } 1586 regexp++; /* point to the <class_length> byte */ 1587 1588 get_match_counts(&nmust_match, &nextra_matches_allowed, 1589 regexp + (int)*regexp); 1590 while ((*stringp != '\0') && 1591 (test_char_against_old_ascii_class(*stringp, regexp, 1592 test_condition) == CONDITION_TRUE) && 1593 (nmust_match > 0)) { 1594 nmust_match--; 1595 stringp++; 1596 } 1597 if (nmust_match > 0) { 1598 return ((char *)0); 1599 } else if (nextra_matches_allowed == UNLIMITED) { 1600 repeat_startp = stringp; 1601 while ((*stringp != '\0') && 1602 (test_char_against_old_ascii_class(*stringp, regexp, 1603 test_condition) == CONDITION_TRUE)) { 1604 stringp++; 1605 } 1606 regexp += (int)*regexp + 2; 1607 return (test_repeated_ascii_char(repeat_startp, stringp, 1608 regexp)); 1609 } else { 1610 repeat_startp = stringp; 1611 while ((*stringp != '\0') && 1612 (test_char_against_old_ascii_class(*stringp, regexp, 1613 test_condition) == CONDITION_TRUE) && 1614 (nextra_matches_allowed > 0)) { 1615 nextra_matches_allowed--; 1616 stringp++; 1617 } 1618 regexp += (int)*regexp + 2; 1619 return (test_repeated_ascii_char(repeat_startp, stringp, 1620 regexp)); 1621 } 1622 /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ 1623 1624 case ZERO_OR_MORE_GROUP: /* (.....)* */ 1625 case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1626 case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1627 case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1628 1629 /* 1630 * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1631 * <group_length><compiled_regex...>\ 1632 * <END_GROUP|ZERO_OR_MORE><groupn> 1633 * 1634 * NOTE: 1635 * 1636 * group_length + (256 * ADDED_LENGTH_BITS) == 1637 * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\ 1638 * <groupn>) 1639 * 1640 */ 1641 1642 group_length = 1643 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1644 TIMES_256_SHIFT); 1645 regexp++; 1646 group_length += (unsigned int)*regexp; 1647 regexp++; 1648 repeat_startp = stringp; 1649 test_stringp = test_string(stringp, regexp); 1650 while (test_stringp != (char *)0) { 1651 if (push_stringp(stringp) == (char *)0) 1652 return ((char *)0); 1653 stringp = test_stringp; 1654 test_stringp = test_string(stringp, regexp); 1655 } 1656 regexp += group_length; 1657 return (test_repeated_group(repeat_startp, stringp, regexp)); 1658 1659 /* end case ZERO_OR_MORE_GROUP */ 1660 1661 case END_GROUP|ZERO_OR_MORE: /* (.....)* */ 1662 1663 /* encoded as <END_GROUP|ZERO_OR_MORE> */ 1664 1665 /* return from recursive call to test_string() */ 1666 1667 return ((char *)stringp); 1668 1669 /* end case END_GROUP|ZERO_OR_MORE */ 1670 1671 case ONE_OR_MORE_GROUP: /* (.....)+ */ 1672 case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1673 case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1674 case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1675 1676 /* 1677 * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1678 * <group_length><compiled_regex...>\ 1679 * <END_GROUP|ONE_OR_MORE><groupn> 1680 * 1681 * NOTE: 1682 * 1683 * group_length + (256 * ADDED_LENGTH_BITS) == 1684 * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\ 1685 * <groupn>) 1686 */ 1687 1688 group_length = 1689 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1690 TIMES_256_SHIFT); 1691 regexp++; 1692 group_length += (unsigned int)*regexp; 1693 regexp++; 1694 stringp = test_string(stringp, regexp); 1695 if (stringp == (char *)0) 1696 return ((char *)0); 1697 repeat_startp = stringp; 1698 test_stringp = test_string(stringp, regexp); 1699 while (test_stringp != (char *)0) { 1700 if (push_stringp(stringp) == (char *)0) 1701 return ((char *)0); 1702 stringp = test_stringp; 1703 test_stringp = test_string(stringp, regexp); 1704 } 1705 regexp += group_length; 1706 return (test_repeated_group(repeat_startp, stringp, regexp)); 1707 1708 /* end case ONE_OR_MORE_GROUP */ 1709 1710 case END_GROUP|ONE_OR_MORE: /* (.....)+ */ 1711 1712 /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */ 1713 1714 /* return from recursive call to test_string() */ 1715 1716 return ((char *)stringp); 1717 1718 /* end case END_GROUP|ONE_OR_MORE */ 1719 1720 case COUNTED_GROUP: /* (.....){max_count,min_count} */ 1721 case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH: 1722 case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH: 1723 case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH: 1724 1725 /* 1726 * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ 1727 * <compiled_regex...>\<END_GROUP|COUNT><groupn>\ 1728 * <minimum_match_count><maximum_match_count> 1729 * 1730 * NOTE: 1731 * 1732 * group_length + (256 * ADDED_LENGTH_BITS) == 1733 * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>) 1734 * 1735 * but does not include the <minimum_match_count> or 1736 * <maximum_match_count> bytes 1737 */ 1738 1739 group_length = 1740 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1741 TIMES_256_SHIFT); 1742 regexp++; 1743 group_length += (unsigned int)*regexp; 1744 regexp++; 1745 get_match_counts(&nmust_match, &nextra_matches_allowed, 1746 regexp + group_length); 1747 test_stringp = test_string(stringp, regexp); 1748 while ((test_stringp != (char *)0) && (nmust_match > 0)) { 1749 stringp = test_stringp; 1750 nmust_match--; 1751 test_stringp = test_string(stringp, regexp); 1752 } 1753 if (nmust_match > 0) { 1754 return ((char *)0); 1755 } else if (nextra_matches_allowed == UNLIMITED) { 1756 repeat_startp = stringp; 1757 while (test_stringp != (char *)0) { 1758 if (push_stringp(stringp) == (char *)0) 1759 return ((char *)0); 1760 stringp = test_stringp; 1761 test_stringp = test_string(stringp, regexp); 1762 } 1763 regexp += group_length + 2; 1764 return (test_repeated_group(repeat_startp, stringp, 1765 regexp)); 1766 } else { 1767 repeat_startp = stringp; 1768 while ((test_stringp != (char *)0) && 1769 (nextra_matches_allowed > 0)) { 1770 nextra_matches_allowed--; 1771 if (push_stringp(stringp) == (char *)0) 1772 return ((char *)0); 1773 stringp = test_stringp; 1774 test_stringp = test_string(stringp, regexp); 1775 } 1776 regexp += group_length + 2; 1777 return (test_repeated_group(repeat_startp, stringp, 1778 regexp)); 1779 } 1780 /* end case COUNTED_GROUP */ 1781 1782 case END_GROUP|COUNT: /* (.....){max_count,min_count} */ 1783 1784 /* encoded as <END_GROUP|COUNT> */ 1785 1786 /* return from recursive call to test_string() */ 1787 1788 return (stringp); 1789 1790 /* end case END_GROUP|COUNT */ 1791 1792 case END_OF_STRING_MARK: 1793 1794 /* encoded as <END_OF_STRING_MARK><END_REGEX> */ 1795 1796 if (*stringp == '\0') { 1797 regexp++; 1798 } else { 1799 return ((char *)0); 1800 } 1801 break; /* end case END_OF_STRING_MARK */ 1802 1803 case END_REGEX: /* end of the compiled regular expression */ 1804 1805 /* encoded as <END_REGEX> */ 1806 1807 return (stringp); 1808 1809 /* end case END_REGEX */ 1810 1811 default: 1812 1813 return ((char *)0); 1814 1815 } /* end switch (*regexp) */ 1816 1817 } /* end for (;;) */ 1818 1819 } /* test_string() */ 1820