1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * IMPORTANT NOTE: 34 * 35 * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. 36 * IT IS **NOT** CHARACTER SET INDEPENDENT. 37 * 38 */ 39 40 #pragma weak regex = _regex 41 42 /* CONSTANTS SHARED WITH regcmp() */ 43 #include "regex.h" 44 45 #include "lint.h" 46 #include "mtlib.h" 47 #include <limits.h> 48 #include <stdarg.h> 49 #include <stdlib.h> 50 #include <thread.h> 51 #include <widec.h> 52 #include "tsd.h" 53 54 55 /* PRIVATE CONSTANTS */ 56 57 #define ADD_256_TO_GROUP_LENGTH 0x1 58 #define ADD_512_TO_GROUP_LENGTH 0x2 59 #define ADD_768_TO_GROUP_LENGTH 0x3 60 #define ADDED_LENGTH_BITS 0x3 61 #define SINGLE_BYTE_MASK 0xff 62 #define STRINGP_STACK_SIZE 50 63 64 65 /* PRIVATE TYPE DEFINITIONS */ 66 67 typedef enum { 68 NOT_IN_CLASS = 0, 69 IN_CLASS 70 } char_test_condition_t; 71 72 typedef enum { 73 TESTING_CHAR = 0, 74 CONDITION_TRUE, 75 CONDITION_FALSE, 76 CHAR_TEST_ERROR 77 } char_test_result_t; 78 79 80 /* PRIVATE GLOBAL VARIABLES */ 81 82 static mutex_t regex_lock = DEFAULTMUTEX; 83 static int return_arg_number[NSUBSTRINGS]; 84 static const char *substring_endp[NSUBSTRINGS]; 85 static const char *substring_startp[NSUBSTRINGS]; 86 static const char *stringp_stack[STRINGP_STACK_SIZE]; 87 static const char **stringp_stackp; 88 89 90 /* DECLARATIONS OF PRIVATE FUNCTIONS */ 91 92 static int 93 get_wchar(wchar_t *wcharp, 94 const char *stringp); 95 96 static void 97 get_match_counts(int *nmust_matchp, 98 int *nextra_matches_allowedp, 99 const char *count_stringp); 100 101 static boolean_t 102 in_wchar_range(wchar_t test_char, 103 wchar_t lower_char, 104 wchar_t upper_char); 105 106 static const char * 107 pop_stringp(void); 108 109 static const char * 110 previous_charp(const char *current_charp); 111 112 static const char * 113 push_stringp(const char *stringp); 114 115 static char_test_result_t 116 test_char_against_ascii_class(char test_char, 117 const char *classp, 118 char_test_condition_t test_condition); 119 120 static char_test_result_t 121 test_char_against_multibyte_class(wchar_t test_char, 122 const char *classp, 123 char_test_condition_t test_condition); 124 125 126 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 127 128 static char_test_result_t 129 test_char_against_old_ascii_class(char test_char, 130 const char *classp, 131 char_test_condition_t test_condition); 132 133 static const char * 134 test_repeated_ascii_char(const char *repeat_startp, 135 const char *stringp, 136 const char *regexp); 137 138 static const char * 139 test_repeated_multibyte_char(const char *repeat_startp, 140 const char *stringp, 141 const char *regexp); 142 143 static const char * 144 test_repeated_group(const char *repeat_startp, 145 const char *stringp, 146 const char *regexp); 147 148 static const char * 149 test_string(const char *stringp, 150 const char *regexp); 151 152 153 /* DEFINITIONS OF PUBLIC VARIABLES */ 154 155 char *__loc1; 156 157 /* 158 * reserve thread-specific storage for __loc1 159 */ 160 char ** 161 ____loc1(void) 162 { 163 if (_thr_main()) 164 return (&__loc1); 165 return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL)); 166 } 167 168 #define __loc1 (*(____loc1())) 169 170 /* DEFINITION OF regex() */ 171 172 extern char * 173 _regex(const char *regexp, 174 const char *stringp, ...) 175 { 176 va_list arg_listp; 177 int char_size; 178 const char *end_of_matchp; 179 wchar_t regex_wchar; 180 char *return_argp[NSUBSTRINGS]; 181 char *returned_substringp; 182 int substringn; 183 const char *substringp; 184 wchar_t string_wchar; 185 186 if (____loc1() == (char **)0) { 187 return ((char *)0); 188 } else { 189 lmutex_lock(®ex_lock); 190 __loc1 = (char *)0; 191 } 192 193 if ((stringp == (char *)0) || (regexp == (char *)0)) { 194 lmutex_unlock(®ex_lock); 195 return ((char *)0); 196 } 197 198 199 /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ 200 201 substringn = 0; 202 va_start(arg_listp, stringp); 203 while (substringn < NSUBSTRINGS) { 204 return_argp[substringn] = va_arg(arg_listp, char *); 205 substring_startp[substringn] = (char *)0; 206 return_arg_number[substringn] = -1; 207 substringn++; 208 } 209 va_end(arg_listp); 210 211 212 /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ 213 214 end_of_matchp = (char *)0; 215 stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; 216 217 if ((int)*regexp == (int)START_OF_STRING_MARK) { 218 219 /* 220 * the match must start at the beginning of the string 221 */ 222 223 __loc1 = (char *)stringp; 224 regexp++; 225 end_of_matchp = test_string(stringp, regexp); 226 227 } else if ((int)*regexp == (int)ASCII_CHAR) { 228 229 /* 230 * test a string against a regular expression 231 * that starts with a single ASCII character: 232 * 233 * move to each character in the string that matches 234 * the first character in the regular expression 235 * and test the remaining string 236 */ 237 238 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 239 stringp++; 240 } 241 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 242 end_of_matchp = test_string(stringp, regexp); 243 if (end_of_matchp != (char *)0) { 244 __loc1 = (char *)stringp; 245 } else { 246 stringp++; 247 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 248 stringp++; 249 } 250 } 251 } 252 253 } else if (!multibyte) { 254 255 /* 256 * if the value of the "multibyte" macro defined in <euc.h> 257 * is false, regex() is running in an ASCII locale; 258 * test an ASCII string against an ASCII regular expression 259 * that doesn't start with a single ASCII character: 260 * 261 * move forward in the string one byte at a time, testing 262 * the remaining string against the regular expression 263 */ 264 265 end_of_matchp = test_string(stringp, regexp); 266 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 267 stringp++; 268 end_of_matchp = test_string(stringp, regexp); 269 } 270 if (end_of_matchp != (char *)0) { 271 __loc1 = (char *)stringp; 272 } 273 274 } else if ((int)*regexp == (int)MULTIBYTE_CHAR) { 275 276 /* 277 * test a multibyte string against a multibyte regular expression 278 * that starts with a single multibyte character: 279 * 280 * move to each character in the string that matches 281 * the first character in the regular expression 282 * and test the remaining string 283 */ 284 285 (void) get_wchar(®ex_wchar, regexp + 1); 286 char_size = get_wchar(&string_wchar, stringp); 287 while ((string_wchar != regex_wchar) && (char_size > 0)) { 288 stringp += char_size; 289 char_size = get_wchar(&string_wchar, stringp); 290 } 291 while ((end_of_matchp == (char *)0) && (char_size > 0)) { 292 end_of_matchp = test_string(stringp, regexp); 293 if (end_of_matchp != (char *)0) { 294 __loc1 = (char *)stringp; 295 } else { 296 stringp += char_size; 297 char_size = get_wchar(&string_wchar, stringp); 298 while ((string_wchar != regex_wchar) && (char_size > 0)) { 299 stringp += char_size; 300 char_size = get_wchar(&string_wchar, stringp); 301 } 302 } 303 } 304 305 } else { 306 307 /* 308 * test a multibyte string against a multibyte regular expression 309 * that doesn't start with a single multibyte character 310 * 311 * move forward in the string one multibyte character at a time, 312 * testing the remaining string against the regular expression 313 */ 314 315 end_of_matchp = test_string(stringp, regexp); 316 char_size = get_wchar(&string_wchar, stringp); 317 while ((end_of_matchp == (char *)0) && (char_size > 0)) { 318 stringp += char_size; 319 end_of_matchp = test_string(stringp, regexp); 320 char_size = get_wchar(&string_wchar, stringp); 321 } 322 if (end_of_matchp != (char *)0) { 323 __loc1 = (char *)stringp; 324 } 325 } 326 327 /* 328 * Return substrings that matched subexpressions for which 329 * matching substrings are to be returned. 330 * 331 * NOTE: 332 * 333 * According to manual page regcmp(3G), regex() returns substrings 334 * that match subexpressions even when no substring matches the 335 * entire regular expression. 336 */ 337 338 substringn = 0; 339 while (substringn < NSUBSTRINGS) { 340 substringp = substring_startp[substringn]; 341 if ((substringp != (char *)0) && 342 (return_arg_number[substringn] >= 0)) { 343 returned_substringp = 344 return_argp[return_arg_number[substringn]]; 345 if (returned_substringp != (char *)0) { 346 while (substringp < substring_endp[substringn]) { 347 *returned_substringp = (char)*substringp; 348 returned_substringp++; 349 substringp++; 350 } 351 *returned_substringp = '\0'; 352 } 353 } 354 substringn++; 355 } 356 lmutex_unlock(®ex_lock); 357 return ((char *)end_of_matchp); 358 } /* regex() */ 359 360 361 /* DEFINITIONS OF PRIVATE FUNCTIONS */ 362 363 static int 364 get_wchar(wchar_t *wcharp, 365 const char *stringp) 366 { 367 int char_size; 368 369 if (stringp == (char *)0) { 370 char_size = 0; 371 *wcharp = (wchar_t)((unsigned int)'\0'); 372 } else if (*stringp == '\0') { 373 char_size = 0; 374 *wcharp = (wchar_t)((unsigned int)*stringp); 375 } else if ((unsigned char)*stringp <= (unsigned char)0x7f) { 376 char_size = 1; 377 *wcharp = (wchar_t)((unsigned int)*stringp); 378 } else { 379 char_size = mbtowc(wcharp, stringp, MB_LEN_MAX); 380 } 381 return (char_size); 382 } 383 384 static void 385 get_match_counts(int *nmust_matchp, 386 int *nextra_matches_allowedp, 387 const char *count_stringp) 388 { 389 int minimum_match_count; 390 int maximum_match_count; 391 392 minimum_match_count = 393 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 394 *nmust_matchp = minimum_match_count; 395 396 count_stringp++; 397 maximum_match_count = 398 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 399 if (maximum_match_count == (int)UNLIMITED) { 400 *nextra_matches_allowedp = (int)UNLIMITED; 401 } else { 402 *nextra_matches_allowedp = 403 maximum_match_count - minimum_match_count; 404 } 405 return; 406 407 } /* get_match_counts() */ 408 409 static boolean_t 410 in_wchar_range(wchar_t test_char, 411 wchar_t lower_char, 412 wchar_t upper_char) 413 { 414 return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && 415 (lower_char <= test_char) && (test_char <= upper_char)) || 416 (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) && 417 ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) && 418 (lower_char <= test_char) && (test_char <= upper_char))); 419 420 } /* in_wchar_range() */ 421 422 static const char * 423 pop_stringp(void) 424 { 425 const char *stringp; 426 427 if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) { 428 return ((char *)0); 429 } else { 430 stringp = *stringp_stackp; 431 stringp_stackp++; 432 return (stringp); 433 } 434 } 435 436 437 static const char * 438 previous_charp(const char *current_charp) 439 { 440 /* 441 * returns the pointer to the previous character in 442 * a string of multibyte characters 443 */ 444 445 const char *prev_cs0 = current_charp - 1; 446 const char *prev_cs1 = current_charp - eucw1; 447 const char *prev_cs2 = current_charp - eucw2 - 1; 448 const char *prev_cs3 = current_charp - eucw3 - 1; 449 const char *prev_charp; 450 451 if ((unsigned char)*prev_cs0 <= 0x7f) { 452 prev_charp = prev_cs0; 453 } else if ((unsigned char)*prev_cs2 == SS2) { 454 prev_charp = prev_cs2; 455 } else if ((unsigned char)*prev_cs3 == SS3) { 456 prev_charp = prev_cs3; 457 } else { 458 prev_charp = prev_cs1; 459 } 460 return (prev_charp); 461 462 } /* previous_charp() */ 463 464 static const char * 465 push_stringp(const char *stringp) 466 { 467 if (stringp_stackp <= &stringp_stack[0]) { 468 return ((char *)0); 469 } else { 470 stringp_stackp--; 471 *stringp_stackp = stringp; 472 return (stringp); 473 } 474 } 475 476 477 static char_test_result_t 478 test_char_against_ascii_class(char test_char, 479 const char *classp, 480 char_test_condition_t test_condition) 481 { 482 /* 483 * tests a character for membership in an ASCII character class compiled 484 * by the internationalized version of regcmp(); 485 * 486 * NOTE: The internationalized version of regcmp() compiles 487 * the range a-z in an ASCII character class to aTHRUz. 488 */ 489 490 int nbytes_to_check; 491 492 nbytes_to_check = (int)*classp; 493 classp++; 494 nbytes_to_check--; 495 496 while (nbytes_to_check > 0) { 497 if (test_char == *classp) { 498 if (test_condition == IN_CLASS) 499 return (CONDITION_TRUE); 500 else 501 return (CONDITION_FALSE); 502 } else if (*classp == THRU) { 503 if ((*(classp - 1) <= test_char) && 504 (test_char <= *(classp + 1))) { 505 if (test_condition == IN_CLASS) 506 return (CONDITION_TRUE); 507 else 508 return (CONDITION_FALSE); 509 } else { 510 classp += 2; 511 nbytes_to_check -= 2; 512 } 513 } else { 514 classp++; 515 nbytes_to_check--; 516 } 517 } 518 if (test_condition == NOT_IN_CLASS) { 519 return (CONDITION_TRUE); 520 } else { 521 return (CONDITION_FALSE); 522 } 523 } /* test_char_against_ascii_class() */ 524 525 static char_test_result_t 526 test_char_against_multibyte_class(wchar_t test_char, 527 const char *classp, 528 char_test_condition_t test_condition) 529 { 530 /* 531 * tests a character for membership in a multibyte character class; 532 * 533 * NOTE: The range a-z in a multibyte character class compiles to 534 * aTHRUz. 535 */ 536 537 int char_size; 538 wchar_t current_char; 539 int nbytes_to_check; 540 wchar_t previous_char; 541 542 nbytes_to_check = (int)*classp; 543 classp++; 544 nbytes_to_check--; 545 546 char_size = get_wchar(¤t_char, classp); 547 if (char_size <= 0) { 548 return (CHAR_TEST_ERROR); 549 } else if (test_char == current_char) { 550 if (test_condition == IN_CLASS) { 551 return (CONDITION_TRUE); 552 } else { 553 return (CONDITION_FALSE); 554 } 555 } else { 556 classp += char_size; 557 nbytes_to_check -= char_size; 558 } 559 560 while (nbytes_to_check > 0) { 561 previous_char = current_char; 562 char_size = get_wchar(¤t_char, classp); 563 if (char_size <= 0) { 564 return (CHAR_TEST_ERROR); 565 } else if (test_char == current_char) { 566 if (test_condition == IN_CLASS) { 567 return (CONDITION_TRUE); 568 } else { 569 return (CONDITION_FALSE); 570 } 571 } else if (current_char == THRU) { 572 classp += char_size; 573 nbytes_to_check -= char_size; 574 char_size = get_wchar(¤t_char, classp); 575 if (char_size <= 0) { 576 return (CHAR_TEST_ERROR); 577 } else if (in_wchar_range(test_char, previous_char, 578 current_char)) { 579 if (test_condition == IN_CLASS) { 580 return (CONDITION_TRUE); 581 } else { 582 return (CONDITION_FALSE); 583 } 584 } else { 585 classp += char_size; 586 nbytes_to_check -= char_size; 587 } 588 } else { 589 classp += char_size; 590 nbytes_to_check -= char_size; 591 } 592 } 593 if (test_condition == NOT_IN_CLASS) { 594 return (CONDITION_TRUE); 595 } else { 596 return (CONDITION_FALSE); 597 } 598 } /* test_char_against_multibyte_class() */ 599 600 601 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 602 603 static char_test_result_t 604 test_char_against_old_ascii_class(char test_char, 605 const char *classp, 606 char_test_condition_t test_condition) 607 { 608 /* 609 * tests a character for membership in an ASCII character class compiled 610 * by the ASCII version of regcmp(); 611 * 612 * NOTE: ASCII versions of regcmp() compile the range a-z in an 613 * ASCII character class to THRUaz. The internationalized 614 * version compiles the same range to aTHRUz. 615 */ 616 617 int nbytes_to_check; 618 619 nbytes_to_check = (int)*classp; 620 classp++; 621 nbytes_to_check--; 622 623 while (nbytes_to_check > 0) { 624 if (test_char == *classp) { 625 if (test_condition == IN_CLASS) { 626 return (CONDITION_TRUE); 627 } else { 628 return (CONDITION_FALSE); 629 } 630 } else if (*classp == THRU) { 631 if ((*(classp + 1) <= test_char) && 632 (test_char <= *(classp + 2))) { 633 if (test_condition == IN_CLASS) { 634 return (CONDITION_TRUE); 635 } else { 636 return (CONDITION_FALSE); 637 } 638 } else { 639 classp += 3; 640 nbytes_to_check -= 3; 641 } 642 } else { 643 classp++; 644 nbytes_to_check--; 645 } 646 } 647 if (test_condition == NOT_IN_CLASS) { 648 return (CONDITION_TRUE); 649 } else { 650 return (CONDITION_FALSE); 651 } 652 } /* test_char_against_old_ascii_class() */ 653 654 static const char * 655 test_repeated_ascii_char(const char *repeat_startp, 656 const char *stringp, 657 const char *regexp) 658 { 659 const char *end_of_matchp; 660 661 end_of_matchp = test_string(stringp, regexp); 662 while ((end_of_matchp == (char *)0) && 663 (stringp > repeat_startp)) { 664 stringp--; 665 end_of_matchp = test_string(stringp, regexp); 666 } 667 return (end_of_matchp); 668 } 669 670 static const char * 671 test_repeated_multibyte_char(const char *repeat_startp, 672 const char *stringp, 673 const char *regexp) 674 { 675 const char *end_of_matchp; 676 677 end_of_matchp = test_string(stringp, regexp); 678 while ((end_of_matchp == (char *)0) && 679 (stringp > repeat_startp)) { 680 stringp = previous_charp(stringp); 681 end_of_matchp = test_string(stringp, regexp); 682 } 683 return (end_of_matchp); 684 } 685 686 static const char * 687 test_repeated_group(const char *repeat_startp, 688 const char *stringp, 689 const char *regexp) 690 { 691 const char *end_of_matchp; 692 693 end_of_matchp = test_string(stringp, regexp); 694 while ((end_of_matchp == (char *)0) && 695 (stringp > repeat_startp)) { 696 stringp = pop_stringp(); 697 if (stringp == (char *)0) { 698 return ((char *)0); 699 } 700 end_of_matchp = test_string(stringp, regexp); 701 } 702 return (end_of_matchp); 703 } 704 705 static const char * 706 test_string(const char *stringp, 707 const char *regexp) 708 { 709 /* 710 * returns a pointer to the first character following the first 711 * substring of the string addressed by stringp that matches 712 * the compiled regular expression addressed by regexp 713 */ 714 715 unsigned int group_length; 716 int nextra_matches_allowed; 717 int nmust_match; 718 wchar_t regex_wchar; 719 int regex_char_size; 720 const char *repeat_startp; 721 unsigned int return_argn; 722 wchar_t string_wchar; 723 int string_char_size; 724 unsigned int substringn; 725 char_test_condition_t test_condition; 726 const char *test_stringp; 727 728 for (;;) { 729 730 /* 731 * Exit the loop via a return whenever there's a match 732 * or it's clear that there can be no match. 733 */ 734 735 switch ((int)*regexp) { 736 737 /* 738 * No fall-through. 739 * Each case ends with either a return or with stringp 740 * addressing the next character to be tested and regexp 741 * addressing the next compiled regular expression 742 * 743 * NOTE: The comments for each case give the meaning 744 * of the compiled regular expression decoded by the case 745 * and the character string that the compiled regular 746 * expression uses to encode the case. Each single 747 * character encoded in the compiled regular expression 748 * is shown enclosed in angle brackets (<>). Each 749 * compiled regular expression begins with a marker 750 * character which is shown as a named constant 751 * (e.g. <ASCII_CHAR>). Character constants are shown 752 * enclosed in single quotes (e.g. <'$'>). All other 753 * single characters encoded in the compiled regular 754 * expression are shown as lower case variable names 755 * (e.g. <ascii_char> or <multibyte_char>). Multicharacter 756 * strings encoded in the compiled regular expression 757 * are shown as variable names followed by elipses 758 * (e.g. <compiled_regex...>). 759 */ 760 761 case ASCII_CHAR: /* single ASCII char */ 762 763 /* encoded as <ASCII_CHAR><ascii_char> */ 764 765 regexp++; 766 if (*regexp == *stringp) { 767 regexp++; 768 stringp++; 769 } else { 770 return ((char *)0); 771 } 772 break; /* end case ASCII_CHAR */ 773 774 case MULTIBYTE_CHAR: /* single multibyte char */ 775 776 /* encoded as <MULTIBYTE_CHAR><multibyte_char> */ 777 778 regexp++; 779 regex_char_size = get_wchar(®ex_wchar, regexp); 780 string_char_size = get_wchar(&string_wchar, stringp); 781 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 782 return ((char *)0); 783 } else { 784 regexp += regex_char_size; 785 stringp += string_char_size; 786 } 787 break; /* end case MULTIBYTE_CHAR */ 788 789 case ANY_CHAR: /* any single ASCII or multibyte char */ 790 791 /* encoded as <ANY_CHAR> */ 792 793 if (!multibyte) { 794 if (*stringp == '\0') { 795 return ((char *)0); 796 } else { 797 regexp++; 798 stringp++; 799 } 800 } else { 801 string_char_size = get_wchar(&string_wchar, stringp); 802 if (string_char_size <= 0) { 803 return ((char *)0); 804 } else { 805 regexp++; 806 stringp += string_char_size; 807 } 808 } 809 break; /* end case ANY_CHAR */ 810 811 case IN_ASCII_CHAR_CLASS: /* [.....] */ 812 case NOT_IN_ASCII_CHAR_CLASS: 813 814 /* 815 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...> 816 * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...> 817 * 818 * NOTE: <class_length> includes the <class_length> byte 819 */ 820 821 if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) { 822 test_condition = IN_CLASS; 823 } else { 824 test_condition = NOT_IN_CLASS; 825 } 826 regexp++; /* point to the <class_length> byte */ 827 828 if ((*stringp != '\0') && 829 (test_char_against_ascii_class(*stringp, regexp, 830 test_condition) == CONDITION_TRUE)) { 831 regexp += (int)*regexp; /* add the class length to regexp */ 832 stringp++; 833 } else { 834 return ((char *)0); 835 } 836 break; /* end case IN_ASCII_CHAR_CLASS */ 837 838 case IN_MULTIBYTE_CHAR_CLASS: /* [....] */ 839 case NOT_IN_MULTIBYTE_CHAR_CLASS: 840 841 /* 842 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 843 * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 844 * 845 * NOTE: <class_length> includes the <class_length> byte 846 */ 847 848 if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) { 849 test_condition = IN_CLASS; 850 } else { 851 test_condition = NOT_IN_CLASS; 852 } 853 regexp++; /* point to the <class_length> byte */ 854 855 string_char_size = get_wchar(&string_wchar, stringp); 856 if ((string_char_size > 0) && 857 (test_char_against_multibyte_class(string_wchar, regexp, 858 test_condition) == CONDITION_TRUE)) { 859 regexp += (int)*regexp; /* add the class length to regexp */ 860 stringp += string_char_size; 861 } else { 862 return ((char *)0); 863 } 864 break; /* end case IN_MULTIBYTE_CHAR_CLASS */ 865 866 case IN_OLD_ASCII_CHAR_CLASS: /* [...] */ 867 case NOT_IN_OLD_ASCII_CHAR_CLASS: 868 869 /* 870 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 871 * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 872 * 873 * NOTE: <class_length> includes the <class_length> byte 874 */ 875 876 if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) { 877 test_condition = IN_CLASS; 878 } else { 879 test_condition = NOT_IN_CLASS; 880 } 881 regexp++; /* point to the <class_length> byte */ 882 883 if ((*stringp != '\0') && 884 (test_char_against_old_ascii_class(*stringp, regexp, 885 test_condition) == CONDITION_TRUE)) { 886 regexp += (int)*regexp; /* add the class length to regexp */ 887 stringp++; 888 } else { 889 return ((char *)0); 890 } 891 break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ 892 893 case SIMPLE_GROUP: /* (.....) */ 894 895 /* encoded as <SIMPLE_GROUP><group_length> */ 896 897 regexp += 2; 898 break; /* end case SIMPLE_GROUP */ 899 900 case END_GROUP: /* (.....) */ 901 902 /* encoded as <END_GROUP><groupn> */ 903 904 regexp += 2; 905 break; /* end case END_GROUP */ 906 907 case SAVED_GROUP: /* (.....)$0-9 */ 908 909 /* encoded as <SAVED_GROUP><substringn> */ 910 911 regexp++; 912 substringn = (unsigned int)*regexp; 913 if (substringn >= NSUBSTRINGS) 914 return ((char *)0); 915 substring_startp[substringn] = stringp; 916 regexp++; 917 break; /* end case SAVED_GROUP */ 918 919 case END_SAVED_GROUP: /* (.....)$0-9 */ 920 921 /* 922 * encoded as <END_SAVED_GROUP><substringn>\ 923 * <return_arg_number[substringn]> 924 */ 925 926 regexp++; 927 substringn = (unsigned int)*regexp; 928 if (substringn >= NSUBSTRINGS) 929 return ((char *)0); 930 substring_endp[substringn] = stringp; 931 regexp++; 932 return_argn = (unsigned int)*regexp; 933 if (return_argn >= NSUBSTRINGS) 934 return ((char *)0); 935 return_arg_number[substringn] = return_argn; 936 regexp++; 937 break; /* end case END_SAVED_GROUP */ 938 939 case ASCII_CHAR|ZERO_OR_MORE: /* char* */ 940 941 /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */ 942 943 regexp++; 944 repeat_startp = stringp; 945 while (*stringp == *regexp) { 946 stringp++; 947 } 948 regexp++; 949 return (test_repeated_ascii_char(repeat_startp, 950 stringp, regexp)); 951 952 /* end case ASCII_CHAR|ZERO_OR_MORE */ 953 954 case ASCII_CHAR|ONE_OR_MORE: /* char+ */ 955 956 /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */ 957 958 regexp++; 959 if (*stringp != *regexp) { 960 return ((char *)0); 961 } else { 962 stringp++; 963 repeat_startp = stringp; 964 while (*stringp == *regexp) { 965 stringp++; 966 } 967 regexp++; 968 return (test_repeated_ascii_char(repeat_startp, stringp, 969 regexp)); 970 } 971 /* end case ASCII_CHAR|ONE_OR_MORE */ 972 973 case ASCII_CHAR|COUNT: /* char{min_count,max_count} */ 974 975 /* 976 * encoded as <ASCII_CHAR|COUNT><ascii_char>\ 977 * <minimum_match_count><maximum_match_count> 978 */ 979 980 regexp++; 981 get_match_counts(&nmust_match, &nextra_matches_allowed, 982 regexp + 1); 983 while ((*stringp == *regexp) && (nmust_match > 0)) { 984 nmust_match--; 985 stringp++; 986 } 987 if (nmust_match > 0) { 988 return ((char *)0); 989 } else if (nextra_matches_allowed == UNLIMITED) { 990 repeat_startp = stringp; 991 while (*stringp == *regexp) { 992 stringp++; 993 } 994 regexp += 3; 995 return (test_repeated_ascii_char(repeat_startp, stringp, 996 regexp)); 997 } else { 998 repeat_startp = stringp; 999 while ((*stringp == *regexp) && 1000 (nextra_matches_allowed > 0)) { 1001 nextra_matches_allowed--; 1002 stringp++; 1003 } 1004 regexp += 3; 1005 return (test_repeated_ascii_char(repeat_startp, stringp, 1006 regexp)); 1007 } 1008 /* end case ASCII_CHAR|COUNT */ 1009 1010 case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */ 1011 1012 /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */ 1013 1014 regexp++; 1015 regex_char_size = get_wchar(®ex_wchar, regexp); 1016 repeat_startp = stringp; 1017 string_char_size = get_wchar(&string_wchar, stringp); 1018 while ((string_char_size > 0) && 1019 (string_wchar == regex_wchar)) { 1020 stringp += string_char_size; 1021 string_char_size = get_wchar(&string_wchar, stringp); 1022 } 1023 regexp += regex_char_size; 1024 return (test_repeated_multibyte_char(repeat_startp, stringp, 1025 regexp)); 1026 1027 /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ 1028 1029 case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */ 1030 1031 /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */ 1032 1033 regexp++; 1034 regex_char_size = get_wchar(®ex_wchar, regexp); 1035 string_char_size = get_wchar(&string_wchar, stringp); 1036 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 1037 return ((char *)0); 1038 } else { 1039 stringp += string_char_size; 1040 repeat_startp = stringp; 1041 string_char_size = get_wchar(&string_wchar, stringp); 1042 while ((string_char_size > 0) && 1043 (string_wchar == regex_wchar)) { 1044 stringp += string_char_size; 1045 string_char_size = get_wchar(&string_wchar, stringp); 1046 } 1047 regexp += regex_char_size; 1048 return (test_repeated_multibyte_char(repeat_startp, stringp, 1049 regexp)); 1050 } 1051 /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ 1052 1053 case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */ 1054 1055 /* 1056 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\ 1057 * <minimum_match_count><maximum_match_count> 1058 */ 1059 1060 regexp++; 1061 regex_char_size = get_wchar(®ex_wchar, regexp); 1062 get_match_counts(&nmust_match, &nextra_matches_allowed, 1063 regexp + regex_char_size); 1064 string_char_size = get_wchar(&string_wchar, stringp); 1065 while ((string_char_size > 0) && 1066 (string_wchar == regex_wchar) && 1067 (nmust_match > 0)) { 1068 1069 nmust_match--; 1070 stringp += string_char_size; 1071 string_char_size = get_wchar(&string_wchar, stringp); 1072 } 1073 if (nmust_match > 0) { 1074 return ((char *)0); 1075 } else if (nextra_matches_allowed == UNLIMITED) { 1076 repeat_startp = stringp; 1077 while ((string_char_size > 0) && 1078 (string_wchar == regex_wchar)) { 1079 stringp += string_char_size; 1080 string_char_size = get_wchar(&string_wchar, stringp); 1081 } 1082 regexp += regex_char_size + 2; 1083 return (test_repeated_multibyte_char(repeat_startp, stringp, 1084 regexp)); 1085 } else { 1086 repeat_startp = stringp; 1087 while ((string_char_size > 0) && 1088 (string_wchar == regex_wchar) && 1089 (nextra_matches_allowed > 0)) { 1090 nextra_matches_allowed--; 1091 stringp += string_char_size; 1092 string_char_size = get_wchar(&string_wchar, stringp); 1093 } 1094 regexp += regex_char_size + 2; 1095 return (test_repeated_multibyte_char(repeat_startp, stringp, 1096 regexp)); 1097 } 1098 /* end case MULTIBYTE_CHAR|COUNT */ 1099 1100 case ANY_CHAR|ZERO_OR_MORE: /* .* */ 1101 1102 /* encoded as <ANY_CHAR|ZERO_OR_MORE> */ 1103 1104 repeat_startp = stringp; 1105 if (!multibyte) { 1106 while (*stringp != '\0') { 1107 stringp++; 1108 } 1109 regexp++; 1110 return (test_repeated_ascii_char(repeat_startp, stringp, 1111 regexp)); 1112 } else { 1113 string_char_size = get_wchar(&string_wchar, stringp); 1114 while (string_char_size > 0) { 1115 stringp += string_char_size; 1116 string_char_size = get_wchar(&string_wchar, stringp); 1117 } 1118 regexp++; 1119 return (test_repeated_multibyte_char(repeat_startp, stringp, 1120 regexp)); 1121 } 1122 /* end case <ANY_CHAR|ZERO_OR_MORE> */ 1123 1124 case ANY_CHAR|ONE_OR_MORE: /* .+ */ 1125 1126 /* encoded as <ANY_CHAR|ONE_OR_MORE> */ 1127 1128 if (!multibyte) { 1129 if (*stringp == '\0') { 1130 return ((char *)0); 1131 } else { 1132 stringp++; 1133 repeat_startp = stringp; 1134 while (*stringp != '\0') { 1135 stringp++; 1136 } 1137 regexp++; 1138 return (test_repeated_ascii_char(repeat_startp, stringp, 1139 regexp)); 1140 } 1141 } else { 1142 string_char_size = get_wchar(&string_wchar, stringp); 1143 if (string_char_size <= 0) { 1144 return ((char *)0); 1145 } else { 1146 stringp += string_char_size; 1147 repeat_startp = stringp; 1148 string_char_size = get_wchar(&string_wchar, stringp); 1149 while (string_char_size > 0) { 1150 stringp += string_char_size; 1151 string_char_size = 1152 get_wchar(&string_wchar, stringp); 1153 } 1154 regexp++; 1155 return (test_repeated_multibyte_char(repeat_startp, 1156 stringp, regexp)); 1157 } 1158 } 1159 /* end case <ANY_CHAR|ONE_OR_MORE> */ 1160 1161 case ANY_CHAR|COUNT: /* .{min_count,max_count} */ 1162 1163 /* 1164 * encoded as <ANY_CHAR|COUNT>\ 1165 * <minimum_match_count><maximum_match_count> 1166 */ 1167 1168 get_match_counts(&nmust_match, &nextra_matches_allowed, 1169 regexp + 1); 1170 if (!multibyte) { 1171 while ((*stringp != '\0') && (nmust_match > 0)) { 1172 nmust_match--; 1173 stringp++; 1174 } 1175 if (nmust_match > 0) { 1176 return ((char *)0); 1177 } else if (nextra_matches_allowed == UNLIMITED) { 1178 repeat_startp = stringp; 1179 while (*stringp != '\0') { 1180 stringp++; 1181 } 1182 regexp += 3; 1183 return (test_repeated_ascii_char(repeat_startp, stringp, 1184 regexp)); 1185 } else { 1186 repeat_startp = stringp; 1187 while ((*stringp != '\0') && 1188 (nextra_matches_allowed > 0)) { 1189 nextra_matches_allowed--; 1190 stringp++; 1191 } 1192 regexp += 3; 1193 return (test_repeated_ascii_char(repeat_startp, stringp, 1194 regexp)); 1195 } 1196 } else { /* multibyte character */ 1197 1198 string_char_size = get_wchar(&string_wchar, stringp); 1199 while ((string_char_size > 0) && (nmust_match > 0)) { 1200 nmust_match--; 1201 stringp += string_char_size; 1202 string_char_size = get_wchar(&string_wchar, stringp); 1203 } 1204 if (nmust_match > 0) { 1205 return ((char *)0); 1206 } else if (nextra_matches_allowed == UNLIMITED) { 1207 repeat_startp = stringp; 1208 while (string_char_size > 0) { 1209 stringp += string_char_size; 1210 string_char_size = 1211 get_wchar(&string_wchar, stringp); 1212 } 1213 regexp += 3; 1214 return (test_repeated_multibyte_char(repeat_startp, 1215 stringp, regexp)); 1216 } else { 1217 repeat_startp = stringp; 1218 while ((string_char_size > 0) && 1219 (nextra_matches_allowed > 0)) { 1220 nextra_matches_allowed--; 1221 stringp += string_char_size; 1222 string_char_size = 1223 get_wchar(&string_wchar, stringp); 1224 } 1225 regexp += 3; 1226 return (test_repeated_multibyte_char(repeat_startp, 1227 stringp, regexp)); 1228 } 1229 } /* end case ANY_CHAR|COUNT */ 1230 1231 case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1232 case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1233 1234 /* 1235 * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1236 * <class_length><class ...> 1237 * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1238 * <class_length><class ...> 1239 * 1240 * NOTE: <class_length> includes the <class_length> byte 1241 */ 1242 1243 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1244 test_condition = IN_CLASS; 1245 } else { 1246 test_condition = NOT_IN_CLASS; 1247 } 1248 regexp++; /* point to the <class_length> byte */ 1249 1250 repeat_startp = stringp; 1251 while ((*stringp != '\0') && 1252 (test_char_against_ascii_class(*stringp, regexp, 1253 test_condition) == CONDITION_TRUE)) { 1254 stringp++; 1255 } 1256 regexp += (int)*regexp; /* add the class length to regexp */ 1257 return (test_repeated_ascii_char(repeat_startp, stringp, 1258 regexp)); 1259 1260 /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1261 1262 case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1263 case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE: 1264 1265 /* 1266 * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1267 * <class_length><class ...> 1268 * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1269 * <class_length><class ...> 1270 * 1271 * NOTE: <class_length> includes the <class_length> byte 1272 */ 1273 1274 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1275 test_condition = IN_CLASS; 1276 } else { 1277 test_condition = NOT_IN_CLASS; 1278 } 1279 regexp++; /* point to the <class_length> byte */ 1280 1281 if ((*stringp == '\0') || 1282 (test_char_against_ascii_class(*stringp, regexp, 1283 test_condition) != CONDITION_TRUE)) { 1284 return ((char *)0); 1285 } else { 1286 stringp++; 1287 repeat_startp = stringp; 1288 while ((*stringp != '\0') && 1289 (test_char_against_ascii_class(*stringp, regexp, 1290 test_condition) == CONDITION_TRUE)) { 1291 stringp++; 1292 } 1293 regexp += (int)*regexp; /* add the class length to regexp */ 1294 return (test_repeated_ascii_char(repeat_startp, stringp, 1295 regexp)); 1296 } 1297 /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ 1298 1299 case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */ 1300 case NOT_IN_ASCII_CHAR_CLASS | COUNT: 1301 1302 /* 1303 * endoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1304 * <class ...><minimum_match_count>\ 1305 * <maximum_match_count> 1306 * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 1307 * <class ...><minimum_match_count>\ 1308 * <maximum_match_count> 1309 * 1310 * NOTE: <class_length> includes the <class_length> byte, 1311 * but not the <minimum_match_count> or 1312 * <maximum_match_count> bytes 1313 */ 1314 1315 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) { 1316 test_condition = IN_CLASS; 1317 } else { 1318 test_condition = NOT_IN_CLASS; 1319 } 1320 regexp++; /* point to the <class_length> byte */ 1321 1322 get_match_counts(&nmust_match, &nextra_matches_allowed, 1323 regexp + (int)*regexp); 1324 while ((*stringp != '\0') && 1325 (test_char_against_ascii_class(*stringp, regexp, 1326 test_condition) == CONDITION_TRUE) && 1327 (nmust_match > 0)) { 1328 nmust_match--; 1329 stringp++; 1330 } 1331 if (nmust_match > 0) { 1332 return ((char *)0); 1333 } else if (nextra_matches_allowed == UNLIMITED) { 1334 repeat_startp = stringp; 1335 while ((*stringp != '\0') && 1336 (test_char_against_ascii_class(*stringp, regexp, 1337 test_condition) == CONDITION_TRUE)) { 1338 stringp++; 1339 } 1340 regexp += (int)*regexp + 2; 1341 return (test_repeated_ascii_char(repeat_startp, stringp, 1342 regexp)); 1343 } else { 1344 repeat_startp = stringp; 1345 while ((*stringp != '\0') && 1346 (test_char_against_ascii_class(*stringp, regexp, 1347 test_condition) == CONDITION_TRUE) && 1348 (nextra_matches_allowed > 0)) { 1349 nextra_matches_allowed--; 1350 stringp++; 1351 } 1352 regexp += (int)*regexp + 2; 1353 return (test_repeated_ascii_char(repeat_startp, stringp, 1354 regexp)); 1355 } 1356 /* end case IN_ASCII_CHAR_CLASS|COUNT */ 1357 1358 case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1359 case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: 1360 1361 /* 1362 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1363 * <class_length><class ...> 1364 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 1365 * <class_length><class ...> 1366 * 1367 * NOTE: <class_length> includes the <class_length> byte 1368 */ 1369 1370 if ((int)*regexp == 1371 (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) { 1372 test_condition = IN_CLASS; 1373 } else { 1374 test_condition = NOT_IN_CLASS; 1375 } 1376 regexp++; /* point to the <class_length> byte */ 1377 1378 repeat_startp = stringp; 1379 string_char_size = get_wchar(&string_wchar, stringp); 1380 while ((string_char_size > 0) && 1381 (test_char_against_multibyte_class(string_wchar, regexp, 1382 test_condition) == CONDITION_TRUE)) { 1383 stringp += string_char_size; 1384 string_char_size = get_wchar(&string_wchar, stringp); 1385 } 1386 regexp += (int)*regexp; /* add the class length to regexp */ 1387 return (test_repeated_multibyte_char(repeat_startp, stringp, 1388 regexp)); 1389 1390 /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ 1391 1392 case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1393 case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: 1394 1395 /* 1396 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1397 * <class_length><class ...> 1398 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 1399 * <class_length><class ...> 1400 * 1401 * NOTE: <class_length> includes the <class_length> byte 1402 */ 1403 1404 if ((int)*regexp == 1405 (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) { 1406 test_condition = IN_CLASS; 1407 } else { 1408 test_condition = NOT_IN_CLASS; 1409 } 1410 regexp++; /* point to the <class_length> byte */ 1411 1412 string_char_size = get_wchar(&string_wchar, stringp); 1413 if ((string_char_size <= 0) || 1414 (test_char_against_multibyte_class(string_wchar, regexp, 1415 test_condition) != CONDITION_TRUE)) { 1416 return ((char *)0); 1417 } else { 1418 stringp += string_char_size; 1419 repeat_startp = stringp; 1420 string_char_size = get_wchar(&string_wchar, stringp); 1421 while ((string_char_size > 0) && 1422 (test_char_against_multibyte_class(string_wchar, 1423 regexp, test_condition) == CONDITION_TRUE)) { 1424 stringp += string_char_size; 1425 string_char_size = get_wchar(&string_wchar, stringp); 1426 } 1427 regexp += (int)*regexp; /* add the class length to regexp */ 1428 return (test_repeated_multibyte_char(repeat_startp, stringp, 1429 regexp)); 1430 } 1431 /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ 1432 1433 case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1434 case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT: 1435 1436 /* 1437 * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1438 * <class_length><class ...><min_count><max_count> 1439 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 1440 * <class_length><class ...><min_count><max_count> 1441 * 1442 * NOTE: <class_length> includes the <class_length> byte 1443 * but not the <minimum_match_count> or 1444 * <maximum_match_count> bytes 1445 */ 1446 1447 if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) { 1448 test_condition = IN_CLASS; 1449 } else { 1450 test_condition = NOT_IN_CLASS; 1451 } 1452 regexp++; /* point to the <class_length> byte */ 1453 1454 get_match_counts(&nmust_match, &nextra_matches_allowed, 1455 regexp + (int)*regexp); 1456 string_char_size = get_wchar(&string_wchar, stringp); 1457 while ((string_char_size > 0) && 1458 (test_char_against_multibyte_class(string_wchar, regexp, 1459 test_condition) == CONDITION_TRUE) && 1460 (nmust_match > 0)) { 1461 nmust_match--; 1462 stringp += string_char_size; 1463 string_char_size = get_wchar(&string_wchar, stringp); 1464 } 1465 if (nmust_match > 0) { 1466 return ((char *)0); 1467 } else if (nextra_matches_allowed == UNLIMITED) { 1468 repeat_startp = stringp; 1469 while ((string_char_size > 0) && 1470 (test_char_against_multibyte_class(string_wchar, 1471 regexp, test_condition) == CONDITION_TRUE)) { 1472 stringp += string_char_size; 1473 string_char_size = get_wchar(&string_wchar, stringp); 1474 } 1475 regexp += (int)*regexp + 2; 1476 return (test_repeated_multibyte_char(repeat_startp, stringp, 1477 regexp)); 1478 } else { 1479 repeat_startp = stringp; 1480 while ((string_char_size > 0) && 1481 (test_char_against_multibyte_class(string_wchar, 1482 regexp, test_condition) == CONDITION_TRUE) && 1483 (nextra_matches_allowed > 0)) { 1484 nextra_matches_allowed--; 1485 stringp += string_char_size; 1486 string_char_size = get_wchar(&string_wchar, stringp); 1487 } 1488 regexp += (int)*regexp + 2; 1489 return (test_repeated_multibyte_char(repeat_startp, stringp, 1490 regexp)); 1491 } 1492 /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ 1493 1494 case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 1495 case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: 1496 1497 /* 1498 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1499 * <class_length><class ...> 1500 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 1501 * <class_length><class ...> 1502 * 1503 * NOTE: <class_length> includes the <class_length> byte 1504 */ 1505 1506 if ((int)*regexp == 1507 (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 1508 test_condition = IN_CLASS; 1509 } else { 1510 test_condition = NOT_IN_CLASS; 1511 } 1512 regexp++; /* point to the <class_length> byte */ 1513 1514 repeat_startp = stringp; 1515 while ((*stringp != '\0') && 1516 (test_char_against_old_ascii_class(*stringp, regexp, 1517 test_condition) == CONDITION_TRUE)) { 1518 stringp++; 1519 } 1520 regexp += (int)*regexp; /* add the class length to regexp */ 1521 return (test_repeated_ascii_char(repeat_startp, stringp, 1522 regexp)); 1523 1524 /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 1525 1526 case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 1527 case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: 1528 1529 /* 1530 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1531 * <class_length><class ...> 1532 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 1533 * <class_length><class ...> 1534 * 1535 * NOTE: <class length> includes the <class_length> byte 1536 */ 1537 1538 if ((int)*regexp == 1539 (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 1540 test_condition = IN_CLASS; 1541 } else { 1542 test_condition = NOT_IN_CLASS; 1543 } 1544 regexp++; /* point to the <class_length> byte */ 1545 1546 if ((*stringp == '\0') || 1547 (test_char_against_old_ascii_class(*stringp, regexp, 1548 test_condition) != CONDITION_TRUE)) { 1549 return ((char *)0); 1550 } else { 1551 stringp++; 1552 repeat_startp = stringp; 1553 while ((*stringp != '\0') && 1554 (test_char_against_old_ascii_class(*stringp, regexp, 1555 test_condition) == CONDITION_TRUE)) { 1556 stringp++; 1557 } 1558 regexp += (int)*regexp; /* add the class length to regexp */ 1559 return (test_repeated_ascii_char(repeat_startp, stringp, 1560 regexp)); 1561 } 1562 /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ 1563 1564 case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 1565 case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT: 1566 1567 /* 1568 * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\ 1569 * <class ...><minimum_match_count>\ 1570 * <maximum_match_count> 1571 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\ 1572 * <class_length><class ...><minimum_match_count>\ 1573 * <maximum_match_count> 1574 * 1575 * NOTE: <class_length> includes the <class_length> byte 1576 * but not the <minimum_match_count> or 1577 * <maximum_match_count> bytes 1578 */ 1579 1580 if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) { 1581 test_condition = IN_CLASS; 1582 } else { 1583 test_condition = NOT_IN_CLASS; 1584 } 1585 regexp++; /* point to the <class_length> byte */ 1586 1587 get_match_counts(&nmust_match, &nextra_matches_allowed, 1588 regexp + (int)*regexp); 1589 while ((*stringp != '\0') && 1590 (test_char_against_old_ascii_class(*stringp, regexp, 1591 test_condition) == CONDITION_TRUE) && 1592 (nmust_match > 0)) { 1593 nmust_match--; 1594 stringp++; 1595 } 1596 if (nmust_match > 0) { 1597 return ((char *)0); 1598 } else if (nextra_matches_allowed == UNLIMITED) { 1599 repeat_startp = stringp; 1600 while ((*stringp != '\0') && 1601 (test_char_against_old_ascii_class(*stringp, regexp, 1602 test_condition) == CONDITION_TRUE)) { 1603 stringp++; 1604 } 1605 regexp += (int)*regexp + 2; 1606 return (test_repeated_ascii_char(repeat_startp, stringp, 1607 regexp)); 1608 } else { 1609 repeat_startp = stringp; 1610 while ((*stringp != '\0') && 1611 (test_char_against_old_ascii_class(*stringp, regexp, 1612 test_condition) == CONDITION_TRUE) && 1613 (nextra_matches_allowed > 0)) { 1614 nextra_matches_allowed--; 1615 stringp++; 1616 } 1617 regexp += (int)*regexp + 2; 1618 return (test_repeated_ascii_char(repeat_startp, stringp, 1619 regexp)); 1620 } 1621 /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ 1622 1623 case ZERO_OR_MORE_GROUP: /* (.....)* */ 1624 case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1625 case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1626 case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1627 1628 /* 1629 * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1630 * <group_length><compiled_regex...>\ 1631 * <END_GROUP|ZERO_OR_MORE><groupn> 1632 * 1633 * NOTE: 1634 * 1635 * group_length + (256 * ADDED_LENGTH_BITS) == 1636 * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\ 1637 * <groupn>) 1638 * 1639 */ 1640 1641 group_length = 1642 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1643 TIMES_256_SHIFT); 1644 regexp++; 1645 group_length += (unsigned int)*regexp; 1646 regexp++; 1647 repeat_startp = stringp; 1648 test_stringp = test_string(stringp, regexp); 1649 while (test_stringp != (char *)0) { 1650 if (push_stringp(stringp) == (char *)0) 1651 return ((char *)0); 1652 stringp = test_stringp; 1653 test_stringp = test_string(stringp, regexp); 1654 } 1655 regexp += group_length; 1656 return (test_repeated_group(repeat_startp, stringp, regexp)); 1657 1658 /* end case ZERO_OR_MORE_GROUP */ 1659 1660 case END_GROUP|ZERO_OR_MORE: /* (.....)* */ 1661 1662 /* encoded as <END_GROUP|ZERO_OR_MORE> */ 1663 1664 /* return from recursive call to test_string() */ 1665 1666 return ((char *)stringp); 1667 1668 /* end case END_GROUP|ZERO_OR_MORE */ 1669 1670 case ONE_OR_MORE_GROUP: /* (.....)+ */ 1671 case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 1672 case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 1673 case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 1674 1675 /* 1676 * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 1677 * <group_length><compiled_regex...>\ 1678 * <END_GROUP|ONE_OR_MORE><groupn> 1679 * 1680 * NOTE: 1681 * 1682 * group_length + (256 * ADDED_LENGTH_BITS) == 1683 * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\ 1684 * <groupn>) 1685 */ 1686 1687 group_length = 1688 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1689 TIMES_256_SHIFT); 1690 regexp++; 1691 group_length += (unsigned int)*regexp; 1692 regexp++; 1693 stringp = test_string(stringp, regexp); 1694 if (stringp == (char *)0) 1695 return ((char *)0); 1696 repeat_startp = stringp; 1697 test_stringp = test_string(stringp, regexp); 1698 while (test_stringp != (char *)0) { 1699 if (push_stringp(stringp) == (char *)0) 1700 return ((char *)0); 1701 stringp = test_stringp; 1702 test_stringp = test_string(stringp, regexp); 1703 } 1704 regexp += group_length; 1705 return (test_repeated_group(repeat_startp, stringp, regexp)); 1706 1707 /* end case ONE_OR_MORE_GROUP */ 1708 1709 case END_GROUP|ONE_OR_MORE: /* (.....)+ */ 1710 1711 /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */ 1712 1713 /* return from recursive call to test_string() */ 1714 1715 return ((char *)stringp); 1716 1717 /* end case END_GROUP|ONE_OR_MORE */ 1718 1719 case COUNTED_GROUP: /* (.....){max_count,min_count} */ 1720 case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH: 1721 case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH: 1722 case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH: 1723 1724 /* 1725 * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ 1726 * <compiled_regex...>\<END_GROUP|COUNT><groupn>\ 1727 * <minimum_match_count><maximum_match_count> 1728 * 1729 * NOTE: 1730 * 1731 * group_length + (256 * ADDED_LENGTH_BITS) == 1732 * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>) 1733 * 1734 * but does not include the <minimum_match_count> or 1735 * <maximum_match_count> bytes 1736 */ 1737 1738 group_length = 1739 (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 1740 TIMES_256_SHIFT); 1741 regexp++; 1742 group_length += (unsigned int)*regexp; 1743 regexp++; 1744 get_match_counts(&nmust_match, &nextra_matches_allowed, 1745 regexp + group_length); 1746 test_stringp = test_string(stringp, regexp); 1747 while ((test_stringp != (char *)0) && (nmust_match > 0)) { 1748 stringp = test_stringp; 1749 nmust_match--; 1750 test_stringp = test_string(stringp, regexp); 1751 } 1752 if (nmust_match > 0) { 1753 return ((char *)0); 1754 } else if (nextra_matches_allowed == UNLIMITED) { 1755 repeat_startp = stringp; 1756 while (test_stringp != (char *)0) { 1757 if (push_stringp(stringp) == (char *)0) 1758 return ((char *)0); 1759 stringp = test_stringp; 1760 test_stringp = test_string(stringp, regexp); 1761 } 1762 regexp += group_length + 2; 1763 return (test_repeated_group(repeat_startp, stringp, 1764 regexp)); 1765 } else { 1766 repeat_startp = stringp; 1767 while ((test_stringp != (char *)0) && 1768 (nextra_matches_allowed > 0)) { 1769 nextra_matches_allowed--; 1770 if (push_stringp(stringp) == (char *)0) 1771 return ((char *)0); 1772 stringp = test_stringp; 1773 test_stringp = test_string(stringp, regexp); 1774 } 1775 regexp += group_length + 2; 1776 return (test_repeated_group(repeat_startp, stringp, 1777 regexp)); 1778 } 1779 /* end case COUNTED_GROUP */ 1780 1781 case END_GROUP|COUNT: /* (.....){max_count,min_count} */ 1782 1783 /* encoded as <END_GROUP|COUNT> */ 1784 1785 /* return from recursive call to test_string() */ 1786 1787 return (stringp); 1788 1789 /* end case END_GROUP|COUNT */ 1790 1791 case END_OF_STRING_MARK: 1792 1793 /* encoded as <END_OF_STRING_MARK><END_REGEX> */ 1794 1795 if (*stringp == '\0') { 1796 regexp++; 1797 } else { 1798 return ((char *)0); 1799 } 1800 break; /* end case END_OF_STRING_MARK */ 1801 1802 case END_REGEX: /* end of the compiled regular expression */ 1803 1804 /* encoded as <END_REGEX> */ 1805 1806 return (stringp); 1807 1808 /* end case END_REGEX */ 1809 1810 default: 1811 1812 return ((char *)0); 1813 1814 } /* end switch (*regexp) */ 1815 1816 } /* end for (;;) */ 1817 1818 } /* test_string() */ 1819