/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 287c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 317c478bd9Sstevel@tonic-gate 327c478bd9Sstevel@tonic-gate /* 337c478bd9Sstevel@tonic-gate * IMPORTANT NOTE: 347c478bd9Sstevel@tonic-gate * 357c478bd9Sstevel@tonic-gate * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. 367c478bd9Sstevel@tonic-gate * IT IS **NOT** CHARACTER SET INDEPENDENT. 377c478bd9Sstevel@tonic-gate * 387c478bd9Sstevel@tonic-gate */ 397c478bd9Sstevel@tonic-gate 40*7257d1b4Sraf #pragma weak _regex = regex 417c478bd9Sstevel@tonic-gate 42*7257d1b4Sraf #include "lint.h" 437c478bd9Sstevel@tonic-gate /* CONSTANTS SHARED WITH regcmp() */ 447c478bd9Sstevel@tonic-gate #include "regex.h" 457c478bd9Sstevel@tonic-gate #include "mtlib.h" 467c478bd9Sstevel@tonic-gate #include <limits.h> 477c478bd9Sstevel@tonic-gate #include <stdarg.h> 487c478bd9Sstevel@tonic-gate #include <stdlib.h> 497c478bd9Sstevel@tonic-gate #include <thread.h> 507c478bd9Sstevel@tonic-gate #include <widec.h> 517c478bd9Sstevel@tonic-gate #include "tsd.h" 527c478bd9Sstevel@tonic-gate 537c478bd9Sstevel@tonic-gate 547c478bd9Sstevel@tonic-gate /* PRIVATE CONSTANTS */ 557c478bd9Sstevel@tonic-gate 567c478bd9Sstevel@tonic-gate #define ADD_256_TO_GROUP_LENGTH 0x1 577c478bd9Sstevel@tonic-gate #define ADD_512_TO_GROUP_LENGTH 0x2 587c478bd9Sstevel@tonic-gate #define ADD_768_TO_GROUP_LENGTH 0x3 597c478bd9Sstevel@tonic-gate #define ADDED_LENGTH_BITS 0x3 607c478bd9Sstevel@tonic-gate #define SINGLE_BYTE_MASK 0xff 617c478bd9Sstevel@tonic-gate #define STRINGP_STACK_SIZE 50 627c478bd9Sstevel@tonic-gate 637c478bd9Sstevel@tonic-gate 647c478bd9Sstevel@tonic-gate /* PRIVATE TYPE DEFINITIONS */ 657c478bd9Sstevel@tonic-gate 667c478bd9Sstevel@tonic-gate typedef enum { 677c478bd9Sstevel@tonic-gate 
NOT_IN_CLASS = 0, 687c478bd9Sstevel@tonic-gate IN_CLASS 697c478bd9Sstevel@tonic-gate } char_test_condition_t; 707c478bd9Sstevel@tonic-gate 717c478bd9Sstevel@tonic-gate typedef enum { 727c478bd9Sstevel@tonic-gate TESTING_CHAR = 0, 737c478bd9Sstevel@tonic-gate CONDITION_TRUE, 747c478bd9Sstevel@tonic-gate CONDITION_FALSE, 757c478bd9Sstevel@tonic-gate CHAR_TEST_ERROR 767c478bd9Sstevel@tonic-gate } char_test_result_t; 777c478bd9Sstevel@tonic-gate 787c478bd9Sstevel@tonic-gate 797c478bd9Sstevel@tonic-gate /* PRIVATE GLOBAL VARIABLES */ 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate static mutex_t regex_lock = DEFAULTMUTEX; 827c478bd9Sstevel@tonic-gate static int return_arg_number[NSUBSTRINGS]; 837c478bd9Sstevel@tonic-gate static const char *substring_endp[NSUBSTRINGS]; 847c478bd9Sstevel@tonic-gate static const char *substring_startp[NSUBSTRINGS]; 857c478bd9Sstevel@tonic-gate static const char *stringp_stack[STRINGP_STACK_SIZE]; 867c478bd9Sstevel@tonic-gate static const char **stringp_stackp; 877c478bd9Sstevel@tonic-gate 887c478bd9Sstevel@tonic-gate 897c478bd9Sstevel@tonic-gate /* DECLARATIONS OF PRIVATE FUNCTIONS */ 907c478bd9Sstevel@tonic-gate 917c478bd9Sstevel@tonic-gate static int 927c478bd9Sstevel@tonic-gate get_wchar(wchar_t *wcharp, 937c478bd9Sstevel@tonic-gate const char *stringp); 947c478bd9Sstevel@tonic-gate 957c478bd9Sstevel@tonic-gate static void 967c478bd9Sstevel@tonic-gate get_match_counts(int *nmust_matchp, 977c478bd9Sstevel@tonic-gate int *nextra_matches_allowedp, 987c478bd9Sstevel@tonic-gate const char *count_stringp); 997c478bd9Sstevel@tonic-gate 1007c478bd9Sstevel@tonic-gate static boolean_t 1017c478bd9Sstevel@tonic-gate in_wchar_range(wchar_t test_char, 1027c478bd9Sstevel@tonic-gate wchar_t lower_char, 1037c478bd9Sstevel@tonic-gate wchar_t upper_char); 1047c478bd9Sstevel@tonic-gate 1057c478bd9Sstevel@tonic-gate static const char * 1067c478bd9Sstevel@tonic-gate pop_stringp(void); 1077c478bd9Sstevel@tonic-gate 1087c478bd9Sstevel@tonic-gate 
static const char * 1097c478bd9Sstevel@tonic-gate previous_charp(const char *current_charp); 1107c478bd9Sstevel@tonic-gate 1117c478bd9Sstevel@tonic-gate static const char * 1127c478bd9Sstevel@tonic-gate push_stringp(const char *stringp); 1137c478bd9Sstevel@tonic-gate 1147c478bd9Sstevel@tonic-gate static char_test_result_t 1157c478bd9Sstevel@tonic-gate test_char_against_ascii_class(char test_char, 1167c478bd9Sstevel@tonic-gate const char *classp, 1177c478bd9Sstevel@tonic-gate char_test_condition_t test_condition); 1187c478bd9Sstevel@tonic-gate 1197c478bd9Sstevel@tonic-gate static char_test_result_t 1207c478bd9Sstevel@tonic-gate test_char_against_multibyte_class(wchar_t test_char, 1217c478bd9Sstevel@tonic-gate const char *classp, 1227c478bd9Sstevel@tonic-gate char_test_condition_t test_condition); 1237c478bd9Sstevel@tonic-gate 1247c478bd9Sstevel@tonic-gate 1257c478bd9Sstevel@tonic-gate /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ 1267c478bd9Sstevel@tonic-gate 1277c478bd9Sstevel@tonic-gate static char_test_result_t 1287c478bd9Sstevel@tonic-gate test_char_against_old_ascii_class(char test_char, 1297c478bd9Sstevel@tonic-gate const char *classp, 1307c478bd9Sstevel@tonic-gate char_test_condition_t test_condition); 1317c478bd9Sstevel@tonic-gate 1327c478bd9Sstevel@tonic-gate static const char * 1337c478bd9Sstevel@tonic-gate test_repeated_ascii_char(const char *repeat_startp, 1347c478bd9Sstevel@tonic-gate const char *stringp, 1357c478bd9Sstevel@tonic-gate const char *regexp); 1367c478bd9Sstevel@tonic-gate 1377c478bd9Sstevel@tonic-gate static const char * 1387c478bd9Sstevel@tonic-gate test_repeated_multibyte_char(const char *repeat_startp, 1397c478bd9Sstevel@tonic-gate const char *stringp, 1407c478bd9Sstevel@tonic-gate const char *regexp); 1417c478bd9Sstevel@tonic-gate 1427c478bd9Sstevel@tonic-gate static const char * 1437c478bd9Sstevel@tonic-gate test_repeated_group(const char *repeat_startp, 1447c478bd9Sstevel@tonic-gate const char *stringp, 
1457c478bd9Sstevel@tonic-gate const char *regexp); 1467c478bd9Sstevel@tonic-gate 1477c478bd9Sstevel@tonic-gate static const char * 1487c478bd9Sstevel@tonic-gate test_string(const char *stringp, 1497c478bd9Sstevel@tonic-gate const char *regexp); 1507c478bd9Sstevel@tonic-gate 1517c478bd9Sstevel@tonic-gate 1527c478bd9Sstevel@tonic-gate /* DEFINITIONS OF PUBLIC VARIABLES */ 1537c478bd9Sstevel@tonic-gate 1547c478bd9Sstevel@tonic-gate char *__loc1; 1557c478bd9Sstevel@tonic-gate 1567c478bd9Sstevel@tonic-gate /* 1577c478bd9Sstevel@tonic-gate * reserve thread-specific storage for __loc1 1587c478bd9Sstevel@tonic-gate */ 1597c478bd9Sstevel@tonic-gate char ** 1607c478bd9Sstevel@tonic-gate ____loc1(void) 1617c478bd9Sstevel@tonic-gate { 162*7257d1b4Sraf if (thr_main()) 1637c478bd9Sstevel@tonic-gate return (&__loc1); 1647c478bd9Sstevel@tonic-gate return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL)); 1657c478bd9Sstevel@tonic-gate } 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate #define __loc1 (*(____loc1())) 1687c478bd9Sstevel@tonic-gate 1697c478bd9Sstevel@tonic-gate /* DEFINITION OF regex() */ 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate extern char * 172*7257d1b4Sraf regex(const char *regexp, const char *stringp, ...) 
1737c478bd9Sstevel@tonic-gate { 1747c478bd9Sstevel@tonic-gate va_list arg_listp; 1757c478bd9Sstevel@tonic-gate int char_size; 1767c478bd9Sstevel@tonic-gate const char *end_of_matchp; 1777c478bd9Sstevel@tonic-gate wchar_t regex_wchar; 1787c478bd9Sstevel@tonic-gate char *return_argp[NSUBSTRINGS]; 1797c478bd9Sstevel@tonic-gate char *returned_substringp; 1807c478bd9Sstevel@tonic-gate int substringn; 1817c478bd9Sstevel@tonic-gate const char *substringp; 1827c478bd9Sstevel@tonic-gate wchar_t string_wchar; 1837c478bd9Sstevel@tonic-gate 1847c478bd9Sstevel@tonic-gate if (____loc1() == (char **)0) { 1857c478bd9Sstevel@tonic-gate return ((char *)0); 1867c478bd9Sstevel@tonic-gate } else { 1877c478bd9Sstevel@tonic-gate lmutex_lock(®ex_lock); 1887c478bd9Sstevel@tonic-gate __loc1 = (char *)0; 1897c478bd9Sstevel@tonic-gate } 1907c478bd9Sstevel@tonic-gate 1917c478bd9Sstevel@tonic-gate if ((stringp == (char *)0) || (regexp == (char *)0)) { 1927c478bd9Sstevel@tonic-gate lmutex_unlock(®ex_lock); 1937c478bd9Sstevel@tonic-gate return ((char *)0); 1947c478bd9Sstevel@tonic-gate } 1957c478bd9Sstevel@tonic-gate 1967c478bd9Sstevel@tonic-gate 1977c478bd9Sstevel@tonic-gate /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ 1987c478bd9Sstevel@tonic-gate 1997c478bd9Sstevel@tonic-gate substringn = 0; 2007c478bd9Sstevel@tonic-gate va_start(arg_listp, stringp); 2017c478bd9Sstevel@tonic-gate while (substringn < NSUBSTRINGS) { 2027c478bd9Sstevel@tonic-gate return_argp[substringn] = va_arg(arg_listp, char *); 2037c478bd9Sstevel@tonic-gate substring_startp[substringn] = (char *)0; 2047c478bd9Sstevel@tonic-gate return_arg_number[substringn] = -1; 2057c478bd9Sstevel@tonic-gate substringn++; 2067c478bd9Sstevel@tonic-gate } 2077c478bd9Sstevel@tonic-gate va_end(arg_listp); 2087c478bd9Sstevel@tonic-gate 2097c478bd9Sstevel@tonic-gate 2107c478bd9Sstevel@tonic-gate /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ 2117c478bd9Sstevel@tonic-gate 2127c478bd9Sstevel@tonic-gate end_of_matchp = (char 
*)0; 2137c478bd9Sstevel@tonic-gate stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate if ((int)*regexp == (int)START_OF_STRING_MARK) { 2167c478bd9Sstevel@tonic-gate 2177c478bd9Sstevel@tonic-gate /* 2187c478bd9Sstevel@tonic-gate * the match must start at the beginning of the string 2197c478bd9Sstevel@tonic-gate */ 2207c478bd9Sstevel@tonic-gate 2217c478bd9Sstevel@tonic-gate __loc1 = (char *)stringp; 2227c478bd9Sstevel@tonic-gate regexp++; 2237c478bd9Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 2247c478bd9Sstevel@tonic-gate 2257c478bd9Sstevel@tonic-gate } else if ((int)*regexp == (int)ASCII_CHAR) { 2267c478bd9Sstevel@tonic-gate 2277c478bd9Sstevel@tonic-gate /* 2287c478bd9Sstevel@tonic-gate * test a string against a regular expression 2297c478bd9Sstevel@tonic-gate * that starts with a single ASCII character: 2307c478bd9Sstevel@tonic-gate * 2317c478bd9Sstevel@tonic-gate * move to each character in the string that matches 2327c478bd9Sstevel@tonic-gate * the first character in the regular expression 2337c478bd9Sstevel@tonic-gate * and test the remaining string 2347c478bd9Sstevel@tonic-gate */ 2357c478bd9Sstevel@tonic-gate 2367c478bd9Sstevel@tonic-gate while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 2377c478bd9Sstevel@tonic-gate stringp++; 2387c478bd9Sstevel@tonic-gate } 2397c478bd9Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 2407c478bd9Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 2417c478bd9Sstevel@tonic-gate if (end_of_matchp != (char *)0) { 2427c478bd9Sstevel@tonic-gate __loc1 = (char *)stringp; 2437c478bd9Sstevel@tonic-gate } else { 2447c478bd9Sstevel@tonic-gate stringp++; 2457c478bd9Sstevel@tonic-gate while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { 2467c478bd9Sstevel@tonic-gate stringp++; 2477c478bd9Sstevel@tonic-gate } 2487c478bd9Sstevel@tonic-gate } 2497c478bd9Sstevel@tonic-gate } 
2507c478bd9Sstevel@tonic-gate 2517c478bd9Sstevel@tonic-gate } else if (!multibyte) { 2527c478bd9Sstevel@tonic-gate 2537c478bd9Sstevel@tonic-gate /* 2547c478bd9Sstevel@tonic-gate * if the value of the "multibyte" macro defined in <euc.h> 2557c478bd9Sstevel@tonic-gate * is false, regex() is running in an ASCII locale; 2567c478bd9Sstevel@tonic-gate * test an ASCII string against an ASCII regular expression 2577c478bd9Sstevel@tonic-gate * that doesn't start with a single ASCII character: 2587c478bd9Sstevel@tonic-gate * 2597c478bd9Sstevel@tonic-gate * move forward in the string one byte at a time, testing 2607c478bd9Sstevel@tonic-gate * the remaining string against the regular expression 2617c478bd9Sstevel@tonic-gate */ 2627c478bd9Sstevel@tonic-gate 2637c478bd9Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 2647c478bd9Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { 2657c478bd9Sstevel@tonic-gate stringp++; 2667c478bd9Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 2677c478bd9Sstevel@tonic-gate } 2687c478bd9Sstevel@tonic-gate if (end_of_matchp != (char *)0) { 2697c478bd9Sstevel@tonic-gate __loc1 = (char *)stringp; 2707c478bd9Sstevel@tonic-gate } 2717c478bd9Sstevel@tonic-gate 2727c478bd9Sstevel@tonic-gate } else if ((int)*regexp == (int)MULTIBYTE_CHAR) { 2737c478bd9Sstevel@tonic-gate 2747c478bd9Sstevel@tonic-gate /* 2757c478bd9Sstevel@tonic-gate * test a multibyte string against a multibyte regular expression 2767c478bd9Sstevel@tonic-gate * that starts with a single multibyte character: 2777c478bd9Sstevel@tonic-gate * 2787c478bd9Sstevel@tonic-gate * move to each character in the string that matches 2797c478bd9Sstevel@tonic-gate * the first character in the regular expression 2807c478bd9Sstevel@tonic-gate * and test the remaining string 2817c478bd9Sstevel@tonic-gate */ 2827c478bd9Sstevel@tonic-gate 2837c478bd9Sstevel@tonic-gate (void) get_wchar(®ex_wchar, regexp + 1); 2847c478bd9Sstevel@tonic-gate char_size = 
get_wchar(&string_wchar, stringp); 2857c478bd9Sstevel@tonic-gate while ((string_wchar != regex_wchar) && (char_size > 0)) { 2867c478bd9Sstevel@tonic-gate stringp += char_size; 2877c478bd9Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 2887c478bd9Sstevel@tonic-gate } 2897c478bd9Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (char_size > 0)) { 2907c478bd9Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 2917c478bd9Sstevel@tonic-gate if (end_of_matchp != (char *)0) { 2927c478bd9Sstevel@tonic-gate __loc1 = (char *)stringp; 2937c478bd9Sstevel@tonic-gate } else { 2947c478bd9Sstevel@tonic-gate stringp += char_size; 2957c478bd9Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 2967c478bd9Sstevel@tonic-gate while ((string_wchar != regex_wchar) && (char_size > 0)) { 2977c478bd9Sstevel@tonic-gate stringp += char_size; 2987c478bd9Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 2997c478bd9Sstevel@tonic-gate } 3007c478bd9Sstevel@tonic-gate } 3017c478bd9Sstevel@tonic-gate } 3027c478bd9Sstevel@tonic-gate 3037c478bd9Sstevel@tonic-gate } else { 3047c478bd9Sstevel@tonic-gate 3057c478bd9Sstevel@tonic-gate /* 3067c478bd9Sstevel@tonic-gate * test a multibyte string against a multibyte regular expression 3077c478bd9Sstevel@tonic-gate * that doesn't start with a single multibyte character 3087c478bd9Sstevel@tonic-gate * 3097c478bd9Sstevel@tonic-gate * move forward in the string one multibyte character at a time, 3107c478bd9Sstevel@tonic-gate * testing the remaining string against the regular expression 3117c478bd9Sstevel@tonic-gate */ 3127c478bd9Sstevel@tonic-gate 3137c478bd9Sstevel@tonic-gate end_of_matchp = test_string(stringp, regexp); 3147c478bd9Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 3157c478bd9Sstevel@tonic-gate while ((end_of_matchp == (char *)0) && (char_size > 0)) { 3167c478bd9Sstevel@tonic-gate stringp += char_size; 3177c478bd9Sstevel@tonic-gate end_of_matchp = 
test_string(stringp, regexp); 3187c478bd9Sstevel@tonic-gate char_size = get_wchar(&string_wchar, stringp); 3197c478bd9Sstevel@tonic-gate } 3207c478bd9Sstevel@tonic-gate if (end_of_matchp != (char *)0) { 3217c478bd9Sstevel@tonic-gate __loc1 = (char *)stringp; 3227c478bd9Sstevel@tonic-gate } 3237c478bd9Sstevel@tonic-gate } 3247c478bd9Sstevel@tonic-gate 3257c478bd9Sstevel@tonic-gate /* 3267c478bd9Sstevel@tonic-gate * Return substrings that matched subexpressions for which 3277c478bd9Sstevel@tonic-gate * matching substrings are to be returned. 3287c478bd9Sstevel@tonic-gate * 3297c478bd9Sstevel@tonic-gate * NOTE: 3307c478bd9Sstevel@tonic-gate * 3317c478bd9Sstevel@tonic-gate * According to manual page regcmp(3G), regex() returns substrings 3327c478bd9Sstevel@tonic-gate * that match subexpressions even when no substring matches the 3337c478bd9Sstevel@tonic-gate * entire regular expression. 3347c478bd9Sstevel@tonic-gate */ 3357c478bd9Sstevel@tonic-gate 3367c478bd9Sstevel@tonic-gate substringn = 0; 3377c478bd9Sstevel@tonic-gate while (substringn < NSUBSTRINGS) { 3387c478bd9Sstevel@tonic-gate substringp = substring_startp[substringn]; 3397c478bd9Sstevel@tonic-gate if ((substringp != (char *)0) && 3407c478bd9Sstevel@tonic-gate (return_arg_number[substringn] >= 0)) { 3417c478bd9Sstevel@tonic-gate returned_substringp = 3427c478bd9Sstevel@tonic-gate return_argp[return_arg_number[substringn]]; 3437c478bd9Sstevel@tonic-gate if (returned_substringp != (char *)0) { 3447c478bd9Sstevel@tonic-gate while (substringp < substring_endp[substringn]) { 3457c478bd9Sstevel@tonic-gate *returned_substringp = (char)*substringp; 3467c478bd9Sstevel@tonic-gate returned_substringp++; 3477c478bd9Sstevel@tonic-gate substringp++; 3487c478bd9Sstevel@tonic-gate } 3497c478bd9Sstevel@tonic-gate *returned_substringp = '\0'; 3507c478bd9Sstevel@tonic-gate } 3517c478bd9Sstevel@tonic-gate } 3527c478bd9Sstevel@tonic-gate substringn++; 3537c478bd9Sstevel@tonic-gate } 3547c478bd9Sstevel@tonic-gate 
lmutex_unlock(®ex_lock); 3557c478bd9Sstevel@tonic-gate return ((char *)end_of_matchp); 3567c478bd9Sstevel@tonic-gate } /* regex() */ 3577c478bd9Sstevel@tonic-gate 3587c478bd9Sstevel@tonic-gate 3597c478bd9Sstevel@tonic-gate /* DEFINITIONS OF PRIVATE FUNCTIONS */ 3607c478bd9Sstevel@tonic-gate 3617c478bd9Sstevel@tonic-gate static int 3627c478bd9Sstevel@tonic-gate get_wchar(wchar_t *wcharp, 3637c478bd9Sstevel@tonic-gate const char *stringp) 3647c478bd9Sstevel@tonic-gate { 3657c478bd9Sstevel@tonic-gate int char_size; 3667c478bd9Sstevel@tonic-gate 3677c478bd9Sstevel@tonic-gate if (stringp == (char *)0) { 3687c478bd9Sstevel@tonic-gate char_size = 0; 3697c478bd9Sstevel@tonic-gate *wcharp = (wchar_t)((unsigned int)'\0'); 3707c478bd9Sstevel@tonic-gate } else if (*stringp == '\0') { 3717c478bd9Sstevel@tonic-gate char_size = 0; 3727c478bd9Sstevel@tonic-gate *wcharp = (wchar_t)((unsigned int)*stringp); 3737c478bd9Sstevel@tonic-gate } else if ((unsigned char)*stringp <= (unsigned char)0x7f) { 3747c478bd9Sstevel@tonic-gate char_size = 1; 3757c478bd9Sstevel@tonic-gate *wcharp = (wchar_t)((unsigned int)*stringp); 3767c478bd9Sstevel@tonic-gate } else { 3777c478bd9Sstevel@tonic-gate char_size = mbtowc(wcharp, stringp, MB_LEN_MAX); 3787c478bd9Sstevel@tonic-gate } 3797c478bd9Sstevel@tonic-gate return (char_size); 3807c478bd9Sstevel@tonic-gate } 3817c478bd9Sstevel@tonic-gate 3827c478bd9Sstevel@tonic-gate static void 3837c478bd9Sstevel@tonic-gate get_match_counts(int *nmust_matchp, 3847c478bd9Sstevel@tonic-gate int *nextra_matches_allowedp, 3857c478bd9Sstevel@tonic-gate const char *count_stringp) 3867c478bd9Sstevel@tonic-gate { 3877c478bd9Sstevel@tonic-gate int minimum_match_count; 3887c478bd9Sstevel@tonic-gate int maximum_match_count; 3897c478bd9Sstevel@tonic-gate 3907c478bd9Sstevel@tonic-gate minimum_match_count = 3917c478bd9Sstevel@tonic-gate (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 3927c478bd9Sstevel@tonic-gate *nmust_matchp = minimum_match_count; 
3937c478bd9Sstevel@tonic-gate 3947c478bd9Sstevel@tonic-gate count_stringp++; 3957c478bd9Sstevel@tonic-gate maximum_match_count = 3967c478bd9Sstevel@tonic-gate (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); 3977c478bd9Sstevel@tonic-gate if (maximum_match_count == (int)UNLIMITED) { 3987c478bd9Sstevel@tonic-gate *nextra_matches_allowedp = (int)UNLIMITED; 3997c478bd9Sstevel@tonic-gate } else { 4007c478bd9Sstevel@tonic-gate *nextra_matches_allowedp = 4017c478bd9Sstevel@tonic-gate maximum_match_count - minimum_match_count; 4027c478bd9Sstevel@tonic-gate } 4037c478bd9Sstevel@tonic-gate return; 4047c478bd9Sstevel@tonic-gate 4057c478bd9Sstevel@tonic-gate } /* get_match_counts() */ 4067c478bd9Sstevel@tonic-gate 4077c478bd9Sstevel@tonic-gate static boolean_t 4087c478bd9Sstevel@tonic-gate in_wchar_range(wchar_t test_char, 4097c478bd9Sstevel@tonic-gate wchar_t lower_char, 4107c478bd9Sstevel@tonic-gate wchar_t upper_char) 4117c478bd9Sstevel@tonic-gate { 4127c478bd9Sstevel@tonic-gate return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && 4137c478bd9Sstevel@tonic-gate (lower_char <= test_char) && (test_char <= upper_char)) || 4147c478bd9Sstevel@tonic-gate (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) && 4157c478bd9Sstevel@tonic-gate ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) && 4167c478bd9Sstevel@tonic-gate (lower_char <= test_char) && (test_char <= upper_char))); 4177c478bd9Sstevel@tonic-gate 4187c478bd9Sstevel@tonic-gate } /* in_wchar_range() */ 4197c478bd9Sstevel@tonic-gate 4207c478bd9Sstevel@tonic-gate static const char * 4217c478bd9Sstevel@tonic-gate pop_stringp(void) 4227c478bd9Sstevel@tonic-gate { 4237c478bd9Sstevel@tonic-gate const char *stringp; 4247c478bd9Sstevel@tonic-gate 4257c478bd9Sstevel@tonic-gate if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) { 4267c478bd9Sstevel@tonic-gate return ((char *)0); 4277c478bd9Sstevel@tonic-gate } else { 4287c478bd9Sstevel@tonic-gate stringp = *stringp_stackp; 
4297c478bd9Sstevel@tonic-gate stringp_stackp++; 4307c478bd9Sstevel@tonic-gate return (stringp); 4317c478bd9Sstevel@tonic-gate } 4327c478bd9Sstevel@tonic-gate } 4337c478bd9Sstevel@tonic-gate 4347c478bd9Sstevel@tonic-gate 4357c478bd9Sstevel@tonic-gate static const char * 4367c478bd9Sstevel@tonic-gate previous_charp(const char *current_charp) 4377c478bd9Sstevel@tonic-gate { 4387c478bd9Sstevel@tonic-gate /* 4397c478bd9Sstevel@tonic-gate * returns the pointer to the previous character in 4407c478bd9Sstevel@tonic-gate * a string of multibyte characters 4417c478bd9Sstevel@tonic-gate */ 4427c478bd9Sstevel@tonic-gate 4437c478bd9Sstevel@tonic-gate const char *prev_cs0 = current_charp - 1; 4447c478bd9Sstevel@tonic-gate const char *prev_cs1 = current_charp - eucw1; 4457c478bd9Sstevel@tonic-gate const char *prev_cs2 = current_charp - eucw2 - 1; 4467c478bd9Sstevel@tonic-gate const char *prev_cs3 = current_charp - eucw3 - 1; 4477c478bd9Sstevel@tonic-gate const char *prev_charp; 4487c478bd9Sstevel@tonic-gate 4497c478bd9Sstevel@tonic-gate if ((unsigned char)*prev_cs0 <= 0x7f) { 4507c478bd9Sstevel@tonic-gate prev_charp = prev_cs0; 4517c478bd9Sstevel@tonic-gate } else if ((unsigned char)*prev_cs2 == SS2) { 4527c478bd9Sstevel@tonic-gate prev_charp = prev_cs2; 4537c478bd9Sstevel@tonic-gate } else if ((unsigned char)*prev_cs3 == SS3) { 4547c478bd9Sstevel@tonic-gate prev_charp = prev_cs3; 4557c478bd9Sstevel@tonic-gate } else { 4567c478bd9Sstevel@tonic-gate prev_charp = prev_cs1; 4577c478bd9Sstevel@tonic-gate } 4587c478bd9Sstevel@tonic-gate return (prev_charp); 4597c478bd9Sstevel@tonic-gate 4607c478bd9Sstevel@tonic-gate } /* previous_charp() */ 4617c478bd9Sstevel@tonic-gate 4627c478bd9Sstevel@tonic-gate static const char * 4637c478bd9Sstevel@tonic-gate push_stringp(const char *stringp) 4647c478bd9Sstevel@tonic-gate { 4657c478bd9Sstevel@tonic-gate if (stringp_stackp <= &stringp_stack[0]) { 4667c478bd9Sstevel@tonic-gate return ((char *)0); 4677c478bd9Sstevel@tonic-gate } else { 
4687c478bd9Sstevel@tonic-gate stringp_stackp--; 4697c478bd9Sstevel@tonic-gate *stringp_stackp = stringp; 4707c478bd9Sstevel@tonic-gate return (stringp); 4717c478bd9Sstevel@tonic-gate } 4727c478bd9Sstevel@tonic-gate } 4737c478bd9Sstevel@tonic-gate 4747c478bd9Sstevel@tonic-gate 4757c478bd9Sstevel@tonic-gate static char_test_result_t 4767c478bd9Sstevel@tonic-gate test_char_against_ascii_class(char test_char, 4777c478bd9Sstevel@tonic-gate const char *classp, 4787c478bd9Sstevel@tonic-gate char_test_condition_t test_condition) 4797c478bd9Sstevel@tonic-gate { 4807c478bd9Sstevel@tonic-gate /* 4817c478bd9Sstevel@tonic-gate * tests a character for membership in an ASCII character class compiled 4827c478bd9Sstevel@tonic-gate * by the internationalized version of regcmp(); 4837c478bd9Sstevel@tonic-gate * 4847c478bd9Sstevel@tonic-gate * NOTE: The internationalized version of regcmp() compiles 4857c478bd9Sstevel@tonic-gate * the range a-z in an ASCII character class to aTHRUz. 4867c478bd9Sstevel@tonic-gate */ 4877c478bd9Sstevel@tonic-gate 4887c478bd9Sstevel@tonic-gate int nbytes_to_check; 4897c478bd9Sstevel@tonic-gate 4907c478bd9Sstevel@tonic-gate nbytes_to_check = (int)*classp; 4917c478bd9Sstevel@tonic-gate classp++; 4927c478bd9Sstevel@tonic-gate nbytes_to_check--; 4937c478bd9Sstevel@tonic-gate 4947c478bd9Sstevel@tonic-gate while (nbytes_to_check > 0) { 4957c478bd9Sstevel@tonic-gate if (test_char == *classp) { 4967c478bd9Sstevel@tonic-gate if (test_condition == IN_CLASS) 4977c478bd9Sstevel@tonic-gate return (CONDITION_TRUE); 4987c478bd9Sstevel@tonic-gate else 4997c478bd9Sstevel@tonic-gate return (CONDITION_FALSE); 5007c478bd9Sstevel@tonic-gate } else if (*classp == THRU) { 5017c478bd9Sstevel@tonic-gate if ((*(classp - 1) <= test_char) && 5027c478bd9Sstevel@tonic-gate (test_char <= *(classp + 1))) { 5037c478bd9Sstevel@tonic-gate if (test_condition == IN_CLASS) 5047c478bd9Sstevel@tonic-gate return (CONDITION_TRUE); 5057c478bd9Sstevel@tonic-gate else 5067c478bd9Sstevel@tonic-gate 
return (CONDITION_FALSE); 5077c478bd9Sstevel@tonic-gate } else { 5087c478bd9Sstevel@tonic-gate classp += 2; 5097c478bd9Sstevel@tonic-gate nbytes_to_check -= 2; 5107c478bd9Sstevel@tonic-gate } 5117c478bd9Sstevel@tonic-gate } else { 5127c478bd9Sstevel@tonic-gate classp++; 5137c478bd9Sstevel@tonic-gate nbytes_to_check--; 5147c478bd9Sstevel@tonic-gate } 5157c478bd9Sstevel@tonic-gate } 5167c478bd9Sstevel@tonic-gate if (test_condition == NOT_IN_CLASS) { 5177c478bd9Sstevel@tonic-gate return (CONDITION_TRUE); 5187c478bd9Sstevel@tonic-gate } else { 5197c478bd9Sstevel@tonic-gate return (CONDITION_FALSE); 5207c478bd9Sstevel@tonic-gate } 5217c478bd9Sstevel@tonic-gate } /* test_char_against_ascii_class() */ 5227c478bd9Sstevel@tonic-gate 5237c478bd9Sstevel@tonic-gate static char_test_result_t 5247c478bd9Sstevel@tonic-gate test_char_against_multibyte_class(wchar_t test_char, 5257c478bd9Sstevel@tonic-gate const char *classp, 5267c478bd9Sstevel@tonic-gate char_test_condition_t test_condition) 5277c478bd9Sstevel@tonic-gate { 5287c478bd9Sstevel@tonic-gate /* 5297c478bd9Sstevel@tonic-gate * tests a character for membership in a multibyte character class; 5307c478bd9Sstevel@tonic-gate * 5317c478bd9Sstevel@tonic-gate * NOTE: The range a-z in a multibyte character class compiles to 5327c478bd9Sstevel@tonic-gate * aTHRUz. 
5337c478bd9Sstevel@tonic-gate */ 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate int char_size; 5367c478bd9Sstevel@tonic-gate wchar_t current_char; 5377c478bd9Sstevel@tonic-gate int nbytes_to_check; 5387c478bd9Sstevel@tonic-gate wchar_t previous_char; 5397c478bd9Sstevel@tonic-gate 5407c478bd9Sstevel@tonic-gate nbytes_to_check = (int)*classp; 5417c478bd9Sstevel@tonic-gate classp++; 5427c478bd9Sstevel@tonic-gate nbytes_to_check--; 5437c478bd9Sstevel@tonic-gate 5447c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, classp); 5457c478bd9Sstevel@tonic-gate if (char_size <= 0) { 5467c478bd9Sstevel@tonic-gate return (CHAR_TEST_ERROR); 5477c478bd9Sstevel@tonic-gate } else if (test_char == current_char) { 5487c478bd9Sstevel@tonic-gate if (test_condition == IN_CLASS) { 5497c478bd9Sstevel@tonic-gate return (CONDITION_TRUE); 5507c478bd9Sstevel@tonic-gate } else { 5517c478bd9Sstevel@tonic-gate return (CONDITION_FALSE); 5527c478bd9Sstevel@tonic-gate } 5537c478bd9Sstevel@tonic-gate } else { 5547c478bd9Sstevel@tonic-gate classp += char_size; 5557c478bd9Sstevel@tonic-gate nbytes_to_check -= char_size; 5567c478bd9Sstevel@tonic-gate } 5577c478bd9Sstevel@tonic-gate 5587c478bd9Sstevel@tonic-gate while (nbytes_to_check > 0) { 5597c478bd9Sstevel@tonic-gate previous_char = current_char; 5607c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, classp); 5617c478bd9Sstevel@tonic-gate if (char_size <= 0) { 5627c478bd9Sstevel@tonic-gate return (CHAR_TEST_ERROR); 5637c478bd9Sstevel@tonic-gate } else if (test_char == current_char) { 5647c478bd9Sstevel@tonic-gate if (test_condition == IN_CLASS) { 5657c478bd9Sstevel@tonic-gate return (CONDITION_TRUE); 5667c478bd9Sstevel@tonic-gate } else { 5677c478bd9Sstevel@tonic-gate return (CONDITION_FALSE); 5687c478bd9Sstevel@tonic-gate } 5697c478bd9Sstevel@tonic-gate } else if (current_char == THRU) { 5707c478bd9Sstevel@tonic-gate classp += char_size; 5717c478bd9Sstevel@tonic-gate nbytes_to_check -= char_size; 
5727c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, classp); 5737c478bd9Sstevel@tonic-gate if (char_size <= 0) { 5747c478bd9Sstevel@tonic-gate return (CHAR_TEST_ERROR); 5757c478bd9Sstevel@tonic-gate } else if (in_wchar_range(test_char, previous_char, 5767c478bd9Sstevel@tonic-gate current_char)) { 5777c478bd9Sstevel@tonic-gate if (test_condition == IN_CLASS) { 5787c478bd9Sstevel@tonic-gate return (CONDITION_TRUE); 5797c478bd9Sstevel@tonic-gate } else { 5807c478bd9Sstevel@tonic-gate return (CONDITION_FALSE); 5817c478bd9Sstevel@tonic-gate } 5827c478bd9Sstevel@tonic-gate } else { 5837c478bd9Sstevel@tonic-gate classp += char_size; 5847c478bd9Sstevel@tonic-gate nbytes_to_check -= char_size; 5857c478bd9Sstevel@tonic-gate } 5867c478bd9Sstevel@tonic-gate } else { 5877c478bd9Sstevel@tonic-gate classp += char_size; 5887c478bd9Sstevel@tonic-gate nbytes_to_check -= char_size; 5897c478bd9Sstevel@tonic-gate } 5907c478bd9Sstevel@tonic-gate } 5917c478bd9Sstevel@tonic-gate if (test_condition == NOT_IN_CLASS) { 5927c478bd9Sstevel@tonic-gate return (CONDITION_TRUE); 5937c478bd9Sstevel@tonic-gate } else { 5947c478bd9Sstevel@tonic-gate return (CONDITION_FALSE); 5957c478bd9Sstevel@tonic-gate } 5967c478bd9Sstevel@tonic-gate } /* test_char_against_multibyte_class() */


/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */

static char_test_result_t
test_char_against_old_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition)
{
	/*
	 * Tests a character for membership in an ASCII character class
	 * compiled by the ASCII version of regcmp().
	 *
	 * classp addresses the <class_length> byte.  The length stored
	 * there counts the <class_length> byte itself, so the class body
	 * that follows it is <class_length> - 1 bytes long.
	 *
	 * Returns CONDITION_TRUE if test_condition is IN_CLASS and
	 * test_char is a member of the class, or if test_condition is
	 * NOT_IN_CLASS and test_char is not a member; returns
	 * CONDITION_FALSE otherwise.
	 *
	 * NOTE: ASCII versions of regcmp() compile the range a-z in an
	 *	ASCII character class to THRUaz.  The internationalized
	 *	version compiles the same range to aTHRUz.
	 */

	int	nbytes_to_check;
	int	in_class = 0;

	/*
	 * Read the length through unsigned char.  Plain char may be
	 * signed, which would turn a length of 128 or more into a
	 * negative count and cause the whole class to be skipped.
	 */
	nbytes_to_check = (int)*(const unsigned char *)classp - 1;
	classp++;

	while (nbytes_to_check > 0) {
		if (test_char == *classp) {
			in_class = 1;
			break;
		}
		if (*classp != THRU) {
			/* an ordinary class member that did not match */
			classp++;
			nbytes_to_check--;
			continue;
		}

		/*
		 * <THRU><low><high>: both range endpoints must lie inside
		 * the class; a truncated range is never read.
		 */
		if (nbytes_to_check < 3) {
			break;
		}
		if ((classp[1] <= test_char) && (test_char <= classp[2])) {
			in_class = 1;
			break;
		}
		classp += 3;
		nbytes_to_check -= 3;
	}

	if (in_class) {
		return ((test_condition == IN_CLASS) ?
		    CONDITION_TRUE : CONDITION_FALSE);
	}
	return ((test_condition == NOT_IN_CLASS) ?
	    CONDITION_TRUE : CONDITION_FALSE);
}	/* test_char_against_old_ascii_class() */

/*
 * Backs up over a run of single-byte characters until the rest of the
 * compiled regular expression matches.
 *
 * stringp addresses the position just past the run and repeat_startp
 * the earliest position that may be retried.  test_string() is tried at
 * stringp and then one byte earlier each time, never going below
 * repeat_startp.  Returns the value of the first test_string() call that
 * matched, or a null pointer if none did.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *end_of_matchp;

	for (;;) {
		end_of_matchp = test_string(stringp, regexp);
		if ((end_of_matchp != NULL) || (stringp <= repeat_startp)) {
			return (end_of_matchp);
		}
		stringp--;
	}
}

/*
 * Same as test_repeated_ascii_char(), except that each step backwards
 * moves stringp to the position returned by previous_charp() rather
 * than back by one byte.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *end_of_matchp;

	for (;;) {
		end_of_matchp = test_string(stringp, regexp);
		if ((end_of_matchp != NULL) || (stringp <= repeat_startp)) {
			return (end_of_matchp);
		}
		stringp = previous_charp(stringp);
	}
}

/*
 * Same as test_repeated_ascii_char(), except that each retry position
 * is obtained from pop_stringp() (presumably the positions saved while
 * the repeated group was being matched -- confirm against
 * push_stringp()/pop_stringp()).  If pop_stringp() returns a null
 * pointer the search is abandoned and a null pointer is returned.
 */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *end_of_matchp;

	for (;;) {
		end_of_matchp = test_string(stringp, regexp);
		if ((end_of_matchp != NULL) || (stringp <= repeat_startp)) {
			return (end_of_matchp);
		}
		stringp = pop_stringp();
		if (stringp == NULL) {
			return (NULL);
		}
	}
}

7037c478bd9Sstevel@tonic-gate static const char * 7047c478bd9Sstevel@tonic-gate test_string(const char *stringp, 7057c478bd9Sstevel@tonic-gate const char *regexp) 7067c478bd9Sstevel@tonic-gate { 7077c478bd9Sstevel@tonic-gate /* 7087c478bd9Sstevel@tonic-gate * returns a pointer to the first character following the first 7097c478bd9Sstevel@tonic-gate * substring of the string addressed by stringp that matches 7107c478bd9Sstevel@tonic-gate * the compiled regular expression addressed by regexp 7117c478bd9Sstevel@tonic-gate */ 7127c478bd9Sstevel@tonic-gate 7137c478bd9Sstevel@tonic-gate unsigned int group_length; 7147c478bd9Sstevel@tonic-gate int nextra_matches_allowed; 7157c478bd9Sstevel@tonic-gate int nmust_match; 7167c478bd9Sstevel@tonic-gate wchar_t regex_wchar; 7177c478bd9Sstevel@tonic-gate int regex_char_size; 7187c478bd9Sstevel@tonic-gate const char *repeat_startp; 7197c478bd9Sstevel@tonic-gate unsigned int return_argn; 7207c478bd9Sstevel@tonic-gate wchar_t string_wchar; 7217c478bd9Sstevel@tonic-gate int string_char_size; 7227c478bd9Sstevel@tonic-gate unsigned int substringn; 7237c478bd9Sstevel@tonic-gate char_test_condition_t test_condition; 7247c478bd9Sstevel@tonic-gate const char *test_stringp; 7257c478bd9Sstevel@tonic-gate 
7267c478bd9Sstevel@tonic-gate for (;;) { 7277c478bd9Sstevel@tonic-gate 7287c478bd9Sstevel@tonic-gate /* 7297c478bd9Sstevel@tonic-gate * Exit the loop via a return whenever there's a match 7307c478bd9Sstevel@tonic-gate * or it's clear that there can be no match. 7317c478bd9Sstevel@tonic-gate */ 7327c478bd9Sstevel@tonic-gate 7337c478bd9Sstevel@tonic-gate switch ((int)*regexp) { 7347c478bd9Sstevel@tonic-gate 7357c478bd9Sstevel@tonic-gate /* 7367c478bd9Sstevel@tonic-gate * No fall-through. 7377c478bd9Sstevel@tonic-gate * Each case ends with either a return or with stringp 7387c478bd9Sstevel@tonic-gate * addressing the next character to be tested and regexp 7397c478bd9Sstevel@tonic-gate * addressing the next compiled regular expression 7407c478bd9Sstevel@tonic-gate * 7417c478bd9Sstevel@tonic-gate * NOTE: The comments for each case give the meaning 7427c478bd9Sstevel@tonic-gate * of the compiled regular expression decoded by the case 7437c478bd9Sstevel@tonic-gate * and the character string that the compiled regular 7447c478bd9Sstevel@tonic-gate * expression uses to encode the case. Each single 7457c478bd9Sstevel@tonic-gate * character encoded in the compiled regular expression 7467c478bd9Sstevel@tonic-gate * is shown enclosed in angle brackets (<>). Each 7477c478bd9Sstevel@tonic-gate * compiled regular expression begins with a marker 7487c478bd9Sstevel@tonic-gate * character which is shown as a named constant 7497c478bd9Sstevel@tonic-gate * (e.g. <ASCII_CHAR>). Character constants are shown 7507c478bd9Sstevel@tonic-gate * enclosed in single quotes (e.g. <'$'>). All other 7517c478bd9Sstevel@tonic-gate * single characters encoded in the compiled regular 7527c478bd9Sstevel@tonic-gate * expression are shown as lower case variable names 7537c478bd9Sstevel@tonic-gate * (e.g. <ascii_char> or <multibyte_char>). 
Multicharacter 7547c478bd9Sstevel@tonic-gate * strings encoded in the compiled regular expression 7557c478bd9Sstevel@tonic-gate * are shown as variable names followed by ellipses 7567c478bd9Sstevel@tonic-gate * (e.g. <compiled_regex...>). 7577c478bd9Sstevel@tonic-gate */ 7587c478bd9Sstevel@tonic-gate 7597c478bd9Sstevel@tonic-gate case ASCII_CHAR: /* single ASCII char */ 7607c478bd9Sstevel@tonic-gate 7617c478bd9Sstevel@tonic-gate /* encoded as <ASCII_CHAR><ascii_char> */ 7627c478bd9Sstevel@tonic-gate 7637c478bd9Sstevel@tonic-gate regexp++; 7647c478bd9Sstevel@tonic-gate if (*regexp == *stringp) { 7657c478bd9Sstevel@tonic-gate regexp++; 7667c478bd9Sstevel@tonic-gate stringp++; 7677c478bd9Sstevel@tonic-gate } else { 7687c478bd9Sstevel@tonic-gate return ((char *)0); 7697c478bd9Sstevel@tonic-gate } 7707c478bd9Sstevel@tonic-gate break; /* end case ASCII_CHAR */ 7717c478bd9Sstevel@tonic-gate 7727c478bd9Sstevel@tonic-gate case MULTIBYTE_CHAR: /* single multibyte char */ 7737c478bd9Sstevel@tonic-gate 7747c478bd9Sstevel@tonic-gate /* encoded as <MULTIBYTE_CHAR><multibyte_char> */ 7757c478bd9Sstevel@tonic-gate 7767c478bd9Sstevel@tonic-gate regexp++; 7777c478bd9Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp); 7787c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 7797c478bd9Sstevel@tonic-gate if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 7807c478bd9Sstevel@tonic-gate return ((char *)0); 7817c478bd9Sstevel@tonic-gate } else { 7827c478bd9Sstevel@tonic-gate regexp += regex_char_size; 7837c478bd9Sstevel@tonic-gate stringp += string_char_size; 7847c478bd9Sstevel@tonic-gate } 7857c478bd9Sstevel@tonic-gate break; /* end case MULTIBYTE_CHAR */ 7867c478bd9Sstevel@tonic-gate 7877c478bd9Sstevel@tonic-gate case ANY_CHAR: /* any single ASCII or multibyte char */ 7887c478bd9Sstevel@tonic-gate 7897c478bd9Sstevel@tonic-gate /* encoded as <ANY_CHAR> */ 7907c478bd9Sstevel@tonic-gate 7917c478bd9Sstevel@tonic-gate if (!multibyte) { 
7927c478bd9Sstevel@tonic-gate if (*stringp == '\0') { 7937c478bd9Sstevel@tonic-gate return ((char *)0); 7947c478bd9Sstevel@tonic-gate } else { 7957c478bd9Sstevel@tonic-gate regexp++; 7967c478bd9Sstevel@tonic-gate stringp++; 7977c478bd9Sstevel@tonic-gate } 7987c478bd9Sstevel@tonic-gate } else { 7997c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 8007c478bd9Sstevel@tonic-gate if (string_char_size <= 0) { 8017c478bd9Sstevel@tonic-gate return ((char *)0); 8027c478bd9Sstevel@tonic-gate } else { 8037c478bd9Sstevel@tonic-gate regexp++; 8047c478bd9Sstevel@tonic-gate stringp += string_char_size; 8057c478bd9Sstevel@tonic-gate } 8067c478bd9Sstevel@tonic-gate } 8077c478bd9Sstevel@tonic-gate break; /* end case ANY_CHAR */ 8087c478bd9Sstevel@tonic-gate 8097c478bd9Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS: /* [.....] */ 8107c478bd9Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS: 8117c478bd9Sstevel@tonic-gate 8127c478bd9Sstevel@tonic-gate /* 8137c478bd9Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...> 8147c478bd9Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...> 8157c478bd9Sstevel@tonic-gate * 8167c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 8177c478bd9Sstevel@tonic-gate */ 8187c478bd9Sstevel@tonic-gate 8197c478bd9Sstevel@tonic-gate if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) { 8207c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 8217c478bd9Sstevel@tonic-gate } else { 8227c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 8237c478bd9Sstevel@tonic-gate } 8247c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 8257c478bd9Sstevel@tonic-gate 8267c478bd9Sstevel@tonic-gate if ((*stringp != '\0') && 8277c478bd9Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 8287c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 8297c478bd9Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 
8307c478bd9Sstevel@tonic-gate stringp++; 8317c478bd9Sstevel@tonic-gate } else { 8327c478bd9Sstevel@tonic-gate return ((char *)0); 8337c478bd9Sstevel@tonic-gate } 8347c478bd9Sstevel@tonic-gate break; /* end case IN_ASCII_CHAR_CLASS */ 8357c478bd9Sstevel@tonic-gate 8367c478bd9Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS: /* [....] */ 8377c478bd9Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS: 8387c478bd9Sstevel@tonic-gate 8397c478bd9Sstevel@tonic-gate /* 8407c478bd9Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 8417c478bd9Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...> 8427c478bd9Sstevel@tonic-gate * 8437c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 8447c478bd9Sstevel@tonic-gate */ 8457c478bd9Sstevel@tonic-gate 8467c478bd9Sstevel@tonic-gate if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) { 8477c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 8487c478bd9Sstevel@tonic-gate } else { 8497c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 8507c478bd9Sstevel@tonic-gate } 8517c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 8527c478bd9Sstevel@tonic-gate 8537c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 8547c478bd9Sstevel@tonic-gate if ((string_char_size > 0) && 8557c478bd9Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp, 8567c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 8577c478bd9Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 8587c478bd9Sstevel@tonic-gate stringp += string_char_size; 8597c478bd9Sstevel@tonic-gate } else { 8607c478bd9Sstevel@tonic-gate return ((char *)0); 8617c478bd9Sstevel@tonic-gate } 8627c478bd9Sstevel@tonic-gate break; /* end case IN_MULTIBYTE_CHAR_CLASS */ 8637c478bd9Sstevel@tonic-gate 8647c478bd9Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS: /* [...] 
*/ 8657c478bd9Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS: 8667c478bd9Sstevel@tonic-gate 8677c478bd9Sstevel@tonic-gate /* 8687c478bd9Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 8697c478bd9Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...> 8707c478bd9Sstevel@tonic-gate * 8717c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 8727c478bd9Sstevel@tonic-gate */ 8737c478bd9Sstevel@tonic-gate 8747c478bd9Sstevel@tonic-gate if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) { 8757c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 8767c478bd9Sstevel@tonic-gate } else { 8777c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 8787c478bd9Sstevel@tonic-gate } 8797c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 8807c478bd9Sstevel@tonic-gate 8817c478bd9Sstevel@tonic-gate if ((*stringp != '\0') && 8827c478bd9Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 8837c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 8847c478bd9Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 8857c478bd9Sstevel@tonic-gate stringp++; 8867c478bd9Sstevel@tonic-gate } else { 8877c478bd9Sstevel@tonic-gate return ((char *)0); 8887c478bd9Sstevel@tonic-gate } 8897c478bd9Sstevel@tonic-gate break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ 8907c478bd9Sstevel@tonic-gate 8917c478bd9Sstevel@tonic-gate case SIMPLE_GROUP: /* (.....) */ 8927c478bd9Sstevel@tonic-gate 8937c478bd9Sstevel@tonic-gate /* encoded as <SIMPLE_GROUP><group_length> */ 8947c478bd9Sstevel@tonic-gate 8957c478bd9Sstevel@tonic-gate regexp += 2; 8967c478bd9Sstevel@tonic-gate break; /* end case SIMPLE_GROUP */ 8977c478bd9Sstevel@tonic-gate 8987c478bd9Sstevel@tonic-gate case END_GROUP: /* (.....) 
*/ 8997c478bd9Sstevel@tonic-gate 9007c478bd9Sstevel@tonic-gate /* encoded as <END_GROUP><groupn> */ 9017c478bd9Sstevel@tonic-gate 9027c478bd9Sstevel@tonic-gate regexp += 2; 9037c478bd9Sstevel@tonic-gate break; /* end case END_GROUP */ 9047c478bd9Sstevel@tonic-gate 9057c478bd9Sstevel@tonic-gate case SAVED_GROUP: /* (.....)$0-9 */ 9067c478bd9Sstevel@tonic-gate 9077c478bd9Sstevel@tonic-gate /* encoded as <SAVED_GROUP><substringn> */ 9087c478bd9Sstevel@tonic-gate 9097c478bd9Sstevel@tonic-gate regexp++; 9107c478bd9Sstevel@tonic-gate substringn = (unsigned int)*regexp; 9117c478bd9Sstevel@tonic-gate if (substringn >= NSUBSTRINGS) 9127c478bd9Sstevel@tonic-gate return ((char *)0); 9137c478bd9Sstevel@tonic-gate substring_startp[substringn] = stringp; 9147c478bd9Sstevel@tonic-gate regexp++; 9157c478bd9Sstevel@tonic-gate break; /* end case SAVED_GROUP */ 9167c478bd9Sstevel@tonic-gate 9177c478bd9Sstevel@tonic-gate case END_SAVED_GROUP: /* (.....)$0-9 */ 9187c478bd9Sstevel@tonic-gate 9197c478bd9Sstevel@tonic-gate /* 9207c478bd9Sstevel@tonic-gate * encoded as <END_SAVED_GROUP><substringn>\ 9217c478bd9Sstevel@tonic-gate * <return_arg_number[substringn]> 9227c478bd9Sstevel@tonic-gate */ 9237c478bd9Sstevel@tonic-gate 9247c478bd9Sstevel@tonic-gate regexp++; 9257c478bd9Sstevel@tonic-gate substringn = (unsigned int)*regexp; 9267c478bd9Sstevel@tonic-gate if (substringn >= NSUBSTRINGS) 9277c478bd9Sstevel@tonic-gate return ((char *)0); 9287c478bd9Sstevel@tonic-gate substring_endp[substringn] = stringp; 9297c478bd9Sstevel@tonic-gate regexp++; 9307c478bd9Sstevel@tonic-gate return_argn = (unsigned int)*regexp; 9317c478bd9Sstevel@tonic-gate if (return_argn >= NSUBSTRINGS) 9327c478bd9Sstevel@tonic-gate return ((char *)0); 9337c478bd9Sstevel@tonic-gate return_arg_number[substringn] = return_argn; 9347c478bd9Sstevel@tonic-gate regexp++; 9357c478bd9Sstevel@tonic-gate break; /* end case END_SAVED_GROUP */ 9367c478bd9Sstevel@tonic-gate 9377c478bd9Sstevel@tonic-gate case ASCII_CHAR|ZERO_OR_MORE: /* 
char* */ 9387c478bd9Sstevel@tonic-gate 9397c478bd9Sstevel@tonic-gate /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */ 9407c478bd9Sstevel@tonic-gate 9417c478bd9Sstevel@tonic-gate regexp++; 9427c478bd9Sstevel@tonic-gate repeat_startp = stringp; 9437c478bd9Sstevel@tonic-gate while (*stringp == *regexp) { 9447c478bd9Sstevel@tonic-gate stringp++; 9457c478bd9Sstevel@tonic-gate } 9467c478bd9Sstevel@tonic-gate regexp++; 9477c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, 9487c478bd9Sstevel@tonic-gate stringp, regexp)); 9497c478bd9Sstevel@tonic-gate 9507c478bd9Sstevel@tonic-gate /* end case ASCII_CHAR|ZERO_OR_MORE */ 9517c478bd9Sstevel@tonic-gate 9527c478bd9Sstevel@tonic-gate case ASCII_CHAR|ONE_OR_MORE: /* char+ */ 9537c478bd9Sstevel@tonic-gate 9547c478bd9Sstevel@tonic-gate /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */ 9557c478bd9Sstevel@tonic-gate 9567c478bd9Sstevel@tonic-gate regexp++; 9577c478bd9Sstevel@tonic-gate if (*stringp != *regexp) { 9587c478bd9Sstevel@tonic-gate return ((char *)0); 9597c478bd9Sstevel@tonic-gate } else { 9607c478bd9Sstevel@tonic-gate stringp++; 9617c478bd9Sstevel@tonic-gate repeat_startp = stringp; 9627c478bd9Sstevel@tonic-gate while (*stringp == *regexp) { 9637c478bd9Sstevel@tonic-gate stringp++; 9647c478bd9Sstevel@tonic-gate } 9657c478bd9Sstevel@tonic-gate regexp++; 9667c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 9677c478bd9Sstevel@tonic-gate regexp)); 9687c478bd9Sstevel@tonic-gate } 9697c478bd9Sstevel@tonic-gate /* end case ASCII_CHAR|ONE_OR_MORE */ 9707c478bd9Sstevel@tonic-gate 9717c478bd9Sstevel@tonic-gate case ASCII_CHAR|COUNT: /* char{min_count,max_count} */ 9727c478bd9Sstevel@tonic-gate 9737c478bd9Sstevel@tonic-gate /* 9747c478bd9Sstevel@tonic-gate * encoded as <ASCII_CHAR|COUNT><ascii_char>\ 9757c478bd9Sstevel@tonic-gate * <minimum_match_count><maximum_match_count> 9767c478bd9Sstevel@tonic-gate */ 9777c478bd9Sstevel@tonic-gate 9787c478bd9Sstevel@tonic-gate 
regexp++; 9797c478bd9Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 9807c478bd9Sstevel@tonic-gate regexp + 1); 9817c478bd9Sstevel@tonic-gate while ((*stringp == *regexp) && (nmust_match > 0)) { 9827c478bd9Sstevel@tonic-gate nmust_match--; 9837c478bd9Sstevel@tonic-gate stringp++; 9847c478bd9Sstevel@tonic-gate } 9857c478bd9Sstevel@tonic-gate if (nmust_match > 0) { 9867c478bd9Sstevel@tonic-gate return ((char *)0); 9877c478bd9Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 9887c478bd9Sstevel@tonic-gate repeat_startp = stringp; 9897c478bd9Sstevel@tonic-gate while (*stringp == *regexp) { 9907c478bd9Sstevel@tonic-gate stringp++; 9917c478bd9Sstevel@tonic-gate } 9927c478bd9Sstevel@tonic-gate regexp += 3; 9937c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 9947c478bd9Sstevel@tonic-gate regexp)); 9957c478bd9Sstevel@tonic-gate } else { 9967c478bd9Sstevel@tonic-gate repeat_startp = stringp; 9977c478bd9Sstevel@tonic-gate while ((*stringp == *regexp) && 9987c478bd9Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 9997c478bd9Sstevel@tonic-gate nextra_matches_allowed--; 10007c478bd9Sstevel@tonic-gate stringp++; 10017c478bd9Sstevel@tonic-gate } 10027c478bd9Sstevel@tonic-gate regexp += 3; 10037c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 10047c478bd9Sstevel@tonic-gate regexp)); 10057c478bd9Sstevel@tonic-gate } 10067c478bd9Sstevel@tonic-gate /* end case ASCII_CHAR|COUNT */ 10077c478bd9Sstevel@tonic-gate 10087c478bd9Sstevel@tonic-gate case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */ 10097c478bd9Sstevel@tonic-gate 10107c478bd9Sstevel@tonic-gate /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */ 10117c478bd9Sstevel@tonic-gate 10127c478bd9Sstevel@tonic-gate regexp++; 10137c478bd9Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp); 10147c478bd9Sstevel@tonic-gate repeat_startp = stringp; 10157c478bd9Sstevel@tonic-gate string_char_size = 
get_wchar(&string_wchar, stringp); 10167c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 10177c478bd9Sstevel@tonic-gate (string_wchar == regex_wchar)) { 10187c478bd9Sstevel@tonic-gate stringp += string_char_size; 10197c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 10207c478bd9Sstevel@tonic-gate } 10217c478bd9Sstevel@tonic-gate regexp += regex_char_size; 10227c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 10237c478bd9Sstevel@tonic-gate regexp)); 10247c478bd9Sstevel@tonic-gate 10257c478bd9Sstevel@tonic-gate /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ 10267c478bd9Sstevel@tonic-gate 10277c478bd9Sstevel@tonic-gate case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */ 10287c478bd9Sstevel@tonic-gate 10297c478bd9Sstevel@tonic-gate /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */ 10307c478bd9Sstevel@tonic-gate 10317c478bd9Sstevel@tonic-gate regexp++; 10327c478bd9Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp); 10337c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 10347c478bd9Sstevel@tonic-gate if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { 10357c478bd9Sstevel@tonic-gate return ((char *)0); 10367c478bd9Sstevel@tonic-gate } else { 10377c478bd9Sstevel@tonic-gate stringp += string_char_size; 10387c478bd9Sstevel@tonic-gate repeat_startp = stringp; 10397c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 10407c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 10417c478bd9Sstevel@tonic-gate (string_wchar == regex_wchar)) { 10427c478bd9Sstevel@tonic-gate stringp += string_char_size; 10437c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 10447c478bd9Sstevel@tonic-gate } 10457c478bd9Sstevel@tonic-gate regexp += regex_char_size; 10467c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 10477c478bd9Sstevel@tonic-gate regexp)); 
10487c478bd9Sstevel@tonic-gate } 10497c478bd9Sstevel@tonic-gate /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ 10507c478bd9Sstevel@tonic-gate 10517c478bd9Sstevel@tonic-gate case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */ 10527c478bd9Sstevel@tonic-gate 10537c478bd9Sstevel@tonic-gate /* 10547c478bd9Sstevel@tonic-gate * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\ 10557c478bd9Sstevel@tonic-gate * <minimum_match_count><maximum_match_count> 10567c478bd9Sstevel@tonic-gate */ 10577c478bd9Sstevel@tonic-gate 10587c478bd9Sstevel@tonic-gate regexp++; 10597c478bd9Sstevel@tonic-gate regex_char_size = get_wchar(®ex_wchar, regexp); 10607c478bd9Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 10617c478bd9Sstevel@tonic-gate regexp + regex_char_size); 10627c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 10637c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 10647c478bd9Sstevel@tonic-gate (string_wchar == regex_wchar) && 10657c478bd9Sstevel@tonic-gate (nmust_match > 0)) { 10667c478bd9Sstevel@tonic-gate 10677c478bd9Sstevel@tonic-gate nmust_match--; 10687c478bd9Sstevel@tonic-gate stringp += string_char_size; 10697c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 10707c478bd9Sstevel@tonic-gate } 10717c478bd9Sstevel@tonic-gate if (nmust_match > 0) { 10727c478bd9Sstevel@tonic-gate return ((char *)0); 10737c478bd9Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 10747c478bd9Sstevel@tonic-gate repeat_startp = stringp; 10757c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 10767c478bd9Sstevel@tonic-gate (string_wchar == regex_wchar)) { 10777c478bd9Sstevel@tonic-gate stringp += string_char_size; 10787c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 10797c478bd9Sstevel@tonic-gate } 10807c478bd9Sstevel@tonic-gate regexp += regex_char_size + 2; 10817c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, 
stringp, 10827c478bd9Sstevel@tonic-gate regexp)); 10837c478bd9Sstevel@tonic-gate } else { 10847c478bd9Sstevel@tonic-gate repeat_startp = stringp; 10857c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 10867c478bd9Sstevel@tonic-gate (string_wchar == regex_wchar) && 10877c478bd9Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 10887c478bd9Sstevel@tonic-gate nextra_matches_allowed--; 10897c478bd9Sstevel@tonic-gate stringp += string_char_size; 10907c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 10917c478bd9Sstevel@tonic-gate } 10927c478bd9Sstevel@tonic-gate regexp += regex_char_size + 2; 10937c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 10947c478bd9Sstevel@tonic-gate regexp)); 10957c478bd9Sstevel@tonic-gate } 10967c478bd9Sstevel@tonic-gate /* end case MULTIBYTE_CHAR|COUNT */ 10977c478bd9Sstevel@tonic-gate 10987c478bd9Sstevel@tonic-gate case ANY_CHAR|ZERO_OR_MORE: /* .* */ 10997c478bd9Sstevel@tonic-gate 11007c478bd9Sstevel@tonic-gate /* encoded as <ANY_CHAR|ZERO_OR_MORE> */ 11017c478bd9Sstevel@tonic-gate 11027c478bd9Sstevel@tonic-gate repeat_startp = stringp; 11037c478bd9Sstevel@tonic-gate if (!multibyte) { 11047c478bd9Sstevel@tonic-gate while (*stringp != '\0') { 11057c478bd9Sstevel@tonic-gate stringp++; 11067c478bd9Sstevel@tonic-gate } 11077c478bd9Sstevel@tonic-gate regexp++; 11087c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 11097c478bd9Sstevel@tonic-gate regexp)); 11107c478bd9Sstevel@tonic-gate } else { 11117c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 11127c478bd9Sstevel@tonic-gate while (string_char_size > 0) { 11137c478bd9Sstevel@tonic-gate stringp += string_char_size; 11147c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 11157c478bd9Sstevel@tonic-gate } 11167c478bd9Sstevel@tonic-gate regexp++; 11177c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, 
stringp, 11187c478bd9Sstevel@tonic-gate regexp)); 11197c478bd9Sstevel@tonic-gate } 11207c478bd9Sstevel@tonic-gate /* end case <ANY_CHAR|ZERO_OR_MORE> */ 11217c478bd9Sstevel@tonic-gate 11227c478bd9Sstevel@tonic-gate case ANY_CHAR|ONE_OR_MORE: /* .+ */ 11237c478bd9Sstevel@tonic-gate 11247c478bd9Sstevel@tonic-gate /* encoded as <ANY_CHAR|ONE_OR_MORE> */ 11257c478bd9Sstevel@tonic-gate 11267c478bd9Sstevel@tonic-gate if (!multibyte) { 11277c478bd9Sstevel@tonic-gate if (*stringp == '\0') { 11287c478bd9Sstevel@tonic-gate return ((char *)0); 11297c478bd9Sstevel@tonic-gate } else { 11307c478bd9Sstevel@tonic-gate stringp++; 11317c478bd9Sstevel@tonic-gate repeat_startp = stringp; 11327c478bd9Sstevel@tonic-gate while (*stringp != '\0') { 11337c478bd9Sstevel@tonic-gate stringp++; 11347c478bd9Sstevel@tonic-gate } 11357c478bd9Sstevel@tonic-gate regexp++; 11367c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 11377c478bd9Sstevel@tonic-gate regexp)); 11387c478bd9Sstevel@tonic-gate } 11397c478bd9Sstevel@tonic-gate } else { 11407c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 11417c478bd9Sstevel@tonic-gate if (string_char_size <= 0) { 11427c478bd9Sstevel@tonic-gate return ((char *)0); 11437c478bd9Sstevel@tonic-gate } else { 11447c478bd9Sstevel@tonic-gate stringp += string_char_size; 11457c478bd9Sstevel@tonic-gate repeat_startp = stringp; 11467c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 11477c478bd9Sstevel@tonic-gate while (string_char_size > 0) { 11487c478bd9Sstevel@tonic-gate stringp += string_char_size; 11497c478bd9Sstevel@tonic-gate string_char_size = 11507c478bd9Sstevel@tonic-gate get_wchar(&string_wchar, stringp); 11517c478bd9Sstevel@tonic-gate } 11527c478bd9Sstevel@tonic-gate regexp++; 11537c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, 11547c478bd9Sstevel@tonic-gate stringp, regexp)); 11557c478bd9Sstevel@tonic-gate } 11567c478bd9Sstevel@tonic-gate 
} 11577c478bd9Sstevel@tonic-gate /* end case <ANY_CHAR|ONE_OR_MORE> */ 11587c478bd9Sstevel@tonic-gate 11597c478bd9Sstevel@tonic-gate case ANY_CHAR|COUNT: /* .{min_count,max_count} */ 11607c478bd9Sstevel@tonic-gate 11617c478bd9Sstevel@tonic-gate /* 11627c478bd9Sstevel@tonic-gate * encoded as <ANY_CHAR|COUNT>\ 11637c478bd9Sstevel@tonic-gate * <minimum_match_count><maximum_match_count> 11647c478bd9Sstevel@tonic-gate */ 11657c478bd9Sstevel@tonic-gate 11667c478bd9Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 11677c478bd9Sstevel@tonic-gate regexp + 1); 11687c478bd9Sstevel@tonic-gate if (!multibyte) { 11697c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && (nmust_match > 0)) { 11707c478bd9Sstevel@tonic-gate nmust_match--; 11717c478bd9Sstevel@tonic-gate stringp++; 11727c478bd9Sstevel@tonic-gate } 11737c478bd9Sstevel@tonic-gate if (nmust_match > 0) { 11747c478bd9Sstevel@tonic-gate return ((char *)0); 11757c478bd9Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 11767c478bd9Sstevel@tonic-gate repeat_startp = stringp; 11777c478bd9Sstevel@tonic-gate while (*stringp != '\0') { 11787c478bd9Sstevel@tonic-gate stringp++; 11797c478bd9Sstevel@tonic-gate } 11807c478bd9Sstevel@tonic-gate regexp += 3; 11817c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 11827c478bd9Sstevel@tonic-gate regexp)); 11837c478bd9Sstevel@tonic-gate } else { 11847c478bd9Sstevel@tonic-gate repeat_startp = stringp; 11857c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 11867c478bd9Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 11877c478bd9Sstevel@tonic-gate nextra_matches_allowed--; 11887c478bd9Sstevel@tonic-gate stringp++; 11897c478bd9Sstevel@tonic-gate } 11907c478bd9Sstevel@tonic-gate regexp += 3; 11917c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 11927c478bd9Sstevel@tonic-gate regexp)); 11937c478bd9Sstevel@tonic-gate } 11947c478bd9Sstevel@tonic-gate } else { /* multibyte 
character */ 11957c478bd9Sstevel@tonic-gate 11967c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 11977c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && (nmust_match > 0)) { 11987c478bd9Sstevel@tonic-gate nmust_match--; 11997c478bd9Sstevel@tonic-gate stringp += string_char_size; 12007c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 12017c478bd9Sstevel@tonic-gate } 12027c478bd9Sstevel@tonic-gate if (nmust_match > 0) { 12037c478bd9Sstevel@tonic-gate return ((char *)0); 12047c478bd9Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 12057c478bd9Sstevel@tonic-gate repeat_startp = stringp; 12067c478bd9Sstevel@tonic-gate while (string_char_size > 0) { 12077c478bd9Sstevel@tonic-gate stringp += string_char_size; 12087c478bd9Sstevel@tonic-gate string_char_size = 12097c478bd9Sstevel@tonic-gate get_wchar(&string_wchar, stringp); 12107c478bd9Sstevel@tonic-gate } 12117c478bd9Sstevel@tonic-gate regexp += 3; 12127c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, 12137c478bd9Sstevel@tonic-gate stringp, regexp)); 12147c478bd9Sstevel@tonic-gate } else { 12157c478bd9Sstevel@tonic-gate repeat_startp = stringp; 12167c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 12177c478bd9Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 12187c478bd9Sstevel@tonic-gate nextra_matches_allowed--; 12197c478bd9Sstevel@tonic-gate stringp += string_char_size; 12207c478bd9Sstevel@tonic-gate string_char_size = 12217c478bd9Sstevel@tonic-gate get_wchar(&string_wchar, stringp); 12227c478bd9Sstevel@tonic-gate } 12237c478bd9Sstevel@tonic-gate regexp += 3; 12247c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, 12257c478bd9Sstevel@tonic-gate stringp, regexp)); 12267c478bd9Sstevel@tonic-gate } 12277c478bd9Sstevel@tonic-gate } /* end case ANY_CHAR|COUNT */ 12287c478bd9Sstevel@tonic-gate 12297c478bd9Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* 
[.....]* */ 12307c478bd9Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: 12317c478bd9Sstevel@tonic-gate 12327c478bd9Sstevel@tonic-gate /* 12337c478bd9Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 12347c478bd9Sstevel@tonic-gate * <class_length><class ...> 12357c478bd9Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 12367c478bd9Sstevel@tonic-gate * <class_length><class ...> 12377c478bd9Sstevel@tonic-gate * 12387c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 12397c478bd9Sstevel@tonic-gate */ 12407c478bd9Sstevel@tonic-gate 12417c478bd9Sstevel@tonic-gate if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 12427c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 12437c478bd9Sstevel@tonic-gate } else { 12447c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 12457c478bd9Sstevel@tonic-gate } 12467c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 12477c478bd9Sstevel@tonic-gate 12487c478bd9Sstevel@tonic-gate repeat_startp = stringp; 12497c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 12507c478bd9Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 12517c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 12527c478bd9Sstevel@tonic-gate stringp++; 12537c478bd9Sstevel@tonic-gate } 12547c478bd9Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 12557c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 12567c478bd9Sstevel@tonic-gate regexp)); 12577c478bd9Sstevel@tonic-gate 12587c478bd9Sstevel@tonic-gate /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 12597c478bd9Sstevel@tonic-gate 12607c478bd9Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 12617c478bd9Sstevel@tonic-gate case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE: 12627c478bd9Sstevel@tonic-gate 12637c478bd9Sstevel@tonic-gate /* 12647c478bd9Sstevel@tonic-gate * encoded as 
<IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 12657c478bd9Sstevel@tonic-gate * <class_length><class ...> 12667c478bd9Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 12677c478bd9Sstevel@tonic-gate * <class_length><class ...> 12687c478bd9Sstevel@tonic-gate * 12697c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 12707c478bd9Sstevel@tonic-gate */ 12717c478bd9Sstevel@tonic-gate 12727c478bd9Sstevel@tonic-gate if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 12737c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 12747c478bd9Sstevel@tonic-gate } else { 12757c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 12767c478bd9Sstevel@tonic-gate } 12777c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 12787c478bd9Sstevel@tonic-gate 12797c478bd9Sstevel@tonic-gate if ((*stringp == '\0') || 12807c478bd9Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 12817c478bd9Sstevel@tonic-gate test_condition) != CONDITION_TRUE)) { 12827c478bd9Sstevel@tonic-gate return ((char *)0); 12837c478bd9Sstevel@tonic-gate } else { 12847c478bd9Sstevel@tonic-gate stringp++; 12857c478bd9Sstevel@tonic-gate repeat_startp = stringp; 12867c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 12877c478bd9Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 12887c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 12897c478bd9Sstevel@tonic-gate stringp++; 12907c478bd9Sstevel@tonic-gate } 12917c478bd9Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 12927c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 12937c478bd9Sstevel@tonic-gate regexp)); 12947c478bd9Sstevel@tonic-gate } 12957c478bd9Sstevel@tonic-gate /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ 12967c478bd9Sstevel@tonic-gate 12977c478bd9Sstevel@tonic-gate case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{min_count,max_count} */ 12987c478bd9Sstevel@tonic-gate case
NOT_IN_ASCII_CHAR_CLASS | COUNT: 12997c478bd9Sstevel@tonic-gate 13007c478bd9Sstevel@tonic-gate /* 13017c478bd9Sstevel@tonic-gate * encoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 13027c478bd9Sstevel@tonic-gate * <class ...><minimum_match_count>\ 13037c478bd9Sstevel@tonic-gate * <maximum_match_count> 13047c478bd9Sstevel@tonic-gate * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\ 13057c478bd9Sstevel@tonic-gate * <class ...><minimum_match_count>\ 13067c478bd9Sstevel@tonic-gate * <maximum_match_count> 13077c478bd9Sstevel@tonic-gate * 13087c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte, 13097c478bd9Sstevel@tonic-gate * but not the <minimum_match_count> or 13107c478bd9Sstevel@tonic-gate * <maximum_match_count> bytes 13117c478bd9Sstevel@tonic-gate */ 13127c478bd9Sstevel@tonic-gate 13137c478bd9Sstevel@tonic-gate if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) { 13147c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 13157c478bd9Sstevel@tonic-gate } else { 13167c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 13177c478bd9Sstevel@tonic-gate } 13187c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 13197c478bd9Sstevel@tonic-gate 13207c478bd9Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 13217c478bd9Sstevel@tonic-gate regexp + (int)*regexp); 13227c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 13237c478bd9Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 13247c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 13257c478bd9Sstevel@tonic-gate (nmust_match > 0)) { 13267c478bd9Sstevel@tonic-gate nmust_match--; 13277c478bd9Sstevel@tonic-gate stringp++; 13287c478bd9Sstevel@tonic-gate } 13297c478bd9Sstevel@tonic-gate if (nmust_match > 0) { 13307c478bd9Sstevel@tonic-gate return ((char *)0); 13317c478bd9Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 13327c478bd9Sstevel@tonic-gate repeat_startp = stringp;
13337c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 13347c478bd9Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 13357c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 13367c478bd9Sstevel@tonic-gate stringp++; 13377c478bd9Sstevel@tonic-gate } 13387c478bd9Sstevel@tonic-gate regexp += (int)*regexp + 2; 13397c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 13407c478bd9Sstevel@tonic-gate regexp)); 13417c478bd9Sstevel@tonic-gate } else { 13427c478bd9Sstevel@tonic-gate repeat_startp = stringp; 13437c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 13447c478bd9Sstevel@tonic-gate (test_char_against_ascii_class(*stringp, regexp, 13457c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 13467c478bd9Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 13477c478bd9Sstevel@tonic-gate nextra_matches_allowed--; 13487c478bd9Sstevel@tonic-gate stringp++; 13497c478bd9Sstevel@tonic-gate } 13507c478bd9Sstevel@tonic-gate regexp += (int)*regexp + 2; 13517c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 13527c478bd9Sstevel@tonic-gate regexp)); 13537c478bd9Sstevel@tonic-gate } 13547c478bd9Sstevel@tonic-gate /* end case IN_ASCII_CHAR_CLASS|COUNT */ 13557c478bd9Sstevel@tonic-gate 13567c478bd9Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 13577c478bd9Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: 13587c478bd9Sstevel@tonic-gate 13597c478bd9Sstevel@tonic-gate /* 13607c478bd9Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 13617c478bd9Sstevel@tonic-gate * <class_length><class ...> 13627c478bd9Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ 13637c478bd9Sstevel@tonic-gate * <class_length><class ...> 13647c478bd9Sstevel@tonic-gate * 13657c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 13667c478bd9Sstevel@tonic-gate */ 13677c478bd9Sstevel@tonic-gate 
13687c478bd9Sstevel@tonic-gate if ((int)*regexp == 13697c478bd9Sstevel@tonic-gate (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) { 13707c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 13717c478bd9Sstevel@tonic-gate } else { 13727c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 13737c478bd9Sstevel@tonic-gate } 13747c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 13757c478bd9Sstevel@tonic-gate 13767c478bd9Sstevel@tonic-gate repeat_startp = stringp; 13777c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 13787c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 13797c478bd9Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp, 13807c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 13817c478bd9Sstevel@tonic-gate stringp += string_char_size; 13827c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 13837c478bd9Sstevel@tonic-gate } 13847c478bd9Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 13857c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 13867c478bd9Sstevel@tonic-gate regexp)); 13877c478bd9Sstevel@tonic-gate 13887c478bd9Sstevel@tonic-gate /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ 13897c478bd9Sstevel@tonic-gate 13907c478bd9Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 13917c478bd9Sstevel@tonic-gate case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: 13927c478bd9Sstevel@tonic-gate 13937c478bd9Sstevel@tonic-gate /* 13947c478bd9Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 13957c478bd9Sstevel@tonic-gate * <class_length><class ...> 13967c478bd9Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ 13977c478bd9Sstevel@tonic-gate * <class_length><class ...> 13987c478bd9Sstevel@tonic-gate * 13997c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 
14007c478bd9Sstevel@tonic-gate */ 14017c478bd9Sstevel@tonic-gate 14027c478bd9Sstevel@tonic-gate if ((int)*regexp == 14037c478bd9Sstevel@tonic-gate (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) { 14047c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 14057c478bd9Sstevel@tonic-gate } else { 14067c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 14077c478bd9Sstevel@tonic-gate } 14087c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 14097c478bd9Sstevel@tonic-gate 14107c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 14117c478bd9Sstevel@tonic-gate if ((string_char_size <= 0) || 14127c478bd9Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp, 14137c478bd9Sstevel@tonic-gate test_condition) != CONDITION_TRUE)) { 14147c478bd9Sstevel@tonic-gate return ((char *)0); 14157c478bd9Sstevel@tonic-gate } else { 14167c478bd9Sstevel@tonic-gate stringp += string_char_size; 14177c478bd9Sstevel@tonic-gate repeat_startp = stringp; 14187c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 14197c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 14207c478bd9Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, 14217c478bd9Sstevel@tonic-gate regexp, test_condition) == CONDITION_TRUE)) { 14227c478bd9Sstevel@tonic-gate stringp += string_char_size; 14237c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 14247c478bd9Sstevel@tonic-gate } 14257c478bd9Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 14267c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 14277c478bd9Sstevel@tonic-gate regexp)); 14287c478bd9Sstevel@tonic-gate } 14297c478bd9Sstevel@tonic-gate /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ 14307c478bd9Sstevel@tonic-gate 14317c478bd9Sstevel@tonic-gate case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 14327c478bd9Sstevel@tonic-gate case 
NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT: 14337c478bd9Sstevel@tonic-gate 14347c478bd9Sstevel@tonic-gate /* 14357c478bd9Sstevel@tonic-gate * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 14367c478bd9Sstevel@tonic-gate * <class_length><class ...><min_count><max_count> 14377c478bd9Sstevel@tonic-gate * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\ 14387c478bd9Sstevel@tonic-gate * <class_length><class ...><min_count><max_count> 14397c478bd9Sstevel@tonic-gate * 14407c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 14417c478bd9Sstevel@tonic-gate * but not the <minimum_match_count> or 14427c478bd9Sstevel@tonic-gate * <maximum_match_count> bytes 14437c478bd9Sstevel@tonic-gate */ 14447c478bd9Sstevel@tonic-gate 14457c478bd9Sstevel@tonic-gate if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) { 14467c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 14477c478bd9Sstevel@tonic-gate } else { 14487c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 14497c478bd9Sstevel@tonic-gate } 14507c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 14517c478bd9Sstevel@tonic-gate 14527c478bd9Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 14537c478bd9Sstevel@tonic-gate regexp + (int)*regexp); 14547c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 14557c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 14567c478bd9Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, regexp, 14577c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 14587c478bd9Sstevel@tonic-gate (nmust_match > 0)) { 14597c478bd9Sstevel@tonic-gate nmust_match--; 14607c478bd9Sstevel@tonic-gate stringp += string_char_size; 14617c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 14627c478bd9Sstevel@tonic-gate } 14637c478bd9Sstevel@tonic-gate if (nmust_match > 0) { 14647c478bd9Sstevel@tonic-gate return ((char *)0); 14657c478bd9Sstevel@tonic-gate } else if 
(nextra_matches_allowed == UNLIMITED) { 14667c478bd9Sstevel@tonic-gate repeat_startp = stringp; 14677c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 14687c478bd9Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, 14697c478bd9Sstevel@tonic-gate regexp, test_condition) == CONDITION_TRUE)) { 14707c478bd9Sstevel@tonic-gate stringp += string_char_size; 14717c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 14727c478bd9Sstevel@tonic-gate } 14737c478bd9Sstevel@tonic-gate regexp += (int)*regexp + 2; 14747c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 14757c478bd9Sstevel@tonic-gate regexp)); 14767c478bd9Sstevel@tonic-gate } else { 14777c478bd9Sstevel@tonic-gate repeat_startp = stringp; 14787c478bd9Sstevel@tonic-gate while ((string_char_size > 0) && 14797c478bd9Sstevel@tonic-gate (test_char_against_multibyte_class(string_wchar, 14807c478bd9Sstevel@tonic-gate regexp, test_condition) == CONDITION_TRUE) && 14817c478bd9Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 14827c478bd9Sstevel@tonic-gate nextra_matches_allowed--; 14837c478bd9Sstevel@tonic-gate stringp += string_char_size; 14847c478bd9Sstevel@tonic-gate string_char_size = get_wchar(&string_wchar, stringp); 14857c478bd9Sstevel@tonic-gate } 14867c478bd9Sstevel@tonic-gate regexp += (int)*regexp + 2; 14877c478bd9Sstevel@tonic-gate return (test_repeated_multibyte_char(repeat_startp, stringp, 14887c478bd9Sstevel@tonic-gate regexp)); 14897c478bd9Sstevel@tonic-gate } 14907c478bd9Sstevel@tonic-gate /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ 14917c478bd9Sstevel@tonic-gate 14927c478bd9Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ 14937c478bd9Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: 14947c478bd9Sstevel@tonic-gate 14957c478bd9Sstevel@tonic-gate /* 14967c478bd9Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 14977c478bd9Sstevel@tonic-gate * 
<class_length><class ...> 14987c478bd9Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ 14997c478bd9Sstevel@tonic-gate * <class_length><class ...> 15007c478bd9Sstevel@tonic-gate * 15017c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 15027c478bd9Sstevel@tonic-gate */ 15037c478bd9Sstevel@tonic-gate 15047c478bd9Sstevel@tonic-gate if ((int)*regexp == 15057c478bd9Sstevel@tonic-gate (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { 15067c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 15077c478bd9Sstevel@tonic-gate } else { 15087c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 15097c478bd9Sstevel@tonic-gate } 15107c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 15117c478bd9Sstevel@tonic-gate 15127c478bd9Sstevel@tonic-gate repeat_startp = stringp; 15137c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 15147c478bd9Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 15157c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 15167c478bd9Sstevel@tonic-gate stringp++; 15177c478bd9Sstevel@tonic-gate } 15187c478bd9Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 15197c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 15207c478bd9Sstevel@tonic-gate regexp)); 15217c478bd9Sstevel@tonic-gate 15227c478bd9Sstevel@tonic-gate /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ 15237c478bd9Sstevel@tonic-gate 15247c478bd9Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ 15257c478bd9Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: 15267c478bd9Sstevel@tonic-gate 15277c478bd9Sstevel@tonic-gate /* 15287c478bd9Sstevel@tonic-gate * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 15297c478bd9Sstevel@tonic-gate * <class_length><class ...> 15307c478bd9Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ 15317c478bd9Sstevel@tonic-gate * <class_length><class 
...> 15327c478bd9Sstevel@tonic-gate * 15337c478bd9Sstevel@tonic-gate * NOTE: <class length> includes the <class_length> byte 15347c478bd9Sstevel@tonic-gate */ 15357c478bd9Sstevel@tonic-gate 15367c478bd9Sstevel@tonic-gate if ((int)*regexp == 15377c478bd9Sstevel@tonic-gate (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) { 15387c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 15397c478bd9Sstevel@tonic-gate } else { 15407c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 15417c478bd9Sstevel@tonic-gate } 15427c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 15437c478bd9Sstevel@tonic-gate 15447c478bd9Sstevel@tonic-gate if ((*stringp == '\0') || 15457c478bd9Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 15467c478bd9Sstevel@tonic-gate test_condition) != CONDITION_TRUE)) { 15477c478bd9Sstevel@tonic-gate return ((char *)0); 15487c478bd9Sstevel@tonic-gate } else { 15497c478bd9Sstevel@tonic-gate stringp++; 15507c478bd9Sstevel@tonic-gate repeat_startp = stringp; 15517c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 15527c478bd9Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 15537c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 15547c478bd9Sstevel@tonic-gate stringp++; 15557c478bd9Sstevel@tonic-gate } 15567c478bd9Sstevel@tonic-gate regexp += (int)*regexp; /* add the class length to regexp */ 15577c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 15587c478bd9Sstevel@tonic-gate regexp)); 15597c478bd9Sstevel@tonic-gate } 15607c478bd9Sstevel@tonic-gate /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ 15617c478bd9Sstevel@tonic-gate 15627c478bd9Sstevel@tonic-gate case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ 15637c478bd9Sstevel@tonic-gate case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT: 15647c478bd9Sstevel@tonic-gate 15657c478bd9Sstevel@tonic-gate /* 15667c478bd9Sstevel@tonic-gate * encoded as 
<IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\ 15677c478bd9Sstevel@tonic-gate * <class ...><minimum_match_count>\ 15687c478bd9Sstevel@tonic-gate * <maximum_match_count> 15697c478bd9Sstevel@tonic-gate * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\ 15707c478bd9Sstevel@tonic-gate * <class_length><class ...><minimum_match_count>\ 15717c478bd9Sstevel@tonic-gate * <maximum_match_count> 15727c478bd9Sstevel@tonic-gate * 15737c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the <class_length> byte 15747c478bd9Sstevel@tonic-gate * but not the <minimum_match_count> or 15757c478bd9Sstevel@tonic-gate * <maximum_match_count> bytes 15767c478bd9Sstevel@tonic-gate */ 15777c478bd9Sstevel@tonic-gate 15787c478bd9Sstevel@tonic-gate if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) { 15797c478bd9Sstevel@tonic-gate test_condition = IN_CLASS; 15807c478bd9Sstevel@tonic-gate } else { 15817c478bd9Sstevel@tonic-gate test_condition = NOT_IN_CLASS; 15827c478bd9Sstevel@tonic-gate } 15837c478bd9Sstevel@tonic-gate regexp++; /* point to the <class_length> byte */ 15847c478bd9Sstevel@tonic-gate 15857c478bd9Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 15867c478bd9Sstevel@tonic-gate regexp + (int)*regexp); 15877c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 15887c478bd9Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 15897c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 15907c478bd9Sstevel@tonic-gate (nmust_match > 0)) { 15917c478bd9Sstevel@tonic-gate nmust_match--; 15927c478bd9Sstevel@tonic-gate stringp++; 15937c478bd9Sstevel@tonic-gate } 15947c478bd9Sstevel@tonic-gate if (nmust_match > 0) { 15957c478bd9Sstevel@tonic-gate return ((char *)0); 15967c478bd9Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 15977c478bd9Sstevel@tonic-gate repeat_startp = stringp; 15987c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 15997c478bd9Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, 
regexp, 16007c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE)) { 16017c478bd9Sstevel@tonic-gate stringp++; 16027c478bd9Sstevel@tonic-gate } 16037c478bd9Sstevel@tonic-gate regexp += (int)*regexp + 2; 16047c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 16057c478bd9Sstevel@tonic-gate regexp)); 16067c478bd9Sstevel@tonic-gate } else { 16077c478bd9Sstevel@tonic-gate repeat_startp = stringp; 16087c478bd9Sstevel@tonic-gate while ((*stringp != '\0') && 16097c478bd9Sstevel@tonic-gate (test_char_against_old_ascii_class(*stringp, regexp, 16107c478bd9Sstevel@tonic-gate test_condition) == CONDITION_TRUE) && 16117c478bd9Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 16127c478bd9Sstevel@tonic-gate nextra_matches_allowed--; 16137c478bd9Sstevel@tonic-gate stringp++; 16147c478bd9Sstevel@tonic-gate } 16157c478bd9Sstevel@tonic-gate regexp += (int)*regexp + 2; 16167c478bd9Sstevel@tonic-gate return (test_repeated_ascii_char(repeat_startp, stringp, 16177c478bd9Sstevel@tonic-gate regexp)); 16187c478bd9Sstevel@tonic-gate } 16197c478bd9Sstevel@tonic-gate /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ 16207c478bd9Sstevel@tonic-gate 16217c478bd9Sstevel@tonic-gate case ZERO_OR_MORE_GROUP: /* (.....)* */ 16227c478bd9Sstevel@tonic-gate case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 16237c478bd9Sstevel@tonic-gate case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 16247c478bd9Sstevel@tonic-gate case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 16257c478bd9Sstevel@tonic-gate 16267c478bd9Sstevel@tonic-gate /* 16277c478bd9Sstevel@tonic-gate * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 16287c478bd9Sstevel@tonic-gate * <group_length><compiled_regex...>\ 16297c478bd9Sstevel@tonic-gate * <END_GROUP|ZERO_OR_MORE><groupn> 16307c478bd9Sstevel@tonic-gate * 16317c478bd9Sstevel@tonic-gate * NOTE: 16327c478bd9Sstevel@tonic-gate * 16337c478bd9Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) == 16347c478bd9Sstevel@tonic-gate * 
length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\ 16357c478bd9Sstevel@tonic-gate * <groupn>) 16367c478bd9Sstevel@tonic-gate * 16377c478bd9Sstevel@tonic-gate */ 16387c478bd9Sstevel@tonic-gate 16397c478bd9Sstevel@tonic-gate group_length = 16407c478bd9Sstevel@tonic-gate (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 16417c478bd9Sstevel@tonic-gate TIMES_256_SHIFT); 16427c478bd9Sstevel@tonic-gate regexp++; 16437c478bd9Sstevel@tonic-gate group_length += (unsigned int)*regexp; 16447c478bd9Sstevel@tonic-gate regexp++; 16457c478bd9Sstevel@tonic-gate repeat_startp = stringp; 16467c478bd9Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 16477c478bd9Sstevel@tonic-gate while (test_stringp != (char *)0) { 16487c478bd9Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0) 16497c478bd9Sstevel@tonic-gate return ((char *)0); 16507c478bd9Sstevel@tonic-gate stringp = test_stringp; 16517c478bd9Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 16527c478bd9Sstevel@tonic-gate } 16537c478bd9Sstevel@tonic-gate regexp += group_length; 16547c478bd9Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp, regexp)); 16557c478bd9Sstevel@tonic-gate 16567c478bd9Sstevel@tonic-gate /* end case ZERO_OR_MORE_GROUP */ 16577c478bd9Sstevel@tonic-gate 16587c478bd9Sstevel@tonic-gate case END_GROUP|ZERO_OR_MORE: /* (.....)* */ 16597c478bd9Sstevel@tonic-gate 16607c478bd9Sstevel@tonic-gate /* encoded as <END_GROUP|ZERO_OR_MORE> */ 16617c478bd9Sstevel@tonic-gate 16627c478bd9Sstevel@tonic-gate /* return from recursive call to test_string() */ 16637c478bd9Sstevel@tonic-gate 16647c478bd9Sstevel@tonic-gate return ((char *)stringp); 16657c478bd9Sstevel@tonic-gate 16667c478bd9Sstevel@tonic-gate /* end case END_GROUP|ZERO_OR_MORE */ 16677c478bd9Sstevel@tonic-gate 16687c478bd9Sstevel@tonic-gate case ONE_OR_MORE_GROUP: /* (.....)+ */ 16697c478bd9Sstevel@tonic-gate case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: 16707c478bd9Sstevel@tonic-gate case 
ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: 16717c478bd9Sstevel@tonic-gate case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: 16727c478bd9Sstevel@tonic-gate 16737c478bd9Sstevel@tonic-gate /* 16747c478bd9Sstevel@tonic-gate * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 16757c478bd9Sstevel@tonic-gate * <group_length><compiled_regex...>\ 16767c478bd9Sstevel@tonic-gate * <END_GROUP|ONE_OR_MORE><groupn> 16777c478bd9Sstevel@tonic-gate * 16787c478bd9Sstevel@tonic-gate * NOTE: 16797c478bd9Sstevel@tonic-gate * 16807c478bd9Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) == 16817c478bd9Sstevel@tonic-gate * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\ 16827c478bd9Sstevel@tonic-gate * <groupn>) 16837c478bd9Sstevel@tonic-gate */ 16847c478bd9Sstevel@tonic-gate 16857c478bd9Sstevel@tonic-gate group_length = 16867c478bd9Sstevel@tonic-gate (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 16877c478bd9Sstevel@tonic-gate TIMES_256_SHIFT); 16887c478bd9Sstevel@tonic-gate regexp++; 16897c478bd9Sstevel@tonic-gate group_length += (unsigned int)*regexp; 16907c478bd9Sstevel@tonic-gate regexp++; 16917c478bd9Sstevel@tonic-gate stringp = test_string(stringp, regexp); 16927c478bd9Sstevel@tonic-gate if (stringp == (char *)0) 16937c478bd9Sstevel@tonic-gate return ((char *)0); 16947c478bd9Sstevel@tonic-gate repeat_startp = stringp; 16957c478bd9Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 16967c478bd9Sstevel@tonic-gate while (test_stringp != (char *)0) { 16977c478bd9Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0) 16987c478bd9Sstevel@tonic-gate return ((char *)0); 16997c478bd9Sstevel@tonic-gate stringp = test_stringp; 17007c478bd9Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 17017c478bd9Sstevel@tonic-gate } 17027c478bd9Sstevel@tonic-gate regexp += group_length; 17037c478bd9Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp, regexp)); 17047c478bd9Sstevel@tonic-gate 17057c478bd9Sstevel@tonic-gate /* end case 
ONE_OR_MORE_GROUP */ 17067c478bd9Sstevel@tonic-gate 17077c478bd9Sstevel@tonic-gate case END_GROUP|ONE_OR_MORE: /* (.....)+ */ 17087c478bd9Sstevel@tonic-gate 17097c478bd9Sstevel@tonic-gate /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */ 17107c478bd9Sstevel@tonic-gate 17117c478bd9Sstevel@tonic-gate /* return from recursive call to test_string() */ 17127c478bd9Sstevel@tonic-gate 17137c478bd9Sstevel@tonic-gate return ((char *)stringp); 17147c478bd9Sstevel@tonic-gate 17157c478bd9Sstevel@tonic-gate /* end case END_GROUP|ONE_OR_MORE */ 17167c478bd9Sstevel@tonic-gate 17177c478bd9Sstevel@tonic-gate case COUNTED_GROUP: /* (.....){min_count,max_count} */ 17187c478bd9Sstevel@tonic-gate case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH: 17197c478bd9Sstevel@tonic-gate case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH: 17207c478bd9Sstevel@tonic-gate case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH: 17217c478bd9Sstevel@tonic-gate 17227c478bd9Sstevel@tonic-gate /* 17237c478bd9Sstevel@tonic-gate * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ 17247c478bd9Sstevel@tonic-gate * <compiled_regex...>\<END_GROUP|COUNT><groupn>\ 17257c478bd9Sstevel@tonic-gate * <minimum_match_count><maximum_match_count> 17267c478bd9Sstevel@tonic-gate * 17277c478bd9Sstevel@tonic-gate * NOTE: 17287c478bd9Sstevel@tonic-gate * 17297c478bd9Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) == 17307c478bd9Sstevel@tonic-gate * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>) 17317c478bd9Sstevel@tonic-gate * 17327c478bd9Sstevel@tonic-gate * but does not include the <minimum_match_count> or 17337c478bd9Sstevel@tonic-gate * <maximum_match_count> bytes 17347c478bd9Sstevel@tonic-gate */ 17357c478bd9Sstevel@tonic-gate 17367c478bd9Sstevel@tonic-gate group_length = 17377c478bd9Sstevel@tonic-gate (((unsigned int)*regexp & ADDED_LENGTH_BITS) << 17387c478bd9Sstevel@tonic-gate TIMES_256_SHIFT); 17397c478bd9Sstevel@tonic-gate regexp++; 17407c478bd9Sstevel@tonic-gate group_length += (unsigned int)*regexp;
17417c478bd9Sstevel@tonic-gate regexp++; 17427c478bd9Sstevel@tonic-gate get_match_counts(&nmust_match, &nextra_matches_allowed, 17437c478bd9Sstevel@tonic-gate regexp + group_length); 17447c478bd9Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 17457c478bd9Sstevel@tonic-gate while ((test_stringp != (char *)0) && (nmust_match > 0)) { 17467c478bd9Sstevel@tonic-gate stringp = test_stringp; 17477c478bd9Sstevel@tonic-gate nmust_match--; 17487c478bd9Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 17497c478bd9Sstevel@tonic-gate } 17507c478bd9Sstevel@tonic-gate if (nmust_match > 0) { 17517c478bd9Sstevel@tonic-gate return ((char *)0); 17527c478bd9Sstevel@tonic-gate } else if (nextra_matches_allowed == UNLIMITED) { 17537c478bd9Sstevel@tonic-gate repeat_startp = stringp; 17547c478bd9Sstevel@tonic-gate while (test_stringp != (char *)0) { 17557c478bd9Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0) 17567c478bd9Sstevel@tonic-gate return ((char *)0); 17577c478bd9Sstevel@tonic-gate stringp = test_stringp; 17587c478bd9Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 17597c478bd9Sstevel@tonic-gate } 17607c478bd9Sstevel@tonic-gate regexp += group_length + 2; 17617c478bd9Sstevel@tonic-gate return (test_repeated_group(repeat_startp, stringp, 17627c478bd9Sstevel@tonic-gate regexp)); 17637c478bd9Sstevel@tonic-gate } else { 17647c478bd9Sstevel@tonic-gate repeat_startp = stringp; 17657c478bd9Sstevel@tonic-gate while ((test_stringp != (char *)0) && 17667c478bd9Sstevel@tonic-gate (nextra_matches_allowed > 0)) { 17677c478bd9Sstevel@tonic-gate nextra_matches_allowed--; 17687c478bd9Sstevel@tonic-gate if (push_stringp(stringp) == (char *)0) 17697c478bd9Sstevel@tonic-gate return ((char *)0); 17707c478bd9Sstevel@tonic-gate stringp = test_stringp; 17717c478bd9Sstevel@tonic-gate test_stringp = test_string(stringp, regexp); 17727c478bd9Sstevel@tonic-gate } 17737c478bd9Sstevel@tonic-gate regexp += group_length + 2; 17747c478bd9Sstevel@tonic-gate 
return (test_repeated_group(repeat_startp, stringp, 17757c478bd9Sstevel@tonic-gate regexp)); 17767c478bd9Sstevel@tonic-gate } 17777c478bd9Sstevel@tonic-gate /* end case COUNTED_GROUP */ 17787c478bd9Sstevel@tonic-gate 17797c478bd9Sstevel@tonic-gate case END_GROUP|COUNT: /* (.....){min_count,max_count} */ 17807c478bd9Sstevel@tonic-gate 17817c478bd9Sstevel@tonic-gate /* encoded as <END_GROUP|COUNT> */ 17827c478bd9Sstevel@tonic-gate 17837c478bd9Sstevel@tonic-gate /* return from recursive call to test_string() */ 17847c478bd9Sstevel@tonic-gate 17857c478bd9Sstevel@tonic-gate return (stringp); 17867c478bd9Sstevel@tonic-gate 17877c478bd9Sstevel@tonic-gate /* end case END_GROUP|COUNT */ 17887c478bd9Sstevel@tonic-gate 17897c478bd9Sstevel@tonic-gate case END_OF_STRING_MARK: 17907c478bd9Sstevel@tonic-gate 17917c478bd9Sstevel@tonic-gate /* encoded as <END_OF_STRING_MARK><END_REGEX> */ 17927c478bd9Sstevel@tonic-gate 17937c478bd9Sstevel@tonic-gate if (*stringp == '\0') { 17947c478bd9Sstevel@tonic-gate regexp++; 17957c478bd9Sstevel@tonic-gate } else { 17967c478bd9Sstevel@tonic-gate return ((char *)0); 17977c478bd9Sstevel@tonic-gate } 17987c478bd9Sstevel@tonic-gate break; /* end case END_OF_STRING_MARK */ 17997c478bd9Sstevel@tonic-gate 18007c478bd9Sstevel@tonic-gate case END_REGEX: /* end of the compiled regular expression */ 18017c478bd9Sstevel@tonic-gate 18027c478bd9Sstevel@tonic-gate /* encoded as <END_REGEX> */ 18037c478bd9Sstevel@tonic-gate 18047c478bd9Sstevel@tonic-gate return (stringp); 18057c478bd9Sstevel@tonic-gate 18067c478bd9Sstevel@tonic-gate /* end case END_REGEX */ 18077c478bd9Sstevel@tonic-gate 18087c478bd9Sstevel@tonic-gate default: 18097c478bd9Sstevel@tonic-gate 18107c478bd9Sstevel@tonic-gate return ((char *)0); 18117c478bd9Sstevel@tonic-gate 18127c478bd9Sstevel@tonic-gate } /* end switch (*regexp) */ 18137c478bd9Sstevel@tonic-gate 18147c478bd9Sstevel@tonic-gate } /* end for (;;) */ 18157c478bd9Sstevel@tonic-gate 18167c478bd9Sstevel@tonic-gate } /* test_string() */
1817