/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ #pragma ident "%Z%%M% %I% %E% SMI" /* * IMPORTANT NOTE: * * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. * IT IS **NOT** CHARACTER SET INDEPENDENT. * */ #pragma weak regex = _regex /* CONSTANTS SHARED WITH regcmp() */ #include "regex.h" #include "lint.h" #include "mtlib.h" #include #include #include #include #include #include "tsd.h" /* PRIVATE CONSTANTS */ #define ADD_256_TO_GROUP_LENGTH 0x1 #define ADD_512_TO_GROUP_LENGTH 0x2 #define ADD_768_TO_GROUP_LENGTH 0x3 #define ADDED_LENGTH_BITS 0x3 #define SINGLE_BYTE_MASK 0xff #define STRINGP_STACK_SIZE 50 /* PRIVATE TYPE DEFINITIONS */ typedef enum { NOT_IN_CLASS = 0, IN_CLASS } char_test_condition_t; typedef enum { TESTING_CHAR = 0, CONDITION_TRUE, CONDITION_FALSE, CHAR_TEST_ERROR } char_test_result_t; /* PRIVATE GLOBAL VARIABLES */ static mutex_t regex_lock = DEFAULTMUTEX; static int return_arg_number[NSUBSTRINGS]; static const char *substring_endp[NSUBSTRINGS]; static const char *substring_startp[NSUBSTRINGS]; static const char *stringp_stack[STRINGP_STACK_SIZE]; static const char **stringp_stackp; /* DECLARATIONS OF PRIVATE FUNCTIONS */ static int get_wchar(wchar_t *wcharp, const char *stringp); static void get_match_counts(int *nmust_matchp, int *nextra_matches_allowedp, const char *count_stringp); static boolean_t in_wchar_range(wchar_t test_char, wchar_t lower_char, wchar_t upper_char); static const char * pop_stringp(void); static const char * previous_charp(const char *current_charp); static const char * push_stringp(const char *stringp); static char_test_result_t test_char_against_ascii_class(char test_char, const char *classp, char_test_condition_t test_condition); static char_test_result_t test_char_against_multibyte_class(wchar_t test_char, const char *classp, char_test_condition_t test_condition); /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ static char_test_result_t test_char_against_old_ascii_class(char test_char, const char *classp, char_test_condition_t test_condition); static const char * test_repeated_ascii_char(const char *repeat_startp, const char *stringp, const char *regexp); static const char * test_repeated_multibyte_char(const char *repeat_startp, const char *stringp, const char *regexp); static const char * test_repeated_group(const char *repeat_startp, const char *stringp, const char *regexp); static const char * test_string(const char *stringp, const char *regexp); /* DEFINITIONS OF PUBLIC VARIABLES */ char *__loc1; /* * reserve thread-specific storage for __loc1 */ char ** ____loc1(void) { if (_thr_main()) return (&__loc1); return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL)); } #define __loc1 (*(____loc1())) /* DEFINITION OF regex() */ extern char * _regex(const char *regexp, const char *stringp, ...) { va_list arg_listp; int char_size; const char *end_of_matchp; wchar_t regex_wchar; char *return_argp[NSUBSTRINGS]; char *returned_substringp; int substringn; const char *substringp; wchar_t string_wchar; if (____loc1() == (char **)0) { return ((char *)0); } else { lmutex_lock(®ex_lock); __loc1 = (char *)0; } if ((stringp == (char *)0) || (regexp == (char *)0)) { lmutex_unlock(®ex_lock); return ((char *)0); } /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ substringn = 0; va_start(arg_listp, stringp); while (substringn < NSUBSTRINGS) { return_argp[substringn] = va_arg(arg_listp, char *); substring_startp[substringn] = (char *)0; return_arg_number[substringn] = -1; substringn++; } va_end(arg_listp); /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ end_of_matchp = (char *)0; stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; if ((int)*regexp == (int)START_OF_STRING_MARK) { /* * the match must start at the beginning of the string */ __loc1 = (char *)stringp; regexp++; end_of_matchp = test_string(stringp, regexp); } else if ((int)*regexp == (int)ASCII_CHAR) { /* * test a string against a regular expression * that starts with a single ASCII character: * * move to each character in the string that matches * the first character in the regular expression * and test the remaining string */ while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { stringp++; } while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { end_of_matchp = test_string(stringp, regexp); if (end_of_matchp != (char *)0) { __loc1 = (char *)stringp; } else { stringp++; while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { stringp++; } } } } else if (!multibyte) { /* * if the value of the "multibyte" macro defined in * is false, regex() is running in an ASCII locale; * test an ASCII string against an ASCII regular expression * that doesn't start with a single ASCII character: * * move forward in the string one byte at a time, testing * the remaining string against the regular expression */ end_of_matchp = test_string(stringp, regexp); while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { stringp++; end_of_matchp = test_string(stringp, regexp); } if (end_of_matchp != (char *)0) { __loc1 = (char *)stringp; } } else if ((int)*regexp == (int)MULTIBYTE_CHAR) { /* * test a multibyte string against a multibyte regular expression * that starts with a single multibyte character: * * move to each character in the string that matches * the first character in the regular expression * and test the remaining string */ (void) get_wchar(®ex_wchar, regexp + 1); char_size = get_wchar(&string_wchar, stringp); while ((string_wchar != regex_wchar) && (char_size > 0)) { stringp += char_size; char_size = get_wchar(&string_wchar, stringp); } while ((end_of_matchp == (char *)0) && (char_size > 0)) { end_of_matchp = test_string(stringp, regexp); if (end_of_matchp != (char *)0) { __loc1 = (char *)stringp; } else { stringp += char_size; char_size = get_wchar(&string_wchar, stringp); while ((string_wchar != regex_wchar) && (char_size > 0)) { stringp += char_size; char_size = get_wchar(&string_wchar, stringp); } } } } else { /* * test a multibyte string against a multibyte regular expression * that doesn't start with a single multibyte character * * move forward in the string one multibyte character at a time, * testing the remaining string against the regular expression */ end_of_matchp = test_string(stringp, regexp); char_size = get_wchar(&string_wchar, stringp); while ((end_of_matchp == (char *)0) && (char_size > 0)) { stringp += char_size; end_of_matchp = test_string(stringp, regexp); char_size = get_wchar(&string_wchar, stringp); } if (end_of_matchp != (char *)0) { __loc1 = (char *)stringp; } } /* * Return substrings that matched subexpressions for which * matching substrings are to be returned. * * NOTE: * * According to manual page regcmp(3G), regex() returns substrings * that match subexpressions even when no substring matches the * entire regular expression. */ substringn = 0; while (substringn < NSUBSTRINGS) { substringp = substring_startp[substringn]; if ((substringp != (char *)0) && (return_arg_number[substringn] >= 0)) { returned_substringp = return_argp[return_arg_number[substringn]]; if (returned_substringp != (char *)0) { while (substringp < substring_endp[substringn]) { *returned_substringp = (char)*substringp; returned_substringp++; substringp++; } *returned_substringp = '\0'; } } substringn++; } lmutex_unlock(®ex_lock); return ((char *)end_of_matchp); } /* regex() */ /* DEFINITIONS OF PRIVATE FUNCTIONS */ static int get_wchar(wchar_t *wcharp, const char *stringp) { int char_size; if (stringp == (char *)0) { char_size = 0; *wcharp = (wchar_t)((unsigned int)'\0'); } else if (*stringp == '\0') { char_size = 0; *wcharp = (wchar_t)((unsigned int)*stringp); } else if ((unsigned char)*stringp <= (unsigned char)0x7f) { char_size = 1; *wcharp = (wchar_t)((unsigned int)*stringp); } else { char_size = mbtowc(wcharp, stringp, MB_LEN_MAX); } return (char_size); } static void get_match_counts(int *nmust_matchp, int *nextra_matches_allowedp, const char *count_stringp) { int minimum_match_count; int maximum_match_count; minimum_match_count = (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); *nmust_matchp = minimum_match_count; count_stringp++; maximum_match_count = (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK); if (maximum_match_count == (int)UNLIMITED) { *nextra_matches_allowedp = (int)UNLIMITED; } else { *nextra_matches_allowedp = maximum_match_count - minimum_match_count; } return; } /* get_match_counts() */ static boolean_t in_wchar_range(wchar_t test_char, wchar_t lower_char, wchar_t upper_char) { return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && (lower_char <= test_char) && (test_char <= upper_char)) || (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) && ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) && (lower_char <= test_char) && (test_char <= upper_char))); } /* in_wchar_range() */ static const char * pop_stringp(void) { const char *stringp; if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) { return ((char *)0); } else { stringp = *stringp_stackp; stringp_stackp++; return (stringp); } } static const char * previous_charp(const char *current_charp) { /* * returns the pointer to the previous character in * a string of multibyte characters */ const char *prev_cs0 = current_charp - 1; const char *prev_cs1 = current_charp - eucw1; const char *prev_cs2 = current_charp - eucw2 - 1; const char *prev_cs3 = current_charp - eucw3 - 1; const char *prev_charp; if ((unsigned char)*prev_cs0 <= 0x7f) { prev_charp = prev_cs0; } else if ((unsigned char)*prev_cs2 == SS2) { prev_charp = prev_cs2; } else if ((unsigned char)*prev_cs3 == SS3) { prev_charp = prev_cs3; } else { prev_charp = prev_cs1; } return (prev_charp); } /* previous_charp() */ static const char * push_stringp(const char *stringp) { if (stringp_stackp <= &stringp_stack[0]) { return ((char *)0); } else { stringp_stackp--; *stringp_stackp = stringp; return (stringp); } } static char_test_result_t test_char_against_ascii_class(char test_char, const char *classp, char_test_condition_t test_condition) { /* * tests a character for membership in an ASCII character class compiled * by the internationalized version of regcmp(); * * NOTE: The internationalized version of regcmp() compiles * the range a-z in an ASCII character class to aTHRUz. */ int nbytes_to_check; nbytes_to_check = (int)*classp; classp++; nbytes_to_check--; while (nbytes_to_check > 0) { if (test_char == *classp) { if (test_condition == IN_CLASS) return (CONDITION_TRUE); else return (CONDITION_FALSE); } else if (*classp == THRU) { if ((*(classp - 1) <= test_char) && (test_char <= *(classp + 1))) { if (test_condition == IN_CLASS) return (CONDITION_TRUE); else return (CONDITION_FALSE); } else { classp += 2; nbytes_to_check -= 2; } } else { classp++; nbytes_to_check--; } } if (test_condition == NOT_IN_CLASS) { return (CONDITION_TRUE); } else { return (CONDITION_FALSE); } } /* test_char_against_ascii_class() */ static char_test_result_t test_char_against_multibyte_class(wchar_t test_char, const char *classp, char_test_condition_t test_condition) { /* * tests a character for membership in a multibyte character class; * * NOTE: The range a-z in a multibyte character class compiles to * aTHRUz. */ int char_size; wchar_t current_char; int nbytes_to_check; wchar_t previous_char; nbytes_to_check = (int)*classp; classp++; nbytes_to_check--; char_size = get_wchar(¤t_char, classp); if (char_size <= 0) { return (CHAR_TEST_ERROR); } else if (test_char == current_char) { if (test_condition == IN_CLASS) { return (CONDITION_TRUE); } else { return (CONDITION_FALSE); } } else { classp += char_size; nbytes_to_check -= char_size; } while (nbytes_to_check > 0) { previous_char = current_char; char_size = get_wchar(¤t_char, classp); if (char_size <= 0) { return (CHAR_TEST_ERROR); } else if (test_char == current_char) { if (test_condition == IN_CLASS) { return (CONDITION_TRUE); } else { return (CONDITION_FALSE); } } else if (current_char == THRU) { classp += char_size; nbytes_to_check -= char_size; char_size = get_wchar(¤t_char, classp); if (char_size <= 0) { return (CHAR_TEST_ERROR); } else if (in_wchar_range(test_char, previous_char, current_char)) { if (test_condition == IN_CLASS) { return (CONDITION_TRUE); } else { return (CONDITION_FALSE); } } else { classp += char_size; nbytes_to_check -= char_size; } } else { classp += char_size; nbytes_to_check -= char_size; } } if (test_condition == NOT_IN_CLASS) { return (CONDITION_TRUE); } else { return (CONDITION_FALSE); } } /* test_char_against_multibyte_class() */ /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */ static char_test_result_t test_char_against_old_ascii_class(char test_char, const char *classp, char_test_condition_t test_condition) { /* * tests a character for membership in an ASCII character class compiled * by the ASCII version of regcmp(); * * NOTE: ASCII versions of regcmp() compile the range a-z in an * ASCII character class to THRUaz. The internationalized * version compiles the same range to aTHRUz. */ int nbytes_to_check; nbytes_to_check = (int)*classp; classp++; nbytes_to_check--; while (nbytes_to_check > 0) { if (test_char == *classp) { if (test_condition == IN_CLASS) { return (CONDITION_TRUE); } else { return (CONDITION_FALSE); } } else if (*classp == THRU) { if ((*(classp + 1) <= test_char) && (test_char <= *(classp + 2))) { if (test_condition == IN_CLASS) { return (CONDITION_TRUE); } else { return (CONDITION_FALSE); } } else { classp += 3; nbytes_to_check -= 3; } } else { classp++; nbytes_to_check--; } } if (test_condition == NOT_IN_CLASS) { return (CONDITION_TRUE); } else { return (CONDITION_FALSE); } } /* test_char_against_old_ascii_class() */ static const char * test_repeated_ascii_char(const char *repeat_startp, const char *stringp, const char *regexp) { const char *end_of_matchp; end_of_matchp = test_string(stringp, regexp); while ((end_of_matchp == (char *)0) && (stringp > repeat_startp)) { stringp--; end_of_matchp = test_string(stringp, regexp); } return (end_of_matchp); } static const char * test_repeated_multibyte_char(const char *repeat_startp, const char *stringp, const char *regexp) { const char *end_of_matchp; end_of_matchp = test_string(stringp, regexp); while ((end_of_matchp == (char *)0) && (stringp > repeat_startp)) { stringp = previous_charp(stringp); end_of_matchp = test_string(stringp, regexp); } return (end_of_matchp); } static const char * test_repeated_group(const char *repeat_startp, const char *stringp, const char *regexp) { const char *end_of_matchp; end_of_matchp = test_string(stringp, regexp); while ((end_of_matchp == (char *)0) && (stringp > repeat_startp)) { stringp = pop_stringp(); if (stringp == (char *)0) { return ((char *)0); } end_of_matchp = test_string(stringp, regexp); } return (end_of_matchp); } static const char * test_string(const char *stringp, const char *regexp) { /* * returns a pointer to the first character following the first * substring of the string addressed by stringp that matches * the compiled regular expression addressed by regexp */ unsigned int group_length; int nextra_matches_allowed; int nmust_match; wchar_t regex_wchar; int regex_char_size; const char *repeat_startp; unsigned int return_argn; wchar_t string_wchar; int string_char_size; unsigned int substringn; char_test_condition_t test_condition; const char *test_stringp; for (;;) { /* * Exit the loop via a return whenever there's a match * or it's clear that there can be no match. */ switch ((int)*regexp) { /* * No fall-through. * Each case ends with either a return or with stringp * addressing the next character to be tested and regexp * addressing the next compiled regular expression * * NOTE: The comments for each case give the meaning * of the compiled regular expression decoded by the case * and the character string that the compiled regular * expression uses to encode the case. Each single * character encoded in the compiled regular expression * is shown enclosed in angle brackets (<>). Each * compiled regular expression begins with a marker * character which is shown as a named constant * (e.g. ). Character constants are shown * enclosed in single quotes (e.g. <'$'>). All other * single characters encoded in the compiled regular * expression are shown as lower case variable names * (e.g. or ). Multicharacter * strings encoded in the compiled regular expression * are shown as variable names followed by elipses * (e.g. ). */ case ASCII_CHAR: /* single ASCII char */ /* encoded as */ regexp++; if (*regexp == *stringp) { regexp++; stringp++; } else { return ((char *)0); } break; /* end case ASCII_CHAR */ case MULTIBYTE_CHAR: /* single multibyte char */ /* encoded as */ regexp++; regex_char_size = get_wchar(®ex_wchar, regexp); string_char_size = get_wchar(&string_wchar, stringp); if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { return ((char *)0); } else { regexp += regex_char_size; stringp += string_char_size; } break; /* end case MULTIBYTE_CHAR */ case ANY_CHAR: /* any single ASCII or multibyte char */ /* encoded as */ if (!multibyte) { if (*stringp == '\0') { return ((char *)0); } else { regexp++; stringp++; } } else { string_char_size = get_wchar(&string_wchar, stringp); if (string_char_size <= 0) { return ((char *)0); } else { regexp++; stringp += string_char_size; } } break; /* end case ANY_CHAR */ case IN_ASCII_CHAR_CLASS: /* [.....] */ case NOT_IN_ASCII_CHAR_CLASS: /* * encoded as * or * * NOTE: includes the byte */ if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ if ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { regexp += (int)*regexp; /* add the class length to regexp */ stringp++; } else { return ((char *)0); } break; /* end case IN_ASCII_CHAR_CLASS */ case IN_MULTIBYTE_CHAR_CLASS: /* [....] */ case NOT_IN_MULTIBYTE_CHAR_CLASS: /* * encoded as * or * * NOTE: includes the byte */ if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ string_char_size = get_wchar(&string_wchar, stringp); if ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE)) { regexp += (int)*regexp; /* add the class length to regexp */ stringp += string_char_size; } else { return ((char *)0); } break; /* end case IN_MULTIBYTE_CHAR_CLASS */ case IN_OLD_ASCII_CHAR_CLASS: /* [...] */ case NOT_IN_OLD_ASCII_CHAR_CLASS: /* * encoded as * or * * NOTE: includes the byte */ if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ if ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { regexp += (int)*regexp; /* add the class length to regexp */ stringp++; } else { return ((char *)0); } break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ case SIMPLE_GROUP: /* (.....) */ /* encoded as */ regexp += 2; break; /* end case SIMPLE_GROUP */ case END_GROUP: /* (.....) */ /* encoded as */ regexp += 2; break; /* end case END_GROUP */ case SAVED_GROUP: /* (.....)$0-9 */ /* encoded as */ regexp++; substringn = (unsigned int)*regexp; if (substringn >= NSUBSTRINGS) return ((char *)0); substring_startp[substringn] = stringp; regexp++; break; /* end case SAVED_GROUP */ case END_SAVED_GROUP: /* (.....)$0-9 */ /* * encoded as \ * */ regexp++; substringn = (unsigned int)*regexp; if (substringn >= NSUBSTRINGS) return ((char *)0); substring_endp[substringn] = stringp; regexp++; return_argn = (unsigned int)*regexp; if (return_argn >= NSUBSTRINGS) return ((char *)0); return_arg_number[substringn] = return_argn; regexp++; break; /* end case END_SAVED_GROUP */ case ASCII_CHAR|ZERO_OR_MORE: /* char* */ /* encoded as */ regexp++; repeat_startp = stringp; while (*stringp == *regexp) { stringp++; } regexp++; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); /* end case ASCII_CHAR|ZERO_OR_MORE */ case ASCII_CHAR|ONE_OR_MORE: /* char+ */ /* encoded as */ regexp++; if (*stringp != *regexp) { return ((char *)0); } else { stringp++; repeat_startp = stringp; while (*stringp == *regexp) { stringp++; } regexp++; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case ASCII_CHAR|ONE_OR_MORE */ case ASCII_CHAR|COUNT: /* char{min_count,max_count} */ /* * encoded as \ * */ regexp++; get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + 1); while ((*stringp == *regexp) && (nmust_match > 0)) { nmust_match--; stringp++; } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while (*stringp == *regexp) { stringp++; } regexp += 3; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((*stringp == *regexp) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp++; } regexp += 3; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case ASCII_CHAR|COUNT */ case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */ /* encoded as */ regexp++; regex_char_size = get_wchar(®ex_wchar, regexp); repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (string_wchar == regex_wchar)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += regex_char_size; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */ /* encoded as */ regexp++; regex_char_size = get_wchar(®ex_wchar, regexp); string_char_size = get_wchar(&string_wchar, stringp); if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { return ((char *)0); } else { stringp += string_char_size; repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (string_wchar == regex_wchar)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += regex_char_size; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */ /* * encoded as \ * */ regexp++; regex_char_size = get_wchar(®ex_wchar, regexp); get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + regex_char_size); string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (string_wchar == regex_wchar) && (nmust_match > 0)) { nmust_match--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while ((string_char_size > 0) && (string_wchar == regex_wchar)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += regex_char_size + 2; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((string_char_size > 0) && (string_wchar == regex_wchar) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += regex_char_size + 2; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case MULTIBYTE_CHAR|COUNT */ case ANY_CHAR|ZERO_OR_MORE: /* .* */ /* encoded as */ repeat_startp = stringp; if (!multibyte) { while (*stringp != '\0') { stringp++; } regexp++; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } else { string_char_size = get_wchar(&string_wchar, stringp); while (string_char_size > 0) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp++; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case */ case ANY_CHAR|ONE_OR_MORE: /* .+ */ /* encoded as */ if (!multibyte) { if (*stringp == '\0') { return ((char *)0); } else { stringp++; repeat_startp = stringp; while (*stringp != '\0') { stringp++; } regexp++; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } } else { string_char_size = get_wchar(&string_wchar, stringp); if (string_char_size <= 0) { return ((char *)0); } else { stringp += string_char_size; repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while (string_char_size > 0) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp++; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } } /* end case */ case ANY_CHAR|COUNT: /* .{min_count,max_count} */ /* * encoded as \ * */ get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + 1); if (!multibyte) { while ((*stringp != '\0') && (nmust_match > 0)) { nmust_match--; stringp++; } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while (*stringp != '\0') { stringp++; } regexp += 3; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((*stringp != '\0') && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp++; } regexp += 3; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } } else { /* multibyte character */ string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (nmust_match > 0)) { nmust_match--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while (string_char_size > 0) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += 3; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((string_char_size > 0) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += 3; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } } /* end case ANY_CHAR|COUNT */ case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* * encoded as \ * * or \ * * * NOTE: includes the byte */ if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* * encoded as \ * * or \ * * * NOTE: includes the byte */ if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ if ((*stringp == '\0') || (test_char_against_ascii_class(*stringp, regexp, test_condition) != CONDITION_TRUE)) { return ((char *)0); } else { stringp++; repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */ case NOT_IN_ASCII_CHAR_CLASS | COUNT: /* * endoded as \ * \ * * or \ * \ * * * NOTE: includes the byte, * but not the or * bytes */ if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + (int)*regexp); while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE) && (nmust_match > 0)) { nmust_match--; stringp++; } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp + 2; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp++; } regexp += (int)*regexp + 2; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case IN_ASCII_CHAR_CLASS|COUNT */ case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* * encoded as \ * * or \ * * * NOTE: includes the byte */ if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* * encoded as \ * * or \ * * * NOTE: includes the byte */ if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ string_char_size = get_wchar(&string_wchar, stringp); if ((string_char_size <= 0) || (test_char_against_multibyte_class(string_wchar, regexp, test_condition) != CONDITION_TRUE)) { return ((char *)0); } else { stringp += string_char_size; repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT: /* * encoded as \ * * or \ * * * NOTE: includes the byte * but not the or * bytes */ if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + (int)*regexp); string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE) && (nmust_match > 0)) { nmust_match--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += (int)*regexp + 2; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += (int)*regexp + 2; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* * encoded as \ * * or \ * * * NOTE: includes the byte */ if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* * encoded as \ * * or \ * * * NOTE: includes the byte */ if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ if ((*stringp == '\0') || (test_char_against_old_ascii_class(*stringp, regexp, test_condition) != CONDITION_TRUE)) { return ((char *)0); } else { stringp++; repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT: /* * encoded as \ * \ * * or \ * \ * * * NOTE: includes the byte * but not the or * bytes */ if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the byte */ get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + (int)*regexp); while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE) && (nmust_match > 0)) { nmust_match--; stringp++; } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp + 2; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp++; } regexp += (int)*regexp + 2; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ case ZERO_OR_MORE_GROUP: /* (.....)* */ case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: /* * encoded as \ * \ * * * NOTE: * * group_length + (256 * ADDED_LENGTH_BITS) == * length_of(\ * ) * */ group_length = (((unsigned int)*regexp & ADDED_LENGTH_BITS) << TIMES_256_SHIFT); regexp++; group_length += (unsigned int)*regexp; regexp++; repeat_startp = stringp; test_stringp = test_string(stringp, regexp); while (test_stringp != (char *)0) { if (push_stringp(stringp) == (char *)0) return ((char *)0); stringp = test_stringp; test_stringp = test_string(stringp, regexp); } regexp += group_length; return (test_repeated_group(repeat_startp, stringp, regexp)); /* end case ZERO_OR_MORE_GROUP */ case END_GROUP|ZERO_OR_MORE: /* (.....)* */ /* encoded as */ /* return from recursive call to test_string() */ return ((char *)stringp); /* end case END_GROUP|ZERO_OR_MORE */ case ONE_OR_MORE_GROUP: /* (.....)+ */ case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: /* * encoded as \ * \ * * * NOTE: * * group_length + (256 * ADDED_LENGTH_BITS) == * length_of(\ * ) */ group_length = (((unsigned int)*regexp & ADDED_LENGTH_BITS) << TIMES_256_SHIFT); regexp++; group_length += (unsigned int)*regexp; regexp++; stringp = test_string(stringp, regexp); if (stringp == (char *)0) return ((char *)0); repeat_startp = stringp; test_stringp = test_string(stringp, regexp); while (test_stringp != (char *)0) { if (push_stringp(stringp) == (char *)0) return ((char *)0); stringp = test_stringp; test_stringp = test_string(stringp, regexp); } regexp += group_length; return (test_repeated_group(repeat_startp, stringp, regexp)); /* end case ONE_OR_MORE_GROUP */ case END_GROUP|ONE_OR_MORE: /* (.....)+ */ /* encoded as */ /* return from recursive call to test_string() */ return ((char *)stringp); /* end case END_GROUP|ONE_OR_MORE */ case COUNTED_GROUP: /* (.....){max_count,min_count} */ case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH: case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH: case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH: /* * encoded as \ * \\ * * * NOTE: * * group_length + (256 * ADDED_LENGTH_BITS) == * length_of() * * but does not include the or * bytes */ group_length = (((unsigned int)*regexp & ADDED_LENGTH_BITS) << TIMES_256_SHIFT); regexp++; group_length += (unsigned int)*regexp; regexp++; get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + group_length); test_stringp = test_string(stringp, regexp); while ((test_stringp != (char *)0) && (nmust_match > 0)) { stringp = test_stringp; nmust_match--; test_stringp = test_string(stringp, regexp); } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while (test_stringp != (char *)0) { if (push_stringp(stringp) == (char *)0) return ((char *)0); stringp = test_stringp; test_stringp = test_string(stringp, regexp); } regexp += group_length + 2; return (test_repeated_group(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((test_stringp != (char *)0) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; if (push_stringp(stringp) == (char *)0) return ((char *)0); stringp = test_stringp; test_stringp = test_string(stringp, regexp); } regexp += group_length + 2; return (test_repeated_group(repeat_startp, stringp, regexp)); } /* end case COUNTED_GROUP */ case END_GROUP|COUNT: /* (.....){max_count,min_count} */ /* encoded as */ /* return from recursive call to test_string() */ return (stringp); /* end case END_GROUP|COUNT */ case END_OF_STRING_MARK: /* encoded as */ if (*stringp == '\0') { regexp++; } else { return ((char *)0); } break; /* end case END_OF_STRING_MARK */ case END_REGEX: /* end of the compiled regular expression */ /* encoded as */ return (stringp); /* end case END_REGEX */ default: return ((char *)0); } /* end switch (*regexp) */ } /* end for (;;) */ } /* test_string() */