17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57257d1b4Sraf * Common Development and Distribution License (the "License"). 67257d1b4Sraf * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217257d1b4Sraf 227c478bd9Sstevel@tonic-gate /* 237257d1b4Sraf * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 287c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate /* 317c478bd9Sstevel@tonic-gate * IMPORTANT NOTE: 327c478bd9Sstevel@tonic-gate * 337c478bd9Sstevel@tonic-gate * regcmp() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS. 347c478bd9Sstevel@tonic-gate * IT IS **NOT** CHARACTER SET INDEPENDENT. 357c478bd9Sstevel@tonic-gate * 367c478bd9Sstevel@tonic-gate */ 377c478bd9Sstevel@tonic-gate 387257d1b4Sraf #pragma weak _regcmp = regcmp 397c478bd9Sstevel@tonic-gate 407257d1b4Sraf #include "lint.h" 417c478bd9Sstevel@tonic-gate #include "mtlib.h" 427c478bd9Sstevel@tonic-gate #include <limits.h> 437c478bd9Sstevel@tonic-gate #include <stdarg.h> 447c478bd9Sstevel@tonic-gate #include <stdlib.h> 457c478bd9Sstevel@tonic-gate #include <thread.h> 467c478bd9Sstevel@tonic-gate #include <wctype.h> 477c478bd9Sstevel@tonic-gate #include <widec.h> 487c478bd9Sstevel@tonic-gate #include <string.h> 497c478bd9Sstevel@tonic-gate #include "tsd.h" 507c478bd9Sstevel@tonic-gate 517c478bd9Sstevel@tonic-gate 527c478bd9Sstevel@tonic-gate /* CONSTANTS SHARED WITH regex() */ 537c478bd9Sstevel@tonic-gate 547c478bd9Sstevel@tonic-gate #include "regex.h" 557c478bd9Sstevel@tonic-gate 567c478bd9Sstevel@tonic-gate /* PRIVATE CONSTANTS */ 577c478bd9Sstevel@tonic-gate 587c478bd9Sstevel@tonic-gate #define BACKSLASH '\\' 597c478bd9Sstevel@tonic-gate #define CIRCUMFLEX '^' 607c478bd9Sstevel@tonic-gate #define COMMA ',' 617c478bd9Sstevel@tonic-gate #define DASH '-' 627c478bd9Sstevel@tonic-gate #define DOLLAR_SIGN '$' 637c478bd9Sstevel@tonic-gate #define DOT '.' 647c478bd9Sstevel@tonic-gate #define LEFT_CURLY_BRACE '{' 657c478bd9Sstevel@tonic-gate #define LEFT_PAREN '(' 667c478bd9Sstevel@tonic-gate #define LEFT_SQUARE_BRACKET '[' 677c478bd9Sstevel@tonic-gate #define PLUS '+' 687c478bd9Sstevel@tonic-gate #define RIGHT_CURLY_BRACE '}' 697c478bd9Sstevel@tonic-gate #define RIGHT_PAREN ')' 707c478bd9Sstevel@tonic-gate #define RIGHT_SQUARE_BRACKET ']' 717c478bd9Sstevel@tonic-gate #define SINGLE_BYTE_MASK 0xff 727c478bd9Sstevel@tonic-gate #define STRINGP_STACK_SIZE 50 737c478bd9Sstevel@tonic-gate #define STAR '*' 747c478bd9Sstevel@tonic-gate 757c478bd9Sstevel@tonic-gate /* PRIVATE GLOBAL VARIABLES */ 767c478bd9Sstevel@tonic-gate 777c478bd9Sstevel@tonic-gate static char *compilep_stack[STRINGP_STACK_SIZE]; 787c478bd9Sstevel@tonic-gate static char **compilep_stackp; 797c478bd9Sstevel@tonic-gate static mutex_t regcmp_lock = DEFAULTMUTEX; 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate /* DECLARATIONS OF PRIVATE FUNCTIONS */ 827c478bd9Sstevel@tonic-gate 837c478bd9Sstevel@tonic-gate static int add_char(char *compilep, wchar_t wchar); 847c478bd9Sstevel@tonic-gate static int add_single_char_expr(char *compilep, wchar_t wchar); 857c478bd9Sstevel@tonic-gate 867c478bd9Sstevel@tonic-gate #define ERROR_EXIT(mutex_lockp, arg_listp, compile_startp) \ 877c478bd9Sstevel@tonic-gate \ 887c478bd9Sstevel@tonic-gate va_end(arg_listp); \ 897c478bd9Sstevel@tonic-gate lmutex_unlock(mutex_lockp); \ 907c478bd9Sstevel@tonic-gate if ((compile_startp) != (char *)0) \ 917c478bd9Sstevel@tonic-gate free((void *)compile_startp); \ 927c478bd9Sstevel@tonic-gate return ((char *)0) 937c478bd9Sstevel@tonic-gate 947c478bd9Sstevel@tonic-gate static int get_count(int *countp, const char *regexp); 957c478bd9Sstevel@tonic-gate static int get_digit(const char *regexp); 967c478bd9Sstevel@tonic-gate static int get_wchar(wchar_t *wchar, const char *regexp); 977c478bd9Sstevel@tonic-gate static char *pop_compilep(void); 987c478bd9Sstevel@tonic-gate static char *push_compilep(char *compilep); 997c478bd9Sstevel@tonic-gate static boolean_t valid_range(wchar_t lower_char, wchar_t upper_char); 1007c478bd9Sstevel@tonic-gate 1017c478bd9Sstevel@tonic-gate 1027c478bd9Sstevel@tonic-gate /* DEFINITIONS OF PUBLIC VARIABLES */ 1037c478bd9Sstevel@tonic-gate 1047c478bd9Sstevel@tonic-gate int __i_size; 1057c478bd9Sstevel@tonic-gate 1067c478bd9Sstevel@tonic-gate /* 1077c478bd9Sstevel@tonic-gate * define thread-specific storage for __i_size 1087c478bd9Sstevel@tonic-gate * 1097c478bd9Sstevel@tonic-gate */ 1107c478bd9Sstevel@tonic-gate int * 1117c478bd9Sstevel@tonic-gate ___i_size(void) 1127c478bd9Sstevel@tonic-gate { 1137257d1b4Sraf if (thr_main()) 1147c478bd9Sstevel@tonic-gate return (&__i_size); 1157c478bd9Sstevel@tonic-gate return ((int *)tsdalloc(_T_REGCMP_ISIZE, sizeof (int), NULL)); 1167c478bd9Sstevel@tonic-gate } 1177c478bd9Sstevel@tonic-gate 1187c478bd9Sstevel@tonic-gate #define __i_size (*(___i_size())) 1197c478bd9Sstevel@tonic-gate 1207c478bd9Sstevel@tonic-gate /* DEFINITION OF regcmp() */ 1217c478bd9Sstevel@tonic-gate 1227c478bd9Sstevel@tonic-gate extern char * 1237c478bd9Sstevel@tonic-gate regcmp(const char *regexp, ...) 1247c478bd9Sstevel@tonic-gate { 1257c478bd9Sstevel@tonic-gate va_list arg_listp; 1267c478bd9Sstevel@tonic-gate size_t arg_strlen; 1277c478bd9Sstevel@tonic-gate boolean_t can_repeat; 1287c478bd9Sstevel@tonic-gate int char_size; 1297c478bd9Sstevel@tonic-gate unsigned int class_length; 1307c478bd9Sstevel@tonic-gate char *compilep; 1317c478bd9Sstevel@tonic-gate char *compile_startp = (char *)0; 1327c478bd9Sstevel@tonic-gate int count_length; 1337c478bd9Sstevel@tonic-gate wchar_t current_char; 1347c478bd9Sstevel@tonic-gate int expr_length; 1357c478bd9Sstevel@tonic-gate int groupn; 1367c478bd9Sstevel@tonic-gate unsigned int group_length; 1377c478bd9Sstevel@tonic-gate unsigned int high_bits; 1387c478bd9Sstevel@tonic-gate boolean_t dash_indicates_range; 1397c478bd9Sstevel@tonic-gate unsigned int low_bits; 1407c478bd9Sstevel@tonic-gate int max_count; 1417c478bd9Sstevel@tonic-gate int min_count; 1427c478bd9Sstevel@tonic-gate const char *next_argp; 1437c478bd9Sstevel@tonic-gate wchar_t first_char_in_range; 1447c478bd9Sstevel@tonic-gate char *regex_typep; 1457c478bd9Sstevel@tonic-gate int return_arg_number; 1467c478bd9Sstevel@tonic-gate int substringn; 1477c478bd9Sstevel@tonic-gate 1487c478bd9Sstevel@tonic-gate if (___i_size() == (int *)0) 1497c478bd9Sstevel@tonic-gate return ((char *)0); 1507c478bd9Sstevel@tonic-gate 1517c478bd9Sstevel@tonic-gate /* 1527c478bd9Sstevel@tonic-gate * When compiling a regular expression, regcmp() generates at most 1537c478bd9Sstevel@tonic-gate * two extra single-byte characters for each character in the 1547c478bd9Sstevel@tonic-gate * expression, so allocating three times the number of bytes in all 1557c478bd9Sstevel@tonic-gate * the strings that comprise the regular expression will ensure that 1567c478bd9Sstevel@tonic-gate * regcmp() won't overwrite the end of the allocated block when 1577c478bd9Sstevel@tonic-gate * compiling the expression. 1587c478bd9Sstevel@tonic-gate */ 1597c478bd9Sstevel@tonic-gate 1607c478bd9Sstevel@tonic-gate va_start(arg_listp, regexp); 1617c478bd9Sstevel@tonic-gate next_argp = regexp; 1627c478bd9Sstevel@tonic-gate arg_strlen = 0; 1637c478bd9Sstevel@tonic-gate while (next_argp != (char *)0) { 1647c478bd9Sstevel@tonic-gate arg_strlen += strlen(next_argp); 1657c478bd9Sstevel@tonic-gate next_argp = va_arg(arg_listp, /* const */ char *); 1667c478bd9Sstevel@tonic-gate } 1677c478bd9Sstevel@tonic-gate va_end(arg_listp); 1687c478bd9Sstevel@tonic-gate 1697c478bd9Sstevel@tonic-gate if (arg_strlen == 0) 1707c478bd9Sstevel@tonic-gate return ((char *)0); 171*f6bce3fcSJason King compile_startp = (char *)malloc(3 * arg_strlen + 1); 1727c478bd9Sstevel@tonic-gate if (compile_startp == (char *)0) 1737c478bd9Sstevel@tonic-gate return ((char *)0); 1747c478bd9Sstevel@tonic-gate 1757c478bd9Sstevel@tonic-gate lmutex_lock(®cmp_lock); 1767c478bd9Sstevel@tonic-gate __i_size = 0; 1777c478bd9Sstevel@tonic-gate compilep = compile_startp; 1787c478bd9Sstevel@tonic-gate compilep_stackp = &compilep_stack[STRINGP_STACK_SIZE]; 1797c478bd9Sstevel@tonic-gate 1807c478bd9Sstevel@tonic-gate /* GET THE FIRST CHARACTER IN THE REGULAR EXPRESSION */ 1817c478bd9Sstevel@tonic-gate va_start(arg_listp, regexp); 1827c478bd9Sstevel@tonic-gate next_argp = va_arg(arg_listp, /* const */ char *); 1837c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 1847c478bd9Sstevel@tonic-gate if (char_size < 0) { 1857c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, compile_startp); 1867c478bd9Sstevel@tonic-gate } else if (char_size > 0) { 1877c478bd9Sstevel@tonic-gate regexp += char_size; 1887c478bd9Sstevel@tonic-gate } else /* (char_size == 0 ) */ { 1897c478bd9Sstevel@tonic-gate regexp = next_argp; 1907c478bd9Sstevel@tonic-gate next_argp = va_arg(arg_listp, /* const */ char *); 1917c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 1927c478bd9Sstevel@tonic-gate if (char_size <= 0) { 1937c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, compile_startp); 1947c478bd9Sstevel@tonic-gate } else { 1957c478bd9Sstevel@tonic-gate regexp += char_size; 1967c478bd9Sstevel@tonic-gate } 1977c478bd9Sstevel@tonic-gate } 1987c478bd9Sstevel@tonic-gate 1997c478bd9Sstevel@tonic-gate /* FIND OUT IF THE EXPRESSION MUST START AT THE START OF A STRING */ 2007c478bd9Sstevel@tonic-gate 2017c478bd9Sstevel@tonic-gate if (current_char == CIRCUMFLEX) { 2027c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 2037c478bd9Sstevel@tonic-gate if (char_size < 0) { 2047c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, compile_startp); 2057c478bd9Sstevel@tonic-gate } else if (char_size > 0) { 2067c478bd9Sstevel@tonic-gate regexp += char_size; 2077c478bd9Sstevel@tonic-gate *compilep = (unsigned char)START_OF_STRING_MARK; 2087c478bd9Sstevel@tonic-gate compilep++; 2097c478bd9Sstevel@tonic-gate } else if /* (char_size == 0) && */ (next_argp != (char *)0) { 2107c478bd9Sstevel@tonic-gate regexp = next_argp; 2117c478bd9Sstevel@tonic-gate next_argp = va_arg(arg_listp, /* const */ char *); 2127c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 2137c478bd9Sstevel@tonic-gate if (char_size <= 0) { 2147c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 2157c478bd9Sstevel@tonic-gate compile_startp); 2167c478bd9Sstevel@tonic-gate } else { 2177c478bd9Sstevel@tonic-gate regexp += char_size; 2187c478bd9Sstevel@tonic-gate } 2197c478bd9Sstevel@tonic-gate *compilep = (unsigned char)START_OF_STRING_MARK; 2207c478bd9Sstevel@tonic-gate compilep++; 2217c478bd9Sstevel@tonic-gate } else { 2227c478bd9Sstevel@tonic-gate /* ((char_size==0) && (next_argp==(char *)0)) */ 2237c478bd9Sstevel@tonic-gate /* 2247c478bd9Sstevel@tonic-gate * the regular expression is "^" 2257c478bd9Sstevel@tonic-gate */ 2267c478bd9Sstevel@tonic-gate *compilep = (unsigned char)START_OF_STRING_MARK; 2277c478bd9Sstevel@tonic-gate compilep++; 2287c478bd9Sstevel@tonic-gate *compilep = (unsigned char)END_REGEX; 2297c478bd9Sstevel@tonic-gate compilep++; 2307c478bd9Sstevel@tonic-gate *compilep = '\0'; 2317c478bd9Sstevel@tonic-gate compilep++; 2327c478bd9Sstevel@tonic-gate __i_size = (int)(compilep - compile_startp); 2337c478bd9Sstevel@tonic-gate va_end(arg_listp); 2347c478bd9Sstevel@tonic-gate lmutex_unlock(®cmp_lock); 2357c478bd9Sstevel@tonic-gate return (compile_startp); 2367c478bd9Sstevel@tonic-gate } 2377c478bd9Sstevel@tonic-gate } 2387c478bd9Sstevel@tonic-gate 2397c478bd9Sstevel@tonic-gate /* COMPILE THE REGULAR EXPRESSION */ 2407c478bd9Sstevel@tonic-gate 2417c478bd9Sstevel@tonic-gate groupn = 0; 2427c478bd9Sstevel@tonic-gate substringn = 0; 2437c478bd9Sstevel@tonic-gate can_repeat = B_FALSE; 2447c478bd9Sstevel@tonic-gate for (;;) { 2457c478bd9Sstevel@tonic-gate 2467c478bd9Sstevel@tonic-gate /* 2477c478bd9Sstevel@tonic-gate * At the end of each iteration get the next character 2487c478bd9Sstevel@tonic-gate * from the regular expression and increment regexp to 2497c478bd9Sstevel@tonic-gate * point to the following character. Exit when all 2507c478bd9Sstevel@tonic-gate * the characters in all the strings in the argument 2517c478bd9Sstevel@tonic-gate * list have been read. 2527c478bd9Sstevel@tonic-gate */ 2537c478bd9Sstevel@tonic-gate 2547c478bd9Sstevel@tonic-gate switch (current_char) { 2557c478bd9Sstevel@tonic-gate 2567c478bd9Sstevel@tonic-gate /* 2577c478bd9Sstevel@tonic-gate * No fall-through. Each case ends with either 2587c478bd9Sstevel@tonic-gate * a break or an error exit. Each case starts 2597c478bd9Sstevel@tonic-gate * with compilep addressing the next location to 2607c478bd9Sstevel@tonic-gate * be written in the compiled regular expression, 2617c478bd9Sstevel@tonic-gate * and with regexp addressing the next character 2627c478bd9Sstevel@tonic-gate * to be read from the regular expression being 2637c478bd9Sstevel@tonic-gate * compiled. Each case that doesn't return 2647c478bd9Sstevel@tonic-gate * increments regexp to address the next character 2657c478bd9Sstevel@tonic-gate * to be read from the regular expression and 2667c478bd9Sstevel@tonic-gate * increments compilep to address the next 2677c478bd9Sstevel@tonic-gate * location to be written in the compiled 2687c478bd9Sstevel@tonic-gate * regular expression. 2697c478bd9Sstevel@tonic-gate * 2707c478bd9Sstevel@tonic-gate * NOTE: The comments for each case give the meaning 2717c478bd9Sstevel@tonic-gate * of the regular expression compiled by the case 2727c478bd9Sstevel@tonic-gate * and the character string written to the compiled 2737c478bd9Sstevel@tonic-gate * regular expression by the case. Each single 2747c478bd9Sstevel@tonic-gate * character 2757c478bd9Sstevel@tonic-gate * written to the compiled regular expression is 2767c478bd9Sstevel@tonic-gate * shown enclosed in angle brackets (<>). Each 2777c478bd9Sstevel@tonic-gate * compiled regular expression begins with a marker 2787c478bd9Sstevel@tonic-gate * character which is shown as a named constant 2797c478bd9Sstevel@tonic-gate * (e.g. <ASCII_CHAR>). Character constants are 2807c478bd9Sstevel@tonic-gate * shown enclosed in single quotes (e.g. <'$'>). 2817c478bd9Sstevel@tonic-gate * All other single characters written to the 2827c478bd9Sstevel@tonic-gate * compiled regular expression are shown as lower 2837c478bd9Sstevel@tonic-gate * case variable names (e.g. <ascii_char> or 2847c478bd9Sstevel@tonic-gate * <multibyte_char>). Multicharacter 2857c478bd9Sstevel@tonic-gate * strings written to the compiled regular expression 2867c478bd9Sstevel@tonic-gate * are shown as variable names followed by elipses 2877c478bd9Sstevel@tonic-gate * (e.g. <regex...>). 2887c478bd9Sstevel@tonic-gate */ 2897c478bd9Sstevel@tonic-gate 2907c478bd9Sstevel@tonic-gate case DOLLAR_SIGN: 2917c478bd9Sstevel@tonic-gate /* end of string marker or simple dollar sign */ 2927c478bd9Sstevel@tonic-gate /* compiles to <END_OF_STRING_MARK> or */ 2937c478bd9Sstevel@tonic-gate /* <ASCII_CHAR><'$'> */ 2947c478bd9Sstevel@tonic-gate 2957c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 2967c478bd9Sstevel@tonic-gate if ((char_size == 0) && (next_argp == (char *)0)) { 2977c478bd9Sstevel@tonic-gate can_repeat = B_FALSE; 2987c478bd9Sstevel@tonic-gate *compilep = (unsigned char)END_OF_STRING_MARK; 2997c478bd9Sstevel@tonic-gate compilep++; 3007c478bd9Sstevel@tonic-gate } else { 3017c478bd9Sstevel@tonic-gate can_repeat = B_TRUE; 3027c478bd9Sstevel@tonic-gate *compilep = (unsigned char)ASCII_CHAR; 3037c478bd9Sstevel@tonic-gate regex_typep = compilep; 3047c478bd9Sstevel@tonic-gate compilep++; 3057c478bd9Sstevel@tonic-gate *compilep = DOLLAR_SIGN; 3067c478bd9Sstevel@tonic-gate compilep++; 3077c478bd9Sstevel@tonic-gate } 3087c478bd9Sstevel@tonic-gate break; /* end case DOLLAR_SIGN */ 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate case DOT: /* any character */ 3117c478bd9Sstevel@tonic-gate 3127c478bd9Sstevel@tonic-gate /* compiles to <ANY_CHAR> */ 3137c478bd9Sstevel@tonic-gate 3147c478bd9Sstevel@tonic-gate can_repeat = B_TRUE; 3157c478bd9Sstevel@tonic-gate *compilep = (unsigned char)ANY_CHAR; 3167c478bd9Sstevel@tonic-gate regex_typep = compilep; 3177c478bd9Sstevel@tonic-gate compilep++; 3187c478bd9Sstevel@tonic-gate 3197c478bd9Sstevel@tonic-gate break; /* end case DOT */ 3207c478bd9Sstevel@tonic-gate 3217c478bd9Sstevel@tonic-gate case BACKSLASH: /* escaped character */ 3227c478bd9Sstevel@tonic-gate 3237c478bd9Sstevel@tonic-gate /* 3247c478bd9Sstevel@tonic-gate * compiles to <ASCII_CHAR><ascii_char> or 3257c478bd9Sstevel@tonic-gate * <MULTIBYTE_CHAR><multibyte_char> 3267c478bd9Sstevel@tonic-gate */ 3277c478bd9Sstevel@tonic-gate 3287c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 3297c478bd9Sstevel@tonic-gate if (char_size <= 0) { 3307c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 3317c478bd9Sstevel@tonic-gate compile_startp); 3327c478bd9Sstevel@tonic-gate } else { 3337c478bd9Sstevel@tonic-gate regexp += char_size; 3347c478bd9Sstevel@tonic-gate can_repeat = B_TRUE; 3357c478bd9Sstevel@tonic-gate expr_length = add_single_char_expr( 3367c478bd9Sstevel@tonic-gate compilep, current_char); 3377c478bd9Sstevel@tonic-gate regex_typep = compilep; 3387c478bd9Sstevel@tonic-gate compilep += expr_length; 3397c478bd9Sstevel@tonic-gate } 3407c478bd9Sstevel@tonic-gate break; /* end case '\\' */ 3417c478bd9Sstevel@tonic-gate 3427c478bd9Sstevel@tonic-gate case LEFT_SQUARE_BRACKET: 3437c478bd9Sstevel@tonic-gate /* start of a character class expression */ 3447c478bd9Sstevel@tonic-gate 3457c478bd9Sstevel@tonic-gate /* 3467c478bd9Sstevel@tonic-gate * [^...c...] compiles to 3477c478bd9Sstevel@tonic-gate * <NOT_IN_CLASS><class_length><...c...> 3487c478bd9Sstevel@tonic-gate * [^...a-z...] compiles to 3497c478bd9Sstevel@tonic-gate * <NOT_IN_CLASS><class_length><...a<THRU>z...> 3507c478bd9Sstevel@tonic-gate * [...c...] compiles to 3517c478bd9Sstevel@tonic-gate * <IN_CLASS><class_length><...c...> 3527c478bd9Sstevel@tonic-gate * [...a-z...] compiles to 3537c478bd9Sstevel@tonic-gate * <IN_CLASS><class_length><...a<THRU>z...> 3547c478bd9Sstevel@tonic-gate * 3557c478bd9Sstevel@tonic-gate * NOTE: <class_length> includes the 3567c478bd9Sstevel@tonic-gate * <class_length> byte 3577c478bd9Sstevel@tonic-gate */ 3587c478bd9Sstevel@tonic-gate 3597c478bd9Sstevel@tonic-gate can_repeat = B_TRUE; 3607c478bd9Sstevel@tonic-gate regex_typep = compilep; 3617c478bd9Sstevel@tonic-gate 3627c478bd9Sstevel@tonic-gate /* DETERMINE THE CLASS TYPE */ 3637c478bd9Sstevel@tonic-gate 3647c478bd9Sstevel@tonic-gate /* 3657c478bd9Sstevel@tonic-gate * NOTE: This algorithm checks the value of the 3667c478bd9Sstevel@tonic-gate * "multibyte" 3677c478bd9Sstevel@tonic-gate * macro in <euc.h> (included in <widec.h> ) 3687c478bd9Sstevel@tonic-gate * to find out if regcmp() 3697c478bd9Sstevel@tonic-gate * is compiling the regular expression in a 3707c478bd9Sstevel@tonic-gate * multibyte locale. 3717c478bd9Sstevel@tonic-gate */ 3727c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 3737c478bd9Sstevel@tonic-gate if (char_size <= 0) { 3747c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 3757c478bd9Sstevel@tonic-gate compile_startp); 3767c478bd9Sstevel@tonic-gate } else if (current_char == CIRCUMFLEX) { 3777c478bd9Sstevel@tonic-gate regexp++; 3787c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 3797c478bd9Sstevel@tonic-gate if (char_size <= 0) { 3807c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, 3817c478bd9Sstevel@tonic-gate arg_listp, compile_startp); 3827c478bd9Sstevel@tonic-gate } else { 3837c478bd9Sstevel@tonic-gate regexp += char_size; 3847c478bd9Sstevel@tonic-gate if (!multibyte) { 3857c478bd9Sstevel@tonic-gate *compilep = (unsigned char) 3867c478bd9Sstevel@tonic-gate NOT_IN_ASCII_CHAR_CLASS; 3877c478bd9Sstevel@tonic-gate } else { 3887c478bd9Sstevel@tonic-gate *compilep = (unsigned char) 3897c478bd9Sstevel@tonic-gate NOT_IN_MULTIBYTE_CHAR_CLASS; 3907c478bd9Sstevel@tonic-gate } 3917c478bd9Sstevel@tonic-gate /* leave space for <class_length> */ 3927c478bd9Sstevel@tonic-gate compilep += 2; 3937c478bd9Sstevel@tonic-gate } 3947c478bd9Sstevel@tonic-gate } else { 3957c478bd9Sstevel@tonic-gate regexp += char_size; 3967c478bd9Sstevel@tonic-gate if (!multibyte) { 3977c478bd9Sstevel@tonic-gate *compilep = (unsigned char) 3987c478bd9Sstevel@tonic-gate IN_ASCII_CHAR_CLASS; 3997c478bd9Sstevel@tonic-gate } else { 4007c478bd9Sstevel@tonic-gate *compilep = (unsigned char) 4017c478bd9Sstevel@tonic-gate IN_MULTIBYTE_CHAR_CLASS; 4027c478bd9Sstevel@tonic-gate } 4037c478bd9Sstevel@tonic-gate /* leave space for <class_length> */ 4047c478bd9Sstevel@tonic-gate compilep += 2; 4057c478bd9Sstevel@tonic-gate } 4067c478bd9Sstevel@tonic-gate 4077c478bd9Sstevel@tonic-gate /* COMPILE THE CLASS */ 4087c478bd9Sstevel@tonic-gate /* 4097c478bd9Sstevel@tonic-gate * check for a leading right square bracket, 4107c478bd9Sstevel@tonic-gate * which is allowed 4117c478bd9Sstevel@tonic-gate */ 4127c478bd9Sstevel@tonic-gate 4137c478bd9Sstevel@tonic-gate if (current_char == RIGHT_SQUARE_BRACKET) { 4147c478bd9Sstevel@tonic-gate /* 4157c478bd9Sstevel@tonic-gate * the leading RIGHT_SQUARE_BRACKET may 4167c478bd9Sstevel@tonic-gate * be part of a character range 4177c478bd9Sstevel@tonic-gate * expression like "[]-\]" 4187c478bd9Sstevel@tonic-gate */ 4197c478bd9Sstevel@tonic-gate dash_indicates_range = B_TRUE; 4207c478bd9Sstevel@tonic-gate first_char_in_range = current_char; 4217c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 4227c478bd9Sstevel@tonic-gate if (char_size <= 0) { 4237c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, 4247c478bd9Sstevel@tonic-gate arg_listp, compile_startp); 4257c478bd9Sstevel@tonic-gate } else { 4267c478bd9Sstevel@tonic-gate regexp += char_size; 4277c478bd9Sstevel@tonic-gate *compilep = RIGHT_SQUARE_BRACKET; 4287c478bd9Sstevel@tonic-gate compilep++; 4297c478bd9Sstevel@tonic-gate } 4307c478bd9Sstevel@tonic-gate } else { 4317c478bd9Sstevel@tonic-gate /* 4327c478bd9Sstevel@tonic-gate * decode the character in the following 4337c478bd9Sstevel@tonic-gate * while loop and decide then if it can 4347c478bd9Sstevel@tonic-gate * be the first character 4357c478bd9Sstevel@tonic-gate * in a character range expression 4367c478bd9Sstevel@tonic-gate */ 4377c478bd9Sstevel@tonic-gate dash_indicates_range = B_FALSE; 4387c478bd9Sstevel@tonic-gate } 4397c478bd9Sstevel@tonic-gate 4407c478bd9Sstevel@tonic-gate while (current_char != RIGHT_SQUARE_BRACKET) { 4417c478bd9Sstevel@tonic-gate if (current_char != DASH) { 4427c478bd9Sstevel@tonic-gate /* 4437c478bd9Sstevel@tonic-gate * if a DASH follows current_char, 4447c478bd9Sstevel@tonic-gate * current_char, the DASH and the 4457c478bd9Sstevel@tonic-gate * character that follows the DASH 4467c478bd9Sstevel@tonic-gate * may form a character range 4477c478bd9Sstevel@tonic-gate * expression 4487c478bd9Sstevel@tonic-gate */ 4497c478bd9Sstevel@tonic-gate dash_indicates_range = B_TRUE; 4507c478bd9Sstevel@tonic-gate first_char_in_range = current_char; 4517c478bd9Sstevel@tonic-gate expr_length = add_char( 4527c478bd9Sstevel@tonic-gate compilep, current_char); 4537c478bd9Sstevel@tonic-gate compilep += expr_length; 4547c478bd9Sstevel@tonic-gate 4557c478bd9Sstevel@tonic-gate } else if /* (current_char == DASH) && */ 4567c478bd9Sstevel@tonic-gate (dash_indicates_range == B_FALSE) { 4577c478bd9Sstevel@tonic-gate /* 4587c478bd9Sstevel@tonic-gate * current_char is a DASH, but 4597c478bd9Sstevel@tonic-gate * either begins the entire 4607c478bd9Sstevel@tonic-gate * character class or follows a 4617c478bd9Sstevel@tonic-gate * character that's already 4627c478bd9Sstevel@tonic-gate * part of a character range 4637c478bd9Sstevel@tonic-gate * expression, so it simply 4647c478bd9Sstevel@tonic-gate * represents the DASH character 4657c478bd9Sstevel@tonic-gate * itself 4667c478bd9Sstevel@tonic-gate */ 4677c478bd9Sstevel@tonic-gate *compilep = DASH; 4687c478bd9Sstevel@tonic-gate compilep ++; 4697c478bd9Sstevel@tonic-gate /* 4707c478bd9Sstevel@tonic-gate * if another DASH follows this 4717c478bd9Sstevel@tonic-gate * one, this DASH is part 4727c478bd9Sstevel@tonic-gate * of a character range expression 4737c478bd9Sstevel@tonic-gate * like "[--\]" 4747c478bd9Sstevel@tonic-gate */ 4757c478bd9Sstevel@tonic-gate dash_indicates_range = B_TRUE; 4767c478bd9Sstevel@tonic-gate first_char_in_range = current_char; 4777c478bd9Sstevel@tonic-gate 478*f6bce3fcSJason King } else { 479*f6bce3fcSJason King /* 480*f6bce3fcSJason King * ((current_char == DASH &&/ 481*f6bce3fcSJason King * (dash_indicates_range == B_TRUE)) 482*f6bce3fcSJason King */ 483*f6bce3fcSJason King 4847c478bd9Sstevel@tonic-gate /* 4857c478bd9Sstevel@tonic-gate * the DASH appears after a single 4867c478bd9Sstevel@tonic-gate * character that isn't 4877c478bd9Sstevel@tonic-gate * already part of a character 4887c478bd9Sstevel@tonic-gate * range expression, so it 4897c478bd9Sstevel@tonic-gate * and the characters preceding 4907c478bd9Sstevel@tonic-gate * and following it can form a 4917c478bd9Sstevel@tonic-gate * character range expression 4927c478bd9Sstevel@tonic-gate * like "[a-z]" 4937c478bd9Sstevel@tonic-gate */ 4947c478bd9Sstevel@tonic-gate char_size = get_wchar( 4957c478bd9Sstevel@tonic-gate ¤t_char, regexp); 4967c478bd9Sstevel@tonic-gate if (char_size <= 0) { 4977c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, 4987c478bd9Sstevel@tonic-gate arg_listp, compile_startp); 4997c478bd9Sstevel@tonic-gate 5007c478bd9Sstevel@tonic-gate } else if (current_char == 5017c478bd9Sstevel@tonic-gate RIGHT_SQUARE_BRACKET) { 5027c478bd9Sstevel@tonic-gate /* 5037c478bd9Sstevel@tonic-gate * the preceding DASH is 5047c478bd9Sstevel@tonic-gate * the last character in the 5057c478bd9Sstevel@tonic-gate * class and represents the 5067c478bd9Sstevel@tonic-gate * DASH character itself 5077c478bd9Sstevel@tonic-gate */ 5087c478bd9Sstevel@tonic-gate *compilep = DASH; 5097c478bd9Sstevel@tonic-gate compilep++; 5107c478bd9Sstevel@tonic-gate 5117c478bd9Sstevel@tonic-gate } else if (valid_range( 5127c478bd9Sstevel@tonic-gate first_char_in_range, 5137c478bd9Sstevel@tonic-gate current_char) == B_FALSE) { 5147c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, 5157c478bd9Sstevel@tonic-gate arg_listp, compile_startp); 5167c478bd9Sstevel@tonic-gate } else { 5177c478bd9Sstevel@tonic-gate /* 5187c478bd9Sstevel@tonic-gate * the DASH is part of a 5197c478bd9Sstevel@tonic-gate * character range 5207c478bd9Sstevel@tonic-gate * expression; encode the 5217c478bd9Sstevel@tonic-gate * rest of the expression 5227c478bd9Sstevel@tonic-gate */ 5237c478bd9Sstevel@tonic-gate regexp += char_size; 5247c478bd9Sstevel@tonic-gate *compilep = (unsigned char) 5257c478bd9Sstevel@tonic-gate THRU; 5267c478bd9Sstevel@tonic-gate compilep++; 5277c478bd9Sstevel@tonic-gate expr_length = add_char( 5287c478bd9Sstevel@tonic-gate compilep, current_char); 5297c478bd9Sstevel@tonic-gate compilep += expr_length; 5307c478bd9Sstevel@tonic-gate /* 5317c478bd9Sstevel@tonic-gate * if a DASH follows this 5327c478bd9Sstevel@tonic-gate * character range 5337c478bd9Sstevel@tonic-gate * expression, 5347c478bd9Sstevel@tonic-gate * it represents the DASH 5357c478bd9Sstevel@tonic-gate * character itself 5367c478bd9Sstevel@tonic-gate */ 5377c478bd9Sstevel@tonic-gate dash_indicates_range = 5387c478bd9Sstevel@tonic-gate B_FALSE; 5397c478bd9Sstevel@tonic-gate } 5407c478bd9Sstevel@tonic-gate } 5417c478bd9Sstevel@tonic-gate 5427c478bd9Sstevel@tonic-gate /* GET THE NEXT CHARACTER */ 5437c478bd9Sstevel@tonic-gate 5447c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 5457c478bd9Sstevel@tonic-gate if (char_size <= 0) { 5467c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, 5477c478bd9Sstevel@tonic-gate arg_listp, compile_startp); 5487c478bd9Sstevel@tonic-gate } else { 5497c478bd9Sstevel@tonic-gate regexp += char_size; 5507c478bd9Sstevel@tonic-gate } 5517c478bd9Sstevel@tonic-gate 5527c478bd9Sstevel@tonic-gate } 5537c478bd9Sstevel@tonic-gate /* end while (current_char != RIGHT_SQUARE_BRACKET) */ 5547c478bd9Sstevel@tonic-gate 5557c478bd9Sstevel@tonic-gate /* INSERT THE LENGTH OF THE CLASS INTO THE */ 5567c478bd9Sstevel@tonic-gate /* COMPILED EXPRESSION */ 5577c478bd9Sstevel@tonic-gate 5587c478bd9Sstevel@tonic-gate class_length = (unsigned int) 5597c478bd9Sstevel@tonic-gate (compilep - regex_typep - 1); 5607c478bd9Sstevel@tonic-gate if ((class_length < 2) || 5617c478bd9Sstevel@tonic-gate (class_length > MAX_SINGLE_BYTE_INT)) { 5627c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 5637c478bd9Sstevel@tonic-gate compile_startp); 5647c478bd9Sstevel@tonic-gate } else { 5657c478bd9Sstevel@tonic-gate *(regex_typep + 1) = (unsigned char) 5667c478bd9Sstevel@tonic-gate class_length; 5677c478bd9Sstevel@tonic-gate } 5687c478bd9Sstevel@tonic-gate break; /* end case LEFT_SQUARE_BRACKET */ 5697c478bd9Sstevel@tonic-gate 5707c478bd9Sstevel@tonic-gate case LEFT_PAREN: 5717c478bd9Sstevel@tonic-gate 5727c478bd9Sstevel@tonic-gate /* 5737c478bd9Sstevel@tonic-gate * start of a parenthesized group of regular 5747c478bd9Sstevel@tonic-gate * expressions compiles to <'\0'><'\0'>, leaving 5757c478bd9Sstevel@tonic-gate * space in the compiled regular expression for 5767c478bd9Sstevel@tonic-gate * <group_type|ADDED_LENGTH_BITS><group_length> 5777c478bd9Sstevel@tonic-gate */ 5787c478bd9Sstevel@tonic-gate 5797c478bd9Sstevel@tonic-gate if (push_compilep(compilep) == (char *)0) { 5807c478bd9Sstevel@tonic-gate /* 5817c478bd9Sstevel@tonic-gate * groups can contain groups, so group 5827c478bd9Sstevel@tonic-gate * start pointers 5837c478bd9Sstevel@tonic-gate * must be saved and restored in sequence 5847c478bd9Sstevel@tonic-gate */ 5857c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 5867c478bd9Sstevel@tonic-gate compile_startp); 5877c478bd9Sstevel@tonic-gate } else { 5887c478bd9Sstevel@tonic-gate can_repeat = B_FALSE; 5897c478bd9Sstevel@tonic-gate *compilep = '\0'; /* for debugging */ 5907c478bd9Sstevel@tonic-gate compilep++; 5917c478bd9Sstevel@tonic-gate *compilep = '\0'; /* for debugging */ 5927c478bd9Sstevel@tonic-gate compilep++; 5937c478bd9Sstevel@tonic-gate } 5947c478bd9Sstevel@tonic-gate break; /* end case LEFT_PAREN */ 5957c478bd9Sstevel@tonic-gate 5967c478bd9Sstevel@tonic-gate case RIGHT_PAREN: 5977c478bd9Sstevel@tonic-gate /* end of a marked group of regular expressions */ 5987c478bd9Sstevel@tonic-gate 5997c478bd9Sstevel@tonic-gate /* 6007c478bd9Sstevel@tonic-gate * (<regex>)$0-9 compiles to 6017c478bd9Sstevel@tonic-gate * <SAVED_GROUP><substringn><compiled_regex...>\ 6027c478bd9Sstevel@tonic-gate * <END_SAVED_GROUP><substringn><return_arg_number> 6037c478bd9Sstevel@tonic-gate * (<regex>)* compiles to 6047c478bd9Sstevel@tonic-gate * <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS> 6057c478bd9Sstevel@tonic-gate * <group_length> <compiled_regex...> 6067c478bd9Sstevel@tonic-gate * <END_GROUP|ZERO_OR_MORE><groupn> 6077c478bd9Sstevel@tonic-gate * (<regex>)+ compiles to 6087c478bd9Sstevel@tonic-gate * <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS> 6097c478bd9Sstevel@tonic-gate * <group_length>\ 6107c478bd9Sstevel@tonic-gate * <compiled_regex...><END_GROUP|ONE_OR_MORE> 6117c478bd9Sstevel@tonic-gate * <groupn> 6127c478bd9Sstevel@tonic-gate * (<regex>){...} compiles to 6137c478bd9Sstevel@tonic-gate * <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ 6147c478bd9Sstevel@tonic-gate * <compiled_regex...><END_GROUP|COUNT><groupn>\ 6157c478bd9Sstevel@tonic-gate * <minimum_repeat_count><maximum_repeat_count> 6167c478bd9Sstevel@tonic-gate * otherwise (<regex>) compiles to 6177c478bd9Sstevel@tonic-gate * <SIMPLE_GROUP><blank><compiled_regex...> 6187c478bd9Sstevel@tonic-gate * <END_GROUP><groupn> 6197c478bd9Sstevel@tonic-gate * 6207c478bd9Sstevel@tonic-gate * NOTE: 6217c478bd9Sstevel@tonic-gate * 6227c478bd9Sstevel@tonic-gate * group_length + (256 * ADDED_LENGTH_BITS) == 6237c478bd9Sstevel@tonic-gate * length_of(<compiled_regex...><END_GROUP|...> 6247c478bd9Sstevel@tonic-gate * <groupn>) 6257c478bd9Sstevel@tonic-gate * which also == 6267c478bd9Sstevel@tonic-gate * length_of(<group_type|ADDED_LENGTH_BITS> 6277c478bd9Sstevel@tonic-gate * <group_length>\ <compiled_regex...>) 6287c478bd9Sstevel@tonic-gate * groupn no longer seems to be used, but the code 6297c478bd9Sstevel@tonic-gate * still computes it to preserve backward 6307c478bd9Sstevel@tonic-gate * compatibility 6317c478bd9Sstevel@tonic-gate * with earlier versions of regex(). 6327c478bd9Sstevel@tonic-gate */ 6337c478bd9Sstevel@tonic-gate 6347c478bd9Sstevel@tonic-gate /* RETRIEVE THE ADDRESS OF THE START OF THE GROUP */ 6357c478bd9Sstevel@tonic-gate 6367c478bd9Sstevel@tonic-gate regex_typep = pop_compilep(); 6377c478bd9Sstevel@tonic-gate if (regex_typep == (char *)0) { 6387c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 6397c478bd9Sstevel@tonic-gate compile_startp); 6407c478bd9Sstevel@tonic-gate } 6417c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 6427c478bd9Sstevel@tonic-gate if (char_size < 0) { 6437c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 6447c478bd9Sstevel@tonic-gate compile_startp); 6457c478bd9Sstevel@tonic-gate } else if (char_size == 0) { 6467c478bd9Sstevel@tonic-gate *regex_typep = SIMPLE_GROUP; 6477c478bd9Sstevel@tonic-gate can_repeat = B_TRUE; 6487c478bd9Sstevel@tonic-gate *compilep = (unsigned char)END_GROUP; 6497c478bd9Sstevel@tonic-gate regex_typep = compilep; 6507c478bd9Sstevel@tonic-gate compilep++; 6517c478bd9Sstevel@tonic-gate *compilep = (unsigned char)groupn; 6527c478bd9Sstevel@tonic-gate groupn++; 6537c478bd9Sstevel@tonic-gate compilep++; 6547c478bd9Sstevel@tonic-gate } else if (current_char == DOLLAR_SIGN) { 6557c478bd9Sstevel@tonic-gate *regex_typep = SAVED_GROUP; 6567c478bd9Sstevel@tonic-gate regex_typep++; 6577c478bd9Sstevel@tonic-gate *regex_typep = (char)substringn; 6587c478bd9Sstevel@tonic-gate can_repeat = B_FALSE; 6597c478bd9Sstevel@tonic-gate regexp ++; 6607c478bd9Sstevel@tonic-gate return_arg_number = get_digit(regexp); 6617c478bd9Sstevel@tonic-gate if ((return_arg_number < 0) || 6627c478bd9Sstevel@tonic-gate (substringn >= NSUBSTRINGS)) { 6637c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 6647c478bd9Sstevel@tonic-gate compile_startp); 6657c478bd9Sstevel@tonic-gate } 6667c478bd9Sstevel@tonic-gate regexp++; 6677c478bd9Sstevel@tonic-gate *compilep = (unsigned char)END_SAVED_GROUP; 6687c478bd9Sstevel@tonic-gate compilep++; 6697c478bd9Sstevel@tonic-gate *compilep = (unsigned char)substringn; 6707c478bd9Sstevel@tonic-gate substringn++; 6717c478bd9Sstevel@tonic-gate compilep++; 6727c478bd9Sstevel@tonic-gate *compilep = (unsigned char)return_arg_number; 6737c478bd9Sstevel@tonic-gate compilep++; 6747c478bd9Sstevel@tonic-gate } else { 6757c478bd9Sstevel@tonic-gate switch (current_char) { 6767c478bd9Sstevel@tonic-gate case STAR: 6777c478bd9Sstevel@tonic-gate *regex_typep = ZERO_OR_MORE_GROUP; 6787c478bd9Sstevel@tonic-gate break; 6797c478bd9Sstevel@tonic-gate case PLUS: 6807c478bd9Sstevel@tonic-gate *regex_typep = ONE_OR_MORE_GROUP; 6817c478bd9Sstevel@tonic-gate break; 6827c478bd9Sstevel@tonic-gate case LEFT_CURLY_BRACE: 6837c478bd9Sstevel@tonic-gate *regex_typep = COUNTED_GROUP; 6847c478bd9Sstevel@tonic-gate break; 6857c478bd9Sstevel@tonic-gate default: 6867c478bd9Sstevel@tonic-gate *regex_typep = SIMPLE_GROUP; 6877c478bd9Sstevel@tonic-gate } 6887c478bd9Sstevel@tonic-gate if (*regex_typep != SIMPLE_GROUP) { 6897c478bd9Sstevel@tonic-gate group_length = (unsigned int) 6907c478bd9Sstevel@tonic-gate (compilep - regex_typep); 6917c478bd9Sstevel@tonic-gate if (group_length >= 1024) { 6927c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, 6937c478bd9Sstevel@tonic-gate arg_listp, compile_startp); 6947c478bd9Sstevel@tonic-gate } 6957c478bd9Sstevel@tonic-gate high_bits = group_length >> 6967c478bd9Sstevel@tonic-gate TIMES_256_SHIFT; 6977c478bd9Sstevel@tonic-gate low_bits = group_length & 6987c478bd9Sstevel@tonic-gate SINGLE_BYTE_MASK; 6997c478bd9Sstevel@tonic-gate *regex_typep = 7007c478bd9Sstevel@tonic-gate (unsigned char) 7017c478bd9Sstevel@tonic-gate ((unsigned int) 7027c478bd9Sstevel@tonic-gate *regex_typep | high_bits); 7037c478bd9Sstevel@tonic-gate regex_typep++; 7047c478bd9Sstevel@tonic-gate *regex_typep = 7057c478bd9Sstevel@tonic-gate (unsigned char)low_bits; 7067c478bd9Sstevel@tonic-gate } 7077c478bd9Sstevel@tonic-gate can_repeat = B_TRUE; 7087c478bd9Sstevel@tonic-gate *compilep = (unsigned char)END_GROUP; 7097c478bd9Sstevel@tonic-gate regex_typep = compilep; 7107c478bd9Sstevel@tonic-gate compilep++; 7117c478bd9Sstevel@tonic-gate *compilep = (unsigned char)groupn; 7127c478bd9Sstevel@tonic-gate groupn++; 7137c478bd9Sstevel@tonic-gate compilep++; 7147c478bd9Sstevel@tonic-gate } 7157c478bd9Sstevel@tonic-gate 7167c478bd9Sstevel@tonic-gate break; /* end case RIGHT_PAREN */ 7177c478bd9Sstevel@tonic-gate 7187c478bd9Sstevel@tonic-gate case STAR: /* zero or more repetitions of the */ 7197c478bd9Sstevel@tonic-gate /* preceding expression */ 7207c478bd9Sstevel@tonic-gate 7217c478bd9Sstevel@tonic-gate /* 7227c478bd9Sstevel@tonic-gate * <regex...>* compiles to <regex_type|ZERO_OR_MORE>\ 7237c478bd9Sstevel@tonic-gate * <compiled_regex...> 7247c478bd9Sstevel@tonic-gate * (<regex...>)* compiles to 7257c478bd9Sstevel@tonic-gate * <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 7267c478bd9Sstevel@tonic-gate * <group_length><compiled_regex...>\ 7277c478bd9Sstevel@tonic-gate * <END_GROUP|ZERO_OR_MORE><groupn> 7287c478bd9Sstevel@tonic-gate */ 7297c478bd9Sstevel@tonic-gate 7307c478bd9Sstevel@tonic-gate if (can_repeat == B_FALSE) { 7317c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 7327c478bd9Sstevel@tonic-gate compile_startp); 7337c478bd9Sstevel@tonic-gate } else { 7347c478bd9Sstevel@tonic-gate can_repeat = B_FALSE; 7357c478bd9Sstevel@tonic-gate *regex_typep = (unsigned char) 7367c478bd9Sstevel@tonic-gate ((unsigned int)*regex_typep | ZERO_OR_MORE); 7377c478bd9Sstevel@tonic-gate } 7387c478bd9Sstevel@tonic-gate break; /* end case '*' */ 7397c478bd9Sstevel@tonic-gate 7407c478bd9Sstevel@tonic-gate case PLUS: 7417c478bd9Sstevel@tonic-gate /* one or more repetitions of the preceding */ 7427c478bd9Sstevel@tonic-gate /* expression */ 7437c478bd9Sstevel@tonic-gate 7447c478bd9Sstevel@tonic-gate /* 7457c478bd9Sstevel@tonic-gate * <regex...>+ compiles to <regex_type|ONE_OR_MORE>\ 7467c478bd9Sstevel@tonic-gate * <compiled_regex...> (<regex...>)+ compiles to 7477c478bd9Sstevel@tonic-gate * <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ 7487c478bd9Sstevel@tonic-gate * <group_length><compiled_regex...>\ 7497c478bd9Sstevel@tonic-gate * <END_GROUP|ONE_OR_MORE><groupn> 7507c478bd9Sstevel@tonic-gate */ 7517c478bd9Sstevel@tonic-gate 7527c478bd9Sstevel@tonic-gate if (can_repeat == B_FALSE) { 7537c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 7547c478bd9Sstevel@tonic-gate compile_startp); 7557c478bd9Sstevel@tonic-gate } else { 7567c478bd9Sstevel@tonic-gate can_repeat = B_FALSE; 7577c478bd9Sstevel@tonic-gate *regex_typep = 7587c478bd9Sstevel@tonic-gate (unsigned char)((unsigned int)* 7597c478bd9Sstevel@tonic-gate regex_typep | ONE_OR_MORE); 7607c478bd9Sstevel@tonic-gate } 7617c478bd9Sstevel@tonic-gate break; /* end case '+' */ 7627c478bd9Sstevel@tonic-gate 7637c478bd9Sstevel@tonic-gate case LEFT_CURLY_BRACE: 7647c478bd9Sstevel@tonic-gate 7657c478bd9Sstevel@tonic-gate /* 7667c478bd9Sstevel@tonic-gate * repeat the preceding regular expression 7677c478bd9Sstevel@tonic-gate * at least min_count times 7687c478bd9Sstevel@tonic-gate * and at most max_count times 7697c478bd9Sstevel@tonic-gate * 7707c478bd9Sstevel@tonic-gate * <regex...>{min_count} compiles to 7717c478bd9Sstevel@tonic-gate * <regex type|COUNT><compiled_regex...> 7727c478bd9Sstevel@tonic-gate * <min_count><min_count> 7737c478bd9Sstevel@tonic-gate * 7747c478bd9Sstevel@tonic-gate * <regex...>{min_count,} compiles to 7757c478bd9Sstevel@tonic-gate * <regex type|COUNT><compiled_regex...> 7767c478bd9Sstevel@tonic-gate * <min_count><UNLIMITED> 7777c478bd9Sstevel@tonic-gate * 7787c478bd9Sstevel@tonic-gate * <regex...>{min_count,max_count} compiles to 7797c478bd9Sstevel@tonic-gate * <regex type>|COUNT><compiled_regex...> 7807c478bd9Sstevel@tonic-gate * <min_count><max_count> 7817c478bd9Sstevel@tonic-gate * 7827c478bd9Sstevel@tonic-gate * (<regex...>){min_count,max_count} compiles to 7837c478bd9Sstevel@tonic-gate * <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ 7847c478bd9Sstevel@tonic-gate * <compiled_regex...><END_GROUP|COUNT><groupn>\ 7857c478bd9Sstevel@tonic-gate * <minimum_match_count><maximum_match_count> 7867c478bd9Sstevel@tonic-gate */ 7877c478bd9Sstevel@tonic-gate 7887c478bd9Sstevel@tonic-gate if (can_repeat == B_FALSE) { 7897c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 7907c478bd9Sstevel@tonic-gate compile_startp); 7917c478bd9Sstevel@tonic-gate } 7927c478bd9Sstevel@tonic-gate can_repeat = B_FALSE; 7937c478bd9Sstevel@tonic-gate *regex_typep = (unsigned char)((unsigned int)* 7947c478bd9Sstevel@tonic-gate regex_typep | COUNT); 7957c478bd9Sstevel@tonic-gate count_length = get_count(&min_count, regexp); 7967c478bd9Sstevel@tonic-gate if (count_length <= 0) { 7977c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 7987c478bd9Sstevel@tonic-gate compile_startp); 7997c478bd9Sstevel@tonic-gate } 8007c478bd9Sstevel@tonic-gate regexp += count_length; 8017c478bd9Sstevel@tonic-gate 8027c478bd9Sstevel@tonic-gate if (*regexp == RIGHT_CURLY_BRACE) { /* {min_count} */ 8037c478bd9Sstevel@tonic-gate regexp++; 8047c478bd9Sstevel@tonic-gate max_count = min_count; 8057c478bd9Sstevel@tonic-gate } else if (*regexp == COMMA) { /* {min_count,..} */ 8067c478bd9Sstevel@tonic-gate regexp++; 8077c478bd9Sstevel@tonic-gate /* {min_count,} */ 8087c478bd9Sstevel@tonic-gate if (*regexp == RIGHT_CURLY_BRACE) { 8097c478bd9Sstevel@tonic-gate regexp++; 8107c478bd9Sstevel@tonic-gate max_count = UNLIMITED; 8117c478bd9Sstevel@tonic-gate } else { /* {min_count,max_count} */ 8127c478bd9Sstevel@tonic-gate count_length = get_count( 8137c478bd9Sstevel@tonic-gate &max_count, regexp); 8147c478bd9Sstevel@tonic-gate if (count_length <= 0) { 8157c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, 8167c478bd9Sstevel@tonic-gate arg_listp, compile_startp); 8177c478bd9Sstevel@tonic-gate } 8187c478bd9Sstevel@tonic-gate regexp += count_length; 8197c478bd9Sstevel@tonic-gate if (*regexp != RIGHT_CURLY_BRACE) { 8207c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, 8217c478bd9Sstevel@tonic-gate arg_listp, compile_startp); 8227c478bd9Sstevel@tonic-gate } 8237c478bd9Sstevel@tonic-gate regexp++; 8247c478bd9Sstevel@tonic-gate } 8257c478bd9Sstevel@tonic-gate } else { /* invalid expression */ 8267c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 8277c478bd9Sstevel@tonic-gate compile_startp); 8287c478bd9Sstevel@tonic-gate } 8297c478bd9Sstevel@tonic-gate 8307c478bd9Sstevel@tonic-gate if ((min_count > MAX_SINGLE_BYTE_INT) || 8317c478bd9Sstevel@tonic-gate ((max_count != UNLIMITED) && 8327c478bd9Sstevel@tonic-gate (min_count > max_count))) { 8337c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 8347c478bd9Sstevel@tonic-gate compile_startp); 8357c478bd9Sstevel@tonic-gate } else { 8367c478bd9Sstevel@tonic-gate *compilep = (unsigned char)min_count; 8377c478bd9Sstevel@tonic-gate compilep++; 8387c478bd9Sstevel@tonic-gate *compilep = (unsigned char)max_count; 8397c478bd9Sstevel@tonic-gate compilep++; 8407c478bd9Sstevel@tonic-gate } 8417c478bd9Sstevel@tonic-gate break; /* end case LEFT_CURLY_BRACE */ 8427c478bd9Sstevel@tonic-gate 8437c478bd9Sstevel@tonic-gate default: /* a single non-special character */ 8447c478bd9Sstevel@tonic-gate 8457c478bd9Sstevel@tonic-gate /* 8467c478bd9Sstevel@tonic-gate * compiles to <ASCII_CHAR><ascii_char> or 8477c478bd9Sstevel@tonic-gate * <MULTIBYTE_CHAR><multibyte_char> 8487c478bd9Sstevel@tonic-gate */ 8497c478bd9Sstevel@tonic-gate 8507c478bd9Sstevel@tonic-gate can_repeat = B_TRUE; 8517c478bd9Sstevel@tonic-gate regex_typep = compilep; 8527c478bd9Sstevel@tonic-gate expr_length = add_single_char_expr(compilep, 8537c478bd9Sstevel@tonic-gate current_char); 8547c478bd9Sstevel@tonic-gate compilep += expr_length; 8557c478bd9Sstevel@tonic-gate 8567c478bd9Sstevel@tonic-gate } /* end switch (current_char) */ 8577c478bd9Sstevel@tonic-gate 8587c478bd9Sstevel@tonic-gate /* GET THE NEXT CHARACTER FOR THE WHILE LOOP */ 8597c478bd9Sstevel@tonic-gate 8607c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 8617c478bd9Sstevel@tonic-gate if (char_size < 0) { 8627c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, compile_startp); 8637c478bd9Sstevel@tonic-gate } else if (char_size > 0) { 8647c478bd9Sstevel@tonic-gate regexp += char_size; 8657c478bd9Sstevel@tonic-gate } else if /* (char_size == 0) && */ (next_argp != (char *)0) { 8667c478bd9Sstevel@tonic-gate regexp = next_argp; 8677c478bd9Sstevel@tonic-gate next_argp = va_arg(arg_listp, /* const */ char *); 8687c478bd9Sstevel@tonic-gate char_size = get_wchar(¤t_char, regexp); 8697c478bd9Sstevel@tonic-gate if (char_size <= 0) { 8707c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 8717c478bd9Sstevel@tonic-gate compile_startp); 8727c478bd9Sstevel@tonic-gate } else { 8737c478bd9Sstevel@tonic-gate regexp += char_size; 8747c478bd9Sstevel@tonic-gate } 8757c478bd9Sstevel@tonic-gate } else /* ((char_size == 0) && (next_argp == (char *)0)) */ { 8767c478bd9Sstevel@tonic-gate if (pop_compilep() != (char *)0) { 8777c478bd9Sstevel@tonic-gate /* unmatched parentheses */ 8787c478bd9Sstevel@tonic-gate ERROR_EXIT(®cmp_lock, arg_listp, 8797c478bd9Sstevel@tonic-gate compile_startp); 8807c478bd9Sstevel@tonic-gate } 8817c478bd9Sstevel@tonic-gate *compilep = (unsigned char)END_REGEX; 8827c478bd9Sstevel@tonic-gate compilep++; 8837c478bd9Sstevel@tonic-gate *compilep = '\0'; 8847c478bd9Sstevel@tonic-gate compilep++; 8857c478bd9Sstevel@tonic-gate __i_size = (int)(compilep - compile_startp); 8867c478bd9Sstevel@tonic-gate va_end(arg_listp); 8877c478bd9Sstevel@tonic-gate lmutex_unlock(®cmp_lock); 8887c478bd9Sstevel@tonic-gate return (compile_startp); 8897c478bd9Sstevel@tonic-gate } 8907c478bd9Sstevel@tonic-gate } /* end for (;;) */ 8917c478bd9Sstevel@tonic-gate 8927c478bd9Sstevel@tonic-gate } /* regcmp() */ 8937c478bd9Sstevel@tonic-gate 8947c478bd9Sstevel@tonic-gate 8957c478bd9Sstevel@tonic-gate /* DEFINITIONS OF PRIVATE FUNCTIONS */ 8967c478bd9Sstevel@tonic-gate 8977c478bd9Sstevel@tonic-gate static int 8987c478bd9Sstevel@tonic-gate add_char(char *compilep, wchar_t wchar) 8997c478bd9Sstevel@tonic-gate { 9007c478bd9Sstevel@tonic-gate int expr_length; 9017c478bd9Sstevel@tonic-gate 9027c478bd9Sstevel@tonic-gate if ((unsigned int)wchar <= (unsigned int)0x7f) { 9037c478bd9Sstevel@tonic-gate *compilep = (unsigned char)wchar; 9047c478bd9Sstevel@tonic-gate expr_length = 1; 9057c478bd9Sstevel@tonic-gate } else { 9067c478bd9Sstevel@tonic-gate expr_length = wctomb(compilep, wchar); 9077c478bd9Sstevel@tonic-gate } 9087c478bd9Sstevel@tonic-gate return (expr_length); 9097c478bd9Sstevel@tonic-gate } 9107c478bd9Sstevel@tonic-gate 9117c478bd9Sstevel@tonic-gate static int 9127c478bd9Sstevel@tonic-gate add_single_char_expr(char *compilep, wchar_t wchar) 9137c478bd9Sstevel@tonic-gate { 9147c478bd9Sstevel@tonic-gate int expr_length = 0; 9157c478bd9Sstevel@tonic-gate 9167c478bd9Sstevel@tonic-gate if ((unsigned int)wchar <= (unsigned int)0x7f) { 9177c478bd9Sstevel@tonic-gate *compilep = (unsigned char)ASCII_CHAR; 9187c478bd9Sstevel@tonic-gate compilep++; 9197c478bd9Sstevel@tonic-gate *compilep = (unsigned char)wchar; 9207c478bd9Sstevel@tonic-gate expr_length += 2; 9217c478bd9Sstevel@tonic-gate } else { 9227c478bd9Sstevel@tonic-gate *compilep = (unsigned char)MULTIBYTE_CHAR; 9237c478bd9Sstevel@tonic-gate compilep++; 9247c478bd9Sstevel@tonic-gate expr_length++; 9257c478bd9Sstevel@tonic-gate expr_length += wctomb(compilep, wchar); 9267c478bd9Sstevel@tonic-gate } 9277c478bd9Sstevel@tonic-gate return (expr_length); 9287c478bd9Sstevel@tonic-gate } 9297c478bd9Sstevel@tonic-gate 9307c478bd9Sstevel@tonic-gate static int 9317c478bd9Sstevel@tonic-gate get_count(int *countp, const char *regexp) 9327c478bd9Sstevel@tonic-gate { 9337c478bd9Sstevel@tonic-gate char count_char = '0'; 9347c478bd9Sstevel@tonic-gate int count = 0; 9357c478bd9Sstevel@tonic-gate int count_length = 0; 9367c478bd9Sstevel@tonic-gate 9377c478bd9Sstevel@tonic-gate if (regexp == (char *)0) { 9387c478bd9Sstevel@tonic-gate return ((int)0); 9397c478bd9Sstevel@tonic-gate } else { 9407c478bd9Sstevel@tonic-gate count_char = *regexp; 9417c478bd9Sstevel@tonic-gate while (('0' <= count_char) && (count_char <= '9')) { 9427c478bd9Sstevel@tonic-gate count = (10 * count) + (int)(count_char - '0'); 9437c478bd9Sstevel@tonic-gate count_length++; 9447c478bd9Sstevel@tonic-gate regexp++; 9457c478bd9Sstevel@tonic-gate count_char = *regexp; 9467c478bd9Sstevel@tonic-gate } 9477c478bd9Sstevel@tonic-gate } 9487c478bd9Sstevel@tonic-gate *countp = count; 9497c478bd9Sstevel@tonic-gate return (count_length); 9507c478bd9Sstevel@tonic-gate } 9517c478bd9Sstevel@tonic-gate 9527c478bd9Sstevel@tonic-gate static int 9537c478bd9Sstevel@tonic-gate get_digit(const char *regexp) 9547c478bd9Sstevel@tonic-gate { 9557c478bd9Sstevel@tonic-gate char digit; 9567c478bd9Sstevel@tonic-gate 9577c478bd9Sstevel@tonic-gate if (regexp == (char *)0) { 9587c478bd9Sstevel@tonic-gate return ((int)-1); 9597c478bd9Sstevel@tonic-gate } else { 9607c478bd9Sstevel@tonic-gate digit = *regexp; 9617c478bd9Sstevel@tonic-gate if (('0' <= digit) && (digit <= '9')) { 9627c478bd9Sstevel@tonic-gate return ((int)(digit - '0')); 9637c478bd9Sstevel@tonic-gate } else { 9647c478bd9Sstevel@tonic-gate return ((int)-1); 9657c478bd9Sstevel@tonic-gate } 9667c478bd9Sstevel@tonic-gate } 9677c478bd9Sstevel@tonic-gate } 9687c478bd9Sstevel@tonic-gate 9697c478bd9Sstevel@tonic-gate static int 9707c478bd9Sstevel@tonic-gate get_wchar(wchar_t *wcharp, const char *regexp) 9717c478bd9Sstevel@tonic-gate { 9727c478bd9Sstevel@tonic-gate int char_size; 9737c478bd9Sstevel@tonic-gate 9747c478bd9Sstevel@tonic-gate if (regexp == (char *)0) { 9757c478bd9Sstevel@tonic-gate char_size = 0; 9767c478bd9Sstevel@tonic-gate *wcharp = (wchar_t)((unsigned int)'\0'); 9777c478bd9Sstevel@tonic-gate } else if (*regexp == '\0') { 9787c478bd9Sstevel@tonic-gate char_size = 0; 9797c478bd9Sstevel@tonic-gate *wcharp = (wchar_t)((unsigned int)*regexp); 9807c478bd9Sstevel@tonic-gate } else if ((unsigned char)*regexp <= (unsigned char)0x7f) { 9817c478bd9Sstevel@tonic-gate char_size = 1; 9827c478bd9Sstevel@tonic-gate *wcharp = (wchar_t)((unsigned int)*regexp); 9837c478bd9Sstevel@tonic-gate } else { 9847c478bd9Sstevel@tonic-gate char_size = mbtowc(wcharp, regexp, MB_LEN_MAX); 9857c478bd9Sstevel@tonic-gate } 9867c478bd9Sstevel@tonic-gate return (char_size); 9877c478bd9Sstevel@tonic-gate } 9887c478bd9Sstevel@tonic-gate 9897c478bd9Sstevel@tonic-gate static char * 9907c478bd9Sstevel@tonic-gate pop_compilep(void) 9917c478bd9Sstevel@tonic-gate { 9927c478bd9Sstevel@tonic-gate char *compilep; 9937c478bd9Sstevel@tonic-gate 9947c478bd9Sstevel@tonic-gate if (compilep_stackp >= &compilep_stack[STRINGP_STACK_SIZE]) { 9957c478bd9Sstevel@tonic-gate return ((char *)0); 9967c478bd9Sstevel@tonic-gate } else { 9977c478bd9Sstevel@tonic-gate compilep = *compilep_stackp; 9987c478bd9Sstevel@tonic-gate compilep_stackp++; 9997c478bd9Sstevel@tonic-gate return (compilep); 10007c478bd9Sstevel@tonic-gate } 10017c478bd9Sstevel@tonic-gate } 10027c478bd9Sstevel@tonic-gate 10037c478bd9Sstevel@tonic-gate static char * 10047c478bd9Sstevel@tonic-gate push_compilep(char *compilep) 10057c478bd9Sstevel@tonic-gate { 10067c478bd9Sstevel@tonic-gate if (compilep_stackp <= &compilep_stack[0]) { 10077c478bd9Sstevel@tonic-gate return ((char *)0); 10087c478bd9Sstevel@tonic-gate } else { 10097c478bd9Sstevel@tonic-gate compilep_stackp--; 10107c478bd9Sstevel@tonic-gate *compilep_stackp = compilep; 10117c478bd9Sstevel@tonic-gate return (compilep); 10127c478bd9Sstevel@tonic-gate } 10137c478bd9Sstevel@tonic-gate } 10147c478bd9Sstevel@tonic-gate 10157c478bd9Sstevel@tonic-gate static boolean_t 10167c478bd9Sstevel@tonic-gate valid_range(wchar_t lower_char, wchar_t upper_char) 10177c478bd9Sstevel@tonic-gate { 10187c478bd9Sstevel@tonic-gate return (((lower_char <= 0x7f) && (upper_char <= 0x7f) && 10197c478bd9Sstevel@tonic-gate !iswcntrl(lower_char) && !iswcntrl(upper_char) && 10207c478bd9Sstevel@tonic-gate (lower_char < upper_char)) || 10217c478bd9Sstevel@tonic-gate (((lower_char & WCHAR_CSMASK) == 10227c478bd9Sstevel@tonic-gate (upper_char & WCHAR_CSMASK)) && 10237c478bd9Sstevel@tonic-gate (lower_char < upper_char))); 10247c478bd9Sstevel@tonic-gate } 1025