xref: /titanic_50/usr/src/lib/libc/port/regex/regex.c (revision 3eae19d9cf3390cf5b75e10c9c1945fd36ad856a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved	*/
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 /*
34  * IMPORTANT NOTE:
35  *
36  * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
37  * IT IS **NOT** CHARACTER SET INDEPENDENT.
38  *
39  */
40 
41 #pragma weak regex = _regex
42 
43 /* CONSTANTS SHARED WITH regcmp() */
44 #include "regex.h"
45 
46 #include "synonyms.h"
47 #include "mtlib.h"
48 #include <limits.h>
49 #include <stdarg.h>
50 #include <stdlib.h>
51 #include <thread.h>
52 #include <widec.h>
53 #include "tsd.h"
54 
55 
56 /* PRIVATE CONSTANTS */
57 
/*
 * NOTE(review): the ADD_*_TO_GROUP_LENGTH values and ADDED_LENGTH_BITS are
 * not referenced in the part of this file reviewed here; presumably they
 * encode extra length bits for compiled groups -- confirm against the
 * group cases of test_string() and against regcmp().
 */
#define	ADD_256_TO_GROUP_LENGTH	0x1
#define	ADD_512_TO_GROUP_LENGTH	0x2
#define	ADD_768_TO_GROUP_LENGTH	0x3
#define	ADDED_LENGTH_BITS	0x3
/* mask that keeps only the low-order 8 bits of a count byte */
#define	SINGLE_BYTE_MASK	0xff
/* number of entries in stringp_stack[] */
#define	STRINGP_STACK_SIZE	50
64 
65 
66 /* PRIVATE TYPE DEFINITIONS */
67 
/*
 * Selects what a character class test is asked to establish:
 * NOT_IN_CLASS means the test is true when the character is not a member
 * of the class, IN_CLASS means it is true when the character is a member.
 */
typedef enum {
	NOT_IN_CLASS = 0,
	IN_CLASS
} char_test_condition_t;

/*
 * Result of a character class test.  The class test functions in this
 * file return CONDITION_TRUE or CONDITION_FALSE; the multibyte class test
 * returns CHAR_TEST_ERROR when a class member cannot be decoded.
 * NOTE(review): TESTING_CHAR is not returned by any function visible in
 * the reviewed part of this file.
 */
typedef enum {
	TESTING_CHAR = 0,
	CONDITION_TRUE,
	CONDITION_FALSE,
	CHAR_TEST_ERROR
} char_test_result_t;
79 
80 
81 /* PRIVATE GLOBAL VARIABLES */
82 
/* held by regex() from entry until it returns; guards the statics below */
static mutex_t		regex_lock = DEFAULTMUTEX;
/*
 * for each saved group, the index of the regex() vararg buffer that
 * receives the matching substring; regex() resets every entry to -1
 */
static int		return_arg_number[NSUBSTRINGS];
/* for each saved group, one past the last byte of the matching substring */
static const char	*substring_endp[NSUBSTRINGS];
/* for each saved group, the first byte of the matching substring, or null */
static const char	*substring_startp[NSUBSTRINGS];
/*
 * stack of string pointers used by push_stringp() and pop_stringp();
 * it grows downward from &stringp_stack[STRINGP_STACK_SIZE]
 */
static const char	*stringp_stack[STRINGP_STACK_SIZE];
/* current top of stringp_stack[]; equals the end of the array when empty */
static const char	**stringp_stackp;
89 
90 
91 /* DECLARATIONS OF PRIVATE FUNCTIONS */
92 
/* decodes the character at stringp into *wcharp; returns its size in bytes */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp);

/* reads the minimum and maximum match counts of a counted repetition */
static void
get_match_counts(int *nmust_matchp,
	int *nextra_matches_allowedp,
	const char *count_stringp);

/* tests whether test_char lies in the range lower_char through upper_char */
static boolean_t
in_wchar_range(wchar_t test_char,
	wchar_t lower_char,
	wchar_t upper_char);

/* removes and returns the top of stringp_stack[], or null if it is empty */
static const char *
pop_stringp(void);

/* returns a pointer to the character preceding current_charp */
static const char *
previous_charp(const char *current_charp);

/* saves stringp on stringp_stack[]; returns null if the stack is full */
static const char *
push_stringp(const char *stringp);

/* tests a byte against a class in the form aTHRUz */
static char_test_result_t
test_char_against_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/* tests a wide character against a multibyte class in the form aTHRUz */
static char_test_result_t
test_char_against_multibyte_class(wchar_t test_char,
	const char *classp,
	char_test_condition_t test_condition);


/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */

/* tests a byte against a class in the older form THRUaz */
static char_test_result_t
test_char_against_old_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/* retries the rest of the expression, backing up one byte at a time */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

/* retries the rest of the expression, backing up one character at a time */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

/* retries the rest of the expression at positions popped from the stack */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

/* matches the string at stringp against the compiled expression regexp */
static const char *
test_string(const char *stringp,
	const char *regexp);
152 
153 
154 /* DEFINITIONS OF PUBLIC VARIABLES */
155 
156 char *__loc1;
157 
158 /*
159  * reserve thread-specific storage for __loc1
160  */
161 char **
162 ____loc1(void)
163 {
164 	if (_thr_main())
165 		return (&__loc1);
166 	return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
167 }
168 
169 #define	__loc1 (*(____loc1()))
170 
171 /* DEFINITION OF regex() */
172 
173 extern char *
174 _regex(const char *regexp,
175 	const char *stringp, ...)
176 {
177 	va_list		arg_listp;
178 	int		char_size;
179 	const char	*end_of_matchp;
180 	wchar_t		regex_wchar;
181 	char		*return_argp[NSUBSTRINGS];
182 	char		*returned_substringp;
183 	int		substringn;
184 	const char	*substringp;
185 	wchar_t		string_wchar;
186 
187 	if (____loc1() == (char **)0) {
188 	    return ((char *)0);
189 	} else {
190 	    lmutex_lock(&regex_lock);
191 	    __loc1 = (char *)0;
192 	}
193 
194 	if ((stringp == (char *)0) || (regexp == (char *)0)) {
195 	    lmutex_unlock(&regex_lock);
196 	return ((char *)0);
197 	}
198 
199 
200 	/* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS  */
201 
202 	substringn = 0;
203 	va_start(arg_listp, stringp);
204 	while (substringn < NSUBSTRINGS) {
205 	    return_argp[substringn] = va_arg(arg_listp, char *);
206 	    substring_startp[substringn] = (char *)0;
207 	    return_arg_number[substringn] = -1;
208 	    substringn++;
209 	}
210 	va_end(arg_listp);
211 
212 
213 	/* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
214 
215 	end_of_matchp = (char *)0;
216 	stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
217 
218 	if ((int)*regexp == (int)START_OF_STRING_MARK) {
219 
220 	/*
221 	 * the match must start at the beginning of the string
222 	 */
223 
224 	    __loc1 = (char *)stringp;
225 	    regexp++;
226 	    end_of_matchp = test_string(stringp, regexp);
227 
228 	} else if ((int)*regexp == (int)ASCII_CHAR) {
229 
230 	/*
231 	 * test a string against a regular expression
232 	 * that starts with a single ASCII character:
233 	 *
234 	 * move to each character in the string that matches
235 	 * the first character in the regular expression
236 	 * and test the remaining string
237 	 */
238 
239 	    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
240 		stringp++;
241 	    }
242 	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
243 		end_of_matchp = test_string(stringp, regexp);
244 		if (end_of_matchp != (char *)0) {
245 		    __loc1 = (char *)stringp;
246 		} else {
247 		    stringp++;
248 		    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
249 			stringp++;
250 		    }
251 		}
252 	    }
253 
254 	} else if (!multibyte) {
255 
256 	/*
257 	 * if the value of the "multibyte" macro defined in <euc.h>
258 	 * is false, regex() is running in an ASCII locale;
259 	 * test an ASCII string against an ASCII regular expression
260 	 * that doesn't start with a single ASCII character:
261 	 *
262 	 * move forward in the string one byte at a time, testing
263 	 * the remaining string against the regular expression
264 	 */
265 
266 	    end_of_matchp = test_string(stringp, regexp);
267 	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
268 		stringp++;
269 		end_of_matchp = test_string(stringp, regexp);
270 	    }
271 	    if (end_of_matchp != (char *)0) {
272 		__loc1 = (char *)stringp;
273 	    }
274 
275 	} else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
276 
277 	/*
278 	 * test a multibyte string against a multibyte regular expression
279 	 * that starts with a single multibyte character:
280 	 *
281 	 * move to each character in the string that matches
282 	 * the first character in the regular expression
283 	 * and test the remaining string
284 	 */
285 
286 	    (void) get_wchar(&regex_wchar, regexp + 1);
287 	    char_size = get_wchar(&string_wchar, stringp);
288 	    while ((string_wchar != regex_wchar) && (char_size > 0)) {
289 		stringp += char_size;
290 		char_size = get_wchar(&string_wchar, stringp);
291 	    }
292 	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
293 		end_of_matchp = test_string(stringp, regexp);
294 		if (end_of_matchp != (char *)0) {
295 		    __loc1 = (char *)stringp;
296 		} else {
297 		    stringp += char_size;
298 		    char_size = get_wchar(&string_wchar, stringp);
299 		    while ((string_wchar != regex_wchar) && (char_size > 0)) {
300 			stringp += char_size;
301 			char_size = get_wchar(&string_wchar, stringp);
302 		    }
303 		}
304 	    }
305 
306 	} else {
307 
308 	/*
309 	 * test a multibyte string against a multibyte regular expression
310 	 * that doesn't start with a single multibyte character
311 	 *
312 	 * move forward in the string one multibyte character at a time,
313 	 * testing the remaining string against the regular expression
314 	 */
315 
316 	    end_of_matchp = test_string(stringp, regexp);
317 	    char_size = get_wchar(&string_wchar, stringp);
318 	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
319 		stringp += char_size;
320 		end_of_matchp = test_string(stringp, regexp);
321 		char_size = get_wchar(&string_wchar, stringp);
322 	    }
323 	    if (end_of_matchp != (char *)0) {
324 		__loc1 = (char *)stringp;
325 	    }
326 	}
327 
328 	/*
329 	 * Return substrings that matched subexpressions for which
330 	 * matching substrings are to be returned.
331 	 *
332 	 * NOTE:
333 	 *
334 	 * According to manual page regcmp(3G), regex() returns substrings
335 	 * that match subexpressions even when no substring matches the
336 	 * entire regular expression.
337 	 */
338 
339 	substringn = 0;
340 	while (substringn < NSUBSTRINGS) {
341 	    substringp = substring_startp[substringn];
342 	    if ((substringp != (char *)0) &&
343 		(return_arg_number[substringn] >= 0)) {
344 		returned_substringp =
345 		    return_argp[return_arg_number[substringn]];
346 		if (returned_substringp != (char *)0) {
347 		    while (substringp < substring_endp[substringn]) {
348 			*returned_substringp = (char)*substringp;
349 			returned_substringp++;
350 			substringp++;
351 		    }
352 		    *returned_substringp = '\0';
353 		}
354 	    }
355 	    substringn++;
356 	}
357 	lmutex_unlock(&regex_lock);
358 	return ((char *)end_of_matchp);
359 }  /* regex() */
360 
361 
362 /* DEFINITIONS OF PRIVATE FUNCTIONS */
363 
/*
 * get_wchar() decodes the character at the start of stringp into *wcharp
 * and returns the number of bytes that character occupies:
 *
 *	0		stringp is null or addresses the terminating null byte
 *	1		the character is a 7-bit ASCII character
 *	otherwise	the value returned by mbtowc(), which is negative
 *			when the bytes cannot be decoded
 *
 * *wcharp is always assigned.  Callers compare the wide character as well
 * as the returned size, so when mbtowc() fails a null wide character is
 * stored instead of leaving *wcharp with whatever it held before.
 */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp)
{
	int char_size;

	if ((stringp == (char *)0) || (*stringp == '\0')) {
	    char_size = 0;
	    *wcharp = (wchar_t)((unsigned int)'\0');
	} else if ((unsigned char)*stringp <= (unsigned char)0x7f) {
	    char_size = 1;
	    *wcharp = (wchar_t)((unsigned int)*stringp);
	} else {
	    char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
	    if (char_size < 0) {
		/* mbtowc() stores nothing for an invalid sequence */
		*wcharp = (wchar_t)((unsigned int)'\0');
	    }
	}
	return (char_size);
}
384 
385 static void
386 get_match_counts(int *nmust_matchp,
387 	int *nextra_matches_allowedp,
388 	const char *count_stringp)
389 {
390 	int minimum_match_count;
391 	int maximum_match_count;
392 
393 	minimum_match_count =
394 	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
395 	*nmust_matchp = minimum_match_count;
396 
397 	count_stringp++;
398 	maximum_match_count =
399 	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
400 	if (maximum_match_count == (int)UNLIMITED) {
401 	    *nextra_matches_allowedp = (int)UNLIMITED;
402 	} else {
403 	    *nextra_matches_allowedp =
404 		maximum_match_count - minimum_match_count;
405 	}
406 	return;
407 
408 } /* get_match_counts() */
409 
410 static boolean_t
411 in_wchar_range(wchar_t test_char,
412 	wchar_t lower_char,
413 	wchar_t upper_char)
414 {
415 	return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
416 	    (lower_char <= test_char) && (test_char <= upper_char)) ||
417 	    (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
418 	    ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
419 	    (lower_char <= test_char) && (test_char <= upper_char)));
420 
421 } /* in_wchar_range() */
422 
423 static const char *
424 pop_stringp(void)
425 {
426 	const char *stringp;
427 
428 	if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
429 	    return ((char *)0);
430 	} else {
431 	    stringp = *stringp_stackp;
432 	    stringp_stackp++;
433 	    return (stringp);
434 	}
435 }
436 
437 
438 static const char *
439 previous_charp(const char *current_charp)
440 {
441 	/*
442 	 * returns the pointer to the previous character in
443 	 * a string of multibyte characters
444 	 */
445 
446 	const char *prev_cs0 = current_charp - 1;
447 	const char *prev_cs1 = current_charp - eucw1;
448 	const char *prev_cs2 = current_charp - eucw2 - 1;
449 	const char *prev_cs3 = current_charp - eucw3 - 1;
450 	const char *prev_charp;
451 
452 	if ((unsigned char)*prev_cs0 <= 0x7f) {
453 	    prev_charp = prev_cs0;
454 	} else if ((unsigned char)*prev_cs2 == SS2) {
455 	    prev_charp = prev_cs2;
456 	} else if ((unsigned char)*prev_cs3 == SS3) {
457 	    prev_charp = prev_cs3;
458 	} else {
459 	    prev_charp = prev_cs1;
460 	}
461 	return (prev_charp);
462 
463 } /* previous_charp() */
464 
465 static const char *
466 push_stringp(const char *stringp)
467 {
468 	if (stringp_stackp <= &stringp_stack[0]) {
469 	    return ((char *)0);
470 	} else {
471 	    stringp_stackp--;
472 	    *stringp_stackp = stringp;
473 	    return (stringp);
474 	}
475 }
476 
477 
478 static char_test_result_t
479 test_char_against_ascii_class(char test_char,
480 	const char *classp,
481 	char_test_condition_t test_condition)
482 {
483 	/*
484 	 * tests a character for membership in an ASCII character class compiled
485 	 * by the internationalized version of regcmp();
486 	 *
487 	 * NOTE: The internationalized version of regcmp() compiles
488 	 * 	the range a-z in an ASCII character class to aTHRUz.
489 	 */
490 
491 	int	nbytes_to_check;
492 
493 	nbytes_to_check = (int)*classp;
494 	classp++;
495 	nbytes_to_check--;
496 
497 	while (nbytes_to_check > 0) {
498 	    if (test_char == *classp) {
499 		if (test_condition == IN_CLASS)
500 		    return (CONDITION_TRUE);
501 		else
502 		    return (CONDITION_FALSE);
503 	    } else if (*classp == THRU) {
504 		if ((*(classp - 1) <= test_char) &&
505 		    (test_char <= *(classp + 1))) {
506 		    if (test_condition == IN_CLASS)
507 			return (CONDITION_TRUE);
508 		    else
509 			return (CONDITION_FALSE);
510 		} else {
511 		    classp += 2;
512 		    nbytes_to_check -= 2;
513 		}
514 	    } else {
515 		classp++;
516 		nbytes_to_check--;
517 	    }
518 	}
519 	if (test_condition == NOT_IN_CLASS) {
520 	    return (CONDITION_TRUE);
521 	} else {
522 	    return (CONDITION_FALSE);
523 	}
524 } /* test_char_against_ascii_class() */
525 
526 static char_test_result_t
527 test_char_against_multibyte_class(wchar_t test_char,
528 	const char *classp,
529 	char_test_condition_t test_condition)
530 {
531 	/*
532 	 * tests a character for membership in a multibyte character class;
533 	 *
534 	 * NOTE: The range a-z in a multibyte character class compiles to
535 	 * 	aTHRUz.
536 	 */
537 
538 	int		char_size;
539 	wchar_t		current_char;
540 	int		nbytes_to_check;
541 	wchar_t		previous_char;
542 
543 	nbytes_to_check = (int)*classp;
544 	classp++;
545 	nbytes_to_check--;
546 
547 	char_size = get_wchar(&current_char, classp);
548 	if (char_size <= 0) {
549 	    return (CHAR_TEST_ERROR);
550 	} else if (test_char == current_char) {
551 	    if (test_condition == IN_CLASS) {
552 		return (CONDITION_TRUE);
553 	    } else {
554 		return (CONDITION_FALSE);
555 	    }
556 	} else {
557 	    classp += char_size;
558 	    nbytes_to_check -= char_size;
559 	}
560 
561 	while (nbytes_to_check > 0) {
562 	    previous_char = current_char;
563 	    char_size = get_wchar(&current_char, classp);
564 	    if (char_size <= 0) {
565 		return (CHAR_TEST_ERROR);
566 	    } else if (test_char == current_char) {
567 		if (test_condition == IN_CLASS) {
568 		    return (CONDITION_TRUE);
569 		} else {
570 		    return (CONDITION_FALSE);
571 		}
572 	    } else if (current_char == THRU) {
573 		classp += char_size;
574 		nbytes_to_check -= char_size;
575 		char_size = get_wchar(&current_char, classp);
576 		if (char_size <= 0) {
577 		    return (CHAR_TEST_ERROR);
578 		} else if (in_wchar_range(test_char, previous_char,
579 		    current_char)) {
580 		    if (test_condition == IN_CLASS) {
581 			return (CONDITION_TRUE);
582 		    } else {
583 			return (CONDITION_FALSE);
584 		    }
585 		} else {
586 		    classp += char_size;
587 		    nbytes_to_check -= char_size;
588 		}
589 	    } else {
590 		classp += char_size;
591 		nbytes_to_check -= char_size;
592 	    }
593 	}
594 	if (test_condition == NOT_IN_CLASS) {
595 	    return (CONDITION_TRUE);
596 	} else {
597 	    return (CONDITION_FALSE);
598 	}
599 } /* test_char_against_multibyte_class() */
600 
601 
602 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
603 
604 static char_test_result_t
605 test_char_against_old_ascii_class(char test_char,
606 	const char *classp,
607 	char_test_condition_t test_condition)
608 {
609 	/*
610 	 * tests a character for membership in an ASCII character class compiled
611 	 * by the ASCII version of regcmp();
612 	 *
613 	 * NOTE: ASCII versions of regcmp() compile the range a-z in an
614 	 *	ASCII character class to THRUaz.  The internationalized
615 	 *	version compiles the same range to aTHRUz.
616 	 */
617 
618 	int	nbytes_to_check;
619 
620 	nbytes_to_check = (int)*classp;
621 	classp++;
622 	nbytes_to_check--;
623 
624 	while (nbytes_to_check > 0) {
625 	    if (test_char == *classp) {
626 		if (test_condition == IN_CLASS) {
627 		    return (CONDITION_TRUE);
628 		} else {
629 		    return (CONDITION_FALSE);
630 		}
631 	    } else if (*classp == THRU) {
632 		if ((*(classp + 1) <= test_char) &&
633 		    (test_char <= *(classp + 2))) {
634 		    if (test_condition == IN_CLASS) {
635 			return (CONDITION_TRUE);
636 		    } else {
637 			return (CONDITION_FALSE);
638 		    }
639 		} else {
640 		    classp += 3;
641 		    nbytes_to_check -= 3;
642 		}
643 	    } else {
644 		classp++;
645 		nbytes_to_check--;
646 	    }
647 	}
648 	if (test_condition == NOT_IN_CLASS) {
649 	    return (CONDITION_TRUE);
650 	} else {
651 	    return (CONDITION_FALSE);
652 	}
653 } /* test_char_against_old_ascii_class() */
654 
/*
 * test_repeated_ascii_char() tests the string at stringp against the
 * compiled expression at regexp and, for as long as that fails, backs
 * stringp up one byte at a time until it reaches repeat_startp.
 *
 * Returns the result of the first successful test_string() call, or a
 * null pointer if every position down to repeat_startp fails.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	for (;;) {
	    const char *matchp = test_string(stringp, regexp);

	    if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
		return (matchp);
	    }
	    stringp--;
	}
}
670 
/*
 * test_repeated_multibyte_char() tests the string at stringp against the
 * compiled expression at regexp and, for as long as that fails, backs
 * stringp up one character at a time with previous_charp() while stringp
 * is still beyond repeat_startp.
 *
 * Returns the result of the first successful test_string() call, or a
 * null pointer if no tested position matches.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	for (;;) {
	    const char *matchp = test_string(stringp, regexp);

	    if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
		return (matchp);
	    }
	    stringp = previous_charp(stringp);
	}
}
686 
/*
 * test_repeated_group() tests the string at stringp against the compiled
 * expression at regexp and, for as long as that fails and stringp is
 * still beyond repeat_startp, retries at the position returned by
 * pop_stringp().
 *
 * Returns the result of the first successful test_string() call, or a
 * null pointer if no tested position matches or the stack runs empty.
 */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	for (;;) {
	    const char *matchp = test_string(stringp, regexp);

	    if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
		return (matchp);
	    }
	    stringp = pop_stringp();
	    if (stringp == (char *)0) {
		return ((char *)0);
	    }
	}
}
705 
706 static const char *
707 test_string(const char *stringp,
708 	const char *regexp)
709 {
710 	/*
711 	 * returns a pointer to the first character following the first
712 	 * substring of the string addressed by stringp that matches
713 	 * the compiled regular expression addressed by regexp
714 	 */
715 
716 	unsigned int		group_length;
717 	int			nextra_matches_allowed;
718 	int			nmust_match;
719 	wchar_t			regex_wchar;
720 	int			regex_char_size;
721 	const char		*repeat_startp;
722 	unsigned int		return_argn;
723 	wchar_t			string_wchar;
724 	int			string_char_size;
725 	unsigned int		substringn;
726 	char_test_condition_t	test_condition;
727 	const char		*test_stringp;
728 
729 	for (;;) {
730 
731 		/*
732 		 * Exit the loop via a return whenever there's a match
733 		 * or it's clear that there can be no match.
734 		 */
735 
736 	    switch ((int)*regexp) {
737 
738 		/*
739 		 * No fall-through.
740 		 * Each case ends with either a return or with stringp
741 		 * addressing the next character to be tested and regexp
742 		 * addressing the next compiled regular expression
743 		 *
744 		 * NOTE: The comments for each case give the meaning
745 		 *	of the compiled regular expression decoded by the case
746 		 *	and the character string that the compiled regular
747 		 *	expression uses to encode the case.  Each single
748 		 *	character encoded in the compiled regular expression
749 		 *	is shown enclosed in angle brackets (<>).  Each
750 		 *	compiled regular expression begins with a marker
751 		 *	character which is shown as a named constant
752 		 *	(e.g. <ASCII_CHAR>). Character constants are shown
753 		 *	enclosed in single quotes (e.g. <'$'>).  All other
754 		 *	single characters encoded in the compiled regular
755 		 *	expression are shown as lower case variable names
756 		 *	(e.g. <ascii_char> or <multibyte_char>). Multicharacter
757 		 *	strings encoded in the compiled regular expression
758 		 *	are shown as variable names followed by elipses
759 		 *	(e.g. <compiled_regex...>).
760 		 */
761 
762 	    case ASCII_CHAR: /* single ASCII char */
763 
764 		/* encoded as <ASCII_CHAR><ascii_char> */
765 
766 		regexp++;
767 		if (*regexp == *stringp) {
768 		    regexp++;
769 		    stringp++;
770 		} else {
771 		    return ((char *)0);
772 		}
773 		break;		/* end case ASCII_CHAR */
774 
775 	    case MULTIBYTE_CHAR: /* single multibyte char */
776 
777 		/* encoded as <MULTIBYTE_CHAR><multibyte_char> */
778 
779 		regexp++;
780 		regex_char_size = get_wchar(&regex_wchar, regexp);
781 		string_char_size = get_wchar(&string_wchar, stringp);
782 		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
783 		    return ((char *)0);
784 		} else {
785 		    regexp += regex_char_size;
786 		    stringp += string_char_size;
787 		}
788 		break;		/* end case MULTIBYTE_CHAR */
789 
790 	    case ANY_CHAR: /* any single ASCII or multibyte char */
791 
792 		/* encoded as <ANY_CHAR> */
793 
794 		if (!multibyte) {
795 		    if (*stringp == '\0') {
796 			return ((char *)0);
797 		    } else {
798 			regexp++;
799 			stringp++;
800 		    }
801 		} else {
802 		    string_char_size = get_wchar(&string_wchar, stringp);
803 		    if (string_char_size <= 0) {
804 			return ((char *)0);
805 		    } else {
806 			regexp++;
807 			stringp += string_char_size;
808 		    }
809 		}
810 		break;	/* end case ANY_CHAR */
811 
812 	    case IN_ASCII_CHAR_CLASS:		/* [.....] */
813 	    case NOT_IN_ASCII_CHAR_CLASS:
814 
815 		/*
816 		 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
817 		 *	or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
818 		 *
819 		 * NOTE: <class_length> includes the <class_length> byte
820 		 */
821 
822 		if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
823 		    test_condition = IN_CLASS;
824 		} else {
825 		    test_condition = NOT_IN_CLASS;
826 		}
827 		regexp++; /* point to the <class_length> byte */
828 
829 		if ((*stringp != '\0') &&
830 		    (test_char_against_ascii_class(*stringp, regexp,
831 		    test_condition) == CONDITION_TRUE)) {
832 		    regexp += (int)*regexp; /* add the class length to regexp */
833 		    stringp++;
834 		} else {
835 		    return ((char *)0);
836 		}
837 		break; /* end case IN_ASCII_CHAR_CLASS */
838 
839 	    case IN_MULTIBYTE_CHAR_CLASS:	/* [....] */
840 	    case NOT_IN_MULTIBYTE_CHAR_CLASS:
841 
842 		/*
843 		 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
844 		 * 	or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
845 		 *
846 		 * NOTE: <class_length> includes the <class_length> byte
847 		 */
848 
849 		if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
850 		    test_condition = IN_CLASS;
851 		} else {
852 		    test_condition = NOT_IN_CLASS;
853 		}
854 		regexp++; /* point to the <class_length> byte */
855 
856 		string_char_size = get_wchar(&string_wchar, stringp);
857 		if ((string_char_size > 0) &&
858 		    (test_char_against_multibyte_class(string_wchar, regexp,
859 		    test_condition) == CONDITION_TRUE)) {
860 		    regexp += (int)*regexp; /* add the class length to regexp */
861 		    stringp += string_char_size;
862 		} else {
863 		    return ((char *)0);
864 		}
865 		break; /* end case IN_MULTIBYTE_CHAR_CLASS */
866 
867 	    case IN_OLD_ASCII_CHAR_CLASS:	/* [...] */
868 	    case NOT_IN_OLD_ASCII_CHAR_CLASS:
869 
870 		/*
871 		 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
872 		 *	or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
873 		 *
874 		 * NOTE: <class_length> includes the <class_length> byte
875 		 */
876 
877 		if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
878 		    test_condition = IN_CLASS;
879 		} else {
880 		    test_condition = NOT_IN_CLASS;
881 		}
882 		regexp++; /* point to the <class_length> byte */
883 
884 		if ((*stringp != '\0') &&
885 		    (test_char_against_old_ascii_class(*stringp, regexp,
886 		    test_condition) == CONDITION_TRUE)) {
887 		    regexp += (int)*regexp; /* add the class length to regexp */
888 		    stringp++;
889 		} else {
890 		    return ((char *)0);
891 		}
892 		break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
893 
894 	    case SIMPLE_GROUP: /* (.....) */
895 
896 		/* encoded as <SIMPLE_GROUP><group_length> */
897 
898 		regexp += 2;
899 		break;		/* end case SIMPLE_GROUP */
900 
901 	    case END_GROUP:	/* (.....) */
902 
903 		/* encoded as <END_GROUP><groupn> */
904 
905 		regexp += 2;
906 		break;		/* end case END_GROUP */
907 
908 	    case SAVED_GROUP:	/* (.....)$0-9 */
909 
910 		/* encoded as <SAVED_GROUP><substringn> */
911 
912 		regexp++;
913 		substringn = (unsigned int)*regexp;
914 		if (substringn >= NSUBSTRINGS)
915 		    return ((char *)0);
916 		substring_startp[substringn] = stringp;
917 		regexp++;
918 		break;		/* end case SAVED_GROUP */
919 
920 	    case END_SAVED_GROUP:	/* (.....)$0-9 */
921 
922 		/*
923 		 * encoded as <END_SAVED_GROUP><substringn>\
924 		 *	<return_arg_number[substringn]>
925 		 */
926 
927 		regexp++;
928 		substringn = (unsigned int)*regexp;
929 		if (substringn >= NSUBSTRINGS)
930 		    return ((char *)0);
931 		substring_endp[substringn] = stringp;
932 		regexp++;
933 		return_argn = (unsigned int)*regexp;
934 		if (return_argn >= NSUBSTRINGS)
935 		    return ((char *)0);
936 		return_arg_number[substringn] = return_argn;
937 		regexp++;
938 		break;		/* end case END_SAVED_GROUP */
939 
940 	    case ASCII_CHAR|ZERO_OR_MORE:  /* char* */
941 
942 		/* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
943 
944 		regexp++;
945 		repeat_startp = stringp;
946 		while (*stringp == *regexp) {
947 		    stringp++;
948 		}
949 		regexp++;
950 		return (test_repeated_ascii_char(repeat_startp,
951 		    stringp, regexp));
952 
953 		/* end case ASCII_CHAR|ZERO_OR_MORE */
954 
955 	    case ASCII_CHAR|ONE_OR_MORE:   /* char+ */
956 
957 		/* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
958 
959 		regexp++;
960 		if (*stringp != *regexp) {
961 		    return ((char *)0);
962 		} else {
963 		    stringp++;
964 		    repeat_startp = stringp;
965 		    while (*stringp == *regexp) {
966 			stringp++;
967 		    }
968 		    regexp++;
969 		    return (test_repeated_ascii_char(repeat_startp, stringp,
970 			regexp));
971 		}
972 		/* end case ASCII_CHAR|ONE_OR_MORE */
973 
974 	    case ASCII_CHAR|COUNT:	/* char{min_count,max_count} */
975 
976 		/*
977 		 * encoded as <ASCII_CHAR|COUNT><ascii_char>\
978 		 *	<minimum_match_count><maximum_match_count>
979 		 */
980 
981 		regexp++;
982 		get_match_counts(&nmust_match, &nextra_matches_allowed,
983 		    regexp + 1);
984 		while ((*stringp == *regexp) && (nmust_match > 0)) {
985 		    nmust_match--;
986 		    stringp++;
987 		}
988 		if (nmust_match > 0) {
989 		    return ((char *)0);
990 		} else if (nextra_matches_allowed == UNLIMITED) {
991 		    repeat_startp = stringp;
992 		    while (*stringp == *regexp) {
993 			stringp++;
994 		    }
995 		    regexp += 3;
996 		    return (test_repeated_ascii_char(repeat_startp, stringp,
997 			regexp));
998 		} else {
999 		    repeat_startp = stringp;
1000 		    while ((*stringp == *regexp) &&
1001 			(nextra_matches_allowed > 0)) {
1002 			nextra_matches_allowed--;
1003 			stringp++;
1004 		    }
1005 		    regexp += 3;
1006 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1007 			regexp));
1008 		}
1009 		/* end case ASCII_CHAR|COUNT */
1010 
1011 	    case MULTIBYTE_CHAR|ZERO_OR_MORE:   /* char* */
1012 
1013 		/* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
1014 
1015 		regexp++;
1016 		regex_char_size = get_wchar(&regex_wchar, regexp);
1017 		repeat_startp = stringp;
1018 		string_char_size = get_wchar(&string_wchar, stringp);
1019 		while ((string_char_size > 0) &&
1020 		    (string_wchar == regex_wchar)) {
1021 		    stringp += string_char_size;
1022 		    string_char_size = get_wchar(&string_wchar, stringp);
1023 		}
1024 		regexp += regex_char_size;
1025 		return (test_repeated_multibyte_char(repeat_startp, stringp,
1026 		    regexp));
1027 
1028 		/* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
1029 
1030 	    case MULTIBYTE_CHAR|ONE_OR_MORE:    /* char+ */
1031 
1032 		/* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
1033 
1034 		regexp++;
1035 		regex_char_size = get_wchar(&regex_wchar, regexp);
1036 		string_char_size = get_wchar(&string_wchar, stringp);
1037 		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
1038 		    return ((char *)0);
1039 		} else {
1040 		    stringp += string_char_size;
1041 		    repeat_startp = stringp;
1042 		    string_char_size = get_wchar(&string_wchar, stringp);
1043 		    while ((string_char_size > 0) &&
1044 			(string_wchar == regex_wchar)) {
1045 			stringp += string_char_size;
1046 			string_char_size = get_wchar(&string_wchar, stringp);
1047 		    }
1048 		    regexp += regex_char_size;
1049 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1050 			regexp));
1051 		}
1052 		/* end case MULTIBYTE_CHAR|ONE_OR_MORE */
1053 
1054 	    case MULTIBYTE_CHAR|COUNT:		/* char{min_count,max_count} */
1055 
1056 		/*
1057 		 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
1058 		 * 	<minimum_match_count><maximum_match_count>
1059 		 */
1060 
1061 		regexp++;
1062 		regex_char_size = get_wchar(&regex_wchar, regexp);
1063 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1064 		    regexp + regex_char_size);
1065 		string_char_size = get_wchar(&string_wchar, stringp);
1066 		while ((string_char_size > 0) &&
1067 		    (string_wchar == regex_wchar) &&
1068 		    (nmust_match > 0)) {
1069 
1070 		    nmust_match--;
1071 		    stringp += string_char_size;
1072 		    string_char_size = get_wchar(&string_wchar, stringp);
1073 		}
1074 		if (nmust_match > 0) {
1075 		    return ((char *)0);
1076 		} else if (nextra_matches_allowed == UNLIMITED) {
1077 		    repeat_startp = stringp;
1078 		    while ((string_char_size > 0) &&
1079 			(string_wchar == regex_wchar)) {
1080 			stringp += string_char_size;
1081 			string_char_size = get_wchar(&string_wchar, stringp);
1082 		    }
1083 		    regexp += regex_char_size + 2;
1084 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1085 			regexp));
1086 		} else {
1087 		    repeat_startp = stringp;
1088 		    while ((string_char_size > 0) &&
1089 			(string_wchar == regex_wchar) &&
1090 			(nextra_matches_allowed > 0)) {
1091 			nextra_matches_allowed--;
1092 			stringp += string_char_size;
1093 			string_char_size = get_wchar(&string_wchar, stringp);
1094 		    }
1095 		    regexp += regex_char_size + 2;
1096 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1097 			regexp));
1098 		}
1099 		/* end case MULTIBYTE_CHAR|COUNT */
1100 
1101 	    case ANY_CHAR|ZERO_OR_MORE:		/* .* */
1102 
1103 		/* encoded as <ANY_CHAR|ZERO_OR_MORE> */
1104 
1105 		repeat_startp = stringp;
1106 		if (!multibyte) {
1107 		    while (*stringp != '\0') {
1108 			stringp++;
1109 		    }
1110 		    regexp++;
1111 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1112 			regexp));
1113 		} else {
1114 		    string_char_size = get_wchar(&string_wchar, stringp);
1115 		    while (string_char_size > 0) {
1116 			stringp += string_char_size;
1117 			string_char_size = get_wchar(&string_wchar, stringp);
1118 		    }
1119 		    regexp++;
1120 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1121 			regexp));
1122 		}
1123 		/* end case <ANY_CHAR|ZERO_OR_MORE> */
1124 
1125 	    case ANY_CHAR|ONE_OR_MORE:		/* .+ */
1126 
1127 		/* encoded as <ANY_CHAR|ONE_OR_MORE> */
1128 
1129 		if (!multibyte) {
1130 		    if (*stringp == '\0') {
1131 			return ((char *)0);
1132 		    } else {
1133 			stringp++;
1134 			repeat_startp = stringp;
1135 			while (*stringp != '\0') {
1136 			    stringp++;
1137 			}
1138 			regexp++;
1139 			return (test_repeated_ascii_char(repeat_startp, stringp,
1140 			    regexp));
1141 		    }
1142 		} else {
1143 		    string_char_size = get_wchar(&string_wchar, stringp);
1144 		    if (string_char_size <= 0) {
1145 			return ((char *)0);
1146 		    } else {
1147 			stringp += string_char_size;
1148 			repeat_startp = stringp;
1149 			string_char_size = get_wchar(&string_wchar, stringp);
1150 			while (string_char_size > 0) {
1151 			    stringp += string_char_size;
1152 			    string_char_size =
1153 				get_wchar(&string_wchar, stringp);
1154 			}
1155 			regexp++;
1156 			return (test_repeated_multibyte_char(repeat_startp,
1157 			    stringp, regexp));
1158 		    }
1159 		}
1160 		/* end case <ANY_CHAR|ONE_OR_MORE> */
1161 
1162 	    case ANY_CHAR|COUNT:	/* .{min_count,max_count} */
1163 
1164 		/*
1165 		 * encoded as	<ANY_CHAR|COUNT>\
1166 		 *		<minimum_match_count><maximum_match_count>
1167 		 */
1168 
1169 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1170 		    regexp + 1);
1171 		if (!multibyte) {
1172 		    while ((*stringp != '\0') && (nmust_match > 0)) {
1173 			nmust_match--;
1174 			stringp++;
1175 		    }
1176 		    if (nmust_match > 0) {
1177 			return ((char *)0);
1178 		    } else if (nextra_matches_allowed == UNLIMITED) {
1179 			repeat_startp = stringp;
1180 			while (*stringp != '\0') {
1181 			    stringp++;
1182 			}
1183 			regexp += 3;
1184 			return (test_repeated_ascii_char(repeat_startp, stringp,
1185 			    regexp));
1186 		    } else {
1187 			repeat_startp = stringp;
1188 			while ((*stringp != '\0') &&
1189 			    (nextra_matches_allowed > 0)) {
1190 			    nextra_matches_allowed--;
1191 			    stringp++;
1192 			}
1193 			regexp += 3;
1194 			return (test_repeated_ascii_char(repeat_startp, stringp,
1195 			    regexp));
1196 		    }
1197 		} else { /* multibyte character */
1198 
1199 		    string_char_size = get_wchar(&string_wchar, stringp);
1200 		    while ((string_char_size > 0) && (nmust_match > 0)) {
1201 			nmust_match--;
1202 			stringp += string_char_size;
1203 			string_char_size = get_wchar(&string_wchar, stringp);
1204 		    }
1205 		    if (nmust_match > 0) {
1206 			return ((char *)0);
1207 		    } else if (nextra_matches_allowed == UNLIMITED) {
1208 			repeat_startp = stringp;
1209 			while (string_char_size > 0) {
1210 			    stringp += string_char_size;
1211 			    string_char_size =
1212 				get_wchar(&string_wchar, stringp);
1213 			}
1214 			regexp += 3;
1215 			return (test_repeated_multibyte_char(repeat_startp,
1216 			    stringp, regexp));
1217 		    } else {
1218 			repeat_startp = stringp;
1219 			while ((string_char_size > 0) &&
1220 			    (nextra_matches_allowed > 0)) {
1221 			    nextra_matches_allowed--;
1222 			    stringp += string_char_size;
1223 			    string_char_size =
1224 				get_wchar(&string_wchar, stringp);
1225 			}
1226 			regexp += 3;
1227 			return (test_repeated_multibyte_char(repeat_startp,
1228 			    stringp, regexp));
1229 		    }
1230 		} /* end case ANY_CHAR|COUNT */
1231 
1232 	    case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1233 	    case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1234 
1235 		/*
1236 		 * encoded as	<IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1237 		 *		<class_length><class ...>
1238 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1239 		 *		<class_length><class ...>
1240 		 *
1241 		 * NOTE: <class_length> includes the <class_length> byte
1242 		 */
1243 
1244 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1245 		    test_condition = IN_CLASS;
1246 		} else {
1247 		    test_condition = NOT_IN_CLASS;
1248 		}
1249 		regexp++; /* point to the <class_length> byte */
1250 
1251 		repeat_startp = stringp;
1252 		while ((*stringp != '\0') &&
1253 		    (test_char_against_ascii_class(*stringp, regexp,
1254 		    test_condition) == CONDITION_TRUE)) {
1255 		    stringp++;
1256 		}
1257 		regexp += (int)*regexp; /* add the class length to regexp */
1258 		return (test_repeated_ascii_char(repeat_startp, stringp,
1259 		    regexp));
1260 
1261 		/* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1262 
1263 	    case IN_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1264 	    case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
1265 
1266 		/*
1267 		 * encoded as	<IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1268 		 *		<class_length><class ...>
1269 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1270 		 * 		<class_length><class ...>
1271 		 *
1272 		 * NOTE: <class_length> includes the <class_length> byte
1273 		 */
1274 
1275 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1276 		    test_condition = IN_CLASS;
1277 		} else {
1278 		    test_condition = NOT_IN_CLASS;
1279 		}
1280 		regexp++; /* point to the <class_length> byte */
1281 
1282 		if ((*stringp == '\0') ||
1283 		    (test_char_against_ascii_class(*stringp, regexp,
1284 		    test_condition) != CONDITION_TRUE)) {
1285 		    return ((char *)0);
1286 		} else {
1287 		    stringp++;
1288 		    repeat_startp = stringp;
1289 		    while ((*stringp != '\0') &&
1290 			(test_char_against_ascii_class(*stringp, regexp,
1291 			test_condition) == CONDITION_TRUE)) {
1292 			stringp++;
1293 		    }
1294 		    regexp += (int)*regexp; /* add the class length to regexp */
1295 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1296 			regexp));
1297 		}
1298 		/* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
1299 
1300 	    case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */
1301 	    case NOT_IN_ASCII_CHAR_CLASS | COUNT:
1302 
1303 		/*
1304 		 * encoded as	<IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1305 		 * 		<class ...><minimum_match_count>\
1306 		 *		<maximum_match_count>
1307 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1308 		 *		<class ...><minimum_match_count>\
1309 		 *		<maximum_match_count>
1310 		 *
1311 		 * NOTE: <class_length> includes the <class_length> byte,
1312 		 *	but not the <minimum_match_count> or
1313 		 *	<maximum_match_count> bytes
1314 		 */
1315 
1316 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
1317 		    test_condition = IN_CLASS;
1318 		} else {
1319 		    test_condition = NOT_IN_CLASS;
1320 		}
1321 		regexp++; /* point to the <class_length> byte */
1322 
1323 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1324 		    regexp + (int)*regexp);
1325 		while ((*stringp != '\0') &&
1326 		    (test_char_against_ascii_class(*stringp, regexp,
1327 		    test_condition) == CONDITION_TRUE) &&
1328 		    (nmust_match > 0)) {
1329 		    nmust_match--;
1330 		    stringp++;
1331 		}
1332 		if (nmust_match > 0) {
1333 		    return ((char *)0);
1334 		} else if (nextra_matches_allowed == UNLIMITED) {
1335 		    repeat_startp = stringp;
1336 		    while ((*stringp != '\0') &&
1337 			(test_char_against_ascii_class(*stringp, regexp,
1338 			test_condition) == CONDITION_TRUE)) {
1339 			stringp++;
1340 		    }
1341 		    regexp += (int)*regexp + 2;
1342 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1343 			regexp));
1344 		} else {
1345 		    repeat_startp = stringp;
1346 		    while ((*stringp != '\0') &&
1347 			(test_char_against_ascii_class(*stringp, regexp,
1348 			test_condition) == CONDITION_TRUE) &&
1349 			(nextra_matches_allowed > 0)) {
1350 			nextra_matches_allowed--;
1351 			stringp++;
1352 		    }
1353 		    regexp += (int)*regexp + 2;
1354 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1355 			regexp));
1356 		}
1357 		/* end case IN_ASCII_CHAR_CLASS|COUNT */
1358 
1359 	    case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1360 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
1361 
1362 		/*
1363 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1364 		 *		<class_length><class ...>
1365 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1366 		 *		<class_length><class ...>
1367 		 *
1368 		 * NOTE: <class_length> includes the <class_length> byte
1369 		 */
1370 
1371 		if ((int)*regexp ==
1372 		    (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
1373 		    test_condition = IN_CLASS;
1374 		} else {
1375 		    test_condition = NOT_IN_CLASS;
1376 		}
1377 		regexp++; /* point to the <class_length> byte */
1378 
1379 		repeat_startp = stringp;
1380 		string_char_size = get_wchar(&string_wchar, stringp);
1381 		while ((string_char_size > 0) &&
1382 		    (test_char_against_multibyte_class(string_wchar, regexp,
1383 		    test_condition) == CONDITION_TRUE)) {
1384 		    stringp += string_char_size;
1385 		    string_char_size = get_wchar(&string_wchar, stringp);
1386 		}
1387 		regexp += (int)*regexp; /* add the class length to regexp */
1388 		return (test_repeated_multibyte_char(repeat_startp, stringp,
1389 		    regexp));
1390 
1391 		/* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
1392 
1393 	    case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1394 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
1395 
1396 		/*
1397 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1398 		 *		<class_length><class ...>
1399 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1400 		 *		<class_length><class ...>
1401 		 *
1402 		 * NOTE: <class_length> includes the <class_length> byte
1403 		 */
1404 
1405 		if ((int)*regexp ==
1406 		    (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
1407 		    test_condition = IN_CLASS;
1408 		} else {
1409 		    test_condition = NOT_IN_CLASS;
1410 		}
1411 		regexp++; /* point to the <class_length> byte */
1412 
1413 		string_char_size = get_wchar(&string_wchar, stringp);
1414 		if ((string_char_size <= 0) ||
1415 		    (test_char_against_multibyte_class(string_wchar, regexp,
1416 		    test_condition) != CONDITION_TRUE)) {
1417 		    return ((char *)0);
1418 		} else {
1419 		    stringp += string_char_size;
1420 		    repeat_startp = stringp;
1421 		    string_char_size = get_wchar(&string_wchar, stringp);
1422 		    while ((string_char_size > 0) &&
1423 			(test_char_against_multibyte_class(string_wchar,
1424 			regexp, test_condition) == CONDITION_TRUE)) {
1425 			stringp += string_char_size;
1426 			string_char_size = get_wchar(&string_wchar, stringp);
1427 		    }
1428 		    regexp += (int)*regexp; /* add the class length to regexp */
1429 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1430 			regexp));
1431 		}
1432 		/* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
1433 
1434 	    case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1435 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
1436 
1437 		/*
1438 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1439 		 *		<class_length><class ...><min_count><max_count>
1440 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1441 		 *		<class_length><class ...><min_count><max_count>
1442 		 *
1443 		 * NOTE: <class_length> includes the <class_length> byte
1444 		 *	but not the <minimum_match_count> or
1445 		 *	<maximum_match_count> bytes
1446 		 */
1447 
1448 		if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
1449 		    test_condition = IN_CLASS;
1450 		} else {
1451 		    test_condition = NOT_IN_CLASS;
1452 		}
1453 		regexp++; /* point to the <class_length> byte */
1454 
1455 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1456 		    regexp + (int)*regexp);
1457 		string_char_size = get_wchar(&string_wchar, stringp);
1458 		while ((string_char_size > 0) &&
1459 		    (test_char_against_multibyte_class(string_wchar, regexp,
1460 		    test_condition) == CONDITION_TRUE) &&
1461 		    (nmust_match > 0)) {
1462 		    nmust_match--;
1463 		    stringp += string_char_size;
1464 		    string_char_size = get_wchar(&string_wchar, stringp);
1465 		}
1466 		if (nmust_match > 0) {
1467 		    return ((char *)0);
1468 		} else if (nextra_matches_allowed == UNLIMITED) {
1469 		    repeat_startp = stringp;
1470 		    while ((string_char_size > 0) &&
1471 			(test_char_against_multibyte_class(string_wchar,
1472 			regexp, test_condition) == CONDITION_TRUE)) {
1473 			stringp += string_char_size;
1474 			string_char_size = get_wchar(&string_wchar, stringp);
1475 		    }
1476 		    regexp += (int)*regexp + 2;
1477 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1478 			regexp));
1479 		} else {
1480 		    repeat_startp = stringp;
1481 		    while ((string_char_size > 0) &&
1482 			(test_char_against_multibyte_class(string_wchar,
1483 			regexp, test_condition) == CONDITION_TRUE) &&
1484 			(nextra_matches_allowed > 0)) {
1485 			nextra_matches_allowed--;
1486 			stringp += string_char_size;
1487 			string_char_size = get_wchar(&string_wchar, stringp);
1488 		    }
1489 		    regexp += (int)*regexp + 2;
1490 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1491 			regexp));
1492 		}
1493 		/* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
1494 
1495 	    case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1496 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1497 
1498 		/*
1499 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1500 		 *		<class_length><class ...>
1501 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1502 		 *		<class_length><class ...>
1503 		 *
1504 		 * NOTE: <class_length> includes the <class_length> byte
1505 		 */
1506 
1507 		if ((int)*regexp ==
1508 		    (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1509 		    test_condition = IN_CLASS;
1510 		} else {
1511 		    test_condition = NOT_IN_CLASS;
1512 		}
1513 		regexp++; /* point to the <class_length> byte */
1514 
1515 		repeat_startp = stringp;
1516 		while ((*stringp != '\0') &&
1517 		    (test_char_against_old_ascii_class(*stringp, regexp,
1518 		    test_condition) == CONDITION_TRUE)) {
1519 		    stringp++;
1520 		}
1521 		regexp += (int)*regexp; /* add the class length to regexp */
1522 		return (test_repeated_ascii_char(repeat_startp, stringp,
1523 		    regexp));
1524 
1525 		/* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1526 
1527 	    case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1528 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
1529 
1530 		/*
1531 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1532 		 *		<class_length><class ...>
1533 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1534 		 *		<class_length><class ...>
1535 		 *
1536 		 * NOTE: <class_length> includes the <class_length> byte
1537 		 */
1538 
1539 		if ((int)*regexp ==
1540 		    (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1541 		    test_condition = IN_CLASS;
1542 		} else {
1543 		    test_condition = NOT_IN_CLASS;
1544 		}
1545 		regexp++; /* point to the <class_length> byte */
1546 
1547 		if ((*stringp == '\0') ||
1548 		    (test_char_against_old_ascii_class(*stringp, regexp,
1549 		    test_condition) != CONDITION_TRUE)) {
1550 		    return ((char *)0);
1551 		} else {
1552 		    stringp++;
1553 		    repeat_startp = stringp;
1554 		    while ((*stringp != '\0') &&
1555 			(test_char_against_old_ascii_class(*stringp, regexp,
1556 			test_condition) == CONDITION_TRUE)) {
1557 			stringp++;
1558 		    }
1559 		    regexp += (int)*regexp; /* add the class length to regexp */
1560 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1561 			regexp));
1562 		}
1563 		/* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
1564 
1565 	    case IN_OLD_ASCII_CHAR_CLASS|COUNT:	/* [...]{min_count,max_count} */
1566 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
1567 
1568 		/*
1569 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
1570 		 *		<class ...><minimum_match_count>\
1571 		 *		<maximum_match_count>
1572 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
1573 		 *		<class_length><class ...><minimum_match_count>\
1574 		 *		<maximum_match_count>
1575 		 *
1576 		 * NOTE: <class_length> includes the <class_length> byte
1577 		 *	but not the <minimum_match_count> or
1578 		 *	<maximum_match_count> bytes
1579 		 */
1580 
1581 		if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
1582 		    test_condition = IN_CLASS;
1583 		} else {
1584 		    test_condition = NOT_IN_CLASS;
1585 		}
1586 		regexp++; /* point to the <class_length> byte */
1587 
1588 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1589 		    regexp + (int)*regexp);
1590 		while ((*stringp != '\0') &&
1591 		    (test_char_against_old_ascii_class(*stringp, regexp,
1592 		    test_condition) == CONDITION_TRUE) &&
1593 		    (nmust_match > 0)) {
1594 		    nmust_match--;
1595 		    stringp++;
1596 		}
1597 		if (nmust_match > 0) {
1598 		    return ((char *)0);
1599 		} else if (nextra_matches_allowed == UNLIMITED) {
1600 		    repeat_startp = stringp;
1601 		    while ((*stringp != '\0') &&
1602 			(test_char_against_old_ascii_class(*stringp, regexp,
1603 			test_condition) == CONDITION_TRUE)) {
1604 			stringp++;
1605 		    }
1606 		    regexp += (int)*regexp + 2;
1607 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1608 			regexp));
1609 		} else {
1610 		    repeat_startp = stringp;
1611 		    while ((*stringp != '\0') &&
1612 			(test_char_against_old_ascii_class(*stringp, regexp,
1613 			test_condition) == CONDITION_TRUE) &&
1614 			(nextra_matches_allowed > 0)) {
1615 			nextra_matches_allowed--;
1616 			stringp++;
1617 		    }
1618 		    regexp += (int)*regexp + 2;
1619 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1620 			regexp));
1621 		}
1622 		/* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
1623 
1624 	    case ZERO_OR_MORE_GROUP:		/* (.....)* */
1625 	    case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1626 	    case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1627 	    case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1628 
1629 		/*
1630 		 * encoded as	<ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1631 		 *		<group_length><compiled_regex...>\
1632 		 *		<END_GROUP|ZERO_OR_MORE><groupn>
1633 		 *
1634 		 * NOTE:
1635 		 *
1636 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1637 		 *	length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
1638 		 *		<groupn>)
1639 		 *
1640 		 */
1641 
1642 		group_length =
1643 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1644 		    TIMES_256_SHIFT);
1645 		regexp++;
1646 		group_length += (unsigned int)*regexp;
1647 		regexp++;
1648 		repeat_startp = stringp;
1649 		test_stringp = test_string(stringp, regexp);
1650 		while (test_stringp != (char *)0) {
1651 		    if (push_stringp(stringp) == (char *)0)
1652 			return ((char *)0);
1653 		    stringp = test_stringp;
1654 		    test_stringp = test_string(stringp, regexp);
1655 		}
1656 		regexp += group_length;
1657 		return (test_repeated_group(repeat_startp, stringp, regexp));
1658 
1659 		/* end case ZERO_OR_MORE_GROUP */
1660 
1661 	    case END_GROUP|ZERO_OR_MORE:	/* (.....)* */
1662 
1663 		/* encoded as <END_GROUP|ZERO_OR_MORE> */
1664 
1665 		/* return from recursive call to test_string() */
1666 
1667 		return ((char *)stringp);
1668 
1669 		/* end case END_GROUP|ZERO_OR_MORE */
1670 
1671 	    case ONE_OR_MORE_GROUP:		/* (.....)+ */
1672 	    case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1673 	    case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1674 	    case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1675 
1676 		/*
1677 		 * encoded as	<ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1678 		 *		<group_length><compiled_regex...>\
1679 		 *		<END_GROUP|ONE_OR_MORE><groupn>
1680 		 *
1681 		 * NOTE:
1682 		 *
1683 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1684 		 * 	length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
1685 		 *		<groupn>)
1686 		 */
1687 
1688 		group_length =
1689 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1690 		    TIMES_256_SHIFT);
1691 		regexp++;
1692 		group_length += (unsigned int)*regexp;
1693 		regexp++;
1694 		stringp = test_string(stringp, regexp);
1695 		if (stringp == (char *)0)
1696 		    return ((char *)0);
1697 		repeat_startp = stringp;
1698 		test_stringp = test_string(stringp, regexp);
1699 		while (test_stringp != (char *)0) {
1700 		    if (push_stringp(stringp) == (char *)0)
1701 			return ((char *)0);
1702 		    stringp = test_stringp;
1703 		    test_stringp = test_string(stringp, regexp);
1704 		}
1705 		regexp += group_length;
1706 		return (test_repeated_group(repeat_startp, stringp, regexp));
1707 
1708 		/* end case ONE_OR_MORE_GROUP */
1709 
1710 	    case END_GROUP|ONE_OR_MORE:		/* (.....)+ */
1711 
1712 		/* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
1713 
1714 		/* return from recursive call to test_string() */
1715 
1716 		return ((char *)stringp);
1717 
1718 		/* end case END_GROUP|ONE_OR_MORE */
1719 
1720 	    case COUNTED_GROUP:		/* (.....){max_count,min_count} */
1721 	    case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
1722 	    case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
1723 	    case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
1724 
1725 		/*
1726 		 * encoded as	<COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
1727 		 *		<compiled_regex...><END_GROUP|COUNT><groupn>\
1728 		 *		<minimum_match_count><maximum_match_count>
1729 		 *
1730 		 * NOTE:
1731 		 *
1732 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1733 		 *	length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
1734 		 *
1735 		 * but does not include the <minimum_match_count> or
1736 		 *	<maximum_match_count> bytes
1737 		 */
1738 
1739 		group_length =
1740 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1741 		    TIMES_256_SHIFT);
1742 		regexp++;
1743 		group_length += (unsigned int)*regexp;
1744 		regexp++;
1745 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1746 		    regexp + group_length);
1747 		test_stringp = test_string(stringp, regexp);
1748 		while ((test_stringp != (char *)0) && (nmust_match > 0)) {
1749 		    stringp = test_stringp;
1750 		    nmust_match--;
1751 		    test_stringp = test_string(stringp, regexp);
1752 		}
1753 		if (nmust_match > 0) {
1754 		    return ((char *)0);
1755 		} else if (nextra_matches_allowed == UNLIMITED) {
1756 		    repeat_startp = stringp;
1757 		    while (test_stringp != (char *)0) {
1758 			if (push_stringp(stringp) == (char *)0)
1759 			    return ((char *)0);
1760 			stringp = test_stringp;
1761 			test_stringp = test_string(stringp, regexp);
1762 		}
1763 		    regexp += group_length + 2;
1764 		    return (test_repeated_group(repeat_startp, stringp,
1765 			regexp));
1766 		} else {
1767 		    repeat_startp = stringp;
1768 		    while ((test_stringp != (char *)0) &&
1769 			(nextra_matches_allowed > 0)) {
1770 			nextra_matches_allowed--;
1771 			if (push_stringp(stringp) == (char *)0)
1772 			    return ((char *)0);
1773 			stringp = test_stringp;
1774 			test_stringp = test_string(stringp, regexp);
1775 		}
1776 		    regexp += group_length + 2;
1777 		    return (test_repeated_group(repeat_startp, stringp,
1778 			regexp));
1779 		}
1780 		/* end case COUNTED_GROUP */
1781 
1782 	    case END_GROUP|COUNT:	/* (.....){max_count,min_count} */
1783 
1784 		/* encoded as <END_GROUP|COUNT> */
1785 
1786 		/* return from recursive call to test_string() */
1787 
1788 		return (stringp);
1789 
1790 		/* end case END_GROUP|COUNT */
1791 
1792 	    case END_OF_STRING_MARK:
1793 
1794 		/* encoded as <END_OF_STRING_MARK><END_REGEX> */
1795 
1796 		if (*stringp == '\0') {
1797 		    regexp++;
1798 		} else {
1799 		    return ((char *)0);
1800 		}
1801 		break; /* end case END_OF_STRING_MARK */
1802 
1803 	    case END_REGEX: /* end of the compiled regular expression */
1804 
1805 		/* encoded as <END_REGEX> */
1806 
1807 		return (stringp);
1808 
1809 		/* end case END_REGEX */
1810 
1811 	    default:
1812 
1813 		return ((char *)0);
1814 
1815 	    } /* end switch (*regexp) */
1816 
1817 	} /* end for (;;) */
1818 
1819 } /* test_string() */
1820