xref: /titanic_50/usr/src/lib/libc/port/regex/regex.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * IMPORTANT NOTE:
34  *
35  * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
36  * IT IS **NOT** CHARACTER SET INDEPENDENT.
37  *
38  */
39 
40 #pragma weak regex = _regex
41 
42 /* CONSTANTS SHARED WITH regcmp() */
43 #include "regex.h"
44 
45 #include "lint.h"
46 #include "mtlib.h"
47 #include <limits.h>
48 #include <stdarg.h>
49 #include <stdlib.h>
50 #include <thread.h>
51 #include <widec.h>
52 #include "tsd.h"
53 
54 
55 /* PRIVATE CONSTANTS */
56 
57 #define	ADD_256_TO_GROUP_LENGTH	0x1
58 #define	ADD_512_TO_GROUP_LENGTH	0x2
59 #define	ADD_768_TO_GROUP_LENGTH	0x3
60 #define	ADDED_LENGTH_BITS	0x3
61 #define	SINGLE_BYTE_MASK	0xff
62 #define	STRINGP_STACK_SIZE	50
63 
64 
65 /* PRIVATE TYPE DEFINITIONS */
66 
/*
 * Sense of a character-class test.  The class-test helpers report
 * CONDITION_TRUE for a character found in the class only when the caller
 * asked for IN_CLASS, and for a character absent from the class only when
 * the caller asked for NOT_IN_CLASS.
 */
typedef enum {
	NOT_IN_CLASS = 0,	/* succeed when the char is absent from the class */
	IN_CLASS = 1		/* succeed when the char is a member of the class */
} char_test_condition_t;
71 
/*
 * Outcome of testing one character against a compiled character class.
 */
typedef enum {
	TESTING_CHAR = 0,	/* NOTE(review): looks like an "undecided" state; */
				/* no visible helper returns it - confirm */
	CONDITION_TRUE = 1,	/* the requested condition holds */
	CONDITION_FALSE = 2,	/* the requested condition does not hold */
	CHAR_TEST_ERROR = 3	/* a class member could not be decoded */
} char_test_result_t;
78 
79 
80 /* PRIVATE GLOBAL VARIABLES */
81 
/*
 * All of the state below is shared by every caller of regex(); regex()
 * acquires regex_lock before touching it and releases the lock before
 * returning.
 */
static mutex_t		regex_lock = DEFAULTMUTEX;
/*
 * For each saved subexpression: index of the regex() vararg that receives
 * the matched substring, or -1 (set at the start of each call) when the
 * subexpression has not been completed.
 */
static int		return_arg_number[NSUBSTRINGS];
/* first byte past the substring matched by each saved subexpression */
static const char	*substring_endp[NSUBSTRINGS];
/* start of the substring matched by each saved subexpression, or null */
static const char	*substring_startp[NSUBSTRINGS];
/*
 * Stack of string positions managed by push_stringp()/pop_stringp().
 * It is empty when stringp_stackp addresses one past the last element;
 * pushing moves stringp_stackp toward element 0.
 */
static const char	*stringp_stack[STRINGP_STACK_SIZE];
/* current top of stringp_stack; reset by regex() on every call */
static const char	**stringp_stackp;
88 
89 
90 /* DECLARATIONS OF PRIVATE FUNCTIONS */
91 
92 static int
93 get_wchar(wchar_t *wcharp,
94 	const char *stringp);
95 
96 static void
97 get_match_counts(int *nmust_matchp,
98 	int *nextra_matches_allowedp,
99 	const char *count_stringp);
100 
101 static boolean_t
102 in_wchar_range(wchar_t test_char,
103 	wchar_t lower_char,
104 	wchar_t upper_char);
105 
106 static const char *
107 pop_stringp(void);
108 
109 static const char *
110 previous_charp(const char *current_charp);
111 
112 static const char *
113 push_stringp(const char *stringp);
114 
115 static char_test_result_t
116 test_char_against_ascii_class(char test_char,
117 	const char *classp,
118 	char_test_condition_t test_condition);
119 
120 static char_test_result_t
121 test_char_against_multibyte_class(wchar_t test_char,
122 	const char *classp,
123 	char_test_condition_t test_condition);
124 
125 
126 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
127 
128 static char_test_result_t
129 test_char_against_old_ascii_class(char test_char,
130 	const char *classp,
131 	char_test_condition_t test_condition);
132 
133 static const char *
134 test_repeated_ascii_char(const char *repeat_startp,
135 	const char *stringp,
136 	const char *regexp);
137 
138 static const char *
139 test_repeated_multibyte_char(const char *repeat_startp,
140 	const char *stringp,
141 	const char *regexp);
142 
143 static const char *
144 test_repeated_group(const char *repeat_startp,
145 	const char *stringp,
146 	const char *regexp);
147 
148 static const char *
149 test_string(const char *stringp,
150 	const char *regexp);
151 
152 
153 /* DEFINITIONS OF PUBLIC VARIABLES */
154 
155 char *__loc1;
156 
157 /*
158  * reserve thread-specific storage for __loc1
159  */
160 char **
161 ____loc1(void)
162 {
163 	if (_thr_main())
164 		return (&__loc1);
165 	return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
166 }
167 
168 #define	__loc1 (*(____loc1()))
169 
170 /* DEFINITION OF regex() */
171 
172 extern char *
173 _regex(const char *regexp,
174 	const char *stringp, ...)
175 {
176 	va_list		arg_listp;
177 	int		char_size;
178 	const char	*end_of_matchp;
179 	wchar_t		regex_wchar;
180 	char		*return_argp[NSUBSTRINGS];
181 	char		*returned_substringp;
182 	int		substringn;
183 	const char	*substringp;
184 	wchar_t		string_wchar;
185 
186 	if (____loc1() == (char **)0) {
187 	    return ((char *)0);
188 	} else {
189 	    lmutex_lock(&regex_lock);
190 	    __loc1 = (char *)0;
191 	}
192 
193 	if ((stringp == (char *)0) || (regexp == (char *)0)) {
194 	    lmutex_unlock(&regex_lock);
195 	return ((char *)0);
196 	}
197 
198 
199 	/* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS  */
200 
201 	substringn = 0;
202 	va_start(arg_listp, stringp);
203 	while (substringn < NSUBSTRINGS) {
204 	    return_argp[substringn] = va_arg(arg_listp, char *);
205 	    substring_startp[substringn] = (char *)0;
206 	    return_arg_number[substringn] = -1;
207 	    substringn++;
208 	}
209 	va_end(arg_listp);
210 
211 
212 	/* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
213 
214 	end_of_matchp = (char *)0;
215 	stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
216 
217 	if ((int)*regexp == (int)START_OF_STRING_MARK) {
218 
219 	/*
220 	 * the match must start at the beginning of the string
221 	 */
222 
223 	    __loc1 = (char *)stringp;
224 	    regexp++;
225 	    end_of_matchp = test_string(stringp, regexp);
226 
227 	} else if ((int)*regexp == (int)ASCII_CHAR) {
228 
229 	/*
230 	 * test a string against a regular expression
231 	 * that starts with a single ASCII character:
232 	 *
233 	 * move to each character in the string that matches
234 	 * the first character in the regular expression
235 	 * and test the remaining string
236 	 */
237 
238 	    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
239 		stringp++;
240 	    }
241 	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
242 		end_of_matchp = test_string(stringp, regexp);
243 		if (end_of_matchp != (char *)0) {
244 		    __loc1 = (char *)stringp;
245 		} else {
246 		    stringp++;
247 		    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
248 			stringp++;
249 		    }
250 		}
251 	    }
252 
253 	} else if (!multibyte) {
254 
255 	/*
256 	 * if the value of the "multibyte" macro defined in <euc.h>
257 	 * is false, regex() is running in an ASCII locale;
258 	 * test an ASCII string against an ASCII regular expression
259 	 * that doesn't start with a single ASCII character:
260 	 *
261 	 * move forward in the string one byte at a time, testing
262 	 * the remaining string against the regular expression
263 	 */
264 
265 	    end_of_matchp = test_string(stringp, regexp);
266 	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
267 		stringp++;
268 		end_of_matchp = test_string(stringp, regexp);
269 	    }
270 	    if (end_of_matchp != (char *)0) {
271 		__loc1 = (char *)stringp;
272 	    }
273 
274 	} else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
275 
276 	/*
277 	 * test a multibyte string against a multibyte regular expression
278 	 * that starts with a single multibyte character:
279 	 *
280 	 * move to each character in the string that matches
281 	 * the first character in the regular expression
282 	 * and test the remaining string
283 	 */
284 
285 	    (void) get_wchar(&regex_wchar, regexp + 1);
286 	    char_size = get_wchar(&string_wchar, stringp);
287 	    while ((string_wchar != regex_wchar) && (char_size > 0)) {
288 		stringp += char_size;
289 		char_size = get_wchar(&string_wchar, stringp);
290 	    }
291 	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
292 		end_of_matchp = test_string(stringp, regexp);
293 		if (end_of_matchp != (char *)0) {
294 		    __loc1 = (char *)stringp;
295 		} else {
296 		    stringp += char_size;
297 		    char_size = get_wchar(&string_wchar, stringp);
298 		    while ((string_wchar != regex_wchar) && (char_size > 0)) {
299 			stringp += char_size;
300 			char_size = get_wchar(&string_wchar, stringp);
301 		    }
302 		}
303 	    }
304 
305 	} else {
306 
307 	/*
308 	 * test a multibyte string against a multibyte regular expression
309 	 * that doesn't start with a single multibyte character
310 	 *
311 	 * move forward in the string one multibyte character at a time,
312 	 * testing the remaining string against the regular expression
313 	 */
314 
315 	    end_of_matchp = test_string(stringp, regexp);
316 	    char_size = get_wchar(&string_wchar, stringp);
317 	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
318 		stringp += char_size;
319 		end_of_matchp = test_string(stringp, regexp);
320 		char_size = get_wchar(&string_wchar, stringp);
321 	    }
322 	    if (end_of_matchp != (char *)0) {
323 		__loc1 = (char *)stringp;
324 	    }
325 	}
326 
327 	/*
328 	 * Return substrings that matched subexpressions for which
329 	 * matching substrings are to be returned.
330 	 *
331 	 * NOTE:
332 	 *
333 	 * According to manual page regcmp(3G), regex() returns substrings
334 	 * that match subexpressions even when no substring matches the
335 	 * entire regular expression.
336 	 */
337 
338 	substringn = 0;
339 	while (substringn < NSUBSTRINGS) {
340 	    substringp = substring_startp[substringn];
341 	    if ((substringp != (char *)0) &&
342 		(return_arg_number[substringn] >= 0)) {
343 		returned_substringp =
344 		    return_argp[return_arg_number[substringn]];
345 		if (returned_substringp != (char *)0) {
346 		    while (substringp < substring_endp[substringn]) {
347 			*returned_substringp = (char)*substringp;
348 			returned_substringp++;
349 			substringp++;
350 		    }
351 		    *returned_substringp = '\0';
352 		}
353 	    }
354 	    substringn++;
355 	}
356 	lmutex_unlock(&regex_lock);
357 	return ((char *)end_of_matchp);
358 }  /* regex() */
359 
360 
361 /* DEFINITIONS OF PRIVATE FUNCTIONS */
362 
/*
 * get_wchar() - decode the character at the start of a string
 *
 * wcharp	receives the decoded character; always written
 * stringp	string to decode; may be a null pointer
 *
 * Returns the number of bytes the character occupies:
 *	0	stringp is null or addresses the terminating null byte
 *	1	the first byte is 7-bit ASCII
 *	else	whatever mbtowc() returns, which is -1 for an invalid
 *		or incomplete multibyte sequence
 *
 * Whenever the return value is <= 0, *wcharp is set to 0.  mbtowc()
 * stores nothing on failure, and some callers compare the decoded
 * character before they look at the size, so the output must never be
 * left stale or uninitialized.
 */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp)
{
	int char_size;

	if ((stringp == NULL) || (*stringp == '\0')) {
		*wcharp = (wchar_t)0;
		return (0);
	}

	if ((unsigned char)*stringp <= (unsigned char)0x7f) {
		/* 7-bit ASCII needs no locale-dependent conversion */
		*wcharp = (wchar_t)((unsigned int)*stringp);
		return (1);
	}

	char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
	if (char_size <= 0) {
		*wcharp = (wchar_t)0;
	}
	return (char_size);
}
383 
384 static void
385 get_match_counts(int *nmust_matchp,
386 	int *nextra_matches_allowedp,
387 	const char *count_stringp)
388 {
389 	int minimum_match_count;
390 	int maximum_match_count;
391 
392 	minimum_match_count =
393 	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
394 	*nmust_matchp = minimum_match_count;
395 
396 	count_stringp++;
397 	maximum_match_count =
398 	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
399 	if (maximum_match_count == (int)UNLIMITED) {
400 	    *nextra_matches_allowedp = (int)UNLIMITED;
401 	} else {
402 	    *nextra_matches_allowedp =
403 		maximum_match_count - minimum_match_count;
404 	}
405 	return;
406 
407 } /* get_match_counts() */
408 
409 static boolean_t
410 in_wchar_range(wchar_t test_char,
411 	wchar_t lower_char,
412 	wchar_t upper_char)
413 {
414 	return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
415 	    (lower_char <= test_char) && (test_char <= upper_char)) ||
416 	    (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
417 	    ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
418 	    (lower_char <= test_char) && (test_char <= upper_char)));
419 
420 } /* in_wchar_range() */
421 
422 static const char *
423 pop_stringp(void)
424 {
425 	const char *stringp;
426 
427 	if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
428 	    return ((char *)0);
429 	} else {
430 	    stringp = *stringp_stackp;
431 	    stringp_stackp++;
432 	    return (stringp);
433 	}
434 }
435 
436 
437 static const char *
438 previous_charp(const char *current_charp)
439 {
440 	/*
441 	 * returns the pointer to the previous character in
442 	 * a string of multibyte characters
443 	 */
444 
445 	const char *prev_cs0 = current_charp - 1;
446 	const char *prev_cs1 = current_charp - eucw1;
447 	const char *prev_cs2 = current_charp - eucw2 - 1;
448 	const char *prev_cs3 = current_charp - eucw3 - 1;
449 	const char *prev_charp;
450 
451 	if ((unsigned char)*prev_cs0 <= 0x7f) {
452 	    prev_charp = prev_cs0;
453 	} else if ((unsigned char)*prev_cs2 == SS2) {
454 	    prev_charp = prev_cs2;
455 	} else if ((unsigned char)*prev_cs3 == SS3) {
456 	    prev_charp = prev_cs3;
457 	} else {
458 	    prev_charp = prev_cs1;
459 	}
460 	return (prev_charp);
461 
462 } /* previous_charp() */
463 
464 static const char *
465 push_stringp(const char *stringp)
466 {
467 	if (stringp_stackp <= &stringp_stack[0]) {
468 	    return ((char *)0);
469 	} else {
470 	    stringp_stackp--;
471 	    *stringp_stackp = stringp;
472 	    return (stringp);
473 	}
474 }
475 
476 
477 static char_test_result_t
478 test_char_against_ascii_class(char test_char,
479 	const char *classp,
480 	char_test_condition_t test_condition)
481 {
482 	/*
483 	 * tests a character for membership in an ASCII character class compiled
484 	 * by the internationalized version of regcmp();
485 	 *
486 	 * NOTE: The internationalized version of regcmp() compiles
487 	 * 	the range a-z in an ASCII character class to aTHRUz.
488 	 */
489 
490 	int	nbytes_to_check;
491 
492 	nbytes_to_check = (int)*classp;
493 	classp++;
494 	nbytes_to_check--;
495 
496 	while (nbytes_to_check > 0) {
497 	    if (test_char == *classp) {
498 		if (test_condition == IN_CLASS)
499 		    return (CONDITION_TRUE);
500 		else
501 		    return (CONDITION_FALSE);
502 	    } else if (*classp == THRU) {
503 		if ((*(classp - 1) <= test_char) &&
504 		    (test_char <= *(classp + 1))) {
505 		    if (test_condition == IN_CLASS)
506 			return (CONDITION_TRUE);
507 		    else
508 			return (CONDITION_FALSE);
509 		} else {
510 		    classp += 2;
511 		    nbytes_to_check -= 2;
512 		}
513 	    } else {
514 		classp++;
515 		nbytes_to_check--;
516 	    }
517 	}
518 	if (test_condition == NOT_IN_CLASS) {
519 	    return (CONDITION_TRUE);
520 	} else {
521 	    return (CONDITION_FALSE);
522 	}
523 } /* test_char_against_ascii_class() */
524 
525 static char_test_result_t
526 test_char_against_multibyte_class(wchar_t test_char,
527 	const char *classp,
528 	char_test_condition_t test_condition)
529 {
530 	/*
531 	 * tests a character for membership in a multibyte character class;
532 	 *
533 	 * NOTE: The range a-z in a multibyte character class compiles to
534 	 * 	aTHRUz.
535 	 */
536 
537 	int		char_size;
538 	wchar_t		current_char;
539 	int		nbytes_to_check;
540 	wchar_t		previous_char;
541 
542 	nbytes_to_check = (int)*classp;
543 	classp++;
544 	nbytes_to_check--;
545 
546 	char_size = get_wchar(&current_char, classp);
547 	if (char_size <= 0) {
548 	    return (CHAR_TEST_ERROR);
549 	} else if (test_char == current_char) {
550 	    if (test_condition == IN_CLASS) {
551 		return (CONDITION_TRUE);
552 	    } else {
553 		return (CONDITION_FALSE);
554 	    }
555 	} else {
556 	    classp += char_size;
557 	    nbytes_to_check -= char_size;
558 	}
559 
560 	while (nbytes_to_check > 0) {
561 	    previous_char = current_char;
562 	    char_size = get_wchar(&current_char, classp);
563 	    if (char_size <= 0) {
564 		return (CHAR_TEST_ERROR);
565 	    } else if (test_char == current_char) {
566 		if (test_condition == IN_CLASS) {
567 		    return (CONDITION_TRUE);
568 		} else {
569 		    return (CONDITION_FALSE);
570 		}
571 	    } else if (current_char == THRU) {
572 		classp += char_size;
573 		nbytes_to_check -= char_size;
574 		char_size = get_wchar(&current_char, classp);
575 		if (char_size <= 0) {
576 		    return (CHAR_TEST_ERROR);
577 		} else if (in_wchar_range(test_char, previous_char,
578 		    current_char)) {
579 		    if (test_condition == IN_CLASS) {
580 			return (CONDITION_TRUE);
581 		    } else {
582 			return (CONDITION_FALSE);
583 		    }
584 		} else {
585 		    classp += char_size;
586 		    nbytes_to_check -= char_size;
587 		}
588 	    } else {
589 		classp += char_size;
590 		nbytes_to_check -= char_size;
591 	    }
592 	}
593 	if (test_condition == NOT_IN_CLASS) {
594 	    return (CONDITION_TRUE);
595 	} else {
596 	    return (CONDITION_FALSE);
597 	}
598 } /* test_char_against_multibyte_class() */
599 
600 
601 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
602 
603 static char_test_result_t
604 test_char_against_old_ascii_class(char test_char,
605 	const char *classp,
606 	char_test_condition_t test_condition)
607 {
608 	/*
609 	 * tests a character for membership in an ASCII character class compiled
610 	 * by the ASCII version of regcmp();
611 	 *
612 	 * NOTE: ASCII versions of regcmp() compile the range a-z in an
613 	 *	ASCII character class to THRUaz.  The internationalized
614 	 *	version compiles the same range to aTHRUz.
615 	 */
616 
617 	int	nbytes_to_check;
618 
619 	nbytes_to_check = (int)*classp;
620 	classp++;
621 	nbytes_to_check--;
622 
623 	while (nbytes_to_check > 0) {
624 	    if (test_char == *classp) {
625 		if (test_condition == IN_CLASS) {
626 		    return (CONDITION_TRUE);
627 		} else {
628 		    return (CONDITION_FALSE);
629 		}
630 	    } else if (*classp == THRU) {
631 		if ((*(classp + 1) <= test_char) &&
632 		    (test_char <= *(classp + 2))) {
633 		    if (test_condition == IN_CLASS) {
634 			return (CONDITION_TRUE);
635 		    } else {
636 			return (CONDITION_FALSE);
637 		    }
638 		} else {
639 		    classp += 3;
640 		    nbytes_to_check -= 3;
641 		}
642 	    } else {
643 		classp++;
644 		nbytes_to_check--;
645 	    }
646 	}
647 	if (test_condition == NOT_IN_CLASS) {
648 	    return (CONDITION_TRUE);
649 	} else {
650 	    return (CONDITION_FALSE);
651 	}
652 } /* test_char_against_old_ascii_class() */
653 
/*
 * test_repeated_ascii_char() - match the rest of an expression after a
 * repeated single-byte character
 *
 * repeat_startp	earliest position the repetition may end at
 * stringp		position after the longest run that was consumed
 * regexp		remainder of the compiled expression
 *
 * The remainder is tried at stringp first; on failure the position is
 * moved back one byte at a time until it reaches repeat_startp.
 * Returns whatever test_string() returned for the first position that
 * matched, or a null pointer if none did.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *match_endp;

	for (;;) {
		match_endp = test_string(stringp, regexp);
		if ((match_endp != NULL) || (stringp <= repeat_startp)) {
			return (match_endp);
		}
		stringp--;
	}
}
669 
/*
 * test_repeated_multibyte_char() - match the rest of an expression after
 * a repeated multibyte character
 *
 * repeat_startp	earliest position the repetition may end at
 * stringp		position after the longest run that was consumed
 * regexp		remainder of the compiled expression
 *
 * The remainder is tried at stringp first; on failure the position is
 * moved back one character at a time, using previous_charp(), for as
 * long as it is still beyond repeat_startp.
 * Returns whatever test_string() returned for the first position that
 * matched, or a null pointer if none did.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *match_endp;

	for (;;) {
		match_endp = test_string(stringp, regexp);
		if ((match_endp != NULL) || (stringp <= repeat_startp)) {
			return (match_endp);
		}
		stringp = previous_charp(stringp);
	}
}
685 
/*
 * test_repeated_group() - match the rest of an expression after a
 * repeated group
 *
 * repeat_startp	earliest position the repetition may end at
 * stringp		position after the last repetition that was consumed
 * regexp		remainder of the compiled expression
 *
 * The remainder is tried at stringp first; on failure earlier positions
 * are taken from the string-pointer stack with pop_stringp() for as long
 * as the current position is still beyond repeat_startp.
 * Returns whatever test_string() returned for the first position that
 * matched, or a null pointer if none matched or the stack ran out.
 */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *match_endp;

	for (;;) {
		match_endp = test_string(stringp, regexp);
		if ((match_endp != NULL) || (stringp <= repeat_startp)) {
			return (match_endp);
		}
		stringp = pop_stringp();
		if (stringp == NULL) {
			return (NULL);
		}
	}
}
704 
705 static const char *
706 test_string(const char *stringp,
707 	const char *regexp)
708 {
709 	/*
710 	 * returns a pointer to the first character following the first
711 	 * substring of the string addressed by stringp that matches
712 	 * the compiled regular expression addressed by regexp
713 	 */
714 
715 	unsigned int		group_length;
716 	int			nextra_matches_allowed;
717 	int			nmust_match;
718 	wchar_t			regex_wchar;
719 	int			regex_char_size;
720 	const char		*repeat_startp;
721 	unsigned int		return_argn;
722 	wchar_t			string_wchar;
723 	int			string_char_size;
724 	unsigned int		substringn;
725 	char_test_condition_t	test_condition;
726 	const char		*test_stringp;
727 
728 	for (;;) {
729 
730 		/*
731 		 * Exit the loop via a return whenever there's a match
732 		 * or it's clear that there can be no match.
733 		 */
734 
735 	    switch ((int)*regexp) {
736 
737 		/*
738 		 * No fall-through.
739 		 * Each case ends with either a return or with stringp
740 		 * addressing the next character to be tested and regexp
741 		 * addressing the next compiled regular expression
742 		 *
743 		 * NOTE: The comments for each case give the meaning
744 		 *	of the compiled regular expression decoded by the case
745 		 *	and the character string that the compiled regular
746 		 *	expression uses to encode the case.  Each single
747 		 *	character encoded in the compiled regular expression
748 		 *	is shown enclosed in angle brackets (<>).  Each
749 		 *	compiled regular expression begins with a marker
750 		 *	character which is shown as a named constant
751 		 *	(e.g. <ASCII_CHAR>). Character constants are shown
752 		 *	enclosed in single quotes (e.g. <'$'>).  All other
753 		 *	single characters encoded in the compiled regular
754 		 *	expression are shown as lower case variable names
755 		 *	(e.g. <ascii_char> or <multibyte_char>). Multicharacter
756 		 *	strings encoded in the compiled regular expression
757 		 *	are shown as variable names followed by elipses
758 		 *	are shown as variable names followed by ellipses
759 		 */
760 
761 	    case ASCII_CHAR: /* single ASCII char */
762 
763 		/* encoded as <ASCII_CHAR><ascii_char> */
764 
765 		regexp++;
766 		if (*regexp == *stringp) {
767 		    regexp++;
768 		    stringp++;
769 		} else {
770 		    return ((char *)0);
771 		}
772 		break;		/* end case ASCII_CHAR */
773 
774 	    case MULTIBYTE_CHAR: /* single multibyte char */
775 
776 		/* encoded as <MULTIBYTE_CHAR><multibyte_char> */
777 
778 		regexp++;
779 		regex_char_size = get_wchar(&regex_wchar, regexp);
780 		string_char_size = get_wchar(&string_wchar, stringp);
781 		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
782 		    return ((char *)0);
783 		} else {
784 		    regexp += regex_char_size;
785 		    stringp += string_char_size;
786 		}
787 		break;		/* end case MULTIBYTE_CHAR */
788 
789 	    case ANY_CHAR: /* any single ASCII or multibyte char */
790 
791 		/* encoded as <ANY_CHAR> */
792 
793 		if (!multibyte) {
794 		    if (*stringp == '\0') {
795 			return ((char *)0);
796 		    } else {
797 			regexp++;
798 			stringp++;
799 		    }
800 		} else {
801 		    string_char_size = get_wchar(&string_wchar, stringp);
802 		    if (string_char_size <= 0) {
803 			return ((char *)0);
804 		    } else {
805 			regexp++;
806 			stringp += string_char_size;
807 		    }
808 		}
809 		break;	/* end case ANY_CHAR */
810 
811 	    case IN_ASCII_CHAR_CLASS:		/* [.....] */
812 	    case NOT_IN_ASCII_CHAR_CLASS:
813 
814 		/*
815 		 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
816 		 *	or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
817 		 *
818 		 * NOTE: <class_length> includes the <class_length> byte
819 		 */
820 
821 		if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
822 		    test_condition = IN_CLASS;
823 		} else {
824 		    test_condition = NOT_IN_CLASS;
825 		}
826 		regexp++; /* point to the <class_length> byte */
827 
828 		if ((*stringp != '\0') &&
829 		    (test_char_against_ascii_class(*stringp, regexp,
830 		    test_condition) == CONDITION_TRUE)) {
831 		    regexp += (int)*regexp; /* add the class length to regexp */
832 		    stringp++;
833 		} else {
834 		    return ((char *)0);
835 		}
836 		break; /* end case IN_ASCII_CHAR_CLASS */
837 
838 	    case IN_MULTIBYTE_CHAR_CLASS:	/* [....] */
839 	    case NOT_IN_MULTIBYTE_CHAR_CLASS:
840 
841 		/*
842 		 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
843 		 * 	or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
844 		 *
845 		 * NOTE: <class_length> includes the <class_length> byte
846 		 */
847 
848 		if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
849 		    test_condition = IN_CLASS;
850 		} else {
851 		    test_condition = NOT_IN_CLASS;
852 		}
853 		regexp++; /* point to the <class_length> byte */
854 
855 		string_char_size = get_wchar(&string_wchar, stringp);
856 		if ((string_char_size > 0) &&
857 		    (test_char_against_multibyte_class(string_wchar, regexp,
858 		    test_condition) == CONDITION_TRUE)) {
859 		    regexp += (int)*regexp; /* add the class length to regexp */
860 		    stringp += string_char_size;
861 		} else {
862 		    return ((char *)0);
863 		}
864 		break; /* end case IN_MULTIBYTE_CHAR_CLASS */
865 
866 	    case IN_OLD_ASCII_CHAR_CLASS:	/* [...] */
867 	    case NOT_IN_OLD_ASCII_CHAR_CLASS:
868 
869 		/*
870 		 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
871 		 *	or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
872 		 *
873 		 * NOTE: <class_length> includes the <class_length> byte
874 		 */
875 
876 		if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
877 		    test_condition = IN_CLASS;
878 		} else {
879 		    test_condition = NOT_IN_CLASS;
880 		}
881 		regexp++; /* point to the <class_length> byte */
882 
883 		if ((*stringp != '\0') &&
884 		    (test_char_against_old_ascii_class(*stringp, regexp,
885 		    test_condition) == CONDITION_TRUE)) {
886 		    regexp += (int)*regexp; /* add the class length to regexp */
887 		    stringp++;
888 		} else {
889 		    return ((char *)0);
890 		}
891 		break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
892 
893 	    case SIMPLE_GROUP: /* (.....) */
894 
895 		/* encoded as <SIMPLE_GROUP><group_length> */
896 
897 		regexp += 2;
898 		break;		/* end case SIMPLE_GROUP */
899 
900 	    case END_GROUP:	/* (.....) */
901 
902 		/* encoded as <END_GROUP><groupn> */
903 
904 		regexp += 2;
905 		break;		/* end case END_GROUP */
906 
907 	    case SAVED_GROUP:	/* (.....)$0-9 */
908 
909 		/* encoded as <SAVED_GROUP><substringn> */
910 
911 		regexp++;
912 		substringn = (unsigned int)*regexp;
913 		if (substringn >= NSUBSTRINGS)
914 		    return ((char *)0);
915 		substring_startp[substringn] = stringp;
916 		regexp++;
917 		break;		/* end case SAVED_GROUP */
918 
919 	    case END_SAVED_GROUP:	/* (.....)$0-9 */
920 
921 		/*
922 		 * encoded as <END_SAVED_GROUP><substringn>\
923 		 *	<return_arg_number[substringn]>
924 		 */
925 
926 		regexp++;
927 		substringn = (unsigned int)*regexp;
928 		if (substringn >= NSUBSTRINGS)
929 		    return ((char *)0);
930 		substring_endp[substringn] = stringp;
931 		regexp++;
932 		return_argn = (unsigned int)*regexp;
933 		if (return_argn >= NSUBSTRINGS)
934 		    return ((char *)0);
935 		return_arg_number[substringn] = return_argn;
936 		regexp++;
937 		break;		/* end case END_SAVED_GROUP */
938 
939 	    case ASCII_CHAR|ZERO_OR_MORE:  /* char* */
940 
941 		/* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
942 
943 		regexp++;
944 		repeat_startp = stringp;
945 		while (*stringp == *regexp) {
946 		    stringp++;
947 		}
948 		regexp++;
949 		return (test_repeated_ascii_char(repeat_startp,
950 		    stringp, regexp));
951 
952 		/* end case ASCII_CHAR|ZERO_OR_MORE */
953 
954 	    case ASCII_CHAR|ONE_OR_MORE:   /* char+ */
955 
956 		/* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
957 
958 		regexp++;
959 		if (*stringp != *regexp) {
960 		    return ((char *)0);
961 		} else {
962 		    stringp++;
963 		    repeat_startp = stringp;
964 		    while (*stringp == *regexp) {
965 			stringp++;
966 		    }
967 		    regexp++;
968 		    return (test_repeated_ascii_char(repeat_startp, stringp,
969 			regexp));
970 		}
971 		/* end case ASCII_CHAR|ONE_OR_MORE */
972 
973 	    case ASCII_CHAR|COUNT:	/* char{min_count,max_count} */
974 
975 		/*
976 		 * encoded as <ASCII_CHAR|COUNT><ascii_char>\
977 		 *	<minimum_match_count><maximum_match_count>
978 		 */
979 
980 		regexp++;
981 		get_match_counts(&nmust_match, &nextra_matches_allowed,
982 		    regexp + 1);
983 		while ((*stringp == *regexp) && (nmust_match > 0)) {
984 		    nmust_match--;
985 		    stringp++;
986 		}
987 		if (nmust_match > 0) {
988 		    return ((char *)0);
989 		} else if (nextra_matches_allowed == UNLIMITED) {
990 		    repeat_startp = stringp;
991 		    while (*stringp == *regexp) {
992 			stringp++;
993 		    }
994 		    regexp += 3;
995 		    return (test_repeated_ascii_char(repeat_startp, stringp,
996 			regexp));
997 		} else {
998 		    repeat_startp = stringp;
999 		    while ((*stringp == *regexp) &&
1000 			(nextra_matches_allowed > 0)) {
1001 			nextra_matches_allowed--;
1002 			stringp++;
1003 		    }
1004 		    regexp += 3;
1005 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1006 			regexp));
1007 		}
1008 		/* end case ASCII_CHAR|COUNT */
1009 
1010 	    case MULTIBYTE_CHAR|ZERO_OR_MORE:   /* char* */
1011 
1012 		/* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
1013 
1014 		regexp++;
1015 		regex_char_size = get_wchar(&regex_wchar, regexp);
1016 		repeat_startp = stringp;
1017 		string_char_size = get_wchar(&string_wchar, stringp);
1018 		while ((string_char_size > 0) &&
1019 		    (string_wchar == regex_wchar)) {
1020 		    stringp += string_char_size;
1021 		    string_char_size = get_wchar(&string_wchar, stringp);
1022 		}
1023 		regexp += regex_char_size;
1024 		return (test_repeated_multibyte_char(repeat_startp, stringp,
1025 		    regexp));
1026 
1027 		/* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
1028 
1029 	    case MULTIBYTE_CHAR|ONE_OR_MORE:    /* char+ */
1030 
1031 		/* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
1032 
1033 		regexp++;
1034 		regex_char_size = get_wchar(&regex_wchar, regexp);
1035 		string_char_size = get_wchar(&string_wchar, stringp);
1036 		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
1037 		    return ((char *)0);
1038 		} else {
1039 		    stringp += string_char_size;
1040 		    repeat_startp = stringp;
1041 		    string_char_size = get_wchar(&string_wchar, stringp);
1042 		    while ((string_char_size > 0) &&
1043 			(string_wchar == regex_wchar)) {
1044 			stringp += string_char_size;
1045 			string_char_size = get_wchar(&string_wchar, stringp);
1046 		    }
1047 		    regexp += regex_char_size;
1048 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1049 			regexp));
1050 		}
1051 		/* end case MULTIBYTE_CHAR|ONE_OR_MORE */
1052 
1053 	    case MULTIBYTE_CHAR|COUNT:		/* char{min_count,max_count} */
1054 
1055 		/*
1056 		 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
1057 		 * 	<minimum_match_count><maximum_match_count>
1058 		 */
1059 
1060 		regexp++;
1061 		regex_char_size = get_wchar(&regex_wchar, regexp);
1062 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1063 		    regexp + regex_char_size);
1064 		string_char_size = get_wchar(&string_wchar, stringp);
1065 		while ((string_char_size > 0) &&
1066 		    (string_wchar == regex_wchar) &&
1067 		    (nmust_match > 0)) {
1068 
1069 		    nmust_match--;
1070 		    stringp += string_char_size;
1071 		    string_char_size = get_wchar(&string_wchar, stringp);
1072 		}
1073 		if (nmust_match > 0) {
1074 		    return ((char *)0);
1075 		} else if (nextra_matches_allowed == UNLIMITED) {
1076 		    repeat_startp = stringp;
1077 		    while ((string_char_size > 0) &&
1078 			(string_wchar == regex_wchar)) {
1079 			stringp += string_char_size;
1080 			string_char_size = get_wchar(&string_wchar, stringp);
1081 		    }
1082 		    regexp += regex_char_size + 2;
1083 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1084 			regexp));
1085 		} else {
1086 		    repeat_startp = stringp;
1087 		    while ((string_char_size > 0) &&
1088 			(string_wchar == regex_wchar) &&
1089 			(nextra_matches_allowed > 0)) {
1090 			nextra_matches_allowed--;
1091 			stringp += string_char_size;
1092 			string_char_size = get_wchar(&string_wchar, stringp);
1093 		    }
1094 		    regexp += regex_char_size + 2;
1095 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1096 			regexp));
1097 		}
1098 		/* end case MULTIBYTE_CHAR|COUNT */
1099 
1100 	    case ANY_CHAR|ZERO_OR_MORE:		/* .* */
1101 
1102 		/* encoded as <ANY_CHAR|ZERO_OR_MORE> */
1103 
1104 		repeat_startp = stringp;
1105 		if (!multibyte) {
1106 		    while (*stringp != '\0') {
1107 			stringp++;
1108 		    }
1109 		    regexp++;
1110 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1111 			regexp));
1112 		} else {
1113 		    string_char_size = get_wchar(&string_wchar, stringp);
1114 		    while (string_char_size > 0) {
1115 			stringp += string_char_size;
1116 			string_char_size = get_wchar(&string_wchar, stringp);
1117 		    }
1118 		    regexp++;
1119 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1120 			regexp));
1121 		}
1122 		/* end case <ANY_CHAR|ZERO_OR_MORE> */
1123 
1124 	    case ANY_CHAR|ONE_OR_MORE:		/* .+ */
1125 
1126 		/* encoded as <ANY_CHAR|ONE_OR_MORE> */
1127 
1128 		if (!multibyte) {
1129 		    if (*stringp == '\0') {
1130 			return ((char *)0);
1131 		    } else {
1132 			stringp++;
1133 			repeat_startp = stringp;
1134 			while (*stringp != '\0') {
1135 			    stringp++;
1136 			}
1137 			regexp++;
1138 			return (test_repeated_ascii_char(repeat_startp, stringp,
1139 			    regexp));
1140 		    }
1141 		} else {
1142 		    string_char_size = get_wchar(&string_wchar, stringp);
1143 		    if (string_char_size <= 0) {
1144 			return ((char *)0);
1145 		    } else {
1146 			stringp += string_char_size;
1147 			repeat_startp = stringp;
1148 			string_char_size = get_wchar(&string_wchar, stringp);
1149 			while (string_char_size > 0) {
1150 			    stringp += string_char_size;
1151 			    string_char_size =
1152 				get_wchar(&string_wchar, stringp);
1153 			}
1154 			regexp++;
1155 			return (test_repeated_multibyte_char(repeat_startp,
1156 			    stringp, regexp));
1157 		    }
1158 		}
1159 		/* end case <ANY_CHAR|ONE_OR_MORE> */
1160 
1161 	    case ANY_CHAR|COUNT:	/* .{min_count,max_count} */
1162 
1163 		/*
1164 		 * encoded as	<ANY_CHAR|COUNT>\
1165 		 *		<minimum_match_count><maximum_match_count>
1166 		 */
1167 
1168 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1169 		    regexp + 1);
1170 		if (!multibyte) {
1171 		    while ((*stringp != '\0') && (nmust_match > 0)) {
1172 			nmust_match--;
1173 			stringp++;
1174 		    }
1175 		    if (nmust_match > 0) {
1176 			return ((char *)0);
1177 		    } else if (nextra_matches_allowed == UNLIMITED) {
1178 			repeat_startp = stringp;
1179 			while (*stringp != '\0') {
1180 			    stringp++;
1181 			}
1182 			regexp += 3;
1183 			return (test_repeated_ascii_char(repeat_startp, stringp,
1184 			    regexp));
1185 		    } else {
1186 			repeat_startp = stringp;
1187 			while ((*stringp != '\0') &&
1188 			    (nextra_matches_allowed > 0)) {
1189 			    nextra_matches_allowed--;
1190 			    stringp++;
1191 			}
1192 			regexp += 3;
1193 			return (test_repeated_ascii_char(repeat_startp, stringp,
1194 			    regexp));
1195 		    }
1196 		} else { /* multibyte character */
1197 
1198 		    string_char_size = get_wchar(&string_wchar, stringp);
1199 		    while ((string_char_size > 0) && (nmust_match > 0)) {
1200 			nmust_match--;
1201 			stringp += string_char_size;
1202 			string_char_size = get_wchar(&string_wchar, stringp);
1203 		    }
1204 		    if (nmust_match > 0) {
1205 			return ((char *)0);
1206 		    } else if (nextra_matches_allowed == UNLIMITED) {
1207 			repeat_startp = stringp;
1208 			while (string_char_size > 0) {
1209 			    stringp += string_char_size;
1210 			    string_char_size =
1211 				get_wchar(&string_wchar, stringp);
1212 			}
1213 			regexp += 3;
1214 			return (test_repeated_multibyte_char(repeat_startp,
1215 			    stringp, regexp));
1216 		    } else {
1217 			repeat_startp = stringp;
1218 			while ((string_char_size > 0) &&
1219 			    (nextra_matches_allowed > 0)) {
1220 			    nextra_matches_allowed--;
1221 			    stringp += string_char_size;
1222 			    string_char_size =
1223 				get_wchar(&string_wchar, stringp);
1224 			}
1225 			regexp += 3;
1226 			return (test_repeated_multibyte_char(repeat_startp,
1227 			    stringp, regexp));
1228 		    }
1229 		} /* end case ANY_CHAR|COUNT */
1230 
1231 	    case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1232 	    case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1233 
1234 		/*
1235 		 * encoded as	<IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1236 		 *		<class_length><class ...>
1237 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1238 		 *		<class_length><class ...>
1239 		 *
1240 		 * NOTE: <class_length> includes the <class_length> byte
1241 		 */
1242 
1243 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1244 		    test_condition = IN_CLASS;
1245 		} else {
1246 		    test_condition = NOT_IN_CLASS;
1247 		}
1248 		regexp++; /* point to the <class_length> byte */
1249 
1250 		repeat_startp = stringp;
1251 		while ((*stringp != '\0') &&
1252 		    (test_char_against_ascii_class(*stringp, regexp,
1253 		    test_condition) == CONDITION_TRUE)) {
1254 		    stringp++;
1255 		}
1256 		regexp += (int)*regexp; /* add the class length to regexp */
1257 		return (test_repeated_ascii_char(repeat_startp, stringp,
1258 		    regexp));
1259 
1260 		/* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1261 
1262 	    case IN_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1263 	    case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
1264 
1265 		/*
1266 		 * encoded as	<IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1267 		 *		<class_length><class ...>
1268 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1269 		 * 		<class_length><class ...>
1270 		 *
1271 		 * NOTE: <class_length> includes the <class_length> byte
1272 		 */
1273 
1274 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1275 		    test_condition = IN_CLASS;
1276 		} else {
1277 		    test_condition = NOT_IN_CLASS;
1278 		}
1279 		regexp++; /* point to the <class_length> byte */
1280 
1281 		if ((*stringp == '\0') ||
1282 		    (test_char_against_ascii_class(*stringp, regexp,
1283 		    test_condition) != CONDITION_TRUE)) {
1284 		    return ((char *)0);
1285 		} else {
1286 		    stringp++;
1287 		    repeat_startp = stringp;
1288 		    while ((*stringp != '\0') &&
1289 			(test_char_against_ascii_class(*stringp, regexp,
1290 			test_condition) == CONDITION_TRUE)) {
1291 			stringp++;
1292 		    }
1293 		    regexp += (int)*regexp; /* add the class length to regexp */
1294 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1295 			regexp));
1296 		}
1297 		/* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
1298 
1299 	    case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{min_count,max_count} */
1300 	    case NOT_IN_ASCII_CHAR_CLASS | COUNT:
1301 
1302 		/*
1303 		 * encoded as	<IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1304 		 * 		<class ...><minimum_match_count>\
1305 		 *		<maximum_match_count>
1306 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1307 		 *		<class ...><minimum_match_count>\
1308 		 *		<maximum_match_count>
1309 		 *
1310 		 * NOTE: <class_length> includes the <class_length> byte,
1311 		 *	but not the <minimum_match_count> or
1312 		 *	<maximum_match_count> bytes
1313 		 */
1314 
1315 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
1316 		    test_condition = IN_CLASS;
1317 		} else {
1318 		    test_condition = NOT_IN_CLASS;
1319 		}
1320 		regexp++; /* point to the <class_length> byte */
1321 
1322 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1323 		    regexp + (int)*regexp);
1324 		while ((*stringp != '\0') &&
1325 		    (test_char_against_ascii_class(*stringp, regexp,
1326 		    test_condition) == CONDITION_TRUE) &&
1327 		    (nmust_match > 0)) {
1328 		    nmust_match--;
1329 		    stringp++;
1330 		}
1331 		if (nmust_match > 0) {
1332 		    return ((char *)0);
1333 		} else if (nextra_matches_allowed == UNLIMITED) {
1334 		    repeat_startp = stringp;
1335 		    while ((*stringp != '\0') &&
1336 			(test_char_against_ascii_class(*stringp, regexp,
1337 			test_condition) == CONDITION_TRUE)) {
1338 			stringp++;
1339 		    }
1340 		    regexp += (int)*regexp + 2;
1341 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1342 			regexp));
1343 		} else {
1344 		    repeat_startp = stringp;
1345 		    while ((*stringp != '\0') &&
1346 			(test_char_against_ascii_class(*stringp, regexp,
1347 			test_condition) == CONDITION_TRUE) &&
1348 			(nextra_matches_allowed > 0)) {
1349 			nextra_matches_allowed--;
1350 			stringp++;
1351 		    }
1352 		    regexp += (int)*regexp + 2;
1353 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1354 			regexp));
1355 		}
1356 		/* end case IN_ASCII_CHAR_CLASS|COUNT */
1357 
1358 	    case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1359 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
1360 
1361 		/*
1362 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1363 		 *		<class_length><class ...>
1364 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1365 		 *		<class_length><class ...>
1366 		 *
1367 		 * NOTE: <class_length> includes the <class_length> byte
1368 		 */
1369 
1370 		if ((int)*regexp ==
1371 		    (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
1372 		    test_condition = IN_CLASS;
1373 		} else {
1374 		    test_condition = NOT_IN_CLASS;
1375 		}
1376 		regexp++; /* point to the <class_length> byte */
1377 
1378 		repeat_startp = stringp;
1379 		string_char_size = get_wchar(&string_wchar, stringp);
1380 		while ((string_char_size > 0) &&
1381 		    (test_char_against_multibyte_class(string_wchar, regexp,
1382 		    test_condition) == CONDITION_TRUE)) {
1383 		    stringp += string_char_size;
1384 		    string_char_size = get_wchar(&string_wchar, stringp);
1385 		}
1386 		regexp += (int)*regexp; /* add the class length to regexp */
1387 		return (test_repeated_multibyte_char(repeat_startp, stringp,
1388 		    regexp));
1389 
1390 		/* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
1391 
1392 	    case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1393 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
1394 
1395 		/*
1396 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1397 		 *		<class_length><class ...>
1398 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1399 		 *		<class_length><class ...>
1400 		 *
1401 		 * NOTE: <class_length> includes the <class_length> byte
1402 		 */
1403 
1404 		if ((int)*regexp ==
1405 		    (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
1406 		    test_condition = IN_CLASS;
1407 		} else {
1408 		    test_condition = NOT_IN_CLASS;
1409 		}
1410 		regexp++; /* point to the <class_length> byte */
1411 
1412 		string_char_size = get_wchar(&string_wchar, stringp);
1413 		if ((string_char_size <= 0) ||
1414 		    (test_char_against_multibyte_class(string_wchar, regexp,
1415 		    test_condition) != CONDITION_TRUE)) {
1416 		    return ((char *)0);
1417 		} else {
1418 		    stringp += string_char_size;
1419 		    repeat_startp = stringp;
1420 		    string_char_size = get_wchar(&string_wchar, stringp);
1421 		    while ((string_char_size > 0) &&
1422 			(test_char_against_multibyte_class(string_wchar,
1423 			regexp, test_condition) == CONDITION_TRUE)) {
1424 			stringp += string_char_size;
1425 			string_char_size = get_wchar(&string_wchar, stringp);
1426 		    }
1427 		    regexp += (int)*regexp; /* add the class length to regexp */
1428 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1429 			regexp));
1430 		}
1431 		/* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
1432 
1433 	    case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1434 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
1435 
1436 		/*
1437 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1438 		 *		<class_length><class ...><min_count><max_count>
1439 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1440 		 *		<class_length><class ...><min_count><max_count>
1441 		 *
1442 		 * NOTE: <class_length> includes the <class_length> byte
1443 		 *	but not the <minimum_match_count> or
1444 		 *	<maximum_match_count> bytes
1445 		 */
1446 
1447 		if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
1448 		    test_condition = IN_CLASS;
1449 		} else {
1450 		    test_condition = NOT_IN_CLASS;
1451 		}
1452 		regexp++; /* point to the <class_length> byte */
1453 
1454 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1455 		    regexp + (int)*regexp);
1456 		string_char_size = get_wchar(&string_wchar, stringp);
1457 		while ((string_char_size > 0) &&
1458 		    (test_char_against_multibyte_class(string_wchar, regexp,
1459 		    test_condition) == CONDITION_TRUE) &&
1460 		    (nmust_match > 0)) {
1461 		    nmust_match--;
1462 		    stringp += string_char_size;
1463 		    string_char_size = get_wchar(&string_wchar, stringp);
1464 		}
1465 		if (nmust_match > 0) {
1466 		    return ((char *)0);
1467 		} else if (nextra_matches_allowed == UNLIMITED) {
1468 		    repeat_startp = stringp;
1469 		    while ((string_char_size > 0) &&
1470 			(test_char_against_multibyte_class(string_wchar,
1471 			regexp, test_condition) == CONDITION_TRUE)) {
1472 			stringp += string_char_size;
1473 			string_char_size = get_wchar(&string_wchar, stringp);
1474 		    }
1475 		    regexp += (int)*regexp + 2;
1476 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1477 			regexp));
1478 		} else {
1479 		    repeat_startp = stringp;
1480 		    while ((string_char_size > 0) &&
1481 			(test_char_against_multibyte_class(string_wchar,
1482 			regexp, test_condition) == CONDITION_TRUE) &&
1483 			(nextra_matches_allowed > 0)) {
1484 			nextra_matches_allowed--;
1485 			stringp += string_char_size;
1486 			string_char_size = get_wchar(&string_wchar, stringp);
1487 		    }
1488 		    regexp += (int)*regexp + 2;
1489 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1490 			regexp));
1491 		}
1492 		/* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
1493 
1494 	    case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1495 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1496 
1497 		/*
1498 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1499 		 *		<class_length><class ...>
1500 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1501 		 *		<class_length><class ...>
1502 		 *
1503 		 * NOTE: <class_length> includes the <class_length> byte
1504 		 */
1505 
1506 		if ((int)*regexp ==
1507 		    (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1508 		    test_condition = IN_CLASS;
1509 		} else {
1510 		    test_condition = NOT_IN_CLASS;
1511 		}
1512 		regexp++; /* point to the <class_length> byte */
1513 
1514 		repeat_startp = stringp;
1515 		while ((*stringp != '\0') &&
1516 		    (test_char_against_old_ascii_class(*stringp, regexp,
1517 		    test_condition) == CONDITION_TRUE)) {
1518 		    stringp++;
1519 		}
1520 		regexp += (int)*regexp; /* add the class length to regexp */
1521 		return (test_repeated_ascii_char(repeat_startp, stringp,
1522 		    regexp));
1523 
1524 		/* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1525 
1526 	    case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1527 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
1528 
1529 		/*
1530 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1531 		 *		<class_length><class ...>
1532 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1533 		 *		<class_length><class ...>
1534 		 *
1535 		 * NOTE: <class_length> includes the <class_length> byte
1536 		 */
1537 
1538 		if ((int)*regexp ==
1539 		    (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1540 		    test_condition = IN_CLASS;
1541 		} else {
1542 		    test_condition = NOT_IN_CLASS;
1543 		}
1544 		regexp++; /* point to the <class_length> byte */
1545 
1546 		if ((*stringp == '\0') ||
1547 		    (test_char_against_old_ascii_class(*stringp, regexp,
1548 		    test_condition) != CONDITION_TRUE)) {
1549 		    return ((char *)0);
1550 		} else {
1551 		    stringp++;
1552 		    repeat_startp = stringp;
1553 		    while ((*stringp != '\0') &&
1554 			(test_char_against_old_ascii_class(*stringp, regexp,
1555 			test_condition) == CONDITION_TRUE)) {
1556 			stringp++;
1557 		    }
1558 		    regexp += (int)*regexp; /* add the class length to regexp */
1559 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1560 			regexp));
1561 		}
1562 		/* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
1563 
1564 	    case IN_OLD_ASCII_CHAR_CLASS|COUNT:	/* [...]{min_count,max_count} */
1565 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
1566 
1567 		/*
1568 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
1569 		 *		<class ...><minimum_match_count>\
1570 		 *		<maximum_match_count>
1571 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
1572 		 *		<class_length><class ...><minimum_match_count>\
1573 		 *		<maximum_match_count>
1574 		 *
1575 		 * NOTE: <class_length> includes the <class_length> byte
1576 		 *	but not the <minimum_match_count> or
1577 		 *	<maximum_match_count> bytes
1578 		 */
1579 
1580 		if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
1581 		    test_condition = IN_CLASS;
1582 		} else {
1583 		    test_condition = NOT_IN_CLASS;
1584 		}
1585 		regexp++; /* point to the <class_length> byte */
1586 
1587 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1588 		    regexp + (int)*regexp);
1589 		while ((*stringp != '\0') &&
1590 		    (test_char_against_old_ascii_class(*stringp, regexp,
1591 		    test_condition) == CONDITION_TRUE) &&
1592 		    (nmust_match > 0)) {
1593 		    nmust_match--;
1594 		    stringp++;
1595 		}
1596 		if (nmust_match > 0) {
1597 		    return ((char *)0);
1598 		} else if (nextra_matches_allowed == UNLIMITED) {
1599 		    repeat_startp = stringp;
1600 		    while ((*stringp != '\0') &&
1601 			(test_char_against_old_ascii_class(*stringp, regexp,
1602 			test_condition) == CONDITION_TRUE)) {
1603 			stringp++;
1604 		    }
1605 		    regexp += (int)*regexp + 2;
1606 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1607 			regexp));
1608 		} else {
1609 		    repeat_startp = stringp;
1610 		    while ((*stringp != '\0') &&
1611 			(test_char_against_old_ascii_class(*stringp, regexp,
1612 			test_condition) == CONDITION_TRUE) &&
1613 			(nextra_matches_allowed > 0)) {
1614 			nextra_matches_allowed--;
1615 			stringp++;
1616 		    }
1617 		    regexp += (int)*regexp + 2;
1618 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1619 			regexp));
1620 		}
1621 		/* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
1622 
1623 	    case ZERO_OR_MORE_GROUP:		/* (.....)* */
1624 	    case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1625 	    case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1626 	    case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1627 
1628 		/*
1629 		 * encoded as	<ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1630 		 *		<group_length><compiled_regex...>\
1631 		 *		<END_GROUP|ZERO_OR_MORE><groupn>
1632 		 *
1633 		 * NOTE:
1634 		 *
1635 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1636 		 *	length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
1637 		 *		<groupn>)
1638 		 *
1639 		 */
1640 
1641 		group_length =
1642 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1643 		    TIMES_256_SHIFT);
1644 		regexp++;
1645 		group_length += (unsigned int)*regexp;
1646 		regexp++;
1647 		repeat_startp = stringp;
1648 		test_stringp = test_string(stringp, regexp);
1649 		while (test_stringp != (char *)0) {
1650 		    if (push_stringp(stringp) == (char *)0)
1651 			return ((char *)0);
1652 		    stringp = test_stringp;
1653 		    test_stringp = test_string(stringp, regexp);
1654 		}
1655 		regexp += group_length;
1656 		return (test_repeated_group(repeat_startp, stringp, regexp));
1657 
1658 		/* end case ZERO_OR_MORE_GROUP */
1659 
1660 	    case END_GROUP|ZERO_OR_MORE:	/* (.....)* */
1661 
1662 		/* encoded as <END_GROUP|ZERO_OR_MORE> */
1663 
1664 		/* return from recursive call to test_string() */
1665 
1666 		return ((char *)stringp);
1667 
1668 		/* end case END_GROUP|ZERO_OR_MORE */
1669 
1670 	    case ONE_OR_MORE_GROUP:		/* (.....)+ */
1671 	    case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1672 	    case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1673 	    case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1674 
1675 		/*
1676 		 * encoded as	<ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1677 		 *		<group_length><compiled_regex...>\
1678 		 *		<END_GROUP|ONE_OR_MORE><groupn>
1679 		 *
1680 		 * NOTE:
1681 		 *
1682 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1683 		 * 	length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
1684 		 *		<groupn>)
1685 		 */
1686 
1687 		group_length =
1688 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1689 		    TIMES_256_SHIFT);
1690 		regexp++;
1691 		group_length += (unsigned int)*regexp;
1692 		regexp++;
1693 		stringp = test_string(stringp, regexp);
1694 		if (stringp == (char *)0)
1695 		    return ((char *)0);
1696 		repeat_startp = stringp;
1697 		test_stringp = test_string(stringp, regexp);
1698 		while (test_stringp != (char *)0) {
1699 		    if (push_stringp(stringp) == (char *)0)
1700 			return ((char *)0);
1701 		    stringp = test_stringp;
1702 		    test_stringp = test_string(stringp, regexp);
1703 		}
1704 		regexp += group_length;
1705 		return (test_repeated_group(repeat_startp, stringp, regexp));
1706 
1707 		/* end case ONE_OR_MORE_GROUP */
1708 
1709 	    case END_GROUP|ONE_OR_MORE:		/* (.....)+ */
1710 
1711 		/* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
1712 
1713 		/* return from recursive call to test_string() */
1714 
1715 		return ((char *)stringp);
1716 
1717 		/* end case END_GROUP|ONE_OR_MORE */
1718 
1719 	    case COUNTED_GROUP:		/* (.....){min_count,max_count} */
1720 	    case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
1721 	    case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
1722 	    case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
1723 
1724 		/*
1725 		 * encoded as	<COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
1726 		 *		<compiled_regex...><END_GROUP|COUNT><groupn>\
1727 		 *		<minimum_match_count><maximum_match_count>
1728 		 *
1729 		 * NOTE:
1730 		 *
1731 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1732 		 *	length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
1733 		 *
1734 		 * but does not include the <minimum_match_count> or
1735 		 *	<maximum_match_count> bytes
1736 		 */
1737 
1738 		group_length =
1739 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1740 		    TIMES_256_SHIFT);
1741 		regexp++;
1742 		group_length += (unsigned int)*regexp;
1743 		regexp++;
1744 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1745 		    regexp + group_length);
1746 		test_stringp = test_string(stringp, regexp);
1747 		while ((test_stringp != (char *)0) && (nmust_match > 0)) {
1748 		    stringp = test_stringp;
1749 		    nmust_match--;
1750 		    test_stringp = test_string(stringp, regexp);
1751 		}
1752 		if (nmust_match > 0) {
1753 		    return ((char *)0);
1754 		} else if (nextra_matches_allowed == UNLIMITED) {
1755 		    repeat_startp = stringp;
1756 		    while (test_stringp != (char *)0) {
1757 			if (push_stringp(stringp) == (char *)0)
1758 			    return ((char *)0);
1759 			stringp = test_stringp;
1760 			test_stringp = test_string(stringp, regexp);
1761 		}
1762 		    regexp += group_length + 2;
1763 		    return (test_repeated_group(repeat_startp, stringp,
1764 			regexp));
1765 		} else {
1766 		    repeat_startp = stringp;
1767 		    while ((test_stringp != (char *)0) &&
1768 			(nextra_matches_allowed > 0)) {
1769 			nextra_matches_allowed--;
1770 			if (push_stringp(stringp) == (char *)0)
1771 			    return ((char *)0);
1772 			stringp = test_stringp;
1773 			test_stringp = test_string(stringp, regexp);
1774 		}
1775 		    regexp += group_length + 2;
1776 		    return (test_repeated_group(repeat_startp, stringp,
1777 			regexp));
1778 		}
1779 		/* end case COUNTED_GROUP */
1780 
1781 	    case END_GROUP|COUNT:	/* (.....){min_count,max_count} */
1782 
1783 		/* encoded as <END_GROUP|COUNT> */
1784 
1785 		/* return from recursive call to test_string() */
1786 
1787 		return (stringp);
1788 
1789 		/* end case END_GROUP|COUNT */
1790 
1791 	    case END_OF_STRING_MARK:
1792 
1793 		/* encoded as <END_OF_STRING_MARK><END_REGEX> */
1794 
1795 		if (*stringp == '\0') {
1796 		    regexp++;
1797 		} else {
1798 		    return ((char *)0);
1799 		}
1800 		break; /* end case END_OF_STRING_MARK */
1801 
1802 	    case END_REGEX: /* end of the compiled regular expression */
1803 
1804 		/* encoded as <END_REGEX> */
1805 
1806 		return (stringp);
1807 
1808 		/* end case END_REGEX */
1809 
1810 	    default:
1811 
1812 		return ((char *)0);
1813 
1814 	    } /* end switch (*regexp) */
1815 
1816 	} /* end for (;;) */
1817 
1818 } /* test_string() */
1819