xref: /illumos-gate/usr/src/lib/libc/port/regex/regex.c (revision e8d712970f7ec76e09d5013b0b9aa5f0e0cf3e62)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 /*
31  * IMPORTANT NOTE:
32  *
33  * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
34  * IT IS **NOT** CHARACTER SET INDEPENDENT.
35  *
36  */
37 
38 #pragma weak _regex = regex
39 
40 #include "lint.h"
41 /* CONSTANTS SHARED WITH regcmp() */
42 #include "regex.h"
43 #include "mtlib.h"
44 #include <limits.h>
45 #include <stdarg.h>
46 #include <stdlib.h>
47 #include <thread.h>
48 #include <widec.h>
49 #include "tsd.h"
50 
51 
52 /* PRIVATE CONSTANTS */
53 
/*
 * NOTE(review): the three ADD_*_TO_GROUP_LENGTH values and ADDED_LENGTH_BITS
 * are not referenced in the part of the file reviewed here; judging by their
 * names they presumably encode an extra 256/512/768 added to a group length,
 * with ADDED_LENGTH_BITS (0x1 | 0x2) as the mask that isolates that code --
 * confirm against regcmp() and test_string().
 */
#define	ADD_256_TO_GROUP_LENGTH	0x1
#define	ADD_512_TO_GROUP_LENGTH	0x2
#define	ADD_768_TO_GROUP_LENGTH	0x3
#define	ADDED_LENGTH_BITS	0x3
/* keeps the low-order 8 bits of a count byte (see get_match_counts()) */
#define	SINGLE_BYTE_MASK	0xff
/* number of entries in stringp_stack[] */
#define	STRINGP_STACK_SIZE	50
60 
61 
62 /* PRIVATE TYPE DEFINITIONS */
63 
/*
 * Sense of a character class test: the test succeeds either when the
 * character is a member of the class (IN_CLASS) or when it is not a
 * member of the class (NOT_IN_CLASS).
 */
typedef enum {
	NOT_IN_CLASS = 0,
	IN_CLASS = 1
} char_test_condition_t;
68 
/*
 * Outcome of testing a character against a character class.
 *
 * CONDITION_TRUE and CONDITION_FALSE report whether the requested
 * char_test_condition_t held; CHAR_TEST_ERROR is returned by the
 * multibyte class test when a character of the class cannot be decoded.
 * NOTE(review): TESTING_CHAR is not returned by any of the class tests
 * below; presumably it denotes "no result yet" -- confirm.
 */
typedef enum {
	TESTING_CHAR = 0,
	CONDITION_TRUE = 1,
	CONDITION_FALSE = 2,
	CHAR_TEST_ERROR = 3
} char_test_result_t;
75 
76 
77 /* PRIVATE GLOBAL VARIABLES */
78 
/* held by regex() for as long as it uses the shared variables below */
static mutex_t		regex_lock = DEFAULTMUTEX;
/*
 * return_arg_number[n] is the index of the regex() argument that receives
 * saved substring n, or -1 when substring n is not to be returned
 */
static int		return_arg_number[NSUBSTRINGS];
/* first character following saved substring n (set by test_string()) */
static const char	*substring_endp[NSUBSTRINGS];
/* first character of saved substring n, or null if none was recorded */
static const char	*substring_startp[NSUBSTRINGS];
/* storage for the string pointers saved by push_stringp() */
static const char	*stringp_stack[STRINGP_STACK_SIZE];
/*
 * most recently pushed entry; equals &stringp_stack[STRINGP_STACK_SIZE]
 * when the stack is empty (entries are added at decreasing addresses)
 */
static const char	**stringp_stackp;
85 
86 
87 /* DECLARATIONS OF PRIVATE FUNCTIONS */
88 
/* decodes the character at stringp into *wcharp and returns its byte size */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp);

/* reads the <minimum_match_count><maximum_match_count> bytes of a regex */
static void
get_match_counts(int *nmust_matchp,
	int *nextra_matches_allowedp,
	const char *count_stringp);

/* tests whether test_char lies in the range lower_char..upper_char */
static boolean_t
in_wchar_range(wchar_t test_char,
	wchar_t lower_char,
	wchar_t upper_char);

/* removes and returns the most recently pushed string pointer */
static const char *
pop_stringp(void);

/* steps back one character in a string of multibyte characters */
static const char *
previous_charp(const char *current_charp);

/* saves a string pointer on the string pointer stack */
static const char *
push_stringp(const char *stringp);

/* class test for classes compiled as <class_length>...aTHRUz... (ASCII) */
static char_test_result_t
test_char_against_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/* class test for classes that contain multibyte characters */
static char_test_result_t
test_char_against_multibyte_class(wchar_t test_char,
	const char *classp,
	char_test_condition_t test_condition);


/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */

static char_test_result_t
test_char_against_old_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/*
 * The three test_repeated_*() functions try the rest of the regular
 * expression after a repeated item, giving back matched text one step
 * at a time (one byte, one multibyte character, or one saved group
 * position) until the rest matches or repeat_startp is reached.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

/* matches the compiled regular expression at the start of stringp */
static const char *
test_string(const char *stringp,
	const char *regexp);
148 
149 
150 /* DEFINITIONS OF PUBLIC VARIABLES */
151 
152 char *__loc1;
153 
154 /*
155  * reserve thread-specific storage for __loc1
156  */
157 char **
158 ____loc1(void)
159 {
160 	if (thr_main())
161 		return (&__loc1);
162 	return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
163 }
164 
165 #define	__loc1 (*(____loc1()))
166 
167 /* DEFINITION OF regex() */
168 
169 extern char *
170 regex(const char *regexp, const char *stringp, ...)
171 {
172 	va_list		arg_listp;
173 	int		char_size;
174 	const char	*end_of_matchp;
175 	wchar_t		regex_wchar;
176 	char		*return_argp[NSUBSTRINGS];
177 	char		*returned_substringp;
178 	int		substringn;
179 	const char	*substringp;
180 	wchar_t		string_wchar;
181 
182 	if (____loc1() == (char **)0) {
183 	    return ((char *)0);
184 	} else {
185 	    lmutex_lock(&regex_lock);
186 	    __loc1 = (char *)0;
187 	}
188 
189 	if ((stringp == (char *)0) || (regexp == (char *)0)) {
190 	    lmutex_unlock(&regex_lock);
191 	return ((char *)0);
192 	}
193 
194 
195 	/* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS  */
196 
197 	substringn = 0;
198 	va_start(arg_listp, stringp);
199 	while (substringn < NSUBSTRINGS) {
200 	    return_argp[substringn] = va_arg(arg_listp, char *);
201 	    substring_startp[substringn] = (char *)0;
202 	    return_arg_number[substringn] = -1;
203 	    substringn++;
204 	}
205 	va_end(arg_listp);
206 
207 
208 	/* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
209 
210 	end_of_matchp = (char *)0;
211 	stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
212 
213 	if ((int)*regexp == (int)START_OF_STRING_MARK) {
214 
215 	/*
216 	 * the match must start at the beginning of the string
217 	 */
218 
219 	    __loc1 = (char *)stringp;
220 	    regexp++;
221 	    end_of_matchp = test_string(stringp, regexp);
222 
223 	} else if ((int)*regexp == (int)ASCII_CHAR) {
224 
225 	/*
226 	 * test a string against a regular expression
227 	 * that starts with a single ASCII character:
228 	 *
229 	 * move to each character in the string that matches
230 	 * the first character in the regular expression
231 	 * and test the remaining string
232 	 */
233 
234 	    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
235 		stringp++;
236 	    }
237 	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
238 		end_of_matchp = test_string(stringp, regexp);
239 		if (end_of_matchp != (char *)0) {
240 		    __loc1 = (char *)stringp;
241 		} else {
242 		    stringp++;
243 		    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
244 			stringp++;
245 		    }
246 		}
247 	    }
248 
249 	} else if (!multibyte) {
250 
251 	/*
252 	 * if the value of the "multibyte" macro defined in <euc.h>
253 	 * is false, regex() is running in an ASCII locale;
254 	 * test an ASCII string against an ASCII regular expression
255 	 * that doesn't start with a single ASCII character:
256 	 *
257 	 * move forward in the string one byte at a time, testing
258 	 * the remaining string against the regular expression
259 	 */
260 
261 	    end_of_matchp = test_string(stringp, regexp);
262 	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
263 		stringp++;
264 		end_of_matchp = test_string(stringp, regexp);
265 	    }
266 	    if (end_of_matchp != (char *)0) {
267 		__loc1 = (char *)stringp;
268 	    }
269 
270 	} else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
271 
272 	/*
273 	 * test a multibyte string against a multibyte regular expression
274 	 * that starts with a single multibyte character:
275 	 *
276 	 * move to each character in the string that matches
277 	 * the first character in the regular expression
278 	 * and test the remaining string
279 	 */
280 
281 	    (void) get_wchar(&regex_wchar, regexp + 1);
282 	    char_size = get_wchar(&string_wchar, stringp);
283 	    while ((string_wchar != regex_wchar) && (char_size > 0)) {
284 		stringp += char_size;
285 		char_size = get_wchar(&string_wchar, stringp);
286 	    }
287 	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
288 		end_of_matchp = test_string(stringp, regexp);
289 		if (end_of_matchp != (char *)0) {
290 		    __loc1 = (char *)stringp;
291 		} else {
292 		    stringp += char_size;
293 		    char_size = get_wchar(&string_wchar, stringp);
294 		    while ((string_wchar != regex_wchar) && (char_size > 0)) {
295 			stringp += char_size;
296 			char_size = get_wchar(&string_wchar, stringp);
297 		    }
298 		}
299 	    }
300 
301 	} else {
302 
303 	/*
304 	 * test a multibyte string against a multibyte regular expression
305 	 * that doesn't start with a single multibyte character
306 	 *
307 	 * move forward in the string one multibyte character at a time,
308 	 * testing the remaining string against the regular expression
309 	 */
310 
311 	    end_of_matchp = test_string(stringp, regexp);
312 	    char_size = get_wchar(&string_wchar, stringp);
313 	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
314 		stringp += char_size;
315 		end_of_matchp = test_string(stringp, regexp);
316 		char_size = get_wchar(&string_wchar, stringp);
317 	    }
318 	    if (end_of_matchp != (char *)0) {
319 		__loc1 = (char *)stringp;
320 	    }
321 	}
322 
323 	/*
324 	 * Return substrings that matched subexpressions for which
325 	 * matching substrings are to be returned.
326 	 *
327 	 * NOTE:
328 	 *
329 	 * According to manual page regcmp(3C), regex() returns substrings
330 	 * that match subexpressions even when no substring matches the
331 	 * entire regular expression.
332 	 */
333 
334 	substringn = 0;
335 	while (substringn < NSUBSTRINGS) {
336 	    substringp = substring_startp[substringn];
337 	    if ((substringp != (char *)0) &&
338 		(return_arg_number[substringn] >= 0)) {
339 		returned_substringp =
340 		    return_argp[return_arg_number[substringn]];
341 		if (returned_substringp != (char *)0) {
342 		    while (substringp < substring_endp[substringn]) {
343 			*returned_substringp = (char)*substringp;
344 			returned_substringp++;
345 			substringp++;
346 		    }
347 		    *returned_substringp = '\0';
348 		}
349 	    }
350 	    substringn++;
351 	}
352 	lmutex_unlock(&regex_lock);
353 	return ((char *)end_of_matchp);
354 }  /* regex() */
355 
356 
357 /* DEFINITIONS OF PRIVATE FUNCTIONS */
358 
/*
 * get_wchar() -- decode the character at the start of a string
 *
 * wcharp	receives the decoded character
 * stringp	string to decode; may be a null pointer
 *
 * Returns the number of bytes occupied by the character:
 *	0	stringp is null or addresses the terminating null byte;
 *		*wcharp is set to 0
 *	1	the first byte is in the range 0x01-0x7f; *wcharp is set
 *		to that byte
 *	other	whatever mbtowc() returns for up to MB_LEN_MAX bytes
 *
 * When mbtowc() does not return a positive size, *wcharp is set to 0 so
 * that it always holds a defined value: several callers compare the
 * character before they look at the returned size.
 */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp)
{
	int char_size;

	if ((stringp == NULL) || (*stringp == '\0')) {
	    *wcharp = (wchar_t)0;
	    return (0);
	}

	if ((unsigned char)*stringp <= (unsigned char)0x7f) {
	    *wcharp = (wchar_t)((unsigned int)*stringp);
	    return (1);
	}

	char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
	if (char_size <= 0) {
	    /* mbtowc() stores nothing on failure; don't leave garbage */
	    *wcharp = (wchar_t)0;
	}
	return (char_size);
}
379 
380 static void
381 get_match_counts(int *nmust_matchp,
382 	int *nextra_matches_allowedp,
383 	const char *count_stringp)
384 {
385 	int minimum_match_count;
386 	int maximum_match_count;
387 
388 	minimum_match_count =
389 	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
390 	*nmust_matchp = minimum_match_count;
391 
392 	count_stringp++;
393 	maximum_match_count =
394 	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
395 	if (maximum_match_count == (int)UNLIMITED) {
396 	    *nextra_matches_allowedp = (int)UNLIMITED;
397 	} else {
398 	    *nextra_matches_allowedp =
399 		maximum_match_count - minimum_match_count;
400 	}
401 	return;
402 
403 } /* get_match_counts() */
404 
405 static boolean_t
406 in_wchar_range(wchar_t test_char,
407 	wchar_t lower_char,
408 	wchar_t upper_char)
409 {
410 	return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
411 	    (lower_char <= test_char) && (test_char <= upper_char)) ||
412 	    (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
413 	    ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
414 	    (lower_char <= test_char) && (test_char <= upper_char)));
415 
416 } /* in_wchar_range() */
417 
418 static const char *
419 pop_stringp(void)
420 {
421 	const char *stringp;
422 
423 	if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
424 	    return ((char *)0);
425 	} else {
426 	    stringp = *stringp_stackp;
427 	    stringp_stackp++;
428 	    return (stringp);
429 	}
430 }
431 
432 
433 static const char *
434 previous_charp(const char *current_charp)
435 {
436 	/*
437 	 * returns the pointer to the previous character in
438 	 * a string of multibyte characters
439 	 */
440 
441 	const char *prev_cs0 = current_charp - 1;
442 	const char *prev_cs1 = current_charp - eucw1;
443 	const char *prev_cs2 = current_charp - eucw2 - 1;
444 	const char *prev_cs3 = current_charp - eucw3 - 1;
445 	const char *prev_charp;
446 
447 	if ((unsigned char)*prev_cs0 <= 0x7f) {
448 	    prev_charp = prev_cs0;
449 	} else if ((unsigned char)*prev_cs2 == SS2) {
450 	    prev_charp = prev_cs2;
451 	} else if ((unsigned char)*prev_cs3 == SS3) {
452 	    prev_charp = prev_cs3;
453 	} else {
454 	    prev_charp = prev_cs1;
455 	}
456 	return (prev_charp);
457 
458 } /* previous_charp() */
459 
460 static const char *
461 push_stringp(const char *stringp)
462 {
463 	if (stringp_stackp <= &stringp_stack[0]) {
464 	    return ((char *)0);
465 	} else {
466 	    stringp_stackp--;
467 	    *stringp_stackp = stringp;
468 	    return (stringp);
469 	}
470 }
471 
472 
473 static char_test_result_t
474 test_char_against_ascii_class(char test_char,
475 	const char *classp,
476 	char_test_condition_t test_condition)
477 {
478 	/*
479 	 * tests a character for membership in an ASCII character class compiled
480 	 * by the internationalized version of regcmp();
481 	 *
482 	 * NOTE: The internationalized version of regcmp() compiles
483 	 * 	the range a-z in an ASCII character class to aTHRUz.
484 	 */
485 
486 	int	nbytes_to_check;
487 
488 	nbytes_to_check = (int)*classp;
489 	classp++;
490 	nbytes_to_check--;
491 
492 	while (nbytes_to_check > 0) {
493 	    if (test_char == *classp) {
494 		if (test_condition == IN_CLASS)
495 		    return (CONDITION_TRUE);
496 		else
497 		    return (CONDITION_FALSE);
498 	    } else if (*classp == THRU) {
499 		if ((*(classp - 1) <= test_char) &&
500 		    (test_char <= *(classp + 1))) {
501 		    if (test_condition == IN_CLASS)
502 			return (CONDITION_TRUE);
503 		    else
504 			return (CONDITION_FALSE);
505 		} else {
506 		    classp += 2;
507 		    nbytes_to_check -= 2;
508 		}
509 	    } else {
510 		classp++;
511 		nbytes_to_check--;
512 	    }
513 	}
514 	if (test_condition == NOT_IN_CLASS) {
515 	    return (CONDITION_TRUE);
516 	} else {
517 	    return (CONDITION_FALSE);
518 	}
519 } /* test_char_against_ascii_class() */
520 
521 static char_test_result_t
522 test_char_against_multibyte_class(wchar_t test_char,
523 	const char *classp,
524 	char_test_condition_t test_condition)
525 {
526 	/*
527 	 * tests a character for membership in a multibyte character class;
528 	 *
529 	 * NOTE: The range a-z in a multibyte character class compiles to
530 	 * 	aTHRUz.
531 	 */
532 
533 	int		char_size;
534 	wchar_t		current_char;
535 	int		nbytes_to_check;
536 	wchar_t		previous_char;
537 
538 	nbytes_to_check = (int)*classp;
539 	classp++;
540 	nbytes_to_check--;
541 
542 	char_size = get_wchar(&current_char, classp);
543 	if (char_size <= 0) {
544 	    return (CHAR_TEST_ERROR);
545 	} else if (test_char == current_char) {
546 	    if (test_condition == IN_CLASS) {
547 		return (CONDITION_TRUE);
548 	    } else {
549 		return (CONDITION_FALSE);
550 	    }
551 	} else {
552 	    classp += char_size;
553 	    nbytes_to_check -= char_size;
554 	}
555 
556 	while (nbytes_to_check > 0) {
557 	    previous_char = current_char;
558 	    char_size = get_wchar(&current_char, classp);
559 	    if (char_size <= 0) {
560 		return (CHAR_TEST_ERROR);
561 	    } else if (test_char == current_char) {
562 		if (test_condition == IN_CLASS) {
563 		    return (CONDITION_TRUE);
564 		} else {
565 		    return (CONDITION_FALSE);
566 		}
567 	    } else if (current_char == THRU) {
568 		classp += char_size;
569 		nbytes_to_check -= char_size;
570 		char_size = get_wchar(&current_char, classp);
571 		if (char_size <= 0) {
572 		    return (CHAR_TEST_ERROR);
573 		} else if (in_wchar_range(test_char, previous_char,
574 		    current_char)) {
575 		    if (test_condition == IN_CLASS) {
576 			return (CONDITION_TRUE);
577 		    } else {
578 			return (CONDITION_FALSE);
579 		    }
580 		} else {
581 		    classp += char_size;
582 		    nbytes_to_check -= char_size;
583 		}
584 	    } else {
585 		classp += char_size;
586 		nbytes_to_check -= char_size;
587 	    }
588 	}
589 	if (test_condition == NOT_IN_CLASS) {
590 	    return (CONDITION_TRUE);
591 	} else {
592 	    return (CONDITION_FALSE);
593 	}
594 } /* test_char_against_multibyte_class() */
595 
596 
597 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
598 
599 static char_test_result_t
600 test_char_against_old_ascii_class(char test_char,
601 	const char *classp,
602 	char_test_condition_t test_condition)
603 {
604 	/*
605 	 * tests a character for membership in an ASCII character class compiled
606 	 * by the ASCII version of regcmp();
607 	 *
608 	 * NOTE: ASCII versions of regcmp() compile the range a-z in an
609 	 *	ASCII character class to THRUaz.  The internationalized
610 	 *	version compiles the same range to aTHRUz.
611 	 */
612 
613 	int	nbytes_to_check;
614 
615 	nbytes_to_check = (int)*classp;
616 	classp++;
617 	nbytes_to_check--;
618 
619 	while (nbytes_to_check > 0) {
620 	    if (test_char == *classp) {
621 		if (test_condition == IN_CLASS) {
622 		    return (CONDITION_TRUE);
623 		} else {
624 		    return (CONDITION_FALSE);
625 		}
626 	    } else if (*classp == THRU) {
627 		if ((*(classp + 1) <= test_char) &&
628 		    (test_char <= *(classp + 2))) {
629 		    if (test_condition == IN_CLASS) {
630 			return (CONDITION_TRUE);
631 		    } else {
632 			return (CONDITION_FALSE);
633 		    }
634 		} else {
635 		    classp += 3;
636 		    nbytes_to_check -= 3;
637 		}
638 	    } else {
639 		classp++;
640 		nbytes_to_check--;
641 	    }
642 	}
643 	if (test_condition == NOT_IN_CLASS) {
644 	    return (CONDITION_TRUE);
645 	} else {
646 	    return (CONDITION_FALSE);
647 	}
648 } /* test_char_against_old_ascii_class() */
649 
/*
 * test_repeated_ascii_char() -- match the rest of a regular expression
 * after a repeated single-byte item
 *
 * Tries test_string() at stringp and then at each earlier byte, one
 * byte at a time, down to and including repeat_startp.
 *
 * Returns the first non-null test_string() result, or a null pointer
 * when none of the positions matches.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchp;

	for (;;) {
	    matchp = test_string(stringp, regexp);
	    if ((matchp != NULL) || (stringp <= repeat_startp)) {
		return (matchp);
	    }
	    stringp--;
	}
}
665 
/*
 * test_repeated_multibyte_char() -- match the rest of a regular
 * expression after a repeated multibyte item
 *
 * Tries test_string() at stringp and then at each earlier position
 * found by previous_charp(), for as long as the position tried is
 * beyond repeat_startp.
 *
 * Returns the first non-null test_string() result, or a null pointer
 * when none of the positions matches.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchp;

	for (;;) {
	    matchp = test_string(stringp, regexp);
	    if ((matchp != NULL) || (stringp <= repeat_startp)) {
		return (matchp);
	    }
	    stringp = previous_charp(stringp);
	}
}
681 
/*
 * test_repeated_group() -- match the rest of a regular expression
 * after a repeated group
 *
 * Tries test_string() at stringp and then at each position taken from
 * the string pointer stack by pop_stringp(), for as long as the
 * position tried is beyond repeat_startp.
 *
 * Returns the first non-null test_string() result, or a null pointer
 * when none of the positions matches or the stack runs out.
 */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchp;

	for (;;) {
	    matchp = test_string(stringp, regexp);
	    if ((matchp != NULL) || (stringp <= repeat_startp)) {
		return (matchp);
	    }
	    stringp = pop_stringp();
	    if (stringp == NULL) {
		return (NULL);
	    }
	}
}
700 
701 static const char *
702 test_string(const char *stringp,
703 	const char *regexp)
704 {
705 	/*
706 	 * returns a pointer to the first character following the first
707 	 * substring of the string addressed by stringp that matches
708 	 * the compiled regular expression addressed by regexp
709 	 */
710 
711 	unsigned int		group_length;
712 	int			nextra_matches_allowed;
713 	int			nmust_match;
714 	wchar_t			regex_wchar;
715 	int			regex_char_size;
716 	const char		*repeat_startp;
717 	unsigned int		return_argn;
718 	wchar_t			string_wchar;
719 	int			string_char_size;
720 	unsigned int		substringn;
721 	char_test_condition_t	test_condition;
722 	const char		*test_stringp;
723 
724 	for (;;) {
725 
726 		/*
727 		 * Exit the loop via a return whenever there's a match
728 		 * or it's clear that there can be no match.
729 		 */
730 
731 	    switch ((int)*regexp) {
732 
733 		/*
734 		 * No fall-through.
735 		 * Each case ends with either a return or with stringp
736 		 * addressing the next character to be tested and regexp
737 		 * addressing the next compiled regular expression
738 		 *
739 		 * NOTE: The comments for each case give the meaning
740 		 *	of the compiled regular expression decoded by the case
741 		 *	and the character string that the compiled regular
742 		 *	expression uses to encode the case.  Each single
743 		 *	character encoded in the compiled regular expression
744 		 *	is shown enclosed in angle brackets (<>).  Each
745 		 *	compiled regular expression begins with a marker
746 		 *	character which is shown as a named constant
747 		 *	(e.g. <ASCII_CHAR>). Character constants are shown
748 		 *	enclosed in single quotes (e.g. <'$'>).  All other
749 		 *	single characters encoded in the compiled regular
750 		 *	expression are shown as lower case variable names
751 		 *	(e.g. <ascii_char> or <multibyte_char>). Multicharacter
752 		 *	strings encoded in the compiled regular expression
		 *	are shown as variable names followed by ellipses
754 		 *	(e.g. <compiled_regex...>).
755 		 */
756 
757 	    case ASCII_CHAR: /* single ASCII char */
758 
759 		/* encoded as <ASCII_CHAR><ascii_char> */
760 
761 		regexp++;
762 		if (*regexp == *stringp) {
763 		    regexp++;
764 		    stringp++;
765 		} else {
766 		    return ((char *)0);
767 		}
768 		break;		/* end case ASCII_CHAR */
769 
770 	    case MULTIBYTE_CHAR: /* single multibyte char */
771 
772 		/* encoded as <MULTIBYTE_CHAR><multibyte_char> */
773 
774 		regexp++;
775 		regex_char_size = get_wchar(&regex_wchar, regexp);
776 		string_char_size = get_wchar(&string_wchar, stringp);
777 		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
778 		    return ((char *)0);
779 		} else {
780 		    regexp += regex_char_size;
781 		    stringp += string_char_size;
782 		}
783 		break;		/* end case MULTIBYTE_CHAR */
784 
785 	    case ANY_CHAR: /* any single ASCII or multibyte char */
786 
787 		/* encoded as <ANY_CHAR> */
788 
789 		if (!multibyte) {
790 		    if (*stringp == '\0') {
791 			return ((char *)0);
792 		    } else {
793 			regexp++;
794 			stringp++;
795 		    }
796 		} else {
797 		    string_char_size = get_wchar(&string_wchar, stringp);
798 		    if (string_char_size <= 0) {
799 			return ((char *)0);
800 		    } else {
801 			regexp++;
802 			stringp += string_char_size;
803 		    }
804 		}
805 		break;	/* end case ANY_CHAR */
806 
807 	    case IN_ASCII_CHAR_CLASS:		/* [.....] */
808 	    case NOT_IN_ASCII_CHAR_CLASS:
809 
810 		/*
811 		 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
812 		 *	or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
813 		 *
814 		 * NOTE: <class_length> includes the <class_length> byte
815 		 */
816 
817 		if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
818 		    test_condition = IN_CLASS;
819 		} else {
820 		    test_condition = NOT_IN_CLASS;
821 		}
822 		regexp++; /* point to the <class_length> byte */
823 
824 		if ((*stringp != '\0') &&
825 		    (test_char_against_ascii_class(*stringp, regexp,
826 		    test_condition) == CONDITION_TRUE)) {
827 		    regexp += (int)*regexp; /* add the class length to regexp */
828 		    stringp++;
829 		} else {
830 		    return ((char *)0);
831 		}
832 		break; /* end case IN_ASCII_CHAR_CLASS */
833 
834 	    case IN_MULTIBYTE_CHAR_CLASS:	/* [....] */
835 	    case NOT_IN_MULTIBYTE_CHAR_CLASS:
836 
837 		/*
838 		 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
839 		 * 	or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
840 		 *
841 		 * NOTE: <class_length> includes the <class_length> byte
842 		 */
843 
844 		if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
845 		    test_condition = IN_CLASS;
846 		} else {
847 		    test_condition = NOT_IN_CLASS;
848 		}
849 		regexp++; /* point to the <class_length> byte */
850 
851 		string_char_size = get_wchar(&string_wchar, stringp);
852 		if ((string_char_size > 0) &&
853 		    (test_char_against_multibyte_class(string_wchar, regexp,
854 		    test_condition) == CONDITION_TRUE)) {
855 		    regexp += (int)*regexp; /* add the class length to regexp */
856 		    stringp += string_char_size;
857 		} else {
858 		    return ((char *)0);
859 		}
860 		break; /* end case IN_MULTIBYTE_CHAR_CLASS */
861 
862 	    case IN_OLD_ASCII_CHAR_CLASS:	/* [...] */
863 	    case NOT_IN_OLD_ASCII_CHAR_CLASS:
864 
865 		/*
866 		 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
867 		 *	or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
868 		 *
869 		 * NOTE: <class_length> includes the <class_length> byte
870 		 */
871 
872 		if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
873 		    test_condition = IN_CLASS;
874 		} else {
875 		    test_condition = NOT_IN_CLASS;
876 		}
877 		regexp++; /* point to the <class_length> byte */
878 
879 		if ((*stringp != '\0') &&
880 		    (test_char_against_old_ascii_class(*stringp, regexp,
881 		    test_condition) == CONDITION_TRUE)) {
882 		    regexp += (int)*regexp; /* add the class length to regexp */
883 		    stringp++;
884 		} else {
885 		    return ((char *)0);
886 		}
887 		break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
888 
889 	    case SIMPLE_GROUP: /* (.....) */
890 
891 		/* encoded as <SIMPLE_GROUP><group_length> */
892 
893 		regexp += 2;
894 		break;		/* end case SIMPLE_GROUP */
895 
896 	    case END_GROUP:	/* (.....) */
897 
898 		/* encoded as <END_GROUP><groupn> */
899 
900 		regexp += 2;
901 		break;		/* end case END_GROUP */
902 
903 	    case SAVED_GROUP:	/* (.....)$0-9 */
904 
905 		/* encoded as <SAVED_GROUP><substringn> */
906 
907 		regexp++;
908 		substringn = (unsigned int)*regexp;
909 		if (substringn >= NSUBSTRINGS)
910 		    return ((char *)0);
911 		substring_startp[substringn] = stringp;
912 		regexp++;
913 		break;		/* end case SAVED_GROUP */
914 
915 	    case END_SAVED_GROUP:	/* (.....)$0-9 */
916 
917 		/*
918 		 * encoded as <END_SAVED_GROUP><substringn>\
919 		 *	<return_arg_number[substringn]>
920 		 */
921 
922 		regexp++;
923 		substringn = (unsigned int)*regexp;
924 		if (substringn >= NSUBSTRINGS)
925 		    return ((char *)0);
926 		substring_endp[substringn] = stringp;
927 		regexp++;
928 		return_argn = (unsigned int)*regexp;
929 		if (return_argn >= NSUBSTRINGS)
930 		    return ((char *)0);
931 		return_arg_number[substringn] = return_argn;
932 		regexp++;
933 		break;		/* end case END_SAVED_GROUP */
934 
935 	    case ASCII_CHAR|ZERO_OR_MORE:  /* char* */
936 
937 		/* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
938 
939 		regexp++;
940 		repeat_startp = stringp;
941 		while (*stringp == *regexp) {
942 		    stringp++;
943 		}
944 		regexp++;
945 		return (test_repeated_ascii_char(repeat_startp,
946 		    stringp, regexp));
947 
948 		/* end case ASCII_CHAR|ZERO_OR_MORE */
949 
950 	    case ASCII_CHAR|ONE_OR_MORE:   /* char+ */
951 
952 		/* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
953 
954 		regexp++;
955 		if (*stringp != *regexp) {
956 		    return ((char *)0);
957 		} else {
958 		    stringp++;
959 		    repeat_startp = stringp;
960 		    while (*stringp == *regexp) {
961 			stringp++;
962 		    }
963 		    regexp++;
964 		    return (test_repeated_ascii_char(repeat_startp, stringp,
965 			regexp));
966 		}
967 		/* end case ASCII_CHAR|ONE_OR_MORE */
968 
969 	    case ASCII_CHAR|COUNT:	/* char{min_count,max_count} */
970 
971 		/*
972 		 * encoded as <ASCII_CHAR|COUNT><ascii_char>\
973 		 *	<minimum_match_count><maximum_match_count>
974 		 */
975 
976 		regexp++;
977 		get_match_counts(&nmust_match, &nextra_matches_allowed,
978 		    regexp + 1);
979 		while ((*stringp == *regexp) && (nmust_match > 0)) {
980 		    nmust_match--;
981 		    stringp++;
982 		}
983 		if (nmust_match > 0) {
984 		    return ((char *)0);
985 		} else if (nextra_matches_allowed == UNLIMITED) {
986 		    repeat_startp = stringp;
987 		    while (*stringp == *regexp) {
988 			stringp++;
989 		    }
990 		    regexp += 3;
991 		    return (test_repeated_ascii_char(repeat_startp, stringp,
992 			regexp));
993 		} else {
994 		    repeat_startp = stringp;
995 		    while ((*stringp == *regexp) &&
996 			(nextra_matches_allowed > 0)) {
997 			nextra_matches_allowed--;
998 			stringp++;
999 		    }
1000 		    regexp += 3;
1001 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1002 			regexp));
1003 		}
1004 		/* end case ASCII_CHAR|COUNT */
1005 
1006 	    case MULTIBYTE_CHAR|ZERO_OR_MORE:   /* char* */
1007 
1008 		/* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
1009 
1010 		regexp++;
1011 		regex_char_size = get_wchar(&regex_wchar, regexp);
1012 		repeat_startp = stringp;
1013 		string_char_size = get_wchar(&string_wchar, stringp);
1014 		while ((string_char_size > 0) &&
1015 		    (string_wchar == regex_wchar)) {
1016 		    stringp += string_char_size;
1017 		    string_char_size = get_wchar(&string_wchar, stringp);
1018 		}
1019 		regexp += regex_char_size;
1020 		return (test_repeated_multibyte_char(repeat_startp, stringp,
1021 		    regexp));
1022 
1023 		/* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
1024 
1025 	    case MULTIBYTE_CHAR|ONE_OR_MORE:    /* char+ */
1026 
1027 		/* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
1028 
1029 		regexp++;
1030 		regex_char_size = get_wchar(&regex_wchar, regexp);
1031 		string_char_size = get_wchar(&string_wchar, stringp);
1032 		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
1033 		    return ((char *)0);
1034 		} else {
1035 		    stringp += string_char_size;
1036 		    repeat_startp = stringp;
1037 		    string_char_size = get_wchar(&string_wchar, stringp);
1038 		    while ((string_char_size > 0) &&
1039 			(string_wchar == regex_wchar)) {
1040 			stringp += string_char_size;
1041 			string_char_size = get_wchar(&string_wchar, stringp);
1042 		    }
1043 		    regexp += regex_char_size;
1044 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1045 			regexp));
1046 		}
1047 		/* end case MULTIBYTE_CHAR|ONE_OR_MORE */
1048 
1049 	    case MULTIBYTE_CHAR|COUNT:		/* char{min_count,max_count} */
1050 
1051 		/*
1052 		 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
1053 		 * 	<minimum_match_count><maximum_match_count>
1054 		 */
1055 
1056 		regexp++;
1057 		regex_char_size = get_wchar(&regex_wchar, regexp);
1058 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1059 		    regexp + regex_char_size);
1060 		string_char_size = get_wchar(&string_wchar, stringp);
1061 		while ((string_char_size > 0) &&
1062 		    (string_wchar == regex_wchar) &&
1063 		    (nmust_match > 0)) {
1064 
1065 		    nmust_match--;
1066 		    stringp += string_char_size;
1067 		    string_char_size = get_wchar(&string_wchar, stringp);
1068 		}
1069 		if (nmust_match > 0) {
1070 		    return ((char *)0);
1071 		} else if (nextra_matches_allowed == UNLIMITED) {
1072 		    repeat_startp = stringp;
1073 		    while ((string_char_size > 0) &&
1074 			(string_wchar == regex_wchar)) {
1075 			stringp += string_char_size;
1076 			string_char_size = get_wchar(&string_wchar, stringp);
1077 		    }
1078 		    regexp += regex_char_size + 2;
1079 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1080 			regexp));
1081 		} else {
1082 		    repeat_startp = stringp;
1083 		    while ((string_char_size > 0) &&
1084 			(string_wchar == regex_wchar) &&
1085 			(nextra_matches_allowed > 0)) {
1086 			nextra_matches_allowed--;
1087 			stringp += string_char_size;
1088 			string_char_size = get_wchar(&string_wchar, stringp);
1089 		    }
1090 		    regexp += regex_char_size + 2;
1091 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1092 			regexp));
1093 		}
1094 		/* end case MULTIBYTE_CHAR|COUNT */
1095 
1096 	    case ANY_CHAR|ZERO_OR_MORE:		/* .* */
1097 
1098 		/* encoded as <ANY_CHAR|ZERO_OR_MORE> */
1099 
1100 		repeat_startp = stringp;
1101 		if (!multibyte) {
1102 		    while (*stringp != '\0') {
1103 			stringp++;
1104 		    }
1105 		    regexp++;
1106 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1107 			regexp));
1108 		} else {
1109 		    string_char_size = get_wchar(&string_wchar, stringp);
1110 		    while (string_char_size > 0) {
1111 			stringp += string_char_size;
1112 			string_char_size = get_wchar(&string_wchar, stringp);
1113 		    }
1114 		    regexp++;
1115 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1116 			regexp));
1117 		}
1118 		/* end case <ANY_CHAR|ZERO_OR_MORE> */
1119 
1120 	    case ANY_CHAR|ONE_OR_MORE:		/* .+ */
1121 
1122 		/* encoded as <ANY_CHAR|ONE_OR_MORE> */
1123 
1124 		if (!multibyte) {
1125 		    if (*stringp == '\0') {
1126 			return ((char *)0);
1127 		    } else {
1128 			stringp++;
1129 			repeat_startp = stringp;
1130 			while (*stringp != '\0') {
1131 			    stringp++;
1132 			}
1133 			regexp++;
1134 			return (test_repeated_ascii_char(repeat_startp, stringp,
1135 			    regexp));
1136 		    }
1137 		} else {
1138 		    string_char_size = get_wchar(&string_wchar, stringp);
1139 		    if (string_char_size <= 0) {
1140 			return ((char *)0);
1141 		    } else {
1142 			stringp += string_char_size;
1143 			repeat_startp = stringp;
1144 			string_char_size = get_wchar(&string_wchar, stringp);
1145 			while (string_char_size > 0) {
1146 			    stringp += string_char_size;
1147 			    string_char_size =
1148 				get_wchar(&string_wchar, stringp);
1149 			}
1150 			regexp++;
1151 			return (test_repeated_multibyte_char(repeat_startp,
1152 			    stringp, regexp));
1153 		    }
1154 		}
1155 		/* end case <ANY_CHAR|ONE_OR_MORE> */
1156 
1157 	    case ANY_CHAR|COUNT:	/* .{min_count,max_count} */
1158 
1159 		/*
1160 		 * encoded as	<ANY_CHAR|COUNT>\
1161 		 *		<minimum_match_count><maximum_match_count>
1162 		 */
1163 
1164 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1165 		    regexp + 1);
1166 		if (!multibyte) {
1167 		    while ((*stringp != '\0') && (nmust_match > 0)) {
1168 			nmust_match--;
1169 			stringp++;
1170 		    }
1171 		    if (nmust_match > 0) {
1172 			return ((char *)0);
1173 		    } else if (nextra_matches_allowed == UNLIMITED) {
1174 			repeat_startp = stringp;
1175 			while (*stringp != '\0') {
1176 			    stringp++;
1177 			}
1178 			regexp += 3;
1179 			return (test_repeated_ascii_char(repeat_startp, stringp,
1180 			    regexp));
1181 		    } else {
1182 			repeat_startp = stringp;
1183 			while ((*stringp != '\0') &&
1184 			    (nextra_matches_allowed > 0)) {
1185 			    nextra_matches_allowed--;
1186 			    stringp++;
1187 			}
1188 			regexp += 3;
1189 			return (test_repeated_ascii_char(repeat_startp, stringp,
1190 			    regexp));
1191 		    }
1192 		} else { /* multibyte character */
1193 
1194 		    string_char_size = get_wchar(&string_wchar, stringp);
1195 		    while ((string_char_size > 0) && (nmust_match > 0)) {
1196 			nmust_match--;
1197 			stringp += string_char_size;
1198 			string_char_size = get_wchar(&string_wchar, stringp);
1199 		    }
1200 		    if (nmust_match > 0) {
1201 			return ((char *)0);
1202 		    } else if (nextra_matches_allowed == UNLIMITED) {
1203 			repeat_startp = stringp;
1204 			while (string_char_size > 0) {
1205 			    stringp += string_char_size;
1206 			    string_char_size =
1207 				get_wchar(&string_wchar, stringp);
1208 			}
1209 			regexp += 3;
1210 			return (test_repeated_multibyte_char(repeat_startp,
1211 			    stringp, regexp));
1212 		    } else {
1213 			repeat_startp = stringp;
1214 			while ((string_char_size > 0) &&
1215 			    (nextra_matches_allowed > 0)) {
1216 			    nextra_matches_allowed--;
1217 			    stringp += string_char_size;
1218 			    string_char_size =
1219 				get_wchar(&string_wchar, stringp);
1220 			}
1221 			regexp += 3;
1222 			return (test_repeated_multibyte_char(repeat_startp,
1223 			    stringp, regexp));
1224 		    }
1225 		} /* end case ANY_CHAR|COUNT */
1226 
1227 	    case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1228 	    case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1229 
1230 		/*
1231 		 * encoded as	<IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1232 		 *		<class_length><class ...>
1233 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1234 		 *		<class_length><class ...>
1235 		 *
1236 		 * NOTE: <class_length> includes the <class_length> byte
1237 		 */
1238 
1239 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1240 		    test_condition = IN_CLASS;
1241 		} else {
1242 		    test_condition = NOT_IN_CLASS;
1243 		}
1244 		regexp++; /* point to the <class_length> byte */
1245 
1246 		repeat_startp = stringp;
1247 		while ((*stringp != '\0') &&
1248 		    (test_char_against_ascii_class(*stringp, regexp,
1249 		    test_condition) == CONDITION_TRUE)) {
1250 		    stringp++;
1251 		}
1252 		regexp += (int)*regexp; /* add the class length to regexp */
1253 		return (test_repeated_ascii_char(repeat_startp, stringp,
1254 		    regexp));
1255 
1256 		/* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1257 
1258 	    case IN_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1259 	    case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
1260 
1261 		/*
1262 		 * encoded as	<IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1263 		 *		<class_length><class ...>
1264 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1265 		 * 		<class_length><class ...>
1266 		 *
1267 		 * NOTE: <class_length> includes the <class_length> byte
1268 		 */
1269 
1270 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1271 		    test_condition = IN_CLASS;
1272 		} else {
1273 		    test_condition = NOT_IN_CLASS;
1274 		}
1275 		regexp++; /* point to the <class_length> byte */
1276 
1277 		if ((*stringp == '\0') ||
1278 		    (test_char_against_ascii_class(*stringp, regexp,
1279 		    test_condition) != CONDITION_TRUE)) {
1280 		    return ((char *)0);
1281 		} else {
1282 		    stringp++;
1283 		    repeat_startp = stringp;
1284 		    while ((*stringp != '\0') &&
1285 			(test_char_against_ascii_class(*stringp, regexp,
1286 			test_condition) == CONDITION_TRUE)) {
1287 			stringp++;
1288 		    }
1289 		    regexp += (int)*regexp; /* add the class length to regexp */
1290 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1291 			regexp));
1292 		}
1293 		/* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
1294 
1295 	    case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */
1296 	    case NOT_IN_ASCII_CHAR_CLASS | COUNT:
1297 
1298 		/*
1299 		 * encoded as	<IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1300 		 * 		<class ...><minimum_match_count>\
1301 		 *		<maximum_match_count>
1302 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1303 		 *		<class ...><minimum_match_count>\
1304 		 *		<maximum_match_count>
1305 		 *
1306 		 * NOTE: <class_length> includes the <class_length> byte,
1307 		 *	but not the <minimum_match_count> or
1308 		 *	<maximum_match_count> bytes
1309 		 */
1310 
1311 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
1312 		    test_condition = IN_CLASS;
1313 		} else {
1314 		    test_condition = NOT_IN_CLASS;
1315 		}
1316 		regexp++; /* point to the <class_length> byte */
1317 
1318 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1319 		    regexp + (int)*regexp);
1320 		while ((*stringp != '\0') &&
1321 		    (test_char_against_ascii_class(*stringp, regexp,
1322 		    test_condition) == CONDITION_TRUE) &&
1323 		    (nmust_match > 0)) {
1324 		    nmust_match--;
1325 		    stringp++;
1326 		}
1327 		if (nmust_match > 0) {
1328 		    return ((char *)0);
1329 		} else if (nextra_matches_allowed == UNLIMITED) {
1330 		    repeat_startp = stringp;
1331 		    while ((*stringp != '\0') &&
1332 			(test_char_against_ascii_class(*stringp, regexp,
1333 			test_condition) == CONDITION_TRUE)) {
1334 			stringp++;
1335 		    }
1336 		    regexp += (int)*regexp + 2;
1337 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1338 			regexp));
1339 		} else {
1340 		    repeat_startp = stringp;
1341 		    while ((*stringp != '\0') &&
1342 			(test_char_against_ascii_class(*stringp, regexp,
1343 			test_condition) == CONDITION_TRUE) &&
1344 			(nextra_matches_allowed > 0)) {
1345 			nextra_matches_allowed--;
1346 			stringp++;
1347 		    }
1348 		    regexp += (int)*regexp + 2;
1349 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1350 			regexp));
1351 		}
1352 		/* end case IN_ASCII_CHAR_CLASS|COUNT */
1353 
1354 	    case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1355 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
1356 
1357 		/*
1358 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1359 		 *		<class_length><class ...>
1360 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1361 		 *		<class_length><class ...>
1362 		 *
1363 		 * NOTE: <class_length> includes the <class_length> byte
1364 		 */
1365 
1366 		if ((int)*regexp ==
1367 		    (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
1368 		    test_condition = IN_CLASS;
1369 		} else {
1370 		    test_condition = NOT_IN_CLASS;
1371 		}
1372 		regexp++; /* point to the <class_length> byte */
1373 
1374 		repeat_startp = stringp;
1375 		string_char_size = get_wchar(&string_wchar, stringp);
1376 		while ((string_char_size > 0) &&
1377 		    (test_char_against_multibyte_class(string_wchar, regexp,
1378 		    test_condition) == CONDITION_TRUE)) {
1379 		    stringp += string_char_size;
1380 		    string_char_size = get_wchar(&string_wchar, stringp);
1381 		}
1382 		regexp += (int)*regexp; /* add the class length to regexp */
1383 		return (test_repeated_multibyte_char(repeat_startp, stringp,
1384 		    regexp));
1385 
1386 		/* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
1387 
1388 	    case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1389 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
1390 
1391 		/*
1392 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1393 		 *		<class_length><class ...>
1394 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1395 		 *		<class_length><class ...>
1396 		 *
1397 		 * NOTE: <class_length> includes the <class_length> byte
1398 		 */
1399 
1400 		if ((int)*regexp ==
1401 		    (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
1402 		    test_condition = IN_CLASS;
1403 		} else {
1404 		    test_condition = NOT_IN_CLASS;
1405 		}
1406 		regexp++; /* point to the <class_length> byte */
1407 
1408 		string_char_size = get_wchar(&string_wchar, stringp);
1409 		if ((string_char_size <= 0) ||
1410 		    (test_char_against_multibyte_class(string_wchar, regexp,
1411 		    test_condition) != CONDITION_TRUE)) {
1412 		    return ((char *)0);
1413 		} else {
1414 		    stringp += string_char_size;
1415 		    repeat_startp = stringp;
1416 		    string_char_size = get_wchar(&string_wchar, stringp);
1417 		    while ((string_char_size > 0) &&
1418 			(test_char_against_multibyte_class(string_wchar,
1419 			regexp, test_condition) == CONDITION_TRUE)) {
1420 			stringp += string_char_size;
1421 			string_char_size = get_wchar(&string_wchar, stringp);
1422 		    }
1423 		    regexp += (int)*regexp; /* add the class length to regexp */
1424 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1425 			regexp));
1426 		}
1427 		/* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
1428 
1429 	    case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1430 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
1431 
1432 		/*
1433 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1434 		 *		<class_length><class ...><min_count><max_count>
1435 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1436 		 *		<class_length><class ...><min_count><max_count>
1437 		 *
1438 		 * NOTE: <class_length> includes the <class_length> byte
1439 		 *	but not the <minimum_match_count> or
1440 		 *	<maximum_match_count> bytes
1441 		 */
1442 
1443 		if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
1444 		    test_condition = IN_CLASS;
1445 		} else {
1446 		    test_condition = NOT_IN_CLASS;
1447 		}
1448 		regexp++; /* point to the <class_length> byte */
1449 
1450 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1451 		    regexp + (int)*regexp);
1452 		string_char_size = get_wchar(&string_wchar, stringp);
1453 		while ((string_char_size > 0) &&
1454 		    (test_char_against_multibyte_class(string_wchar, regexp,
1455 		    test_condition) == CONDITION_TRUE) &&
1456 		    (nmust_match > 0)) {
1457 		    nmust_match--;
1458 		    stringp += string_char_size;
1459 		    string_char_size = get_wchar(&string_wchar, stringp);
1460 		}
1461 		if (nmust_match > 0) {
1462 		    return ((char *)0);
1463 		} else if (nextra_matches_allowed == UNLIMITED) {
1464 		    repeat_startp = stringp;
1465 		    while ((string_char_size > 0) &&
1466 			(test_char_against_multibyte_class(string_wchar,
1467 			regexp, test_condition) == CONDITION_TRUE)) {
1468 			stringp += string_char_size;
1469 			string_char_size = get_wchar(&string_wchar, stringp);
1470 		    }
1471 		    regexp += (int)*regexp + 2;
1472 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1473 			regexp));
1474 		} else {
1475 		    repeat_startp = stringp;
1476 		    while ((string_char_size > 0) &&
1477 			(test_char_against_multibyte_class(string_wchar,
1478 			regexp, test_condition) == CONDITION_TRUE) &&
1479 			(nextra_matches_allowed > 0)) {
1480 			nextra_matches_allowed--;
1481 			stringp += string_char_size;
1482 			string_char_size = get_wchar(&string_wchar, stringp);
1483 		    }
1484 		    regexp += (int)*regexp + 2;
1485 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1486 			regexp));
1487 		}
1488 		/* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
1489 
1490 	    case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1491 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1492 
1493 		/*
1494 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1495 		 *		<class_length><class ...>
1496 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1497 		 *		<class_length><class ...>
1498 		 *
1499 		 * NOTE: <class_length> includes the <class_length> byte
1500 		 */
1501 
1502 		if ((int)*regexp ==
1503 		    (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1504 		    test_condition = IN_CLASS;
1505 		} else {
1506 		    test_condition = NOT_IN_CLASS;
1507 		}
1508 		regexp++; /* point to the <class_length> byte */
1509 
1510 		repeat_startp = stringp;
1511 		while ((*stringp != '\0') &&
1512 		    (test_char_against_old_ascii_class(*stringp, regexp,
1513 		    test_condition) == CONDITION_TRUE)) {
1514 		    stringp++;
1515 		}
1516 		regexp += (int)*regexp; /* add the class length to regexp */
1517 		return (test_repeated_ascii_char(repeat_startp, stringp,
1518 		    regexp));
1519 
1520 		/* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1521 
1522 	    case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1523 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
1524 
1525 		/*
1526 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1527 		 *		<class_length><class ...>
1528 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1529 		 *		<class_length><class ...>
1530 		 *
1531 		 * NOTE: <class_length> includes the <class_length> byte
1532 		 */
1533 
1534 		if ((int)*regexp ==
1535 		    (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1536 		    test_condition = IN_CLASS;
1537 		} else {
1538 		    test_condition = NOT_IN_CLASS;
1539 		}
1540 		regexp++; /* point to the <class_length> byte */
1541 
1542 		if ((*stringp == '\0') ||
1543 		    (test_char_against_old_ascii_class(*stringp, regexp,
1544 		    test_condition) != CONDITION_TRUE)) {
1545 		    return ((char *)0);
1546 		} else {
1547 		    stringp++;
1548 		    repeat_startp = stringp;
1549 		    while ((*stringp != '\0') &&
1550 			(test_char_against_old_ascii_class(*stringp, regexp,
1551 			test_condition) == CONDITION_TRUE)) {
1552 			stringp++;
1553 		    }
1554 		    regexp += (int)*regexp; /* add the class length to regexp */
1555 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1556 			regexp));
1557 		}
1558 		/* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
1559 
1560 	    case IN_OLD_ASCII_CHAR_CLASS|COUNT:	/* [...]{min_count,max_count} */
1561 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
1562 
1563 		/*
1564 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
1565 		 *		<class ...><minimum_match_count>\
1566 		 *		<maximum_match_count>
1567 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
1568 		 *		<class_length><class ...><minimum_match_count>\
1569 		 *		<maximum_match_count>
1570 		 *
1571 		 * NOTE: <class_length> includes the <class_length> byte
1572 		 *	but not the <minimum_match_count> or
1573 		 *	<maximum_match_count> bytes
1574 		 */
1575 
1576 		if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
1577 		    test_condition = IN_CLASS;
1578 		} else {
1579 		    test_condition = NOT_IN_CLASS;
1580 		}
1581 		regexp++; /* point to the <class_length> byte */
1582 
1583 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1584 		    regexp + (int)*regexp);
1585 		while ((*stringp != '\0') &&
1586 		    (test_char_against_old_ascii_class(*stringp, regexp,
1587 		    test_condition) == CONDITION_TRUE) &&
1588 		    (nmust_match > 0)) {
1589 		    nmust_match--;
1590 		    stringp++;
1591 		}
1592 		if (nmust_match > 0) {
1593 		    return ((char *)0);
1594 		} else if (nextra_matches_allowed == UNLIMITED) {
1595 		    repeat_startp = stringp;
1596 		    while ((*stringp != '\0') &&
1597 			(test_char_against_old_ascii_class(*stringp, regexp,
1598 			test_condition) == CONDITION_TRUE)) {
1599 			stringp++;
1600 		    }
1601 		    regexp += (int)*regexp + 2;
1602 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1603 			regexp));
1604 		} else {
1605 		    repeat_startp = stringp;
1606 		    while ((*stringp != '\0') &&
1607 			(test_char_against_old_ascii_class(*stringp, regexp,
1608 			test_condition) == CONDITION_TRUE) &&
1609 			(nextra_matches_allowed > 0)) {
1610 			nextra_matches_allowed--;
1611 			stringp++;
1612 		    }
1613 		    regexp += (int)*regexp + 2;
1614 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1615 			regexp));
1616 		}
1617 		/* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
1618 
1619 	    case ZERO_OR_MORE_GROUP:		/* (.....)* */
1620 	    case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1621 	    case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1622 	    case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1623 
1624 		/*
1625 		 * encoded as	<ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1626 		 *		<group_length><compiled_regex...>\
1627 		 *		<END_GROUP|ZERO_OR_MORE><groupn>
1628 		 *
1629 		 * NOTE:
1630 		 *
1631 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1632 		 *	length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
1633 		 *		<groupn>)
1634 		 *
1635 		 */
1636 
1637 		group_length =
1638 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1639 		    TIMES_256_SHIFT);
1640 		regexp++;
1641 		group_length += (unsigned int)*regexp;
1642 		regexp++;
1643 		repeat_startp = stringp;
1644 		test_stringp = test_string(stringp, regexp);
1645 		while (test_stringp != (char *)0) {
1646 		    if (push_stringp(stringp) == (char *)0)
1647 			return ((char *)0);
1648 		    stringp = test_stringp;
1649 		    test_stringp = test_string(stringp, regexp);
1650 		}
1651 		regexp += group_length;
1652 		return (test_repeated_group(repeat_startp, stringp, regexp));
1653 
1654 		/* end case ZERO_OR_MORE_GROUP */
1655 
1656 	    case END_GROUP|ZERO_OR_MORE:	/* (.....)* */
1657 
1658 		/* encoded as <END_GROUP|ZERO_OR_MORE> */
1659 
1660 		/* return from recursive call to test_string() */
1661 
1662 		return ((char *)stringp);
1663 
1664 		/* end case END_GROUP|ZERO_OR_MORE */
1665 
1666 	    case ONE_OR_MORE_GROUP:		/* (.....)+ */
1667 	    case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1668 	    case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1669 	    case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1670 
1671 		/*
1672 		 * encoded as	<ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1673 		 *		<group_length><compiled_regex...>\
1674 		 *		<END_GROUP|ONE_OR_MORE><groupn>
1675 		 *
1676 		 * NOTE:
1677 		 *
1678 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1679 		 * 	length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
1680 		 *		<groupn>)
1681 		 */
1682 
1683 		group_length =
1684 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1685 		    TIMES_256_SHIFT);
1686 		regexp++;
1687 		group_length += (unsigned int)*regexp;
1688 		regexp++;
1689 		stringp = test_string(stringp, regexp);
1690 		if (stringp == (char *)0)
1691 		    return ((char *)0);
1692 		repeat_startp = stringp;
1693 		test_stringp = test_string(stringp, regexp);
1694 		while (test_stringp != (char *)0) {
1695 		    if (push_stringp(stringp) == (char *)0)
1696 			return ((char *)0);
1697 		    stringp = test_stringp;
1698 		    test_stringp = test_string(stringp, regexp);
1699 		}
1700 		regexp += group_length;
1701 		return (test_repeated_group(repeat_startp, stringp, regexp));
1702 
1703 		/* end case ONE_OR_MORE_GROUP */
1704 
1705 	    case END_GROUP|ONE_OR_MORE:		/* (.....)+ */
1706 
1707 		/* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
1708 
1709 		/* return from recursive call to test_string() */
1710 
1711 		return ((char *)stringp);
1712 
1713 		/* end case END_GROUP|ONE_OR_MORE */
1714 
1715 	    case COUNTED_GROUP:		/* (.....){max_count,min_count} */
1716 	    case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
1717 	    case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
1718 	    case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
1719 
1720 		/*
1721 		 * encoded as	<COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
1722 		 *		<compiled_regex...><END_GROUP|COUNT><groupn>\
1723 		 *		<minimum_match_count><maximum_match_count>
1724 		 *
1725 		 * NOTE:
1726 		 *
1727 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1728 		 *	length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
1729 		 *
1730 		 * but does not include the <minimum_match_count> or
1731 		 *	<maximum_match_count> bytes
1732 		 */
1733 
1734 		group_length =
1735 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1736 		    TIMES_256_SHIFT);
1737 		regexp++;
1738 		group_length += (unsigned int)*regexp;
1739 		regexp++;
1740 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1741 		    regexp + group_length);
1742 		test_stringp = test_string(stringp, regexp);
1743 		while ((test_stringp != (char *)0) && (nmust_match > 0)) {
1744 		    stringp = test_stringp;
1745 		    nmust_match--;
1746 		    test_stringp = test_string(stringp, regexp);
1747 		}
1748 		if (nmust_match > 0) {
1749 		    return ((char *)0);
1750 		} else if (nextra_matches_allowed == UNLIMITED) {
1751 		    repeat_startp = stringp;
1752 		    while (test_stringp != (char *)0) {
1753 			if (push_stringp(stringp) == (char *)0)
1754 			    return ((char *)0);
1755 			stringp = test_stringp;
1756 			test_stringp = test_string(stringp, regexp);
1757 		}
1758 		    regexp += group_length + 2;
1759 		    return (test_repeated_group(repeat_startp, stringp,
1760 			regexp));
1761 		} else {
1762 		    repeat_startp = stringp;
1763 		    while ((test_stringp != (char *)0) &&
1764 			(nextra_matches_allowed > 0)) {
1765 			nextra_matches_allowed--;
1766 			if (push_stringp(stringp) == (char *)0)
1767 			    return ((char *)0);
1768 			stringp = test_stringp;
1769 			test_stringp = test_string(stringp, regexp);
1770 		}
1771 		    regexp += group_length + 2;
1772 		    return (test_repeated_group(repeat_startp, stringp,
1773 			regexp));
1774 		}
1775 		/* end case COUNTED_GROUP */
1776 
1777 	    case END_GROUP|COUNT:	/* (.....){max_count,min_count} */
1778 
1779 		/* encoded as <END_GROUP|COUNT> */
1780 
1781 		/* return from recursive call to test_string() */
1782 
1783 		return (stringp);
1784 
1785 		/* end case END_GROUP|COUNT */
1786 
1787 	    case END_OF_STRING_MARK:
1788 
1789 		/* encoded as <END_OF_STRING_MARK><END_REGEX> */
1790 
1791 		if (*stringp == '\0') {
1792 		    regexp++;
1793 		} else {
1794 		    return ((char *)0);
1795 		}
1796 		break; /* end case END_OF_STRING_MARK */
1797 
1798 	    case END_REGEX: /* end of the compiled regular expression */
1799 
1800 		/* encoded as <END_REGEX> */
1801 
1802 		return (stringp);
1803 
1804 		/* end case END_REGEX */
1805 
1806 	    default:
1807 
1808 		return ((char *)0);
1809 
1810 	    } /* end switch (*regexp) */
1811 
1812 	} /* end for (;;) */
1813 
1814 } /* test_string() */
1815