xref: /illumos-gate/usr/src/lib/libc/port/regex/regex.c (revision 13b136d3061155363c62c9f6568d25b8b27da8f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * IMPORTANT NOTE:
34  *
35  * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
36  * IT IS **NOT** CHARACTER SET INDEPENDENT.
37  *
38  */
39 
40 #pragma weak _regex = regex
41 
42 #include "lint.h"
43 /* CONSTANTS SHARED WITH regcmp() */
44 #include "regex.h"
45 #include "mtlib.h"
46 #include <limits.h>
47 #include <stdarg.h>
48 #include <stdlib.h>
49 #include <thread.h>
50 #include <widec.h>
51 #include "tsd.h"
52 
53 
54 /* PRIVATE CONSTANTS */
55 
/*
 * NOTE(review): the three ADD_*_TO_GROUP_LENGTH values and ADDED_LENGTH_BITS
 * are not referenced in the part of this file reviewed here; judging by
 * their names they describe extra length encoded in a group's flag bits --
 * confirm against the group-handling cases of test_string() and regcmp().
 */
#define	ADD_256_TO_GROUP_LENGTH	0x1
#define	ADD_512_TO_GROUP_LENGTH	0x2
#define	ADD_768_TO_GROUP_LENGTH	0x3
#define	ADDED_LENGTH_BITS	0x3
/* isolates the low-order byte of a count read from a compiled expression */
#define	SINGLE_BYTE_MASK	0xff
/* number of entries in stringp_stack[] */
#define	STRINGP_STACK_SIZE	50
62 
63 
64 /* PRIVATE TYPE DEFINITIONS */
65 
/*
 * Tells a character class test which outcome counts as success:
 * the tested character being a member of the class (IN_CLASS)
 * or not being a member of it (NOT_IN_CLASS).
 */
typedef enum {
	NOT_IN_CLASS = 0,
	IN_CLASS
} char_test_condition_t;

/*
 * Result of a character class test.
 *
 * CONDITION_TRUE	the requested condition holds for the character
 * CONDITION_FALSE	the requested condition does not hold
 * CHAR_TEST_ERROR	a character in a multibyte class could not be decoded
 *
 * NOTE(review): TESTING_CHAR is not returned by any of the class test
 * functions in this file -- presumably a "no result yet" value; confirm.
 */
typedef enum {
	TESTING_CHAR = 0,
	CONDITION_TRUE,
	CONDITION_FALSE,
	CHAR_TEST_ERROR
} char_test_result_t;
77 
78 
79 /* PRIVATE GLOBAL VARIABLES */
80 
/*
 * regex() acquires regex_lock before it touches any of the variables
 * below and releases it just before returning, so the matching state
 * kept here is only ever used by one call to regex() at a time.
 */
static mutex_t		regex_lock = DEFAULTMUTEX;
/* varargs slot that receives saved substring n, or -1 if none */
static int		return_arg_number[NSUBSTRINGS];
/* first character following saved substring n */
static const char	*substring_endp[NSUBSTRINGS];
/* first character of saved substring n, or a null pointer if not set */
static const char	*substring_startp[NSUBSTRINGS];
/*
 * stack of string positions used by push_stringp()/pop_stringp();
 * it grows downward from &stringp_stack[STRINGP_STACK_SIZE], which is
 * where regex() resets stringp_stackp at the start of each match
 */
static const char	*stringp_stack[STRINGP_STACK_SIZE];
static const char	**stringp_stackp;
87 
88 
89 /* DECLARATIONS OF PRIVATE FUNCTIONS */
90 
/* decodes the character at stringp; returns its size in bytes */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp);

/* reads the minimum and maximum repeat counts of a compiled {m,n} */
static void
get_match_counts(int *nmust_matchp,
	int *nextra_matches_allowedp,
	const char *count_stringp);

/* tests whether test_char lies within the range lower_char..upper_char */
static boolean_t
in_wchar_range(wchar_t test_char,
	wchar_t lower_char,
	wchar_t upper_char);

/* pops a string position from stringp_stack; null pointer if empty */
static const char *
pop_stringp(void);

/* returns the position of the character that precedes current_charp */
static const char *
previous_charp(const char *current_charp);

/* pushes a string position onto stringp_stack; null pointer if full */
static const char *
push_stringp(const char *stringp);

/* class membership test for classes compiled in the aTHRUz form */
static char_test_result_t
test_char_against_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/* class membership test for classes holding multibyte characters */
static char_test_result_t
test_char_against_multibyte_class(wchar_t test_char,
	const char *classp,
	char_test_condition_t test_condition);


/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */

/* class membership test for classes compiled in the THRUaz form */
static char_test_result_t
test_char_against_old_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/*
 * The three test_repeated_*() functions test the rest of the regular
 * expression at stringp and, while that fails, at earlier positions
 * back to repeat_startp.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

/* matches the string at stringp against the compiled expression */
static const char *
test_string(const char *stringp,
	const char *regexp);
150 
151 
152 /* DEFINITIONS OF PUBLIC VARIABLES */
153 
154 char *__loc1;
155 
156 /*
157  * reserve thread-specific storage for __loc1
158  */
159 char **
160 ____loc1(void)
161 {
162 	if (thr_main())
163 		return (&__loc1);
164 	return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
165 }
166 
167 #define	__loc1 (*(____loc1()))
168 
169 /* DEFINITION OF regex() */
170 
171 extern char *
172 regex(const char *regexp, const char *stringp, ...)
173 {
174 	va_list		arg_listp;
175 	int		char_size;
176 	const char	*end_of_matchp;
177 	wchar_t		regex_wchar;
178 	char		*return_argp[NSUBSTRINGS];
179 	char		*returned_substringp;
180 	int		substringn;
181 	const char	*substringp;
182 	wchar_t		string_wchar;
183 
184 	if (____loc1() == (char **)0) {
185 	    return ((char *)0);
186 	} else {
187 	    lmutex_lock(&regex_lock);
188 	    __loc1 = (char *)0;
189 	}
190 
191 	if ((stringp == (char *)0) || (regexp == (char *)0)) {
192 	    lmutex_unlock(&regex_lock);
193 	return ((char *)0);
194 	}
195 
196 
197 	/* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS  */
198 
199 	substringn = 0;
200 	va_start(arg_listp, stringp);
201 	while (substringn < NSUBSTRINGS) {
202 	    return_argp[substringn] = va_arg(arg_listp, char *);
203 	    substring_startp[substringn] = (char *)0;
204 	    return_arg_number[substringn] = -1;
205 	    substringn++;
206 	}
207 	va_end(arg_listp);
208 
209 
210 	/* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
211 
212 	end_of_matchp = (char *)0;
213 	stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
214 
215 	if ((int)*regexp == (int)START_OF_STRING_MARK) {
216 
217 	/*
218 	 * the match must start at the beginning of the string
219 	 */
220 
221 	    __loc1 = (char *)stringp;
222 	    regexp++;
223 	    end_of_matchp = test_string(stringp, regexp);
224 
225 	} else if ((int)*regexp == (int)ASCII_CHAR) {
226 
227 	/*
228 	 * test a string against a regular expression
229 	 * that starts with a single ASCII character:
230 	 *
231 	 * move to each character in the string that matches
232 	 * the first character in the regular expression
233 	 * and test the remaining string
234 	 */
235 
236 	    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
237 		stringp++;
238 	    }
239 	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
240 		end_of_matchp = test_string(stringp, regexp);
241 		if (end_of_matchp != (char *)0) {
242 		    __loc1 = (char *)stringp;
243 		} else {
244 		    stringp++;
245 		    while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
246 			stringp++;
247 		    }
248 		}
249 	    }
250 
251 	} else if (!multibyte) {
252 
253 	/*
254 	 * if the value of the "multibyte" macro defined in <euc.h>
255 	 * is false, regex() is running in an ASCII locale;
256 	 * test an ASCII string against an ASCII regular expression
257 	 * that doesn't start with a single ASCII character:
258 	 *
259 	 * move forward in the string one byte at a time, testing
260 	 * the remaining string against the regular expression
261 	 */
262 
263 	    end_of_matchp = test_string(stringp, regexp);
264 	    while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
265 		stringp++;
266 		end_of_matchp = test_string(stringp, regexp);
267 	    }
268 	    if (end_of_matchp != (char *)0) {
269 		__loc1 = (char *)stringp;
270 	    }
271 
272 	} else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
273 
274 	/*
275 	 * test a multibyte string against a multibyte regular expression
276 	 * that starts with a single multibyte character:
277 	 *
278 	 * move to each character in the string that matches
279 	 * the first character in the regular expression
280 	 * and test the remaining string
281 	 */
282 
283 	    (void) get_wchar(&regex_wchar, regexp + 1);
284 	    char_size = get_wchar(&string_wchar, stringp);
285 	    while ((string_wchar != regex_wchar) && (char_size > 0)) {
286 		stringp += char_size;
287 		char_size = get_wchar(&string_wchar, stringp);
288 	    }
289 	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
290 		end_of_matchp = test_string(stringp, regexp);
291 		if (end_of_matchp != (char *)0) {
292 		    __loc1 = (char *)stringp;
293 		} else {
294 		    stringp += char_size;
295 		    char_size = get_wchar(&string_wchar, stringp);
296 		    while ((string_wchar != regex_wchar) && (char_size > 0)) {
297 			stringp += char_size;
298 			char_size = get_wchar(&string_wchar, stringp);
299 		    }
300 		}
301 	    }
302 
303 	} else {
304 
305 	/*
306 	 * test a multibyte string against a multibyte regular expression
307 	 * that doesn't start with a single multibyte character
308 	 *
309 	 * move forward in the string one multibyte character at a time,
310 	 * testing the remaining string against the regular expression
311 	 */
312 
313 	    end_of_matchp = test_string(stringp, regexp);
314 	    char_size = get_wchar(&string_wchar, stringp);
315 	    while ((end_of_matchp == (char *)0) && (char_size > 0)) {
316 		stringp += char_size;
317 		end_of_matchp = test_string(stringp, regexp);
318 		char_size = get_wchar(&string_wchar, stringp);
319 	    }
320 	    if (end_of_matchp != (char *)0) {
321 		__loc1 = (char *)stringp;
322 	    }
323 	}
324 
325 	/*
326 	 * Return substrings that matched subexpressions for which
327 	 * matching substrings are to be returned.
328 	 *
329 	 * NOTE:
330 	 *
331 	 * According to manual page regcmp(3G), regex() returns substrings
332 	 * that match subexpressions even when no substring matches the
333 	 * entire regular expression.
334 	 */
335 
336 	substringn = 0;
337 	while (substringn < NSUBSTRINGS) {
338 	    substringp = substring_startp[substringn];
339 	    if ((substringp != (char *)0) &&
340 		(return_arg_number[substringn] >= 0)) {
341 		returned_substringp =
342 		    return_argp[return_arg_number[substringn]];
343 		if (returned_substringp != (char *)0) {
344 		    while (substringp < substring_endp[substringn]) {
345 			*returned_substringp = (char)*substringp;
346 			returned_substringp++;
347 			substringp++;
348 		    }
349 		    *returned_substringp = '\0';
350 		}
351 	    }
352 	    substringn++;
353 	}
354 	lmutex_unlock(&regex_lock);
355 	return ((char *)end_of_matchp);
356 }  /* regex() */
357 
358 
359 /* DEFINITIONS OF PRIVATE FUNCTIONS */
360 
/*
 * Decodes the character addressed by stringp into *wcharp.
 *
 * Returns the number of bytes the character occupies:
 *	0	stringp is a null pointer or addresses the terminating
 *		null byte; *wcharp is set to the null character
 *	1	the byte is in the range 0x01-0x7f; *wcharp is that byte
 *	other	the value returned by mbtowc(), which is not positive
 *		when the bytes do not form a valid character
 *
 * *wcharp is always assigned.  Some callers compare the decoded
 * character before they look at the returned size, so when mbtowc()
 * fails the null character is stored instead of leaving *wcharp with
 * whatever it held before the call.
 */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp)
{
	int char_size;

	if ((stringp == NULL) || (*stringp == '\0')) {
		*wcharp = (wchar_t)'\0';
		return (0);
	}

	if ((unsigned char)*stringp <= (unsigned char)0x7f) {
		/* 7-bit characters never need mbtowc() */
		*wcharp = (wchar_t)((unsigned int)*stringp);
		return (1);
	}

	char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
	if (char_size <= 0) {
		/* mbtowc() stores nothing on failure */
		*wcharp = (wchar_t)'\0';
	}
	return (char_size);
}
381 
382 static void
383 get_match_counts(int *nmust_matchp,
384 	int *nextra_matches_allowedp,
385 	const char *count_stringp)
386 {
387 	int minimum_match_count;
388 	int maximum_match_count;
389 
390 	minimum_match_count =
391 	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
392 	*nmust_matchp = minimum_match_count;
393 
394 	count_stringp++;
395 	maximum_match_count =
396 	    (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
397 	if (maximum_match_count == (int)UNLIMITED) {
398 	    *nextra_matches_allowedp = (int)UNLIMITED;
399 	} else {
400 	    *nextra_matches_allowedp =
401 		maximum_match_count - minimum_match_count;
402 	}
403 	return;
404 
405 } /* get_match_counts() */
406 
407 static boolean_t
408 in_wchar_range(wchar_t test_char,
409 	wchar_t lower_char,
410 	wchar_t upper_char)
411 {
412 	return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
413 	    (lower_char <= test_char) && (test_char <= upper_char)) ||
414 	    (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
415 	    ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
416 	    (lower_char <= test_char) && (test_char <= upper_char)));
417 
418 } /* in_wchar_range() */
419 
420 static const char *
421 pop_stringp(void)
422 {
423 	const char *stringp;
424 
425 	if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
426 	    return ((char *)0);
427 	} else {
428 	    stringp = *stringp_stackp;
429 	    stringp_stackp++;
430 	    return (stringp);
431 	}
432 }
433 
434 
435 static const char *
436 previous_charp(const char *current_charp)
437 {
438 	/*
439 	 * returns the pointer to the previous character in
440 	 * a string of multibyte characters
441 	 */
442 
443 	const char *prev_cs0 = current_charp - 1;
444 	const char *prev_cs1 = current_charp - eucw1;
445 	const char *prev_cs2 = current_charp - eucw2 - 1;
446 	const char *prev_cs3 = current_charp - eucw3 - 1;
447 	const char *prev_charp;
448 
449 	if ((unsigned char)*prev_cs0 <= 0x7f) {
450 	    prev_charp = prev_cs0;
451 	} else if ((unsigned char)*prev_cs2 == SS2) {
452 	    prev_charp = prev_cs2;
453 	} else if ((unsigned char)*prev_cs3 == SS3) {
454 	    prev_charp = prev_cs3;
455 	} else {
456 	    prev_charp = prev_cs1;
457 	}
458 	return (prev_charp);
459 
460 } /* previous_charp() */
461 
462 static const char *
463 push_stringp(const char *stringp)
464 {
465 	if (stringp_stackp <= &stringp_stack[0]) {
466 	    return ((char *)0);
467 	} else {
468 	    stringp_stackp--;
469 	    *stringp_stackp = stringp;
470 	    return (stringp);
471 	}
472 }
473 
474 
475 static char_test_result_t
476 test_char_against_ascii_class(char test_char,
477 	const char *classp,
478 	char_test_condition_t test_condition)
479 {
480 	/*
481 	 * tests a character for membership in an ASCII character class compiled
482 	 * by the internationalized version of regcmp();
483 	 *
484 	 * NOTE: The internationalized version of regcmp() compiles
485 	 * 	the range a-z in an ASCII character class to aTHRUz.
486 	 */
487 
488 	int	nbytes_to_check;
489 
490 	nbytes_to_check = (int)*classp;
491 	classp++;
492 	nbytes_to_check--;
493 
494 	while (nbytes_to_check > 0) {
495 	    if (test_char == *classp) {
496 		if (test_condition == IN_CLASS)
497 		    return (CONDITION_TRUE);
498 		else
499 		    return (CONDITION_FALSE);
500 	    } else if (*classp == THRU) {
501 		if ((*(classp - 1) <= test_char) &&
502 		    (test_char <= *(classp + 1))) {
503 		    if (test_condition == IN_CLASS)
504 			return (CONDITION_TRUE);
505 		    else
506 			return (CONDITION_FALSE);
507 		} else {
508 		    classp += 2;
509 		    nbytes_to_check -= 2;
510 		}
511 	    } else {
512 		classp++;
513 		nbytes_to_check--;
514 	    }
515 	}
516 	if (test_condition == NOT_IN_CLASS) {
517 	    return (CONDITION_TRUE);
518 	} else {
519 	    return (CONDITION_FALSE);
520 	}
521 } /* test_char_against_ascii_class() */
522 
523 static char_test_result_t
524 test_char_against_multibyte_class(wchar_t test_char,
525 	const char *classp,
526 	char_test_condition_t test_condition)
527 {
528 	/*
529 	 * tests a character for membership in a multibyte character class;
530 	 *
531 	 * NOTE: The range a-z in a multibyte character class compiles to
532 	 * 	aTHRUz.
533 	 */
534 
535 	int		char_size;
536 	wchar_t		current_char;
537 	int		nbytes_to_check;
538 	wchar_t		previous_char;
539 
540 	nbytes_to_check = (int)*classp;
541 	classp++;
542 	nbytes_to_check--;
543 
544 	char_size = get_wchar(&current_char, classp);
545 	if (char_size <= 0) {
546 	    return (CHAR_TEST_ERROR);
547 	} else if (test_char == current_char) {
548 	    if (test_condition == IN_CLASS) {
549 		return (CONDITION_TRUE);
550 	    } else {
551 		return (CONDITION_FALSE);
552 	    }
553 	} else {
554 	    classp += char_size;
555 	    nbytes_to_check -= char_size;
556 	}
557 
558 	while (nbytes_to_check > 0) {
559 	    previous_char = current_char;
560 	    char_size = get_wchar(&current_char, classp);
561 	    if (char_size <= 0) {
562 		return (CHAR_TEST_ERROR);
563 	    } else if (test_char == current_char) {
564 		if (test_condition == IN_CLASS) {
565 		    return (CONDITION_TRUE);
566 		} else {
567 		    return (CONDITION_FALSE);
568 		}
569 	    } else if (current_char == THRU) {
570 		classp += char_size;
571 		nbytes_to_check -= char_size;
572 		char_size = get_wchar(&current_char, classp);
573 		if (char_size <= 0) {
574 		    return (CHAR_TEST_ERROR);
575 		} else if (in_wchar_range(test_char, previous_char,
576 		    current_char)) {
577 		    if (test_condition == IN_CLASS) {
578 			return (CONDITION_TRUE);
579 		    } else {
580 			return (CONDITION_FALSE);
581 		    }
582 		} else {
583 		    classp += char_size;
584 		    nbytes_to_check -= char_size;
585 		}
586 	    } else {
587 		classp += char_size;
588 		nbytes_to_check -= char_size;
589 	    }
590 	}
591 	if (test_condition == NOT_IN_CLASS) {
592 	    return (CONDITION_TRUE);
593 	} else {
594 	    return (CONDITION_FALSE);
595 	}
596 } /* test_char_against_multibyte_class() */
597 
598 
599 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
600 
601 static char_test_result_t
602 test_char_against_old_ascii_class(char test_char,
603 	const char *classp,
604 	char_test_condition_t test_condition)
605 {
606 	/*
607 	 * tests a character for membership in an ASCII character class compiled
608 	 * by the ASCII version of regcmp();
609 	 *
610 	 * NOTE: ASCII versions of regcmp() compile the range a-z in an
611 	 *	ASCII character class to THRUaz.  The internationalized
612 	 *	version compiles the same range to aTHRUz.
613 	 */
614 
615 	int	nbytes_to_check;
616 
617 	nbytes_to_check = (int)*classp;
618 	classp++;
619 	nbytes_to_check--;
620 
621 	while (nbytes_to_check > 0) {
622 	    if (test_char == *classp) {
623 		if (test_condition == IN_CLASS) {
624 		    return (CONDITION_TRUE);
625 		} else {
626 		    return (CONDITION_FALSE);
627 		}
628 	    } else if (*classp == THRU) {
629 		if ((*(classp + 1) <= test_char) &&
630 		    (test_char <= *(classp + 2))) {
631 		    if (test_condition == IN_CLASS) {
632 			return (CONDITION_TRUE);
633 		    } else {
634 			return (CONDITION_FALSE);
635 		    }
636 		} else {
637 		    classp += 3;
638 		    nbytes_to_check -= 3;
639 		}
640 	    } else {
641 		classp++;
642 		nbytes_to_check--;
643 	    }
644 	}
645 	if (test_condition == NOT_IN_CLASS) {
646 	    return (CONDITION_TRUE);
647 	} else {
648 	    return (CONDITION_FALSE);
649 	}
650 } /* test_char_against_old_ascii_class() */
651 
/*
 * Finishes matching after a repeated single-byte character.
 *
 * Tests the rest of the regular expression (regexp) at stringp; while
 * that fails, backs up one byte at a time, but never to a position
 * before repeat_startp.
 *
 * Returns the result of the first successful test_string() call, or a
 * null pointer if none succeeds.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *match_endp;

	for (;;) {
		match_endp = test_string(stringp, regexp);
		if ((match_endp != NULL) || (stringp <= repeat_startp)) {
			return (match_endp);
		}
		stringp--;
	}
}
667 
/*
 * Finishes matching after a repeated multibyte character.
 *
 * Tests the rest of the regular expression (regexp) at stringp; while
 * that fails and stringp is still beyond repeat_startp, backs up to the
 * position returned by previous_charp() and tries again.
 *
 * Returns the result of the first successful test_string() call, or a
 * null pointer if none succeeds.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *match_endp;

	for (;;) {
		match_endp = test_string(stringp, regexp);
		if ((match_endp != NULL) || (stringp <= repeat_startp)) {
			return (match_endp);
		}
		stringp = previous_charp(stringp);
	}
}
683 
/*
 * Finishes matching after a repeated group.
 *
 * Tests the rest of the regular expression (regexp) at stringp; while
 * that fails and stringp is still beyond repeat_startp, continues from
 * the position popped from stringp_stack.
 *
 * Returns the result of the first successful test_string() call, or a
 * null pointer if none succeeds or the stack runs out of positions.
 */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *match_endp;

	for (;;) {
		match_endp = test_string(stringp, regexp);
		if ((match_endp != NULL) || (stringp <= repeat_startp)) {
			return (match_endp);
		}
		stringp = pop_stringp();
		if (stringp == NULL) {
			return (NULL);
		}
	}
}
702 
703 static const char *
704 test_string(const char *stringp,
705 	const char *regexp)
706 {
707 	/*
708 	 * returns a pointer to the first character following the first
709 	 * substring of the string addressed by stringp that matches
710 	 * the compiled regular expression addressed by regexp
711 	 */
712 
713 	unsigned int		group_length;
714 	int			nextra_matches_allowed;
715 	int			nmust_match;
716 	wchar_t			regex_wchar;
717 	int			regex_char_size;
718 	const char		*repeat_startp;
719 	unsigned int		return_argn;
720 	wchar_t			string_wchar;
721 	int			string_char_size;
722 	unsigned int		substringn;
723 	char_test_condition_t	test_condition;
724 	const char		*test_stringp;
725 
726 	for (;;) {
727 
728 		/*
729 		 * Exit the loop via a return whenever there's a match
730 		 * or it's clear that there can be no match.
731 		 */
732 
733 	    switch ((int)*regexp) {
734 
735 		/*
736 		 * No fall-through.
737 		 * Each case ends with either a return or with stringp
738 		 * addressing the next character to be tested and regexp
739 		 * addressing the next compiled regular expression
740 		 *
741 		 * NOTE: The comments for each case give the meaning
742 		 *	of the compiled regular expression decoded by the case
743 		 *	and the character string that the compiled regular
744 		 *	expression uses to encode the case.  Each single
745 		 *	character encoded in the compiled regular expression
746 		 *	is shown enclosed in angle brackets (<>).  Each
747 		 *	compiled regular expression begins with a marker
748 		 *	character which is shown as a named constant
749 		 *	(e.g. <ASCII_CHAR>). Character constants are shown
750 		 *	enclosed in single quotes (e.g. <'$'>).  All other
751 		 *	single characters encoded in the compiled regular
752 		 *	expression are shown as lower case variable names
753 		 *	(e.g. <ascii_char> or <multibyte_char>). Multicharacter
754 		 *	strings encoded in the compiled regular expression
755 		 *	are shown as variable names followed by ellipses
756 		 *	(e.g. <compiled_regex...>).
757 		 */
758 
759 	    case ASCII_CHAR: /* single ASCII char */
760 
761 		/* encoded as <ASCII_CHAR><ascii_char> */
762 
763 		regexp++;
764 		if (*regexp == *stringp) {
765 		    regexp++;
766 		    stringp++;
767 		} else {
768 		    return ((char *)0);
769 		}
770 		break;		/* end case ASCII_CHAR */
771 
772 	    case MULTIBYTE_CHAR: /* single multibyte char */
773 
774 		/* encoded as <MULTIBYTE_CHAR><multibyte_char> */
775 
776 		regexp++;
777 		regex_char_size = get_wchar(&regex_wchar, regexp);
778 		string_char_size = get_wchar(&string_wchar, stringp);
779 		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
780 		    return ((char *)0);
781 		} else {
782 		    regexp += regex_char_size;
783 		    stringp += string_char_size;
784 		}
785 		break;		/* end case MULTIBYTE_CHAR */
786 
787 	    case ANY_CHAR: /* any single ASCII or multibyte char */
788 
789 		/* encoded as <ANY_CHAR> */
790 
791 		if (!multibyte) {
792 		    if (*stringp == '\0') {
793 			return ((char *)0);
794 		    } else {
795 			regexp++;
796 			stringp++;
797 		    }
798 		} else {
799 		    string_char_size = get_wchar(&string_wchar, stringp);
800 		    if (string_char_size <= 0) {
801 			return ((char *)0);
802 		    } else {
803 			regexp++;
804 			stringp += string_char_size;
805 		    }
806 		}
807 		break;	/* end case ANY_CHAR */
808 
809 	    case IN_ASCII_CHAR_CLASS:		/* [.....] */
810 	    case NOT_IN_ASCII_CHAR_CLASS:
811 
812 		/*
813 		 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
814 		 *	or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
815 		 *
816 		 * NOTE: <class_length> includes the <class_length> byte
817 		 */
818 
819 		if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
820 		    test_condition = IN_CLASS;
821 		} else {
822 		    test_condition = NOT_IN_CLASS;
823 		}
824 		regexp++; /* point to the <class_length> byte */
825 
826 		if ((*stringp != '\0') &&
827 		    (test_char_against_ascii_class(*stringp, regexp,
828 		    test_condition) == CONDITION_TRUE)) {
829 		    regexp += (int)*regexp; /* add the class length to regexp */
830 		    stringp++;
831 		} else {
832 		    return ((char *)0);
833 		}
834 		break; /* end case IN_ASCII_CHAR_CLASS */
835 
836 	    case IN_MULTIBYTE_CHAR_CLASS:	/* [....] */
837 	    case NOT_IN_MULTIBYTE_CHAR_CLASS:
838 
839 		/*
840 		 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
841 		 * 	or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
842 		 *
843 		 * NOTE: <class_length> includes the <class_length> byte
844 		 */
845 
846 		if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
847 		    test_condition = IN_CLASS;
848 		} else {
849 		    test_condition = NOT_IN_CLASS;
850 		}
851 		regexp++; /* point to the <class_length> byte */
852 
853 		string_char_size = get_wchar(&string_wchar, stringp);
854 		if ((string_char_size > 0) &&
855 		    (test_char_against_multibyte_class(string_wchar, regexp,
856 		    test_condition) == CONDITION_TRUE)) {
857 		    regexp += (int)*regexp; /* add the class length to regexp */
858 		    stringp += string_char_size;
859 		} else {
860 		    return ((char *)0);
861 		}
862 		break; /* end case IN_MULTIBYTE_CHAR_CLASS */
863 
864 	    case IN_OLD_ASCII_CHAR_CLASS:	/* [...] */
865 	    case NOT_IN_OLD_ASCII_CHAR_CLASS:
866 
867 		/*
868 		 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
869 		 *	or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
870 		 *
871 		 * NOTE: <class_length> includes the <class_length> byte
872 		 */
873 
874 		if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
875 		    test_condition = IN_CLASS;
876 		} else {
877 		    test_condition = NOT_IN_CLASS;
878 		}
879 		regexp++; /* point to the <class_length> byte */
880 
881 		if ((*stringp != '\0') &&
882 		    (test_char_against_old_ascii_class(*stringp, regexp,
883 		    test_condition) == CONDITION_TRUE)) {
884 		    regexp += (int)*regexp; /* add the class length to regexp */
885 		    stringp++;
886 		} else {
887 		    return ((char *)0);
888 		}
889 		break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
890 
891 	    case SIMPLE_GROUP: /* (.....) */
892 
893 		/* encoded as <SIMPLE_GROUP><group_length> */
894 
895 		regexp += 2;
896 		break;		/* end case SIMPLE_GROUP */
897 
898 	    case END_GROUP:	/* (.....) */
899 
900 		/* encoded as <END_GROUP><groupn> */
901 
902 		regexp += 2;
903 		break;		/* end case END_GROUP */
904 
905 	    case SAVED_GROUP:	/* (.....)$0-9 */
906 
907 		/* encoded as <SAVED_GROUP><substringn> */
908 
909 		regexp++;
910 		substringn = (unsigned int)*regexp;
911 		if (substringn >= NSUBSTRINGS)
912 		    return ((char *)0);
913 		substring_startp[substringn] = stringp;
914 		regexp++;
915 		break;		/* end case SAVED_GROUP */
916 
917 	    case END_SAVED_GROUP:	/* (.....)$0-9 */
918 
919 		/*
920 		 * encoded as <END_SAVED_GROUP><substringn>\
921 		 *	<return_arg_number[substringn]>
922 		 */
923 
924 		regexp++;
925 		substringn = (unsigned int)*regexp;
926 		if (substringn >= NSUBSTRINGS)
927 		    return ((char *)0);
928 		substring_endp[substringn] = stringp;
929 		regexp++;
930 		return_argn = (unsigned int)*regexp;
931 		if (return_argn >= NSUBSTRINGS)
932 		    return ((char *)0);
933 		return_arg_number[substringn] = return_argn;
934 		regexp++;
935 		break;		/* end case END_SAVED_GROUP */
936 
937 	    case ASCII_CHAR|ZERO_OR_MORE:  /* char* */
938 
939 		/* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
940 
941 		regexp++;
942 		repeat_startp = stringp;
943 		while (*stringp == *regexp) {
944 		    stringp++;
945 		}
946 		regexp++;
947 		return (test_repeated_ascii_char(repeat_startp,
948 		    stringp, regexp));
949 
950 		/* end case ASCII_CHAR|ZERO_OR_MORE */
951 
952 	    case ASCII_CHAR|ONE_OR_MORE:   /* char+ */
953 
954 		/* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
955 
956 		regexp++;
957 		if (*stringp != *regexp) {
958 		    return ((char *)0);
959 		} else {
960 		    stringp++;
961 		    repeat_startp = stringp;
962 		    while (*stringp == *regexp) {
963 			stringp++;
964 		    }
965 		    regexp++;
966 		    return (test_repeated_ascii_char(repeat_startp, stringp,
967 			regexp));
968 		}
969 		/* end case ASCII_CHAR|ONE_OR_MORE */
970 
971 	    case ASCII_CHAR|COUNT:	/* char{min_count,max_count} */
972 
973 		/*
974 		 * encoded as <ASCII_CHAR|COUNT><ascii_char>\
975 		 *	<minimum_match_count><maximum_match_count>
976 		 */
977 
978 		regexp++;
979 		get_match_counts(&nmust_match, &nextra_matches_allowed,
980 		    regexp + 1);
981 		while ((*stringp == *regexp) && (nmust_match > 0)) {
982 		    nmust_match--;
983 		    stringp++;
984 		}
985 		if (nmust_match > 0) {
986 		    return ((char *)0);
987 		} else if (nextra_matches_allowed == UNLIMITED) {
988 		    repeat_startp = stringp;
989 		    while (*stringp == *regexp) {
990 			stringp++;
991 		    }
992 		    regexp += 3;
993 		    return (test_repeated_ascii_char(repeat_startp, stringp,
994 			regexp));
995 		} else {
996 		    repeat_startp = stringp;
997 		    while ((*stringp == *regexp) &&
998 			(nextra_matches_allowed > 0)) {
999 			nextra_matches_allowed--;
1000 			stringp++;
1001 		    }
1002 		    regexp += 3;
1003 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1004 			regexp));
1005 		}
1006 		/* end case ASCII_CHAR|COUNT */
1007 
1008 	    case MULTIBYTE_CHAR|ZERO_OR_MORE:   /* char* */
1009 
1010 		/* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
1011 
1012 		regexp++;
1013 		regex_char_size = get_wchar(&regex_wchar, regexp);
1014 		repeat_startp = stringp;
1015 		string_char_size = get_wchar(&string_wchar, stringp);
1016 		while ((string_char_size > 0) &&
1017 		    (string_wchar == regex_wchar)) {
1018 		    stringp += string_char_size;
1019 		    string_char_size = get_wchar(&string_wchar, stringp);
1020 		}
1021 		regexp += regex_char_size;
1022 		return (test_repeated_multibyte_char(repeat_startp, stringp,
1023 		    regexp));
1024 
1025 		/* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
1026 
1027 	    case MULTIBYTE_CHAR|ONE_OR_MORE:    /* char+ */
1028 
1029 		/* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
1030 
1031 		regexp++;
1032 		regex_char_size = get_wchar(&regex_wchar, regexp);
1033 		string_char_size = get_wchar(&string_wchar, stringp);
1034 		if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
1035 		    return ((char *)0);
1036 		} else {
1037 		    stringp += string_char_size;
1038 		    repeat_startp = stringp;
1039 		    string_char_size = get_wchar(&string_wchar, stringp);
1040 		    while ((string_char_size > 0) &&
1041 			(string_wchar == regex_wchar)) {
1042 			stringp += string_char_size;
1043 			string_char_size = get_wchar(&string_wchar, stringp);
1044 		    }
1045 		    regexp += regex_char_size;
1046 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1047 			regexp));
1048 		}
1049 		/* end case MULTIBYTE_CHAR|ONE_OR_MORE */
1050 
1051 	    case MULTIBYTE_CHAR|COUNT:		/* char{min_count,max_count} */
1052 
1053 		/*
1054 		 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
1055 		 * 	<minimum_match_count><maximum_match_count>
1056 		 */
1057 
1058 		regexp++;
1059 		regex_char_size = get_wchar(&regex_wchar, regexp);
1060 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1061 		    regexp + regex_char_size);
1062 		string_char_size = get_wchar(&string_wchar, stringp);
1063 		while ((string_char_size > 0) &&
1064 		    (string_wchar == regex_wchar) &&
1065 		    (nmust_match > 0)) {
1066 
1067 		    nmust_match--;
1068 		    stringp += string_char_size;
1069 		    string_char_size = get_wchar(&string_wchar, stringp);
1070 		}
1071 		if (nmust_match > 0) {
1072 		    return ((char *)0);
1073 		} else if (nextra_matches_allowed == UNLIMITED) {
1074 		    repeat_startp = stringp;
1075 		    while ((string_char_size > 0) &&
1076 			(string_wchar == regex_wchar)) {
1077 			stringp += string_char_size;
1078 			string_char_size = get_wchar(&string_wchar, stringp);
1079 		    }
1080 		    regexp += regex_char_size + 2;
1081 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1082 			regexp));
1083 		} else {
1084 		    repeat_startp = stringp;
1085 		    while ((string_char_size > 0) &&
1086 			(string_wchar == regex_wchar) &&
1087 			(nextra_matches_allowed > 0)) {
1088 			nextra_matches_allowed--;
1089 			stringp += string_char_size;
1090 			string_char_size = get_wchar(&string_wchar, stringp);
1091 		    }
1092 		    regexp += regex_char_size + 2;
1093 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1094 			regexp));
1095 		}
1096 		/* end case MULTIBYTE_CHAR|COUNT */
1097 
1098 	    case ANY_CHAR|ZERO_OR_MORE:		/* .* */
1099 
1100 		/* encoded as <ANY_CHAR|ZERO_OR_MORE> */
1101 
1102 		repeat_startp = stringp;
1103 		if (!multibyte) {
1104 		    while (*stringp != '\0') {
1105 			stringp++;
1106 		    }
1107 		    regexp++;
1108 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1109 			regexp));
1110 		} else {
1111 		    string_char_size = get_wchar(&string_wchar, stringp);
1112 		    while (string_char_size > 0) {
1113 			stringp += string_char_size;
1114 			string_char_size = get_wchar(&string_wchar, stringp);
1115 		    }
1116 		    regexp++;
1117 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1118 			regexp));
1119 		}
1120 		/* end case <ANY_CHAR|ZERO_OR_MORE> */
1121 
1122 	    case ANY_CHAR|ONE_OR_MORE:		/* .+ */
1123 
1124 		/* encoded as <ANY_CHAR|ONE_OR_MORE> */
1125 
1126 		if (!multibyte) {
1127 		    if (*stringp == '\0') {
1128 			return ((char *)0);
1129 		    } else {
1130 			stringp++;
1131 			repeat_startp = stringp;
1132 			while (*stringp != '\0') {
1133 			    stringp++;
1134 			}
1135 			regexp++;
1136 			return (test_repeated_ascii_char(repeat_startp, stringp,
1137 			    regexp));
1138 		    }
1139 		} else {
1140 		    string_char_size = get_wchar(&string_wchar, stringp);
1141 		    if (string_char_size <= 0) {
1142 			return ((char *)0);
1143 		    } else {
1144 			stringp += string_char_size;
1145 			repeat_startp = stringp;
1146 			string_char_size = get_wchar(&string_wchar, stringp);
1147 			while (string_char_size > 0) {
1148 			    stringp += string_char_size;
1149 			    string_char_size =
1150 				get_wchar(&string_wchar, stringp);
1151 			}
1152 			regexp++;
1153 			return (test_repeated_multibyte_char(repeat_startp,
1154 			    stringp, regexp));
1155 		    }
1156 		}
1157 		/* end case <ANY_CHAR|ONE_OR_MORE> */
1158 
1159 	    case ANY_CHAR|COUNT:	/* .{min_count,max_count} */
1160 
1161 		/*
1162 		 * encoded as	<ANY_CHAR|COUNT>\
1163 		 *		<minimum_match_count><maximum_match_count>
1164 		 */
1165 
1166 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1167 		    regexp + 1);
1168 		if (!multibyte) {
1169 		    while ((*stringp != '\0') && (nmust_match > 0)) {
1170 			nmust_match--;
1171 			stringp++;
1172 		    }
1173 		    if (nmust_match > 0) {
1174 			return ((char *)0);
1175 		    } else if (nextra_matches_allowed == UNLIMITED) {
1176 			repeat_startp = stringp;
1177 			while (*stringp != '\0') {
1178 			    stringp++;
1179 			}
1180 			regexp += 3;
1181 			return (test_repeated_ascii_char(repeat_startp, stringp,
1182 			    regexp));
1183 		    } else {
1184 			repeat_startp = stringp;
1185 			while ((*stringp != '\0') &&
1186 			    (nextra_matches_allowed > 0)) {
1187 			    nextra_matches_allowed--;
1188 			    stringp++;
1189 			}
1190 			regexp += 3;
1191 			return (test_repeated_ascii_char(repeat_startp, stringp,
1192 			    regexp));
1193 		    }
1194 		} else { /* multibyte character */
1195 
1196 		    string_char_size = get_wchar(&string_wchar, stringp);
1197 		    while ((string_char_size > 0) && (nmust_match > 0)) {
1198 			nmust_match--;
1199 			stringp += string_char_size;
1200 			string_char_size = get_wchar(&string_wchar, stringp);
1201 		    }
1202 		    if (nmust_match > 0) {
1203 			return ((char *)0);
1204 		    } else if (nextra_matches_allowed == UNLIMITED) {
1205 			repeat_startp = stringp;
1206 			while (string_char_size > 0) {
1207 			    stringp += string_char_size;
1208 			    string_char_size =
1209 				get_wchar(&string_wchar, stringp);
1210 			}
1211 			regexp += 3;
1212 			return (test_repeated_multibyte_char(repeat_startp,
1213 			    stringp, regexp));
1214 		    } else {
1215 			repeat_startp = stringp;
1216 			while ((string_char_size > 0) &&
1217 			    (nextra_matches_allowed > 0)) {
1218 			    nextra_matches_allowed--;
1219 			    stringp += string_char_size;
1220 			    string_char_size =
1221 				get_wchar(&string_wchar, stringp);
1222 			}
1223 			regexp += 3;
1224 			return (test_repeated_multibyte_char(repeat_startp,
1225 			    stringp, regexp));
1226 		    }
1227 		} /* end case ANY_CHAR|COUNT */
1228 
1229 	    case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1230 	    case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1231 
1232 		/*
1233 		 * encoded as	<IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1234 		 *		<class_length><class ...>
1235 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1236 		 *		<class_length><class ...>
1237 		 *
1238 		 * NOTE: <class_length> includes the <class_length> byte
1239 		 */
1240 
1241 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1242 		    test_condition = IN_CLASS;
1243 		} else {
1244 		    test_condition = NOT_IN_CLASS;
1245 		}
1246 		regexp++; /* point to the <class_length> byte */
1247 
1248 		repeat_startp = stringp;
1249 		while ((*stringp != '\0') &&
1250 		    (test_char_against_ascii_class(*stringp, regexp,
1251 		    test_condition) == CONDITION_TRUE)) {
1252 		    stringp++;
1253 		}
1254 		regexp += (int)*regexp; /* add the class length to regexp */
1255 		return (test_repeated_ascii_char(repeat_startp, stringp,
1256 		    regexp));
1257 
1258 		/* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1259 
1260 	    case IN_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1261 	    case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
1262 
1263 		/*
1264 		 * encoded as	<IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1265 		 *		<class_length><class ...>
1266 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1267 		 * 		<class_length><class ...>
1268 		 *
1269 		 * NOTE: <class_length> includes the <class_length> byte
1270 		 */
1271 
1272 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1273 		    test_condition = IN_CLASS;
1274 		} else {
1275 		    test_condition = NOT_IN_CLASS;
1276 		}
1277 		regexp++; /* point to the <class_length> byte */
1278 
1279 		if ((*stringp == '\0') ||
1280 		    (test_char_against_ascii_class(*stringp, regexp,
1281 		    test_condition) != CONDITION_TRUE)) {
1282 		    return ((char *)0);
1283 		} else {
1284 		    stringp++;
1285 		    repeat_startp = stringp;
1286 		    while ((*stringp != '\0') &&
1287 			(test_char_against_ascii_class(*stringp, regexp,
1288 			test_condition) == CONDITION_TRUE)) {
1289 			stringp++;
1290 		    }
1291 		    regexp += (int)*regexp; /* add the class length to regexp */
1292 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1293 			regexp));
1294 		}
1295 		/* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
1296 
1297 	    case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */
1298 	    case NOT_IN_ASCII_CHAR_CLASS | COUNT:
1299 
1300 		/*
1301 		 * encoded as	<IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1302 		 * 		<class ...><minimum_match_count>\
1303 		 *		<maximum_match_count>
1304 		 *	or	<NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1305 		 *		<class ...><minimum_match_count>\
1306 		 *		<maximum_match_count>
1307 		 *
1308 		 * NOTE: <class_length> includes the <class_length> byte,
1309 		 *	but not the <minimum_match_count> or
1310 		 *	<maximum_match_count> bytes
1311 		 */
1312 
1313 		if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
1314 		    test_condition = IN_CLASS;
1315 		} else {
1316 		    test_condition = NOT_IN_CLASS;
1317 		}
1318 		regexp++; /* point to the <class_length> byte */
1319 
1320 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1321 		    regexp + (int)*regexp);
1322 		while ((*stringp != '\0') &&
1323 		    (test_char_against_ascii_class(*stringp, regexp,
1324 		    test_condition) == CONDITION_TRUE) &&
1325 		    (nmust_match > 0)) {
1326 		    nmust_match--;
1327 		    stringp++;
1328 		}
1329 		if (nmust_match > 0) {
1330 		    return ((char *)0);
1331 		} else if (nextra_matches_allowed == UNLIMITED) {
1332 		    repeat_startp = stringp;
1333 		    while ((*stringp != '\0') &&
1334 			(test_char_against_ascii_class(*stringp, regexp,
1335 			test_condition) == CONDITION_TRUE)) {
1336 			stringp++;
1337 		    }
1338 		    regexp += (int)*regexp + 2;
1339 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1340 			regexp));
1341 		} else {
1342 		    repeat_startp = stringp;
1343 		    while ((*stringp != '\0') &&
1344 			(test_char_against_ascii_class(*stringp, regexp,
1345 			test_condition) == CONDITION_TRUE) &&
1346 			(nextra_matches_allowed > 0)) {
1347 			nextra_matches_allowed--;
1348 			stringp++;
1349 		    }
1350 		    regexp += (int)*regexp + 2;
1351 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1352 			regexp));
1353 		}
1354 		/* end case IN_ASCII_CHAR_CLASS|COUNT */
1355 
1356 	    case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1357 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
1358 
1359 		/*
1360 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1361 		 *		<class_length><class ...>
1362 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1363 		 *		<class_length><class ...>
1364 		 *
1365 		 * NOTE: <class_length> includes the <class_length> byte
1366 		 */
1367 
1368 		if ((int)*regexp ==
1369 		    (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
1370 		    test_condition = IN_CLASS;
1371 		} else {
1372 		    test_condition = NOT_IN_CLASS;
1373 		}
1374 		regexp++; /* point to the <class_length> byte */
1375 
1376 		repeat_startp = stringp;
1377 		string_char_size = get_wchar(&string_wchar, stringp);
1378 		while ((string_char_size > 0) &&
1379 		    (test_char_against_multibyte_class(string_wchar, regexp,
1380 		    test_condition) == CONDITION_TRUE)) {
1381 		    stringp += string_char_size;
1382 		    string_char_size = get_wchar(&string_wchar, stringp);
1383 		}
1384 		regexp += (int)*regexp; /* add the class length to regexp */
1385 		return (test_repeated_multibyte_char(repeat_startp, stringp,
1386 		    regexp));
1387 
1388 		/* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
1389 
1390 	    case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1391 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
1392 
1393 		/*
1394 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1395 		 *		<class_length><class ...>
1396 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1397 		 *		<class_length><class ...>
1398 		 *
1399 		 * NOTE: <class_length> includes the <class_length> byte
1400 		 */
1401 
1402 		if ((int)*regexp ==
1403 		    (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
1404 		    test_condition = IN_CLASS;
1405 		} else {
1406 		    test_condition = NOT_IN_CLASS;
1407 		}
1408 		regexp++; /* point to the <class_length> byte */
1409 
1410 		string_char_size = get_wchar(&string_wchar, stringp);
1411 		if ((string_char_size <= 0) ||
1412 		    (test_char_against_multibyte_class(string_wchar, regexp,
1413 		    test_condition) != CONDITION_TRUE)) {
1414 		    return ((char *)0);
1415 		} else {
1416 		    stringp += string_char_size;
1417 		    repeat_startp = stringp;
1418 		    string_char_size = get_wchar(&string_wchar, stringp);
1419 		    while ((string_char_size > 0) &&
1420 			(test_char_against_multibyte_class(string_wchar,
1421 			regexp, test_condition) == CONDITION_TRUE)) {
1422 			stringp += string_char_size;
1423 			string_char_size = get_wchar(&string_wchar, stringp);
1424 		    }
1425 		    regexp += (int)*regexp; /* add the class length to regexp */
1426 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1427 			regexp));
1428 		}
1429 		/* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
1430 
1431 	    case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1432 	    case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
1433 
1434 		/*
1435 		 * encoded as	<IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1436 		 *		<class_length><class ...><min_count><max_count>
1437 		 *	or	<NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1438 		 *		<class_length><class ...><min_count><max_count>
1439 		 *
1440 		 * NOTE: <class_length> includes the <class_length> byte
1441 		 *	but not the <minimum_match_count> or
1442 		 *	<maximum_match_count> bytes
1443 		 */
1444 
1445 		if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
1446 		    test_condition = IN_CLASS;
1447 		} else {
1448 		    test_condition = NOT_IN_CLASS;
1449 		}
1450 		regexp++; /* point to the <class_length> byte */
1451 
1452 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1453 		    regexp + (int)*regexp);
1454 		string_char_size = get_wchar(&string_wchar, stringp);
1455 		while ((string_char_size > 0) &&
1456 		    (test_char_against_multibyte_class(string_wchar, regexp,
1457 		    test_condition) == CONDITION_TRUE) &&
1458 		    (nmust_match > 0)) {
1459 		    nmust_match--;
1460 		    stringp += string_char_size;
1461 		    string_char_size = get_wchar(&string_wchar, stringp);
1462 		}
1463 		if (nmust_match > 0) {
1464 		    return ((char *)0);
1465 		} else if (nextra_matches_allowed == UNLIMITED) {
1466 		    repeat_startp = stringp;
1467 		    while ((string_char_size > 0) &&
1468 			(test_char_against_multibyte_class(string_wchar,
1469 			regexp, test_condition) == CONDITION_TRUE)) {
1470 			stringp += string_char_size;
1471 			string_char_size = get_wchar(&string_wchar, stringp);
1472 		    }
1473 		    regexp += (int)*regexp + 2;
1474 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1475 			regexp));
1476 		} else {
1477 		    repeat_startp = stringp;
1478 		    while ((string_char_size > 0) &&
1479 			(test_char_against_multibyte_class(string_wchar,
1480 			regexp, test_condition) == CONDITION_TRUE) &&
1481 			(nextra_matches_allowed > 0)) {
1482 			nextra_matches_allowed--;
1483 			stringp += string_char_size;
1484 			string_char_size = get_wchar(&string_wchar, stringp);
1485 		    }
1486 		    regexp += (int)*regexp + 2;
1487 		    return (test_repeated_multibyte_char(repeat_startp, stringp,
1488 			regexp));
1489 		}
1490 		/* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
1491 
1492 	    case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:	/* [.....]* */
1493 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1494 
1495 		/*
1496 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1497 		 *		<class_length><class ...>
1498 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1499 		 *		<class_length><class ...>
1500 		 *
1501 		 * NOTE: <class_length> includes the <class_length> byte
1502 		 */
1503 
1504 		if ((int)*regexp ==
1505 		    (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1506 		    test_condition = IN_CLASS;
1507 		} else {
1508 		    test_condition = NOT_IN_CLASS;
1509 		}
1510 		regexp++; /* point to the <class_length> byte */
1511 
1512 		repeat_startp = stringp;
1513 		while ((*stringp != '\0') &&
1514 		    (test_char_against_old_ascii_class(*stringp, regexp,
1515 		    test_condition) == CONDITION_TRUE)) {
1516 		    stringp++;
1517 		}
1518 		regexp += (int)*regexp; /* add the class length to regexp */
1519 		return (test_repeated_ascii_char(repeat_startp, stringp,
1520 		    regexp));
1521 
1522 		/* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1523 
1524 	    case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:	/* [.....]+ */
1525 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
1526 
1527 		/*
1528 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1529 		 *		<class_length><class ...>
1530 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1531 		 *		<class_length><class ...>
1532 		 *
1533 		 * NOTE: <class_length> includes the <class_length> byte
1534 		 */
1535 
1536 		if ((int)*regexp ==
1537 		    (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1538 		    test_condition = IN_CLASS;
1539 		} else {
1540 		    test_condition = NOT_IN_CLASS;
1541 		}
1542 		regexp++; /* point to the <class_length> byte */
1543 
1544 		if ((*stringp == '\0') ||
1545 		    (test_char_against_old_ascii_class(*stringp, regexp,
1546 		    test_condition) != CONDITION_TRUE)) {
1547 		    return ((char *)0);
1548 		} else {
1549 		    stringp++;
1550 		    repeat_startp = stringp;
1551 		    while ((*stringp != '\0') &&
1552 			(test_char_against_old_ascii_class(*stringp, regexp,
1553 			test_condition) == CONDITION_TRUE)) {
1554 			stringp++;
1555 		    }
1556 		    regexp += (int)*regexp; /* add the class length to regexp */
1557 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1558 			regexp));
1559 		}
1560 		/* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
1561 
1562 	    case IN_OLD_ASCII_CHAR_CLASS|COUNT:	/* [...]{min_count,max_count} */
1563 	    case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
1564 
1565 		/*
1566 		 * encoded as	<IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
1567 		 *		<class ...><minimum_match_count>\
1568 		 *		<maximum_match_count>
1569 		 *	or	<NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
1570 		 *		<class_length><class ...><minimum_match_count>\
1571 		 *		<maximum_match_count>
1572 		 *
1573 		 * NOTE: <class_length> includes the <class_length> byte
1574 		 *	but not the <minimum_match_count> or
1575 		 *	<maximum_match_count> bytes
1576 		 */
1577 
1578 		if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
1579 		    test_condition = IN_CLASS;
1580 		} else {
1581 		    test_condition = NOT_IN_CLASS;
1582 		}
1583 		regexp++; /* point to the <class_length> byte */
1584 
1585 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1586 		    regexp + (int)*regexp);
1587 		while ((*stringp != '\0') &&
1588 		    (test_char_against_old_ascii_class(*stringp, regexp,
1589 		    test_condition) == CONDITION_TRUE) &&
1590 		    (nmust_match > 0)) {
1591 		    nmust_match--;
1592 		    stringp++;
1593 		}
1594 		if (nmust_match > 0) {
1595 		    return ((char *)0);
1596 		} else if (nextra_matches_allowed == UNLIMITED) {
1597 		    repeat_startp = stringp;
1598 		    while ((*stringp != '\0') &&
1599 			(test_char_against_old_ascii_class(*stringp, regexp,
1600 			test_condition) == CONDITION_TRUE)) {
1601 			stringp++;
1602 		    }
1603 		    regexp += (int)*regexp + 2;
1604 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1605 			regexp));
1606 		} else {
1607 		    repeat_startp = stringp;
1608 		    while ((*stringp != '\0') &&
1609 			(test_char_against_old_ascii_class(*stringp, regexp,
1610 			test_condition) == CONDITION_TRUE) &&
1611 			(nextra_matches_allowed > 0)) {
1612 			nextra_matches_allowed--;
1613 			stringp++;
1614 		    }
1615 		    regexp += (int)*regexp + 2;
1616 		    return (test_repeated_ascii_char(repeat_startp, stringp,
1617 			regexp));
1618 		}
1619 		/* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
1620 
1621 	    case ZERO_OR_MORE_GROUP:		/* (.....)* */
1622 	    case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1623 	    case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1624 	    case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1625 
1626 		/*
1627 		 * encoded as	<ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1628 		 *		<group_length><compiled_regex...>\
1629 		 *		<END_GROUP|ZERO_OR_MORE><groupn>
1630 		 *
1631 		 * NOTE:
1632 		 *
1633 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1634 		 *	length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
1635 		 *		<groupn>)
1636 		 *
1637 		 */
1638 
1639 		group_length =
1640 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1641 		    TIMES_256_SHIFT);
1642 		regexp++;
1643 		group_length += (unsigned int)*regexp;
1644 		regexp++;
1645 		repeat_startp = stringp;
1646 		test_stringp = test_string(stringp, regexp);
1647 		while (test_stringp != (char *)0) {
1648 		    if (push_stringp(stringp) == (char *)0)
1649 			return ((char *)0);
1650 		    stringp = test_stringp;
1651 		    test_stringp = test_string(stringp, regexp);
1652 		}
1653 		regexp += group_length;
1654 		return (test_repeated_group(repeat_startp, stringp, regexp));
1655 
1656 		/* end case ZERO_OR_MORE_GROUP */
1657 
1658 	    case END_GROUP|ZERO_OR_MORE:	/* (.....)* */
1659 
1660 		/* encoded as <END_GROUP|ZERO_OR_MORE><groupn> */
1661 
1662 		/* return from recursive call to test_string() */
1663 
1664 		return ((char *)stringp);
1665 
1666 		/* end case END_GROUP|ZERO_OR_MORE */
1667 
1668 	    case ONE_OR_MORE_GROUP:		/* (.....)+ */
1669 	    case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1670 	    case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1671 	    case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1672 
1673 		/*
1674 		 * encoded as	<ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1675 		 *		<group_length><compiled_regex...>\
1676 		 *		<END_GROUP|ONE_OR_MORE><groupn>
1677 		 *
1678 		 * NOTE:
1679 		 *
1680 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1681 		 * 	length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
1682 		 *		<groupn>)
1683 		 */
1684 
1685 		group_length =
1686 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1687 		    TIMES_256_SHIFT);
1688 		regexp++;
1689 		group_length += (unsigned int)*regexp;
1690 		regexp++;
1691 		stringp = test_string(stringp, regexp);
1692 		if (stringp == (char *)0)
1693 		    return ((char *)0);
1694 		repeat_startp = stringp;
1695 		test_stringp = test_string(stringp, regexp);
1696 		while (test_stringp != (char *)0) {
1697 		    if (push_stringp(stringp) == (char *)0)
1698 			return ((char *)0);
1699 		    stringp = test_stringp;
1700 		    test_stringp = test_string(stringp, regexp);
1701 		}
1702 		regexp += group_length;
1703 		return (test_repeated_group(repeat_startp, stringp, regexp));
1704 
1705 		/* end case ONE_OR_MORE_GROUP */
1706 
1707 	    case END_GROUP|ONE_OR_MORE:		/* (.....)+ */
1708 
1709 		/* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
1710 
1711 		/* return from recursive call to test_string() */
1712 
1713 		return ((char *)stringp);
1714 
1715 		/* end case END_GROUP|ONE_OR_MORE */
1716 
1717 	    case COUNTED_GROUP:		/* (.....){max_count,min_count} */
1718 	    case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
1719 	    case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
1720 	    case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
1721 
1722 		/*
1723 		 * encoded as	<COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
1724 		 *		<compiled_regex...><END_GROUP|COUNT><groupn>\
1725 		 *		<minimum_match_count><maximum_match_count>
1726 		 *
1727 		 * NOTE:
1728 		 *
1729 		 * group_length + (256 * ADDED_LENGTH_BITS) ==
1730 		 *	length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
1731 		 *
1732 		 * but does not include the <minimum_match_count> or
1733 		 *	<maximum_match_count> bytes
1734 		 */
1735 
1736 		group_length =
1737 		    (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1738 		    TIMES_256_SHIFT);
1739 		regexp++;
1740 		group_length += (unsigned int)*regexp;
1741 		regexp++;
1742 		get_match_counts(&nmust_match, &nextra_matches_allowed,
1743 		    regexp + group_length);
1744 		test_stringp = test_string(stringp, regexp);
1745 		while ((test_stringp != (char *)0) && (nmust_match > 0)) {
1746 		    stringp = test_stringp;
1747 		    nmust_match--;
1748 		    test_stringp = test_string(stringp, regexp);
1749 		}
1750 		if (nmust_match > 0) {
1751 		    return ((char *)0);
1752 		} else if (nextra_matches_allowed == UNLIMITED) {
1753 		    repeat_startp = stringp;
1754 		    while (test_stringp != (char *)0) {
1755 			if (push_stringp(stringp) == (char *)0)
1756 			    return ((char *)0);
1757 			stringp = test_stringp;
1758 			test_stringp = test_string(stringp, regexp);
1759 		}
1760 		    regexp += group_length + 2;
1761 		    return (test_repeated_group(repeat_startp, stringp,
1762 			regexp));
1763 		} else {
1764 		    repeat_startp = stringp;
1765 		    while ((test_stringp != (char *)0) &&
1766 			(nextra_matches_allowed > 0)) {
1767 			nextra_matches_allowed--;
1768 			if (push_stringp(stringp) == (char *)0)
1769 			    return ((char *)0);
1770 			stringp = test_stringp;
1771 			test_stringp = test_string(stringp, regexp);
1772 		}
1773 		    regexp += group_length + 2;
1774 		    return (test_repeated_group(repeat_startp, stringp,
1775 			regexp));
1776 		}
1777 		/* end case COUNTED_GROUP */
1778 
1779 	    case END_GROUP|COUNT:	/* (.....){max_count,min_count} */
1780 
1781 		/* encoded as <END_GROUP|COUNT><groupn> */
1782 
1783 		/* return from recursive call to test_string() */
1784 
1785 		return (stringp);
1786 
1787 		/* end case END_GROUP|COUNT */
1788 
1789 	    case END_OF_STRING_MARK:
1790 
1791 		/* encoded as <END_OF_STRING_MARK><END_REGEX> */
1792 
1793 		if (*stringp == '\0') {
1794 		    regexp++;
1795 		} else {
1796 		    return ((char *)0);
1797 		}
1798 		break; /* end case END_OF_STRING_MARK */
1799 
1800 	    case END_REGEX: /* end of the compiled regular expression */
1801 
1802 		/* encoded as <END_REGEX> */
1803 
1804 		return (stringp);
1805 
1806 		/* end case END_REGEX */
1807 
1808 	    default:
1809 
1810 		return ((char *)0);
1811 
1812 	    } /* end switch (*regexp) */
1813 
1814 	} /* end for (;;) */
1815 
1816 } /* test_string() */
1817