1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 /*
31 * IMPORTANT NOTE:
32 *
33 * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
34 * IT IS **NOT** CHARACTER SET INDEPENDENT.
35 *
36 */
37
38 #pragma weak _regex = regex
39
40 #include "lint.h"
41 /* CONSTANTS SHARED WITH regcmp() */
42 #include "regex.h"
43 #include "mtlib.h"
44 #include <limits.h>
45 #include <stdarg.h>
46 #include <stdlib.h>
47 #include <thread.h>
48 #include <widec.h>
49 #include "tsd.h"
50
51
52 /* PRIVATE CONSTANTS */
53
/*
 * Constants private to regex().
 *
 * NOTE(review): the ADD_*_TO_GROUP_LENGTH / ADDED_LENGTH_BITS values are
 * not used in the part of the file reviewed here; judging by their names
 * they extend a one-byte group length -- confirm against test_string().
 */
#define	ADD_256_TO_GROUP_LENGTH	0x01
#define	ADD_512_TO_GROUP_LENGTH	0x02
#define	ADD_768_TO_GROUP_LENGTH	0x03
#define	ADDED_LENGTH_BITS	0x03

/* keeps the low eight bits of a count byte (see get_match_counts()) */
#define	SINGLE_BYTE_MASK	0xff

/* number of entries in stringp_stack[] */
#define	STRINGP_STACK_SIZE	50
60
61
62 /* PRIVATE TYPE DEFINITIONS */
63
/*
 * The condition a character-class test is asked to verify:
 * that the character is, or is not, a member of the class.
 */
typedef enum {
	NOT_IN_CLASS = 0,
	IN_CLASS = 1
} char_test_condition_t;

/*
 * The outcome of a character-class test.  The class-test functions in
 * this file return CONDITION_TRUE, CONDITION_FALSE or CHAR_TEST_ERROR.
 */
typedef enum {
	TESTING_CHAR = 0,
	CONDITION_TRUE = 1,
	CONDITION_FALSE = 2,
	CHAR_TEST_ERROR = 3
} char_test_result_t;
75
76
77 /* PRIVATE GLOBAL VARIABLES */
78
79 static mutex_t regex_lock = DEFAULTMUTEX;
80 static int return_arg_number[NSUBSTRINGS];
81 static const char *substring_endp[NSUBSTRINGS];
82 static const char *substring_startp[NSUBSTRINGS];
83 static const char *stringp_stack[STRINGP_STACK_SIZE];
84 static const char **stringp_stackp;
85
86
87 /* DECLARATIONS OF PRIVATE FUNCTIONS */
88
89 static int
90 get_wchar(wchar_t *wcharp,
91 const char *stringp);
92
93 static void
94 get_match_counts(int *nmust_matchp,
95 int *nextra_matches_allowedp,
96 const char *count_stringp);
97
98 static boolean_t
99 in_wchar_range(wchar_t test_char,
100 wchar_t lower_char,
101 wchar_t upper_char);
102
103 static const char *
104 pop_stringp(void);
105
106 static const char *
107 previous_charp(const char *current_charp);
108
109 static const char *
110 push_stringp(const char *stringp);
111
112 static char_test_result_t
113 test_char_against_ascii_class(char test_char,
114 const char *classp,
115 char_test_condition_t test_condition);
116
117 static char_test_result_t
118 test_char_against_multibyte_class(wchar_t test_char,
119 const char *classp,
120 char_test_condition_t test_condition);
121
122
123 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
124
125 static char_test_result_t
126 test_char_against_old_ascii_class(char test_char,
127 const char *classp,
128 char_test_condition_t test_condition);
129
130 static const char *
131 test_repeated_ascii_char(const char *repeat_startp,
132 const char *stringp,
133 const char *regexp);
134
135 static const char *
136 test_repeated_multibyte_char(const char *repeat_startp,
137 const char *stringp,
138 const char *regexp);
139
140 static const char *
141 test_repeated_group(const char *repeat_startp,
142 const char *stringp,
143 const char *regexp);
144
145 static const char *
146 test_string(const char *stringp,
147 const char *regexp);
148
149
150 /* DEFINITIONS OF PUBLIC VARIABLES */
151
152 char *__loc1;
153
154 /*
155 * reserve thread-specific storage for __loc1
156 */
157 char **
____loc1(void)158 ____loc1(void)
159 {
160 if (thr_main())
161 return (&__loc1);
162 return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
163 }
164
165 #define __loc1 (*(____loc1()))
166
167 /* DEFINITION OF regex() */
168
169 extern char *
regex(const char * regexp,const char * stringp,...)170 regex(const char *regexp, const char *stringp, ...)
171 {
172 va_list arg_listp;
173 int char_size;
174 const char *end_of_matchp;
175 wchar_t regex_wchar;
176 char *return_argp[NSUBSTRINGS];
177 char *returned_substringp;
178 int substringn;
179 const char *substringp;
180 wchar_t string_wchar;
181
182 if (____loc1() == (char **)0) {
183 return ((char *)0);
184 } else {
185 lmutex_lock(®ex_lock);
186 __loc1 = (char *)0;
187 }
188
189 if ((stringp == (char *)0) || (regexp == (char *)0)) {
190 lmutex_unlock(®ex_lock);
191 return ((char *)0);
192 }
193
194
195 /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */
196
197 substringn = 0;
198 va_start(arg_listp, stringp);
199 while (substringn < NSUBSTRINGS) {
200 return_argp[substringn] = va_arg(arg_listp, char *);
201 substring_startp[substringn] = (char *)0;
202 return_arg_number[substringn] = -1;
203 substringn++;
204 }
205 va_end(arg_listp);
206
207
208 /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
209
210 end_of_matchp = (char *)0;
211 stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
212
213 if ((int)*regexp == (int)START_OF_STRING_MARK) {
214
215 /*
216 * the match must start at the beginning of the string
217 */
218
219 __loc1 = (char *)stringp;
220 regexp++;
221 end_of_matchp = test_string(stringp, regexp);
222
223 } else if ((int)*regexp == (int)ASCII_CHAR) {
224
225 /*
226 * test a string against a regular expression
227 * that starts with a single ASCII character:
228 *
229 * move to each character in the string that matches
230 * the first character in the regular expression
231 * and test the remaining string
232 */
233
234 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
235 stringp++;
236 }
237 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
238 end_of_matchp = test_string(stringp, regexp);
239 if (end_of_matchp != (char *)0) {
240 __loc1 = (char *)stringp;
241 } else {
242 stringp++;
243 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
244 stringp++;
245 }
246 }
247 }
248
249 } else if (!multibyte) {
250
251 /*
252 * if the value of the "multibyte" macro defined in <euc.h>
253 * is false, regex() is running in an ASCII locale;
254 * test an ASCII string against an ASCII regular expression
255 * that doesn't start with a single ASCII character:
256 *
257 * move forward in the string one byte at a time, testing
258 * the remaining string against the regular expression
259 */
260
261 end_of_matchp = test_string(stringp, regexp);
262 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
263 stringp++;
264 end_of_matchp = test_string(stringp, regexp);
265 }
266 if (end_of_matchp != (char *)0) {
267 __loc1 = (char *)stringp;
268 }
269
270 } else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
271
272 /*
273 * test a multibyte string against a multibyte regular expression
274 * that starts with a single multibyte character:
275 *
276 * move to each character in the string that matches
277 * the first character in the regular expression
278 * and test the remaining string
279 */
280
281 (void) get_wchar(®ex_wchar, regexp + 1);
282 char_size = get_wchar(&string_wchar, stringp);
283 while ((string_wchar != regex_wchar) && (char_size > 0)) {
284 stringp += char_size;
285 char_size = get_wchar(&string_wchar, stringp);
286 }
287 while ((end_of_matchp == (char *)0) && (char_size > 0)) {
288 end_of_matchp = test_string(stringp, regexp);
289 if (end_of_matchp != (char *)0) {
290 __loc1 = (char *)stringp;
291 } else {
292 stringp += char_size;
293 char_size = get_wchar(&string_wchar, stringp);
294 while ((string_wchar != regex_wchar) && (char_size > 0)) {
295 stringp += char_size;
296 char_size = get_wchar(&string_wchar, stringp);
297 }
298 }
299 }
300
301 } else {
302
303 /*
304 * test a multibyte string against a multibyte regular expression
305 * that doesn't start with a single multibyte character
306 *
307 * move forward in the string one multibyte character at a time,
308 * testing the remaining string against the regular expression
309 */
310
311 end_of_matchp = test_string(stringp, regexp);
312 char_size = get_wchar(&string_wchar, stringp);
313 while ((end_of_matchp == (char *)0) && (char_size > 0)) {
314 stringp += char_size;
315 end_of_matchp = test_string(stringp, regexp);
316 char_size = get_wchar(&string_wchar, stringp);
317 }
318 if (end_of_matchp != (char *)0) {
319 __loc1 = (char *)stringp;
320 }
321 }
322
323 /*
324 * Return substrings that matched subexpressions for which
325 * matching substrings are to be returned.
326 *
327 * NOTE:
328 *
329 * According to manual page regcmp(3C), regex() returns substrings
330 * that match subexpressions even when no substring matches the
331 * entire regular expression.
332 */
333
334 substringn = 0;
335 while (substringn < NSUBSTRINGS) {
336 substringp = substring_startp[substringn];
337 if ((substringp != (char *)0) &&
338 (return_arg_number[substringn] >= 0)) {
339 returned_substringp =
340 return_argp[return_arg_number[substringn]];
341 if (returned_substringp != (char *)0) {
342 while (substringp < substring_endp[substringn]) {
343 *returned_substringp = (char)*substringp;
344 returned_substringp++;
345 substringp++;
346 }
347 *returned_substringp = '\0';
348 }
349 }
350 substringn++;
351 }
352 lmutex_unlock(®ex_lock);
353 return ((char *)end_of_matchp);
354 } /* regex() */
355
356
357 /* DEFINITIONS OF PRIVATE FUNCTIONS */
358
/*
 * get_wchar() decodes the character at the start of stringp.
 *
 * Arguments:
 *	wcharp	receives the decoded wide character
 *	stringp	string to decode from; may be a null pointer
 *
 * Returns the number of bytes the character occupies:
 *	0	stringp is null or addresses the terminating NUL
 *		(*wcharp is set to 0)
 *	1	the first byte is in the range 0x01-0x7f
 *	other	whatever mbtowc() returns for the sequence, which is
 *		negative when the sequence cannot be decoded
 *
 * When mbtowc() fails, *wcharp is set to 0 so that callers which compare
 * the wide character before looking at the size never read an unset
 * value.
 */
static int
get_wchar(wchar_t *wcharp,
    const char *stringp)
{
	int char_size;

	if ((stringp == NULL) || (*stringp == '\0')) {
		*wcharp = (wchar_t)0;
		return (0);
	}

	if ((unsigned char)*stringp <= (unsigned char)0x7f) {
		/* single-byte fast path; no need to call mbtowc() */
		*wcharp = (wchar_t)((unsigned int)*stringp);
		return (1);
	}

	char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
	if (char_size < 0) {
		*wcharp = (wchar_t)0;
	}
	return (char_size);
}
379
380 static void
get_match_counts(int * nmust_matchp,int * nextra_matches_allowedp,const char * count_stringp)381 get_match_counts(int *nmust_matchp,
382 int *nextra_matches_allowedp,
383 const char *count_stringp)
384 {
385 int minimum_match_count;
386 int maximum_match_count;
387
388 minimum_match_count =
389 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
390 *nmust_matchp = minimum_match_count;
391
392 count_stringp++;
393 maximum_match_count =
394 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
395 if (maximum_match_count == (int)UNLIMITED) {
396 *nextra_matches_allowedp = (int)UNLIMITED;
397 } else {
398 *nextra_matches_allowedp =
399 maximum_match_count - minimum_match_count;
400 }
401 return;
402
403 } /* get_match_counts() */
404
405 static boolean_t
in_wchar_range(wchar_t test_char,wchar_t lower_char,wchar_t upper_char)406 in_wchar_range(wchar_t test_char,
407 wchar_t lower_char,
408 wchar_t upper_char)
409 {
410 return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
411 (lower_char <= test_char) && (test_char <= upper_char)) ||
412 (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
413 ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
414 (lower_char <= test_char) && (test_char <= upper_char)));
415
416 } /* in_wchar_range() */
417
418 static const char *
pop_stringp(void)419 pop_stringp(void)
420 {
421 const char *stringp;
422
423 if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
424 return ((char *)0);
425 } else {
426 stringp = *stringp_stackp;
427 stringp_stackp++;
428 return (stringp);
429 }
430 }
431
432
433 static const char *
previous_charp(const char * current_charp)434 previous_charp(const char *current_charp)
435 {
436 /*
437 * returns the pointer to the previous character in
438 * a string of multibyte characters
439 */
440
441 const char *prev_cs0 = current_charp - 1;
442 const char *prev_cs1 = current_charp - eucw1;
443 const char *prev_cs2 = current_charp - eucw2 - 1;
444 const char *prev_cs3 = current_charp - eucw3 - 1;
445 const char *prev_charp;
446
447 if ((unsigned char)*prev_cs0 <= 0x7f) {
448 prev_charp = prev_cs0;
449 } else if ((unsigned char)*prev_cs2 == SS2) {
450 prev_charp = prev_cs2;
451 } else if ((unsigned char)*prev_cs3 == SS3) {
452 prev_charp = prev_cs3;
453 } else {
454 prev_charp = prev_cs1;
455 }
456 return (prev_charp);
457
458 } /* previous_charp() */
459
460 static const char *
push_stringp(const char * stringp)461 push_stringp(const char *stringp)
462 {
463 if (stringp_stackp <= &stringp_stack[0]) {
464 return ((char *)0);
465 } else {
466 stringp_stackp--;
467 *stringp_stackp = stringp;
468 return (stringp);
469 }
470 }
471
472
473 static char_test_result_t
test_char_against_ascii_class(char test_char,const char * classp,char_test_condition_t test_condition)474 test_char_against_ascii_class(char test_char,
475 const char *classp,
476 char_test_condition_t test_condition)
477 {
478 /*
479 * tests a character for membership in an ASCII character class compiled
480 * by the internationalized version of regcmp();
481 *
482 * NOTE: The internationalized version of regcmp() compiles
483 * the range a-z in an ASCII character class to aTHRUz.
484 */
485
486 int nbytes_to_check;
487
488 nbytes_to_check = (int)*classp;
489 classp++;
490 nbytes_to_check--;
491
492 while (nbytes_to_check > 0) {
493 if (test_char == *classp) {
494 if (test_condition == IN_CLASS)
495 return (CONDITION_TRUE);
496 else
497 return (CONDITION_FALSE);
498 } else if (*classp == THRU) {
499 if ((*(classp - 1) <= test_char) &&
500 (test_char <= *(classp + 1))) {
501 if (test_condition == IN_CLASS)
502 return (CONDITION_TRUE);
503 else
504 return (CONDITION_FALSE);
505 } else {
506 classp += 2;
507 nbytes_to_check -= 2;
508 }
509 } else {
510 classp++;
511 nbytes_to_check--;
512 }
513 }
514 if (test_condition == NOT_IN_CLASS) {
515 return (CONDITION_TRUE);
516 } else {
517 return (CONDITION_FALSE);
518 }
519 } /* test_char_against_ascii_class() */
520
521 static char_test_result_t
test_char_against_multibyte_class(wchar_t test_char,const char * classp,char_test_condition_t test_condition)522 test_char_against_multibyte_class(wchar_t test_char,
523 const char *classp,
524 char_test_condition_t test_condition)
525 {
526 /*
527 * tests a character for membership in a multibyte character class;
528 *
529 * NOTE: The range a-z in a multibyte character class compiles to
530 * aTHRUz.
531 */
532
533 int char_size;
534 wchar_t current_char;
535 int nbytes_to_check;
536 wchar_t previous_char;
537
538 nbytes_to_check = (int)*classp;
539 classp++;
540 nbytes_to_check--;
541
542 char_size = get_wchar(¤t_char, classp);
543 if (char_size <= 0) {
544 return (CHAR_TEST_ERROR);
545 } else if (test_char == current_char) {
546 if (test_condition == IN_CLASS) {
547 return (CONDITION_TRUE);
548 } else {
549 return (CONDITION_FALSE);
550 }
551 } else {
552 classp += char_size;
553 nbytes_to_check -= char_size;
554 }
555
556 while (nbytes_to_check > 0) {
557 previous_char = current_char;
558 char_size = get_wchar(¤t_char, classp);
559 if (char_size <= 0) {
560 return (CHAR_TEST_ERROR);
561 } else if (test_char == current_char) {
562 if (test_condition == IN_CLASS) {
563 return (CONDITION_TRUE);
564 } else {
565 return (CONDITION_FALSE);
566 }
567 } else if (current_char == THRU) {
568 classp += char_size;
569 nbytes_to_check -= char_size;
570 char_size = get_wchar(¤t_char, classp);
571 if (char_size <= 0) {
572 return (CHAR_TEST_ERROR);
573 } else if (in_wchar_range(test_char, previous_char,
574 current_char)) {
575 if (test_condition == IN_CLASS) {
576 return (CONDITION_TRUE);
577 } else {
578 return (CONDITION_FALSE);
579 }
580 } else {
581 classp += char_size;
582 nbytes_to_check -= char_size;
583 }
584 } else {
585 classp += char_size;
586 nbytes_to_check -= char_size;
587 }
588 }
589 if (test_condition == NOT_IN_CLASS) {
590 return (CONDITION_TRUE);
591 } else {
592 return (CONDITION_FALSE);
593 }
594 } /* test_char_against_multibyte_class() */
595
596
597 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
598
599 static char_test_result_t
test_char_against_old_ascii_class(char test_char,const char * classp,char_test_condition_t test_condition)600 test_char_against_old_ascii_class(char test_char,
601 const char *classp,
602 char_test_condition_t test_condition)
603 {
604 /*
605 * tests a character for membership in an ASCII character class compiled
606 * by the ASCII version of regcmp();
607 *
608 * NOTE: ASCII versions of regcmp() compile the range a-z in an
609 * ASCII character class to THRUaz. The internationalized
610 * version compiles the same range to aTHRUz.
611 */
612
613 int nbytes_to_check;
614
615 nbytes_to_check = (int)*classp;
616 classp++;
617 nbytes_to_check--;
618
619 while (nbytes_to_check > 0) {
620 if (test_char == *classp) {
621 if (test_condition == IN_CLASS) {
622 return (CONDITION_TRUE);
623 } else {
624 return (CONDITION_FALSE);
625 }
626 } else if (*classp == THRU) {
627 if ((*(classp + 1) <= test_char) &&
628 (test_char <= *(classp + 2))) {
629 if (test_condition == IN_CLASS) {
630 return (CONDITION_TRUE);
631 } else {
632 return (CONDITION_FALSE);
633 }
634 } else {
635 classp += 3;
636 nbytes_to_check -= 3;
637 }
638 } else {
639 classp++;
640 nbytes_to_check--;
641 }
642 }
643 if (test_condition == NOT_IN_CLASS) {
644 return (CONDITION_TRUE);
645 } else {
646 return (CONDITION_FALSE);
647 }
648 } /* test_char_against_old_ascii_class() */
649
/*
 * test_repeated_ascii_char() tries to match the rest of the regular
 * expression at stringp and, on failure, retries one byte earlier each
 * time until repeat_startp has been tried.
 *
 * Returns what test_string() returned for the first position that
 * matched, or a null pointer if no position matched.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
    const char *stringp,
    const char *regexp)
{
	for (;;) {
		const char *matchp = test_string(stringp, regexp);

		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp--;
	}
}
665
/*
 * test_repeated_multibyte_char() tries to match the rest of the regular
 * expression at stringp and, on failure, retries one multibyte
 * character earlier each time (via previous_charp()) for as long as
 * stringp is beyond repeat_startp.
 *
 * Returns what test_string() returned for the first position that
 * matched, or a null pointer if no position matched.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
    const char *stringp,
    const char *regexp)
{
	for (;;) {
		const char *matchp = test_string(stringp, regexp);

		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp = previous_charp(stringp);
	}
}
681
/*
 * test_repeated_group() tries to match the rest of the regular
 * expression at stringp and, on failure, retries at each position
 * popped from the string-position stack for as long as stringp is
 * beyond repeat_startp.
 *
 * Returns what test_string() returned for the first position that
 * matched, or a null pointer if no position matched or the stack
 * ran out of saved positions.
 */
static const char *
test_repeated_group(const char *repeat_startp,
    const char *stringp,
    const char *regexp)
{
	for (;;) {
		const char *matchp = test_string(stringp, regexp);

		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp = pop_stringp();
		if (stringp == (char *)0) {
			return ((char *)0);
		}
	}
}
700
701 static const char *
test_string(const char * stringp,const char * regexp)702 test_string(const char *stringp,
703 const char *regexp)
704 {
705 /*
706 * returns a pointer to the first character following the first
707 * substring of the string addressed by stringp that matches
708 * the compiled regular expression addressed by regexp
709 */
710
711 unsigned int group_length;
712 int nextra_matches_allowed;
713 int nmust_match;
714 wchar_t regex_wchar;
715 int regex_char_size;
716 const char *repeat_startp;
717 unsigned int return_argn;
718 wchar_t string_wchar;
719 int string_char_size;
720 unsigned int substringn;
721 char_test_condition_t test_condition;
722 const char *test_stringp;
723
724 for (;;) {
725
726 /*
727 * Exit the loop via a return whenever there's a match
728 * or it's clear that there can be no match.
729 */
730
731 switch ((int)*regexp) {
732
733 /*
734 * No fall-through.
735 * Each case ends with either a return or with stringp
736 * addressing the next character to be tested and regexp
737 * addressing the next compiled regular expression
738 *
739 * NOTE: The comments for each case give the meaning
740 * of the compiled regular expression decoded by the case
741 * and the character string that the compiled regular
742 * expression uses to encode the case. Each single
743 * character encoded in the compiled regular expression
744 * is shown enclosed in angle brackets (<>). Each
745 * compiled regular expression begins with a marker
746 * character which is shown as a named constant
747 * (e.g. <ASCII_CHAR>). Character constants are shown
748 * enclosed in single quotes (e.g. <'$'>). All other
749 * single characters encoded in the compiled regular
750 * expression are shown as lower case variable names
751 * (e.g. <ascii_char> or <multibyte_char>). Multicharacter
752 * strings encoded in the compiled regular expression
		 * are shown as variable names followed by ellipses
754 * (e.g. <compiled_regex...>).
755 */
756
757 case ASCII_CHAR: /* single ASCII char */
758
759 /* encoded as <ASCII_CHAR><ascii_char> */
760
761 regexp++;
762 if (*regexp == *stringp) {
763 regexp++;
764 stringp++;
765 } else {
766 return ((char *)0);
767 }
768 break; /* end case ASCII_CHAR */
769
770 case MULTIBYTE_CHAR: /* single multibyte char */
771
772 /* encoded as <MULTIBYTE_CHAR><multibyte_char> */
773
774 regexp++;
775 regex_char_size = get_wchar(®ex_wchar, regexp);
776 string_char_size = get_wchar(&string_wchar, stringp);
777 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
778 return ((char *)0);
779 } else {
780 regexp += regex_char_size;
781 stringp += string_char_size;
782 }
783 break; /* end case MULTIBYTE_CHAR */
784
785 case ANY_CHAR: /* any single ASCII or multibyte char */
786
787 /* encoded as <ANY_CHAR> */
788
789 if (!multibyte) {
790 if (*stringp == '\0') {
791 return ((char *)0);
792 } else {
793 regexp++;
794 stringp++;
795 }
796 } else {
797 string_char_size = get_wchar(&string_wchar, stringp);
798 if (string_char_size <= 0) {
799 return ((char *)0);
800 } else {
801 regexp++;
802 stringp += string_char_size;
803 }
804 }
805 break; /* end case ANY_CHAR */
806
807 case IN_ASCII_CHAR_CLASS: /* [.....] */
808 case NOT_IN_ASCII_CHAR_CLASS:
809
810 /*
811 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
812 * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
813 *
814 * NOTE: <class_length> includes the <class_length> byte
815 */
816
817 if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
818 test_condition = IN_CLASS;
819 } else {
820 test_condition = NOT_IN_CLASS;
821 }
822 regexp++; /* point to the <class_length> byte */
823
824 if ((*stringp != '\0') &&
825 (test_char_against_ascii_class(*stringp, regexp,
826 test_condition) == CONDITION_TRUE)) {
827 regexp += (int)*regexp; /* add the class length to regexp */
828 stringp++;
829 } else {
830 return ((char *)0);
831 }
832 break; /* end case IN_ASCII_CHAR_CLASS */
833
834 case IN_MULTIBYTE_CHAR_CLASS: /* [....] */
835 case NOT_IN_MULTIBYTE_CHAR_CLASS:
836
837 /*
838 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
839 * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
840 *
841 * NOTE: <class_length> includes the <class_length> byte
842 */
843
844 if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
845 test_condition = IN_CLASS;
846 } else {
847 test_condition = NOT_IN_CLASS;
848 }
849 regexp++; /* point to the <class_length> byte */
850
851 string_char_size = get_wchar(&string_wchar, stringp);
852 if ((string_char_size > 0) &&
853 (test_char_against_multibyte_class(string_wchar, regexp,
854 test_condition) == CONDITION_TRUE)) {
855 regexp += (int)*regexp; /* add the class length to regexp */
856 stringp += string_char_size;
857 } else {
858 return ((char *)0);
859 }
860 break; /* end case IN_MULTIBYTE_CHAR_CLASS */
861
862 case IN_OLD_ASCII_CHAR_CLASS: /* [...] */
863 case NOT_IN_OLD_ASCII_CHAR_CLASS:
864
865 /*
866 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
867 * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
868 *
869 * NOTE: <class_length> includes the <class_length> byte
870 */
871
872 if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
873 test_condition = IN_CLASS;
874 } else {
875 test_condition = NOT_IN_CLASS;
876 }
877 regexp++; /* point to the <class_length> byte */
878
879 if ((*stringp != '\0') &&
880 (test_char_against_old_ascii_class(*stringp, regexp,
881 test_condition) == CONDITION_TRUE)) {
882 regexp += (int)*regexp; /* add the class length to regexp */
883 stringp++;
884 } else {
885 return ((char *)0);
886 }
887 break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
888
889 case SIMPLE_GROUP: /* (.....) */
890
891 /* encoded as <SIMPLE_GROUP><group_length> */
892
893 regexp += 2;
894 break; /* end case SIMPLE_GROUP */
895
896 case END_GROUP: /* (.....) */
897
898 /* encoded as <END_GROUP><groupn> */
899
900 regexp += 2;
901 break; /* end case END_GROUP */
902
903 case SAVED_GROUP: /* (.....)$0-9 */
904
905 /* encoded as <SAVED_GROUP><substringn> */
906
907 regexp++;
908 substringn = (unsigned int)*regexp;
909 if (substringn >= NSUBSTRINGS)
910 return ((char *)0);
911 substring_startp[substringn] = stringp;
912 regexp++;
913 break; /* end case SAVED_GROUP */
914
915 case END_SAVED_GROUP: /* (.....)$0-9 */
916
917 /*
918 * encoded as <END_SAVED_GROUP><substringn>\
919 * <return_arg_number[substringn]>
920 */
921
922 regexp++;
923 substringn = (unsigned int)*regexp;
924 if (substringn >= NSUBSTRINGS)
925 return ((char *)0);
926 substring_endp[substringn] = stringp;
927 regexp++;
928 return_argn = (unsigned int)*regexp;
929 if (return_argn >= NSUBSTRINGS)
930 return ((char *)0);
931 return_arg_number[substringn] = return_argn;
932 regexp++;
933 break; /* end case END_SAVED_GROUP */
934
935 case ASCII_CHAR|ZERO_OR_MORE: /* char* */
936
937 /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
938
939 regexp++;
940 repeat_startp = stringp;
941 while (*stringp == *regexp) {
942 stringp++;
943 }
944 regexp++;
945 return (test_repeated_ascii_char(repeat_startp,
946 stringp, regexp));
947
948 /* end case ASCII_CHAR|ZERO_OR_MORE */
949
950 case ASCII_CHAR|ONE_OR_MORE: /* char+ */
951
952 /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
953
954 regexp++;
955 if (*stringp != *regexp) {
956 return ((char *)0);
957 } else {
958 stringp++;
959 repeat_startp = stringp;
960 while (*stringp == *regexp) {
961 stringp++;
962 }
963 regexp++;
964 return (test_repeated_ascii_char(repeat_startp, stringp,
965 regexp));
966 }
967 /* end case ASCII_CHAR|ONE_OR_MORE */
968
969 case ASCII_CHAR|COUNT: /* char{min_count,max_count} */
970
971 /*
972 * encoded as <ASCII_CHAR|COUNT><ascii_char>\
973 * <minimum_match_count><maximum_match_count>
974 */
975
976 regexp++;
977 get_match_counts(&nmust_match, &nextra_matches_allowed,
978 regexp + 1);
979 while ((*stringp == *regexp) && (nmust_match > 0)) {
980 nmust_match--;
981 stringp++;
982 }
983 if (nmust_match > 0) {
984 return ((char *)0);
985 } else if (nextra_matches_allowed == UNLIMITED) {
986 repeat_startp = stringp;
987 while (*stringp == *regexp) {
988 stringp++;
989 }
990 regexp += 3;
991 return (test_repeated_ascii_char(repeat_startp, stringp,
992 regexp));
993 } else {
994 repeat_startp = stringp;
995 while ((*stringp == *regexp) &&
996 (nextra_matches_allowed > 0)) {
997 nextra_matches_allowed--;
998 stringp++;
999 }
1000 regexp += 3;
1001 return (test_repeated_ascii_char(repeat_startp, stringp,
1002 regexp));
1003 }
1004 /* end case ASCII_CHAR|COUNT */
1005
1006 case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */
1007
1008 /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
1009
1010 regexp++;
1011 regex_char_size = get_wchar(®ex_wchar, regexp);
1012 repeat_startp = stringp;
1013 string_char_size = get_wchar(&string_wchar, stringp);
1014 while ((string_char_size > 0) &&
1015 (string_wchar == regex_wchar)) {
1016 stringp += string_char_size;
1017 string_char_size = get_wchar(&string_wchar, stringp);
1018 }
1019 regexp += regex_char_size;
1020 return (test_repeated_multibyte_char(repeat_startp, stringp,
1021 regexp));
1022
1023 /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
1024
1025 case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */
1026
1027 /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
1028
1029 regexp++;
1030 regex_char_size = get_wchar(®ex_wchar, regexp);
1031 string_char_size = get_wchar(&string_wchar, stringp);
1032 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
1033 return ((char *)0);
1034 } else {
1035 stringp += string_char_size;
1036 repeat_startp = stringp;
1037 string_char_size = get_wchar(&string_wchar, stringp);
1038 while ((string_char_size > 0) &&
1039 (string_wchar == regex_wchar)) {
1040 stringp += string_char_size;
1041 string_char_size = get_wchar(&string_wchar, stringp);
1042 }
1043 regexp += regex_char_size;
1044 return (test_repeated_multibyte_char(repeat_startp, stringp,
1045 regexp));
1046 }
1047 /* end case MULTIBYTE_CHAR|ONE_OR_MORE */
1048
1049 case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */
1050
1051 /*
1052 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
1053 * <minimum_match_count><maximum_match_count>
1054 */
1055
1056 regexp++;
1057 regex_char_size = get_wchar(®ex_wchar, regexp);
1058 get_match_counts(&nmust_match, &nextra_matches_allowed,
1059 regexp + regex_char_size);
1060 string_char_size = get_wchar(&string_wchar, stringp);
1061 while ((string_char_size > 0) &&
1062 (string_wchar == regex_wchar) &&
1063 (nmust_match > 0)) {
1064
1065 nmust_match--;
1066 stringp += string_char_size;
1067 string_char_size = get_wchar(&string_wchar, stringp);
1068 }
1069 if (nmust_match > 0) {
1070 return ((char *)0);
1071 } else if (nextra_matches_allowed == UNLIMITED) {
1072 repeat_startp = stringp;
1073 while ((string_char_size > 0) &&
1074 (string_wchar == regex_wchar)) {
1075 stringp += string_char_size;
1076 string_char_size = get_wchar(&string_wchar, stringp);
1077 }
1078 regexp += regex_char_size + 2;
1079 return (test_repeated_multibyte_char(repeat_startp, stringp,
1080 regexp));
1081 } else {
1082 repeat_startp = stringp;
1083 while ((string_char_size > 0) &&
1084 (string_wchar == regex_wchar) &&
1085 (nextra_matches_allowed > 0)) {
1086 nextra_matches_allowed--;
1087 stringp += string_char_size;
1088 string_char_size = get_wchar(&string_wchar, stringp);
1089 }
1090 regexp += regex_char_size + 2;
1091 return (test_repeated_multibyte_char(repeat_startp, stringp,
1092 regexp));
1093 }
1094 /* end case MULTIBYTE_CHAR|COUNT */
1095
1096 case ANY_CHAR|ZERO_OR_MORE: /* .* */
1097
1098 /* encoded as <ANY_CHAR|ZERO_OR_MORE> */
1099
1100 repeat_startp = stringp;
1101 if (!multibyte) {
1102 while (*stringp != '\0') {
1103 stringp++;
1104 }
1105 regexp++;
1106 return (test_repeated_ascii_char(repeat_startp, stringp,
1107 regexp));
1108 } else {
1109 string_char_size = get_wchar(&string_wchar, stringp);
1110 while (string_char_size > 0) {
1111 stringp += string_char_size;
1112 string_char_size = get_wchar(&string_wchar, stringp);
1113 }
1114 regexp++;
1115 return (test_repeated_multibyte_char(repeat_startp, stringp,
1116 regexp));
1117 }
1118 /* end case <ANY_CHAR|ZERO_OR_MORE> */
1119
1120 case ANY_CHAR|ONE_OR_MORE: /* .+ */
1121
1122 /* encoded as <ANY_CHAR|ONE_OR_MORE> */
1123
1124 if (!multibyte) {
1125 if (*stringp == '\0') {
1126 return ((char *)0);
1127 } else {
1128 stringp++;
1129 repeat_startp = stringp;
1130 while (*stringp != '\0') {
1131 stringp++;
1132 }
1133 regexp++;
1134 return (test_repeated_ascii_char(repeat_startp, stringp,
1135 regexp));
1136 }
1137 } else {
1138 string_char_size = get_wchar(&string_wchar, stringp);
1139 if (string_char_size <= 0) {
1140 return ((char *)0);
1141 } else {
1142 stringp += string_char_size;
1143 repeat_startp = stringp;
1144 string_char_size = get_wchar(&string_wchar, stringp);
1145 while (string_char_size > 0) {
1146 stringp += string_char_size;
1147 string_char_size =
1148 get_wchar(&string_wchar, stringp);
1149 }
1150 regexp++;
1151 return (test_repeated_multibyte_char(repeat_startp,
1152 stringp, regexp));
1153 }
1154 }
1155 /* end case <ANY_CHAR|ONE_OR_MORE> */
1156
1157 case ANY_CHAR|COUNT: /* .{min_count,max_count} */
1158
1159 /*
1160 * encoded as <ANY_CHAR|COUNT>\
1161 * <minimum_match_count><maximum_match_count>
1162 */
1163
1164 get_match_counts(&nmust_match, &nextra_matches_allowed,
1165 regexp + 1);
1166 if (!multibyte) {
1167 while ((*stringp != '\0') && (nmust_match > 0)) {
1168 nmust_match--;
1169 stringp++;
1170 }
1171 if (nmust_match > 0) {
1172 return ((char *)0);
1173 } else if (nextra_matches_allowed == UNLIMITED) {
1174 repeat_startp = stringp;
1175 while (*stringp != '\0') {
1176 stringp++;
1177 }
1178 regexp += 3;
1179 return (test_repeated_ascii_char(repeat_startp, stringp,
1180 regexp));
1181 } else {
1182 repeat_startp = stringp;
1183 while ((*stringp != '\0') &&
1184 (nextra_matches_allowed > 0)) {
1185 nextra_matches_allowed--;
1186 stringp++;
1187 }
1188 regexp += 3;
1189 return (test_repeated_ascii_char(repeat_startp, stringp,
1190 regexp));
1191 }
1192 } else { /* multibyte character */
1193
1194 string_char_size = get_wchar(&string_wchar, stringp);
1195 while ((string_char_size > 0) && (nmust_match > 0)) {
1196 nmust_match--;
1197 stringp += string_char_size;
1198 string_char_size = get_wchar(&string_wchar, stringp);
1199 }
1200 if (nmust_match > 0) {
1201 return ((char *)0);
1202 } else if (nextra_matches_allowed == UNLIMITED) {
1203 repeat_startp = stringp;
1204 while (string_char_size > 0) {
1205 stringp += string_char_size;
1206 string_char_size =
1207 get_wchar(&string_wchar, stringp);
1208 }
1209 regexp += 3;
1210 return (test_repeated_multibyte_char(repeat_startp,
1211 stringp, regexp));
1212 } else {
1213 repeat_startp = stringp;
1214 while ((string_char_size > 0) &&
1215 (nextra_matches_allowed > 0)) {
1216 nextra_matches_allowed--;
1217 stringp += string_char_size;
1218 string_char_size =
1219 get_wchar(&string_wchar, stringp);
1220 }
1221 regexp += 3;
1222 return (test_repeated_multibyte_char(repeat_startp,
1223 stringp, regexp));
1224 }
1225 } /* end case ANY_CHAR|COUNT */
1226
1227 case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
1228 case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1229
1230 /*
1231 * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1232 * <class_length><class ...>
1233 * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1234 * <class_length><class ...>
1235 *
1236 * NOTE: <class_length> includes the <class_length> byte
1237 */
1238
1239 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1240 test_condition = IN_CLASS;
1241 } else {
1242 test_condition = NOT_IN_CLASS;
1243 }
1244 regexp++; /* point to the <class_length> byte */
1245
1246 repeat_startp = stringp;
1247 while ((*stringp != '\0') &&
1248 (test_char_against_ascii_class(*stringp, regexp,
1249 test_condition) == CONDITION_TRUE)) {
1250 stringp++;
1251 }
1252 regexp += (int)*regexp; /* add the class length to regexp */
1253 return (test_repeated_ascii_char(repeat_startp, stringp,
1254 regexp));
1255
1256 /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1257
1258 case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
1259 case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
1260
1261 /*
1262 * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1263 * <class_length><class ...>
1264 * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1265 * <class_length><class ...>
1266 *
1267 * NOTE: <class_length> includes the <class_length> byte
1268 */
1269
1270 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1271 test_condition = IN_CLASS;
1272 } else {
1273 test_condition = NOT_IN_CLASS;
1274 }
1275 regexp++; /* point to the <class_length> byte */
1276
1277 if ((*stringp == '\0') ||
1278 (test_char_against_ascii_class(*stringp, regexp,
1279 test_condition) != CONDITION_TRUE)) {
1280 return ((char *)0);
1281 } else {
1282 stringp++;
1283 repeat_startp = stringp;
1284 while ((*stringp != '\0') &&
1285 (test_char_against_ascii_class(*stringp, regexp,
1286 test_condition) == CONDITION_TRUE)) {
1287 stringp++;
1288 }
1289 regexp += (int)*regexp; /* add the class length to regexp */
1290 return (test_repeated_ascii_char(repeat_startp, stringp,
1291 regexp));
1292 }
1293 /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
1294
1295 case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{min_count,max_count} */
1296 case NOT_IN_ASCII_CHAR_CLASS | COUNT:
1297
1298 /*
1299 * encoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1300 * <class ...><minimum_match_count>\
1301 * <maximum_match_count>
1302 * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1303 * <class ...><minimum_match_count>\
1304 * <maximum_match_count>
1305 *
1306 * NOTE: <class_length> includes the <class_length> byte,
1307 * but not the <minimum_match_count> or
1308 * <maximum_match_count> bytes
1309 */
1310
1311 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
1312 test_condition = IN_CLASS;
1313 } else {
1314 test_condition = NOT_IN_CLASS;
1315 }
1316 regexp++; /* point to the <class_length> byte */
1317
1318 get_match_counts(&nmust_match, &nextra_matches_allowed,
1319 regexp + (int)*regexp);
1320 while ((*stringp != '\0') &&
1321 (test_char_against_ascii_class(*stringp, regexp,
1322 test_condition) == CONDITION_TRUE) &&
1323 (nmust_match > 0)) {
1324 nmust_match--;
1325 stringp++;
1326 }
1327 if (nmust_match > 0) {
1328 return ((char *)0);
1329 } else if (nextra_matches_allowed == UNLIMITED) {
1330 repeat_startp = stringp;
1331 while ((*stringp != '\0') &&
1332 (test_char_against_ascii_class(*stringp, regexp,
1333 test_condition) == CONDITION_TRUE)) {
1334 stringp++;
1335 }
1336 regexp += (int)*regexp + 2;
1337 return (test_repeated_ascii_char(repeat_startp, stringp,
1338 regexp));
1339 } else {
1340 repeat_startp = stringp;
1341 while ((*stringp != '\0') &&
1342 (test_char_against_ascii_class(*stringp, regexp,
1343 test_condition) == CONDITION_TRUE) &&
1344 (nextra_matches_allowed > 0)) {
1345 nextra_matches_allowed--;
1346 stringp++;
1347 }
1348 regexp += (int)*regexp + 2;
1349 return (test_repeated_ascii_char(repeat_startp, stringp,
1350 regexp));
1351 }
1352 /* end case IN_ASCII_CHAR_CLASS|COUNT */
1353
1354 case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
1355 case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
1356
1357 /*
1358 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1359 * <class_length><class ...>
1360 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1361 * <class_length><class ...>
1362 *
1363 * NOTE: <class_length> includes the <class_length> byte
1364 */
1365
1366 if ((int)*regexp ==
1367 (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
1368 test_condition = IN_CLASS;
1369 } else {
1370 test_condition = NOT_IN_CLASS;
1371 }
1372 regexp++; /* point to the <class_length> byte */
1373
1374 repeat_startp = stringp;
1375 string_char_size = get_wchar(&string_wchar, stringp);
1376 while ((string_char_size > 0) &&
1377 (test_char_against_multibyte_class(string_wchar, regexp,
1378 test_condition) == CONDITION_TRUE)) {
1379 stringp += string_char_size;
1380 string_char_size = get_wchar(&string_wchar, stringp);
1381 }
1382 regexp += (int)*regexp; /* add the class length to regexp */
1383 return (test_repeated_multibyte_char(repeat_startp, stringp,
1384 regexp));
1385
1386 /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
1387
1388 case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
1389 case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
1390
1391 /*
1392 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1393 * <class_length><class ...>
1394 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1395 * <class_length><class ...>
1396 *
1397 * NOTE: <class_length> includes the <class_length> byte
1398 */
1399
1400 if ((int)*regexp ==
1401 (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
1402 test_condition = IN_CLASS;
1403 } else {
1404 test_condition = NOT_IN_CLASS;
1405 }
1406 regexp++; /* point to the <class_length> byte */
1407
1408 string_char_size = get_wchar(&string_wchar, stringp);
1409 if ((string_char_size <= 0) ||
1410 (test_char_against_multibyte_class(string_wchar, regexp,
1411 test_condition) != CONDITION_TRUE)) {
1412 return ((char *)0);
1413 } else {
1414 stringp += string_char_size;
1415 repeat_startp = stringp;
1416 string_char_size = get_wchar(&string_wchar, stringp);
1417 while ((string_char_size > 0) &&
1418 (test_char_against_multibyte_class(string_wchar,
1419 regexp, test_condition) == CONDITION_TRUE)) {
1420 stringp += string_char_size;
1421 string_char_size = get_wchar(&string_wchar, stringp);
1422 }
1423 regexp += (int)*regexp; /* add the class length to regexp */
1424 return (test_repeated_multibyte_char(repeat_startp, stringp,
1425 regexp));
1426 }
1427 /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
1428
1429 case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1430 case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
1431
1432 /*
1433 * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1434 * <class_length><class ...><min_count><max_count>
1435 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1436 * <class_length><class ...><min_count><max_count>
1437 *
1438 * NOTE: <class_length> includes the <class_length> byte
1439 * but not the <minimum_match_count> or
1440 * <maximum_match_count> bytes
1441 */
1442
1443 if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
1444 test_condition = IN_CLASS;
1445 } else {
1446 test_condition = NOT_IN_CLASS;
1447 }
1448 regexp++; /* point to the <class_length> byte */
1449
1450 get_match_counts(&nmust_match, &nextra_matches_allowed,
1451 regexp + (int)*regexp);
1452 string_char_size = get_wchar(&string_wchar, stringp);
1453 while ((string_char_size > 0) &&
1454 (test_char_against_multibyte_class(string_wchar, regexp,
1455 test_condition) == CONDITION_TRUE) &&
1456 (nmust_match > 0)) {
1457 nmust_match--;
1458 stringp += string_char_size;
1459 string_char_size = get_wchar(&string_wchar, stringp);
1460 }
1461 if (nmust_match > 0) {
1462 return ((char *)0);
1463 } else if (nextra_matches_allowed == UNLIMITED) {
1464 repeat_startp = stringp;
1465 while ((string_char_size > 0) &&
1466 (test_char_against_multibyte_class(string_wchar,
1467 regexp, test_condition) == CONDITION_TRUE)) {
1468 stringp += string_char_size;
1469 string_char_size = get_wchar(&string_wchar, stringp);
1470 }
1471 regexp += (int)*regexp + 2;
1472 return (test_repeated_multibyte_char(repeat_startp, stringp,
1473 regexp));
1474 } else {
1475 repeat_startp = stringp;
1476 while ((string_char_size > 0) &&
1477 (test_char_against_multibyte_class(string_wchar,
1478 regexp, test_condition) == CONDITION_TRUE) &&
1479 (nextra_matches_allowed > 0)) {
1480 nextra_matches_allowed--;
1481 stringp += string_char_size;
1482 string_char_size = get_wchar(&string_wchar, stringp);
1483 }
1484 regexp += (int)*regexp + 2;
1485 return (test_repeated_multibyte_char(repeat_startp, stringp,
1486 regexp));
1487 }
1488 /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
1489
1490 case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
1491 case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1492
1493 /*
1494 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1495 * <class_length><class ...>
1496 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1497 * <class_length><class ...>
1498 *
1499 * NOTE: <class_length> includes the <class_length> byte
1500 */
1501
1502 if ((int)*regexp ==
1503 (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1504 test_condition = IN_CLASS;
1505 } else {
1506 test_condition = NOT_IN_CLASS;
1507 }
1508 regexp++; /* point to the <class_length> byte */
1509
1510 repeat_startp = stringp;
1511 while ((*stringp != '\0') &&
1512 (test_char_against_old_ascii_class(*stringp, regexp,
1513 test_condition) == CONDITION_TRUE)) {
1514 stringp++;
1515 }
1516 regexp += (int)*regexp; /* add the class length to regexp */
1517 return (test_repeated_ascii_char(repeat_startp, stringp,
1518 regexp));
1519
1520 /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1521
1522 case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
1523 case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
1524
1525 /*
1526 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1527 * <class_length><class ...>
1528 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1529 * <class_length><class ...>
1530 *
1531 * NOTE: <class_length> includes the <class_length> byte
1532 */
1533
1534 if ((int)*regexp ==
1535 (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1536 test_condition = IN_CLASS;
1537 } else {
1538 test_condition = NOT_IN_CLASS;
1539 }
1540 regexp++; /* point to the <class_length> byte */
1541
1542 if ((*stringp == '\0') ||
1543 (test_char_against_old_ascii_class(*stringp, regexp,
1544 test_condition) != CONDITION_TRUE)) {
1545 return ((char *)0);
1546 } else {
1547 stringp++;
1548 repeat_startp = stringp;
1549 while ((*stringp != '\0') &&
1550 (test_char_against_old_ascii_class(*stringp, regexp,
1551 test_condition) == CONDITION_TRUE)) {
1552 stringp++;
1553 }
1554 regexp += (int)*regexp; /* add the class length to regexp */
1555 return (test_repeated_ascii_char(repeat_startp, stringp,
1556 regexp));
1557 }
1558 /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
1559
1560 case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1561 case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
1562
1563 /*
1564 * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
1565 * <class ...><minimum_match_count>\
1566 * <maximum_match_count>
1567 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
1568 * <class_length><class ...><minimum_match_count>\
1569 * <maximum_match_count>
1570 *
1571 * NOTE: <class_length> includes the <class_length> byte
1572 * but not the <minimum_match_count> or
1573 * <maximum_match_count> bytes
1574 */
1575
1576 if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
1577 test_condition = IN_CLASS;
1578 } else {
1579 test_condition = NOT_IN_CLASS;
1580 }
1581 regexp++; /* point to the <class_length> byte */
1582
1583 get_match_counts(&nmust_match, &nextra_matches_allowed,
1584 regexp + (int)*regexp);
1585 while ((*stringp != '\0') &&
1586 (test_char_against_old_ascii_class(*stringp, regexp,
1587 test_condition) == CONDITION_TRUE) &&
1588 (nmust_match > 0)) {
1589 nmust_match--;
1590 stringp++;
1591 }
1592 if (nmust_match > 0) {
1593 return ((char *)0);
1594 } else if (nextra_matches_allowed == UNLIMITED) {
1595 repeat_startp = stringp;
1596 while ((*stringp != '\0') &&
1597 (test_char_against_old_ascii_class(*stringp, regexp,
1598 test_condition) == CONDITION_TRUE)) {
1599 stringp++;
1600 }
1601 regexp += (int)*regexp + 2;
1602 return (test_repeated_ascii_char(repeat_startp, stringp,
1603 regexp));
1604 } else {
1605 repeat_startp = stringp;
1606 while ((*stringp != '\0') &&
1607 (test_char_against_old_ascii_class(*stringp, regexp,
1608 test_condition) == CONDITION_TRUE) &&
1609 (nextra_matches_allowed > 0)) {
1610 nextra_matches_allowed--;
1611 stringp++;
1612 }
1613 regexp += (int)*regexp + 2;
1614 return (test_repeated_ascii_char(repeat_startp, stringp,
1615 regexp));
1616 }
1617 /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
1618
1619 case ZERO_OR_MORE_GROUP: /* (.....)* */
1620 case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1621 case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1622 case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1623
1624 /*
1625 * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1626 * <group_length><compiled_regex...>\
1627 * <END_GROUP|ZERO_OR_MORE><groupn>
1628 *
1629 * NOTE:
1630 *
1631 * group_length + (256 * ADDED_LENGTH_BITS) ==
1632 * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
1633 * <groupn>)
1634 *
1635 */
1636
1637 group_length =
1638 (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1639 TIMES_256_SHIFT);
1640 regexp++;
1641 group_length += (unsigned int)*regexp;
1642 regexp++;
1643 repeat_startp = stringp;
1644 test_stringp = test_string(stringp, regexp);
1645 while (test_stringp != (char *)0) {
1646 if (push_stringp(stringp) == (char *)0)
1647 return ((char *)0);
1648 stringp = test_stringp;
1649 test_stringp = test_string(stringp, regexp);
1650 }
1651 regexp += group_length;
1652 return (test_repeated_group(repeat_startp, stringp, regexp));
1653
1654 /* end case ZERO_OR_MORE_GROUP */
1655
1656 case END_GROUP|ZERO_OR_MORE: /* (.....)* */
1657
1658 /* encoded as <END_GROUP|ZERO_OR_MORE> */
1659
1660 /* return from recursive call to test_string() */
1661
1662 return ((char *)stringp);
1663
1664 /* end case END_GROUP|ZERO_OR_MORE */
1665
1666 case ONE_OR_MORE_GROUP: /* (.....)+ */
1667 case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1668 case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1669 case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1670
1671 /*
1672 * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1673 * <group_length><compiled_regex...>\
1674 * <END_GROUP|ONE_OR_MORE><groupn>
1675 *
1676 * NOTE:
1677 *
1678 * group_length + (256 * ADDED_LENGTH_BITS) ==
1679 * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
1680 * <groupn>)
1681 */
1682
1683 group_length =
1684 (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1685 TIMES_256_SHIFT);
1686 regexp++;
1687 group_length += (unsigned int)*regexp;
1688 regexp++;
1689 stringp = test_string(stringp, regexp);
1690 if (stringp == (char *)0)
1691 return ((char *)0);
1692 repeat_startp = stringp;
1693 test_stringp = test_string(stringp, regexp);
1694 while (test_stringp != (char *)0) {
1695 if (push_stringp(stringp) == (char *)0)
1696 return ((char *)0);
1697 stringp = test_stringp;
1698 test_stringp = test_string(stringp, regexp);
1699 }
1700 regexp += group_length;
1701 return (test_repeated_group(repeat_startp, stringp, regexp));
1702
1703 /* end case ONE_OR_MORE_GROUP */
1704
1705 case END_GROUP|ONE_OR_MORE: /* (.....)+ */
1706
1707 /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
1708
1709 /* return from recursive call to test_string() */
1710
1711 return ((char *)stringp);
1712
1713 /* end case END_GROUP|ONE_OR_MORE */
1714
1715 case COUNTED_GROUP: /* (.....){min_count,max_count} */
1716 case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
1717 case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
1718 case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
1719
1720 /*
1721 * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
1722 * <compiled_regex...>\<END_GROUP|COUNT><groupn>\
1723 * <minimum_match_count><maximum_match_count>
1724 *
1725 * NOTE:
1726 *
1727 * group_length + (256 * ADDED_LENGTH_BITS) ==
1728 * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
1729 *
1730 * but does not include the <minimum_match_count> or
1731 * <maximum_match_count> bytes
1732 */
1733
1734 group_length =
1735 (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1736 TIMES_256_SHIFT);
1737 regexp++;
1738 group_length += (unsigned int)*regexp;
1739 regexp++;
1740 get_match_counts(&nmust_match, &nextra_matches_allowed,
1741 regexp + group_length);
1742 test_stringp = test_string(stringp, regexp);
1743 while ((test_stringp != (char *)0) && (nmust_match > 0)) {
1744 stringp = test_stringp;
1745 nmust_match--;
1746 test_stringp = test_string(stringp, regexp);
1747 }
1748 if (nmust_match > 0) {
1749 return ((char *)0);
1750 } else if (nextra_matches_allowed == UNLIMITED) {
1751 repeat_startp = stringp;
1752 while (test_stringp != (char *)0) {
1753 if (push_stringp(stringp) == (char *)0)
1754 return ((char *)0);
1755 stringp = test_stringp;
1756 test_stringp = test_string(stringp, regexp);
1757 }
1758 regexp += group_length + 2;
1759 return (test_repeated_group(repeat_startp, stringp,
1760 regexp));
1761 } else {
1762 repeat_startp = stringp;
1763 while ((test_stringp != (char *)0) &&
1764 (nextra_matches_allowed > 0)) {
1765 nextra_matches_allowed--;
1766 if (push_stringp(stringp) == (char *)0)
1767 return ((char *)0);
1768 stringp = test_stringp;
1769 test_stringp = test_string(stringp, regexp);
1770 }
1771 regexp += group_length + 2;
1772 return (test_repeated_group(repeat_startp, stringp,
1773 regexp));
1774 }
1775 /* end case COUNTED_GROUP */
1776
1777 case END_GROUP|COUNT: /* (.....){min_count,max_count} */
1778
1779 /* encoded as <END_GROUP|COUNT> */
1780
1781 /* return from recursive call to test_string() */
1782
1783 return (stringp);
1784
1785 /* end case END_GROUP|COUNT */
1786
1787 case END_OF_STRING_MARK:
1788
1789 /* encoded as <END_OF_STRING_MARK><END_REGEX> */
1790
1791 if (*stringp == '\0') {
1792 regexp++;
1793 } else {
1794 return ((char *)0);
1795 }
1796 break; /* end case END_OF_STRING_MARK */
1797
1798 case END_REGEX: /* end of the compiled regular expression */
1799
1800 /* encoded as <END_REGEX> */
1801
1802 return (stringp);
1803
1804 /* end case END_REGEX */
1805
1806 default:
1807
1808 return ((char *)0);
1809
1810 } /* end switch (*regexp) */
1811
1812 } /* end for (;;) */
1813
1814 } /* test_string() */
1815