1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 /*
33 * IMPORTANT NOTE:
34 *
35 * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
36 * IT IS **NOT** CHARACTER SET INDEPENDENT.
37 *
38 */
39
40 #pragma weak _regex = regex
41
42 #include "lint.h"
43 /* CONSTANTS SHARED WITH regcmp() */
44 #include "regex.h"
45 #include "mtlib.h"
46 #include <limits.h>
47 #include <stdarg.h>
48 #include <stdlib.h>
49 #include <thread.h>
50 #include <widec.h>
51 #include "tsd.h"
52
53
54 /* PRIVATE CONSTANTS */
55
/*
 * ADD_*_TO_GROUP_LENGTH / ADDED_LENGTH_BITS: not referenced in the part of
 * this file reviewed here; presumably flag bits that extend an encoded
 * group length (TODO confirm against test_string() and regcmp()).
 */
#define ADD_256_TO_GROUP_LENGTH 0x1
#define ADD_512_TO_GROUP_LENGTH 0x2
#define ADD_768_TO_GROUP_LENGTH 0x3
#define ADDED_LENGTH_BITS 0x3
/* keeps only the low-order 8 bits of a value read from the compiled regex */
#define SINGLE_BYTE_MASK 0xff
/* number of entries in stringp_stack[] */
#define STRINGP_STACK_SIZE 50
62
63
64 /* PRIVATE TYPE DEFINITIONS */
65
/*
 * Membership condition that a character class test is asked to verify:
 * the character must be in the class (IN_CLASS) or must not be in it
 * (NOT_IN_CLASS).
 */
typedef enum {
	NOT_IN_CLASS = 0,
	IN_CLASS
} char_test_condition_t;

/*
 * Outcome of a character class test.  CONDITION_TRUE / CONDITION_FALSE
 * report whether the requested condition holds; CHAR_TEST_ERROR is
 * returned by the multibyte class test when a class member cannot be
 * decoded.  TESTING_CHAR is not returned by any function visible here.
 */
typedef enum {
	TESTING_CHAR = 0,
	CONDITION_TRUE,
	CONDITION_FALSE,
	CHAR_TEST_ERROR
} char_test_result_t;
77
78
79 /* PRIVATE GLOBAL VARIABLES */
80
/* held by regex() from its entry checks until it returns */
static mutex_t regex_lock = DEFAULTMUTEX;
/*
 * return_arg_number[n] is the index of the regex() variable argument that
 * receives saved substring n; regex() resets each entry to -1 (none).
 */
static int return_arg_number[NSUBSTRINGS];
/* end (exclusive) and start of each saved substring within the string */
static const char *substring_endp[NSUBSTRINGS];
static const char *substring_startp[NSUBSTRINGS];
/*
 * Stack of string pointers used by push_stringp()/pop_stringp().
 * The stack grows downward: stringp_stackp addresses the most recently
 * pushed entry and equals &stringp_stack[STRINGP_STACK_SIZE] when empty.
 */
static const char *stringp_stack[STRINGP_STACK_SIZE];
static const char **stringp_stackp;
87
88
89 /* DECLARATIONS OF PRIVATE FUNCTIONS */
90
/* decodes the character at stringp into *wcharp; returns its size in bytes */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp);

/* decodes the <minimum_match_count><maximum_match_count> byte pair */
static void
get_match_counts(int *nmust_matchp,
	int *nextra_matches_allowedp,
	const char *count_stringp);

/* tests whether test_char lies in the range lower_char..upper_char */
static boolean_t
in_wchar_range(wchar_t test_char,
	wchar_t lower_char,
	wchar_t upper_char);

/* pops the most recently pushed string pointer; null if the stack is empty */
static const char *
pop_stringp(void);

/* returns the address of the multibyte character preceding current_charp */
static const char *
previous_charp(const char *current_charp);

/* pushes a string pointer; returns null if the stack is full */
static const char *
push_stringp(const char *stringp);

/* class test for classes compiled by the internationalized regcmp() */
static char_test_result_t
test_char_against_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/* class test for multibyte character classes */
static char_test_result_t
test_char_against_multibyte_class(wchar_t test_char,
	const char *classp,
	char_test_condition_t test_condition);


/* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */

static char_test_result_t
test_char_against_old_ascii_class(char test_char,
	const char *classp,
	char_test_condition_t test_condition);

/*
 * The test_repeated_*() functions try to match the rest of the regular
 * expression at stringp and, on failure, back up toward repeat_startp.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp);

/* matches the string at stringp against the compiled expression at regexp */
static const char *
test_string(const char *stringp,
	const char *regexp);
150
151
152 /* DEFINITIONS OF PUBLIC VARIABLES */
153
/* regex() stores the address of the start of the matched substring here */
char *__loc1;
155
156 /*
157 * reserve thread-specific storage for __loc1
158 */
159 char **
____loc1(void)160 ____loc1(void)
161 {
162 if (thr_main())
163 return (&__loc1);
164 return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
165 }
166
/* from here on, __loc1 refers to the storage selected by ____loc1() */
#define __loc1 (*(____loc1()))
168
169 /* DEFINITION OF regex() */
170
171 extern char *
regex(const char * regexp,const char * stringp,...)172 regex(const char *regexp, const char *stringp, ...)
173 {
174 va_list arg_listp;
175 int char_size;
176 const char *end_of_matchp;
177 wchar_t regex_wchar;
178 char *return_argp[NSUBSTRINGS];
179 char *returned_substringp;
180 int substringn;
181 const char *substringp;
182 wchar_t string_wchar;
183
184 if (____loc1() == (char **)0) {
185 return ((char *)0);
186 } else {
187 lmutex_lock(®ex_lock);
188 __loc1 = (char *)0;
189 }
190
191 if ((stringp == (char *)0) || (regexp == (char *)0)) {
192 lmutex_unlock(®ex_lock);
193 return ((char *)0);
194 }
195
196
197 /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */
198
199 substringn = 0;
200 va_start(arg_listp, stringp);
201 while (substringn < NSUBSTRINGS) {
202 return_argp[substringn] = va_arg(arg_listp, char *);
203 substring_startp[substringn] = (char *)0;
204 return_arg_number[substringn] = -1;
205 substringn++;
206 }
207 va_end(arg_listp);
208
209
210 /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
211
212 end_of_matchp = (char *)0;
213 stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
214
215 if ((int)*regexp == (int)START_OF_STRING_MARK) {
216
217 /*
218 * the match must start at the beginning of the string
219 */
220
221 __loc1 = (char *)stringp;
222 regexp++;
223 end_of_matchp = test_string(stringp, regexp);
224
225 } else if ((int)*regexp == (int)ASCII_CHAR) {
226
227 /*
228 * test a string against a regular expression
229 * that starts with a single ASCII character:
230 *
231 * move to each character in the string that matches
232 * the first character in the regular expression
233 * and test the remaining string
234 */
235
236 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
237 stringp++;
238 }
239 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
240 end_of_matchp = test_string(stringp, regexp);
241 if (end_of_matchp != (char *)0) {
242 __loc1 = (char *)stringp;
243 } else {
244 stringp++;
245 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
246 stringp++;
247 }
248 }
249 }
250
251 } else if (!multibyte) {
252
253 /*
254 * if the value of the "multibyte" macro defined in <euc.h>
255 * is false, regex() is running in an ASCII locale;
256 * test an ASCII string against an ASCII regular expression
257 * that doesn't start with a single ASCII character:
258 *
259 * move forward in the string one byte at a time, testing
260 * the remaining string against the regular expression
261 */
262
263 end_of_matchp = test_string(stringp, regexp);
264 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
265 stringp++;
266 end_of_matchp = test_string(stringp, regexp);
267 }
268 if (end_of_matchp != (char *)0) {
269 __loc1 = (char *)stringp;
270 }
271
272 } else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
273
274 /*
275 * test a multibyte string against a multibyte regular expression
276 * that starts with a single multibyte character:
277 *
278 * move to each character in the string that matches
279 * the first character in the regular expression
280 * and test the remaining string
281 */
282
283 (void) get_wchar(®ex_wchar, regexp + 1);
284 char_size = get_wchar(&string_wchar, stringp);
285 while ((string_wchar != regex_wchar) && (char_size > 0)) {
286 stringp += char_size;
287 char_size = get_wchar(&string_wchar, stringp);
288 }
289 while ((end_of_matchp == (char *)0) && (char_size > 0)) {
290 end_of_matchp = test_string(stringp, regexp);
291 if (end_of_matchp != (char *)0) {
292 __loc1 = (char *)stringp;
293 } else {
294 stringp += char_size;
295 char_size = get_wchar(&string_wchar, stringp);
296 while ((string_wchar != regex_wchar) && (char_size > 0)) {
297 stringp += char_size;
298 char_size = get_wchar(&string_wchar, stringp);
299 }
300 }
301 }
302
303 } else {
304
305 /*
306 * test a multibyte string against a multibyte regular expression
307 * that doesn't start with a single multibyte character
308 *
309 * move forward in the string one multibyte character at a time,
310 * testing the remaining string against the regular expression
311 */
312
313 end_of_matchp = test_string(stringp, regexp);
314 char_size = get_wchar(&string_wchar, stringp);
315 while ((end_of_matchp == (char *)0) && (char_size > 0)) {
316 stringp += char_size;
317 end_of_matchp = test_string(stringp, regexp);
318 char_size = get_wchar(&string_wchar, stringp);
319 }
320 if (end_of_matchp != (char *)0) {
321 __loc1 = (char *)stringp;
322 }
323 }
324
325 /*
326 * Return substrings that matched subexpressions for which
327 * matching substrings are to be returned.
328 *
329 * NOTE:
330 *
331 * According to manual page regcmp(3G), regex() returns substrings
332 * that match subexpressions even when no substring matches the
333 * entire regular expression.
334 */
335
336 substringn = 0;
337 while (substringn < NSUBSTRINGS) {
338 substringp = substring_startp[substringn];
339 if ((substringp != (char *)0) &&
340 (return_arg_number[substringn] >= 0)) {
341 returned_substringp =
342 return_argp[return_arg_number[substringn]];
343 if (returned_substringp != (char *)0) {
344 while (substringp < substring_endp[substringn]) {
345 *returned_substringp = (char)*substringp;
346 returned_substringp++;
347 substringp++;
348 }
349 *returned_substringp = '\0';
350 }
351 }
352 substringn++;
353 }
354 lmutex_unlock(®ex_lock);
355 return ((char *)end_of_matchp);
356 } /* regex() */
357
358
359 /* DEFINITIONS OF PRIVATE FUNCTIONS */
360
/*
 * get_wchar() - decode the character addressed by stringp
 *
 * wcharp	receives the decoded character
 * stringp	string to decode from; may be null
 *
 * Returns the size of the character in bytes: 0 for a null pointer or
 * the terminating null character, 1 for a 7-bit character, otherwise
 * whatever mbtowc() returns (which is not positive if the bytes cannot
 * be converted).
 *
 * *wcharp is always assigned: it is set to 0 whenever the returned size
 * is not positive, so callers that compare the character before they
 * look at the size never read an unset value.
 */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp)
{
	int char_size;

	if ((stringp == (char *)0) || (*stringp == '\0')) {
		char_size = 0;
		*wcharp = (wchar_t)0;
	} else if ((unsigned char)*stringp <= (unsigned char)0x7f) {
		char_size = 1;
		*wcharp = (wchar_t)((unsigned int)*stringp);
	} else {
		char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
		if (char_size <= 0) {
			/* failed conversion: don't leave *wcharp unset */
			*wcharp = (wchar_t)0;
		}
	}
	return (char_size);
}
381
382 static void
get_match_counts(int * nmust_matchp,int * nextra_matches_allowedp,const char * count_stringp)383 get_match_counts(int *nmust_matchp,
384 int *nextra_matches_allowedp,
385 const char *count_stringp)
386 {
387 int minimum_match_count;
388 int maximum_match_count;
389
390 minimum_match_count =
391 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
392 *nmust_matchp = minimum_match_count;
393
394 count_stringp++;
395 maximum_match_count =
396 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
397 if (maximum_match_count == (int)UNLIMITED) {
398 *nextra_matches_allowedp = (int)UNLIMITED;
399 } else {
400 *nextra_matches_allowedp =
401 maximum_match_count - minimum_match_count;
402 }
403 return;
404
405 } /* get_match_counts() */
406
407 static boolean_t
in_wchar_range(wchar_t test_char,wchar_t lower_char,wchar_t upper_char)408 in_wchar_range(wchar_t test_char,
409 wchar_t lower_char,
410 wchar_t upper_char)
411 {
412 return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
413 (lower_char <= test_char) && (test_char <= upper_char)) ||
414 (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
415 ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
416 (lower_char <= test_char) && (test_char <= upper_char)));
417
418 } /* in_wchar_range() */
419
420 static const char *
pop_stringp(void)421 pop_stringp(void)
422 {
423 const char *stringp;
424
425 if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
426 return ((char *)0);
427 } else {
428 stringp = *stringp_stackp;
429 stringp_stackp++;
430 return (stringp);
431 }
432 }
433
434
435 static const char *
previous_charp(const char * current_charp)436 previous_charp(const char *current_charp)
437 {
438 /*
439 * returns the pointer to the previous character in
440 * a string of multibyte characters
441 */
442
443 const char *prev_cs0 = current_charp - 1;
444 const char *prev_cs1 = current_charp - eucw1;
445 const char *prev_cs2 = current_charp - eucw2 - 1;
446 const char *prev_cs3 = current_charp - eucw3 - 1;
447 const char *prev_charp;
448
449 if ((unsigned char)*prev_cs0 <= 0x7f) {
450 prev_charp = prev_cs0;
451 } else if ((unsigned char)*prev_cs2 == SS2) {
452 prev_charp = prev_cs2;
453 } else if ((unsigned char)*prev_cs3 == SS3) {
454 prev_charp = prev_cs3;
455 } else {
456 prev_charp = prev_cs1;
457 }
458 return (prev_charp);
459
460 } /* previous_charp() */
461
462 static const char *
push_stringp(const char * stringp)463 push_stringp(const char *stringp)
464 {
465 if (stringp_stackp <= &stringp_stack[0]) {
466 return ((char *)0);
467 } else {
468 stringp_stackp--;
469 *stringp_stackp = stringp;
470 return (stringp);
471 }
472 }
473
474
475 static char_test_result_t
test_char_against_ascii_class(char test_char,const char * classp,char_test_condition_t test_condition)476 test_char_against_ascii_class(char test_char,
477 const char *classp,
478 char_test_condition_t test_condition)
479 {
480 /*
481 * tests a character for membership in an ASCII character class compiled
482 * by the internationalized version of regcmp();
483 *
484 * NOTE: The internationalized version of regcmp() compiles
485 * the range a-z in an ASCII character class to aTHRUz.
486 */
487
488 int nbytes_to_check;
489
490 nbytes_to_check = (int)*classp;
491 classp++;
492 nbytes_to_check--;
493
494 while (nbytes_to_check > 0) {
495 if (test_char == *classp) {
496 if (test_condition == IN_CLASS)
497 return (CONDITION_TRUE);
498 else
499 return (CONDITION_FALSE);
500 } else if (*classp == THRU) {
501 if ((*(classp - 1) <= test_char) &&
502 (test_char <= *(classp + 1))) {
503 if (test_condition == IN_CLASS)
504 return (CONDITION_TRUE);
505 else
506 return (CONDITION_FALSE);
507 } else {
508 classp += 2;
509 nbytes_to_check -= 2;
510 }
511 } else {
512 classp++;
513 nbytes_to_check--;
514 }
515 }
516 if (test_condition == NOT_IN_CLASS) {
517 return (CONDITION_TRUE);
518 } else {
519 return (CONDITION_FALSE);
520 }
521 } /* test_char_against_ascii_class() */
522
523 static char_test_result_t
test_char_against_multibyte_class(wchar_t test_char,const char * classp,char_test_condition_t test_condition)524 test_char_against_multibyte_class(wchar_t test_char,
525 const char *classp,
526 char_test_condition_t test_condition)
527 {
528 /*
529 * tests a character for membership in a multibyte character class;
530 *
531 * NOTE: The range a-z in a multibyte character class compiles to
532 * aTHRUz.
533 */
534
535 int char_size;
536 wchar_t current_char;
537 int nbytes_to_check;
538 wchar_t previous_char;
539
540 nbytes_to_check = (int)*classp;
541 classp++;
542 nbytes_to_check--;
543
544 char_size = get_wchar(¤t_char, classp);
545 if (char_size <= 0) {
546 return (CHAR_TEST_ERROR);
547 } else if (test_char == current_char) {
548 if (test_condition == IN_CLASS) {
549 return (CONDITION_TRUE);
550 } else {
551 return (CONDITION_FALSE);
552 }
553 } else {
554 classp += char_size;
555 nbytes_to_check -= char_size;
556 }
557
558 while (nbytes_to_check > 0) {
559 previous_char = current_char;
560 char_size = get_wchar(¤t_char, classp);
561 if (char_size <= 0) {
562 return (CHAR_TEST_ERROR);
563 } else if (test_char == current_char) {
564 if (test_condition == IN_CLASS) {
565 return (CONDITION_TRUE);
566 } else {
567 return (CONDITION_FALSE);
568 }
569 } else if (current_char == THRU) {
570 classp += char_size;
571 nbytes_to_check -= char_size;
572 char_size = get_wchar(¤t_char, classp);
573 if (char_size <= 0) {
574 return (CHAR_TEST_ERROR);
575 } else if (in_wchar_range(test_char, previous_char,
576 current_char)) {
577 if (test_condition == IN_CLASS) {
578 return (CONDITION_TRUE);
579 } else {
580 return (CONDITION_FALSE);
581 }
582 } else {
583 classp += char_size;
584 nbytes_to_check -= char_size;
585 }
586 } else {
587 classp += char_size;
588 nbytes_to_check -= char_size;
589 }
590 }
591 if (test_condition == NOT_IN_CLASS) {
592 return (CONDITION_TRUE);
593 } else {
594 return (CONDITION_FALSE);
595 }
596 } /* test_char_against_multibyte_class() */
597
598
599 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
600
601 static char_test_result_t
test_char_against_old_ascii_class(char test_char,const char * classp,char_test_condition_t test_condition)602 test_char_against_old_ascii_class(char test_char,
603 const char *classp,
604 char_test_condition_t test_condition)
605 {
606 /*
607 * tests a character for membership in an ASCII character class compiled
608 * by the ASCII version of regcmp();
609 *
610 * NOTE: ASCII versions of regcmp() compile the range a-z in an
611 * ASCII character class to THRUaz. The internationalized
612 * version compiles the same range to aTHRUz.
613 */
614
615 int nbytes_to_check;
616
617 nbytes_to_check = (int)*classp;
618 classp++;
619 nbytes_to_check--;
620
621 while (nbytes_to_check > 0) {
622 if (test_char == *classp) {
623 if (test_condition == IN_CLASS) {
624 return (CONDITION_TRUE);
625 } else {
626 return (CONDITION_FALSE);
627 }
628 } else if (*classp == THRU) {
629 if ((*(classp + 1) <= test_char) &&
630 (test_char <= *(classp + 2))) {
631 if (test_condition == IN_CLASS) {
632 return (CONDITION_TRUE);
633 } else {
634 return (CONDITION_FALSE);
635 }
636 } else {
637 classp += 3;
638 nbytes_to_check -= 3;
639 }
640 } else {
641 classp++;
642 nbytes_to_check--;
643 }
644 }
645 if (test_condition == NOT_IN_CLASS) {
646 return (CONDITION_TRUE);
647 } else {
648 return (CONDITION_FALSE);
649 }
650 } /* test_char_against_old_ascii_class() */
651
/*
 * test_repeated_ascii_char() - match the rest of the regular expression
 * after a repeated single-byte character
 *
 * Tries test_string() at stringp and, while that fails, backs up one
 * byte at a time until stringp reaches repeat_startp.
 *
 * Returns the result of the first successful test_string() call, or a
 * null pointer if none succeeds.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp--;
	}
}
667
/*
 * test_repeated_multibyte_char() - match the rest of the regular
 * expression after a repeated multibyte character
 *
 * Tries test_string() at stringp and, while that fails, backs up one
 * character at a time (using previous_charp()) until stringp reaches
 * repeat_startp.
 *
 * Returns the result of the first successful test_string() call, or a
 * null pointer if none succeeds.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp = previous_charp(stringp);
	}
}
683
/*
 * test_repeated_group() - match the rest of the regular expression
 * after a repeated group
 *
 * Tries test_string() at stringp and, while that fails and stringp is
 * beyond repeat_startp, retries at the string pointer popped from the
 * string pointer stack.
 *
 * Returns the result of the first successful test_string() call, or a
 * null pointer if none succeeds or the stack runs out of entries.
 */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char *matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != (char *)0) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp = pop_stringp();
		if (stringp == (char *)0) {
			return ((char *)0);
		}
	}
}
702
703 static const char *
test_string(const char * stringp,const char * regexp)704 test_string(const char *stringp,
705 const char *regexp)
706 {
707 /*
708 * returns a pointer to the first character following the first
709 * substring of the string addressed by stringp that matches
710 * the compiled regular expression addressed by regexp
711 */
712
713 unsigned int group_length;
714 int nextra_matches_allowed;
715 int nmust_match;
716 wchar_t regex_wchar;
717 int regex_char_size;
718 const char *repeat_startp;
719 unsigned int return_argn;
720 wchar_t string_wchar;
721 int string_char_size;
722 unsigned int substringn;
723 char_test_condition_t test_condition;
724 const char *test_stringp;
725
726 for (;;) {
727
728 /*
729 * Exit the loop via a return whenever there's a match
730 * or it's clear that there can be no match.
731 */
732
733 switch ((int)*regexp) {
734
735 /*
736 * No fall-through.
737 * Each case ends with either a return or with stringp
738 * addressing the next character to be tested and regexp
739 * addressing the next compiled regular expression
740 *
741 * NOTE: The comments for each case give the meaning
742 * of the compiled regular expression decoded by the case
743 * and the character string that the compiled regular
744 * expression uses to encode the case. Each single
745 * character encoded in the compiled regular expression
746 * is shown enclosed in angle brackets (<>). Each
747 * compiled regular expression begins with a marker
748 * character which is shown as a named constant
749 * (e.g. <ASCII_CHAR>). Character constants are shown
750 * enclosed in single quotes (e.g. <'$'>). All other
751 * single characters encoded in the compiled regular
752 * expression are shown as lower case variable names
753 * (e.g. <ascii_char> or <multibyte_char>). Multicharacter
754 * strings encoded in the compiled regular expression
755 * are shown as variable names followed by elipses
756 * (e.g. <compiled_regex...>).
757 */
758
759 case ASCII_CHAR: /* single ASCII char */
760
761 /* encoded as <ASCII_CHAR><ascii_char> */
762
763 regexp++;
764 if (*regexp == *stringp) {
765 regexp++;
766 stringp++;
767 } else {
768 return ((char *)0);
769 }
770 break; /* end case ASCII_CHAR */
771
772 case MULTIBYTE_CHAR: /* single multibyte char */
773
774 /* encoded as <MULTIBYTE_CHAR><multibyte_char> */
775
776 regexp++;
777 regex_char_size = get_wchar(®ex_wchar, regexp);
778 string_char_size = get_wchar(&string_wchar, stringp);
779 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
780 return ((char *)0);
781 } else {
782 regexp += regex_char_size;
783 stringp += string_char_size;
784 }
785 break; /* end case MULTIBYTE_CHAR */
786
787 case ANY_CHAR: /* any single ASCII or multibyte char */
788
789 /* encoded as <ANY_CHAR> */
790
791 if (!multibyte) {
792 if (*stringp == '\0') {
793 return ((char *)0);
794 } else {
795 regexp++;
796 stringp++;
797 }
798 } else {
799 string_char_size = get_wchar(&string_wchar, stringp);
800 if (string_char_size <= 0) {
801 return ((char *)0);
802 } else {
803 regexp++;
804 stringp += string_char_size;
805 }
806 }
807 break; /* end case ANY_CHAR */
808
809 case IN_ASCII_CHAR_CLASS: /* [.....] */
810 case NOT_IN_ASCII_CHAR_CLASS:
811
812 /*
813 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
814 * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
815 *
816 * NOTE: <class_length> includes the <class_length> byte
817 */
818
819 if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
820 test_condition = IN_CLASS;
821 } else {
822 test_condition = NOT_IN_CLASS;
823 }
824 regexp++; /* point to the <class_length> byte */
825
826 if ((*stringp != '\0') &&
827 (test_char_against_ascii_class(*stringp, regexp,
828 test_condition) == CONDITION_TRUE)) {
829 regexp += (int)*regexp; /* add the class length to regexp */
830 stringp++;
831 } else {
832 return ((char *)0);
833 }
834 break; /* end case IN_ASCII_CHAR_CLASS */
835
836 case IN_MULTIBYTE_CHAR_CLASS: /* [....] */
837 case NOT_IN_MULTIBYTE_CHAR_CLASS:
838
839 /*
840 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
841 * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
842 *
843 * NOTE: <class_length> includes the <class_length> byte
844 */
845
846 if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
847 test_condition = IN_CLASS;
848 } else {
849 test_condition = NOT_IN_CLASS;
850 }
851 regexp++; /* point to the <class_length> byte */
852
853 string_char_size = get_wchar(&string_wchar, stringp);
854 if ((string_char_size > 0) &&
855 (test_char_against_multibyte_class(string_wchar, regexp,
856 test_condition) == CONDITION_TRUE)) {
857 regexp += (int)*regexp; /* add the class length to regexp */
858 stringp += string_char_size;
859 } else {
860 return ((char *)0);
861 }
862 break; /* end case IN_MULTIBYTE_CHAR_CLASS */
863
864 case IN_OLD_ASCII_CHAR_CLASS: /* [...] */
865 case NOT_IN_OLD_ASCII_CHAR_CLASS:
866
867 /*
868 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
869 * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
870 *
871 * NOTE: <class_length> includes the <class_length> byte
872 */
873
874 if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
875 test_condition = IN_CLASS;
876 } else {
877 test_condition = NOT_IN_CLASS;
878 }
879 regexp++; /* point to the <class_length> byte */
880
881 if ((*stringp != '\0') &&
882 (test_char_against_old_ascii_class(*stringp, regexp,
883 test_condition) == CONDITION_TRUE)) {
884 regexp += (int)*regexp; /* add the class length to regexp */
885 stringp++;
886 } else {
887 return ((char *)0);
888 }
889 break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
890
891 case SIMPLE_GROUP: /* (.....) */
892
893 /* encoded as <SIMPLE_GROUP><group_length> */
894
895 regexp += 2;
896 break; /* end case SIMPLE_GROUP */
897
898 case END_GROUP: /* (.....) */
899
900 /* encoded as <END_GROUP><groupn> */
901
902 regexp += 2;
903 break; /* end case END_GROUP */
904
905 case SAVED_GROUP: /* (.....)$0-9 */
906
907 /* encoded as <SAVED_GROUP><substringn> */
908
909 regexp++;
910 substringn = (unsigned int)*regexp;
911 if (substringn >= NSUBSTRINGS)
912 return ((char *)0);
913 substring_startp[substringn] = stringp;
914 regexp++;
915 break; /* end case SAVED_GROUP */
916
917 case END_SAVED_GROUP: /* (.....)$0-9 */
918
919 /*
920 * encoded as <END_SAVED_GROUP><substringn>\
921 * <return_arg_number[substringn]>
922 */
923
924 regexp++;
925 substringn = (unsigned int)*regexp;
926 if (substringn >= NSUBSTRINGS)
927 return ((char *)0);
928 substring_endp[substringn] = stringp;
929 regexp++;
930 return_argn = (unsigned int)*regexp;
931 if (return_argn >= NSUBSTRINGS)
932 return ((char *)0);
933 return_arg_number[substringn] = return_argn;
934 regexp++;
935 break; /* end case END_SAVED_GROUP */
936
937 case ASCII_CHAR|ZERO_OR_MORE: /* char* */
938
939 /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
940
941 regexp++;
942 repeat_startp = stringp;
943 while (*stringp == *regexp) {
944 stringp++;
945 }
946 regexp++;
947 return (test_repeated_ascii_char(repeat_startp,
948 stringp, regexp));
949
950 /* end case ASCII_CHAR|ZERO_OR_MORE */
951
952 case ASCII_CHAR|ONE_OR_MORE: /* char+ */
953
954 /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
955
956 regexp++;
957 if (*stringp != *regexp) {
958 return ((char *)0);
959 } else {
960 stringp++;
961 repeat_startp = stringp;
962 while (*stringp == *regexp) {
963 stringp++;
964 }
965 regexp++;
966 return (test_repeated_ascii_char(repeat_startp, stringp,
967 regexp));
968 }
969 /* end case ASCII_CHAR|ONE_OR_MORE */
970
971 case ASCII_CHAR|COUNT: /* char{min_count,max_count} */
972
973 /*
974 * encoded as <ASCII_CHAR|COUNT><ascii_char>\
975 * <minimum_match_count><maximum_match_count>
976 */
977
978 regexp++;
979 get_match_counts(&nmust_match, &nextra_matches_allowed,
980 regexp + 1);
981 while ((*stringp == *regexp) && (nmust_match > 0)) {
982 nmust_match--;
983 stringp++;
984 }
985 if (nmust_match > 0) {
986 return ((char *)0);
987 } else if (nextra_matches_allowed == UNLIMITED) {
988 repeat_startp = stringp;
989 while (*stringp == *regexp) {
990 stringp++;
991 }
992 regexp += 3;
993 return (test_repeated_ascii_char(repeat_startp, stringp,
994 regexp));
995 } else {
996 repeat_startp = stringp;
997 while ((*stringp == *regexp) &&
998 (nextra_matches_allowed > 0)) {
999 nextra_matches_allowed--;
1000 stringp++;
1001 }
1002 regexp += 3;
1003 return (test_repeated_ascii_char(repeat_startp, stringp,
1004 regexp));
1005 }
1006 /* end case ASCII_CHAR|COUNT */
1007
1008 case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */
1009
1010 /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
1011
1012 regexp++;
1013 regex_char_size = get_wchar(®ex_wchar, regexp);
1014 repeat_startp = stringp;
1015 string_char_size = get_wchar(&string_wchar, stringp);
1016 while ((string_char_size > 0) &&
1017 (string_wchar == regex_wchar)) {
1018 stringp += string_char_size;
1019 string_char_size = get_wchar(&string_wchar, stringp);
1020 }
1021 regexp += regex_char_size;
1022 return (test_repeated_multibyte_char(repeat_startp, stringp,
1023 regexp));
1024
1025 /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
1026
1027 case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */
1028
1029 /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
1030
1031 regexp++;
1032 regex_char_size = get_wchar(®ex_wchar, regexp);
1033 string_char_size = get_wchar(&string_wchar, stringp);
1034 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
1035 return ((char *)0);
1036 } else {
1037 stringp += string_char_size;
1038 repeat_startp = stringp;
1039 string_char_size = get_wchar(&string_wchar, stringp);
1040 while ((string_char_size > 0) &&
1041 (string_wchar == regex_wchar)) {
1042 stringp += string_char_size;
1043 string_char_size = get_wchar(&string_wchar, stringp);
1044 }
1045 regexp += regex_char_size;
1046 return (test_repeated_multibyte_char(repeat_startp, stringp,
1047 regexp));
1048 }
1049 /* end case MULTIBYTE_CHAR|ONE_OR_MORE */
1050
1051 case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */
1052
1053 /*
1054 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
1055 * <minimum_match_count><maximum_match_count>
1056 */
1057
1058 regexp++;
1059 regex_char_size = get_wchar(®ex_wchar, regexp);
1060 get_match_counts(&nmust_match, &nextra_matches_allowed,
1061 regexp + regex_char_size);
1062 string_char_size = get_wchar(&string_wchar, stringp);
1063 while ((string_char_size > 0) &&
1064 (string_wchar == regex_wchar) &&
1065 (nmust_match > 0)) {
1066
1067 nmust_match--;
1068 stringp += string_char_size;
1069 string_char_size = get_wchar(&string_wchar, stringp);
1070 }
1071 if (nmust_match > 0) {
1072 return ((char *)0);
1073 } else if (nextra_matches_allowed == UNLIMITED) {
1074 repeat_startp = stringp;
1075 while ((string_char_size > 0) &&
1076 (string_wchar == regex_wchar)) {
1077 stringp += string_char_size;
1078 string_char_size = get_wchar(&string_wchar, stringp);
1079 }
1080 regexp += regex_char_size + 2;
1081 return (test_repeated_multibyte_char(repeat_startp, stringp,
1082 regexp));
1083 } else {
1084 repeat_startp = stringp;
1085 while ((string_char_size > 0) &&
1086 (string_wchar == regex_wchar) &&
1087 (nextra_matches_allowed > 0)) {
1088 nextra_matches_allowed--;
1089 stringp += string_char_size;
1090 string_char_size = get_wchar(&string_wchar, stringp);
1091 }
1092 regexp += regex_char_size + 2;
1093 return (test_repeated_multibyte_char(repeat_startp, stringp,
1094 regexp));
1095 }
1096 /* end case MULTIBYTE_CHAR|COUNT */
1097
1098 case ANY_CHAR|ZERO_OR_MORE: /* .* */
1099
1100 /* encoded as <ANY_CHAR|ZERO_OR_MORE> */
1101
1102 repeat_startp = stringp;
1103 if (!multibyte) {
1104 while (*stringp != '\0') {
1105 stringp++;
1106 }
1107 regexp++;
1108 return (test_repeated_ascii_char(repeat_startp, stringp,
1109 regexp));
1110 } else {
1111 string_char_size = get_wchar(&string_wchar, stringp);
1112 while (string_char_size > 0) {
1113 stringp += string_char_size;
1114 string_char_size = get_wchar(&string_wchar, stringp);
1115 }
1116 regexp++;
1117 return (test_repeated_multibyte_char(repeat_startp, stringp,
1118 regexp));
1119 }
1120 /* end case <ANY_CHAR|ZERO_OR_MORE> */
1121
1122 case ANY_CHAR|ONE_OR_MORE: /* .+ */
1123
1124 /* encoded as <ANY_CHAR|ONE_OR_MORE> */
1125
1126 if (!multibyte) {
1127 if (*stringp == '\0') {
1128 return ((char *)0);
1129 } else {
1130 stringp++;
1131 repeat_startp = stringp;
1132 while (*stringp != '\0') {
1133 stringp++;
1134 }
1135 regexp++;
1136 return (test_repeated_ascii_char(repeat_startp, stringp,
1137 regexp));
1138 }
1139 } else {
1140 string_char_size = get_wchar(&string_wchar, stringp);
1141 if (string_char_size <= 0) {
1142 return ((char *)0);
1143 } else {
1144 stringp += string_char_size;
1145 repeat_startp = stringp;
1146 string_char_size = get_wchar(&string_wchar, stringp);
1147 while (string_char_size > 0) {
1148 stringp += string_char_size;
1149 string_char_size =
1150 get_wchar(&string_wchar, stringp);
1151 }
1152 regexp++;
1153 return (test_repeated_multibyte_char(repeat_startp,
1154 stringp, regexp));
1155 }
1156 }
1157 /* end case <ANY_CHAR|ONE_OR_MORE> */
1158
1159 case ANY_CHAR|COUNT: /* .{min_count,max_count} */
1160
1161 /*
1162 * encoded as <ANY_CHAR|COUNT>\
1163 * <minimum_match_count><maximum_match_count>
1164 */
1165
1166 get_match_counts(&nmust_match, &nextra_matches_allowed,
1167 regexp + 1);
1168 if (!multibyte) {
1169 while ((*stringp != '\0') && (nmust_match > 0)) {
1170 nmust_match--;
1171 stringp++;
1172 }
1173 if (nmust_match > 0) {
1174 return ((char *)0);
1175 } else if (nextra_matches_allowed == UNLIMITED) {
1176 repeat_startp = stringp;
1177 while (*stringp != '\0') {
1178 stringp++;
1179 }
1180 regexp += 3;
1181 return (test_repeated_ascii_char(repeat_startp, stringp,
1182 regexp));
1183 } else {
1184 repeat_startp = stringp;
1185 while ((*stringp != '\0') &&
1186 (nextra_matches_allowed > 0)) {
1187 nextra_matches_allowed--;
1188 stringp++;
1189 }
1190 regexp += 3;
1191 return (test_repeated_ascii_char(repeat_startp, stringp,
1192 regexp));
1193 }
1194 } else { /* multibyte character */
1195
1196 string_char_size = get_wchar(&string_wchar, stringp);
1197 while ((string_char_size > 0) && (nmust_match > 0)) {
1198 nmust_match--;
1199 stringp += string_char_size;
1200 string_char_size = get_wchar(&string_wchar, stringp);
1201 }
1202 if (nmust_match > 0) {
1203 return ((char *)0);
1204 } else if (nextra_matches_allowed == UNLIMITED) {
1205 repeat_startp = stringp;
1206 while (string_char_size > 0) {
1207 stringp += string_char_size;
1208 string_char_size =
1209 get_wchar(&string_wchar, stringp);
1210 }
1211 regexp += 3;
1212 return (test_repeated_multibyte_char(repeat_startp,
1213 stringp, regexp));
1214 } else {
1215 repeat_startp = stringp;
1216 while ((string_char_size > 0) &&
1217 (nextra_matches_allowed > 0)) {
1218 nextra_matches_allowed--;
1219 stringp += string_char_size;
1220 string_char_size =
1221 get_wchar(&string_wchar, stringp);
1222 }
1223 regexp += 3;
1224 return (test_repeated_multibyte_char(repeat_startp,
1225 stringp, regexp));
1226 }
1227 } /* end case ANY_CHAR|COUNT */
1228
1229 case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
1230 case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1231
1232 /*
1233 * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1234 * <class_length><class ...>
1235 * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1236 * <class_length><class ...>
1237 *
1238 * NOTE: <class_length> includes the <class_length> byte
1239 */
1240
1241 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1242 test_condition = IN_CLASS;
1243 } else {
1244 test_condition = NOT_IN_CLASS;
1245 }
1246 regexp++; /* point to the <class_length> byte */
1247
1248 repeat_startp = stringp;
1249 while ((*stringp != '\0') &&
1250 (test_char_against_ascii_class(*stringp, regexp,
1251 test_condition) == CONDITION_TRUE)) {
1252 stringp++;
1253 }
1254 regexp += (int)*regexp; /* add the class length to regexp */
1255 return (test_repeated_ascii_char(repeat_startp, stringp,
1256 regexp));
1257
1258 /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1259
1260 case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
1261 case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
1262
1263 /*
1264 * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1265 * <class_length><class ...>
1266 * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1267 * <class_length><class ...>
1268 *
1269 * NOTE: <class_length> includes the <class_length> byte
1270 */
1271
1272 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1273 test_condition = IN_CLASS;
1274 } else {
1275 test_condition = NOT_IN_CLASS;
1276 }
1277 regexp++; /* point to the <class_length> byte */
1278
1279 if ((*stringp == '\0') ||
1280 (test_char_against_ascii_class(*stringp, regexp,
1281 test_condition) != CONDITION_TRUE)) {
1282 return ((char *)0);
1283 } else {
1284 stringp++;
1285 repeat_startp = stringp;
1286 while ((*stringp != '\0') &&
1287 (test_char_against_ascii_class(*stringp, regexp,
1288 test_condition) == CONDITION_TRUE)) {
1289 stringp++;
1290 }
1291 regexp += (int)*regexp; /* add the class length to regexp */
1292 return (test_repeated_ascii_char(repeat_startp, stringp,
1293 regexp));
1294 }
1295 /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
1296
1297 case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{min_count,max_count} */
1298 case NOT_IN_ASCII_CHAR_CLASS | COUNT:
1299
1300 /*
1301 * encoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1302 * <class ...><minimum_match_count>\
1303 * <maximum_match_count>
1304 * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1305 * <class ...><minimum_match_count>\
1306 * <maximum_match_count>
1307 *
1308 * NOTE: <class_length> includes the <class_length> byte,
1309 * but not the <minimum_match_count> or
1310 * <maximum_match_count> bytes
1311 */
1312
1313 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
1314 test_condition = IN_CLASS;
1315 } else {
1316 test_condition = NOT_IN_CLASS;
1317 }
1318 regexp++; /* point to the <class_length> byte */
1319
1320 get_match_counts(&nmust_match, &nextra_matches_allowed,
1321 regexp + (int)*regexp);
1322 while ((*stringp != '\0') &&
1323 (test_char_against_ascii_class(*stringp, regexp,
1324 test_condition) == CONDITION_TRUE) &&
1325 (nmust_match > 0)) {
1326 nmust_match--;
1327 stringp++;
1328 }
1329 if (nmust_match > 0) {
1330 return ((char *)0);
1331 } else if (nextra_matches_allowed == UNLIMITED) {
1332 repeat_startp = stringp;
1333 while ((*stringp != '\0') &&
1334 (test_char_against_ascii_class(*stringp, regexp,
1335 test_condition) == CONDITION_TRUE)) {
1336 stringp++;
1337 }
1338 regexp += (int)*regexp + 2;
1339 return (test_repeated_ascii_char(repeat_startp, stringp,
1340 regexp));
1341 } else {
1342 repeat_startp = stringp;
1343 while ((*stringp != '\0') &&
1344 (test_char_against_ascii_class(*stringp, regexp,
1345 test_condition) == CONDITION_TRUE) &&
1346 (nextra_matches_allowed > 0)) {
1347 nextra_matches_allowed--;
1348 stringp++;
1349 }
1350 regexp += (int)*regexp + 2;
1351 return (test_repeated_ascii_char(repeat_startp, stringp,
1352 regexp));
1353 }
1354 /* end case IN_ASCII_CHAR_CLASS|COUNT */
1355
1356 case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
1357 case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
1358
1359 /*
1360 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1361 * <class_length><class ...>
1362 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1363 * <class_length><class ...>
1364 *
1365 * NOTE: <class_length> includes the <class_length> byte
1366 */
1367
1368 if ((int)*regexp ==
1369 (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
1370 test_condition = IN_CLASS;
1371 } else {
1372 test_condition = NOT_IN_CLASS;
1373 }
1374 regexp++; /* point to the <class_length> byte */
1375
1376 repeat_startp = stringp;
1377 string_char_size = get_wchar(&string_wchar, stringp);
1378 while ((string_char_size > 0) &&
1379 (test_char_against_multibyte_class(string_wchar, regexp,
1380 test_condition) == CONDITION_TRUE)) {
1381 stringp += string_char_size;
1382 string_char_size = get_wchar(&string_wchar, stringp);
1383 }
1384 regexp += (int)*regexp; /* add the class length to regexp */
1385 return (test_repeated_multibyte_char(repeat_startp, stringp,
1386 regexp));
1387
1388 /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
1389
1390 case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
1391 case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
1392
1393 /*
1394 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1395 * <class_length><class ...>
1396 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1397 * <class_length><class ...>
1398 *
1399 * NOTE: <class_length> includes the <class_length> byte
1400 */
1401
1402 if ((int)*regexp ==
1403 (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
1404 test_condition = IN_CLASS;
1405 } else {
1406 test_condition = NOT_IN_CLASS;
1407 }
1408 regexp++; /* point to the <class_length> byte */
1409
1410 string_char_size = get_wchar(&string_wchar, stringp);
1411 if ((string_char_size <= 0) ||
1412 (test_char_against_multibyte_class(string_wchar, regexp,
1413 test_condition) != CONDITION_TRUE)) {
1414 return ((char *)0);
1415 } else {
1416 stringp += string_char_size;
1417 repeat_startp = stringp;
1418 string_char_size = get_wchar(&string_wchar, stringp);
1419 while ((string_char_size > 0) &&
1420 (test_char_against_multibyte_class(string_wchar,
1421 regexp, test_condition) == CONDITION_TRUE)) {
1422 stringp += string_char_size;
1423 string_char_size = get_wchar(&string_wchar, stringp);
1424 }
1425 regexp += (int)*regexp; /* add the class length to regexp */
1426 return (test_repeated_multibyte_char(repeat_startp, stringp,
1427 regexp));
1428 }
1429 /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
1430
1431 case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1432 case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
1433
1434 /*
1435 * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1436 * <class_length><class ...><min_count><max_count>
1437 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1438 * <class_length><class ...><min_count><max_count>
1439 *
1440 * NOTE: <class_length> includes the <class_length> byte
1441 * but not the <minimum_match_count> or
1442 * <maximum_match_count> bytes
1443 */
1444
1445 if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
1446 test_condition = IN_CLASS;
1447 } else {
1448 test_condition = NOT_IN_CLASS;
1449 }
1450 regexp++; /* point to the <class_length> byte */
1451
1452 get_match_counts(&nmust_match, &nextra_matches_allowed,
1453 regexp + (int)*regexp);
1454 string_char_size = get_wchar(&string_wchar, stringp);
1455 while ((string_char_size > 0) &&
1456 (test_char_against_multibyte_class(string_wchar, regexp,
1457 test_condition) == CONDITION_TRUE) &&
1458 (nmust_match > 0)) {
1459 nmust_match--;
1460 stringp += string_char_size;
1461 string_char_size = get_wchar(&string_wchar, stringp);
1462 }
1463 if (nmust_match > 0) {
1464 return ((char *)0);
1465 } else if (nextra_matches_allowed == UNLIMITED) {
1466 repeat_startp = stringp;
1467 while ((string_char_size > 0) &&
1468 (test_char_against_multibyte_class(string_wchar,
1469 regexp, test_condition) == CONDITION_TRUE)) {
1470 stringp += string_char_size;
1471 string_char_size = get_wchar(&string_wchar, stringp);
1472 }
1473 regexp += (int)*regexp + 2;
1474 return (test_repeated_multibyte_char(repeat_startp, stringp,
1475 regexp));
1476 } else {
1477 repeat_startp = stringp;
1478 while ((string_char_size > 0) &&
1479 (test_char_against_multibyte_class(string_wchar,
1480 regexp, test_condition) == CONDITION_TRUE) &&
1481 (nextra_matches_allowed > 0)) {
1482 nextra_matches_allowed--;
1483 stringp += string_char_size;
1484 string_char_size = get_wchar(&string_wchar, stringp);
1485 }
1486 regexp += (int)*regexp + 2;
1487 return (test_repeated_multibyte_char(repeat_startp, stringp,
1488 regexp));
1489 }
1490 /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
1491
1492 case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
1493 case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1494
1495 /*
1496 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1497 * <class_length><class ...>
1498 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1499 * <class_length><class ...>
1500 *
1501 * NOTE: <class_length> includes the <class_length> byte
1502 */
1503
1504 if ((int)*regexp ==
1505 (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1506 test_condition = IN_CLASS;
1507 } else {
1508 test_condition = NOT_IN_CLASS;
1509 }
1510 regexp++; /* point to the <class_length> byte */
1511
1512 repeat_startp = stringp;
1513 while ((*stringp != '\0') &&
1514 (test_char_against_old_ascii_class(*stringp, regexp,
1515 test_condition) == CONDITION_TRUE)) {
1516 stringp++;
1517 }
1518 regexp += (int)*regexp; /* add the class length to regexp */
1519 return (test_repeated_ascii_char(repeat_startp, stringp,
1520 regexp));
1521
1522 /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1523
1524 case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
1525 case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
1526
1527 /*
1528 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1529 * <class_length><class ...>
1530 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1531 * <class_length><class ...>
1532 *
1533 * NOTE: <class_length> includes the <class_length> byte
1534 */
1535
1536 if ((int)*regexp ==
1537 (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1538 test_condition = IN_CLASS;
1539 } else {
1540 test_condition = NOT_IN_CLASS;
1541 }
1542 regexp++; /* point to the <class_length> byte */
1543
1544 if ((*stringp == '\0') ||
1545 (test_char_against_old_ascii_class(*stringp, regexp,
1546 test_condition) != CONDITION_TRUE)) {
1547 return ((char *)0);
1548 } else {
1549 stringp++;
1550 repeat_startp = stringp;
1551 while ((*stringp != '\0') &&
1552 (test_char_against_old_ascii_class(*stringp, regexp,
1553 test_condition) == CONDITION_TRUE)) {
1554 stringp++;
1555 }
1556 regexp += (int)*regexp; /* add the class length to regexp */
1557 return (test_repeated_ascii_char(repeat_startp, stringp,
1558 regexp));
1559 }
1560 /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
1561
1562 case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1563 case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
1564
1565 /*
1566 * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
1567 * <class ...><minimum_match_count>\
1568 * <maximum_match_count>
1569 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
1570 * <class_length><class ...><minimum_match_count>\
1571 * <maximum_match_count>
1572 *
1573 * NOTE: <class_length> includes the <class_length> byte
1574 * but not the <minimum_match_count> or
1575 * <maximum_match_count> bytes
1576 */
1577
1578 if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
1579 test_condition = IN_CLASS;
1580 } else {
1581 test_condition = NOT_IN_CLASS;
1582 }
1583 regexp++; /* point to the <class_length> byte */
1584
1585 get_match_counts(&nmust_match, &nextra_matches_allowed,
1586 regexp + (int)*regexp);
1587 while ((*stringp != '\0') &&
1588 (test_char_against_old_ascii_class(*stringp, regexp,
1589 test_condition) == CONDITION_TRUE) &&
1590 (nmust_match > 0)) {
1591 nmust_match--;
1592 stringp++;
1593 }
1594 if (nmust_match > 0) {
1595 return ((char *)0);
1596 } else if (nextra_matches_allowed == UNLIMITED) {
1597 repeat_startp = stringp;
1598 while ((*stringp != '\0') &&
1599 (test_char_against_old_ascii_class(*stringp, regexp,
1600 test_condition) == CONDITION_TRUE)) {
1601 stringp++;
1602 }
1603 regexp += (int)*regexp + 2;
1604 return (test_repeated_ascii_char(repeat_startp, stringp,
1605 regexp));
1606 } else {
1607 repeat_startp = stringp;
1608 while ((*stringp != '\0') &&
1609 (test_char_against_old_ascii_class(*stringp, regexp,
1610 test_condition) == CONDITION_TRUE) &&
1611 (nextra_matches_allowed > 0)) {
1612 nextra_matches_allowed--;
1613 stringp++;
1614 }
1615 regexp += (int)*regexp + 2;
1616 return (test_repeated_ascii_char(repeat_startp, stringp,
1617 regexp));
1618 }
1619 /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
1620
1621 case ZERO_OR_MORE_GROUP: /* (.....)* */
1622 case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1623 case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1624 case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1625
1626 /*
1627 * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1628 * <group_length><compiled_regex...>\
1629 * <END_GROUP|ZERO_OR_MORE><groupn>
1630 *
1631 * NOTE:
1632 *
1633 * group_length + (256 * ADDED_LENGTH_BITS) ==
1634 * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
1635 * <groupn>)
1636 *
1637 */
1638
1639 group_length =
1640 (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1641 TIMES_256_SHIFT);
1642 regexp++;
1643 group_length += (unsigned int)*regexp;
1644 regexp++;
1645 repeat_startp = stringp;
1646 test_stringp = test_string(stringp, regexp);
1647 while (test_stringp != (char *)0) {
1648 if (push_stringp(stringp) == (char *)0)
1649 return ((char *)0);
1650 stringp = test_stringp;
1651 test_stringp = test_string(stringp, regexp);
1652 }
1653 regexp += group_length;
1654 return (test_repeated_group(repeat_startp, stringp, regexp));
1655
1656 /* end case ZERO_OR_MORE_GROUP */
1657
1658 case END_GROUP|ZERO_OR_MORE: /* (.....)* */
1659
1660 /* encoded as <END_GROUP|ZERO_OR_MORE> */
1661
1662 /* return from recursive call to test_string() */
1663
1664 return ((char *)stringp);
1665
1666 /* end case END_GROUP|ZERO_OR_MORE */
1667
1668 case ONE_OR_MORE_GROUP: /* (.....)+ */
1669 case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1670 case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1671 case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1672
1673 /*
1674 * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1675 * <group_length><compiled_regex...>\
1676 * <END_GROUP|ONE_OR_MORE><groupn>
1677 *
1678 * NOTE:
1679 *
1680 * group_length + (256 * ADDED_LENGTH_BITS) ==
1681 * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
1682 * <groupn>)
1683 */
1684
1685 group_length =
1686 (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1687 TIMES_256_SHIFT);
1688 regexp++;
1689 group_length += (unsigned int)*regexp;
1690 regexp++;
1691 stringp = test_string(stringp, regexp);
1692 if (stringp == (char *)0)
1693 return ((char *)0);
1694 repeat_startp = stringp;
1695 test_stringp = test_string(stringp, regexp);
1696 while (test_stringp != (char *)0) {
1697 if (push_stringp(stringp) == (char *)0)
1698 return ((char *)0);
1699 stringp = test_stringp;
1700 test_stringp = test_string(stringp, regexp);
1701 }
1702 regexp += group_length;
1703 return (test_repeated_group(repeat_startp, stringp, regexp));
1704
1705 /* end case ONE_OR_MORE_GROUP */
1706
1707 case END_GROUP|ONE_OR_MORE: /* (.....)+ */
1708
1709 /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
1710
1711 /* return from recursive call to test_string() */
1712
1713 return ((char *)stringp);
1714
1715 /* end case END_GROUP|ONE_OR_MORE */
1716
1717 case COUNTED_GROUP: /* (.....){min_count,max_count} */
1718 case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
1719 case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
1720 case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
1721
1722 /*
1723 * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
1724 * <compiled_regex...>\<END_GROUP|COUNT><groupn>\
1725 * <minimum_match_count><maximum_match_count>
1726 *
1727 * NOTE:
1728 *
1729 * group_length + (256 * ADDED_LENGTH_BITS) ==
1730 * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
1731 *
1732 * but does not include the <minimum_match_count> or
1733 * <maximum_match_count> bytes
1734 */
1735
1736 group_length =
1737 (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1738 TIMES_256_SHIFT);
1739 regexp++;
1740 group_length += (unsigned int)*regexp;
1741 regexp++;
1742 get_match_counts(&nmust_match, &nextra_matches_allowed,
1743 regexp + group_length);
1744 test_stringp = test_string(stringp, regexp);
1745 while ((test_stringp != (char *)0) && (nmust_match > 0)) {
1746 stringp = test_stringp;
1747 nmust_match--;
1748 test_stringp = test_string(stringp, regexp);
1749 }
1750 if (nmust_match > 0) {
1751 return ((char *)0);
1752 } else if (nextra_matches_allowed == UNLIMITED) {
1753 repeat_startp = stringp;
1754 while (test_stringp != (char *)0) {
1755 if (push_stringp(stringp) == (char *)0)
1756 return ((char *)0);
1757 stringp = test_stringp;
1758 test_stringp = test_string(stringp, regexp);
1759 }
1760 regexp += group_length + 2;
1761 return (test_repeated_group(repeat_startp, stringp,
1762 regexp));
1763 } else {
1764 repeat_startp = stringp;
1765 while ((test_stringp != (char *)0) &&
1766 (nextra_matches_allowed > 0)) {
1767 nextra_matches_allowed--;
1768 if (push_stringp(stringp) == (char *)0)
1769 return ((char *)0);
1770 stringp = test_stringp;
1771 test_stringp = test_string(stringp, regexp);
1772 }
1773 regexp += group_length + 2;
1774 return (test_repeated_group(repeat_startp, stringp,
1775 regexp));
1776 }
1777 /* end case COUNTED_GROUP */
1778
1779 case END_GROUP|COUNT: /* (.....){min_count,max_count} */
1780
1781 /* encoded as <END_GROUP|COUNT> */
1782
1783 /* return from recursive call to test_string() */
1784
1785 return (stringp);
1786
1787 /* end case END_GROUP|COUNT */
1788
1789 case END_OF_STRING_MARK:
1790
1791 /* encoded as <END_OF_STRING_MARK><END_REGEX> */
1792
1793 if (*stringp == '\0') {
1794 regexp++;
1795 } else {
1796 return ((char *)0);
1797 }
1798 break; /* end case END_OF_STRING_MARK */
1799
1800 case END_REGEX: /* end of the compiled regular expression */
1801
1802 /* encoded as <END_REGEX> */
1803
1804 return (stringp);
1805
1806 /* end case END_REGEX */
1807
1808 default:
1809
1810 return ((char *)0);
1811
1812 } /* end switch (*regexp) */
1813
1814 } /* end for (;;) */
1815
1816 } /* test_string() */
1817