1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1988 AT&T */
28 /* All Rights Reserved */
29
30 #include <sys/types.h>
31 #include <ctype.h>
32 #include <stdlib.h>
33 #include <limits.h>
34 #include <string.h>
35 #include <synch.h>
36 #include <thread.h>
37 #include <pthread.h>
38 #include <widec.h>
39 #include "_regexp.h"
40
/*
 * Small helpers used by the matcher.  Every macro argument is parenthesized
 * so that an expression argument (for example "ch & 0x7f") is evaluated as
 * a unit rather than being re-associated by operator precedence.
 */

/* True when the first n bytes of s1 and s2 compare equal. */
#define	ecmp(s1, s2, n)	(strncmp((s1), (s2), (n)) == 0)

/*
 * Decode the multibyte character at p into the wchar_t lvalue l.
 * Evaluates to the mbtowc() return value (bytes consumed, 0 for the
 * terminating null character, -1 for an invalid sequence).
 */
#define	Popwchar(p, l)	(mbtowc(&(l), (p), MB_LEN_MAX))

/* True when c is alphabetic or an underscore.  c is evaluated twice. */
#define	uletter(c)	(isalpha(c) || (c) == '_')

/* Number of entries in the braslist[] and braelist[] arrays. */
#define	_NBRA		128

/*
 * Exported match state.  _advance() stores the end of a successful match
 * in loc2, step() stores its start in loc1, and locs is read (never
 * written) by the matcher.
 */
char *loc1 = (char *)0, *loc2 = (char *)0, *locs = (char *)0;

/* Text positions recorded by the CBRA / CKET opcodes, indexed by group. */
char *braslist[_NBRA] = { (char *)0};
char *braelist[_NBRA] = { (char *)0};
49
#ifdef _REENTRANT
/*
 * Per-thread replacement for the exported match-state globals.  One
 * instance is allocated by _get_vars_storage() for each thread that asks
 * for it; the member names mirror the globals they stand in for.
 */
typedef struct _vars_storage {
	char	*loc1;
	char	*loc2;
	char	*locs;
	char	*braslist[_NBRA];
	char	*braelist[_NBRA];
} vars_storage;

/* Thread-specific-data key under which each thread's vars_storage lives. */
static thread_key_t key = THR_ONCE_KEY;
#endif
58
59 static unsigned char _bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
60 static void getrnge(char *);
61 static int cclass(char *, char **, int);
62 static int low;
63 static int size;
64 static int _advance(char *, char *);
65 static char *start;
66
67 #ifdef _REENTRANT
68 static mutex_t lock = DEFAULTMUTEX;
69
70 static vars_storage *
_get_vars_storage(thread_key_t * keyp)71 _get_vars_storage(thread_key_t *keyp)
72 {
73 vars_storage *vars;
74
75 if (thr_keycreate_once(keyp, free) != 0)
76 return (NULL);
77 vars = pthread_getspecific(*keyp);
78 if (vars == NULL) {
79 vars = calloc(1, sizeof (vars_storage));
80 if (thr_setspecific(*keyp, vars) != 0) {
81 if (vars)
82 (void) free(vars);
83 vars = NULL;
84 }
85 }
86 return (vars);
87 }
88
89 char **
___braslist(void)90 ___braslist(void)
91 {
92 if (thr_main())
93 return ((char **)&braslist);
94 else {
95 vars_storage *vars = _get_vars_storage(&key);
96 return ((char **)&vars->braslist);
97 }
98 }
99
100 char **
___braelist(void)101 ___braelist(void)
102 {
103 if (thr_main())
104 return ((char **)&braelist);
105 else {
106 vars_storage *vars = _get_vars_storage(&key);
107 return ((char **)&vars->braelist);
108 }
109 }
110
111 char **
___loc1(void)112 ___loc1(void)
113 {
114 if (thr_main())
115 return (&loc1);
116 else {
117 vars_storage *vars = _get_vars_storage(&key);
118 return (&vars->loc1);
119 }
120 }
121
122 char **
___loc2(void)123 ___loc2(void)
124 {
125 if (thr_main())
126 return (&loc2);
127 else {
128 vars_storage *vars = _get_vars_storage(&key);
129 return (&vars->loc2);
130 }
131 }
132
133 char **
___locs(void)134 ___locs(void)
135 {
136 if (thr_main())
137 return (&locs);
138 else {
139 vars_storage *vars = _get_vars_storage(&key);
140 return (&vars->locs);
141 }
142 }
143
/*
 * From this point to the end of the file, the names of the exported
 * match-state variables expand to calls of the accessor functions above,
 * so the matcher reads and writes whichever copy those accessors select
 * for the calling thread.
 */

/* Scalar pointers: the accessor result is dereferenced to give an lvalue. */
#undef loc1
#define	loc1 (*(___loc1()))
#undef loc2
#define	loc2 (*(___loc2()))
#undef locs
#define	locs (*(___locs()))

/* Arrays: the accessor result is used directly as the array base. */
#undef braslist
#define	braslist (___braslist())
#undef braelist
#define	braelist (___braelist())
154
155 #endif /* _REENTRANT */
156
157 int
step(char * p1,char * p2)158 step(char *p1, char *p2)
159 {
160 int c;
161 wchar_t cl;
162 int n;
163 int ret;
164
165 /* check if match is restricted to beginning of string */
166 (void) mutex_lock(&lock);
167 start = p1;
168 if (*p2++) {
169 loc1 = p1;
170 ret = _advance(p1, p2);
171 (void) mutex_unlock(&lock);
172 return (ret);
173 }
174 if (*p2 == CCHR) {
175 /* fast check for first character */
176 c = p2[1];
177 do {
178 if (*p1 != c)
179 continue;
180 if (_advance(p1, p2)) {
181 loc1 = p1;
182 (void) mutex_unlock(&lock);
183 return (1);
184 }
185 } while (*p1++);
186 } else if (multibyte)
187 do {
188 if (_advance(p1, p2)) {
189 loc1 = p1;
190 (void) mutex_unlock(&lock);
191 return (1);
192 }
193 n = Popwchar(p1, cl);
194 if (n < 0)
195 /* skip past illegal multibyte characters */
196 p1++;
197 else
198 p1 += n;
199 } while (n);
200 else
201 /* regular algorithm */
202 do {
203 if (_advance(p1, p2)) {
204 loc1 = p1;
205 (void) mutex_unlock(&lock);
206 return (1);
207 }
208 } while (*p1++);
209 (void) mutex_unlock(&lock);
210 return (0);
211 }
212
213 int
advance(char * lp,char * ep)214 advance(char *lp, char *ep)
215 {
216 int ret;
217
218 (void) mutex_lock(&lock);
219 /* ignore flag to see if expression is anchored */
220 start = lp;
221 ret = _advance(lp, ++ep);
222 (void) mutex_unlock(&lock);
223 return (ret);
224 }
225
226 static int
_advance(char * lp,char * ep)227 _advance(char *lp, char *ep)
228 {
229 char *rp;
230 char *curlp;
231 wchar_t c, d;
232 int n;
233 wchar_t cl;
234 int neg;
235 char *bbeg;
236 int ct;
237
238 for (;;) {
239 neg = 0;
240 switch (*ep++) {
241
242 case CCHR:
243 if (*ep++ == *lp++)
244 continue;
245 return (0);
246
247 case MCCHR:
248 ep += Popwchar(ep, cl);
249 c = cl;
250 if ((n = Popwchar(lp, cl)) <= 0 || c != cl)
251 return (0);
252 lp += n;
253 continue;
254
255 case CDOT:
256 /*
257 * match any characters except NULL
258 */
259 if ((n = Popwchar(lp, cl)) > 0) {
260 lp += n;
261 continue;
262 } else if (n < 0) {
263 lp++;
264 continue;
265 } else {
266 return (0);
267 }
268 case CDOL:
269 if (*lp == 0)
270 continue;
271 return (0);
272
273 case CCEOF:
274 loc2 = lp;
275 return (1);
276
277 case CCL:
278 c = (unsigned char)*lp++;
279 if (ISTHERE(c)) {
280 ep += 32;
281 continue;
282 }
283 return (0);
284
285 case NMCCL:
286 neg = 1;
287 /* FALLTHRU */
288
289 case MCCL:
290 rp = lp;
291 if (cclass(ep, &rp, neg) != 1)
292 return (0);
293 ep += *(ep + 32) + 32;
294 lp = rp;
295 continue;
296
297 case CBRA:
298 braslist[*ep++] = lp;
299 continue;
300
301 case CKET:
302 braelist[*ep++] = lp;
303 continue;
304
305 case MCCHR | RNGE:
306 ep += Popwchar(ep, cl);
307 c = cl;
308 getrnge(ep);
309 while (low--) {
310 if ((n = Popwchar(lp, cl)) <= 0 || cl != c)
311 return (0);
312 lp += n;
313 }
314 curlp = lp;
315 while (size--) {
316 if ((n = Popwchar(lp, cl)) <= 0 || cl != c)
317 break;
318 lp += n;
319 }
320 if (size < 0)
321 n = Popwchar(lp, cl);
322 if (n == -1)
323 return (0);
324 lp += (n ? n : 1);
325 ep += 2;
326 goto mstar;
327
328 case CCHR | RNGE:
329 c = *ep++;
330 getrnge(ep);
331 while (low--)
332 if (*lp++ != c)
333 return (0);
334 curlp = lp;
335 while (size--)
336 if (*lp++ != c)
337 break;
338 if (size < 0)
339 lp++;
340 ep += 2;
341 goto star;
342
343 case CDOT | RNGE:
344 getrnge(ep);
345 while (low--) {
346 if ((n = Popwchar(lp, cl)) > 0) {
347 lp += n;
348 } else if (n < 0) {
349 lp++;
350 } else {
351 return (0);
352 }
353 }
354 curlp = lp;
355 while (size--) {
356 if ((n = Popwchar(lp, cl)) > 0) {
357 lp += n;
358 } else if (n < 0) {
359 lp++;
360 } else {
361 break;
362 }
363 }
364 if (size < 0)
365 n = Popwchar(lp, cl);
366 if (n > 0) {
367 lp += n;
368 } else {
369 lp++;
370 }
371 ep += 2;
372 goto mstar;
373
374 case NMCCL | RNGE:
375 neg = 1;
376 /* FALLTHRU */
377
378 case MCCL | RNGE:
379 getrnge(ep + *(ep + 32) + 32);
380 rp = lp;
381 while (low--) {
382 if (cclass(ep, &rp, neg) != 1)
383 return (0);
384 }
385 curlp = rp;
386 while (size-- && (c = (cclass(ep, &rp, neg))) == 1)
387 ;
388 if (c == -1)
389 return (0);
390 lp = rp;
391 if (size < 0) {
392 if ((n = Popwchar(lp, cl)) == -1)
393 return (0);
394 lp += (n ? n : 1);
395 }
396 ep += *(ep + 32) + 34;
397 goto mstar;
398
399 case CCL | RNGE:
400 getrnge(ep + 32);
401 while (low--) {
402 c = (unsigned char)*lp++;
403 if (!ISTHERE(c))
404 return (0);
405 }
406 curlp = lp;
407 while (size--) {
408 c = (unsigned char)*lp++;
409 if (!ISTHERE(c))
410 break;
411 }
412 if (size < 0)
413 lp++;
414 ep += 34; /* 32 + 2 */
415 goto star;
416
417 case CBACK:
418 bbeg = braslist[*ep];
419 ct = (int)(braelist[*ep++] - bbeg);
420
421 if (ecmp(bbeg, lp, ct)) {
422 lp += ct;
423 continue;
424 }
425 return (0);
426
427 case CBACK | STAR:
428 bbeg = braslist[*ep];
429 ct = (int)(braelist[*ep++] - bbeg);
430 curlp = lp;
431 while (ecmp(bbeg, lp, ct))
432 lp += ct;
433
434 while (lp >= curlp) {
435 if (_advance(lp, ep))
436 return (1);
437 lp -= ct;
438 }
439 return (0);
440
441 case CDOT | STAR:
442 curlp = lp;
443 if (!multibyte)
444 while (*lp++)
445 ;
446 else {
447 for (;;) {
448 n = Popwchar(lp, cl);
449 if (n > 0) {
450 lp += n;
451 } else if (n < 0) {
452 lp++;
453 } else {
454 lp++;
455 break;
456 }
457 }
458 }
459 goto mstar;
460
461 case CCHR | STAR:
462 curlp = lp;
463 while (*lp++ == *ep)
464 ;
465 ep++;
466 goto star;
467
468 case MCCHR | STAR:
469 curlp = lp;
470 ep += Popwchar(ep, cl);
471 c = cl;
472 while ((n = Popwchar(lp, cl)) > 0 && cl == c)
473 lp += n;
474 if (n == -1)
475 return (0);
476 lp += (n ? n : 1);
477 goto mstar;
478
479 case NMCCL | STAR:
480 neg = 1;
481 /* FALLTHRU */
482
483 case MCCL | STAR:
484 curlp = rp = lp;
485 while ((d = cclass(ep, &rp, neg)) == 1)
486 ;
487 if (d == -1)
488 return (0);
489 lp = rp;
490 ep += *(ep + 32) + 32;
491 goto mstar;
492
493 case CCL | STAR:
494 curlp = lp;
495 do {
496 c = (unsigned char)*lp++;
497 } while (ISTHERE(c));
498 ep += 32;
499 goto star;
500
501 case CBRC:
502 if (lp == start && locs == (char *)0)
503 continue;
504 c = (unsigned char)*lp;
505 d = (unsigned char)*(lp-1);
506 if ((isdigit((int)c) || uletter((int)c) || c >= 0200 &&
507 MB_CUR_MAX > 1) && !isdigit((int)d) &&
508 !uletter((int)d) &&
509 (d < 0200 || MB_CUR_MAX == 1))
510 continue;
511 return (0);
512
513 case CLET:
514 d = (unsigned char)*lp;
515 if (!isdigit((int)d) && !uletter((int)d) && (d < 0200 ||
516 MB_CUR_MAX == 1))
517 continue;
518 return (0);
519
520 default:
521 return (0);
522 }
523 }
524
525 mstar:
526 if (multibyte) {
527 /* MB_CUR_MAX > 1 */
528 if ((eucw1 != 0) || (eucw2 != 0) || (eucw3 != 0)) {
529 /* EUC locale */
530 do {
531 char *p1, *p2;
532 lp--;
533 p1 = lp - eucw2;
534 p2 = lp - eucw3;
535 /* check if previous character is from */
536 /* supplementary code sets 1, 2, or 3 and */
537 /* back up appropriate number of bytes */
538 if ((unsigned char)*lp >= 0200) {
539 if (p1 >= curlp &&
540 (unsigned char)*p1 == SS2)
541 lp = p1;
542 else if (p2 >= curlp &&
543 (unsigned char)*p2 == SS3)
544 lp = p2;
545 else
546 lp = lp - eucw1 + 1;
547 }
548 if (lp == locs)
549 break;
550 if (_advance(lp, ep))
551 return (1);
552 } while (lp > curlp);
553 return (0);
554 } else {
555 /* Anything else */
556 do {
557 int len;
558 char *p1, *p2;
559
560 p2 = curlp;
561 do {
562 p1 = p2;
563 if (isascii(*p1)) {
564 p2 = p1 + 1;
565 } else {
566 len = mblen(p1, MB_CUR_MAX);
567 if (len == -1) {
568 len = 1;
569 }
570 p2 = p1 + len;
571 }
572 if (p2 > lp) {
573 /* something is wrong */
574 return (0);
575 }
576 } while (p2 != lp);
577 lp = p1;
578 if (lp == locs)
579 break;
580 if (_advance(lp, ep))
581 return (1);
582 } while (lp > curlp);
583 return (0);
584 }
585 }
586 star:
587 do {
588 if (--lp == locs)
589 break;
590 if (_advance(lp, ep))
591 return (1);
592 } while (lp > curlp);
593 return (0);
594 }
595
596 static void
getrnge(char * str)597 getrnge(char *str)
598 {
599 low = *str++ & 0377;
600 size = (*str == (char)255)? 20000: (*str &0377) - low;
601 }
602
/*
 * cclass() - test the character at *rp against a compiled character class.
 *
 * ep  - start of the class: a 32-byte bitmap, then a count byte, then a
 *       list of multibyte members that ends at ep + 32 + count.
 * rp  - in/out text position; on return it has been moved past the
 *       character examined (one byte when that character is the null).
 * neg - non-zero when the class is negated.
 *
 * Returns -1 when the text holds an invalid multibyte sequence (*rp is
 * then left unchanged), 1 when the character satisfies the class (taking
 * neg into account) and 0 when it does not.
 *
 * The parameter must stay named "ep": ISTHERE() takes only the character
 * as an argument, so it presumably reads the bitmap through ep.
 */
static int
cclass(char *ep, char **rp, int neg)
{
	char *text = *rp;
	char *endep;
	wchar_t c;
	wchar_t cl;
	wchar_t member;
	wchar_t prev = 0;
	int len;

	len = Popwchar(text, cl);
	if (len == -1)
		return (-1);
	*rp = text + (len ? len : 1);
	c = cl;

	/* look for eight bit characters in bitmap */
	if (c <= 0177 || (c <= 0377 && iscntrl((int)c))) {
		if (ISTHERE(c))
			return (!neg);
		return (neg != 0);
	}

	/* look past bitmap for multibyte characters */
	endep = *(ep + 32) + ep + 32;
	ep += 33;
	while (ep < endep) {
		ep += Popwchar(ep, cl);
		member = cl;
		if (member == '-') {
			/* range: previous member through the next one */
			ep += Popwchar(ep, cl);
			member = cl;
			if (prev <= c && c <= member)
				return (!neg);
		}
		if (member == c)
			return (!neg);
		prev = member;
	}

	/* list exhausted without a hit */
	return (neg);
}
642