/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
26
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 #include <sys/types.h>
33 #include <ctype.h>
34 #include <stdlib.h>
35 #include <limits.h>
36 #include <string.h>
37 #include <synch.h>
38 #include <thread.h>
39 #include <pthread.h>
40 #include <widec.h>
41 #include "_regexp.h"
42
/*
 * Local helper macros.  Every argument is parenthesized so that an
 * expression argument expands with the intended precedence.
 *
 *	ecmp(s1, s2, n)	true when the first n bytes of s1 and s2 compare
 *			equal (strncmp() semantics)
 *	Popwchar(p, l)	decode the multibyte character at p into the wchar_t
 *			lvalue l; evaluates to the mbtowc() return value
 *	uletter(c)	true for an alphabetic character or '_'
 *			(c is evaluated twice; pass side-effect-free operands)
 *	_NBRA		number of slots in braslist[] / braelist[]
 */
#define	ecmp(s1, s2, n)	(strncmp((s1), (s2), (n)) == 0)
#define	Popwchar(p, l)	mbtowc(&(l), (p), MB_LEN_MAX)
#define	uletter(c)	(isalpha(c) || (c) == '_')
#define	_NBRA		128

/*
 * Exported match state.
 *
 *	loc1		set by step() to the start of the matched text
 *	loc2		set by the matcher to the first byte after the match
 *	locs		position at which the closure back-off loops stop
 *			retrying
 *	braslist[i]	start of the text captured by bracket group i
 *	braelist[i]	end of the text captured by bracket group i
 */
char *loc1 = (char *)0, *loc2 = (char *)0, *locs = (char *)0;
char *braslist[_NBRA] = { (char *)0 };
char *braelist[_NBRA] = { (char *)0 };
51
#ifdef _REENTRANT
/*
 * Thread-specific-data key under which a thread's vars_storage is stored;
 * created on first use by _get_vars_storage().
 */
static thread_key_t key = THR_ONCE_KEY;

/*
 * Per-thread copy of the exported match state.  The members mirror the
 * global loc1/loc2/locs/braslist/braelist variables one for one.
 */
typedef struct _vars_storage {
	char *loc1, *loc2, *locs;
	char *braslist[_NBRA];
	char *braelist[_NBRA];
} vars_storage;
#endif
60
/* _bittab[i] is the single-bit mask (1 << i) for bit i of a bitmap byte. */
static unsigned char _bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

/* Forward declarations of the file-local helpers. */
static void getrnge(char *);
static int cclass(char *, char **, int);
static int _advance(char *, char *);

/*
 * Scratch state shared by the matcher:
 *	low, size	repetition bounds decoded by getrnge(): the minimum
 *			count and the number of additional repetitions
 *	start		subject-string pointer recorded by step()/advance()
 */
static int low;
static int size;
static char *start;
68
69 #ifdef _REENTRANT
70 static mutex_t lock = DEFAULTMUTEX;
71
72 static vars_storage *
_get_vars_storage(thread_key_t * keyp)73 _get_vars_storage(thread_key_t *keyp)
74 {
75 vars_storage *vars;
76
77 if (thr_keycreate_once(keyp, free) != 0)
78 return (NULL);
79 vars = pthread_getspecific(*keyp);
80 if (vars == NULL) {
81 vars = calloc(1, sizeof (vars_storage));
82 if (thr_setspecific(*keyp, vars) != 0) {
83 if (vars)
84 (void) free(vars);
85 vars = NULL;
86 }
87 }
88 return (vars);
89 }
90
91 char **
___braslist(void)92 ___braslist(void)
93 {
94 if (thr_main())
95 return ((char **)&braslist);
96 else {
97 vars_storage *vars = _get_vars_storage(&key);
98 return ((char **)&vars->braslist);
99 }
100 }
101
102 char **
___braelist(void)103 ___braelist(void)
104 {
105 if (thr_main())
106 return ((char **)&braelist);
107 else {
108 vars_storage *vars = _get_vars_storage(&key);
109 return ((char **)&vars->braelist);
110 }
111 }
112
113 char **
___loc1(void)114 ___loc1(void)
115 {
116 if (thr_main())
117 return (&loc1);
118 else {
119 vars_storage *vars = _get_vars_storage(&key);
120 return (&vars->loc1);
121 }
122 }
123
124 char **
___loc2(void)125 ___loc2(void)
126 {
127 if (thr_main())
128 return (&loc2);
129 else {
130 vars_storage *vars = _get_vars_storage(&key);
131 return (&vars->loc2);
132 }
133 }
134
135 char **
___locs(void)136 ___locs(void)
137 {
138 if (thr_main())
139 return (&locs);
140 else {
141 vars_storage *vars = _get_vars_storage(&key);
142 return (&vars->locs);
143 }
144 }
145
146 #undef braslist
147 #define braslist (___braslist())
148 #undef braelist
149 #define braelist (___braelist())
150 #undef loc1
151 #define loc1 (*(___loc1()))
152 #undef loc2
153 #define loc2 (*(___loc2()))
154 #undef locs
155 #define locs (*(___locs()))
156
157 #endif /* _REENTRANT */
158
159 int
step(char * p1,char * p2)160 step(char *p1, char *p2)
161 {
162 int c;
163 wchar_t cl;
164 int n;
165 int ret;
166
167 /* check if match is restricted to beginning of string */
168 (void) mutex_lock(&lock);
169 start = p1;
170 if (*p2++) {
171 loc1 = p1;
172 ret = _advance(p1, p2);
173 (void) mutex_unlock(&lock);
174 return (ret);
175 }
176 if (*p2 == CCHR) {
177 /* fast check for first character */
178 c = p2[1];
179 do {
180 if (*p1 != c)
181 continue;
182 if (_advance(p1, p2)) {
183 loc1 = p1;
184 (void) mutex_unlock(&lock);
185 return (1);
186 }
187 } while (*p1++);
188 } else if (multibyte)
189 do {
190 if (_advance(p1, p2)) {
191 loc1 = p1;
192 (void) mutex_unlock(&lock);
193 return (1);
194 }
195 n = Popwchar(p1, cl);
196 if (n < 0)
197 /* skip past illegal multibyte characters */
198 p1++;
199 else
200 p1 += n;
201 } while (n);
202 else
203 /* regular algorithm */
204 do {
205 if (_advance(p1, p2)) {
206 loc1 = p1;
207 (void) mutex_unlock(&lock);
208 return (1);
209 }
210 } while (*p1++);
211 (void) mutex_unlock(&lock);
212 return (0);
213 }
214
215 int
advance(char * lp,char * ep)216 advance(char *lp, char *ep)
217 {
218 int ret;
219
220 (void) mutex_lock(&lock);
221 /* ignore flag to see if expression is anchored */
222 start = lp;
223 ret = _advance(lp, ++ep);
224 (void) mutex_unlock(&lock);
225 return (ret);
226 }
227
228 static int
_advance(char * lp,char * ep)229 _advance(char *lp, char *ep)
230 {
231 char *rp;
232 char *curlp;
233 wchar_t c, d;
234 int n;
235 wchar_t cl;
236 int neg;
237 char *bbeg;
238 int ct;
239
240 for (;;) {
241 neg = 0;
242 switch (*ep++) {
243
244 case CCHR:
245 if (*ep++ == *lp++)
246 continue;
247 return (0);
248
249 case MCCHR:
250 ep += Popwchar(ep, cl);
251 c = cl;
252 if ((n = Popwchar(lp, cl)) <= 0 || c != cl)
253 return (0);
254 lp += n;
255 continue;
256
257 case CDOT:
258 /*
259 * match any characters except NULL
260 */
261 if ((n = Popwchar(lp, cl)) > 0) {
262 lp += n;
263 continue;
264 } else if (n < 0) {
265 lp++;
266 continue;
267 } else {
268 return (0);
269 }
270 case CDOL:
271 if (*lp == 0)
272 continue;
273 return (0);
274
275 case CCEOF:
276 loc2 = lp;
277 return (1);
278
279 case CCL:
280 c = (unsigned char)*lp++;
281 if (ISTHERE(c)) {
282 ep += 32;
283 continue;
284 }
285 return (0);
286
287 case NMCCL:
288 neg = 1;
289 /* FALLTHRU */
290
291 case MCCL:
292 rp = lp;
293 if (cclass(ep, &rp, neg) != 1)
294 return (0);
295 ep += *(ep + 32) + 32;
296 lp = rp;
297 continue;
298
299 case CBRA:
300 braslist[*ep++] = lp;
301 continue;
302
303 case CKET:
304 braelist[*ep++] = lp;
305 continue;
306
307 case MCCHR | RNGE:
308 ep += Popwchar(ep, cl);
309 c = cl;
310 getrnge(ep);
311 while (low--) {
312 if ((n = Popwchar(lp, cl)) <= 0 || cl != c)
313 return (0);
314 lp += n;
315 }
316 curlp = lp;
317 while (size--) {
318 if ((n = Popwchar(lp, cl)) <= 0 || cl != c)
319 break;
320 lp += n;
321 }
322 if (size < 0)
323 n = Popwchar(lp, cl);
324 if (n == -1)
325 return (0);
326 lp += (n ? n : 1);
327 ep += 2;
328 goto mstar;
329
330 case CCHR | RNGE:
331 c = *ep++;
332 getrnge(ep);
333 while (low--)
334 if (*lp++ != c)
335 return (0);
336 curlp = lp;
337 while (size--)
338 if (*lp++ != c)
339 break;
340 if (size < 0)
341 lp++;
342 ep += 2;
343 goto star;
344
345 case CDOT | RNGE:
346 getrnge(ep);
347 while (low--) {
348 if ((n = Popwchar(lp, cl)) > 0) {
349 lp += n;
350 } else if (n < 0) {
351 lp++;
352 } else {
353 return (0);
354 }
355 }
356 curlp = lp;
357 while (size--) {
358 if ((n = Popwchar(lp, cl)) > 0) {
359 lp += n;
360 } else if (n < 0) {
361 lp++;
362 } else {
363 break;
364 }
365 }
366 if (size < 0)
367 n = Popwchar(lp, cl);
368 if (n > 0) {
369 lp += n;
370 } else {
371 lp++;
372 }
373 ep += 2;
374 goto mstar;
375
376 case NMCCL | RNGE:
377 neg = 1;
378 /* FALLTHRU */
379
380 case MCCL | RNGE:
381 getrnge(ep + *(ep + 32) + 32);
382 rp = lp;
383 while (low--) {
384 if (cclass(ep, &rp, neg) != 1)
385 return (0);
386 }
387 curlp = rp;
388 while (size-- && (c = (cclass(ep, &rp, neg))) == 1)
389 ;
390 if (c == -1)
391 return (0);
392 lp = rp;
393 if (size < 0) {
394 if ((n = Popwchar(lp, cl)) == -1)
395 return (0);
396 lp += (n ? n : 1);
397 }
398 ep += *(ep + 32) + 34;
399 goto mstar;
400
401 case CCL | RNGE:
402 getrnge(ep + 32);
403 while (low--) {
404 c = (unsigned char)*lp++;
405 if (!ISTHERE(c))
406 return (0);
407 }
408 curlp = lp;
409 while (size--) {
410 c = (unsigned char)*lp++;
411 if (!ISTHERE(c))
412 break;
413 }
414 if (size < 0)
415 lp++;
416 ep += 34; /* 32 + 2 */
417 goto star;
418
419 case CBACK:
420 bbeg = braslist[*ep];
421 ct = (int)(braelist[*ep++] - bbeg);
422
423 if (ecmp(bbeg, lp, ct)) {
424 lp += ct;
425 continue;
426 }
427 return (0);
428
429 case CBACK | STAR:
430 bbeg = braslist[*ep];
431 ct = (int)(braelist[*ep++] - bbeg);
432 curlp = lp;
433 while (ecmp(bbeg, lp, ct))
434 lp += ct;
435
436 while (lp >= curlp) {
437 if (_advance(lp, ep))
438 return (1);
439 lp -= ct;
440 }
441 return (0);
442
443 case CDOT | STAR:
444 curlp = lp;
445 if (!multibyte)
446 while (*lp++)
447 ;
448 else {
449 for (;;) {
450 n = Popwchar(lp, cl);
451 if (n > 0) {
452 lp += n;
453 } else if (n < 0) {
454 lp++;
455 } else {
456 lp++;
457 break;
458 }
459 }
460 }
461 goto mstar;
462
463 case CCHR | STAR:
464 curlp = lp;
465 while (*lp++ == *ep)
466 ;
467 ep++;
468 goto star;
469
470 case MCCHR | STAR:
471 curlp = lp;
472 ep += Popwchar(ep, cl);
473 c = cl;
474 while ((n = Popwchar(lp, cl)) > 0 && cl == c)
475 lp += n;
476 if (n == -1)
477 return (0);
478 lp += (n ? n : 1);
479 goto mstar;
480
481 case NMCCL | STAR:
482 neg = 1;
483 /* FALLTHRU */
484
485 case MCCL | STAR:
486 curlp = rp = lp;
487 while ((d = cclass(ep, &rp, neg)) == 1)
488 ;
489 if (d == -1)
490 return (0);
491 lp = rp;
492 ep += *(ep + 32) + 32;
493 goto mstar;
494
495 case CCL | STAR:
496 curlp = lp;
497 do {
498 c = (unsigned char)*lp++;
499 } while (ISTHERE(c));
500 ep += 32;
501 goto star;
502
503 case CBRC:
504 if (lp == start && locs == (char *)0)
505 continue;
506 c = (unsigned char)*lp;
507 d = (unsigned char)*(lp-1);
508 if ((isdigit((int)c) || uletter((int)c) || c >= 0200 &&
509 MB_CUR_MAX > 1) && !isdigit((int)d) &&
510 !uletter((int)d) &&
511 (d < 0200 || MB_CUR_MAX == 1))
512 continue;
513 return (0);
514
515 case CLET:
516 d = (unsigned char)*lp;
517 if (!isdigit((int)d) && !uletter((int)d) && (d < 0200 ||
518 MB_CUR_MAX == 1))
519 continue;
520 return (0);
521
522 default:
523 return (0);
524 }
525 }
526
527 mstar:
528 if (multibyte) {
529 /* MB_CUR_MAX > 1 */
530 if ((eucw1 != 0) || (eucw2 != 0) || (eucw3 != 0)) {
531 /* EUC locale */
532 do {
533 char *p1, *p2;
534 lp--;
535 p1 = lp - eucw2;
536 p2 = lp - eucw3;
537 /* check if previous character is from */
538 /* supplementary code sets 1, 2, or 3 and */
539 /* back up appropriate number of bytes */
540 if ((unsigned char)*lp >= 0200) {
541 if (p1 >= curlp &&
542 (unsigned char)*p1 == SS2)
543 lp = p1;
544 else if (p2 >= curlp &&
545 (unsigned char)*p2 == SS3)
546 lp = p2;
547 else
548 lp = lp - eucw1 + 1;
549 }
550 if (lp == locs)
551 break;
552 if (_advance(lp, ep))
553 return (1);
554 } while (lp > curlp);
555 return (0);
556 } else {
557 /* Anything else */
558 do {
559 int len;
560 char *p1, *p2;
561
562 p2 = curlp;
563 do {
564 p1 = p2;
565 if (isascii(*p1)) {
566 p2 = p1 + 1;
567 } else {
568 len = mblen(p1, MB_CUR_MAX);
569 if (len == -1) {
570 len = 1;
571 }
572 p2 = p1 + len;
573 }
574 if (p2 > lp) {
575 /* something is wrong */
576 return (0);
577 }
578 } while (p2 != lp);
579 lp = p1;
580 if (lp == locs)
581 break;
582 if (_advance(lp, ep))
583 return (1);
584 } while (lp > curlp);
585 return (0);
586 }
587 }
588 star:
589 do {
590 if (--lp == locs)
591 break;
592 if (_advance(lp, ep))
593 return (1);
594 } while (lp > curlp);
595 return (0);
596 }
597
598 static void
getrnge(char * str)599 getrnge(char *str)
600 {
601 low = *str++ & 0377;
602 size = (*str == (char)255)? 20000: (*str &0377) - low;
603 }
604
/*
 * Test the (possibly multibyte) character at *rp against the character
 * class encoded at ep, and advance *rp past that character (by one byte
 * when the character is the terminating NUL).
 *
 * The class is laid out as a 32-byte bitmap, then a length byte at
 * ep[32], then a list of multibyte members that ends at ep + 32 + ep[32].
 * In that list a '-' introduces the upper bound of a range whose lower
 * bound is the previously read member.
 *
 * neg inverts the sense of the test.
 *
 * Returns 1 when the character is accepted, 0 when it is rejected, and
 * -1 (leaving *rp untouched) when the text at *rp is not a valid
 * multibyte character.
 *
 * NOTE: the parameter name `ep' must be kept; the ISTHERE() macro from
 * _regexp.h presumably refers to `ep' directly.
 */
static int
cclass(char *ep, char **rp, int neg)
{
	char *lp;
	wchar_t c, d, f = 0;
	int n;
	wchar_t cl;
	char *endep;

	lp = *rp;
	if ((n = Popwchar(lp, cl)) == -1)
		return (-1);
	*rp = lp + (n ? n : 1);
	c = cl;

	/* look for eight bit characters in bitmap */
	if (c <= 0177 || (c <= 0377 && iscntrl((int)c)))
		return ((ISTHERE(c) && !neg) || (!ISTHERE(c) && neg));

	/* look past bitmap for multibyte characters */
	endep = *(ep + 32) + ep + 32;
	ep += 33;
	for (;;) {
		if (ep >= endep)
			return (neg);
		ep += Popwchar(ep, cl);
		d = cl;
		if (d == '-') {
			/* range: f is the lower bound, next member the upper */
			ep += Popwchar(ep, cl);
			d = cl;
			if (f <= c && c <= d)
				return (!neg);
		}
		if (d == c)
			return (!neg);
		f = d;
	}
	/*NOTREACHED*/
}
644