1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1988 AT&T */
28 /* All Rights Reserved */
29
30 #include <sys/types.h>
31 #include <ctype.h>
32 #include <limits.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <thread.h>
36 #include <pthread.h>
37 #include <widec.h> /* Defines multibyte and WCHAR_CSMASK for valid_range(). */
38 #include "_range.h"
39 #include "_regexp.h"
40
/* Largest value that fits in the single length/count bytes emitted below. */
#define	MBYTE_SIZE	255

/*
 * Scanner helpers.  They operate on the local variable `sp' (the current
 * position in the pattern) of the function that uses them.  Bytes are read
 * as unsigned char so that values above 0177 never become negative.
 */
#define	GETC()		((unsigned char)*sp++)
#define	PEEKC()		((unsigned char)*sp)

/*
 * Record the error code and leave through the caller's `out' label.
 * Wrapped in do/while (0) so that "ERROR(x);" is a single statement and
 * remains safe in an unbraced if/else.
 */
#define	ERROR(code) \
	do { \
		regerrno = (code); \
		goto out; \
	} while (0)

/*
 * Fetch the next character of the pattern into `c'.
 *
 * Uses the caller's locals: oldsp (set to the start of the character),
 * sp (advanced past it), n (number of bytes consumed), c (the character)
 * and cl (scratch for mbtowc()).  When `multibyte' is clear, or the next
 * byte is <= 0177, exactly one byte is taken; otherwise the character is
 * decoded with mbtowc() and an undecodable sequence raises error 67.
 */
#define	Popwchar \
	do { \
		oldsp = sp; \
		if (!multibyte || (unsigned char)*sp <= 0177) { \
			n = 1; \
			c = (wchar_t)(unsigned char)*sp++; \
		} else { \
			if ((n = mbtowc(&cl, sp, MB_LEN_MAX)) == -1) \
				ERROR(67); \
			sp += n; \
			c = cl; \
		} \
	} while (0)
60
/*
 * Process-wide state published by the compiler below: nbra is the running
 * count of \( groups, regerrno the last error code (0 when none) and
 * reglength the length in bytes of the most recently compiled expression.
 */
int nbra = 0;
int regerrno = 0;
int reglength = 0;

/* _bittab[i] is the mask with only bit i set, for i in 0..7. */
static unsigned char _bittab[] = {
	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
};
64
65 #ifdef _REENTRANT
/*
 * Per-thread copies of the three public variables; one instance is
 * allocated for each thread that is not the main thread.
 */
typedef struct _vars_storage {
	int	nbra;		/* thread's copy of nbra */
	int	regerrno;	/* thread's copy of regerrno */
	int	reglength;	/* thread's copy of reglength */
} vars_storage;
69
70 static thread_key_t key = THR_ONCE_KEY;
71
72 static vars_storage *
_get_vars_storage(thread_key_t * keyp)73 _get_vars_storage(thread_key_t *keyp)
74 {
75 vars_storage *vars;
76
77 if (thr_keycreate_once(keyp, free) != 0)
78 return (NULL);
79 vars = pthread_getspecific(*keyp);
80 if (vars == NULL) {
81 vars = calloc(1, sizeof (vars_storage));
82 if (thr_setspecific(*keyp, vars) != 0) {
83 if (vars)
84 (void) free(vars);
85 vars = NULL;
86 }
87 }
88 return (vars);
89 }
90
91 int *
___nbra(void)92 ___nbra(void)
93 {
94 if (thr_main())
95 return (&nbra);
96 else {
97 vars_storage *vars = _get_vars_storage(&key);
98 return (&vars->nbra);
99 }
100 }
101
102 int *
___regerrno(void)103 ___regerrno(void)
104 {
105 if (thr_main())
106 return (®errno);
107 else {
108 vars_storage *vars = _get_vars_storage(&key);
109 return (&vars->regerrno);
110 }
111 }
112
113 int *
___reglength(void)114 ___reglength(void)
115 {
116 if (thr_main())
117 return (®length);
118 else {
119 vars_storage *vars = _get_vars_storage(&key);
120 return (&vars->reglength);
121 }
122 }
123
/*
 * From this point on, every use of nbra, regerrno and reglength in this
 * file is routed through the accessor functions defined above, so the
 * code that follows reads and writes whichever copy those functions select.
 */
#undef	nbra
#undef	regerrno
#undef	reglength

#define	nbra		(*(___nbra()))
#define	regerrno	(*(___regerrno()))
#define	reglength	(*(___reglength()))
130
131 #endif /* _REENTRANT */
132
/*
 * Worker behind compile(); the extra viflag argument adjusts how bracket
 * expressions are parsed (only bit 0 is examined).
 */
char *_compile(const char *sp, char *ep, char *endbuf, int viflag);

/*
 * compile() - compile the regular expression `sp'.
 *
 * sp		- NUL-terminated pattern text.
 * ep		- output buffer, or a null pointer to have one allocated.
 * endbuf	- one past the end of the output buffer (unused when the
 *		  buffer is allocated).
 *
 * Simply forwards to _compile() with viflag set to 0 and returns its
 * result unchanged.
 */
char *
compile(const char *sp, char *ep, char *endbuf)
{
	char *result;

	result = _compile(sp, ep, endbuf, 0);
	return (result);
}
140
141 char *
_compile(const char * sp,char * ep,char * endbuf,int viflag)142 _compile(const char *sp, char *ep, char *endbuf, int viflag)
143 {
144 wchar_t c;
145 int n;
146 wchar_t d;
147 const char *oldsp;
148 char *lastep;
149 int cclcnt;
150 char bracket[NBRA], *bracketp;
151 int closed;
152 int neg;
153 int alloc;
154 wchar_t lc, cl;
155 int i, cflg;
156 char *expbuf = ep;
157 char *start;
158
159 regerrno = 0;
160 reglength = 0;
161 lastep = 0;
162 bracketp = bracket;
163 closed = 0;
164 alloc = 0;
165
166 oldsp = sp;
167 if ((c = *sp++) == '\0') {
168 if (ep == (char *)0 || ep[1] == 0)
169 ERROR(41);
170 goto out;
171 }
172 nbra = 0;
173 if (ep == (char *)0) {
174 /* malloc space */
175 const char *startsp = oldsp;
176 n = 0;
177 while ((d = *startsp++) != 0) {
178 if (d == '[')
179 n += 33; /* add room for bitmaps */
180 }
181 n += 2 * (startsp - oldsp) + 3;
182 if ((ep = malloc(n)) == (char *)0)
183 ERROR(50);
184 expbuf = ep;
185 alloc = 1;
186 endbuf = ep + n;
187 }
188
189 if (c == '^')
190 *ep++ = 1;
191 else {
192 *ep++ = 0;
193 sp--;
194 }
195
196 endbuf--; /* avoid extra check for overflow */
197 for (;;) {
198 if (ep >= endbuf)
199 ERROR(50);
200 Popwchar;
201 if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
202 lastep = ep;
203 if (c == '\0') {
204 *ep++ = CCEOF;
205 if (bracketp != bracket)
206 ERROR(42);
207 goto out;
208 }
209 switch (c) {
210
211 case '.':
212 *ep++ = CDOT;
213 continue;
214
215 case '*':
216 if (lastep == 0 || *lastep == CBRA ||*lastep == CKET ||
217 *lastep == CBRC || *lastep == CLET)
218 goto defchar;
219 *lastep |= STAR;
220 continue;
221
222 case '$':
223 /* look one character ahead to see if $ means */
224 /* to anchor match at end of line */
225 if ((d = PEEKC()) != '\0')
226 goto defchar;
227 *ep++ = CDOL;
228 continue;
229
230 case '[':
231 start = ep + 34;
232 if (start > endbuf)
233 ERROR(50);
234
235 *ep++ = CCL;
236 lc = 0;
237 for (i = 0; i < 32; i++)
238 ep[i] = 0;
239
240 neg = 0;
241 Popwchar;
242 if (c == '^') {
243 neg = 1;
244 Popwchar;
245 }
246 if (multibyte) {
247 if (neg) {
248 /* do not negate bitmap for */
249 /* for multibyte characters */
250 neg = 0;
251 ep[-1] = NMCCL;
252 /* turn off null byte */
253 ep[0] |= 01;
254 } else
255 ep[-1] = MCCL;
256 }
257 do {
258 if (c == '\0')
259 ERROR(49);
260 if (c == '-' && lc != 0) {
261 Popwchar;
262 if (c == '\0')
263 ERROR(49);
264 if (c == ']') {
265 PLACE('-');
266 break;
267 }
268 /*
269 * ranges do not span code sets
270 */
271 if (!multibyte || c <= 0177)
272 while (lc < c) {
273 PLACE(lc);
274 lc++;
275 }
276 else
277 if (valid_range(lc, c) && lc < c)
278 /* insert '-' for range */
279 *start++ = '-';
280 if (viflag & 1)
281 lc = 0;
282 else
283 lc = c;
284 } else
285 if (c == '\\' && (viflag & 1) &&
286 strchr("\\^-]", PEEKC())) {
287 c = GETC();
288 lc = c;
289 } else
290 lc = c;
291 /* put eight bit characters into bitmap */
292 if (!multibyte || c <= 0177 || c <= 0377 &&
293 iscntrl((int)c))
294 PLACE(c);
295 else {
296 /*
297 * insert individual bytes of
298 * multibyte characters after
299 * bitmap
300 */
301 if (start + n > endbuf)
302 ERROR(50);
303 while (n--)
304 *start++ = *oldsp++;
305 }
306 Popwchar;
307 } while (c != ']');
308
309 if (neg) {
310 for (cclcnt = 0; cclcnt < 32; cclcnt++)
311 ep[cclcnt] ^= 0377;
312 ep[0] &= 0376;
313 }
314 ep += 32;
315 if (multibyte) {
316 /*
317 * Only allow 256 bytes to
318 * represent multibyte characters
319 * character class
320 */
321 if (start - ep > MBYTE_SIZE)
322 ERROR(50);
323 *ep = (char)(start - ep);
324 ep = start;
325 }
326 continue;
327
328 case '\\':
329 Popwchar;
330 switch (c) {
331
332 case '(':
333 if (nbra >= NBRA)
334 ERROR(43);
335 *bracketp++ = nbra;
336 *ep++ = CBRA;
337 *ep++ = nbra++;
338 continue;
339
340 case ')':
341 if (bracketp <= bracket)
342 ERROR(42);
343 *ep++ = CKET;
344 *ep++ = *--bracketp;
345 closed++;
346 continue;
347
348 case '{':
349 if (lastep == (char *)0)
350 goto defchar;
351 *lastep |= RNGE;
352 cflg = 0;
353 c = GETC();
354 nlim:
355 i = 0;
356 do {
357 if ('0' <= c && c <= '9')
358 i = 10 * i + (int)c - '0';
359 else
360 ERROR(16);
361 } while (((c = GETC()) != '\\') && (c != ','));
362 if (i > MBYTE_SIZE)
363 ERROR(11);
364 *ep++ = (char)i;
365 if (c == ',') {
366 if (cflg++)
367 ERROR(44);
368 if ((c = GETC()) == '\\')
369 *ep++ = (char)MBYTE_SIZE;
370 else
371 goto nlim;
372 /* get 2'nd number */
373 }
374 if (GETC() != '}')
375 ERROR(45);
376 if (!cflg) /* one number */
377 *ep++ = (char)i;
378 else
379 if ((int)(unsigned char)ep[-1] <
380 (int)(unsigned char)ep[-2])
381 ERROR(46);
382 continue;
383
384 case 'n':
385 c = '\n';
386 goto defchar;
387
388 case '<':
389 *ep++ = CBRC;
390 continue;
391
392 case '>':
393 *ep++ = CLET;
394 continue;
395
396 default:
397 if (c >= '1' && c <= '9') {
398 if ((c -= '1') >= closed)
399 ERROR(25);
400 *ep++ = CBACK;
401 *ep++ = (char)c;
402 continue;
403 }
404 break;
405 }
406
407 /*
408 * Drop through to default to use \ to turn off
409 * special chars
410 */
411 /* FALLTHROUGH */
412 defchar:
413 default:
414 lastep = ep;
415 if (!multibyte || c <= 0177) {
416 /* 8-bit character */
417 *ep++ = CCHR;
418 *ep++ = (char)c;
419 } else {
420 /* multibyte character */
421 *ep++ = MCCHR;
422 if (ep + n > endbuf)
423 ERROR(50);
424 while (n--)
425 *ep++ = *oldsp++;
426 }
427 }
428 }
429 out:
430 if (regerrno) {
431 if (alloc)
432 free(expbuf);
433 return ((char *)0);
434 }
435 reglength = (int)(ep - expbuf);
436 if (alloc)
437 return (expbuf);
438 return (ep);
439 }
440