/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26
/*	Copyright (c) 1988 AT&T	*/
/*	  All Rights Reserved	*/
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 #include <sys/types.h>
33 #include <ctype.h>
34 #include <limits.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <thread.h>
38 #include <pthread.h>
39 #include <widec.h> /* Defines multibyte and WCHAR_CSMASK for valid_range(). */
40 #include "_range.h"
41 #include "_regexp.h"
42
/*
 * Largest value that can be stored in one byte of the compiled
 * expression; used as the limit for \{m,n\} counts and for the length
 * byte of a multibyte character class.
 */
#define	MBYTE_SIZE	255

/*
 * Scanner helpers.  They operate on the caller's local pattern cursor
 * `sp' and always yield the byte as an unsigned value.
 */
#define	GETC()		((unsigned char)*sp++)
#define	PEEKC()		((unsigned char)*sp)

/*
 * Record error code `c' in regerrno and jump to the caller's `out'
 * label.  Wrapped in do/while (0) so that "ERROR(n);" is exactly one
 * statement, even as the body of an unbraced if/else.
 */
#define	ERROR(c)	do { \
		regerrno = (c); \
		goto out; \
	} while (0)

/*
 * Fetch the next (possibly multibyte) character of the pattern.
 *
 * Requires these names in the caller's scope: sp, oldsp, n, c, cl and
 * the label `out'.  On completion:
 *	oldsp	points at the first byte of the character just read
 *	n	is the number of bytes it occupied
 *	c	holds the character as a wchar_t
 * Bytes <= 0177, and every byte when `multibyte' is zero, are taken as
 * single-byte characters.  Anything else is decoded with mbtowc(); an
 * invalid sequence raises error 67.
 */
#define	Popwchar	do { \
		oldsp = sp; \
		if (!multibyte || (unsigned char)*sp <= 0177) { \
			n = 1; \
			c = (wchar_t)(unsigned char)*sp++; \
		} else { \
			if ((n = mbtowc(&cl, sp, MB_LEN_MAX)) == -1) \
				ERROR(67); \
			sp += n; \
			c = cl; \
		} \
	} while (0)
62
/*
 * Results of the most recent compile, as seen by the main thread:
 *	nbra		number of \( groups opened in the pattern
 *	regerrno	error code of the last compile, 0 on success
 *	reglength	number of bytes of compiled expression produced
 */
int nbra = 0;
int regerrno = 0;
int reglength = 0;

/*
 * _bittab[i] has only bit i set.  It is not referenced directly in this
 * file; presumably the PLACE() bitmap macro from _regexp.h indexes it
 * (confirm against that header).  The table is never written, so it is
 * declared const.
 */
static const unsigned char _bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
66
67 #ifdef _REENTRANT
68 typedef struct _vars_storage {
69 int nbra, regerrno, reglength;
70 } vars_storage;
71
72 static thread_key_t key = THR_ONCE_KEY;
73
74 static vars_storage *
_get_vars_storage(thread_key_t * keyp)75 _get_vars_storage(thread_key_t *keyp)
76 {
77 vars_storage *vars;
78
79 if (thr_keycreate_once(keyp, free) != 0)
80 return (NULL);
81 vars = pthread_getspecific(*keyp);
82 if (vars == NULL) {
83 vars = calloc(1, sizeof (vars_storage));
84 if (thr_setspecific(*keyp, vars) != 0) {
85 if (vars)
86 (void) free(vars);
87 vars = NULL;
88 }
89 }
90 return (vars);
91 }
92
93 int *
___nbra(void)94 ___nbra(void)
95 {
96 if (thr_main())
97 return (&nbra);
98 else {
99 vars_storage *vars = _get_vars_storage(&key);
100 return (&vars->nbra);
101 }
102 }
103
104 int *
___regerrno(void)105 ___regerrno(void)
106 {
107 if (thr_main())
108 return (®errno);
109 else {
110 vars_storage *vars = _get_vars_storage(&key);
111 return (&vars->regerrno);
112 }
113 }
114
115 int *
___reglength(void)116 ___reglength(void)
117 {
118 if (thr_main())
119 return (®length);
120 else {
121 vars_storage *vars = _get_vars_storage(&key);
122 return (&vars->reglength);
123 }
124 }
125
/*
 * In the reentrant build, every later use of nbra, regerrno and
 * reglength in this file goes through the accessor functions, so each
 * thread reads and writes the variable those functions select for it.
 */
#undef	nbra
#undef	regerrno
#undef	reglength
#define	nbra		(*(___nbra()))
#define	regerrno	(*(___regerrno()))
#define	reglength	(*(___reglength()))
132
133 #endif /* _REENTRANT */
134
char *_compile(const char *sp, char *ep, char *endbuf, int viflag);

/*
 * Compile the regular expression `sp' with viflag 0.
 *
 * This is a thin wrapper: the arguments are handed unchanged to
 * _compile(), and its return value is returned unchanged.
 */
char *
compile(const char *sp, char *ep, char *endbuf)
{
	return (_compile(sp, ep, endbuf, 0));
}
142
143 char *
_compile(const char * sp,char * ep,char * endbuf,int viflag)144 _compile(const char *sp, char *ep, char *endbuf, int viflag)
145 {
146 wchar_t c;
147 int n;
148 wchar_t d;
149 const char *oldsp;
150 char *lastep;
151 int cclcnt;
152 char bracket[NBRA], *bracketp;
153 int closed;
154 int neg;
155 int alloc;
156 wchar_t lc, cl;
157 int i, cflg;
158 char *expbuf = ep;
159 char *start;
160
161 regerrno = 0;
162 reglength = 0;
163 lastep = 0;
164 bracketp = bracket;
165 closed = 0;
166 alloc = 0;
167
168 oldsp = sp;
169 if ((c = *sp++) == '\0') {
170 if (ep == (char *)0 || ep[1] == 0)
171 ERROR(41);
172 goto out;
173 }
174 nbra = 0;
175 if (ep == (char *)0) {
176 /* malloc space */
177 const char *startsp = oldsp;
178 n = 0;
179 while ((d = *startsp++) != NULL) {
180 if (d == '[')
181 n += 33; /* add room for bitmaps */
182 }
183 n += 2 * (startsp - oldsp) + 3;
184 if ((ep = malloc(n)) == (char *)0)
185 ERROR(50);
186 expbuf = ep;
187 alloc = 1;
188 endbuf = ep + n;
189 }
190
191 if (c == '^')
192 *ep++ = 1;
193 else {
194 *ep++ = 0;
195 sp--;
196 }
197
198 endbuf--; /* avoid extra check for overflow */
199 for (;;) {
200 if (ep >= endbuf)
201 ERROR(50);
202 Popwchar;
203 if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
204 lastep = ep;
205 if (c == '\0') {
206 *ep++ = CCEOF;
207 if (bracketp != bracket)
208 ERROR(42);
209 goto out;
210 }
211 switch (c) {
212
213 case '.':
214 *ep++ = CDOT;
215 continue;
216
217 case '*':
218 if (lastep == 0 || *lastep == CBRA ||*lastep == CKET ||
219 *lastep == CBRC || *lastep == CLET)
220 goto defchar;
221 *lastep |= STAR;
222 continue;
223
224 case '$':
225 /* look one character ahead to see if $ means */
226 /* to anchor match at end of line */
227 if ((d = PEEKC()) != '\0')
228 goto defchar;
229 *ep++ = CDOL;
230 continue;
231
232 case '[':
233 start = ep + 34;
234 if (start > endbuf)
235 ERROR(50);
236
237 *ep++ = CCL;
238 lc = 0;
239 for (i = 0; i < 32; i++)
240 ep[i] = 0;
241
242 neg = 0;
243 Popwchar;
244 if (c == '^') {
245 neg = 1;
246 Popwchar;
247 }
248 if (multibyte) {
249 if (neg) {
250 /* do not negate bitmap for */
251 /* for multibyte characters */
252 neg = 0;
253 ep[-1] = NMCCL;
254 /* turn off null byte */
255 ep[0] |= 01;
256 } else
257 ep[-1] = MCCL;
258 }
259 do {
260 if (c == '\0')
261 ERROR(49);
262 if (c == '-' && lc != 0) {
263 Popwchar;
264 if (c == '\0')
265 ERROR(49);
266 if (c == ']') {
267 PLACE('-');
268 break;
269 }
270 /*
271 * ranges do not span code sets
272 */
273 if (!multibyte || c <= 0177)
274 while (lc < c) {
275 PLACE(lc);
276 lc++;
277 }
278 else
279 if (valid_range(lc, c) && lc < c)
280 /* insert '-' for range */
281 *start++ = '-';
282 if (viflag & 1)
283 lc = 0;
284 else
285 lc = c;
286 } else
287 if (c == '\\' && (viflag & 1) &&
288 strchr("\\^-]", PEEKC())) {
289 c = GETC();
290 lc = c;
291 } else
292 lc = c;
293 /* put eight bit characters into bitmap */
294 if (!multibyte || c <= 0177 || c <= 0377 &&
295 iscntrl((int)c))
296 PLACE(c);
297 else {
298 /*
299 * insert individual bytes of
300 * multibyte characters after
301 * bitmap
302 */
303 if (start + n > endbuf)
304 ERROR(50);
305 while (n--)
306 *start++ = *oldsp++;
307 }
308 Popwchar;
309 } while (c != ']');
310
311 if (neg) {
312 for (cclcnt = 0; cclcnt < 32; cclcnt++)
313 ep[cclcnt] ^= 0377;
314 ep[0] &= 0376;
315 }
316 ep += 32;
317 if (multibyte) {
318 /*
319 * Only allow 256 bytes to
320 * represent multibyte characters
321 * character class
322 */
323 if (start - ep > MBYTE_SIZE)
324 ERROR(50);
325 *ep = (char)(start - ep);
326 ep = start;
327 }
328 continue;
329
330 case '\\':
331 Popwchar;
332 switch (c) {
333
334 case '(':
335 if (nbra >= NBRA)
336 ERROR(43);
337 *bracketp++ = nbra;
338 *ep++ = CBRA;
339 *ep++ = nbra++;
340 continue;
341
342 case ')':
343 if (bracketp <= bracket)
344 ERROR(42);
345 *ep++ = CKET;
346 *ep++ = *--bracketp;
347 closed++;
348 continue;
349
350 case '{':
351 if (lastep == (char *)0)
352 goto defchar;
353 *lastep |= RNGE;
354 cflg = 0;
355 c = GETC();
356 nlim:
357 i = 0;
358 do {
359 if ('0' <= c && c <= '9')
360 i = 10 * i + (int)c - '0';
361 else
362 ERROR(16);
363 } while (((c = GETC()) != '\\') && (c != ','));
364 if (i > MBYTE_SIZE)
365 ERROR(11);
366 *ep++ = (char)i;
367 if (c == ',') {
368 if (cflg++)
369 ERROR(44);
370 if ((c = GETC()) == '\\')
371 *ep++ = (char)MBYTE_SIZE;
372 else
373 goto nlim;
374 /* get 2'nd number */
375 }
376 if (GETC() != '}')
377 ERROR(45);
378 if (!cflg) /* one number */
379 *ep++ = (char)i;
380 else
381 if ((int)(unsigned char)ep[-1] <
382 (int)(unsigned char)ep[-2])
383 ERROR(46);
384 continue;
385
386 case 'n':
387 c = '\n';
388 goto defchar;
389
390 case '<':
391 *ep++ = CBRC;
392 continue;
393
394 case '>':
395 *ep++ = CLET;
396 continue;
397
398 default:
399 if (c >= '1' && c <= '9') {
400 if ((c -= '1') >= closed)
401 ERROR(25);
402 *ep++ = CBACK;
403 *ep++ = (char)c;
404 continue;
405 }
406 }
407
408 /* Drop through to default to use \ to turn off special chars */
409
410 defchar:
411 default:
412 lastep = ep;
413 if (!multibyte || c <= 0177) {
414 /* 8-bit character */
415 *ep++ = CCHR;
416 *ep++ = (char)c;
417 } else {
418 /* multibyte character */
419 *ep++ = MCCHR;
420 if (ep + n > endbuf)
421 ERROR(50);
422 while (n--)
423 *ep++ = *oldsp++;
424 }
425 }
426 }
427 out:
428 if (regerrno) {
429 if (alloc)
430 free(expbuf);
431 return ((char *)0);
432 }
433 reglength = (int)(ep - expbuf);
434 if (alloc)
435 return (expbuf);
436 return (ep);
437 }
438