1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /* Copyright (c) 1988 AT&T */
22 /* All Rights Reserved */
23
24 /*
25 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
26 *
27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #ifndef _REGEXP_H
32 #define _REGEXP_H
33
34 #include <string.h>
35
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39
40 #define CBRA 2
41 #define CCHR 4
42 #define CDOT 8
43 #define CCL 12
44 #define CXCL 16
45 #define CDOL 20
46 #define CCEOF 22
47 #define CKET 24
48 #define CBACK 36
49 #define NCCL 40
50
51 #define STAR 01
52 #define RNGE 03
53
54 #define NBRA 9
55
56 #define PLACE(c) ep[c >> 3] |= bittab[c & 07]
57 #define ISTHERE(c) (ep[c >> 3] & bittab[c & 07])
58 #define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0)
59
60 static char *braslist[NBRA];
61 static char *braelist[NBRA];
62 int sed, nbra;
63 char *loc1, *loc2, *locs;
64 static int nodelim;
65
66 int circf;
67 static int low;
68 static int size;
69
70 static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
71
72 int advance(const char *lp, const char *ep);
73 static void getrnge(const char *str);
74
75 char *
compile(char * instring,char * ep,const char * endbuf,int seof)76 compile(char *instring, char *ep, const char *endbuf, int seof)
77 {
78 INIT /* Dependent declarations and initializations */
79 register int c;
80 register int eof = seof;
81 char *lastep;
82 int cclcnt;
83 char bracket[NBRA], *bracketp;
84 int closed;
85 int neg;
86 int lc;
87 int i, cflg;
88 int iflag; /* used for non-ascii characters in brackets */
89
90 #ifdef __lint
91 /* make lint happy */
92 c = nodelim;
93 #endif
94
95 lastep = NULL;
96 if ((c = GETC()) == eof || c == '\n') {
97 if (c == '\n') {
98 UNGETC(c);
99 nodelim = 1;
100 }
101 if (*ep == 0 && !sed)
102 ERROR(41);
103 RETURN(ep);
104 }
105 bracketp = bracket;
106 circf = closed = nbra = 0;
107 if (c == '^')
108 circf++;
109 else
110 UNGETC(c);
111 for (;;) {
112 if (ep >= endbuf)
113 ERROR(50);
114 c = GETC();
115 if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
116 lastep = ep;
117 if (c == eof) {
118 *ep++ = CCEOF;
119 if (bracketp != bracket)
120 ERROR(42);
121 RETURN(ep);
122 }
123 switch (c) {
124
125 case '.':
126 *ep++ = CDOT;
127 continue;
128
129 case '\n':
130 if (!sed) {
131 UNGETC(c);
132 *ep++ = CCEOF;
133 nodelim = 1;
134 if (bracketp != bracket)
135 ERROR(42);
136 RETURN(ep);
137 } else ERROR(36);
138 case '*':
139 if (lastep == NULL || *lastep == CBRA ||
140 *lastep == CKET)
141 goto defchar;
142 *lastep |= STAR;
143 continue;
144
145 case '$':
146 if (PEEKC() != eof && PEEKC() != '\n')
147 goto defchar;
148 *ep++ = CDOL;
149 continue;
150
151 case '[':
152 if (&ep[17] >= endbuf)
153 ERROR(50);
154
155 *ep++ = CCL;
156 lc = 0;
157 for (i = 0; i < 16; i++)
158 ep[i] = 0;
159
160 neg = 0;
161 if ((c = GETC()) == '^') {
162 neg = 1;
163 c = GETC();
164 }
165 iflag = 1;
166 do {
167 c &= 0377;
168 if (c == '\0' || c == '\n')
169 ERROR(49);
170 if ((c & 0200) && iflag) {
171 iflag = 0;
172 if (&ep[32] >= endbuf)
173 ERROR(50);
174 ep[-1] = CXCL;
175 for (i = 16; i < 32; i++)
176 ep[i] = 0;
177 }
178 if (c == '-' && lc != 0) {
179 if ((c = GETC()) == ']') {
180 PLACE('-');
181 break;
182 }
183 if ((c & 0200) && iflag) {
184 iflag = 0;
185 if (&ep[32] >= endbuf)
186 ERROR(50);
187 ep[-1] = CXCL;
188 for (i = 16; i < 32; i++)
189 ep[i] = 0;
190 }
191 while (lc < c) {
192 PLACE(lc);
193 lc++;
194 }
195 }
196 lc = c;
197 PLACE(c);
198 } while ((c = GETC()) != ']');
199
200 if (iflag)
201 iflag = 16;
202 else
203 iflag = 32;
204
205 if (neg) {
206 if (iflag == 32) {
207 for (cclcnt = 0; cclcnt < iflag;
208 cclcnt++)
209 ep[cclcnt] ^= 0377;
210 ep[0] &= 0376;
211 } else {
212 ep[-1] = NCCL;
213 /* make nulls match so test fails */
214 ep[0] |= 01;
215 }
216 }
217
218 ep += iflag;
219
220 continue;
221
222 case '\\':
223 switch (c = GETC()) {
224
225 case '(':
226 if (nbra >= NBRA)
227 ERROR(43);
228 *bracketp++ = (char)nbra;
229 *ep++ = CBRA;
230 *ep++ = (char)nbra++;
231 continue;
232
233 case ')':
234 if (bracketp <= bracket)
235 ERROR(42);
236 *ep++ = CKET;
237 *ep++ = *--bracketp;
238 closed++;
239 continue;
240
241 case '{':
242 if (lastep == NULL)
243 goto defchar;
244 *lastep |= RNGE;
245 cflg = 0;
246 nlim:
247 c = GETC();
248 i = 0;
249 do {
250 if ('0' <= c && c <= '9')
251 i = 10 * i + c - '0';
252 else
253 ERROR(16);
254 } while (((c = GETC()) != '\\') && (c != ','));
255 if (i >= 255)
256 ERROR(11);
257 *ep++ = (char)i;
258 if (c == ',') {
259 if (cflg++)
260 ERROR(44);
261 if ((c = GETC()) == '\\')
262 *ep++ = (char)255;
263 else {
264 UNGETC(c);
265 goto nlim;
266 /* get 2'nd number */
267 }
268 }
269 if (GETC() != '}')
270 ERROR(45);
271 if (!cflg) /* one number */
272 *ep++ = (char)i;
273 else if ((ep[-1] & 0377) < (ep[-2] & 0377))
274 ERROR(46);
275 continue;
276
277 case '\n':
278 ERROR(36);
279
280 case 'n':
281 c = '\n';
282 goto defchar;
283
284 default:
285 if (c >= '1' && c <= '9') {
286 if ((c -= '1') >= closed)
287 ERROR(25);
288 *ep++ = CBACK;
289 *ep++ = (char)c;
290 continue;
291 }
292 /* FALLTHROUGH */
293 }
294 /* FALLTHROUGH */
295 /* Drop through to default to use \ to turn off special chars */
296
297 defchar:
298 default:
299 lastep = ep;
300 *ep++ = CCHR;
301 *ep++ = (char)c;
302 }
303 }
304 /*NOTREACHED*/
305 }
306
307 int
step(const char * p1,const char * p2)308 step(const char *p1, const char *p2)
309 {
310 char c;
311
312
313 if (circf) {
314 loc1 = (char *)p1;
315 return (advance(p1, p2));
316 }
317 /* fast check for first character */
318 if (*p2 == CCHR) {
319 c = p2[1];
320 do {
321 if (*p1 != c)
322 continue;
323 if (advance(p1, p2)) {
324 loc1 = (char *)p1;
325 return (1);
326 }
327 } while (*p1++);
328 return (0);
329 }
330 /* regular algorithm */
331 do {
332 if (advance(p1, p2)) {
333 loc1 = (char *)p1;
334 return (1);
335 }
336 } while (*p1++);
337 return (0);
338 }
339
340 int
advance(const char * lp,const char * ep)341 advance(const char *lp, const char *ep)
342 {
343 const char *curlp;
344 int c;
345 char *bbeg;
346 register char neg;
347 size_t ct;
348
349 for (;;) {
350 neg = 0;
351 switch (*ep++) {
352
353 case CCHR:
354 if (*ep++ == *lp++)
355 continue;
356 return (0);
357 /*FALLTHRU*/
358
359 case CDOT:
360 if (*lp++)
361 continue;
362 return (0);
363 /*FALLTHRU*/
364
365 case CDOL:
366 if (*lp == 0)
367 continue;
368 return (0);
369 /*FALLTHRU*/
370
371 case CCEOF:
372 loc2 = (char *)lp;
373 return (1);
374 /*FALLTHRU*/
375
376 case CXCL:
377 c = (unsigned char)*lp++;
378 if (ISTHERE(c)) {
379 ep += 32;
380 continue;
381 }
382 return (0);
383 /*FALLTHRU*/
384
385 case NCCL:
386 neg = 1;
387 /*FALLTHRU*/
388
389 case CCL:
390 c = *lp++;
391 if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
392 ep += 16;
393 continue;
394 }
395 return (0);
396 /*FALLTHRU*/
397
398 case CBRA:
399 braslist[*ep++] = (char *)lp;
400 continue;
401 /*FALLTHRU*/
402
403 case CKET:
404 braelist[*ep++] = (char *)lp;
405 continue;
406 /*FALLTHRU*/
407
408 case CCHR | RNGE:
409 c = *ep++;
410 getrnge(ep);
411 while (low--)
412 if (*lp++ != c)
413 return (0);
414 curlp = lp;
415 while (size--)
416 if (*lp++ != c)
417 break;
418 if (size < 0)
419 lp++;
420 ep += 2;
421 goto star;
422 /*FALLTHRU*/
423
424 case CDOT | RNGE:
425 getrnge(ep);
426 while (low--)
427 if (*lp++ == '\0')
428 return (0);
429 curlp = lp;
430 while (size--)
431 if (*lp++ == '\0')
432 break;
433 if (size < 0)
434 lp++;
435 ep += 2;
436 goto star;
437 /*FALLTHRU*/
438
439 case CXCL | RNGE:
440 getrnge(ep + 32);
441 while (low--) {
442 c = (unsigned char)*lp++;
443 if (!ISTHERE(c))
444 return (0);
445 }
446 curlp = lp;
447 while (size--) {
448 c = (unsigned char)*lp++;
449 if (!ISTHERE(c))
450 break;
451 }
452 if (size < 0)
453 lp++;
454 ep += 34; /* 32 + 2 */
455 goto star;
456 /*FALLTHRU*/
457
458 case NCCL | RNGE:
459 neg = 1;
460 /*FALLTHRU*/
461
462 case CCL | RNGE:
463 getrnge(ep + 16);
464 while (low--) {
465 c = *lp++;
466 if (((c & 0200) || !ISTHERE(c)) ^ neg)
467 return (0);
468 }
469 curlp = lp;
470 while (size--) {
471 c = *lp++;
472 if (((c & 0200) || !ISTHERE(c)) ^ neg)
473 break;
474 }
475 if (size < 0)
476 lp++;
477 ep += 18; /* 16 + 2 */
478 goto star;
479 /*FALLTHRU*/
480
481 case CBACK:
482 bbeg = braslist[*ep];
483 ct = braelist[*ep++] - bbeg;
484
485 if (ecmp(bbeg, lp, ct)) {
486 lp += ct;
487 continue;
488 }
489 return (0);
490 /*FALLTHRU*/
491
492 case CBACK | STAR:
493 bbeg = braslist[*ep];
494 ct = braelist[*ep++] - bbeg;
495 curlp = lp;
496 while (ecmp(bbeg, lp, ct))
497 lp += ct;
498
499 while (lp >= curlp) {
500 if (advance(lp, ep))
501 return (1);
502 lp -= ct;
503 }
504 return (0);
505 /*FALLTHRU*/
506
507 case CDOT | STAR:
508 curlp = lp;
509 while (*lp++)
510 ;
511 goto star;
512 /*FALLTHRU*/
513
514 case CCHR | STAR:
515 curlp = lp;
516 while (*lp++ == *ep)
517 ;
518 ep++;
519 goto star;
520 /*FALLTHRU*/
521
522 case CXCL | STAR:
523 curlp = lp;
524 do {
525 c = (unsigned char)*lp++;
526 } while (ISTHERE(c));
527 ep += 32;
528 goto star;
529 /*FALLTHRU*/
530
531 case NCCL | STAR:
532 neg = 1;
533 /*FALLTHRU*/
534
535 case CCL | STAR:
536 curlp = lp;
537 do {
538 c = *lp++;
539 } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
540 ep += 16;
541 goto star;
542 /*FALLTHRU*/
543
544 star:
545 do {
546 if (--lp == locs)
547 break;
548 if (advance(lp, ep))
549 return (1);
550 } while (lp > curlp);
551 return (0);
552
553 }
554 }
555 /*NOTREACHED*/
556 }
557
558 static void
getrnge(const char * str)559 getrnge(const char *str)
560 {
561 low = *str++ & 0377;
562 size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
563 }
564
565 #ifdef __cplusplus
566 }
567 #endif
568
569 #endif /* _REGEXP_H */
570