xref: /illumos-gate/usr/src/head/regexp.h (revision ed093b41a93e8563e6e1e5dae0768dda2a7bcc27)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1988 AT&T	*/
22 /*	  All Rights Reserved	*/
23 
24 /*
25  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
26  *
27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #ifndef _REGEXP_H
32 #define	_REGEXP_H
33 
34 #include <string.h>
35 
36 #ifdef	__cplusplus
37 extern "C" {
38 #endif
39 
/*
 * Opcodes written into the compiled expression by compile() and
 * interpreted by advance().
 */
#define	CBRA	2	/* \( : start of a remembered group */
#define	CCHR	4	/* literal character, followed by that character */
#define	CDOT	8	/* . : any character except NUL */
#define	CCL	12	/* [...] : followed by a 16-byte bitmap */
#define	CXCL	16	/* [...] with 8-bit characters: 32-byte bitmap */
#define	CDOL	20	/* $ : end of subject string */
#define	CCEOF	22	/* end of the compiled expression */
#define	CKET	24	/* \) : end of a remembered group */
#define	CBACK	36	/* \1 .. \9 : back-reference */
#define	NCCL	40	/* [^...] : negated 16-byte bitmap */

/* Modifier bits ORed into the opcode of the preceding element. */
#define	STAR	01	/* element is followed by '*' */
#define	RNGE	03	/* element is followed by \{m,n\} */

/* Maximum number of \( \) groups in one expression. */
#define	NBRA	9

/*
 * Bitmap helpers for bracket expressions.  They operate on the bitmap that
 * the local variable `ep' points at.  Arguments and expansions are fully
 * parenthesised so that compound arguments group as a whole.
 */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])
#define	ecmp(s1, s2, n)	(strncmp((s1), (s2), (n)) == 0)

/* Start and end of each \( \) group, recorded by advance(). */
static char	*braslist[NBRA];
static char	*braelist[NBRA];
/* sed: selects sed-style handling in compile(); nbra: groups opened so far */
int	sed, nbra;
/*
 * loc1: start of the match (set by step()); loc2: end of the match (set by
 * advance()); locs: position at which closure back-off in advance() stops.
 */
char	*loc1, *loc2, *locs;
/* Set by compile() when the expression was terminated by a newline. */
static int	nodelim;

/* Non-zero when the expression started with '^'. */
int	circf;
/* Interval bounds decoded by getrnge(): minimum count and extra count. */
static int	low;
static int	size;

/* bittab[i] is the mask for bit i of a bitmap byte. */
static const unsigned char	bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

int advance(const char *lp, const char *ep);
static void getrnge(const char *str);
74 
75 char *
76 compile(char *instring, char *ep, const char *endbuf, int seof)
77 {
78 	INIT	/* Dependent declarations and initializations */
79 	register int c;
80 	register int eof = seof;
81 	char *lastep;
82 	int cclcnt;
83 	char bracket[NBRA], *bracketp;
84 	int closed;
85 	int neg;
86 	int lc;
87 	int i, cflg;
88 	int iflag; /* used for non-ascii characters in brackets */
89 
90 #ifdef __lint
91 	/* make lint happy */
92 	c = nodelim;
93 #endif
94 
95 	lastep = NULL;
96 	if ((c = GETC()) == eof || c == '\n') {
97 		if (c == '\n') {
98 			UNGETC(c);
99 			nodelim = 1;
100 		}
101 		if (*ep == 0 && !sed)
102 			ERROR(41);
103 		RETURN(ep);
104 	}
105 	bracketp = bracket;
106 	circf = closed = nbra = 0;
107 	if (c == '^')
108 		circf++;
109 	else
110 		UNGETC(c);
111 	for (;;) {
112 		if (ep >= endbuf)
113 			ERROR(50);
114 		c = GETC();
115 		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
116 			lastep = ep;
117 		if (c == eof) {
118 			*ep++ = CCEOF;
119 			if (bracketp != bracket)
120 				ERROR(42);
121 			RETURN(ep);
122 		}
123 		switch (c) {
124 
125 		case '.':
126 			*ep++ = CDOT;
127 			continue;
128 
129 		case '\n':
130 			if (!sed) {
131 				UNGETC(c);
132 				*ep++ = CCEOF;
133 				nodelim = 1;
134 				if (bracketp != bracket)
135 					ERROR(42);
136 				RETURN(ep);
137 			} else ERROR(36);
138 		case '*':
139 			if (lastep == NULL || *lastep == CBRA ||
140 			    *lastep == CKET)
141 				goto defchar;
142 			*lastep |= STAR;
143 			continue;
144 
145 		case '$':
146 			if (PEEKC() != eof && PEEKC() != '\n')
147 				goto defchar;
148 			*ep++ = CDOL;
149 			continue;
150 
151 		case '[':
152 			if (&ep[17] >= endbuf)
153 				ERROR(50);
154 
155 			*ep++ = CCL;
156 			lc = 0;
157 			for (i = 0; i < 16; i++)
158 				ep[i] = 0;
159 
160 			neg = 0;
161 			if ((c = GETC()) == '^') {
162 				neg = 1;
163 				c = GETC();
164 			}
165 			iflag = 1;
166 			do {
167 				c &= 0377;
168 				if (c == '\0' || c == '\n')
169 					ERROR(49);
170 				if ((c & 0200) && iflag) {
171 					iflag = 0;
172 					if (&ep[32] >= endbuf)
173 						ERROR(50);
174 					ep[-1] = CXCL;
175 					for (i = 16; i < 32; i++)
176 						ep[i] = 0;
177 				}
178 				if (c == '-' && lc != 0) {
179 					if ((c = GETC()) == ']') {
180 						PLACE('-');
181 						break;
182 					}
183 					if ((c & 0200) && iflag) {
184 						iflag = 0;
185 						if (&ep[32] >= endbuf)
186 							ERROR(50);
187 						ep[-1] = CXCL;
188 						for (i = 16; i < 32; i++)
189 							ep[i] = 0;
190 					}
191 					while (lc < c) {
192 						PLACE(lc);
193 						lc++;
194 					}
195 				}
196 				lc = c;
197 				PLACE(c);
198 			} while ((c = GETC()) != ']');
199 
200 			if (iflag)
201 				iflag = 16;
202 			else
203 				iflag = 32;
204 
205 			if (neg) {
206 				if (iflag == 32) {
207 					for (cclcnt = 0; cclcnt < iflag;
208 					    cclcnt++)
209 						ep[cclcnt] ^= 0377;
210 					ep[0] &= 0376;
211 				} else {
212 					ep[-1] = NCCL;
213 					/* make nulls match so test fails */
214 					ep[0] |= 01;
215 				}
216 			}
217 
218 			ep += iflag;
219 
220 			continue;
221 
222 		case '\\':
223 			switch (c = GETC()) {
224 
225 			case '(':
226 				if (nbra >= NBRA)
227 					ERROR(43);
228 				*bracketp++ = (char)nbra;
229 				*ep++ = CBRA;
230 				*ep++ = (char)nbra++;
231 				continue;
232 
233 			case ')':
234 				if (bracketp <= bracket)
235 					ERROR(42);
236 				*ep++ = CKET;
237 				*ep++ = *--bracketp;
238 				closed++;
239 				continue;
240 
241 			case '{':
242 				if (lastep == NULL)
243 					goto defchar;
244 				*lastep |= RNGE;
245 				cflg = 0;
246 			nlim:
247 				c = GETC();
248 				i = 0;
249 				do {
250 					if ('0' <= c && c <= '9')
251 						i = 10 * i + c - '0';
252 					else
253 						ERROR(16);
254 				} while (((c = GETC()) != '\\') && (c != ','));
255 				if (i >= 255)
256 					ERROR(11);
257 				*ep++ = (char)i;
258 				if (c == ',') {
259 					if (cflg++)
260 						ERROR(44);
261 					if ((c = GETC()) == '\\')
262 						*ep++ = (char)255;
263 					else {
264 						UNGETC(c);
265 						goto nlim;
266 						/* get 2'nd number */
267 					}
268 				}
269 				if (GETC() != '}')
270 					ERROR(45);
271 				if (!cflg)	/* one number */
272 					*ep++ = (char)i;
273 				else if ((ep[-1] & 0377) < (ep[-2] & 0377))
274 					ERROR(46);
275 				continue;
276 
277 			case '\n':
278 				ERROR(36);
279 
280 			case 'n':
281 				c = '\n';
282 				goto defchar;
283 
284 			default:
285 				if (c >= '1' && c <= '9') {
286 					if ((c -= '1') >= closed)
287 						ERROR(25);
288 					*ep++ = CBACK;
289 					*ep++ = (char)c;
290 					continue;
291 				}
292 				/* FALLTHROUGH */
293 			}
294 			/* FALLTHROUGH */
295 	/* Drop through to default to use \ to turn off special chars */
296 
297 		defchar:
298 		default:
299 			lastep = ep;
300 			*ep++ = CCHR;
301 			*ep++ = (char)c;
302 		}
303 	}
304 	/*NOTREACHED*/
305 }
306 
307 int
308 step(const char *p1, const char *p2)
309 {
310 	char c;
311 
312 
313 	if (circf) {
314 		loc1 = (char *)p1;
315 		return (advance(p1, p2));
316 	}
317 	/* fast check for first character */
318 	if (*p2 == CCHR) {
319 		c = p2[1];
320 		do {
321 			if (*p1 != c)
322 				continue;
323 			if (advance(p1, p2)) {
324 				loc1 = (char *)p1;
325 				return (1);
326 			}
327 		} while (*p1++);
328 		return (0);
329 	}
330 		/* regular algorithm */
331 	do {
332 		if (advance(p1, p2)) {
333 			loc1 = (char *)p1;
334 			return (1);
335 		}
336 	} while (*p1++);
337 	return (0);
338 }
339 
340 int
341 advance(const char *lp, const char *ep)
342 {
343 	const char *curlp;
344 	int c;
345 	char *bbeg;
346 	register char neg;
347 	size_t ct;
348 
349 	for (;;) {
350 		neg = 0;
351 		switch (*ep++) {
352 
353 		case CCHR:
354 			if (*ep++ == *lp++)
355 				continue;
356 			return (0);
357 			/*FALLTHRU*/
358 
359 		case CDOT:
360 			if (*lp++)
361 				continue;
362 			return (0);
363 			/*FALLTHRU*/
364 
365 		case CDOL:
366 			if (*lp == 0)
367 				continue;
368 			return (0);
369 			/*FALLTHRU*/
370 
371 		case CCEOF:
372 			loc2 = (char *)lp;
373 			return (1);
374 			/*FALLTHRU*/
375 
376 		case CXCL:
377 			c = (unsigned char)*lp++;
378 			if (ISTHERE(c)) {
379 				ep += 32;
380 				continue;
381 			}
382 			return (0);
383 			/*FALLTHRU*/
384 
385 		case NCCL:
386 			neg = 1;
387 			/*FALLTHRU*/
388 
389 		case CCL:
390 			c = *lp++;
391 			if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
392 				ep += 16;
393 				continue;
394 			}
395 			return (0);
396 			/*FALLTHRU*/
397 
398 		case CBRA:
399 			braslist[*ep++] = (char *)lp;
400 			continue;
401 			/*FALLTHRU*/
402 
403 		case CKET:
404 			braelist[*ep++] = (char *)lp;
405 			continue;
406 			/*FALLTHRU*/
407 
408 		case CCHR | RNGE:
409 			c = *ep++;
410 			getrnge(ep);
411 			while (low--)
412 				if (*lp++ != c)
413 					return (0);
414 			curlp = lp;
415 			while (size--)
416 				if (*lp++ != c)
417 					break;
418 			if (size < 0)
419 				lp++;
420 			ep += 2;
421 			goto star;
422 			/*FALLTHRU*/
423 
424 		case CDOT | RNGE:
425 			getrnge(ep);
426 			while (low--)
427 				if (*lp++ == '\0')
428 					return (0);
429 			curlp = lp;
430 			while (size--)
431 				if (*lp++ == '\0')
432 					break;
433 			if (size < 0)
434 				lp++;
435 			ep += 2;
436 			goto star;
437 			/*FALLTHRU*/
438 
439 		case CXCL | RNGE:
440 			getrnge(ep + 32);
441 			while (low--) {
442 				c = (unsigned char)*lp++;
443 				if (!ISTHERE(c))
444 					return (0);
445 			}
446 			curlp = lp;
447 			while (size--) {
448 				c = (unsigned char)*lp++;
449 				if (!ISTHERE(c))
450 					break;
451 			}
452 			if (size < 0)
453 				lp++;
454 			ep += 34;		/* 32 + 2 */
455 			goto star;
456 			/*FALLTHRU*/
457 
458 		case NCCL | RNGE:
459 			neg = 1;
460 			/*FALLTHRU*/
461 
462 		case CCL | RNGE:
463 			getrnge(ep + 16);
464 			while (low--) {
465 				c = *lp++;
466 				if (((c & 0200) || !ISTHERE(c)) ^ neg)
467 					return (0);
468 			}
469 			curlp = lp;
470 			while (size--) {
471 				c = *lp++;
472 				if (((c & 0200) || !ISTHERE(c)) ^ neg)
473 					break;
474 			}
475 			if (size < 0)
476 				lp++;
477 			ep += 18;		/* 16 + 2 */
478 			goto star;
479 			/*FALLTHRU*/
480 
481 		case CBACK:
482 			bbeg = braslist[*ep];
483 			ct = braelist[*ep++] - bbeg;
484 
485 			if (ecmp(bbeg, lp, ct)) {
486 				lp += ct;
487 				continue;
488 			}
489 			return (0);
490 			/*FALLTHRU*/
491 
492 		case CBACK | STAR:
493 			bbeg = braslist[*ep];
494 			ct = braelist[*ep++] - bbeg;
495 			curlp = lp;
496 			while (ecmp(bbeg, lp, ct))
497 				lp += ct;
498 
499 			while (lp >= curlp) {
500 				if (advance(lp, ep))
501 					return (1);
502 				lp -= ct;
503 			}
504 			return (0);
505 			/*FALLTHRU*/
506 
507 		case CDOT | STAR:
508 			curlp = lp;
509 			while (*lp++)
510 				;
511 			goto star;
512 			/*FALLTHRU*/
513 
514 		case CCHR | STAR:
515 			curlp = lp;
516 			while (*lp++ == *ep)
517 				;
518 			ep++;
519 			goto star;
520 			/*FALLTHRU*/
521 
522 		case CXCL | STAR:
523 			curlp = lp;
524 			do {
525 				c = (unsigned char)*lp++;
526 			} while (ISTHERE(c));
527 			ep += 32;
528 			goto star;
529 			/*FALLTHRU*/
530 
531 		case NCCL | STAR:
532 			neg = 1;
533 			/*FALLTHRU*/
534 
535 		case CCL | STAR:
536 			curlp = lp;
537 			do {
538 				c = *lp++;
539 			} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
540 			ep += 16;
541 			goto star;
542 			/*FALLTHRU*/
543 
544 		star:
545 			do {
546 				if (--lp == locs)
547 					break;
548 				if (advance(lp, ep))
549 					return (1);
550 			} while (lp > curlp);
551 			return (0);
552 
553 		}
554 	}
555 	/*NOTREACHED*/
556 }
557 
558 static void
559 getrnge(const char *str)
560 {
561 	low = *str++ & 0377;
562 	size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
563 }
564 
565 #ifdef	__cplusplus
566 }
567 #endif
568 
569 #endif	/* _REGEXP_H */
570