xref: /titanic_52/usr/src/head/regexp.h (revision 03831d35f7499c87d51205817c93e9a8d42c4bae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1988 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 1997-2002 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #ifndef _REGEXP_H
32 #define	_REGEXP_H
33 
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 1.9	*/
35 
36 #include <string.h>
37 
38 #ifdef	__cplusplus
39 extern "C" {
40 #endif
41 
/*
 * Opcodes written into the compiled expression by compile() and
 * interpreted by advance().
 */
#define	CBRA	2	/* "\(": followed by the group index */
#define	CCHR	4	/* literal character: followed by that character */
#define	CDOT	8	/* "." */
#define	CCL	12	/* "[...]": followed by a 16-byte bitmap */
#define	CXCL	16	/* "[...]" with 8-bit chars: 32-byte bitmap follows */
#define	CDOL	20	/* "$" at the end of the expression */
#define	CCEOF	22	/* end of the compiled expression */
#define	CKET	24	/* "\)": followed by the group index */
#define	CBACK	36	/* "\1".."\9": followed by the zero-based group index */
#define	NCCL	40	/* "[^...]": followed by a 16-byte bitmap */

/*
 * Modifier bits or'ed into the opcode of the preceding element.
 */
#define	STAR	01	/* element is followed by "*" */
#define	RNGE	03	/* element is followed by "\{m,n\}"; two count bytes */
			/* (low, high; high == 255 means "no upper bound") */
			/* are stored after the element's operands */

#define	NBRA	9	/* maximum number of "\(" groups */

/*
 * Bitmap helpers for bracket expressions.  Both expect a pointer named
 * "ep" addressing the first byte of the bitmap and the "bittab" table to
 * be in scope at the point of use.  Arguments and expansions are fully
 * parenthesized so that any expression may be passed safely.
 */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])

/* True when the first n bytes of s1 and s2 compare equal (strncmp). */
#define	ecmp(s1, s2, n)	(strncmp((s1), (s2), (n)) == 0)
61 
62 static char	*braslist[NBRA];
63 static char	*braelist[NBRA];
64 int	sed, nbra;
65 char	*loc1, *loc2, *locs;
66 static int	nodelim;
67 
68 int	circf;
69 static int	low;
70 static int	size;
71 
72 static unsigned char	bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
73 
74 #ifdef	__STDC__
75 int advance(const char *lp, const char *ep);
76 static void getrnge(const char *str);
77 #else
78 int advance();
79 static void getrnge();
80 #endif
81 
82 char *
83 #ifdef	__STDC__
84 compile(char *instring, char *ep, const char *endbuf, int seof)
85 #else
86 compile(instring, ep, endbuf, seof)
87 register char *ep;
88 char *instring, *endbuf;
89 int seof;
90 #endif
91 {
92 	INIT	/* Dependent declarations and initializations */
93 	register int c;
94 	register int eof = seof;
95 	char *lastep;
96 	int cclcnt;
97 	char bracket[NBRA], *bracketp;
98 	int closed;
99 	int neg;
100 	int lc;
101 	int i, cflg;
102 	int iflag; /* used for non-ascii characters in brackets */
103 
104 	lastep = NULL;
105 	if ((c = GETC()) == eof || c == '\n') {
106 		if (c == '\n') {
107 			UNGETC(c);
108 			nodelim = 1;
109 		}
110 		if (*ep == 0 && !sed)
111 			ERROR(41);
112 		RETURN(ep);
113 	}
114 	bracketp = bracket;
115 	circf = closed = nbra = 0;
116 	if (c == '^')
117 		circf++;
118 	else
119 		UNGETC(c);
120 	while (1) {
121 		if (ep >= endbuf)
122 			ERROR(50);
123 		c = GETC();
124 		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
125 			lastep = ep;
126 		if (c == eof) {
127 			*ep++ = CCEOF;
128 			if (bracketp != bracket)
129 				ERROR(42);
130 			RETURN(ep);
131 		}
132 		switch (c) {
133 
134 		case '.':
135 			*ep++ = CDOT;
136 			continue;
137 
138 		case '\n':
139 			if (!sed) {
140 				UNGETC(c);
141 				*ep++ = CCEOF;
142 				nodelim = 1;
143 				if (bracketp != bracket)
144 					ERROR(42);
145 				RETURN(ep);
146 			} else ERROR(36);
147 		case '*':
148 			if (lastep == NULL || *lastep == CBRA ||
149 			    *lastep == CKET)
150 				goto defchar;
151 			*lastep |= STAR;
152 			continue;
153 
154 		case '$':
155 			if (PEEKC() != eof && PEEKC() != '\n')
156 				goto defchar;
157 			*ep++ = CDOL;
158 			continue;
159 
160 		case '[':
161 			if (&ep[17] >= endbuf)
162 				ERROR(50);
163 
164 			*ep++ = CCL;
165 			lc = 0;
166 			for (i = 0; i < 16; i++)
167 				ep[i] = 0;
168 
169 			neg = 0;
170 			if ((c = GETC()) == '^') {
171 				neg = 1;
172 				c = GETC();
173 			}
174 			iflag = 1;
175 			do {
176 				c &= 0377;
177 				if (c == '\0' || c == '\n')
178 					ERROR(49);
179 				if ((c & 0200) && iflag) {
180 					iflag = 0;
181 					if (&ep[32] >= endbuf)
182 						ERROR(50);
183 					ep[-1] = CXCL;
184 					for (i = 16; i < 32; i++)
185 						ep[i] = 0;
186 				}
187 				if (c == '-' && lc != 0) {
188 					if ((c = GETC()) == ']') {
189 						PLACE('-');
190 						break;
191 					}
192 					if ((c & 0200) && iflag) {
193 						iflag = 0;
194 						if (&ep[32] >= endbuf)
195 							ERROR(50);
196 						ep[-1] = CXCL;
197 						for (i = 16; i < 32; i++)
198 							ep[i] = 0;
199 					}
200 					while (lc < c) {
201 						PLACE(lc);
202 						lc++;
203 					}
204 				}
205 				lc = c;
206 				PLACE(c);
207 			} while ((c = GETC()) != ']');
208 
209 			if (iflag)
210 				iflag = 16;
211 			else
212 				iflag = 32;
213 
214 			if (neg) {
215 				if (iflag == 32) {
216 					for (cclcnt = 0; cclcnt < iflag;
217 					    cclcnt++)
218 						ep[cclcnt] ^= 0377;
219 					ep[0] &= 0376;
220 				} else {
221 					ep[-1] = NCCL;
222 					/* make nulls match so test fails */
223 					ep[0] |= 01;
224 				}
225 			}
226 
227 			ep += iflag;
228 
229 			continue;
230 
231 		case '\\':
232 			switch (c = GETC()) {
233 
234 			case '(':
235 				if (nbra >= NBRA)
236 					ERROR(43);
237 				*bracketp++ = (char)nbra;
238 				*ep++ = CBRA;
239 				*ep++ = (char)nbra++;
240 				continue;
241 
242 			case ')':
243 				if (bracketp <= bracket)
244 					ERROR(42);
245 				*ep++ = CKET;
246 				*ep++ = *--bracketp;
247 				closed++;
248 				continue;
249 
250 			case '{':
251 				if (lastep == NULL)
252 					goto defchar;
253 				*lastep |= RNGE;
254 				cflg = 0;
255 			nlim:
256 				c = GETC();
257 				i = 0;
258 				do {
259 					if ('0' <= c && c <= '9')
260 						i = 10 * i + c - '0';
261 					else
262 						ERROR(16);
263 				} while (((c = GETC()) != '\\') && (c != ','));
264 				if (i >= 255)
265 					ERROR(11);
266 				*ep++ = (char)i;
267 				if (c == ',') {
268 					if (cflg++)
269 						ERROR(44);
270 					if ((c = GETC()) == '\\')
271 						*ep++ = (char)255;
272 					else {
273 						UNGETC(c);
274 						goto nlim;
275 						/* get 2'nd number */
276 					}
277 				}
278 				if (GETC() != '}')
279 					ERROR(45);
280 				if (!cflg)	/* one number */
281 					*ep++ = (char)i;
282 				else if ((ep[-1] & 0377) < (ep[-2] & 0377))
283 					ERROR(46);
284 				continue;
285 
286 			case '\n':
287 				ERROR(36);
288 
289 			case 'n':
290 				c = '\n';
291 				goto defchar;
292 
293 			default:
294 				if (c >= '1' && c <= '9') {
295 					if ((c -= '1') >= closed)
296 						ERROR(25);
297 					*ep++ = CBACK;
298 					*ep++ = (char)c;
299 					continue;
300 				}
301 			}
302 	/* Drop through to default to use \ to turn off special chars */
303 
304 		defchar:
305 		default:
306 			lastep = ep;
307 			*ep++ = CCHR;
308 			*ep++ = (char)c;
309 		}
310 	}
311 }
312 
313 #ifdef	__STDC__
314 int
315 step(const char *p1, const char *p2)
316 #else
317 int
318 step(p1, p2)
319 register char *p1, *p2;
320 #endif
321 {
322 	char c;
323 
324 
325 	if (circf) {
326 		loc1 = (char *)p1;
327 		return (advance(p1, p2));
328 	}
329 	/* fast check for first character */
330 	if (*p2 == CCHR) {
331 		c = p2[1];
332 		do {
333 			if (*p1 != c)
334 				continue;
335 			if (advance(p1, p2)) {
336 				loc1 = (char *)p1;
337 				return (1);
338 			}
339 		} while (*p1++);
340 		return (0);
341 	}
342 		/* regular algorithm */
343 	do {
344 		if (advance(p1, p2)) {
345 			loc1 = (char *)p1;
346 			return (1);
347 		}
348 	} while (*p1++);
349 	return (0);
350 }
351 
352 int
353 #ifdef	__STDC__
354 advance(const char *lp, const char *ep)
355 #else
356 advance(lp, ep)
357 register char *lp, *ep;
358 #endif
359 {
360 #ifdef	__STDC__
361 	const char *curlp;
362 #else
363 	register char *curlp;
364 #endif
365 	int c;
366 	char *bbeg;
367 	register char neg;
368 	size_t ct;
369 
370 	while (1) {
371 		neg = 0;
372 		switch (*ep++) {
373 
374 		case CCHR:
375 			if (*ep++ == *lp++)
376 				continue;
377 			return (0);
378 			/*FALLTHRU*/
379 
380 		case CDOT:
381 			if (*lp++)
382 				continue;
383 			return (0);
384 			/*FALLTHRU*/
385 
386 		case CDOL:
387 			if (*lp == 0)
388 				continue;
389 			return (0);
390 			/*FALLTHRU*/
391 
392 		case CCEOF:
393 			loc2 = (char *)lp;
394 			return (1);
395 			/*FALLTHRU*/
396 
397 		case CXCL:
398 			c = (unsigned char)*lp++;
399 			if (ISTHERE(c)) {
400 				ep += 32;
401 				continue;
402 			}
403 			return (0);
404 			/*FALLTHRU*/
405 
406 		case NCCL:
407 			neg = 1;
408 			/*FALLTHRU*/
409 
410 		case CCL:
411 			c = *lp++;
412 			if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
413 				ep += 16;
414 				continue;
415 			}
416 			return (0);
417 			/*FALLTHRU*/
418 
419 		case CBRA:
420 			braslist[*ep++] = (char *)lp;
421 			continue;
422 			/*FALLTHRU*/
423 
424 		case CKET:
425 			braelist[*ep++] = (char *)lp;
426 			continue;
427 			/*FALLTHRU*/
428 
429 		case CCHR | RNGE:
430 			c = *ep++;
431 			getrnge(ep);
432 			while (low--)
433 				if (*lp++ != c)
434 					return (0);
435 			curlp = lp;
436 			while (size--)
437 				if (*lp++ != c)
438 					break;
439 			if (size < 0)
440 				lp++;
441 			ep += 2;
442 			goto star;
443 			/*FALLTHRU*/
444 
445 		case CDOT | RNGE:
446 			getrnge(ep);
447 			while (low--)
448 				if (*lp++ == '\0')
449 					return (0);
450 			curlp = lp;
451 			while (size--)
452 				if (*lp++ == '\0')
453 					break;
454 			if (size < 0)
455 				lp++;
456 			ep += 2;
457 			goto star;
458 			/*FALLTHRU*/
459 
460 		case CXCL | RNGE:
461 			getrnge(ep + 32);
462 			while (low--) {
463 				c = (unsigned char)*lp++;
464 				if (!ISTHERE(c))
465 					return (0);
466 			}
467 			curlp = lp;
468 			while (size--) {
469 				c = (unsigned char)*lp++;
470 				if (!ISTHERE(c))
471 					break;
472 			}
473 			if (size < 0)
474 				lp++;
475 			ep += 34;		/* 32 + 2 */
476 			goto star;
477 			/*FALLTHRU*/
478 
479 		case NCCL | RNGE:
480 			neg = 1;
481 			/*FALLTHRU*/
482 
483 		case CCL | RNGE:
484 			getrnge(ep + 16);
485 			while (low--) {
486 				c = *lp++;
487 				if (((c & 0200) || !ISTHERE(c)) ^ neg)
488 					return (0);
489 			}
490 			curlp = lp;
491 			while (size--) {
492 				c = *lp++;
493 				if (((c & 0200) || !ISTHERE(c)) ^ neg)
494 					break;
495 			}
496 			if (size < 0)
497 				lp++;
498 			ep += 18; 		/* 16 + 2 */
499 			goto star;
500 			/*FALLTHRU*/
501 
502 		case CBACK:
503 			bbeg = braslist[*ep];
504 			ct = braelist[*ep++] - bbeg;
505 
506 			if (ecmp(bbeg, lp, ct)) {
507 				lp += ct;
508 				continue;
509 			}
510 			return (0);
511 			/*FALLTHRU*/
512 
513 		case CBACK | STAR:
514 			bbeg = braslist[*ep];
515 			ct = braelist[*ep++] - bbeg;
516 			curlp = lp;
517 			while (ecmp(bbeg, lp, ct))
518 				lp += ct;
519 
520 			while (lp >= curlp) {
521 				if (advance(lp, ep))
522 					return (1);
523 				lp -= ct;
524 			}
525 			return (0);
526 			/*FALLTHRU*/
527 
528 		case CDOT | STAR:
529 			curlp = lp;
530 			while (*lp++);
531 			goto star;
532 			/*FALLTHRU*/
533 
534 		case CCHR | STAR:
535 			curlp = lp;
536 			while (*lp++ == *ep);
537 			ep++;
538 			goto star;
539 			/*FALLTHRU*/
540 
541 		case CXCL | STAR:
542 			curlp = lp;
543 			do {
544 				c = (unsigned char)*lp++;
545 			} while (ISTHERE(c));
546 			ep += 32;
547 			goto star;
548 			/*FALLTHRU*/
549 
550 		case NCCL | STAR:
551 			neg = 1;
552 			/*FALLTHRU*/
553 
554 		case CCL | STAR:
555 			curlp = lp;
556 			do {
557 				c = *lp++;
558 			} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
559 			ep += 16;
560 			goto star;
561 			/*FALLTHRU*/
562 
563 		star:
564 			do {
565 				if (--lp == locs)
566 					break;
567 				if (advance(lp, ep))
568 					return (1);
569 			} while (lp > curlp);
570 			return (0);
571 
572 		}
573 	}
574 }
575 
576 static void
577 #ifdef	__STDC__
578 getrnge(const char *str)
579 #else
580 getrnge(str)
581 register char *str;
582 #endif
583 {
584 	low = *str++ & 0377;
585 	size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
586 }
587 
588 #ifdef	__cplusplus
589 }
590 #endif
591 
592 #endif	/* _REGEXP_H */
593