xref: /illumos-gate/usr/src/lib/libeti/form/common/regex.c (revision 66582b606a8194f7f3ba5b3a3a6dca5b0d346361)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1988 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 /*
32  * Copyright (c) 2018, Joyent, Inc.
33  */
34 
35 /*LINTLIBRARY*/
36 
37 #include <sys/types.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include "utility.h"
41 
42 /*
43  *	this code was taken from REGCMP(3X)
44  */
45 /*VARARGS*/
46 /*ARGSUSED*/
47 
/*
 * Limits and opcode bytes of the compiled expression, as interpreted by
 * __execute(), __advance() and __cclass() below.
 *
 * Note that several values coincide numerically (RNGE == A768,
 * STAR == A256, PLUS == A512); which meaning applies depends on the
 * base opcode they are OR'd into.
 */
#define	SSIZE	50	/* __st[] holds SSIZE + 1 entries */
#define	TGRP	48	/* group whose repeat range is read by __getrnge() */
#define	A256	01	/* OR'd into TGRP/PGRP/SGRP: adds 256 to the offset */
#define	A512	02	/* OR'd into TGRP/PGRP/SGRP: adds 512 to the offset */
#define	A768	03	/* OR'd into TGRP/PGRP/SGRP: adds 768 to the offset */
#define	NBRA	10	/* entries in __braslist/__braelist/__bravar */
#define	CIRCFL	32	/* leading byte: __execute() tries the start only */

#define	CBRA	60	/* store current position in __braslist[operand] */
#define	GRP	40	/* group marker; its one operand byte is skipped */
#define	SGRP	56	/* group repeated zero or more times */
#define	PGRP	68	/* group matched once, then handled like SGRP */
#define	EGRP	44	/* end-of-group marker, also used with RNGE/STAR/PLUS */
#define	RNGE	03	/* modifier: repeat range decoded by __getrnge() */
#define	CCHR	20	/* literal character follows */
#define	CDOT	64	/* any character except the terminating NUL */
#define	CCL	24	/* character class; set is tested by __cclass() */
#define	NCCL	8	/* negated character class */
#define	CDOL	28	/* succeeds only at the terminating NUL */
#define	FCEOF	52 /* This was originally CEOF but it clashes with the header */
			/* definition so it was changed to FCEOF */
#define	CKET	12	/* store position in __braelist[], index in __bravar[] */

#define	STAR	01	/* modifier: zero or more repetitions */
#define	PLUS	02	/* modifier: one or more repetitions */
#define	MINUS	16	/* inside a class set: a low/high byte pair follows */
/*
 * Matcher state shared by the functions in this file.
 */

/* Subject position recorded by each CBRA opcode; cleared per call. */
char	*__braslist[NBRA];
/* Subject position recorded by each CKET opcode. */
char	*__braelist[NBRA];
/* Subject position at which the successful match attempt started. */
char	*__loc1;
/* Operand recorded by each CKET opcode; -1 while unset. */
intptr_t	__bravar[NBRA];
/* Backing store for the two stacks used by __xpush()/__xpop(). */
intptr_t	*__st[SSIZE + 1];
/*
 * Stack pointers into __st[]: __lptr_ starts at __st[0] and grows upward,
 * __eptr_ starts at __st[SSIZE] and grows downward.
 */
intptr_t	*__eptr_, *__lptr_;
/* Set to 1 by __advance() when it executes an FCEOF opcode. */
intptr_t	__cflg;
82 
83 char *
84 libform_regex(char *addrc, char *addrl, char *a1)
85 {
86 	intptr_t cur, in;
87 	intptr_t *adx;
88 	char *p1, *p2;
89 
90 	for (in = 0; in < NBRA; in++) {
91 		__braslist[in] = 0;
92 		__bravar[in] = -1;
93 	}
94 	__cflg = 0;
95 	cur = __execute(addrc, addrl);
96 	adx = (intptr_t *)&a1;
97 	for (in = 0; in < NBRA; in++) {
98 		if (((p1 = __braslist[in]) != 0) && (__bravar[in] >= 0)) {
99 			p2 = (char *)adx[__bravar[in]];
100 			while (p1 < __braelist[in]) *p2++ = *p1++;
101 			*p2 = '\0';
102 		}
103 	}
104 	if (!__cflg)
105 		return ((addrl == (char *)cur) ? (char *)0 : (char *)cur);
106 	else
107 		return ((char *)cur);
108 }
109 
110 intptr_t
111 __execute(char *addrc, char *addrl)
112 {
113 	char *p1, *p2, c;
114 	intptr_t i;
115 
116 	p1 = addrl;
117 	p2 = addrc;
118 	__eptr_ = (intptr_t *)&__st[SSIZE];
119 	__lptr_ = (intptr_t *)&__st[0];
120 	if (*p2 == CIRCFL) {
121 		__loc1 = p1;
122 		return ((i = __advance(p1, ++p2)) ? i : (intptr_t)addrl);
123 	}
124 	/* fast check for first character */
125 	if (*p2 == CCHR) {
126 		c = p2[1];
127 		do {
128 			if (*p1 != c)
129 				continue;
130 			__eptr_ = (intptr_t *)&__st[SSIZE];
131 			__lptr_ = (intptr_t *)&__st[0];
132 			if (i = __advance(p1, p2))  {
133 				__loc1 = p1;
134 				return (i);
135 			}
136 		} while (*p1++);
137 		return ((intptr_t)addrl);
138 	}
139 	/* regular algorithm */
140 	do {
141 		__eptr_ = (intptr_t *)&__st[SSIZE];
142 		__lptr_ = (intptr_t *)&__st[0];
143 		if (i = __advance(p1, p2))  {
144 			__loc1 = p1;
145 			return (i);
146 		}
147 	} while (*p1++);
148 	return ((intptr_t)addrl);
149 }
150 
151 intptr_t
152 __advance(char *alp, char *aep)
153 {
154 	char *lp, *ep, *curlp;
155 	char *sep, *dp;
156 	intptr_t i, lcnt, dcnt, gflg;
157 
158 	lp = alp;
159 	ep = aep;
160 	gflg = 0;
161 	for (; ; ) {
162 		switch (*ep++) {
163 
164 	case CCHR:
165 		if (*ep++ == *lp++)
166 			continue;
167 		return (0);
168 
169 	case EGRP|RNGE:
170 		return ((intptr_t)lp);
171 	case EGRP:
172 	case GRP:
173 		ep++;
174 		continue;
175 
176 	case EGRP|STAR:
177 		(void) __xpop(0);
178 		/* FALLTHROUGH */
179 	case EGRP|PLUS:
180 		(void) __xpush(0, ++ep);
181 		return ((intptr_t)lp);
182 
183 	case CDOT:
184 		if (*lp++)
185 			continue;
186 		return (0);
187 
188 	case CDOL:
189 		if (*lp == 0)
190 			continue;
191 		lp++;
192 		return (0);
193 
194 	case FCEOF:
195 		__cflg = 1;
196 		return ((intptr_t)lp);
197 
198 	case TGRP:
199 	case TGRP|A768:
200 	case TGRP|A512:
201 	case TGRP|A256:
202 		i = (((ep[-1] & 03) << 8) + (*ep) & 0377);
203 		ep++;
204 		(void) __xpush(0, ep + i + 2);
205 		(void) __xpush(0, ++ep);
206 		(void) __xpush(0, ++ep);
207 		gflg = 1;
208 		(void) __getrnge(&lcnt, &dcnt, &ep[i]);
209 		while (lcnt--)
210 			if (!(lp = (char *)__advance(lp, ep)))
211 				return (0);
212 		(void) __xpush(1, curlp = lp);
213 		while (dcnt--)
214 			if (!(dp = (char *)__advance(lp, ep))) break;
215 			else
216 				(void) __xpush(1, lp = dp);
217 		ep = (char *)__xpop(0);
218 		goto star;
219 	case CCHR|RNGE:
220 		sep = ep++;
221 		(void) __getrnge(&lcnt, &dcnt, ep);
222 		while (lcnt--)
223 			if (*lp++ != *sep)
224 				return (0);
225 		curlp = lp;
226 		while (dcnt--)
227 			if (*lp++ != *sep) break;
228 		if (dcnt < 0) lp++;
229 		ep += 2;
230 		goto star;
231 	case CDOT|RNGE:
232 		(void) __getrnge(&lcnt, &dcnt, ep);
233 		while (lcnt--)
234 			if (*lp++ == '\0')
235 				return (0);
236 		curlp = lp;
237 		while (dcnt--)
238 			if (*lp++ == '\0') break;
239 		if (dcnt < 0) lp++;
240 		ep += 2;
241 		goto star;
242 	case CCL|RNGE:
243 	case NCCL|RNGE:
244 		(void) __getrnge(&lcnt, &dcnt, (ep + (*ep & 0377)));
245 		while (lcnt--)
246 			if (!__cclass(ep, *lp++, ep[-1] == (CCL | RNGE)))
247 				return (0);
248 		curlp = lp;
249 		while (dcnt--)
250 			if (!__cclass(ep, *lp++, ep[-1] == (CCL|RNGE)))
251 				break;
252 		if (dcnt < 0) lp++;
253 		ep += (*ep + 2);
254 		goto star;
255 	case CCL:
256 		if (__cclass(ep, *lp++, 1)) {
257 			ep += *ep;
258 			continue;
259 		}
260 		return (0);
261 
262 	case NCCL:
263 		if (__cclass(ep, *lp++, 0)) {
264 			ep += *ep;
265 			continue;
266 		}
267 		return (0);
268 
269 	case CBRA:
270 		__braslist[*ep++] = lp;
271 		continue;
272 
273 	case CKET:
274 		__braelist[*ep] = lp;
275 		__bravar[*ep] = ep[1];
276 		ep += 2;
277 		continue;
278 
279 	case CDOT|PLUS:
280 		if (*lp++ == '\0')
281 			return (0);
282 		/* FALLTHROUGH */
283 	case CDOT|STAR:
284 		curlp = lp;
285 		while (*lp++)
286 			;
287 		goto star;
288 
289 	case CCHR|PLUS:
290 		if (*lp++ != *ep)
291 			return (0);
292 		/* FALLTHROUGH */
293 	case CCHR|STAR:
294 		curlp = lp;
295 		while (*lp++ == *ep)
296 			;
297 		ep++;
298 		goto star;
299 
300 	case PGRP:
301 	case PGRP|A256:
302 	case PGRP|A512:
303 	case PGRP|A768:
304 		if (!(lp = (char *)__advance(lp, ep+1)))
305 			return (0);
306 		/* FALLTHROUGH */
307 	case SGRP|A768:
308 	case SGRP|A512:
309 	case SGRP|A256:
310 	case SGRP:
311 		i = (((ep[-1]&03) << 8) + (*ep & 0377));
312 		ep++;
313 		(void) __xpush(0, ep + i);
314 		(void) __xpush(1, curlp = lp);
315 		while (i = __advance(lp, ep))
316 			(void) __xpush(1, lp = (char *)i);
317 		ep = (char *)__xpop(0);
318 		gflg = 1;
319 		goto star;
320 
321 	case CCL|PLUS:
322 	case NCCL|PLUS:
323 		if (!__cclass(ep, *lp++, ep[-1] == (CCL | PLUS)))
324 			return (0);
325 		/* FALLTHROUGH */
326 	case CCL|STAR:
327 	case NCCL|STAR:
328 		curlp = lp;
329 		while (__cclass(ep, *lp++, ((ep[-1] == (CCL | STAR)) ||
330 		    (ep[-1] == (CCL | PLUS)))))
331 			;
332 		ep += *ep;
333 		goto star;
334 
335 	star:
336 		do {
337 			if (!gflg) lp--;
338 			else if (!(lp = (char *)__xpop(1))) break;
339 			if (i = __advance(lp, ep))
340 				return (i);
341 		} while (lp > curlp);
342 		return (0);
343 
344 	default:
345 		return (0);
346 	}
347 	}
348 }
349 
350 intptr_t
351 __cclass(char *aset, char ac, intptr_t af)
352 {
353 	char *set, c;
354 	intptr_t n;
355 
356 	set = (char *)aset;
357 	if ((c = ac) == 0)
358 		return (0);
359 	n = *set++;
360 	while (--n) {
361 		if (*set == MINUS) {
362 			if ((set[2] - set[1]) < 0)
363 				return (0);
364 			if (*++set <= c) {
365 				if (c <= *++set)
366 					return (af);
367 			} else
368 				++set;
369 			++set;
370 			n -= 2;
371 			continue;
372 		}
373 		if (*set++ == c)
374 			return (af);
375 	}
376 	return (!af);
377 }
378 
379 intptr_t
380 __xpush(intptr_t i, char *p)
381 {
382 	if (__lptr_ >= __eptr_) {
383 		(void) write(2, "stack overflow\n", 15);
384 		(void) exit(1);
385 	}
386 	if (i)
387 		*__lptr_++ = (intptr_t)p;
388 	else
389 		*__eptr_-- = (intptr_t)p;
390 	return (1);
391 }
392 
393 intptr_t
394 __xpop(intptr_t i)
395 {
396 	if (i)
397 		return ((__lptr_ < (intptr_t *)&__st[0]) ? 0 : *--__lptr_);
398 	else
399 		return ((__eptr_ > (intptr_t *)&__st[SSIZE]) ? 0 : *++__eptr_);
400 }
401 
/*
 * Decode the two-byte repeat range at k.
 *
 * *i receives the first byte as an unsigned value (the minimum count).
 * *j receives the number of additional repetitions allowed: the second
 * byte, taken as unsigned, minus the minimum, or 20000 when the second
 * byte is (char)-1.
 *
 * Always returns 1.
 */
intptr_t
__getrnge(intptr_t *i, intptr_t *j, char *k)
{
	char upper;

	*i = k[0] & 0377;
	upper = k[1];

	if (upper != (char)-1) {
		*j = (upper & 0377) - *i;
	} else {
		*j = 20000;
	}
	return (1);
}
412