xref: /illumos-gate/usr/src/lib/libeti/form/common/regex.c (revision d327dbeacda682ba3d4efc9b451baa429ba8830c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1988 AT&T	*/
23 /*	  All Rights Reserved	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 /*
32  * Copyright (c) 2018, Joyent, Inc.
33  */
34 
35 #include <sys/types.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include "utility.h"
39 
40 /*
41  *	this code was taken from REGCMP(3X)
42  */
43 /*VARARGS*/
44 /*ARGSUSED*/
45 
46 #define	SSIZE	50
47 #define	TGRP	48
48 #define	A256	01
49 #define	A512	02
50 #define	A768	03
51 #define	NBRA	10
52 #define	CIRCFL	32
53 
54 #define	CBRA	60
55 #define	GRP	40
56 #define	SGRP	56
57 #define	PGRP	68
58 #define	EGRP	44
59 #define	RNGE	03
60 #define	CCHR	20
61 #define	CDOT	64
62 #define	CCL	24
63 #define	NCCL	8
64 #define	CDOL	28
65 #define	FCEOF	52 /* This was originally CEOF but it clashes with the header */
66 			/* definition so it was changed to FCEOF */
67 #define	CKET	12
68 
69 #define	STAR	01
70 #define	PLUS	02
71 #define	MINUS	16
72 
73 char	*__braslist[NBRA];
74 char	*__braelist[NBRA];
75 extern char *__loc1;
76 intptr_t	__bravar[NBRA];
77 intptr_t	*__st[SSIZE + 1];
78 intptr_t	*__eptr_, *__lptr_;
79 intptr_t	__cflg;
80 
81 char *
82 libform_regex(char *addrc, char *addrl, char *a1)
83 {
84 	intptr_t cur, in;
85 	intptr_t *adx;
86 	char *p1, *p2;
87 
88 	for (in = 0; in < NBRA; in++) {
89 		__braslist[in] = 0;
90 		__bravar[in] = -1;
91 	}
92 	__cflg = 0;
93 	cur = __execute(addrc, addrl);
94 	adx = (intptr_t *)&a1;
95 	for (in = 0; in < NBRA; in++) {
96 		if (((p1 = __braslist[in]) != 0) && (__bravar[in] >= 0)) {
97 			p2 = (char *)adx[__bravar[in]];
98 			while (p1 < __braelist[in]) *p2++ = *p1++;
99 			*p2 = '\0';
100 		}
101 	}
102 	if (!__cflg)
103 		return ((addrl == (char *)cur) ? (char *)0 : (char *)cur);
104 	else
105 		return ((char *)cur);
106 }
107 
108 intptr_t
109 __execute(char *addrc, char *addrl)
110 {
111 	char *p1, *p2, c;
112 	intptr_t i;
113 
114 	p1 = addrl;
115 	p2 = addrc;
116 	__eptr_ = (intptr_t *)&__st[SSIZE];
117 	__lptr_ = (intptr_t *)&__st[0];
118 	if (*p2 == CIRCFL) {
119 		__loc1 = p1;
120 		return ((i = __advance(p1, ++p2)) ? i : (intptr_t)addrl);
121 	}
122 	/* fast check for first character */
123 	if (*p2 == CCHR) {
124 		c = p2[1];
125 		do {
126 			if (*p1 != c)
127 				continue;
128 			__eptr_ = (intptr_t *)&__st[SSIZE];
129 			__lptr_ = (intptr_t *)&__st[0];
130 			if (i = __advance(p1, p2))  {
131 				__loc1 = p1;
132 				return (i);
133 			}
134 		} while (*p1++);
135 		return ((intptr_t)addrl);
136 	}
137 	/* regular algorithm */
138 	do {
139 		__eptr_ = (intptr_t *)&__st[SSIZE];
140 		__lptr_ = (intptr_t *)&__st[0];
141 		if (i = __advance(p1, p2))  {
142 			__loc1 = p1;
143 			return (i);
144 		}
145 	} while (*p1++);
146 	return ((intptr_t)addrl);
147 }
148 
149 intptr_t
150 __advance(char *alp, char *aep)
151 {
152 	char *lp, *ep, *curlp;
153 	char *sep, *dp;
154 	intptr_t i, lcnt, dcnt, gflg;
155 
156 	lp = alp;
157 	ep = aep;
158 	gflg = 0;
159 	for (; ; ) {
160 		switch (*ep++) {
161 
162 	case CCHR:
163 		if (*ep++ == *lp++)
164 			continue;
165 		return (0);
166 
167 	case EGRP|RNGE:
168 		return ((intptr_t)lp);
169 	case EGRP:
170 	case GRP:
171 		ep++;
172 		continue;
173 
174 	case EGRP|STAR:
175 		(void) __xpop(0);
176 		/* FALLTHROUGH */
177 	case EGRP|PLUS:
178 		(void) __xpush(0, ++ep);
179 		return ((intptr_t)lp);
180 
181 	case CDOT:
182 		if (*lp++)
183 			continue;
184 		return (0);
185 
186 	case CDOL:
187 		if (*lp == 0)
188 			continue;
189 		lp++;
190 		return (0);
191 
192 	case FCEOF:
193 		__cflg = 1;
194 		return ((intptr_t)lp);
195 
196 	case TGRP:
197 	case TGRP|A768:
198 	case TGRP|A512:
199 	case TGRP|A256:
200 		i = (((ep[-1] & 03) << 8) + (*ep) & 0377);
201 		ep++;
202 		(void) __xpush(0, ep + i + 2);
203 		(void) __xpush(0, ++ep);
204 		(void) __xpush(0, ++ep);
205 		gflg = 1;
206 		(void) __getrnge(&lcnt, &dcnt, &ep[i]);
207 		while (lcnt--)
208 			if (!(lp = (char *)__advance(lp, ep)))
209 				return (0);
210 		(void) __xpush(1, curlp = lp);
211 		while (dcnt--)
212 			if (!(dp = (char *)__advance(lp, ep))) break;
213 			else
214 				(void) __xpush(1, lp = dp);
215 		ep = (char *)__xpop(0);
216 		goto star;
217 	case CCHR|RNGE:
218 		sep = ep++;
219 		(void) __getrnge(&lcnt, &dcnt, ep);
220 		while (lcnt--)
221 			if (*lp++ != *sep)
222 				return (0);
223 		curlp = lp;
224 		while (dcnt--)
225 			if (*lp++ != *sep) break;
226 		if (dcnt < 0) lp++;
227 		ep += 2;
228 		goto star;
229 	case CDOT|RNGE:
230 		(void) __getrnge(&lcnt, &dcnt, ep);
231 		while (lcnt--)
232 			if (*lp++ == '\0')
233 				return (0);
234 		curlp = lp;
235 		while (dcnt--)
236 			if (*lp++ == '\0') break;
237 		if (dcnt < 0) lp++;
238 		ep += 2;
239 		goto star;
240 	case CCL|RNGE:
241 	case NCCL|RNGE:
242 		(void) __getrnge(&lcnt, &dcnt, (ep + (*ep & 0377)));
243 		while (lcnt--)
244 			if (!__cclass(ep, *lp++, ep[-1] == (CCL | RNGE)))
245 				return (0);
246 		curlp = lp;
247 		while (dcnt--)
248 			if (!__cclass(ep, *lp++, ep[-1] == (CCL|RNGE)))
249 				break;
250 		if (dcnt < 0) lp++;
251 		ep += (*ep + 2);
252 		goto star;
253 	case CCL:
254 		if (__cclass(ep, *lp++, 1)) {
255 			ep += *ep;
256 			continue;
257 		}
258 		return (0);
259 
260 	case NCCL:
261 		if (__cclass(ep, *lp++, 0)) {
262 			ep += *ep;
263 			continue;
264 		}
265 		return (0);
266 
267 	case CBRA:
268 		__braslist[*ep++] = lp;
269 		continue;
270 
271 	case CKET:
272 		__braelist[*ep] = lp;
273 		__bravar[*ep] = ep[1];
274 		ep += 2;
275 		continue;
276 
277 	case CDOT|PLUS:
278 		if (*lp++ == '\0')
279 			return (0);
280 		/* FALLTHROUGH */
281 	case CDOT|STAR:
282 		curlp = lp;
283 		while (*lp++)
284 			;
285 		goto star;
286 
287 	case CCHR|PLUS:
288 		if (*lp++ != *ep)
289 			return (0);
290 		/* FALLTHROUGH */
291 	case CCHR|STAR:
292 		curlp = lp;
293 		while (*lp++ == *ep)
294 			;
295 		ep++;
296 		goto star;
297 
298 	case PGRP:
299 	case PGRP|A256:
300 	case PGRP|A512:
301 	case PGRP|A768:
302 		if (!(lp = (char *)__advance(lp, ep+1)))
303 			return (0);
304 		/* FALLTHROUGH */
305 	case SGRP|A768:
306 	case SGRP|A512:
307 	case SGRP|A256:
308 	case SGRP:
309 		i = (((ep[-1]&03) << 8) + (*ep & 0377));
310 		ep++;
311 		(void) __xpush(0, ep + i);
312 		(void) __xpush(1, curlp = lp);
313 		while (i = __advance(lp, ep))
314 			(void) __xpush(1, lp = (char *)i);
315 		ep = (char *)__xpop(0);
316 		gflg = 1;
317 		goto star;
318 
319 	case CCL|PLUS:
320 	case NCCL|PLUS:
321 		if (!__cclass(ep, *lp++, ep[-1] == (CCL | PLUS)))
322 			return (0);
323 		/* FALLTHROUGH */
324 	case CCL|STAR:
325 	case NCCL|STAR:
326 		curlp = lp;
327 		while (__cclass(ep, *lp++, ((ep[-1] == (CCL | STAR)) ||
328 		    (ep[-1] == (CCL | PLUS)))))
329 			;
330 		ep += *ep;
331 		goto star;
332 
333 	star:
334 		do {
335 			if (!gflg) lp--;
336 			else if (!(lp = (char *)__xpop(1))) break;
337 			if (i = __advance(lp, ep))
338 				return (i);
339 		} while (lp > curlp);
340 		return (0);
341 
342 	default:
343 		return (0);
344 	}
345 	}
346 }
347 
348 intptr_t
349 __cclass(char *aset, char ac, intptr_t af)
350 {
351 	char *set, c;
352 	intptr_t n;
353 
354 	set = (char *)aset;
355 	if ((c = ac) == 0)
356 		return (0);
357 	n = *set++;
358 	while (--n) {
359 		if (*set == MINUS) {
360 			if ((set[2] - set[1]) < 0)
361 				return (0);
362 			if (*++set <= c) {
363 				if (c <= *++set)
364 					return (af);
365 			} else
366 				++set;
367 			++set;
368 			n -= 2;
369 			continue;
370 		}
371 		if (*set++ == c)
372 			return (af);
373 	}
374 	return (!af);
375 }
376 
377 intptr_t
378 __xpush(intptr_t i, char *p)
379 {
380 	if (__lptr_ >= __eptr_) {
381 		(void) write(2, "stack overflow\n", 15);
382 		(void) exit(1);
383 	}
384 	if (i)
385 		*__lptr_++ = (intptr_t)p;
386 	else
387 		*__eptr_-- = (intptr_t)p;
388 	return (1);
389 }
390 
391 intptr_t
392 __xpop(intptr_t i)
393 {
394 	if (i)
395 		return ((__lptr_ < (intptr_t *)&__st[0]) ? 0 : *--__lptr_);
396 	else
397 		return ((__eptr_ > (intptr_t *)&__st[SSIZE]) ? 0 : *++__eptr_);
398 }
399 
400 intptr_t
401 __getrnge(intptr_t *i, intptr_t *j, char *k)
402 {
403 	*i = (*k++&0377);
404 	if (*k == (char)-1)
405 		*j = 20000;
406 	else
407 		*j = ((*k&0377) - *i);
408 	return (1);
409 }
410