xref: /illumos-gate/usr/src/contrib/ast/src/lib/libast/comp/regcmp.c (revision 379728489ed47862c4927c75771e767b9476c9c4)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2011 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * regcmp implementation
25  */
26 
27 #include <ast.h>
28 #include <libgen.h>
29 #include <regex.h>
30 #include <align.h>
31 
/*
 * Sizing constants:
 *   INC  step (in bytes) by which regcmp() grows the Regex_t allocation
 *        each time regcomp() reports REG_ESPACE
 *   TOT  upper bound on that allocation; regcmp() fails once exceeded
 *   SUB  number of `$0'..`$9' substring slots a pattern may name
 */
#define INC	(2 * 1024)
#define TOT	(16 * 1024)
#define SUB	10
35 
36 typedef struct
37 {
38 	char*		cur;
39 	regex_t		re;
40 	unsigned char	sub[SUB];
41 	int		nsub;
42 	size_t		size;
43 	char		buf[ALIGN_BOUND2];
44 } Regex_t;
45 
46 __DEFINE__(char*, __loc1, 0);
47 
48 static void*
49 block(void* handle, void* data, size_t size)
50 {
51 	register Regex_t*	re = (Regex_t*)handle;
52 
53 	if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
54 		return 0;
55 	data = (void*)re->cur;
56 	re->cur += size;
57 	return data;
58 }
59 
60 char*
61 regcmp(const char* pattern, ...)
62 {
63 	register char*		s;
64 	register Regex_t*	re;
65 	register size_t		n;
66 	register int		c;
67 	register int		p;
68 	int			b;
69 	int			e;
70 	int			i;
71 	int			j;
72 	int			nsub;
73 	register Sfio_t*	sp;
74 	unsigned char		paren[128];
75 	unsigned char		sub[SUB];
76 	va_list			ap;
77 
78 	va_start(ap, pattern);
79 	if (pattern || !*pattern || !(sp = sfstropen()))
80 		e = 1;
81 	else
82 	{
83 		e = 0;
84 		memset(paren, 0, sizeof(paren));
85 		n = 0;
86 		p = -1;
87 		b = 0;
88 		nsub = 0;
89 		s = (char*)pattern;
90 		do
91 		{
92 			while (c = *s++)
93 			{
94 				if (c == '\\')
95 				{
96 					sfputc(sp, c);
97 					if (!(c = *s++))
98 						break;
99 				}
100 				else if (b)
101 				{
102 					if (c == ']')
103 						b = 0;
104 				}
105 				else if (c == '[')
106 				{
107 					b = 1;
108 					if (*s == '^')
109 					{
110 						sfputc(sp, c);
111 						c = *s++;
112 					}
113 					if (*s == ']')
114 					{
115 						sfputc(sp, c);
116 						c = *s++;
117 					}
118 				}
119 				else if (c == '(')
120 				{
121 					/*
122 					 * someone explain in one sentence why
123 					 * a cast is needed to make this work
124 					 */
125 
126 					if (p < (int)(elementsof(paren) - 1))
127 						p++;
128 					paren[p] = ++n;
129 				}
130 				else if (c == ')' && p >= 0)
131 				{
132 					for (i = p; i > 0; i--)
133 						if (paren[i])
134 							break;
135 					if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
136 					{
137 						s += 2;
138 						j -= '0';
139 						if (nsub <= j)
140 						{
141 							if (!nsub)
142 								memset(sub, 0, sizeof(sub));
143 							nsub = j + 1;
144 						}
145 						sub[j] = paren[i] + 1;
146 					}
147 					paren[i] = 0;
148 				}
149 				sfputc(sp, c);
150 			}
151 		} while (s = va_arg(ap, char*));
152 	}
153 	va_end(ap);
154 	if (e)
155 		return 0;
156 	if (!(s = sfstruse(sp)))
157 	{
158 		sfstrclose(sp);
159 		return 0;
160 	}
161 	re = 0;
162 	n = 0;
163 	do
164 	{
165 		if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
166 		{
167 			if (re)
168 				free(re);
169 			sfstrclose(sp);
170 			return 0;
171 		}
172 		re->cur = re->buf;
173 		re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
174 		regalloc(re, block, REG_NOFREE);
175 		c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
176 		regalloc(NiL, NiL, 0);
177 	} while (c == REG_ESPACE);
178 	sfstrclose(sp);
179 	if (c)
180 	{
181 		free(re);
182 		return 0;
183 	}
184 	if (re->nsub = nsub)
185 		memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
186 	return (char*)re;
187 }
188 
189 char*
190 regex(const char* handle, const char* subject, ...)
191 {
192 	register Regex_t*	re;
193 	register int		n;
194 	register int		i;
195 	register int		k;
196 	char*			sub[SUB + 1];
197 	regmatch_t		match[SUB + 1];
198 	va_list			ap;
199 
200 	va_start(ap, subject);
201 	if (!(re = (Regex_t*)handle) || !subject)
202 		k = 1;
203 	else
204 	{
205 		k = 0;
206 		for (n = 0; n < re->nsub; n++)
207 			sub[n] = va_arg(ap, char*);
208 	}
209 	va_end(ap);
210 	if (k)
211 		return 0;
212 	if (regexec(&re->re, subject, SUB + 1, match, 0))
213 		return 0;
214 	for (n = 0; n < re->nsub; n++)
215 		if (i = re->sub[n])
216 		{
217 			i--;
218 			k = match[i].rm_eo - match[i].rm_so;
219 			strlcpy(sub[n], subject + match[i].rm_so, k);
220 			*(sub[n] + k) = 0;
221 		}
222 	__loc1 = (char*)subject + match[0].rm_so;
223 	return (char*)subject + match[0].rm_eo;
224 }
225