xref: /titanic_44/usr/src/lib/libast/common/comp/regcmp.c (revision 1022fd2a9aa2c967697116c2ca51a238a3c550ac)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * regcmp implementation
25  */
26 
27 #include <ast.h>
28 #include <libgen.h>
29 #include <regex.h>
30 #include <align.h>
31 
/*
 * regcmp() sizes its handle allocation in steps of INC bytes and gives
 * up once the requested size would exceed TOT bytes.
 */
32 #define INC		(2*1024)
33 #define TOT		(16*1024)
/* number of $N slots (N is a single digit 0..9) tracked per pattern */
34 #define SUB		10
35 
/*
 * Handle produced by regcmp() and consumed by regex(): the compiled
 * pattern, the $N slot map and the arena that block() hands out to
 * regcomp(), all inside one allocation.
 */
36 typedef struct
37 {
38 	char*		cur;		/* next unused byte of the arena; block() returns it and advances it */
39 	regex_t		re;		/* compiled pattern handed to regcomp() and regexec() */
40 	unsigned char	sub[SUB];	/* per $N slot: regexec() match index plus one, 0 if the slot is unused */
41 	int		nsub;		/* highest $N used plus one, 0 if the pattern has no $N */
42 	size_t		size;		/* arena size in bytes, counted from buf */
43 	char		buf[ALIGN_BOUND2]; /* start of the arena; regcmp() allocates past the end of the struct */
44 } Regex_t;
45 
/*
 * __DEFINE__ is declared outside this file; presumably this defines the
 * global char* __loc1 with initial value 0 -- confirm against the macro.
 * regex() stores the start of the most recent match in __loc1.
 */
46 __DEFINE__(char*, __loc1, 0);
47 
48 static void*
49 block(void* handle, void* data, size_t size)
50 {
51 	register Regex_t*	re = (Regex_t*)handle;
52 
53 	if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
54 		return 0;
55 	data = (void*)re->cur;
56 	re->cur += size;
57 	return data;
58 }
59 
60 char*
61 regcmp(const char* pattern, ...)
62 {
63 	register char*		s;
64 	register Regex_t*	re;
65 	register size_t		n;
66 	register int		c;
67 	register int		p;
68 	int			b;
69 	int			i;
70 	int			j;
71 	int			nsub;
72 	register Sfio_t*	sp;
73 	unsigned char		paren[128];
74 	unsigned char		sub[SUB];
75 	va_list			ap;
76 
77 	va_start(ap, pattern);
78 	if (!pattern || !*pattern || !(sp = sfstropen()))
79 		return 0;
80 	memset(paren, 0, sizeof(paren));
81 	n = 0;
82 	p = -1;
83 	b = 0;
84 	nsub = 0;
85 	s = (char*)pattern;
86 	do
87 	{
88 		while (c = *s++)
89 		{
90 			if (c == '\\')
91 			{
92 				sfputc(sp, c);
93 				if (!(c = *s++))
94 					break;
95 			}
96 			else if (b)
97 			{
98 				if (c == ']')
99 					b = 0;
100 			}
101 			else if (c == '[')
102 			{
103 				b = 1;
104 				if (*s == '^')
105 				{
106 					sfputc(sp, c);
107 					c = *s++;
108 				}
109 				if (*s == ']')
110 				{
111 					sfputc(sp, c);
112 					c = *s++;
113 				}
114 			}
115 			else if (c == '(')
116 			{
117 				/*
118 				 * someone explain in one sentence why
119 				 * a cast is needed to make this work
120 				 */
121 
122 				if (p < (int)(elementsof(paren) - 1))
123 					p++;
124 				paren[p] = ++n;
125 			}
126 			else if (c == ')' && p >= 0)
127 			{
128 				for (i = p; i > 0; i--)
129 					if (paren[i])
130 						break;
131 				if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
132 				{
133 					s += 2;
134 					j -= '0';
135 					if (nsub <= j)
136 					{
137 						if (!nsub)
138 							memset(sub, 0, sizeof(sub));
139 						nsub = j + 1;
140 					}
141 					sub[j] = paren[i] + 1;
142 				}
143 				paren[i] = 0;
144 			}
145 			sfputc(sp, c);
146 		}
147 	} while (s = va_arg(ap, char*));
148 	va_end(ap);
149 	if (!(s = sfstruse(sp)))
150 	{
151 		sfstrclose(sp);
152 		return 0;
153 	}
154 	re = 0;
155 	n = 0;
156 	do
157 	{
158 		if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
159 		{
160 			if (re)
161 				free(re);
162 			sfstrclose(sp);
163 			return 0;
164 		}
165 		re->cur = re->buf;
166 		re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
167 		regalloc(re, block, REG_NOFREE);
168 		c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
169 		regalloc(NiL, NiL, 0);
170 	} while (c == REG_ESPACE);
171 	sfstrclose(sp);
172 	if (c)
173 	{
174 		free(re);
175 		return 0;
176 	}
177 	if (re->nsub = nsub)
178 		memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
179 	return (char*)re;
180 }
181 
182 char*
183 regex(const char* handle, const char* subject, ...)
184 {
185 	register Regex_t*	re;
186 	register int		n;
187 	register int		i;
188 	register int		k;
189 	char*			sub[SUB + 1];
190 	regmatch_t		match[SUB + 1];
191 	va_list			ap;
192 
193 	va_start(ap, subject);
194 	if (!(re = (Regex_t*)handle) || !subject)
195 		return 0;
196 	for (n = 0; n < re->nsub; n++)
197 		sub[n] = va_arg(ap, char*);
198 	va_end(ap);
199 	if (regexec(&re->re, subject, SUB + 1, match, 0))
200 		return 0;
201 	for (n = 0; n < re->nsub; n++)
202 		if (i = re->sub[n])
203 		{
204 			i--;
205 			k = match[i].rm_eo - match[i].rm_so;
206 			strncpy(sub[n], subject + match[i].rm_so, k);
207 			*(sub[n] + k) = 0;
208 		}
209 	__loc1 = (char*)subject + match[0].rm_so;
210 	return (char*)subject + match[0].rm_eo;
211 }
212