1*b30d1939SAndy Fiddaman /***********************************************************************
2*b30d1939SAndy Fiddaman * *
3*b30d1939SAndy Fiddaman * This software is part of the ast package *
4*b30d1939SAndy Fiddaman * Copyright (c) 1985-2011 AT&T Intellectual Property *
5*b30d1939SAndy Fiddaman * and is licensed under the *
6*b30d1939SAndy Fiddaman * Eclipse Public License, Version 1.0 *
7*b30d1939SAndy Fiddaman * by AT&T Intellectual Property *
8*b30d1939SAndy Fiddaman * *
9*b30d1939SAndy Fiddaman * A copy of the License is available at *
10*b30d1939SAndy Fiddaman * http://www.eclipse.org/org/documents/epl-v10.html *
11*b30d1939SAndy Fiddaman * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12*b30d1939SAndy Fiddaman * *
13*b30d1939SAndy Fiddaman * Information and Software Systems Research *
14*b30d1939SAndy Fiddaman * AT&T Research *
15*b30d1939SAndy Fiddaman * Florham Park NJ *
16*b30d1939SAndy Fiddaman * *
17*b30d1939SAndy Fiddaman * Glenn Fowler <gsf@research.att.com> *
18*b30d1939SAndy Fiddaman * David Korn <dgk@research.att.com> *
19*b30d1939SAndy Fiddaman * Phong Vo <kpv@research.att.com> *
20*b30d1939SAndy Fiddaman * *
21*b30d1939SAndy Fiddaman ***********************************************************************/
22*b30d1939SAndy Fiddaman #pragma prototyped
/*
 * regcmp(3)/regex(3) implementation layered on regcomp(3)/regexec(3)
 */
26*b30d1939SAndy Fiddaman
27*b30d1939SAndy Fiddaman #include <ast.h>
28*b30d1939SAndy Fiddaman #include <libgen.h>
29*b30d1939SAndy Fiddaman #include <regex.h>
30*b30d1939SAndy Fiddaman #include <align.h>
31*b30d1939SAndy Fiddaman
/*
 * sizing constants
 *
 *	INC	bytes added to the handle allocation on each compile retry
 *	TOT	upper bound on the handle allocation; beyond it regcmp() fails
 *	SUB	number of $0..$9 substring slots
 */

#define INC		(2*1024)
#define TOT		(16*1024)
#define SUB		10
35*b30d1939SAndy Fiddaman
36*b30d1939SAndy Fiddaman typedef struct
37*b30d1939SAndy Fiddaman {
38*b30d1939SAndy Fiddaman char* cur;
39*b30d1939SAndy Fiddaman regex_t re;
40*b30d1939SAndy Fiddaman unsigned char sub[SUB];
41*b30d1939SAndy Fiddaman int nsub;
42*b30d1939SAndy Fiddaman size_t size;
43*b30d1939SAndy Fiddaman char buf[ALIGN_BOUND2];
44*b30d1939SAndy Fiddaman } Regex_t;
45*b30d1939SAndy Fiddaman
46*b30d1939SAndy Fiddaman __DEFINE__(char*, __loc1, 0);
47*b30d1939SAndy Fiddaman
48*b30d1939SAndy Fiddaman static void*
block(void * handle,void * data,size_t size)49*b30d1939SAndy Fiddaman block(void* handle, void* data, size_t size)
50*b30d1939SAndy Fiddaman {
51*b30d1939SAndy Fiddaman register Regex_t* re = (Regex_t*)handle;
52*b30d1939SAndy Fiddaman
53*b30d1939SAndy Fiddaman if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
54*b30d1939SAndy Fiddaman return 0;
55*b30d1939SAndy Fiddaman data = (void*)re->cur;
56*b30d1939SAndy Fiddaman re->cur += size;
57*b30d1939SAndy Fiddaman return data;
58*b30d1939SAndy Fiddaman }
59*b30d1939SAndy Fiddaman
60*b30d1939SAndy Fiddaman char*
regcmp(const char * pattern,...)61*b30d1939SAndy Fiddaman regcmp(const char* pattern, ...)
62*b30d1939SAndy Fiddaman {
63*b30d1939SAndy Fiddaman register char* s;
64*b30d1939SAndy Fiddaman register Regex_t* re;
65*b30d1939SAndy Fiddaman register size_t n;
66*b30d1939SAndy Fiddaman register int c;
67*b30d1939SAndy Fiddaman register int p;
68*b30d1939SAndy Fiddaman int b;
69*b30d1939SAndy Fiddaman int e;
70*b30d1939SAndy Fiddaman int i;
71*b30d1939SAndy Fiddaman int j;
72*b30d1939SAndy Fiddaman int nsub;
73*b30d1939SAndy Fiddaman register Sfio_t* sp;
74*b30d1939SAndy Fiddaman unsigned char paren[128];
75*b30d1939SAndy Fiddaman unsigned char sub[SUB];
76*b30d1939SAndy Fiddaman va_list ap;
77*b30d1939SAndy Fiddaman
78*b30d1939SAndy Fiddaman va_start(ap, pattern);
79*b30d1939SAndy Fiddaman if (pattern || !*pattern || !(sp = sfstropen()))
80*b30d1939SAndy Fiddaman e = 1;
81*b30d1939SAndy Fiddaman else
82*b30d1939SAndy Fiddaman {
83*b30d1939SAndy Fiddaman e = 0;
84*b30d1939SAndy Fiddaman memset(paren, 0, sizeof(paren));
85*b30d1939SAndy Fiddaman n = 0;
86*b30d1939SAndy Fiddaman p = -1;
87*b30d1939SAndy Fiddaman b = 0;
88*b30d1939SAndy Fiddaman nsub = 0;
89*b30d1939SAndy Fiddaman s = (char*)pattern;
90*b30d1939SAndy Fiddaman do
91*b30d1939SAndy Fiddaman {
92*b30d1939SAndy Fiddaman while (c = *s++)
93*b30d1939SAndy Fiddaman {
94*b30d1939SAndy Fiddaman if (c == '\\')
95*b30d1939SAndy Fiddaman {
96*b30d1939SAndy Fiddaman sfputc(sp, c);
97*b30d1939SAndy Fiddaman if (!(c = *s++))
98*b30d1939SAndy Fiddaman break;
99*b30d1939SAndy Fiddaman }
100*b30d1939SAndy Fiddaman else if (b)
101*b30d1939SAndy Fiddaman {
102*b30d1939SAndy Fiddaman if (c == ']')
103*b30d1939SAndy Fiddaman b = 0;
104*b30d1939SAndy Fiddaman }
105*b30d1939SAndy Fiddaman else if (c == '[')
106*b30d1939SAndy Fiddaman {
107*b30d1939SAndy Fiddaman b = 1;
108*b30d1939SAndy Fiddaman if (*s == '^')
109*b30d1939SAndy Fiddaman {
110*b30d1939SAndy Fiddaman sfputc(sp, c);
111*b30d1939SAndy Fiddaman c = *s++;
112*b30d1939SAndy Fiddaman }
113*b30d1939SAndy Fiddaman if (*s == ']')
114*b30d1939SAndy Fiddaman {
115*b30d1939SAndy Fiddaman sfputc(sp, c);
116*b30d1939SAndy Fiddaman c = *s++;
117*b30d1939SAndy Fiddaman }
118*b30d1939SAndy Fiddaman }
119*b30d1939SAndy Fiddaman else if (c == '(')
120*b30d1939SAndy Fiddaman {
121*b30d1939SAndy Fiddaman /*
122*b30d1939SAndy Fiddaman * someone explain in one sentence why
123*b30d1939SAndy Fiddaman * a cast is needed to make this work
124*b30d1939SAndy Fiddaman */
125*b30d1939SAndy Fiddaman
126*b30d1939SAndy Fiddaman if (p < (int)(elementsof(paren) - 1))
127*b30d1939SAndy Fiddaman p++;
128*b30d1939SAndy Fiddaman paren[p] = ++n;
129*b30d1939SAndy Fiddaman }
130*b30d1939SAndy Fiddaman else if (c == ')' && p >= 0)
131*b30d1939SAndy Fiddaman {
132*b30d1939SAndy Fiddaman for (i = p; i > 0; i--)
133*b30d1939SAndy Fiddaman if (paren[i])
134*b30d1939SAndy Fiddaman break;
135*b30d1939SAndy Fiddaman if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
136*b30d1939SAndy Fiddaman {
137*b30d1939SAndy Fiddaman s += 2;
138*b30d1939SAndy Fiddaman j -= '0';
139*b30d1939SAndy Fiddaman if (nsub <= j)
140*b30d1939SAndy Fiddaman {
141*b30d1939SAndy Fiddaman if (!nsub)
142*b30d1939SAndy Fiddaman memset(sub, 0, sizeof(sub));
143*b30d1939SAndy Fiddaman nsub = j + 1;
144*b30d1939SAndy Fiddaman }
145*b30d1939SAndy Fiddaman sub[j] = paren[i] + 1;
146*b30d1939SAndy Fiddaman }
147*b30d1939SAndy Fiddaman paren[i] = 0;
148*b30d1939SAndy Fiddaman }
149*b30d1939SAndy Fiddaman sfputc(sp, c);
150*b30d1939SAndy Fiddaman }
151*b30d1939SAndy Fiddaman } while (s = va_arg(ap, char*));
152*b30d1939SAndy Fiddaman }
153*b30d1939SAndy Fiddaman va_end(ap);
154*b30d1939SAndy Fiddaman if (e)
155*b30d1939SAndy Fiddaman return 0;
156*b30d1939SAndy Fiddaman if (!(s = sfstruse(sp)))
157*b30d1939SAndy Fiddaman {
158*b30d1939SAndy Fiddaman sfstrclose(sp);
159*b30d1939SAndy Fiddaman return 0;
160*b30d1939SAndy Fiddaman }
161*b30d1939SAndy Fiddaman re = 0;
162*b30d1939SAndy Fiddaman n = 0;
163*b30d1939SAndy Fiddaman do
164*b30d1939SAndy Fiddaman {
165*b30d1939SAndy Fiddaman if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
166*b30d1939SAndy Fiddaman {
167*b30d1939SAndy Fiddaman if (re)
168*b30d1939SAndy Fiddaman free(re);
169*b30d1939SAndy Fiddaman sfstrclose(sp);
170*b30d1939SAndy Fiddaman return 0;
171*b30d1939SAndy Fiddaman }
172*b30d1939SAndy Fiddaman re->cur = re->buf;
173*b30d1939SAndy Fiddaman re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
174*b30d1939SAndy Fiddaman regalloc(re, block, REG_NOFREE);
175*b30d1939SAndy Fiddaman c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
176*b30d1939SAndy Fiddaman regalloc(NiL, NiL, 0);
177*b30d1939SAndy Fiddaman } while (c == REG_ESPACE);
178*b30d1939SAndy Fiddaman sfstrclose(sp);
179*b30d1939SAndy Fiddaman if (c)
180*b30d1939SAndy Fiddaman {
181*b30d1939SAndy Fiddaman free(re);
182*b30d1939SAndy Fiddaman return 0;
183*b30d1939SAndy Fiddaman }
184*b30d1939SAndy Fiddaman if (re->nsub = nsub)
185*b30d1939SAndy Fiddaman memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
186*b30d1939SAndy Fiddaman return (char*)re;
187*b30d1939SAndy Fiddaman }
188*b30d1939SAndy Fiddaman
189*b30d1939SAndy Fiddaman char*
regex(const char * handle,const char * subject,...)190*b30d1939SAndy Fiddaman regex(const char* handle, const char* subject, ...)
191*b30d1939SAndy Fiddaman {
192*b30d1939SAndy Fiddaman register Regex_t* re;
193*b30d1939SAndy Fiddaman register int n;
194*b30d1939SAndy Fiddaman register int i;
195*b30d1939SAndy Fiddaman register int k;
196*b30d1939SAndy Fiddaman char* sub[SUB + 1];
197*b30d1939SAndy Fiddaman regmatch_t match[SUB + 1];
198*b30d1939SAndy Fiddaman va_list ap;
199*b30d1939SAndy Fiddaman
200*b30d1939SAndy Fiddaman va_start(ap, subject);
201*b30d1939SAndy Fiddaman if (!(re = (Regex_t*)handle) || !subject)
202*b30d1939SAndy Fiddaman k = 1;
203*b30d1939SAndy Fiddaman else
204*b30d1939SAndy Fiddaman {
205*b30d1939SAndy Fiddaman k = 0;
206*b30d1939SAndy Fiddaman for (n = 0; n < re->nsub; n++)
207*b30d1939SAndy Fiddaman sub[n] = va_arg(ap, char*);
208*b30d1939SAndy Fiddaman }
209*b30d1939SAndy Fiddaman va_end(ap);
210*b30d1939SAndy Fiddaman if (k)
211*b30d1939SAndy Fiddaman return 0;
212*b30d1939SAndy Fiddaman if (regexec(&re->re, subject, SUB + 1, match, 0))
213*b30d1939SAndy Fiddaman return 0;
214*b30d1939SAndy Fiddaman for (n = 0; n < re->nsub; n++)
215*b30d1939SAndy Fiddaman if (i = re->sub[n])
216*b30d1939SAndy Fiddaman {
217*b30d1939SAndy Fiddaman i--;
218*b30d1939SAndy Fiddaman k = match[i].rm_eo - match[i].rm_so;
219*b30d1939SAndy Fiddaman strlcpy(sub[n], subject + match[i].rm_so, k);
220*b30d1939SAndy Fiddaman *(sub[n] + k) = 0;
221*b30d1939SAndy Fiddaman }
222*b30d1939SAndy Fiddaman __loc1 = (char*)subject + match[0].rm_so;
223*b30d1939SAndy Fiddaman return (char*)subject + match[0].rm_eo;
224*b30d1939SAndy Fiddaman }
225