1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2011 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24 * regcmp implementation
25 */
26
27 #include <ast.h>
28 #include <libgen.h>
29 #include <regex.h>
30 #include <align.h>
31
#define INC		(2*1024)	/* bytes added to the handle size on each compile attempt */
#define TOT		(16*1024)	/* handle size beyond which regcmp() gives up */
#define SUB		10		/* number of $N substring slots, N is 0..9 */
35
36 typedef struct
37 {
38 char* cur;
39 regex_t re;
40 unsigned char sub[SUB];
41 int nsub;
42 size_t size;
43 char buf[ALIGN_BOUND2];
44 } Regex_t;
45
46 __DEFINE__(char*, __loc1, 0);
47
48 static void*
block(void * handle,void * data,size_t size)49 block(void* handle, void* data, size_t size)
50 {
51 register Regex_t* re = (Regex_t*)handle;
52
53 if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
54 return 0;
55 data = (void*)re->cur;
56 re->cur += size;
57 return data;
58 }
59
60 char*
regcmp(const char * pattern,...)61 regcmp(const char* pattern, ...)
62 {
63 register char* s;
64 register Regex_t* re;
65 register size_t n;
66 register int c;
67 register int p;
68 int b;
69 int e;
70 int i;
71 int j;
72 int nsub;
73 register Sfio_t* sp;
74 unsigned char paren[128];
75 unsigned char sub[SUB];
76 va_list ap;
77
78 va_start(ap, pattern);
79 if (pattern || !*pattern || !(sp = sfstropen()))
80 e = 1;
81 else
82 {
83 e = 0;
84 memset(paren, 0, sizeof(paren));
85 n = 0;
86 p = -1;
87 b = 0;
88 nsub = 0;
89 s = (char*)pattern;
90 do
91 {
92 while (c = *s++)
93 {
94 if (c == '\\')
95 {
96 sfputc(sp, c);
97 if (!(c = *s++))
98 break;
99 }
100 else if (b)
101 {
102 if (c == ']')
103 b = 0;
104 }
105 else if (c == '[')
106 {
107 b = 1;
108 if (*s == '^')
109 {
110 sfputc(sp, c);
111 c = *s++;
112 }
113 if (*s == ']')
114 {
115 sfputc(sp, c);
116 c = *s++;
117 }
118 }
119 else if (c == '(')
120 {
121 /*
122 * someone explain in one sentence why
123 * a cast is needed to make this work
124 */
125
126 if (p < (int)(elementsof(paren) - 1))
127 p++;
128 paren[p] = ++n;
129 }
130 else if (c == ')' && p >= 0)
131 {
132 for (i = p; i > 0; i--)
133 if (paren[i])
134 break;
135 if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
136 {
137 s += 2;
138 j -= '0';
139 if (nsub <= j)
140 {
141 if (!nsub)
142 memset(sub, 0, sizeof(sub));
143 nsub = j + 1;
144 }
145 sub[j] = paren[i] + 1;
146 }
147 paren[i] = 0;
148 }
149 sfputc(sp, c);
150 }
151 } while (s = va_arg(ap, char*));
152 }
153 va_end(ap);
154 if (e)
155 return 0;
156 if (!(s = sfstruse(sp)))
157 {
158 sfstrclose(sp);
159 return 0;
160 }
161 re = 0;
162 n = 0;
163 do
164 {
165 if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
166 {
167 if (re)
168 free(re);
169 sfstrclose(sp);
170 return 0;
171 }
172 re->cur = re->buf;
173 re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
174 regalloc(re, block, REG_NOFREE);
175 c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
176 regalloc(NiL, NiL, 0);
177 } while (c == REG_ESPACE);
178 sfstrclose(sp);
179 if (c)
180 {
181 free(re);
182 return 0;
183 }
184 if (re->nsub = nsub)
185 memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
186 return (char*)re;
187 }
188
189 char*
regex(const char * handle,const char * subject,...)190 regex(const char* handle, const char* subject, ...)
191 {
192 register Regex_t* re;
193 register int n;
194 register int i;
195 register int k;
196 char* sub[SUB + 1];
197 regmatch_t match[SUB + 1];
198 va_list ap;
199
200 va_start(ap, subject);
201 if (!(re = (Regex_t*)handle) || !subject)
202 k = 1;
203 else
204 {
205 k = 0;
206 for (n = 0; n < re->nsub; n++)
207 sub[n] = va_arg(ap, char*);
208 }
209 va_end(ap);
210 if (k)
211 return 0;
212 if (regexec(&re->re, subject, SUB + 1, match, 0))
213 return 0;
214 for (n = 0; n < re->nsub; n++)
215 if (i = re->sub[n])
216 {
217 i--;
218 k = match[i].rm_eo - match[i].rm_so;
219 strlcpy(sub[n], subject + match[i].rm_so, k);
220 *(sub[n] + k) = 0;
221 }
222 __loc1 = (char*)subject + match[0].rm_so;
223 return (char*)subject + match[0].rm_eo;
224 }
225