1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
23
24 /*
25 * posix regex decompiler
26 */
27
28 #include "reglib.h"
29
30 #undef ismeta
31 #define ismeta(c,t,e,d) (state.magic[c] && state.magic[c][(t)+(e)] >= T_META || (c) == (d))
32 #define meta(f,c,t,e,d) do { if (ismeta(c,t,e,d)) sfputc(f, '\\'); sfputc(f, c); } while (0)
33
34 static void
detrie(Trie_node_t * x,Sfio_t * sp,char * b,char * p,char * e,int delimiter)35 detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter)
36 {
37 register Trie_node_t* y;
38 char* o;
39 int k;
40
41 o = p;
42 k = 1;
43 do
44 {
45 if (k)
46 {
47 o = p;
48 if (p < e)
49 *p++ = x->c;
50 }
51 sfputc(sp, x->c);
52 for (y = x->sib; y; y = y->sib)
53 {
54 sfputc(sp, '|');
55 sfputc(sp, '<');
56 sfwrite(sp, b, p - b);
57 sfputc(sp, '>');
58 detrie(y, sp, b, p, e, delimiter);
59 }
60 if (x->end && x->son)
61 {
62 sfputc(sp, '|');
63 sfputc(sp, '{');
64 sfwrite(sp, b, p - b);
65 sfputc(sp, '}');
66 p = o;
67 }
68 } while (x = x->son);
69 }
70
71 static int
decomp(register Rex_t * e,Sfio_t * sp,int type,int delimiter,regflags_t flags)72 decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags)
73 {
74 Rex_t* q;
75 unsigned char* s;
76 unsigned char* t;
77 int c;
78 int m;
79 int cb;
80 int cd;
81 int cr;
82 int ib;
83 int ie;
84 int nb;
85 int ne;
86 unsigned char ic[2*UCHAR_MAX];
87 unsigned char nc[2*UCHAR_MAX];
88
89 do
90 {
91 switch (e->type)
92 {
93 case REX_ALT:
94 if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
95 return 1;
96 sfputc(sp, '|');
97 if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
98 return 1;
99 break;
100 case REX_BACK:
101 sfprintf(sp, "\\%d", e->lo);
102 break;
103 case REX_BEG:
104 if (type < SRE)
105 sfputc(sp, '^');
106 break;
107 case REX_END:
108 if (type < SRE)
109 sfputc(sp, '$');
110 break;
111 case REX_WBEG:
112 meta(sp, '<', type, 1, delimiter);
113 break;
114 case REX_WEND:
115 meta(sp, '<', type, 1, delimiter);
116 break;
117 case REX_WORD:
118 sfprintf(sp, "\\w");
119 break;
120 case REX_CLASS:
121 case REX_COLL_CLASS:
122 case REX_ONECHAR:
123 case REX_DOT:
124 case REX_REP:
125 if (type >= SRE)
126 {
127 c = ')';
128 if (e->hi == RE_DUP_INF)
129 {
130 if (!e->lo)
131 sfputc(sp, '*');
132 else if (e->lo == 1)
133 sfputc(sp, '+');
134 else
135 sfprintf(sp, "{%d,}", e->lo);
136 }
137 else if (e->hi != 1)
138 sfprintf(sp, "{%d,%d}", e->lo, e->hi);
139 else if (e->lo == 0)
140 sfputc(sp, '?');
141 else
142 c = 0;
143 }
144 switch (e->type)
145 {
146 case REX_REP:
147 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
148 return 1;
149 break;
150 case REX_CLASS:
151 sfputc(sp, '[');
152 nb = ne = ib = ie = -2;
153 cb = cd = cr = 0;
154 s = nc;
155 t = ic;
156 for (m = 0; m <= UCHAR_MAX; m++)
157 if (settst(e->re.charclass, m))
158 {
159 if (m == ']')
160 cb = 1;
161 else if (m == '-')
162 cr = 1;
163 else if (m == delimiter)
164 cd = 1;
165 else if (nb < 0)
166 ne = nb = m;
167 else if (ne == (m - 1))
168 ne = m;
169 else
170 {
171 if (ne == nb)
172 *s++ = ne;
173 else
174 {
175 *s++ = nb;
176 *s++ = '-';
177 *s++ = ne;
178 }
179 ne = nb = m;
180 }
181 }
182 else
183 {
184 if (m == ']')
185 cb = -1;
186 else if (m == '-')
187 cr = -1;
188 else if (m == delimiter)
189 cd = -1;
190 else if (ib < 0)
191 ie = ib = m;
192 else if (ie == (m - 1))
193 ie = m;
194 else
195 {
196 if (ie == ib)
197 *t++ = ie;
198 else
199 {
200 *t++ = ib;
201 *t++ = '-';
202 *t++ = ie;
203 }
204 ie = ib = m;
205 }
206 }
207 if (nb >= 0)
208 {
209 *s++ = nb;
210 if (ne != nb)
211 {
212 *s++ = '-';
213 *s++ = ne;
214 }
215 }
216 if (ib >= 0)
217 {
218 *t++ = ib;
219 if (ie != ib)
220 {
221 *t++ = '-';
222 *t++ = ie;
223 }
224 }
225 if ((t - ic + 1) < (s - nc + (nc[0] == '^')))
226 {
227 sfputc(sp, '^');
228 if (cb < 0)
229 sfputc(sp, ']');
230 if (cr < 0)
231 sfputc(sp, '-');
232 if (cd < 0 && delimiter > 0)
233 {
234 if (flags & REG_ESCAPE)
235 sfputc(sp, '\\');
236 sfputc(sp, delimiter);
237 }
238 sfwrite(sp, ic, t - ic);
239 }
240 else
241 {
242 if (cb > 0)
243 sfputc(sp, ']');
244 if (cr > 0)
245 sfputc(sp, '-');
246 if (cd > 0 && delimiter > 0)
247 {
248 if (flags & REG_ESCAPE)
249 sfputc(sp, '\\');
250 sfputc(sp, delimiter);
251 }
252 if (nc[0] == '^')
253 {
254 sfwrite(sp, nc + 1, s - nc - 1);
255 sfputc(sp, '^');
256 }
257 else
258 sfwrite(sp, nc, s - nc);
259 }
260 sfputc(sp, ']');
261 break;
262 case REX_COLL_CLASS:
263 break;
264 case REX_ONECHAR:
265 meta(sp, e->re.onechar, type, 0, delimiter);
266 break;
267 case REX_DOT:
268 sfputc(sp, '.');
269 break;
270 }
271 if (type < SRE)
272 {
273 if (e->hi == RE_DUP_INF)
274 {
275 if (!e->lo)
276 sfputc(sp, '*');
277 else if (e->lo == 1 && ismeta('+', type, 0, delimiter))
278 meta(sp, '+', type, 1, delimiter);
279 else
280 {
281 meta(sp, '{', type, 1, delimiter);
282 sfprintf(sp, "%d,", e->lo);
283 meta(sp, '}', type, 1, delimiter);
284 }
285 }
286 else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter))
287 {
288 meta(sp, '{', type, 1, delimiter);
289 sfprintf(sp, "%d,%d", e->lo, e->hi);
290 meta(sp, '}', type, 1, delimiter);
291 }
292 else if (e->lo == 0)
293 meta(sp, '?', type, 1, delimiter);
294 }
295 else if (c)
296 sfputc(sp, c);
297 break;
298 case REX_STRING:
299 case REX_KMP:
300 t = (s = e->re.string.base) + e->re.string.size;
301 while (s < t)
302 {
303 c = *s++;
304 meta(sp, c, type, 0, delimiter);
305 }
306 break;
307 case REX_TRIE:
308 ib = 0;
309 for (c = 0; c <= UCHAR_MAX; c++)
310 if (e->re.trie.root[c])
311 {
312 char pfx[1024];
313
314 if (ib)
315 sfputc(sp, '|');
316 else
317 ib = 1;
318 detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter);
319 }
320 break;
321 case REX_NEG:
322 if (type >= SRE)
323 sfprintf(sp, "!(");
324 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
325 return 1;
326 if (type >= SRE)
327 sfputc(sp, ')');
328 else
329 sfputc(sp, '!');
330 break;
331 case REX_CONJ:
332 if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
333 return 1;
334 sfputc(sp, '&');
335 if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags))
336 return 1;
337 break;
338 case REX_GROUP:
339 if (type >= SRE)
340 sfputc(sp, '@');
341 meta(sp, '(', type, 1, delimiter);
342 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
343 return 1;
344 meta(sp, ')', type, 1, delimiter);
345 break;
346 case REX_GROUP_AHEAD:
347 case REX_GROUP_AHEAD_NOT:
348 case REX_GROUP_BEHIND:
349 case REX_GROUP_BEHIND_NOT:
350 meta(sp, '(', type, 1, delimiter);
351 sfputc(sp, '?');
352 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
353 return 1;
354 meta(sp, ')', type, 1, delimiter);
355 break;
356 case REX_GROUP_COND:
357 meta(sp, '(', type, 1, delimiter);
358 sfputc(sp, '?');
359 if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags))
360 return 1;
361 if (q = e->re.group.expr.binary.right)
362 {
363 sfputc(sp, ':');
364 if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags))
365 return 1;
366 sfputc(sp, ':');
367 if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags))
368 return 1;
369 }
370 meta(sp, ')', type, 1, delimiter);
371 break;
372 case REX_GROUP_CUT:
373 meta(sp, '(', type, 1, delimiter);
374 sfputc(sp, '?');
375 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags))
376 return 1;
377 meta(sp, ')', type, 1, delimiter);
378 break;
379 case REX_BM:
380 break;
381 default:
382 sfprintf(sp, "<ERROR:REX_%d>", e->type);
383 break;
384 }
385 } while (e = e->next);
386 return 0;
387 }
388
389 /*
390 * reconstruct pattern from compiled re p into sp
391 */
392
393 size_t
regdecomp(regex_t * p,regflags_t flags,char * buf,size_t n)394 regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n)
395 {
396 Sfio_t* sp;
397 char* s;
398 int type;
399 int delimiter;
400 size_t r;
401
402 if (!(sp = sfstropen()))
403 return 0;
404 if (flags == (regflags_t)~0)
405 flags = p->env->flags;
406 switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL))
407 {
408 case 0:
409 type = BRE;
410 break;
411 case REG_AUGMENTED:
412 case REG_AUGMENTED|REG_EXTENDED:
413 type = ARE;
414 break;
415 case REG_EXTENDED:
416 type = ERE;
417 break;
418 case REG_SHELL:
419 type = SRE;
420 break;
421 default:
422 type = KRE;
423 break;
424 }
425 if (flags & REG_DELIMITED)
426 {
427 delimiter = '/';
428 sfputc(sp, delimiter);
429 }
430 else
431 delimiter = -1;
432 if (decomp(p->env->rex, sp, type, delimiter, flags))
433 r = 0;
434 else
435 {
436 if (delimiter > 0)
437 sfputc(sp, delimiter);
438 if ((r = sfstrtell(sp) + 1) <= n)
439 {
440 if (!(s = sfstruse(sp)))
441 r = 0;
442 else
443 memcpy(buf, s, r);
444 }
445 }
446 sfstrclose(sp);
447 return r;
448 }
449