1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2009 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 24 /* 25 * posix regex decompiler 26 */ 27 28 #include "reglib.h" 29 30 #undef ismeta 31 #define ismeta(c,t,e,d) (state.magic[c] && state.magic[c][(t)+(e)] >= T_META || (c) == (d)) 32 #define meta(f,c,t,e,d) do { if (ismeta(c,t,e,d)) sfputc(f, '\\'); sfputc(f, c); } while (0) 33 34 static void 35 detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter) 36 { 37 register Trie_node_t* y; 38 char* o; 39 int k; 40 41 o = p; 42 k = 1; 43 do 44 { 45 if (k) 46 { 47 o = p; 48 if (p < e) 49 *p++ = x->c; 50 } 51 sfputc(sp, x->c); 52 for (y = x->sib; y; y = y->sib) 53 { 54 sfputc(sp, '|'); 55 sfputc(sp, '<'); 56 sfwrite(sp, b, p - b); 57 sfputc(sp, '>'); 58 detrie(y, sp, b, p, e, delimiter); 59 } 60 if (x->end && x->son) 61 { 62 sfputc(sp, '|'); 63 sfputc(sp, '{'); 64 sfwrite(sp, b, p - b); 65 sfputc(sp, '}'); 66 p = o; 67 } 68 } while (x = x->son); 69 } 70 71 static int 72 decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags) 73 { 74 Rex_t* q; 75 unsigned char* s; 76 unsigned char* t; 77 int c; 78 int cb; 79 int cd; 80 int cr; 81 int ib; 82 int ie; 83 int nb; 84 int ne; 85 unsigned char ic[2*UCHAR_MAX]; 86 unsigned char nc[2*UCHAR_MAX]; 87 88 do 89 { 90 switch (e->type) 91 { 92 case REX_ALT: 93 if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags)) 94 return 1; 95 sfputc(sp, '|'); 96 if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags)) 97 return 1; 98 break; 99 case REX_BACK: 100 sfprintf(sp, "\\%d", e->lo); 101 break; 102 case REX_BEG: 103 if (type < SRE) 104 sfputc(sp, '^'); 105 break; 106 case REX_END: 107 if (type < SRE) 108 sfputc(sp, '$'); 109 break; 110 case REX_WBEG: 111 meta(sp, '<', type, 1, delimiter); 112 break; 113 case REX_WEND: 114 meta(sp, '<', type, 1, delimiter); 115 break; 116 case REX_WORD: 117 sfprintf(sp, "\\w"); 118 break; 119 case REX_CLASS: 120 case REX_COLL_CLASS: 121 case REX_ONECHAR: 122 case REX_DOT: 123 case REX_REP: 124 if (type >= SRE) 125 { 126 c = ')'; 127 if (e->hi == RE_DUP_INF) 128 { 129 if (!e->lo) 130 sfputc(sp, '*'); 131 else if (e->lo == 1) 132 sfputc(sp, '+'); 133 else 134 sfprintf(sp, "{%d,}", e->lo); 135 } 136 else if (e->hi != 1) 137 sfprintf(sp, "{%d,%d}", e->lo, e->hi); 138 else if (e->lo == 0) 139 sfputc(sp, '?'); 140 else 141 c = 0; 142 } 143 switch (e->type) 144 { 145 case REX_REP: 146 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 147 return 1; 148 break; 149 case REX_CLASS: 150 sfputc(sp, '['); 151 nb = ne = ib = ie = -2; 152 cb = cd = cr = 0; 153 s = nc; 154 t = ic; 155 for (c = 0; c <= UCHAR_MAX; c++) 156 if (settst(e->re.charclass, c)) 157 { 158 if (c == ']') 159 cb = 1; 160 else if (c == '-') 161 cr = 1; 162 else if (c == delimiter) 163 cd = 1; 164 else if (nb < 0) 165 ne = nb = c; 166 else if (ne == (c - 1)) 167 ne = c; 168 else 169 { 170 if (ne == nb) 171 *s++ = ne; 172 else 173 { 174 *s++ = nb; 175 *s++ = '-'; 176 *s++ = ne; 177 } 178 ne = nb = c; 179 } 180 } 181 else 182 { 183 if (c == ']') 184 cb = -1; 185 else if (c == '-') 186 cr = -1; 187 else if (c == delimiter) 188 cd = -1; 189 else if (ib < 0) 190 ie = ib = c; 191 else if (ie == (c - 1)) 192 ie = c; 193 else 194 { 195 if (ie == ib) 196 *t++ = ie; 197 else 198 { 199 *t++ = ib; 200 *t++ = '-'; 201 *t++ = ie; 202 } 203 ie = ib = c; 204 } 205 } 206 if (nb >= 0) 207 { 208 *s++ = nb; 209 if (ne != nb) 210 { 211 *s++ = '-'; 212 *s++ = ne; 213 } 214 } 215 if (ib >= 0) 216 { 217 *t++ = ib; 218 if (ie != ib) 219 { 220 *t++ = '-'; 221 *t++ = ie; 222 } 223 } 224 if ((t - ic + 1) < (s - nc + (nc[0] == '^'))) 225 { 226 sfputc(sp, '^'); 227 if (cb < 0) 228 sfputc(sp, ']'); 229 if (cr < 0) 230 sfputc(sp, '-'); 231 if (cd < 0) 232 { 233 if (flags & REG_ESCAPE) 234 sfputc(sp, '\\'); 235 sfputc(sp, delimiter); 236 } 237 sfwrite(sp, ic, t - ic); 238 } 239 else 240 { 241 if (cb > 0) 242 sfputc(sp, ']'); 243 if (cr > 0) 244 sfputc(sp, '-'); 245 if (cd > 0) 246 { 247 if (flags & REG_ESCAPE) 248 sfputc(sp, '\\'); 249 sfputc(sp, delimiter); 250 } 251 if (nc[0] == '^') 252 { 253 sfwrite(sp, nc + 1, s - nc - 1); 254 sfputc(sp, '^'); 255 } 256 else 257 sfwrite(sp, nc, s - nc); 258 } 259 sfputc(sp, ']'); 260 break; 261 case REX_COLL_CLASS: 262 break; 263 case REX_ONECHAR: 264 meta(sp, e->re.onechar, type, 0, delimiter); 265 break; 266 case REX_DOT: 267 sfputc(sp, '.'); 268 break; 269 } 270 if (type < SRE) 271 { 272 if (e->hi == RE_DUP_INF) 273 { 274 if (!e->lo) 275 sfputc(sp, '*'); 276 else if (e->lo == 1 && ismeta('+', type, 0, delimiter)) 277 meta(sp, '+', type, 1, delimiter); 278 else 279 { 280 meta(sp, '{', type, 1, delimiter); 281 sfprintf(sp, "%d,", e->lo); 282 meta(sp, '}', type, 1, delimiter); 283 } 284 } 285 else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter)) 286 { 287 meta(sp, '{', type, 1, delimiter); 288 sfprintf(sp, "%d,%d", e->lo, e->hi); 289 meta(sp, '}', type, 1, delimiter); 290 } 291 else if (e->lo == 0) 292 meta(sp, '?', type, 1, delimiter); 293 } 294 else if (c) 295 sfputc(sp, c); 296 break; 297 case REX_STRING: 298 case REX_KMP: 299 t = (s = e->re.string.base) + e->re.string.size; 300 while (s < t) 301 { 302 c = *s++; 303 meta(sp, c, type, 0, delimiter); 304 } 305 break; 306 case REX_TRIE: 307 ib = 0; 308 for (c = 0; c <= UCHAR_MAX; c++) 309 if (e->re.trie.root[c]) 310 { 311 char pfx[1024]; 312 313 if (ib) 314 sfputc(sp, '|'); 315 else 316 ib = 1; 317 detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter); 318 } 319 break; 320 case REX_NEG: 321 if (type >= SRE) 322 sfprintf(sp, "!("); 323 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 324 return 1; 325 if (type >= SRE) 326 sfputc(sp, ')'); 327 else 328 sfputc(sp, '!'); 329 break; 330 case REX_CONJ: 331 if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags)) 332 return 1; 333 sfputc(sp, '&'); 334 if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags)) 335 return 1; 336 break; 337 case REX_GROUP: 338 if (type >= SRE) 339 sfputc(sp, '@'); 340 meta(sp, '(', type, 1, delimiter); 341 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 342 return 1; 343 meta(sp, ')', type, 1, delimiter); 344 break; 345 case REX_GROUP_AHEAD: 346 case REX_GROUP_AHEAD_NOT: 347 case REX_GROUP_BEHIND: 348 case REX_GROUP_BEHIND_NOT: 349 meta(sp, '(', type, 1, delimiter); 350 sfputc(sp, '?'); 351 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 352 return 1; 353 meta(sp, ')', type, 1, delimiter); 354 break; 355 case REX_GROUP_COND: 356 meta(sp, '(', type, 1, delimiter); 357 sfputc(sp, '?'); 358 if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags)) 359 return 1; 360 if (q = e->re.group.expr.binary.right) 361 { 362 sfputc(sp, ':'); 363 if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags)) 364 return 1; 365 sfputc(sp, ':'); 366 if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags)) 367 return 1; 368 } 369 meta(sp, ')', type, 1, delimiter); 370 break; 371 case REX_GROUP_CUT: 372 meta(sp, '(', type, 1, delimiter); 373 sfputc(sp, '?'); 374 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 375 return 1; 376 meta(sp, ')', type, 1, delimiter); 377 break; 378 case REX_BM: 379 break; 380 default: 381 sfprintf(sp, "<ERROR:REX_%d>", e->type); 382 break; 383 } 384 } while (e = e->next); 385 return 0; 386 } 387 388 /* 389 * reconstruct pattern from compiled re p into sp 390 */ 391 392 size_t 393 regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n) 394 { 395 Sfio_t* sp; 396 char* s; 397 int type; 398 int delimiter; 399 size_t r; 400 401 if (!(sp = sfstropen())) 402 return 0; 403 if (flags < 0) 404 flags = p->env->flags; 405 switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL)) 406 { 407 case 0: 408 type = BRE; 409 break; 410 case REG_AUGMENTED: 411 case REG_AUGMENTED|REG_EXTENDED: 412 type = ARE; 413 break; 414 case REG_EXTENDED: 415 type = ERE; 416 break; 417 case REG_SHELL: 418 type = SRE; 419 break; 420 default: 421 type = KRE; 422 break; 423 } 424 if (flags & REG_DELIMITED) 425 { 426 delimiter = '/'; 427 sfputc(sp, delimiter); 428 } 429 else 430 delimiter = 0; 431 if (decomp(p->env->rex, sp, type, delimiter, flags)) 432 r = 0; 433 else 434 { 435 if (delimiter) 436 sfputc(sp, delimiter); 437 if ((r = sfstrtell(sp) + 1) <= n) 438 { 439 if (!(s = sfstruse(sp))) 440 r = 0; 441 else 442 memcpy(buf, s, r); 443 } 444 } 445 sfstrclose(sp); 446 return r; 447 } 448