1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Knowledge Ventures * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 24 /* 25 * posix regex decompiler 26 */ 27 28 #include "reglib.h" 29 30 #undef ismeta 31 #define ismeta(c,t,e,d) (state.magic[c] && state.magic[c][(t)+(e)] >= T_META || (c) == (d)) 32 #define meta(f,c,t,e,d) do { if (ismeta(c,t,e,d)) sfputc(f, '\\'); sfputc(f, c); } while (0) 33 34 static void 35 detrie(Trie_node_t* x, Sfio_t* sp, char* b, char* p, char* e, int delimiter) 36 { 37 register Trie_node_t* y; 38 char* o; 39 int k; 40 41 o = p; 42 k = 1; 43 do 44 { 45 if (k) 46 { 47 o = p; 48 if (p < e) 49 *p++ = x->c; 50 } 51 sfputc(sp, x->c); 52 for (y = x->sib; y; y = y->sib) 53 { 54 sfputc(sp, '|'); 55 sfputc(sp, '<'); 56 sfwrite(sp, b, p - b); 57 sfputc(sp, '>'); 58 detrie(y, sp, b, p, e, delimiter); 59 } 60 if (x->end && x->son) 61 { 62 sfputc(sp, '|'); 63 sfputc(sp, '{'); 64 sfwrite(sp, b, p - b); 65 sfputc(sp, '}'); 66 p = o; 67 } 68 } while (x = x->son); 69 } 70 71 static int 72 decomp(register Rex_t* e, Sfio_t* sp, int type, int delimiter, regflags_t flags) 73 { 74 Rex_t* q; 75 unsigned char* s; 76 unsigned char* t; 77 int c; 78 int d; 79 int cb; 80 int cd; 81 int cr; 82 int ib; 83 int ie; 84 int nb; 85 int ne; 86 unsigned char ic[2*UCHAR_MAX]; 87 unsigned char nc[2*UCHAR_MAX]; 88 89 do 90 { 91 switch (e->type) 92 { 93 case REX_ALT: 94 if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags)) 95 return 1; 96 sfputc(sp, '|'); 97 if (e->re.group.expr.binary.right && decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags)) 98 return 1; 99 break; 100 case REX_BACK: 101 sfprintf(sp, "\\%d", e->lo); 102 break; 103 case REX_BEG: 104 if (type < SRE) 105 sfputc(sp, '^'); 106 break; 107 case REX_END: 108 if (type < SRE) 109 sfputc(sp, '$'); 110 break; 111 case REX_WBEG: 112 meta(sp, '<', type, 1, delimiter); 113 break; 114 case REX_WEND: 115 meta(sp, '<', type, 1, delimiter); 116 break; 117 case REX_WORD: 118 sfprintf(sp, "\\w"); 119 break; 120 case REX_CLASS: 121 case REX_COLL_CLASS: 122 case REX_ONECHAR: 123 case REX_DOT: 124 case REX_REP: 125 if (type >= SRE) 126 { 127 c = ')'; 128 if (e->hi == RE_DUP_INF) 129 { 130 if (!e->lo) 131 sfputc(sp, '*'); 132 else if (e->lo == 1) 133 sfputc(sp, '+'); 134 else 135 sfprintf(sp, "{%d,}", e->lo); 136 } 137 else if (e->hi != 1) 138 sfprintf(sp, "{%d,%d}", e->lo, e->hi); 139 else if (e->lo == 0) 140 sfputc(sp, '?'); 141 else 142 c = 0; 143 } 144 switch (e->type) 145 { 146 case REX_REP: 147 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 148 return 1; 149 break; 150 case REX_CLASS: 151 sfputc(sp, '['); 152 nb = ne = ib = ie = -2; 153 cb = cd = cr = 0; 154 s = nc; 155 t = ic; 156 for (c = 0; c <= UCHAR_MAX; c++) 157 if (settst(e->re.charclass, c)) 158 { 159 if (c == ']') 160 cb = 1; 161 else if (c == '-') 162 cr = 1; 163 else if (c == delimiter) 164 cd = 1; 165 else if (nb < 0) 166 ne = nb = c; 167 else if (ne == (c - 1)) 168 ne = c; 169 else 170 { 171 if (ne == nb) 172 *s++ = ne; 173 else 174 { 175 *s++ = nb; 176 *s++ = '-'; 177 *s++ = ne; 178 } 179 ne = nb = c; 180 } 181 } 182 else 183 { 184 if (c == ']') 185 cb = -1; 186 else if (c == '-') 187 cr = -1; 188 else if (c == delimiter) 189 cd = -1; 190 else if (ib < 0) 191 ie = ib = c; 192 else if (ie == (c - 1)) 193 ie = c; 194 else 195 { 196 if (ie == ib) 197 *t++ = ie; 198 else 199 { 200 *t++ = ib; 201 *t++ = '-'; 202 *t++ = ie; 203 } 204 ie = ib = c; 205 } 206 } 207 if (nb >= 0) 208 { 209 *s++ = nb; 210 if (ne != nb) 211 { 212 *s++ = '-'; 213 *s++ = ne; 214 } 215 } 216 if (ib >= 0) 217 { 218 *t++ = ib; 219 if (ie != ib) 220 { 221 *t++ = '-'; 222 *t++ = ie; 223 } 224 } 225 if ((t - ic + 1) < (s - nc + (nc[0] == '^'))) 226 { 227 sfputc(sp, '^'); 228 if (cb < 0) 229 sfputc(sp, ']'); 230 if (cr < 0) 231 sfputc(sp, '-'); 232 if (cd < 0) 233 { 234 if (flags & REG_ESCAPE) 235 sfputc(sp, '\\'); 236 sfputc(sp, delimiter); 237 } 238 sfwrite(sp, ic, t - ic); 239 } 240 else 241 { 242 if (cb > 0) 243 sfputc(sp, ']'); 244 if (cr > 0) 245 sfputc(sp, '-'); 246 if (cd > 0) 247 { 248 if (flags & REG_ESCAPE) 249 sfputc(sp, '\\'); 250 sfputc(sp, delimiter); 251 } 252 if (nc[0] == '^') 253 { 254 sfwrite(sp, nc + 1, s - nc - 1); 255 sfputc(sp, '^'); 256 } 257 else 258 sfwrite(sp, nc, s - nc); 259 } 260 sfputc(sp, ']'); 261 break; 262 case REX_COLL_CLASS: 263 break; 264 case REX_ONECHAR: 265 meta(sp, e->re.onechar, type, 0, delimiter); 266 break; 267 case REX_DOT: 268 sfputc(sp, '.'); 269 break; 270 } 271 if (type < SRE) 272 { 273 if (e->hi == RE_DUP_INF) 274 { 275 if (!e->lo) 276 sfputc(sp, '*'); 277 else if (e->lo == 1 && ismeta('+', type, 0, delimiter)) 278 meta(sp, '+', type, 1, delimiter); 279 else 280 { 281 meta(sp, '{', type, 1, delimiter); 282 sfprintf(sp, "%d,", e->lo); 283 meta(sp, '}', type, 1, delimiter); 284 } 285 } 286 else if (e->hi != 1 || e->lo == 0 && !ismeta('?', type, 0, delimiter)) 287 { 288 meta(sp, '{', type, 1, delimiter); 289 sfprintf(sp, "%d,%d", e->lo, e->hi); 290 meta(sp, '}', type, 1, delimiter); 291 } 292 else if (e->lo == 0) 293 meta(sp, '?', type, 1, delimiter); 294 } 295 else if (c) 296 sfputc(sp, c); 297 break; 298 case REX_STRING: 299 case REX_KMP: 300 t = (s = e->re.string.base) + e->re.string.size; 301 while (s < t) 302 { 303 c = *s++; 304 meta(sp, c, type, 0, delimiter); 305 } 306 break; 307 case REX_TRIE: 308 ib = 0; 309 for (c = 0; c <= UCHAR_MAX; c++) 310 if (e->re.trie.root[c]) 311 { 312 char pfx[1024]; 313 314 if (ib) 315 sfputc(sp, '|'); 316 else 317 ib = 1; 318 detrie(e->re.trie.root[c], sp, pfx, pfx, &pfx[sizeof(pfx)], delimiter); 319 } 320 break; 321 case REX_NEG: 322 if (type >= SRE) 323 sfprintf(sp, "!("); 324 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 325 return 1; 326 if (type >= SRE) 327 sfputc(sp, ')'); 328 else 329 sfputc(sp, '!'); 330 break; 331 case REX_CONJ: 332 if (decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags)) 333 return 1; 334 sfputc(sp, '&'); 335 if (decomp(e->re.group.expr.binary.right, sp, type, delimiter, flags)) 336 return 1; 337 break; 338 case REX_GROUP: 339 if (type >= SRE) 340 sfputc(sp, '@'); 341 meta(sp, '(', type, 1, delimiter); 342 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 343 return 1; 344 meta(sp, ')', type, 1, delimiter); 345 break; 346 case REX_GROUP_AHEAD: 347 case REX_GROUP_AHEAD_NOT: 348 case REX_GROUP_BEHIND: 349 case REX_GROUP_BEHIND_NOT: 350 meta(sp, '(', type, 1, delimiter); 351 sfputc(sp, '?'); 352 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 353 return 1; 354 meta(sp, ')', type, 1, delimiter); 355 break; 356 case REX_GROUP_COND: 357 meta(sp, '(', type, 1, delimiter); 358 sfputc(sp, '?'); 359 if (e->re.group.expr.binary.left && decomp(e->re.group.expr.binary.left, sp, type, delimiter, flags)) 360 return 1; 361 if (q = e->re.group.expr.binary.right) 362 { 363 sfputc(sp, ':'); 364 if (q->re.group.expr.binary.left && decomp(q->re.group.expr.binary.left, sp, type, delimiter, flags)) 365 return 1; 366 sfputc(sp, ':'); 367 if (q->re.group.expr.binary.right && decomp(q->re.group.expr.binary.right, sp, type, delimiter, flags)) 368 return 1; 369 } 370 meta(sp, ')', type, 1, delimiter); 371 break; 372 case REX_GROUP_CUT: 373 meta(sp, '(', type, 1, delimiter); 374 sfputc(sp, '?'); 375 if (decomp(e->re.group.expr.rex, sp, type, delimiter, flags)) 376 return 1; 377 meta(sp, ')', type, 1, delimiter); 378 break; 379 case REX_BM: 380 break; 381 default: 382 sfprintf(sp, "<ERROR:REX_%d>", e->type); 383 break; 384 } 385 } while (e = e->next); 386 return 0; 387 } 388 389 /* 390 * reconstruct pattern from compiled re p into sp 391 */ 392 393 size_t 394 regdecomp(regex_t* p, regflags_t flags, char* buf, size_t n) 395 { 396 Sfio_t* sp; 397 char* s; 398 int type; 399 int delimiter; 400 size_t r; 401 402 if (!(sp = sfstropen())) 403 return 0; 404 if (flags < 0) 405 flags = p->env->flags; 406 switch (flags & (REG_AUGMENTED|REG_EXTENDED|REG_SHELL)) 407 { 408 case 0: 409 type = BRE; 410 break; 411 case REG_AUGMENTED: 412 case REG_AUGMENTED|REG_EXTENDED: 413 type = ARE; 414 break; 415 case REG_EXTENDED: 416 type = ERE; 417 break; 418 case REG_SHELL: 419 type = SRE; 420 break; 421 default: 422 type = KRE; 423 break; 424 } 425 if (flags & REG_DELIMITED) 426 { 427 delimiter = '/'; 428 sfputc(sp, delimiter); 429 } 430 else 431 delimiter = 0; 432 if (decomp(p->env->rex, sp, type, delimiter, flags)) 433 r = 0; 434 else 435 { 436 if (delimiter) 437 sfputc(sp, delimiter); 438 if ((r = sfstrtell(sp) + 1) <= n) 439 { 440 if (!(s = sfstruse(sp))) 441 r = 0; 442 else 443 memcpy(buf, s, r); 444 } 445 } 446 sfstrclose(sp); 447 return r; 448 } 449