1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Knowledge Ventures * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 24 /* 25 * posix regex ed(1) style substitute compile 26 */ 27 28 #include "reglib.h" 29 30 static const regflags_t submap[] = 31 { 32 'g', REG_SUB_ALL, 33 'l', REG_SUB_LOWER, 34 'n', REG_SUB_NUMBER, 35 'p', REG_SUB_PRINT, 36 's', REG_SUB_STOP, 37 'u', REG_SUB_UPPER, 38 'w', REG_SUB_WRITE|REG_SUB_LAST, 39 0, 0 40 }; 41 42 int 43 regsubflags(regex_t* p, register const char* s, char** e, int delim, register const regflags_t* map, int* pm, regflags_t* pf) 44 { 45 register int c; 46 register const regflags_t* m; 47 regflags_t flags; 48 int minmatch; 49 regdisc_t* disc; 50 51 flags = pf ? *pf : 0; 52 minmatch = pm ? *pm : 0; 53 if (!map) 54 map = submap; 55 while (!(flags & REG_SUB_LAST)) 56 { 57 if (!(c = *s++) || c == delim) 58 { 59 s--; 60 break; 61 } 62 else if (c >= '0' && c <= '9') 63 { 64 if (minmatch) 65 { 66 disc = p->env->disc; 67 regfree(p); 68 return fatal(disc, REG_EFLAGS, s - 1); 69 } 70 minmatch = c - '0'; 71 while (*s >= '0' && *s <= '9') 72 minmatch = minmatch * 10 + *s++ - '0'; 73 } 74 else 75 { 76 for (m = map; *m; m++) 77 if (*m++ == c) 78 { 79 if (flags & *m) 80 { 81 disc = p->env->disc; 82 regfree(p); 83 return fatal(disc, REG_EFLAGS, s - 1); 84 } 85 flags |= *m--; 86 break; 87 } 88 if (!*m) 89 { 90 s--; 91 break; 92 } 93 } 94 } 95 if (pf) 96 *pf = flags; 97 if (pm) 98 *pm = minmatch; 99 if (e) 100 *e = (char*)s; 101 return 0; 102 } 103 104 /* 105 * compile substitute rhs and optional flags 106 */ 107 108 int 109 regsubcomp(regex_t* p, register const char* s, const regflags_t* map, int minmatch, regflags_t flags) 110 { 111 register regsub_t* sub; 112 register int c; 113 register int d; 114 register char* t; 115 register regsubop_t* op; 116 char* e; 117 const char* r; 118 int sre; 119 int f; 120 int g; 121 int n; 122 int nops; 123 const char* o; 124 regdisc_t* disc; 125 126 disc = p->env->disc; 127 if (p->env->flags & REG_NOSUB) 128 { 129 regfree(p); 130 return fatal(disc, REG_BADPAT, NiL); 131 } 132 if (!(sub = (regsub_t*)alloc(p->env->disc, 0, sizeof(regsub_t) + strlen(s))) || !(sub->re_ops = (regsubop_t*)alloc(p->env->disc, 0, (nops = 8) * sizeof(regsubop_t)))) 133 { 134 if (sub) 135 alloc(p->env->disc, sub, 0); 136 regfree(p); 137 return fatal(disc, REG_ESPACE, s); 138 } 139 sub->re_buf = sub->re_end = 0; 140 p->re_sub = sub; 141 p->env->sub = 1; 142 op = sub->re_ops; 143 o = s; 144 if (!(p->env->flags & REG_DELIMITED)) 145 d = 0; 146 else 147 switch (d = *(s - 1)) 148 { 149 case '\\': 150 case '\n': 151 case '\r': 152 regfree(p); 153 return fatal(disc, REG_EDELIM, s); 154 } 155 sre = p->env->flags & REG_SHELL; 156 t = sub->re_rhs; 157 if (d) 158 { 159 r = s; 160 for (;;) 161 { 162 if (!*s) 163 { 164 if (p->env->flags & REG_MUSTDELIM) 165 { 166 regfree(p); 167 return fatal(disc, REG_EDELIM, r); 168 } 169 break; 170 } 171 else if (*s == d) 172 { 173 flags |= REG_SUB_FULL; 174 s++; 175 break; 176 } 177 else if (*s++ == '\\' && !*s++) 178 { 179 regfree(p); 180 return fatal(disc, REG_EESCAPE, r); 181 } 182 } 183 if (*s) 184 { 185 if (n = regsubflags(p, s, &e, d, map, &minmatch, &flags)) 186 return n; 187 s = (const char*)e; 188 } 189 p->re_npat = s - o; 190 s = r; 191 } 192 else 193 p->re_npat = 0; 194 op->op = f = g = flags & (REG_SUB_LOWER|REG_SUB_UPPER); 195 op->off = 0; 196 while ((c = *s++) != d) 197 { 198 again: 199 if (!c) 200 { 201 p->re_npat = s - o - 1; 202 break; 203 } 204 else if (c == '~') 205 { 206 if (!sre || *s != '(') 207 { 208 *t++ = c; 209 continue; 210 } 211 r = s - 1; 212 s++; 213 c = *s++; 214 } 215 else if (c == '\\') 216 { 217 if (*s == c) 218 { 219 *t++ = *s++; 220 continue; 221 } 222 if ((c = *s++) == d) 223 goto again; 224 if (!c) 225 { 226 regfree(p); 227 return fatal(disc, REG_EESCAPE, s - 2); 228 } 229 if (sre) 230 { 231 *t++ = chresc(s - 2, &e); 232 s = (const char*)e; 233 continue; 234 } 235 if (c == '&') 236 { 237 *t++ = c; 238 continue; 239 } 240 } 241 else if (c == '&') 242 { 243 if (sre) 244 { 245 *t++ = c; 246 continue; 247 } 248 } 249 else 250 { 251 switch (op->op) 252 { 253 case REG_SUB_UPPER: 254 if (islower(c)) 255 c = toupper(c); 256 break; 257 case REG_SUB_LOWER: 258 if (isupper(c)) 259 c = tolower(c); 260 break; 261 case REG_SUB_UPPER|REG_SUB_LOWER: 262 if (isupper(c)) 263 c = tolower(c); 264 else if (islower(c)) 265 c = toupper(c); 266 break; 267 } 268 *t++ = c; 269 continue; 270 } 271 switch (c) 272 { 273 case 0: 274 s--; 275 continue; 276 case '&': 277 c = 0; 278 break; 279 case '0': case '1': case '2': case '3': case '4': 280 case '5': case '6': case '7': case '8': case '9': 281 c -= '0'; 282 if (sre) 283 while (isdigit(*s)) 284 c = c * 10 + *s++ - '0'; 285 else if (isdigit(*s) && (p->env->flags & REG_MULTIREF)) 286 c = c * 10 + *s++ - '0'; 287 break; 288 case 'l': 289 if (sre) 290 { 291 if (*s != ')') 292 { 293 c = -1; 294 break; 295 } 296 s++; 297 } 298 if (c = *s) 299 { 300 s++; 301 if (isupper(c)) 302 c = tolower(c); 303 *t++ = c; 304 } 305 continue; 306 case 'u': 307 if (sre) 308 { 309 if (*s != ')') 310 { 311 c = -1; 312 break; 313 } 314 s++; 315 } 316 if (c = *s) 317 { 318 s++; 319 if (islower(c)) 320 c = toupper(c); 321 *t++ = c; 322 } 323 continue; 324 case 'E': 325 if (sre) 326 { 327 if (*s != ')') 328 { 329 c = -1; 330 break; 331 } 332 s++; 333 } 334 f = g; 335 set: 336 if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops) 337 { 338 if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t)))) 339 { 340 regfree(p); 341 return fatal(disc, REG_ESPACE, NiL); 342 } 343 op = sub->re_ops + n; 344 } 345 op->op = f; 346 op->off = t - sub->re_rhs; 347 continue; 348 case 'L': 349 if (sre) 350 { 351 if (*s != ')') 352 { 353 c = -1; 354 break; 355 } 356 s++; 357 } 358 g = f; 359 f = REG_SUB_LOWER; 360 goto set; 361 case 'U': 362 if (sre) 363 { 364 if (*s != ')') 365 { 366 c = -1; 367 break; 368 } 369 s++; 370 } 371 g = f; 372 f = REG_SUB_UPPER; 373 goto set; 374 default: 375 if (!sre) 376 { 377 *t++ = chresc(s - 2, &e); 378 s = (const char*)e; 379 continue; 380 } 381 s--; 382 c = -1; 383 break; 384 } 385 if (sre) 386 { 387 if (c < 0 || *s != ')') 388 { 389 while (r < s) 390 *t++ = *r++; 391 continue; 392 } 393 s++; 394 } 395 if (c > p->re_nsub) 396 { 397 regfree(p); 398 return fatal(disc, REG_ESUBREG, s - 1); 399 } 400 if ((n = op - sub->re_ops) >= (nops - 2)) 401 { 402 if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t)))) 403 { 404 regfree(p); 405 return fatal(disc, REG_ESPACE, NiL); 406 } 407 op = sub->re_ops + n; 408 } 409 if (op->len = (t - sub->re_rhs) - op->off) 410 op++; 411 op->op = f; 412 op->off = c; 413 op->len = 0; 414 op++; 415 op->op = f; 416 op->off = t - sub->re_rhs; 417 } 418 if ((op->len = (t - sub->re_rhs) - op->off) && (n = ++op - sub->re_ops) >= nops) 419 { 420 if (!(sub->re_ops = (regsubop_t*)alloc(p->env->disc, sub->re_ops, (nops *= 2) * sizeof(regsubop_t)))) 421 { 422 regfree(p); 423 return fatal(disc, REG_ESPACE, NiL); 424 } 425 op = sub->re_ops + n; 426 } 427 op->len = -1; 428 sub->re_flags = flags; 429 sub->re_min = minmatch; 430 return 0; 431 } 432 433 void 434 regsubfree(regex_t* p) 435 { 436 Env_t* env; 437 regsub_t* sub; 438 439 if (p && (env = p->env) && env->sub && (sub = p->re_sub)) 440 { 441 env->sub = 0; 442 p->re_sub = 0; 443 if (!(env->disc->re_flags & REG_NOFREE)) 444 { 445 if (sub->re_buf) 446 alloc(env->disc, sub->re_buf, 0); 447 if (sub->re_ops) 448 alloc(env->disc, sub->re_ops, 0); 449 alloc(env->disc, sub, 0); 450 } 451 } 452 } 453