1 /*- 2 * Copyright (c) 1992, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1992, 1993, 1994, 1995, 1996 5 * Keith Bostic. All rights reserved. 6 * 7 * See the LICENSE file for redistribution information. 8 */ 9 10 #include "config.h" 11 12 #ifndef lint 13 static const char sccsid[] = "@(#)ex_subst.c 10.37 (Berkeley) 9/15/96"; 14 #endif /* not lint */ 15 16 #include <sys/types.h> 17 #include <sys/queue.h> 18 #include <sys/time.h> 19 20 #include <bitstring.h> 21 #include <ctype.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <unistd.h> 28 29 #include "../common/common.h" 30 #include "../vi/vi.h" 31 32 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */ 33 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */ 34 35 static int re_conv __P((SCR *, char **, size_t *, int *)); 36 static int re_cscope_conv __P((SCR *, char **, size_t *, int *)); 37 static int re_sub __P((SCR *, 38 char *, char **, size_t *, size_t *, regmatch_t [10])); 39 static int re_tag_conv __P((SCR *, char **, size_t *, int *)); 40 static int s __P((SCR *, EXCMD *, char *, regex_t *, u_int)); 41 42 /* 43 * ex_s -- 44 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]] 45 * 46 * Substitute on lines matching a pattern. 47 * 48 * PUBLIC: int ex_s __P((SCR *, EXCMD *)); 49 */ 50 int 51 ex_s(sp, cmdp) 52 SCR *sp; 53 EXCMD *cmdp; 54 { 55 regex_t *re; 56 size_t blen, len; 57 u_int flags; 58 int delim; 59 char *bp, *ptrn, *rep, *p, *t; 60 61 /* 62 * Skip leading white space. 63 * 64 * !!! 65 * Historic vi allowed any non-alphanumeric to serve as the 66 * substitution command delimiter. 67 * 68 * !!! 69 * If the arguments are empty, it's the same as &, i.e. we 70 * repeat the last substitution. 71 */ 72 if (cmdp->argc == 0) 73 goto subagain; 74 for (p = cmdp->argv[0]->bp, 75 len = cmdp->argv[0]->len; len > 0; --len, ++p) { 76 if (!isblank(*p)) 77 break; 78 } 79 if (len == 0) 80 subagain: return (ex_subagain(sp, cmdp)); 81 82 delim = *p++; 83 if (isalnum(delim) || delim == '\\') 84 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR)); 85 86 /* 87 * !!! 88 * The full-blown substitute command reset the remembered 89 * state of the 'c' and 'g' suffices. 90 */ 91 sp->c_suffix = sp->g_suffix = 0; 92 93 /* 94 * Get the pattern string, toss escaping characters. 95 * 96 * !!! 97 * Historic vi accepted any of the following forms: 98 * 99 * :s/abc/def/ change "abc" to "def" 100 * :s/abc/def change "abc" to "def" 101 * :s/abc/ delete "abc" 102 * :s/abc delete "abc" 103 * 104 * QUOTING NOTE: 105 * 106 * Only toss an escaping character if it escapes a delimiter. 107 * This means that "s/A/\\\\f" replaces "A" with "\\f". It 108 * would be nice to be more regular, i.e. for each layer of 109 * escaping a single escaping character is removed, but that's 110 * not how the historic vi worked. 111 */ 112 for (ptrn = t = p;;) { 113 if (p[0] == '\0' || p[0] == delim) { 114 if (p[0] == delim) 115 ++p; 116 /* 117 * !!! 118 * Nul terminate the pattern string -- it's passed 119 * to regcomp which doesn't understand anything else. 120 */ 121 *t = '\0'; 122 break; 123 } 124 if (p[0] == '\\') 125 if (p[1] == delim) 126 ++p; 127 else if (p[1] == '\\') 128 *t++ = *p++; 129 *t++ = *p++; 130 } 131 132 /* 133 * If the pattern string is empty, use the last RE (not just the 134 * last substitution RE). 135 */ 136 if (*ptrn == '\0') { 137 if (sp->re == NULL) { 138 ex_emsg(sp, NULL, EXM_NOPREVRE); 139 return (1); 140 } 141 142 /* Re-compile the RE if necessary. */ 143 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 144 sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 145 return (1); 146 flags = 0; 147 } else { 148 /* 149 * !!! 150 * Compile the RE. Historic practice is that substitutes set 151 * the search direction as well as both substitute and search 152 * RE's. We compile the RE twice, as we don't want to bother 153 * ref counting the pattern string and (opaque) structure. 154 */ 155 if (re_compile(sp, ptrn, t - ptrn, 156 &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH)) 157 return (1); 158 if (re_compile(sp, ptrn, t - ptrn, 159 &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST)) 160 return (1); 161 162 flags = SUB_FIRST; 163 sp->searchdir = FORWARD; 164 } 165 re = &sp->re_c; 166 167 /* 168 * Get the replacement string. 169 * 170 * The special character & (\& if O_MAGIC not set) matches the 171 * entire RE. No handling of & is required here, it's done by 172 * re_sub(). 173 * 174 * The special character ~ (\~ if O_MAGIC not set) inserts the 175 * previous replacement string into this replacement string. 176 * Count ~'s to figure out how much space we need. We could 177 * special case nonexistent last patterns or whether or not 178 * O_MAGIC is set, but it's probably not worth the effort. 179 * 180 * QUOTING NOTE: 181 * 182 * Only toss an escaping character if it escapes a delimiter or 183 * if O_MAGIC is set and it escapes a tilde. 184 * 185 * !!! 186 * If the entire replacement pattern is "%", then use the last 187 * replacement pattern. This semantic was added to vi in System 188 * V and then percolated elsewhere, presumably around the time 189 * that it was added to their version of ed(1). 190 */ 191 if (p[0] == '\0' || p[0] == delim) { 192 if (p[0] == delim) 193 ++p; 194 if (sp->repl != NULL) 195 free(sp->repl); 196 sp->repl = NULL; 197 sp->repl_len = 0; 198 } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim)) 199 p += p[1] == delim ? 2 : 1; 200 else { 201 for (rep = p, len = 0; 202 p[0] != '\0' && p[0] != delim; ++p, ++len) 203 if (p[0] == '~') 204 len += sp->repl_len; 205 GET_SPACE_RET(sp, bp, blen, len); 206 for (t = bp, len = 0, p = rep;;) { 207 if (p[0] == '\0' || p[0] == delim) { 208 if (p[0] == delim) 209 ++p; 210 break; 211 } 212 if (p[0] == '\\') { 213 if (p[1] == delim) 214 ++p; 215 else if (p[1] == '\\') { 216 *t++ = *p++; 217 ++len; 218 } else if (p[1] == '~') { 219 ++p; 220 if (!O_ISSET(sp, O_MAGIC)) 221 goto tilde; 222 } 223 } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) { 224 tilde: ++p; 225 memcpy(t, sp->repl, sp->repl_len); 226 t += sp->repl_len; 227 len += sp->repl_len; 228 continue; 229 } 230 *t++ = *p++; 231 ++len; 232 } 233 if ((sp->repl_len = len) != 0) { 234 if (sp->repl != NULL) 235 free(sp->repl); 236 if ((sp->repl = malloc(len)) == NULL) { 237 msgq(sp, M_SYSERR, NULL); 238 FREE_SPACE(sp, bp, blen); 239 return (1); 240 } 241 memcpy(sp->repl, bp, len); 242 } 243 FREE_SPACE(sp, bp, blen); 244 } 245 return (s(sp, cmdp, p, re, flags)); 246 } 247 248 /* 249 * ex_subagain -- 250 * [line [,line]] & [cgr] [count] [#lp]] 251 * 252 * Substitute using the last substitute RE and replacement pattern. 253 * 254 * PUBLIC: int ex_subagain __P((SCR *, EXCMD *)); 255 */ 256 int 257 ex_subagain(sp, cmdp) 258 SCR *sp; 259 EXCMD *cmdp; 260 { 261 if (sp->subre == NULL) { 262 ex_emsg(sp, NULL, EXM_NOPREVRE); 263 return (1); 264 } 265 if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp, 266 sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST)) 267 return (1); 268 return (s(sp, 269 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0)); 270 } 271 272 /* 273 * ex_subtilde -- 274 * [line [,line]] ~ [cgr] [count] [#lp]] 275 * 276 * Substitute using the last RE and last substitute replacement pattern. 277 * 278 * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *)); 279 */ 280 int 281 ex_subtilde(sp, cmdp) 282 SCR *sp; 283 EXCMD *cmdp; 284 { 285 if (sp->re == NULL) { 286 ex_emsg(sp, NULL, EXM_NOPREVRE); 287 return (1); 288 } 289 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 290 sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 291 return (1); 292 return (s(sp, 293 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0)); 294 } 295 296 /* 297 * s -- 298 * Do the substitution. This stuff is *really* tricky. There are lots of 299 * special cases, and general nastiness. Don't mess with it unless you're 300 * pretty confident. 301 * 302 * The nasty part of the substitution is what happens when the replacement 303 * string contains newlines. It's a bit tricky -- consider the information 304 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is 305 * to build a set of newline offsets which we use to break the line up later, 306 * when the replacement is done. Don't change it unless you're *damned* 307 * confident. 308 */ 309 #define NEEDNEWLINE(sp) { \ 310 if (sp->newl_len == sp->newl_cnt) { \ 311 sp->newl_len += 25; \ 312 REALLOC(sp, sp->newl, size_t *, \ 313 sp->newl_len * sizeof(size_t)); \ 314 if (sp->newl == NULL) { \ 315 sp->newl_len = 0; \ 316 return (1); \ 317 } \ 318 } \ 319 } 320 321 #define BUILD(sp, l, len) { \ 322 if (lbclen + (len) > lblen) { \ 323 lblen += MAX(lbclen + (len), 256); \ 324 REALLOC(sp, lb, char *, lblen); \ 325 if (lb == NULL) { \ 326 lbclen = 0; \ 327 return (1); \ 328 } \ 329 } \ 330 memcpy(lb + lbclen, l, len); \ 331 lbclen += len; \ 332 } 333 334 #define NEEDSP(sp, len, pnt) { \ 335 if (lbclen + (len) > lblen) { \ 336 lblen += MAX(lbclen + (len), 256); \ 337 REALLOC(sp, lb, char *, lblen); \ 338 if (lb == NULL) { \ 339 lbclen = 0; \ 340 return (1); \ 341 } \ 342 pnt = lb + lbclen; \ 343 } \ 344 } 345 346 static int 347 s(sp, cmdp, s, re, flags) 348 SCR *sp; 349 EXCMD *cmdp; 350 char *s; 351 regex_t *re; 352 u_int flags; 353 { 354 EVENT ev; 355 MARK from, to; 356 TEXTH tiq; 357 recno_t elno, lno, slno; 358 regmatch_t match[10]; 359 size_t blen, cnt, last, lbclen, lblen, len, llen; 360 size_t offset, saved_offset, scno; 361 int cflag, lflag, nflag, pflag, rflag; 362 int didsub, do_eol_match, eflags, empty_ok, eval; 363 int linechanged, matched, quit, rval; 364 char *bp, *lb; 365 366 NEEDFILE(sp, cmdp); 367 368 slno = sp->lno; 369 scno = sp->cno; 370 371 /* 372 * !!! 373 * Historically, the 'g' and 'c' suffices were always toggled as flags, 374 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was 375 * not set, they were initialized to 0 for all substitute commands. If 376 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user 377 * specified substitute/replacement patterns (see ex_s()). 378 */ 379 if (!O_ISSET(sp, O_EDCOMPATIBLE)) 380 sp->c_suffix = sp->g_suffix = 0; 381 382 /* 383 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but 384 * it only displayed the last change. I'd disallow them, but they are 385 * useful in combination with the [v]global commands. In the current 386 * model the problem is combining them with the 'c' flag -- the screen 387 * would have to flip back and forth between the confirm screen and the 388 * ex print screen, which would be pretty awful. We do display all 389 * changes, though, for what that's worth. 390 * 391 * !!! 392 * Historic vi was fairly strict about the order of "options", the 393 * count, and "flags". I'm somewhat fuzzy on the difference between 394 * options and flags, anyway, so this is a simpler approach, and we 395 * just take it them in whatever order the user gives them. (The ex 396 * usage statement doesn't reflect this.) 397 */ 398 cflag = lflag = nflag = pflag = rflag = 0; 399 if (s == NULL) 400 goto noargs; 401 for (lno = OOBLNO; *s != '\0'; ++s) 402 switch (*s) { 403 case ' ': 404 case '\t': 405 continue; 406 case '+': 407 ++cmdp->flagoff; 408 break; 409 case '-': 410 --cmdp->flagoff; 411 break; 412 case '0': case '1': case '2': case '3': case '4': 413 case '5': case '6': case '7': case '8': case '9': 414 if (lno != OOBLNO) 415 goto usage; 416 errno = 0; 417 lno = strtoul(s, &s, 10); 418 if (*s == '\0') /* Loop increment correction. */ 419 --s; 420 if (errno == ERANGE) { 421 if (lno == LONG_MAX) 422 msgq(sp, M_ERR, "153|Count overflow"); 423 else if (lno == LONG_MIN) 424 msgq(sp, M_ERR, "154|Count underflow"); 425 else 426 msgq(sp, M_SYSERR, NULL); 427 return (1); 428 } 429 /* 430 * In historic vi, the count was inclusive from the 431 * second address. 432 */ 433 cmdp->addr1.lno = cmdp->addr2.lno; 434 cmdp->addr2.lno += lno - 1; 435 if (!db_exist(sp, cmdp->addr2.lno) && 436 db_last(sp, &cmdp->addr2.lno)) 437 return (1); 438 break; 439 case '#': 440 nflag = 1; 441 break; 442 case 'c': 443 sp->c_suffix = !sp->c_suffix; 444 445 /* Ex text structure initialization. */ 446 if (F_ISSET(sp, SC_EX)) { 447 memset(&tiq, 0, sizeof(TEXTH)); 448 CIRCLEQ_INIT(&tiq); 449 } 450 break; 451 case 'g': 452 sp->g_suffix = !sp->g_suffix; 453 break; 454 case 'l': 455 lflag = 1; 456 break; 457 case 'p': 458 pflag = 1; 459 break; 460 case 'r': 461 if (LF_ISSET(SUB_FIRST)) { 462 msgq(sp, M_ERR, 463 "155|Regular expression specified; r flag meaningless"); 464 return (1); 465 } 466 if (!F_ISSET(sp, SC_RE_SEARCH)) { 467 ex_emsg(sp, NULL, EXM_NOPREVRE); 468 return (1); 469 } 470 rflag = 1; 471 re = &sp->re_c; 472 break; 473 default: 474 goto usage; 475 } 476 477 if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) { 478 usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE); 479 return (1); 480 } 481 482 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) { 483 msgq(sp, M_ERR, 484 "156|The #, l and p flags may not be combined with the c flag in vi mode"); 485 return (1); 486 } 487 488 /* 489 * bp: if interactive, line cache 490 * blen: if interactive, line cache length 491 * lb: build buffer pointer. 492 * lbclen: current length of built buffer. 493 * lblen; length of build buffer. 494 */ 495 bp = lb = NULL; 496 blen = lbclen = lblen = 0; 497 498 /* For each line... */ 499 for (matched = quit = 0, lno = cmdp->addr1.lno, 500 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) { 501 502 /* Someone's unhappy, time to stop. */ 503 if (INTERRUPTED(sp)) 504 break; 505 506 /* Get the line. */ 507 if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 508 goto err; 509 510 /* 511 * Make a local copy if doing confirmation -- when calling 512 * the confirm routine we're likely to lose the cached copy. 513 */ 514 if (sp->c_suffix) { 515 if (bp == NULL) { 516 GET_SPACE_RET(sp, bp, blen, llen); 517 } else 518 ADD_SPACE_RET(sp, bp, blen, llen); 519 memcpy(bp, s, llen); 520 s = bp; 521 } 522 523 /* Start searching from the beginning. */ 524 offset = 0; 525 len = llen; 526 527 /* Reset the build buffer offset. */ 528 lbclen = 0; 529 530 /* Reset empty match flag. */ 531 empty_ok = 1; 532 533 /* 534 * We don't want to have to do a setline if the line didn't 535 * change -- keep track of whether or not this line changed. 536 * If doing confirmations, don't want to keep setting the 537 * line if change is refused -- keep track of substitutions. 538 */ 539 didsub = linechanged = 0; 540 541 /* New line, do an EOL match. */ 542 do_eol_match = 1; 543 544 /* It's not nul terminated, but we pretend it is. */ 545 eflags = REG_STARTEND; 546 547 /* 548 * The search area is from s + offset to the EOL. 549 * 550 * Generally, match[0].rm_so is the offset of the start 551 * of the match from the start of the search, and offset 552 * is the offset of the start of the last search. 553 */ 554 nextmatch: match[0].rm_so = 0; 555 match[0].rm_eo = len; 556 557 /* Get the next match. */ 558 eval = regexec(re, (char *)s + offset, 10, match, eflags); 559 560 /* 561 * There wasn't a match or if there was an error, deal with 562 * it. If there was a previous match in this line, resolve 563 * the changes into the database. Otherwise, just move on. 564 */ 565 if (eval == REG_NOMATCH) 566 goto endmatch; 567 if (eval != 0) { 568 re_error(sp, eval, re); 569 goto err; 570 } 571 matched = 1; 572 573 /* Only the first search can match an anchored expression. */ 574 eflags |= REG_NOTBOL; 575 576 /* 577 * !!! 578 * It's possible to match 0-length strings -- for example, the 579 * command s;a*;X;, when matched against the string "aabb" will 580 * result in "XbXbX", i.e. the matches are "aa", the space 581 * between the b's and the space between the b's and the end of 582 * the string. There is a similar space between the beginning 583 * of the string and the a's. The rule that we use (because vi 584 * historically used it) is that any 0-length match, occurring 585 * immediately after a match, is ignored. Otherwise, the above 586 * example would have resulted in "XXbXbX". Another example is 587 * incorrectly using " *" to replace groups of spaces with one 588 * space. 589 * 590 * The way we do this is that if we just had a successful match, 591 * the starting offset does not skip characters, and the match 592 * is empty, ignore the match and move forward. If there's no 593 * more characters in the string, we were attempting to match 594 * after the last character, so quit. 595 */ 596 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) { 597 empty_ok = 1; 598 if (len == 0) 599 goto endmatch; 600 BUILD(sp, s + offset, 1) 601 ++offset; 602 --len; 603 goto nextmatch; 604 } 605 606 /* Confirm change. */ 607 if (sp->c_suffix) { 608 /* 609 * Set the cursor position for confirmation. Note, 610 * if we matched on a '$', the cursor may be past 611 * the end of line. 612 */ 613 from.lno = to.lno = lno; 614 from.cno = match[0].rm_so + offset; 615 to.cno = match[0].rm_eo + offset; 616 /* 617 * Both ex and vi have to correct for a change before 618 * the first character in the line. 619 */ 620 if (llen == 0) 621 from.cno = to.cno = 0; 622 if (F_ISSET(sp, SC_VI)) { 623 /* 624 * Only vi has to correct for a change after 625 * the last character in the line. 626 * 627 * XXX 628 * It would be nice to change the vi code so 629 * that we could display a cursor past EOL. 630 */ 631 if (to.cno >= llen) 632 to.cno = llen - 1; 633 if (from.cno >= llen) 634 from.cno = llen - 1; 635 636 sp->lno = from.lno; 637 sp->cno = from.cno; 638 if (vs_refresh(sp, 1)) 639 goto err; 640 641 vs_update(sp, msg_cat(sp, 642 "169|Confirm change? [n]", NULL), NULL); 643 644 if (v_event_get(sp, &ev, 0, 0)) 645 goto err; 646 switch (ev.e_event) { 647 case E_CHARACTER: 648 break; 649 case E_EOF: 650 case E_ERR: 651 case E_INTERRUPT: 652 goto lquit; 653 default: 654 v_event_err(sp, &ev); 655 goto lquit; 656 } 657 } else { 658 if (ex_print(sp, cmdp, &from, &to, 0) || 659 ex_scprint(sp, &from, &to)) 660 goto lquit; 661 if (ex_txt(sp, &tiq, 0, TXT_CR)) 662 goto err; 663 ev.e_c = tiq.cqh_first->lb[0]; 664 } 665 666 switch (ev.e_c) { 667 case CH_YES: 668 break; 669 default: 670 case CH_NO: 671 didsub = 0; 672 BUILD(sp, s +offset, match[0].rm_eo); 673 goto skip; 674 case CH_QUIT: 675 /* Set the quit/interrupted flags. */ 676 lquit: quit = 1; 677 F_SET(sp->gp, G_INTERRUPTED); 678 679 /* 680 * Resolve any changes, then return to (and 681 * exit from) the main loop. 682 */ 683 goto endmatch; 684 } 685 } 686 687 /* 688 * Set the cursor to the last position changed, converting 689 * from 1-based to 0-based. 690 */ 691 sp->lno = lno; 692 sp->cno = match[0].rm_so; 693 694 /* Copy the bytes before the match into the build buffer. */ 695 BUILD(sp, s + offset, match[0].rm_so); 696 697 /* Substitute the matching bytes. */ 698 didsub = 1; 699 if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match)) 700 goto err; 701 702 /* Set the change flag so we know this line was modified. */ 703 linechanged = 1; 704 705 /* Move past the matched bytes. */ 706 skip: offset += match[0].rm_eo; 707 len -= match[0].rm_eo; 708 709 /* A match cannot be followed by an empty pattern. */ 710 empty_ok = 0; 711 712 /* 713 * If doing a global change with confirmation, we have to 714 * update the screen. The basic idea is to store the line 715 * so the screen update routines can find it, and restart. 716 */ 717 if (didsub && sp->c_suffix && sp->g_suffix) { 718 /* 719 * The new search offset will be the end of the 720 * modified line. 721 */ 722 saved_offset = lbclen; 723 724 /* Copy the rest of the line. */ 725 if (len) 726 BUILD(sp, s + offset, len) 727 728 /* Set the new offset. */ 729 offset = saved_offset; 730 731 /* Store inserted lines, adjusting the build buffer. */ 732 last = 0; 733 if (sp->newl_cnt) { 734 for (cnt = 0; 735 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 736 if (db_insert(sp, lno, 737 lb + last, sp->newl[cnt] - last)) 738 goto err; 739 last = sp->newl[cnt] + 1; 740 ++sp->rptlines[L_ADDED]; 741 } 742 lbclen -= last; 743 offset -= last; 744 sp->newl_cnt = 0; 745 } 746 747 /* Store and retrieve the line. */ 748 if (db_set(sp, lno, lb + last, lbclen)) 749 goto err; 750 if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 751 goto err; 752 ADD_SPACE_RET(sp, bp, blen, llen) 753 memcpy(bp, s, llen); 754 s = bp; 755 len = llen - offset; 756 757 /* Restart the build. */ 758 lbclen = 0; 759 BUILD(sp, s, offset); 760 761 /* 762 * If we haven't already done the after-the-string 763 * match, do one. Set REG_NOTEOL so the '$' pattern 764 * only matches once. 765 */ 766 if (!do_eol_match) 767 goto endmatch; 768 if (offset == len) { 769 do_eol_match = 0; 770 eflags |= REG_NOTEOL; 771 } 772 goto nextmatch; 773 } 774 775 /* 776 * If it's a global: 777 * 778 * If at the end of the string, do a test for the after 779 * the string match. Set REG_NOTEOL so the '$' pattern 780 * only matches once. 781 */ 782 if (sp->g_suffix && do_eol_match) { 783 if (len == 0) { 784 do_eol_match = 0; 785 eflags |= REG_NOTEOL; 786 } 787 goto nextmatch; 788 } 789 790 endmatch: if (!linechanged) 791 continue; 792 793 /* Copy any remaining bytes into the build buffer. */ 794 if (len) 795 BUILD(sp, s + offset, len) 796 797 /* Store inserted lines, adjusting the build buffer. */ 798 last = 0; 799 if (sp->newl_cnt) { 800 for (cnt = 0; 801 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 802 if (db_insert(sp, 803 lno, lb + last, sp->newl[cnt] - last)) 804 goto err; 805 last = sp->newl[cnt] + 1; 806 ++sp->rptlines[L_ADDED]; 807 } 808 lbclen -= last; 809 sp->newl_cnt = 0; 810 } 811 812 /* Store the changed line. */ 813 if (db_set(sp, lno, lb + last, lbclen)) 814 goto err; 815 816 /* Update changed line counter. */ 817 if (sp->rptlchange != lno) { 818 sp->rptlchange = lno; 819 ++sp->rptlines[L_CHANGED]; 820 } 821 822 /* 823 * !!! 824 * Display as necessary. Historic practice is to only 825 * display the last line of a line split into multiple 826 * lines. 827 */ 828 if (lflag || nflag || pflag) { 829 from.lno = to.lno = lno; 830 from.cno = to.cno = 0; 831 if (lflag) 832 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST); 833 if (nflag) 834 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH); 835 if (pflag) 836 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT); 837 } 838 } 839 840 /* 841 * !!! 842 * Historically, vi attempted to leave the cursor at the same place if 843 * the substitution was done at the current cursor position. Otherwise 844 * it moved it to the first non-blank of the last line changed. There 845 * were some problems: for example, :s/$/foo/ with the cursor on the 846 * last character of the line left the cursor on the last character, or 847 * the & command with multiple occurrences of the matching string in the 848 * line usually left the cursor in a fairly random position. 849 * 850 * We try to do the same thing, with the exception that if the user is 851 * doing substitution with confirmation, we move to the last line about 852 * which the user was consulted, as opposed to the last line that they 853 * actually changed. This prevents a screen flash if the user doesn't 854 * change many of the possible lines. 855 */ 856 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) { 857 sp->cno = 0; 858 (void)nonblank(sp, sp->lno, &sp->cno); 859 } 860 861 /* 862 * If not in a global command, and nothing matched, say so. 863 * Else, if none of the lines displayed, put something up. 864 */ 865 rval = 0; 866 if (!matched) { 867 if (!F_ISSET(sp, SC_EX_GLOBAL)) { 868 msgq(sp, M_ERR, "157|No match found"); 869 goto err; 870 } 871 } else if (!lflag && !nflag && !pflag) 872 F_SET(cmdp, E_AUTOPRINT); 873 874 if (0) { 875 err: rval = 1; 876 } 877 878 if (bp != NULL) 879 FREE_SPACE(sp, bp, blen); 880 if (lb != NULL) 881 free(lb); 882 return (rval); 883 } 884 885 /* 886 * re_compile -- 887 * Compile the RE. 888 * 889 * PUBLIC: int re_compile __P((SCR *, 890 * PUBLIC: char *, size_t, char **, size_t *, regex_t *, u_int)); 891 */ 892 int 893 re_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags) 894 SCR *sp; 895 char *ptrn, **ptrnp; 896 size_t plen, *lenp; 897 regex_t *rep; 898 u_int flags; 899 { 900 size_t len; 901 int reflags, replaced, rval; 902 char *p; 903 904 /* Set RE flags. */ 905 reflags = 0; 906 if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) { 907 if (O_ISSET(sp, O_EXTENDED)) 908 reflags |= REG_EXTENDED; 909 if (O_ISSET(sp, O_IGNORECASE)) 910 reflags |= REG_ICASE; 911 if (O_ISSET(sp, O_ICLOWER)) { 912 for (p = ptrn, len = plen; len > 0; ++p, --len) 913 if (isupper(*p)) 914 break; 915 if (len == 0) 916 reflags |= REG_ICASE; 917 } 918 } 919 920 /* If we're replacing a saved value, clear the old one. */ 921 if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) { 922 regfree(&sp->re_c); 923 F_CLR(sp, SC_RE_SEARCH); 924 } 925 if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) { 926 regfree(&sp->subre_c); 927 F_CLR(sp, SC_RE_SUBST); 928 } 929 930 /* 931 * If we're saving the string, it's a pattern we haven't seen before, 932 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for 933 * later recompilation. Free any previously saved value. 934 */ 935 if (ptrnp != NULL) { 936 if (LF_ISSET(RE_C_CSCOPE)) { 937 if (re_cscope_conv(sp, &ptrn, &plen, &replaced)) 938 return (1); 939 /* 940 * XXX 941 * Currently, the match-any-<blank> expression used in 942 * re_cscope_conv() requires extended RE's. This may 943 * not be right or safe. 944 */ 945 reflags |= REG_EXTENDED; 946 } else if (LF_ISSET(RE_C_TAG)) { 947 if (re_tag_conv(sp, &ptrn, &plen, &replaced)) 948 return (1); 949 } else 950 if (re_conv(sp, &ptrn, &plen, &replaced)) 951 return (1); 952 953 /* Discard previous pattern. */ 954 if (*ptrnp != NULL) { 955 free(*ptrnp); 956 *ptrnp = NULL; 957 } 958 if (lenp != NULL) 959 *lenp = plen; 960 961 /* 962 * Copy the string into allocated memory. 963 * 964 * XXX 965 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated 966 * for now. There's just no other solution. 967 */ 968 MALLOC(sp, *ptrnp, char *, plen + 1); 969 if (*ptrnp != NULL) { 970 memcpy(*ptrnp, ptrn, plen); 971 (*ptrnp)[plen] = '\0'; 972 } 973 974 /* Free up conversion-routine-allocated memory. */ 975 if (replaced) 976 FREE_SPACE(sp, ptrn, 0); 977 978 if (*ptrnp == NULL) 979 return (1); 980 981 ptrn = *ptrnp; 982 } 983 984 /* 985 * XXX 986 * Regcomp isn't 8-bit clean, so we just lost if the pattern 987 * contained a nul. Bummer! 988 */ 989 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) { 990 if (!LF_ISSET(RE_C_SILENT)) 991 re_error(sp, rval, rep); 992 return (1); 993 } 994 995 if (LF_ISSET(RE_C_SEARCH)) 996 F_SET(sp, SC_RE_SEARCH); 997 if (LF_ISSET(RE_C_SUBST)) 998 F_SET(sp, SC_RE_SUBST); 999 1000 return (0); 1001 } 1002 1003 /* 1004 * re_conv -- 1005 * Convert vi's regular expressions into something that the 1006 * the POSIX 1003.2 RE functions can handle. 1007 * 1008 * There are three conversions we make to make vi's RE's (specifically 1009 * the global, search, and substitute patterns) work with POSIX RE's. 1010 * 1011 * 1: If O_MAGIC is not set, strip backslashes from the magic character 1012 * set (.[*~) that have them, and add them to the ones that don't. 1013 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text 1014 * from the last substitute command's replacement string. If O_MAGIC 1015 * is set, it's the string "~". 1016 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the 1017 * new RE escapes. 1018 * 1019 * !!!/XXX 1020 * This doesn't exactly match the historic behavior of vi because we do 1021 * the ~ substitution before calling the RE engine, so magic characters 1022 * in the replacement string will be expanded by the RE engine, and they 1023 * weren't historically. It's a bug. 1024 */ 1025 static int 1026 re_conv(sp, ptrnp, plenp, replacedp) 1027 SCR *sp; 1028 char **ptrnp; 1029 size_t *plenp; 1030 int *replacedp; 1031 { 1032 size_t blen, len, needlen; 1033 int magic; 1034 char *bp, *p, *t; 1035 1036 /* 1037 * First pass through, we figure out how much space we'll need. 1038 * We do it in two passes, on the grounds that most of the time 1039 * the user is doing a search and won't have magic characters. 1040 * That way we can skip most of the memory allocation and copies. 1041 */ 1042 magic = 0; 1043 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len) 1044 switch (*p) { 1045 case '\\': 1046 if (len > 1) { 1047 --len; 1048 switch (*++p) { 1049 case '<': 1050 magic = 1; 1051 needlen += sizeof(RE_WSTART); 1052 break; 1053 case '>': 1054 magic = 1; 1055 needlen += sizeof(RE_WSTOP); 1056 break; 1057 case '~': 1058 if (!O_ISSET(sp, O_MAGIC)) { 1059 magic = 1; 1060 needlen += sp->repl_len; 1061 } 1062 break; 1063 case '.': 1064 case '[': 1065 case '*': 1066 if (!O_ISSET(sp, O_MAGIC)) { 1067 magic = 1; 1068 needlen += 1; 1069 } 1070 break; 1071 default: 1072 needlen += 2; 1073 } 1074 } else 1075 needlen += 1; 1076 break; 1077 case '~': 1078 if (O_ISSET(sp, O_MAGIC)) { 1079 magic = 1; 1080 needlen += sp->repl_len; 1081 } 1082 break; 1083 case '.': 1084 case '[': 1085 case '*': 1086 if (!O_ISSET(sp, O_MAGIC)) { 1087 magic = 1; 1088 needlen += 2; 1089 } 1090 break; 1091 default: 1092 needlen += 1; 1093 break; 1094 } 1095 1096 if (!magic) { 1097 *replacedp = 0; 1098 return (0); 1099 } 1100 1101 /* Get enough memory to hold the final pattern. */ 1102 *replacedp = 1; 1103 GET_SPACE_RET(sp, bp, blen, needlen); 1104 1105 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len) 1106 switch (*p) { 1107 case '\\': 1108 if (len > 1) { 1109 --len; 1110 switch (*++p) { 1111 case '<': 1112 memcpy(t, 1113 RE_WSTART, sizeof(RE_WSTART) - 1); 1114 t += sizeof(RE_WSTART) - 1; 1115 break; 1116 case '>': 1117 memcpy(t, 1118 RE_WSTOP, sizeof(RE_WSTOP) - 1); 1119 t += sizeof(RE_WSTOP) - 1; 1120 break; 1121 case '~': 1122 if (O_ISSET(sp, O_MAGIC)) 1123 *t++ = '~'; 1124 else { 1125 memcpy(t, 1126 sp->repl, sp->repl_len); 1127 t += sp->repl_len; 1128 } 1129 break; 1130 case '.': 1131 case '[': 1132 case '*': 1133 if (O_ISSET(sp, O_MAGIC)) 1134 *t++ = '\\'; 1135 *t++ = *p; 1136 break; 1137 default: 1138 *t++ = '\\'; 1139 *t++ = *p; 1140 } 1141 } else 1142 *t++ = '\\'; 1143 break; 1144 case '~': 1145 if (O_ISSET(sp, O_MAGIC)) { 1146 memcpy(t, sp->repl, sp->repl_len); 1147 t += sp->repl_len; 1148 } else 1149 *t++ = '~'; 1150 break; 1151 case '.': 1152 case '[': 1153 case '*': 1154 if (!O_ISSET(sp, O_MAGIC)) 1155 *t++ = '\\'; 1156 *t++ = *p; 1157 break; 1158 default: 1159 *t++ = *p; 1160 break; 1161 } 1162 1163 *ptrnp = bp; 1164 *plenp = t - bp; 1165 return (0); 1166 } 1167 1168 /* 1169 * re_tag_conv -- 1170 * Convert a tags search path into something that the POSIX 1171 * 1003.2 RE functions can handle. 1172 */ 1173 static int 1174 re_tag_conv(sp, ptrnp, plenp, replacedp) 1175 SCR *sp; 1176 char **ptrnp; 1177 size_t *plenp; 1178 int *replacedp; 1179 { 1180 size_t blen, len; 1181 int lastdollar; 1182 char *bp, *p, *t; 1183 1184 len = *plenp; 1185 1186 /* Max memory usage is 2 times the length of the string. */ 1187 *replacedp = 1; 1188 GET_SPACE_RET(sp, bp, blen, len * 2); 1189 1190 p = *ptrnp; 1191 t = bp; 1192 1193 /* If the last character is a '/' or '?', we just strip it. */ 1194 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?')) 1195 --len; 1196 1197 /* If the next-to-last or last character is a '$', it's magic. */ 1198 if (len > 0 && p[len - 1] == '$') { 1199 --len; 1200 lastdollar = 1; 1201 } else 1202 lastdollar = 0; 1203 1204 /* If the first character is a '/' or '?', we just strip it. */ 1205 if (len > 0 && (p[0] == '/' || p[0] == '?')) { 1206 ++p; 1207 --len; 1208 } 1209 1210 /* If the first or second character is a '^', it's magic. */ 1211 if (p[0] == '^') { 1212 *t++ = *p++; 1213 --len; 1214 } 1215 1216 /* 1217 * Escape every other magic character we can find, meanwhile stripping 1218 * the backslashes ctags inserts when escaping the search delimiter 1219 * characters. 1220 */ 1221 for (; len > 0; --len) { 1222 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) { 1223 ++p; 1224 --len; 1225 } else if (strchr("^.[]$*", p[0])) 1226 *t++ = '\\'; 1227 *t++ = *p++; 1228 } 1229 if (lastdollar) 1230 *t++ = '$'; 1231 1232 *ptrnp = bp; 1233 *plenp = t - bp; 1234 return (0); 1235 } 1236 1237 /* 1238 * re_cscope_conv -- 1239 * Convert a cscope search path into something that the POSIX 1240 * 1003.2 RE functions can handle. 1241 */ 1242 static int 1243 re_cscope_conv(sp, ptrnp, plenp, replacedp) 1244 SCR *sp; 1245 char **ptrnp; 1246 size_t *plenp; 1247 int *replacedp; 1248 { 1249 size_t blen, len, nspaces; 1250 char *bp, *p, *t; 1251 1252 /* 1253 * Each space in the source line printed by cscope represents an 1254 * arbitrary sequence of spaces, tabs, and comments. 1255 */ 1256 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*" 1257 for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len) 1258 if (*p == ' ') 1259 ++nspaces; 1260 1261 /* 1262 * Allocate plenty of space: 1263 * the string, plus potential escaping characters; 1264 * nspaces + 2 copies of CSCOPE_RE_SPACE; 1265 * ^, $, nul terminator characters. 1266 */ 1267 *replacedp = 1; 1268 len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3; 1269 GET_SPACE_RET(sp, bp, blen, len); 1270 1271 p = *ptrnp; 1272 t = bp; 1273 1274 *t++ = '^'; 1275 memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); 1276 t += sizeof(CSCOPE_RE_SPACE) - 1; 1277 1278 for (len = *plenp; len > 0; ++p, --len) 1279 if (*p == ' ') { 1280 memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); 1281 t += sizeof(CSCOPE_RE_SPACE) - 1; 1282 } else { 1283 if (strchr("\\^.[]$*+?()|{}", *p)) 1284 *t++ = '\\'; 1285 *t++ = *p; 1286 } 1287 1288 memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); 1289 t += sizeof(CSCOPE_RE_SPACE) - 1; 1290 *t++ = '$'; 1291 1292 *ptrnp = bp; 1293 *plenp = t - bp; 1294 return (0); 1295 } 1296 1297 /* 1298 * re_error -- 1299 * Report a regular expression error. 1300 * 1301 * PUBLIC: void re_error __P((SCR *, int, regex_t *)); 1302 */ 1303 void 1304 re_error(sp, errcode, preg) 1305 SCR *sp; 1306 int errcode; 1307 regex_t *preg; 1308 { 1309 size_t s; 1310 char *oe; 1311 1312 s = regerror(errcode, preg, "", 0); 1313 if ((oe = malloc(s)) == NULL) 1314 msgq(sp, M_SYSERR, NULL); 1315 else { 1316 (void)regerror(errcode, preg, oe, s); 1317 msgq(sp, M_ERR, "RE error: %s", oe); 1318 free(oe); 1319 } 1320 } 1321 1322 /* 1323 * re_sub -- 1324 * Do the substitution for a regular expression. 1325 */ 1326 static int 1327 re_sub(sp, ip, lbp, lbclenp, lblenp, match) 1328 SCR *sp; 1329 char *ip; /* Input line. */ 1330 char **lbp; 1331 size_t *lbclenp, *lblenp; 1332 regmatch_t match[10]; 1333 { 1334 enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv; 1335 size_t lbclen, lblen; /* Local copies. */ 1336 size_t mlen; /* Match length. */ 1337 size_t rpl; /* Remaining replacement length. */ 1338 char *rp; /* Replacement pointer. */ 1339 int ch; 1340 int no; /* Match replacement offset. */ 1341 char *p, *t; /* Buffer pointers. */ 1342 char *lb; /* Local copies. */ 1343 1344 lb = *lbp; /* Get local copies. */ 1345 lbclen = *lbclenp; 1346 lblen = *lblenp; 1347 1348 /* 1349 * QUOTING NOTE: 1350 * 1351 * There are some special sequences that vi provides in the 1352 * replacement patterns. 1353 * & string the RE matched (\& if nomagic set) 1354 * \# n-th regular subexpression 1355 * \E end \U, \L conversion 1356 * \e end \U, \L conversion 1357 * \l convert the next character to lower-case 1358 * \L convert to lower-case, until \E, \e, or end of replacement 1359 * \u convert the next character to upper-case 1360 * \U convert to upper-case, until \E, \e, or end of replacement 1361 * 1362 * Otherwise, since this is the lowest level of replacement, discard 1363 * all escaping characters. This (hopefully) matches historic practice. 1364 */ 1365 #define OUTCH(ch, nltrans) { \ 1366 CHAR_T __ch = (ch); \ 1367 u_int __value = KEY_VAL(sp, __ch); \ 1368 if (nltrans && (__value == K_CR || __value == K_NL)) { \ 1369 NEEDNEWLINE(sp); \ 1370 sp->newl[sp->newl_cnt++] = lbclen; \ 1371 } else if (conv != C_NOTSET) { \ 1372 switch (conv) { \ 1373 case C_ONELOWER: \ 1374 conv = C_NOTSET; \ 1375 /* FALLTHROUGH */ \ 1376 case C_LOWER: \ 1377 if (isupper(__ch)) \ 1378 __ch = tolower(__ch); \ 1379 break; \ 1380 case C_ONEUPPER: \ 1381 conv = C_NOTSET; \ 1382 /* FALLTHROUGH */ \ 1383 case C_UPPER: \ 1384 if (islower(__ch)) \ 1385 __ch = toupper(__ch); \ 1386 break; \ 1387 default: \ 1388 abort(); \ 1389 } \ 1390 } \ 1391 NEEDSP(sp, 1, p); \ 1392 *p++ = __ch; \ 1393 ++lbclen; \ 1394 } 1395 conv = C_NOTSET; 1396 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) { 1397 switch (ch = *rp++) { 1398 case '&': 1399 if (O_ISSET(sp, O_MAGIC)) { 1400 no = 0; 1401 goto subzero; 1402 } 1403 break; 1404 case '\\': 1405 if (rpl == 0) 1406 break; 1407 --rpl; 1408 switch (ch = *rp) { 1409 case '&': 1410 ++rp; 1411 if (!O_ISSET(sp, O_MAGIC)) { 1412 no = 0; 1413 goto subzero; 1414 } 1415 break; 1416 case '0': case '1': case '2': case '3': case '4': 1417 case '5': case '6': case '7': case '8': case '9': 1418 no = *rp++ - '0'; 1419 subzero: if (match[no].rm_so == -1 || 1420 match[no].rm_eo == -1) 1421 break; 1422 mlen = match[no].rm_eo - match[no].rm_so; 1423 for (t = ip + match[no].rm_so; mlen--; ++t) 1424 OUTCH(*t, 0); 1425 continue; 1426 case 'e': 1427 case 'E': 1428 ++rp; 1429 conv = C_NOTSET; 1430 continue; 1431 case 'l': 1432 ++rp; 1433 conv = C_ONELOWER; 1434 continue; 1435 case 'L': 1436 ++rp; 1437 conv = C_LOWER; 1438 continue; 1439 case 'u': 1440 ++rp; 1441 conv = C_ONEUPPER; 1442 continue; 1443 case 'U': 1444 ++rp; 1445 conv = C_UPPER; 1446 continue; 1447 default: 1448 ++rp; 1449 break; 1450 } 1451 } 1452 OUTCH(ch, 1); 1453 } 1454 1455 *lbp = lb; /* Update caller's information. */ 1456 *lbclenp = lbclen; 1457 *lblenp = lblen; 1458 return (0); 1459 } 1460