1 /*- 2 * Copyright (c) 1992, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1992, 1993, 1994, 1995, 1996 5 * Keith Bostic. All rights reserved. 6 * 7 * See the LICENSE file for redistribution information. 8 */ 9 10 #include "config.h" 11 12 #include <sys/types.h> 13 #include <sys/queue.h> 14 #include <sys/time.h> 15 16 #include <bitstring.h> 17 #include <ctype.h> 18 #include <errno.h> 19 #include <limits.h> 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <unistd.h> 24 25 #include "../common/common.h" 26 #include "../vi/vi.h" 27 28 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */ 29 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */ 30 31 static int re_conv(SCR *, CHAR_T **, size_t *, int *); 32 static int re_cscope_conv(SCR *, CHAR_T **, size_t *, int *); 33 static int re_sub(SCR *, 34 CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]); 35 static int re_tag_conv(SCR *, CHAR_T **, size_t *, int *); 36 static int s(SCR *, EXCMD *, CHAR_T *, regex_t *, u_int); 37 38 /* 39 * ex_s -- 40 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]] 41 * 42 * Substitute on lines matching a pattern. 43 * 44 * PUBLIC: int ex_s(SCR *, EXCMD *); 45 */ 46 int 47 ex_s(SCR *sp, EXCMD *cmdp) 48 { 49 regex_t *re; 50 size_t blen, len; 51 u_int flags; 52 int delim; 53 CHAR_T *bp, *p, *ptrn, *rep, *t; 54 55 /* 56 * Skip leading white space. 57 * 58 * !!! 59 * Historic vi allowed any non-alphanumeric to serve as the 60 * substitution command delimiter. 61 * 62 * !!! 63 * If the arguments are empty, it's the same as &, i.e. we 64 * repeat the last substitution. 65 */ 66 if (cmdp->argc == 0) 67 goto subagain; 68 for (p = cmdp->argv[0]->bp, 69 len = cmdp->argv[0]->len; len > 0; --len, ++p) { 70 if (!cmdskip(*p)) 71 break; 72 } 73 if (len == 0) 74 subagain: return (ex_subagain(sp, cmdp)); 75 76 delim = *p++; 77 if (is09azAZ(delim) || delim == '\\') 78 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR)); 79 80 /* 81 * !!! 82 * The full-blown substitute command reset the remembered 83 * state of the 'c' and 'g' suffices. 84 */ 85 sp->c_suffix = sp->g_suffix = 0; 86 87 /* 88 * Get the pattern string, toss escaping characters. 89 * 90 * !!! 91 * Historic vi accepted any of the following forms: 92 * 93 * :s/abc/def/ change "abc" to "def" 94 * :s/abc/def change "abc" to "def" 95 * :s/abc/ delete "abc" 96 * :s/abc delete "abc" 97 * 98 * QUOTING NOTE: 99 * 100 * Only toss an escaping character if it escapes a delimiter. 101 * This means that "s/A/\\\\f" replaces "A" with "\\f". It 102 * would be nice to be more regular, i.e. for each layer of 103 * escaping a single escaping character is removed, but that's 104 * not how the historic vi worked. 105 */ 106 for (ptrn = t = p;;) { 107 if (p[0] == '\0' || p[0] == delim) { 108 if (p[0] == delim) 109 ++p; 110 /* 111 * !!! 112 * Nul terminate the pattern string -- it's passed 113 * to regcomp which doesn't understand anything else. 114 */ 115 *t = '\0'; 116 break; 117 } 118 if (p[0] == '\\') { 119 if (p[1] == delim) 120 ++p; 121 else if (p[1] == '\\') 122 *t++ = *p++; 123 } 124 *t++ = *p++; 125 } 126 127 /* 128 * If the pattern string is empty, use the last RE (not just the 129 * last substitution RE). 130 */ 131 if (*ptrn == '\0') { 132 if (sp->re == NULL) { 133 ex_emsg(sp, NULL, EXM_NOPREVRE); 134 return (1); 135 } 136 137 /* Re-compile the RE if necessary. */ 138 if (!F_ISSET(sp, SC_RE_SEARCH) && 139 re_compile(sp, sp->re, sp->re_len, 140 NULL, NULL, &sp->re_c, RE_C_SEARCH)) 141 return (1); 142 flags = 0; 143 } else { 144 /* 145 * !!! 146 * Compile the RE. Historic practice is that substitutes set 147 * the search direction as well as both substitute and search 148 * RE's. We compile the RE twice, as we don't want to bother 149 * ref counting the pattern string and (opaque) structure. 150 */ 151 if (re_compile(sp, ptrn, t - ptrn, &sp->re, 152 &sp->re_len, &sp->re_c, RE_C_SEARCH)) 153 return (1); 154 if (re_compile(sp, ptrn, t - ptrn, &sp->subre, 155 &sp->subre_len, &sp->subre_c, RE_C_SUBST)) 156 return (1); 157 158 flags = SUB_FIRST; 159 sp->searchdir = FORWARD; 160 } 161 re = &sp->re_c; 162 163 /* 164 * Get the replacement string. 165 * 166 * The special character & (\& if O_MAGIC not set) matches the 167 * entire RE. No handling of & is required here, it's done by 168 * re_sub(). 169 * 170 * The special character ~ (\~ if O_MAGIC not set) inserts the 171 * previous replacement string into this replacement string. 172 * Count ~'s to figure out how much space we need. We could 173 * special case nonexistent last patterns or whether or not 174 * O_MAGIC is set, but it's probably not worth the effort. 175 * 176 * QUOTING NOTE: 177 * 178 * Only toss an escaping character if it escapes a delimiter or 179 * if O_MAGIC is set and it escapes a tilde. 180 * 181 * !!! 182 * If the entire replacement pattern is "%", then use the last 183 * replacement pattern. This semantic was added to vi in System 184 * V and then percolated elsewhere, presumably around the time 185 * that it was added to their version of ed(1). 186 */ 187 if (p[0] == '\0' || p[0] == delim) { 188 if (p[0] == delim) 189 ++p; 190 free(sp->repl); 191 sp->repl = NULL; 192 sp->repl_len = 0; 193 } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim)) 194 p += p[1] == delim ? 2 : 1; 195 else { 196 for (rep = p, len = 0; 197 p[0] != '\0' && p[0] != delim; ++p, ++len) 198 if (p[0] == '~') 199 len += sp->repl_len; 200 GET_SPACE_RETW(sp, bp, blen, len); 201 for (t = bp, len = 0, p = rep;;) { 202 if (p[0] == '\0' || p[0] == delim) { 203 if (p[0] == delim) 204 ++p; 205 break; 206 } 207 if (p[0] == '\\') { 208 if (p[1] == delim) 209 ++p; 210 else if (p[1] == '\\') { 211 *t++ = *p++; 212 ++len; 213 } else if (p[1] == '~') { 214 ++p; 215 if (!O_ISSET(sp, O_MAGIC)) 216 goto tilde; 217 } 218 } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) { 219 tilde: ++p; 220 MEMCPY(t, sp->repl, sp->repl_len); 221 t += sp->repl_len; 222 len += sp->repl_len; 223 continue; 224 } 225 *t++ = *p++; 226 ++len; 227 } 228 if ((sp->repl_len = len) != 0) { 229 free(sp->repl); 230 MALLOC(sp, sp->repl, len * sizeof(CHAR_T)); 231 if (sp->repl == NULL) { 232 FREE_SPACEW(sp, bp, blen); 233 return (1); 234 } 235 MEMCPY(sp->repl, bp, len); 236 } 237 FREE_SPACEW(sp, bp, blen); 238 } 239 return (s(sp, cmdp, p, re, flags)); 240 } 241 242 /* 243 * ex_subagain -- 244 * [line [,line]] & [cgr] [count] [#lp]] 245 * 246 * Substitute using the last substitute RE and replacement pattern. 247 * 248 * PUBLIC: int ex_subagain(SCR *, EXCMD *); 249 */ 250 int 251 ex_subagain(SCR *sp, EXCMD *cmdp) 252 { 253 if (sp->subre == NULL) { 254 ex_emsg(sp, NULL, EXM_NOPREVRE); 255 return (1); 256 } 257 if (!F_ISSET(sp, SC_RE_SUBST) && 258 re_compile(sp, sp->subre, sp->subre_len, 259 NULL, NULL, &sp->subre_c, RE_C_SUBST)) 260 return (1); 261 return (s(sp, 262 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0)); 263 } 264 265 /* 266 * ex_subtilde -- 267 * [line [,line]] ~ [cgr] [count] [#lp]] 268 * 269 * Substitute using the last RE and last substitute replacement pattern. 270 * 271 * PUBLIC: int ex_subtilde(SCR *, EXCMD *); 272 */ 273 int 274 ex_subtilde(SCR *sp, EXCMD *cmdp) 275 { 276 if (sp->re == NULL) { 277 ex_emsg(sp, NULL, EXM_NOPREVRE); 278 return (1); 279 } 280 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re, 281 sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 282 return (1); 283 return (s(sp, 284 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0)); 285 } 286 287 /* 288 * s -- 289 * Do the substitution. This stuff is *really* tricky. There are lots of 290 * special cases, and general nastiness. Don't mess with it unless you're 291 * pretty confident. 292 * 293 * The nasty part of the substitution is what happens when the replacement 294 * string contains newlines. It's a bit tricky -- consider the information 295 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is 296 * to build a set of newline offsets which we use to break the line up later, 297 * when the replacement is done. Don't change it unless you're *damned* 298 * confident. 299 */ 300 #define NEEDNEWLINE(sp) do { \ 301 if (sp->newl_len == sp->newl_cnt) { \ 302 sp->newl_len += 25; \ 303 REALLOC(sp, sp->newl, size_t *, \ 304 sp->newl_len * sizeof(size_t)); \ 305 if (sp->newl == NULL) { \ 306 sp->newl_len = 0; \ 307 return (1); \ 308 } \ 309 } \ 310 } while (0) 311 312 #define BUILD(sp, l, len) do { \ 313 if (lbclen + (len) > lblen) { \ 314 lblen = p2roundup(MAX(lbclen + (len), 256)); \ 315 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \ 316 if (lb == NULL) { \ 317 lbclen = 0; \ 318 return (1); \ 319 } \ 320 } \ 321 MEMCPY(lb + lbclen, l, len); \ 322 lbclen += len; \ 323 } while (0) 324 325 #define NEEDSP(sp, len, pnt) do { \ 326 if (lbclen + (len) > lblen) { \ 327 lblen = p2roundup(MAX(lbclen + (len), 256)); \ 328 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \ 329 if (lb == NULL) { \ 330 lbclen = 0; \ 331 return (1); \ 332 } \ 333 pnt = lb + lbclen; \ 334 } \ 335 } while (0) 336 337 static int 338 s(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags) 339 { 340 EVENT ev; 341 MARK from, to; 342 TEXTH tiq[] = {{ 0 }}; 343 recno_t elno, lno, slno; 344 u_long ul; 345 regmatch_t match[10]; 346 size_t blen, cnt, last, lbclen, lblen, len, llen; 347 size_t offset, saved_offset, scno; 348 int cflag, lflag, nflag, pflag, rflag; 349 int didsub, do_eol_match, eflags, empty_ok, eval; 350 int linechanged, matched, quit, rval; 351 CHAR_T *bp, *lb; 352 enum nresult nret; 353 354 NEEDFILE(sp, cmdp); 355 356 slno = sp->lno; 357 scno = sp->cno; 358 359 /* 360 * !!! 361 * Historically, the 'g' and 'c' suffices were always toggled as flags, 362 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was 363 * not set, they were initialized to 0 for all substitute commands. If 364 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user 365 * specified substitute/replacement patterns (see ex_s()). 366 */ 367 if (!O_ISSET(sp, O_EDCOMPATIBLE)) 368 sp->c_suffix = sp->g_suffix = 0; 369 370 /* 371 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but 372 * it only displayed the last change. I'd disallow them, but they are 373 * useful in combination with the [v]global commands. In the current 374 * model the problem is combining them with the 'c' flag -- the screen 375 * would have to flip back and forth between the confirm screen and the 376 * ex print screen, which would be pretty awful. We do display all 377 * changes, though, for what that's worth. 378 * 379 * !!! 380 * Historic vi was fairly strict about the order of "options", the 381 * count, and "flags". I'm somewhat fuzzy on the difference between 382 * options and flags, anyway, so this is a simpler approach, and we 383 * just take it them in whatever order the user gives them. (The ex 384 * usage statement doesn't reflect this.) 385 */ 386 cflag = lflag = nflag = pflag = rflag = 0; 387 if (s == NULL) 388 goto noargs; 389 for (lno = OOBLNO; *s != '\0'; ++s) 390 switch (*s) { 391 case ' ': 392 case '\t': 393 continue; 394 case '+': 395 ++cmdp->flagoff; 396 break; 397 case '-': 398 --cmdp->flagoff; 399 break; 400 case '0': case '1': case '2': case '3': case '4': 401 case '5': case '6': case '7': case '8': case '9': 402 if (lno != OOBLNO) 403 goto usage; 404 errno = 0; 405 nret = nget_uslong(&ul, s, &s, 10); 406 lno = ul; 407 if (*s == '\0') /* Loop increment correction. */ 408 --s; 409 if (nret != NUM_OK) { 410 if (nret == NUM_OVER) 411 msgq(sp, M_ERR, "153|Count overflow"); 412 else if (nret == NUM_UNDER) 413 msgq(sp, M_ERR, "154|Count underflow"); 414 else 415 msgq(sp, M_SYSERR, NULL); 416 return (1); 417 } 418 /* 419 * In historic vi, the count was inclusive from the 420 * second address. 421 */ 422 cmdp->addr1.lno = cmdp->addr2.lno; 423 cmdp->addr2.lno += lno - 1; 424 if (!db_exist(sp, cmdp->addr2.lno) && 425 db_last(sp, &cmdp->addr2.lno)) 426 return (1); 427 break; 428 case '#': 429 nflag = 1; 430 break; 431 case 'c': 432 sp->c_suffix = !sp->c_suffix; 433 434 /* Ex text structure initialization. */ 435 if (F_ISSET(sp, SC_EX)) 436 TAILQ_INIT(tiq); 437 break; 438 case 'g': 439 sp->g_suffix = !sp->g_suffix; 440 break; 441 case 'l': 442 lflag = 1; 443 break; 444 case 'p': 445 pflag = 1; 446 break; 447 case 'r': 448 if (LF_ISSET(SUB_FIRST)) { 449 msgq(sp, M_ERR, 450 "155|Regular expression specified; r flag meaningless"); 451 return (1); 452 } 453 if (!F_ISSET(sp, SC_RE_SEARCH)) { 454 ex_emsg(sp, NULL, EXM_NOPREVRE); 455 return (1); 456 } 457 rflag = 1; 458 re = &sp->re_c; 459 break; 460 default: 461 goto usage; 462 } 463 464 if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) { 465 usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE); 466 return (1); 467 } 468 469 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) { 470 msgq(sp, M_ERR, 471 "156|The #, l and p flags may not be combined with the c flag in vi mode"); 472 return (1); 473 } 474 475 /* 476 * bp: if interactive, line cache 477 * blen: if interactive, line cache length 478 * lb: build buffer pointer. 479 * lbclen: current length of built buffer. 480 * lblen; length of build buffer. 481 */ 482 bp = lb = NULL; 483 blen = lbclen = lblen = 0; 484 485 /* For each line... */ 486 lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno; 487 for (matched = quit = 0, 488 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) { 489 490 /* Someone's unhappy, time to stop. */ 491 if (INTERRUPTED(sp)) 492 break; 493 494 /* Get the line. */ 495 if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 496 goto err; 497 498 /* 499 * Make a local copy if doing confirmation -- when calling 500 * the confirm routine we're likely to lose the cached copy. 501 */ 502 if (sp->c_suffix) { 503 if (bp == NULL) { 504 GET_SPACE_RETW(sp, bp, blen, llen); 505 } else 506 ADD_SPACE_RETW(sp, bp, blen, llen); 507 MEMCPY(bp, s, llen); 508 s = bp; 509 } 510 511 /* Start searching from the beginning. */ 512 offset = 0; 513 len = llen; 514 515 /* Reset the build buffer offset. */ 516 lbclen = 0; 517 518 /* Reset empty match flag. */ 519 empty_ok = 1; 520 521 /* 522 * We don't want to have to do a setline if the line didn't 523 * change -- keep track of whether or not this line changed. 524 * If doing confirmations, don't want to keep setting the 525 * line if change is refused -- keep track of substitutions. 526 */ 527 didsub = linechanged = 0; 528 529 /* New line, do an EOL match. */ 530 do_eol_match = 1; 531 532 /* It's not nul terminated, but we pretend it is. */ 533 eflags = REG_STARTEND; 534 535 /* 536 * The search area is from s + offset to the EOL. 537 * 538 * Generally, match[0].rm_so is the offset of the start 539 * of the match from the start of the search, and offset 540 * is the offset of the start of the last search. 541 */ 542 nextmatch: match[0].rm_so = 0; 543 match[0].rm_eo = len; 544 545 /* Get the next match. */ 546 eval = regexec(re, s + offset, 10, match, eflags); 547 548 /* 549 * There wasn't a match or if there was an error, deal with 550 * it. If there was a previous match in this line, resolve 551 * the changes into the database. Otherwise, just move on. 552 */ 553 if (eval == REG_NOMATCH) 554 goto endmatch; 555 if (eval != 0) { 556 re_error(sp, eval, re); 557 goto err; 558 } 559 matched = 1; 560 561 /* Only the first search can match an anchored expression. */ 562 eflags |= REG_NOTBOL; 563 564 /* 565 * !!! 566 * It's possible to match 0-length strings -- for example, the 567 * command s;a*;X;, when matched against the string "aabb" will 568 * result in "XbXbX", i.e. the matches are "aa", the space 569 * between the b's and the space between the b's and the end of 570 * the string. There is a similar space between the beginning 571 * of the string and the a's. The rule that we use (because vi 572 * historically used it) is that any 0-length match, occurring 573 * immediately after a match, is ignored. Otherwise, the above 574 * example would have resulted in "XXbXbX". Another example is 575 * incorrectly using " *" to replace groups of spaces with one 576 * space. 577 * 578 * The way we do this is that if we just had a successful match, 579 * the starting offset does not skip characters, and the match 580 * is empty, ignore the match and move forward. If there's no 581 * more characters in the string, we were attempting to match 582 * after the last character, so quit. 583 */ 584 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) { 585 empty_ok = 1; 586 if (len == 0) 587 goto endmatch; 588 BUILD(sp, s + offset, 1); 589 ++offset; 590 --len; 591 goto nextmatch; 592 } 593 594 /* Confirm change. */ 595 if (sp->c_suffix) { 596 /* 597 * Set the cursor position for confirmation. Note, 598 * if we matched on a '$', the cursor may be past 599 * the end of line. 600 */ 601 from.lno = to.lno = lno; 602 from.cno = match[0].rm_so + offset; 603 to.cno = match[0].rm_eo + offset; 604 /* 605 * Both ex and vi have to correct for a change before 606 * the first character in the line. 607 */ 608 if (llen == 0) 609 from.cno = to.cno = 0; 610 if (F_ISSET(sp, SC_VI)) { 611 /* 612 * Only vi has to correct for a change after 613 * the last character in the line. 614 * 615 * XXX 616 * It would be nice to change the vi code so 617 * that we could display a cursor past EOL. 618 */ 619 if (to.cno >= llen) 620 to.cno = llen - 1; 621 if (from.cno >= llen) 622 from.cno = llen - 1; 623 624 sp->lno = from.lno; 625 sp->cno = from.cno; 626 if (vs_refresh(sp, 1)) 627 goto err; 628 629 vs_update(sp, msg_cat(sp, 630 "169|Confirm change? [n]", NULL), NULL); 631 632 if (v_event_get(sp, &ev, 0, 0)) 633 goto err; 634 switch (ev.e_event) { 635 case E_CHARACTER: 636 break; 637 case E_EOF: 638 case E_ERR: 639 case E_INTERRUPT: 640 goto lquit; 641 default: 642 v_event_err(sp, &ev); 643 goto lquit; 644 } 645 } else { 646 const int flags = 647 O_ISSET(sp, O_NUMBER) ? E_C_HASH : 0; 648 if (ex_print(sp, cmdp, &from, &to, flags) || 649 ex_scprint(sp, &from, &to)) 650 goto lquit; 651 if (ex_txt(sp, tiq, 0, TXT_CR)) 652 goto err; 653 ev.e_c = TAILQ_FIRST(tiq)->lb[0]; 654 } 655 656 switch (ev.e_c) { 657 case CH_YES: 658 break; 659 default: 660 case CH_NO: 661 didsub = 0; 662 BUILD(sp, s +offset, match[0].rm_eo); 663 goto skip; 664 case CH_QUIT: 665 /* Set the quit/interrupted flags. */ 666 lquit: quit = 1; 667 F_SET(sp->gp, G_INTERRUPTED); 668 669 /* 670 * Resolve any changes, then return to (and 671 * exit from) the main loop. 672 */ 673 goto endmatch; 674 } 675 } 676 677 /* 678 * Set the cursor to the last position changed, converting 679 * from 1-based to 0-based. 680 */ 681 sp->lno = lno; 682 sp->cno = match[0].rm_so; 683 684 /* Copy the bytes before the match into the build buffer. */ 685 BUILD(sp, s + offset, match[0].rm_so); 686 687 /* Substitute the matching bytes. */ 688 didsub = 1; 689 if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match)) 690 goto err; 691 692 /* Set the change flag so we know this line was modified. */ 693 linechanged = 1; 694 695 /* Move past the matched bytes. */ 696 skip: offset += match[0].rm_eo; 697 len -= match[0].rm_eo; 698 699 /* A match cannot be followed by an empty pattern. */ 700 empty_ok = 0; 701 702 /* 703 * If doing a global change with confirmation, we have to 704 * update the screen. The basic idea is to store the line 705 * so the screen update routines can find it, and restart. 706 */ 707 if (didsub && sp->c_suffix && sp->g_suffix) { 708 /* 709 * The new search offset will be the end of the 710 * modified line. 711 */ 712 saved_offset = lbclen; 713 714 /* Copy the rest of the line. */ 715 if (len) 716 BUILD(sp, s + offset, len); 717 718 /* Set the new offset. */ 719 offset = saved_offset; 720 721 /* Store inserted lines, adjusting the build buffer. */ 722 last = 0; 723 if (sp->newl_cnt) { 724 for (cnt = 0; 725 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 726 if (db_insert(sp, lno, 727 lb + last, sp->newl[cnt] - last)) 728 goto err; 729 last = sp->newl[cnt] + 1; 730 ++sp->rptlines[L_ADDED]; 731 } 732 lbclen -= last; 733 offset -= last; 734 sp->newl_cnt = 0; 735 } 736 737 /* Store and retrieve the line. */ 738 if (db_set(sp, lno, lb + last, lbclen)) 739 goto err; 740 if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 741 goto err; 742 ADD_SPACE_RETW(sp, bp, blen, llen); 743 MEMCPY(bp, s, llen); 744 s = bp; 745 len = llen - offset; 746 747 /* Restart the build. */ 748 lbclen = 0; 749 BUILD(sp, s, offset); 750 751 /* 752 * If we haven't already done the after-the-string 753 * match, do one. Set REG_NOTEOL so the '$' pattern 754 * only matches once. 755 */ 756 if (!do_eol_match) 757 goto endmatch; 758 if (offset == len) { 759 do_eol_match = 0; 760 eflags |= REG_NOTEOL; 761 } 762 goto nextmatch; 763 } 764 765 /* 766 * If it's a global: 767 * 768 * If at the end of the string, do a test for the after 769 * the string match. Set REG_NOTEOL so the '$' pattern 770 * only matches once. 771 */ 772 if (sp->g_suffix && do_eol_match) { 773 if (len == 0) { 774 do_eol_match = 0; 775 eflags |= REG_NOTEOL; 776 } 777 goto nextmatch; 778 } 779 780 endmatch: if (!linechanged) 781 continue; 782 783 /* Copy any remaining bytes into the build buffer. */ 784 if (len) 785 BUILD(sp, s + offset, len); 786 787 /* Store inserted lines, adjusting the build buffer. */ 788 last = 0; 789 if (sp->newl_cnt) { 790 for (cnt = 0; 791 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 792 if (db_insert(sp, 793 lno, lb + last, sp->newl[cnt] - last)) 794 goto err; 795 last = sp->newl[cnt] + 1; 796 ++sp->rptlines[L_ADDED]; 797 } 798 lbclen -= last; 799 sp->newl_cnt = 0; 800 } 801 802 /* Store the changed line. */ 803 if (db_set(sp, lno, lb + last, lbclen)) 804 goto err; 805 806 /* Update changed line counter. */ 807 if (sp->rptlchange != lno) { 808 sp->rptlchange = lno; 809 ++sp->rptlines[L_CHANGED]; 810 } 811 812 /* 813 * !!! 814 * Display as necessary. Historic practice is to only 815 * display the last line of a line split into multiple 816 * lines. 817 */ 818 if (lflag || nflag || pflag) { 819 from.lno = to.lno = lno; 820 from.cno = to.cno = 0; 821 if (lflag) 822 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST); 823 if (nflag) 824 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH); 825 if (pflag) 826 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT); 827 } 828 } 829 830 /* 831 * !!! 832 * Historically, vi attempted to leave the cursor at the same place if 833 * the substitution was done at the current cursor position. Otherwise 834 * it moved it to the first non-blank of the last line changed. There 835 * were some problems: for example, :s/$/foo/ with the cursor on the 836 * last character of the line left the cursor on the last character, or 837 * the & command with multiple occurrences of the matching string in the 838 * line usually left the cursor in a fairly random position. 839 * 840 * We try to do the same thing, with the exception that if the user is 841 * doing substitution with confirmation, we move to the last line about 842 * which the user was consulted, as opposed to the last line that they 843 * actually changed. This prevents a screen flash if the user doesn't 844 * change many of the possible lines. 845 */ 846 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) { 847 sp->cno = 0; 848 (void)nonblank(sp, sp->lno, &sp->cno); 849 } 850 851 /* 852 * If not in a global command, and nothing matched, say so. 853 * Else, if none of the lines displayed, put something up. 854 */ 855 rval = 0; 856 if (!matched) { 857 if (!F_ISSET(sp, SC_EX_GLOBAL)) { 858 msgq(sp, M_ERR, "157|No match found"); 859 goto err; 860 } 861 } else if (!lflag && !nflag && !pflag) 862 F_SET(cmdp, E_AUTOPRINT); 863 864 if (0) { 865 err: rval = 1; 866 } 867 868 if (bp != NULL) 869 FREE_SPACEW(sp, bp, blen); 870 free(lb); 871 return (rval); 872 } 873 874 /* 875 * re_compile -- 876 * Compile the RE. 877 * 878 * PUBLIC: int re_compile(SCR *, 879 * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int); 880 */ 881 int 882 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags) 883 { 884 size_t len; 885 int reflags, replaced, rval; 886 CHAR_T *p; 887 888 /* Set RE flags. */ 889 reflags = 0; 890 if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) { 891 if (O_ISSET(sp, O_EXTENDED)) 892 reflags |= REG_EXTENDED; 893 if (O_ISSET(sp, O_IGNORECASE)) 894 reflags |= REG_ICASE; 895 if (O_ISSET(sp, O_ICLOWER)) { 896 for (p = ptrn, len = plen; len > 0; ++p, --len) 897 if (ISUPPER(*p)) 898 break; 899 if (len == 0) 900 reflags |= REG_ICASE; 901 } 902 } 903 904 /* If we're replacing a saved value, clear the old one. */ 905 if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) { 906 regfree(&sp->re_c); 907 F_CLR(sp, SC_RE_SEARCH); 908 } 909 if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) { 910 regfree(&sp->subre_c); 911 F_CLR(sp, SC_RE_SUBST); 912 } 913 914 /* 915 * If we're saving the string, it's a pattern we haven't seen before, 916 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for 917 * later recompilation. Free any previously saved value. 918 */ 919 if (ptrnp != NULL) { 920 replaced = 0; 921 if (LF_ISSET(RE_C_CSCOPE)) { 922 if (re_cscope_conv(sp, &ptrn, &plen, &replaced)) 923 return (1); 924 /* 925 * XXX 926 * Currently, the match-any-<blank> expression used in 927 * re_cscope_conv() requires extended RE's. This may 928 * not be right or safe. 929 */ 930 reflags |= REG_EXTENDED; 931 } else if (LF_ISSET(RE_C_TAG)) { 932 if (re_tag_conv(sp, &ptrn, &plen, &replaced)) 933 return (1); 934 } else 935 if (re_conv(sp, &ptrn, &plen, &replaced)) 936 return (1); 937 938 /* Discard previous pattern. */ 939 free(*ptrnp); 940 *ptrnp = NULL; 941 942 if (lenp != NULL) 943 *lenp = plen; 944 945 /* 946 * Copy the string into allocated memory. 947 * 948 * XXX 949 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated 950 * for now. There's just no other solution. 951 */ 952 MALLOC(sp, *ptrnp, (plen + 1) * sizeof(CHAR_T)); 953 if (*ptrnp != NULL) { 954 MEMCPY(*ptrnp, ptrn, plen); 955 (*ptrnp)[plen] = '\0'; 956 } 957 958 /* Free up conversion-routine-allocated memory. */ 959 if (replaced) 960 FREE_SPACEW(sp, ptrn, 0); 961 962 if (*ptrnp == NULL) 963 return (1); 964 965 ptrn = *ptrnp; 966 } 967 968 /* 969 * XXX 970 * Regcomp isn't 8-bit clean, so we just lost if the pattern 971 * contained a nul. Bummer! 972 */ 973 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) { 974 if (!LF_ISSET(RE_C_SILENT)) 975 re_error(sp, rval, rep); 976 return (1); 977 } 978 979 if (LF_ISSET(RE_C_SEARCH)) 980 F_SET(sp, SC_RE_SEARCH); 981 if (LF_ISSET(RE_C_SUBST)) 982 F_SET(sp, SC_RE_SUBST); 983 984 return (0); 985 } 986 987 /* 988 * re_conv -- 989 * Convert vi's regular expressions into something that the 990 * the POSIX 1003.2 RE functions can handle. 991 * 992 * There are three conversions we make to make vi's RE's (specifically 993 * the global, search, and substitute patterns) work with POSIX RE's. 994 * 995 * 1: If O_MAGIC is not set, strip backslashes from the magic character 996 * set (.[*~) that have them, and add them to the ones that don't. 997 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text 998 * from the last substitute command's replacement string. If O_MAGIC 999 * is set, it's the string "~". 1000 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the 1001 * new RE escapes. 1002 * 1003 * !!!/XXX 1004 * This doesn't exactly match the historic behavior of vi because we do 1005 * the ~ substitution before calling the RE engine, so magic characters 1006 * in the replacement string will be expanded by the RE engine, and they 1007 * weren't historically. It's a bug. 1008 */ 1009 static int 1010 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp) 1011 { 1012 size_t blen, len, needlen; 1013 int magic; 1014 CHAR_T *bp, *p, *t; 1015 1016 /* 1017 * First pass through, we figure out how much space we'll need. 1018 * We do it in two passes, on the grounds that most of the time 1019 * the user is doing a search and won't have magic characters. 1020 * That way we can skip most of the memory allocation and copies. 1021 */ 1022 magic = 0; 1023 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len) 1024 switch (*p) { 1025 case '\\': 1026 if (len > 1) { 1027 --len; 1028 switch (*++p) { 1029 case '<': 1030 magic = 1; 1031 needlen += RE_WSTART_LEN + 1; 1032 break; 1033 case '>': 1034 magic = 1; 1035 needlen += RE_WSTOP_LEN + 1; 1036 break; 1037 case '~': 1038 if (!O_ISSET(sp, O_MAGIC)) { 1039 magic = 1; 1040 needlen += sp->repl_len; 1041 } 1042 break; 1043 case '.': 1044 case '[': 1045 case '*': 1046 if (!O_ISSET(sp, O_MAGIC)) { 1047 magic = 1; 1048 needlen += 1; 1049 } 1050 break; 1051 default: 1052 needlen += 2; 1053 } 1054 } else 1055 needlen += 1; 1056 break; 1057 case '~': 1058 if (O_ISSET(sp, O_MAGIC)) { 1059 magic = 1; 1060 needlen += sp->repl_len; 1061 } 1062 break; 1063 case '.': 1064 case '[': 1065 case '*': 1066 if (!O_ISSET(sp, O_MAGIC)) { 1067 magic = 1; 1068 needlen += 2; 1069 } 1070 break; 1071 default: 1072 needlen += 1; 1073 break; 1074 } 1075 1076 if (!magic) { 1077 *replacedp = 0; 1078 return (0); 1079 } 1080 1081 /* Get enough memory to hold the final pattern. */ 1082 *replacedp = 1; 1083 GET_SPACE_RETW(sp, bp, blen, needlen); 1084 1085 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len) 1086 switch (*p) { 1087 case '\\': 1088 if (len > 1) { 1089 --len; 1090 switch (*++p) { 1091 case '<': 1092 MEMCPY(t, 1093 RE_WSTART, RE_WSTART_LEN); 1094 t += RE_WSTART_LEN; 1095 break; 1096 case '>': 1097 MEMCPY(t, 1098 RE_WSTOP, RE_WSTOP_LEN); 1099 t += RE_WSTOP_LEN; 1100 break; 1101 case '~': 1102 if (O_ISSET(sp, O_MAGIC)) 1103 *t++ = '~'; 1104 else { 1105 MEMCPY(t, 1106 sp->repl, sp->repl_len); 1107 t += sp->repl_len; 1108 } 1109 break; 1110 case '.': 1111 case '[': 1112 case '*': 1113 if (O_ISSET(sp, O_MAGIC)) 1114 *t++ = '\\'; 1115 *t++ = *p; 1116 break; 1117 default: 1118 *t++ = '\\'; 1119 *t++ = *p; 1120 } 1121 } else 1122 *t++ = '\\'; 1123 break; 1124 case '~': 1125 if (O_ISSET(sp, O_MAGIC)) { 1126 MEMCPY(t, sp->repl, sp->repl_len); 1127 t += sp->repl_len; 1128 } else 1129 *t++ = '~'; 1130 break; 1131 case '.': 1132 case '[': 1133 case '*': 1134 if (!O_ISSET(sp, O_MAGIC)) 1135 *t++ = '\\'; 1136 *t++ = *p; 1137 break; 1138 default: 1139 *t++ = *p; 1140 break; 1141 } 1142 1143 *ptrnp = bp; 1144 *plenp = t - bp; 1145 return (0); 1146 } 1147 1148 /* 1149 * re_tag_conv -- 1150 * Convert a tags search path into something that the POSIX 1151 * 1003.2 RE functions can handle. 1152 */ 1153 static int 1154 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp) 1155 { 1156 size_t blen, len; 1157 int lastdollar; 1158 CHAR_T *bp, *p, *t; 1159 1160 len = *plenp; 1161 1162 /* Max memory usage is 2 times the length of the string. */ 1163 *replacedp = 1; 1164 GET_SPACE_RETW(sp, bp, blen, len * 2); 1165 1166 p = *ptrnp; 1167 t = bp; 1168 1169 /* If the last character is a '/' or '?', we just strip it. */ 1170 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?')) 1171 --len; 1172 1173 /* If the next-to-last or last character is a '$', it's magic. */ 1174 if (len > 0 && p[len - 1] == '$') { 1175 --len; 1176 lastdollar = 1; 1177 } else 1178 lastdollar = 0; 1179 1180 /* If the first character is a '/' or '?', we just strip it. */ 1181 if (len > 0 && (p[0] == '/' || p[0] == '?')) { 1182 ++p; 1183 --len; 1184 } 1185 1186 /* If the first or second character is a '^', it's magic. */ 1187 if (p[0] == '^') { 1188 *t++ = *p++; 1189 --len; 1190 } 1191 1192 /* 1193 * Escape every other magic character we can find, meanwhile stripping 1194 * the backslashes ctags inserts when escaping the search delimiter 1195 * characters. 1196 */ 1197 for (; len > 0; --len) { 1198 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) { 1199 ++p; 1200 if (len > 1) 1201 --len; 1202 } else if (STRCHR(L("^.[]$*"), p[0])) 1203 *t++ = '\\'; 1204 *t++ = *p++; 1205 } 1206 if (lastdollar) 1207 *t++ = '$'; 1208 1209 *ptrnp = bp; 1210 *plenp = t - bp; 1211 return (0); 1212 } 1213 1214 /* 1215 * re_cscope_conv -- 1216 * Convert a cscope search path into something that the POSIX 1217 * 1003.2 RE functions can handle. 1218 */ 1219 static int 1220 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp) 1221 { 1222 size_t blen, len, nspaces; 1223 CHAR_T *bp, *t; 1224 CHAR_T *p; 1225 CHAR_T *wp; 1226 size_t wlen; 1227 1228 /* 1229 * Each space in the source line printed by cscope represents an 1230 * arbitrary sequence of spaces, tabs, and comments. 1231 */ 1232 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*" 1233 #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1 1234 CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen); 1235 for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len) 1236 if (*p == ' ') 1237 ++nspaces; 1238 1239 /* 1240 * Allocate plenty of space: 1241 * the string, plus potential escaping characters; 1242 * nspaces + 2 copies of CSCOPE_RE_SPACE; 1243 * ^, $, nul terminator characters. 1244 */ 1245 *replacedp = 1; 1246 len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3; 1247 GET_SPACE_RETW(sp, bp, blen, len); 1248 1249 p = *ptrnp; 1250 t = bp; 1251 1252 *t++ = '^'; 1253 MEMCPY(t, wp, wlen); 1254 t += wlen; 1255 1256 for (len = *plenp; len > 0; ++p, --len) 1257 if (*p == ' ') { 1258 MEMCPY(t, wp, wlen); 1259 t += wlen; 1260 } else { 1261 if (STRCHR(L("\\^.[]$*+?()|{}"), *p)) 1262 *t++ = '\\'; 1263 *t++ = *p; 1264 } 1265 1266 MEMCPY(t, wp, wlen); 1267 t += wlen; 1268 *t++ = '$'; 1269 1270 *ptrnp = bp; 1271 *plenp = t - bp; 1272 return (0); 1273 } 1274 1275 /* 1276 * re_error -- 1277 * Report a regular expression error. 1278 * 1279 * PUBLIC: void re_error(SCR *, int, regex_t *); 1280 */ 1281 void 1282 re_error(SCR *sp, int errcode, regex_t *preg) 1283 { 1284 size_t s; 1285 char *oe; 1286 1287 s = regerror(errcode, preg, "", 0); 1288 MALLOC(sp, oe, s); 1289 if (oe != NULL) { 1290 (void)regerror(errcode, preg, oe, s); 1291 msgq(sp, M_ERR, "RE error: %s", oe); 1292 free(oe); 1293 } 1294 } 1295 1296 /* 1297 * re_sub -- 1298 * Do the substitution for a regular expression. 1299 */ 1300 static int 1301 re_sub( 1302 SCR *sp, 1303 CHAR_T *ip, /* Input line. */ 1304 CHAR_T **lbp, 1305 size_t *lbclenp, 1306 size_t *lblenp, 1307 regmatch_t match[10]) 1308 { 1309 enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv; 1310 size_t lbclen, lblen; /* Local copies. */ 1311 size_t mlen; /* Match length. */ 1312 size_t rpl; /* Remaining replacement length. */ 1313 CHAR_T *rp; /* Replacement pointer. */ 1314 int ch; 1315 int no; /* Match replacement offset. */ 1316 CHAR_T *p, *t; /* Buffer pointers. */ 1317 CHAR_T *lb; /* Local copies. */ 1318 1319 lb = *lbp; /* Get local copies. */ 1320 lbclen = *lbclenp; 1321 lblen = *lblenp; 1322 1323 /* 1324 * QUOTING NOTE: 1325 * 1326 * There are some special sequences that vi provides in the 1327 * replacement patterns. 1328 * & string the RE matched (\& if nomagic set) 1329 * \# n-th regular subexpression 1330 * \E end \U, \L conversion 1331 * \e end \U, \L conversion 1332 * \l convert the next character to lower-case 1333 * \L convert to lower-case, until \E, \e, or end of replacement 1334 * \u convert the next character to upper-case 1335 * \U convert to upper-case, until \E, \e, or end of replacement 1336 * 1337 * Otherwise, since this is the lowest level of replacement, discard 1338 * all escaping characters. This (hopefully) matches historic practice. 1339 */ 1340 #define OUTCH(ch, nltrans) do { \ 1341 ARG_CHAR_T __ch = (ch); \ 1342 e_key_t __value = KEY_VAL(sp, __ch); \ 1343 if (nltrans && (__value == K_CR || __value == K_NL)) { \ 1344 NEEDNEWLINE(sp); \ 1345 sp->newl[sp->newl_cnt++] = lbclen; \ 1346 } else if (conv != C_NOTSET) { \ 1347 switch (conv) { \ 1348 case C_ONELOWER: \ 1349 conv = C_NOTSET; \ 1350 /* FALLTHROUGH */ \ 1351 case C_LOWER: \ 1352 if (ISUPPER(__ch)) \ 1353 __ch = TOLOWER(__ch); \ 1354 break; \ 1355 case C_ONEUPPER: \ 1356 conv = C_NOTSET; \ 1357 /* FALLTHROUGH */ \ 1358 case C_UPPER: \ 1359 if (ISLOWER(__ch)) \ 1360 __ch = TOUPPER(__ch); \ 1361 break; \ 1362 default: \ 1363 abort(); \ 1364 } \ 1365 } \ 1366 NEEDSP(sp, 1, p); \ 1367 *p++ = __ch; \ 1368 ++lbclen; \ 1369 } while (0) 1370 conv = C_NOTSET; 1371 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) { 1372 switch (ch = *rp++) { 1373 case '&': 1374 if (O_ISSET(sp, O_MAGIC)) { 1375 no = 0; 1376 goto subzero; 1377 } 1378 break; 1379 case '\\': 1380 if (rpl == 0) 1381 break; 1382 --rpl; 1383 switch (ch = *rp) { 1384 case '&': 1385 ++rp; 1386 if (!O_ISSET(sp, O_MAGIC)) { 1387 no = 0; 1388 goto subzero; 1389 } 1390 break; 1391 case '0': case '1': case '2': case '3': case '4': 1392 case '5': case '6': case '7': case '8': case '9': 1393 no = *rp++ - '0'; 1394 subzero: if (match[no].rm_so == -1 || 1395 match[no].rm_eo == -1) 1396 break; 1397 mlen = match[no].rm_eo - match[no].rm_so; 1398 for (t = ip + match[no].rm_so; mlen--; ++t) 1399 OUTCH(*t, 0); 1400 continue; 1401 case 'e': 1402 case 'E': 1403 ++rp; 1404 conv = C_NOTSET; 1405 continue; 1406 case 'l': 1407 ++rp; 1408 conv = C_ONELOWER; 1409 continue; 1410 case 'L': 1411 ++rp; 1412 conv = C_LOWER; 1413 continue; 1414 case 'u': 1415 ++rp; 1416 conv = C_ONEUPPER; 1417 continue; 1418 case 'U': 1419 ++rp; 1420 conv = C_UPPER; 1421 continue; 1422 case '\r': 1423 OUTCH(ch, 0); 1424 continue; 1425 default: 1426 ++rp; 1427 break; 1428 } 1429 } 1430 OUTCH(ch, 1); 1431 } 1432 1433 *lbp = lb; /* Update caller's information. */ 1434 *lbclenp = lbclen; 1435 *lblenp = lblen; 1436 return (0); 1437 } 1438