1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 /* Copyright (c) 1981 Regents of the University of California */ 31 32 #include "ex.h" 33 #include "ex_re.h" 34 35 /* from libgen */ 36 char *_compile(const char *, char *, char *, int); 37 38 /* 39 * The compiled-regular-expression storage areas (re, scanre, and subre) 40 * have been changed into dynamically allocated memory areas, in both the 41 * Solaris and XPG4 versions. 42 * 43 * In the Solaris version, which uses the original libgen(3g) compile() 44 * and step() calls, these areas are allocated once, and then data are 45 * copied between them subsequently, as they were in the original 46 * implementation. This is possible because the compiled information is 47 * a self-contained block of bits. 48 * 49 * In the XPG4 version, the expr:compile.o object is linked in as a 50 * simulation of these functions using the new regcomp() and regexec() 51 * functions. The problem here is that the resulting 52 * compiled-regular-expression data contain pointers to other data, which 53 * need to be freed, but only when we are quite sure that we are done 54 * with them - and certainly not before. There was an earlier attempt to 55 * handle these differences, but that effort was flawed. 56 */ 57 58 extern int getchar(); 59 #ifdef XPG4 60 void regex_comp_free(void *); 61 extern size_t regexc_size; /* compile.c: size of regex_comp structure */ 62 #endif /* XPG4 */ 63 64 /* 65 * Global, substitute and regular expressions. 66 * Very similar to ed, with some re extensions and 67 * confirmed substitute. 68 */ 69 void 70 global(k) 71 bool k; 72 { 73 unsigned char *gp; 74 int c; 75 line *a1; 76 unsigned char globuf[GBSIZE], *Cwas; 77 int nlines = lineDOL(); 78 int oinglobal = inglobal; 79 unsigned char *oglobp = globp; 80 char multi[MB_LEN_MAX + 1]; 81 wchar_t wc; 82 int len; 83 84 85 Cwas = Command; 86 /* 87 * States of inglobal: 88 * 0: ordinary - not in a global command. 89 * 1: text coming from some buffer, not tty. 90 * 2: like 1, but the source of the buffer is a global command. 91 * Hence you're only in a global command if inglobal==2. This 92 * strange sounding convention is historically derived from 93 * everybody simulating a global command. 94 */ 95 if (inglobal==2) 96 error(value(vi_TERSE) ? gettext("Global within global") : 97 gettext("Global within global not allowed")); 98 markDOT(); 99 setall(); 100 nonzero(); 101 if (skipend()) 102 error(value(vi_TERSE) ? gettext("Global needs re") : 103 gettext("Missing regular expression for global")); 104 c = getchar(); 105 (void)vi_compile(c, 1); 106 savere(&scanre); 107 gp = globuf; 108 while ((c = peekchar()) != '\n') { 109 if (!isascii(c)) { 110 if (c == EOF) { 111 c = '\n'; 112 ungetchar(c); 113 goto out; 114 } 115 116 mb_copy: 117 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 118 if ((gp + len) >= &globuf[GBSIZE - 2]) 119 error(gettext("Global command too long")); 120 strncpy(gp, multi, len); 121 gp += len; 122 continue; 123 } 124 } 125 126 (void) getchar(); 127 switch (c) { 128 129 case EOF: 130 c = '\n'; 131 ungetchar(c); 132 goto out; 133 134 case '\\': 135 c = peekchar(); 136 if (!isascii(c)) { 137 *gp++ = '\\'; 138 goto mb_copy; 139 } 140 141 (void) getchar(); 142 switch (c) { 143 144 case '\\': 145 ungetchar(c); 146 break; 147 148 case '\n': 149 break; 150 151 default: 152 *gp++ = '\\'; 153 break; 154 } 155 break; 156 } 157 *gp++ = c; 158 if (gp >= &globuf[GBSIZE - 2]) 159 error(gettext("Global command too long")); 160 } 161 162 out: 163 donewline(); 164 *gp++ = c; 165 *gp++ = 0; 166 saveall(); 167 inglobal = 2; 168 for (a1 = one; a1 <= dol; a1++) { 169 *a1 &= ~01; 170 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 171 *a1 |= 01; 172 } 173 #ifdef notdef 174 /* 175 * This code is commented out for now. The problem is that we don't 176 * fix up the undo area the way we should. Basically, I think what has 177 * to be done is to copy the undo area down (since we shrunk everything) 178 * and move the various pointers into it down too. I will do this later 179 * when I have time. (Mark, 10-20-80) 180 */ 181 /* 182 * Special case: g/.../d (avoid n^2 algorithm) 183 */ 184 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 185 gdelete(); 186 return; 187 } 188 #endif 189 if (inopen) 190 inopen = -1; 191 /* 192 * Now for each marked line, set dot there and do the commands. 193 * Note the n^2 behavior here for lots of lines matching. 194 * This is really needed: in some cases you could delete lines, 195 * causing a marked line to be moved before a1 and missed if 196 * we didn't restart at zero each time. 197 */ 198 for (a1 = one; a1 <= dol; a1++) { 199 if (*a1 & 01) { 200 *a1 &= ~01; 201 dot = a1; 202 globp = globuf; 203 commands(1, 1); 204 a1 = zero; 205 } 206 } 207 globp = oglobp; 208 inglobal = oinglobal; 209 endline = 1; 210 Command = Cwas; 211 netchHAD(nlines); 212 setlastchar(EOF); 213 if (inopen) { 214 ungetchar(EOF); 215 inopen = 1; 216 } 217 } 218 219 /* 220 * gdelete: delete inside a global command. Handles the 221 * special case g/r.e./d. All lines to be deleted have 222 * already been marked. Squeeze the remaining lines together. 223 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 224 * and g/r.e./.,/r.e.2/d are not treated specially. There is no 225 * good reason for this except the question: where to you draw the line? 226 */ 227 void 228 gdelete(void) 229 { 230 line *a1, *a2, *a3; 231 232 a3 = dol; 233 /* find first marked line. can skip all before it */ 234 for (a1=zero; (*a1&01)==0; a1++) 235 if (a1>=a3) 236 return; 237 /* copy down unmarked lines, compacting as we go. */ 238 for (a2=a1+1; a2<=a3;) { 239 if (*a2&01) { 240 a2++; /* line is marked, skip it */ 241 dot = a1; /* dot left after line deletion */ 242 } else 243 *a1++ = *a2++; /* unmarked, copy it */ 244 } 245 dol = a1-1; 246 if (dot>dol) 247 dot = dol; 248 change(); 249 } 250 251 bool cflag; 252 int scount, slines, stotal; 253 254 int 255 substitute(int c) 256 { 257 line *addr; 258 int n; 259 int gsubf, hopcount; 260 261 gsubf = compsub(c); 262 if(FIXUNDO) 263 save12(), undkind = UNDCHANGE; 264 stotal = 0; 265 slines = 0; 266 for (addr = addr1; addr <= addr2; addr++) { 267 scount = hopcount = 0; 268 if (dosubcon(0, addr) == 0) 269 continue; 270 if (gsubf) { 271 /* 272 * The loop can happen from s/\</&/g 273 * but we don't want to break other, reasonable cases. 274 */ 275 hopcount = 0; 276 while (*loc2) { 277 if (++hopcount > sizeof linebuf) 278 error(gettext("substitution loop")); 279 if (dosubcon(1, addr) == 0) 280 break; 281 } 282 } 283 if (scount) { 284 stotal += scount; 285 slines++; 286 putmark(addr); 287 n = append(getsub, addr); 288 addr += n; 289 addr2 += n; 290 } 291 } 292 if (stotal == 0 && !inglobal && !cflag) 293 error(value(vi_TERSE) ? gettext("Fail") : 294 gettext("Substitute pattern match failed")); 295 snote(stotal, slines); 296 return (stotal); 297 } 298 299 int 300 compsub(int ch) 301 { 302 int seof, c, uselastre; 303 static int gsubf; 304 static unsigned char remem[RHSSIZE]; 305 static int remflg = -1; 306 307 if (!value(vi_EDCOMPATIBLE)) 308 gsubf = cflag = 0; 309 uselastre = 0; 310 switch (ch) { 311 312 case 's': 313 (void)skipwh(); 314 seof = getchar(); 315 if (endcmd(seof) || any(seof, "gcr")) { 316 ungetchar(seof); 317 goto redo; 318 } 319 if (isalpha(seof) || isdigit(seof)) 320 error(value(vi_TERSE) ? gettext("Substitute needs re") : 321 gettext("Missing regular expression for substitute")); 322 seof = vi_compile(seof, 1); 323 uselastre = 1; 324 comprhs(seof); 325 gsubf = cflag = 0; 326 break; 327 328 case '~': 329 uselastre = 1; 330 /* FALLTHROUGH */ 331 case '&': 332 redo: 333 if (re == NULL || re->Expbuf[1] == 0) 334 error(value(vi_TERSE) ? gettext("No previous re") : 335 gettext("No previous regular expression")); 336 if (subre == NULL || subre->Expbuf[1] == 0) 337 error(value(vi_TERSE) ? gettext("No previous substitute re") : 338 gettext("No previous substitute to repeat")); 339 break; 340 } 341 for (;;) { 342 c = getchar(); 343 switch (c) { 344 345 case 'g': 346 gsubf = !gsubf; 347 continue; 348 349 case 'c': 350 cflag = !cflag; 351 continue; 352 353 case 'r': 354 uselastre = 1; 355 continue; 356 357 default: 358 ungetchar(c); 359 setcount(); 360 donewline(); 361 if (uselastre) 362 savere(&subre); 363 else 364 resre(subre); 365 366 /* 367 * The % by itself on the right hand side means 368 * that the previous value of the right hand side 369 * should be used. A -1 is used to indicate no 370 * previously remembered search string. 371 */ 372 373 if (rhsbuf[0] == '%' && rhsbuf[1] == 0) 374 if (remflg == -1) 375 error(gettext("No previously remembered string")); 376 else 377 strcpy(rhsbuf, remem); 378 else { 379 strcpy(remem, rhsbuf); 380 remflg = 1; 381 } 382 return (gsubf); 383 } 384 } 385 } 386 387 void 388 comprhs(int seof) 389 { 390 unsigned char *rp, *orp; 391 int c; 392 unsigned char orhsbuf[RHSSIZE]; 393 char multi[MB_LEN_MAX + 1]; 394 int len; 395 wchar_t wc; 396 397 rp = rhsbuf; 398 CP(orhsbuf, rp); 399 for (;;) { 400 c = peekchar(); 401 if (c == seof) { 402 (void) getchar(); 403 break; 404 } 405 406 if (!isascii(c) && c != EOF) { 407 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 408 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 409 goto toobig; 410 strncpy(rp, multi, len); 411 rp += len; 412 continue; 413 } 414 } 415 416 (void) getchar(); 417 switch (c) { 418 419 case '\\': 420 c = peekchar(); 421 if (c == EOF) { 422 (void) getchar(); 423 error(gettext("Replacement string ends with \\")); 424 } 425 426 if (!isascii(c)) { 427 *rp++ = '\\'; 428 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 429 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 430 goto over_flow; 431 strncpy(rp, multi, len); 432 rp += len; 433 continue; 434 } 435 } 436 437 (void) getchar(); 438 if (value(vi_MAGIC)) { 439 /* 440 * When "magic", \& turns into a plain &, 441 * and all other chars work fine quoted. 442 */ 443 if (c != '&') { 444 if(rp >= &rhsbuf[RHSSIZE - 1]) { 445 *rp=0; 446 error(value(vi_TERSE) ? 447 gettext("Replacement pattern too long") : 448 gettext("Replacement pattern too long - limit 256 characters")); 449 } 450 *rp++ = '\\'; 451 } 452 break; 453 } 454 magic: 455 if (c == '~') { 456 for (orp = orhsbuf; *orp; *rp++ = *orp++) 457 if (rp >= &rhsbuf[RHSSIZE - 1]) 458 goto toobig; 459 continue; 460 } 461 if(rp >= &rhsbuf[RHSSIZE - 1]) { 462 over_flow: 463 *rp=0; 464 error(value(vi_TERSE) ? 465 gettext("Replacement pattern too long") : 466 gettext("Replacement pattern too long - limit 256 characters")); 467 } 468 *rp++ = '\\'; 469 break; 470 471 case '\n': 472 case EOF: 473 if (!(globp && globp[0])) { 474 ungetchar(c); 475 goto endrhs; 476 } 477 /* FALLTHROUGH */ 478 479 case '~': 480 case '&': 481 if (value(vi_MAGIC)) 482 goto magic; 483 break; 484 } 485 if (rp >= &rhsbuf[RHSSIZE - 1]) { 486 toobig: 487 *rp = 0; 488 error(value(vi_TERSE) ? 489 gettext("Replacement pattern too long") : 490 gettext("Replacement pattern too long - limit 256 characters")); 491 } 492 *rp++ = c; 493 } 494 endrhs: 495 *rp++ = 0; 496 } 497 498 int 499 getsub(void) 500 { 501 unsigned char *p; 502 503 if ((p = linebp) == 0) 504 return (EOF); 505 strcLIN(p); 506 linebp = 0; 507 return (0); 508 } 509 510 int 511 dosubcon(bool f, line *a) 512 { 513 514 if (execute(f, a) == 0) 515 return (0); 516 if (confirmed(a)) { 517 dosub(); 518 scount++; 519 } 520 return (1); 521 } 522 523 int 524 confirmed(line *a) 525 { 526 int c, cnt, ch; 527 528 if (cflag == 0) 529 return (1); 530 pofix(); 531 pline(lineno(a)); 532 if (inopen) 533 putchar('\n' | QUOTE); 534 c = lcolumn(loc1); 535 ugo(c, ' '); 536 ugo(lcolumn(loc2) - c, '^'); 537 flush(); 538 cnt = 0; 539 bkup: 540 ch = c = getkey(); 541 again: 542 if (c == '\b') { 543 if ((inopen) 544 && (cnt > 0)) { 545 putchar('\b' | QUOTE); 546 putchar(' '); 547 putchar('\b' | QUOTE), flush(); 548 cnt --; 549 } 550 goto bkup; 551 } 552 if (c == '\r') 553 c = '\n'; 554 if (inopen && MB_CUR_MAX == 1 || c < 0200) { 555 putchar(c); 556 flush(); 557 cnt++; 558 } 559 if (c != '\n' && c != EOF) { 560 c = getkey(); 561 goto again; 562 } 563 noteinp(); 564 return (ch == 'y'); 565 } 566 567 void 568 ugo(int cnt, int with) 569 { 570 571 if (cnt > 0) 572 do 573 putchar(with); 574 while (--cnt > 0); 575 } 576 577 int casecnt; 578 bool destuc; 579 580 void 581 dosub(void) 582 { 583 unsigned char *lp, *sp, *rp; 584 int c; 585 int len; 586 587 lp = linebuf; 588 sp = genbuf; 589 rp = rhsbuf; 590 while (lp < (unsigned char *)loc1) 591 *sp++ = *lp++; 592 casecnt = 0; 593 /* 594 * Caution: depending on the hardware, c will be either sign 595 * extended or not if C"E is set. Thus, on a VAX, c will 596 * be < 0, but on a 3B, c will be >= 128. 597 */ 598 while (c = *rp) { 599 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 600 len = 1; 601 /* ^V <return> from vi to split lines */ 602 if (c == '\r') 603 c = '\n'; 604 605 if (c == '\\') { 606 rp++; 607 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 608 len = 1; 609 switch (c = *rp++) { 610 611 case '&': 612 sp = place(sp, loc1, loc2); 613 if (sp == 0) 614 goto ovflo; 615 continue; 616 617 case 'l': 618 casecnt = 1; 619 destuc = 0; 620 continue; 621 622 case 'L': 623 casecnt = LBSIZE; 624 destuc = 0; 625 continue; 626 627 case 'u': 628 casecnt = 1; 629 destuc = 1; 630 continue; 631 632 case 'U': 633 casecnt = LBSIZE; 634 destuc = 1; 635 continue; 636 637 case 'E': 638 case 'e': 639 casecnt = 0; 640 continue; 641 } 642 if(re != NULL && c >= '1' && c < re->Nbra + '1') { 643 sp = place(sp, braslist[c - '1'] , braelist[c - '1']); 644 if (sp == 0) 645 goto ovflo; 646 continue; 647 } 648 rp--; 649 } 650 if (len > 1) { 651 if ((sp + len) >= &genbuf[LBSIZE]) 652 goto ovflo; 653 strncpy(sp, rp, len); 654 } else { 655 if (casecnt) 656 *sp = fixcase(c); 657 else 658 *sp = c; 659 } 660 sp += len; rp += len; 661 if (sp >= &genbuf[LBSIZE]) 662 ovflo: 663 error(value(vi_TERSE) ? gettext("Line overflow") : 664 gettext("Line overflow in substitute")); 665 } 666 lp = (unsigned char *)loc2; 667 loc2 = (char *)(linebuf + (sp - genbuf)); 668 while (*sp++ = *lp++) 669 if (sp >= &genbuf[LBSIZE]) 670 goto ovflo; 671 strcLIN(genbuf); 672 } 673 674 int 675 fixcase(int c) 676 { 677 678 if (casecnt == 0) 679 return (c); 680 casecnt--; 681 if (destuc) { 682 if (islower(c)) 683 c = toupper(c); 684 } else 685 if (isupper(c)) 686 c = tolower(c); 687 return (c); 688 } 689 690 unsigned char * 691 place(sp, l1, l2) 692 unsigned char *sp, *l1, *l2; 693 { 694 695 while (l1 < l2) { 696 *sp++ = fixcase(*l1++); 697 if (sp >= &genbuf[LBSIZE]) 698 return (0); 699 } 700 return (sp); 701 } 702 703 void 704 snote(int total, int nlines) 705 { 706 707 if (!notable(total)) 708 return; 709 if (nlines != 1 && nlines != total) 710 viprintf(mesg(value(vi_TERSE) ? 711 /* 712 * TRANSLATION_NOTE 713 * Reference order of arguments must not 714 * be changed using '%digit$', since vi's 715 * viprintf() does not support it. 716 */ 717 gettext("%d subs on %d lines") : 718 /* 719 * TRANSLATION_NOTE 720 * Reference order of arguments must not 721 * be changed using '%digit$', since vi's 722 * viprintf() does not support it. 723 */ 724 gettext("%d substitutions on %d lines")), 725 total, nlines); 726 else 727 viprintf(mesg(value(vi_TERSE) ? 728 gettext("%d subs") : 729 gettext("%d substitutions")), 730 total); 731 noonl(); 732 flush(); 733 } 734 735 #ifdef XPG4 736 #include <regex.h> 737 738 extern int regcomp_flags; /* use to specify cflags for regcomp() */ 739 #endif /* XPG4 */ 740 741 int 742 vi_compile(int eof, int oknl) 743 { 744 int c; 745 unsigned char *gp, *p1; 746 unsigned char *rhsp; 747 unsigned char rebuf[LBSIZE]; 748 char multi[MB_LEN_MAX + 1]; 749 int len; 750 wchar_t wc; 751 752 #ifdef XPG4 753 /* 754 * reset cflags to plain BRE 755 */ 756 regcomp_flags = 0; 757 #endif /* XPG4 */ 758 759 gp = genbuf; 760 if (isalpha(eof) || isdigit(eof)) 761 error(gettext("Regular expressions cannot be delimited by letters or digits")); 762 if(eof >= 0200 && MB_CUR_MAX > 1) 763 error(gettext("Regular expressions cannot be delimited by multibyte characters")); 764 c = getchar(); 765 if (eof == '\\') 766 switch (c) { 767 768 case '/': 769 case '?': 770 if (scanre == NULL || scanre->Expbuf[1] == 0) 771 error(value(vi_TERSE) ? gettext("No previous scan re") : 772 gettext("No previous scanning regular expression")); 773 resre(scanre); 774 return (c); 775 776 case '&': 777 if (subre == NULL || subre->Expbuf[1] == 0) 778 error(value(vi_TERSE) ? gettext("No previous substitute re") : 779 gettext("No previous substitute regular expression")); 780 resre(subre); 781 return (c); 782 783 default: 784 error(value(vi_TERSE) ? gettext("Badly formed re") : 785 gettext("Regular expression \\ must be followed by / or ?")); 786 } 787 if (c == eof || c == '\n' || c == EOF) { 788 if (re == NULL || re->Expbuf[1] == 0) 789 error(value(vi_TERSE) ? gettext("No previous re") : 790 gettext("No previous regular expression")); 791 if (c == '\n' && oknl == 0) 792 error(value(vi_TERSE) ? gettext("Missing closing delimiter") : 793 gettext("Missing closing delimiter for regular expression")); 794 if (c != eof) 795 ungetchar(c); 796 return (eof); 797 } 798 gp = genbuf; 799 if (c == '^') { 800 *gp++ = c; 801 c = getchar(); 802 } 803 ungetchar(c); 804 for (;;) { 805 c = getchar(); 806 if (c == eof || c == EOF) { 807 if (c == EOF) 808 ungetchar(c); 809 goto out; 810 } 811 if (gp >= &genbuf[LBSIZE - 3]) 812 complex: 813 cerror(value(vi_TERSE) ? 814 (unsigned char *)gettext("Re too complex") : 815 (unsigned char *) 816 gettext("Regular expression too complicated")); 817 818 if (!(isascii(c) || MB_CUR_MAX == 1)) { 819 ungetchar(c); 820 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 821 if ((gp + len) >= &genbuf[LBSIZE - 3]) 822 goto complex; 823 strncpy(gp, multi, len); 824 gp += len; 825 continue; 826 } 827 (void) getchar(); 828 } 829 830 switch (c) { 831 832 case '\\': 833 c = getchar(); 834 if (!isascii(c)) { 835 ungetchar(c); 836 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 837 if ((gp + len) >= &genbuf[LBSIZE - 3]) 838 goto complex; 839 *gp++ = '\\'; 840 strncpy(gp, multi, len); 841 gp += len; 842 continue; 843 } 844 (void) getchar(); 845 } 846 847 switch (c) { 848 849 case '<': 850 case '>': 851 case '(': 852 case ')': 853 case '{': 854 case '}': 855 case '$': 856 case '^': 857 case '\\': 858 *gp++ = '\\'; 859 *gp++ = c; 860 continue; 861 862 case 'n': 863 *gp++ = c; 864 continue; 865 } 866 if(c >= '0' && c <= '9') { 867 *gp++ = '\\'; 868 *gp++ = c; 869 continue; 870 } 871 if (value(vi_MAGIC) == 0) 872 magic: 873 switch (c) { 874 875 case '.': 876 *gp++ = '.'; 877 continue; 878 879 case '~': 880 rhsp = rhsbuf; 881 while (*rhsp) { 882 if (!isascii(*rhsp)) { 883 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) { 884 if ((gp + len) >= &genbuf[LBSIZE-2]) 885 goto complex; 886 strncpy(gp, rhsp, len); 887 rhsp += len; gp += len; 888 continue; 889 } 890 } 891 len = 1; 892 if (*rhsp == '\\') { 893 c = *++rhsp; 894 if (c == '&') 895 cerror(value(vi_TERSE) ? (unsigned char *) 896 gettext("Replacement pattern contains &") : 897 (unsigned char *)gettext("Replacement pattern contains & - cannot use in re")); 898 if (c >= '1' && c <= '9') 899 cerror(value(vi_TERSE) ? (unsigned char *) 900 gettext("Replacement pattern contains \\d") : 901 (unsigned char *) 902 gettext("Replacement pattern contains \\d - cannot use in re")); 903 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) { 904 len = 1; 905 if(any(c, ".\\*[$")) 906 *gp++ = '\\'; 907 } 908 } 909 910 if ((gp + len) >= &genbuf[LBSIZE-2]) 911 goto complex; 912 if (len == 1) { 913 c = *rhsp++; 914 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 915 } else { 916 strncpy(gp, rhsp, len); 917 gp += len; rhsp += len; 918 } 919 } 920 continue; 921 922 case '*': 923 *gp++ = '*'; 924 continue; 925 926 case '[': 927 *gp++ = '['; 928 c = getchar(); 929 if (c == '^') { 930 *gp++ = '^'; 931 c = getchar(); 932 } 933 934 do { 935 if (!isascii(c) && c != EOF) { 936 ungetchar(c); 937 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 938 if ((gp + len)>= &genbuf[LBSIZE-4]) 939 goto complex; 940 strncpy(gp, multi, len); 941 gp += len; 942 c = getchar(); 943 continue; 944 } 945 (void) getchar(); 946 } 947 948 if (gp >= &genbuf[LBSIZE-4]) 949 goto complex; 950 if(c == '\\' && peekchar() == ']') { 951 (void)getchar(); 952 *gp++ = '\\'; 953 *gp++ = ']'; 954 } 955 else if (c == '\n' || c == EOF) 956 cerror((unsigned char *) 957 gettext("Missing ]")); 958 else 959 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 960 c = getchar(); 961 } while(c != ']'); 962 *gp++ = ']'; 963 continue; 964 } 965 if (c == EOF) { 966 ungetchar(EOF); 967 *gp++ = '\\'; 968 *gp++ = '\\'; 969 continue; 970 } 971 if (c == '\n') 972 cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") : 973 (unsigned char *)gettext("Can't escape newlines into regular expressions")); 974 *gp++ = '\\'; 975 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 976 continue; 977 978 case '\n': 979 if (oknl) { 980 ungetchar(c); 981 goto out; 982 } 983 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") : 984 (unsigned char *)gettext("Missing closing delimiter for regular expression")); 985 /* FALLTHROUGH */ 986 987 case '.': 988 case '~': 989 case '*': 990 case '[': 991 if (value(vi_MAGIC)) 992 goto magic; 993 if(c != '~') 994 *gp++ = '\\'; 995 /* FALLTHROUGH */ 996 defchar: 997 default: 998 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 999 continue; 1000 } 1001 } 1002 out: 1003 *gp++ = '\0'; 1004 1005 #ifdef XPG4 1006 /* see if our compiled RE's will fit in the re structure: */ 1007 if (regexc_size > EXPSIZ) { 1008 /* 1009 * this should never happen. but it's critical that we 1010 * check here, otherwise .bss would get overwritten. 1011 */ 1012 cerror(value(vi_TERSE) ? (unsigned char *) 1013 gettext("RE's can't fit") : 1014 (unsigned char *)gettext("Regular expressions can't fit")); 1015 return(eof); 1016 } 1017 1018 /* 1019 * We create re each time we need it. 1020 */ 1021 1022 if (re == NULL || re == scanre || re == subre) { 1023 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1024 error(gettext("out of memory")); 1025 exit(errcnt); 1026 } 1027 } else { 1028 regex_comp_free(&re->Expbuf); 1029 memset(re, 0, sizeof(struct regexp)); 1030 } 1031 1032 compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf 1033 + regexc_size); 1034 #else /* !XPG4 */ 1035 (void) _compile((const char *)genbuf, (char *)re->Expbuf, 1036 (char *)(re->Expbuf + sizeof (re->Expbuf)), 1); 1037 #endif /* XPG4 */ 1038 1039 if(regerrno) 1040 switch(regerrno) { 1041 1042 case 42: 1043 cerror((unsigned char *)gettext("\\( \\) Imbalance")); 1044 /* FALLTHROUGH */ 1045 case 43: 1046 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") : 1047 (unsigned char *) 1048 gettext("Too many \\('d subexpressions in a regular expression")); 1049 case 50: 1050 goto complex; 1051 case 67: 1052 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") : 1053 (unsigned char *)gettext("Regular expression has illegal byte sequence")); 1054 } 1055 re->Nbra = nbra; 1056 return(eof); 1057 } 1058 1059 void 1060 cerror(unsigned char *s) 1061 { 1062 if (re) { 1063 re->Expbuf[0] = re->Expbuf[1] = 0; 1064 } 1065 error(s); 1066 } 1067 1068 int 1069 execute(int gf, line *addr) 1070 { 1071 unsigned char *p1, *p2; 1072 char *start; 1073 int c, i; 1074 int ret; 1075 int len; 1076 1077 if (gf) { 1078 if (re == NULL || re->Expbuf[0]) 1079 return (0); 1080 if(value(vi_IGNORECASE)) { 1081 p1 = genbuf; 1082 p2 = (unsigned char *)loc2; 1083 while(c = *p2) { 1084 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1085 len = 1; 1086 if (len == 1) { 1087 *p1++ = tolower(c); 1088 p2++; 1089 continue; 1090 } 1091 strncpy(p1, p2, len); 1092 p1 += len; p2 += len; 1093 } 1094 *p1 = '\0'; 1095 locs = (char *)genbuf; 1096 p1 = genbuf; 1097 start = loc2; 1098 } else { 1099 p1 = (unsigned char *)loc2; 1100 locs = loc2; 1101 } 1102 } else { 1103 if (addr == zero) 1104 return (0); 1105 p1 = linebuf; 1106 getaline(*addr); 1107 if(value(vi_IGNORECASE)) { 1108 p1 = genbuf; 1109 p2 = linebuf; 1110 while(c = *p2) { 1111 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1112 len = 1; 1113 if (len == 1) { 1114 *p1++ = tolower(c); 1115 p2++; 1116 continue; 1117 } 1118 strncpy(p1, p2, len); 1119 p1 += len; p2 += len; 1120 } 1121 *p1 = '\0'; 1122 p1 = genbuf; 1123 start = (char *)linebuf; 1124 } 1125 locs = (char *)0; 1126 } 1127 1128 ret = step((char *)p1, (char *)re->Expbuf); 1129 1130 if(value(vi_IGNORECASE) && ret) { 1131 loc1 = start + (loc1 - (char *)genbuf); 1132 loc2 = start + (loc2 - (char *)genbuf); 1133 for(i = 0; i < NBRA; i++) { 1134 braslist[i] = start + (braslist[i] - (char *)genbuf); 1135 braelist[i] = start + (braelist[i] - (char *)genbuf); 1136 } 1137 } 1138 return ret; 1139 } 1140 1141 /* 1142 * Initialize the compiled regular-expression storage areas (called from 1143 * main()). 1144 */ 1145 1146 void init_re (void) 1147 { 1148 #ifdef XPG4 1149 re = scanre = subre = NULL; 1150 #else /* !XPG4 */ 1151 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1152 error(gettext("out of memory")); 1153 exit(errcnt); 1154 } 1155 1156 if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) { 1157 error(gettext("out of memory")); 1158 exit(errcnt); 1159 } 1160 1161 if ((subre = calloc(1, sizeof(struct regexp))) == NULL) { 1162 error(gettext("out of memory")); 1163 exit(errcnt); 1164 } 1165 #endif /* XPG4 */ 1166 } 1167 1168 /* 1169 * Save what is in the special place re to the named alternate 1170 * location. This means freeing up what's currently in this target 1171 * location, if necessary. 1172 */ 1173 1174 void savere(struct regexp ** a) 1175 { 1176 #ifdef XPG4 1177 if (a == NULL || re == NULL) { 1178 return; 1179 } 1180 1181 if (*a == NULL) { 1182 *a = re; 1183 return; 1184 } 1185 1186 if (*a != re) { 1187 if (scanre != subre) { 1188 regex_comp_free(&((*a)->Expbuf)); 1189 free(*a); 1190 } 1191 *a = re; 1192 } 1193 #else /* !XPG4 */ 1194 memcpy(*a, re, sizeof(struct regexp)); 1195 #endif /* XPG4 */ 1196 } 1197 1198 1199 /* 1200 * Restore what is in the named alternate location to the special place 1201 * re. This means first freeing up what's currently in re, if necessary. 1202 */ 1203 1204 void resre(struct regexp * a) 1205 { 1206 #ifdef XPG4 1207 if (a == NULL) { 1208 return; 1209 } 1210 1211 if (re == NULL) { 1212 re = a; 1213 return; 1214 } 1215 1216 if (a != re) { 1217 if ((re != scanre) && (re != subre)) { 1218 regex_comp_free(&re->Expbuf); 1219 free(re); 1220 } 1221 1222 re = a; 1223 } 1224 #else /* !XPG4 */ 1225 memcpy(re, a, sizeof(struct regexp)); 1226 #endif /* XPG4 */ 1227 } 1228