1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 /* Copyright (c) 1981 Regents of the University of California */ 31 32 #include "ex.h" 33 #include "ex_re.h" 34 35 /* from libgen */ 36 char *_compile(const char *, char *, char *, int); 37 38 /* 39 * The compiled-regular-expression storage areas (re, scanre, and subre) 40 * have been changed into dynamically allocated memory areas, in both the 41 * Solaris and XPG4 versions. 42 * 43 * In the Solaris version, which uses the original libgen(3g) compile() 44 * and step() calls, these areas are allocated once, and then data are 45 * copied between them subsequently, as they were in the original 46 * implementation. This is possible because the compiled information is 47 * a self-contained block of bits. 48 * 49 * In the XPG4 version, the expr:compile.o object is linked in as a 50 * simulation of these functions using the new regcomp() and regexec() 51 * functions. The problem here is that the resulting 52 * compiled-regular-expression data contain pointers to other data, which 53 * need to be freed, but only when we are quite sure that we are done 54 * with them - and certainly not before. There was an earlier attempt to 55 * handle these differences, but that effort was flawed. 56 */ 57 58 extern int getchar(); 59 #ifdef XPG4 60 void regex_comp_free(void *); 61 extern size_t regexc_size; /* compile.c: size of regex_comp structure */ 62 #endif /* XPG4 */ 63 64 /* 65 * Global, substitute and regular expressions. 66 * Very similar to ed, with some re extensions and 67 * confirmed substitute. 68 */ 69 void 70 global(k) 71 bool k; 72 { 73 unsigned char *gp; 74 int c; 75 line *a1; 76 unsigned char globuf[GBSIZE], *Cwas; 77 int nlines = lineDOL(); 78 int oinglobal = inglobal; 79 unsigned char *oglobp = globp; 80 char multi[MB_LEN_MAX + 1]; 81 wchar_t wc; 82 int len; 83 84 85 Cwas = Command; 86 /* 87 * States of inglobal: 88 * 0: ordinary - not in a global command. 89 * 1: text coming from some buffer, not tty. 90 * 2: like 1, but the source of the buffer is a global command. 91 * Hence you're only in a global command if inglobal==2. This 92 * strange sounding convention is historically derived from 93 * everybody simulating a global command. 94 */ 95 if (inglobal==2) 96 error(value(vi_TERSE) ? gettext("Global within global") : 97 gettext("Global within global not allowed")); 98 markDOT(); 99 setall(); 100 nonzero(); 101 if (skipend()) 102 error(value(vi_TERSE) ? gettext("Global needs re") : 103 gettext("Missing regular expression for global")); 104 c = getchar(); 105 (void)vi_compile(c, 1); 106 savere(&scanre); 107 gp = globuf; 108 while ((c = peekchar()) != '\n') { 109 if (!isascii(c)) { 110 if (c == EOF) { 111 c = '\n'; 112 ungetchar(c); 113 goto out; 114 } 115 116 mb_copy: 117 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 118 if ((gp + len) >= &globuf[GBSIZE - 2]) 119 error(gettext("Global command too long")); 120 strncpy(gp, multi, len); 121 gp += len; 122 continue; 123 } 124 } 125 126 (void) getchar(); 127 switch (c) { 128 129 case EOF: 130 c = '\n'; 131 ungetchar(c); 132 goto out; 133 134 case '\\': 135 c = peekchar(); 136 if (!isascii(c)) { 137 *gp++ = '\\'; 138 goto mb_copy; 139 } 140 141 (void) getchar(); 142 switch (c) { 143 144 case '\\': 145 ungetchar(c); 146 break; 147 148 case '\n': 149 break; 150 151 default: 152 *gp++ = '\\'; 153 break; 154 } 155 break; 156 } 157 *gp++ = c; 158 if (gp >= &globuf[GBSIZE - 2]) 159 error(gettext("Global command too long")); 160 } 161 162 out: 163 donewline(); 164 *gp++ = c; 165 *gp++ = 0; 166 saveall(); 167 inglobal = 2; 168 for (a1 = one; a1 <= dol; a1++) { 169 *a1 &= ~01; 170 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 171 *a1 |= 01; 172 } 173 #ifdef notdef 174 /* 175 * This code is commented out for now. The problem is that we don't 176 * fix up the undo area the way we should. Basically, I think what has 177 * to be done is to copy the undo area down (since we shrunk everything) 178 * and move the various pointers into it down too. I will do this later 179 * when I have time. (Mark, 10-20-80) 180 */ 181 /* 182 * Special case: g/.../d (avoid n^2 algorithm) 183 */ 184 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 185 gdelete(); 186 return; 187 } 188 #endif 189 if (inopen) 190 inopen = -1; 191 /* 192 * Now for each marked line, set dot there and do the commands. 193 * Note the n^2 behavior here for lots of lines matching. 194 * This is really needed: in some cases you could delete lines, 195 * causing a marked line to be moved before a1 and missed if 196 * we didn't restart at zero each time. 197 */ 198 for (a1 = one; a1 <= dol; a1++) { 199 if (*a1 & 01) { 200 *a1 &= ~01; 201 dot = a1; 202 globp = globuf; 203 commands(1, 1); 204 a1 = zero; 205 } 206 } 207 globp = oglobp; 208 inglobal = oinglobal; 209 endline = 1; 210 Command = Cwas; 211 netchHAD(nlines); 212 setlastchar(EOF); 213 if (inopen) { 214 ungetchar(EOF); 215 inopen = 1; 216 } 217 } 218 219 /* 220 * gdelete: delete inside a global command. Handles the 221 * special case g/r.e./d. All lines to be deleted have 222 * already been marked. Squeeze the remaining lines together. 223 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 224 * and g/r.e./.,/r.e.2/d are not treated specially. There is no 225 * good reason for this except the question: where to you draw the line? 226 */ 227 void 228 gdelete(void) 229 { 230 line *a1, *a2, *a3; 231 232 a3 = dol; 233 /* find first marked line. can skip all before it */ 234 for (a1=zero; (*a1&01)==0; a1++) 235 if (a1>=a3) 236 return; 237 /* copy down unmarked lines, compacting as we go. */ 238 for (a2=a1+1; a2<=a3;) { 239 if (*a2&01) { 240 a2++; /* line is marked, skip it */ 241 dot = a1; /* dot left after line deletion */ 242 } else 243 *a1++ = *a2++; /* unmarked, copy it */ 244 } 245 dol = a1-1; 246 if (dot>dol) 247 dot = dol; 248 change(); 249 } 250 251 bool cflag; 252 int scount, slines, stotal; 253 254 int 255 substitute(int c) 256 { 257 line *addr; 258 int n; 259 int gsubf, hopcount; 260 261 gsubf = compsub(c); 262 if(FIXUNDO) 263 save12(), undkind = UNDCHANGE; 264 stotal = 0; 265 slines = 0; 266 for (addr = addr1; addr <= addr2; addr++) { 267 scount = hopcount = 0; 268 if (dosubcon(0, addr) == 0) 269 continue; 270 if (gsubf) { 271 /* 272 * The loop can happen from s/\</&/g 273 * but we don't want to break other, reasonable cases. 274 */ 275 hopcount = 0; 276 while (*loc2) { 277 if (++hopcount > sizeof linebuf) 278 error(gettext("substitution loop")); 279 if (dosubcon(1, addr) == 0) 280 break; 281 } 282 } 283 if (scount) { 284 stotal += scount; 285 slines++; 286 putmark(addr); 287 n = append(getsub, addr); 288 addr += n; 289 addr2 += n; 290 } 291 } 292 if (stotal == 0 && !inglobal && !cflag) 293 error(value(vi_TERSE) ? gettext("Fail") : 294 gettext("Substitute pattern match failed")); 295 snote(stotal, slines); 296 return (stotal); 297 } 298 299 int 300 compsub(int ch) 301 { 302 int seof, c, uselastre; 303 static int gsubf; 304 static unsigned char remem[RHSSIZE]; 305 static int remflg = -1; 306 307 if (!value(vi_EDCOMPATIBLE)) 308 gsubf = cflag = 0; 309 uselastre = 0; 310 switch (ch) { 311 312 case 's': 313 (void)skipwh(); 314 seof = getchar(); 315 if (endcmd(seof) || any(seof, "gcr")) { 316 ungetchar(seof); 317 goto redo; 318 } 319 if (isalpha(seof) || isdigit(seof)) 320 error(value(vi_TERSE) ? gettext("Substitute needs re") : 321 gettext("Missing regular expression for substitute")); 322 seof = vi_compile(seof, 1); 323 uselastre = 1; 324 comprhs(seof); 325 gsubf = cflag = 0; 326 break; 327 328 case '~': 329 uselastre = 1; 330 /* fall into ... */ 331 case '&': 332 redo: 333 if (re == NULL || re->Expbuf[1] == 0) 334 error(value(vi_TERSE) ? gettext("No previous re") : 335 gettext("No previous regular expression")); 336 if (subre == NULL || subre->Expbuf[1] == 0) 337 error(value(vi_TERSE) ? gettext("No previous substitute re") : 338 gettext("No previous substitute to repeat")); 339 break; 340 } 341 for (;;) { 342 c = getchar(); 343 switch (c) { 344 345 case 'g': 346 gsubf = !gsubf; 347 continue; 348 349 case 'c': 350 cflag = !cflag; 351 continue; 352 353 case 'r': 354 uselastre = 1; 355 continue; 356 357 default: 358 ungetchar(c); 359 setcount(); 360 donewline(); 361 if (uselastre) 362 savere(&subre); 363 else 364 resre(subre); 365 366 /* 367 * The % by itself on the right hand side means 368 * that the previous value of the right hand side 369 * should be used. A -1 is used to indicate no 370 * previously remembered search string. 371 */ 372 373 if (rhsbuf[0] == '%' && rhsbuf[1] == 0) 374 if (remflg == -1) 375 error(gettext("No previously remembered string")); 376 else 377 strcpy(rhsbuf, remem); 378 else { 379 strcpy(remem, rhsbuf); 380 remflg = 1; 381 } 382 return (gsubf); 383 } 384 } 385 } 386 387 void 388 comprhs(int seof) 389 { 390 unsigned char *rp, *orp; 391 int c; 392 unsigned char orhsbuf[RHSSIZE]; 393 char multi[MB_LEN_MAX + 1]; 394 int len; 395 wchar_t wc; 396 397 rp = rhsbuf; 398 CP(orhsbuf, rp); 399 for (;;) { 400 c = peekchar(); 401 if (c == seof) { 402 (void) getchar(); 403 break; 404 } 405 406 if (!isascii(c) && c != EOF) { 407 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 408 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 409 goto toobig; 410 strncpy(rp, multi, len); 411 rp += len; 412 continue; 413 } 414 } 415 416 (void) getchar(); 417 switch (c) { 418 419 case '\\': 420 c = peekchar(); 421 if (c == EOF) { 422 (void) getchar(); 423 error(gettext("Replacement string ends with \\")); 424 } 425 426 if (!isascii(c)) { 427 *rp++ = '\\'; 428 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 429 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 430 goto over_flow; 431 strncpy(rp, multi, len); 432 rp += len; 433 continue; 434 } 435 } 436 437 (void) getchar(); 438 if (value(vi_MAGIC)) { 439 /* 440 * When "magic", \& turns into a plain &, 441 * and all other chars work fine quoted. 442 */ 443 if (c != '&') { 444 if(rp >= &rhsbuf[RHSSIZE - 1]) { 445 *rp=0; 446 error(value(vi_TERSE) ? 447 gettext("Replacement pattern too long") : 448 gettext("Replacement pattern too long - limit 256 characters")); 449 } 450 *rp++ = '\\'; 451 } 452 break; 453 } 454 magic: 455 if (c == '~') { 456 for (orp = orhsbuf; *orp; *rp++ = *orp++) 457 if (rp >= &rhsbuf[RHSSIZE - 1]) 458 goto toobig; 459 continue; 460 } 461 if(rp >= &rhsbuf[RHSSIZE - 1]) { 462 over_flow: 463 *rp=0; 464 error(value(vi_TERSE) ? 465 gettext("Replacement pattern too long") : 466 gettext("Replacement pattern too long - limit 256 characters")); 467 } 468 *rp++ = '\\'; 469 break; 470 471 case '\n': 472 case EOF: 473 if (!(globp && globp[0])) { 474 ungetchar(c); 475 goto endrhs; 476 } 477 478 case '~': 479 case '&': 480 if (value(vi_MAGIC)) 481 goto magic; 482 break; 483 } 484 if (rp >= &rhsbuf[RHSSIZE - 1]) { 485 toobig: 486 *rp = 0; 487 error(value(vi_TERSE) ? 488 gettext("Replacement pattern too long") : 489 gettext("Replacement pattern too long - limit 256 characters")); 490 } 491 *rp++ = c; 492 } 493 endrhs: 494 *rp++ = 0; 495 } 496 497 int 498 getsub(void) 499 { 500 unsigned char *p; 501 502 if ((p = linebp) == 0) 503 return (EOF); 504 strcLIN(p); 505 linebp = 0; 506 return (0); 507 } 508 509 int 510 dosubcon(bool f, line *a) 511 { 512 513 if (execute(f, a) == 0) 514 return (0); 515 if (confirmed(a)) { 516 dosub(); 517 scount++; 518 } 519 return (1); 520 } 521 522 int 523 confirmed(line *a) 524 { 525 int c, cnt, ch; 526 527 if (cflag == 0) 528 return (1); 529 pofix(); 530 pline(lineno(a)); 531 if (inopen) 532 putchar('\n' | QUOTE); 533 c = lcolumn(loc1); 534 ugo(c, ' '); 535 ugo(lcolumn(loc2) - c, '^'); 536 flush(); 537 cnt = 0; 538 bkup: 539 ch = c = getkey(); 540 again: 541 if (c == '\b') { 542 if ((inopen) 543 && (cnt > 0)) { 544 putchar('\b' | QUOTE); 545 putchar(' '); 546 putchar('\b' | QUOTE), flush(); 547 cnt --; 548 } 549 goto bkup; 550 } 551 if (c == '\r') 552 c = '\n'; 553 if (inopen && MB_CUR_MAX == 1 || c < 0200) { 554 putchar(c); 555 flush(); 556 cnt++; 557 } 558 if (c != '\n' && c != EOF) { 559 c = getkey(); 560 goto again; 561 } 562 noteinp(); 563 return (ch == 'y'); 564 } 565 566 void 567 ugo(int cnt, int with) 568 { 569 570 if (cnt > 0) 571 do 572 putchar(with); 573 while (--cnt > 0); 574 } 575 576 int casecnt; 577 bool destuc; 578 579 void 580 dosub(void) 581 { 582 unsigned char *lp, *sp, *rp; 583 int c; 584 int len; 585 586 lp = linebuf; 587 sp = genbuf; 588 rp = rhsbuf; 589 while (lp < (unsigned char *)loc1) 590 *sp++ = *lp++; 591 casecnt = 0; 592 /* 593 * Caution: depending on the hardware, c will be either sign 594 * extended or not if C"E is set. Thus, on a VAX, c will 595 * be < 0, but on a 3B, c will be >= 128. 596 */ 597 while (c = *rp) { 598 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 599 len = 1; 600 /* ^V <return> from vi to split lines */ 601 if (c == '\r') 602 c = '\n'; 603 604 if (c == '\\') { 605 rp++; 606 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 607 len = 1; 608 switch (c = *rp++) { 609 610 case '&': 611 sp = place(sp, loc1, loc2); 612 if (sp == 0) 613 goto ovflo; 614 continue; 615 616 case 'l': 617 casecnt = 1; 618 destuc = 0; 619 continue; 620 621 case 'L': 622 casecnt = LBSIZE; 623 destuc = 0; 624 continue; 625 626 case 'u': 627 casecnt = 1; 628 destuc = 1; 629 continue; 630 631 case 'U': 632 casecnt = LBSIZE; 633 destuc = 1; 634 continue; 635 636 case 'E': 637 case 'e': 638 casecnt = 0; 639 continue; 640 } 641 if(re != NULL && c >= '1' && c < re->Nbra + '1') { 642 sp = place(sp, braslist[c - '1'] , braelist[c - '1']); 643 if (sp == 0) 644 goto ovflo; 645 continue; 646 } 647 rp--; 648 } 649 if (len > 1) { 650 if ((sp + len) >= &genbuf[LBSIZE]) 651 goto ovflo; 652 strncpy(sp, rp, len); 653 } else { 654 if (casecnt) 655 *sp = fixcase(c); 656 else 657 *sp = c; 658 } 659 sp += len; rp += len; 660 if (sp >= &genbuf[LBSIZE]) 661 ovflo: 662 error(value(vi_TERSE) ? gettext("Line overflow") : 663 gettext("Line overflow in substitute")); 664 } 665 lp = (unsigned char *)loc2; 666 loc2 = (char *)(linebuf + (sp - genbuf)); 667 while (*sp++ = *lp++) 668 if (sp >= &genbuf[LBSIZE]) 669 goto ovflo; 670 strcLIN(genbuf); 671 } 672 673 int 674 fixcase(int c) 675 { 676 677 if (casecnt == 0) 678 return (c); 679 casecnt--; 680 if (destuc) { 681 if (islower(c)) 682 c = toupper(c); 683 } else 684 if (isupper(c)) 685 c = tolower(c); 686 return (c); 687 } 688 689 unsigned char * 690 place(sp, l1, l2) 691 unsigned char *sp, *l1, *l2; 692 { 693 694 while (l1 < l2) { 695 *sp++ = fixcase(*l1++); 696 if (sp >= &genbuf[LBSIZE]) 697 return (0); 698 } 699 return (sp); 700 } 701 702 void 703 snote(int total, int nlines) 704 { 705 706 if (!notable(total)) 707 return; 708 if (nlines != 1 && nlines != total) 709 viprintf(mesg(value(vi_TERSE) ? 710 /* 711 * TRANSLATION_NOTE 712 * Reference order of arguments must not 713 * be changed using '%digit$', since vi's 714 * viprintf() does not support it. 715 */ 716 gettext("%d subs on %d lines") : 717 /* 718 * TRANSLATION_NOTE 719 * Reference order of arguments must not 720 * be changed using '%digit$', since vi's 721 * viprintf() does not support it. 722 */ 723 gettext("%d substitutions on %d lines")), 724 total, nlines); 725 else 726 viprintf(mesg(value(vi_TERSE) ? 727 gettext("%d subs") : 728 gettext("%d substitutions")), 729 total); 730 noonl(); 731 flush(); 732 } 733 734 #ifdef XPG4 735 #include <regex.h> 736 737 extern int regcomp_flags; /* use to specify cflags for regcomp() */ 738 #endif /* XPG4 */ 739 740 int 741 vi_compile(int eof, int oknl) 742 { 743 int c; 744 unsigned char *gp, *p1; 745 unsigned char *rhsp; 746 unsigned char rebuf[LBSIZE]; 747 char multi[MB_LEN_MAX + 1]; 748 int len; 749 wchar_t wc; 750 751 #ifdef XPG4 752 /* 753 * reset cflags to plain BRE 754 * if \< and/or \> is specified, REG_WORDS is set. 755 */ 756 regcomp_flags = 0; 757 #endif /* XPG4 */ 758 759 gp = genbuf; 760 if (isalpha(eof) || isdigit(eof)) 761 error(gettext("Regular expressions cannot be delimited by letters or digits")); 762 if(eof >= 0200 && MB_CUR_MAX > 1) 763 error(gettext("Regular expressions cannot be delimited by multibyte characters")); 764 c = getchar(); 765 if (eof == '\\') 766 switch (c) { 767 768 case '/': 769 case '?': 770 if (scanre == NULL || scanre->Expbuf[1] == 0) 771 error(value(vi_TERSE) ? gettext("No previous scan re") : 772 gettext("No previous scanning regular expression")); 773 resre(scanre); 774 return (c); 775 776 case '&': 777 if (subre == NULL || subre->Expbuf[1] == 0) 778 error(value(vi_TERSE) ? gettext("No previous substitute re") : 779 gettext("No previous substitute regular expression")); 780 resre(subre); 781 return (c); 782 783 default: 784 error(value(vi_TERSE) ? gettext("Badly formed re") : 785 gettext("Regular expression \\ must be followed by / or ?")); 786 } 787 if (c == eof || c == '\n' || c == EOF) { 788 if (re == NULL || re->Expbuf[1] == 0) 789 error(value(vi_TERSE) ? gettext("No previous re") : 790 gettext("No previous regular expression")); 791 if (c == '\n' && oknl == 0) 792 error(value(vi_TERSE) ? gettext("Missing closing delimiter") : 793 gettext("Missing closing delimiter for regular expression")); 794 if (c != eof) 795 ungetchar(c); 796 return (eof); 797 } 798 gp = genbuf; 799 if (c == '^') { 800 *gp++ = c; 801 c = getchar(); 802 } 803 ungetchar(c); 804 for (;;) { 805 c = getchar(); 806 if (c == eof || c == EOF) { 807 if (c == EOF) 808 ungetchar(c); 809 goto out; 810 } 811 if (gp >= &genbuf[LBSIZE - 3]) 812 complex: 813 cerror(value(vi_TERSE) ? 814 (unsigned char *)gettext("Re too complex") : 815 (unsigned char *) 816 gettext("Regular expression too complicated")); 817 818 if (!(isascii(c) || MB_CUR_MAX == 1)) { 819 ungetchar(c); 820 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 821 if ((gp + len) >= &genbuf[LBSIZE - 3]) 822 goto complex; 823 strncpy(gp, multi, len); 824 gp += len; 825 continue; 826 } 827 (void) getchar(); 828 } 829 830 switch (c) { 831 832 case '\\': 833 c = getchar(); 834 if (!isascii(c)) { 835 ungetchar(c); 836 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 837 if ((gp + len) >= &genbuf[LBSIZE - 3]) 838 goto complex; 839 *gp++ = '\\'; 840 strncpy(gp, multi, len); 841 gp += len; 842 continue; 843 } 844 (void) getchar(); 845 } 846 847 switch (c) { 848 849 case '<': 850 case '>': 851 #ifdef XPG4 852 regcomp_flags = REG_WORDS; 853 /*FALLTHRU*/ 854 #endif /* XPG4 */ 855 case '(': 856 case ')': 857 case '{': 858 case '}': 859 case '$': 860 case '^': 861 case '\\': 862 *gp++ = '\\'; 863 *gp++ = c; 864 continue; 865 866 case 'n': 867 *gp++ = c; 868 continue; 869 } 870 if(c >= '0' && c <= '9') { 871 *gp++ = '\\'; 872 *gp++ = c; 873 continue; 874 } 875 if (value(vi_MAGIC) == 0) 876 magic: 877 switch (c) { 878 879 case '.': 880 *gp++ = '.'; 881 continue; 882 883 case '~': 884 rhsp = rhsbuf; 885 while (*rhsp) { 886 if (!isascii(*rhsp)) { 887 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) { 888 if ((gp + len) >= &genbuf[LBSIZE-2]) 889 goto complex; 890 strncpy(gp, rhsp, len); 891 rhsp += len; gp += len; 892 continue; 893 } 894 } 895 len = 1; 896 if (*rhsp == '\\') { 897 c = *++rhsp; 898 if (c == '&') 899 cerror(value(vi_TERSE) ? (unsigned char *) 900 gettext("Replacement pattern contains &") : 901 (unsigned char *)gettext("Replacement pattern contains & - cannot use in re")); 902 if (c >= '1' && c <= '9') 903 cerror(value(vi_TERSE) ? (unsigned char *) 904 gettext("Replacement pattern contains \\d") : 905 (unsigned char *) 906 gettext("Replacement pattern contains \\d - cannot use in re")); 907 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) { 908 len = 1; 909 if(any(c, ".\\*[$")) 910 *gp++ = '\\'; 911 } 912 } 913 914 if ((gp + len) >= &genbuf[LBSIZE-2]) 915 goto complex; 916 if (len == 1) { 917 c = *rhsp++; 918 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 919 } else { 920 strncpy(gp, rhsp, len); 921 gp += len; rhsp += len; 922 } 923 } 924 continue; 925 926 case '*': 927 *gp++ = '*'; 928 continue; 929 930 case '[': 931 *gp++ = '['; 932 c = getchar(); 933 if (c == '^') { 934 *gp++ = '^'; 935 c = getchar(); 936 } 937 938 do { 939 if (!isascii(c) && c != EOF) { 940 ungetchar(c); 941 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 942 if ((gp + len)>= &genbuf[LBSIZE-4]) 943 goto complex; 944 strncpy(gp, multi, len); 945 gp += len; 946 c = getchar(); 947 continue; 948 } 949 (void) getchar(); 950 } 951 952 if (gp >= &genbuf[LBSIZE-4]) 953 goto complex; 954 if(c == '\\' && peekchar() == ']') { 955 (void)getchar(); 956 *gp++ = '\\'; 957 *gp++ = ']'; 958 } 959 else if (c == '\n' || c == EOF) 960 cerror((unsigned char *) 961 gettext("Missing ]")); 962 else 963 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 964 c = getchar(); 965 } while(c != ']'); 966 *gp++ = ']'; 967 continue; 968 } 969 if (c == EOF) { 970 ungetchar(EOF); 971 *gp++ = '\\'; 972 *gp++ = '\\'; 973 continue; 974 } 975 if (c == '\n') 976 cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") : 977 (unsigned char *)gettext("Can't escape newlines into regular expressions")); 978 *gp++ = '\\'; 979 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 980 continue; 981 982 case '\n': 983 if (oknl) { 984 ungetchar(c); 985 goto out; 986 } 987 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") : 988 (unsigned char *)gettext("Missing closing delimiter for regular expression")); 989 990 case '.': 991 case '~': 992 case '*': 993 case '[': 994 if (value(vi_MAGIC)) 995 goto magic; 996 if(c != '~') 997 *gp++ = '\\'; 998 defchar: 999 default: 1000 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 1001 continue; 1002 } 1003 } 1004 out: 1005 *gp++ = '\0'; 1006 1007 #ifdef XPG4 1008 /* see if our compiled RE's will fit in the re structure: */ 1009 if (regexc_size > EXPSIZ) { 1010 /* 1011 * this should never happen. but it's critical that we 1012 * check here, otherwise .bss would get overwritten. 1013 */ 1014 cerror(value(vi_TERSE) ? (unsigned char *) 1015 gettext("RE's can't fit") : 1016 (unsigned char *)gettext("Regular expressions can't fit")); 1017 return(eof); 1018 } 1019 1020 /* 1021 * We create re each time we need it. 1022 */ 1023 1024 if (re == NULL || re == scanre || re == subre) { 1025 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1026 error(gettext("out of memory")); 1027 exit(errcnt); 1028 } 1029 } else { 1030 regex_comp_free(&re->Expbuf); 1031 memset(re, 0, sizeof(struct regexp)); 1032 } 1033 1034 compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf 1035 + regexc_size); 1036 #else /* !XPG4 */ 1037 (void) _compile((const char *)genbuf, (char *)re->Expbuf, 1038 (char *)(re->Expbuf + sizeof (re->Expbuf)), 1); 1039 #endif /* XPG4 */ 1040 1041 if(regerrno) 1042 switch(regerrno) { 1043 1044 case 42: 1045 cerror((unsigned char *)gettext("\\( \\) Imbalance")); 1046 case 43: 1047 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") : 1048 (unsigned char *) 1049 gettext("Too many \\('d subexpressions in a regular expression")); 1050 case 50: 1051 goto complex; 1052 case 67: 1053 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") : 1054 (unsigned char *)gettext("Regular expression has illegal byte sequence")); 1055 } 1056 re->Nbra = nbra; 1057 return(eof); 1058 } 1059 1060 void 1061 cerror(unsigned char *s) 1062 { 1063 if (re) { 1064 re->Expbuf[0] = re->Expbuf[1] = 0; 1065 } 1066 error(s); 1067 } 1068 1069 int 1070 execute(int gf, line *addr) 1071 { 1072 unsigned char *p1, *p2; 1073 char *start; 1074 int c, i; 1075 int ret; 1076 int len; 1077 1078 if (gf) { 1079 if (re == NULL || re->Expbuf[0]) 1080 return (0); 1081 if(value(vi_IGNORECASE)) { 1082 p1 = genbuf; 1083 p2 = (unsigned char *)loc2; 1084 while(c = *p2) { 1085 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1086 len = 1; 1087 if (len == 1) { 1088 *p1++ = tolower(c); 1089 p2++; 1090 continue; 1091 } 1092 strncpy(p1, p2, len); 1093 p1 += len; p2 += len; 1094 } 1095 *p1 = '\0'; 1096 locs = (char *)genbuf; 1097 p1 = genbuf; 1098 start = loc2; 1099 } else { 1100 p1 = (unsigned char *)loc2; 1101 locs = loc2; 1102 } 1103 } else { 1104 if (addr == zero) 1105 return (0); 1106 p1 = linebuf; 1107 getaline(*addr); 1108 if(value(vi_IGNORECASE)) { 1109 p1 = genbuf; 1110 p2 = linebuf; 1111 while(c = *p2) { 1112 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1113 len = 1; 1114 if (len == 1) { 1115 *p1++ = tolower(c); 1116 p2++; 1117 continue; 1118 } 1119 strncpy(p1, p2, len); 1120 p1 += len; p2 += len; 1121 } 1122 *p1 = '\0'; 1123 p1 = genbuf; 1124 start = (char *)linebuf; 1125 } 1126 locs = (char *)0; 1127 } 1128 1129 ret = step((char *)p1, (char *)re->Expbuf); 1130 1131 if(value(vi_IGNORECASE) && ret) { 1132 loc1 = start + (loc1 - (char *)genbuf); 1133 loc2 = start + (loc2 - (char *)genbuf); 1134 for(i = 0; i < NBRA; i++) { 1135 braslist[i] = start + (braslist[i] - (char *)genbuf); 1136 braelist[i] = start + (braelist[i] - (char *)genbuf); 1137 } 1138 } 1139 return ret; 1140 } 1141 1142 /* 1143 * Initialize the compiled regular-expression storage areas (called from 1144 * main()). 1145 */ 1146 1147 void init_re (void) 1148 { 1149 #ifdef XPG4 1150 re = scanre = subre = NULL; 1151 #else /* !XPG4 */ 1152 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1153 error(gettext("out of memory")); 1154 exit(errcnt); 1155 } 1156 1157 if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) { 1158 error(gettext("out of memory")); 1159 exit(errcnt); 1160 } 1161 1162 if ((subre = calloc(1, sizeof(struct regexp))) == NULL) { 1163 error(gettext("out of memory")); 1164 exit(errcnt); 1165 } 1166 #endif /* XPG4 */ 1167 } 1168 1169 /* 1170 * Save what is in the special place re to the named alternate 1171 * location. This means freeing up what's currently in this target 1172 * location, if necessary. 1173 */ 1174 1175 void savere(struct regexp ** a) 1176 { 1177 #ifdef XPG4 1178 if (a == NULL || re == NULL) { 1179 return; 1180 } 1181 1182 if (*a == NULL) { 1183 *a = re; 1184 return; 1185 } 1186 1187 if (*a != re) { 1188 if (scanre != subre) { 1189 regex_comp_free(&((*a)->Expbuf)); 1190 free(*a); 1191 } 1192 *a = re; 1193 } 1194 #else /* !XPG4 */ 1195 memcpy(*a, re, sizeof(struct regexp)); 1196 #endif /* XPG4 */ 1197 } 1198 1199 1200 /* 1201 * Restore what is in the named alternate location to the special place 1202 * re. This means first freeing up what's currently in re, if necessary. 1203 */ 1204 1205 void resre(struct regexp * a) 1206 { 1207 #ifdef XPG4 1208 if (a == NULL) { 1209 return; 1210 } 1211 1212 if (re == NULL) { 1213 re = a; 1214 return; 1215 } 1216 1217 if (a != re) { 1218 if ((re != scanre) && (re != subre)) { 1219 regex_comp_free(&re->Expbuf); 1220 free(re); 1221 } 1222 1223 re = a; 1224 } 1225 #else /* !XPG4 */ 1226 memcpy(re, a, sizeof(struct regexp)); 1227 #endif /* XPG4 */ 1228 } 1229