1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 30 /* Copyright (c) 1981 Regents of the University of California */ 31 32 #include "ex.h" 33 #include "ex_re.h" 34 35 /* from libgen */ 36 char *_compile(const char *, char *, char *, int); 37 38 /* 39 * The compiled-regular-expression storage areas (re, scanre, and subre) 40 * have been changed into dynamically allocated memory areas, in both the 41 * Solaris and XPG4 versions. 42 * 43 * In the Solaris version, which uses the original libgen(3g) compile() 44 * and step() calls, these areas are allocated once, and then data are 45 * copied between them subsequently, as they were in the original 46 * implementation. This is possible because the compiled information is 47 * a self-contained block of bits. 48 * 49 * In the XPG4 version, the expr:compile.o object is linked in as a 50 * simulation of these functions using the new regcomp() and regexec() 51 * functions. The problem here is that the resulting 52 * compiled-regular-expression data contain pointers to other data, which 53 * need to be freed, but only when we are quite sure that we are done 54 * with them - and certainly not before. There was an earlier attempt to 55 * handle these differences, but that effort was flawed. 56 */ 57 58 extern int getchar(); 59 #ifdef XPG4 60 void regex_comp_free(void *); 61 extern size_t regexc_size; /* compile.c: size of regex_comp structure */ 62 #endif /* XPG4 */ 63 64 /* 65 * Global, substitute and regular expressions. 66 * Very similar to ed, with some re extensions and 67 * confirmed substitute. 68 */ 69 void 70 global(k) 71 bool k; 72 { 73 unsigned char *gp; 74 int c; 75 line *a1; 76 unsigned char globuf[GBSIZE], *Cwas; 77 int nlines = lineDOL(); 78 int oinglobal = inglobal; 79 unsigned char *oglobp = globp; 80 char multi[MB_LEN_MAX + 1]; 81 wchar_t wc; 82 int len; 83 84 85 Cwas = Command; 86 /* 87 * States of inglobal: 88 * 0: ordinary - not in a global command. 89 * 1: text coming from some buffer, not tty. 90 * 2: like 1, but the source of the buffer is a global command. 91 * Hence you're only in a global command if inglobal==2. This 92 * strange sounding convention is historically derived from 93 * everybody simulating a global command. 94 */ 95 if (inglobal==2) 96 error(value(vi_TERSE) ? gettext("Global within global") : 97 gettext("Global within global not allowed")); 98 markDOT(); 99 setall(); 100 nonzero(); 101 if (skipend()) 102 error(value(vi_TERSE) ? gettext("Global needs re") : 103 gettext("Missing regular expression for global")); 104 c = getchar(); 105 (void)vi_compile(c, 1); 106 savere(&scanre); 107 gp = globuf; 108 while ((c = peekchar()) != '\n') { 109 if (!isascii(c)) { 110 if (c == EOF) { 111 c = '\n'; 112 ungetchar(c); 113 goto out; 114 } 115 116 mb_copy: 117 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 118 if ((gp + len) >= &globuf[GBSIZE - 2]) 119 error(gettext("Global command too long")); 120 strncpy(gp, multi, len); 121 gp += len; 122 continue; 123 } 124 } 125 126 (void) getchar(); 127 switch (c) { 128 129 case EOF: 130 c = '\n'; 131 ungetchar(c); 132 goto out; 133 134 case '\\': 135 c = peekchar(); 136 if (!isascii(c)) { 137 *gp++ = '\\'; 138 goto mb_copy; 139 } 140 141 (void) getchar(); 142 switch (c) { 143 144 case '\\': 145 ungetchar(c); 146 break; 147 148 case '\n': 149 break; 150 151 default: 152 *gp++ = '\\'; 153 break; 154 } 155 break; 156 } 157 *gp++ = c; 158 if (gp >= &globuf[GBSIZE - 2]) 159 error(gettext("Global command too long")); 160 } 161 162 out: 163 donewline(); 164 *gp++ = c; 165 *gp++ = 0; 166 saveall(); 167 inglobal = 2; 168 for (a1 = one; a1 <= dol; a1++) { 169 *a1 &= ~01; 170 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 171 *a1 |= 01; 172 } 173 #ifdef notdef 174 /* 175 * This code is commented out for now. The problem is that we don't 176 * fix up the undo area the way we should. Basically, I think what has 177 * to be done is to copy the undo area down (since we shrunk everything) 178 * and move the various pointers into it down too. I will do this later 179 * when I have time. (Mark, 10-20-80) 180 */ 181 /* 182 * Special case: g/.../d (avoid n^2 algorithm) 183 */ 184 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 185 gdelete(); 186 return; 187 } 188 #endif 189 if (inopen) 190 inopen = -1; 191 /* 192 * Now for each marked line, set dot there and do the commands. 193 * Note the n^2 behavior here for lots of lines matching. 194 * This is really needed: in some cases you could delete lines, 195 * causing a marked line to be moved before a1 and missed if 196 * we didn't restart at zero each time. 197 */ 198 for (a1 = one; a1 <= dol; a1++) { 199 if (*a1 & 01) { 200 *a1 &= ~01; 201 dot = a1; 202 globp = globuf; 203 commands(1, 1); 204 a1 = zero; 205 } 206 } 207 globp = oglobp; 208 inglobal = oinglobal; 209 endline = 1; 210 Command = Cwas; 211 netchHAD(nlines); 212 setlastchar(EOF); 213 if (inopen) { 214 ungetchar(EOF); 215 inopen = 1; 216 } 217 } 218 219 /* 220 * gdelete: delete inside a global command. Handles the 221 * special case g/r.e./d. All lines to be deleted have 222 * already been marked. Squeeze the remaining lines together. 223 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 224 * and g/r.e./.,/r.e.2/d are not treated specially. There is no 225 * good reason for this except the question: where to you draw the line? 226 */ 227 void 228 gdelete(void) 229 { 230 line *a1, *a2, *a3; 231 232 a3 = dol; 233 /* find first marked line. can skip all before it */ 234 for (a1=zero; (*a1&01)==0; a1++) 235 if (a1>=a3) 236 return; 237 /* copy down unmarked lines, compacting as we go. */ 238 for (a2=a1+1; a2<=a3;) { 239 if (*a2&01) { 240 a2++; /* line is marked, skip it */ 241 dot = a1; /* dot left after line deletion */ 242 } else 243 *a1++ = *a2++; /* unmarked, copy it */ 244 } 245 dol = a1-1; 246 if (dot>dol) 247 dot = dol; 248 change(); 249 } 250 251 bool cflag; 252 int scount, slines, stotal; 253 254 int 255 substitute(int c) 256 { 257 line *addr; 258 int n; 259 int gsubf, hopcount; 260 261 gsubf = compsub(c); 262 if(FIXUNDO) 263 save12(), undkind = UNDCHANGE; 264 stotal = 0; 265 slines = 0; 266 for (addr = addr1; addr <= addr2; addr++) { 267 scount = hopcount = 0; 268 if (dosubcon(0, addr) == 0) 269 continue; 270 if (gsubf) { 271 /* 272 * The loop can happen from s/\</&/g 273 * but we don't want to break other, reasonable cases. 274 */ 275 hopcount = 0; 276 while (*loc2) { 277 if (++hopcount > sizeof linebuf) 278 error(gettext("substitution loop")); 279 if (dosubcon(1, addr) == 0) 280 break; 281 } 282 } 283 if (scount) { 284 stotal += scount; 285 slines++; 286 putmark(addr); 287 n = append(getsub, addr); 288 addr += n; 289 addr2 += n; 290 } 291 } 292 if (stotal == 0 && !inglobal && !cflag) 293 error(value(vi_TERSE) ? gettext("Fail") : 294 gettext("Substitute pattern match failed")); 295 snote(stotal, slines); 296 return (stotal); 297 } 298 299 int 300 compsub(int ch) 301 { 302 int seof, c, uselastre; 303 static int gsubf; 304 static unsigned char remem[RHSSIZE]; 305 static int remflg = -1; 306 307 if (!value(vi_EDCOMPATIBLE)) 308 gsubf = cflag = 0; 309 uselastre = 0; 310 switch (ch) { 311 312 case 's': 313 (void)skipwh(); 314 seof = getchar(); 315 if (endcmd(seof) || any(seof, "gcr")) { 316 ungetchar(seof); 317 goto redo; 318 } 319 if (isalpha(seof) || isdigit(seof)) 320 error(value(vi_TERSE) ? gettext("Substitute needs re") : 321 gettext("Missing regular expression for substitute")); 322 seof = vi_compile(seof, 1); 323 uselastre = 1; 324 comprhs(seof); 325 gsubf = cflag = 0; 326 break; 327 328 case '~': 329 uselastre = 1; 330 /* fall into ... */ 331 case '&': 332 redo: 333 if (re == NULL || re->Expbuf[1] == 0) 334 error(value(vi_TERSE) ? gettext("No previous re") : 335 gettext("No previous regular expression")); 336 if (subre == NULL || subre->Expbuf[1] == 0) 337 error(value(vi_TERSE) ? gettext("No previous substitute re") : 338 gettext("No previous substitute to repeat")); 339 break; 340 } 341 for (;;) { 342 c = getchar(); 343 switch (c) { 344 345 case 'g': 346 gsubf = !gsubf; 347 continue; 348 349 case 'c': 350 cflag = !cflag; 351 continue; 352 353 case 'r': 354 uselastre = 1; 355 continue; 356 357 default: 358 ungetchar(c); 359 setcount(); 360 donewline(); 361 if (uselastre) 362 savere(&subre); 363 else 364 resre(subre); 365 366 /* 367 * The % by itself on the right hand side means 368 * that the previous value of the right hand side 369 * should be used. A -1 is used to indicate no 370 * previously remembered search string. 371 */ 372 373 if (rhsbuf[0] == '%' && rhsbuf[1] == 0) 374 if (remflg == -1) 375 error(gettext("No previously remembered string")); 376 else 377 strcpy(rhsbuf, remem); 378 else { 379 strcpy(remem, rhsbuf); 380 remflg = 1; 381 } 382 return (gsubf); 383 } 384 } 385 } 386 387 void 388 comprhs(int seof) 389 { 390 unsigned char *rp, *orp; 391 int c; 392 unsigned char orhsbuf[RHSSIZE]; 393 char multi[MB_LEN_MAX + 1]; 394 int len; 395 wchar_t wc; 396 397 rp = rhsbuf; 398 CP(orhsbuf, rp); 399 for (;;) { 400 c = peekchar(); 401 if (c == seof) { 402 (void) getchar(); 403 break; 404 } 405 406 if (!isascii(c) && c != EOF) { 407 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 408 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 409 goto toobig; 410 strncpy(rp, multi, len); 411 rp += len; 412 continue; 413 } 414 } 415 416 (void) getchar(); 417 switch (c) { 418 419 case '\\': 420 c = peekchar(); 421 if (c == EOF) { 422 (void) getchar(); 423 error(gettext("Replacement string ends with \\")); 424 } 425 426 if (!isascii(c)) { 427 *rp++ = '\\'; 428 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 429 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 430 goto over_flow; 431 strncpy(rp, multi, len); 432 rp += len; 433 continue; 434 } 435 } 436 437 (void) getchar(); 438 if (value(vi_MAGIC)) { 439 /* 440 * When "magic", \& turns into a plain &, 441 * and all other chars work fine quoted. 442 */ 443 if (c != '&') { 444 if(rp >= &rhsbuf[RHSSIZE - 1]) { 445 *rp=0; 446 error(value(vi_TERSE) ? 447 gettext("Replacement pattern too long") : 448 gettext("Replacement pattern too long - limit 256 characters")); 449 } 450 *rp++ = '\\'; 451 } 452 break; 453 } 454 magic: 455 if (c == '~') { 456 for (orp = orhsbuf; *orp; *rp++ = *orp++) 457 if (rp >= &rhsbuf[RHSSIZE - 1]) 458 goto toobig; 459 continue; 460 } 461 if(rp >= &rhsbuf[RHSSIZE - 1]) { 462 over_flow: 463 *rp=0; 464 error(value(vi_TERSE) ? 465 gettext("Replacement pattern too long") : 466 gettext("Replacement pattern too long - limit 256 characters")); 467 } 468 *rp++ = '\\'; 469 break; 470 471 case '\n': 472 case EOF: 473 if (!(globp && globp[0])) { 474 ungetchar(c); 475 goto endrhs; 476 } 477 478 case '~': 479 case '&': 480 if (value(vi_MAGIC)) 481 goto magic; 482 break; 483 } 484 if (rp >= &rhsbuf[RHSSIZE - 1]) { 485 toobig: 486 *rp = 0; 487 error(value(vi_TERSE) ? 488 gettext("Replacement pattern too long") : 489 gettext("Replacement pattern too long - limit 256 characters")); 490 } 491 *rp++ = c; 492 } 493 endrhs: 494 *rp++ = 0; 495 } 496 497 int 498 getsub(void) 499 { 500 unsigned char *p; 501 502 if ((p = linebp) == 0) 503 return (EOF); 504 strcLIN(p); 505 linebp = 0; 506 return (0); 507 } 508 509 int 510 dosubcon(bool f, line *a) 511 { 512 513 if (execute(f, a) == 0) 514 return (0); 515 if (confirmed(a)) { 516 dosub(); 517 scount++; 518 } 519 return (1); 520 } 521 522 int 523 confirmed(line *a) 524 { 525 int c, cnt, ch; 526 527 if (cflag == 0) 528 return (1); 529 pofix(); 530 pline(lineno(a)); 531 if (inopen) 532 putchar('\n' | QUOTE); 533 c = lcolumn(loc1); 534 ugo(c, ' '); 535 ugo(lcolumn(loc2) - c, '^'); 536 flush(); 537 cnt = 0; 538 bkup: 539 ch = c = getkey(); 540 again: 541 if (c == '\b') { 542 if ((inopen) 543 && (cnt > 0)) { 544 putchar('\b' | QUOTE); 545 putchar(' '); 546 putchar('\b' | QUOTE), flush(); 547 cnt --; 548 } 549 goto bkup; 550 } 551 if (c == '\r') 552 c = '\n'; 553 if (inopen && MB_CUR_MAX == 1 || c < 0200) { 554 putchar(c); 555 flush(); 556 cnt++; 557 } 558 if (c != '\n' && c != EOF) { 559 c = getkey(); 560 goto again; 561 } 562 noteinp(); 563 return (ch == 'y'); 564 } 565 566 void 567 ugo(int cnt, int with) 568 { 569 570 if (cnt > 0) 571 do 572 putchar(with); 573 while (--cnt > 0); 574 } 575 576 int casecnt; 577 bool destuc; 578 579 void 580 dosub(void) 581 { 582 unsigned char *lp, *sp, *rp; 583 int c; 584 int len; 585 586 lp = linebuf; 587 sp = genbuf; 588 rp = rhsbuf; 589 while (lp < (unsigned char *)loc1) 590 *sp++ = *lp++; 591 casecnt = 0; 592 /* 593 * Caution: depending on the hardware, c will be either sign 594 * extended or not if C"E is set. Thus, on a VAX, c will 595 * be < 0, but on a 3B, c will be >= 128. 596 */ 597 while (c = *rp) { 598 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 599 len = 1; 600 /* ^V <return> from vi to split lines */ 601 if (c == '\r') 602 c = '\n'; 603 604 if (c == '\\') { 605 rp++; 606 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 607 len = 1; 608 switch (c = *rp++) { 609 610 case '&': 611 sp = place(sp, loc1, loc2); 612 if (sp == 0) 613 goto ovflo; 614 continue; 615 616 case 'l': 617 casecnt = 1; 618 destuc = 0; 619 continue; 620 621 case 'L': 622 casecnt = LBSIZE; 623 destuc = 0; 624 continue; 625 626 case 'u': 627 casecnt = 1; 628 destuc = 1; 629 continue; 630 631 case 'U': 632 casecnt = LBSIZE; 633 destuc = 1; 634 continue; 635 636 case 'E': 637 case 'e': 638 casecnt = 0; 639 continue; 640 } 641 if(re != NULL && c >= '1' && c < re->Nbra + '1') { 642 sp = place(sp, braslist[c - '1'] , braelist[c - '1']); 643 if (sp == 0) 644 goto ovflo; 645 continue; 646 } 647 rp--; 648 } 649 if (len > 1) { 650 if ((sp + len) >= &genbuf[LBSIZE]) 651 goto ovflo; 652 strncpy(sp, rp, len); 653 } else { 654 if (casecnt) 655 *sp = fixcase(c); 656 else 657 *sp = c; 658 } 659 sp += len; rp += len; 660 if (sp >= &genbuf[LBSIZE]) 661 ovflo: 662 error(value(vi_TERSE) ? gettext("Line overflow") : 663 gettext("Line overflow in substitute")); 664 } 665 lp = (unsigned char *)loc2; 666 loc2 = (char *)(linebuf + (sp - genbuf)); 667 while (*sp++ = *lp++) 668 if (sp >= &genbuf[LBSIZE]) 669 goto ovflo; 670 strcLIN(genbuf); 671 } 672 673 int 674 fixcase(int c) 675 { 676 677 if (casecnt == 0) 678 return (c); 679 casecnt--; 680 if (destuc) { 681 if (islower(c)) 682 c = toupper(c); 683 } else 684 if (isupper(c)) 685 c = tolower(c); 686 return (c); 687 } 688 689 unsigned char * 690 place(sp, l1, l2) 691 unsigned char *sp, *l1, *l2; 692 { 693 694 while (l1 < l2) { 695 *sp++ = fixcase(*l1++); 696 if (sp >= &genbuf[LBSIZE]) 697 return (0); 698 } 699 return (sp); 700 } 701 702 void 703 snote(int total, int nlines) 704 { 705 706 if (!notable(total)) 707 return; 708 if (nlines != 1 && nlines != total) 709 viprintf(mesg(value(vi_TERSE) ? 710 /* 711 * TRANSLATION_NOTE 712 * Reference order of arguments must not 713 * be changed using '%digit$', since vi's 714 * viprintf() does not support it. 715 */ 716 gettext("%d subs on %d lines") : 717 /* 718 * TRANSLATION_NOTE 719 * Reference order of arguments must not 720 * be changed using '%digit$', since vi's 721 * viprintf() does not support it. 722 */ 723 gettext("%d substitutions on %d lines")), 724 total, nlines); 725 else 726 viprintf(mesg(value(vi_TERSE) ? 727 gettext("%d subs") : 728 gettext("%d substitutions")), 729 total); 730 noonl(); 731 flush(); 732 } 733 734 #ifdef XPG4 735 #include <regex.h> 736 737 extern int regcomp_flags; /* use to specify cflags for regcomp() */ 738 #endif /* XPG4 */ 739 740 int 741 vi_compile(int eof, int oknl) 742 { 743 int c; 744 unsigned char *gp, *p1; 745 unsigned char *rhsp; 746 unsigned char rebuf[LBSIZE]; 747 char multi[MB_LEN_MAX + 1]; 748 int len; 749 wchar_t wc; 750 751 #ifdef XPG4 752 /* 753 * reset cflags to plain BRE 754 */ 755 regcomp_flags = 0; 756 #endif /* XPG4 */ 757 758 gp = genbuf; 759 if (isalpha(eof) || isdigit(eof)) 760 error(gettext("Regular expressions cannot be delimited by letters or digits")); 761 if(eof >= 0200 && MB_CUR_MAX > 1) 762 error(gettext("Regular expressions cannot be delimited by multibyte characters")); 763 c = getchar(); 764 if (eof == '\\') 765 switch (c) { 766 767 case '/': 768 case '?': 769 if (scanre == NULL || scanre->Expbuf[1] == 0) 770 error(value(vi_TERSE) ? gettext("No previous scan re") : 771 gettext("No previous scanning regular expression")); 772 resre(scanre); 773 return (c); 774 775 case '&': 776 if (subre == NULL || subre->Expbuf[1] == 0) 777 error(value(vi_TERSE) ? gettext("No previous substitute re") : 778 gettext("No previous substitute regular expression")); 779 resre(subre); 780 return (c); 781 782 default: 783 error(value(vi_TERSE) ? gettext("Badly formed re") : 784 gettext("Regular expression \\ must be followed by / or ?")); 785 } 786 if (c == eof || c == '\n' || c == EOF) { 787 if (re == NULL || re->Expbuf[1] == 0) 788 error(value(vi_TERSE) ? gettext("No previous re") : 789 gettext("No previous regular expression")); 790 if (c == '\n' && oknl == 0) 791 error(value(vi_TERSE) ? gettext("Missing closing delimiter") : 792 gettext("Missing closing delimiter for regular expression")); 793 if (c != eof) 794 ungetchar(c); 795 return (eof); 796 } 797 gp = genbuf; 798 if (c == '^') { 799 *gp++ = c; 800 c = getchar(); 801 } 802 ungetchar(c); 803 for (;;) { 804 c = getchar(); 805 if (c == eof || c == EOF) { 806 if (c == EOF) 807 ungetchar(c); 808 goto out; 809 } 810 if (gp >= &genbuf[LBSIZE - 3]) 811 complex: 812 cerror(value(vi_TERSE) ? 813 (unsigned char *)gettext("Re too complex") : 814 (unsigned char *) 815 gettext("Regular expression too complicated")); 816 817 if (!(isascii(c) || MB_CUR_MAX == 1)) { 818 ungetchar(c); 819 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 820 if ((gp + len) >= &genbuf[LBSIZE - 3]) 821 goto complex; 822 strncpy(gp, multi, len); 823 gp += len; 824 continue; 825 } 826 (void) getchar(); 827 } 828 829 switch (c) { 830 831 case '\\': 832 c = getchar(); 833 if (!isascii(c)) { 834 ungetchar(c); 835 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 836 if ((gp + len) >= &genbuf[LBSIZE - 3]) 837 goto complex; 838 *gp++ = '\\'; 839 strncpy(gp, multi, len); 840 gp += len; 841 continue; 842 } 843 (void) getchar(); 844 } 845 846 switch (c) { 847 848 case '<': 849 case '>': 850 case '(': 851 case ')': 852 case '{': 853 case '}': 854 case '$': 855 case '^': 856 case '\\': 857 *gp++ = '\\'; 858 *gp++ = c; 859 continue; 860 861 case 'n': 862 *gp++ = c; 863 continue; 864 } 865 if(c >= '0' && c <= '9') { 866 *gp++ = '\\'; 867 *gp++ = c; 868 continue; 869 } 870 if (value(vi_MAGIC) == 0) 871 magic: 872 switch (c) { 873 874 case '.': 875 *gp++ = '.'; 876 continue; 877 878 case '~': 879 rhsp = rhsbuf; 880 while (*rhsp) { 881 if (!isascii(*rhsp)) { 882 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) { 883 if ((gp + len) >= &genbuf[LBSIZE-2]) 884 goto complex; 885 strncpy(gp, rhsp, len); 886 rhsp += len; gp += len; 887 continue; 888 } 889 } 890 len = 1; 891 if (*rhsp == '\\') { 892 c = *++rhsp; 893 if (c == '&') 894 cerror(value(vi_TERSE) ? (unsigned char *) 895 gettext("Replacement pattern contains &") : 896 (unsigned char *)gettext("Replacement pattern contains & - cannot use in re")); 897 if (c >= '1' && c <= '9') 898 cerror(value(vi_TERSE) ? (unsigned char *) 899 gettext("Replacement pattern contains \\d") : 900 (unsigned char *) 901 gettext("Replacement pattern contains \\d - cannot use in re")); 902 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) { 903 len = 1; 904 if(any(c, ".\\*[$")) 905 *gp++ = '\\'; 906 } 907 } 908 909 if ((gp + len) >= &genbuf[LBSIZE-2]) 910 goto complex; 911 if (len == 1) { 912 c = *rhsp++; 913 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 914 } else { 915 strncpy(gp, rhsp, len); 916 gp += len; rhsp += len; 917 } 918 } 919 continue; 920 921 case '*': 922 *gp++ = '*'; 923 continue; 924 925 case '[': 926 *gp++ = '['; 927 c = getchar(); 928 if (c == '^') { 929 *gp++ = '^'; 930 c = getchar(); 931 } 932 933 do { 934 if (!isascii(c) && c != EOF) { 935 ungetchar(c); 936 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 937 if ((gp + len)>= &genbuf[LBSIZE-4]) 938 goto complex; 939 strncpy(gp, multi, len); 940 gp += len; 941 c = getchar(); 942 continue; 943 } 944 (void) getchar(); 945 } 946 947 if (gp >= &genbuf[LBSIZE-4]) 948 goto complex; 949 if(c == '\\' && peekchar() == ']') { 950 (void)getchar(); 951 *gp++ = '\\'; 952 *gp++ = ']'; 953 } 954 else if (c == '\n' || c == EOF) 955 cerror((unsigned char *) 956 gettext("Missing ]")); 957 else 958 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 959 c = getchar(); 960 } while(c != ']'); 961 *gp++ = ']'; 962 continue; 963 } 964 if (c == EOF) { 965 ungetchar(EOF); 966 *gp++ = '\\'; 967 *gp++ = '\\'; 968 continue; 969 } 970 if (c == '\n') 971 cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") : 972 (unsigned char *)gettext("Can't escape newlines into regular expressions")); 973 *gp++ = '\\'; 974 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 975 continue; 976 977 case '\n': 978 if (oknl) { 979 ungetchar(c); 980 goto out; 981 } 982 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") : 983 (unsigned char *)gettext("Missing closing delimiter for regular expression")); 984 985 case '.': 986 case '~': 987 case '*': 988 case '[': 989 if (value(vi_MAGIC)) 990 goto magic; 991 if(c != '~') 992 *gp++ = '\\'; 993 defchar: 994 default: 995 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 996 continue; 997 } 998 } 999 out: 1000 *gp++ = '\0'; 1001 1002 #ifdef XPG4 1003 /* see if our compiled RE's will fit in the re structure: */ 1004 if (regexc_size > EXPSIZ) { 1005 /* 1006 * this should never happen. but it's critical that we 1007 * check here, otherwise .bss would get overwritten. 1008 */ 1009 cerror(value(vi_TERSE) ? (unsigned char *) 1010 gettext("RE's can't fit") : 1011 (unsigned char *)gettext("Regular expressions can't fit")); 1012 return(eof); 1013 } 1014 1015 /* 1016 * We create re each time we need it. 1017 */ 1018 1019 if (re == NULL || re == scanre || re == subre) { 1020 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1021 error(gettext("out of memory")); 1022 exit(errcnt); 1023 } 1024 } else { 1025 regex_comp_free(&re->Expbuf); 1026 memset(re, 0, sizeof(struct regexp)); 1027 } 1028 1029 compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf 1030 + regexc_size); 1031 #else /* !XPG4 */ 1032 (void) _compile((const char *)genbuf, (char *)re->Expbuf, 1033 (char *)(re->Expbuf + sizeof (re->Expbuf)), 1); 1034 #endif /* XPG4 */ 1035 1036 if(regerrno) 1037 switch(regerrno) { 1038 1039 case 42: 1040 cerror((unsigned char *)gettext("\\( \\) Imbalance")); 1041 case 43: 1042 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") : 1043 (unsigned char *) 1044 gettext("Too many \\('d subexpressions in a regular expression")); 1045 case 50: 1046 goto complex; 1047 case 67: 1048 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") : 1049 (unsigned char *)gettext("Regular expression has illegal byte sequence")); 1050 } 1051 re->Nbra = nbra; 1052 return(eof); 1053 } 1054 1055 void 1056 cerror(unsigned char *s) 1057 { 1058 if (re) { 1059 re->Expbuf[0] = re->Expbuf[1] = 0; 1060 } 1061 error(s); 1062 } 1063 1064 int 1065 execute(int gf, line *addr) 1066 { 1067 unsigned char *p1, *p2; 1068 char *start; 1069 int c, i; 1070 int ret; 1071 int len; 1072 1073 if (gf) { 1074 if (re == NULL || re->Expbuf[0]) 1075 return (0); 1076 if(value(vi_IGNORECASE)) { 1077 p1 = genbuf; 1078 p2 = (unsigned char *)loc2; 1079 while(c = *p2) { 1080 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1081 len = 1; 1082 if (len == 1) { 1083 *p1++ = tolower(c); 1084 p2++; 1085 continue; 1086 } 1087 strncpy(p1, p2, len); 1088 p1 += len; p2 += len; 1089 } 1090 *p1 = '\0'; 1091 locs = (char *)genbuf; 1092 p1 = genbuf; 1093 start = loc2; 1094 } else { 1095 p1 = (unsigned char *)loc2; 1096 locs = loc2; 1097 } 1098 } else { 1099 if (addr == zero) 1100 return (0); 1101 p1 = linebuf; 1102 getaline(*addr); 1103 if(value(vi_IGNORECASE)) { 1104 p1 = genbuf; 1105 p2 = linebuf; 1106 while(c = *p2) { 1107 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1108 len = 1; 1109 if (len == 1) { 1110 *p1++ = tolower(c); 1111 p2++; 1112 continue; 1113 } 1114 strncpy(p1, p2, len); 1115 p1 += len; p2 += len; 1116 } 1117 *p1 = '\0'; 1118 p1 = genbuf; 1119 start = (char *)linebuf; 1120 } 1121 locs = (char *)0; 1122 } 1123 1124 ret = step((char *)p1, (char *)re->Expbuf); 1125 1126 if(value(vi_IGNORECASE) && ret) { 1127 loc1 = start + (loc1 - (char *)genbuf); 1128 loc2 = start + (loc2 - (char *)genbuf); 1129 for(i = 0; i < NBRA; i++) { 1130 braslist[i] = start + (braslist[i] - (char *)genbuf); 1131 braelist[i] = start + (braelist[i] - (char *)genbuf); 1132 } 1133 } 1134 return ret; 1135 } 1136 1137 /* 1138 * Initialize the compiled regular-expression storage areas (called from 1139 * main()). 1140 */ 1141 1142 void init_re (void) 1143 { 1144 #ifdef XPG4 1145 re = scanre = subre = NULL; 1146 #else /* !XPG4 */ 1147 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1148 error(gettext("out of memory")); 1149 exit(errcnt); 1150 } 1151 1152 if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) { 1153 error(gettext("out of memory")); 1154 exit(errcnt); 1155 } 1156 1157 if ((subre = calloc(1, sizeof(struct regexp))) == NULL) { 1158 error(gettext("out of memory")); 1159 exit(errcnt); 1160 } 1161 #endif /* XPG4 */ 1162 } 1163 1164 /* 1165 * Save what is in the special place re to the named alternate 1166 * location. This means freeing up what's currently in this target 1167 * location, if necessary. 1168 */ 1169 1170 void savere(struct regexp ** a) 1171 { 1172 #ifdef XPG4 1173 if (a == NULL || re == NULL) { 1174 return; 1175 } 1176 1177 if (*a == NULL) { 1178 *a = re; 1179 return; 1180 } 1181 1182 if (*a != re) { 1183 if (scanre != subre) { 1184 regex_comp_free(&((*a)->Expbuf)); 1185 free(*a); 1186 } 1187 *a = re; 1188 } 1189 #else /* !XPG4 */ 1190 memcpy(*a, re, sizeof(struct regexp)); 1191 #endif /* XPG4 */ 1192 } 1193 1194 1195 /* 1196 * Restore what is in the named alternate location to the special place 1197 * re. This means first freeing up what's currently in re, if necessary. 1198 */ 1199 1200 void resre(struct regexp * a) 1201 { 1202 #ifdef XPG4 1203 if (a == NULL) { 1204 return; 1205 } 1206 1207 if (re == NULL) { 1208 re = a; 1209 return; 1210 } 1211 1212 if (a != re) { 1213 if ((re != scanre) && (re != subre)) { 1214 regex_comp_free(&re->Expbuf); 1215 free(re); 1216 } 1217 1218 re = a; 1219 } 1220 #else /* !XPG4 */ 1221 memcpy(re, a, sizeof(struct regexp)); 1222 #endif /* XPG4 */ 1223 } 1224