1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 23 /* All Rights Reserved */ 24 25 26 /* Copyright (c) 1981 Regents of the University of California */ 27 28 /* 29 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 30 * Use is subject to license terms. 31 */ 32 33 #pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.21 */ 34 35 #include "ex.h" 36 #include "ex_re.h" 37 38 /* from libgen */ 39 char *_compile(const char *, char *, char *, int); 40 41 /* 42 * The compiled-regular-expression storage areas (re, scanre, and subre) 43 * have been changed into dynamically allocated memory areas, in both the 44 * Solaris and XPG4 versions. 45 * 46 * In the Solaris version, which uses the original libgen(3g) compile() 47 * and step() calls, these areas are allocated once, and then data are 48 * copied between them subsequently, as they were in the original 49 * implementation. This is possible because the compiled information is 50 * a self-contained block of bits. 51 * 52 * In the XPG4 version, the expr:compile.o object is linked in as a 53 * simulation of these functions using the new regcomp() and regexec() 54 * functions. The problem here is that the resulting 55 * compiled-regular-expression data contain pointers to other data, which 56 * need to be freed, but only when we are quite sure that we are done 57 * with them - and certainly not before. There was an earlier attempt to 58 * handle these differences, but that effort was flawed. 59 */ 60 61 extern char getchar(); 62 #ifdef XPG4 63 void regex_comp_free(void *); 64 extern size_t regexc_size; /* compile.c: size of regex_comp structure */ 65 #endif /* XPG4 */ 66 67 /* 68 * Global, substitute and regular expressions. 69 * Very similar to ed, with some re extensions and 70 * confirmed substitute. 71 */ 72 global(k) 73 bool k; 74 { 75 unsigned char *gp; 76 int c; 77 line *a1; 78 unsigned char globuf[GBSIZE], *Cwas; 79 int nlines = lineDOL(); 80 int oinglobal = inglobal; 81 unsigned char *oglobp = globp; 82 char multi[MB_LEN_MAX + 1]; 83 wchar_t wc; 84 int len; 85 86 87 Cwas = Command; 88 /* 89 * States of inglobal: 90 * 0: ordinary - not in a global command. 91 * 1: text coming from some buffer, not tty. 92 * 2: like 1, but the source of the buffer is a global command. 93 * Hence you're only in a global command if inglobal==2. This 94 * strange sounding convention is historically derived from 95 * everybody simulating a global command. 96 */ 97 if (inglobal==2) 98 error(value(vi_TERSE) ? gettext("Global within global") : 99 gettext("Global within global not allowed")); 100 markDOT(); 101 setall(); 102 nonzero(); 103 if (skipend()) 104 error(value(vi_TERSE) ? gettext("Global needs re") : 105 gettext("Missing regular expression for global")); 106 c = getchar(); 107 (void)vi_compile(c, 1); 108 savere(&scanre); 109 gp = globuf; 110 while ((c = peekchar()) != '\n') { 111 if (!isascii(c)) { 112 if (c == EOF) { 113 c = '\n'; 114 ungetchar(c); 115 goto out; 116 } 117 118 mb_copy: 119 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 120 if ((gp + len) >= &globuf[GBSIZE - 2]) 121 error(gettext("Global command too long")); 122 strncpy(gp, multi, len); 123 gp += len; 124 continue; 125 } 126 } 127 128 (void) getchar(); 129 switch (c) { 130 131 case EOF: 132 c = '\n'; 133 ungetchar(c); 134 goto out; 135 136 case '\\': 137 c = peekchar(); 138 if (!isascii(c)) { 139 *gp++ = '\\'; 140 goto mb_copy; 141 } 142 143 (void) getchar(); 144 switch (c) { 145 146 case '\\': 147 ungetchar(c); 148 break; 149 150 case '\n': 151 break; 152 153 default: 154 *gp++ = '\\'; 155 break; 156 } 157 break; 158 } 159 *gp++ = c; 160 if (gp >= &globuf[GBSIZE - 2]) 161 error(gettext("Global command too long")); 162 } 163 164 out: 165 donewline(); 166 *gp++ = c; 167 *gp++ = 0; 168 saveall(); 169 inglobal = 2; 170 for (a1 = one; a1 <= dol; a1++) { 171 *a1 &= ~01; 172 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 173 *a1 |= 01; 174 } 175 #ifdef notdef 176 /* 177 * This code is commented out for now. The problem is that we don't 178 * fix up the undo area the way we should. Basically, I think what has 179 * to be done is to copy the undo area down (since we shrunk everything) 180 * and move the various pointers into it down too. I will do this later 181 * when I have time. (Mark, 10-20-80) 182 */ 183 /* 184 * Special case: g/.../d (avoid n^2 algorithm) 185 */ 186 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 187 gdelete(); 188 return; 189 } 190 #endif 191 if (inopen) 192 inopen = -1; 193 /* 194 * Now for each marked line, set dot there and do the commands. 195 * Note the n^2 behavior here for lots of lines matching. 196 * This is really needed: in some cases you could delete lines, 197 * causing a marked line to be moved before a1 and missed if 198 * we didn't restart at zero each time. 199 */ 200 for (a1 = one; a1 <= dol; a1++) { 201 if (*a1 & 01) { 202 *a1 &= ~01; 203 dot = a1; 204 globp = globuf; 205 commands(1, 1); 206 a1 = zero; 207 } 208 } 209 globp = oglobp; 210 inglobal = oinglobal; 211 endline = 1; 212 Command = Cwas; 213 netchHAD(nlines); 214 setlastchar(EOF); 215 if (inopen) { 216 ungetchar(EOF); 217 inopen = 1; 218 } 219 } 220 221 /* 222 * gdelete: delete inside a global command. Handles the 223 * special case g/r.e./d. All lines to be deleted have 224 * already been marked. Squeeze the remaining lines together. 225 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 226 * and g/r.e./.,/r.e.2/d are not treated specially. There is no 227 * good reason for this except the question: where to you draw the line? 228 */ 229 gdelete() 230 { 231 line *a1, *a2, *a3; 232 233 a3 = dol; 234 /* find first marked line. can skip all before it */ 235 for (a1=zero; (*a1&01)==0; a1++) 236 if (a1>=a3) 237 return; 238 /* copy down unmarked lines, compacting as we go. */ 239 for (a2=a1+1; a2<=a3;) { 240 if (*a2&01) { 241 a2++; /* line is marked, skip it */ 242 dot = a1; /* dot left after line deletion */ 243 } else 244 *a1++ = *a2++; /* unmarked, copy it */ 245 } 246 dol = a1-1; 247 if (dot>dol) 248 dot = dol; 249 change(); 250 } 251 252 bool cflag; 253 int scount, slines, stotal; 254 255 substitute(c) 256 int c; 257 { 258 line *addr; 259 int n; 260 int gsubf, hopcount; 261 262 gsubf = compsub(c); 263 if(FIXUNDO) 264 save12(), undkind = UNDCHANGE; 265 stotal = 0; 266 slines = 0; 267 for (addr = addr1; addr <= addr2; addr++) { 268 scount = hopcount = 0; 269 if (dosubcon(0, addr) == 0) 270 continue; 271 if (gsubf) { 272 /* 273 * The loop can happen from s/\</&/g 274 * but we don't want to break other, reasonable cases. 275 */ 276 hopcount = 0; 277 while (*loc2) { 278 if (++hopcount > sizeof linebuf) 279 error(gettext("substitution loop")); 280 if (dosubcon(1, addr) == 0) 281 break; 282 } 283 } 284 if (scount) { 285 stotal += scount; 286 slines++; 287 putmark(addr); 288 n = append(getsub, addr); 289 addr += n; 290 addr2 += n; 291 } 292 } 293 if (stotal == 0 && !inglobal && !cflag) 294 error(value(vi_TERSE) ? gettext("Fail") : 295 gettext("Substitute pattern match failed")); 296 snote(stotal, slines); 297 return (stotal); 298 } 299 300 compsub(ch) 301 { 302 int seof, c, uselastre; 303 static int gsubf; 304 static unsigned char remem[RHSSIZE]; 305 static int remflg = -1; 306 307 if (!value(vi_EDCOMPATIBLE)) 308 gsubf = cflag = 0; 309 uselastre = 0; 310 switch (ch) { 311 312 case 's': 313 (void)skipwh(); 314 seof = getchar(); 315 if (endcmd(seof) || any(seof, "gcr")) { 316 ungetchar(seof); 317 goto redo; 318 } 319 if (isalpha(seof) || isdigit(seof)) 320 error(value(vi_TERSE) ? gettext("Substitute needs re") : 321 gettext("Missing regular expression for substitute")); 322 seof = vi_compile(seof, 1); 323 uselastre = 1; 324 comprhs(seof); 325 gsubf = cflag = 0; 326 break; 327 328 case '~': 329 uselastre = 1; 330 /* fall into ... */ 331 case '&': 332 redo: 333 if (re == NULL || re->Expbuf[1] == 0) 334 error(value(vi_TERSE) ? gettext("No previous re") : 335 gettext("No previous regular expression")); 336 if (subre == NULL || subre->Expbuf[1] == 0) 337 error(value(vi_TERSE) ? gettext("No previous substitute re") : 338 gettext("No previous substitute to repeat")); 339 break; 340 } 341 for (;;) { 342 c = getchar(); 343 switch (c) { 344 345 case 'g': 346 gsubf = !gsubf; 347 continue; 348 349 case 'c': 350 cflag = !cflag; 351 continue; 352 353 case 'r': 354 uselastre = 1; 355 continue; 356 357 default: 358 ungetchar(c); 359 setcount(); 360 donewline(); 361 if (uselastre) 362 savere(&subre); 363 else 364 resre(subre); 365 366 /* 367 * The % by itself on the right hand side means 368 * that the previous value of the right hand side 369 * should be used. A -1 is used to indicate no 370 * previously remembered search string. 371 */ 372 373 if (rhsbuf[0] == '%' && rhsbuf[1] == 0) 374 if (remflg == -1) 375 error(gettext("No previously remembered string")); 376 else 377 strcpy(rhsbuf, remem); 378 else { 379 strcpy(remem, rhsbuf); 380 remflg = 1; 381 } 382 return (gsubf); 383 } 384 } 385 } 386 387 comprhs(seof) 388 int seof; 389 { 390 unsigned char *rp, *orp; 391 int c; 392 unsigned char orhsbuf[RHSSIZE]; 393 char multi[MB_LEN_MAX + 1]; 394 int len; 395 wchar_t wc; 396 397 rp = rhsbuf; 398 CP(orhsbuf, rp); 399 for (;;) { 400 c = peekchar(); 401 if (c == seof) { 402 (void) getchar(); 403 break; 404 } 405 406 if (!isascii(c) && c != EOF) { 407 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 408 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 409 goto toobig; 410 strncpy(rp, multi, len); 411 rp += len; 412 continue; 413 } 414 } 415 416 (void) getchar(); 417 switch (c) { 418 419 case '\\': 420 c = peekchar(); 421 if (c == EOF) { 422 (void) getchar(); 423 error(gettext("Replacement string ends with \\")); 424 } 425 426 if (!isascii(c)) { 427 *rp++ = '\\'; 428 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 429 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 430 goto over_flow; 431 strncpy(rp, multi, len); 432 rp += len; 433 continue; 434 } 435 } 436 437 (void) getchar(); 438 if (value(vi_MAGIC)) { 439 /* 440 * When "magic", \& turns into a plain &, 441 * and all other chars work fine quoted. 442 */ 443 if (c != '&') { 444 if(rp >= &rhsbuf[RHSSIZE - 1]) { 445 *rp=0; 446 error(value(vi_TERSE) ? 447 gettext("Replacement pattern too long") : 448 gettext("Replacement pattern too long - limit 256 characters")); 449 } 450 *rp++ = '\\'; 451 } 452 break; 453 } 454 magic: 455 if (c == '~') { 456 for (orp = orhsbuf; *orp; *rp++ = *orp++) 457 if (rp >= &rhsbuf[RHSSIZE - 1]) 458 goto toobig; 459 continue; 460 } 461 if(rp >= &rhsbuf[RHSSIZE - 1]) { 462 over_flow: 463 *rp=0; 464 error(value(vi_TERSE) ? 465 gettext("Replacement pattern too long") : 466 gettext("Replacement pattern too long - limit 256 characters")); 467 } 468 *rp++ = '\\'; 469 break; 470 471 case '\n': 472 case EOF: 473 if (!(globp && globp[0])) { 474 ungetchar(c); 475 goto endrhs; 476 } 477 478 case '~': 479 case '&': 480 if (value(vi_MAGIC)) 481 goto magic; 482 break; 483 } 484 if (rp >= &rhsbuf[RHSSIZE - 1]) { 485 toobig: 486 *rp = 0; 487 error(value(vi_TERSE) ? 488 gettext("Replacement pattern too long") : 489 gettext("Replacement pattern too long - limit 256 characters")); 490 } 491 *rp++ = c; 492 } 493 endrhs: 494 *rp++ = 0; 495 } 496 497 getsub() 498 { 499 unsigned char *p; 500 501 if ((p = linebp) == 0) 502 return (EOF); 503 strcLIN(p); 504 linebp = 0; 505 return (0); 506 } 507 508 dosubcon(f, a) 509 bool f; 510 line *a; 511 { 512 513 if (execute(f, a) == 0) 514 return (0); 515 if (confirmed(a)) { 516 dosub(); 517 scount++; 518 } 519 return (1); 520 } 521 522 confirmed(a) 523 line *a; 524 { 525 int c, cnt, ch; 526 527 if (cflag == 0) 528 return (1); 529 pofix(); 530 pline(lineno(a)); 531 if (inopen) 532 putchar('\n' | QUOTE); 533 c = lcolumn(loc1); 534 ugo(c, ' '); 535 ugo(lcolumn(loc2) - c, '^'); 536 flush(); 537 cnt = 0; 538 bkup: 539 ch = c = getkey(); 540 again: 541 if (c == '\b') { 542 if ((inopen) 543 && (cnt > 0)) { 544 putchar('\b' | QUOTE); 545 putchar(' '); 546 putchar('\b' | QUOTE), flush(); 547 cnt --; 548 } 549 goto bkup; 550 } 551 if (c == '\r') 552 c = '\n'; 553 if (inopen && MB_CUR_MAX == 1 || c < 0200) { 554 putchar(c); 555 flush(); 556 cnt++; 557 } 558 if (c != '\n' && c != EOF) { 559 c = getkey(); 560 goto again; 561 } 562 noteinp(); 563 return (ch == 'y'); 564 } 565 566 ugo(cnt, with) 567 int with; 568 int cnt; 569 { 570 571 if (cnt > 0) 572 do 573 putchar(with); 574 while (--cnt > 0); 575 } 576 577 int casecnt; 578 bool destuc; 579 580 dosub() 581 { 582 unsigned char *lp, *sp, *rp; 583 int c; 584 int len; 585 586 lp = linebuf; 587 sp = genbuf; 588 rp = rhsbuf; 589 while (lp < (unsigned char *)loc1) 590 *sp++ = *lp++; 591 casecnt = 0; 592 /* 593 * Caution: depending on the hardware, c will be either sign 594 * extended or not if C"E is set. Thus, on a VAX, c will 595 * be < 0, but on a 3B, c will be >= 128. 596 */ 597 while (c = *rp) { 598 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 599 len = 1; 600 /* ^V <return> from vi to split lines */ 601 if (c == '\r') 602 c = '\n'; 603 604 if (c == '\\') { 605 rp++; 606 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 607 len = 1; 608 switch (c = *rp++) { 609 610 case '&': 611 sp = place(sp, loc1, loc2); 612 if (sp == 0) 613 goto ovflo; 614 continue; 615 616 case 'l': 617 casecnt = 1; 618 destuc = 0; 619 continue; 620 621 case 'L': 622 casecnt = LBSIZE; 623 destuc = 0; 624 continue; 625 626 case 'u': 627 casecnt = 1; 628 destuc = 1; 629 continue; 630 631 case 'U': 632 casecnt = LBSIZE; 633 destuc = 1; 634 continue; 635 636 case 'E': 637 case 'e': 638 casecnt = 0; 639 continue; 640 } 641 if(re != NULL && c >= '1' && c < re->Nbra + '1') { 642 sp = place(sp, braslist[c - '1'] , braelist[c - '1']); 643 if (sp == 0) 644 goto ovflo; 645 continue; 646 } 647 rp--; 648 } 649 if (len > 1) { 650 if ((sp + len) >= &genbuf[LBSIZE]) 651 goto ovflo; 652 strncpy(sp, rp, len); 653 } else { 654 if (casecnt) 655 *sp = fixcase(c); 656 else 657 *sp = c; 658 } 659 sp += len; rp += len; 660 if (sp >= &genbuf[LBSIZE]) 661 ovflo: 662 error(value(vi_TERSE) ? gettext("Line overflow") : 663 gettext("Line overflow in substitute")); 664 } 665 lp = (unsigned char *)loc2; 666 loc2 = (char *)(linebuf + (sp - genbuf)); 667 while (*sp++ = *lp++) 668 if (sp >= &genbuf[LBSIZE]) 669 goto ovflo; 670 strcLIN(genbuf); 671 } 672 673 fixcase(c) 674 int c; 675 { 676 677 if (casecnt == 0) 678 return (c); 679 casecnt--; 680 if (destuc) { 681 if (islower(c)) 682 c = toupper(c); 683 } else 684 if (isupper(c)) 685 c = tolower(c); 686 return (c); 687 } 688 689 unsigned char * 690 place(sp, l1, l2) 691 unsigned char *sp, *l1, *l2; 692 { 693 694 while (l1 < l2) { 695 *sp++ = fixcase(*l1++); 696 if (sp >= &genbuf[LBSIZE]) 697 return (0); 698 } 699 return (sp); 700 } 701 702 snote(total, nlines) 703 int total, nlines; 704 { 705 706 if (!notable(total)) 707 return; 708 if (nlines != 1 && nlines != total) 709 printf(mesg(value(vi_TERSE) ? 710 /* 711 * TRANSLATION_NOTE 712 * Reference order of arguments must not 713 * be changed using '%digit$', since vi's 714 * printf() does not support it. 715 */ 716 gettext("%d subs on %d lines") : 717 /* 718 * TRANSLATION_NOTE 719 * Reference order of arguments must not 720 * be changed using '%digit$', since vi's 721 * printf() does not support it. 722 */ 723 gettext("%d substitutions on %d lines")), 724 total, nlines); 725 else 726 printf(mesg(value(vi_TERSE) ? 727 gettext("%d subs") : 728 gettext("%d substitutions")), 729 total); 730 noonl(); 731 flush(); 732 } 733 734 #ifdef XPG4 735 #include <regex.h> 736 737 extern int regcomp_flags; /* use to specify cflags for regcomp() */ 738 #endif /* XPG4 */ 739 740 vi_compile(eof, oknl) 741 int eof; 742 int oknl; 743 { 744 int c; 745 unsigned char *gp, *p1; 746 unsigned char *rhsp; 747 unsigned char rebuf[LBSIZE]; 748 char multi[MB_LEN_MAX + 1]; 749 int len; 750 wchar_t wc; 751 752 #ifdef XPG4 753 /* 754 * reset cflags to plain BRE 755 * if \< and/or \> is specified, REG_WORDS is set. 756 */ 757 regcomp_flags = 0; 758 #endif /* XPG4 */ 759 760 gp = genbuf; 761 if (isalpha(eof) || isdigit(eof)) 762 error(gettext("Regular expressions cannot be delimited by letters or digits")); 763 if(eof >= 0200 && MB_CUR_MAX > 1) 764 error(gettext("Regular expressions cannot be delimited by multibyte characters")); 765 c = getchar(); 766 if (eof == '\\') 767 switch (c) { 768 769 case '/': 770 case '?': 771 if (scanre == NULL || scanre->Expbuf[1] == 0) 772 error(value(vi_TERSE) ? gettext("No previous scan re") : 773 gettext("No previous scanning regular expression")); 774 resre(scanre); 775 return (c); 776 777 case '&': 778 if (subre == NULL || subre->Expbuf[1] == 0) 779 error(value(vi_TERSE) ? gettext("No previous substitute re") : 780 gettext("No previous substitute regular expression")); 781 resre(subre); 782 return (c); 783 784 default: 785 error(value(vi_TERSE) ? gettext("Badly formed re") : 786 gettext("Regular expression \\ must be followed by / or ?")); 787 } 788 if (c == eof || c == '\n' || c == EOF) { 789 if (re == NULL || re->Expbuf[1] == 0) 790 error(value(vi_TERSE) ? gettext("No previous re") : 791 gettext("No previous regular expression")); 792 if (c == '\n' && oknl == 0) 793 error(value(vi_TERSE) ? gettext("Missing closing delimiter") : 794 gettext("Missing closing delimiter for regular expression")); 795 if (c != eof) 796 ungetchar(c); 797 return (eof); 798 } 799 gp = genbuf; 800 if (c == '^') { 801 *gp++ = c; 802 c = getchar(); 803 } 804 ungetchar(c); 805 for (;;) { 806 c = getchar(); 807 if (c == eof || c == EOF) { 808 if (c == EOF) 809 ungetchar(c); 810 goto out; 811 } 812 if (gp >= &genbuf[LBSIZE - 3]) 813 complex: 814 cerror(value(vi_TERSE) ? gettext("Re too complex") : 815 gettext("Regular expression too complicated")); 816 817 if (!(isascii(c) || MB_CUR_MAX == 1)) { 818 ungetchar(c); 819 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 820 if ((gp + len) >= &genbuf[LBSIZE - 3]) 821 goto complex; 822 strncpy(gp, multi, len); 823 gp += len; 824 continue; 825 } 826 (void) getchar(); 827 } 828 829 switch (c) { 830 831 case '\\': 832 c = getchar(); 833 if (!isascii(c)) { 834 ungetchar(c); 835 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 836 if ((gp + len) >= &genbuf[LBSIZE - 3]) 837 goto complex; 838 *gp++ = '\\'; 839 strncpy(gp, multi, len); 840 gp += len; 841 continue; 842 } 843 (void) getchar(); 844 } 845 846 switch (c) { 847 848 case '<': 849 case '>': 850 #ifdef XPG4 851 regcomp_flags = REG_WORDS; 852 /*FALLTHRU*/ 853 #endif /* XPG4 */ 854 case '(': 855 case ')': 856 case '{': 857 case '}': 858 case '$': 859 case '^': 860 case '\\': 861 *gp++ = '\\'; 862 *gp++ = c; 863 continue; 864 865 case 'n': 866 *gp++ = c; 867 continue; 868 } 869 if(c >= '0' && c <= '9') { 870 *gp++ = '\\'; 871 *gp++ = c; 872 continue; 873 } 874 if (value(vi_MAGIC) == 0) 875 magic: 876 switch (c) { 877 878 case '.': 879 *gp++ = '.'; 880 continue; 881 882 case '~': 883 rhsp = rhsbuf; 884 while (*rhsp) { 885 if (!isascii(*rhsp)) { 886 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) { 887 if ((gp + len) >= &genbuf[LBSIZE-2]) 888 goto complex; 889 strncpy(gp, rhsp, len); 890 rhsp += len; gp += len; 891 continue; 892 } 893 } 894 len = 1; 895 if (*rhsp == '\\') { 896 c = *++rhsp; 897 if (c == '&') 898 cerror(value(vi_TERSE) ? gettext("Replacement pattern contains &") : 899 gettext("Replacement pattern contains & - cannot use in re")); 900 if (c >= '1' && c <= '9') 901 cerror(value(vi_TERSE) ? gettext("Replacement pattern contains \\d") : 902 gettext("Replacement pattern contains \\d - cannot use in re")); 903 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) { 904 len = 1; 905 if(any(c, ".\\*[$")) 906 *gp++ = '\\'; 907 } 908 } 909 910 if ((gp + len) >= &genbuf[LBSIZE-2]) 911 goto complex; 912 if (len == 1) { 913 c = *rhsp++; 914 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 915 } else { 916 strncpy(gp, rhsp, len); 917 gp += len; rhsp += len; 918 } 919 } 920 continue; 921 922 case '*': 923 *gp++ = '*'; 924 continue; 925 926 case '[': 927 *gp++ = '['; 928 c = getchar(); 929 if (c == '^') { 930 *gp++ = '^'; 931 c = getchar(); 932 } 933 934 do { 935 if (!isascii(c) && c != EOF) { 936 ungetchar(c); 937 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 938 if ((gp + len)>= &genbuf[LBSIZE-4]) 939 goto complex; 940 strncpy(gp, multi, len); 941 gp += len; 942 c = getchar(); 943 continue; 944 } 945 (void) getchar(); 946 } 947 948 if (gp >= &genbuf[LBSIZE-4]) 949 goto complex; 950 if(c == '\\' && peekchar() == ']') { 951 (void)getchar(); 952 *gp++ = '\\'; 953 *gp++ = ']'; 954 } 955 else if (c == '\n' || c == EOF) 956 cerror(gettext("Missing ]")); 957 else 958 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 959 c = getchar(); 960 } while(c != ']'); 961 *gp++ = ']'; 962 continue; 963 } 964 if (c == EOF) { 965 ungetchar(EOF); 966 *gp++ = '\\'; 967 *gp++ = '\\'; 968 continue; 969 } 970 if (c == '\n') 971 cerror(value(vi_TERSE) ? gettext("No newlines in re's") : 972 gettext("Can't escape newlines into regular expressions")); 973 *gp++ = '\\'; 974 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 975 continue; 976 977 case '\n': 978 if (oknl) { 979 ungetchar(c); 980 goto out; 981 } 982 cerror(value(vi_TERSE) ? gettext("Badly formed re") : 983 gettext("Missing closing delimiter for regular expression")); 984 985 case '.': 986 case '~': 987 case '*': 988 case '[': 989 if (value(vi_MAGIC)) 990 goto magic; 991 if(c != '~') 992 *gp++ = '\\'; 993 defchar: 994 default: 995 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 996 continue; 997 } 998 } 999 out: 1000 *gp++ = '\0'; 1001 1002 #ifdef XPG4 1003 /* see if our compiled RE's will fit in the re structure: */ 1004 if (regexc_size > EXPSIZ) { 1005 /* 1006 * this should never happen. but it's critical that we 1007 * check here, otherwise .bss would get overwritten. 1008 */ 1009 cerror(value(vi_TERSE) ? gettext("RE's can't fit") : 1010 gettext("Regular expressions can't fit")); 1011 return(eof); 1012 } 1013 1014 /* 1015 * We create re each time we need it. 1016 */ 1017 1018 if (re == NULL || re == scanre || re == subre) { 1019 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1020 error(gettext("out of memory")); 1021 exit(errcnt); 1022 } 1023 } else { 1024 regex_comp_free(&re->Expbuf); 1025 memset(re, 0, sizeof(struct regexp)); 1026 } 1027 1028 compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf 1029 + regexc_size); 1030 #else /* !XPG4 */ 1031 (void) _compile((const char *)genbuf, (char *)re->Expbuf, 1032 (char *)(re->Expbuf + sizeof (re->Expbuf)), 1); 1033 #endif /* XPG4 */ 1034 1035 if(regerrno) 1036 switch(regerrno) { 1037 1038 case 42: 1039 cerror(gettext("\\( \\) Imbalance")); 1040 case 43: 1041 cerror(value(vi_TERSE) ? gettext("Awash in \\('s!") : 1042 gettext("Too many \\('d subexpressions in a regular expression")); 1043 case 50: 1044 goto complex; 1045 case 67: 1046 cerror(value(vi_TERSE) ? gettext("Illegal byte sequence") : 1047 gettext("Regular expression has illegal byte sequence")); 1048 } 1049 re->Nbra = nbra; 1050 return(eof); 1051 } 1052 1053 cerror(s) 1054 unsigned char *s; 1055 { 1056 if (re) { 1057 re->Expbuf[0] = re->Expbuf[1] = 0; 1058 } 1059 error(s); 1060 } 1061 1062 execute(gf, addr) 1063 line *addr; 1064 { 1065 unsigned char *p1, *p2; 1066 char *start; 1067 int c, i; 1068 int ret; 1069 int len; 1070 1071 if (gf) { 1072 if (re == NULL || re->Expbuf[0]) 1073 return (0); 1074 if(value(vi_IGNORECASE)) { 1075 p1 = genbuf; 1076 p2 = (unsigned char *)loc2; 1077 while(c = *p2) { 1078 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1079 len = 1; 1080 if (len == 1) { 1081 *p1++ = tolower(c); 1082 p2++; 1083 continue; 1084 } 1085 strncpy(p1, p2, len); 1086 p1 += len; p2 += len; 1087 } 1088 *p1 = '\0'; 1089 locs = (char *)genbuf; 1090 p1 = genbuf; 1091 start = loc2; 1092 } else { 1093 p1 = (unsigned char *)loc2; 1094 locs = loc2; 1095 } 1096 } else { 1097 if (addr == zero) 1098 return (0); 1099 p1 = linebuf; 1100 getline(*addr); 1101 if(value(vi_IGNORECASE)) { 1102 p1 = genbuf; 1103 p2 = linebuf; 1104 while(c = *p2) { 1105 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1106 len = 1; 1107 if (len == 1) { 1108 *p1++ = tolower(c); 1109 p2++; 1110 continue; 1111 } 1112 strncpy(p1, p2, len); 1113 p1 += len; p2 += len; 1114 } 1115 *p1 = '\0'; 1116 p1 = genbuf; 1117 start = (char *)linebuf; 1118 } 1119 locs = (char *)0; 1120 } 1121 1122 ret = step((char *)p1, (char *)re->Expbuf); 1123 1124 if(value(vi_IGNORECASE) && ret) { 1125 loc1 = start + (loc1 - (char *)genbuf); 1126 loc2 = start + (loc2 - (char *)genbuf); 1127 for(i = 0; i < NBRA; i++) { 1128 braslist[i] = start + (braslist[i] - (char *)genbuf); 1129 braelist[i] = start + (braelist[i] - (char *)genbuf); 1130 } 1131 } 1132 return ret; 1133 } 1134 1135 /* 1136 * Initialize the compiled regular-expression storage areas (called from 1137 * main()). 1138 */ 1139 1140 void init_re (void) 1141 { 1142 #ifdef XPG4 1143 re = scanre = subre = NULL; 1144 #else /* !XPG4 */ 1145 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1146 error(gettext("out of memory")); 1147 exit(errcnt); 1148 } 1149 1150 if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) { 1151 error(gettext("out of memory")); 1152 exit(errcnt); 1153 } 1154 1155 if ((subre = calloc(1, sizeof(struct regexp))) == NULL) { 1156 error(gettext("out of memory")); 1157 exit(errcnt); 1158 } 1159 #endif /* XPG4 */ 1160 } 1161 1162 /* 1163 * Save what is in the special place re to the named alternate 1164 * location. This means freeing up what's currently in this target 1165 * location, if necessary. 1166 */ 1167 1168 void savere(struct regexp ** a) 1169 { 1170 #ifdef XPG4 1171 if (a == NULL || re == NULL) { 1172 return; 1173 } 1174 1175 if (*a == NULL) { 1176 *a = re; 1177 return; 1178 } 1179 1180 if (*a != re) { 1181 if (scanre != subre) { 1182 regex_comp_free(&((*a)->Expbuf)); 1183 free(*a); 1184 } 1185 *a = re; 1186 } 1187 #else /* !XPG4 */ 1188 memcpy(*a, re, sizeof(struct regexp)); 1189 #endif /* XPG4 */ 1190 } 1191 1192 1193 /* 1194 * Restore what is in the named alternate location to the special place 1195 * re. This means first freeing up what's currently in re, if necessary. 1196 */ 1197 1198 void resre(struct regexp * a) 1199 { 1200 #ifdef XPG4 1201 if (a == NULL) { 1202 return; 1203 } 1204 1205 if (re == NULL) { 1206 re = a; 1207 return; 1208 } 1209 1210 if (a != re) { 1211 if ((re != scanre) && (re != subre)) { 1212 regex_comp_free(&re->Expbuf); 1213 free(re); 1214 } 1215 1216 re = a; 1217 } 1218 #else /* !XPG4 */ 1219 memcpy(re, a, sizeof(struct regexp)); 1220 #endif /* XPG4 */ 1221 } 1222