1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 31 /* Copyright (c) 1981 Regents of the University of California */ 32 33 #pragma ident "%Z%%M% %I% %E% SMI" 34 35 #include "ex.h" 36 #include "ex_re.h" 37 38 /* from libgen */ 39 char *_compile(const char *, char *, char *, int); 40 41 /* 42 * The compiled-regular-expression storage areas (re, scanre, and subre) 43 * have been changed into dynamically allocated memory areas, in both the 44 * Solaris and XPG4 versions. 45 * 46 * In the Solaris version, which uses the original libgen(3g) compile() 47 * and step() calls, these areas are allocated once, and then data are 48 * copied between them subsequently, as they were in the original 49 * implementation. This is possible because the compiled information is 50 * a self-contained block of bits. 51 * 52 * In the XPG4 version, the expr:compile.o object is linked in as a 53 * simulation of these functions using the new regcomp() and regexec() 54 * functions. The problem here is that the resulting 55 * compiled-regular-expression data contain pointers to other data, which 56 * need to be freed, but only when we are quite sure that we are done 57 * with them - and certainly not before. There was an earlier attempt to 58 * handle these differences, but that effort was flawed. 59 */ 60 61 extern int getchar(); 62 #ifdef XPG4 63 void regex_comp_free(void *); 64 extern size_t regexc_size; /* compile.c: size of regex_comp structure */ 65 #endif /* XPG4 */ 66 67 /* 68 * Global, substitute and regular expressions. 69 * Very similar to ed, with some re extensions and 70 * confirmed substitute. 71 */ 72 void 73 global(k) 74 bool k; 75 { 76 unsigned char *gp; 77 int c; 78 line *a1; 79 unsigned char globuf[GBSIZE], *Cwas; 80 int nlines = lineDOL(); 81 int oinglobal = inglobal; 82 unsigned char *oglobp = globp; 83 char multi[MB_LEN_MAX + 1]; 84 wchar_t wc; 85 int len; 86 87 88 Cwas = Command; 89 /* 90 * States of inglobal: 91 * 0: ordinary - not in a global command. 92 * 1: text coming from some buffer, not tty. 93 * 2: like 1, but the source of the buffer is a global command. 94 * Hence you're only in a global command if inglobal==2. This 95 * strange sounding convention is historically derived from 96 * everybody simulating a global command. 97 */ 98 if (inglobal==2) 99 error(value(vi_TERSE) ? gettext("Global within global") : 100 gettext("Global within global not allowed")); 101 markDOT(); 102 setall(); 103 nonzero(); 104 if (skipend()) 105 error(value(vi_TERSE) ? gettext("Global needs re") : 106 gettext("Missing regular expression for global")); 107 c = getchar(); 108 (void)vi_compile(c, 1); 109 savere(&scanre); 110 gp = globuf; 111 while ((c = peekchar()) != '\n') { 112 if (!isascii(c)) { 113 if (c == EOF) { 114 c = '\n'; 115 ungetchar(c); 116 goto out; 117 } 118 119 mb_copy: 120 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 121 if ((gp + len) >= &globuf[GBSIZE - 2]) 122 error(gettext("Global command too long")); 123 strncpy(gp, multi, len); 124 gp += len; 125 continue; 126 } 127 } 128 129 (void) getchar(); 130 switch (c) { 131 132 case EOF: 133 c = '\n'; 134 ungetchar(c); 135 goto out; 136 137 case '\\': 138 c = peekchar(); 139 if (!isascii(c)) { 140 *gp++ = '\\'; 141 goto mb_copy; 142 } 143 144 (void) getchar(); 145 switch (c) { 146 147 case '\\': 148 ungetchar(c); 149 break; 150 151 case '\n': 152 break; 153 154 default: 155 *gp++ = '\\'; 156 break; 157 } 158 break; 159 } 160 *gp++ = c; 161 if (gp >= &globuf[GBSIZE - 2]) 162 error(gettext("Global command too long")); 163 } 164 165 out: 166 donewline(); 167 *gp++ = c; 168 *gp++ = 0; 169 saveall(); 170 inglobal = 2; 171 for (a1 = one; a1 <= dol; a1++) { 172 *a1 &= ~01; 173 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 174 *a1 |= 01; 175 } 176 #ifdef notdef 177 /* 178 * This code is commented out for now. The problem is that we don't 179 * fix up the undo area the way we should. Basically, I think what has 180 * to be done is to copy the undo area down (since we shrunk everything) 181 * and move the various pointers into it down too. I will do this later 182 * when I have time. (Mark, 10-20-80) 183 */ 184 /* 185 * Special case: g/.../d (avoid n^2 algorithm) 186 */ 187 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 188 gdelete(); 189 return; 190 } 191 #endif 192 if (inopen) 193 inopen = -1; 194 /* 195 * Now for each marked line, set dot there and do the commands. 196 * Note the n^2 behavior here for lots of lines matching. 197 * This is really needed: in some cases you could delete lines, 198 * causing a marked line to be moved before a1 and missed if 199 * we didn't restart at zero each time. 200 */ 201 for (a1 = one; a1 <= dol; a1++) { 202 if (*a1 & 01) { 203 *a1 &= ~01; 204 dot = a1; 205 globp = globuf; 206 commands(1, 1); 207 a1 = zero; 208 } 209 } 210 globp = oglobp; 211 inglobal = oinglobal; 212 endline = 1; 213 Command = Cwas; 214 netchHAD(nlines); 215 setlastchar(EOF); 216 if (inopen) { 217 ungetchar(EOF); 218 inopen = 1; 219 } 220 } 221 222 /* 223 * gdelete: delete inside a global command. Handles the 224 * special case g/r.e./d. All lines to be deleted have 225 * already been marked. Squeeze the remaining lines together. 226 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 227 * and g/r.e./.,/r.e.2/d are not treated specially. There is no 228 * good reason for this except the question: where to you draw the line? 229 */ 230 void 231 gdelete(void) 232 { 233 line *a1, *a2, *a3; 234 235 a3 = dol; 236 /* find first marked line. can skip all before it */ 237 for (a1=zero; (*a1&01)==0; a1++) 238 if (a1>=a3) 239 return; 240 /* copy down unmarked lines, compacting as we go. */ 241 for (a2=a1+1; a2<=a3;) { 242 if (*a2&01) { 243 a2++; /* line is marked, skip it */ 244 dot = a1; /* dot left after line deletion */ 245 } else 246 *a1++ = *a2++; /* unmarked, copy it */ 247 } 248 dol = a1-1; 249 if (dot>dol) 250 dot = dol; 251 change(); 252 } 253 254 bool cflag; 255 int scount, slines, stotal; 256 257 int 258 substitute(int c) 259 { 260 line *addr; 261 int n; 262 int gsubf, hopcount; 263 264 gsubf = compsub(c); 265 if(FIXUNDO) 266 save12(), undkind = UNDCHANGE; 267 stotal = 0; 268 slines = 0; 269 for (addr = addr1; addr <= addr2; addr++) { 270 scount = hopcount = 0; 271 if (dosubcon(0, addr) == 0) 272 continue; 273 if (gsubf) { 274 /* 275 * The loop can happen from s/\</&/g 276 * but we don't want to break other, reasonable cases. 277 */ 278 hopcount = 0; 279 while (*loc2) { 280 if (++hopcount > sizeof linebuf) 281 error(gettext("substitution loop")); 282 if (dosubcon(1, addr) == 0) 283 break; 284 } 285 } 286 if (scount) { 287 stotal += scount; 288 slines++; 289 putmark(addr); 290 n = append(getsub, addr); 291 addr += n; 292 addr2 += n; 293 } 294 } 295 if (stotal == 0 && !inglobal && !cflag) 296 error(value(vi_TERSE) ? gettext("Fail") : 297 gettext("Substitute pattern match failed")); 298 snote(stotal, slines); 299 return (stotal); 300 } 301 302 int 303 compsub(int ch) 304 { 305 int seof, c, uselastre; 306 static int gsubf; 307 static unsigned char remem[RHSSIZE]; 308 static int remflg = -1; 309 310 if (!value(vi_EDCOMPATIBLE)) 311 gsubf = cflag = 0; 312 uselastre = 0; 313 switch (ch) { 314 315 case 's': 316 (void)skipwh(); 317 seof = getchar(); 318 if (endcmd(seof) || any(seof, "gcr")) { 319 ungetchar(seof); 320 goto redo; 321 } 322 if (isalpha(seof) || isdigit(seof)) 323 error(value(vi_TERSE) ? gettext("Substitute needs re") : 324 gettext("Missing regular expression for substitute")); 325 seof = vi_compile(seof, 1); 326 uselastre = 1; 327 comprhs(seof); 328 gsubf = cflag = 0; 329 break; 330 331 case '~': 332 uselastre = 1; 333 /* fall into ... */ 334 case '&': 335 redo: 336 if (re == NULL || re->Expbuf[1] == 0) 337 error(value(vi_TERSE) ? gettext("No previous re") : 338 gettext("No previous regular expression")); 339 if (subre == NULL || subre->Expbuf[1] == 0) 340 error(value(vi_TERSE) ? gettext("No previous substitute re") : 341 gettext("No previous substitute to repeat")); 342 break; 343 } 344 for (;;) { 345 c = getchar(); 346 switch (c) { 347 348 case 'g': 349 gsubf = !gsubf; 350 continue; 351 352 case 'c': 353 cflag = !cflag; 354 continue; 355 356 case 'r': 357 uselastre = 1; 358 continue; 359 360 default: 361 ungetchar(c); 362 setcount(); 363 donewline(); 364 if (uselastre) 365 savere(&subre); 366 else 367 resre(subre); 368 369 /* 370 * The % by itself on the right hand side means 371 * that the previous value of the right hand side 372 * should be used. A -1 is used to indicate no 373 * previously remembered search string. 374 */ 375 376 if (rhsbuf[0] == '%' && rhsbuf[1] == 0) 377 if (remflg == -1) 378 error(gettext("No previously remembered string")); 379 else 380 strcpy(rhsbuf, remem); 381 else { 382 strcpy(remem, rhsbuf); 383 remflg = 1; 384 } 385 return (gsubf); 386 } 387 } 388 } 389 390 void 391 comprhs(int seof) 392 { 393 unsigned char *rp, *orp; 394 int c; 395 unsigned char orhsbuf[RHSSIZE]; 396 char multi[MB_LEN_MAX + 1]; 397 int len; 398 wchar_t wc; 399 400 rp = rhsbuf; 401 CP(orhsbuf, rp); 402 for (;;) { 403 c = peekchar(); 404 if (c == seof) { 405 (void) getchar(); 406 break; 407 } 408 409 if (!isascii(c) && c != EOF) { 410 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 411 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 412 goto toobig; 413 strncpy(rp, multi, len); 414 rp += len; 415 continue; 416 } 417 } 418 419 (void) getchar(); 420 switch (c) { 421 422 case '\\': 423 c = peekchar(); 424 if (c == EOF) { 425 (void) getchar(); 426 error(gettext("Replacement string ends with \\")); 427 } 428 429 if (!isascii(c)) { 430 *rp++ = '\\'; 431 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) { 432 if ((rp + len) >= &rhsbuf[RHSSIZE - 1]) 433 goto over_flow; 434 strncpy(rp, multi, len); 435 rp += len; 436 continue; 437 } 438 } 439 440 (void) getchar(); 441 if (value(vi_MAGIC)) { 442 /* 443 * When "magic", \& turns into a plain &, 444 * and all other chars work fine quoted. 445 */ 446 if (c != '&') { 447 if(rp >= &rhsbuf[RHSSIZE - 1]) { 448 *rp=0; 449 error(value(vi_TERSE) ? 450 gettext("Replacement pattern too long") : 451 gettext("Replacement pattern too long - limit 256 characters")); 452 } 453 *rp++ = '\\'; 454 } 455 break; 456 } 457 magic: 458 if (c == '~') { 459 for (orp = orhsbuf; *orp; *rp++ = *orp++) 460 if (rp >= &rhsbuf[RHSSIZE - 1]) 461 goto toobig; 462 continue; 463 } 464 if(rp >= &rhsbuf[RHSSIZE - 1]) { 465 over_flow: 466 *rp=0; 467 error(value(vi_TERSE) ? 468 gettext("Replacement pattern too long") : 469 gettext("Replacement pattern too long - limit 256 characters")); 470 } 471 *rp++ = '\\'; 472 break; 473 474 case '\n': 475 case EOF: 476 if (!(globp && globp[0])) { 477 ungetchar(c); 478 goto endrhs; 479 } 480 481 case '~': 482 case '&': 483 if (value(vi_MAGIC)) 484 goto magic; 485 break; 486 } 487 if (rp >= &rhsbuf[RHSSIZE - 1]) { 488 toobig: 489 *rp = 0; 490 error(value(vi_TERSE) ? 491 gettext("Replacement pattern too long") : 492 gettext("Replacement pattern too long - limit 256 characters")); 493 } 494 *rp++ = c; 495 } 496 endrhs: 497 *rp++ = 0; 498 } 499 500 int 501 getsub(void) 502 { 503 unsigned char *p; 504 505 if ((p = linebp) == 0) 506 return (EOF); 507 strcLIN(p); 508 linebp = 0; 509 return (0); 510 } 511 512 int 513 dosubcon(bool f, line *a) 514 { 515 516 if (execute(f, a) == 0) 517 return (0); 518 if (confirmed(a)) { 519 dosub(); 520 scount++; 521 } 522 return (1); 523 } 524 525 int 526 confirmed(line *a) 527 { 528 int c, cnt, ch; 529 530 if (cflag == 0) 531 return (1); 532 pofix(); 533 pline(lineno(a)); 534 if (inopen) 535 putchar('\n' | QUOTE); 536 c = lcolumn(loc1); 537 ugo(c, ' '); 538 ugo(lcolumn(loc2) - c, '^'); 539 flush(); 540 cnt = 0; 541 bkup: 542 ch = c = getkey(); 543 again: 544 if (c == '\b') { 545 if ((inopen) 546 && (cnt > 0)) { 547 putchar('\b' | QUOTE); 548 putchar(' '); 549 putchar('\b' | QUOTE), flush(); 550 cnt --; 551 } 552 goto bkup; 553 } 554 if (c == '\r') 555 c = '\n'; 556 if (inopen && MB_CUR_MAX == 1 || c < 0200) { 557 putchar(c); 558 flush(); 559 cnt++; 560 } 561 if (c != '\n' && c != EOF) { 562 c = getkey(); 563 goto again; 564 } 565 noteinp(); 566 return (ch == 'y'); 567 } 568 569 void 570 ugo(int cnt, int with) 571 { 572 573 if (cnt > 0) 574 do 575 putchar(with); 576 while (--cnt > 0); 577 } 578 579 int casecnt; 580 bool destuc; 581 582 void 583 dosub(void) 584 { 585 unsigned char *lp, *sp, *rp; 586 int c; 587 int len; 588 589 lp = linebuf; 590 sp = genbuf; 591 rp = rhsbuf; 592 while (lp < (unsigned char *)loc1) 593 *sp++ = *lp++; 594 casecnt = 0; 595 /* 596 * Caution: depending on the hardware, c will be either sign 597 * extended or not if C"E is set. Thus, on a VAX, c will 598 * be < 0, but on a 3B, c will be >= 128. 599 */ 600 while (c = *rp) { 601 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 602 len = 1; 603 /* ^V <return> from vi to split lines */ 604 if (c == '\r') 605 c = '\n'; 606 607 if (c == '\\') { 608 rp++; 609 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0) 610 len = 1; 611 switch (c = *rp++) { 612 613 case '&': 614 sp = place(sp, loc1, loc2); 615 if (sp == 0) 616 goto ovflo; 617 continue; 618 619 case 'l': 620 casecnt = 1; 621 destuc = 0; 622 continue; 623 624 case 'L': 625 casecnt = LBSIZE; 626 destuc = 0; 627 continue; 628 629 case 'u': 630 casecnt = 1; 631 destuc = 1; 632 continue; 633 634 case 'U': 635 casecnt = LBSIZE; 636 destuc = 1; 637 continue; 638 639 case 'E': 640 case 'e': 641 casecnt = 0; 642 continue; 643 } 644 if(re != NULL && c >= '1' && c < re->Nbra + '1') { 645 sp = place(sp, braslist[c - '1'] , braelist[c - '1']); 646 if (sp == 0) 647 goto ovflo; 648 continue; 649 } 650 rp--; 651 } 652 if (len > 1) { 653 if ((sp + len) >= &genbuf[LBSIZE]) 654 goto ovflo; 655 strncpy(sp, rp, len); 656 } else { 657 if (casecnt) 658 *sp = fixcase(c); 659 else 660 *sp = c; 661 } 662 sp += len; rp += len; 663 if (sp >= &genbuf[LBSIZE]) 664 ovflo: 665 error(value(vi_TERSE) ? gettext("Line overflow") : 666 gettext("Line overflow in substitute")); 667 } 668 lp = (unsigned char *)loc2; 669 loc2 = (char *)(linebuf + (sp - genbuf)); 670 while (*sp++ = *lp++) 671 if (sp >= &genbuf[LBSIZE]) 672 goto ovflo; 673 strcLIN(genbuf); 674 } 675 676 int 677 fixcase(int c) 678 { 679 680 if (casecnt == 0) 681 return (c); 682 casecnt--; 683 if (destuc) { 684 if (islower(c)) 685 c = toupper(c); 686 } else 687 if (isupper(c)) 688 c = tolower(c); 689 return (c); 690 } 691 692 unsigned char * 693 place(sp, l1, l2) 694 unsigned char *sp, *l1, *l2; 695 { 696 697 while (l1 < l2) { 698 *sp++ = fixcase(*l1++); 699 if (sp >= &genbuf[LBSIZE]) 700 return (0); 701 } 702 return (sp); 703 } 704 705 void 706 snote(int total, int nlines) 707 { 708 709 if (!notable(total)) 710 return; 711 if (nlines != 1 && nlines != total) 712 viprintf(mesg(value(vi_TERSE) ? 713 /* 714 * TRANSLATION_NOTE 715 * Reference order of arguments must not 716 * be changed using '%digit$', since vi's 717 * viprintf() does not support it. 718 */ 719 gettext("%d subs on %d lines") : 720 /* 721 * TRANSLATION_NOTE 722 * Reference order of arguments must not 723 * be changed using '%digit$', since vi's 724 * viprintf() does not support it. 725 */ 726 gettext("%d substitutions on %d lines")), 727 total, nlines); 728 else 729 viprintf(mesg(value(vi_TERSE) ? 730 gettext("%d subs") : 731 gettext("%d substitutions")), 732 total); 733 noonl(); 734 flush(); 735 } 736 737 #ifdef XPG4 738 #include <regex.h> 739 740 extern int regcomp_flags; /* use to specify cflags for regcomp() */ 741 #endif /* XPG4 */ 742 743 int 744 vi_compile(int eof, int oknl) 745 { 746 int c; 747 unsigned char *gp, *p1; 748 unsigned char *rhsp; 749 unsigned char rebuf[LBSIZE]; 750 char multi[MB_LEN_MAX + 1]; 751 int len; 752 wchar_t wc; 753 754 #ifdef XPG4 755 /* 756 * reset cflags to plain BRE 757 * if \< and/or \> is specified, REG_WORDS is set. 758 */ 759 regcomp_flags = 0; 760 #endif /* XPG4 */ 761 762 gp = genbuf; 763 if (isalpha(eof) || isdigit(eof)) 764 error(gettext("Regular expressions cannot be delimited by letters or digits")); 765 if(eof >= 0200 && MB_CUR_MAX > 1) 766 error(gettext("Regular expressions cannot be delimited by multibyte characters")); 767 c = getchar(); 768 if (eof == '\\') 769 switch (c) { 770 771 case '/': 772 case '?': 773 if (scanre == NULL || scanre->Expbuf[1] == 0) 774 error(value(vi_TERSE) ? gettext("No previous scan re") : 775 gettext("No previous scanning regular expression")); 776 resre(scanre); 777 return (c); 778 779 case '&': 780 if (subre == NULL || subre->Expbuf[1] == 0) 781 error(value(vi_TERSE) ? gettext("No previous substitute re") : 782 gettext("No previous substitute regular expression")); 783 resre(subre); 784 return (c); 785 786 default: 787 error(value(vi_TERSE) ? gettext("Badly formed re") : 788 gettext("Regular expression \\ must be followed by / or ?")); 789 } 790 if (c == eof || c == '\n' || c == EOF) { 791 if (re == NULL || re->Expbuf[1] == 0) 792 error(value(vi_TERSE) ? gettext("No previous re") : 793 gettext("No previous regular expression")); 794 if (c == '\n' && oknl == 0) 795 error(value(vi_TERSE) ? gettext("Missing closing delimiter") : 796 gettext("Missing closing delimiter for regular expression")); 797 if (c != eof) 798 ungetchar(c); 799 return (eof); 800 } 801 gp = genbuf; 802 if (c == '^') { 803 *gp++ = c; 804 c = getchar(); 805 } 806 ungetchar(c); 807 for (;;) { 808 c = getchar(); 809 if (c == eof || c == EOF) { 810 if (c == EOF) 811 ungetchar(c); 812 goto out; 813 } 814 if (gp >= &genbuf[LBSIZE - 3]) 815 complex: 816 cerror(value(vi_TERSE) ? 817 (unsigned char *)gettext("Re too complex") : 818 (unsigned char *) 819 gettext("Regular expression too complicated")); 820 821 if (!(isascii(c) || MB_CUR_MAX == 1)) { 822 ungetchar(c); 823 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 824 if ((gp + len) >= &genbuf[LBSIZE - 3]) 825 goto complex; 826 strncpy(gp, multi, len); 827 gp += len; 828 continue; 829 } 830 (void) getchar(); 831 } 832 833 switch (c) { 834 835 case '\\': 836 c = getchar(); 837 if (!isascii(c)) { 838 ungetchar(c); 839 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 840 if ((gp + len) >= &genbuf[LBSIZE - 3]) 841 goto complex; 842 *gp++ = '\\'; 843 strncpy(gp, multi, len); 844 gp += len; 845 continue; 846 } 847 (void) getchar(); 848 } 849 850 switch (c) { 851 852 case '<': 853 case '>': 854 #ifdef XPG4 855 regcomp_flags = REG_WORDS; 856 /*FALLTHRU*/ 857 #endif /* XPG4 */ 858 case '(': 859 case ')': 860 case '{': 861 case '}': 862 case '$': 863 case '^': 864 case '\\': 865 *gp++ = '\\'; 866 *gp++ = c; 867 continue; 868 869 case 'n': 870 *gp++ = c; 871 continue; 872 } 873 if(c >= '0' && c <= '9') { 874 *gp++ = '\\'; 875 *gp++ = c; 876 continue; 877 } 878 if (value(vi_MAGIC) == 0) 879 magic: 880 switch (c) { 881 882 case '.': 883 *gp++ = '.'; 884 continue; 885 886 case '~': 887 rhsp = rhsbuf; 888 while (*rhsp) { 889 if (!isascii(*rhsp)) { 890 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) { 891 if ((gp + len) >= &genbuf[LBSIZE-2]) 892 goto complex; 893 strncpy(gp, rhsp, len); 894 rhsp += len; gp += len; 895 continue; 896 } 897 } 898 len = 1; 899 if (*rhsp == '\\') { 900 c = *++rhsp; 901 if (c == '&') 902 cerror(value(vi_TERSE) ? (unsigned char *) 903 gettext("Replacement pattern contains &") : 904 (unsigned char *)gettext("Replacement pattern contains & - cannot use in re")); 905 if (c >= '1' && c <= '9') 906 cerror(value(vi_TERSE) ? (unsigned char *) 907 gettext("Replacement pattern contains \\d") : 908 (unsigned char *) 909 gettext("Replacement pattern contains \\d - cannot use in re")); 910 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) { 911 len = 1; 912 if(any(c, ".\\*[$")) 913 *gp++ = '\\'; 914 } 915 } 916 917 if ((gp + len) >= &genbuf[LBSIZE-2]) 918 goto complex; 919 if (len == 1) { 920 c = *rhsp++; 921 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 922 } else { 923 strncpy(gp, rhsp, len); 924 gp += len; rhsp += len; 925 } 926 } 927 continue; 928 929 case '*': 930 *gp++ = '*'; 931 continue; 932 933 case '[': 934 *gp++ = '['; 935 c = getchar(); 936 if (c == '^') { 937 *gp++ = '^'; 938 c = getchar(); 939 } 940 941 do { 942 if (!isascii(c) && c != EOF) { 943 ungetchar(c); 944 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) { 945 if ((gp + len)>= &genbuf[LBSIZE-4]) 946 goto complex; 947 strncpy(gp, multi, len); 948 gp += len; 949 c = getchar(); 950 continue; 951 } 952 (void) getchar(); 953 } 954 955 if (gp >= &genbuf[LBSIZE-4]) 956 goto complex; 957 if(c == '\\' && peekchar() == ']') { 958 (void)getchar(); 959 *gp++ = '\\'; 960 *gp++ = ']'; 961 } 962 else if (c == '\n' || c == EOF) 963 cerror((unsigned char *) 964 gettext("Missing ]")); 965 else 966 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 967 c = getchar(); 968 } while(c != ']'); 969 *gp++ = ']'; 970 continue; 971 } 972 if (c == EOF) { 973 ungetchar(EOF); 974 *gp++ = '\\'; 975 *gp++ = '\\'; 976 continue; 977 } 978 if (c == '\n') 979 cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") : 980 (unsigned char *)gettext("Can't escape newlines into regular expressions")); 981 *gp++ = '\\'; 982 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 983 continue; 984 985 case '\n': 986 if (oknl) { 987 ungetchar(c); 988 goto out; 989 } 990 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") : 991 (unsigned char *)gettext("Missing closing delimiter for regular expression")); 992 993 case '.': 994 case '~': 995 case '*': 996 case '[': 997 if (value(vi_MAGIC)) 998 goto magic; 999 if(c != '~') 1000 *gp++ = '\\'; 1001 defchar: 1002 default: 1003 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c); 1004 continue; 1005 } 1006 } 1007 out: 1008 *gp++ = '\0'; 1009 1010 #ifdef XPG4 1011 /* see if our compiled RE's will fit in the re structure: */ 1012 if (regexc_size > EXPSIZ) { 1013 /* 1014 * this should never happen. but it's critical that we 1015 * check here, otherwise .bss would get overwritten. 1016 */ 1017 cerror(value(vi_TERSE) ? (unsigned char *) 1018 gettext("RE's can't fit") : 1019 (unsigned char *)gettext("Regular expressions can't fit")); 1020 return(eof); 1021 } 1022 1023 /* 1024 * We create re each time we need it. 1025 */ 1026 1027 if (re == NULL || re == scanre || re == subre) { 1028 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1029 error(gettext("out of memory")); 1030 exit(errcnt); 1031 } 1032 } else { 1033 regex_comp_free(&re->Expbuf); 1034 memset(re, 0, sizeof(struct regexp)); 1035 } 1036 1037 compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf 1038 + regexc_size); 1039 #else /* !XPG4 */ 1040 (void) _compile((const char *)genbuf, (char *)re->Expbuf, 1041 (char *)(re->Expbuf + sizeof (re->Expbuf)), 1); 1042 #endif /* XPG4 */ 1043 1044 if(regerrno) 1045 switch(regerrno) { 1046 1047 case 42: 1048 cerror((unsigned char *)gettext("\\( \\) Imbalance")); 1049 case 43: 1050 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") : 1051 (unsigned char *) 1052 gettext("Too many \\('d subexpressions in a regular expression")); 1053 case 50: 1054 goto complex; 1055 case 67: 1056 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") : 1057 (unsigned char *)gettext("Regular expression has illegal byte sequence")); 1058 } 1059 re->Nbra = nbra; 1060 return(eof); 1061 } 1062 1063 void 1064 cerror(unsigned char *s) 1065 { 1066 if (re) { 1067 re->Expbuf[0] = re->Expbuf[1] = 0; 1068 } 1069 error(s); 1070 } 1071 1072 int 1073 execute(int gf, line *addr) 1074 { 1075 unsigned char *p1, *p2; 1076 char *start; 1077 int c, i; 1078 int ret; 1079 int len; 1080 1081 if (gf) { 1082 if (re == NULL || re->Expbuf[0]) 1083 return (0); 1084 if(value(vi_IGNORECASE)) { 1085 p1 = genbuf; 1086 p2 = (unsigned char *)loc2; 1087 while(c = *p2) { 1088 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1089 len = 1; 1090 if (len == 1) { 1091 *p1++ = tolower(c); 1092 p2++; 1093 continue; 1094 } 1095 strncpy(p1, p2, len); 1096 p1 += len; p2 += len; 1097 } 1098 *p1 = '\0'; 1099 locs = (char *)genbuf; 1100 p1 = genbuf; 1101 start = loc2; 1102 } else { 1103 p1 = (unsigned char *)loc2; 1104 locs = loc2; 1105 } 1106 } else { 1107 if (addr == zero) 1108 return (0); 1109 p1 = linebuf; 1110 getline(*addr); 1111 if(value(vi_IGNORECASE)) { 1112 p1 = genbuf; 1113 p2 = linebuf; 1114 while(c = *p2) { 1115 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0) 1116 len = 1; 1117 if (len == 1) { 1118 *p1++ = tolower(c); 1119 p2++; 1120 continue; 1121 } 1122 strncpy(p1, p2, len); 1123 p1 += len; p2 += len; 1124 } 1125 *p1 = '\0'; 1126 p1 = genbuf; 1127 start = (char *)linebuf; 1128 } 1129 locs = (char *)0; 1130 } 1131 1132 ret = step((char *)p1, (char *)re->Expbuf); 1133 1134 if(value(vi_IGNORECASE) && ret) { 1135 loc1 = start + (loc1 - (char *)genbuf); 1136 loc2 = start + (loc2 - (char *)genbuf); 1137 for(i = 0; i < NBRA; i++) { 1138 braslist[i] = start + (braslist[i] - (char *)genbuf); 1139 braelist[i] = start + (braelist[i] - (char *)genbuf); 1140 } 1141 } 1142 return ret; 1143 } 1144 1145 /* 1146 * Initialize the compiled regular-expression storage areas (called from 1147 * main()). 1148 */ 1149 1150 void init_re (void) 1151 { 1152 #ifdef XPG4 1153 re = scanre = subre = NULL; 1154 #else /* !XPG4 */ 1155 if ((re = calloc(1, sizeof(struct regexp))) == NULL) { 1156 error(gettext("out of memory")); 1157 exit(errcnt); 1158 } 1159 1160 if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) { 1161 error(gettext("out of memory")); 1162 exit(errcnt); 1163 } 1164 1165 if ((subre = calloc(1, sizeof(struct regexp))) == NULL) { 1166 error(gettext("out of memory")); 1167 exit(errcnt); 1168 } 1169 #endif /* XPG4 */ 1170 } 1171 1172 /* 1173 * Save what is in the special place re to the named alternate 1174 * location. This means freeing up what's currently in this target 1175 * location, if necessary. 1176 */ 1177 1178 void savere(struct regexp ** a) 1179 { 1180 #ifdef XPG4 1181 if (a == NULL || re == NULL) { 1182 return; 1183 } 1184 1185 if (*a == NULL) { 1186 *a = re; 1187 return; 1188 } 1189 1190 if (*a != re) { 1191 if (scanre != subre) { 1192 regex_comp_free(&((*a)->Expbuf)); 1193 free(*a); 1194 } 1195 *a = re; 1196 } 1197 #else /* !XPG4 */ 1198 memcpy(*a, re, sizeof(struct regexp)); 1199 #endif /* XPG4 */ 1200 } 1201 1202 1203 /* 1204 * Restore what is in the named alternate location to the special place 1205 * re. This means first freeing up what's currently in re, if necessary. 1206 */ 1207 1208 void resre(struct regexp * a) 1209 { 1210 #ifdef XPG4 1211 if (a == NULL) { 1212 return; 1213 } 1214 1215 if (re == NULL) { 1216 re = a; 1217 return; 1218 } 1219 1220 if (a != re) { 1221 if ((re != scanre) && (re != subre)) { 1222 regex_comp_free(&re->Expbuf); 1223 free(re); 1224 } 1225 1226 re = a; 1227 } 1228 #else /* !XPG4 */ 1229 memcpy(re, a, sizeof(struct regexp)); 1230 #endif /* XPG4 */ 1231 } 1232