1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29
30 /* Copyright (c) 1981 Regents of the University of California */
31
32 #include "ex.h"
33 #include "ex_re.h"
34
35 /* from libgen */
36 char *_compile(const char *, char *, char *, int);
37
38 /*
39 * The compiled-regular-expression storage areas (re, scanre, and subre)
40 * have been changed into dynamically allocated memory areas, in both the
41 * Solaris and XPG4 versions.
42 *
43 * In the Solaris version, which uses the original libgen(3g) compile()
44 * and step() calls, these areas are allocated once, and then data are
45 * copied between them subsequently, as they were in the original
46 * implementation. This is possible because the compiled information is
47 * a self-contained block of bits.
48 *
49 * In the XPG4 version, the expr:compile.o object is linked in as a
50 * simulation of these functions using the new regcomp() and regexec()
51 * functions. The problem here is that the resulting
52 * compiled-regular-expression data contain pointers to other data, which
53 * need to be freed, but only when we are quite sure that we are done
54 * with them - and certainly not before. There was an earlier attempt to
55 * handle these differences, but that effort was flawed.
56 */
57
58 extern int getchar();
59 #ifdef XPG4
60 void regex_comp_free(void *);
61 extern size_t regexc_size; /* compile.c: size of regex_comp structure */
62 #endif /* XPG4 */
63
64 /*
65 * Global, substitute and regular expressions.
66 * Very similar to ed, with some re extensions and
67 * confirmed substitute.
68 */
69 void
global(k)70 global(k)
71 bool k;
72 {
73 unsigned char *gp;
74 int c;
75 line *a1;
76 unsigned char globuf[GBSIZE], *Cwas;
77 int nlines = lineDOL();
78 int oinglobal = inglobal;
79 unsigned char *oglobp = globp;
80 char multi[MB_LEN_MAX + 1];
81 wchar_t wc;
82 int len;
83
84
85 Cwas = Command;
86 /*
87 * States of inglobal:
88 * 0: ordinary - not in a global command.
89 * 1: text coming from some buffer, not tty.
90 * 2: like 1, but the source of the buffer is a global command.
91 * Hence you're only in a global command if inglobal==2. This
92 * strange sounding convention is historically derived from
93 * everybody simulating a global command.
94 */
95 if (inglobal==2)
96 error(value(vi_TERSE) ? gettext("Global within global") :
97 gettext("Global within global not allowed"));
98 markDOT();
99 setall();
100 nonzero();
101 if (skipend())
102 error(value(vi_TERSE) ? gettext("Global needs re") :
103 gettext("Missing regular expression for global"));
104 c = getchar();
105 (void)vi_compile(c, 1);
106 savere(&scanre);
107 gp = globuf;
108 while ((c = peekchar()) != '\n') {
109 if (!isascii(c)) {
110 if (c == EOF) {
111 c = '\n';
112 ungetchar(c);
113 goto out;
114 }
115
116 mb_copy:
117 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
118 if ((gp + len) >= &globuf[GBSIZE - 2])
119 error(gettext("Global command too long"));
120 strncpy(gp, multi, len);
121 gp += len;
122 continue;
123 }
124 }
125
126 (void) getchar();
127 switch (c) {
128
129 case EOF:
130 c = '\n';
131 ungetchar(c);
132 goto out;
133
134 case '\\':
135 c = peekchar();
136 if (!isascii(c)) {
137 *gp++ = '\\';
138 goto mb_copy;
139 }
140
141 (void) getchar();
142 switch (c) {
143
144 case '\\':
145 ungetchar(c);
146 break;
147
148 case '\n':
149 break;
150
151 default:
152 *gp++ = '\\';
153 break;
154 }
155 break;
156 }
157 *gp++ = c;
158 if (gp >= &globuf[GBSIZE - 2])
159 error(gettext("Global command too long"));
160 }
161
162 out:
163 donewline();
164 *gp++ = c;
165 *gp++ = 0;
166 saveall();
167 inglobal = 2;
168 for (a1 = one; a1 <= dol; a1++) {
169 *a1 &= ~01;
170 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
171 *a1 |= 01;
172 }
173 #ifdef notdef
174 /*
175 * This code is commented out for now. The problem is that we don't
176 * fix up the undo area the way we should. Basically, I think what has
177 * to be done is to copy the undo area down (since we shrunk everything)
178 * and move the various pointers into it down too. I will do this later
179 * when I have time. (Mark, 10-20-80)
180 */
181 /*
182 * Special case: g/.../d (avoid n^2 algorithm)
183 */
184 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
185 gdelete();
186 return;
187 }
188 #endif
189 if (inopen)
190 inopen = -1;
191 /*
192 * Now for each marked line, set dot there and do the commands.
193 * Note the n^2 behavior here for lots of lines matching.
194 * This is really needed: in some cases you could delete lines,
195 * causing a marked line to be moved before a1 and missed if
196 * we didn't restart at zero each time.
197 */
198 for (a1 = one; a1 <= dol; a1++) {
199 if (*a1 & 01) {
200 *a1 &= ~01;
201 dot = a1;
202 globp = globuf;
203 commands(1, 1);
204 a1 = zero;
205 }
206 }
207 globp = oglobp;
208 inglobal = oinglobal;
209 endline = 1;
210 Command = Cwas;
211 netchHAD(nlines);
212 setlastchar(EOF);
213 if (inopen) {
214 ungetchar(EOF);
215 inopen = 1;
216 }
217 }
218
219 /*
220 * gdelete: delete inside a global command. Handles the
221 * special case g/r.e./d. All lines to be deleted have
222 * already been marked. Squeeze the remaining lines together.
223 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
224 * and g/r.e./.,/r.e.2/d are not treated specially. There is no
225 * good reason for this except the question: where to you draw the line?
226 */
227 void
gdelete(void)228 gdelete(void)
229 {
230 line *a1, *a2, *a3;
231
232 a3 = dol;
233 /* find first marked line. can skip all before it */
234 for (a1=zero; (*a1&01)==0; a1++)
235 if (a1>=a3)
236 return;
237 /* copy down unmarked lines, compacting as we go. */
238 for (a2=a1+1; a2<=a3;) {
239 if (*a2&01) {
240 a2++; /* line is marked, skip it */
241 dot = a1; /* dot left after line deletion */
242 } else
243 *a1++ = *a2++; /* unmarked, copy it */
244 }
245 dol = a1-1;
246 if (dot>dol)
247 dot = dol;
248 change();
249 }
250
251 bool cflag;
252 int scount, slines, stotal;
253
254 int
substitute(int c)255 substitute(int c)
256 {
257 line *addr;
258 int n;
259 int gsubf, hopcount;
260
261 gsubf = compsub(c);
262 if(FIXUNDO)
263 save12(), undkind = UNDCHANGE;
264 stotal = 0;
265 slines = 0;
266 for (addr = addr1; addr <= addr2; addr++) {
267 scount = hopcount = 0;
268 if (dosubcon(0, addr) == 0)
269 continue;
270 if (gsubf) {
271 /*
272 * The loop can happen from s/\</&/g
273 * but we don't want to break other, reasonable cases.
274 */
275 hopcount = 0;
276 while (*loc2) {
277 if (++hopcount > sizeof linebuf)
278 error(gettext("substitution loop"));
279 if (dosubcon(1, addr) == 0)
280 break;
281 }
282 }
283 if (scount) {
284 stotal += scount;
285 slines++;
286 putmark(addr);
287 n = append(getsub, addr);
288 addr += n;
289 addr2 += n;
290 }
291 }
292 if (stotal == 0 && !inglobal && !cflag)
293 error(value(vi_TERSE) ? gettext("Fail") :
294 gettext("Substitute pattern match failed"));
295 snote(stotal, slines);
296 return (stotal);
297 }
298
299 int
compsub(int ch)300 compsub(int ch)
301 {
302 int seof, c, uselastre;
303 static int gsubf;
304 static unsigned char remem[RHSSIZE];
305 static int remflg = -1;
306
307 if (!value(vi_EDCOMPATIBLE))
308 gsubf = cflag = 0;
309 uselastre = 0;
310 switch (ch) {
311
312 case 's':
313 (void)skipwh();
314 seof = getchar();
315 if (endcmd(seof) || any(seof, "gcr")) {
316 ungetchar(seof);
317 goto redo;
318 }
319 if (isalpha(seof) || isdigit(seof))
320 error(value(vi_TERSE) ? gettext("Substitute needs re") :
321 gettext("Missing regular expression for substitute"));
322 seof = vi_compile(seof, 1);
323 uselastre = 1;
324 comprhs(seof);
325 gsubf = cflag = 0;
326 break;
327
328 case '~':
329 uselastre = 1;
330 /* FALLTHROUGH */
331 case '&':
332 redo:
333 if (re == NULL || re->Expbuf[1] == 0)
334 error(value(vi_TERSE) ? gettext("No previous re") :
335 gettext("No previous regular expression"));
336 if (subre == NULL || subre->Expbuf[1] == 0)
337 error(value(vi_TERSE) ? gettext("No previous substitute re") :
338 gettext("No previous substitute to repeat"));
339 break;
340 }
341 for (;;) {
342 c = getchar();
343 switch (c) {
344
345 case 'g':
346 gsubf = !gsubf;
347 continue;
348
349 case 'c':
350 cflag = !cflag;
351 continue;
352
353 case 'r':
354 uselastre = 1;
355 continue;
356
357 default:
358 ungetchar(c);
359 setcount();
360 donewline();
361 if (uselastre)
362 savere(&subre);
363 else
364 resre(subre);
365
366 /*
367 * The % by itself on the right hand side means
368 * that the previous value of the right hand side
369 * should be used. A -1 is used to indicate no
370 * previously remembered search string.
371 */
372
373 if (rhsbuf[0] == '%' && rhsbuf[1] == 0)
374 if (remflg == -1)
375 error(gettext("No previously remembered string"));
376 else
377 strcpy(rhsbuf, remem);
378 else {
379 strcpy(remem, rhsbuf);
380 remflg = 1;
381 }
382 return (gsubf);
383 }
384 }
385 }
386
387 void
comprhs(int seof)388 comprhs(int seof)
389 {
390 unsigned char *rp, *orp;
391 int c;
392 unsigned char orhsbuf[RHSSIZE];
393 char multi[MB_LEN_MAX + 1];
394 int len;
395 wchar_t wc;
396
397 rp = rhsbuf;
398 CP(orhsbuf, rp);
399 for (;;) {
400 c = peekchar();
401 if (c == seof) {
402 (void) getchar();
403 break;
404 }
405
406 if (!isascii(c) && c != EOF) {
407 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
408 if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
409 goto toobig;
410 strncpy(rp, multi, len);
411 rp += len;
412 continue;
413 }
414 }
415
416 (void) getchar();
417 switch (c) {
418
419 case '\\':
420 c = peekchar();
421 if (c == EOF) {
422 (void) getchar();
423 error(gettext("Replacement string ends with \\"));
424 }
425
426 if (!isascii(c)) {
427 *rp++ = '\\';
428 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
429 if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
430 goto over_flow;
431 strncpy(rp, multi, len);
432 rp += len;
433 continue;
434 }
435 }
436
437 (void) getchar();
438 if (value(vi_MAGIC)) {
439 /*
440 * When "magic", \& turns into a plain &,
441 * and all other chars work fine quoted.
442 */
443 if (c != '&') {
444 if(rp >= &rhsbuf[RHSSIZE - 1]) {
445 *rp=0;
446 error(value(vi_TERSE) ?
447 gettext("Replacement pattern too long") :
448 gettext("Replacement pattern too long - limit 256 characters"));
449 }
450 *rp++ = '\\';
451 }
452 break;
453 }
454 magic:
455 if (c == '~') {
456 for (orp = orhsbuf; *orp; *rp++ = *orp++)
457 if (rp >= &rhsbuf[RHSSIZE - 1])
458 goto toobig;
459 continue;
460 }
461 if(rp >= &rhsbuf[RHSSIZE - 1]) {
462 over_flow:
463 *rp=0;
464 error(value(vi_TERSE) ?
465 gettext("Replacement pattern too long") :
466 gettext("Replacement pattern too long - limit 256 characters"));
467 }
468 *rp++ = '\\';
469 break;
470
471 case '\n':
472 case EOF:
473 if (!(globp && globp[0])) {
474 ungetchar(c);
475 goto endrhs;
476 }
477 /* FALLTHROUGH */
478
479 case '~':
480 case '&':
481 if (value(vi_MAGIC))
482 goto magic;
483 break;
484 }
485 if (rp >= &rhsbuf[RHSSIZE - 1]) {
486 toobig:
487 *rp = 0;
488 error(value(vi_TERSE) ?
489 gettext("Replacement pattern too long") :
490 gettext("Replacement pattern too long - limit 256 characters"));
491 }
492 *rp++ = c;
493 }
494 endrhs:
495 *rp++ = 0;
496 }
497
498 int
getsub(void)499 getsub(void)
500 {
501 unsigned char *p;
502
503 if ((p = linebp) == 0)
504 return (EOF);
505 strcLIN(p);
506 linebp = 0;
507 return (0);
508 }
509
510 int
dosubcon(bool f,line * a)511 dosubcon(bool f, line *a)
512 {
513
514 if (execute(f, a) == 0)
515 return (0);
516 if (confirmed(a)) {
517 dosub();
518 scount++;
519 }
520 return (1);
521 }
522
523 int
confirmed(line * a)524 confirmed(line *a)
525 {
526 int c, cnt, ch;
527
528 if (cflag == 0)
529 return (1);
530 pofix();
531 pline(lineno(a));
532 if (inopen)
533 putchar('\n' | QUOTE);
534 c = lcolumn(loc1);
535 ugo(c, ' ');
536 ugo(lcolumn(loc2) - c, '^');
537 flush();
538 cnt = 0;
539 bkup:
540 ch = c = getkey();
541 again:
542 if (c == '\b') {
543 if ((inopen)
544 && (cnt > 0)) {
545 putchar('\b' | QUOTE);
546 putchar(' ');
547 putchar('\b' | QUOTE), flush();
548 cnt --;
549 }
550 goto bkup;
551 }
552 if (c == '\r')
553 c = '\n';
554 if (inopen && MB_CUR_MAX == 1 || c < 0200) {
555 putchar(c);
556 flush();
557 cnt++;
558 }
559 if (c != '\n' && c != EOF) {
560 c = getkey();
561 goto again;
562 }
563 noteinp();
564 return (ch == 'y');
565 }
566
/*
 * ugo: output the character `with' cnt times.
 * Nothing is written when cnt is zero or negative.
 */
void
ugo(int cnt, int with)
{
	for (; cnt > 0; cnt--)
		putchar(with);
}
576
577 int casecnt;
578 bool destuc;
579
580 void
dosub(void)581 dosub(void)
582 {
583 unsigned char *lp, *sp, *rp;
584 int c;
585 int len;
586
587 lp = linebuf;
588 sp = genbuf;
589 rp = rhsbuf;
590 while (lp < (unsigned char *)loc1)
591 *sp++ = *lp++;
592 casecnt = 0;
593 /*
594 * Caution: depending on the hardware, c will be either sign
595 * extended or not if C"E is set. Thus, on a VAX, c will
596 * be < 0, but on a 3B, c will be >= 128.
597 */
598 while (c = *rp) {
599 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
600 len = 1;
601 /* ^V <return> from vi to split lines */
602 if (c == '\r')
603 c = '\n';
604
605 if (c == '\\') {
606 rp++;
607 if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
608 len = 1;
609 switch (c = *rp++) {
610
611 case '&':
612 sp = place(sp, loc1, loc2);
613 if (sp == 0)
614 goto ovflo;
615 continue;
616
617 case 'l':
618 casecnt = 1;
619 destuc = 0;
620 continue;
621
622 case 'L':
623 casecnt = LBSIZE;
624 destuc = 0;
625 continue;
626
627 case 'u':
628 casecnt = 1;
629 destuc = 1;
630 continue;
631
632 case 'U':
633 casecnt = LBSIZE;
634 destuc = 1;
635 continue;
636
637 case 'E':
638 case 'e':
639 casecnt = 0;
640 continue;
641 }
642 if(re != NULL && c >= '1' && c < re->Nbra + '1') {
643 sp = place(sp, braslist[c - '1'] , braelist[c - '1']);
644 if (sp == 0)
645 goto ovflo;
646 continue;
647 }
648 rp--;
649 }
650 if (len > 1) {
651 if ((sp + len) >= &genbuf[LBSIZE])
652 goto ovflo;
653 strncpy(sp, rp, len);
654 } else {
655 if (casecnt)
656 *sp = fixcase(c);
657 else
658 *sp = c;
659 }
660 sp += len; rp += len;
661 if (sp >= &genbuf[LBSIZE])
662 ovflo:
663 error(value(vi_TERSE) ? gettext("Line overflow") :
664 gettext("Line overflow in substitute"));
665 }
666 lp = (unsigned char *)loc2;
667 loc2 = (char *)(linebuf + (sp - genbuf));
668 while (*sp++ = *lp++)
669 if (sp >= &genbuf[LBSIZE])
670 goto ovflo;
671 strcLIN(genbuf);
672 }
673
674 int
fixcase(int c)675 fixcase(int c)
676 {
677
678 if (casecnt == 0)
679 return (c);
680 casecnt--;
681 if (destuc) {
682 if (islower(c))
683 c = toupper(c);
684 } else
685 if (isupper(c))
686 c = tolower(c);
687 return (c);
688 }
689
690 unsigned char *
place(sp,l1,l2)691 place(sp, l1, l2)
692 unsigned char *sp, *l1, *l2;
693 {
694
695 while (l1 < l2) {
696 *sp++ = fixcase(*l1++);
697 if (sp >= &genbuf[LBSIZE])
698 return (0);
699 }
700 return (sp);
701 }
702
703 void
snote(int total,int nlines)704 snote(int total, int nlines)
705 {
706
707 if (!notable(total))
708 return;
709 if (nlines != 1 && nlines != total)
710 viprintf(mesg(value(vi_TERSE) ?
711 /*
712 * TRANSLATION_NOTE
713 * Reference order of arguments must not
714 * be changed using '%digit$', since vi's
715 * viprintf() does not support it.
716 */
717 gettext("%d subs on %d lines") :
718 /*
719 * TRANSLATION_NOTE
720 * Reference order of arguments must not
721 * be changed using '%digit$', since vi's
722 * viprintf() does not support it.
723 */
724 gettext("%d substitutions on %d lines")),
725 total, nlines);
726 else
727 viprintf(mesg(value(vi_TERSE) ?
728 gettext("%d subs") :
729 gettext("%d substitutions")),
730 total);
731 noonl();
732 flush();
733 }
734
735 #ifdef XPG4
736 #include <regex.h>
737
738 extern int regcomp_flags; /* use to specify cflags for regcomp() */
739 #endif /* XPG4 */
740
741 int
vi_compile(int eof,int oknl)742 vi_compile(int eof, int oknl)
743 {
744 int c;
745 unsigned char *gp, *p1;
746 unsigned char *rhsp;
747 unsigned char rebuf[LBSIZE];
748 char multi[MB_LEN_MAX + 1];
749 int len;
750 wchar_t wc;
751
752 #ifdef XPG4
753 /*
754 * reset cflags to plain BRE
755 */
756 regcomp_flags = 0;
757 #endif /* XPG4 */
758
759 gp = genbuf;
760 if (isalpha(eof) || isdigit(eof))
761 error(gettext("Regular expressions cannot be delimited by letters or digits"));
762 if(eof >= 0200 && MB_CUR_MAX > 1)
763 error(gettext("Regular expressions cannot be delimited by multibyte characters"));
764 c = getchar();
765 if (eof == '\\')
766 switch (c) {
767
768 case '/':
769 case '?':
770 if (scanre == NULL || scanre->Expbuf[1] == 0)
771 error(value(vi_TERSE) ? gettext("No previous scan re") :
772 gettext("No previous scanning regular expression"));
773 resre(scanre);
774 return (c);
775
776 case '&':
777 if (subre == NULL || subre->Expbuf[1] == 0)
778 error(value(vi_TERSE) ? gettext("No previous substitute re") :
779 gettext("No previous substitute regular expression"));
780 resre(subre);
781 return (c);
782
783 default:
784 error(value(vi_TERSE) ? gettext("Badly formed re") :
785 gettext("Regular expression \\ must be followed by / or ?"));
786 }
787 if (c == eof || c == '\n' || c == EOF) {
788 if (re == NULL || re->Expbuf[1] == 0)
789 error(value(vi_TERSE) ? gettext("No previous re") :
790 gettext("No previous regular expression"));
791 if (c == '\n' && oknl == 0)
792 error(value(vi_TERSE) ? gettext("Missing closing delimiter") :
793 gettext("Missing closing delimiter for regular expression"));
794 if (c != eof)
795 ungetchar(c);
796 return (eof);
797 }
798 gp = genbuf;
799 if (c == '^') {
800 *gp++ = c;
801 c = getchar();
802 }
803 ungetchar(c);
804 for (;;) {
805 c = getchar();
806 if (c == eof || c == EOF) {
807 if (c == EOF)
808 ungetchar(c);
809 goto out;
810 }
811 if (gp >= &genbuf[LBSIZE - 3])
812 complex:
813 cerror(value(vi_TERSE) ?
814 (unsigned char *)gettext("Re too complex") :
815 (unsigned char *)
816 gettext("Regular expression too complicated"));
817
818 if (!(isascii(c) || MB_CUR_MAX == 1)) {
819 ungetchar(c);
820 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
821 if ((gp + len) >= &genbuf[LBSIZE - 3])
822 goto complex;
823 strncpy(gp, multi, len);
824 gp += len;
825 continue;
826 }
827 (void) getchar();
828 }
829
830 switch (c) {
831
832 case '\\':
833 c = getchar();
834 if (!isascii(c)) {
835 ungetchar(c);
836 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
837 if ((gp + len) >= &genbuf[LBSIZE - 3])
838 goto complex;
839 *gp++ = '\\';
840 strncpy(gp, multi, len);
841 gp += len;
842 continue;
843 }
844 (void) getchar();
845 }
846
847 switch (c) {
848
849 case '<':
850 case '>':
851 case '(':
852 case ')':
853 case '{':
854 case '}':
855 case '$':
856 case '^':
857 case '\\':
858 *gp++ = '\\';
859 *gp++ = c;
860 continue;
861
862 case 'n':
863 *gp++ = c;
864 continue;
865 }
866 if(c >= '0' && c <= '9') {
867 *gp++ = '\\';
868 *gp++ = c;
869 continue;
870 }
871 if (value(vi_MAGIC) == 0)
872 magic:
873 switch (c) {
874
875 case '.':
876 *gp++ = '.';
877 continue;
878
879 case '~':
880 rhsp = rhsbuf;
881 while (*rhsp) {
882 if (!isascii(*rhsp)) {
883 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) {
884 if ((gp + len) >= &genbuf[LBSIZE-2])
885 goto complex;
886 strncpy(gp, rhsp, len);
887 rhsp += len; gp += len;
888 continue;
889 }
890 }
891 len = 1;
892 if (*rhsp == '\\') {
893 c = *++rhsp;
894 if (c == '&')
895 cerror(value(vi_TERSE) ? (unsigned char *)
896 gettext("Replacement pattern contains &") :
897 (unsigned char *)gettext("Replacement pattern contains & - cannot use in re"));
898 if (c >= '1' && c <= '9')
899 cerror(value(vi_TERSE) ? (unsigned char *)
900 gettext("Replacement pattern contains \\d") :
901 (unsigned char *)
902 gettext("Replacement pattern contains \\d - cannot use in re"));
903 if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) {
904 len = 1;
905 if(any(c, ".\\*[$"))
906 *gp++ = '\\';
907 }
908 }
909
910 if ((gp + len) >= &genbuf[LBSIZE-2])
911 goto complex;
912 if (len == 1) {
913 c = *rhsp++;
914 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
915 } else {
916 strncpy(gp, rhsp, len);
917 gp += len; rhsp += len;
918 }
919 }
920 continue;
921
922 case '*':
923 *gp++ = '*';
924 continue;
925
926 case '[':
927 *gp++ = '[';
928 c = getchar();
929 if (c == '^') {
930 *gp++ = '^';
931 c = getchar();
932 }
933
934 do {
935 if (!isascii(c) && c != EOF) {
936 ungetchar(c);
937 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
938 if ((gp + len)>= &genbuf[LBSIZE-4])
939 goto complex;
940 strncpy(gp, multi, len);
941 gp += len;
942 c = getchar();
943 continue;
944 }
945 (void) getchar();
946 }
947
948 if (gp >= &genbuf[LBSIZE-4])
949 goto complex;
950 if(c == '\\' && peekchar() == ']') {
951 (void)getchar();
952 *gp++ = '\\';
953 *gp++ = ']';
954 }
955 else if (c == '\n' || c == EOF)
956 cerror((unsigned char *)
957 gettext("Missing ]"));
958 else
959 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
960 c = getchar();
961 } while(c != ']');
962 *gp++ = ']';
963 continue;
964 }
965 if (c == EOF) {
966 ungetchar(EOF);
967 *gp++ = '\\';
968 *gp++ = '\\';
969 continue;
970 }
971 if (c == '\n')
972 cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") :
973 (unsigned char *)gettext("Can't escape newlines into regular expressions"));
974 *gp++ = '\\';
975 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
976 continue;
977
978 case '\n':
979 if (oknl) {
980 ungetchar(c);
981 goto out;
982 }
983 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") :
984 (unsigned char *)gettext("Missing closing delimiter for regular expression"));
985 /* FALLTHROUGH */
986
987 case '.':
988 case '~':
989 case '*':
990 case '[':
991 if (value(vi_MAGIC))
992 goto magic;
993 if(c != '~')
994 *gp++ = '\\';
995 /* FALLTHROUGH */
996 defchar:
997 default:
998 *gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
999 continue;
1000 }
1001 }
1002 out:
1003 *gp++ = '\0';
1004
1005 #ifdef XPG4
1006 /* see if our compiled RE's will fit in the re structure: */
1007 if (regexc_size > EXPSIZ) {
1008 /*
1009 * this should never happen. but it's critical that we
1010 * check here, otherwise .bss would get overwritten.
1011 */
1012 cerror(value(vi_TERSE) ? (unsigned char *)
1013 gettext("RE's can't fit") :
1014 (unsigned char *)gettext("Regular expressions can't fit"));
1015 return(eof);
1016 }
1017
1018 /*
1019 * We create re each time we need it.
1020 */
1021
1022 if (re == NULL || re == scanre || re == subre) {
1023 if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1024 error(gettext("out of memory"));
1025 exit(errcnt);
1026 }
1027 } else {
1028 regex_comp_free(&re->Expbuf);
1029 memset(re, 0, sizeof(struct regexp));
1030 }
1031
1032 compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf
1033 + regexc_size);
1034 #else /* !XPG4 */
1035 (void) _compile((const char *)genbuf, (char *)re->Expbuf,
1036 (char *)(re->Expbuf + sizeof (re->Expbuf)), 1);
1037 #endif /* XPG4 */
1038
1039 if(regerrno)
1040 switch(regerrno) {
1041
1042 case 42:
1043 cerror((unsigned char *)gettext("\\( \\) Imbalance"));
1044 /* FALLTHROUGH */
1045 case 43:
1046 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") :
1047 (unsigned char *)
1048 gettext("Too many \\('d subexpressions in a regular expression"));
1049 case 50:
1050 goto complex;
1051 case 67:
1052 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") :
1053 (unsigned char *)gettext("Regular expression has illegal byte sequence"));
1054 }
1055 re->Nbra = nbra;
1056 return(eof);
1057 }
1058
1059 void
cerror(unsigned char * s)1060 cerror(unsigned char *s)
1061 {
1062 if (re) {
1063 re->Expbuf[0] = re->Expbuf[1] = 0;
1064 }
1065 error(s);
1066 }
1067
1068 int
execute(int gf,line * addr)1069 execute(int gf, line *addr)
1070 {
1071 unsigned char *p1, *p2;
1072 char *start;
1073 int c, i;
1074 int ret;
1075 int len;
1076
1077 if (gf) {
1078 if (re == NULL || re->Expbuf[0])
1079 return (0);
1080 if(value(vi_IGNORECASE)) {
1081 p1 = genbuf;
1082 p2 = (unsigned char *)loc2;
1083 while(c = *p2) {
1084 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1085 len = 1;
1086 if (len == 1) {
1087 *p1++ = tolower(c);
1088 p2++;
1089 continue;
1090 }
1091 strncpy(p1, p2, len);
1092 p1 += len; p2 += len;
1093 }
1094 *p1 = '\0';
1095 locs = (char *)genbuf;
1096 p1 = genbuf;
1097 start = loc2;
1098 } else {
1099 p1 = (unsigned char *)loc2;
1100 locs = loc2;
1101 }
1102 } else {
1103 if (addr == zero)
1104 return (0);
1105 p1 = linebuf;
1106 getaline(*addr);
1107 if(value(vi_IGNORECASE)) {
1108 p1 = genbuf;
1109 p2 = linebuf;
1110 while(c = *p2) {
1111 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1112 len = 1;
1113 if (len == 1) {
1114 *p1++ = tolower(c);
1115 p2++;
1116 continue;
1117 }
1118 strncpy(p1, p2, len);
1119 p1 += len; p2 += len;
1120 }
1121 *p1 = '\0';
1122 p1 = genbuf;
1123 start = (char *)linebuf;
1124 }
1125 locs = (char *)0;
1126 }
1127
1128 ret = step((char *)p1, (char *)re->Expbuf);
1129
1130 if(value(vi_IGNORECASE) && ret) {
1131 loc1 = start + (loc1 - (char *)genbuf);
1132 loc2 = start + (loc2 - (char *)genbuf);
1133 for(i = 0; i < NBRA; i++) {
1134 braslist[i] = start + (braslist[i] - (char *)genbuf);
1135 braelist[i] = start + (braelist[i] - (char *)genbuf);
1136 }
1137 }
1138 return ret;
1139 }
1140
1141 /*
1142 * Initialize the compiled regular-expression storage areas (called from
1143 * main()).
1144 */
1145
init_re(void)1146 void init_re (void)
1147 {
1148 #ifdef XPG4
1149 re = scanre = subre = NULL;
1150 #else /* !XPG4 */
1151 if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1152 error(gettext("out of memory"));
1153 exit(errcnt);
1154 }
1155
1156 if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) {
1157 error(gettext("out of memory"));
1158 exit(errcnt);
1159 }
1160
1161 if ((subre = calloc(1, sizeof(struct regexp))) == NULL) {
1162 error(gettext("out of memory"));
1163 exit(errcnt);
1164 }
1165 #endif /* XPG4 */
1166 }
1167
1168 /*
1169 * Save what is in the special place re to the named alternate
1170 * location. This means freeing up what's currently in this target
1171 * location, if necessary.
1172 */
1173
savere(struct regexp ** a)1174 void savere(struct regexp ** a)
1175 {
1176 #ifdef XPG4
1177 if (a == NULL || re == NULL) {
1178 return;
1179 }
1180
1181 if (*a == NULL) {
1182 *a = re;
1183 return;
1184 }
1185
1186 if (*a != re) {
1187 if (scanre != subre) {
1188 regex_comp_free(&((*a)->Expbuf));
1189 free(*a);
1190 }
1191 *a = re;
1192 }
1193 #else /* !XPG4 */
1194 memcpy(*a, re, sizeof(struct regexp));
1195 #endif /* XPG4 */
1196 }
1197
1198
1199 /*
1200 * Restore what is in the named alternate location to the special place
1201 * re. This means first freeing up what's currently in re, if necessary.
1202 */
1203
resre(struct regexp * a)1204 void resre(struct regexp * a)
1205 {
1206 #ifdef XPG4
1207 if (a == NULL) {
1208 return;
1209 }
1210
1211 if (re == NULL) {
1212 re = a;
1213 return;
1214 }
1215
1216 if (a != re) {
1217 if ((re != scanre) && (re != subre)) {
1218 regex_comp_free(&re->Expbuf);
1219 free(re);
1220 }
1221
1222 re = a;
1223 }
1224 #else /* !XPG4 */
1225 memcpy(re, a, sizeof(struct regexp));
1226 #endif /* XPG4 */
1227 }
1228