1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29
30 /* Copyright (c) 1981 Regents of the University of California */
31
32 #include "ex.h"
33 #include "ex_re.h"
34
35 /* from libgen */
36 char *_compile(const char *, char *, char *, int);
37
38 /*
39 * The compiled-regular-expression storage areas (re, scanre, and subre)
40 * have been changed into dynamically allocated memory areas, in both the
41 * Solaris and XPG4 versions.
42 *
43 * In the Solaris version, which uses the original libgen(3g) compile()
44 * and step() calls, these areas are allocated once, and then data are
45 * copied between them subsequently, as they were in the original
46 * implementation. This is possible because the compiled information is
47 * a self-contained block of bits.
48 *
49 * In the XPG4 version, the expr:compile.o object is linked in as a
50 * simulation of these functions using the new regcomp() and regexec()
51 * functions. The problem here is that the resulting
52 * compiled-regular-expression data contain pointers to other data, which
53 * need to be freed, but only when we are quite sure that we are done
54 * with them - and certainly not before. There was an earlier attempt to
55 * handle these differences, but that effort was flawed.
56 */
57
58 extern int getchar();
59 #ifdef XPG4
60 void regex_comp_free(void *);
61 extern size_t regexc_size; /* compile.c: size of regex_comp structure */
62 #endif /* XPG4 */
63
64 /*
65 * Global, substitute and regular expressions.
66 * Very similar to ed, with some re extensions and
67 * confirmed substitute.
68 */
/*
 * global: run a command list on every line of the addressed range whose
 * match result against a regular expression equals k.
 *
 * The first character read is the expression delimiter; the expression is
 * compiled by vi_compile() and the result is saved into scanre.  The rest
 * of the input up to an unescaped newline (or EOF) is collected into
 * globuf as the command list:
 *	- backslash-newline stores only the newline,
 *	- backslash-backslash stores one backslash and pushes the second
 *	  one back to be read again,
 *	- backslash followed by anything else stores both characters,
 *	- non-ASCII input is copied through _mbftowc() as a unit.
 * Matching lines are marked by setting the low-order bit of their line
 * entry, then commands() is run once per marked line with dot set to it.
 *
 * Nested globals are rejected with an error (inglobal == 2).
 */
void
global(k)
	bool k;
{
	unsigned char *gp;
	int c;
	line *a1;
	unsigned char globuf[GBSIZE], *Cwas;
	int nlines = lineDOL();
	int oinglobal = inglobal;
	unsigned char *oglobp = globp;
	char multi[MB_LEN_MAX + 1];
	wchar_t wc;
	int len;


	/* Remember state that is restored once the global has finished. */
	Cwas = Command;
	/*
	 * States of inglobal:
	 *  0: ordinary - not in a global command.
	 *  1: text coming from some buffer, not tty.
	 *  2: like 1, but the source of the buffer is a global command.
	 * Hence you're only in a global command if inglobal==2. This
	 * strange sounding convention is historically derived from
	 * everybody simulating a global command.
	 */
	if (inglobal==2)
		error(value(vi_TERSE) ? gettext("Global within global") :
		    gettext("Global within global not allowed"));
	markDOT();
	setall();
	nonzero();
	if (skipend())
		error(value(vi_TERSE) ? gettext("Global needs re") :
		    gettext("Missing regular expression for global"));
	/* The next character is the delimiter of the regular expression. */
	c = getchar();
	(void)vi_compile(c, 1);
	savere(&scanre);
	/* Collect the command list, up to the terminating newline. */
	gp = globuf;
	while ((c = peekchar()) != '\n') {
		if (!isascii(c)) {
			if (c == EOF) {
				/* EOF ends the command list like a newline. */
				c = '\n';
				ungetchar(c);
				goto out;
			}

mb_copy:
			/* Copy a whole multibyte character in one piece. */
			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
				if ((gp + len) >= &globuf[GBSIZE - 2])
					error(gettext("Global command too long"));
				strncpy(gp, multi, len);
				gp += len;
				continue;
			}
		}

		(void) getchar();
		switch (c) {

		case EOF:
			c = '\n';
			ungetchar(c);
			goto out;

		case '\\':
			c = peekchar();
			if (!isascii(c)) {
				*gp++ = '\\';
				goto mb_copy;
			}

			(void) getchar();
			switch (c) {

			case '\\':
				/*
				 * Push the second backslash back so that it
				 * is looked at again on the next iteration;
				 * one backslash is stored below.
				 */
				ungetchar(c);
				break;

			case '\n':
				/* Escaped newline: keep only the newline. */
				break;

			default:
				/* Keep the backslash with what follows it. */
				*gp++ = '\\';
				break;
			}
			break;
		}
		*gp++ = c;
		/* Leave room for the final newline and NUL added at "out". */
		if (gp >= &globuf[GBSIZE - 2])
			error(gettext("Global command too long"));
	}

out:
	donewline();
	*gp++ = c;
	*gp++ = 0;
	saveall();
	inglobal = 2;
	/*
	 * Marking pass: clear the mark bit on every line, then set it on
	 * each line in [addr1, addr2] whose match result equals k.
	 */
	for (a1 = one; a1 <= dol; a1++) {
		*a1 &= ~01;
		if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
			*a1 |= 01;
	}
#ifdef notdef
/*
 * This code is commented out for now.  The problem is that we don't
 * fix up the undo area the way we should.  Basically, I think what has
 * to be done is to copy the undo area down (since we shrunk everything)
 * and move the various pointers into it down too.  I will do this later
 * when I have time. (Mark, 10-20-80)
 */
	/*
	 * Special case: g/.../d (avoid n^2 algorithm)
	 */
	if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
		gdelete();
		return;
	}
#endif
	/* inopen is set to -1 while the commands run and back to 1 below. */
	if (inopen)
		inopen = -1;
	/*
	 * Now for each marked line, set dot there and do the commands.
	 * Note the n^2 behavior here for lots of lines matching.
	 * This is really needed: in some cases you could delete lines,
	 * causing a marked line to be moved before a1 and missed if
	 * we didn't restart at zero each time.
	 */
	for (a1 = one; a1 <= dol; a1++) {
		if (*a1 & 01) {
			*a1 &= ~01;
			dot = a1;
			globp = globuf;
			commands(1, 1);
			/* Restart the scan; the loop increment moves to one. */
			a1 = zero;
		}
	}
	/* Put back the state that was saved on entry. */
	globp = oglobp;
	inglobal = oinglobal;
	endline = 1;
	Command = Cwas;
	netchHAD(nlines);
	setlastchar(EOF);
	if (inopen) {
		ungetchar(EOF);
		inopen = 1;
	}
}
218
219 /*
220 * gdelete: delete inside a global command. Handles the
221 * special case g/r.e./d. All lines to be deleted have
222 * already been marked. Squeeze the remaining lines together.
223 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
224 * and g/r.e./.,/r.e.2/d are not treated specially. There is no
225 * good reason for this except the question: where to you draw the line?
226 */
227 void
gdelete(void)228 gdelete(void)
229 {
230 line *a1, *a2, *a3;
231
232 a3 = dol;
233 /* find first marked line. can skip all before it */
234 for (a1=zero; (*a1&01)==0; a1++)
235 if (a1>=a3)
236 return;
237 /* copy down unmarked lines, compacting as we go. */
238 for (a2=a1+1; a2<=a3;) {
239 if (*a2&01) {
240 a2++; /* line is marked, skip it */
241 dot = a1; /* dot left after line deletion */
242 } else
243 *a1++ = *a2++; /* unmarked, copy it */
244 }
245 dol = a1-1;
246 if (dot>dol)
247 dot = dol;
248 change();
249 }
250
/*
 * cflag: nonzero when each substitution must be confirmed; toggled by
 * the 'c' suffix in compsub() and tested in confirmed().
 */
bool cflag;
/*
 * Substitute counters maintained by substitute() and dosubcon():
 * scount - substitutions made on the line currently being processed,
 * slines - number of lines on which at least one substitution was made,
 * stotal - total number of substitutions made by the command.
 */
int scount, slines, stotal;
253
254 int
substitute(int c)255 substitute(int c)
256 {
257 line *addr;
258 int n;
259 int gsubf, hopcount;
260
261 gsubf = compsub(c);
262 if(FIXUNDO)
263 save12(), undkind = UNDCHANGE;
264 stotal = 0;
265 slines = 0;
266 for (addr = addr1; addr <= addr2; addr++) {
267 scount = hopcount = 0;
268 if (dosubcon(0, addr) == 0)
269 continue;
270 if (gsubf) {
271 /*
272 * The loop can happen from s/\</&/g
273 * but we don't want to break other, reasonable cases.
274 */
275 hopcount = 0;
276 while (*loc2) {
277 if (++hopcount > sizeof linebuf)
278 error(gettext("substitution loop"));
279 if (dosubcon(1, addr) == 0)
280 break;
281 }
282 }
283 if (scount) {
284 stotal += scount;
285 slines++;
286 putmark(addr);
287 n = append(getsub, addr);
288 addr += n;
289 addr2 += n;
290 }
291 }
292 if (stotal == 0 && !inglobal && !cflag)
293 error(value(vi_TERSE) ? gettext("Fail") :
294 gettext("Substitute pattern match failed"));
295 snote(stotal, slines);
296 return (stotal);
297 }
298
299 int
compsub(int ch)300 compsub(int ch)
301 {
302 int seof, c, uselastre;
303 static int gsubf;
304 static unsigned char remem[RHSSIZE];
305 static int remflg = -1;
306
307 if (!value(vi_EDCOMPATIBLE))
308 gsubf = cflag = 0;
309 uselastre = 0;
310 switch (ch) {
311
312 case 's':
313 (void)skipwh();
314 seof = getchar();
315 if (endcmd(seof) || any(seof, "gcr")) {
316 ungetchar(seof);
317 goto redo;
318 }
319 if (isalpha(seof) || isdigit(seof))
320 error(value(vi_TERSE) ? gettext("Substitute needs re") :
321 gettext("Missing regular expression for substitute"));
322 seof = vi_compile(seof, 1);
323 uselastre = 1;
324 comprhs(seof);
325 gsubf = cflag = 0;
326 break;
327
328 case '~':
329 uselastre = 1;
330 /* fall into ... */
331 case '&':
332 redo:
333 if (re == NULL || re->Expbuf[1] == 0)
334 error(value(vi_TERSE) ? gettext("No previous re") :
335 gettext("No previous regular expression"));
336 if (subre == NULL || subre->Expbuf[1] == 0)
337 error(value(vi_TERSE) ? gettext("No previous substitute re") :
338 gettext("No previous substitute to repeat"));
339 break;
340 }
341 for (;;) {
342 c = getchar();
343 switch (c) {
344
345 case 'g':
346 gsubf = !gsubf;
347 continue;
348
349 case 'c':
350 cflag = !cflag;
351 continue;
352
353 case 'r':
354 uselastre = 1;
355 continue;
356
357 default:
358 ungetchar(c);
359 setcount();
360 donewline();
361 if (uselastre)
362 savere(&subre);
363 else
364 resre(subre);
365
366 /*
367 * The % by itself on the right hand side means
368 * that the previous value of the right hand side
369 * should be used. A -1 is used to indicate no
370 * previously remembered search string.
371 */
372
373 if (rhsbuf[0] == '%' && rhsbuf[1] == 0)
374 if (remflg == -1)
375 error(gettext("No previously remembered string"));
376 else
377 strcpy(rhsbuf, remem);
378 else {
379 strcpy(remem, rhsbuf);
380 remflg = 1;
381 }
382 return (gsubf);
383 }
384 }
385 }
386
387 void
comprhs(int seof)388 comprhs(int seof)
389 {
390 unsigned char *rp, *orp;
391 int c;
392 unsigned char orhsbuf[RHSSIZE];
393 char multi[MB_LEN_MAX + 1];
394 int len;
395 wchar_t wc;
396
397 rp = rhsbuf;
398 CP(orhsbuf, rp);
399 for (;;) {
400 c = peekchar();
401 if (c == seof) {
402 (void) getchar();
403 break;
404 }
405
406 if (!isascii(c) && c != EOF) {
407 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
408 if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
409 goto toobig;
410 strncpy(rp, multi, len);
411 rp += len;
412 continue;
413 }
414 }
415
416 (void) getchar();
417 switch (c) {
418
419 case '\\':
420 c = peekchar();
421 if (c == EOF) {
422 (void) getchar();
423 error(gettext("Replacement string ends with \\"));
424 }
425
426 if (!isascii(c)) {
427 *rp++ = '\\';
428 if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
429 if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
430 goto over_flow;
431 strncpy(rp, multi, len);
432 rp += len;
433 continue;
434 }
435 }
436
437 (void) getchar();
438 if (value(vi_MAGIC)) {
439 /*
440 * When "magic", \& turns into a plain &,
441 * and all other chars work fine quoted.
442 */
443 if (c != '&') {
444 if(rp >= &rhsbuf[RHSSIZE - 1]) {
445 *rp=0;
446 error(value(vi_TERSE) ?
447 gettext("Replacement pattern too long") :
448 gettext("Replacement pattern too long - limit 256 characters"));
449 }
450 *rp++ = '\\';
451 }
452 break;
453 }
454 magic:
455 if (c == '~') {
456 for (orp = orhsbuf; *orp; *rp++ = *orp++)
457 if (rp >= &rhsbuf[RHSSIZE - 1])
458 goto toobig;
459 continue;
460 }
461 if(rp >= &rhsbuf[RHSSIZE - 1]) {
462 over_flow:
463 *rp=0;
464 error(value(vi_TERSE) ?
465 gettext("Replacement pattern too long") :
466 gettext("Replacement pattern too long - limit 256 characters"));
467 }
468 *rp++ = '\\';
469 break;
470
471 case '\n':
472 case EOF:
473 if (!(globp && globp[0])) {
474 ungetchar(c);
475 goto endrhs;
476 }
477
478 case '~':
479 case '&':
480 if (value(vi_MAGIC))
481 goto magic;
482 break;
483 }
484 if (rp >= &rhsbuf[RHSSIZE - 1]) {
485 toobig:
486 *rp = 0;
487 error(value(vi_TERSE) ?
488 gettext("Replacement pattern too long") :
489 gettext("Replacement pattern too long - limit 256 characters"));
490 }
491 *rp++ = c;
492 }
493 endrhs:
494 *rp++ = 0;
495 }
496
497 int
getsub(void)498 getsub(void)
499 {
500 unsigned char *p;
501
502 if ((p = linebp) == 0)
503 return (EOF);
504 strcLIN(p);
505 linebp = 0;
506 return (0);
507 }
508
509 int
dosubcon(bool f,line * a)510 dosubcon(bool f, line *a)
511 {
512
513 if (execute(f, a) == 0)
514 return (0);
515 if (confirmed(a)) {
516 dosub();
517 scount++;
518 }
519 return (1);
520 }
521
522 int
confirmed(line * a)523 confirmed(line *a)
524 {
525 int c, cnt, ch;
526
527 if (cflag == 0)
528 return (1);
529 pofix();
530 pline(lineno(a));
531 if (inopen)
532 putchar('\n' | QUOTE);
533 c = lcolumn(loc1);
534 ugo(c, ' ');
535 ugo(lcolumn(loc2) - c, '^');
536 flush();
537 cnt = 0;
538 bkup:
539 ch = c = getkey();
540 again:
541 if (c == '\b') {
542 if ((inopen)
543 && (cnt > 0)) {
544 putchar('\b' | QUOTE);
545 putchar(' ');
546 putchar('\b' | QUOTE), flush();
547 cnt --;
548 }
549 goto bkup;
550 }
551 if (c == '\r')
552 c = '\n';
553 if (inopen && MB_CUR_MAX == 1 || c < 0200) {
554 putchar(c);
555 flush();
556 cnt++;
557 }
558 if (c != '\n' && c != EOF) {
559 c = getkey();
560 goto again;
561 }
562 noteinp();
563 return (ch == 'y');
564 }
565
/*
 * ugo: write the character "with" cnt times; nothing is written when
 * cnt is zero or negative.
 */
void
ugo(int cnt, int with)
{
	for (; cnt > 0; cnt--)
		putchar(with);
}
575
/*
 * Case conversion state shared by dosub(), place() and fixcase().
 * casecnt is the number of characters still to be case converted; dosub()
 * sets it to 1 for \l and \u, to LBSIZE for \L and \U and to 0 for \e
 * and \E, and fixcase() decrements it for each character it is given.
 */
int casecnt;
/* destuc: nonzero converts to upper case, zero converts to lower case. */
bool destuc;
578
/*
 * dosub: carry out one substitution on the line in linebuf.
 *
 * The new line is assembled in genbuf: the text before the match
 * (linebuf up to loc1), then the expansion of rhsbuf, then the text that
 * followed the match (from loc2).  In rhsbuf a backslash introduces a
 * special item:
 *	\&		the matched text (loc1 up to loc2)
 *	\1 ...		the text of a \( \) group, up to re->Nbra groups
 *	\l  \u		convert the next character to lower / upper case
 *	\L  \U		convert following text to lower / upper case
 *	\e  \E		end case conversion
 * Any other character after a backslash is copied without the backslash.
 * A carriage return in rhsbuf is stored as a newline.
 *
 * On return the result has been copied back with strcLIN() and loc2
 * points just past the inserted replacement within linebuf.  Raises a
 * "Line overflow" error when the result does not fit in genbuf.
 */
void
dosub(void)
{
	unsigned char *lp, *sp, *rp;
	int c;
	int len;

	lp = linebuf;
	sp = genbuf;
	rp = rhsbuf;
	/* Copy the part of the line that precedes the match. */
	while (lp < (unsigned char *)loc1)
		*sp++ = *lp++;
	casecnt = 0;
	/*
	 * Caution: depending on the hardware, c will be either sign
	 * extended or not if C&QUOTE is set.  Thus, on a VAX, c will
	 * be < 0, but on a 3B, c will be >= 128.
	 */
	while (c = *rp) {
		/* len is the byte length of the character at rp. */
		if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
			len = 1;
		/* ^V <return> from vi to split lines */
		if (c == '\r')
			c = '\n';

		if (c == '\\') {
			rp++;
			if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
				len = 1;
			switch (c = *rp++) {

			case '&':
				sp = place(sp, loc1, loc2);
				if (sp == 0)
					goto ovflo;
				continue;

			case 'l':
				casecnt = 1;
				destuc = 0;
				continue;

			case 'L':
				casecnt = LBSIZE;
				destuc = 0;
				continue;

			case 'u':
				casecnt = 1;
				destuc = 1;
				continue;

			case 'U':
				casecnt = LBSIZE;
				destuc = 1;
				continue;

			case 'E':
			case 'e':
				casecnt = 0;
				continue;
			}
			/* \1, \2, ...: insert the text of that \( \) group. */
			if(re != NULL && c >= '1' && c < re->Nbra + '1') {
				sp = place(sp, braslist[c - '1'] , braelist[c - '1']);
				if (sp == 0)
					goto ovflo;
				continue;
			}
			/*
			 * Not a special item: back up so that the character
			 * after the backslash is copied by the code below.
			 */
			rp--;
		}
		if (len > 1) {
			/* Multibyte character: copied as is, no case change. */
			if ((sp + len) >= &genbuf[LBSIZE])
				goto ovflo;
			strncpy(sp, rp, len);
		} else {
			if (casecnt)
				*sp = fixcase(c);
			else
				*sp = c;
		}
		sp += len; rp += len;
		if (sp >= &genbuf[LBSIZE])
ovflo:
			error(value(vi_TERSE) ? gettext("Line overflow") :
			    gettext("Line overflow in substitute"));
	}
	/* Append the rest of the line, including its terminating NUL. */
	lp = (unsigned char *)loc2;
	/* The next search on this line resumes after the replacement. */
	loc2 = (char *)(linebuf + (sp - genbuf));
	while (*sp++ = *lp++)
		if (sp >= &genbuf[LBSIZE])
			goto ovflo;
	strcLIN(genbuf);
}
672
673 int
fixcase(int c)674 fixcase(int c)
675 {
676
677 if (casecnt == 0)
678 return (c);
679 casecnt--;
680 if (destuc) {
681 if (islower(c))
682 c = toupper(c);
683 } else
684 if (isupper(c))
685 c = tolower(c);
686 return (c);
687 }
688
689 unsigned char *
place(sp,l1,l2)690 place(sp, l1, l2)
691 unsigned char *sp, *l1, *l2;
692 {
693
694 while (l1 < l2) {
695 *sp++ = fixcase(*l1++);
696 if (sp >= &genbuf[LBSIZE])
697 return (0);
698 }
699 return (sp);
700 }
701
702 void
snote(int total,int nlines)703 snote(int total, int nlines)
704 {
705
706 if (!notable(total))
707 return;
708 if (nlines != 1 && nlines != total)
709 viprintf(mesg(value(vi_TERSE) ?
710 /*
711 * TRANSLATION_NOTE
712 * Reference order of arguments must not
713 * be changed using '%digit$', since vi's
714 * viprintf() does not support it.
715 */
716 gettext("%d subs on %d lines") :
717 /*
718 * TRANSLATION_NOTE
719 * Reference order of arguments must not
720 * be changed using '%digit$', since vi's
721 * viprintf() does not support it.
722 */
723 gettext("%d substitutions on %d lines")),
724 total, nlines);
725 else
726 viprintf(mesg(value(vi_TERSE) ?
727 gettext("%d subs") :
728 gettext("%d substitutions")),
729 total);
730 noonl();
731 flush();
732 }
733
734 #ifdef XPG4
735 #include <regex.h>
736
737 extern int regcomp_flags; /* use to specify cflags for regcomp() */
738 #endif /* XPG4 */
739
/*
 * vi_compile: read a regular expression from the input and compile it
 * into re.
 *
 * eof is the delimiter that ends the expression.  When oknl is nonzero a
 * newline also ends it (and is pushed back); otherwise a newline is an
 * error.
 *
 * Special forms:
 *	- delimiter backslash: the next character picks a saved
 *	  expression, "/" or "?" for scanre and "&" for subre, which is
 *	  restored into re; that character is returned.
 *	- empty expression (delimiter, newline or EOF right away): the
 *	  expression already in re is reused; it is an error if there is
 *	  none.
 *
 * Otherwise the text is rewritten into genbuf and handed to the compile
 * routine.  The rewriting honours the magic option (with it unset ".",
 * "~", "*" and "[" are special only when escaped), expands a special "~"
 * to the previous replacement text from rhsbuf, and lower-cases ordinary
 * characters when the ignorecase option is set.  Multibyte characters
 * are copied through as a unit.
 *
 * Returns eof, except for the delimiter-backslash form described above.
 * Errors are raised through error() and cerror().
 *
 * NOTE(review): p1 and rebuf are declared but never used in this
 * function, and the label defchar is never the target of a goto.
 */
int
vi_compile(int eof, int oknl)
{
	int c;
	unsigned char *gp, *p1;
	unsigned char *rhsp;
	unsigned char rebuf[LBSIZE];
	char multi[MB_LEN_MAX + 1];
	int len;
	wchar_t wc;

#ifdef XPG4
	/*
	 * reset cflags to plain BRE
	 * if \< and/or \> is specified, REG_WORDS is set.
	 */
	regcomp_flags = 0;
#endif /* XPG4 */

	gp = genbuf;
	if (isalpha(eof) || isdigit(eof))
		error(gettext("Regular expressions cannot be delimited by letters or digits"));
	if(eof >= 0200 && MB_CUR_MAX > 1)
		error(gettext("Regular expressions cannot be delimited by multibyte characters"));
	c = getchar();
	/* Delimiter backslash: bring back one of the saved expressions. */
	if (eof == '\\')
		switch (c) {

		case '/':
		case '?':
			if (scanre == NULL || scanre->Expbuf[1] == 0)
				error(value(vi_TERSE) ? gettext("No previous scan re") :
				    gettext("No previous scanning regular expression"));
			resre(scanre);
			return (c);

		case '&':
			if (subre == NULL || subre->Expbuf[1] == 0)
				error(value(vi_TERSE) ? gettext("No previous substitute re") :
				    gettext("No previous substitute regular expression"));
			resre(subre);
			return (c);

		default:
			error(value(vi_TERSE) ? gettext("Badly formed re") :
			    gettext("Regular expression \\ must be followed by / or ?"));
		}
	/* Empty expression: reuse whatever is already compiled in re. */
	if (c == eof || c == '\n' || c == EOF) {
		if (re == NULL || re->Expbuf[1] == 0)
			error(value(vi_TERSE) ? gettext("No previous re") :
			    gettext("No previous regular expression"));
		if (c == '\n' && oknl == 0)
			error(value(vi_TERSE) ? gettext("Missing closing delimiter") :
			    gettext("Missing closing delimiter for regular expression"));
		if (c != eof)
			ungetchar(c);
		return (eof);
	}
	gp = genbuf;
	/* A leading ^ is copied as is; the loop below re-reads what follows. */
	if (c == '^') {
		*gp++ = c;
		c = getchar();
	}
	ungetchar(c);
	for (;;) {
		c = getchar();
		if (c == eof || c == EOF) {
			if (c == EOF)
				ungetchar(c);
			goto out;
		}
		/* "complex" is also reached by goto from further down. */
		if (gp >= &genbuf[LBSIZE - 3])
complex:
			cerror(value(vi_TERSE) ?
			    (unsigned char *)gettext("Re too complex") :
			    (unsigned char *)
			    gettext("Regular expression too complicated"));

		/* Multibyte character: push it back and copy it as a unit. */
		if (!(isascii(c) || MB_CUR_MAX == 1)) {
			ungetchar(c);
			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
				if ((gp + len) >= &genbuf[LBSIZE - 3])
					goto complex;
				strncpy(gp, multi, len);
				gp += len;
				continue;
			}
			(void) getchar();
		}

		switch (c) {

		case '\\':
			c = getchar();
			if (!isascii(c)) {
				ungetchar(c);
				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
					if ((gp + len) >= &genbuf[LBSIZE - 3])
						goto complex;
					*gp++ = '\\';
					strncpy(gp, multi, len);
					gp += len;
					continue;
				}
				(void) getchar();
			}

			switch (c) {

			case '<':
			case '>':
#ifdef XPG4
				regcomp_flags = REG_WORDS;
				/*FALLTHRU*/
#endif /* XPG4 */
			case '(':
			case ')':
			case '{':
			case '}':
			case '$':
			case '^':
			case '\\':
				/* These keep their backslash. */
				*gp++ = '\\';
				*gp++ = c;
				continue;

			case 'n':
				/* Stored without the backslash. */
				*gp++ = c;
				continue;
			}
			/* \digit is passed through with its backslash. */
			if(c >= '0' && c <= '9') {
				*gp++ = '\\';
				*gp++ = c;
				continue;
			}
			/*
			 * With magic unset an escaped character gets the
			 * special treatment below.  The label is also the
			 * target for the unescaped characters when magic is
			 * set (see the cases near the end of the outer
			 * switch).
			 */
			if (value(vi_MAGIC) == 0)
magic:
			switch (c) {

			case '.':
				*gp++ = '.';
				continue;

			case '~':
				/*
				 * Insert the previous replacement text as
				 * literal text: its backslashes are dropped
				 * and ".", "\", "*", "[" and "$" are escaped.
				 */
				rhsp = rhsbuf;
				while (*rhsp) {
					if (!isascii(*rhsp)) {
						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) {
							if ((gp + len) >= &genbuf[LBSIZE-2])
								goto complex;
							strncpy(gp, rhsp, len);
							rhsp += len; gp += len;
							continue;
						}
					}
					len = 1;
					if (*rhsp == '\\') {
						c = *++rhsp;
						if (c == '&')
							cerror(value(vi_TERSE) ? (unsigned char *)
							    gettext("Replacement pattern contains &") :
							    (unsigned char *)gettext("Replacement pattern contains & - cannot use in re"));
						if (c >= '1' && c <= '9')
							cerror(value(vi_TERSE) ? (unsigned char *)
							    gettext("Replacement pattern contains \\d") :
							    (unsigned char *)
							    gettext("Replacement pattern contains \\d - cannot use in re"));
						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) {
							len = 1;
							if(any(c, ".\\*[$"))
								*gp++ = '\\';
						}
					}

					if ((gp + len) >= &genbuf[LBSIZE-2])
						goto complex;
					if (len == 1) {
						c = *rhsp++;
						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
					} else {
						strncpy(gp, rhsp, len);
						gp += len; rhsp += len;
					}
				}
				continue;

			case '*':
				*gp++ = '*';
				continue;

			case '[':
				/* Bracket expression: copy up to the closing ]. */
				*gp++ = '[';
				c = getchar();
				if (c == '^') {
					*gp++ = '^';
					c = getchar();
				}

				do {
					if (!isascii(c) && c != EOF) {
						ungetchar(c);
						if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
							if ((gp + len)>= &genbuf[LBSIZE-4])
								goto complex;
							strncpy(gp, multi, len);
							gp += len;
							c = getchar();
							continue;
						}
						(void) getchar();
					}

					if (gp >= &genbuf[LBSIZE-4])
						goto complex;
					/* \] is stored as is and does not close. */
					if(c == '\\' && peekchar() == ']') {
						(void)getchar();
						*gp++ = '\\';
						*gp++ = ']';
					}
					else if (c == '\n' || c == EOF)
						cerror((unsigned char *)
						    gettext("Missing ]"));
					else
						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
					c = getchar();
				} while(c != ']');
				*gp++ = ']';
				continue;
			}
			/* Backslash directly followed by EOF. */
			if (c == EOF) {
				ungetchar(EOF);
				*gp++ = '\\';
				*gp++ = '\\';
				continue;
			}
			if (c == '\n')
				cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") :
				    (unsigned char *)gettext("Can't escape newlines into regular expressions"));
			/* Any other escaped character keeps its backslash. */
			*gp++ = '\\';
			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
			continue;

		case '\n':
			if (oknl) {
				ungetchar(c);
				goto out;
			}
			cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") :
			    (unsigned char *)gettext("Missing closing delimiter for regular expression"));

		case '.':
		case '~':
		case '*':
		case '[':
			/*
			 * Unescaped: special when magic is set; otherwise
			 * stored escaped, except "~" which is stored as is.
			 */
			if (value(vi_MAGIC))
				goto magic;
			if(c != '~')
				*gp++ = '\\';
defchar:
		default:
			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
			continue;
		}
	}
out:
	*gp++ = '\0';

#ifdef XPG4
	/* see if our compiled RE's will fit in the re structure: */
	if (regexc_size > EXPSIZ) {
		/*
		 * this should never happen. but it's critical that we
		 * check here, otherwise .bss would get overwritten.
		 */
		cerror(value(vi_TERSE) ? (unsigned char *)
		    gettext("RE's can't fit") :
		    (unsigned char *)gettext("Regular expressions can't fit"));
		return(eof);
	}

	/*
	 * We create re each time we need it.
	 */

	/*
	 * A structure that scanre or subre also points to must stay
	 * untouched, so a fresh one is allocated in that case; otherwise
	 * the existing one is released and reused.
	 */
	if (re == NULL || re == scanre || re == subre) {
		if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
			error(gettext("out of memory"));
			exit(errcnt);
		}
	} else {
		regex_comp_free(&re->Expbuf);
		memset(re, 0, sizeof(struct regexp));
	}

	compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf
	    + regexc_size);
#else /* !XPG4 */
	(void) _compile((const char *)genbuf, (char *)re->Expbuf,
	    (char *)(re->Expbuf + sizeof (re->Expbuf)), 1);
#endif /* XPG4 */

	/* Turn the error code left by the compile step into a message. */
	if(regerrno)
		switch(regerrno) {

		case 42:
			cerror((unsigned char *)gettext("\\( \\) Imbalance"));
		case 43:
			cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") :
			    (unsigned char *)
			    gettext("Too many \\('d subexpressions in a regular expression"));
		case 50:
			goto complex;
		case 67:
			cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") :
			    (unsigned char *)gettext("Regular expression has illegal byte sequence"));
		}
	re->Nbra = nbra;
	return(eof);
}
1059
1060 void
cerror(unsigned char * s)1061 cerror(unsigned char *s)
1062 {
1063 if (re) {
1064 re->Expbuf[0] = re->Expbuf[1] = 0;
1065 }
1066 error(s);
1067 }
1068
1069 int
execute(int gf,line * addr)1070 execute(int gf, line *addr)
1071 {
1072 unsigned char *p1, *p2;
1073 char *start;
1074 int c, i;
1075 int ret;
1076 int len;
1077
1078 if (gf) {
1079 if (re == NULL || re->Expbuf[0])
1080 return (0);
1081 if(value(vi_IGNORECASE)) {
1082 p1 = genbuf;
1083 p2 = (unsigned char *)loc2;
1084 while(c = *p2) {
1085 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1086 len = 1;
1087 if (len == 1) {
1088 *p1++ = tolower(c);
1089 p2++;
1090 continue;
1091 }
1092 strncpy(p1, p2, len);
1093 p1 += len; p2 += len;
1094 }
1095 *p1 = '\0';
1096 locs = (char *)genbuf;
1097 p1 = genbuf;
1098 start = loc2;
1099 } else {
1100 p1 = (unsigned char *)loc2;
1101 locs = loc2;
1102 }
1103 } else {
1104 if (addr == zero)
1105 return (0);
1106 p1 = linebuf;
1107 getaline(*addr);
1108 if(value(vi_IGNORECASE)) {
1109 p1 = genbuf;
1110 p2 = linebuf;
1111 while(c = *p2) {
1112 if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1113 len = 1;
1114 if (len == 1) {
1115 *p1++ = tolower(c);
1116 p2++;
1117 continue;
1118 }
1119 strncpy(p1, p2, len);
1120 p1 += len; p2 += len;
1121 }
1122 *p1 = '\0';
1123 p1 = genbuf;
1124 start = (char *)linebuf;
1125 }
1126 locs = (char *)0;
1127 }
1128
1129 ret = step((char *)p1, (char *)re->Expbuf);
1130
1131 if(value(vi_IGNORECASE) && ret) {
1132 loc1 = start + (loc1 - (char *)genbuf);
1133 loc2 = start + (loc2 - (char *)genbuf);
1134 for(i = 0; i < NBRA; i++) {
1135 braslist[i] = start + (braslist[i] - (char *)genbuf);
1136 braelist[i] = start + (braelist[i] - (char *)genbuf);
1137 }
1138 }
1139 return ret;
1140 }
1141
1142 /*
1143 * Initialize the compiled regular-expression storage areas (called from
1144 * main()).
1145 */
1146
init_re(void)1147 void init_re (void)
1148 {
1149 #ifdef XPG4
1150 re = scanre = subre = NULL;
1151 #else /* !XPG4 */
1152 if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1153 error(gettext("out of memory"));
1154 exit(errcnt);
1155 }
1156
1157 if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) {
1158 error(gettext("out of memory"));
1159 exit(errcnt);
1160 }
1161
1162 if ((subre = calloc(1, sizeof(struct regexp))) == NULL) {
1163 error(gettext("out of memory"));
1164 exit(errcnt);
1165 }
1166 #endif /* XPG4 */
1167 }
1168
1169 /*
1170 * Save what is in the special place re to the named alternate
1171 * location. This means freeing up what's currently in this target
1172 * location, if necessary.
1173 */
1174
savere(struct regexp ** a)1175 void savere(struct regexp ** a)
1176 {
1177 #ifdef XPG4
1178 if (a == NULL || re == NULL) {
1179 return;
1180 }
1181
1182 if (*a == NULL) {
1183 *a = re;
1184 return;
1185 }
1186
1187 if (*a != re) {
1188 if (scanre != subre) {
1189 regex_comp_free(&((*a)->Expbuf));
1190 free(*a);
1191 }
1192 *a = re;
1193 }
1194 #else /* !XPG4 */
1195 memcpy(*a, re, sizeof(struct regexp));
1196 #endif /* XPG4 */
1197 }
1198
1199
1200 /*
1201 * Restore what is in the named alternate location to the special place
1202 * re. This means first freeing up what's currently in re, if necessary.
1203 */
1204
resre(struct regexp * a)1205 void resre(struct regexp * a)
1206 {
1207 #ifdef XPG4
1208 if (a == NULL) {
1209 return;
1210 }
1211
1212 if (re == NULL) {
1213 re = a;
1214 return;
1215 }
1216
1217 if (a != re) {
1218 if ((re != scanre) && (re != subre)) {
1219 regex_comp_free(&re->Expbuf);
1220 free(re);
1221 }
1222
1223 re = a;
1224 }
1225 #else /* !XPG4 */
1226 memcpy(re, a, sizeof(struct regexp));
1227 #endif /* XPG4 */
1228 }
1229