xref: /illumos-gate/usr/src/cmd/vi/port/ex_re.c (revision 471b551f6042e421bfe941f593337a8a5b2a7a7d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 /* Copyright (c) 1981 Regents of the University of California */
31 
32 #include "ex.h"
33 #include "ex_re.h"
34 
35 /* from libgen */
36 char *_compile(const char *, char *, char *, int);
37 
38 /*
39  * The compiled-regular-expression storage areas (re, scanre, and subre)
40  * have been changed into dynamically allocated memory areas, in both the
41  * Solaris and XPG4 versions.
42  *
43  * In the Solaris version, which uses the original libgen(3g) compile()
44  * and step() calls, these areas are allocated once, and then data are
45  * copied between them subsequently, as they were in the original
46  * implementation.  This is possible because the compiled information is
47  * a self-contained block of bits.
48  *
49  * In the XPG4 version, the expr:compile.o object is linked in as a
50  * simulation of these functions using the new regcomp() and regexec()
51  * functions.  The problem here is that the resulting
52  * compiled-regular-expression data contain pointers to other data, which
53  * need to be freed, but only when we are quite sure that we are done
54  * with them - and certainly not before.  There was an earlier attempt to
55  * handle these differences, but that effort was flawed.
56  */
57 
58 extern int	getchar();
59 #ifdef XPG4
60 void regex_comp_free(void *);
61 extern size_t regexc_size;	/* compile.c: size of regex_comp structure */
62 #endif /* XPG4 */
63 
64 /*
65  * Global, substitute and regular expressions.
66  * Very similar to ed, with some re extensions and
67  * confirmed substitute.
68  */
69 void
70 global(k)
71 	bool k;
72 {
73 	unsigned char *gp;
74 	int c;
75 	line *a1;
76 	unsigned char globuf[GBSIZE], *Cwas;
77 	int nlines = lineDOL();
78 	int oinglobal = inglobal;
79 	unsigned char *oglobp = globp;
80 	char	multi[MB_LEN_MAX + 1];
81 	wchar_t	wc;
82 	int	len;
83 
84 
85 	Cwas = Command;
86 	/*
87 	 * States of inglobal:
88 	 *  0: ordinary - not in a global command.
89 	 *  1: text coming from some buffer, not tty.
90 	 *  2: like 1, but the source of the buffer is a global command.
91 	 * Hence you're only in a global command if inglobal==2. This
92 	 * strange sounding convention is historically derived from
93 	 * everybody simulating a global command.
94 	 */
95 	if (inglobal==2)
96 		error(value(vi_TERSE) ? gettext("Global within global") :
97 gettext("Global within global not allowed"));
98 	markDOT();
99 	setall();
100 	nonzero();
101 	if (skipend())
102 		error(value(vi_TERSE) ? gettext("Global needs re") :
103 gettext("Missing regular expression for global"));
104 	c = getchar();
105 	(void)vi_compile(c, 1);
106 	savere(&scanre);
107 	gp = globuf;
108 	while ((c = peekchar()) != '\n') {
109 		if (!isascii(c)) {
110 			if (c == EOF) {
111 				c = '\n';
112 				ungetchar(c);
113 				goto out;
114 			}
115 
116 mb_copy:
117 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
118 				if ((gp + len) >= &globuf[GBSIZE - 2])
119 					error(gettext("Global command too long"));
120 				strncpy(gp, multi, len);
121 				gp += len;
122 				continue;
123 			}
124 		}
125 
126 		(void) getchar();
127 		switch (c) {
128 
129 		case EOF:
130 			c = '\n';
131 			ungetchar(c);
132 			goto out;
133 
134 		case '\\':
135 			c = peekchar();
136 			if (!isascii(c)) {
137 				*gp++ = '\\';
138 				goto mb_copy;
139 			}
140 
141 			(void) getchar();
142 			switch (c) {
143 
144 			case '\\':
145 				ungetchar(c);
146 				break;
147 
148 			case '\n':
149 				break;
150 
151 			default:
152 				*gp++ = '\\';
153 				break;
154 			}
155 			break;
156 		}
157 		*gp++ = c;
158 		if (gp >= &globuf[GBSIZE - 2])
159 			error(gettext("Global command too long"));
160 	}
161 
162 out:
163 	donewline();
164 	*gp++ = c;
165 	*gp++ = 0;
166 	saveall();
167 	inglobal = 2;
168 	for (a1 = one; a1 <= dol; a1++) {
169 		*a1 &= ~01;
170 		if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
171 			*a1 |= 01;
172 	}
173 #ifdef notdef
174 /*
175  * This code is commented out for now.  The problem is that we don't
176  * fix up the undo area the way we should.  Basically, I think what has
177  * to be done is to copy the undo area down (since we shrunk everything)
178  * and move the various pointers into it down too.  I will do this later
179  * when I have time. (Mark, 10-20-80)
180  */
181 	/*
182 	 * Special case: g/.../d (avoid n^2 algorithm)
183 	 */
184 	if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
185 		gdelete();
186 		return;
187 	}
188 #endif
189 	if (inopen)
190 		inopen = -1;
191 	/*
192 	 * Now for each marked line, set dot there and do the commands.
193 	 * Note the n^2 behavior here for lots of lines matching.
194 	 * This is really needed: in some cases you could delete lines,
195 	 * causing a marked line to be moved before a1 and missed if
196 	 * we didn't restart at zero each time.
197 	 */
198 	for (a1 = one; a1 <= dol; a1++) {
199 		if (*a1 & 01) {
200 			*a1 &= ~01;
201 			dot = a1;
202 			globp = globuf;
203 			commands(1, 1);
204 			a1 = zero;
205 		}
206 	}
207 	globp = oglobp;
208 	inglobal = oinglobal;
209 	endline = 1;
210 	Command = Cwas;
211 	netchHAD(nlines);
212 	setlastchar(EOF);
213 	if (inopen) {
214 		ungetchar(EOF);
215 		inopen = 1;
216 	}
217 }
218 
219 /*
220  * gdelete: delete inside a global command. Handles the
221  * special case g/r.e./d. All lines to be deleted have
222  * already been marked. Squeeze the remaining lines together.
223  * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
224  * and g/r.e./.,/r.e.2/d are not treated specially.  There is no
225  * good reason for this except the question: where to you draw the line?
226  */
227 void
228 gdelete(void)
229 {
230 	line *a1, *a2, *a3;
231 
232 	a3 = dol;
233 	/* find first marked line. can skip all before it */
234 	for (a1=zero; (*a1&01)==0; a1++)
235 		if (a1>=a3)
236 			return;
237 	/* copy down unmarked lines, compacting as we go. */
238 	for (a2=a1+1; a2<=a3;) {
239 		if (*a2&01) {
240 			a2++;		/* line is marked, skip it */
241 			dot = a1;	/* dot left after line deletion */
242 		} else
243 			*a1++ = *a2++;	/* unmarked, copy it */
244 	}
245 	dol = a1-1;
246 	if (dot>dol)
247 		dot = dol;
248 	change();
249 }
250 
251 bool	cflag;
252 int	scount, slines, stotal;
253 
254 int
255 substitute(int c)
256 {
257 	line *addr;
258 	int n;
259 	int gsubf, hopcount;
260 
261 	gsubf = compsub(c);
262 	if(FIXUNDO)
263 		save12(), undkind = UNDCHANGE;
264 	stotal = 0;
265 	slines = 0;
266 	for (addr = addr1; addr <= addr2; addr++) {
267 		scount = hopcount = 0;
268 		if (dosubcon(0, addr) == 0)
269 			continue;
270 		if (gsubf) {
271 			/*
272 			 * The loop can happen from s/\</&/g
273 			 * but we don't want to break other, reasonable cases.
274 			 */
275 			hopcount = 0;
276 			while (*loc2) {
277 				if (++hopcount > sizeof linebuf)
278 					error(gettext("substitution loop"));
279 				if (dosubcon(1, addr) == 0)
280 					break;
281 			}
282 		}
283 		if (scount) {
284 			stotal += scount;
285 			slines++;
286 			putmark(addr);
287 			n = append(getsub, addr);
288 			addr += n;
289 			addr2 += n;
290 		}
291 	}
292 	if (stotal == 0 && !inglobal && !cflag)
293 		error(value(vi_TERSE) ? gettext("Fail") :
294 gettext("Substitute pattern match failed"));
295 	snote(stotal, slines);
296 	return (stotal);
297 }
298 
299 int
300 compsub(int ch)
301 {
302 	int seof, c, uselastre;
303 	static int gsubf;
304 	static unsigned char remem[RHSSIZE];
305 	static int remflg = -1;
306 
307 	if (!value(vi_EDCOMPATIBLE))
308 		gsubf = cflag = 0;
309 	uselastre = 0;
310 	switch (ch) {
311 
312 	case 's':
313 		(void)skipwh();
314 		seof = getchar();
315 		if (endcmd(seof) || any(seof, "gcr")) {
316 			ungetchar(seof);
317 			goto redo;
318 		}
319 		if (isalpha(seof) || isdigit(seof))
320 			error(value(vi_TERSE) ? gettext("Substitute needs re") :
321 gettext("Missing regular expression for substitute"));
322 		seof = vi_compile(seof, 1);
323 		uselastre = 1;
324 		comprhs(seof);
325 		gsubf = cflag = 0;
326 		break;
327 
328 	case '~':
329 		uselastre = 1;
330 		/* fall into ... */
331 	case '&':
332 	redo:
333 		if (re == NULL || re->Expbuf[1] == 0)
334 			error(value(vi_TERSE) ? gettext("No previous re") :
335 gettext("No previous regular expression"));
336 		if (subre == NULL || subre->Expbuf[1] == 0)
337 			error(value(vi_TERSE) ? gettext("No previous substitute re") :
338 gettext("No previous substitute to repeat"));
339 		break;
340 	}
341 	for (;;) {
342 		c = getchar();
343 		switch (c) {
344 
345 		case 'g':
346 			gsubf = !gsubf;
347 			continue;
348 
349 		case 'c':
350 			cflag = !cflag;
351 			continue;
352 
353 		case 'r':
354 			uselastre = 1;
355 			continue;
356 
357 		default:
358 			ungetchar(c);
359 			setcount();
360 			donewline();
361 			if (uselastre)
362 				savere(&subre);
363 			else
364 				resre(subre);
365 
366 			/*
367 			 * The % by itself on the right hand side means
368 			 * that the previous value of the right hand side
369 			 * should be used. A -1 is used to indicate no
370 			 * previously remembered search string.
371 			 */
372 
373 			if (rhsbuf[0] == '%' && rhsbuf[1] == 0)
374 				if (remflg == -1)
375 					error(gettext("No previously remembered string"));
376 			        else
377 					strcpy(rhsbuf, remem);
378 			else {
379 				strcpy(remem, rhsbuf);
380 				remflg = 1;
381 			}
382 			return (gsubf);
383 		}
384 	}
385 }
386 
387 void
388 comprhs(int seof)
389 {
390 	unsigned char *rp, *orp;
391 	int c;
392 	unsigned char orhsbuf[RHSSIZE];
393 	char	multi[MB_LEN_MAX + 1];
394 	int	len;
395 	wchar_t	wc;
396 
397 	rp = rhsbuf;
398 	CP(orhsbuf, rp);
399 	for (;;) {
400 		c = peekchar();
401 		if (c == seof) {
402 			(void) getchar();
403 			break;
404 		}
405 
406 		if (!isascii(c) && c != EOF) {
407 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
408 				if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
409 					goto toobig;
410 				strncpy(rp, multi, len);
411 				rp += len;
412 				continue;
413 			}
414 		}
415 
416 		(void) getchar();
417 		switch (c) {
418 
419 		case '\\':
420 			c = peekchar();
421 			if (c == EOF) {
422 				(void) getchar();
423 				error(gettext("Replacement string ends with \\"));
424 			}
425 
426 			if (!isascii(c)) {
427 				*rp++ = '\\';
428 				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
429 					if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
430 						goto over_flow;
431 					strncpy(rp, multi, len);
432 					rp += len;
433 					continue;
434 				}
435 			}
436 
437 			(void) getchar();
438 			if (value(vi_MAGIC)) {
439 				/*
440 				 * When "magic", \& turns into a plain &,
441 				 * and all other chars work fine quoted.
442 				 */
443 				if (c != '&') {
444 					if(rp >= &rhsbuf[RHSSIZE - 1]) {
445 						*rp=0;
446 						error(value(vi_TERSE) ?
447 gettext("Replacement pattern too long") :
448 gettext("Replacement pattern too long - limit 256 characters"));
449 					}
450 					*rp++ = '\\';
451 				}
452 				break;
453 			}
454 magic:
455 			if (c == '~') {
456 				for (orp = orhsbuf; *orp; *rp++ = *orp++)
457 					if (rp >= &rhsbuf[RHSSIZE - 1])
458 						goto toobig;
459 				continue;
460 			}
461 			if(rp >= &rhsbuf[RHSSIZE - 1]) {
462 over_flow:
463 				*rp=0;
464 				error(value(vi_TERSE) ?
465 gettext("Replacement pattern too long") :
466 gettext("Replacement pattern too long - limit 256 characters"));
467 			}
468 			*rp++ = '\\';
469 			break;
470 
471 		case '\n':
472 		case EOF:
473 			if (!(globp && globp[0])) {
474 				ungetchar(c);
475 				goto endrhs;
476 			}
477 
478 		case '~':
479 		case '&':
480 			if (value(vi_MAGIC))
481 				goto magic;
482 			break;
483 		}
484 		if (rp >= &rhsbuf[RHSSIZE - 1]) {
485 toobig:
486 			*rp = 0;
487 			error(value(vi_TERSE) ?
488 gettext("Replacement pattern too long") :
489 gettext("Replacement pattern too long - limit 256 characters"));
490 		}
491 		*rp++ = c;
492 	}
493 endrhs:
494 	*rp++ = 0;
495 }
496 
497 int
498 getsub(void)
499 {
500 	unsigned char *p;
501 
502 	if ((p = linebp) == 0)
503 		return (EOF);
504 	strcLIN(p);
505 	linebp = 0;
506 	return (0);
507 }
508 
509 int
510 dosubcon(bool f, line *a)
511 {
512 
513 	if (execute(f, a) == 0)
514 		return (0);
515 	if (confirmed(a)) {
516 		dosub();
517 		scount++;
518 	}
519 	return (1);
520 }
521 
522 int
523 confirmed(line *a)
524 {
525 	int c, cnt, ch;
526 
527 	if (cflag == 0)
528 		return (1);
529 	pofix();
530 	pline(lineno(a));
531 	if (inopen)
532 		putchar('\n' | QUOTE);
533 	c = lcolumn(loc1);
534 	ugo(c, ' ');
535 	ugo(lcolumn(loc2) - c, '^');
536 	flush();
537 	cnt = 0;
538 bkup:
539 	ch = c = getkey();
540 again:
541 	if (c == '\b') {
542 		if ((inopen)
543 		 && (cnt > 0)) {
544 			putchar('\b' | QUOTE);
545 			putchar(' ');
546 			putchar('\b' | QUOTE), flush();
547 			cnt --;
548 		}
549 		goto bkup;
550 	}
551 	if (c == '\r')
552 		c = '\n';
553 	if (inopen && MB_CUR_MAX == 1 || c < 0200) {
554 		putchar(c);
555 		flush();
556 		cnt++;
557 	}
558 	if (c != '\n' && c != EOF) {
559 		c = getkey();
560 		goto again;
561 	}
562 	noteinp();
563 	return (ch == 'y');
564 }
565 
/*
 * ugo: write the character `with' cnt times; nothing is written when
 * cnt is zero or negative.
 */
void
ugo(int cnt, int with)
{
	for (; cnt > 0; cnt--)
		putchar(with);
}
575 
576 int	casecnt;
577 bool	destuc;
578 
579 void
580 dosub(void)
581 {
582 	unsigned char *lp, *sp, *rp;
583 	int c;
584 	int	len;
585 
586 	lp = linebuf;
587 	sp = genbuf;
588 	rp = rhsbuf;
589 	while (lp < (unsigned char *)loc1)
590 		*sp++ = *lp++;
591 	casecnt = 0;
592 	/*
593 	 * Caution: depending on the hardware, c will be either sign
594 	 * extended or not if C&QUOTE is set.  Thus, on a VAX, c will
595 	 * be < 0, but on a 3B, c will be >= 128.
596 	 */
597 	while (c = *rp) {
598 		if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
599 			len = 1;
600 		/* ^V <return> from vi to split lines */
601 		if (c == '\r')
602 			c = '\n';
603 
604 		if (c == '\\') {
605 			rp++;
606 			if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
607 				len = 1;
608 			switch (c = *rp++) {
609 
610 			case '&':
611 				sp = place(sp, loc1, loc2);
612 				if (sp == 0)
613 					goto ovflo;
614 				continue;
615 
616 			case 'l':
617 				casecnt = 1;
618 				destuc = 0;
619 				continue;
620 
621 			case 'L':
622 				casecnt = LBSIZE;
623 				destuc = 0;
624 				continue;
625 
626 			case 'u':
627 				casecnt = 1;
628 				destuc = 1;
629 				continue;
630 
631 			case 'U':
632 				casecnt = LBSIZE;
633 				destuc = 1;
634 				continue;
635 
636 			case 'E':
637 			case 'e':
638 				casecnt = 0;
639 				continue;
640 			}
641 			if(re != NULL && c >= '1' && c < re->Nbra + '1') {
642 				sp = place(sp, braslist[c - '1'] , braelist[c - '1']);
643 				if (sp == 0)
644 					goto ovflo;
645 				continue;
646 			}
647 			rp--;
648 		}
649 		if (len > 1) {
650 			if ((sp + len) >= &genbuf[LBSIZE])
651 				goto ovflo;
652 			strncpy(sp, rp, len);
653 		} else {
654 			if (casecnt)
655 				*sp = fixcase(c);
656 			else
657 				*sp = c;
658 		}
659 		sp += len; rp += len;
660 		if (sp >= &genbuf[LBSIZE])
661 ovflo:
662 			error(value(vi_TERSE) ? gettext("Line overflow") :
663 gettext("Line overflow in substitute"));
664 	}
665 	lp = (unsigned char *)loc2;
666 	loc2 = (char *)(linebuf + (sp - genbuf));
667 	while (*sp++ = *lp++)
668 		if (sp >= &genbuf[LBSIZE])
669 			goto ovflo;
670 	strcLIN(genbuf);
671 }
672 
673 int
674 fixcase(int c)
675 {
676 
677 	if (casecnt == 0)
678 		return (c);
679 	casecnt--;
680 	if (destuc) {
681 		if (islower(c))
682 			c = toupper(c);
683 	} else
684 		if (isupper(c))
685 			c = tolower(c);
686 	return (c);
687 }
688 
689 unsigned char *
690 place(sp, l1, l2)
691 	unsigned char *sp, *l1, *l2;
692 {
693 
694 	while (l1 < l2) {
695 		*sp++ = fixcase(*l1++);
696 		if (sp >= &genbuf[LBSIZE])
697 			return (0);
698 	}
699 	return (sp);
700 }
701 
702 void
703 snote(int total, int nlines)
704 {
705 
706 	if (!notable(total))
707 		return;
708 	if (nlines != 1 && nlines != total)
709 		viprintf(mesg(value(vi_TERSE) ?
710 			/*
711 			 * TRANSLATION_NOTE
712 			 *	Reference order of arguments must not
713 			 *	be changed using '%digit$', since vi's
714 			 *	viprintf() does not support it.
715 			 */
716 			    gettext("%d subs on %d lines") :
717 			/*
718 			 * TRANSLATION_NOTE
719 			 *	Reference order of arguments must not
720 			 *	be changed using '%digit$', since vi's
721 			 *	viprintf() does not support it.
722 			 */
723 			    gettext("%d substitutions on %d lines")),
724 		       total, nlines);
725 	else
726 		viprintf(mesg(value(vi_TERSE) ?
727 			    gettext("%d subs") :
728 			    gettext("%d substitutions")),
729 		       total);
730 	noonl();
731 	flush();
732 }
733 
734 #ifdef XPG4
735 #include <regex.h>
736 
737 extern int regcomp_flags;	/* use to specify cflags for regcomp() */
738 #endif /* XPG4 */
739 
740 int
741 vi_compile(int eof, int oknl)
742 {
743 	int c;
744 	unsigned char *gp, *p1;
745 	unsigned char *rhsp;
746 	unsigned char rebuf[LBSIZE];
747 	char	multi[MB_LEN_MAX + 1];
748 	int	len;
749 	wchar_t	wc;
750 
751 #ifdef XPG4
752 	/*
753 	 * reset cflags to plain BRE
754 	 */
755 	regcomp_flags = 0;
756 #endif /* XPG4 */
757 
758 	gp = genbuf;
759 	if (isalpha(eof) || isdigit(eof))
760 error(gettext("Regular expressions cannot be delimited by letters or digits"));
761 	if(eof >= 0200 && MB_CUR_MAX > 1)
762 error(gettext("Regular expressions cannot be delimited by multibyte characters"));
763 	c = getchar();
764 	if (eof == '\\')
765 		switch (c) {
766 
767 		case '/':
768 		case '?':
769 			if (scanre == NULL || scanre->Expbuf[1] == 0)
770 error(value(vi_TERSE) ? gettext("No previous scan re") :
771 gettext("No previous scanning regular expression"));
772 			resre(scanre);
773 			return (c);
774 
775 		case '&':
776 			if (subre == NULL || subre->Expbuf[1] == 0)
777 error(value(vi_TERSE) ? gettext("No previous substitute re") :
778 gettext("No previous substitute regular expression"));
779 			resre(subre);
780 			return (c);
781 
782 		default:
783 error(value(vi_TERSE) ? gettext("Badly formed re") :
784 gettext("Regular expression \\ must be followed by / or ?"));
785 		}
786 	if (c == eof || c == '\n' || c == EOF) {
787 		if (re == NULL || re->Expbuf[1] == 0)
788 error(value(vi_TERSE) ? gettext("No previous re") :
789 gettext("No previous regular expression"));
790 		if (c == '\n' && oknl == 0)
791 error(value(vi_TERSE) ? gettext("Missing closing delimiter") :
792 gettext("Missing closing delimiter for regular expression"));
793 		if (c != eof)
794 			ungetchar(c);
795 		return (eof);
796 	}
797 	gp = genbuf;
798 	if (c == '^') {
799 		*gp++ = c;
800 		c = getchar();
801 	}
802 	ungetchar(c);
803 	for (;;) {
804 		c = getchar();
805 		if (c == eof || c == EOF) {
806 			if (c == EOF)
807 				ungetchar(c);
808 			goto out;
809 		}
810 		if (gp >= &genbuf[LBSIZE - 3])
811 complex:
812 			cerror(value(vi_TERSE) ?
813 			    (unsigned char *)gettext("Re too complex") :
814 			    (unsigned char *)
815 			    gettext("Regular expression too complicated"));
816 
817 		if (!(isascii(c) || MB_CUR_MAX == 1)) {
818 			ungetchar(c);
819 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
820 				if ((gp + len) >= &genbuf[LBSIZE - 3])
821 					goto complex;
822 				strncpy(gp, multi, len);
823 				gp += len;
824 				continue;
825 			}
826 			(void) getchar();
827 		}
828 
829 		switch (c) {
830 
831 		case '\\':
832 			c = getchar();
833 			if (!isascii(c)) {
834 				ungetchar(c);
835 				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
836 					if ((gp + len) >= &genbuf[LBSIZE - 3])
837 						goto complex;
838 					*gp++ = '\\';
839 					strncpy(gp, multi, len);
840 					gp += len;
841 					continue;
842 				}
843 				(void) getchar();
844 			}
845 
846 			switch (c) {
847 
848 			case '<':
849 			case '>':
850 			case '(':
851 			case ')':
852 			case '{':
853 			case '}':
854 			case '$':
855 			case '^':
856 			case '\\':
857 				*gp++ = '\\';
858 				*gp++ = c;
859 				continue;
860 
861 			case 'n':
862 				*gp++ = c;
863 				continue;
864 			}
865 			if(c >= '0' && c <= '9') {
866 				*gp++ = '\\';
867 				*gp++ = c;
868 				continue;
869 			}
870 			if (value(vi_MAGIC) == 0)
871 magic:
872 			switch (c) {
873 
874 			case '.':
875 				*gp++ = '.';
876 				continue;
877 
878 			case '~':
879 				rhsp = rhsbuf;
880 				while (*rhsp) {
881 					if (!isascii(*rhsp)) {
882 						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) {
883 							if ((gp + len) >= &genbuf[LBSIZE-2])
884 								goto complex;
885 							strncpy(gp, rhsp, len);
886 							rhsp += len; gp += len;
887 							continue;
888 						}
889 					}
890 					len = 1;
891 					if (*rhsp == '\\') {
892 						c = *++rhsp;
893 						if (c == '&')
894 cerror(value(vi_TERSE) ? (unsigned char *)
895 gettext("Replacement pattern contains &") :
896 (unsigned char *)gettext("Replacement pattern contains & - cannot use in re"));
897 						if (c >= '1' && c <= '9')
898 cerror(value(vi_TERSE) ? (unsigned char *)
899 gettext("Replacement pattern contains \\d") :
900 (unsigned char *)
901 gettext("Replacement pattern contains \\d - cannot use in re"));
902 						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) {
903 							len = 1;
904 							if(any(c, ".\\*[$"))
905 								*gp++ = '\\';
906 						}
907 					}
908 
909 					if ((gp + len) >= &genbuf[LBSIZE-2])
910 						goto complex;
911 					if (len == 1) {
912 						c = *rhsp++;
913 						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
914 					} else {
915 						strncpy(gp, rhsp, len);
916 						gp += len; rhsp += len;
917 					}
918 				}
919 				continue;
920 
921 			case '*':
922 				*gp++ = '*';
923 				continue;
924 
925 			case '[':
926 				*gp++ = '[';
927 				c = getchar();
928 				if (c == '^') {
929 					*gp++ = '^';
930 					c = getchar();
931 				}
932 
933 				do {
934 					if (!isascii(c) && c != EOF) {
935 						ungetchar(c);
936 						if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
937 							if ((gp + len)>= &genbuf[LBSIZE-4])
938 								goto complex;
939 							strncpy(gp, multi, len);
940 							gp += len;
941 							c = getchar();
942 							continue;
943 						}
944 						(void) getchar();
945 					}
946 
947 					if (gp >= &genbuf[LBSIZE-4])
948 						goto complex;
949 					if(c == '\\' && peekchar() == ']') {
950 						(void)getchar();
951 						*gp++ = '\\';
952 						*gp++ = ']';
953 					}
954 					else if (c == '\n' || c == EOF)
955 						cerror((unsigned char *)
956 						    gettext("Missing ]"));
957 					else
958 						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
959 					c = getchar();
960 				} while(c != ']');
961 				*gp++ = ']';
962 				continue;
963 			}
964 			if (c == EOF) {
965 				ungetchar(EOF);
966 				*gp++ = '\\';
967 				*gp++ = '\\';
968 				continue;
969 			}
970 			if (c == '\n')
971 cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") :
972 (unsigned char *)gettext("Can't escape newlines into regular expressions"));
973 			*gp++ = '\\';
974 			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
975 			continue;
976 
977 		case '\n':
978 			if (oknl) {
979 				ungetchar(c);
980 				goto out;
981 			}
982 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") :
983 (unsigned char *)gettext("Missing closing delimiter for regular expression"));
984 
985 		case '.':
986 		case '~':
987 		case '*':
988 		case '[':
989 			if (value(vi_MAGIC))
990 				goto magic;
991 			if(c != '~')
992 				*gp++ = '\\';
993 defchar:
994 		default:
995 			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
996 			continue;
997 		}
998 	}
999 out:
1000 	*gp++ = '\0';
1001 
1002 #ifdef XPG4
1003 	/* see if our compiled RE's will fit in the re structure:	*/
1004 	if (regexc_size > EXPSIZ) {
1005 		/*
1006 		 * this should never happen. but it's critical that we
1007 		 * check here, otherwise .bss would get overwritten.
1008 		 */
1009 		cerror(value(vi_TERSE) ? (unsigned char *)
1010 		    gettext("RE's can't fit") :
1011 		    (unsigned char *)gettext("Regular expressions can't fit"));
1012 		return(eof);
1013 	}
1014 
1015 	/*
1016 	 * We create re each time we need it.
1017 	 */
1018 
1019 	if (re == NULL || re == scanre || re == subre) {
1020 		if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1021 			error(gettext("out of memory"));
1022 			exit(errcnt);
1023 		}
1024 	} else {
1025 		regex_comp_free(&re->Expbuf);
1026 		memset(re, 0, sizeof(struct regexp));
1027 	}
1028 
1029 	compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf
1030 	    + regexc_size);
1031 #else /* !XPG4 */
1032 	(void) _compile((const char *)genbuf, (char *)re->Expbuf,
1033 		(char *)(re->Expbuf + sizeof (re->Expbuf)), 1);
1034 #endif /* XPG4 */
1035 
1036 	if(regerrno)
1037 		switch(regerrno) {
1038 
1039 		case 42:
1040 cerror((unsigned char *)gettext("\\( \\) Imbalance"));
1041 		case 43:
1042 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") :
1043 (unsigned char *)
1044 gettext("Too many \\('d subexpressions in a regular expression"));
1045 		case 50:
1046 			goto complex;
1047 		case 67:
1048 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") :
1049 (unsigned char *)gettext("Regular expression has illegal byte sequence"));
1050 		}
1051 	re->Nbra = nbra;
1052 	return(eof);
1053 }
1054 
1055 void
1056 cerror(unsigned char *s)
1057 {
1058 	if (re) {
1059 		re->Expbuf[0] = re->Expbuf[1] = 0;
1060 	}
1061 	error(s);
1062 }
1063 
1064 int
1065 execute(int gf, line *addr)
1066 {
1067 	unsigned char *p1, *p2;
1068 	char *start;
1069 	int c, i;
1070 	int ret;
1071 	int	len;
1072 
1073 	if (gf) {
1074 		if (re == NULL || re->Expbuf[0])
1075 			return (0);
1076 		if(value(vi_IGNORECASE)) {
1077 			p1 = genbuf;
1078 			p2 = (unsigned char *)loc2;
1079 			while(c = *p2) {
1080 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1081 					len = 1;
1082 				if (len == 1) {
1083 					*p1++ = tolower(c);
1084 					p2++;
1085 					continue;
1086 				}
1087 				strncpy(p1, p2, len);
1088 				p1 += len; p2 += len;
1089 			}
1090 			*p1 = '\0';
1091 			locs = (char *)genbuf;
1092 			p1 = genbuf;
1093 			start = loc2;
1094 		} else {
1095 			p1 = (unsigned char *)loc2;
1096 			locs = loc2;
1097 		}
1098 	} else {
1099 		if (addr == zero)
1100 			return (0);
1101 		p1 = linebuf;
1102 		getaline(*addr);
1103 		if(value(vi_IGNORECASE)) {
1104 			p1 = genbuf;
1105 			p2 = linebuf;
1106 			while(c = *p2) {
1107 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1108 					len = 1;
1109 				if (len == 1) {
1110 					*p1++ = tolower(c);
1111 					p2++;
1112 					continue;
1113 				}
1114 				strncpy(p1, p2, len);
1115 				p1 += len; p2 += len;
1116 			}
1117 			*p1 = '\0';
1118 			p1 = genbuf;
1119 			start = (char *)linebuf;
1120 		}
1121 		locs = (char *)0;
1122 	}
1123 
1124 	ret = step((char *)p1, (char *)re->Expbuf);
1125 
1126 	if(value(vi_IGNORECASE) && ret) {
1127 		loc1 = start + (loc1 - (char *)genbuf);
1128 		loc2 = start + (loc2 - (char *)genbuf);
1129 		for(i = 0; i < NBRA; i++) {
1130 			braslist[i] = start + (braslist[i] - (char *)genbuf);
1131 			braelist[i] = start + (braelist[i] - (char *)genbuf);
1132 		}
1133 	}
1134 	return ret;
1135 }
1136 
1137 /*
1138  *  Initialize the compiled regular-expression storage areas (called from
1139  *  main()).
1140  */
1141 
1142 void init_re (void)
1143 {
1144 #ifdef XPG4
1145 	re = scanre = subre = NULL;
1146 #else /* !XPG4 */
1147 	if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1148 		error(gettext("out of memory"));
1149 		exit(errcnt);
1150 	}
1151 
1152 	if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) {
1153 		error(gettext("out of memory"));
1154 		exit(errcnt);
1155 	}
1156 
1157 	if ((subre = calloc(1, sizeof(struct regexp))) == NULL) {
1158 		error(gettext("out of memory"));
1159 		exit(errcnt);
1160 	}
1161 #endif /* XPG4 */
1162 }
1163 
1164 /*
1165  *  Save what is in the special place re to the named alternate
1166  *  location.  This means freeing up what's currently in this target
1167  *  location, if necessary.
1168  */
1169 
1170 void savere(struct regexp ** a)
1171 {
1172 #ifdef XPG4
1173 	if (a == NULL || re == NULL) {
1174 		return;
1175 	}
1176 
1177 	if (*a == NULL) {
1178 		*a = re;
1179 		return;
1180 	}
1181 
1182 	if (*a != re) {
1183 		if (scanre != subre) {
1184 			regex_comp_free(&((*a)->Expbuf));
1185 			free(*a);
1186 		}
1187 		*a = re;
1188 	}
1189 #else /* !XPG4 */
1190 	memcpy(*a, re, sizeof(struct regexp));
1191 #endif /* XPG4 */
1192 }
1193 
1194 
1195 /*
1196  *  Restore what is in the named alternate location to the special place
1197  *  re.  This means first freeing up what's currently in re, if necessary.
1198  */
1199 
1200 void resre(struct regexp * a)
1201 {
1202 #ifdef XPG4
1203 	if (a == NULL) {
1204 		return;
1205 	}
1206 
1207 	if (re == NULL) {
1208 		re = a;
1209 		return;
1210 	}
1211 
1212 	if (a != re) {
1213 		if ((re != scanre) && (re != subre)) {
1214 			regex_comp_free(&re->Expbuf);
1215 			free(re);
1216 		}
1217 
1218 		re = a;
1219 	}
1220 #else /* !XPG4 */
1221 	memcpy(re, a, sizeof(struct regexp));
1222 #endif /* XPG4 */
1223 }
1224