xref: /titanic_52/usr/src/cmd/vi/port/ex_re.c (revision 1a7c1b724419d3cb5fa6eea75123c6b2060ba31b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /* Copyright (c) 1981 Regents of the University of California */
27 
28 /*
29  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
30  * Use is subject to license terms.
31  */
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 1.21	*/
34 
35 #include "ex.h"
36 #include "ex_re.h"
37 
38 /* from libgen */
39 char *_compile(const char *, char *, char *, int);
40 
41 /*
42  * The compiled-regular-expression storage areas (re, scanre, and subre)
43  * have been changed into dynamically allocated memory areas, in both the
44  * Solaris and XPG4 versions.
45  *
46  * In the Solaris version, which uses the original libgen(3g) compile()
47  * and step() calls, these areas are allocated once, and then data are
48  * copied between them subsequently, as they were in the original
49  * implementation.  This is possible because the compiled information is
50  * a self-contained block of bits.
51  *
52  * In the XPG4 version, the expr:compile.o object is linked in as a
53  * simulation of these functions using the new regcomp() and regexec()
54  * functions.  The problem here is that the resulting
55  * compiled-regular-expression data contain pointers to other data, which
56  * need to be freed, but only when we are quite sure that we are done
57  * with them - and certainly not before.  There was an earlier attempt to
58  * handle these differences, but that effort was flawed.
59  */
60 
61 extern char	getchar();
62 #ifdef XPG4
63 void regex_comp_free(void *);
64 extern size_t regexc_size;	/* compile.c: size of regex_comp structure */
65 #endif /* XPG4 */
66 
67 /*
68  * Global, substitute and regular expressions.
69  * Very similar to ed, with some re extensions and
70  * confirmed substitute.
71  */
/*
 * global: read the regular expression and command list of a global
 * command, mark the selected lines, and run the command list once
 * for every marked line.
 *
 * k is compared against the result of execute(0, line) for each line
 * in addr1..addr2; a line is selected when the two are equal
 * (presumably k is 1 for "g" and 0 for "v" - confirm against the caller).
 *
 * The low bit of each line entry is used as the "selected" mark.
 */
global(k)
	bool k;
{
	unsigned char *gp;
	int c;
	line *a1;
	unsigned char globuf[GBSIZE], *Cwas;
	int nlines = lineDOL();
	int oinglobal = inglobal;
	unsigned char *oglobp = globp;
	char	multi[MB_LEN_MAX + 1];
	wchar_t	wc;
	int	len;


	/* Command, inglobal and globp are restored before returning. */
	Cwas = Command;
	/*
	 * States of inglobal:
	 *  0: ordinary - not in a global command.
	 *  1: text coming from some buffer, not tty.
	 *  2: like 1, but the source of the buffer is a global command.
	 * Hence you're only in a global command if inglobal==2. This
	 * strange sounding convention is historically derived from
	 * everybody simulating a global command.
	 */
	if (inglobal==2)
		error(value(vi_TERSE) ? gettext("Global within global") :
gettext("Global within global not allowed"));
	markDOT();
	setall();
	nonzero();
	if (skipend())
		error(value(vi_TERSE) ? gettext("Global needs re") :
gettext("Missing regular expression for global"));
	/*
	 * The next input character is handed to vi_compile() as the
	 * pattern delimiter; the compiled pattern is then saved as the
	 * scanning expression.
	 */
	c = getchar();
	(void)vi_compile(c, 1);
	savere(&scanre);
	/*
	 * Collect the command list into globuf, up to a newline or EOF.
	 * Every store is followed by a check that leaves room for the
	 * final character and the terminating NUL written at "out".
	 */
	gp = globuf;
	while ((c = peekchar()) != '\n') {
		if (!isascii(c)) {
			if (c == EOF) {
				c = '\n';
				ungetchar(c);
				goto out;
			}

			/*
			 * Also entered from the backslash case below when
			 * the escaped character is not ASCII.
			 */
mb_copy:
			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
				if ((gp + len) >= &globuf[GBSIZE - 2])
					error(gettext("Global command too long"));
				strncpy(gp, multi, len);
				gp += len;
				continue;
			}
			/* Conversion failed: fall through and copy one byte. */
		}

		(void) getchar();
		switch (c) {

		case EOF:
			c = '\n';
			ungetchar(c);
			goto out;

		case '\\':
			c = peekchar();
			if (!isascii(c)) {
				*gp++ = '\\';
				goto mb_copy;
			}

			(void) getchar();
			switch (c) {

			case '\\':
				/*
				 * Push the second backslash back so that it
				 * is scanned again on the next pass; one
				 * backslash is stored below.
				 */
				ungetchar(c);
				break;

			case '\n':
				/* Escaped newline: store the newline alone. */
				break;

			default:
				/* Any other escape keeps its backslash. */
				*gp++ = '\\';
				break;
			}
			break;
		}
		*gp++ = c;
		if (gp >= &globuf[GBSIZE - 2])
			error(gettext("Global command too long"));
	}

out:
	donewline();
	/* Terminate the list with the final character (a newline) and NUL. */
	*gp++ = c;
	*gp++ = 0;
	saveall();
	inglobal = 2;
	/*
	 * First pass: clear the mark on every line, then set it on the
	 * lines in addr1..addr2 for which execute() returns k.
	 */
	for (a1 = one; a1 <= dol; a1++) {
		*a1 &= ~01;
		if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
			*a1 |= 01;
	}
#ifdef notdef
/*
 * This code is commented out for now.  The problem is that we don't
 * fix up the undo area the way we should.  Basically, I think what has
 * to be done is to copy the undo area down (since we shrunk everything)
 * and move the various pointers into it down too.  I will do this later
 * when I have time. (Mark, 10-20-80)
 */
	/*
	 * Special case: g/.../d (avoid n^2 algorithm)
	 */
	if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
		gdelete();
		return;
	}
#endif
	/* inopen is put back to 1 at the end of this function if it was set. */
	if (inopen)
		inopen = -1;
	/*
	 * Now for each marked line, set dot there and do the commands.
	 * Note the n^2 behavior here for lots of lines matching.
	 * This is really needed: in some cases you could delete lines,
	 * causing a marked line to be moved before a1 and missed if
	 * we didn't restart at zero each time.
	 */
	for (a1 = one; a1 <= dol; a1++) {
		if (*a1 & 01) {
			*a1 &= ~01;
			dot = a1;
			globp = globuf;
			commands(1, 1);
			/* Restart the scan; the loop increment moves a1 to one. */
			a1 = zero;
		}
	}
	/* Restore the state saved on entry. */
	globp = oglobp;
	inglobal = oinglobal;
	endline = 1;
	Command = Cwas;
	netchHAD(nlines);
	setlastchar(EOF);
	if (inopen) {
		ungetchar(EOF);
		inopen = 1;
	}
}
220 
221 /*
222  * gdelete: delete inside a global command. Handles the
223  * special case g/r.e./d. All lines to be deleted have
224  * already been marked. Squeeze the remaining lines together.
225  * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
226  * and g/r.e./.,/r.e.2/d are not treated specially.  There is no
227  * good reason for this except the question: where to you draw the line?
228  */
229 gdelete()
230 {
231 	line *a1, *a2, *a3;
232 
233 	a3 = dol;
234 	/* find first marked line. can skip all before it */
235 	for (a1=zero; (*a1&01)==0; a1++)
236 		if (a1>=a3)
237 			return;
238 	/* copy down unmarked lines, compacting as we go. */
239 	for (a2=a1+1; a2<=a3;) {
240 		if (*a2&01) {
241 			a2++;		/* line is marked, skip it */
242 			dot = a1;	/* dot left after line deletion */
243 		} else
244 			*a1++ = *a2++;	/* unmarked, copy it */
245 	}
246 	dol = a1-1;
247 	if (dot>dol)
248 		dot = dol;
249 	change();
250 }
251 
/* Confirm flag: toggled by "c" in compsub(), tested by confirmed(). */
bool	cflag;
/*
 * Substitution counters: scount counts the substitutions made on the
 * line being processed (reset per line in substitute(), bumped in
 * dosubcon()); slines counts the lines on which at least one was made;
 * stotal is the sum of scount over the whole command.
 */
int	scount, slines, stotal;
254 
255 substitute(c)
256 	int c;
257 {
258 	line *addr;
259 	int n;
260 	int gsubf, hopcount;
261 
262 	gsubf = compsub(c);
263 	if(FIXUNDO)
264 		save12(), undkind = UNDCHANGE;
265 	stotal = 0;
266 	slines = 0;
267 	for (addr = addr1; addr <= addr2; addr++) {
268 		scount = hopcount = 0;
269 		if (dosubcon(0, addr) == 0)
270 			continue;
271 		if (gsubf) {
272 			/*
273 			 * The loop can happen from s/\</&/g
274 			 * but we don't want to break other, reasonable cases.
275 			 */
276 			hopcount = 0;
277 			while (*loc2) {
278 				if (++hopcount > sizeof linebuf)
279 					error(gettext("substitution loop"));
280 				if (dosubcon(1, addr) == 0)
281 					break;
282 			}
283 		}
284 		if (scount) {
285 			stotal += scount;
286 			slines++;
287 			putmark(addr);
288 			n = append(getsub, addr);
289 			addr += n;
290 			addr2 += n;
291 		}
292 	}
293 	if (stotal == 0 && !inglobal && !cflag)
294 		error(value(vi_TERSE) ? gettext("Fail") :
295 gettext("Substitute pattern match failed"));
296 	snote(stotal, slines);
297 	return (stotal);
298 }
299 
300 compsub(ch)
301 {
302 	int seof, c, uselastre;
303 	static int gsubf;
304 	static unsigned char remem[RHSSIZE];
305 	static int remflg = -1;
306 
307 	if (!value(vi_EDCOMPATIBLE))
308 		gsubf = cflag = 0;
309 	uselastre = 0;
310 	switch (ch) {
311 
312 	case 's':
313 		(void)skipwh();
314 		seof = getchar();
315 		if (endcmd(seof) || any(seof, "gcr")) {
316 			ungetchar(seof);
317 			goto redo;
318 		}
319 		if (isalpha(seof) || isdigit(seof))
320 			error(value(vi_TERSE) ? gettext("Substitute needs re") :
321 gettext("Missing regular expression for substitute"));
322 		seof = vi_compile(seof, 1);
323 		uselastre = 1;
324 		comprhs(seof);
325 		gsubf = cflag = 0;
326 		break;
327 
328 	case '~':
329 		uselastre = 1;
330 		/* fall into ... */
331 	case '&':
332 	redo:
333 		if (re == NULL || re->Expbuf[1] == 0)
334 			error(value(vi_TERSE) ? gettext("No previous re") :
335 gettext("No previous regular expression"));
336 		if (subre == NULL || subre->Expbuf[1] == 0)
337 			error(value(vi_TERSE) ? gettext("No previous substitute re") :
338 gettext("No previous substitute to repeat"));
339 		break;
340 	}
341 	for (;;) {
342 		c = getchar();
343 		switch (c) {
344 
345 		case 'g':
346 			gsubf = !gsubf;
347 			continue;
348 
349 		case 'c':
350 			cflag = !cflag;
351 			continue;
352 
353 		case 'r':
354 			uselastre = 1;
355 			continue;
356 
357 		default:
358 			ungetchar(c);
359 			setcount();
360 			donewline();
361 			if (uselastre)
362 				savere(&subre);
363 			else
364 				resre(subre);
365 
366 			/*
367 			 * The % by itself on the right hand side means
368 			 * that the previous value of the right hand side
369 			 * should be used. A -1 is used to indicate no
370 			 * previously remembered search string.
371 			 */
372 
373 			if (rhsbuf[0] == '%' && rhsbuf[1] == 0)
374 				if (remflg == -1)
375 					error(gettext("No previously remembered string"));
376 			        else
377 					strcpy(rhsbuf, remem);
378 			else {
379 				strcpy(remem, rhsbuf);
380 				remflg = 1;
381 			}
382 			return (gsubf);
383 		}
384 	}
385 }
386 
387 comprhs(seof)
388 	int seof;
389 {
390 	unsigned char *rp, *orp;
391 	int c;
392 	unsigned char orhsbuf[RHSSIZE];
393 	char	multi[MB_LEN_MAX + 1];
394 	int	len;
395 	wchar_t	wc;
396 
397 	rp = rhsbuf;
398 	CP(orhsbuf, rp);
399 	for (;;) {
400 		c = peekchar();
401 		if (c == seof) {
402 			(void) getchar();
403 			break;
404 		}
405 
406 		if (!isascii(c) && c != EOF) {
407 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
408 				if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
409 					goto toobig;
410 				strncpy(rp, multi, len);
411 				rp += len;
412 				continue;
413 			}
414 		}
415 
416 		(void) getchar();
417 		switch (c) {
418 
419 		case '\\':
420 			c = peekchar();
421 			if (c == EOF) {
422 				(void) getchar();
423 				error(gettext("Replacement string ends with \\"));
424 			}
425 
426 			if (!isascii(c)) {
427 				*rp++ = '\\';
428 				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
429 					if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
430 						goto over_flow;
431 					strncpy(rp, multi, len);
432 					rp += len;
433 					continue;
434 				}
435 			}
436 
437 			(void) getchar();
438 			if (value(vi_MAGIC)) {
439 				/*
440 				 * When "magic", \& turns into a plain &,
441 				 * and all other chars work fine quoted.
442 				 */
443 				if (c != '&') {
444 					if(rp >= &rhsbuf[RHSSIZE - 1]) {
445 						*rp=0;
446 						error(value(vi_TERSE) ?
447 gettext("Replacement pattern too long") :
448 gettext("Replacement pattern too long - limit 256 characters"));
449 					}
450 					*rp++ = '\\';
451 				}
452 				break;
453 			}
454 magic:
455 			if (c == '~') {
456 				for (orp = orhsbuf; *orp; *rp++ = *orp++)
457 					if (rp >= &rhsbuf[RHSSIZE - 1])
458 						goto toobig;
459 				continue;
460 			}
461 			if(rp >= &rhsbuf[RHSSIZE - 1]) {
462 over_flow:
463 				*rp=0;
464 				error(value(vi_TERSE) ?
465 gettext("Replacement pattern too long") :
466 gettext("Replacement pattern too long - limit 256 characters"));
467 			}
468 			*rp++ = '\\';
469 			break;
470 
471 		case '\n':
472 		case EOF:
473 			if (!(globp && globp[0])) {
474 				ungetchar(c);
475 				goto endrhs;
476 			}
477 
478 		case '~':
479 		case '&':
480 			if (value(vi_MAGIC))
481 				goto magic;
482 			break;
483 		}
484 		if (rp >= &rhsbuf[RHSSIZE - 1]) {
485 toobig:
486 			*rp = 0;
487 			error(value(vi_TERSE) ?
488 gettext("Replacement pattern too long") :
489 gettext("Replacement pattern too long - limit 256 characters"));
490 		}
491 		*rp++ = c;
492 	}
493 endrhs:
494 	*rp++ = 0;
495 }
496 
497 getsub()
498 {
499 	unsigned char *p;
500 
501 	if ((p = linebp) == 0)
502 		return (EOF);
503 	strcLIN(p);
504 	linebp = 0;
505 	return (0);
506 }
507 
508 dosubcon(f, a)
509 	bool f;
510 	line *a;
511 {
512 
513 	if (execute(f, a) == 0)
514 		return (0);
515 	if (confirmed(a)) {
516 		dosub();
517 		scount++;
518 	}
519 	return (1);
520 }
521 
522 confirmed(a)
523 	line *a;
524 {
525 	int c, cnt, ch;
526 
527 	if (cflag == 0)
528 		return (1);
529 	pofix();
530 	pline(lineno(a));
531 	if (inopen)
532 		putchar('\n' | QUOTE);
533 	c = lcolumn(loc1);
534 	ugo(c, ' ');
535 	ugo(lcolumn(loc2) - c, '^');
536 	flush();
537 	cnt = 0;
538 bkup:
539 	ch = c = getkey();
540 again:
541 	if (c == '\b') {
542 		if ((inopen)
543 		 && (cnt > 0)) {
544 			putchar('\b' | QUOTE);
545 			putchar(' ');
546 			putchar('\b' | QUOTE), flush();
547 			cnt --;
548 		}
549 		goto bkup;
550 	}
551 	if (c == '\r')
552 		c = '\n';
553 	if (inopen && MB_CUR_MAX == 1 || c < 0200) {
554 		putchar(c);
555 		flush();
556 		cnt++;
557 	}
558 	if (c != '\n' && c != EOF) {
559 		c = getkey();
560 		goto again;
561 	}
562 	noteinp();
563 	return (ch == 'y');
564 }
565 
/*
 * ugo: output the character "with" cnt times; nothing is written when
 * cnt is zero or negative.
 */
ugo(cnt, with)
	int with;
	int cnt;
{

	while (cnt > 0) {
		putchar(with);
		cnt--;
	}
}
576 
/* Number of characters fixcase() will still convert; 0 disables it. */
int	casecnt;
/* Conversion direction for fixcase(): nonzero = to upper, zero = to lower. */
bool	destuc;
579 
/*
 * dosub: perform one substitution on the current line.
 *
 * The new line is assembled in genbuf: the part of linebuf before the
 * match (up to loc1), then the expansion of the replacement text in
 * rhsbuf, then the part of the line after the match (from loc2).  The
 * result is installed with strcLIN(), and loc2 is moved to the end of
 * the inserted text so that a following search resumes after it.
 *
 * Overflowing genbuf raises the "Line overflow" error.
 */
dosub()
{
	unsigned char *lp, *sp, *rp;
	int c;
	int	len;

	lp = linebuf;
	sp = genbuf;
	rp = rhsbuf;
	/* Text preceding the match is copied unchanged. */
	while (lp < (unsigned char *)loc1)
		*sp++ = *lp++;
	casecnt = 0;
	/*
	 * Caution: depending on the hardware, c will be either sign
	 * extended or not if C&QUOTE is set.  Thus, on a VAX, c will
	 * be < 0, but on a 3B, c will be >= 128.
	 */
	while (c = *rp) {
		/* len is the byte length of the character at rp (at least 1). */
		if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
			len = 1;
		/* ^V <return> from vi to split lines */
		if (c == '\r')
			c = '\n';

		if (c == '\\') {
			/* Step over the backslash and size the escaped character. */
			rp++;
			if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
				len = 1;
			switch (c = *rp++) {

			case '&':
				/* Insert the whole matched text. */
				sp = place(sp, loc1, loc2);
				if (sp == 0)
					goto ovflo;
				continue;

			case 'l':
				/* Lower-case the next character only. */
				casecnt = 1;
				destuc = 0;
				continue;

			case 'L':
				/* Lower-case from here on. */
				casecnt = LBSIZE;
				destuc = 0;
				continue;

			case 'u':
				/* Upper-case the next character only. */
				casecnt = 1;
				destuc = 1;
				continue;

			case 'U':
				/* Upper-case from here on. */
				casecnt = LBSIZE;
				destuc = 1;
				continue;

			case 'E':
			case 'e':
				/* End any case conversion. */
				casecnt = 0;
				continue;
			}
			/* \1 ... \N: insert the text matched by that subexpression. */
			if(re != NULL && c >= '1' && c < re->Nbra + '1') {
				sp = place(sp, braslist[c - '1'] , braelist[c - '1']);
				if (sp == 0)
					goto ovflo;
				continue;
			}
			/*
			 * Any other escaped character is copied literally:
			 * back rp up onto it so the code below copies it.
			 */
			rp--;
		}
		if (len > 1) {
			/* Multibyte character: copied as is, no case change. */
			if ((sp + len) >= &genbuf[LBSIZE])
				goto ovflo;
			strncpy(sp, rp, len);
		} else {
			if (casecnt)
				*sp = fixcase(c);
			else
				*sp = c;
		}
		sp += len; rp += len;
		/* The error call below is also the target of every "goto ovflo". */
		if (sp >= &genbuf[LBSIZE])
ovflo:
			error(value(vi_TERSE) ? gettext("Line overflow") :
gettext("Line overflow in substitute"));
	}
	lp = (unsigned char *)loc2;
	/* New loc2: the offset in linebuf that matches sp's offset in genbuf. */
	loc2 = (char *)(linebuf + (sp - genbuf));
	/* Append the rest of the line, including its terminating NUL. */
	while (*sp++ = *lp++)
		if (sp >= &genbuf[LBSIZE])
			goto ovflo;
	strcLIN(genbuf);
}
672 
673 fixcase(c)
674 	int c;
675 {
676 
677 	if (casecnt == 0)
678 		return (c);
679 	casecnt--;
680 	if (destuc) {
681 		if (islower(c))
682 			c = toupper(c);
683 	} else
684 		if (isupper(c))
685 			c = tolower(c);
686 	return (c);
687 }
688 
689 unsigned char *
690 place(sp, l1, l2)
691 	unsigned char *sp, *l1, *l2;
692 {
693 
694 	while (l1 < l2) {
695 		*sp++ = fixcase(*l1++);
696 		if (sp >= &genbuf[LBSIZE])
697 			return (0);
698 	}
699 	return (sp);
700 }
701 
702 snote(total, nlines)
703 	int total, nlines;
704 {
705 
706 	if (!notable(total))
707 		return;
708 	if (nlines != 1 && nlines != total)
709 		printf(mesg(value(vi_TERSE) ?
710 			/*
711 			 * TRANSLATION_NOTE
712 			 *	Reference order of arguments must not
713 			 *	be changed using '%digit$', since vi's
714 			 *	printf() does not support it.
715 			 */
716 			    gettext("%d subs on %d lines") :
717 			/*
718 			 * TRANSLATION_NOTE
719 			 *	Reference order of arguments must not
720 			 *	be changed using '%digit$', since vi's
721 			 *	printf() does not support it.
722 			 */
723 			    gettext("%d substitutions on %d lines")),
724 		       total, nlines);
725 	else
726 		printf(mesg(value(vi_TERSE) ?
727 			    gettext("%d subs") :
728 			    gettext("%d substitutions")),
729 		       total);
730 	noonl();
731 	flush();
732 }
733 
734 #ifdef XPG4
735 #include <regex.h>
736 
737 extern int regcomp_flags;	/* use to specify cflags for regcomp() */
738 #endif /* XPG4 */
739 
/*
 * vi_compile: read a regular expression from the input, terminated by
 * the delimiter eof, and compile it into re.
 *
 * The pattern text is first rewritten into genbuf - applying the magic
 * and ignorecase options and expanding "~" to the previous replacement
 * text - and genbuf is then handed to the regular-expression compiler.
 *
 * eof	the delimiter character; letters, digits and (in a multibyte
 *	locale) bytes >= 0200 are rejected.  A backslash delimiter
 *	selects one of the saved expressions instead (see below).
 * oknl	nonzero if a newline may end the pattern in place of the
 *	closing delimiter.
 *
 * Returns eof, except for the backslash-delimiter forms, which return
 * the selecting character.
 *
 * NOTE(review): the local p1 and the label defchar are not referenced
 * anywhere in this function.
 */
vi_compile(eof, oknl)
	int eof;
	int oknl;
{
	int c;
	unsigned char *gp, *p1;
	unsigned char *rhsp;
	unsigned char rebuf[LBSIZE];
	char	multi[MB_LEN_MAX + 1];
	int	len;
	wchar_t	wc;

#ifdef XPG4
	/*
	 * reset cflags to plain BRE
	 * if \< and/or \> is specified, REG_WORDS is set.
	 */
	regcomp_flags = 0;
#endif /* XPG4 */

	gp = genbuf;
	if (isalpha(eof) || isdigit(eof))
error(gettext("Regular expressions cannot be delimited by letters or digits"));
	if(eof >= 0200 && MB_CUR_MAX > 1)
error(gettext("Regular expressions cannot be delimited by multibyte characters"));
	c = getchar();
	/*
	 * Backslash as delimiter: the next character picks a previously
	 * saved expression (/ or ? for the scanning expression, & for the
	 * substitute expression), which is restored into re.
	 */
	if (eof == '\\')
		switch (c) {

		case '/':
		case '?':
			if (scanre == NULL || scanre->Expbuf[1] == 0)
error(value(vi_TERSE) ? gettext("No previous scan re") :
gettext("No previous scanning regular expression"));
			resre(scanre);
			return (c);

		case '&':
			if (subre == NULL || subre->Expbuf[1] == 0)
error(value(vi_TERSE) ? gettext("No previous substitute re") :
gettext("No previous substitute regular expression"));
			resre(subre);
			return (c);

		default:
error(value(vi_TERSE) ? gettext("Badly formed re") :
gettext("Regular expression \\ must be followed by / or ?"));
		}
	/* An empty pattern means "use the expression already in re". */
	if (c == eof || c == '\n' || c == EOF) {
		if (re == NULL || re->Expbuf[1] == 0)
error(value(vi_TERSE) ? gettext("No previous re") :
gettext("No previous regular expression"));
		if (c == '\n' && oknl == 0)
error(value(vi_TERSE) ? gettext("Missing closing delimiter") :
gettext("Missing closing delimiter for regular expression"));
		if (c != eof)
			ungetchar(c);
		return (eof);
	}
	gp = genbuf;
	/* A leading ^ is copied straight through. */
	if (c == '^') {
		*gp++ = c;
		c = getchar();
	}
	ungetchar(c);
	/* Translate the pattern one character at a time into genbuf. */
	for (;;) {
		c = getchar();
		if (c == eof || c == EOF) {
			if (c == EOF)
				ungetchar(c);
			goto out;
		}
		/* The cerror call below is also the target of "goto complex". */
		if (gp >= &genbuf[LBSIZE - 3])
complex:
			cerror(value(vi_TERSE) ? gettext("Re too complex") :
gettext("Regular expression too complicated"));

		/* In a multibyte locale a non-ASCII character is copied whole. */
		if (!(isascii(c) || MB_CUR_MAX == 1)) {
			ungetchar(c);
			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
				if ((gp + len) >= &genbuf[LBSIZE - 3])
					goto complex;
				strncpy(gp, multi, len);
				gp += len;
				continue;
			}
			(void) getchar();
		}

		switch (c) {

		case '\\':
			c = getchar();
			/* Escaped multibyte character: backslash plus its bytes. */
			if (!isascii(c)) {
				ungetchar(c);
				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
					if ((gp + len) >= &genbuf[LBSIZE - 3])
						goto complex;
					*gp++ = '\\';
					strncpy(gp, multi, len);
					gp += len;
					continue;
				}
				(void) getchar();
			}

			switch (c) {

			case '<':
			case '>':
#ifdef XPG4
				regcomp_flags = REG_WORDS;
				/*FALLTHRU*/
#endif /* XPG4 */
			case '(':
			case ')':
			case '{':
			case '}':
			case '$':
			case '^':
			case '\\':
				/* These escapes are passed on with their backslash. */
				*gp++ = '\\';
				*gp++ = c;
				continue;

			case 'n':
				/* \n is stored as the letter alone. */
				*gp++ = c;
				continue;
			}
			/* \digit is passed on with its backslash. */
			if(c >= '0' && c <= '9') {
				*gp++ = '\\';
				*gp++ = c;
				continue;
			}
			/*
			 * The switch below gives . ~ * [ their special
			 * meaning.  It is entered here for the escaped
			 * forms when nomagic is set, and via "goto magic"
			 * for the unescaped forms when magic is set.
			 */
			if (value(vi_MAGIC) == 0)
magic:
			switch (c) {

			case '.':
				*gp++ = '.';
				continue;

			case '~':
				/*
				 * Expand to the previous replacement text
				 * (rhsbuf), quoting characters that would be
				 * special in a pattern.  Replacement-only
				 * constructs (the stored \& and \digit forms)
				 * cannot be expressed and are errors.
				 */
				rhsp = rhsbuf;
				while (*rhsp) {
					if (!isascii(*rhsp)) {
						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) {
							if ((gp + len) >= &genbuf[LBSIZE-2])
								goto complex;
							strncpy(gp, rhsp, len);
							rhsp += len; gp += len;
							continue;
						}
					}
					len = 1;
					if (*rhsp == '\\') {
						c = *++rhsp;
						if (c == '&')
cerror(value(vi_TERSE) ? gettext("Replacement pattern contains &") :
gettext("Replacement pattern contains & - cannot use in re"));
						if (c >= '1' && c <= '9')
cerror(value(vi_TERSE) ? gettext("Replacement pattern contains \\d") :
gettext("Replacement pattern contains \\d - cannot use in re"));
						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) {
							len = 1;
							if(any(c, ".\\*[$"))
								*gp++ = '\\';
						}
					}

					if ((gp + len) >= &genbuf[LBSIZE-2])
						goto complex;
					if (len == 1) {
						c = *rhsp++;
						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
					} else {
						strncpy(gp, rhsp, len);
						gp += len; rhsp += len;
					}
				}
				continue;

			case '*':
				*gp++ = '*';
				continue;

			case '[':
				/*
				 * Bracket expression: copy everything up to
				 * the closing ], keeping \] as an escaped
				 * bracket and folding case if ignorecase is
				 * set.  A newline or EOF before the ] is an
				 * error.
				 */
				*gp++ = '[';
				c = getchar();
				if (c == '^') {
					*gp++ = '^';
					c = getchar();
				}

				do {
					if (!isascii(c) && c != EOF) {
						ungetchar(c);
						if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
							if ((gp + len)>= &genbuf[LBSIZE-4])
								goto complex;
							strncpy(gp, multi, len);
							gp += len;
							c = getchar();
							continue;
						}
						(void) getchar();
					}

					if (gp >= &genbuf[LBSIZE-4])
						goto complex;
					if(c == '\\' && peekchar() == ']') {
						(void)getchar();
						*gp++ = '\\';
						*gp++ = ']';
					}
					else if (c == '\n' || c == EOF)
						cerror(gettext("Missing ]"));
					else
						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
					c = getchar();
				} while(c != ']');
				*gp++ = ']';
				continue;
			}
			/*
			 * Escaped character with no special handling above.
			 * A backslash at EOF is stored as an escaped backslash.
			 */
			if (c == EOF) {
				ungetchar(EOF);
				*gp++ = '\\';
				*gp++ = '\\';
				continue;
			}
			if (c == '\n')
cerror(value(vi_TERSE) ? gettext("No newlines in re's") :
gettext("Can't escape newlines into regular expressions"));
			*gp++ = '\\';
			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
			continue;

		case '\n':
			/* A newline ends the pattern only if the caller allows it. */
			if (oknl) {
				ungetchar(c);
				goto out;
			}
cerror(value(vi_TERSE) ? gettext("Badly formed re") :
gettext("Missing closing delimiter for regular expression"));

		case '.':
		case '~':
		case '*':
		case '[':
			if (value(vi_MAGIC))
				goto magic;
			/*
			 * nomagic: the character is ordinary, so all but ~
			 * are quoted before being copied below.
			 */
			if(c != '~')
				*gp++ = '\\';
defchar:
		default:
			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
			continue;
		}
	}
out:
	*gp++ = '\0';

#ifdef XPG4
	/* see if our compiled RE's will fit in the re structure:	*/
	if (regexc_size > EXPSIZ) {
		/*
		 * this should never happen. but it's critical that we
		 * check here, otherwise .bss would get overwritten.
		 */
		cerror(value(vi_TERSE) ? gettext("RE's can't fit") :
			gettext("Regular expressions can't fit"));
		return(eof);
	}

	/*
	 * We create re each time we need it.
	 */

	/*
	 * If re is unset or shared with scanre/subre, a fresh area is
	 * allocated so the saved expression is left alone; otherwise the
	 * existing private area is released and reused.
	 */
	if (re == NULL || re == scanre || re == subre) {
		if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
			error(gettext("out of memory"));
			exit(errcnt);
		}
	} else {
		regex_comp_free(&re->Expbuf);
		memset(re, 0, sizeof(struct regexp));
	}

	compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf
	    + regexc_size);
#else /* !XPG4 */
	(void) _compile((const char *)genbuf, (char *)re->Expbuf,
		(char *)(re->Expbuf + sizeof (re->Expbuf)), 1);
#endif /* XPG4 */

	/* Map the compiler's error codes onto messages. */
	if(regerrno)
		switch(regerrno) {

		case 42:
cerror(gettext("\\( \\) Imbalance"));
		case 43:
cerror(value(vi_TERSE) ? gettext("Awash in \\('s!") :
gettext("Too many \\('d subexpressions in a regular expression"));
		case 50:
			goto complex;
		case 67:
cerror(value(vi_TERSE) ? gettext("Illegal byte sequence") :
gettext("Regular expression has illegal byte sequence"));
		}
	/* Remember how many subexpressions the pattern has (used by dosub()). */
	re->Nbra = nbra;
	return(eof);
}
1052 
1053 cerror(s)
1054 	unsigned char *s;
1055 {
1056 	if (re) {
1057 		re->Expbuf[0] = re->Expbuf[1] = 0;
1058 	}
1059 	error(s);
1060 }
1061 
1062 execute(gf, addr)
1063 	line *addr;
1064 {
1065 	unsigned char *p1, *p2;
1066 	char *start;
1067 	int c, i;
1068 	int ret;
1069 	int	len;
1070 
1071 	if (gf) {
1072 		if (re == NULL || re->Expbuf[0])
1073 			return (0);
1074 		if(value(vi_IGNORECASE)) {
1075 			p1 = genbuf;
1076 			p2 = (unsigned char *)loc2;
1077 			while(c = *p2) {
1078 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1079 					len = 1;
1080 				if (len == 1) {
1081 					*p1++ = tolower(c);
1082 					p2++;
1083 					continue;
1084 				}
1085 				strncpy(p1, p2, len);
1086 				p1 += len; p2 += len;
1087 			}
1088 			*p1 = '\0';
1089 			locs = (char *)genbuf;
1090 			p1 = genbuf;
1091 			start = loc2;
1092 		} else {
1093 			p1 = (unsigned char *)loc2;
1094 			locs = loc2;
1095 		}
1096 	} else {
1097 		if (addr == zero)
1098 			return (0);
1099 		p1 = linebuf;
1100 		getline(*addr);
1101 		if(value(vi_IGNORECASE)) {
1102 			p1 = genbuf;
1103 			p2 = linebuf;
1104 			while(c = *p2) {
1105 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1106 					len = 1;
1107 				if (len == 1) {
1108 					*p1++ = tolower(c);
1109 					p2++;
1110 					continue;
1111 				}
1112 				strncpy(p1, p2, len);
1113 				p1 += len; p2 += len;
1114 			}
1115 			*p1 = '\0';
1116 			p1 = genbuf;
1117 			start = (char *)linebuf;
1118 		}
1119 		locs = (char *)0;
1120 	}
1121 
1122 	ret = step((char *)p1, (char *)re->Expbuf);
1123 
1124 	if(value(vi_IGNORECASE) && ret) {
1125 		loc1 = start + (loc1 - (char *)genbuf);
1126 		loc2 = start + (loc2 - (char *)genbuf);
1127 		for(i = 0; i < NBRA; i++) {
1128 			braslist[i] = start + (braslist[i] - (char *)genbuf);
1129 			braelist[i] = start + (braelist[i] - (char *)genbuf);
1130 		}
1131 	}
1132 	return ret;
1133 }
1134 
1135 /*
1136  *  Initialize the compiled regular-expression storage areas (called from
1137  *  main()).
1138  */
1139 
1140 void init_re (void)
1141 {
1142 #ifdef XPG4
1143 	re = scanre = subre = NULL;
1144 #else /* !XPG4 */
1145 	if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1146 		error(gettext("out of memory"));
1147 		exit(errcnt);
1148 	}
1149 
1150 	if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) {
1151 		error(gettext("out of memory"));
1152 		exit(errcnt);
1153 	}
1154 
1155 	if ((subre = calloc(1, sizeof(struct regexp))) == NULL) {
1156 		error(gettext("out of memory"));
1157 		exit(errcnt);
1158 	}
1159 #endif /* XPG4 */
1160 }
1161 
1162 /*
1163  *  Save what is in the special place re to the named alternate
1164  *  location.  This means freeing up what's currently in this target
1165  *  location, if necessary.
1166  */
1167 
1168 void savere(struct regexp ** a)
1169 {
1170 #ifdef XPG4
1171 	if (a == NULL || re == NULL) {
1172 		return;
1173 	}
1174 
1175 	if (*a == NULL) {
1176 		*a = re;
1177 		return;
1178 	}
1179 
1180 	if (*a != re) {
1181 		if (scanre != subre) {
1182 			regex_comp_free(&((*a)->Expbuf));
1183 			free(*a);
1184 		}
1185 		*a = re;
1186 	}
1187 #else /* !XPG4 */
1188 	memcpy(*a, re, sizeof(struct regexp));
1189 #endif /* XPG4 */
1190 }
1191 
1192 
1193 /*
1194  *  Restore what is in the named alternate location to the special place
1195  *  re.  This means first freeing up what's currently in re, if necessary.
1196  */
1197 
1198 void resre(struct regexp * a)
1199 {
1200 #ifdef XPG4
1201 	if (a == NULL) {
1202 		return;
1203 	}
1204 
1205 	if (re == NULL) {
1206 		re = a;
1207 		return;
1208 	}
1209 
1210 	if (a != re) {
1211 		if ((re != scanre) && (re != subre)) {
1212 			regex_comp_free(&re->Expbuf);
1213 			free(re);
1214 		}
1215 
1216 		re = a;
1217 	}
1218 #else /* !XPG4 */
1219 	memcpy(re, a, sizeof(struct regexp));
1220 #endif /* XPG4 */
1221 }
1222