xref: /titanic_51/usr/src/cmd/vi/port/ex_re.c (revision 0f1702c5201310f0529cd5abb77652e5e9b241b6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 
31 /* Copyright (c) 1981 Regents of the University of California */
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 #include "ex.h"
36 #include "ex_re.h"
37 
38 /* from libgen */
39 char *_compile(const char *, char *, char *, int);
40 
41 /*
42  * The compiled-regular-expression storage areas (re, scanre, and subre)
43  * have been changed into dynamically allocated memory areas, in both the
44  * Solaris and XPG4 versions.
45  *
46  * In the Solaris version, which uses the original libgen(3g) compile()
47  * and step() calls, these areas are allocated once, and then data are
48  * copied between them subsequently, as they were in the original
49  * implementation.  This is possible because the compiled information is
50  * a self-contained block of bits.
51  *
52  * In the XPG4 version, the expr:compile.o object is linked in as a
53  * simulation of these functions using the new regcomp() and regexec()
54  * functions.  The problem here is that the resulting
55  * compiled-regular-expression data contain pointers to other data, which
56  * need to be freed, but only when we are quite sure that we are done
57  * with them - and certainly not before.  There was an earlier attempt to
58  * handle these differences, but that effort was flawed.
59  */
60 
61 extern int	getchar();
62 #ifdef XPG4
63 void regex_comp_free(void *);
64 extern size_t regexc_size;	/* compile.c: size of regex_comp structure */
65 #endif /* XPG4 */
66 
67 /*
68  * Global, substitute and regular expressions.
69  * Very similar to ed, with some re extensions and
70  * confirmed substitute.
71  */
72 void
73 global(k)
74 	bool k;
75 {
76 	unsigned char *gp;
77 	int c;
78 	line *a1;
79 	unsigned char globuf[GBSIZE], *Cwas;
80 	int nlines = lineDOL();
81 	int oinglobal = inglobal;
82 	unsigned char *oglobp = globp;
83 	char	multi[MB_LEN_MAX + 1];
84 	wchar_t	wc;
85 	int	len;
86 
87 
88 	Cwas = Command;
89 	/*
90 	 * States of inglobal:
91 	 *  0: ordinary - not in a global command.
92 	 *  1: text coming from some buffer, not tty.
93 	 *  2: like 1, but the source of the buffer is a global command.
94 	 * Hence you're only in a global command if inglobal==2. This
95 	 * strange sounding convention is historically derived from
96 	 * everybody simulating a global command.
97 	 */
98 	if (inglobal==2)
99 		error(value(vi_TERSE) ? gettext("Global within global") :
100 gettext("Global within global not allowed"));
101 	markDOT();
102 	setall();
103 	nonzero();
104 	if (skipend())
105 		error(value(vi_TERSE) ? gettext("Global needs re") :
106 gettext("Missing regular expression for global"));
107 	c = getchar();
108 	(void)vi_compile(c, 1);
109 	savere(&scanre);
110 	gp = globuf;
111 	while ((c = peekchar()) != '\n') {
112 		if (!isascii(c)) {
113 			if (c == EOF) {
114 				c = '\n';
115 				ungetchar(c);
116 				goto out;
117 			}
118 
119 mb_copy:
120 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
121 				if ((gp + len) >= &globuf[GBSIZE - 2])
122 					error(gettext("Global command too long"));
123 				strncpy(gp, multi, len);
124 				gp += len;
125 				continue;
126 			}
127 		}
128 
129 		(void) getchar();
130 		switch (c) {
131 
132 		case EOF:
133 			c = '\n';
134 			ungetchar(c);
135 			goto out;
136 
137 		case '\\':
138 			c = peekchar();
139 			if (!isascii(c)) {
140 				*gp++ = '\\';
141 				goto mb_copy;
142 			}
143 
144 			(void) getchar();
145 			switch (c) {
146 
147 			case '\\':
148 				ungetchar(c);
149 				break;
150 
151 			case '\n':
152 				break;
153 
154 			default:
155 				*gp++ = '\\';
156 				break;
157 			}
158 			break;
159 		}
160 		*gp++ = c;
161 		if (gp >= &globuf[GBSIZE - 2])
162 			error(gettext("Global command too long"));
163 	}
164 
165 out:
166 	donewline();
167 	*gp++ = c;
168 	*gp++ = 0;
169 	saveall();
170 	inglobal = 2;
171 	for (a1 = one; a1 <= dol; a1++) {
172 		*a1 &= ~01;
173 		if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
174 			*a1 |= 01;
175 	}
176 #ifdef notdef
177 /*
178  * This code is commented out for now.  The problem is that we don't
179  * fix up the undo area the way we should.  Basically, I think what has
180  * to be done is to copy the undo area down (since we shrunk everything)
181  * and move the various pointers into it down too.  I will do this later
182  * when I have time. (Mark, 10-20-80)
183  */
184 	/*
185 	 * Special case: g/.../d (avoid n^2 algorithm)
186 	 */
187 	if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
188 		gdelete();
189 		return;
190 	}
191 #endif
192 	if (inopen)
193 		inopen = -1;
194 	/*
195 	 * Now for each marked line, set dot there and do the commands.
196 	 * Note the n^2 behavior here for lots of lines matching.
197 	 * This is really needed: in some cases you could delete lines,
198 	 * causing a marked line to be moved before a1 and missed if
199 	 * we didn't restart at zero each time.
200 	 */
201 	for (a1 = one; a1 <= dol; a1++) {
202 		if (*a1 & 01) {
203 			*a1 &= ~01;
204 			dot = a1;
205 			globp = globuf;
206 			commands(1, 1);
207 			a1 = zero;
208 		}
209 	}
210 	globp = oglobp;
211 	inglobal = oinglobal;
212 	endline = 1;
213 	Command = Cwas;
214 	netchHAD(nlines);
215 	setlastchar(EOF);
216 	if (inopen) {
217 		ungetchar(EOF);
218 		inopen = 1;
219 	}
220 }
221 
222 /*
223  * gdelete: delete inside a global command. Handles the
224  * special case g/r.e./d. All lines to be deleted have
225  * already been marked. Squeeze the remaining lines together.
226  * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
227  * and g/r.e./.,/r.e.2/d are not treated specially.  There is no
228  * good reason for this except the question: where to you draw the line?
229  */
230 void
231 gdelete(void)
232 {
233 	line *a1, *a2, *a3;
234 
235 	a3 = dol;
236 	/* find first marked line. can skip all before it */
237 	for (a1=zero; (*a1&01)==0; a1++)
238 		if (a1>=a3)
239 			return;
240 	/* copy down unmarked lines, compacting as we go. */
241 	for (a2=a1+1; a2<=a3;) {
242 		if (*a2&01) {
243 			a2++;		/* line is marked, skip it */
244 			dot = a1;	/* dot left after line deletion */
245 		} else
246 			*a1++ = *a2++;	/* unmarked, copy it */
247 	}
248 	dol = a1-1;
249 	if (dot>dol)
250 		dot = dol;
251 	change();
252 }
253 
254 bool	cflag;
255 int	scount, slines, stotal;
256 
257 int
258 substitute(int c)
259 {
260 	line *addr;
261 	int n;
262 	int gsubf, hopcount;
263 
264 	gsubf = compsub(c);
265 	if(FIXUNDO)
266 		save12(), undkind = UNDCHANGE;
267 	stotal = 0;
268 	slines = 0;
269 	for (addr = addr1; addr <= addr2; addr++) {
270 		scount = hopcount = 0;
271 		if (dosubcon(0, addr) == 0)
272 			continue;
273 		if (gsubf) {
274 			/*
275 			 * The loop can happen from s/\</&/g
276 			 * but we don't want to break other, reasonable cases.
277 			 */
278 			hopcount = 0;
279 			while (*loc2) {
280 				if (++hopcount > sizeof linebuf)
281 					error(gettext("substitution loop"));
282 				if (dosubcon(1, addr) == 0)
283 					break;
284 			}
285 		}
286 		if (scount) {
287 			stotal += scount;
288 			slines++;
289 			putmark(addr);
290 			n = append(getsub, addr);
291 			addr += n;
292 			addr2 += n;
293 		}
294 	}
295 	if (stotal == 0 && !inglobal && !cflag)
296 		error(value(vi_TERSE) ? gettext("Fail") :
297 gettext("Substitute pattern match failed"));
298 	snote(stotal, slines);
299 	return (stotal);
300 }
301 
302 int
303 compsub(int ch)
304 {
305 	int seof, c, uselastre;
306 	static int gsubf;
307 	static unsigned char remem[RHSSIZE];
308 	static int remflg = -1;
309 
310 	if (!value(vi_EDCOMPATIBLE))
311 		gsubf = cflag = 0;
312 	uselastre = 0;
313 	switch (ch) {
314 
315 	case 's':
316 		(void)skipwh();
317 		seof = getchar();
318 		if (endcmd(seof) || any(seof, "gcr")) {
319 			ungetchar(seof);
320 			goto redo;
321 		}
322 		if (isalpha(seof) || isdigit(seof))
323 			error(value(vi_TERSE) ? gettext("Substitute needs re") :
324 gettext("Missing regular expression for substitute"));
325 		seof = vi_compile(seof, 1);
326 		uselastre = 1;
327 		comprhs(seof);
328 		gsubf = cflag = 0;
329 		break;
330 
331 	case '~':
332 		uselastre = 1;
333 		/* fall into ... */
334 	case '&':
335 	redo:
336 		if (re == NULL || re->Expbuf[1] == 0)
337 			error(value(vi_TERSE) ? gettext("No previous re") :
338 gettext("No previous regular expression"));
339 		if (subre == NULL || subre->Expbuf[1] == 0)
340 			error(value(vi_TERSE) ? gettext("No previous substitute re") :
341 gettext("No previous substitute to repeat"));
342 		break;
343 	}
344 	for (;;) {
345 		c = getchar();
346 		switch (c) {
347 
348 		case 'g':
349 			gsubf = !gsubf;
350 			continue;
351 
352 		case 'c':
353 			cflag = !cflag;
354 			continue;
355 
356 		case 'r':
357 			uselastre = 1;
358 			continue;
359 
360 		default:
361 			ungetchar(c);
362 			setcount();
363 			donewline();
364 			if (uselastre)
365 				savere(&subre);
366 			else
367 				resre(subre);
368 
369 			/*
370 			 * The % by itself on the right hand side means
371 			 * that the previous value of the right hand side
372 			 * should be used. A -1 is used to indicate no
373 			 * previously remembered search string.
374 			 */
375 
376 			if (rhsbuf[0] == '%' && rhsbuf[1] == 0)
377 				if (remflg == -1)
378 					error(gettext("No previously remembered string"));
379 			        else
380 					strcpy(rhsbuf, remem);
381 			else {
382 				strcpy(remem, rhsbuf);
383 				remflg = 1;
384 			}
385 			return (gsubf);
386 		}
387 	}
388 }
389 
390 void
391 comprhs(int seof)
392 {
393 	unsigned char *rp, *orp;
394 	int c;
395 	unsigned char orhsbuf[RHSSIZE];
396 	char	multi[MB_LEN_MAX + 1];
397 	int	len;
398 	wchar_t	wc;
399 
400 	rp = rhsbuf;
401 	CP(orhsbuf, rp);
402 	for (;;) {
403 		c = peekchar();
404 		if (c == seof) {
405 			(void) getchar();
406 			break;
407 		}
408 
409 		if (!isascii(c) && c != EOF) {
410 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
411 				if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
412 					goto toobig;
413 				strncpy(rp, multi, len);
414 				rp += len;
415 				continue;
416 			}
417 		}
418 
419 		(void) getchar();
420 		switch (c) {
421 
422 		case '\\':
423 			c = peekchar();
424 			if (c == EOF) {
425 				(void) getchar();
426 				error(gettext("Replacement string ends with \\"));
427 			}
428 
429 			if (!isascii(c)) {
430 				*rp++ = '\\';
431 				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
432 					if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
433 						goto over_flow;
434 					strncpy(rp, multi, len);
435 					rp += len;
436 					continue;
437 				}
438 			}
439 
440 			(void) getchar();
441 			if (value(vi_MAGIC)) {
442 				/*
443 				 * When "magic", \& turns into a plain &,
444 				 * and all other chars work fine quoted.
445 				 */
446 				if (c != '&') {
447 					if(rp >= &rhsbuf[RHSSIZE - 1]) {
448 						*rp=0;
449 						error(value(vi_TERSE) ?
450 gettext("Replacement pattern too long") :
451 gettext("Replacement pattern too long - limit 256 characters"));
452 					}
453 					*rp++ = '\\';
454 				}
455 				break;
456 			}
457 magic:
458 			if (c == '~') {
459 				for (orp = orhsbuf; *orp; *rp++ = *orp++)
460 					if (rp >= &rhsbuf[RHSSIZE - 1])
461 						goto toobig;
462 				continue;
463 			}
464 			if(rp >= &rhsbuf[RHSSIZE - 1]) {
465 over_flow:
466 				*rp=0;
467 				error(value(vi_TERSE) ?
468 gettext("Replacement pattern too long") :
469 gettext("Replacement pattern too long - limit 256 characters"));
470 			}
471 			*rp++ = '\\';
472 			break;
473 
474 		case '\n':
475 		case EOF:
476 			if (!(globp && globp[0])) {
477 				ungetchar(c);
478 				goto endrhs;
479 			}
480 
481 		case '~':
482 		case '&':
483 			if (value(vi_MAGIC))
484 				goto magic;
485 			break;
486 		}
487 		if (rp >= &rhsbuf[RHSSIZE - 1]) {
488 toobig:
489 			*rp = 0;
490 			error(value(vi_TERSE) ?
491 gettext("Replacement pattern too long") :
492 gettext("Replacement pattern too long - limit 256 characters"));
493 		}
494 		*rp++ = c;
495 	}
496 endrhs:
497 	*rp++ = 0;
498 }
499 
500 int
501 getsub(void)
502 {
503 	unsigned char *p;
504 
505 	if ((p = linebp) == 0)
506 		return (EOF);
507 	strcLIN(p);
508 	linebp = 0;
509 	return (0);
510 }
511 
512 int
513 dosubcon(bool f, line *a)
514 {
515 
516 	if (execute(f, a) == 0)
517 		return (0);
518 	if (confirmed(a)) {
519 		dosub();
520 		scount++;
521 	}
522 	return (1);
523 }
524 
525 int
526 confirmed(line *a)
527 {
528 	int c, cnt, ch;
529 
530 	if (cflag == 0)
531 		return (1);
532 	pofix();
533 	pline(lineno(a));
534 	if (inopen)
535 		putchar('\n' | QUOTE);
536 	c = lcolumn(loc1);
537 	ugo(c, ' ');
538 	ugo(lcolumn(loc2) - c, '^');
539 	flush();
540 	cnt = 0;
541 bkup:
542 	ch = c = getkey();
543 again:
544 	if (c == '\b') {
545 		if ((inopen)
546 		 && (cnt > 0)) {
547 			putchar('\b' | QUOTE);
548 			putchar(' ');
549 			putchar('\b' | QUOTE), flush();
550 			cnt --;
551 		}
552 		goto bkup;
553 	}
554 	if (c == '\r')
555 		c = '\n';
556 	if (inopen && MB_CUR_MAX == 1 || c < 0200) {
557 		putchar(c);
558 		flush();
559 		cnt++;
560 	}
561 	if (c != '\n' && c != EOF) {
562 		c = getkey();
563 		goto again;
564 	}
565 	noteinp();
566 	return (ch == 'y');
567 }
568 
/*
 * ugo: output the character `with' cnt times; nothing is written when
 * cnt is zero or negative.
 */
void
ugo(int cnt, int with)
{

	for (; cnt > 0; cnt--)
		putchar(with);
}
578 
579 int	casecnt;
580 bool	destuc;
581 
582 void
583 dosub(void)
584 {
585 	unsigned char *lp, *sp, *rp;
586 	int c;
587 	int	len;
588 
589 	lp = linebuf;
590 	sp = genbuf;
591 	rp = rhsbuf;
592 	while (lp < (unsigned char *)loc1)
593 		*sp++ = *lp++;
594 	casecnt = 0;
595 	/*
596 	 * Caution: depending on the hardware, c will be either sign
597 	 * extended or not if C&QUOTE is set.  Thus, on a VAX, c will
598 	 * be < 0, but on a 3B, c will be >= 128.
599 	 */
600 	while (c = *rp) {
601 		if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
602 			len = 1;
603 		/* ^V <return> from vi to split lines */
604 		if (c == '\r')
605 			c = '\n';
606 
607 		if (c == '\\') {
608 			rp++;
609 			if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
610 				len = 1;
611 			switch (c = *rp++) {
612 
613 			case '&':
614 				sp = place(sp, loc1, loc2);
615 				if (sp == 0)
616 					goto ovflo;
617 				continue;
618 
619 			case 'l':
620 				casecnt = 1;
621 				destuc = 0;
622 				continue;
623 
624 			case 'L':
625 				casecnt = LBSIZE;
626 				destuc = 0;
627 				continue;
628 
629 			case 'u':
630 				casecnt = 1;
631 				destuc = 1;
632 				continue;
633 
634 			case 'U':
635 				casecnt = LBSIZE;
636 				destuc = 1;
637 				continue;
638 
639 			case 'E':
640 			case 'e':
641 				casecnt = 0;
642 				continue;
643 			}
644 			if(re != NULL && c >= '1' && c < re->Nbra + '1') {
645 				sp = place(sp, braslist[c - '1'] , braelist[c - '1']);
646 				if (sp == 0)
647 					goto ovflo;
648 				continue;
649 			}
650 			rp--;
651 		}
652 		if (len > 1) {
653 			if ((sp + len) >= &genbuf[LBSIZE])
654 				goto ovflo;
655 			strncpy(sp, rp, len);
656 		} else {
657 			if (casecnt)
658 				*sp = fixcase(c);
659 			else
660 				*sp = c;
661 		}
662 		sp += len; rp += len;
663 		if (sp >= &genbuf[LBSIZE])
664 ovflo:
665 			error(value(vi_TERSE) ? gettext("Line overflow") :
666 gettext("Line overflow in substitute"));
667 	}
668 	lp = (unsigned char *)loc2;
669 	loc2 = (char *)(linebuf + (sp - genbuf));
670 	while (*sp++ = *lp++)
671 		if (sp >= &genbuf[LBSIZE])
672 			goto ovflo;
673 	strcLIN(genbuf);
674 }
675 
676 int
677 fixcase(int c)
678 {
679 
680 	if (casecnt == 0)
681 		return (c);
682 	casecnt--;
683 	if (destuc) {
684 		if (islower(c))
685 			c = toupper(c);
686 	} else
687 		if (isupper(c))
688 			c = tolower(c);
689 	return (c);
690 }
691 
692 unsigned char *
693 place(sp, l1, l2)
694 	unsigned char *sp, *l1, *l2;
695 {
696 
697 	while (l1 < l2) {
698 		*sp++ = fixcase(*l1++);
699 		if (sp >= &genbuf[LBSIZE])
700 			return (0);
701 	}
702 	return (sp);
703 }
704 
705 void
706 snote(int total, int nlines)
707 {
708 
709 	if (!notable(total))
710 		return;
711 	if (nlines != 1 && nlines != total)
712 		viprintf(mesg(value(vi_TERSE) ?
713 			/*
714 			 * TRANSLATION_NOTE
715 			 *	Reference order of arguments must not
716 			 *	be changed using '%digit$', since vi's
717 			 *	viprintf() does not support it.
718 			 */
719 			    gettext("%d subs on %d lines") :
720 			/*
721 			 * TRANSLATION_NOTE
722 			 *	Reference order of arguments must not
723 			 *	be changed using '%digit$', since vi's
724 			 *	viprintf() does not support it.
725 			 */
726 			    gettext("%d substitutions on %d lines")),
727 		       total, nlines);
728 	else
729 		viprintf(mesg(value(vi_TERSE) ?
730 			    gettext("%d subs") :
731 			    gettext("%d substitutions")),
732 		       total);
733 	noonl();
734 	flush();
735 }
736 
737 #ifdef XPG4
738 #include <regex.h>
739 
740 extern int regcomp_flags;	/* use to specify cflags for regcomp() */
741 #endif /* XPG4 */
742 
743 int
744 vi_compile(int eof, int oknl)
745 {
746 	int c;
747 	unsigned char *gp, *p1;
748 	unsigned char *rhsp;
749 	unsigned char rebuf[LBSIZE];
750 	char	multi[MB_LEN_MAX + 1];
751 	int	len;
752 	wchar_t	wc;
753 
754 #ifdef XPG4
755 	/*
756 	 * reset cflags to plain BRE
757 	 * if \< and/or \> is specified, REG_WORDS is set.
758 	 */
759 	regcomp_flags = 0;
760 #endif /* XPG4 */
761 
762 	gp = genbuf;
763 	if (isalpha(eof) || isdigit(eof))
764 error(gettext("Regular expressions cannot be delimited by letters or digits"));
765 	if(eof >= 0200 && MB_CUR_MAX > 1)
766 error(gettext("Regular expressions cannot be delimited by multibyte characters"));
767 	c = getchar();
768 	if (eof == '\\')
769 		switch (c) {
770 
771 		case '/':
772 		case '?':
773 			if (scanre == NULL || scanre->Expbuf[1] == 0)
774 error(value(vi_TERSE) ? gettext("No previous scan re") :
775 gettext("No previous scanning regular expression"));
776 			resre(scanre);
777 			return (c);
778 
779 		case '&':
780 			if (subre == NULL || subre->Expbuf[1] == 0)
781 error(value(vi_TERSE) ? gettext("No previous substitute re") :
782 gettext("No previous substitute regular expression"));
783 			resre(subre);
784 			return (c);
785 
786 		default:
787 error(value(vi_TERSE) ? gettext("Badly formed re") :
788 gettext("Regular expression \\ must be followed by / or ?"));
789 		}
790 	if (c == eof || c == '\n' || c == EOF) {
791 		if (re == NULL || re->Expbuf[1] == 0)
792 error(value(vi_TERSE) ? gettext("No previous re") :
793 gettext("No previous regular expression"));
794 		if (c == '\n' && oknl == 0)
795 error(value(vi_TERSE) ? gettext("Missing closing delimiter") :
796 gettext("Missing closing delimiter for regular expression"));
797 		if (c != eof)
798 			ungetchar(c);
799 		return (eof);
800 	}
801 	gp = genbuf;
802 	if (c == '^') {
803 		*gp++ = c;
804 		c = getchar();
805 	}
806 	ungetchar(c);
807 	for (;;) {
808 		c = getchar();
809 		if (c == eof || c == EOF) {
810 			if (c == EOF)
811 				ungetchar(c);
812 			goto out;
813 		}
814 		if (gp >= &genbuf[LBSIZE - 3])
815 complex:
816 			cerror(value(vi_TERSE) ?
817 			    (unsigned char *)gettext("Re too complex") :
818 			    (unsigned char *)
819 			    gettext("Regular expression too complicated"));
820 
821 		if (!(isascii(c) || MB_CUR_MAX == 1)) {
822 			ungetchar(c);
823 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
824 				if ((gp + len) >= &genbuf[LBSIZE - 3])
825 					goto complex;
826 				strncpy(gp, multi, len);
827 				gp += len;
828 				continue;
829 			}
830 			(void) getchar();
831 		}
832 
833 		switch (c) {
834 
835 		case '\\':
836 			c = getchar();
837 			if (!isascii(c)) {
838 				ungetchar(c);
839 				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
840 					if ((gp + len) >= &genbuf[LBSIZE - 3])
841 						goto complex;
842 					*gp++ = '\\';
843 					strncpy(gp, multi, len);
844 					gp += len;
845 					continue;
846 				}
847 				(void) getchar();
848 			}
849 
850 			switch (c) {
851 
852 			case '<':
853 			case '>':
854 #ifdef XPG4
855 				regcomp_flags = REG_WORDS;
856 				/*FALLTHRU*/
857 #endif /* XPG4 */
858 			case '(':
859 			case ')':
860 			case '{':
861 			case '}':
862 			case '$':
863 			case '^':
864 			case '\\':
865 				*gp++ = '\\';
866 				*gp++ = c;
867 				continue;
868 
869 			case 'n':
870 				*gp++ = c;
871 				continue;
872 			}
873 			if(c >= '0' && c <= '9') {
874 				*gp++ = '\\';
875 				*gp++ = c;
876 				continue;
877 			}
878 			if (value(vi_MAGIC) == 0)
879 magic:
880 			switch (c) {
881 
882 			case '.':
883 				*gp++ = '.';
884 				continue;
885 
886 			case '~':
887 				rhsp = rhsbuf;
888 				while (*rhsp) {
889 					if (!isascii(*rhsp)) {
890 						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) {
891 							if ((gp + len) >= &genbuf[LBSIZE-2])
892 								goto complex;
893 							strncpy(gp, rhsp, len);
894 							rhsp += len; gp += len;
895 							continue;
896 						}
897 					}
898 					len = 1;
899 					if (*rhsp == '\\') {
900 						c = *++rhsp;
901 						if (c == '&')
902 cerror(value(vi_TERSE) ? (unsigned char *)
903 gettext("Replacement pattern contains &") :
904 (unsigned char *)gettext("Replacement pattern contains & - cannot use in re"));
905 						if (c >= '1' && c <= '9')
906 cerror(value(vi_TERSE) ? (unsigned char *)
907 gettext("Replacement pattern contains \\d") :
908 (unsigned char *)
909 gettext("Replacement pattern contains \\d - cannot use in re"));
910 						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) {
911 							len = 1;
912 							if(any(c, ".\\*[$"))
913 								*gp++ = '\\';
914 						}
915 					}
916 
917 					if ((gp + len) >= &genbuf[LBSIZE-2])
918 						goto complex;
919 					if (len == 1) {
920 						c = *rhsp++;
921 						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
922 					} else {
923 						strncpy(gp, rhsp, len);
924 						gp += len; rhsp += len;
925 					}
926 				}
927 				continue;
928 
929 			case '*':
930 				*gp++ = '*';
931 				continue;
932 
933 			case '[':
934 				*gp++ = '[';
935 				c = getchar();
936 				if (c == '^') {
937 					*gp++ = '^';
938 					c = getchar();
939 				}
940 
941 				do {
942 					if (!isascii(c) && c != EOF) {
943 						ungetchar(c);
944 						if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
945 							if ((gp + len)>= &genbuf[LBSIZE-4])
946 								goto complex;
947 							strncpy(gp, multi, len);
948 							gp += len;
949 							c = getchar();
950 							continue;
951 						}
952 						(void) getchar();
953 					}
954 
955 					if (gp >= &genbuf[LBSIZE-4])
956 						goto complex;
957 					if(c == '\\' && peekchar() == ']') {
958 						(void)getchar();
959 						*gp++ = '\\';
960 						*gp++ = ']';
961 					}
962 					else if (c == '\n' || c == EOF)
963 						cerror((unsigned char *)
964 						    gettext("Missing ]"));
965 					else
966 						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
967 					c = getchar();
968 				} while(c != ']');
969 				*gp++ = ']';
970 				continue;
971 			}
972 			if (c == EOF) {
973 				ungetchar(EOF);
974 				*gp++ = '\\';
975 				*gp++ = '\\';
976 				continue;
977 			}
978 			if (c == '\n')
979 cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") :
980 (unsigned char *)gettext("Can't escape newlines into regular expressions"));
981 			*gp++ = '\\';
982 			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
983 			continue;
984 
985 		case '\n':
986 			if (oknl) {
987 				ungetchar(c);
988 				goto out;
989 			}
990 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") :
991 (unsigned char *)gettext("Missing closing delimiter for regular expression"));
992 
993 		case '.':
994 		case '~':
995 		case '*':
996 		case '[':
997 			if (value(vi_MAGIC))
998 				goto magic;
999 			if(c != '~')
1000 				*gp++ = '\\';
1001 defchar:
1002 		default:
1003 			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
1004 			continue;
1005 		}
1006 	}
1007 out:
1008 	*gp++ = '\0';
1009 
1010 #ifdef XPG4
1011 	/* see if our compiled RE's will fit in the re structure:	*/
1012 	if (regexc_size > EXPSIZ) {
1013 		/*
1014 		 * this should never happen. but it's critical that we
1015 		 * check here, otherwise .bss would get overwritten.
1016 		 */
1017 		cerror(value(vi_TERSE) ? (unsigned char *)
1018 		    gettext("RE's can't fit") :
1019 		    (unsigned char *)gettext("Regular expressions can't fit"));
1020 		return(eof);
1021 	}
1022 
1023 	/*
1024 	 * We create re each time we need it.
1025 	 */
1026 
1027 	if (re == NULL || re == scanre || re == subre) {
1028 		if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1029 			error(gettext("out of memory"));
1030 			exit(errcnt);
1031 		}
1032 	} else {
1033 		regex_comp_free(&re->Expbuf);
1034 		memset(re, 0, sizeof(struct regexp));
1035 	}
1036 
1037 	compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf
1038 	    + regexc_size);
1039 #else /* !XPG4 */
1040 	(void) _compile((const char *)genbuf, (char *)re->Expbuf,
1041 		(char *)(re->Expbuf + sizeof (re->Expbuf)), 1);
1042 #endif /* XPG4 */
1043 
1044 	if(regerrno)
1045 		switch(regerrno) {
1046 
1047 		case 42:
1048 cerror((unsigned char *)gettext("\\( \\) Imbalance"));
1049 		case 43:
1050 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") :
1051 (unsigned char *)
1052 gettext("Too many \\('d subexpressions in a regular expression"));
1053 		case 50:
1054 			goto complex;
1055 		case 67:
1056 cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") :
1057 (unsigned char *)gettext("Regular expression has illegal byte sequence"));
1058 		}
1059 	re->Nbra = nbra;
1060 	return(eof);
1061 }
1062 
1063 void
1064 cerror(unsigned char *s)
1065 {
1066 	if (re) {
1067 		re->Expbuf[0] = re->Expbuf[1] = 0;
1068 	}
1069 	error(s);
1070 }
1071 
1072 int
1073 execute(int gf, line *addr)
1074 {
1075 	unsigned char *p1, *p2;
1076 	char *start;
1077 	int c, i;
1078 	int ret;
1079 	int	len;
1080 
1081 	if (gf) {
1082 		if (re == NULL || re->Expbuf[0])
1083 			return (0);
1084 		if(value(vi_IGNORECASE)) {
1085 			p1 = genbuf;
1086 			p2 = (unsigned char *)loc2;
1087 			while(c = *p2) {
1088 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1089 					len = 1;
1090 				if (len == 1) {
1091 					*p1++ = tolower(c);
1092 					p2++;
1093 					continue;
1094 				}
1095 				strncpy(p1, p2, len);
1096 				p1 += len; p2 += len;
1097 			}
1098 			*p1 = '\0';
1099 			locs = (char *)genbuf;
1100 			p1 = genbuf;
1101 			start = loc2;
1102 		} else {
1103 			p1 = (unsigned char *)loc2;
1104 			locs = loc2;
1105 		}
1106 	} else {
1107 		if (addr == zero)
1108 			return (0);
1109 		p1 = linebuf;
1110 		getline(*addr);
1111 		if(value(vi_IGNORECASE)) {
1112 			p1 = genbuf;
1113 			p2 = linebuf;
1114 			while(c = *p2) {
1115 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1116 					len = 1;
1117 				if (len == 1) {
1118 					*p1++ = tolower(c);
1119 					p2++;
1120 					continue;
1121 				}
1122 				strncpy(p1, p2, len);
1123 				p1 += len; p2 += len;
1124 			}
1125 			*p1 = '\0';
1126 			p1 = genbuf;
1127 			start = (char *)linebuf;
1128 		}
1129 		locs = (char *)0;
1130 	}
1131 
1132 	ret = step((char *)p1, (char *)re->Expbuf);
1133 
1134 	if(value(vi_IGNORECASE) && ret) {
1135 		loc1 = start + (loc1 - (char *)genbuf);
1136 		loc2 = start + (loc2 - (char *)genbuf);
1137 		for(i = 0; i < NBRA; i++) {
1138 			braslist[i] = start + (braslist[i] - (char *)genbuf);
1139 			braelist[i] = start + (braelist[i] - (char *)genbuf);
1140 		}
1141 	}
1142 	return ret;
1143 }
1144 
1145 /*
1146  *  Initialize the compiled regular-expression storage areas (called from
1147  *  main()).
1148  */
1149 
1150 void init_re (void)
1151 {
1152 #ifdef XPG4
1153 	re = scanre = subre = NULL;
1154 #else /* !XPG4 */
1155 	if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1156 		error(gettext("out of memory"));
1157 		exit(errcnt);
1158 	}
1159 
1160 	if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) {
1161 		error(gettext("out of memory"));
1162 		exit(errcnt);
1163 	}
1164 
1165 	if ((subre = calloc(1, sizeof(struct regexp))) == NULL) {
1166 		error(gettext("out of memory"));
1167 		exit(errcnt);
1168 	}
1169 #endif /* XPG4 */
1170 }
1171 
1172 /*
1173  *  Save what is in the special place re to the named alternate
1174  *  location.  This means freeing up what's currently in this target
1175  *  location, if necessary.
1176  */
1177 
1178 void savere(struct regexp ** a)
1179 {
1180 #ifdef XPG4
1181 	if (a == NULL || re == NULL) {
1182 		return;
1183 	}
1184 
1185 	if (*a == NULL) {
1186 		*a = re;
1187 		return;
1188 	}
1189 
1190 	if (*a != re) {
1191 		if (scanre != subre) {
1192 			regex_comp_free(&((*a)->Expbuf));
1193 			free(*a);
1194 		}
1195 		*a = re;
1196 	}
1197 #else /* !XPG4 */
1198 	memcpy(*a, re, sizeof(struct regexp));
1199 #endif /* XPG4 */
1200 }
1201 
1202 
1203 /*
1204  *  Restore what is in the named alternate location to the special place
1205  *  re.  This means first freeing up what's currently in re, if necessary.
1206  */
1207 
1208 void resre(struct regexp * a)
1209 {
1210 #ifdef XPG4
1211 	if (a == NULL) {
1212 		return;
1213 	}
1214 
1215 	if (re == NULL) {
1216 		re = a;
1217 		return;
1218 	}
1219 
1220 	if (a != re) {
1221 		if ((re != scanre) && (re != subre)) {
1222 			regex_comp_free(&re->Expbuf);
1223 			free(re);
1224 		}
1225 
1226 		re = a;
1227 	}
1228 #else /* !XPG4 */
1229 	memcpy(re, a, sizeof(struct regexp));
1230 #endif /* XPG4 */
1231 }
1232