xref: /titanic_50/usr/src/cmd/vi/port/ex_re.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 /* Copyright (c) 1981 Regents of the University of California */
31 
32 #include "ex.h"
33 #include "ex_re.h"
34 
35 /* from libgen */
36 char *_compile(const char *, char *, char *, int);
37 
38 /*
39  * The compiled-regular-expression storage areas (re, scanre, and subre)
40  * have been changed into dynamically allocated memory areas, in both the
41  * Solaris and XPG4 versions.
42  *
43  * In the Solaris version, which uses the original libgen(3g) compile()
44  * and step() calls, these areas are allocated once, and then data are
45  * copied between them subsequently, as they were in the original
46  * implementation.  This is possible because the compiled information is
47  * a self-contained block of bits.
48  *
49  * In the XPG4 version, the expr:compile.o object is linked in as a
50  * simulation of these functions using the new regcomp() and regexec()
51  * functions.  The problem here is that the resulting
52  * compiled-regular-expression data contain pointers to other data, which
53  * need to be freed, but only when we are quite sure that we are done
54  * with them - and certainly not before.  There was an earlier attempt to
55  * handle these differences, but that effort was flawed.
56  */
57 
/*
 * Input routine used throughout this file; it is also handed to
 * _mbftowc() as the byte-fetch callback.  NOTE(review): presumably ex's
 * own getchar rather than the stdio one -- confirm in ex.h.
 */
extern int	getchar();
#ifdef XPG4
/* Called in this file before a compiled-RE buffer is reused or freed. */
void regex_comp_free(void *);
extern size_t regexc_size;	/* compile.c: size of regex_comp structure */
#endif /* XPG4 */
63 
/*
 * Global, substitute and regular expressions.
 * Very similar to ed, with some re extensions and
 * confirmed substitute.
 */

/*
 * global: run a command list on every addressed line whose match result
 * against a regular expression equals k.
 *
 * The RE is read and compiled from the input, the remainder of the
 * command line is gathered into globuf, every selected line is marked in
 * bit 0 of its line-table entry, and commands() is then run once per
 * marked line with globp pointing at the saved command text.
 *
 *	k	value execute() must return for a line to be selected
 *		(presumably 1 for "g" and 0 for "v" -- confirm at the caller)
 */
void
global(k)
	bool k;
{
	unsigned char *gp;
	int c;
	line *a1;
	unsigned char globuf[GBSIZE], *Cwas;
	int nlines = lineDOL();
	int oinglobal = inglobal;
	unsigned char *oglobp = globp;
	char	multi[MB_LEN_MAX + 1];
	wchar_t	wc;
	int	len;


	/* Command is restored from Cwas once the per-line loop is done. */
	Cwas = Command;
	/*
	 * States of inglobal:
	 *  0: ordinary - not in a global command.
	 *  1: text coming from some buffer, not tty.
	 *  2: like 1, but the source of the buffer is a global command.
	 * Hence you're only in a global command if inglobal==2. This
	 * strange sounding convention is historically derived from
	 * everybody simulating a global command.
	 */
	if (inglobal==2)
		error(value(vi_TERSE) ? gettext("Global within global") :
gettext("Global within global not allowed"));
	markDOT();
	setall();
	nonzero();
	if (skipend())
		error(value(vi_TERSE) ? gettext("Global needs re") :
gettext("Missing regular expression for global"));
	/* The next character is the RE delimiter; a newline may end the RE. */
	c = getchar();
	(void)vi_compile(c, 1);
	savere(&scanre);
	/*
	 * Gather the command list up to the terminating newline (or EOF)
	 * into globuf.  Two bytes are always kept in reserve for the final
	 * newline and NUL stored at "out".
	 */
	gp = globuf;
	while ((c = peekchar()) != '\n') {
		if (!isascii(c)) {
			/* EOF is not ASCII either: treat it as the newline. */
			if (c == EOF) {
				c = '\n';
				ungetchar(c);
				goto out;
			}

			/*
			 * Copy one multibyte character verbatim.  This label
			 * is also reached by goto from the backslash case
			 * below, after the backslash has been stored.
			 */
mb_copy:
			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
				if ((gp + len) >= &globuf[GBSIZE - 2])
					error(gettext("Global command too long"));
				strncpy(gp, multi, len);
				gp += len;
				continue;
			}
			/* Conversion failed: fall out and store c as one byte. */
		}

		(void) getchar();
		switch (c) {

		case EOF:
			c = '\n';
			ungetchar(c);
			goto out;

		case '\\':
			c = peekchar();
			if (!isascii(c)) {
				*gp++ = '\\';
				goto mb_copy;
			}

			(void) getchar();
			switch (c) {

			case '\\':
				/*
				 * Push the second backslash back so that it is
				 * examined again on the next pass; the store
				 * after the switch records one backslash now.
				 */
				ungetchar(c);
				break;

			case '\n':
				/* Escaped newline: keep it, drop the backslash. */
				break;

			default:
				/* Any other escape keeps its backslash. */
				*gp++ = '\\';
				break;
			}
			break;
		}
		*gp++ = c;
		if (gp >= &globuf[GBSIZE - 2])
			error(gettext("Global command too long"));
	}

out:
	donewline();
	/* c is '\n' here on every path; terminate the command text. */
	*gp++ = c;
	*gp++ = 0;
	saveall();
	inglobal = 2;
	/*
	 * Marking pass: clear bit 0 on every line, then set it on each line
	 * in [addr1, addr2] whose execute() result equals k.
	 */
	for (a1 = one; a1 <= dol; a1++) {
		*a1 &= ~01;
		if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
			*a1 |= 01;
	}
#ifdef notdef
/*
 * This code is commented out for now.  The problem is that we don't
 * fix up the undo area the way we should.  Basically, I think what has
 * to be done is to copy the undo area down (since we shrunk everything)
 * and move the various pointers into it down too.  I will do this later
 * when I have time. (Mark, 10-20-80)
 */
	/*
	 * Special case: g/.../d (avoid n^2 algorithm)
	 */
	if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
		gdelete();
		return;
	}
#endif
	/*
	 * NOTE(review): inopen is forced to -1 while the commands run and put
	 * back to 1 at the end; what -1 means is defined outside this file.
	 */
	if (inopen)
		inopen = -1;
	/*
	 * Now for each marked line, set dot there and do the commands.
	 * Note the n^2 behavior here for lots of lines matching.
	 * This is really needed: in some cases you could delete lines,
	 * causing a marked line to be moved before a1 and missed if
	 * we didn't restart at zero each time.
	 */
	for (a1 = one; a1 <= dol; a1++) {
		if (*a1 & 01) {
			*a1 &= ~01;
			dot = a1;
			globp = globuf;
			commands(1, 1);
			/* Restart; the loop increment moves a1 on to "one". */
			a1 = zero;
		}
	}
	/* Restore the state saved on entry. */
	globp = oglobp;
	inglobal = oinglobal;
	endline = 1;
	Command = Cwas;
	netchHAD(nlines);
	setlastchar(EOF);
	if (inopen) {
		ungetchar(EOF);
		inopen = 1;
	}
}
218 
219 /*
220  * gdelete: delete inside a global command. Handles the
221  * special case g/r.e./d. All lines to be deleted have
222  * already been marked. Squeeze the remaining lines together.
223  * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
224  * and g/r.e./.,/r.e.2/d are not treated specially.  There is no
225  * good reason for this except the question: where to you draw the line?
226  */
227 void
gdelete(void)228 gdelete(void)
229 {
230 	line *a1, *a2, *a3;
231 
232 	a3 = dol;
233 	/* find first marked line. can skip all before it */
234 	for (a1=zero; (*a1&01)==0; a1++)
235 		if (a1>=a3)
236 			return;
237 	/* copy down unmarked lines, compacting as we go. */
238 	for (a2=a1+1; a2<=a3;) {
239 		if (*a2&01) {
240 			a2++;		/* line is marked, skip it */
241 			dot = a1;	/* dot left after line deletion */
242 		} else
243 			*a1++ = *a2++;	/* unmarked, copy it */
244 	}
245 	dol = a1-1;
246 	if (dot>dol)
247 		dot = dol;
248 	change();
249 }
250 
/*
 * Substitute state shared by the routines below:
 *	cflag	nonzero when each substitution must be confirmed; toggled
 *		by the 'c' suffix in compsub(), tested in confirmed()
 *	scount	substitutions made on the line currently being processed
 *	slines	number of lines on which at least one substitution was made
 *	stotal	substitutions made by the whole command
 */
bool	cflag;
int	scount, slines, stotal;
253 
254 int
substitute(int c)255 substitute(int c)
256 {
257 	line *addr;
258 	int n;
259 	int gsubf, hopcount;
260 
261 	gsubf = compsub(c);
262 	if(FIXUNDO)
263 		save12(), undkind = UNDCHANGE;
264 	stotal = 0;
265 	slines = 0;
266 	for (addr = addr1; addr <= addr2; addr++) {
267 		scount = hopcount = 0;
268 		if (dosubcon(0, addr) == 0)
269 			continue;
270 		if (gsubf) {
271 			/*
272 			 * The loop can happen from s/\</&/g
273 			 * but we don't want to break other, reasonable cases.
274 			 */
275 			hopcount = 0;
276 			while (*loc2) {
277 				if (++hopcount > sizeof linebuf)
278 					error(gettext("substitution loop"));
279 				if (dosubcon(1, addr) == 0)
280 					break;
281 			}
282 		}
283 		if (scount) {
284 			stotal += scount;
285 			slines++;
286 			putmark(addr);
287 			n = append(getsub, addr);
288 			addr += n;
289 			addr2 += n;
290 		}
291 	}
292 	if (stotal == 0 && !inglobal && !cflag)
293 		error(value(vi_TERSE) ? gettext("Fail") :
294 gettext("Substitute pattern match failed"));
295 	snote(stotal, slines);
296 	return (stotal);
297 }
298 
299 int
compsub(int ch)300 compsub(int ch)
301 {
302 	int seof, c, uselastre;
303 	static int gsubf;
304 	static unsigned char remem[RHSSIZE];
305 	static int remflg = -1;
306 
307 	if (!value(vi_EDCOMPATIBLE))
308 		gsubf = cflag = 0;
309 	uselastre = 0;
310 	switch (ch) {
311 
312 	case 's':
313 		(void)skipwh();
314 		seof = getchar();
315 		if (endcmd(seof) || any(seof, "gcr")) {
316 			ungetchar(seof);
317 			goto redo;
318 		}
319 		if (isalpha(seof) || isdigit(seof))
320 			error(value(vi_TERSE) ? gettext("Substitute needs re") :
321 gettext("Missing regular expression for substitute"));
322 		seof = vi_compile(seof, 1);
323 		uselastre = 1;
324 		comprhs(seof);
325 		gsubf = cflag = 0;
326 		break;
327 
328 	case '~':
329 		uselastre = 1;
330 		/* fall into ... */
331 	case '&':
332 	redo:
333 		if (re == NULL || re->Expbuf[1] == 0)
334 			error(value(vi_TERSE) ? gettext("No previous re") :
335 gettext("No previous regular expression"));
336 		if (subre == NULL || subre->Expbuf[1] == 0)
337 			error(value(vi_TERSE) ? gettext("No previous substitute re") :
338 gettext("No previous substitute to repeat"));
339 		break;
340 	}
341 	for (;;) {
342 		c = getchar();
343 		switch (c) {
344 
345 		case 'g':
346 			gsubf = !gsubf;
347 			continue;
348 
349 		case 'c':
350 			cflag = !cflag;
351 			continue;
352 
353 		case 'r':
354 			uselastre = 1;
355 			continue;
356 
357 		default:
358 			ungetchar(c);
359 			setcount();
360 			donewline();
361 			if (uselastre)
362 				savere(&subre);
363 			else
364 				resre(subre);
365 
366 			/*
367 			 * The % by itself on the right hand side means
368 			 * that the previous value of the right hand side
369 			 * should be used. A -1 is used to indicate no
370 			 * previously remembered search string.
371 			 */
372 
373 			if (rhsbuf[0] == '%' && rhsbuf[1] == 0)
374 				if (remflg == -1)
375 					error(gettext("No previously remembered string"));
376 			        else
377 					strcpy(rhsbuf, remem);
378 			else {
379 				strcpy(remem, rhsbuf);
380 				remflg = 1;
381 			}
382 			return (gsubf);
383 		}
384 	}
385 }
386 
387 void
comprhs(int seof)388 comprhs(int seof)
389 {
390 	unsigned char *rp, *orp;
391 	int c;
392 	unsigned char orhsbuf[RHSSIZE];
393 	char	multi[MB_LEN_MAX + 1];
394 	int	len;
395 	wchar_t	wc;
396 
397 	rp = rhsbuf;
398 	CP(orhsbuf, rp);
399 	for (;;) {
400 		c = peekchar();
401 		if (c == seof) {
402 			(void) getchar();
403 			break;
404 		}
405 
406 		if (!isascii(c) && c != EOF) {
407 			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
408 				if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
409 					goto toobig;
410 				strncpy(rp, multi, len);
411 				rp += len;
412 				continue;
413 			}
414 		}
415 
416 		(void) getchar();
417 		switch (c) {
418 
419 		case '\\':
420 			c = peekchar();
421 			if (c == EOF) {
422 				(void) getchar();
423 				error(gettext("Replacement string ends with \\"));
424 			}
425 
426 			if (!isascii(c)) {
427 				*rp++ = '\\';
428 				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) > 0) {
429 					if ((rp + len) >= &rhsbuf[RHSSIZE - 1])
430 						goto over_flow;
431 					strncpy(rp, multi, len);
432 					rp += len;
433 					continue;
434 				}
435 			}
436 
437 			(void) getchar();
438 			if (value(vi_MAGIC)) {
439 				/*
440 				 * When "magic", \& turns into a plain &,
441 				 * and all other chars work fine quoted.
442 				 */
443 				if (c != '&') {
444 					if(rp >= &rhsbuf[RHSSIZE - 1]) {
445 						*rp=0;
446 						error(value(vi_TERSE) ?
447 gettext("Replacement pattern too long") :
448 gettext("Replacement pattern too long - limit 256 characters"));
449 					}
450 					*rp++ = '\\';
451 				}
452 				break;
453 			}
454 magic:
455 			if (c == '~') {
456 				for (orp = orhsbuf; *orp; *rp++ = *orp++)
457 					if (rp >= &rhsbuf[RHSSIZE - 1])
458 						goto toobig;
459 				continue;
460 			}
461 			if(rp >= &rhsbuf[RHSSIZE - 1]) {
462 over_flow:
463 				*rp=0;
464 				error(value(vi_TERSE) ?
465 gettext("Replacement pattern too long") :
466 gettext("Replacement pattern too long - limit 256 characters"));
467 			}
468 			*rp++ = '\\';
469 			break;
470 
471 		case '\n':
472 		case EOF:
473 			if (!(globp && globp[0])) {
474 				ungetchar(c);
475 				goto endrhs;
476 			}
477 
478 		case '~':
479 		case '&':
480 			if (value(vi_MAGIC))
481 				goto magic;
482 			break;
483 		}
484 		if (rp >= &rhsbuf[RHSSIZE - 1]) {
485 toobig:
486 			*rp = 0;
487 			error(value(vi_TERSE) ?
488 gettext("Replacement pattern too long") :
489 gettext("Replacement pattern too long - limit 256 characters"));
490 		}
491 		*rp++ = c;
492 	}
493 endrhs:
494 	*rp++ = 0;
495 }
496 
497 int
getsub(void)498 getsub(void)
499 {
500 	unsigned char *p;
501 
502 	if ((p = linebp) == 0)
503 		return (EOF);
504 	strcLIN(p);
505 	linebp = 0;
506 	return (0);
507 }
508 
509 int
dosubcon(bool f,line * a)510 dosubcon(bool f, line *a)
511 {
512 
513 	if (execute(f, a) == 0)
514 		return (0);
515 	if (confirmed(a)) {
516 		dosub();
517 		scount++;
518 	}
519 	return (1);
520 }
521 
522 int
confirmed(line * a)523 confirmed(line *a)
524 {
525 	int c, cnt, ch;
526 
527 	if (cflag == 0)
528 		return (1);
529 	pofix();
530 	pline(lineno(a));
531 	if (inopen)
532 		putchar('\n' | QUOTE);
533 	c = lcolumn(loc1);
534 	ugo(c, ' ');
535 	ugo(lcolumn(loc2) - c, '^');
536 	flush();
537 	cnt = 0;
538 bkup:
539 	ch = c = getkey();
540 again:
541 	if (c == '\b') {
542 		if ((inopen)
543 		 && (cnt > 0)) {
544 			putchar('\b' | QUOTE);
545 			putchar(' ');
546 			putchar('\b' | QUOTE), flush();
547 			cnt --;
548 		}
549 		goto bkup;
550 	}
551 	if (c == '\r')
552 		c = '\n';
553 	if (inopen && MB_CUR_MAX == 1 || c < 0200) {
554 		putchar(c);
555 		flush();
556 		cnt++;
557 	}
558 	if (c != '\n' && c != EOF) {
559 		c = getkey();
560 		goto again;
561 	}
562 	noteinp();
563 	return (ch == 'y');
564 }
565 
/*
 * ugo: output the character "with" cnt times; does nothing when cnt is
 * zero or negative.
 */
void
ugo(int cnt, int with)
{
	for (; cnt > 0; cnt--)
		putchar(with);
}
575 
/*
 * Case-conversion state set by dosub() and consumed by fixcase():
 *	casecnt	number of characters still to be case-converted (0 = none)
 *	destuc	nonzero to convert to upper case, zero to convert to lower
 */
int	casecnt;
bool	destuc;
578 
/*
 * dosub: build the substituted line for the current match.
 *
 * The text of linebuf before loc1 is copied to genbuf, the replacement
 * text in rhsbuf is expanded after it, and the text of linebuf from loc2
 * on is appended.  loc2 is updated to the position in linebuf that
 * corresponds to the end of the inserted replacement, and the result is
 * installed with strcLIN(genbuf).
 *
 * Escapes recognized in rhsbuf:
 *	\&		the matched text (loc1..loc2)
 *	\1 ...		the corresponding \( \) group, while the digit is
 *			below re->Nbra + '1'
 *	\l \u		lower/upper-case the next character
 *	\L \U		lower/upper-case the following characters
 *	\E \e		stop case conversion
 * Any other escaped character is copied without its backslash.
 *
 * error() is called if the result would not fit in genbuf.
 */
void
dosub(void)
{
	unsigned char *lp, *sp, *rp;
	int c;
	int	len;

	lp = linebuf;
	sp = genbuf;
	rp = rhsbuf;
	/* Copy the part of the line that precedes the match. */
	while (lp < (unsigned char *)loc1)
		*sp++ = *lp++;
	casecnt = 0;
	/*
	 * Caution: depending on the hardware, c will be either sign
	 * extended or not if C&QUOTE is set.  Thus, on a VAX, c will
	 * be < 0, but on a 3B, c will be >= 128.
	 */
	while (c = *rp) {
		/* len: byte length of the character at rp (1 if invalid). */
		if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
			len = 1;
		/* ^V <return> from vi to split lines */
		if (c == '\r')
			c = '\n';

		if (c == '\\') {
			rp++;
			/* Recompute len for the character after the backslash. */
			if ((len = mblen((char *)rp, MB_CUR_MAX)) <= 0)
				len = 1;
			switch (c = *rp++) {

			case '&':
				sp = place(sp, loc1, loc2);
				if (sp == 0)
					goto ovflo;
				continue;

			case 'l':
				casecnt = 1;
				destuc = 0;
				continue;

			case 'L':
				casecnt = LBSIZE;
				destuc = 0;
				continue;

			case 'u':
				casecnt = 1;
				destuc = 1;
				continue;

			case 'U':
				casecnt = LBSIZE;
				destuc = 1;
				continue;

			case 'E':
			case 'e':
				casecnt = 0;
				continue;
			}
			if(re != NULL && c >= '1' && c < re->Nbra + '1') {
				sp = place(sp, braslist[c - '1'] , braelist[c - '1']);
				if (sp == 0)
					goto ovflo;
				continue;
			}
			/*
			 * Not a recognized escape: step back so that rp points
			 * at the escaped character again and copy it below.
			 */
			rp--;
		}
		if (len > 1) {
			/* Multibyte character: copied as is, no case conversion. */
			if ((sp + len) >= &genbuf[LBSIZE])
				goto ovflo;
			strncpy(sp, rp, len);
		} else {
			if (casecnt)
				*sp = fixcase(c);
			else
				*sp = c;
		}
		sp += len; rp += len;
		if (sp >= &genbuf[LBSIZE])
ovflo:
			error(value(vi_TERSE) ? gettext("Line overflow") :
gettext("Line overflow in substitute"));
	}
	/* Append the rest of the line, including its terminating NUL. */
	lp = (unsigned char *)loc2;
	loc2 = (char *)(linebuf + (sp - genbuf));
	while (*sp++ = *lp++)
		if (sp >= &genbuf[LBSIZE])
			goto ovflo;
	strcLIN(genbuf);
}
672 
673 int
fixcase(int c)674 fixcase(int c)
675 {
676 
677 	if (casecnt == 0)
678 		return (c);
679 	casecnt--;
680 	if (destuc) {
681 		if (islower(c))
682 			c = toupper(c);
683 	} else
684 		if (isupper(c))
685 			c = tolower(c);
686 	return (c);
687 }
688 
689 unsigned char *
place(sp,l1,l2)690 place(sp, l1, l2)
691 	unsigned char *sp, *l1, *l2;
692 {
693 
694 	while (l1 < l2) {
695 		*sp++ = fixcase(*l1++);
696 		if (sp >= &genbuf[LBSIZE])
697 			return (0);
698 	}
699 	return (sp);
700 }
701 
702 void
snote(int total,int nlines)703 snote(int total, int nlines)
704 {
705 
706 	if (!notable(total))
707 		return;
708 	if (nlines != 1 && nlines != total)
709 		viprintf(mesg(value(vi_TERSE) ?
710 			/*
711 			 * TRANSLATION_NOTE
712 			 *	Reference order of arguments must not
713 			 *	be changed using '%digit$', since vi's
714 			 *	viprintf() does not support it.
715 			 */
716 			    gettext("%d subs on %d lines") :
717 			/*
718 			 * TRANSLATION_NOTE
719 			 *	Reference order of arguments must not
720 			 *	be changed using '%digit$', since vi's
721 			 *	viprintf() does not support it.
722 			 */
723 			    gettext("%d substitutions on %d lines")),
724 		       total, nlines);
725 	else
726 		viprintf(mesg(value(vi_TERSE) ?
727 			    gettext("%d subs") :
728 			    gettext("%d substitutions")),
729 		       total);
730 	noonl();
731 	flush();
732 }
733 
#ifdef XPG4
#include <regex.h>

/*
 * vi_compile() clears this before parsing each RE and sets it to
 * REG_WORDS when the RE contains \< or \>.
 */
extern int regcomp_flags;	/* use to specify cflags for regcomp() */
#endif /* XPG4 */
739 
/*
 * vi_compile: read a regular expression from the input and compile it
 * into re.
 *
 *	eof	the delimiter that ends the RE; letters, digits and (in a
 *		multibyte locale) bytes >= 0200 are rejected.  A delimiter
 *		of '\' selects the forms \/ and \? (reuse the scanning RE)
 *		and \& (reuse the substitute RE).
 *	oknl	nonzero if a newline may end the RE in place of the
 *		closing delimiter
 *
 * The RE text is first rewritten into genbuf in the form expected by the
 * compile routine: the magic/nomagic setting decides whether . * [ and ~
 * are special with or without a backslash, ~ is replaced by the previous
 * replacement text (rhsbuf), and with ignorecase set ordinary characters
 * are folded to lower case.  An empty RE reuses the previous one.
 *
 * Returns eof, or for the '\' delimiter forms the character that
 * followed the backslash.  Errors are reported through error()/cerror().
 */
int
vi_compile(int eof, int oknl)
{
	int c;
	unsigned char *gp, *p1;
	unsigned char *rhsp;
	unsigned char rebuf[LBSIZE];
	char	multi[MB_LEN_MAX + 1];
	int	len;
	wchar_t	wc;

#ifdef XPG4
	/*
	 * reset cflags to plain BRE
	 * if \< and/or \> is specified, REG_WORDS is set.
	 */
	regcomp_flags = 0;
#endif /* XPG4 */

	gp = genbuf;
	if (isalpha(eof) || isdigit(eof))
error(gettext("Regular expressions cannot be delimited by letters or digits"));
	if(eof >= 0200 && MB_CUR_MAX > 1)
error(gettext("Regular expressions cannot be delimited by multibyte characters"));
	c = getchar();
	/* "\/", "\?" and "\&": reuse a saved RE instead of parsing one. */
	if (eof == '\\')
		switch (c) {

		case '/':
		case '?':
			if (scanre == NULL || scanre->Expbuf[1] == 0)
error(value(vi_TERSE) ? gettext("No previous scan re") :
gettext("No previous scanning regular expression"));
			resre(scanre);
			return (c);

		case '&':
			if (subre == NULL || subre->Expbuf[1] == 0)
error(value(vi_TERSE) ? gettext("No previous substitute re") :
gettext("No previous substitute regular expression"));
			resre(subre);
			return (c);

		default:
error(value(vi_TERSE) ? gettext("Badly formed re") :
gettext("Regular expression \\ must be followed by / or ?"));
		}
	/* Empty RE: keep using whatever is already compiled in re. */
	if (c == eof || c == '\n' || c == EOF) {
		if (re == NULL || re->Expbuf[1] == 0)
error(value(vi_TERSE) ? gettext("No previous re") :
gettext("No previous regular expression"));
		if (c == '\n' && oknl == 0)
error(value(vi_TERSE) ? gettext("Missing closing delimiter") :
gettext("Missing closing delimiter for regular expression"));
		if (c != eof)
			ungetchar(c);
		return (eof);
	}
	gp = genbuf;
	/* A leading ^ is passed through unchanged. */
	if (c == '^') {
		*gp++ = c;
		c = getchar();
	}
	ungetchar(c);
	/* Translate the RE text into genbuf, one character per pass. */
	for (;;) {
		c = getchar();
		if (c == eof || c == EOF) {
			if (c == EOF)
				ungetchar(c);
			goto out;
		}
		/* "complex" is also the target of gotos on any overflow below. */
		if (gp >= &genbuf[LBSIZE - 3])
complex:
			cerror(value(vi_TERSE) ?
			    (unsigned char *)gettext("Re too complex") :
			    (unsigned char *)
			    gettext("Regular expression too complicated"));

		/* Multibyte character: copy it through untouched. */
		if (!(isascii(c) || MB_CUR_MAX == 1)) {
			ungetchar(c);
			if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
				if ((gp + len) >= &genbuf[LBSIZE - 3])
					goto complex;
				strncpy(gp, multi, len);
				gp += len;
				continue;
			}
			(void) getchar();
		}

		switch (c) {

		case '\\':
			c = getchar();
			if (!isascii(c)) {
				ungetchar(c);
				if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
					if ((gp + len) >= &genbuf[LBSIZE - 3])
						goto complex;
					*gp++ = '\\';
					strncpy(gp, multi, len);
					gp += len;
					continue;
				}
				(void) getchar();
			}

			switch (c) {

			case '<':
			case '>':
#ifdef XPG4
				regcomp_flags = REG_WORDS;
				/*FALLTHRU*/
#endif /* XPG4 */
			case '(':
			case ')':
			case '{':
			case '}':
			case '$':
			case '^':
			case '\\':
				/* These escapes are passed on with their backslash. */
				*gp++ = '\\';
				*gp++ = c;
				continue;

			case 'n':
				/* "\n" is stored as a plain 'n', without the backslash. */
				*gp++ = c;
				continue;
			}
			if(c >= '0' && c <= '9') {
				*gp++ = '\\';
				*gp++ = c;
				continue;
			}
			/*
			 * With nomagic, an escaped . ~ * [ takes its special
			 * meaning here; with magic the unescaped forms arrive
			 * at the "magic" label by goto from the outer switch.
			 */
			if (value(vi_MAGIC) == 0)
magic:
			switch (c) {

			case '.':
				*gp++ = '.';
				continue;

			case '~':
				/*
				 * Insert the previous replacement text as a
				 * literal: RE-special characters that were
				 * escaped there keep their backslash, while
				 * \& and \digit cannot be represented and are
				 * rejected.
				 */
				rhsp = rhsbuf;
				while (*rhsp) {
					if (!isascii(*rhsp)) {
						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) > 1) {
							if ((gp + len) >= &genbuf[LBSIZE-2])
								goto complex;
							strncpy(gp, rhsp, len);
							rhsp += len; gp += len;
							continue;
						}
					}
					len = 1;
					if (*rhsp == '\\') {
						c = *++rhsp;
						if (c == '&')
cerror(value(vi_TERSE) ? (unsigned char *)
gettext("Replacement pattern contains &") :
(unsigned char *)gettext("Replacement pattern contains & - cannot use in re"));
						if (c >= '1' && c <= '9')
cerror(value(vi_TERSE) ? (unsigned char *)
gettext("Replacement pattern contains \\d") :
(unsigned char *)
gettext("Replacement pattern contains \\d - cannot use in re"));
						if ((len = mbtowc((wchar_t *)0, (char *)rhsp, MB_CUR_MAX)) <= 1) {
							len = 1;
							if(any(c, ".\\*[$"))
								*gp++ = '\\';
						}
					}

					if ((gp + len) >= &genbuf[LBSIZE-2])
						goto complex;
					if (len == 1) {
						c = *rhsp++;
						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
					} else {
						strncpy(gp, rhsp, len);
						gp += len; rhsp += len;
					}
				}
				continue;

			case '*':
				*gp++ = '*';
				continue;

			case '[':
				/*
				 * Bracket expression: copy everything up to the
				 * closing ], folding case if ignorecase is set.
				 * "\]" is copied as the two characters \ and ].
				 */
				*gp++ = '[';
				c = getchar();
				if (c == '^') {
					*gp++ = '^';
					c = getchar();
				}

				do {
					if (!isascii(c) && c != EOF) {
						ungetchar(c);
						if ((len = _mbftowc(multi, &wc, getchar, &peekc)) >= 1) {
							if ((gp + len)>= &genbuf[LBSIZE-4])
								goto complex;
							strncpy(gp, multi, len);
							gp += len;
							c = getchar();
							continue;
						}
						(void) getchar();
					}

					if (gp >= &genbuf[LBSIZE-4])
						goto complex;
					if(c == '\\' && peekchar() == ']') {
						(void)getchar();
						*gp++ = '\\';
						*gp++ = ']';
					}
					else if (c == '\n' || c == EOF)
						cerror((unsigned char *)
						    gettext("Missing ]"));
					else
						*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
					c = getchar();
				} while(c != ']');
				*gp++ = ']';
				continue;
			}
			/* Backslash at end of input: store a literal backslash. */
			if (c == EOF) {
				ungetchar(EOF);
				*gp++ = '\\';
				*gp++ = '\\';
				continue;
			}
			if (c == '\n')
cerror(value(vi_TERSE) ? (unsigned char *)gettext("No newlines in re's") :
(unsigned char *)gettext("Can't escape newlines into regular expressions"));
			/* Any other escaped character keeps its backslash. */
			*gp++ = '\\';
			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
			continue;

		case '\n':
			if (oknl) {
				ungetchar(c);
				goto out;
			}
cerror(value(vi_TERSE) ? (unsigned char *)gettext("Badly formed re") :
(unsigned char *)gettext("Missing closing delimiter for regular expression"));

		case '.':
		case '~':
		case '*':
		case '[':
			if (value(vi_MAGIC))
				goto magic;
			/* nomagic: quote it so it is literal (~ needs no quoting) */
			if(c != '~')
				*gp++ = '\\';
defchar:
		default:
			*gp++ = (value(vi_IGNORECASE) ? tolower(c) : c);
			continue;
		}
	}
out:
	*gp++ = '\0';

#ifdef XPG4
	/* see if our compiled RE's will fit in the re structure:	*/
	if (regexc_size > EXPSIZ) {
		/*
		 * this should never happen. but it's critical that we
		 * check here, otherwise .bss would get overwritten.
		 */
		cerror(value(vi_TERSE) ? (unsigned char *)
		    gettext("RE's can't fit") :
		    (unsigned char *)gettext("Regular expressions can't fit"));
		return(eof);
	}

	/*
	 * We create re each time we need it.
	 */

	/*
	 * If re is shared with scanre or subre, a fresh structure is
	 * allocated so that the saved RE is left intact; otherwise the
	 * existing one is released and reused.
	 */
	if (re == NULL || re == scanre || re == subre) {
		if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
			error(gettext("out of memory"));
			exit(errcnt);
		}
	} else {
		regex_comp_free(&re->Expbuf);
		memset(re, 0, sizeof(struct regexp));
	}

	compile((char *) genbuf, (char *) re->Expbuf, (char *) re->Expbuf
	    + regexc_size);
#else /* !XPG4 */
	(void) _compile((const char *)genbuf, (char *)re->Expbuf,
		(char *)(re->Expbuf + sizeof (re->Expbuf)), 1);
#endif /* XPG4 */

	/*
	 * Map compile errors to messages.  NOTE(review): the cases have no
	 * break; this relies on cerror() not returning -- confirm.
	 */
	if(regerrno)
		switch(regerrno) {

		case 42:
cerror((unsigned char *)gettext("\\( \\) Imbalance"));
		case 43:
cerror(value(vi_TERSE) ? (unsigned char *)gettext("Awash in \\('s!") :
(unsigned char *)
gettext("Too many \\('d subexpressions in a regular expression"));
		case 50:
			goto complex;
		case 67:
cerror(value(vi_TERSE) ? (unsigned char *)gettext("Illegal byte sequence") :
(unsigned char *)gettext("Regular expression has illegal byte sequence"));
		}
	/* Remember the group count; dosub() uses it to validate \digit. */
	re->Nbra = nbra;
	return(eof);
}
1059 
1060 void
cerror(unsigned char * s)1061 cerror(unsigned char *s)
1062 {
1063 	if (re) {
1064 		re->Expbuf[0] = re->Expbuf[1] = 0;
1065 	}
1066 	error(s);
1067 }
1068 
1069 int
execute(int gf,line * addr)1070 execute(int gf, line *addr)
1071 {
1072 	unsigned char *p1, *p2;
1073 	char *start;
1074 	int c, i;
1075 	int ret;
1076 	int	len;
1077 
1078 	if (gf) {
1079 		if (re == NULL || re->Expbuf[0])
1080 			return (0);
1081 		if(value(vi_IGNORECASE)) {
1082 			p1 = genbuf;
1083 			p2 = (unsigned char *)loc2;
1084 			while(c = *p2) {
1085 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1086 					len = 1;
1087 				if (len == 1) {
1088 					*p1++ = tolower(c);
1089 					p2++;
1090 					continue;
1091 				}
1092 				strncpy(p1, p2, len);
1093 				p1 += len; p2 += len;
1094 			}
1095 			*p1 = '\0';
1096 			locs = (char *)genbuf;
1097 			p1 = genbuf;
1098 			start = loc2;
1099 		} else {
1100 			p1 = (unsigned char *)loc2;
1101 			locs = loc2;
1102 		}
1103 	} else {
1104 		if (addr == zero)
1105 			return (0);
1106 		p1 = linebuf;
1107 		getaline(*addr);
1108 		if(value(vi_IGNORECASE)) {
1109 			p1 = genbuf;
1110 			p2 = linebuf;
1111 			while(c = *p2) {
1112 				if ((len = mblen((char *)p2, MB_CUR_MAX)) <= 0)
1113 					len = 1;
1114 				if (len == 1) {
1115 					*p1++ = tolower(c);
1116 					p2++;
1117 					continue;
1118 				}
1119 				strncpy(p1, p2, len);
1120 				p1 += len; p2 += len;
1121 			}
1122 			*p1 = '\0';
1123 			p1 = genbuf;
1124 			start = (char *)linebuf;
1125 		}
1126 		locs = (char *)0;
1127 	}
1128 
1129 	ret = step((char *)p1, (char *)re->Expbuf);
1130 
1131 	if(value(vi_IGNORECASE) && ret) {
1132 		loc1 = start + (loc1 - (char *)genbuf);
1133 		loc2 = start + (loc2 - (char *)genbuf);
1134 		for(i = 0; i < NBRA; i++) {
1135 			braslist[i] = start + (braslist[i] - (char *)genbuf);
1136 			braelist[i] = start + (braelist[i] - (char *)genbuf);
1137 		}
1138 	}
1139 	return ret;
1140 }
1141 
1142 /*
1143  *  Initialize the compiled regular-expression storage areas (called from
1144  *  main()).
1145  */
1146 
init_re(void)1147 void init_re (void)
1148 {
1149 #ifdef XPG4
1150 	re = scanre = subre = NULL;
1151 #else /* !XPG4 */
1152 	if ((re = calloc(1, sizeof(struct regexp))) == NULL) {
1153 		error(gettext("out of memory"));
1154 		exit(errcnt);
1155 	}
1156 
1157 	if ((scanre = calloc(1, sizeof(struct regexp))) == NULL) {
1158 		error(gettext("out of memory"));
1159 		exit(errcnt);
1160 	}
1161 
1162 	if ((subre = calloc(1, sizeof(struct regexp))) == NULL) {
1163 		error(gettext("out of memory"));
1164 		exit(errcnt);
1165 	}
1166 #endif /* XPG4 */
1167 }
1168 
1169 /*
1170  *  Save what is in the special place re to the named alternate
1171  *  location.  This means freeing up what's currently in this target
1172  *  location, if necessary.
1173  */
1174 
savere(struct regexp ** a)1175 void savere(struct regexp ** a)
1176 {
1177 #ifdef XPG4
1178 	if (a == NULL || re == NULL) {
1179 		return;
1180 	}
1181 
1182 	if (*a == NULL) {
1183 		*a = re;
1184 		return;
1185 	}
1186 
1187 	if (*a != re) {
1188 		if (scanre != subre) {
1189 			regex_comp_free(&((*a)->Expbuf));
1190 			free(*a);
1191 		}
1192 		*a = re;
1193 	}
1194 #else /* !XPG4 */
1195 	memcpy(*a, re, sizeof(struct regexp));
1196 #endif /* XPG4 */
1197 }
1198 
1199 
1200 /*
1201  *  Restore what is in the named alternate location to the special place
1202  *  re.  This means first freeing up what's currently in re, if necessary.
1203  */
1204 
resre(struct regexp * a)1205 void resre(struct regexp * a)
1206 {
1207 #ifdef XPG4
1208 	if (a == NULL) {
1209 		return;
1210 	}
1211 
1212 	if (re == NULL) {
1213 		re = a;
1214 		return;
1215 	}
1216 
1217 	if (a != re) {
1218 		if ((re != scanre) && (re != subre)) {
1219 			regex_comp_free(&re->Expbuf);
1220 			free(re);
1221 		}
1222 
1223 		re = a;
1224 	}
1225 #else /* !XPG4 */
1226 	memcpy(re, a, sizeof(struct regexp));
1227 #endif /* XPG4 */
1228 }
1229