xref: /freebsd/contrib/nvi/ex/ex_subst.c (revision 0fcececbac9880b092aeb56a41a16f1ec8ac1ae6)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #include <sys/types.h>
13 #include <sys/queue.h>
14 #include <sys/time.h>
15 
16 #include <bitstring.h>
17 #include <ctype.h>
18 #include <errno.h>
19 #include <limits.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
24 
25 #include "../common/common.h"
26 #include "../vi/vi.h"
27 
/*
 * Flags handed to s() by its callers in this file:
 *
 * SUB_FIRST	 is set by ex_s() when the command supplied its own pattern;
 *		 s() then rejects the 'r' flag as meaningless.
 * SUB_MUSTSETR	 is set by ex_s() when the character after the command is
 *		 alphanumeric or a backslash; s() then issues a usage error
 *		 unless the 'r' flag was given.
 */
#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */

/* File-local helpers; all return an int status like the rest of the file. */
static int re_conv(SCR *, CHAR_T **, size_t *, int *);
static int re_cscope_conv(SCR *, CHAR_T **, size_t *, int *);
static int re_sub(SCR *,
		CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]);
static int re_tag_conv(SCR *, CHAR_T **, size_t *, int *);
static int s(SCR *, EXCMD *, CHAR_T *, regex_t *, u_int);
37 
38 /*
39  * ex_s --
40  *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
41  *
42  *	Substitute on lines matching a pattern.
43  *
44  * PUBLIC: int ex_s(SCR *, EXCMD *);
45  */
46 int
ex_s(SCR * sp,EXCMD * cmdp)47 ex_s(SCR *sp, EXCMD *cmdp)
48 {
49 	regex_t *re;
50 	size_t blen, len;
51 	u_int flags;
52 	int delim;
53 	CHAR_T *bp, *p, *ptrn, *rep, *t;
54 
55 	/*
56 	 * Skip leading white space.
57 	 *
58 	 * !!!
59 	 * Historic vi allowed any non-alphanumeric to serve as the
60 	 * substitution command delimiter.
61 	 *
62 	 * !!!
63 	 * If the arguments are empty, it's the same as &, i.e. we
64 	 * repeat the last substitution.
65 	 */
66 	if (cmdp->argc == 0)
67 		goto subagain;
68 	for (p = cmdp->argv[0]->bp,
69 	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
70 		if (!cmdskip(*p))
71 			break;
72 	}
73 	if (len == 0)
74 subagain:	return (ex_subagain(sp, cmdp));
75 
76 	delim = *p++;
77 	if (is09azAZ(delim) || delim == '\\')
78 		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
79 
80 	/*
81 	 * !!!
82 	 * The full-blown substitute command reset the remembered
83 	 * state of the 'c' and 'g' suffices.
84 	 */
85 	sp->c_suffix = sp->g_suffix = 0;
86 
87 	/*
88 	 * Get the pattern string, toss escaping characters.
89 	 *
90 	 * !!!
91 	 * Historic vi accepted any of the following forms:
92 	 *
93 	 *	:s/abc/def/		change "abc" to "def"
94 	 *	:s/abc/def		change "abc" to "def"
95 	 *	:s/abc/			delete "abc"
96 	 *	:s/abc			delete "abc"
97 	 *
98 	 * QUOTING NOTE:
99 	 *
100 	 * Only toss an escaping character if it escapes a delimiter.
101 	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
102 	 * would be nice to be more regular, i.e. for each layer of
103 	 * escaping a single escaping character is removed, but that's
104 	 * not how the historic vi worked.
105 	 */
106 	for (ptrn = t = p;;) {
107 		if (p[0] == '\0' || p[0] == delim) {
108 			if (p[0] == delim)
109 				++p;
110 			/*
111 			 * !!!
112 			 * Nul terminate the pattern string -- it's passed
113 			 * to regcomp which doesn't understand anything else.
114 			 */
115 			*t = '\0';
116 			break;
117 		}
118 		if (p[0] == '\\') {
119 			if (p[1] == delim)
120 				++p;
121 			else if (p[1] == '\\')
122 				*t++ = *p++;
123 		}
124 		*t++ = *p++;
125 	}
126 
127 	/*
128 	 * If the pattern string is empty, use the last RE (not just the
129 	 * last substitution RE).
130 	 */
131 	if (*ptrn == '\0') {
132 		if (sp->re == NULL) {
133 			ex_emsg(sp, NULL, EXM_NOPREVRE);
134 			return (1);
135 		}
136 
137 		/* Re-compile the RE if necessary. */
138 		if (!F_ISSET(sp, SC_RE_SEARCH) &&
139 		    re_compile(sp, sp->re, sp->re_len,
140 		    NULL, NULL, &sp->re_c, RE_C_SEARCH))
141 			return (1);
142 		flags = 0;
143 	} else {
144 		/*
145 		 * !!!
146 		 * Compile the RE.  Historic practice is that substitutes set
147 		 * the search direction as well as both substitute and search
148 		 * RE's.  We compile the RE twice, as we don't want to bother
149 		 * ref counting the pattern string and (opaque) structure.
150 		 */
151 		if (re_compile(sp, ptrn, t - ptrn, &sp->re,
152 		    &sp->re_len, &sp->re_c, RE_C_SEARCH))
153 			return (1);
154 		if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
155 		    &sp->subre_len, &sp->subre_c, RE_C_SUBST))
156 			return (1);
157 
158 		flags = SUB_FIRST;
159 		sp->searchdir = FORWARD;
160 	}
161 	re = &sp->re_c;
162 
163 	/*
164 	 * Get the replacement string.
165 	 *
166 	 * The special character & (\& if O_MAGIC not set) matches the
167 	 * entire RE.  No handling of & is required here, it's done by
168 	 * re_sub().
169 	 *
170 	 * The special character ~ (\~ if O_MAGIC not set) inserts the
171 	 * previous replacement string into this replacement string.
172 	 * Count ~'s to figure out how much space we need.  We could
173 	 * special case nonexistent last patterns or whether or not
174 	 * O_MAGIC is set, but it's probably not worth the effort.
175 	 *
176 	 * QUOTING NOTE:
177 	 *
178 	 * Only toss an escaping character if it escapes a delimiter or
179 	 * if O_MAGIC is set and it escapes a tilde.
180 	 *
181 	 * !!!
182 	 * If the entire replacement pattern is "%", then use the last
183 	 * replacement pattern.  This semantic was added to vi in System
184 	 * V and then percolated elsewhere, presumably around the time
185 	 * that it was added to their version of ed(1).
186 	 */
187 	if (p[0] == '\0' || p[0] == delim) {
188 		if (p[0] == delim)
189 			++p;
190 		free(sp->repl);
191 		sp->repl = NULL;
192 		sp->repl_len = 0;
193 	} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
194 		p += p[1] == delim ? 2 : 1;
195 	else {
196 		for (rep = p, len = 0;
197 		    p[0] != '\0' && p[0] != delim; ++p, ++len)
198 			if (p[0] == '~')
199 				len += sp->repl_len;
200 		GET_SPACE_RETW(sp, bp, blen, len);
201 		for (t = bp, len = 0, p = rep;;) {
202 			if (p[0] == '\0' || p[0] == delim) {
203 				if (p[0] == delim)
204 					++p;
205 				break;
206 			}
207 			if (p[0] == '\\') {
208 				if (p[1] == delim)
209 					++p;
210 				else if (p[1] == '\\') {
211 					*t++ = *p++;
212 					++len;
213 				} else if (p[1] == '~') {
214 					++p;
215 					if (!O_ISSET(sp, O_MAGIC))
216 						goto tilde;
217 				}
218 			} else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
219 tilde:				++p;
220 				MEMCPY(t, sp->repl, sp->repl_len);
221 				t += sp->repl_len;
222 				len += sp->repl_len;
223 				continue;
224 			}
225 			*t++ = *p++;
226 			++len;
227 		}
228 		if ((sp->repl_len = len) != 0) {
229 			free(sp->repl);
230 			MALLOC(sp, sp->repl, len * sizeof(CHAR_T));
231 			if (sp->repl == NULL) {
232 				FREE_SPACEW(sp, bp, blen);
233 				return (1);
234 			}
235 			MEMCPY(sp->repl, bp, len);
236 		}
237 		FREE_SPACEW(sp, bp, blen);
238 	}
239 	return (s(sp, cmdp, p, re, flags));
240 }
241 
242 /*
243  * ex_subagain --
244  *	[line [,line]] & [cgr] [count] [#lp]]
245  *
246  *	Substitute using the last substitute RE and replacement pattern.
247  *
248  * PUBLIC: int ex_subagain(SCR *, EXCMD *);
249  */
250 int
ex_subagain(SCR * sp,EXCMD * cmdp)251 ex_subagain(SCR *sp, EXCMD *cmdp)
252 {
253 	if (sp->subre == NULL) {
254 		ex_emsg(sp, NULL, EXM_NOPREVRE);
255 		return (1);
256 	}
257 	if (!F_ISSET(sp, SC_RE_SUBST) &&
258 	    re_compile(sp, sp->subre, sp->subre_len,
259 	    NULL, NULL, &sp->subre_c, RE_C_SUBST))
260 		return (1);
261 	return (s(sp,
262 	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
263 }
264 
265 /*
266  * ex_subtilde --
267  *	[line [,line]] ~ [cgr] [count] [#lp]]
268  *
269  *	Substitute using the last RE and last substitute replacement pattern.
270  *
271  * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
272  */
273 int
ex_subtilde(SCR * sp,EXCMD * cmdp)274 ex_subtilde(SCR *sp, EXCMD *cmdp)
275 {
276 	if (sp->re == NULL) {
277 		ex_emsg(sp, NULL, EXM_NOPREVRE);
278 		return (1);
279 	}
280 	if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
281 	    sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
282 		return (1);
283 	return (s(sp,
284 	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
285 }
286 
287 /*
288  * s --
289  * Do the substitution.  This stuff is *really* tricky.  There are lots of
290  * special cases, and general nastiness.  Don't mess with it unless you're
291  * pretty confident.
292  *
293  * The nasty part of the substitution is what happens when the replacement
294  * string contains newlines.  It's a bit tricky -- consider the information
295  * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
296  * to build a set of newline offsets which we use to break the line up later,
297  * when the replacement is done.  Don't change it unless you're *damned*
298  * confident.
299  */
/*
 * NEEDNEWLINE --
 *	Make sure sp->newl has room for one more entry, growing it by 25
 *	slots when it is full.  On allocation failure the macro returns 1
 *	from the ENCLOSING function.  Both the capacity and the entry count
 *	are reset then, so that a later substitute doesn't index the (now
 *	NULL) array using a stale count.
 */
#define	NEEDNEWLINE(sp) do {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOC(sp, (sp)->newl, size_t *,			\
		    (sp)->newl_len * sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			(sp)->newl_cnt = 0;				\
			return (1);					\
		}							\
	}								\
} while (0)
311 
/*
 * BUILD --
 *	Append len characters starting at l to the build buffer.  Uses the
 *	caller's locals lb (buffer), lbclen (characters used) and lblen
 *	(capacity), growing the buffer as needed.  On allocation failure the
 *	macro returns 1 from the ENCLOSING function.  The l and len arguments
 *	are evaluated more than once, so they must not have side effects.
 */
#define	BUILD(sp, l, len) do {						\
	if (lbclen + (len) > lblen) {					\
		lblen = p2roundup(MAX(lbclen + (len), 256));		\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	MEMCPY(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
} while (0)
324 
/*
 * NEEDSP --
 *	Make sure the build buffer can take len more characters.  Uses the
 *	caller's locals lb, lbclen and lblen.  If the buffer had to be
 *	reallocated, pnt is re-pointed at lb + lbclen; otherwise pnt is left
 *	alone.  On allocation failure the macro returns 1 from the ENCLOSING
 *	function.  Nothing is copied and lbclen is not advanced.
 */
#define	NEEDSP(sp, len, pnt) do {					\
	if (lbclen + (len) > lblen) {					\
		lblen = p2roundup(MAX(lbclen + (len), 256));		\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		pnt = lb + lbclen;					\
	}								\
} while (0)
336 
/*
 * s --
 *	Parse the trailing flags/count and run the substitution over the
 *	command's line range.
 *
 *	sp:	screen
 *	cmdp:	ex command; addr1/addr2 give the line range and may be
 *		rewritten here when a count is supplied
 *	s:	flag/count string, or NULL if there were no arguments; the
 *		variable is reused below to point at the current line
 *	re:	compiled RE to match with; replaced by &sp->re_c if the
 *		'r' flag is given
 *	flags:	SUB_FIRST and/or SUB_MUSTSETR
 *
 *	Returns 0 on success, 1 on error (including "no match" outside of
 *	a global command).
 */
static int
s(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags)
{
	EVENT ev;
	MARK from, to;
	TEXTH tiq[] = {{ 0 }};
	recno_t elno, lno, slno;
	u_long ul;
	regmatch_t match[10];
	size_t blen, cnt, last, lbclen, lblen, len, llen;
	size_t offset, saved_offset, scno;
	int cflag, lflag, nflag, pflag, rflag;
	int didsub, do_eol_match, eflags, empty_ok, eval;
	int linechanged, matched, quit, rval;
	CHAR_T *bp, *lb;
	enum nresult nret;

	NEEDFILE(sp, cmdp);

	/* Remember where the cursor started, for the fix-up at the end. */
	slno = sp->lno;
	scno = sp->cno;

	/*
	 * !!!
	 * Historically, the 'g' and 'c' suffices were always toggled as flags,
	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
	 * not set, they were initialized to 0 for all substitute commands.  If
	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
	 * specified substitute/replacement patterns (see ex_s()).
	 */
	if (!O_ISSET(sp, O_EDCOMPATIBLE))
		sp->c_suffix = sp->g_suffix = 0;

	/*
	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
	 * it only displayed the last change.  I'd disallow them, but they are
	 * useful in combination with the [v]global commands.  In the current
	 * model the problem is combining them with the 'c' flag -- the screen
	 * would have to flip back and forth between the confirm screen and the
	 * ex print screen, which would be pretty awful.  We do display all
	 * changes, though, for what that's worth.
	 *
	 * !!!
	 * Historic vi was fairly strict about the order of "options", the
	 * count, and "flags".  I'm somewhat fuzzy on the difference between
	 * options and flags, anyway, so this is a simpler approach, and we
	 * just take them in whatever order the user gives them.  (The ex
	 * usage statement doesn't reflect this.)
	 */
	cflag = lflag = nflag = pflag = rflag = 0;
	if (s == NULL)
		goto noargs;
	/* lno doubles as "count already seen" marker while parsing. */
	for (lno = OOBLNO; *s != '\0'; ++s)
		switch (*s) {
		case ' ':
		case '\t':
			continue;
		case '+':
			++cmdp->flagoff;
			break;
		case '-':
			--cmdp->flagoff;
			break;
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* Only one count is allowed. */
			if (lno != OOBLNO)
				goto usage;
			errno = 0;
			nret = nget_uslong(&ul, s, &s, 10);
			lno = ul;
			if (*s == '\0')		/* Loop increment correction. */
				--s;
			if (nret != NUM_OK) {
				if (nret == NUM_OVER)
					msgq(sp, M_ERR, "153|Count overflow");
				else if (nret == NUM_UNDER)
					msgq(sp, M_ERR, "154|Count underflow");
				else
					msgq(sp, M_SYSERR, NULL);
				return (1);
			}
			/*
			 * In historic vi, the count was inclusive from the
			 * second address.
			 */
			cmdp->addr1.lno = cmdp->addr2.lno;
			cmdp->addr2.lno += lno - 1;
			/* Clamp to the last line if the count runs past it. */
			if (!db_exist(sp, cmdp->addr2.lno) &&
			    db_last(sp, &cmdp->addr2.lno))
				return (1);
			break;
		case '#':
			nflag = 1;
			break;
		case 'c':
			sp->c_suffix = !sp->c_suffix;

			/*
			 * Ex text structure initialization.
			 *
			 * NOTE(review): this is the only place tiq gets
			 * TAILQ_INIT.  If c_suffix is already set on entry
			 * (it is not cleared above when O_EDCOMPATIBLE is
			 * set) and no 'c' is parsed, ex_txt() below is
			 * handed the zero-initialized head -- confirm that
			 * is safe.
			 */
			if (F_ISSET(sp, SC_EX))
				TAILQ_INIT(tiq);
			break;
		case 'g':
			sp->g_suffix = !sp->g_suffix;
			break;
		case 'l':
			lflag = 1;
			break;
		case 'p':
			pflag = 1;
			break;
		case 'r':
			if (LF_ISSET(SUB_FIRST)) {
				msgq(sp, M_ERR,
		    "155|Regular expression specified; r flag meaningless");
				return (1);
			}
			if (!F_ISSET(sp, SC_RE_SEARCH)) {
				ex_emsg(sp, NULL, EXM_NOPREVRE);
				return (1);
			}
			/* Use the last search RE instead of the caller's. */
			rflag = 1;
			re = &sp->re_c;
			break;
		default:
			goto usage;
		}

	if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
		return (1);
	}

noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
		msgq(sp, M_ERR,
"156|The #, l and p flags may not be combined with the c flag in vi mode");
		return (1);
	}

	/*
	 * bp:		if interactive, line cache
	 * blen:	if interactive, line cache length
	 * lb:		build buffer pointer.
	 * lbclen:	current length of built buffer.
	 * lblen;	length of build buffer.
	 *
	 * NOTE(review): from here on, BUILD, GET_SPACE_RETW and
	 * ADD_SPACE_RETW appear to return from this function directly on
	 * allocation failure, bypassing the cleanup at err: -- confirm.
	 */
	bp = lb = NULL;
	blen = lbclen = lblen = 0;

	/* For each line... */
	lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
	for (matched = quit = 0,
	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {

		/* Someone's unhappy, time to stop. */
		if (INTERRUPTED(sp))
			break;

		/* Get the line. */
		if (db_get(sp, lno, DBG_FATAL, &s, &llen))
			goto err;

		/*
		 * Make a local copy if doing confirmation -- when calling
		 * the confirm routine we're likely to lose the cached copy.
		 */
		if (sp->c_suffix) {
			if (bp == NULL) {
				GET_SPACE_RETW(sp, bp, blen, llen);
			} else
				ADD_SPACE_RETW(sp, bp, blen, llen);
			MEMCPY(bp, s, llen);
			s = bp;
		}

		/* Start searching from the beginning. */
		offset = 0;
		len = llen;

		/* Reset the build buffer offset. */
		lbclen = 0;

		/* Reset empty match flag. */
		empty_ok = 1;

		/*
		 * We don't want to have to do a setline if the line didn't
		 * change -- keep track of whether or not this line changed.
		 * If doing confirmations, don't want to keep setting the
		 * line if change is refused -- keep track of substitutions.
		 */
		didsub = linechanged = 0;

		/* New line, do an EOL match. */
		do_eol_match = 1;

		/* It's not nul terminated, but we pretend it is. */
		eflags = REG_STARTEND;

		/*
		 * The search area is from s + offset to the EOL.
		 *
		 * Generally, match[0].rm_so is the offset of the start
		 * of the match from the start of the search, and offset
		 * is the offset of the start of the last search.
		 */
nextmatch:	match[0].rm_so = 0;
		match[0].rm_eo = len;

		/* Get the next match. */
		eval = regexec(re, s + offset, 10, match, eflags);

		/*
		 * There wasn't a match or if there was an error, deal with
		 * it.  If there was a previous match in this line, resolve
		 * the changes into the database.  Otherwise, just move on.
		 */
		if (eval == REG_NOMATCH)
			goto endmatch;
		if (eval != 0) {
			re_error(sp, eval, re);
			goto err;
		}
		matched = 1;

		/* Only the first search can match an anchored expression. */
		eflags |= REG_NOTBOL;

		/*
		 * !!!
		 * It's possible to match 0-length strings -- for example, the
		 * command s;a*;X;, when matched against the string "aabb" will
		 * result in "XbXbX", i.e. the matches are "aa", the space
		 * between the b's and the space between the b's and the end of
		 * the string.  There is a similar space between the beginning
		 * of the string and the a's.  The rule that we use (because vi
		 * historically used it) is that any 0-length match, occurring
		 * immediately after a match, is ignored.  Otherwise, the above
		 * example would have resulted in "XXbXbX".  Another example is
		 * incorrectly using " *" to replace groups of spaces with one
		 * space.
		 *
		 * The way we do this is that if we just had a successful match,
		 * the starting offset does not skip characters, and the match
		 * is empty, ignore the match and move forward.  If there's no
		 * more characters in the string, we were attempting to match
		 * after the last character, so quit.
		 */
		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
			empty_ok = 1;
			if (len == 0)
				goto endmatch;
			/* Copy one character through untouched and retry. */
			BUILD(sp, s + offset, 1);
			++offset;
			--len;
			goto nextmatch;
		}

		/* Confirm change. */
		if (sp->c_suffix) {
			/*
			 * Set the cursor position for confirmation.  Note,
			 * if we matched on a '$', the cursor may be past
			 * the end of line.
			 */
			from.lno = to.lno = lno;
			from.cno = match[0].rm_so + offset;
			to.cno = match[0].rm_eo + offset;
			/*
			 * Both ex and vi have to correct for a change before
			 * the first character in the line.
			 */
			if (llen == 0)
				from.cno = to.cno = 0;
			if (F_ISSET(sp, SC_VI)) {
				/*
				 * Only vi has to correct for a change after
				 * the last character in the line.
				 *
				 * XXX
				 * It would be nice to change the vi code so
				 * that we could display a cursor past EOL.
				 *
				 * NOTE(review): llen is a size_t, so when
				 * llen is 0 both tests below are true and
				 * llen - 1 wraps, undoing the zeroing just
				 * above -- confirm vs_refresh() tolerates
				 * the resulting column.
				 */
				if (to.cno >= llen)
					to.cno = llen - 1;
				if (from.cno >= llen)
					from.cno = llen - 1;

				sp->lno = from.lno;
				sp->cno = from.cno;
				if (vs_refresh(sp, 1))
					goto err;

				vs_update(sp, msg_cat(sp,
				    "169|Confirm change? [n]", NULL), NULL);

				if (v_event_get(sp, &ev, 0, 0))
					goto err;
				switch (ev.e_event) {
				case E_CHARACTER:
					break;
				case E_EOF:
				case E_ERR:
				case E_INTERRUPT:
					goto lquit;
				default:
					v_event_err(sp, &ev);
					goto lquit;
				}
			} else {
				/* This local shadows the flags parameter. */
				const int flags =
				    O_ISSET(sp, O_NUMBER) ? E_C_HASH : 0;
				if (ex_print(sp, cmdp, &from, &to, flags) ||
				    ex_scprint(sp, &from, &to))
					goto lquit;
				if (ex_txt(sp, tiq, 0, TXT_CR))
					goto err;
				/* The first character typed is the answer. */
				ev.e_c = TAILQ_FIRST(tiq)->lb[0];
			}

			switch (ev.e_c) {
			case CH_YES:
				break;
			default:
			case CH_NO:
				/*
				 * Change refused: copy everything up to the
				 * end of the match through unmodified.
				 */
				didsub = 0;
				BUILD(sp, s +offset, match[0].rm_eo);
				goto skip;
			case CH_QUIT:
				/* Set the quit/interrupted flags. */
lquit:				quit = 1;
				F_SET(sp->gp, G_INTERRUPTED);

				/*
				 * Resolve any changes, then return to (and
				 * exit from) the main loop.
				 */
				goto endmatch;
			}
		}

		/*
		 * Set the cursor to the last position changed, converting
		 * from 1-based to 0-based.
		 *
		 * NOTE(review): rm_so is relative to s + offset, and offset
		 * is not added here -- confirm that is intended.
		 */
		sp->lno = lno;
		sp->cno = match[0].rm_so;

		/* Copy the bytes before the match into the build buffer. */
		BUILD(sp, s + offset, match[0].rm_so);

		/* Substitute the matching bytes. */
		didsub = 1;
		if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
			goto err;

		/* Set the change flag so we know this line was modified. */
		linechanged = 1;

		/* Move past the matched bytes. */
skip:		offset += match[0].rm_eo;
		len -= match[0].rm_eo;

		/* A match cannot be followed by an empty pattern. */
		empty_ok = 0;

		/*
		 * If doing a global change with confirmation, we have to
		 * update the screen.  The basic idea is to store the line
		 * so the screen update routines can find it, and restart.
		 */
		if (didsub && sp->c_suffix && sp->g_suffix) {
			/*
			 * The new search offset will be the end of the
			 * modified line.
			 */
			saved_offset = lbclen;

			/* Copy the rest of the line. */
			if (len)
				BUILD(sp, s + offset, len);

			/* Set the new offset. */
			offset = saved_offset;

			/* Store inserted lines, adjusting the build buffer. */
			last = 0;
			if (sp->newl_cnt) {
				/*
				 * Each recorded newline offset ends one new
				 * line; lno and elno both move down as lines
				 * are inserted.
				 */
				for (cnt = 0;
				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
					if (db_insert(sp, lno,
					    lb + last, sp->newl[cnt] - last))
						goto err;
					last = sp->newl[cnt] + 1;
					++sp->rptlines[L_ADDED];
				}
				lbclen -= last;
				offset -= last;
				sp->newl_cnt = 0;
			}

			/* Store and retrieve the line. */
			if (db_set(sp, lno, lb + last, lbclen))
				goto err;
			if (db_get(sp, lno, DBG_FATAL, &s, &llen))
				goto err;
			ADD_SPACE_RETW(sp, bp, blen, llen);
			MEMCPY(bp, s, llen);
			s = bp;
			len = llen - offset;

			/* Restart the build. */
			lbclen = 0;
			BUILD(sp, s, offset);

			/*
			 * If we haven't already done the after-the-string
			 * match, do one.  Set REG_NOTEOL so the '$' pattern
			 * only matches once.
			 *
			 * NOTE(review): the non-confirm path below tests
			 * len == 0 for "at end of string"; this one tests
			 * offset == len -- confirm that is intended.
			 */
			if (!do_eol_match)
				goto endmatch;
			if (offset == len) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

		/*
		 * If it's a global:
		 *
		 * If at the end of the string, do a test for the after
		 * the string match.  Set REG_NOTEOL so the '$' pattern
		 * only matches once.
		 */
		if (sp->g_suffix && do_eol_match) {
			if (len == 0) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

endmatch:	if (!linechanged)
			continue;

		/* Copy any remaining bytes into the build buffer. */
		if (len)
			BUILD(sp, s + offset, len);

		/* Store inserted lines, adjusting the build buffer. */
		last = 0;
		if (sp->newl_cnt) {
			for (cnt = 0;
			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
				if (db_insert(sp,
				    lno, lb + last, sp->newl[cnt] - last))
					goto err;
				last = sp->newl[cnt] + 1;
				++sp->rptlines[L_ADDED];
			}
			lbclen -= last;
			sp->newl_cnt = 0;
		}

		/* Store the changed line. */
		if (db_set(sp, lno, lb + last, lbclen))
			goto err;

		/* Update changed line counter. */
		if (sp->rptlchange != lno) {
			sp->rptlchange = lno;
			++sp->rptlines[L_CHANGED];
		}

		/*
		 * !!!
		 * Display as necessary.  Historic practice is to only
		 * display the last line of a line split into multiple
		 * lines.
		 */
		if (lflag || nflag || pflag) {
			from.lno = to.lno = lno;
			from.cno = to.cno = 0;
			if (lflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
			if (nflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
			if (pflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
		}
	}

	/*
	 * !!!
	 * Historically, vi attempted to leave the cursor at the same place if
	 * the substitution was done at the current cursor position.  Otherwise
	 * it moved it to the first non-blank of the last line changed.  There
	 * were some problems: for example, :s/$/foo/ with the cursor on the
	 * last character of the line left the cursor on the last character, or
	 * the & command with multiple occurrences of the matching string in the
	 * line usually left the cursor in a fairly random position.
	 *
	 * We try to do the same thing, with the exception that if the user is
	 * doing substitution with confirmation, we move to the last line about
	 * which the user was consulted, as opposed to the last line that they
	 * actually changed.  This prevents a screen flash if the user doesn't
	 * change many of the possible lines.
	 */
	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
		sp->cno = 0;
		(void)nonblank(sp, sp->lno, &sp->cno);
	}

	/*
	 * If not in a global command, and nothing matched, say so.
	 * Else, if none of the lines displayed, put something up.
	 */
	rval = 0;
	if (!matched) {
		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
			msgq(sp, M_ERR, "157|No match found");
			goto err;
		}
	} else if (!lflag && !nflag && !pflag)
		F_SET(cmdp, E_AUTOPRINT);

	/* The if (0) lets the success path fall past the err: label. */
	if (0) {
err:		rval = 1;
	}

	/* Common cleanup for both the success and error paths. */
	if (bp != NULL)
		FREE_SPACEW(sp, bp, blen);
	free(lb);
	return (rval);
}
873 
874 /*
875  * re_compile --
876  *	Compile the RE.
877  *
878  * PUBLIC: int re_compile(SCR *,
879  * PUBLIC:     CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int);
880  */
881 int
re_compile(SCR * sp,CHAR_T * ptrn,size_t plen,CHAR_T ** ptrnp,size_t * lenp,regex_t * rep,u_int flags)882 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
883 {
884 	size_t len;
885 	int reflags, replaced, rval;
886 	CHAR_T *p;
887 
888 	/* Set RE flags. */
889 	reflags = 0;
890 	if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
891 		if (O_ISSET(sp, O_EXTENDED))
892 			reflags |= REG_EXTENDED;
893 		if (O_ISSET(sp, O_IGNORECASE))
894 			reflags |= REG_ICASE;
895 		if (O_ISSET(sp, O_ICLOWER)) {
896 			for (p = ptrn, len = plen; len > 0; ++p, --len)
897 				if (ISUPPER(*p))
898 					break;
899 			if (len == 0)
900 				reflags |= REG_ICASE;
901 		}
902 	}
903 
904 	/* If we're replacing a saved value, clear the old one. */
905 	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
906 		regfree(&sp->re_c);
907 		F_CLR(sp, SC_RE_SEARCH);
908 	}
909 	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
910 		regfree(&sp->subre_c);
911 		F_CLR(sp, SC_RE_SUBST);
912 	}
913 
914 	/*
915 	 * If we're saving the string, it's a pattern we haven't seen before,
916 	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
917 	 * later recompilation.   Free any previously saved value.
918 	 */
919 	if (ptrnp != NULL) {
920 		replaced = 0;
921 		if (LF_ISSET(RE_C_CSCOPE)) {
922 			if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
923 				return (1);
924 			/*
925 			 * XXX
926 			 * Currently, the match-any-<blank> expression used in
927 			 * re_cscope_conv() requires extended RE's.  This may
928 			 * not be right or safe.
929 			 */
930 			reflags |= REG_EXTENDED;
931 		} else if (LF_ISSET(RE_C_TAG)) {
932 			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
933 				return (1);
934 		} else
935 			if (re_conv(sp, &ptrn, &plen, &replaced))
936 				return (1);
937 
938 		/* Discard previous pattern. */
939 		free(*ptrnp);
940 		*ptrnp = NULL;
941 
942 		if (lenp != NULL)
943 			*lenp = plen;
944 
945 		/*
946 		 * Copy the string into allocated memory.
947 		 *
948 		 * XXX
949 		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
950 		 * for now.  There's just no other solution.
951 		 */
952 		MALLOC(sp, *ptrnp, (plen + 1) * sizeof(CHAR_T));
953 		if (*ptrnp != NULL) {
954 			MEMCPY(*ptrnp, ptrn, plen);
955 			(*ptrnp)[plen] = '\0';
956 		}
957 
958 		/* Free up conversion-routine-allocated memory. */
959 		if (replaced)
960 			FREE_SPACEW(sp, ptrn, 0);
961 
962 		if (*ptrnp == NULL)
963 			return (1);
964 
965 		ptrn = *ptrnp;
966 	}
967 
968 	/*
969 	 * XXX
970 	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
971 	 * contained a nul.  Bummer!
972 	 */
973 	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
974 		if (!LF_ISSET(RE_C_SILENT))
975 			re_error(sp, rval, rep);
976 		return (1);
977 	}
978 
979 	if (LF_ISSET(RE_C_SEARCH))
980 		F_SET(sp, SC_RE_SEARCH);
981 	if (LF_ISSET(RE_C_SUBST))
982 		F_SET(sp, SC_RE_SUBST);
983 
984 	return (0);
985 }
986 
987 /*
988  * re_conv --
989  *	Convert vi's regular expressions into something that the
990  *	the POSIX 1003.2 RE functions can handle.
991  *
992  * There are three conversions we make to make vi's RE's (specifically
993  * the global, search, and substitute patterns) work with POSIX RE's.
994  *
995  * 1: If O_MAGIC is not set, strip backslashes from the magic character
996  *    set (.[*~) that have them, and add them to the ones that don't.
997  * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
998  *    from the last substitute command's replacement string.  If O_MAGIC
999  *    is set, it's the string "~".
1000  * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1001  *    new RE escapes.
1002  *
1003  * !!!/XXX
1004  * This doesn't exactly match the historic behavior of vi because we do
1005  * the ~ substitution before calling the RE engine, so magic characters
1006  * in the replacement string will be expanded by the RE engine, and they
1007  * weren't historically.  It's a bug.
1008  */
1009 static int
re_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1010 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1011 {
1012 	size_t blen, len, needlen;
1013 	int magic;
1014 	CHAR_T *bp, *p, *t;
1015 
1016 	/*
1017 	 * First pass through, we figure out how much space we'll need.
1018 	 * We do it in two passes, on the grounds that most of the time
1019 	 * the user is doing a search and won't have magic characters.
1020 	 * That way we can skip most of the memory allocation and copies.
1021 	 */
1022 	magic = 0;
1023 	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1024 		switch (*p) {
1025 		case '\\':
1026 			if (len > 1) {
1027 				--len;
1028 				switch (*++p) {
1029 				case '<':
1030 					magic = 1;
1031 					needlen += RE_WSTART_LEN + 1;
1032 					break;
1033 				case '>':
1034 					magic = 1;
1035 					needlen += RE_WSTOP_LEN + 1;
1036 					break;
1037 				case '~':
1038 					if (!O_ISSET(sp, O_MAGIC)) {
1039 						magic = 1;
1040 						needlen += sp->repl_len;
1041 					}
1042 					break;
1043 				case '.':
1044 				case '[':
1045 				case '*':
1046 					if (!O_ISSET(sp, O_MAGIC)) {
1047 						magic = 1;
1048 						needlen += 1;
1049 					}
1050 					break;
1051 				default:
1052 					needlen += 2;
1053 				}
1054 			} else
1055 				needlen += 1;
1056 			break;
1057 		case '~':
1058 			if (O_ISSET(sp, O_MAGIC)) {
1059 				magic = 1;
1060 				needlen += sp->repl_len;
1061 			}
1062 			break;
1063 		case '.':
1064 		case '[':
1065 		case '*':
1066 			if (!O_ISSET(sp, O_MAGIC)) {
1067 				magic = 1;
1068 				needlen += 2;
1069 			}
1070 			break;
1071 		default:
1072 			needlen += 1;
1073 			break;
1074 		}
1075 
1076 	if (!magic) {
1077 		*replacedp = 0;
1078 		return (0);
1079 	}
1080 
1081 	/* Get enough memory to hold the final pattern. */
1082 	*replacedp = 1;
1083 	GET_SPACE_RETW(sp, bp, blen, needlen);
1084 
1085 	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1086 		switch (*p) {
1087 		case '\\':
1088 			if (len > 1) {
1089 				--len;
1090 				switch (*++p) {
1091 				case '<':
1092 					MEMCPY(t,
1093 					    RE_WSTART, RE_WSTART_LEN);
1094 					t += RE_WSTART_LEN;
1095 					break;
1096 				case '>':
1097 					MEMCPY(t,
1098 					    RE_WSTOP, RE_WSTOP_LEN);
1099 					t += RE_WSTOP_LEN;
1100 					break;
1101 				case '~':
1102 					if (O_ISSET(sp, O_MAGIC))
1103 						*t++ = '~';
1104 					else {
1105 						MEMCPY(t,
1106 						    sp->repl, sp->repl_len);
1107 						t += sp->repl_len;
1108 					}
1109 					break;
1110 				case '.':
1111 				case '[':
1112 				case '*':
1113 					if (O_ISSET(sp, O_MAGIC))
1114 						*t++ = '\\';
1115 					*t++ = *p;
1116 					break;
1117 				default:
1118 					*t++ = '\\';
1119 					*t++ = *p;
1120 				}
1121 			} else
1122 				*t++ = '\\';
1123 			break;
1124 		case '~':
1125 			if (O_ISSET(sp, O_MAGIC)) {
1126 				MEMCPY(t, sp->repl, sp->repl_len);
1127 				t += sp->repl_len;
1128 			} else
1129 				*t++ = '~';
1130 			break;
1131 		case '.':
1132 		case '[':
1133 		case '*':
1134 			if (!O_ISSET(sp, O_MAGIC))
1135 				*t++ = '\\';
1136 			*t++ = *p;
1137 			break;
1138 		default:
1139 			*t++ = *p;
1140 			break;
1141 		}
1142 
1143 	*ptrnp = bp;
1144 	*plenp = t - bp;
1145 	return (0);
1146 }
1147 
1148 /*
1149  * re_tag_conv --
1150  *	Convert a tags search path into something that the POSIX
1151  *	1003.2 RE functions can handle.
1152  */
1153 static int
re_tag_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1154 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1155 {
1156 	size_t blen, len;
1157 	int lastdollar;
1158 	CHAR_T *bp, *p, *t;
1159 
1160 	len = *plenp;
1161 
1162 	/* Max memory usage is 2 times the length of the string. */
1163 	*replacedp = 1;
1164 	GET_SPACE_RETW(sp, bp, blen, len * 2);
1165 
1166 	p = *ptrnp;
1167 	t = bp;
1168 
1169 	/* If the last character is a '/' or '?', we just strip it. */
1170 	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1171 		--len;
1172 
1173 	/* If the next-to-last or last character is a '$', it's magic. */
1174 	if (len > 0 && p[len - 1] == '$') {
1175 		--len;
1176 		lastdollar = 1;
1177 	} else
1178 		lastdollar = 0;
1179 
1180 	/* If the first character is a '/' or '?', we just strip it. */
1181 	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1182 		++p;
1183 		--len;
1184 	}
1185 
1186 	/* If the first or second character is a '^', it's magic. */
1187 	if (p[0] == '^') {
1188 		*t++ = *p++;
1189 		--len;
1190 	}
1191 
1192 	/*
1193 	 * Escape every other magic character we can find, meanwhile stripping
1194 	 * the backslashes ctags inserts when escaping the search delimiter
1195 	 * characters.
1196 	 */
1197 	for (; len > 0; --len) {
1198 		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1199 			++p;
1200 			if (len > 1)
1201 				--len;
1202 		} else if (STRCHR(L("^.[]$*"), p[0]))
1203 			*t++ = '\\';
1204 		*t++ = *p++;
1205 	}
1206 	if (lastdollar)
1207 		*t++ = '$';
1208 
1209 	*ptrnp = bp;
1210 	*plenp = t - bp;
1211 	return (0);
1212 }
1213 
1214 /*
1215  * re_cscope_conv --
1216  *	 Convert a cscope search path into something that the POSIX
1217  *      1003.2 RE functions can handle.
1218  */
1219 static int
re_cscope_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1220 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1221 {
1222 	size_t blen, len, nspaces;
1223 	CHAR_T *bp, *t;
1224 	CHAR_T *p;
1225 	CHAR_T *wp;
1226 	size_t wlen;
1227 
1228 	/*
1229 	 * Each space in the source line printed by cscope represents an
1230 	 * arbitrary sequence of spaces, tabs, and comments.
1231 	 */
1232 #define	CSCOPE_RE_SPACE		"([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1233 #define CSCOPE_LEN	sizeof(CSCOPE_RE_SPACE) - 1
1234 	CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1235 	for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1236 		if (*p == ' ')
1237 			++nspaces;
1238 
1239 	/*
1240 	 * Allocate plenty of space:
1241 	 *	the string, plus potential escaping characters;
1242 	 *	nspaces + 2 copies of CSCOPE_RE_SPACE;
1243 	 *	^, $, nul terminator characters.
1244 	 */
1245 	*replacedp = 1;
1246 	len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1247 	GET_SPACE_RETW(sp, bp, blen, len);
1248 
1249 	p = *ptrnp;
1250 	t = bp;
1251 
1252 	*t++ = '^';
1253 	MEMCPY(t, wp, wlen);
1254 	t += wlen;
1255 
1256 	for (len = *plenp; len > 0; ++p, --len)
1257 		if (*p == ' ') {
1258 			MEMCPY(t, wp, wlen);
1259 			t += wlen;
1260 		} else {
1261 			if (STRCHR(L("\\^.[]$*+?()|{}"), *p))
1262 				*t++ = '\\';
1263 			*t++ = *p;
1264 		}
1265 
1266 	MEMCPY(t, wp, wlen);
1267 	t += wlen;
1268 	*t++ = '$';
1269 
1270 	*ptrnp = bp;
1271 	*plenp = t - bp;
1272 	return (0);
1273 }
1274 
1275 /*
1276  * re_error --
1277  *	Report a regular expression error.
1278  *
1279  * PUBLIC: void re_error(SCR *, int, regex_t *);
1280  */
1281 void
re_error(SCR * sp,int errcode,regex_t * preg)1282 re_error(SCR *sp, int errcode, regex_t *preg)
1283 {
1284 	size_t s;
1285 	char *oe;
1286 
1287 	s = regerror(errcode, preg, "", 0);
1288 	MALLOC(sp, oe, s);
1289 	if (oe != NULL) {
1290 		(void)regerror(errcode, preg, oe, s);
1291 		msgq(sp, M_ERR, "RE error: %s", oe);
1292 		free(oe);
1293 	}
1294 }
1295 
1296 /*
1297  * re_sub --
1298  * 	Do the substitution for a regular expression.
1299  */
1300 static int
re_sub(SCR * sp,CHAR_T * ip,CHAR_T ** lbp,size_t * lbclenp,size_t * lblenp,regmatch_t match[10])1301 re_sub(
1302 	SCR *sp,
1303 	CHAR_T *ip,			/* Input line. */
1304 	CHAR_T **lbp,
1305 	size_t *lbclenp,
1306 	size_t *lblenp,
1307 	regmatch_t match[10])
1308 {
1309 	enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1310 	size_t lbclen, lblen;		/* Local copies. */
1311 	size_t mlen;			/* Match length. */
1312 	size_t rpl;			/* Remaining replacement length. */
1313 	CHAR_T *rp;			/* Replacement pointer. */
1314 	int ch;
1315 	int no;				/* Match replacement offset. */
1316 	CHAR_T *p, *t;			/* Buffer pointers. */
1317 	CHAR_T *lb;			/* Local copies. */
1318 
1319 	lb = *lbp;			/* Get local copies. */
1320 	lbclen = *lbclenp;
1321 	lblen = *lblenp;
1322 
1323 	/*
1324 	 * QUOTING NOTE:
1325 	 *
1326 	 * There are some special sequences that vi provides in the
1327 	 * replacement patterns.
1328 	 *	 & string the RE matched (\& if nomagic set)
1329 	 *	\# n-th regular subexpression
1330 	 *	\E end \U, \L conversion
1331 	 *	\e end \U, \L conversion
1332 	 *	\l convert the next character to lower-case
1333 	 *	\L convert to lower-case, until \E, \e, or end of replacement
1334 	 *	\u convert the next character to upper-case
1335 	 *	\U convert to upper-case, until \E, \e, or end of replacement
1336 	 *
1337 	 * Otherwise, since this is the lowest level of replacement, discard
1338 	 * all escaping characters.  This (hopefully) matches historic practice.
1339 	 */
1340 #define	OUTCH(ch, nltrans) do {						\
1341 	ARG_CHAR_T __ch = (ch);						\
1342 	e_key_t __value = KEY_VAL(sp, __ch);				\
1343 	if (nltrans && (__value == K_CR || __value == K_NL)) {		\
1344 		NEEDNEWLINE(sp);					\
1345 		sp->newl[sp->newl_cnt++] = lbclen;			\
1346 	} else if (conv != C_NOTSET) {					\
1347 		switch (conv) {						\
1348 		case C_ONELOWER:					\
1349 			conv = C_NOTSET;				\
1350 			/* FALLTHROUGH */				\
1351 		case C_LOWER:						\
1352 			if (ISUPPER(__ch))				\
1353 				__ch = TOLOWER(__ch);			\
1354 			break;						\
1355 		case C_ONEUPPER:					\
1356 			conv = C_NOTSET;				\
1357 			/* FALLTHROUGH */				\
1358 		case C_UPPER:						\
1359 			if (ISLOWER(__ch))				\
1360 				__ch = TOUPPER(__ch);			\
1361 			break;						\
1362 		default:						\
1363 			abort();					\
1364 		}							\
1365 	}								\
1366 	NEEDSP(sp, 1, p);						\
1367 	*p++ = __ch;							\
1368 	++lbclen;							\
1369 } while (0)
1370 	conv = C_NOTSET;
1371 	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1372 		switch (ch = *rp++) {
1373 		case '&':
1374 			if (O_ISSET(sp, O_MAGIC)) {
1375 				no = 0;
1376 				goto subzero;
1377 			}
1378 			break;
1379 		case '\\':
1380 			if (rpl == 0)
1381 				break;
1382 			--rpl;
1383 			switch (ch = *rp) {
1384 			case '&':
1385 				++rp;
1386 				if (!O_ISSET(sp, O_MAGIC)) {
1387 					no = 0;
1388 					goto subzero;
1389 				}
1390 				break;
1391 			case '0': case '1': case '2': case '3': case '4':
1392 			case '5': case '6': case '7': case '8': case '9':
1393 				no = *rp++ - '0';
1394 subzero:			if (match[no].rm_so == -1 ||
1395 				    match[no].rm_eo == -1)
1396 					break;
1397 				mlen = match[no].rm_eo - match[no].rm_so;
1398 				for (t = ip + match[no].rm_so; mlen--; ++t)
1399 					OUTCH(*t, 0);
1400 				continue;
1401 			case 'e':
1402 			case 'E':
1403 				++rp;
1404 				conv = C_NOTSET;
1405 				continue;
1406 			case 'l':
1407 				++rp;
1408 				conv = C_ONELOWER;
1409 				continue;
1410 			case 'L':
1411 				++rp;
1412 				conv = C_LOWER;
1413 				continue;
1414 			case 'u':
1415 				++rp;
1416 				conv = C_ONEUPPER;
1417 				continue;
1418 			case 'U':
1419 				++rp;
1420 				conv = C_UPPER;
1421 				continue;
1422 			case '\r':
1423 				OUTCH(ch, 0);
1424 				continue;
1425 			default:
1426 				++rp;
1427 				break;
1428 			}
1429 		}
1430 		OUTCH(ch, 1);
1431 	}
1432 
1433 	*lbp = lb;			/* Update caller's information. */
1434 	*lbclenp = lbclen;
1435 	*lblenp = lblen;
1436 	return (0);
1437 }
1438