xref: /freebsd/contrib/nvi/ex/ex_subst.c (revision d93a896ef95946b0bf1219866fcb324b78543444)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #ifndef lint
13 static const char sccsid[] = "$Id: ex_subst.c,v 10.53 2011/12/21 20:40:35 zy Exp $";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
19 
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28 
29 #include "../common/common.h"
30 #include "../vi/vi.h"
31 
32 #define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
33 #define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */
34 
35 static int re_conv(SCR *, CHAR_T **, size_t *, int *);
36 static int re_cscope_conv(SCR *, CHAR_T **, size_t *, int *);
37 static int re_sub(SCR *,
38 		CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]);
39 static int re_tag_conv(SCR *, CHAR_T **, size_t *, int *);
40 static int s(SCR *, EXCMD *, CHAR_T *, regex_t *, u_int);
41 
42 /*
43  * ex_s --
44  *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
45  *
46  *	Substitute on lines matching a pattern.
47  *
48  * PUBLIC: int ex_s(SCR *, EXCMD *);
49  */
50 int
51 ex_s(SCR *sp, EXCMD *cmdp)
52 {
53 	regex_t *re;
54 	size_t blen, len;
55 	u_int flags;
56 	int delim;
57 	CHAR_T *bp, *p, *ptrn, *rep, *t;
58 
59 	/*
60 	 * Skip leading white space.
61 	 *
62 	 * !!!
63 	 * Historic vi allowed any non-alphanumeric to serve as the
64 	 * substitution command delimiter.
65 	 *
66 	 * !!!
67 	 * If the arguments are empty, it's the same as &, i.e. we
68 	 * repeat the last substitution.
69 	 */
70 	if (cmdp->argc == 0)
71 		goto subagain;
72 	for (p = cmdp->argv[0]->bp,
73 	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
74 		if (!cmdskip(*p))
75 			break;
76 	}
77 	if (len == 0)
78 subagain:	return (ex_subagain(sp, cmdp));
79 
80 	delim = *p++;
81 	if (!isascii(delim) || isalnum(delim) || delim == '\\')
82 		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
83 
84 	/*
85 	 * !!!
86 	 * The full-blown substitute command reset the remembered
87 	 * state of the 'c' and 'g' suffices.
88 	 */
89 	sp->c_suffix = sp->g_suffix = 0;
90 
91 	/*
92 	 * Get the pattern string, toss escaping characters.
93 	 *
94 	 * !!!
95 	 * Historic vi accepted any of the following forms:
96 	 *
97 	 *	:s/abc/def/		change "abc" to "def"
98 	 *	:s/abc/def		change "abc" to "def"
99 	 *	:s/abc/			delete "abc"
100 	 *	:s/abc			delete "abc"
101 	 *
102 	 * QUOTING NOTE:
103 	 *
104 	 * Only toss an escaping character if it escapes a delimiter.
105 	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
106 	 * would be nice to be more regular, i.e. for each layer of
107 	 * escaping a single escaping character is removed, but that's
108 	 * not how the historic vi worked.
109 	 */
110 	for (ptrn = t = p;;) {
111 		if (p[0] == '\0' || p[0] == delim) {
112 			if (p[0] == delim)
113 				++p;
114 			/*
115 			 * !!!
116 			 * Nul terminate the pattern string -- it's passed
117 			 * to regcomp which doesn't understand anything else.
118 			 */
119 			*t = '\0';
120 			break;
121 		}
122 		if (p[0] == '\\')
123 			if (p[1] == delim)
124 				++p;
125 			else if (p[1] == '\\')
126 				*t++ = *p++;
127 		*t++ = *p++;
128 	}
129 
130 	/*
131 	 * If the pattern string is empty, use the last RE (not just the
132 	 * last substitution RE).
133 	 */
134 	if (*ptrn == '\0') {
135 		if (sp->re == NULL) {
136 			ex_emsg(sp, NULL, EXM_NOPREVRE);
137 			return (1);
138 		}
139 
140 		/* Re-compile the RE if necessary. */
141 		if (!F_ISSET(sp, SC_RE_SEARCH) &&
142 		    re_compile(sp, sp->re, sp->re_len,
143 		    NULL, NULL, &sp->re_c, RE_C_SEARCH))
144 			return (1);
145 		flags = 0;
146 	} else {
147 		/*
148 		 * !!!
149 		 * Compile the RE.  Historic practice is that substitutes set
150 		 * the search direction as well as both substitute and search
151 		 * RE's.  We compile the RE twice, as we don't want to bother
152 		 * ref counting the pattern string and (opaque) structure.
153 		 */
154 		if (re_compile(sp, ptrn, t - ptrn, &sp->re,
155 		    &sp->re_len, &sp->re_c, RE_C_SEARCH))
156 			return (1);
157 		if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
158 		    &sp->subre_len, &sp->subre_c, RE_C_SUBST))
159 			return (1);
160 
161 		flags = SUB_FIRST;
162 		sp->searchdir = FORWARD;
163 	}
164 	re = &sp->re_c;
165 
166 	/*
167 	 * Get the replacement string.
168 	 *
169 	 * The special character & (\& if O_MAGIC not set) matches the
170 	 * entire RE.  No handling of & is required here, it's done by
171 	 * re_sub().
172 	 *
173 	 * The special character ~ (\~ if O_MAGIC not set) inserts the
174 	 * previous replacement string into this replacement string.
175 	 * Count ~'s to figure out how much space we need.  We could
176 	 * special case nonexistent last patterns or whether or not
177 	 * O_MAGIC is set, but it's probably not worth the effort.
178 	 *
179 	 * QUOTING NOTE:
180 	 *
181 	 * Only toss an escaping character if it escapes a delimiter or
182 	 * if O_MAGIC is set and it escapes a tilde.
183 	 *
184 	 * !!!
185 	 * If the entire replacement pattern is "%", then use the last
186 	 * replacement pattern.  This semantic was added to vi in System
187 	 * V and then percolated elsewhere, presumably around the time
188 	 * that it was added to their version of ed(1).
189 	 */
190 	if (p[0] == '\0' || p[0] == delim) {
191 		if (p[0] == delim)
192 			++p;
193 		if (sp->repl != NULL)
194 			free(sp->repl);
195 		sp->repl = NULL;
196 		sp->repl_len = 0;
197 	} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
198 		p += p[1] == delim ? 2 : 1;
199 	else {
200 		for (rep = p, len = 0;
201 		    p[0] != '\0' && p[0] != delim; ++p, ++len)
202 			if (p[0] == '~')
203 				len += sp->repl_len;
204 		GET_SPACE_RETW(sp, bp, blen, len);
205 		for (t = bp, len = 0, p = rep;;) {
206 			if (p[0] == '\0' || p[0] == delim) {
207 				if (p[0] == delim)
208 					++p;
209 				break;
210 			}
211 			if (p[0] == '\\') {
212 				if (p[1] == delim)
213 					++p;
214 				else if (p[1] == '\\') {
215 					*t++ = *p++;
216 					++len;
217 				} else if (p[1] == '~') {
218 					++p;
219 					if (!O_ISSET(sp, O_MAGIC))
220 						goto tilde;
221 				}
222 			} else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
223 tilde:				++p;
224 				MEMCPY(t, sp->repl, sp->repl_len);
225 				t += sp->repl_len;
226 				len += sp->repl_len;
227 				continue;
228 			}
229 			*t++ = *p++;
230 			++len;
231 		}
232 		if ((sp->repl_len = len) != 0) {
233 			if (sp->repl != NULL)
234 				free(sp->repl);
235 			MALLOC(sp, sp->repl, CHAR_T *, len * sizeof(CHAR_T));
236 			if (sp->repl == NULL) {
237 				FREE_SPACEW(sp, bp, blen);
238 				return (1);
239 			}
240 			MEMCPY(sp->repl, bp, len);
241 		}
242 		FREE_SPACEW(sp, bp, blen);
243 	}
244 	return (s(sp, cmdp, p, re, flags));
245 }
246 
247 /*
248  * ex_subagain --
249  *	[line [,line]] & [cgr] [count] [#lp]]
250  *
251  *	Substitute using the last substitute RE and replacement pattern.
252  *
253  * PUBLIC: int ex_subagain(SCR *, EXCMD *);
254  */
255 int
256 ex_subagain(SCR *sp, EXCMD *cmdp)
257 {
258 	if (sp->subre == NULL) {
259 		ex_emsg(sp, NULL, EXM_NOPREVRE);
260 		return (1);
261 	}
262 	if (!F_ISSET(sp, SC_RE_SUBST) &&
263 	    re_compile(sp, sp->subre, sp->subre_len,
264 	    NULL, NULL, &sp->subre_c, RE_C_SUBST))
265 		return (1);
266 	return (s(sp,
267 	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
268 }
269 
270 /*
271  * ex_subtilde --
272  *	[line [,line]] ~ [cgr] [count] [#lp]]
273  *
274  *	Substitute using the last RE and last substitute replacement pattern.
275  *
276  * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
277  */
278 int
279 ex_subtilde(SCR *sp, EXCMD *cmdp)
280 {
281 	if (sp->re == NULL) {
282 		ex_emsg(sp, NULL, EXM_NOPREVRE);
283 		return (1);
284 	}
285 	if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
286 	    sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
287 		return (1);
288 	return (s(sp,
289 	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
290 }
291 
292 /*
293  * s --
294  * Do the substitution.  This stuff is *really* tricky.  There are lots of
295  * special cases, and general nastiness.  Don't mess with it unless you're
296  * pretty confident.
297  *
298  * The nasty part of the substitution is what happens when the replacement
299  * string contains newlines.  It's a bit tricky -- consider the information
300  * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
301  * to build a set of newline offsets which we use to break the line up later,
302  * when the replacement is done.  Don't change it unless you're *damned*
303  * confident.
304  */
/*
 * NEEDNEWLINE --
 *	Make sure sp->newl has room for one more entry.  When newl_cnt has
 *	reached newl_len the array is grown by 25 entries.  If the
 *	reallocation leaves sp->newl NULL, newl_len is zeroed and the
 *	ENCLOSING function returns 1.
 */
#define	NEEDNEWLINE(sp) {						\
	if (sp->newl_len == sp->newl_cnt) {				\
		sp->newl_len += 25;					\
		REALLOC(sp, sp->newl, size_t *,				\
		    sp->newl_len * sizeof(size_t));			\
		if (sp->newl == NULL) {					\
			sp->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}

/*
 * BUILD --
 *	Append len elements starting at l to the build buffer.  Relies on
 *	the enclosing function's lb (buffer), lbclen (used length) and lblen
 *	(allocated length).  The buffer is grown to
 *	p2roundup(MAX(needed, 256)) elements when too small.  If the
 *	reallocation leaves lb NULL, lbclen is zeroed and the ENCLOSING
 *	function returns 1.
 *
 *	The expansion is a bare brace block, so callers may (and do) invoke
 *	it with or without a trailing semicolon.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen = p2roundup(MAX(lbclen + (len), 256));		\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	MEMCPY(lb + lbclen, l, len);					\
	lbclen += len;							\
}

/*
 * NEEDSP --
 *	Make sure the build buffer has room for len more elements without
 *	copying anything.  Same growth and failure behavior as BUILD.  When
 *	the buffer is reallocated, pnt is reset to lb + lbclen so a caller's
 *	write pointer follows the buffer.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen = p2roundup(MAX(lbclen + (len), 256));		\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		pnt = lb + lbclen;					\
	}								\
}
341 
/*
 * s --
 *	Common back end for ex_s(), ex_subagain() and ex_subtilde().
 *
 *	sp:	the screen.
 *	cmdp:	the ex command; addr1/addr2 give the line range and are
 *		rewritten here if a count is present in the flag text.
 *	s:	text holding the flags and count ("cgr", digits, "#lp",
 *		'+'/'-'), or NULL if there is none.  Reused later in the
 *		function as the pointer to the current line's text.
 *	re:	compiled RE to match with; replaced by &sp->re_c when the
 *		'r' flag is given.
 *	flags:	SUB_FIRST (the 'r' flag is an error) and/or SUB_MUSTSETR
 *		(the 'r' flag is required).
 *
 *	Returns 0 on success and 1 on error.
 *
 *	NOTE(review): BUILD returns directly from this function when its
 *	reallocation fails, so the bp/lb cleanup at the bottom is skipped on
 *	that path -- confirm whether bp can be left unreleased there.
 */
static int
s(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags)
{
	EVENT ev;
	MARK from, to;
	TEXTH tiq[] = {{ 0 }};
	recno_t elno, lno, slno;
	u_long ul;
	regmatch_t match[10];
	size_t blen, cnt, last, lbclen, lblen, len, llen;
	size_t offset, saved_offset, scno;
	int cflag, lflag, nflag, pflag, rflag;
	int didsub, do_eol_match, eflags, empty_ok, eval;
	int linechanged, matched, quit, rval;
	CHAR_T *bp, *lb;
	enum nresult nret;

	NEEDFILE(sp, cmdp);

	/* Remember the starting cursor so we can tell later if it moved. */
	slno = sp->lno;
	scno = sp->cno;

	/*
	 * !!!
	 * Historically, the 'g' and 'c' suffixes were always toggled as flags,
	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
	 * not set, they were initialized to 0 for all substitute commands.  If
	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
	 * specified substitute/replacement patterns (see ex_s()).
	 */
	if (!O_ISSET(sp, O_EDCOMPATIBLE))
		sp->c_suffix = sp->g_suffix = 0;

	/*
	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
	 * it only displayed the last change.  I'd disallow them, but they are
	 * useful in combination with the [v]global commands.  In the current
	 * model the problem is combining them with the 'c' flag -- the screen
	 * would have to flip back and forth between the confirm screen and the
	 * ex print screen, which would be pretty awful.  We do display all
	 * changes, though, for what that's worth.
	 *
	 * !!!
	 * Historic vi was fairly strict about the order of "options", the
	 * count, and "flags".  I'm somewhat fuzzy on the difference between
	 * options and flags, anyway, so this is a simpler approach, and we
	 * just take them in whatever order the user gives them.  (The ex
	 * usage statement doesn't reflect this.)
	 */
	cflag = lflag = nflag = pflag = rflag = 0;
	if (s == NULL)
		goto noargs;
	/* lno doubles as "count already seen": OOBLNO means not yet. */
	for (lno = OOBLNO; *s != '\0'; ++s)
		switch (*s) {
		case ' ':
		case '\t':
			continue;
		case '+':
			++cmdp->flagoff;
			break;
		case '-':
			--cmdp->flagoff;
			break;
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			if (lno != OOBLNO)
				goto usage;
			errno = 0;
			nret = nget_uslong(&ul, s, &s, 10);
			lno = ul;
			if (*s == '\0')		/* Loop increment correction. */
				--s;
			if (nret != NUM_OK) {
				if (nret == NUM_OVER)
					msgq(sp, M_ERR, "153|Count overflow");
				else if (nret == NUM_UNDER)
					msgq(sp, M_ERR, "154|Count underflow");
				else
					msgq(sp, M_SYSERR, NULL);
				return (1);
			}
			/*
			 * In historic vi, the count was inclusive from the
			 * second address.
			 */
			cmdp->addr1.lno = cmdp->addr2.lno;
			cmdp->addr2.lno += lno - 1;
			/* Clamp the end of the range to the last line. */
			if (!db_exist(sp, cmdp->addr2.lno) &&
			    db_last(sp, &cmdp->addr2.lno))
				return (1);
			break;
		case '#':
			nflag = 1;
			break;
		case 'c':
			sp->c_suffix = !sp->c_suffix;

			/* Ex text structure initialization. */
			if (F_ISSET(sp, SC_EX))
				TAILQ_INIT(tiq);
			break;
		case 'g':
			sp->g_suffix = !sp->g_suffix;
			break;
		case 'l':
			lflag = 1;
			break;
		case 'p':
			pflag = 1;
			break;
		case 'r':
			if (LF_ISSET(SUB_FIRST)) {
				msgq(sp, M_ERR,
		    "155|Regular expression specified; r flag meaningless");
				return (1);
			}
			if (!F_ISSET(sp, SC_RE_SEARCH)) {
				ex_emsg(sp, NULL, EXM_NOPREVRE);
				return (1);
			}
			rflag = 1;
			re = &sp->re_c;
			break;
		default:
			goto usage;
		}

	if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
		return (1);
	}

noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
		msgq(sp, M_ERR,
"156|The #, l and p flags may not be combined with the c flag in vi mode");
		return (1);
	}

	/*
	 * bp:		if interactive, line cache
	 * blen:	if interactive, line cache length
	 * lb:		build buffer pointer.
	 * lbclen:	current length of built buffer.
	 * lblen:	length of build buffer.
	 */
	bp = lb = NULL;
	blen = lbclen = lblen = 0;

	/* For each line... */
	lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
	for (matched = quit = 0,
	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {

		/* Someone's unhappy, time to stop. */
		if (INTERRUPTED(sp))
			break;

		/* Get the line. */
		if (db_get(sp, lno, DBG_FATAL, &s, &llen))
			goto err;

		/*
		 * Make a local copy if doing confirmation -- when calling
		 * the confirm routine we're likely to lose the cached copy.
		 */
		if (sp->c_suffix) {
			if (bp == NULL) {
				GET_SPACE_RETW(sp, bp, blen, llen);
			} else
				ADD_SPACE_RETW(sp, bp, blen, llen);
			MEMCPY(bp, s, llen);
			s = bp;
		}

		/* Start searching from the beginning. */
		offset = 0;
		len = llen;

		/* Reset the build buffer offset. */
		lbclen = 0;

		/* Reset empty match flag. */
		empty_ok = 1;

		/*
		 * We don't want to have to do a setline if the line didn't
		 * change -- keep track of whether or not this line changed.
		 * If doing confirmations, don't want to keep setting the
		 * line if change is refused -- keep track of substitutions.
		 */
		didsub = linechanged = 0;

		/* New line, do an EOL match. */
		do_eol_match = 1;

		/* It's not nul terminated, but we pretend it is. */
		eflags = REG_STARTEND;

		/*
		 * The search area is from s + offset to the EOL.
		 *
		 * Generally, match[0].rm_so is the offset of the start
		 * of the match from the start of the search, and offset
		 * is the offset of the start of the last search.
		 */
nextmatch:	match[0].rm_so = 0;
		match[0].rm_eo = len;

		/* Get the next match. */
		eval = regexec(re, s + offset, 10, match, eflags);

		/*
		 * There wasn't a match or if there was an error, deal with
		 * it.  If there was a previous match in this line, resolve
		 * the changes into the database.  Otherwise, just move on.
		 */
		if (eval == REG_NOMATCH)
			goto endmatch;
		if (eval != 0) {
			re_error(sp, eval, re);
			goto err;
		}
		matched = 1;

		/* Only the first search can match an anchored expression. */
		eflags |= REG_NOTBOL;

		/*
		 * !!!
		 * It's possible to match 0-length strings -- for example, the
		 * command s;a*;X;, when matched against the string "aabb" will
		 * result in "XbXbX", i.e. the matches are "aa", the space
		 * between the b's and the space between the b's and the end of
		 * the string.  There is a similar space between the beginning
		 * of the string and the a's.  The rule that we use (because vi
		 * historically used it) is that any 0-length match, occurring
		 * immediately after a match, is ignored.  Otherwise, the above
		 * example would have resulted in "XXbXbX".  Another example is
		 * incorrectly using " *" to replace groups of spaces with one
		 * space.
		 *
		 * The way we do this is that if we just had a successful match,
		 * the starting offset does not skip characters, and the match
		 * is empty, ignore the match and move forward.  If there's no
		 * more characters in the string, we were attempting to match
		 * after the last character, so quit.
		 */
		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
			empty_ok = 1;
			if (len == 0)
				goto endmatch;
			BUILD(sp, s + offset, 1)
			++offset;
			--len;
			goto nextmatch;
		}

		/* Confirm change. */
		if (sp->c_suffix) {
			/*
			 * Set the cursor position for confirmation.  Note,
			 * if we matched on a '$', the cursor may be past
			 * the end of line.
			 */
			from.lno = to.lno = lno;
			from.cno = match[0].rm_so + offset;
			to.cno = match[0].rm_eo + offset;
			/*
			 * Both ex and vi have to correct for a change before
			 * the first character in the line.
			 */
			if (llen == 0)
				from.cno = to.cno = 0;
			if (F_ISSET(sp, SC_VI)) {
				/*
				 * Only vi has to correct for a change after
				 * the last character in the line.
				 *
				 * XXX
				 * It would be nice to change the vi code so
				 * that we could display a cursor past EOL.
				 */
				if (to.cno >= llen)
					to.cno = llen - 1;
				if (from.cno >= llen)
					from.cno = llen - 1;

				sp->lno = from.lno;
				sp->cno = from.cno;
				if (vs_refresh(sp, 1))
					goto err;

				vs_update(sp, msg_cat(sp,
				    "169|Confirm change? [n]", NULL), NULL);

				if (v_event_get(sp, &ev, 0, 0))
					goto err;
				switch (ev.e_event) {
				case E_CHARACTER:
					break;
				case E_EOF:
				case E_ERR:
				case E_INTERRUPT:
					goto lquit;
				default:
					v_event_err(sp, &ev);
					goto lquit;
				}
			} else {
				if (ex_print(sp, cmdp, &from, &to, 0) ||
				    ex_scprint(sp, &from, &to))
					goto lquit;
				if (ex_txt(sp, tiq, 0, TXT_CR))
					goto err;
				ev.e_c = TAILQ_FIRST(tiq)->lb[0];
			}

			switch (ev.e_c) {
			case CH_YES:
				break;
			default:
			case CH_NO:
				/* Refused: keep the matched text as it was. */
				didsub = 0;
				BUILD(sp, s +offset, match[0].rm_eo);
				goto skip;
			case CH_QUIT:
				/* Set the quit/interrupted flags. */
lquit:				quit = 1;
				F_SET(sp->gp, G_INTERRUPTED);

				/*
				 * Resolve any changes, then return to (and
				 * exit from) the main loop.
				 */
				goto endmatch;
			}
		}

		/*
		 * Set the cursor to the last position changed, converting
		 * from 1-based to 0-based.
		 */
		sp->lno = lno;
		sp->cno = match[0].rm_so;

		/* Copy the bytes before the match into the build buffer. */
		BUILD(sp, s + offset, match[0].rm_so);

		/* Substitute the matching bytes. */
		didsub = 1;
		if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
			goto err;

		/* Set the change flag so we know this line was modified. */
		linechanged = 1;

		/* Move past the matched bytes. */
skip:		offset += match[0].rm_eo;
		len -= match[0].rm_eo;

		/* A match cannot be followed by an empty pattern. */
		empty_ok = 0;

		/*
		 * If doing a global change with confirmation, we have to
		 * update the screen.  The basic idea is to store the line
		 * so the screen update routines can find it, and restart.
		 */
		if (didsub && sp->c_suffix && sp->g_suffix) {
			/*
			 * The new search offset will be the end of the
			 * modified line.
			 */
			saved_offset = lbclen;

			/* Copy the rest of the line. */
			if (len)
				BUILD(sp, s + offset, len)

			/* Set the new offset. */
			offset = saved_offset;

			/* Store inserted lines, adjusting the build buffer. */
			last = 0;
			if (sp->newl_cnt) {
				for (cnt = 0;
				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
					if (db_insert(sp, lno,
					    lb + last, sp->newl[cnt] - last))
						goto err;
					last = sp->newl[cnt] + 1;
					++sp->rptlines[L_ADDED];
				}
				lbclen -= last;
				offset -= last;
				sp->newl_cnt = 0;
			}

			/* Store and retrieve the line. */
			if (db_set(sp, lno, lb + last, lbclen))
				goto err;
			if (db_get(sp, lno, DBG_FATAL, &s, &llen))
				goto err;
			ADD_SPACE_RETW(sp, bp, blen, llen)
			MEMCPY(bp, s, llen);
			s = bp;
			len = llen - offset;

			/* Restart the build. */
			lbclen = 0;
			BUILD(sp, s, offset);

			/*
			 * If we haven't already done the after-the-string
			 * match, do one.  Set REG_NOTEOL so the '$' pattern
			 * only matches once.
			 */
			if (!do_eol_match)
				goto endmatch;
			if (offset == len) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

		/*
		 * If it's a global:
		 *
		 * If at the end of the string, do a test for the after
		 * the string match.  Set REG_NOTEOL so the '$' pattern
		 * only matches once.
		 */
		if (sp->g_suffix && do_eol_match) {
			if (len == 0) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

endmatch:	if (!linechanged)
			continue;

		/* Copy any remaining bytes into the build buffer. */
		if (len)
			BUILD(sp, s + offset, len)

		/* Store inserted lines, adjusting the build buffer. */
		last = 0;
		if (sp->newl_cnt) {
			for (cnt = 0;
			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
				if (db_insert(sp,
				    lno, lb + last, sp->newl[cnt] - last))
					goto err;
				last = sp->newl[cnt] + 1;
				++sp->rptlines[L_ADDED];
			}
			lbclen -= last;
			sp->newl_cnt = 0;
		}

		/* Store the changed line. */
		if (db_set(sp, lno, lb + last, lbclen))
			goto err;

		/* Update changed line counter. */
		if (sp->rptlchange != lno) {
			sp->rptlchange = lno;
			++sp->rptlines[L_CHANGED];
		}

		/*
		 * !!!
		 * Display as necessary.  Historic practice is to only
		 * display the last line of a line split into multiple
		 * lines.
		 */
		if (lflag || nflag || pflag) {
			from.lno = to.lno = lno;
			from.cno = to.cno = 0;
			if (lflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
			if (nflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
			if (pflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
		}
	}

	/*
	 * !!!
	 * Historically, vi attempted to leave the cursor at the same place if
	 * the substitution was done at the current cursor position.  Otherwise
	 * it moved it to the first non-blank of the last line changed.  There
	 * were some problems: for example, :s/$/foo/ with the cursor on the
	 * last character of the line left the cursor on the last character, or
	 * the & command with multiple occurrences of the matching string in the
	 * line usually left the cursor in a fairly random position.
	 *
	 * We try to do the same thing, with the exception that if the user is
	 * doing substitution with confirmation, we move to the last line about
	 * which the user was consulted, as opposed to the last line that they
	 * actually changed.  This prevents a screen flash if the user doesn't
	 * change many of the possible lines.
	 */
	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
		sp->cno = 0;
		(void)nonblank(sp, sp->lno, &sp->cno);
	}

	/*
	 * If not in a global command, and nothing matched, say so.
	 * Else, if none of the lines displayed, put something up.
	 */
	rval = 0;
	if (!matched) {
		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
			msgq(sp, M_ERR, "157|No match found");
			goto err;
		}
	} else if (!lflag && !nflag && !pflag)
		F_SET(cmdp, E_AUTOPRINT);

	/* The err label is only reachable by goto; success skips it. */
	if (0) {
err:		rval = 1;
	}

	/* Common cleanup for the success and "goto err" paths. */
	if (bp != NULL)
		FREE_SPACEW(sp, bp, blen);
	if (lb != NULL)
		free(lb);
	return (rval);
}
877 
878 /*
879  * re_compile --
880  *	Compile the RE.
881  *
882  * PUBLIC: int re_compile(SCR *,
883  * PUBLIC:     CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int);
884  */
885 int
886 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
887 {
888 	size_t len;
889 	int reflags, replaced, rval;
890 	CHAR_T *p;
891 
892 	/* Set RE flags. */
893 	reflags = 0;
894 	if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
895 		if (O_ISSET(sp, O_EXTENDED))
896 			reflags |= REG_EXTENDED;
897 		if (O_ISSET(sp, O_IGNORECASE))
898 			reflags |= REG_ICASE;
899 		if (O_ISSET(sp, O_ICLOWER)) {
900 			for (p = ptrn, len = plen; len > 0; ++p, --len)
901 				if (ISUPPER(*p))
902 					break;
903 			if (len == 0)
904 				reflags |= REG_ICASE;
905 		}
906 	}
907 
908 	/* If we're replacing a saved value, clear the old one. */
909 	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
910 		regfree(&sp->re_c);
911 		F_CLR(sp, SC_RE_SEARCH);
912 	}
913 	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
914 		regfree(&sp->subre_c);
915 		F_CLR(sp, SC_RE_SUBST);
916 	}
917 
918 	/*
919 	 * If we're saving the string, it's a pattern we haven't seen before,
920 	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
921 	 * later recompilation.   Free any previously saved value.
922 	 */
923 	if (ptrnp != NULL) {
924 		replaced = 0;
925 		if (LF_ISSET(RE_C_CSCOPE)) {
926 			if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
927 				return (1);
928 			/*
929 			 * XXX
930 			 * Currently, the match-any-<blank> expression used in
931 			 * re_cscope_conv() requires extended RE's.  This may
932 			 * not be right or safe.
933 			 */
934 			reflags |= REG_EXTENDED;
935 		} else if (LF_ISSET(RE_C_TAG)) {
936 			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
937 				return (1);
938 		} else
939 			if (re_conv(sp, &ptrn, &plen, &replaced))
940 				return (1);
941 
942 		/* Discard previous pattern. */
943 		if (*ptrnp != NULL) {
944 			free(*ptrnp);
945 			*ptrnp = NULL;
946 		}
947 		if (lenp != NULL)
948 			*lenp = plen;
949 
950 		/*
951 		 * Copy the string into allocated memory.
952 		 *
953 		 * XXX
954 		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
955 		 * for now.  There's just no other solution.
956 		 */
957 		MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
958 		if (*ptrnp != NULL) {
959 			MEMCPY(*ptrnp, ptrn, plen);
960 			(*ptrnp)[plen] = '\0';
961 		}
962 
963 		/* Free up conversion-routine-allocated memory. */
964 		if (replaced)
965 			FREE_SPACEW(sp, ptrn, 0);
966 
967 		if (*ptrnp == NULL)
968 			return (1);
969 
970 		ptrn = *ptrnp;
971 	}
972 
973 	/*
974 	 * XXX
975 	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
976 	 * contained a nul.  Bummer!
977 	 */
978 	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
979 		if (!LF_ISSET(RE_C_SILENT))
980 			re_error(sp, rval, rep);
981 		return (1);
982 	}
983 
984 	if (LF_ISSET(RE_C_SEARCH))
985 		F_SET(sp, SC_RE_SEARCH);
986 	if (LF_ISSET(RE_C_SUBST))
987 		F_SET(sp, SC_RE_SUBST);
988 
989 	return (0);
990 }
991 
992 /*
993  * re_conv --
994  *	Convert vi's regular expressions into something that the
995  *	the POSIX 1003.2 RE functions can handle.
996  *
997  * There are three conversions we make to make vi's RE's (specifically
998  * the global, search, and substitute patterns) work with POSIX RE's.
999  *
1000  * 1: If O_MAGIC is not set, strip backslashes from the magic character
1001  *    set (.[*~) that have them, and add them to the ones that don't.
1002  * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1003  *    from the last substitute command's replacement string.  If O_MAGIC
1004  *    is set, it's the string "~".
1005  * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1006  *    new RE escapes.
1007  *
1008  * !!!/XXX
1009  * This doesn't exactly match the historic behavior of vi because we do
1010  * the ~ substitution before calling the RE engine, so magic characters
1011  * in the replacement string will be expanded by the RE engine, and they
1012  * weren't historically.  It's a bug.
1013  */
1014 static int
1015 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1016 {
1017 	size_t blen, len, needlen;
1018 	int magic;
1019 	CHAR_T *bp, *p, *t;
1020 
1021 	/*
1022 	 * First pass through, we figure out how much space we'll need.
1023 	 * We do it in two passes, on the grounds that most of the time
1024 	 * the user is doing a search and won't have magic characters.
1025 	 * That way we can skip most of the memory allocation and copies.
1026 	 */
1027 	magic = 0;
1028 	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1029 		switch (*p) {
1030 		case '\\':
1031 			if (len > 1) {
1032 				--len;
1033 				switch (*++p) {
1034 				case '<':
1035 					magic = 1;
1036 					needlen += RE_WSTART_LEN + 1;
1037 					break;
1038 				case '>':
1039 					magic = 1;
1040 					needlen += RE_WSTOP_LEN + 1;
1041 					break;
1042 				case '~':
1043 					if (!O_ISSET(sp, O_MAGIC)) {
1044 						magic = 1;
1045 						needlen += sp->repl_len;
1046 					}
1047 					break;
1048 				case '.':
1049 				case '[':
1050 				case '*':
1051 					if (!O_ISSET(sp, O_MAGIC)) {
1052 						magic = 1;
1053 						needlen += 1;
1054 					}
1055 					break;
1056 				default:
1057 					needlen += 2;
1058 				}
1059 			} else
1060 				needlen += 1;
1061 			break;
1062 		case '~':
1063 			if (O_ISSET(sp, O_MAGIC)) {
1064 				magic = 1;
1065 				needlen += sp->repl_len;
1066 			}
1067 			break;
1068 		case '.':
1069 		case '[':
1070 		case '*':
1071 			if (!O_ISSET(sp, O_MAGIC)) {
1072 				magic = 1;
1073 				needlen += 2;
1074 			}
1075 			break;
1076 		default:
1077 			needlen += 1;
1078 			break;
1079 		}
1080 
1081 	if (!magic) {
1082 		*replacedp = 0;
1083 		return (0);
1084 	}
1085 
1086 	/* Get enough memory to hold the final pattern. */
1087 	*replacedp = 1;
1088 	GET_SPACE_RETW(sp, bp, blen, needlen);
1089 
1090 	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1091 		switch (*p) {
1092 		case '\\':
1093 			if (len > 1) {
1094 				--len;
1095 				switch (*++p) {
1096 				case '<':
1097 					MEMCPY(t,
1098 					    RE_WSTART, RE_WSTART_LEN);
1099 					t += RE_WSTART_LEN;
1100 					break;
1101 				case '>':
1102 					MEMCPY(t,
1103 					    RE_WSTOP, RE_WSTOP_LEN);
1104 					t += RE_WSTOP_LEN;
1105 					break;
1106 				case '~':
1107 					if (O_ISSET(sp, O_MAGIC))
1108 						*t++ = '~';
1109 					else {
1110 						MEMCPY(t,
1111 						    sp->repl, sp->repl_len);
1112 						t += sp->repl_len;
1113 					}
1114 					break;
1115 				case '.':
1116 				case '[':
1117 				case '*':
1118 					if (O_ISSET(sp, O_MAGIC))
1119 						*t++ = '\\';
1120 					*t++ = *p;
1121 					break;
1122 				default:
1123 					*t++ = '\\';
1124 					*t++ = *p;
1125 				}
1126 			} else
1127 				*t++ = '\\';
1128 			break;
1129 		case '~':
1130 			if (O_ISSET(sp, O_MAGIC)) {
1131 				MEMCPY(t, sp->repl, sp->repl_len);
1132 				t += sp->repl_len;
1133 			} else
1134 				*t++ = '~';
1135 			break;
1136 		case '.':
1137 		case '[':
1138 		case '*':
1139 			if (!O_ISSET(sp, O_MAGIC))
1140 				*t++ = '\\';
1141 			*t++ = *p;
1142 			break;
1143 		default:
1144 			*t++ = *p;
1145 			break;
1146 		}
1147 
1148 	*ptrnp = bp;
1149 	*plenp = t - bp;
1150 	return (0);
1151 }
1152 
1153 /*
1154  * re_tag_conv --
1155  *	Convert a tags search path into something that the POSIX
1156  *	1003.2 RE functions can handle.
1157  */
1158 static int
1159 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1160 {
1161 	size_t blen, len;
1162 	int lastdollar;
1163 	CHAR_T *bp, *p, *t;
1164 
1165 	len = *plenp;
1166 
1167 	/* Max memory usage is 2 times the length of the string. */
1168 	*replacedp = 1;
1169 	GET_SPACE_RETW(sp, bp, blen, len * 2);
1170 
1171 	p = *ptrnp;
1172 	t = bp;
1173 
1174 	/* If the last character is a '/' or '?', we just strip it. */
1175 	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1176 		--len;
1177 
1178 	/* If the next-to-last or last character is a '$', it's magic. */
1179 	if (len > 0 && p[len - 1] == '$') {
1180 		--len;
1181 		lastdollar = 1;
1182 	} else
1183 		lastdollar = 0;
1184 
1185 	/* If the first character is a '/' or '?', we just strip it. */
1186 	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1187 		++p;
1188 		--len;
1189 	}
1190 
1191 	/* If the first or second character is a '^', it's magic. */
1192 	if (p[0] == '^') {
1193 		*t++ = *p++;
1194 		--len;
1195 	}
1196 
1197 	/*
1198 	 * Escape every other magic character we can find, meanwhile stripping
1199 	 * the backslashes ctags inserts when escaping the search delimiter
1200 	 * characters.
1201 	 */
1202 	for (; len > 0; --len) {
1203 		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1204 			++p;
1205 			--len;
1206 		} else if (STRCHR(L("^.[]$*"), p[0]))
1207 			*t++ = '\\';
1208 		*t++ = *p++;
1209 	}
1210 	if (lastdollar)
1211 		*t++ = '$';
1212 
1213 	*ptrnp = bp;
1214 	*plenp = t - bp;
1215 	return (0);
1216 }
1217 
1218 /*
1219  * re_cscope_conv --
1220  *	 Convert a cscope search path into something that the POSIX
1221  *      1003.2 RE functions can handle.
1222  */
1223 static int
1224 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1225 {
1226 	size_t blen, len, nspaces;
1227 	CHAR_T *bp, *t;
1228 	CHAR_T *p;
1229 	CHAR_T *wp;
1230 	size_t wlen;
1231 
1232 	/*
1233 	 * Each space in the source line printed by cscope represents an
1234 	 * arbitrary sequence of spaces, tabs, and comments.
1235 	 */
1236 #define	CSCOPE_RE_SPACE		"([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1237 #define CSCOPE_LEN	sizeof(CSCOPE_RE_SPACE) - 1
1238 	CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1239 	for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1240 		if (*p == ' ')
1241 			++nspaces;
1242 
1243 	/*
1244 	 * Allocate plenty of space:
1245 	 *	the string, plus potential escaping characters;
1246 	 *	nspaces + 2 copies of CSCOPE_RE_SPACE;
1247 	 *	^, $, nul terminator characters.
1248 	 */
1249 	*replacedp = 1;
1250 	len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1251 	GET_SPACE_RETW(sp, bp, blen, len);
1252 
1253 	p = *ptrnp;
1254 	t = bp;
1255 
1256 	*t++ = '^';
1257 	MEMCPY(t, wp, wlen);
1258 	t += wlen;
1259 
1260 	for (len = *plenp; len > 0; ++p, --len)
1261 		if (*p == ' ') {
1262 			MEMCPY(t, wp, wlen);
1263 			t += wlen;
1264 		} else {
1265 			if (STRCHR(L("\\^.[]$*+?()|{}"), *p))
1266 				*t++ = '\\';
1267 			*t++ = *p;
1268 		}
1269 
1270 	MEMCPY(t, wp, wlen);
1271 	t += wlen;
1272 	*t++ = '$';
1273 
1274 	*ptrnp = bp;
1275 	*plenp = t - bp;
1276 	return (0);
1277 }
1278 
1279 /*
1280  * re_error --
1281  *	Report a regular expression error.
1282  *
1283  * PUBLIC: void re_error(SCR *, int, regex_t *);
1284  */
1285 void
1286 re_error(SCR *sp, int errcode, regex_t *preg)
1287 {
1288 	size_t s;
1289 	char *oe;
1290 
1291 	s = regerror(errcode, preg, "", 0);
1292 	MALLOC(sp, oe, char *, s);
1293 	if (oe != NULL) {
1294 		(void)regerror(errcode, preg, oe, s);
1295 		msgq(sp, M_ERR, "RE error: %s", oe);
1296 		free(oe);
1297 	}
1298 }
1299 
1300 /*
1301  * re_sub --
1302  * 	Do the substitution for a regular expression.
1303  */
1304 static int
1305 re_sub(
1306 	SCR *sp,
1307 	CHAR_T *ip,			/* Input line. */
1308 	CHAR_T **lbp,
1309 	size_t *lbclenp,
1310 	size_t *lblenp,
1311 	regmatch_t match[10])
1312 {
1313 	enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1314 	size_t lbclen, lblen;		/* Local copies. */
1315 	size_t mlen;			/* Match length. */
1316 	size_t rpl;			/* Remaining replacement length. */
1317 	CHAR_T *rp;			/* Replacement pointer. */
1318 	int ch;
1319 	int no;				/* Match replacement offset. */
1320 	CHAR_T *p, *t;			/* Buffer pointers. */
1321 	CHAR_T *lb;			/* Local copies. */
1322 
1323 	lb = *lbp;			/* Get local copies. */
1324 	lbclen = *lbclenp;
1325 	lblen = *lblenp;
1326 
1327 	/*
1328 	 * QUOTING NOTE:
1329 	 *
1330 	 * There are some special sequences that vi provides in the
1331 	 * replacement patterns.
1332 	 *	 & string the RE matched (\& if nomagic set)
1333 	 *	\# n-th regular subexpression
1334 	 *	\E end \U, \L conversion
1335 	 *	\e end \U, \L conversion
1336 	 *	\l convert the next character to lower-case
1337 	 *	\L convert to lower-case, until \E, \e, or end of replacement
1338 	 *	\u convert the next character to upper-case
1339 	 *	\U convert to upper-case, until \E, \e, or end of replacement
1340 	 *
1341 	 * Otherwise, since this is the lowest level of replacement, discard
1342 	 * all escaping characters.  This (hopefully) matches historic practice.
1343 	 */
1344 #define	OUTCH(ch, nltrans) {						\
1345 	ARG_CHAR_T __ch = (ch);						\
1346 	e_key_t __value = KEY_VAL(sp, __ch);				\
1347 	if (nltrans && (__value == K_CR || __value == K_NL)) {		\
1348 		NEEDNEWLINE(sp);					\
1349 		sp->newl[sp->newl_cnt++] = lbclen;			\
1350 	} else if (conv != C_NOTSET) {					\
1351 		switch (conv) {						\
1352 		case C_ONELOWER:					\
1353 			conv = C_NOTSET;				\
1354 			/* FALLTHROUGH */				\
1355 		case C_LOWER:						\
1356 			if (ISUPPER(__ch))				\
1357 				__ch = TOLOWER(__ch);			\
1358 			break;						\
1359 		case C_ONEUPPER:					\
1360 			conv = C_NOTSET;				\
1361 			/* FALLTHROUGH */				\
1362 		case C_UPPER:						\
1363 			if (ISLOWER(__ch))				\
1364 				__ch = TOUPPER(__ch);			\
1365 			break;						\
1366 		default:						\
1367 			abort();					\
1368 		}							\
1369 	}								\
1370 	NEEDSP(sp, 1, p);						\
1371 	*p++ = __ch;							\
1372 	++lbclen;							\
1373 }
1374 	conv = C_NOTSET;
1375 	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1376 		switch (ch = *rp++) {
1377 		case '&':
1378 			if (O_ISSET(sp, O_MAGIC)) {
1379 				no = 0;
1380 				goto subzero;
1381 			}
1382 			break;
1383 		case '\\':
1384 			if (rpl == 0)
1385 				break;
1386 			--rpl;
1387 			switch (ch = *rp) {
1388 			case '&':
1389 				++rp;
1390 				if (!O_ISSET(sp, O_MAGIC)) {
1391 					no = 0;
1392 					goto subzero;
1393 				}
1394 				break;
1395 			case '0': case '1': case '2': case '3': case '4':
1396 			case '5': case '6': case '7': case '8': case '9':
1397 				no = *rp++ - '0';
1398 subzero:			if (match[no].rm_so == -1 ||
1399 				    match[no].rm_eo == -1)
1400 					break;
1401 				mlen = match[no].rm_eo - match[no].rm_so;
1402 				for (t = ip + match[no].rm_so; mlen--; ++t)
1403 					OUTCH(*t, 0);
1404 				continue;
1405 			case 'e':
1406 			case 'E':
1407 				++rp;
1408 				conv = C_NOTSET;
1409 				continue;
1410 			case 'l':
1411 				++rp;
1412 				conv = C_ONELOWER;
1413 				continue;
1414 			case 'L':
1415 				++rp;
1416 				conv = C_LOWER;
1417 				continue;
1418 			case 'u':
1419 				++rp;
1420 				conv = C_ONEUPPER;
1421 				continue;
1422 			case 'U':
1423 				++rp;
1424 				conv = C_UPPER;
1425 				continue;
1426 			case '\r':
1427 				OUTCH(ch, 0);
1428 				continue;
1429 			default:
1430 				++rp;
1431 				break;
1432 			}
1433 		}
1434 		OUTCH(ch, 1);
1435 	}
1436 
1437 	*lbp = lb;			/* Update caller's information. */
1438 	*lbclenp = lbclen;
1439 	*lblenp = lblen;
1440 	return (0);
1441 }
1442