1b8ba871bSPeter Wemm /*-
2b8ba871bSPeter Wemm * Copyright (c) 1992, 1993, 1994
3b8ba871bSPeter Wemm * The Regents of the University of California. All rights reserved.
4b8ba871bSPeter Wemm * Copyright (c) 1992, 1993, 1994, 1995, 1996
5b8ba871bSPeter Wemm * Keith Bostic. All rights reserved.
6b8ba871bSPeter Wemm *
7b8ba871bSPeter Wemm * See the LICENSE file for redistribution information.
8b8ba871bSPeter Wemm */
9b8ba871bSPeter Wemm
10b8ba871bSPeter Wemm #include "config.h"
11b8ba871bSPeter Wemm
12b8ba871bSPeter Wemm #include <sys/types.h>
13b8ba871bSPeter Wemm #include <sys/queue.h>
14b8ba871bSPeter Wemm #include <sys/time.h>
15b8ba871bSPeter Wemm
16b8ba871bSPeter Wemm #include <bitstring.h>
17b8ba871bSPeter Wemm #include <ctype.h>
18b8ba871bSPeter Wemm #include <errno.h>
19b8ba871bSPeter Wemm #include <limits.h>
20b8ba871bSPeter Wemm #include <stdio.h>
21b8ba871bSPeter Wemm #include <stdlib.h>
22b8ba871bSPeter Wemm #include <string.h>
23b8ba871bSPeter Wemm #include <unistd.h>
24b8ba871bSPeter Wemm
25b8ba871bSPeter Wemm #include "../common/common.h"
26b8ba871bSPeter Wemm #include "../vi/vi.h"
27b8ba871bSPeter Wemm
28b8ba871bSPeter Wemm #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
29b8ba871bSPeter Wemm #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
30b8ba871bSPeter Wemm
31c271fa92SBaptiste Daroussin static int re_conv(SCR *, CHAR_T **, size_t *, int *);
32c271fa92SBaptiste Daroussin static int re_cscope_conv(SCR *, CHAR_T **, size_t *, int *);
33c271fa92SBaptiste Daroussin static int re_sub(SCR *,
34c271fa92SBaptiste Daroussin CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]);
35c271fa92SBaptiste Daroussin static int re_tag_conv(SCR *, CHAR_T **, size_t *, int *);
36c271fa92SBaptiste Daroussin static int s(SCR *, EXCMD *, CHAR_T *, regex_t *, u_int);
37b8ba871bSPeter Wemm
38b8ba871bSPeter Wemm /*
39b8ba871bSPeter Wemm * ex_s --
40b8ba871bSPeter Wemm * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
41b8ba871bSPeter Wemm *
42b8ba871bSPeter Wemm * Substitute on lines matching a pattern.
43b8ba871bSPeter Wemm *
44c271fa92SBaptiste Daroussin * PUBLIC: int ex_s(SCR *, EXCMD *);
45b8ba871bSPeter Wemm */
46b8ba871bSPeter Wemm int
ex_s(SCR * sp,EXCMD * cmdp)47f0957ccaSPeter Wemm ex_s(SCR *sp, EXCMD *cmdp)
48b8ba871bSPeter Wemm {
49b8ba871bSPeter Wemm regex_t *re;
50b8ba871bSPeter Wemm size_t blen, len;
51b8ba871bSPeter Wemm u_int flags;
52b8ba871bSPeter Wemm int delim;
53f0957ccaSPeter Wemm CHAR_T *bp, *p, *ptrn, *rep, *t;
54b8ba871bSPeter Wemm
55b8ba871bSPeter Wemm /*
56b8ba871bSPeter Wemm * Skip leading white space.
57b8ba871bSPeter Wemm *
58b8ba871bSPeter Wemm * !!!
59b8ba871bSPeter Wemm * Historic vi allowed any non-alphanumeric to serve as the
60b8ba871bSPeter Wemm * substitution command delimiter.
61b8ba871bSPeter Wemm *
62b8ba871bSPeter Wemm * !!!
63b8ba871bSPeter Wemm * If the arguments are empty, it's the same as &, i.e. we
64b8ba871bSPeter Wemm * repeat the last substitution.
65b8ba871bSPeter Wemm */
66b8ba871bSPeter Wemm if (cmdp->argc == 0)
67b8ba871bSPeter Wemm goto subagain;
68b8ba871bSPeter Wemm for (p = cmdp->argv[0]->bp,
69b8ba871bSPeter Wemm len = cmdp->argv[0]->len; len > 0; --len, ++p) {
70f0957ccaSPeter Wemm if (!cmdskip(*p))
71b8ba871bSPeter Wemm break;
72b8ba871bSPeter Wemm }
73b8ba871bSPeter Wemm if (len == 0)
74b8ba871bSPeter Wemm subagain: return (ex_subagain(sp, cmdp));
75b8ba871bSPeter Wemm
76b8ba871bSPeter Wemm delim = *p++;
77110d525eSBaptiste Daroussin if (is09azAZ(delim) || delim == '\\')
78b8ba871bSPeter Wemm return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
79b8ba871bSPeter Wemm
80b8ba871bSPeter Wemm /*
81b8ba871bSPeter Wemm * !!!
82b8ba871bSPeter Wemm * The full-blown substitute command reset the remembered
83b8ba871bSPeter Wemm * state of the 'c' and 'g' suffices.
84b8ba871bSPeter Wemm */
85b8ba871bSPeter Wemm sp->c_suffix = sp->g_suffix = 0;
86b8ba871bSPeter Wemm
87b8ba871bSPeter Wemm /*
88b8ba871bSPeter Wemm * Get the pattern string, toss escaping characters.
89b8ba871bSPeter Wemm *
90b8ba871bSPeter Wemm * !!!
91b8ba871bSPeter Wemm * Historic vi accepted any of the following forms:
92b8ba871bSPeter Wemm *
93b8ba871bSPeter Wemm * :s/abc/def/ change "abc" to "def"
94b8ba871bSPeter Wemm * :s/abc/def change "abc" to "def"
95b8ba871bSPeter Wemm * :s/abc/ delete "abc"
96b8ba871bSPeter Wemm * :s/abc delete "abc"
97b8ba871bSPeter Wemm *
98b8ba871bSPeter Wemm * QUOTING NOTE:
99b8ba871bSPeter Wemm *
100b8ba871bSPeter Wemm * Only toss an escaping character if it escapes a delimiter.
101b8ba871bSPeter Wemm * This means that "s/A/\\\\f" replaces "A" with "\\f". It
102b8ba871bSPeter Wemm * would be nice to be more regular, i.e. for each layer of
103b8ba871bSPeter Wemm * escaping a single escaping character is removed, but that's
104b8ba871bSPeter Wemm * not how the historic vi worked.
105b8ba871bSPeter Wemm */
106b8ba871bSPeter Wemm for (ptrn = t = p;;) {
107b8ba871bSPeter Wemm if (p[0] == '\0' || p[0] == delim) {
108b8ba871bSPeter Wemm if (p[0] == delim)
109b8ba871bSPeter Wemm ++p;
110b8ba871bSPeter Wemm /*
111b8ba871bSPeter Wemm * !!!
112b8ba871bSPeter Wemm * Nul terminate the pattern string -- it's passed
113b8ba871bSPeter Wemm * to regcomp which doesn't understand anything else.
114b8ba871bSPeter Wemm */
115b8ba871bSPeter Wemm *t = '\0';
116b8ba871bSPeter Wemm break;
117b8ba871bSPeter Wemm }
118755cc40cSBaptiste Daroussin if (p[0] == '\\') {
119b8ba871bSPeter Wemm if (p[1] == delim)
120b8ba871bSPeter Wemm ++p;
121b8ba871bSPeter Wemm else if (p[1] == '\\')
122b8ba871bSPeter Wemm *t++ = *p++;
123755cc40cSBaptiste Daroussin }
124b8ba871bSPeter Wemm *t++ = *p++;
125b8ba871bSPeter Wemm }
126b8ba871bSPeter Wemm
127b8ba871bSPeter Wemm /*
128b8ba871bSPeter Wemm * If the pattern string is empty, use the last RE (not just the
129b8ba871bSPeter Wemm * last substitution RE).
130b8ba871bSPeter Wemm */
131b8ba871bSPeter Wemm if (*ptrn == '\0') {
132b8ba871bSPeter Wemm if (sp->re == NULL) {
133b8ba871bSPeter Wemm ex_emsg(sp, NULL, EXM_NOPREVRE);
134b8ba871bSPeter Wemm return (1);
135b8ba871bSPeter Wemm }
136b8ba871bSPeter Wemm
137b8ba871bSPeter Wemm /* Re-compile the RE if necessary. */
138f0957ccaSPeter Wemm if (!F_ISSET(sp, SC_RE_SEARCH) &&
139f0957ccaSPeter Wemm re_compile(sp, sp->re, sp->re_len,
140f0957ccaSPeter Wemm NULL, NULL, &sp->re_c, RE_C_SEARCH))
141b8ba871bSPeter Wemm return (1);
142b8ba871bSPeter Wemm flags = 0;
143b8ba871bSPeter Wemm } else {
144b8ba871bSPeter Wemm /*
145b8ba871bSPeter Wemm * !!!
146b8ba871bSPeter Wemm * Compile the RE. Historic practice is that substitutes set
147b8ba871bSPeter Wemm * the search direction as well as both substitute and search
148b8ba871bSPeter Wemm * RE's. We compile the RE twice, as we don't want to bother
149b8ba871bSPeter Wemm * ref counting the pattern string and (opaque) structure.
150b8ba871bSPeter Wemm */
151f0957ccaSPeter Wemm if (re_compile(sp, ptrn, t - ptrn, &sp->re,
152f0957ccaSPeter Wemm &sp->re_len, &sp->re_c, RE_C_SEARCH))
153b8ba871bSPeter Wemm return (1);
154f0957ccaSPeter Wemm if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
155f0957ccaSPeter Wemm &sp->subre_len, &sp->subre_c, RE_C_SUBST))
156b8ba871bSPeter Wemm return (1);
157b8ba871bSPeter Wemm
158b8ba871bSPeter Wemm flags = SUB_FIRST;
159b8ba871bSPeter Wemm sp->searchdir = FORWARD;
160b8ba871bSPeter Wemm }
161b8ba871bSPeter Wemm re = &sp->re_c;
162b8ba871bSPeter Wemm
163b8ba871bSPeter Wemm /*
164b8ba871bSPeter Wemm * Get the replacement string.
165b8ba871bSPeter Wemm *
166b8ba871bSPeter Wemm * The special character & (\& if O_MAGIC not set) matches the
167b8ba871bSPeter Wemm * entire RE. No handling of & is required here, it's done by
168b8ba871bSPeter Wemm * re_sub().
169b8ba871bSPeter Wemm *
170b8ba871bSPeter Wemm * The special character ~ (\~ if O_MAGIC not set) inserts the
171b8ba871bSPeter Wemm * previous replacement string into this replacement string.
172b8ba871bSPeter Wemm * Count ~'s to figure out how much space we need. We could
173b8ba871bSPeter Wemm * special case nonexistent last patterns or whether or not
174b8ba871bSPeter Wemm * O_MAGIC is set, but it's probably not worth the effort.
175b8ba871bSPeter Wemm *
176b8ba871bSPeter Wemm * QUOTING NOTE:
177b8ba871bSPeter Wemm *
178b8ba871bSPeter Wemm * Only toss an escaping character if it escapes a delimiter or
179b8ba871bSPeter Wemm * if O_MAGIC is set and it escapes a tilde.
180b8ba871bSPeter Wemm *
181b8ba871bSPeter Wemm * !!!
182b8ba871bSPeter Wemm * If the entire replacement pattern is "%", then use the last
183b8ba871bSPeter Wemm * replacement pattern. This semantic was added to vi in System
184b8ba871bSPeter Wemm * V and then percolated elsewhere, presumably around the time
185b8ba871bSPeter Wemm * that it was added to their version of ed(1).
186b8ba871bSPeter Wemm */
187b8ba871bSPeter Wemm if (p[0] == '\0' || p[0] == delim) {
188b8ba871bSPeter Wemm if (p[0] == delim)
189b8ba871bSPeter Wemm ++p;
190b8ba871bSPeter Wemm free(sp->repl);
191b8ba871bSPeter Wemm sp->repl = NULL;
192b8ba871bSPeter Wemm sp->repl_len = 0;
193b8ba871bSPeter Wemm } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
194b8ba871bSPeter Wemm p += p[1] == delim ? 2 : 1;
195b8ba871bSPeter Wemm else {
196b8ba871bSPeter Wemm for (rep = p, len = 0;
197b8ba871bSPeter Wemm p[0] != '\0' && p[0] != delim; ++p, ++len)
198b8ba871bSPeter Wemm if (p[0] == '~')
199b8ba871bSPeter Wemm len += sp->repl_len;
200f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, len);
201b8ba871bSPeter Wemm for (t = bp, len = 0, p = rep;;) {
202b8ba871bSPeter Wemm if (p[0] == '\0' || p[0] == delim) {
203b8ba871bSPeter Wemm if (p[0] == delim)
204b8ba871bSPeter Wemm ++p;
205b8ba871bSPeter Wemm break;
206b8ba871bSPeter Wemm }
207b8ba871bSPeter Wemm if (p[0] == '\\') {
208b8ba871bSPeter Wemm if (p[1] == delim)
209b8ba871bSPeter Wemm ++p;
210b8ba871bSPeter Wemm else if (p[1] == '\\') {
211b8ba871bSPeter Wemm *t++ = *p++;
212b8ba871bSPeter Wemm ++len;
213b8ba871bSPeter Wemm } else if (p[1] == '~') {
214b8ba871bSPeter Wemm ++p;
215b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC))
216b8ba871bSPeter Wemm goto tilde;
217b8ba871bSPeter Wemm }
218b8ba871bSPeter Wemm } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
219b8ba871bSPeter Wemm tilde: ++p;
220f0957ccaSPeter Wemm MEMCPY(t, sp->repl, sp->repl_len);
221b8ba871bSPeter Wemm t += sp->repl_len;
222b8ba871bSPeter Wemm len += sp->repl_len;
223b8ba871bSPeter Wemm continue;
224b8ba871bSPeter Wemm }
225b8ba871bSPeter Wemm *t++ = *p++;
226b8ba871bSPeter Wemm ++len;
227b8ba871bSPeter Wemm }
228b8ba871bSPeter Wemm if ((sp->repl_len = len) != 0) {
229b8ba871bSPeter Wemm free(sp->repl);
230110d525eSBaptiste Daroussin MALLOC(sp, sp->repl, len * sizeof(CHAR_T));
231f0957ccaSPeter Wemm if (sp->repl == NULL) {
232f0957ccaSPeter Wemm FREE_SPACEW(sp, bp, blen);
233b8ba871bSPeter Wemm return (1);
234b8ba871bSPeter Wemm }
235f0957ccaSPeter Wemm MEMCPY(sp->repl, bp, len);
236b8ba871bSPeter Wemm }
237f0957ccaSPeter Wemm FREE_SPACEW(sp, bp, blen);
238b8ba871bSPeter Wemm }
239b8ba871bSPeter Wemm return (s(sp, cmdp, p, re, flags));
240b8ba871bSPeter Wemm }
241b8ba871bSPeter Wemm
242b8ba871bSPeter Wemm /*
243b8ba871bSPeter Wemm * ex_subagain --
244b8ba871bSPeter Wemm * [line [,line]] & [cgr] [count] [#lp]]
245b8ba871bSPeter Wemm *
246b8ba871bSPeter Wemm * Substitute using the last substitute RE and replacement pattern.
247b8ba871bSPeter Wemm *
248c271fa92SBaptiste Daroussin * PUBLIC: int ex_subagain(SCR *, EXCMD *);
249b8ba871bSPeter Wemm */
250b8ba871bSPeter Wemm int
ex_subagain(SCR * sp,EXCMD * cmdp)251f0957ccaSPeter Wemm ex_subagain(SCR *sp, EXCMD *cmdp)
252b8ba871bSPeter Wemm {
253b8ba871bSPeter Wemm if (sp->subre == NULL) {
254b8ba871bSPeter Wemm ex_emsg(sp, NULL, EXM_NOPREVRE);
255b8ba871bSPeter Wemm return (1);
256b8ba871bSPeter Wemm }
257f0957ccaSPeter Wemm if (!F_ISSET(sp, SC_RE_SUBST) &&
258f0957ccaSPeter Wemm re_compile(sp, sp->subre, sp->subre_len,
259f0957ccaSPeter Wemm NULL, NULL, &sp->subre_c, RE_C_SUBST))
260b8ba871bSPeter Wemm return (1);
261b8ba871bSPeter Wemm return (s(sp,
262b8ba871bSPeter Wemm cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
263b8ba871bSPeter Wemm }
264b8ba871bSPeter Wemm
265b8ba871bSPeter Wemm /*
266b8ba871bSPeter Wemm * ex_subtilde --
267b8ba871bSPeter Wemm * [line [,line]] ~ [cgr] [count] [#lp]]
268b8ba871bSPeter Wemm *
269b8ba871bSPeter Wemm * Substitute using the last RE and last substitute replacement pattern.
270b8ba871bSPeter Wemm *
271c271fa92SBaptiste Daroussin * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
272b8ba871bSPeter Wemm */
273b8ba871bSPeter Wemm int
ex_subtilde(SCR * sp,EXCMD * cmdp)274f0957ccaSPeter Wemm ex_subtilde(SCR *sp, EXCMD *cmdp)
275b8ba871bSPeter Wemm {
276b8ba871bSPeter Wemm if (sp->re == NULL) {
277b8ba871bSPeter Wemm ex_emsg(sp, NULL, EXM_NOPREVRE);
278b8ba871bSPeter Wemm return (1);
279b8ba871bSPeter Wemm }
280f0957ccaSPeter Wemm if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
281f0957ccaSPeter Wemm sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
282b8ba871bSPeter Wemm return (1);
283b8ba871bSPeter Wemm return (s(sp,
284b8ba871bSPeter Wemm cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
285b8ba871bSPeter Wemm }
286b8ba871bSPeter Wemm
287b8ba871bSPeter Wemm /*
288b8ba871bSPeter Wemm * s --
289b8ba871bSPeter Wemm * Do the substitution. This stuff is *really* tricky. There are lots of
290b8ba871bSPeter Wemm * special cases, and general nastiness. Don't mess with it unless you're
291b8ba871bSPeter Wemm * pretty confident.
292b8ba871bSPeter Wemm *
293b8ba871bSPeter Wemm * The nasty part of the substitution is what happens when the replacement
294b8ba871bSPeter Wemm * string contains newlines. It's a bit tricky -- consider the information
295b8ba871bSPeter Wemm * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
296b8ba871bSPeter Wemm * to build a set of newline offsets which we use to break the line up later,
297b8ba871bSPeter Wemm * when the replacement is done. Don't change it unless you're *damned*
298b8ba871bSPeter Wemm * confident.
299b8ba871bSPeter Wemm */
/*
 * NEEDNEWLINE --
 *	Make sure sp->newl has room for one more newline offset.  When the
 *	array is full (newl_len == newl_cnt) it is grown by 25 entries.
 *
 *	NOTE: on allocation failure this resets sp->newl_len and executes
 *	"return (1)" in the function that expands the macro.
 */
#define	NEEDNEWLINE(sp) do {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOC((sp), (sp)->newl, size_t *,			\
		    (sp)->newl_len * sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
} while (0)

/*
 * BUILD --
 *	Append len characters starting at l to the build buffer.
 *
 *	Uses the expanding function's locals: lb (buffer), lbclen (characters
 *	currently in the buffer) and lblen (allocated size).  The buffer is
 *	grown to p2roundup(MAX(needed, 256)) characters when it is too small.
 *
 *	NOTE: on allocation failure this resets lbclen and executes
 *	"return (1)" in the function that expands the macro.
 */
#define	BUILD(sp, l, len) do {						\
	if (lbclen + (len) > lblen) {					\
		lblen = p2roundup(MAX(lbclen + (len), 256));		\
		REALLOC((sp), lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	MEMCPY(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
} while (0)

/*
 * NEEDSP --
 *	Make sure the build buffer can take len more characters without
 *	copying anything into it.  If the buffer had to be reallocated, pnt
 *	is re-pointed at the current end of the built text (lb + lbclen),
 *	since the old buffer address may no longer be valid.
 *
 *	Uses the same lb/lbclen/lblen locals as BUILD, and likewise executes
 *	"return (1)" in the expanding function on allocation failure.
 */
#define	NEEDSP(sp, len, pnt) do {					\
	if (lbclen + (len) > lblen) {					\
		lblen = p2roundup(MAX(lbclen + (len), 256));		\
		REALLOC((sp), lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		(pnt) = lb + lbclen;					\
	}								\
} while (0)
337b8ba871bSPeter Wemm static int
s(SCR * sp,EXCMD * cmdp,CHAR_T * s,regex_t * re,u_int flags)338f0957ccaSPeter Wemm s(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags)
339b8ba871bSPeter Wemm {
340b8ba871bSPeter Wemm EVENT ev;
341b8ba871bSPeter Wemm MARK from, to;
342f0957ccaSPeter Wemm TEXTH tiq[] = {{ 0 }};
343b8ba871bSPeter Wemm recno_t elno, lno, slno;
344f0957ccaSPeter Wemm u_long ul;
345b8ba871bSPeter Wemm regmatch_t match[10];
346b8ba871bSPeter Wemm size_t blen, cnt, last, lbclen, lblen, len, llen;
347b8ba871bSPeter Wemm size_t offset, saved_offset, scno;
348b8ba871bSPeter Wemm int cflag, lflag, nflag, pflag, rflag;
349b8ba871bSPeter Wemm int didsub, do_eol_match, eflags, empty_ok, eval;
350b8ba871bSPeter Wemm int linechanged, matched, quit, rval;
351f0957ccaSPeter Wemm CHAR_T *bp, *lb;
352f0957ccaSPeter Wemm enum nresult nret;
353b8ba871bSPeter Wemm
354b8ba871bSPeter Wemm NEEDFILE(sp, cmdp);
355b8ba871bSPeter Wemm
356b8ba871bSPeter Wemm slno = sp->lno;
357b8ba871bSPeter Wemm scno = sp->cno;
358b8ba871bSPeter Wemm
359b8ba871bSPeter Wemm /*
360b8ba871bSPeter Wemm * !!!
361b8ba871bSPeter Wemm * Historically, the 'g' and 'c' suffices were always toggled as flags,
362b8ba871bSPeter Wemm * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
363b8ba871bSPeter Wemm * not set, they were initialized to 0 for all substitute commands. If
364b8ba871bSPeter Wemm * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
365b8ba871bSPeter Wemm * specified substitute/replacement patterns (see ex_s()).
366b8ba871bSPeter Wemm */
367b8ba871bSPeter Wemm if (!O_ISSET(sp, O_EDCOMPATIBLE))
368b8ba871bSPeter Wemm sp->c_suffix = sp->g_suffix = 0;
369b8ba871bSPeter Wemm
370b8ba871bSPeter Wemm /*
371b8ba871bSPeter Wemm * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
372b8ba871bSPeter Wemm * it only displayed the last change. I'd disallow them, but they are
373b8ba871bSPeter Wemm * useful in combination with the [v]global commands. In the current
374b8ba871bSPeter Wemm * model the problem is combining them with the 'c' flag -- the screen
375b8ba871bSPeter Wemm * would have to flip back and forth between the confirm screen and the
376b8ba871bSPeter Wemm * ex print screen, which would be pretty awful. We do display all
377b8ba871bSPeter Wemm * changes, though, for what that's worth.
378b8ba871bSPeter Wemm *
379b8ba871bSPeter Wemm * !!!
380b8ba871bSPeter Wemm * Historic vi was fairly strict about the order of "options", the
381b8ba871bSPeter Wemm * count, and "flags". I'm somewhat fuzzy on the difference between
382b8ba871bSPeter Wemm * options and flags, anyway, so this is a simpler approach, and we
383b8ba871bSPeter Wemm * just take it them in whatever order the user gives them. (The ex
384b8ba871bSPeter Wemm * usage statement doesn't reflect this.)
385b8ba871bSPeter Wemm */
386b8ba871bSPeter Wemm cflag = lflag = nflag = pflag = rflag = 0;
387b8ba871bSPeter Wemm if (s == NULL)
388b8ba871bSPeter Wemm goto noargs;
389b8ba871bSPeter Wemm for (lno = OOBLNO; *s != '\0'; ++s)
390b8ba871bSPeter Wemm switch (*s) {
391b8ba871bSPeter Wemm case ' ':
392b8ba871bSPeter Wemm case '\t':
393b8ba871bSPeter Wemm continue;
394b8ba871bSPeter Wemm case '+':
395b8ba871bSPeter Wemm ++cmdp->flagoff;
396b8ba871bSPeter Wemm break;
397b8ba871bSPeter Wemm case '-':
398b8ba871bSPeter Wemm --cmdp->flagoff;
399b8ba871bSPeter Wemm break;
400b8ba871bSPeter Wemm case '0': case '1': case '2': case '3': case '4':
401b8ba871bSPeter Wemm case '5': case '6': case '7': case '8': case '9':
402b8ba871bSPeter Wemm if (lno != OOBLNO)
403b8ba871bSPeter Wemm goto usage;
404b8ba871bSPeter Wemm errno = 0;
405f0957ccaSPeter Wemm nret = nget_uslong(&ul, s, &s, 10);
406f0957ccaSPeter Wemm lno = ul;
407b8ba871bSPeter Wemm if (*s == '\0') /* Loop increment correction. */
408b8ba871bSPeter Wemm --s;
409f0957ccaSPeter Wemm if (nret != NUM_OK) {
410f0957ccaSPeter Wemm if (nret == NUM_OVER)
411b8ba871bSPeter Wemm msgq(sp, M_ERR, "153|Count overflow");
412f0957ccaSPeter Wemm else if (nret == NUM_UNDER)
413b8ba871bSPeter Wemm msgq(sp, M_ERR, "154|Count underflow");
414b8ba871bSPeter Wemm else
415b8ba871bSPeter Wemm msgq(sp, M_SYSERR, NULL);
416b8ba871bSPeter Wemm return (1);
417b8ba871bSPeter Wemm }
418b8ba871bSPeter Wemm /*
419b8ba871bSPeter Wemm * In historic vi, the count was inclusive from the
420b8ba871bSPeter Wemm * second address.
421b8ba871bSPeter Wemm */
422b8ba871bSPeter Wemm cmdp->addr1.lno = cmdp->addr2.lno;
423b8ba871bSPeter Wemm cmdp->addr2.lno += lno - 1;
424b8ba871bSPeter Wemm if (!db_exist(sp, cmdp->addr2.lno) &&
425b8ba871bSPeter Wemm db_last(sp, &cmdp->addr2.lno))
426b8ba871bSPeter Wemm return (1);
427b8ba871bSPeter Wemm break;
428b8ba871bSPeter Wemm case '#':
429b8ba871bSPeter Wemm nflag = 1;
430b8ba871bSPeter Wemm break;
431b8ba871bSPeter Wemm case 'c':
432b8ba871bSPeter Wemm sp->c_suffix = !sp->c_suffix;
433b8ba871bSPeter Wemm
434b8ba871bSPeter Wemm /* Ex text structure initialization. */
435f0957ccaSPeter Wemm if (F_ISSET(sp, SC_EX))
436f0957ccaSPeter Wemm TAILQ_INIT(tiq);
437b8ba871bSPeter Wemm break;
438b8ba871bSPeter Wemm case 'g':
439b8ba871bSPeter Wemm sp->g_suffix = !sp->g_suffix;
440b8ba871bSPeter Wemm break;
441b8ba871bSPeter Wemm case 'l':
442b8ba871bSPeter Wemm lflag = 1;
443b8ba871bSPeter Wemm break;
444b8ba871bSPeter Wemm case 'p':
445b8ba871bSPeter Wemm pflag = 1;
446b8ba871bSPeter Wemm break;
447b8ba871bSPeter Wemm case 'r':
448b8ba871bSPeter Wemm if (LF_ISSET(SUB_FIRST)) {
449b8ba871bSPeter Wemm msgq(sp, M_ERR,
450b8ba871bSPeter Wemm "155|Regular expression specified; r flag meaningless");
451b8ba871bSPeter Wemm return (1);
452b8ba871bSPeter Wemm }
453b8ba871bSPeter Wemm if (!F_ISSET(sp, SC_RE_SEARCH)) {
454b8ba871bSPeter Wemm ex_emsg(sp, NULL, EXM_NOPREVRE);
455b8ba871bSPeter Wemm return (1);
456b8ba871bSPeter Wemm }
457b8ba871bSPeter Wemm rflag = 1;
458b8ba871bSPeter Wemm re = &sp->re_c;
459b8ba871bSPeter Wemm break;
460b8ba871bSPeter Wemm default:
461b8ba871bSPeter Wemm goto usage;
462b8ba871bSPeter Wemm }
463b8ba871bSPeter Wemm
464f0957ccaSPeter Wemm if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
465b8ba871bSPeter Wemm usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
466b8ba871bSPeter Wemm return (1);
467b8ba871bSPeter Wemm }
468b8ba871bSPeter Wemm
469b8ba871bSPeter Wemm noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
470b8ba871bSPeter Wemm msgq(sp, M_ERR,
471b8ba871bSPeter Wemm "156|The #, l and p flags may not be combined with the c flag in vi mode");
472b8ba871bSPeter Wemm return (1);
473b8ba871bSPeter Wemm }
474b8ba871bSPeter Wemm
475b8ba871bSPeter Wemm /*
476b8ba871bSPeter Wemm * bp: if interactive, line cache
477b8ba871bSPeter Wemm * blen: if interactive, line cache length
478b8ba871bSPeter Wemm * lb: build buffer pointer.
479b8ba871bSPeter Wemm * lbclen: current length of built buffer.
480b8ba871bSPeter Wemm * lblen; length of build buffer.
481b8ba871bSPeter Wemm */
482b8ba871bSPeter Wemm bp = lb = NULL;
483b8ba871bSPeter Wemm blen = lbclen = lblen = 0;
484b8ba871bSPeter Wemm
485b8ba871bSPeter Wemm /* For each line... */
486f0957ccaSPeter Wemm lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
487f0957ccaSPeter Wemm for (matched = quit = 0,
488b8ba871bSPeter Wemm elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
489b8ba871bSPeter Wemm
490b8ba871bSPeter Wemm /* Someone's unhappy, time to stop. */
491b8ba871bSPeter Wemm if (INTERRUPTED(sp))
492b8ba871bSPeter Wemm break;
493b8ba871bSPeter Wemm
494b8ba871bSPeter Wemm /* Get the line. */
495b8ba871bSPeter Wemm if (db_get(sp, lno, DBG_FATAL, &s, &llen))
496b8ba871bSPeter Wemm goto err;
497b8ba871bSPeter Wemm
498b8ba871bSPeter Wemm /*
499b8ba871bSPeter Wemm * Make a local copy if doing confirmation -- when calling
500b8ba871bSPeter Wemm * the confirm routine we're likely to lose the cached copy.
501b8ba871bSPeter Wemm */
502b8ba871bSPeter Wemm if (sp->c_suffix) {
503b8ba871bSPeter Wemm if (bp == NULL) {
504f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, llen);
505b8ba871bSPeter Wemm } else
506f0957ccaSPeter Wemm ADD_SPACE_RETW(sp, bp, blen, llen);
507f0957ccaSPeter Wemm MEMCPY(bp, s, llen);
508b8ba871bSPeter Wemm s = bp;
509b8ba871bSPeter Wemm }
510b8ba871bSPeter Wemm
511b8ba871bSPeter Wemm /* Start searching from the beginning. */
512b8ba871bSPeter Wemm offset = 0;
513b8ba871bSPeter Wemm len = llen;
514b8ba871bSPeter Wemm
515b8ba871bSPeter Wemm /* Reset the build buffer offset. */
516b8ba871bSPeter Wemm lbclen = 0;
517b8ba871bSPeter Wemm
518b8ba871bSPeter Wemm /* Reset empty match flag. */
519b8ba871bSPeter Wemm empty_ok = 1;
520b8ba871bSPeter Wemm
521b8ba871bSPeter Wemm /*
522b8ba871bSPeter Wemm * We don't want to have to do a setline if the line didn't
523b8ba871bSPeter Wemm * change -- keep track of whether or not this line changed.
524b8ba871bSPeter Wemm * If doing confirmations, don't want to keep setting the
525b8ba871bSPeter Wemm * line if change is refused -- keep track of substitutions.
526b8ba871bSPeter Wemm */
527b8ba871bSPeter Wemm didsub = linechanged = 0;
528b8ba871bSPeter Wemm
529b8ba871bSPeter Wemm /* New line, do an EOL match. */
530b8ba871bSPeter Wemm do_eol_match = 1;
531b8ba871bSPeter Wemm
532b8ba871bSPeter Wemm /* It's not nul terminated, but we pretend it is. */
533b8ba871bSPeter Wemm eflags = REG_STARTEND;
534b8ba871bSPeter Wemm
535b8ba871bSPeter Wemm /*
536b8ba871bSPeter Wemm * The search area is from s + offset to the EOL.
537b8ba871bSPeter Wemm *
538b8ba871bSPeter Wemm * Generally, match[0].rm_so is the offset of the start
539b8ba871bSPeter Wemm * of the match from the start of the search, and offset
540b8ba871bSPeter Wemm * is the offset of the start of the last search.
541b8ba871bSPeter Wemm */
542b8ba871bSPeter Wemm nextmatch: match[0].rm_so = 0;
543b8ba871bSPeter Wemm match[0].rm_eo = len;
544b8ba871bSPeter Wemm
545b8ba871bSPeter Wemm /* Get the next match. */
546f0957ccaSPeter Wemm eval = regexec(re, s + offset, 10, match, eflags);
547b8ba871bSPeter Wemm
548b8ba871bSPeter Wemm /*
549b8ba871bSPeter Wemm * There wasn't a match or if there was an error, deal with
550b8ba871bSPeter Wemm * it. If there was a previous match in this line, resolve
551b8ba871bSPeter Wemm * the changes into the database. Otherwise, just move on.
552b8ba871bSPeter Wemm */
553b8ba871bSPeter Wemm if (eval == REG_NOMATCH)
554b8ba871bSPeter Wemm goto endmatch;
555b8ba871bSPeter Wemm if (eval != 0) {
556b8ba871bSPeter Wemm re_error(sp, eval, re);
557b8ba871bSPeter Wemm goto err;
558b8ba871bSPeter Wemm }
559b8ba871bSPeter Wemm matched = 1;
560b8ba871bSPeter Wemm
561b8ba871bSPeter Wemm /* Only the first search can match an anchored expression. */
562b8ba871bSPeter Wemm eflags |= REG_NOTBOL;
563b8ba871bSPeter Wemm
564b8ba871bSPeter Wemm /*
565b8ba871bSPeter Wemm * !!!
566b8ba871bSPeter Wemm * It's possible to match 0-length strings -- for example, the
567b8ba871bSPeter Wemm * command s;a*;X;, when matched against the string "aabb" will
568b8ba871bSPeter Wemm * result in "XbXbX", i.e. the matches are "aa", the space
569b8ba871bSPeter Wemm * between the b's and the space between the b's and the end of
570b8ba871bSPeter Wemm * the string. There is a similar space between the beginning
571b8ba871bSPeter Wemm * of the string and the a's. The rule that we use (because vi
572b8ba871bSPeter Wemm * historically used it) is that any 0-length match, occurring
573b8ba871bSPeter Wemm * immediately after a match, is ignored. Otherwise, the above
574b8ba871bSPeter Wemm * example would have resulted in "XXbXbX". Another example is
575b8ba871bSPeter Wemm * incorrectly using " *" to replace groups of spaces with one
576b8ba871bSPeter Wemm * space.
577b8ba871bSPeter Wemm *
578b8ba871bSPeter Wemm * The way we do this is that if we just had a successful match,
579b8ba871bSPeter Wemm * the starting offset does not skip characters, and the match
580b8ba871bSPeter Wemm * is empty, ignore the match and move forward. If there's no
581b8ba871bSPeter Wemm * more characters in the string, we were attempting to match
582b8ba871bSPeter Wemm * after the last character, so quit.
583b8ba871bSPeter Wemm */
584b8ba871bSPeter Wemm if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
585b8ba871bSPeter Wemm empty_ok = 1;
586b8ba871bSPeter Wemm if (len == 0)
587b8ba871bSPeter Wemm goto endmatch;
588755cc40cSBaptiste Daroussin BUILD(sp, s + offset, 1);
589b8ba871bSPeter Wemm ++offset;
590b8ba871bSPeter Wemm --len;
591b8ba871bSPeter Wemm goto nextmatch;
592b8ba871bSPeter Wemm }
593b8ba871bSPeter Wemm
594b8ba871bSPeter Wemm /* Confirm change. */
595b8ba871bSPeter Wemm if (sp->c_suffix) {
596b8ba871bSPeter Wemm /*
597b8ba871bSPeter Wemm * Set the cursor position for confirmation. Note,
598b8ba871bSPeter Wemm * if we matched on a '$', the cursor may be past
599b8ba871bSPeter Wemm * the end of line.
600b8ba871bSPeter Wemm */
601b8ba871bSPeter Wemm from.lno = to.lno = lno;
602b8ba871bSPeter Wemm from.cno = match[0].rm_so + offset;
603b8ba871bSPeter Wemm to.cno = match[0].rm_eo + offset;
604b8ba871bSPeter Wemm /*
605b8ba871bSPeter Wemm * Both ex and vi have to correct for a change before
606b8ba871bSPeter Wemm * the first character in the line.
607b8ba871bSPeter Wemm */
608b8ba871bSPeter Wemm if (llen == 0)
609b8ba871bSPeter Wemm from.cno = to.cno = 0;
610b8ba871bSPeter Wemm if (F_ISSET(sp, SC_VI)) {
611b8ba871bSPeter Wemm /*
612b8ba871bSPeter Wemm * Only vi has to correct for a change after
613b8ba871bSPeter Wemm * the last character in the line.
614b8ba871bSPeter Wemm *
615b8ba871bSPeter Wemm * XXX
616b8ba871bSPeter Wemm * It would be nice to change the vi code so
617b8ba871bSPeter Wemm * that we could display a cursor past EOL.
618b8ba871bSPeter Wemm */
619b8ba871bSPeter Wemm if (to.cno >= llen)
620b8ba871bSPeter Wemm to.cno = llen - 1;
621b8ba871bSPeter Wemm if (from.cno >= llen)
622b8ba871bSPeter Wemm from.cno = llen - 1;
623b8ba871bSPeter Wemm
624b8ba871bSPeter Wemm sp->lno = from.lno;
625b8ba871bSPeter Wemm sp->cno = from.cno;
626b8ba871bSPeter Wemm if (vs_refresh(sp, 1))
627b8ba871bSPeter Wemm goto err;
628b8ba871bSPeter Wemm
629b8ba871bSPeter Wemm vs_update(sp, msg_cat(sp,
630b8ba871bSPeter Wemm "169|Confirm change? [n]", NULL), NULL);
631b8ba871bSPeter Wemm
632b8ba871bSPeter Wemm if (v_event_get(sp, &ev, 0, 0))
633b8ba871bSPeter Wemm goto err;
634b8ba871bSPeter Wemm switch (ev.e_event) {
635b8ba871bSPeter Wemm case E_CHARACTER:
636b8ba871bSPeter Wemm break;
637b8ba871bSPeter Wemm case E_EOF:
638b8ba871bSPeter Wemm case E_ERR:
639b8ba871bSPeter Wemm case E_INTERRUPT:
640b8ba871bSPeter Wemm goto lquit;
641b8ba871bSPeter Wemm default:
642b8ba871bSPeter Wemm v_event_err(sp, &ev);
643b8ba871bSPeter Wemm goto lquit;
644b8ba871bSPeter Wemm }
645b8ba871bSPeter Wemm } else {
646*0fcececbSBaptiste Daroussin const int flags =
647*0fcececbSBaptiste Daroussin O_ISSET(sp, O_NUMBER) ? E_C_HASH : 0;
648*0fcececbSBaptiste Daroussin if (ex_print(sp, cmdp, &from, &to, flags) ||
649b8ba871bSPeter Wemm ex_scprint(sp, &from, &to))
650b8ba871bSPeter Wemm goto lquit;
651f0957ccaSPeter Wemm if (ex_txt(sp, tiq, 0, TXT_CR))
652b8ba871bSPeter Wemm goto err;
653f0957ccaSPeter Wemm ev.e_c = TAILQ_FIRST(tiq)->lb[0];
654b8ba871bSPeter Wemm }
655b8ba871bSPeter Wemm
656b8ba871bSPeter Wemm switch (ev.e_c) {
657b8ba871bSPeter Wemm case CH_YES:
658b8ba871bSPeter Wemm break;
659b8ba871bSPeter Wemm default:
660b8ba871bSPeter Wemm case CH_NO:
661b8ba871bSPeter Wemm didsub = 0;
662b8ba871bSPeter Wemm BUILD(sp, s +offset, match[0].rm_eo);
663b8ba871bSPeter Wemm goto skip;
664b8ba871bSPeter Wemm case CH_QUIT:
665b8ba871bSPeter Wemm /* Set the quit/interrupted flags. */
666b8ba871bSPeter Wemm lquit: quit = 1;
667b8ba871bSPeter Wemm F_SET(sp->gp, G_INTERRUPTED);
668b8ba871bSPeter Wemm
669b8ba871bSPeter Wemm /*
670b8ba871bSPeter Wemm * Resolve any changes, then return to (and
671b8ba871bSPeter Wemm * exit from) the main loop.
672b8ba871bSPeter Wemm */
673b8ba871bSPeter Wemm goto endmatch;
674b8ba871bSPeter Wemm }
675b8ba871bSPeter Wemm }
676b8ba871bSPeter Wemm
677b8ba871bSPeter Wemm /*
678b8ba871bSPeter Wemm * Set the cursor to the last position changed, converting
679b8ba871bSPeter Wemm * from 1-based to 0-based.
680b8ba871bSPeter Wemm */
681b8ba871bSPeter Wemm sp->lno = lno;
682b8ba871bSPeter Wemm sp->cno = match[0].rm_so;
683b8ba871bSPeter Wemm
684b8ba871bSPeter Wemm /* Copy the bytes before the match into the build buffer. */
685b8ba871bSPeter Wemm BUILD(sp, s + offset, match[0].rm_so);
686b8ba871bSPeter Wemm
687b8ba871bSPeter Wemm /* Substitute the matching bytes. */
688b8ba871bSPeter Wemm didsub = 1;
689b8ba871bSPeter Wemm if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
690b8ba871bSPeter Wemm goto err;
691b8ba871bSPeter Wemm
692b8ba871bSPeter Wemm /* Set the change flag so we know this line was modified. */
693b8ba871bSPeter Wemm linechanged = 1;
694b8ba871bSPeter Wemm
695b8ba871bSPeter Wemm /* Move past the matched bytes. */
696b8ba871bSPeter Wemm skip: offset += match[0].rm_eo;
697b8ba871bSPeter Wemm len -= match[0].rm_eo;
698b8ba871bSPeter Wemm
699b8ba871bSPeter Wemm /* A match cannot be followed by an empty pattern. */
700b8ba871bSPeter Wemm empty_ok = 0;
701b8ba871bSPeter Wemm
702b8ba871bSPeter Wemm /*
703b8ba871bSPeter Wemm * If doing a global change with confirmation, we have to
704b8ba871bSPeter Wemm * update the screen. The basic idea is to store the line
705b8ba871bSPeter Wemm * so the screen update routines can find it, and restart.
706b8ba871bSPeter Wemm */
707b8ba871bSPeter Wemm if (didsub && sp->c_suffix && sp->g_suffix) {
708b8ba871bSPeter Wemm /*
709b8ba871bSPeter Wemm * The new search offset will be the end of the
710b8ba871bSPeter Wemm * modified line.
711b8ba871bSPeter Wemm */
712b8ba871bSPeter Wemm saved_offset = lbclen;
713b8ba871bSPeter Wemm
714b8ba871bSPeter Wemm /* Copy the rest of the line. */
715b8ba871bSPeter Wemm if (len)
716755cc40cSBaptiste Daroussin BUILD(sp, s + offset, len);
717b8ba871bSPeter Wemm
718b8ba871bSPeter Wemm /* Set the new offset. */
719b8ba871bSPeter Wemm offset = saved_offset;
720b8ba871bSPeter Wemm
721b8ba871bSPeter Wemm /* Store inserted lines, adjusting the build buffer. */
722b8ba871bSPeter Wemm last = 0;
723b8ba871bSPeter Wemm if (sp->newl_cnt) {
724b8ba871bSPeter Wemm for (cnt = 0;
725b8ba871bSPeter Wemm cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
726b8ba871bSPeter Wemm if (db_insert(sp, lno,
727b8ba871bSPeter Wemm lb + last, sp->newl[cnt] - last))
728b8ba871bSPeter Wemm goto err;
729b8ba871bSPeter Wemm last = sp->newl[cnt] + 1;
730b8ba871bSPeter Wemm ++sp->rptlines[L_ADDED];
731b8ba871bSPeter Wemm }
732b8ba871bSPeter Wemm lbclen -= last;
733b8ba871bSPeter Wemm offset -= last;
734b8ba871bSPeter Wemm sp->newl_cnt = 0;
735b8ba871bSPeter Wemm }
736b8ba871bSPeter Wemm
737b8ba871bSPeter Wemm /* Store and retrieve the line. */
738b8ba871bSPeter Wemm if (db_set(sp, lno, lb + last, lbclen))
739b8ba871bSPeter Wemm goto err;
740b8ba871bSPeter Wemm if (db_get(sp, lno, DBG_FATAL, &s, &llen))
741b8ba871bSPeter Wemm goto err;
742755cc40cSBaptiste Daroussin ADD_SPACE_RETW(sp, bp, blen, llen);
743f0957ccaSPeter Wemm MEMCPY(bp, s, llen);
744b8ba871bSPeter Wemm s = bp;
745b8ba871bSPeter Wemm len = llen - offset;
746b8ba871bSPeter Wemm
747b8ba871bSPeter Wemm /* Restart the build. */
748b8ba871bSPeter Wemm lbclen = 0;
749b8ba871bSPeter Wemm BUILD(sp, s, offset);
750b8ba871bSPeter Wemm
751b8ba871bSPeter Wemm /*
752b8ba871bSPeter Wemm * If we haven't already done the after-the-string
753b8ba871bSPeter Wemm * match, do one. Set REG_NOTEOL so the '$' pattern
754b8ba871bSPeter Wemm * only matches once.
755b8ba871bSPeter Wemm */
756b8ba871bSPeter Wemm if (!do_eol_match)
757b8ba871bSPeter Wemm goto endmatch;
758b8ba871bSPeter Wemm if (offset == len) {
759b8ba871bSPeter Wemm do_eol_match = 0;
760b8ba871bSPeter Wemm eflags |= REG_NOTEOL;
761b8ba871bSPeter Wemm }
762b8ba871bSPeter Wemm goto nextmatch;
763b8ba871bSPeter Wemm }
764b8ba871bSPeter Wemm
765b8ba871bSPeter Wemm /*
766b8ba871bSPeter Wemm * If it's a global:
767b8ba871bSPeter Wemm *
768b8ba871bSPeter Wemm * If at the end of the string, do a test for the after
769b8ba871bSPeter Wemm * the string match. Set REG_NOTEOL so the '$' pattern
770b8ba871bSPeter Wemm * only matches once.
771b8ba871bSPeter Wemm */
772b8ba871bSPeter Wemm if (sp->g_suffix && do_eol_match) {
773b8ba871bSPeter Wemm if (len == 0) {
774b8ba871bSPeter Wemm do_eol_match = 0;
775b8ba871bSPeter Wemm eflags |= REG_NOTEOL;
776b8ba871bSPeter Wemm }
777b8ba871bSPeter Wemm goto nextmatch;
778b8ba871bSPeter Wemm }
779b8ba871bSPeter Wemm
780b8ba871bSPeter Wemm endmatch: if (!linechanged)
781b8ba871bSPeter Wemm continue;
782b8ba871bSPeter Wemm
783b8ba871bSPeter Wemm /* Copy any remaining bytes into the build buffer. */
784b8ba871bSPeter Wemm if (len)
785755cc40cSBaptiste Daroussin BUILD(sp, s + offset, len);
786b8ba871bSPeter Wemm
787b8ba871bSPeter Wemm /* Store inserted lines, adjusting the build buffer. */
788b8ba871bSPeter Wemm last = 0;
789b8ba871bSPeter Wemm if (sp->newl_cnt) {
790b8ba871bSPeter Wemm for (cnt = 0;
791b8ba871bSPeter Wemm cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
792b8ba871bSPeter Wemm if (db_insert(sp,
793b8ba871bSPeter Wemm lno, lb + last, sp->newl[cnt] - last))
794b8ba871bSPeter Wemm goto err;
795b8ba871bSPeter Wemm last = sp->newl[cnt] + 1;
796b8ba871bSPeter Wemm ++sp->rptlines[L_ADDED];
797b8ba871bSPeter Wemm }
798b8ba871bSPeter Wemm lbclen -= last;
799b8ba871bSPeter Wemm sp->newl_cnt = 0;
800b8ba871bSPeter Wemm }
801b8ba871bSPeter Wemm
802b8ba871bSPeter Wemm /* Store the changed line. */
803b8ba871bSPeter Wemm if (db_set(sp, lno, lb + last, lbclen))
804b8ba871bSPeter Wemm goto err;
805b8ba871bSPeter Wemm
806b8ba871bSPeter Wemm /* Update changed line counter. */
807b8ba871bSPeter Wemm if (sp->rptlchange != lno) {
808b8ba871bSPeter Wemm sp->rptlchange = lno;
809b8ba871bSPeter Wemm ++sp->rptlines[L_CHANGED];
810b8ba871bSPeter Wemm }
811b8ba871bSPeter Wemm
812b8ba871bSPeter Wemm /*
813b8ba871bSPeter Wemm * !!!
814b8ba871bSPeter Wemm * Display as necessary. Historic practice is to only
815b8ba871bSPeter Wemm * display the last line of a line split into multiple
816b8ba871bSPeter Wemm * lines.
817b8ba871bSPeter Wemm */
818b8ba871bSPeter Wemm if (lflag || nflag || pflag) {
819b8ba871bSPeter Wemm from.lno = to.lno = lno;
820b8ba871bSPeter Wemm from.cno = to.cno = 0;
821b8ba871bSPeter Wemm if (lflag)
822b8ba871bSPeter Wemm (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
823b8ba871bSPeter Wemm if (nflag)
824b8ba871bSPeter Wemm (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
825b8ba871bSPeter Wemm if (pflag)
826b8ba871bSPeter Wemm (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
827b8ba871bSPeter Wemm }
828b8ba871bSPeter Wemm }
829b8ba871bSPeter Wemm
830b8ba871bSPeter Wemm /*
831b8ba871bSPeter Wemm * !!!
832b8ba871bSPeter Wemm * Historically, vi attempted to leave the cursor at the same place if
833b8ba871bSPeter Wemm * the substitution was done at the current cursor position. Otherwise
834b8ba871bSPeter Wemm * it moved it to the first non-blank of the last line changed. There
835b8ba871bSPeter Wemm * were some problems: for example, :s/$/foo/ with the cursor on the
836b8ba871bSPeter Wemm * last character of the line left the cursor on the last character, or
837b8ba871bSPeter Wemm * the & command with multiple occurrences of the matching string in the
838b8ba871bSPeter Wemm * line usually left the cursor in a fairly random position.
839b8ba871bSPeter Wemm *
840b8ba871bSPeter Wemm * We try to do the same thing, with the exception that if the user is
841b8ba871bSPeter Wemm * doing substitution with confirmation, we move to the last line about
842b8ba871bSPeter Wemm * which the user was consulted, as opposed to the last line that they
843b8ba871bSPeter Wemm * actually changed. This prevents a screen flash if the user doesn't
844b8ba871bSPeter Wemm * change many of the possible lines.
845b8ba871bSPeter Wemm */
846b8ba871bSPeter Wemm if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
847b8ba871bSPeter Wemm sp->cno = 0;
848b8ba871bSPeter Wemm (void)nonblank(sp, sp->lno, &sp->cno);
849b8ba871bSPeter Wemm }
850b8ba871bSPeter Wemm
851b8ba871bSPeter Wemm /*
852b8ba871bSPeter Wemm * If not in a global command, and nothing matched, say so.
853b8ba871bSPeter Wemm * Else, if none of the lines displayed, put something up.
854b8ba871bSPeter Wemm */
855b8ba871bSPeter Wemm rval = 0;
856b8ba871bSPeter Wemm if (!matched) {
857b8ba871bSPeter Wemm if (!F_ISSET(sp, SC_EX_GLOBAL)) {
858b8ba871bSPeter Wemm msgq(sp, M_ERR, "157|No match found");
859b8ba871bSPeter Wemm goto err;
860b8ba871bSPeter Wemm }
861b8ba871bSPeter Wemm } else if (!lflag && !nflag && !pflag)
862b8ba871bSPeter Wemm F_SET(cmdp, E_AUTOPRINT);
863b8ba871bSPeter Wemm
864b8ba871bSPeter Wemm if (0) {
865b8ba871bSPeter Wemm err: rval = 1;
866b8ba871bSPeter Wemm }
867b8ba871bSPeter Wemm
868b8ba871bSPeter Wemm if (bp != NULL)
869f0957ccaSPeter Wemm FREE_SPACEW(sp, bp, blen);
870b8ba871bSPeter Wemm free(lb);
871b8ba871bSPeter Wemm return (rval);
872b8ba871bSPeter Wemm }
873b8ba871bSPeter Wemm
874b8ba871bSPeter Wemm /*
875b8ba871bSPeter Wemm * re_compile --
876b8ba871bSPeter Wemm * Compile the RE.
877b8ba871bSPeter Wemm *
878c271fa92SBaptiste Daroussin * PUBLIC: int re_compile(SCR *,
879c271fa92SBaptiste Daroussin * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int);
880b8ba871bSPeter Wemm */
881b8ba871bSPeter Wemm int
re_compile(SCR * sp,CHAR_T * ptrn,size_t plen,CHAR_T ** ptrnp,size_t * lenp,regex_t * rep,u_int flags)882f0957ccaSPeter Wemm re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
883b8ba871bSPeter Wemm {
884b8ba871bSPeter Wemm size_t len;
885b8ba871bSPeter Wemm int reflags, replaced, rval;
886f0957ccaSPeter Wemm CHAR_T *p;
887b8ba871bSPeter Wemm
888b8ba871bSPeter Wemm /* Set RE flags. */
889b8ba871bSPeter Wemm reflags = 0;
890b8ba871bSPeter Wemm if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
891b8ba871bSPeter Wemm if (O_ISSET(sp, O_EXTENDED))
892b8ba871bSPeter Wemm reflags |= REG_EXTENDED;
893b8ba871bSPeter Wemm if (O_ISSET(sp, O_IGNORECASE))
894b8ba871bSPeter Wemm reflags |= REG_ICASE;
895b8ba871bSPeter Wemm if (O_ISSET(sp, O_ICLOWER)) {
896b8ba871bSPeter Wemm for (p = ptrn, len = plen; len > 0; ++p, --len)
897f0957ccaSPeter Wemm if (ISUPPER(*p))
898b8ba871bSPeter Wemm break;
899b8ba871bSPeter Wemm if (len == 0)
900b8ba871bSPeter Wemm reflags |= REG_ICASE;
901b8ba871bSPeter Wemm }
902b8ba871bSPeter Wemm }
903b8ba871bSPeter Wemm
904b8ba871bSPeter Wemm /* If we're replacing a saved value, clear the old one. */
905b8ba871bSPeter Wemm if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
906b8ba871bSPeter Wemm regfree(&sp->re_c);
907b8ba871bSPeter Wemm F_CLR(sp, SC_RE_SEARCH);
908b8ba871bSPeter Wemm }
909b8ba871bSPeter Wemm if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
910b8ba871bSPeter Wemm regfree(&sp->subre_c);
911b8ba871bSPeter Wemm F_CLR(sp, SC_RE_SUBST);
912b8ba871bSPeter Wemm }
913b8ba871bSPeter Wemm
914b8ba871bSPeter Wemm /*
915b8ba871bSPeter Wemm * If we're saving the string, it's a pattern we haven't seen before,
916b8ba871bSPeter Wemm * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
917b8ba871bSPeter Wemm * later recompilation. Free any previously saved value.
918b8ba871bSPeter Wemm */
919b8ba871bSPeter Wemm if (ptrnp != NULL) {
920f0957ccaSPeter Wemm replaced = 0;
921b8ba871bSPeter Wemm if (LF_ISSET(RE_C_CSCOPE)) {
922b8ba871bSPeter Wemm if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
923b8ba871bSPeter Wemm return (1);
924b8ba871bSPeter Wemm /*
925b8ba871bSPeter Wemm * XXX
926b8ba871bSPeter Wemm * Currently, the match-any-<blank> expression used in
927b8ba871bSPeter Wemm * re_cscope_conv() requires extended RE's. This may
928b8ba871bSPeter Wemm * not be right or safe.
929b8ba871bSPeter Wemm */
930b8ba871bSPeter Wemm reflags |= REG_EXTENDED;
931b8ba871bSPeter Wemm } else if (LF_ISSET(RE_C_TAG)) {
932b8ba871bSPeter Wemm if (re_tag_conv(sp, &ptrn, &plen, &replaced))
933b8ba871bSPeter Wemm return (1);
934b8ba871bSPeter Wemm } else
935b8ba871bSPeter Wemm if (re_conv(sp, &ptrn, &plen, &replaced))
936b8ba871bSPeter Wemm return (1);
937b8ba871bSPeter Wemm
938b8ba871bSPeter Wemm /* Discard previous pattern. */
939b8ba871bSPeter Wemm free(*ptrnp);
940b8ba871bSPeter Wemm *ptrnp = NULL;
941110d525eSBaptiste Daroussin
942b8ba871bSPeter Wemm if (lenp != NULL)
943b8ba871bSPeter Wemm *lenp = plen;
944b8ba871bSPeter Wemm
945b8ba871bSPeter Wemm /*
946b8ba871bSPeter Wemm * Copy the string into allocated memory.
947b8ba871bSPeter Wemm *
948b8ba871bSPeter Wemm * XXX
949b8ba871bSPeter Wemm * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
950b8ba871bSPeter Wemm * for now. There's just no other solution.
951b8ba871bSPeter Wemm */
952110d525eSBaptiste Daroussin MALLOC(sp, *ptrnp, (plen + 1) * sizeof(CHAR_T));
953b8ba871bSPeter Wemm if (*ptrnp != NULL) {
954f0957ccaSPeter Wemm MEMCPY(*ptrnp, ptrn, plen);
955b8ba871bSPeter Wemm (*ptrnp)[plen] = '\0';
956b8ba871bSPeter Wemm }
957b8ba871bSPeter Wemm
958b8ba871bSPeter Wemm /* Free up conversion-routine-allocated memory. */
959b8ba871bSPeter Wemm if (replaced)
960f0957ccaSPeter Wemm FREE_SPACEW(sp, ptrn, 0);
961b8ba871bSPeter Wemm
962b8ba871bSPeter Wemm if (*ptrnp == NULL)
963b8ba871bSPeter Wemm return (1);
964b8ba871bSPeter Wemm
965b8ba871bSPeter Wemm ptrn = *ptrnp;
966b8ba871bSPeter Wemm }
967b8ba871bSPeter Wemm
968b8ba871bSPeter Wemm /*
969b8ba871bSPeter Wemm * XXX
970b8ba871bSPeter Wemm * Regcomp isn't 8-bit clean, so we just lost if the pattern
971b8ba871bSPeter Wemm * contained a nul. Bummer!
972b8ba871bSPeter Wemm */
973b8ba871bSPeter Wemm if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
974b8ba871bSPeter Wemm if (!LF_ISSET(RE_C_SILENT))
975b8ba871bSPeter Wemm re_error(sp, rval, rep);
976b8ba871bSPeter Wemm return (1);
977b8ba871bSPeter Wemm }
978b8ba871bSPeter Wemm
979b8ba871bSPeter Wemm if (LF_ISSET(RE_C_SEARCH))
980b8ba871bSPeter Wemm F_SET(sp, SC_RE_SEARCH);
981b8ba871bSPeter Wemm if (LF_ISSET(RE_C_SUBST))
982b8ba871bSPeter Wemm F_SET(sp, SC_RE_SUBST);
983b8ba871bSPeter Wemm
984b8ba871bSPeter Wemm return (0);
985b8ba871bSPeter Wemm }
986b8ba871bSPeter Wemm
987b8ba871bSPeter Wemm /*
988b8ba871bSPeter Wemm * re_conv --
989b8ba871bSPeter Wemm * Convert vi's regular expressions into something that the
990b8ba871bSPeter Wemm * the POSIX 1003.2 RE functions can handle.
991b8ba871bSPeter Wemm *
992b8ba871bSPeter Wemm * There are three conversions we make to make vi's RE's (specifically
993b8ba871bSPeter Wemm * the global, search, and substitute patterns) work with POSIX RE's.
994b8ba871bSPeter Wemm *
995b8ba871bSPeter Wemm * 1: If O_MAGIC is not set, strip backslashes from the magic character
996b8ba871bSPeter Wemm * set (.[*~) that have them, and add them to the ones that don't.
997b8ba871bSPeter Wemm * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
998b8ba871bSPeter Wemm * from the last substitute command's replacement string. If O_MAGIC
999b8ba871bSPeter Wemm * is set, it's the string "~".
1000b8ba871bSPeter Wemm * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1001b8ba871bSPeter Wemm * new RE escapes.
1002b8ba871bSPeter Wemm *
1003b8ba871bSPeter Wemm * !!!/XXX
1004b8ba871bSPeter Wemm * This doesn't exactly match the historic behavior of vi because we do
1005b8ba871bSPeter Wemm * the ~ substitution before calling the RE engine, so magic characters
1006b8ba871bSPeter Wemm * in the replacement string will be expanded by the RE engine, and they
1007b8ba871bSPeter Wemm * weren't historically. It's a bug.
1008b8ba871bSPeter Wemm */
1009b8ba871bSPeter Wemm static int
re_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1010f0957ccaSPeter Wemm re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1011b8ba871bSPeter Wemm {
1012b8ba871bSPeter Wemm size_t blen, len, needlen;
1013b8ba871bSPeter Wemm int magic;
1014f0957ccaSPeter Wemm CHAR_T *bp, *p, *t;
1015b8ba871bSPeter Wemm
1016b8ba871bSPeter Wemm /*
1017b8ba871bSPeter Wemm * First pass through, we figure out how much space we'll need.
1018b8ba871bSPeter Wemm * We do it in two passes, on the grounds that most of the time
1019b8ba871bSPeter Wemm * the user is doing a search and won't have magic characters.
1020b8ba871bSPeter Wemm * That way we can skip most of the memory allocation and copies.
1021b8ba871bSPeter Wemm */
1022b8ba871bSPeter Wemm magic = 0;
1023b8ba871bSPeter Wemm for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1024b8ba871bSPeter Wemm switch (*p) {
1025b8ba871bSPeter Wemm case '\\':
1026b8ba871bSPeter Wemm if (len > 1) {
1027b8ba871bSPeter Wemm --len;
1028b8ba871bSPeter Wemm switch (*++p) {
1029b8ba871bSPeter Wemm case '<':
1030b8ba871bSPeter Wemm magic = 1;
1031f0957ccaSPeter Wemm needlen += RE_WSTART_LEN + 1;
1032b8ba871bSPeter Wemm break;
1033b8ba871bSPeter Wemm case '>':
1034b8ba871bSPeter Wemm magic = 1;
1035f0957ccaSPeter Wemm needlen += RE_WSTOP_LEN + 1;
1036b8ba871bSPeter Wemm break;
1037b8ba871bSPeter Wemm case '~':
1038b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) {
1039b8ba871bSPeter Wemm magic = 1;
1040b8ba871bSPeter Wemm needlen += sp->repl_len;
1041b8ba871bSPeter Wemm }
1042b8ba871bSPeter Wemm break;
1043b8ba871bSPeter Wemm case '.':
1044b8ba871bSPeter Wemm case '[':
1045b8ba871bSPeter Wemm case '*':
1046b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) {
1047b8ba871bSPeter Wemm magic = 1;
1048b8ba871bSPeter Wemm needlen += 1;
1049b8ba871bSPeter Wemm }
1050b8ba871bSPeter Wemm break;
1051b8ba871bSPeter Wemm default:
1052b8ba871bSPeter Wemm needlen += 2;
1053b8ba871bSPeter Wemm }
1054b8ba871bSPeter Wemm } else
1055b8ba871bSPeter Wemm needlen += 1;
1056b8ba871bSPeter Wemm break;
1057b8ba871bSPeter Wemm case '~':
1058b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC)) {
1059b8ba871bSPeter Wemm magic = 1;
1060b8ba871bSPeter Wemm needlen += sp->repl_len;
1061b8ba871bSPeter Wemm }
1062b8ba871bSPeter Wemm break;
1063b8ba871bSPeter Wemm case '.':
1064b8ba871bSPeter Wemm case '[':
1065b8ba871bSPeter Wemm case '*':
1066b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) {
1067b8ba871bSPeter Wemm magic = 1;
1068b8ba871bSPeter Wemm needlen += 2;
1069b8ba871bSPeter Wemm }
1070b8ba871bSPeter Wemm break;
1071b8ba871bSPeter Wemm default:
1072b8ba871bSPeter Wemm needlen += 1;
1073b8ba871bSPeter Wemm break;
1074b8ba871bSPeter Wemm }
1075b8ba871bSPeter Wemm
1076b8ba871bSPeter Wemm if (!magic) {
1077b8ba871bSPeter Wemm *replacedp = 0;
1078b8ba871bSPeter Wemm return (0);
1079b8ba871bSPeter Wemm }
1080b8ba871bSPeter Wemm
1081b8ba871bSPeter Wemm /* Get enough memory to hold the final pattern. */
1082b8ba871bSPeter Wemm *replacedp = 1;
1083f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, needlen);
1084b8ba871bSPeter Wemm
1085b8ba871bSPeter Wemm for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1086b8ba871bSPeter Wemm switch (*p) {
1087b8ba871bSPeter Wemm case '\\':
1088b8ba871bSPeter Wemm if (len > 1) {
1089b8ba871bSPeter Wemm --len;
1090b8ba871bSPeter Wemm switch (*++p) {
1091b8ba871bSPeter Wemm case '<':
1092f0957ccaSPeter Wemm MEMCPY(t,
1093f0957ccaSPeter Wemm RE_WSTART, RE_WSTART_LEN);
1094f0957ccaSPeter Wemm t += RE_WSTART_LEN;
1095b8ba871bSPeter Wemm break;
1096b8ba871bSPeter Wemm case '>':
1097f0957ccaSPeter Wemm MEMCPY(t,
1098f0957ccaSPeter Wemm RE_WSTOP, RE_WSTOP_LEN);
1099f0957ccaSPeter Wemm t += RE_WSTOP_LEN;
1100b8ba871bSPeter Wemm break;
1101b8ba871bSPeter Wemm case '~':
1102b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC))
1103b8ba871bSPeter Wemm *t++ = '~';
1104b8ba871bSPeter Wemm else {
1105f0957ccaSPeter Wemm MEMCPY(t,
1106b8ba871bSPeter Wemm sp->repl, sp->repl_len);
1107b8ba871bSPeter Wemm t += sp->repl_len;
1108b8ba871bSPeter Wemm }
1109b8ba871bSPeter Wemm break;
1110b8ba871bSPeter Wemm case '.':
1111b8ba871bSPeter Wemm case '[':
1112b8ba871bSPeter Wemm case '*':
1113b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC))
1114b8ba871bSPeter Wemm *t++ = '\\';
1115b8ba871bSPeter Wemm *t++ = *p;
1116b8ba871bSPeter Wemm break;
1117b8ba871bSPeter Wemm default:
1118b8ba871bSPeter Wemm *t++ = '\\';
1119b8ba871bSPeter Wemm *t++ = *p;
1120b8ba871bSPeter Wemm }
1121b8ba871bSPeter Wemm } else
1122b8ba871bSPeter Wemm *t++ = '\\';
1123b8ba871bSPeter Wemm break;
1124b8ba871bSPeter Wemm case '~':
1125b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC)) {
1126f0957ccaSPeter Wemm MEMCPY(t, sp->repl, sp->repl_len);
1127b8ba871bSPeter Wemm t += sp->repl_len;
1128b8ba871bSPeter Wemm } else
1129b8ba871bSPeter Wemm *t++ = '~';
1130b8ba871bSPeter Wemm break;
1131b8ba871bSPeter Wemm case '.':
1132b8ba871bSPeter Wemm case '[':
1133b8ba871bSPeter Wemm case '*':
1134b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC))
1135b8ba871bSPeter Wemm *t++ = '\\';
1136b8ba871bSPeter Wemm *t++ = *p;
1137b8ba871bSPeter Wemm break;
1138b8ba871bSPeter Wemm default:
1139b8ba871bSPeter Wemm *t++ = *p;
1140b8ba871bSPeter Wemm break;
1141b8ba871bSPeter Wemm }
1142b8ba871bSPeter Wemm
1143b8ba871bSPeter Wemm *ptrnp = bp;
1144b8ba871bSPeter Wemm *plenp = t - bp;
1145b8ba871bSPeter Wemm return (0);
1146b8ba871bSPeter Wemm }
1147b8ba871bSPeter Wemm
1148b8ba871bSPeter Wemm /*
1149b8ba871bSPeter Wemm * re_tag_conv --
1150b8ba871bSPeter Wemm * Convert a tags search path into something that the POSIX
1151b8ba871bSPeter Wemm * 1003.2 RE functions can handle.
1152b8ba871bSPeter Wemm */
1153b8ba871bSPeter Wemm static int
re_tag_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1154f0957ccaSPeter Wemm re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1155b8ba871bSPeter Wemm {
1156b8ba871bSPeter Wemm size_t blen, len;
1157b8ba871bSPeter Wemm int lastdollar;
1158f0957ccaSPeter Wemm CHAR_T *bp, *p, *t;
1159b8ba871bSPeter Wemm
1160b8ba871bSPeter Wemm len = *plenp;
1161b8ba871bSPeter Wemm
1162b8ba871bSPeter Wemm /* Max memory usage is 2 times the length of the string. */
1163b8ba871bSPeter Wemm *replacedp = 1;
1164f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, len * 2);
1165b8ba871bSPeter Wemm
1166b8ba871bSPeter Wemm p = *ptrnp;
1167b8ba871bSPeter Wemm t = bp;
1168b8ba871bSPeter Wemm
1169b8ba871bSPeter Wemm /* If the last character is a '/' or '?', we just strip it. */
1170b8ba871bSPeter Wemm if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1171b8ba871bSPeter Wemm --len;
1172b8ba871bSPeter Wemm
1173b8ba871bSPeter Wemm /* If the next-to-last or last character is a '$', it's magic. */
1174b8ba871bSPeter Wemm if (len > 0 && p[len - 1] == '$') {
1175b8ba871bSPeter Wemm --len;
1176b8ba871bSPeter Wemm lastdollar = 1;
1177b8ba871bSPeter Wemm } else
1178b8ba871bSPeter Wemm lastdollar = 0;
1179b8ba871bSPeter Wemm
1180b8ba871bSPeter Wemm /* If the first character is a '/' or '?', we just strip it. */
1181b8ba871bSPeter Wemm if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1182b8ba871bSPeter Wemm ++p;
1183b8ba871bSPeter Wemm --len;
1184b8ba871bSPeter Wemm }
1185b8ba871bSPeter Wemm
1186b8ba871bSPeter Wemm /* If the first or second character is a '^', it's magic. */
1187b8ba871bSPeter Wemm if (p[0] == '^') {
1188b8ba871bSPeter Wemm *t++ = *p++;
1189b8ba871bSPeter Wemm --len;
1190b8ba871bSPeter Wemm }
1191b8ba871bSPeter Wemm
1192b8ba871bSPeter Wemm /*
1193b8ba871bSPeter Wemm * Escape every other magic character we can find, meanwhile stripping
1194b8ba871bSPeter Wemm * the backslashes ctags inserts when escaping the search delimiter
1195b8ba871bSPeter Wemm * characters.
1196b8ba871bSPeter Wemm */
1197b8ba871bSPeter Wemm for (; len > 0; --len) {
1198b8ba871bSPeter Wemm if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1199b8ba871bSPeter Wemm ++p;
1200*0fcececbSBaptiste Daroussin if (len > 1)
1201b8ba871bSPeter Wemm --len;
1202f0957ccaSPeter Wemm } else if (STRCHR(L("^.[]$*"), p[0]))
1203b8ba871bSPeter Wemm *t++ = '\\';
1204b8ba871bSPeter Wemm *t++ = *p++;
1205b8ba871bSPeter Wemm }
1206b8ba871bSPeter Wemm if (lastdollar)
1207b8ba871bSPeter Wemm *t++ = '$';
1208b8ba871bSPeter Wemm
1209b8ba871bSPeter Wemm *ptrnp = bp;
1210b8ba871bSPeter Wemm *plenp = t - bp;
1211b8ba871bSPeter Wemm return (0);
1212b8ba871bSPeter Wemm }
1213b8ba871bSPeter Wemm
1214b8ba871bSPeter Wemm /*
1215b8ba871bSPeter Wemm * re_cscope_conv --
1216b8ba871bSPeter Wemm * Convert a cscope search path into something that the POSIX
1217b8ba871bSPeter Wemm * 1003.2 RE functions can handle.
1218b8ba871bSPeter Wemm */
1219b8ba871bSPeter Wemm static int
re_cscope_conv(SCR * sp,CHAR_T ** ptrnp,size_t * plenp,int * replacedp)1220f0957ccaSPeter Wemm re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1221b8ba871bSPeter Wemm {
1222b8ba871bSPeter Wemm size_t blen, len, nspaces;
1223f0957ccaSPeter Wemm CHAR_T *bp, *t;
1224f0957ccaSPeter Wemm CHAR_T *p;
1225f0957ccaSPeter Wemm CHAR_T *wp;
1226f0957ccaSPeter Wemm size_t wlen;
1227b8ba871bSPeter Wemm
1228b8ba871bSPeter Wemm /*
1229b8ba871bSPeter Wemm * Each space in the source line printed by cscope represents an
1230b8ba871bSPeter Wemm * arbitrary sequence of spaces, tabs, and comments.
1231b8ba871bSPeter Wemm */
1232b8ba871bSPeter Wemm #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1233f0957ccaSPeter Wemm #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1
1234f0957ccaSPeter Wemm CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1235b8ba871bSPeter Wemm for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1236b8ba871bSPeter Wemm if (*p == ' ')
1237b8ba871bSPeter Wemm ++nspaces;
1238b8ba871bSPeter Wemm
1239b8ba871bSPeter Wemm /*
1240b8ba871bSPeter Wemm * Allocate plenty of space:
1241b8ba871bSPeter Wemm * the string, plus potential escaping characters;
1242b8ba871bSPeter Wemm * nspaces + 2 copies of CSCOPE_RE_SPACE;
1243b8ba871bSPeter Wemm * ^, $, nul terminator characters.
1244b8ba871bSPeter Wemm */
1245b8ba871bSPeter Wemm *replacedp = 1;
1246b8ba871bSPeter Wemm len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1247f0957ccaSPeter Wemm GET_SPACE_RETW(sp, bp, blen, len);
1248b8ba871bSPeter Wemm
1249b8ba871bSPeter Wemm p = *ptrnp;
1250b8ba871bSPeter Wemm t = bp;
1251b8ba871bSPeter Wemm
1252b8ba871bSPeter Wemm *t++ = '^';
1253f0957ccaSPeter Wemm MEMCPY(t, wp, wlen);
1254f0957ccaSPeter Wemm t += wlen;
1255b8ba871bSPeter Wemm
1256b8ba871bSPeter Wemm for (len = *plenp; len > 0; ++p, --len)
1257b8ba871bSPeter Wemm if (*p == ' ') {
1258f0957ccaSPeter Wemm MEMCPY(t, wp, wlen);
1259f0957ccaSPeter Wemm t += wlen;
1260b8ba871bSPeter Wemm } else {
1261f0957ccaSPeter Wemm if (STRCHR(L("\\^.[]$*+?()|{}"), *p))
1262b8ba871bSPeter Wemm *t++ = '\\';
1263b8ba871bSPeter Wemm *t++ = *p;
1264b8ba871bSPeter Wemm }
1265b8ba871bSPeter Wemm
1266f0957ccaSPeter Wemm MEMCPY(t, wp, wlen);
1267f0957ccaSPeter Wemm t += wlen;
1268b8ba871bSPeter Wemm *t++ = '$';
1269b8ba871bSPeter Wemm
1270b8ba871bSPeter Wemm *ptrnp = bp;
1271b8ba871bSPeter Wemm *plenp = t - bp;
1272b8ba871bSPeter Wemm return (0);
1273b8ba871bSPeter Wemm }
1274b8ba871bSPeter Wemm
1275b8ba871bSPeter Wemm /*
1276b8ba871bSPeter Wemm * re_error --
1277b8ba871bSPeter Wemm * Report a regular expression error.
1278b8ba871bSPeter Wemm *
1279c271fa92SBaptiste Daroussin * PUBLIC: void re_error(SCR *, int, regex_t *);
1280b8ba871bSPeter Wemm */
1281b8ba871bSPeter Wemm void
re_error(SCR * sp,int errcode,regex_t * preg)1282f0957ccaSPeter Wemm re_error(SCR *sp, int errcode, regex_t *preg)
1283b8ba871bSPeter Wemm {
1284b8ba871bSPeter Wemm size_t s;
1285b8ba871bSPeter Wemm char *oe;
1286b8ba871bSPeter Wemm
1287b8ba871bSPeter Wemm s = regerror(errcode, preg, "", 0);
1288110d525eSBaptiste Daroussin MALLOC(sp, oe, s);
1289f0957ccaSPeter Wemm if (oe != NULL) {
1290b8ba871bSPeter Wemm (void)regerror(errcode, preg, oe, s);
1291b8ba871bSPeter Wemm msgq(sp, M_ERR, "RE error: %s", oe);
1292b8ba871bSPeter Wemm free(oe);
1293b8ba871bSPeter Wemm }
1294b8ba871bSPeter Wemm }
1295b8ba871bSPeter Wemm
1296b8ba871bSPeter Wemm /*
1297b8ba871bSPeter Wemm * re_sub --
1298b8ba871bSPeter Wemm * Do the substitution for a regular expression.
1299b8ba871bSPeter Wemm */
1300b8ba871bSPeter Wemm static int
re_sub(SCR * sp,CHAR_T * ip,CHAR_T ** lbp,size_t * lbclenp,size_t * lblenp,regmatch_t match[10])1301f0957ccaSPeter Wemm re_sub(
1302f0957ccaSPeter Wemm SCR *sp,
1303f0957ccaSPeter Wemm CHAR_T *ip, /* Input line. */
1304f0957ccaSPeter Wemm CHAR_T **lbp,
1305f0957ccaSPeter Wemm size_t *lbclenp,
1306f0957ccaSPeter Wemm size_t *lblenp,
1307f0957ccaSPeter Wemm regmatch_t match[10])
1308b8ba871bSPeter Wemm {
1309b8ba871bSPeter Wemm enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1310b8ba871bSPeter Wemm size_t lbclen, lblen; /* Local copies. */
1311b8ba871bSPeter Wemm size_t mlen; /* Match length. */
1312b8ba871bSPeter Wemm size_t rpl; /* Remaining replacement length. */
1313f0957ccaSPeter Wemm CHAR_T *rp; /* Replacement pointer. */
1314b8ba871bSPeter Wemm int ch;
1315b8ba871bSPeter Wemm int no; /* Match replacement offset. */
1316f0957ccaSPeter Wemm CHAR_T *p, *t; /* Buffer pointers. */
1317f0957ccaSPeter Wemm CHAR_T *lb; /* Local copies. */
1318b8ba871bSPeter Wemm
1319b8ba871bSPeter Wemm lb = *lbp; /* Get local copies. */
1320b8ba871bSPeter Wemm lbclen = *lbclenp;
1321b8ba871bSPeter Wemm lblen = *lblenp;
1322b8ba871bSPeter Wemm
1323b8ba871bSPeter Wemm /*
1324b8ba871bSPeter Wemm * QUOTING NOTE:
1325b8ba871bSPeter Wemm *
1326b8ba871bSPeter Wemm * There are some special sequences that vi provides in the
1327b8ba871bSPeter Wemm * replacement patterns.
1328b8ba871bSPeter Wemm * & string the RE matched (\& if nomagic set)
1329b8ba871bSPeter Wemm * \# n-th regular subexpression
1330b8ba871bSPeter Wemm * \E end \U, \L conversion
1331b8ba871bSPeter Wemm * \e end \U, \L conversion
1332b8ba871bSPeter Wemm * \l convert the next character to lower-case
1333b8ba871bSPeter Wemm * \L convert to lower-case, until \E, \e, or end of replacement
1334b8ba871bSPeter Wemm * \u convert the next character to upper-case
1335b8ba871bSPeter Wemm * \U convert to upper-case, until \E, \e, or end of replacement
1336b8ba871bSPeter Wemm *
1337b8ba871bSPeter Wemm * Otherwise, since this is the lowest level of replacement, discard
1338b8ba871bSPeter Wemm * all escaping characters. This (hopefully) matches historic practice.
1339b8ba871bSPeter Wemm */
1340755cc40cSBaptiste Daroussin #define OUTCH(ch, nltrans) do { \
1341f0957ccaSPeter Wemm ARG_CHAR_T __ch = (ch); \
1342f0957ccaSPeter Wemm e_key_t __value = KEY_VAL(sp, __ch); \
1343b8ba871bSPeter Wemm if (nltrans && (__value == K_CR || __value == K_NL)) { \
1344b8ba871bSPeter Wemm NEEDNEWLINE(sp); \
1345b8ba871bSPeter Wemm sp->newl[sp->newl_cnt++] = lbclen; \
1346b8ba871bSPeter Wemm } else if (conv != C_NOTSET) { \
1347b8ba871bSPeter Wemm switch (conv) { \
1348b8ba871bSPeter Wemm case C_ONELOWER: \
1349b8ba871bSPeter Wemm conv = C_NOTSET; \
1350b8ba871bSPeter Wemm /* FALLTHROUGH */ \
1351b8ba871bSPeter Wemm case C_LOWER: \
1352f0957ccaSPeter Wemm if (ISUPPER(__ch)) \
1353f0957ccaSPeter Wemm __ch = TOLOWER(__ch); \
1354b8ba871bSPeter Wemm break; \
1355b8ba871bSPeter Wemm case C_ONEUPPER: \
1356b8ba871bSPeter Wemm conv = C_NOTSET; \
1357b8ba871bSPeter Wemm /* FALLTHROUGH */ \
1358b8ba871bSPeter Wemm case C_UPPER: \
1359f0957ccaSPeter Wemm if (ISLOWER(__ch)) \
1360f0957ccaSPeter Wemm __ch = TOUPPER(__ch); \
1361b8ba871bSPeter Wemm break; \
1362b8ba871bSPeter Wemm default: \
1363b8ba871bSPeter Wemm abort(); \
1364b8ba871bSPeter Wemm } \
1365b8ba871bSPeter Wemm } \
1366b8ba871bSPeter Wemm NEEDSP(sp, 1, p); \
1367b8ba871bSPeter Wemm *p++ = __ch; \
1368b8ba871bSPeter Wemm ++lbclen; \
1369755cc40cSBaptiste Daroussin } while (0)
1370b8ba871bSPeter Wemm conv = C_NOTSET;
1371b8ba871bSPeter Wemm for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1372b8ba871bSPeter Wemm switch (ch = *rp++) {
1373b8ba871bSPeter Wemm case '&':
1374b8ba871bSPeter Wemm if (O_ISSET(sp, O_MAGIC)) {
1375b8ba871bSPeter Wemm no = 0;
1376b8ba871bSPeter Wemm goto subzero;
1377b8ba871bSPeter Wemm }
1378b8ba871bSPeter Wemm break;
1379b8ba871bSPeter Wemm case '\\':
1380b8ba871bSPeter Wemm if (rpl == 0)
1381b8ba871bSPeter Wemm break;
1382b8ba871bSPeter Wemm --rpl;
1383b8ba871bSPeter Wemm switch (ch = *rp) {
1384b8ba871bSPeter Wemm case '&':
1385b8ba871bSPeter Wemm ++rp;
1386b8ba871bSPeter Wemm if (!O_ISSET(sp, O_MAGIC)) {
1387b8ba871bSPeter Wemm no = 0;
1388b8ba871bSPeter Wemm goto subzero;
1389b8ba871bSPeter Wemm }
1390b8ba871bSPeter Wemm break;
1391b8ba871bSPeter Wemm case '0': case '1': case '2': case '3': case '4':
1392b8ba871bSPeter Wemm case '5': case '6': case '7': case '8': case '9':
1393b8ba871bSPeter Wemm no = *rp++ - '0';
1394b8ba871bSPeter Wemm subzero: if (match[no].rm_so == -1 ||
1395b8ba871bSPeter Wemm match[no].rm_eo == -1)
1396b8ba871bSPeter Wemm break;
1397b8ba871bSPeter Wemm mlen = match[no].rm_eo - match[no].rm_so;
1398b8ba871bSPeter Wemm for (t = ip + match[no].rm_so; mlen--; ++t)
1399b8ba871bSPeter Wemm OUTCH(*t, 0);
1400b8ba871bSPeter Wemm continue;
1401b8ba871bSPeter Wemm case 'e':
1402b8ba871bSPeter Wemm case 'E':
1403b8ba871bSPeter Wemm ++rp;
1404b8ba871bSPeter Wemm conv = C_NOTSET;
1405b8ba871bSPeter Wemm continue;
1406b8ba871bSPeter Wemm case 'l':
1407b8ba871bSPeter Wemm ++rp;
1408b8ba871bSPeter Wemm conv = C_ONELOWER;
1409b8ba871bSPeter Wemm continue;
1410b8ba871bSPeter Wemm case 'L':
1411b8ba871bSPeter Wemm ++rp;
1412b8ba871bSPeter Wemm conv = C_LOWER;
1413b8ba871bSPeter Wemm continue;
1414b8ba871bSPeter Wemm case 'u':
1415b8ba871bSPeter Wemm ++rp;
1416b8ba871bSPeter Wemm conv = C_ONEUPPER;
1417b8ba871bSPeter Wemm continue;
1418b8ba871bSPeter Wemm case 'U':
1419b8ba871bSPeter Wemm ++rp;
1420b8ba871bSPeter Wemm conv = C_UPPER;
1421b8ba871bSPeter Wemm continue;
1422f0957ccaSPeter Wemm case '\r':
1423f0957ccaSPeter Wemm OUTCH(ch, 0);
1424f0957ccaSPeter Wemm continue;
1425b8ba871bSPeter Wemm default:
1426b8ba871bSPeter Wemm ++rp;
1427b8ba871bSPeter Wemm break;
1428b8ba871bSPeter Wemm }
1429b8ba871bSPeter Wemm }
1430b8ba871bSPeter Wemm OUTCH(ch, 1);
1431b8ba871bSPeter Wemm }
1432b8ba871bSPeter Wemm
1433b8ba871bSPeter Wemm *lbp = lb; /* Update caller's information. */
1434b8ba871bSPeter Wemm *lbclenp = lbclen;
1435b8ba871bSPeter Wemm *lblenp = lblen;
1436b8ba871bSPeter Wemm return (0);
1437b8ba871bSPeter Wemm }
1438