xref: /freebsd/contrib/nvi/vi/v_search.c (revision f4b37ed0f8b307b1f3f0f630ca725d68f1dff30d)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #ifndef lint
13 static const char sccsid[] = "$Id: v_search.c,v 10.31 2012/02/08 07:26:59 zy Exp $";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
19 
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include "../common/common.h"
29 #include "vi.h"
30 
31 static int v_exaddr(SCR *, VICMD *, dir_t);
32 static int v_search(SCR *, VICMD *, CHAR_T *, size_t, u_int, dir_t);
33 
34 /*
35  * v_srch -- [count]?RE[? offset]
36  *	Ex address search backward.
37  *
38  * PUBLIC: int v_searchb(SCR *, VICMD *);
39  */
40 int
41 v_searchb(SCR *sp, VICMD *vp)
42 {
43 	return (v_exaddr(sp, vp, BACKWARD));
44 }
45 
46 /*
47  * v_searchf -- [count]/RE[/ offset]
48  *	Ex address search forward.
49  *
50  * PUBLIC: int v_searchf(SCR *, VICMD *);
51  */
52 int
53 v_searchf(SCR *sp, VICMD *vp)
54 {
55 	return (v_exaddr(sp, vp, FORWARD));
56 }
57 
58 /*
59  * v_exaddr --
60  *	Do a vi search (which is really an ex address).
61  */
62 static int
63 v_exaddr(SCR *sp, VICMD *vp, dir_t dir)
64 {
65 	static EXCMDLIST fake = { L("search") };
66 	EXCMD *cmdp;
67 	GS *gp;
68 	TEXT *tp;
69 	recno_t s_lno;
70 	size_t len, s_cno, tlen;
71 	int err, nb, type;
72 	char buf[20];
73 	CHAR_T *cmd, *t;
74 	CHAR_T *w;
75 	size_t wlen;
76 
77 	/*
78 	 * !!!
79 	 * If using the search command as a motion, any addressing components
80 	 * are lost, i.e. y/ptrn/+2, when repeated, is the same as y/ptrn/.
81 	 */
82 	if (F_ISSET(vp, VC_ISDOT))
83 		return (v_search(sp, vp,
84 		    NULL, 0, SEARCH_PARSE | SEARCH_MSG | SEARCH_SET, dir));
85 
86 	/* Get the search pattern. */
87 	if (v_tcmd(sp, vp, dir == BACKWARD ? CH_BSEARCH : CH_FSEARCH,
88 	    TXT_BS | TXT_CR | TXT_ESCAPE | TXT_PROMPT |
89 	    (O_ISSET(sp, O_SEARCHINCR) ? TXT_SEARCHINCR : 0)))
90 		return (1);
91 
92 	tp = TAILQ_FIRST(sp->tiq);
93 
94 	/* If the user backspaced over the prompt, do nothing. */
95 	if (tp->term == TERM_BS)
96 		return (1);
97 
98 	/*
99 	 * If the user was doing an incremental search, then we've already
100 	 * updated the cursor and moved to the right location.  Return the
101 	 * correct values, we're done.
102 	 */
103 	if (tp->term == TERM_SEARCH) {
104 		vp->m_stop.lno = sp->lno;
105 		vp->m_stop.cno = sp->cno;
106 		if (ISMOTION(vp))
107 			return (v_correct(sp, vp, 0));
108 		vp->m_final = vp->m_stop;
109 		return (0);
110 	}
111 
112 	/*
113 	 * If the user entered <escape> or <carriage-return>, the length is
114 	 * 1 and the right thing will happen, i.e. the prompt will be used
115 	 * as a command character.
116 	 *
117 	 * Build a fake ex command structure.
118 	 */
119 	gp = sp->gp;
120 	gp->excmd.cp = tp->lb;
121 	gp->excmd.clen = tp->len;
122 	F_INIT(&gp->excmd, E_VISEARCH);
123 
124 	/*
125 	 * XXX
126 	 * Warn if the search wraps.  This is a pretty special case, but it's
127 	 * nice feature that wasn't in the original implementations of ex/vi.
128 	 * (It was added at some point to System V's version.)  This message
129 	 * is only displayed if there are no keys in the queue. The problem is
130 	 * the command is going to succeed, and the message is informational,
131 	 * not an error.  If a macro displays it repeatedly, e.g., the pattern
132 	 * only occurs once in the file and wrapscan is set, you lose big.  For
133 	 * example, if the macro does something like:
134 	 *
135 	 *	:map K /pattern/^MjK
136 	 *
137 	 * Each search will display the message, but the following "/pattern/"
138 	 * will immediately overwrite it, with strange results.  The System V
139 	 * vi displays the "wrapped" message multiple times, but because it's
140 	 * overwritten each time, it's not as noticeable.  As we don't discard
141 	 * messages, it's a real problem for us.
142 	 */
143 	if (!KEYS_WAITING(sp))
144 		F_SET(&gp->excmd, E_SEARCH_WMSG);
145 
146 	/* Save the current line/column. */
147 	s_lno = sp->lno;
148 	s_cno = sp->cno;
149 
150 	/*
151 	 * !!!
152 	 * Historically, vi / and ? commands were full-blown ex addresses,
153 	 * including ';' delimiters, trailing <blank>'s, multiple search
154 	 * strings (separated by semi-colons) and, finally, full-blown z
155 	 * commands after the / and ? search strings.  (If the search was
156 	 * being used as a motion, the trailing z command was ignored.
157 	 * Also, we do some argument checking on the z command, to be sure
158 	 * that it's not some other random command.) For multiple search
159 	 * strings, leading <blank>'s at the second and subsequent strings
160 	 * were eaten as well.  This has some (unintended?) side-effects:
161 	 * the command /ptrn/;3 is legal and results in moving to line 3.
162 	 * I suppose you could use it to optionally move to line 3...
163 	 *
164 	 * !!!
165 	 * Historically, if any part of the search command failed, the cursor
166 	 * remained unmodified (even if ; was used).  We have to play games
167 	 * because the underlying ex parser thinks we're modifying the cursor
168 	 * as we go, but I think we're compatible with historic practice.
169 	 *
170 	 * !!!
171 	 * Historically, the command "/STRING/;   " failed, apparently it
172 	 * confused the parser.  We're not that compatible.
173 	 */
174 	cmdp = &gp->excmd;
175 	if (ex_range(sp, cmdp, &err))
176 		return (1);
177 
178 	/*
179 	 * Remember where any remaining command information is, and clean
180 	 * up the fake ex command.
181 	 */
182 	cmd = cmdp->cp;
183 	len = cmdp->clen;
184 	gp->excmd.clen = 0;
185 
186 	if (err)
187 		goto err2;
188 
189 	/* Copy out the new cursor position and make sure it's okay. */
190 	switch (cmdp->addrcnt) {
191 	case 1:
192 		vp->m_stop = cmdp->addr1;
193 		break;
194 	case 2:
195 		vp->m_stop = cmdp->addr2;
196 		break;
197 	}
198 	if (!db_exist(sp, vp->m_stop.lno)) {
199 		ex_badaddr(sp, &fake,
200 		    vp->m_stop.lno == 0 ? A_ZERO : A_EOF, NUM_OK);
201 		goto err2;
202 	}
203 
204 	/*
205 	 * !!!
206 	 * Historic practice is that a trailing 'z' was ignored if it was a
207 	 * motion command.  Should probably be an error, but not worth the
208 	 * effort.
209 	 */
210 	if (ISMOTION(vp))
211 		return (v_correct(sp, vp, F_ISSET(cmdp, E_DELTA)));
212 
213 	/*
214 	 * !!!
215 	 * Historically, if it wasn't a motion command, a delta in the search
216 	 * pattern turns it into a first nonblank movement.
217 	 */
218 	nb = F_ISSET(cmdp, E_DELTA);
219 
220 	/* Check for the 'z' command. */
221 	if (len != 0) {
222 		if (*cmd != 'z')
223 			goto err1;
224 
225 		/* No blanks, just like the z command. */
226 		for (t = cmd + 1, tlen = len - 1; tlen > 0; ++t, --tlen)
227 			if (!isdigit(*t))
228 				break;
229 		if (tlen &&
230 		    (*t == '-' || *t == '.' || *t == '+' || *t == '^')) {
231 			++t;
232 			--tlen;
233 			type = 1;
234 		} else
235 			type = 0;
236 		if (tlen)
237 			goto err1;
238 
239 		/* The z command will do the nonblank for us. */
240 		nb = 0;
241 
242 		/* Default to z+. */
243 		if (!type &&
244 		    v_event_push(sp, NULL, L("+"), 1, CH_NOMAP | CH_QUOTED))
245 			return (1);
246 
247 		/* Push the user's command. */
248 		if (v_event_push(sp, NULL, cmd, len, CH_NOMAP | CH_QUOTED))
249 			return (1);
250 
251 		/* Push line number so get correct z display. */
252 		tlen = snprintf(buf,
253 		    sizeof(buf), "%lu", (u_long)vp->m_stop.lno);
254 		CHAR2INT(sp, buf, tlen, w, wlen);
255 		if (v_event_push(sp, NULL, w, wlen, CH_NOMAP | CH_QUOTED))
256 			return (1);
257 
258 		/* Don't refresh until after 'z' happens. */
259 		F_SET(VIP(sp), VIP_S_REFRESH);
260 	}
261 
262 	/* Non-motion commands move to the end of the range. */
263 	vp->m_final = vp->m_stop;
264 	if (nb) {
265 		F_CLR(vp, VM_RCM_MASK);
266 		F_SET(vp, VM_RCM_SETFNB);
267 	}
268 	return (0);
269 
270 err1:	msgq(sp, M_ERR,
271 	    "188|Characters after search string, line offset and/or z command");
272 err2:	vp->m_final.lno = s_lno;
273 	vp->m_final.cno = s_cno;
274 	return (1);
275 }
276 
277 /*
278  * v_searchN -- N
279  *	Reverse last search.
280  *
281  * PUBLIC: int v_searchN(SCR *, VICMD *);
282  */
283 int
284 v_searchN(SCR *sp, VICMD *vp)
285 {
286 	dir_t dir;
287 
288 	switch (sp->searchdir) {
289 	case BACKWARD:
290 		dir = FORWARD;
291 		break;
292 	case FORWARD:
293 		dir = BACKWARD;
294 		break;
295 	default:
296 		dir = sp->searchdir;
297 		break;
298 	}
299 	return (v_search(sp, vp, NULL, 0, SEARCH_PARSE, dir));
300 }
301 
302 /*
303  * v_searchn -- n
304  *	Repeat last search.
305  *
306  * PUBLIC: int v_searchn(SCR *, VICMD *);
307  */
308 int
309 v_searchn(SCR *sp, VICMD *vp)
310 {
311 	return (v_search(sp, vp, NULL, 0, SEARCH_PARSE, sp->searchdir));
312 }
313 
314 /*
315  * is_special --
316  *	Test if the character is special in a basic RE.
317  */
318 static int
319 is_special(CHAR_T c)
320 {
321 	/*
322 	 * !!!
323 	 * `*' and `$' are ordinary when appear at the beginning of a RE,
324 	 * but it's safe to distinguish them from the ordinary characters.
325 	 * The tilde is vi-specific, of course.
326 	 */
327 	return (STRCHR(L(".[*\\^$~"), c) && c);
328 }
329 
330 /*
331  * Rear delimiter for word search when the keyword ends in
332  * (i.e., consists of) a non-word character.  See v_searchw below.
333  */
334 #define RE_NWSTOP	L("([^[:alnum:]_]|$)")
335 #define RE_NWSTOP_LEN	(SIZE(RE_NWSTOP) - 1)
336 
337 /*
338  * v_searchw -- [count]^A
339  *	Search for the word under the cursor.
340  *
341  * PUBLIC: int v_searchw(SCR *, VICMD *);
342  */
343 int
344 v_searchw(SCR *sp, VICMD *vp)
345 {
346 	size_t blen, len;
347 	int rval;
348 	CHAR_T *bp, *p;
349 
350 	/* An upper bound for the SIZE of the RE under construction. */
351 	len = VIP(sp)->klen + MAX(RE_WSTART_LEN, 1)
352 	    + MAX(RE_WSTOP_LEN, RE_NWSTOP_LEN);
353 	GET_SPACE_RETW(sp, bp, blen, len);
354 	p = bp;
355 
356 	/* Only the first character can be non-word, see v_curword. */
357 	if (inword(VIP(sp)->keyw[0])) {
358 		MEMCPY(p, RE_WSTART, RE_WSTART_LEN);
359 		p += RE_WSTART_LEN;
360 	} else if (is_special(VIP(sp)->keyw[0])) {
361 		MEMCPY(p, L("\\"), 1);
362 		p += 1;
363 	}
364 
365 	MEMCPY(p, VIP(sp)->keyw, VIP(sp)->klen);
366 	p += VIP(sp)->klen;
367 
368 	if (inword(p[-1])) {
369 		MEMCPY(p, RE_WSTOP, RE_WSTOP_LEN);
370 		p += RE_WSTOP_LEN;
371 	} else {
372 		/*
373 		 * The keyword is a single non-word character.
374 		 * We want it to stay the same when typing ^A several times
375 		 * in a row, just the way the other cases behave.
376 		 */
377 		MEMCPY(p, RE_NWSTOP, RE_NWSTOP_LEN);
378 		p += RE_NWSTOP_LEN;
379 	}
380 
381 	len = p - bp;
382 	rval = v_search(sp, vp, bp, len, SEARCH_SET, FORWARD);
383 
384 	FREE_SPACEW(sp, bp, blen);
385 	return (rval);
386 }
387 
388 /*
389  * v_search --
390  *	The search commands.
391  */
392 static int
393 v_search(SCR *sp, VICMD *vp, CHAR_T *ptrn, size_t plen, u_int flags, dir_t dir)
394 {
395 	/* Display messages. */
396 	LF_SET(SEARCH_MSG);
397 
398 	/* If it's a motion search, offset past end-of-line is okay. */
399 	if (ISMOTION(vp))
400 		LF_SET(SEARCH_EOL);
401 
402 	/*
403 	 * XXX
404 	 * Warn if the search wraps.  See the comment above, in v_exaddr().
405 	 */
406 	if (!KEYS_WAITING(sp))
407 		LF_SET(SEARCH_WMSG);
408 
409 	switch (dir) {
410 	case BACKWARD:
411 		if (b_search(sp,
412 		    &vp->m_start, &vp->m_stop, ptrn, plen, NULL, flags))
413 			return (1);
414 		break;
415 	case FORWARD:
416 		if (f_search(sp,
417 		    &vp->m_start, &vp->m_stop, ptrn, plen, NULL, flags))
418 			return (1);
419 		break;
420 	case NOTSET:
421 		msgq(sp, M_ERR, "189|No previous search pattern");
422 		return (1);
423 	default:
424 		abort();
425 	}
426 
427 	/* Correct motion commands, otherwise, simply move to the location. */
428 	if (ISMOTION(vp)) {
429 		if (v_correct(sp, vp, 0))
430 			return(1);
431 	} else
432 		vp->m_final = vp->m_stop;
433 	return (0);
434 }
435 
436 /*
437  * v_correct --
438  *	Handle command with a search as the motion.
439  *
440  * !!!
441  * Historically, commands didn't affect the line searched to/from if the
442  * motion command was a search and the final position was the start/end
443  * of the line.  There were some special cases and vi was not consistent;
444  * it was fairly easy to confuse it.  For example, given the two lines:
445  *
446  *	abcdefghi
447  *	ABCDEFGHI
448  *
449  * placing the cursor on the 'A' and doing y?$ would so confuse it that 'h'
450  * 'k' and put would no longer work correctly.  In any case, we try to do
451  * the right thing, but it's not going to exactly match historic practice.
452  *
453  * PUBLIC: int v_correct(SCR *, VICMD *, int);
454  */
455 int
456 v_correct(SCR *sp, VICMD *vp, int isdelta)
457 {
458 	dir_t dir;
459 	MARK m;
460 	size_t len;
461 
462 	/*
463 	 * !!!
464 	 * We may have wrapped if wrapscan was set, and we may have returned
465 	 * to the position where the cursor started.  Historic vi didn't cope
466 	 * with this well.  Yank wouldn't beep, but the first put after the
467 	 * yank would move the cursor right one column (without adding any
468 	 * text) and the second would put a copy of the current line.  The
469 	 * change and delete commands would beep, but would leave the cursor
470 	 * on the colon command line.  I believe that there are macros that
471 	 * depend on delete, at least, failing.  For now, commands that use
472 	 * search as a motion component fail when the search returns to the
473 	 * original cursor position.
474 	 */
475 	if (vp->m_start.lno == vp->m_stop.lno &&
476 	    vp->m_start.cno == vp->m_stop.cno) {
477 		msgq(sp, M_BERR, "190|Search wrapped to original position");
478 		return (1);
479 	}
480 
481 	/*
482 	 * !!!
483 	 * Searches become line mode operations if there was a delta specified
484 	 * to the search pattern.
485 	 */
486 	if (isdelta)
487 		F_SET(vp, VM_LMODE);
488 
489 	/*
490 	 * If the motion is in the reverse direction, switch the start and
491 	 * stop MARK's so that it's in a forward direction.  (There's no
492 	 * reason for this other than to make the tests below easier.  The
493 	 * code in vi.c:vi() would have done the switch.)  Both forward
494 	 * and backward motions can happen for any kind of search command
495 	 * because of the wrapscan option.
496 	 */
497 	if (vp->m_start.lno > vp->m_stop.lno ||
498 	    (vp->m_start.lno == vp->m_stop.lno &&
499 	    vp->m_start.cno > vp->m_stop.cno)) {
500 		m = vp->m_start;
501 		vp->m_start = vp->m_stop;
502 		vp->m_stop = m;
503 		dir = BACKWARD;
504 	} else
505 		dir = FORWARD;
506 
507 	/*
508 	 * BACKWARD:
509 	 *	Delete and yank commands move to the end of the range.
510 	 *	Ignore others.
511 	 *
512 	 * FORWARD:
513 	 *	Delete and yank commands don't move.  Ignore others.
514 	 */
515 	vp->m_final = vp->m_start;
516 
517 	/*
518 	 * !!!
519 	 * Delta'd searches don't correct based on column positions.
520 	 */
521 	if (isdelta)
522 		return (0);
523 
524 	/*
525 	 * !!!
526 	 * Backward searches starting at column 0, and forward searches ending
527 	 * at column 0 are corrected to the last column of the previous line.
528 	 * Otherwise, adjust the starting/ending point to the character before
529 	 * the current one (this is safe because we know the search had to move
530 	 * to succeed).
531 	 *
532 	 * Searches become line mode operations if they start at the first
533 	 * nonblank and end at column 0 of another line.
534 	 */
535 	if (vp->m_start.lno < vp->m_stop.lno && vp->m_stop.cno == 0) {
536 		if (db_get(sp, --vp->m_stop.lno, DBG_FATAL, NULL, &len))
537 			return (1);
538 		vp->m_stop.cno = len ? len - 1 : 0;
539 		len = 0;
540 		if (nonblank(sp, vp->m_start.lno, &len))
541 			return (1);
542 		if (vp->m_start.cno <= len)
543 			F_SET(vp, VM_LMODE);
544 	} else
545 		--vp->m_stop.cno;
546 
547 	return (0);
548 }
549