xref: /freebsd/contrib/nvi/common/search.c (revision d940bfec8c329dd82d8d54efebd81c8aa420503b)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #ifndef lint
13 static const char sccsid[] = "$Id: search.c,v 10.26 2011/07/04 20:16:26 zy Exp $";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
19 
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28 
29 #include "common.h"
30 
31 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
32 
33 static void	search_msg __P((SCR *, smsg_t));
34 static int	search_init __P((SCR *, dir_t, CHAR_T *, size_t, CHAR_T **, u_int));
35 
36 /*
37  * search_init --
38  *	Set up a search.
39  */
40 static int
41 search_init(
42 	SCR *sp,
43 	dir_t dir,
44 	CHAR_T *ptrn,
45 	size_t plen,
46 	CHAR_T **epp,
47 	u_int flags)
48 {
49 	recno_t lno;
50 	int delim;
51 	CHAR_T *p, *t;
52 
53 	/* If the file is empty, it's a fast search. */
54 	if (sp->lno <= 1) {
55 		if (db_last(sp, &lno))
56 			return (1);
57 		if (lno == 0) {
58 			if (LF_ISSET(SEARCH_MSG))
59 				search_msg(sp, S_EMPTY);
60 			return (1);
61 		}
62 	}
63 
64 	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
65 		/*
66 		 * Use the saved pattern if no pattern specified, or if only
67 		 * one or two delimiter characters specified.
68 		 *
69 		 * !!!
70 		 * Historically, only the pattern itself was saved, vi didn't
71 		 * preserve addressing or delta information.
72 		 */
73 		if (ptrn == NULL)
74 			goto prev;
75 		if (plen == 1) {
76 			if (epp != NULL)
77 				*epp = ptrn + 1;
78 			goto prev;
79 		}
80 		if (ptrn[0] == ptrn[1]) {
81 			if (epp != NULL)
82 				*epp = ptrn + 2;
83 
84 			/* Complain if we don't have a previous pattern. */
85 prev:			if (sp->re == NULL) {
86 				search_msg(sp, S_NOPREV);
87 				return (1);
88 			}
89 			/* Re-compile the search pattern if necessary. */
90 			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
91 			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
92 			    RE_C_SEARCH |
93 			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
94 				return (1);
95 
96 			/* Set the search direction. */
97 			if (LF_ISSET(SEARCH_SET))
98 				sp->searchdir = dir;
99 			return (0);
100 		}
101 
102 		/*
103 		 * Set the delimiter, and move forward to the terminating
104 		 * delimiter, handling escaped delimiters.
105 		 *
106 		 * QUOTING NOTE:
107 		 * Only discard an escape character if it escapes a delimiter.
108 		 */
109 		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
110 			if (--plen == 0 || p[0] == delim) {
111 				if (plen != 0)
112 					++p;
113 				break;
114 			}
115 			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
116 				++p;
117 				--plen;
118 			}
119 		}
120 		if (epp != NULL)
121 			*epp = p;
122 
123 		plen = t - ptrn;
124 	}
125 
126 	/* Compile the RE. */
127 	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
128 	    RE_C_SEARCH |
129 	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
130 	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
131 	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
132 		return (1);
133 
134 	/* Set the search direction. */
135 	if (LF_ISSET(SEARCH_SET))
136 		sp->searchdir = dir;
137 
138 	return (0);
139 }
140 
141 /*
142  * f_search --
143  *	Do a forward search.
144  *
145  * PUBLIC: int f_search __P((SCR *,
146  * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
147  */
148 int
149 f_search(
150 	SCR *sp,
151 	MARK *fm,
152 	MARK *rm,
153 	CHAR_T *ptrn,
154 	size_t plen,
155 	CHAR_T **eptrn,
156 	u_int flags)
157 {
158 	busy_t btype;
159 	recno_t lno;
160 	regmatch_t match[1];
161 	size_t coff, len;
162 	int cnt, eval, rval, wrapped;
163 	CHAR_T *l;
164 
165 	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
166 		return (1);
167 
168 	if (LF_ISSET(SEARCH_FILE)) {
169 		lno = 1;
170 		coff = 0;
171 	} else {
172 		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
173 			return (1);
174 		lno = fm->lno;
175 
176 		/*
177 		 * If doing incremental search, start searching at the previous
178 		 * column, so that we search a minimal distance and still match
179 		 * special patterns, e.g., \< for beginning of a word.
180 		 *
181 		 * Otherwise, start searching immediately after the cursor.  If
182 		 * at the end of the line, start searching on the next line.
183 		 * This is incompatible (read bug fix) with the historic vi --
184 		 * searches for the '$' pattern never moved forward, and the
185 		 * "-t foo" didn't work if the 'f' was the first character in
186 		 * the file.
187 		 */
188 		if (LF_ISSET(SEARCH_INCR)) {
189 			if ((coff = fm->cno) != 0)
190 				--coff;
191 		} else if (fm->cno + 1 >= len) {
192 			coff = 0;
193 			lno = fm->lno + 1;
194 			if (db_get(sp, lno, 0, &l, &len)) {
195 				if (!O_ISSET(sp, O_WRAPSCAN)) {
196 					if (LF_ISSET(SEARCH_MSG))
197 						search_msg(sp, S_EOF);
198 					return (1);
199 				}
200 				lno = 1;
201 			}
202 		} else
203 			coff = fm->cno + 1;
204 	}
205 
206 	btype = BUSY_ON;
207 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; ++lno, coff = 0) {
208 		if (cnt-- == 0) {
209 			if (INTERRUPTED(sp))
210 				break;
211 			if (LF_ISSET(SEARCH_MSG)) {
212 				search_busy(sp, btype);
213 				btype = BUSY_UPDATE;
214 			}
215 			cnt = INTERRUPT_CHECK;
216 		}
217 		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
218 			if (wrapped) {
219 				if (LF_ISSET(SEARCH_MSG))
220 					search_msg(sp, S_NOTFOUND);
221 				break;
222 			}
223 			if (!O_ISSET(sp, O_WRAPSCAN)) {
224 				if (LF_ISSET(SEARCH_MSG))
225 					search_msg(sp, S_EOF);
226 				break;
227 			}
228 			lno = 0;
229 			wrapped = 1;
230 			continue;
231 		}
232 
233 		/* If already at EOL, just keep going. */
234 		if (len != 0 && coff == len)
235 			continue;
236 
237 		/* Set the termination. */
238 		match[0].rm_so = coff;
239 		match[0].rm_eo = len;
240 
241 #if defined(DEBUG) && 0
242 		TRACE(sp, "F search: %lu from %u to %u\n",
243 		    lno, coff, len != 0 ? len - 1 : len);
244 #endif
245 		/* Search the line. */
246 		eval = regexec(&sp->re_c, l, 1, match,
247 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
248 		if (eval == REG_NOMATCH)
249 			continue;
250 		if (eval != 0) {
251 			if (LF_ISSET(SEARCH_MSG))
252 				re_error(sp, eval, &sp->re_c);
253 			else
254 				(void)sp->gp->scr_bell(sp);
255 			break;
256 		}
257 
258 		/* Warn if the search wrapped. */
259 		if (wrapped && LF_ISSET(SEARCH_WMSG))
260 			search_msg(sp, S_WRAP);
261 
262 #if defined(DEBUG) && 0
263 		TRACE(sp, "F search: %qu to %qu\n",
264 		    match[0].rm_so, match[0].rm_eo);
265 #endif
266 		rm->lno = lno;
267 		rm->cno = match[0].rm_so;
268 
269 		/*
270 		 * If a change command, it's possible to move beyond the end
271 		 * of a line.  Historic vi generally got this wrong (e.g. try
272 		 * "c?$<cr>").  Not all that sure this gets it right, there
273 		 * are lots of strange cases.
274 		 */
275 		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
276 			rm->cno = len != 0 ? len - 1 : 0;
277 
278 		rval = 0;
279 		break;
280 	}
281 
282 	if (LF_ISSET(SEARCH_MSG))
283 		search_busy(sp, BUSY_OFF);
284 	return (rval);
285 }
286 
287 /*
288  * b_search --
289  *	Do a backward search.
290  *
291  * PUBLIC: int b_search __P((SCR *,
292  * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
293  */
294 int
295 b_search(
296 	SCR *sp,
297 	MARK *fm,
298 	MARK *rm,
299 	CHAR_T *ptrn,
300 	size_t plen,
301 	CHAR_T **eptrn,
302 	u_int flags)
303 {
304 	busy_t btype;
305 	recno_t lno;
306 	regmatch_t match[1];
307 	size_t coff, last, len;
308 	int cnt, eval, rval, wrapped;
309 	CHAR_T *l;
310 
311 	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
312 		return (1);
313 
314 	/*
315 	 * If doing incremental search, set the "starting" position past the
316 	 * current column, so that we search a minimal distance and still
317 	 * match special patterns, e.g., \> for the end of a word.  This is
318 	 * safe when the cursor is at the end of a line because we only use
319 	 * it for comparison with the location of the match.
320 	 *
321 	 * Otherwise, start searching immediately before the cursor.  If in
322 	 * the first column, start search on the previous line.
323 	 */
324 	if (LF_ISSET(SEARCH_INCR)) {
325 		lno = fm->lno;
326 		coff = fm->cno + 1;
327 	} else {
328 		if (fm->cno == 0) {
329 			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
330 				if (LF_ISSET(SEARCH_MSG))
331 					search_msg(sp, S_SOF);
332 				return (1);
333 			}
334 			lno = fm->lno - 1;
335 		} else
336 			lno = fm->lno;
337 		coff = fm->cno;
338 	}
339 
340 	btype = BUSY_ON;
341 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
342 		if (cnt-- == 0) {
343 			if (INTERRUPTED(sp))
344 				break;
345 			if (LF_ISSET(SEARCH_MSG)) {
346 				search_busy(sp, btype);
347 				btype = BUSY_UPDATE;
348 			}
349 			cnt = INTERRUPT_CHECK;
350 		}
351 		if ((wrapped && lno < fm->lno) || lno == 0) {
352 			if (wrapped) {
353 				if (LF_ISSET(SEARCH_MSG))
354 					search_msg(sp, S_NOTFOUND);
355 				break;
356 			}
357 			if (!O_ISSET(sp, O_WRAPSCAN)) {
358 				if (LF_ISSET(SEARCH_MSG))
359 					search_msg(sp, S_SOF);
360 				break;
361 			}
362 			if (db_last(sp, &lno))
363 				break;
364 			if (lno == 0) {
365 				if (LF_ISSET(SEARCH_MSG))
366 					search_msg(sp, S_EMPTY);
367 				break;
368 			}
369 			++lno;
370 			wrapped = 1;
371 			continue;
372 		}
373 
374 		if (db_get(sp, lno, 0, &l, &len))
375 			break;
376 
377 		/* Set the termination. */
378 		match[0].rm_so = 0;
379 		match[0].rm_eo = len;
380 
381 #if defined(DEBUG) && 0
382 		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
383 #endif
384 		/* Search the line. */
385 		eval = regexec(&sp->re_c, l, 1, match,
386 		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
387 		if (eval == REG_NOMATCH)
388 			continue;
389 		if (eval != 0) {
390 			if (LF_ISSET(SEARCH_MSG))
391 				re_error(sp, eval, &sp->re_c);
392 			else
393 				(void)sp->gp->scr_bell(sp);
394 			break;
395 		}
396 
397 		/* Check for a match starting past the cursor. */
398 		if (coff != 0 && match[0].rm_so >= coff)
399 			continue;
400 
401 		/* Warn if the search wrapped. */
402 		if (wrapped && LF_ISSET(SEARCH_WMSG))
403 			search_msg(sp, S_WRAP);
404 
405 #if defined(DEBUG) && 0
406 		TRACE(sp, "B found: %qu to %qu\n",
407 		    match[0].rm_so, match[0].rm_eo);
408 #endif
409 		/*
410 		 * We now have the first match on the line.  Step through the
411 		 * line character by character until find the last acceptable
412 		 * match.  This is painful, we need a better interface to regex
413 		 * to make this work.
414 		 */
415 		for (;;) {
416 			last = match[0].rm_so++;
417 			if (match[0].rm_so >= len)
418 				break;
419 			match[0].rm_eo = len;
420 			eval = regexec(&sp->re_c, l, 1, match,
421 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
422 			    REG_STARTEND);
423 			if (eval == REG_NOMATCH)
424 				break;
425 			if (eval != 0) {
426 				if (LF_ISSET(SEARCH_MSG))
427 					re_error(sp, eval, &sp->re_c);
428 				else
429 					(void)sp->gp->scr_bell(sp);
430 				goto err;
431 			}
432 			if (coff && match[0].rm_so >= coff)
433 				break;
434 		}
435 		rm->lno = lno;
436 
437 		/* See comment in f_search(). */
438 		if (!LF_ISSET(SEARCH_EOL) && last >= len)
439 			rm->cno = len != 0 ? len - 1 : 0;
440 		else
441 			rm->cno = last;
442 		rval = 0;
443 		break;
444 	}
445 
446 err:	if (LF_ISSET(SEARCH_MSG))
447 		search_busy(sp, BUSY_OFF);
448 	return (rval);
449 }
450 
451 /*
452  * search_msg --
453  *	Display one of the search messages.
454  */
455 static void
456 search_msg(
457 	SCR *sp,
458 	smsg_t msg)
459 {
460 	switch (msg) {
461 	case S_EMPTY:
462 		msgq(sp, M_ERR, "072|File empty; nothing to search");
463 		break;
464 	case S_EOF:
465 		msgq(sp, M_ERR,
466 		    "073|Reached end-of-file without finding the pattern");
467 		break;
468 	case S_NOPREV:
469 		msgq(sp, M_ERR, "074|No previous search pattern");
470 		break;
471 	case S_NOTFOUND:
472 		msgq(sp, M_ERR, "075|Pattern not found");
473 		break;
474 	case S_SOF:
475 		msgq(sp, M_ERR,
476 		    "076|Reached top-of-file without finding the pattern");
477 		break;
478 	case S_WRAP:
479 		msgq(sp, M_ERR, "077|Search wrapped");
480 		break;
481 	default:
482 		abort();
483 	}
484 }
485 
486 /*
487  * search_busy --
488  *	Put up the busy searching message.
489  *
490  * PUBLIC: void search_busy __P((SCR *, busy_t));
491  */
492 void
493 search_busy(
494 	SCR *sp,
495 	busy_t btype)
496 {
497 	sp->gp->scr_busy(sp, "078|Searching...", btype);
498 }
499