xref: /freebsd/contrib/nvi/common/search.c (revision 4f29da19bd44f0e99f021510460a81bf754c21d2)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #ifndef lint
13 static const char sccsid[] = "@(#)search.c	10.25 (Berkeley) 6/30/96";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 
19 #include <bitstring.h>
20 #include <ctype.h>
21 #include <errno.h>
22 #include <limits.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 
28 #include "common.h"
29 
30 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
31 
32 static void	search_msg __P((SCR *, smsg_t));
33 static int	search_init __P((SCR *, dir_t, char *, size_t, char **, u_int));
34 
35 /*
36  * search_init --
37  *	Set up a search.
38  */
39 static int
40 search_init(sp, dir, ptrn, plen, epp, flags)
41 	SCR *sp;
42 	dir_t dir;
43 	char *ptrn, **epp;
44 	size_t plen;
45 	u_int flags;
46 {
47 	recno_t lno;
48 	int delim;
49 	char *p, *t;
50 
51 	/* If the file is empty, it's a fast search. */
52 	if (sp->lno <= 1) {
53 		if (db_last(sp, &lno))
54 			return (1);
55 		if (lno == 0) {
56 			if (LF_ISSET(SEARCH_MSG))
57 				search_msg(sp, S_EMPTY);
58 			return (1);
59 		}
60 	}
61 
62 	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
63 		/*
64 		 * Use the saved pattern if no pattern specified, or if only
65 		 * one or two delimiter characters specified.
66 		 *
67 		 * !!!
68 		 * Historically, only the pattern itself was saved, vi didn't
69 		 * preserve addressing or delta information.
70 		 */
71 		if (ptrn == NULL)
72 			goto prev;
73 		if (plen == 1) {
74 			if (epp != NULL)
75 				*epp = ptrn + 1;
76 			goto prev;
77 		}
78 		if (ptrn[0] == ptrn[1]) {
79 			if (epp != NULL)
80 				*epp = ptrn + 2;
81 
82 			/* Complain if we don't have a previous pattern. */
83 prev:			if (sp->re == NULL) {
84 				search_msg(sp, S_NOPREV);
85 				return (1);
86 			}
87 			/* Re-compile the search pattern if necessary. */
88 			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
89 			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
90 			    RE_C_SEARCH |
91 			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
92 				return (1);
93 
94 			/* Set the search direction. */
95 			if (LF_ISSET(SEARCH_SET))
96 				sp->searchdir = dir;
97 			return (0);
98 		}
99 
100 		/*
101 		 * Set the delimiter, and move forward to the terminating
102 		 * delimiter, handling escaped delimiters.
103 		 *
104 		 * QUOTING NOTE:
105 		 * Only discard an escape character if it escapes a delimiter.
106 		 */
107 		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
108 			if (--plen == 0 || p[0] == delim) {
109 				if (plen != 0)
110 					++p;
111 				break;
112 			}
113 			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
114 				++p;
115 				--plen;
116 			}
117 		}
118 		if (epp != NULL)
119 			*epp = p;
120 
121 		plen = t - ptrn;
122 	}
123 
124 	/* Compile the RE. */
125 	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
126 	    RE_C_SEARCH |
127 	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
128 	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
129 	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
130 		return (1);
131 
132 	/* Set the search direction. */
133 	if (LF_ISSET(SEARCH_SET))
134 		sp->searchdir = dir;
135 
136 	return (0);
137 }
138 
139 /*
140  * f_search --
141  *	Do a forward search.
142  *
143  * PUBLIC: int f_search __P((SCR *,
144  * PUBLIC:    MARK *, MARK *, char *, size_t, char **, u_int));
145  */
146 int
147 f_search(sp, fm, rm, ptrn, plen, eptrn, flags)
148 	SCR *sp;
149 	MARK *fm, *rm;
150 	char *ptrn, **eptrn;
151 	size_t plen;
152 	u_int flags;
153 {
154 	busy_t btype;
155 	recno_t lno;
156 	regmatch_t match[1];
157 	size_t coff, len;
158 	int cnt, eval, rval, wrapped;
159 	char *l;
160 
161 	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
162 		return (1);
163 
164 	if (LF_ISSET(SEARCH_FILE)) {
165 		lno = 1;
166 		coff = 0;
167 	} else {
168 		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
169 			return (1);
170 		lno = fm->lno;
171 
172 		/*
173 		 * If doing incremental search, start searching at the previous
174 		 * column, so that we search a minimal distance and still match
175 		 * special patterns, e.g., \< for beginning of a word.
176 		 *
177 		 * Otherwise, start searching immediately after the cursor.  If
178 		 * at the end of the line, start searching on the next line.
179 		 * This is incompatible (read bug fix) with the historic vi --
180 		 * searches for the '$' pattern never moved forward, and the
181 		 * "-t foo" didn't work if the 'f' was the first character in
182 		 * the file.
183 		 */
184 		if (LF_ISSET(SEARCH_INCR)) {
185 			if ((coff = fm->cno) != 0)
186 				--coff;
187 		} else if (fm->cno + 1 >= len) {
188 			coff = 0;
189 			lno = fm->lno + 1;
190 			if (db_get(sp, lno, 0, &l, &len)) {
191 				if (!O_ISSET(sp, O_WRAPSCAN)) {
192 					if (LF_ISSET(SEARCH_MSG))
193 						search_msg(sp, S_EOF);
194 					return (1);
195 				}
196 				lno = 1;
197 			}
198 		} else
199 			coff = fm->cno + 1;
200 	}
201 
202 	btype = BUSY_ON;
203 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; ++lno, coff = 0) {
204 		if (cnt-- == 0) {
205 			if (INTERRUPTED(sp))
206 				break;
207 			if (LF_ISSET(SEARCH_MSG)) {
208 				search_busy(sp, btype);
209 				btype = BUSY_UPDATE;
210 			}
211 			cnt = INTERRUPT_CHECK;
212 		}
213 		if (wrapped && lno > fm->lno || db_get(sp, lno, 0, &l, &len)) {
214 			if (wrapped) {
215 				if (LF_ISSET(SEARCH_MSG))
216 					search_msg(sp, S_NOTFOUND);
217 				break;
218 			}
219 			if (!O_ISSET(sp, O_WRAPSCAN)) {
220 				if (LF_ISSET(SEARCH_MSG))
221 					search_msg(sp, S_EOF);
222 				break;
223 			}
224 			lno = 0;
225 			wrapped = 1;
226 			continue;
227 		}
228 
229 		/* If already at EOL, just keep going. */
230 		if (len != 0 && coff == len)
231 			continue;
232 
233 		/* Set the termination. */
234 		match[0].rm_so = coff;
235 		match[0].rm_eo = len;
236 
237 #if defined(DEBUG) && 0
238 		TRACE(sp, "F search: %lu from %u to %u\n",
239 		    lno, coff, len != 0 ? len - 1 : len);
240 #endif
241 		/* Search the line. */
242 		eval = regexec(&sp->re_c, l, 1, match,
243 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
244 		if (eval == REG_NOMATCH)
245 			continue;
246 		if (eval != 0) {
247 			if (LF_ISSET(SEARCH_MSG))
248 				re_error(sp, eval, &sp->re_c);
249 			else
250 				(void)sp->gp->scr_bell(sp);
251 			break;
252 		}
253 
254 		/* Warn if the search wrapped. */
255 		if (wrapped && LF_ISSET(SEARCH_WMSG))
256 			search_msg(sp, S_WRAP);
257 
258 #if defined(DEBUG) && 0
259 		TRACE(sp, "F search: %qu to %qu\n",
260 		    match[0].rm_so, match[0].rm_eo);
261 #endif
262 		rm->lno = lno;
263 		rm->cno = match[0].rm_so;
264 
265 		/*
266 		 * If a change command, it's possible to move beyond the end
267 		 * of a line.  Historic vi generally got this wrong (e.g. try
268 		 * "c?$<cr>").  Not all that sure this gets it right, there
269 		 * are lots of strange cases.
270 		 */
271 		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
272 			rm->cno = len != 0 ? len - 1 : 0;
273 
274 		rval = 0;
275 		break;
276 	}
277 
278 	if (LF_ISSET(SEARCH_MSG))
279 		search_busy(sp, BUSY_OFF);
280 	return (rval);
281 }
282 
283 /*
284  * b_search --
285  *	Do a backward search.
286  *
287  * PUBLIC: int b_search __P((SCR *,
288  * PUBLIC:    MARK *, MARK *, char *, size_t, char **, u_int));
289  */
290 int
291 b_search(sp, fm, rm, ptrn, plen, eptrn, flags)
292 	SCR *sp;
293 	MARK *fm, *rm;
294 	char *ptrn, **eptrn;
295 	size_t plen;
296 	u_int flags;
297 {
298 	busy_t btype;
299 	recno_t lno;
300 	regmatch_t match[1];
301 	size_t coff, last, len;
302 	int cnt, eval, rval, wrapped;
303 	char *l;
304 
305 	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
306 		return (1);
307 
308 	/*
309 	 * If doing incremental search, set the "starting" position past the
310 	 * current column, so that we search a minimal distance and still
311 	 * match special patterns, e.g., \> for the end of a word.  This is
312 	 * safe when the cursor is at the end of a line because we only use
313 	 * it for comparison with the location of the match.
314 	 *
315 	 * Otherwise, start searching immediately before the cursor.  If in
316 	 * the first column, start search on the previous line.
317 	 */
318 	if (LF_ISSET(SEARCH_INCR)) {
319 		lno = fm->lno;
320 		coff = fm->cno + 1;
321 	} else {
322 		if (fm->cno == 0) {
323 			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
324 				if (LF_ISSET(SEARCH_MSG))
325 					search_msg(sp, S_SOF);
326 				return (1);
327 			}
328 			lno = fm->lno - 1;
329 		} else
330 			lno = fm->lno;
331 		coff = fm->cno;
332 	}
333 
334 	btype = BUSY_ON;
335 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
336 		if (cnt-- == 0) {
337 			if (INTERRUPTED(sp))
338 				break;
339 			if (LF_ISSET(SEARCH_MSG)) {
340 				search_busy(sp, btype);
341 				btype = BUSY_UPDATE;
342 			}
343 			cnt = INTERRUPT_CHECK;
344 		}
345 		if (wrapped && lno < fm->lno || lno == 0) {
346 			if (wrapped) {
347 				if (LF_ISSET(SEARCH_MSG))
348 					search_msg(sp, S_NOTFOUND);
349 				break;
350 			}
351 			if (!O_ISSET(sp, O_WRAPSCAN)) {
352 				if (LF_ISSET(SEARCH_MSG))
353 					search_msg(sp, S_SOF);
354 				break;
355 			}
356 			if (db_last(sp, &lno))
357 				break;
358 			if (lno == 0) {
359 				if (LF_ISSET(SEARCH_MSG))
360 					search_msg(sp, S_EMPTY);
361 				break;
362 			}
363 			++lno;
364 			wrapped = 1;
365 			continue;
366 		}
367 
368 		if (db_get(sp, lno, 0, &l, &len))
369 			break;
370 
371 		/* Set the termination. */
372 		match[0].rm_so = 0;
373 		match[0].rm_eo = len;
374 
375 #if defined(DEBUG) && 0
376 		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
377 #endif
378 		/* Search the line. */
379 		eval = regexec(&sp->re_c, l, 1, match,
380 		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
381 		if (eval == REG_NOMATCH)
382 			continue;
383 		if (eval != 0) {
384 			if (LF_ISSET(SEARCH_MSG))
385 				re_error(sp, eval, &sp->re_c);
386 			else
387 				(void)sp->gp->scr_bell(sp);
388 			break;
389 		}
390 
391 		/* Check for a match starting past the cursor. */
392 		if (coff != 0 && match[0].rm_so >= coff)
393 			continue;
394 
395 		/* Warn if the search wrapped. */
396 		if (wrapped && LF_ISSET(SEARCH_WMSG))
397 			search_msg(sp, S_WRAP);
398 
399 #if defined(DEBUG) && 0
400 		TRACE(sp, "B found: %qu to %qu\n",
401 		    match[0].rm_so, match[0].rm_eo);
402 #endif
403 		/*
404 		 * We now have the first match on the line.  Step through the
405 		 * line character by character until find the last acceptable
406 		 * match.  This is painful, we need a better interface to regex
407 		 * to make this work.
408 		 */
409 		for (;;) {
410 			last = match[0].rm_so++;
411 			if (match[0].rm_so >= len)
412 				break;
413 			match[0].rm_eo = len;
414 			eval = regexec(&sp->re_c, l, 1, match,
415 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
416 			    REG_STARTEND);
417 			if (eval == REG_NOMATCH)
418 				break;
419 			if (eval != 0) {
420 				if (LF_ISSET(SEARCH_MSG))
421 					re_error(sp, eval, &sp->re_c);
422 				else
423 					(void)sp->gp->scr_bell(sp);
424 				goto err;
425 			}
426 			if (coff && match[0].rm_so >= coff)
427 				break;
428 		}
429 		rm->lno = lno;
430 
431 		/* See comment in f_search(). */
432 		if (!LF_ISSET(SEARCH_EOL) && last >= len)
433 			rm->cno = len != 0 ? len - 1 : 0;
434 		else
435 			rm->cno = last;
436 		rval = 0;
437 		break;
438 	}
439 
440 err:	if (LF_ISSET(SEARCH_MSG))
441 		search_busy(sp, BUSY_OFF);
442 	return (rval);
443 }
444 
445 /*
446  * search_msg --
447  *	Display one of the search messages.
448  */
449 static void
450 search_msg(sp, msg)
451 	SCR *sp;
452 	smsg_t msg;
453 {
454 	switch (msg) {
455 	case S_EMPTY:
456 		msgq(sp, M_ERR, "072|File empty; nothing to search");
457 		break;
458 	case S_EOF:
459 		msgq(sp, M_ERR,
460 		    "073|Reached end-of-file without finding the pattern");
461 		break;
462 	case S_NOPREV:
463 		msgq(sp, M_ERR, "074|No previous search pattern");
464 		break;
465 	case S_NOTFOUND:
466 		msgq(sp, M_ERR, "075|Pattern not found");
467 		break;
468 	case S_SOF:
469 		msgq(sp, M_ERR,
470 		    "076|Reached top-of-file without finding the pattern");
471 		break;
472 	case S_WRAP:
473 		msgq(sp, M_ERR, "077|Search wrapped");
474 		break;
475 	default:
476 		abort();
477 	}
478 }
479 
480 /*
481  * search_busy --
482  *	Put up the busy searching message.
483  *
484  * PUBLIC: void search_busy __P((SCR *, busy_t));
485  */
486 void
487 search_busy(sp, btype)
488 	SCR *sp;
489 	busy_t btype;
490 {
491 	sp->gp->scr_busy(sp, "078|Searching...", btype);
492 }
493