xref: /freebsd/contrib/nvi/common/search.c (revision 5ca8e32633c4ffbbcd6762e5888b6a4ba0708c6c)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #include <sys/types.h>
13 #include <sys/queue.h>
14 #include <sys/time.h>
15 
16 #include <bitstring.h>
17 #include <ctype.h>
18 #include <errno.h>
19 #include <limits.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
24 
25 #include "common.h"
26 
27 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
28 
29 static void	search_msg(SCR *, smsg_t);
30 static int	search_init(SCR *, dir_t, CHAR_T *, size_t, CHAR_T **, u_int);
31 
32 /*
33  * search_init --
34  *	Set up a search.
35  */
36 static int
37 search_init(SCR *sp, dir_t dir, CHAR_T *ptrn, size_t plen, CHAR_T **epp,
38     u_int flags)
39 {
40 	recno_t lno;
41 	int delim;
42 	CHAR_T *p, *t;
43 
44 	/* If the file is empty, it's a fast search. */
45 	if (sp->lno <= 1) {
46 		if (db_last(sp, &lno))
47 			return (1);
48 		if (lno == 0) {
49 			if (LF_ISSET(SEARCH_MSG))
50 				search_msg(sp, S_EMPTY);
51 			return (1);
52 		}
53 	}
54 
55 	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
56 		/*
57 		 * Use the saved pattern if no pattern specified, or if only
58 		 * one or two delimiter characters specified.
59 		 *
60 		 * !!!
61 		 * Historically, only the pattern itself was saved, vi didn't
62 		 * preserve addressing or delta information.
63 		 */
64 		if (ptrn == NULL)
65 			goto prev;
66 		if (plen == 1) {
67 			if (epp != NULL)
68 				*epp = ptrn + 1;
69 			goto prev;
70 		}
71 		if (ptrn[0] == ptrn[1]) {
72 			if (epp != NULL)
73 				*epp = ptrn + 2;
74 
75 			/* Complain if we don't have a previous pattern. */
76 prev:			if (sp->re == NULL) {
77 				search_msg(sp, S_NOPREV);
78 				return (1);
79 			}
80 			/* Re-compile the search pattern if necessary. */
81 			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
82 			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
83 			    RE_C_SEARCH |
84 			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
85 				return (1);
86 
87 			/* Set the search direction. */
88 			if (LF_ISSET(SEARCH_SET))
89 				sp->searchdir = dir;
90 			return (0);
91 		}
92 
93 		/*
94 		 * Set the delimiter, and move forward to the terminating
95 		 * delimiter, handling escaped delimiters.
96 		 *
97 		 * QUOTING NOTE:
98 		 * Only discard an escape character if it escapes a delimiter.
99 		 */
100 		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
101 			if (--plen == 0 || p[0] == delim) {
102 				if (plen != 0)
103 					++p;
104 				break;
105 			}
106 			if (plen > 1 && p[0] == '\\') {
107 				if (p[1] == delim) {
108 					++p;
109 					--plen;
110 				} else if ( p[1] == '\\') {
111 					*t++ = *p++;
112 					--plen;
113 				}
114 			}
115 		}
116 		if (epp != NULL)
117 			*epp = p;
118 
119 		plen = t - ptrn;
120 	}
121 
122 	/* Compile the RE. */
123 	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
124 	    RE_C_SEARCH |
125 	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
126 	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
127 	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
128 		return (1);
129 
130 	/* Set the search direction. */
131 	if (LF_ISSET(SEARCH_SET))
132 		sp->searchdir = dir;
133 
134 	return (0);
135 }
136 
137 /*
138  * f_search --
139  *	Do a forward search.
140  *
141  * PUBLIC: int f_search(SCR *,
142  * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int);
143  */
144 int
145 f_search(SCR *sp, MARK *fm, MARK *rm, CHAR_T *ptrn, size_t plen,
146     CHAR_T **eptrn, u_int flags)
147 {
148 	busy_t btype;
149 	recno_t lno;
150 	regmatch_t match[1];
151 	size_t coff, len;
152 	int cnt, eval, rval, wrapped = 0;
153 	CHAR_T *l;
154 
155 	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
156 		return (1);
157 
158 	if (LF_ISSET(SEARCH_FILE)) {
159 		lno = 1;
160 		coff = 0;
161 	} else {
162 		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
163 			return (1);
164 		lno = fm->lno;
165 
166 		/*
167 		 * If doing incremental search, start searching at the previous
168 		 * column, so that we search a minimal distance and still match
169 		 * special patterns, e.g., \< for beginning of a word.
170 		 *
171 		 * Otherwise, start searching immediately after the cursor.  If
172 		 * at the end of the line, start searching on the next line.
173 		 * This is incompatible (read bug fix) with the historic vi --
174 		 * searches for the '$' pattern never moved forward, and the
175 		 * "-t foo" didn't work if the 'f' was the first character in
176 		 * the file.
177 		 */
178 		if (LF_ISSET(SEARCH_INCR)) {
179 			if ((coff = fm->cno) != 0)
180 				--coff;
181 		} else if (fm->cno + 1 >= len) {
182 			coff = 0;
183 			lno = fm->lno + 1;
184 			if (db_get(sp, lno, 0, &l, &len)) {
185 				if (!O_ISSET(sp, O_WRAPSCAN)) {
186 					if (LF_ISSET(SEARCH_MSG))
187 						search_msg(sp, S_EOF);
188 					return (1);
189 				}
190 				lno = 1;
191 				wrapped = 1;
192 			}
193 		} else
194 			coff = fm->cno + 1;
195 	}
196 
197 	btype = BUSY_ON;
198 	for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) {
199 		if (cnt-- == 0) {
200 			if (INTERRUPTED(sp))
201 				break;
202 			if (LF_ISSET(SEARCH_MSG)) {
203 				search_busy(sp, btype);
204 				btype = BUSY_UPDATE;
205 			}
206 			cnt = INTERRUPT_CHECK;
207 		}
208 		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
209 			if (wrapped) {
210 				if (LF_ISSET(SEARCH_MSG))
211 					search_msg(sp, S_NOTFOUND);
212 				break;
213 			}
214 			if (!O_ISSET(sp, O_WRAPSCAN)) {
215 				if (LF_ISSET(SEARCH_MSG))
216 					search_msg(sp, S_EOF);
217 				break;
218 			}
219 			lno = 0;
220 			wrapped = 1;
221 			continue;
222 		}
223 
224 		/* If already at EOL, just keep going. */
225 		if (len != 0 && coff == len)
226 			continue;
227 
228 		/* Set the termination. */
229 		match[0].rm_so = coff;
230 		match[0].rm_eo = len;
231 
232 #if defined(DEBUG) && 0
233 		TRACE(sp, "F search: %lu from %u to %u\n",
234 		    lno, coff, len != 0 ? len - 1 : len);
235 #endif
236 		/* Search the line. */
237 		eval = regexec(&sp->re_c, l, 1, match,
238 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
239 		if (eval == REG_NOMATCH)
240 			continue;
241 		if (eval != 0) {
242 			if (LF_ISSET(SEARCH_MSG))
243 				re_error(sp, eval, &sp->re_c);
244 			else
245 				(void)sp->gp->scr_bell(sp);
246 			break;
247 		}
248 
249 		/* Warn if the search wrapped. */
250 		if (wrapped && LF_ISSET(SEARCH_WMSG))
251 			search_msg(sp, S_WRAP);
252 
253 #if defined(DEBUG) && 0
254 		TRACE(sp, "F search: %qu to %qu\n",
255 		    match[0].rm_so, match[0].rm_eo);
256 #endif
257 		rm->lno = lno;
258 		rm->cno = match[0].rm_so;
259 
260 		/*
261 		 * If a change command, it's possible to move beyond the end
262 		 * of a line.  Historic vi generally got this wrong (e.g. try
263 		 * "c?$<cr>").  Not all that sure this gets it right, there
264 		 * are lots of strange cases.
265 		 */
266 		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
267 			rm->cno = len != 0 ? len - 1 : 0;
268 
269 		rval = 0;
270 		break;
271 	}
272 
273 	if (LF_ISSET(SEARCH_MSG))
274 		search_busy(sp, BUSY_OFF);
275 	return (rval);
276 }
277 
278 /*
279  * b_search --
280  *	Do a backward search.
281  *
282  * PUBLIC: int b_search(SCR *,
283  * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int);
284  */
285 int
286 b_search(SCR *sp, MARK *fm, MARK *rm, CHAR_T *ptrn, size_t plen,
287     CHAR_T **eptrn, u_int flags)
288 {
289 	busy_t btype;
290 	recno_t lno;
291 	regmatch_t match[1];
292 	size_t coff, last, len;
293 	int cnt, eval, rval, wrapped;
294 	CHAR_T *l;
295 
296 	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
297 		return (1);
298 
299 	/*
300 	 * If doing incremental search, set the "starting" position past the
301 	 * current column, so that we search a minimal distance and still
302 	 * match special patterns, e.g., \> for the end of a word.  This is
303 	 * safe when the cursor is at the end of a line because we only use
304 	 * it for comparison with the location of the match.
305 	 *
306 	 * Otherwise, start searching immediately before the cursor.  If in
307 	 * the first column, start search on the previous line.
308 	 */
309 	if (LF_ISSET(SEARCH_INCR)) {
310 		lno = fm->lno;
311 		coff = fm->cno + 1;
312 	} else {
313 		if (fm->cno == 0) {
314 			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
315 				if (LF_ISSET(SEARCH_MSG))
316 					search_msg(sp, S_SOF);
317 				return (1);
318 			}
319 			lno = fm->lno - 1;
320 		} else
321 			lno = fm->lno;
322 		coff = fm->cno;
323 	}
324 
325 	btype = BUSY_ON;
326 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
327 		if (cnt-- == 0) {
328 			if (INTERRUPTED(sp))
329 				break;
330 			if (LF_ISSET(SEARCH_MSG)) {
331 				search_busy(sp, btype);
332 				btype = BUSY_UPDATE;
333 			}
334 			cnt = INTERRUPT_CHECK;
335 		}
336 		if ((wrapped && lno < fm->lno) || lno == 0) {
337 			if (wrapped) {
338 				if (LF_ISSET(SEARCH_MSG))
339 					search_msg(sp, S_NOTFOUND);
340 				break;
341 			}
342 			if (!O_ISSET(sp, O_WRAPSCAN)) {
343 				if (LF_ISSET(SEARCH_MSG))
344 					search_msg(sp, S_SOF);
345 				break;
346 			}
347 			if (db_last(sp, &lno))
348 				break;
349 			if (lno == 0) {
350 				if (LF_ISSET(SEARCH_MSG))
351 					search_msg(sp, S_EMPTY);
352 				break;
353 			}
354 			++lno;
355 			wrapped = 1;
356 			continue;
357 		}
358 
359 		if (db_get(sp, lno, 0, &l, &len))
360 			break;
361 
362 		/* Set the termination. */
363 		match[0].rm_so = 0;
364 		match[0].rm_eo = len;
365 
366 #if defined(DEBUG) && 0
367 		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
368 #endif
369 		/* Search the line. */
370 		eval = regexec(&sp->re_c, l, 1, match,
371 		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
372 		if (eval == REG_NOMATCH)
373 			continue;
374 		if (eval != 0) {
375 			if (LF_ISSET(SEARCH_MSG))
376 				re_error(sp, eval, &sp->re_c);
377 			else
378 				(void)sp->gp->scr_bell(sp);
379 			break;
380 		}
381 
382 		/* Check for a match starting past the cursor. */
383 		if (coff != 0 && match[0].rm_so >= coff)
384 			continue;
385 
386 		/* Warn if the search wrapped. */
387 		if (wrapped && LF_ISSET(SEARCH_WMSG))
388 			search_msg(sp, S_WRAP);
389 
390 #if defined(DEBUG) && 0
391 		TRACE(sp, "B found: %qu to %qu\n",
392 		    match[0].rm_so, match[0].rm_eo);
393 #endif
394 		/*
395 		 * We now have the first match on the line.  Step through the
396 		 * line character by character until find the last acceptable
397 		 * match.  This is painful, we need a better interface to regex
398 		 * to make this work.
399 		 */
400 		for (;;) {
401 			last = match[0].rm_so++;
402 			if (match[0].rm_so >= len)
403 				break;
404 			match[0].rm_eo = len;
405 			eval = regexec(&sp->re_c, l, 1, match,
406 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
407 			    REG_STARTEND);
408 			if (eval == REG_NOMATCH)
409 				break;
410 			if (eval != 0) {
411 				if (LF_ISSET(SEARCH_MSG))
412 					re_error(sp, eval, &sp->re_c);
413 				else
414 					(void)sp->gp->scr_bell(sp);
415 				goto err;
416 			}
417 			if (coff && match[0].rm_so >= coff)
418 				break;
419 		}
420 		rm->lno = lno;
421 
422 		/* See comment in f_search(). */
423 		if (!LF_ISSET(SEARCH_EOL) && last >= len)
424 			rm->cno = len != 0 ? len - 1 : 0;
425 		else
426 			rm->cno = last;
427 		rval = 0;
428 		break;
429 	}
430 
431 err:	if (LF_ISSET(SEARCH_MSG))
432 		search_busy(sp, BUSY_OFF);
433 	return (rval);
434 }
435 
436 /*
437  * search_msg --
438  *	Display one of the search messages.
439  */
440 static void
441 search_msg(SCR *sp, smsg_t msg)
442 {
443 	switch (msg) {
444 	case S_EMPTY:
445 		msgq(sp, M_ERR, "072|File empty; nothing to search");
446 		break;
447 	case S_EOF:
448 		msgq(sp, M_ERR,
449 		    "073|Reached end-of-file without finding the pattern");
450 		break;
451 	case S_NOPREV:
452 		msgq(sp, M_ERR, "074|No previous search pattern");
453 		break;
454 	case S_NOTFOUND:
455 		msgq(sp, M_ERR, "075|Pattern not found");
456 		break;
457 	case S_SOF:
458 		msgq(sp, M_ERR,
459 		    "076|Reached top-of-file without finding the pattern");
460 		break;
461 	case S_WRAP:
462 		msgq(sp, M_ERR, "077|Search wrapped");
463 		break;
464 	default:
465 		abort();
466 	}
467 }
468 
469 /*
470  * search_busy --
471  *	Put up the busy searching message.
472  *
473  * PUBLIC: void search_busy(SCR *, busy_t);
474  */
475 void
476 search_busy(SCR *sp, busy_t btype)
477 {
478 	sp->gp->scr_busy(sp, "078|Searching...", btype);
479 }
480