xref: /freebsd/contrib/less/search.c (revision 6607310b740c499e273ae5bbf1f308ed91b61c05)
1 /* $FreeBSD$ */
2 /*
3  * Copyright (C) 1984-2007  Mark Nudelman
4  *
5  * You may distribute under the terms of either the GNU General Public
6  * License or the Less License, as specified in the README file.
7  *
8  * For more information about less, or for information on how to
9  * contact the author, see the README file.
10  */
11 
12 
13 /*
14  * Routines to search a file for a pattern.
15  */
16 
17 #include "less.h"
18 #include "position.h"
19 #include "charset.h"
20 
21 #define	MINPOS(a,b)	(((a) < (b)) ? (a) : (b))
22 #define	MAXPOS(a,b)	(((a) > (b)) ? (a) : (b))
23 
24 #if HAVE_POSIX_REGCOMP
25 #include <regex.h>
26 #ifdef REG_EXTENDED
27 #define	REGCOMP_FLAG	(less_is_more ? 0 : REG_EXTENDED)
28 #else
29 #define	REGCOMP_FLAG	0
30 #endif
31 #endif
32 #if HAVE_PCRE
33 #include <pcre.h>
34 #endif
35 #if HAVE_RE_COMP
36 char *re_comp();
37 int re_exec();
38 #endif
39 #if HAVE_REGCMP
40 char *regcmp();
41 char *regex();
42 extern char *__loc1;
43 #endif
44 #if HAVE_V8_REGCOMP
45 #include "regexp.h"
46 #endif
47 
48 static int match();
49 
50 extern int sigs;
51 extern int how_search;
52 extern int caseless;
53 extern int linenums;
54 extern int sc_height;
55 extern int jump_sline;
56 extern int bs_mode;
57 extern int less_is_more;
58 extern int ctldisp;
59 extern int status_col;
60 extern void * constant ml_search;
61 extern POSITION start_attnpos;
62 extern POSITION end_attnpos;
63 #if HILITE_SEARCH
64 extern int hilite_search;
65 extern int screen_trashed;
66 extern int size_linebuf;
67 extern int squished;
68 extern int can_goto_line;
69 static int hide_hilite;
70 static int oldbot;
71 static POSITION prep_startpos;
72 static POSITION prep_endpos;
73 
74 struct hilite
75 {
76 	struct hilite *hl_next;
77 	POSITION hl_startpos;
78 	POSITION hl_endpos;
79 };
80 static struct hilite hilite_anchor = { NULL, NULL_POSITION, NULL_POSITION };
81 #define	hl_first	hl_next
82 #endif
83 
84 /*
85  * These are the static variables that represent the "remembered"
86  * search pattern.
87  */
88 #if HAVE_POSIX_REGCOMP
89 static regex_t *regpattern = NULL;
90 #endif
91 #if HAVE_PCRE
92 pcre *regpattern = NULL;
93 #endif
94 #if HAVE_RE_COMP
95 int re_pattern = 0;
96 #endif
97 #if HAVE_REGCMP
98 static char *cpattern = NULL;
99 #endif
100 #if HAVE_V8_REGCOMP
101 static struct regexp *regpattern = NULL;
102 #endif
103 
104 static int is_caseless;
105 static int is_ucase_pattern;
106 static int last_search_type;
107 static char *last_pattern = NULL;
108 
109 /*
110  * Convert text.  Perform one or more of these transformations:
111  */
112 #define	CVT_TO_LC	01	/* Convert upper-case to lower-case */
113 #define	CVT_BS		02	/* Do backspace processing */
114 #define	CVT_CRLF	04	/* Remove CR after LF */
115 #define	CVT_ANSI	010	/* Remove ANSI escape sequences */
116 
117 	static void
118 cvt_text(odst, osrc, lenp, ops)
119 	char *odst;
120 	char *osrc;
121 	int *lenp;
122 	int ops;
123 {
124 	char *dst;
125 	char *src;
126 	register char *src_end;
127 	LWCHAR ch;
128 
129 	if (lenp != NULL)
130 		src_end = osrc + *lenp;
131 	else
132 		src_end = osrc + strlen(osrc);
133 
134 	for (src = osrc, dst = odst;  src < src_end;  )
135 	{
136 		ch = step_char(&src, +1, src_end);
137 		if ((ops & CVT_TO_LC) && IS_UPPER(ch))
138 		{
139 			/* Convert uppercase to lowercase. */
140 			put_wchar(&dst, TO_LOWER(ch));
141 		} else if ((ops & CVT_BS) && ch == '\b' && dst > odst)
142 		{
143 			/* Delete BS and preceding char. */
144 			do {
145 				dst--;
146 			} while (dst > odst &&
147 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
148 		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
149 		{
150 			/* Skip to end of ANSI escape sequence. */
151 			while (src + 1 != src_end)
152 				if (!is_ansi_middle(*++src))
153 					break;
154 		} else
155 			/* Just copy. */
156 			put_wchar(&dst, ch);
157 	}
158 	if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r')
159 		dst--;
160 	*dst = '\0';
161 	if (lenp != NULL)
162 		*lenp = dst - odst;
163 }
164 
165 /*
166  * Determine which conversions to perform.
167  */
168 	static int
169 get_cvt_ops()
170 {
171 	int ops = 0;
172 	if (is_caseless || bs_mode == BS_SPECIAL)
173 	{
174 		if (is_caseless)
175 			ops |= CVT_TO_LC;
176 		if (bs_mode == BS_SPECIAL)
177 			ops |= CVT_BS;
178 		if (bs_mode != BS_CONTROL)
179 			ops |= CVT_CRLF;
180 	} else if (bs_mode != BS_CONTROL)
181 	{
182 		ops |= CVT_CRLF;
183 	}
184 	if (ctldisp == OPT_ONPLUS)
185 		ops |= CVT_ANSI;
186 	return (ops);
187 }
188 
189 /*
190  * Are there any uppercase letters in this string?
191  */
192 	static int
193 is_ucase(str)
194 	char *str;
195 {
196 	char *str_end = str + strlen(str);
197 	LWCHAR ch;
198 
199 	while (str < str_end)
200 	{
201 		ch = step_char(&str, +1, str_end);
202 		if (IS_UPPER(ch))
203 			return (1);
204 	}
205 	return (0);
206 }
207 
208 /*
209  * Is there a previous (remembered) search pattern?
210  */
211 	static int
212 prev_pattern()
213 {
214 	if (last_search_type & SRCH_NO_REGEX)
215 		return (last_pattern != NULL);
216 #if HAVE_POSIX_REGCOMP
217 	return (regpattern != NULL);
218 #endif
219 #if HAVE_PCRE
220 	return (regpattern != NULL);
221 #endif
222 #if HAVE_RE_COMP
223 	return (re_pattern != 0);
224 #endif
225 #if HAVE_REGCMP
226 	return (cpattern != NULL);
227 #endif
228 #if HAVE_V8_REGCOMP
229 	return (regpattern != NULL);
230 #endif
231 #if NO_REGEX
232 	return (last_pattern != NULL);
233 #endif
234 }
235 
236 #if HILITE_SEARCH
237 /*
238  * Repaint the hilites currently displayed on the screen.
239  * Repaint each line which contains highlighted text.
240  * If on==0, force all hilites off.
241  */
242 	public void
243 repaint_hilite(on)
244 	int on;
245 {
246 	int slinenum;
247 	POSITION pos;
248 	POSITION epos;
249 	int save_hide_hilite;
250 
251 	if (squished)
252 		repaint();
253 
254 	save_hide_hilite = hide_hilite;
255 	if (!on)
256 	{
257 		if (hide_hilite)
258 			return;
259 		hide_hilite = 1;
260 	}
261 
262 	if (!can_goto_line)
263 	{
264 		repaint();
265 		hide_hilite = save_hide_hilite;
266 		return;
267 	}
268 
269 	for (slinenum = TOP;  slinenum < TOP + sc_height-1;  slinenum++)
270 	{
271 		pos = position(slinenum);
272 		if (pos == NULL_POSITION)
273 			continue;
274 		epos = position(slinenum+1);
275 #if 0
276 		/*
277 		 * If any character in the line is highlighted,
278 		 * repaint the line.
279 		 *
280 		 * {{ This doesn't work -- if line is drawn with highlights
281 		 * which should be erased (e.g. toggle -i with status column),
282 		 * we must redraw the line even if it has no highlights.
283 		 * For now, just repaint every line. }}
284 		 */
285 		if (is_hilited(pos, epos, 1, NULL))
286 #endif
287 		{
288 			(void) forw_line(pos);
289 			goto_line(slinenum);
290 			put_line();
291 		}
292 	}
293 	if (!oldbot)
294 		lower_left();
295 	hide_hilite = save_hide_hilite;
296 }
297 
298 /*
299  * Clear the attn hilite.
300  */
301 	public void
302 clear_attn()
303 {
304 	int slinenum;
305 	POSITION old_start_attnpos;
306 	POSITION old_end_attnpos;
307 	POSITION pos;
308 	POSITION epos;
309 	int moved = 0;
310 
311 	if (start_attnpos == NULL_POSITION)
312 		return;
313 	old_start_attnpos = start_attnpos;
314 	old_end_attnpos = end_attnpos;
315 	start_attnpos = end_attnpos = NULL_POSITION;
316 
317 	if (!can_goto_line)
318 	{
319 		repaint();
320 		return;
321 	}
322 	if (squished)
323 		repaint();
324 
325 	for (slinenum = TOP;  slinenum < TOP + sc_height-1;  slinenum++)
326 	{
327 		pos = position(slinenum);
328 		if (pos == NULL_POSITION)
329 			continue;
330 		epos = position(slinenum+1);
331 		if (pos < old_end_attnpos &&
332 		     (epos == NULL_POSITION || epos > old_start_attnpos))
333 		{
334 			(void) forw_line(pos);
335 			goto_line(slinenum);
336 			put_line();
337 			moved = 1;
338 		}
339 	}
340 	if (moved)
341 		lower_left();
342 }
343 #endif
344 
345 /*
346  * Hide search string highlighting.
347  */
348 	public void
349 undo_search()
350 {
351 	if (!prev_pattern())
352 	{
353 		error("No previous regular expression", NULL_PARG);
354 		return;
355 	}
356 #if HILITE_SEARCH
357 	hide_hilite = !hide_hilite;
358 	repaint_hilite(1);
359 #endif
360 }
361 
362 /*
363  * Compile a search pattern, for future use by match_pattern.
364  */
365 	static int
366 compile_pattern(pattern, search_type)
367 	char *pattern;
368 	int search_type;
369 {
370 	if ((search_type & SRCH_NO_REGEX) == 0)
371 	{
372 #if HAVE_POSIX_REGCOMP
373 		regex_t *s = (regex_t *) ecalloc(1, sizeof(regex_t));
374 		if (regcomp(s, pattern, REGCOMP_FLAG))
375 		{
376 			free(s);
377 			error("Invalid pattern", NULL_PARG);
378 			return (-1);
379 		}
380 		if (regpattern != NULL)
381 			regfree(regpattern);
382 		regpattern = s;
383 #endif
384 #if HAVE_PCRE
385 		pcre *comp;
386 		const char *errstring;
387 		int erroffset;
388 		PARG parg;
389 		comp = pcre_compile(pattern, 0,
390 				&errstring, &erroffset, NULL);
391 		if (comp == NULL)
392 		{
393 			parg.p_string = (char *) errstring;
394 			error("%s", &parg);
395 			return (-1);
396 		}
397 		regpattern = comp;
398 #endif
399 #if HAVE_RE_COMP
400 		PARG parg;
401 		if ((parg.p_string = re_comp(pattern)) != NULL)
402 		{
403 			error("%s", &parg);
404 			return (-1);
405 		}
406 		re_pattern = 1;
407 #endif
408 #if HAVE_REGCMP
409 		char *s;
410 		if ((s = regcmp(pattern, 0)) == NULL)
411 		{
412 			error("Invalid pattern", NULL_PARG);
413 			return (-1);
414 		}
415 		if (cpattern != NULL)
416 			free(cpattern);
417 		cpattern = s;
418 #endif
419 #if HAVE_V8_REGCOMP
420 		struct regexp *s;
421 		if ((s = regcomp(pattern)) == NULL)
422 		{
423 			/*
424 			 * regcomp has already printed an error message
425 			 * via regerror().
426 			 */
427 			return (-1);
428 		}
429 		if (regpattern != NULL)
430 			free(regpattern);
431 		regpattern = s;
432 #endif
433 	}
434 
435 	if (last_pattern != NULL)
436 		free(last_pattern);
437 	last_pattern = (char *) calloc(1, strlen(pattern)+1);
438 	if (last_pattern != NULL)
439 		strcpy(last_pattern, pattern);
440 
441 	last_search_type = search_type;
442 	return (0);
443 }
444 
445 /*
446  * Forget that we have a compiled pattern.
447  */
448 	static void
449 uncompile_pattern()
450 {
451 #if HAVE_POSIX_REGCOMP
452 	if (regpattern != NULL)
453 		regfree(regpattern);
454 	regpattern = NULL;
455 #endif
456 #if HAVE_PCRE
457 	if (regpattern != NULL)
458 		pcre_free(regpattern);
459 	regpattern = NULL;
460 #endif
461 #if HAVE_RE_COMP
462 	re_pattern = 0;
463 #endif
464 #if HAVE_REGCMP
465 	if (cpattern != NULL)
466 		free(cpattern);
467 	cpattern = NULL;
468 #endif
469 #if HAVE_V8_REGCOMP
470 	if (regpattern != NULL)
471 		free(regpattern);
472 	regpattern = NULL;
473 #endif
474 	last_pattern = NULL;
475 }
476 
477 /*
478  * Perform a pattern match with the previously compiled pattern.
479  * Set sp and ep to the start and end of the matched string.
480  */
481 	static int
482 match_pattern(line, line_len, sp, ep, notbol)
483 	char *line;
484 	int line_len;
485 	char **sp;
486 	char **ep;
487 	int notbol;
488 {
489 	int matched;
490 
491 	if (last_search_type & SRCH_NO_REGEX)
492 		return (match(last_pattern, strlen(last_pattern), line, line_len, sp, ep));
493 
494 #if HAVE_POSIX_REGCOMP
495 	{
496 		regmatch_t rm;
497 		int flags = (notbol) ? REG_NOTBOL : 0;
498 		matched = !regexec(regpattern, line, 1, &rm, flags);
499 		if (!matched)
500 			return (0);
501 #ifndef __WATCOMC__
502 		*sp = line + rm.rm_so;
503 		*ep = line + rm.rm_eo;
504 #else
505 		*sp = rm.rm_sp;
506 		*ep = rm.rm_ep;
507 #endif
508 	}
509 #endif
510 #if HAVE_PCRE
511 	{
512 		int flags = (notbol) ? PCRE_NOTBOL : 0;
513 		int ovector[3];
514 		matched = pcre_exec(regpattern, NULL, line, line_len,
515 			0, flags, ovector, 3) >= 0;
516 		if (!matched)
517 			return (0);
518 		*sp = line + ovector[0];
519 		*ep = line + ovector[1];
520 	}
521 #endif
522 #if HAVE_RE_COMP
523 	matched = (re_exec(line) == 1);
524 	/*
525 	 * re_exec doesn't seem to provide a way to get the matched string.
526 	 */
527 	*sp = *ep = NULL;
528 #endif
529 #if HAVE_REGCMP
530 	*ep = regex(cpattern, line);
531 	matched = (*ep != NULL);
532 	if (!matched)
533 		return (0);
534 	*sp = __loc1;
535 #endif
536 #if HAVE_V8_REGCOMP
537 #if HAVE_REGEXEC2
538 	matched = regexec2(regpattern, line, notbol);
539 #else
540 	matched = regexec(regpattern, line);
541 #endif
542 	if (!matched)
543 		return (0);
544 	*sp = regpattern->startp[0];
545 	*ep = regpattern->endp[0];
546 #endif
547 #if NO_REGEX
548 	matched = match(last_pattern, strlen(last_pattern), line, line_len, sp, ep);
549 #endif
550 	return (matched);
551 }
552 
553 #if HILITE_SEARCH
554 /*
555  * Clear the hilite list.
556  */
557 	public void
558 clr_hilite()
559 {
560 	struct hilite *hl;
561 	struct hilite *nexthl;
562 
563 	for (hl = hilite_anchor.hl_first;  hl != NULL;  hl = nexthl)
564 	{
565 		nexthl = hl->hl_next;
566 		free((void*)hl);
567 	}
568 	hilite_anchor.hl_first = NULL;
569 	prep_startpos = prep_endpos = NULL_POSITION;
570 }
571 
572 /*
573  * Should any characters in a specified range be highlighted?
574  */
575 	static int
576 is_hilited_range(pos, epos)
577 	POSITION pos;
578 	POSITION epos;
579 {
580 	struct hilite *hl;
581 
582 	/*
583 	 * Look at each highlight and see if any part of it falls in the range.
584 	 */
585 	for (hl = hilite_anchor.hl_first;  hl != NULL;  hl = hl->hl_next)
586 	{
587 		if (hl->hl_endpos > pos &&
588 		    (epos == NULL_POSITION || epos > hl->hl_startpos))
589 			return (1);
590 	}
591 	return (0);
592 }
593 
594 /*
595  * Should any characters in a specified range be highlighted?
596  * If nohide is nonzero, don't consider hide_hilite.
597  */
598 	public int
599 is_hilited(pos, epos, nohide, p_matches)
600 	POSITION pos;
601 	POSITION epos;
602 	int nohide;
603 	int *p_matches;
604 {
605 	int match;
606 
607 	if (p_matches != NULL)
608 		*p_matches = 0;
609 
610 	if (!status_col &&
611 	    start_attnpos != NULL_POSITION &&
612 	    pos < end_attnpos &&
613 	     (epos == NULL_POSITION || epos > start_attnpos))
614 		/*
615 		 * The attn line overlaps this range.
616 		 */
617 		return (1);
618 
619 	match = is_hilited_range(pos, epos);
620 	if (!match)
621 		return (0);
622 
623 	if (p_matches != NULL)
624 		/*
625 		 * Report matches, even if we're hiding highlights.
626 		 */
627 		*p_matches = 1;
628 
629 	if (hilite_search == 0)
630 		/*
631 		 * Not doing highlighting.
632 		 */
633 		return (0);
634 
635 	if (!nohide && hide_hilite)
636 		/*
637 		 * Highlighting is hidden.
638 		 */
639 		return (0);
640 
641 	return (1);
642 }
643 
644 /*
645  * Add a new hilite to a hilite list.
646  */
647 	static void
648 add_hilite(anchor, hl)
649 	struct hilite *anchor;
650 	struct hilite *hl;
651 {
652 	struct hilite *ihl;
653 
654 	/*
655 	 * Hilites are sorted in the list; find where new one belongs.
656 	 * Insert new one after ihl.
657 	 */
658 	for (ihl = anchor;  ihl->hl_next != NULL;  ihl = ihl->hl_next)
659 	{
660 		if (ihl->hl_next->hl_startpos > hl->hl_startpos)
661 			break;
662 	}
663 
664 	/*
665 	 * Truncate hilite so it doesn't overlap any existing ones
666 	 * above and below it.
667 	 */
668 	if (ihl != anchor)
669 		hl->hl_startpos = MAXPOS(hl->hl_startpos, ihl->hl_endpos);
670 	if (ihl->hl_next != NULL)
671 		hl->hl_endpos = MINPOS(hl->hl_endpos, ihl->hl_next->hl_startpos);
672 	if (hl->hl_startpos >= hl->hl_endpos)
673 	{
674 		/*
675 		 * Hilite was truncated out of existence.
676 		 */
677 		free(hl);
678 		return;
679 	}
680 	hl->hl_next = ihl->hl_next;
681 	ihl->hl_next = hl;
682 }
683 
684 	static void
685 adj_hilite_ansi(cvt_ops, line, line_len, npos)
686 	int cvt_ops;
687 	char **line;
688 	int line_len;
689 	POSITION *npos;
690 {
691 	char *line_end = *line + line_len;
692 
693 	if (cvt_ops & CVT_ANSI)
694 		while (IS_CSI_START(**line))
695 		{
696 			/*
697 			 * Found an ESC.  The file position moves
698 			 * forward past the entire ANSI escape sequence.
699 			 */
700 			(*line)++;
701 			(*npos)++;
702 			while (*line < line_end)
703 			{
704 				(*npos)++;
705 				if (!is_ansi_middle(*(*line)++))
706 					break;
707 			}
708 		}
709 }
710 
711 /*
712  * Adjust hl_startpos & hl_endpos to account for backspace processing.
713  */
714 	static void
715 adj_hilite(anchor, linepos, cvt_ops)
716 	struct hilite *anchor;
717 	POSITION linepos;
718 	int cvt_ops;
719 {
720 	char *line;
721 	int line_len;
722 	char *line_end;
723 	struct hilite *hl;
724 	int checkstart;
725 	POSITION opos;
726 	POSITION npos;
727 
728 	/*
729 	 * The line was already scanned and hilites were added (in hilite_line).
730 	 * But it was assumed that each char position in the line
731 	 * correponds to one char position in the file.
732 	 * This may not be true if there are backspaces in the line.
733 	 * Get the raw line again.  Look at each character.
734 	 */
735 	(void) forw_raw_line(linepos, &line, &line_len);
736 	line_end = line + line_len;
737 	opos = npos = linepos;
738 	hl = anchor->hl_first;
739 	checkstart = TRUE;
740 	while (hl != NULL)
741 	{
742 		/*
743 		 * See if we need to adjust the current hl_startpos or
744 		 * hl_endpos.  After adjusting startpos[i], move to endpos[i].
745 		 * After adjusting endpos[i], move to startpos[i+1].
746 		 * The hilite list must be sorted thus:
747 		 * startpos[0] < endpos[0] <= startpos[1] < endpos[1] <= etc.
748 		 */
749 		if (checkstart && hl->hl_startpos == opos)
750 		{
751 			hl->hl_startpos = npos;
752 			checkstart = FALSE;
753 			continue; /* {{ not really necessary }} */
754 		} else if (!checkstart && hl->hl_endpos == opos)
755 		{
756 			hl->hl_endpos = npos;
757 			checkstart = TRUE;
758 			hl = hl->hl_next;
759 			continue; /* {{ necessary }} */
760 		}
761 		if (line == line_end)
762 			break;
763 		adj_hilite_ansi(cvt_ops, &line, line_end - line, &npos);
764 		opos++;
765 		npos++;
766 		line++;
767 		if (cvt_ops & CVT_BS)
768 		{
769 			while (*line == '\b')
770 			{
771 				npos++;
772 				line++;
773 				adj_hilite_ansi(cvt_ops, &line, line_end - line, &npos);
774 				if (line == line_end)
775 				{
776 					--npos;
777 					--line;
778 					break;
779 				}
780 				/*
781 				 * Found a backspace.  The file position moves
782 				 * forward by 2 relative to the processed line
783 				 * which was searched in hilite_line.
784 				 */
785 				npos++;
786 				line++;
787 			}
788 		}
789 	}
790 }
791 
792 /*
793  * Make a hilite for each string in a physical line which matches
794  * the current pattern.
795  * sp,ep delimit the first match already found.
796  */
797 	static void
798 hilite_line(linepos, line, line_len, sp, ep, cvt_ops)
799 	POSITION linepos;
800 	char *line;
801 	int line_len;
802 	char *sp;
803 	char *ep;
804 	int cvt_ops;
805 {
806 	char *searchp;
807 	char *line_end = line + line_len;
808 	struct hilite *hl;
809 	struct hilite hilites;
810 
811 	if (sp == NULL || ep == NULL)
812 		return;
813 	/*
814 	 * sp and ep delimit the first match in the line.
815 	 * Mark the corresponding file positions, then
816 	 * look for further matches and mark them.
817 	 * {{ This technique, of calling match_pattern on subsequent
818 	 *    substrings of the line, may mark more than is correct
819 	 *    if the pattern starts with "^".  This bug is fixed
820 	 *    for those regex functions that accept a notbol parameter
821 	 *    (currently POSIX, PCRE and V8-with-regexec2). }}
822 	 */
823 	searchp = line;
824 	/*
825 	 * Put the hilites into a temporary list until they're adjusted.
826 	 */
827 	hilites.hl_first = NULL;
828 	do {
829 		if (ep > sp)
830 		{
831 			/*
832 			 * Assume that each char position in the "line"
833 			 * buffer corresponds to one char position in the file.
834 			 * This is not quite true; we need to adjust later.
835 			 */
836 			hl = (struct hilite *) ecalloc(1, sizeof(struct hilite));
837 			hl->hl_startpos = linepos + (sp-line);
838 			hl->hl_endpos = linepos + (ep-line);
839 			add_hilite(&hilites, hl);
840 		}
841 		/*
842 		 * If we matched more than zero characters,
843 		 * move to the first char after the string we matched.
844 		 * If we matched zero, just move to the next char.
845 		 */
846 		if (ep > searchp)
847 			searchp = ep;
848 		else if (searchp != line_end)
849 			searchp++;
850 		else /* end of line */
851 			break;
852 	} while (match_pattern(searchp, line_end - searchp, &sp, &ep, 1));
853 
854 	/*
855 	 * If there were backspaces in the original line, they
856 	 * were removed, and hl_startpos/hl_endpos are not correct.
857 	 * {{ This is very ugly. }}
858 	 */
859 	adj_hilite(&hilites, linepos, cvt_ops);
860 
861 	/*
862 	 * Now put the hilites into the real list.
863 	 */
864 	while ((hl = hilites.hl_next) != NULL)
865 	{
866 		hilites.hl_next = hl->hl_next;
867 		add_hilite(&hilite_anchor, hl);
868 	}
869 }
870 #endif
871 
872 /*
873  * Change the caseless-ness of searches.
874  * Updates the internal search state to reflect a change in the -i flag.
875  */
876 	public void
877 chg_caseless()
878 {
879 	if (!is_ucase_pattern)
880 		/*
881 		 * Pattern did not have uppercase.
882 		 * Just set the search caselessness to the global caselessness.
883 		 */
884 		is_caseless = caseless;
885 	else
886 		/*
887 		 * Pattern did have uppercase.
888 		 * Discard the pattern; we can't change search caselessness now.
889 		 */
890 		uncompile_pattern();
891 }
892 
893 #if HILITE_SEARCH
894 /*
895  * Find matching text which is currently on screen and highlight it.
896  */
897 	static void
898 hilite_screen()
899 {
900 	struct scrpos scrpos;
901 
902 	get_scrpos(&scrpos);
903 	if (scrpos.pos == NULL_POSITION)
904 		return;
905 	prep_hilite(scrpos.pos, position(BOTTOM_PLUS_ONE), -1);
906 	repaint_hilite(1);
907 }
908 
909 /*
910  * Change highlighting parameters.
911  */
912 	public void
913 chg_hilite()
914 {
915 	/*
916 	 * Erase any highlights currently on screen.
917 	 */
918 	clr_hilite();
919 	hide_hilite = 0;
920 
921 	if (hilite_search == OPT_ONPLUS)
922 		/*
923 		 * Display highlights.
924 		 */
925 		hilite_screen();
926 }
927 #endif
928 
929 /*
930  * Figure out where to start a search.
931  */
932 	static POSITION
933 search_pos(search_type)
934 	int search_type;
935 {
936 	POSITION pos;
937 	int linenum;
938 
939 	if (empty_screen())
940 	{
941 		/*
942 		 * Start at the beginning (or end) of the file.
943 		 * The empty_screen() case is mainly for
944 		 * command line initiated searches;
945 		 * for example, "+/xyz" on the command line.
946 		 * Also for multi-file (SRCH_PAST_EOF) searches.
947 		 */
948 		if (search_type & SRCH_FORW)
949 		{
950 			return (ch_zero());
951 		} else
952 		{
953 			pos = ch_length();
954 			if (pos == NULL_POSITION)
955 			{
956 				(void) ch_end_seek();
957 				pos = ch_length();
958 			}
959 			return (pos);
960 		}
961 	}
962 	if (how_search)
963 	{
964 		/*
965 		 * Search does not include current screen.
966 		 */
967 		if (search_type & SRCH_FORW)
968 			linenum = BOTTOM_PLUS_ONE;
969 		else
970 			linenum = TOP;
971 		pos = position(linenum);
972 	} else
973 	{
974 		/*
975 		 * Search includes current screen.
976 		 * It starts at the jump target (if searching backwards),
977 		 * or at the jump target plus one (if forwards).
978 		 */
979 		linenum = adjsline(jump_sline);
980 		pos = position(linenum);
981 		if (search_type & SRCH_FORW)
982 		{
983 			pos = forw_raw_line(pos, (char **)NULL, (int *)NULL);
984 			while (pos == NULL_POSITION)
985 			{
986 				if (++linenum >= sc_height)
987 					break;
988 				pos = position(linenum);
989 			}
990 		} else
991 		{
992 			while (pos == NULL_POSITION)
993 			{
994 				if (--linenum < 0)
995 					break;
996 				pos = position(linenum);
997 			}
998 		}
999 	}
1000 	return (pos);
1001 }
1002 
1003 /*
1004  * Search a subset of the file, specified by start/end position.
1005  */
1006 	static int
1007 search_range(pos, endpos, search_type, matches, maxlines, plinepos, pendpos)
1008 	POSITION pos;
1009 	POSITION endpos;
1010 	int search_type;
1011 	int matches;
1012 	int maxlines;
1013 	POSITION *plinepos;
1014 	POSITION *pendpos;
1015 {
1016 	char *line;
1017 	int line_len;
1018 	LINENUM linenum;
1019 	char *sp, *ep;
1020 	int line_match;
1021 	int cvt_ops;
1022 	POSITION linepos, oldpos;
1023 
1024 	linenum = find_linenum(pos);
1025 	oldpos = pos;
1026 	for (;;)
1027 	{
1028 		/*
1029 		 * Get lines until we find a matching one or until
1030 		 * we hit end-of-file (or beginning-of-file if we're
1031 		 * going backwards), or until we hit the end position.
1032 		 */
1033 		if (ABORT_SIGS())
1034 		{
1035 			/*
1036 			 * A signal aborts the search.
1037 			 */
1038 			return (-1);
1039 		}
1040 
1041 		if ((endpos != NULL_POSITION && pos >= endpos) || maxlines == 0)
1042 		{
1043 			/*
1044 			 * Reached end position without a match.
1045 			 */
1046 			if (pendpos != NULL)
1047 				*pendpos = pos;
1048 			return (matches);
1049 		}
1050 		if (maxlines > 0)
1051 			maxlines--;
1052 
1053 		if (search_type & SRCH_FORW)
1054 		{
1055 			/*
1056 			 * Read the next line, and save the
1057 			 * starting position of that line in linepos.
1058 			 */
1059 			linepos = pos;
1060 			pos = forw_raw_line(pos, &line, &line_len);
1061 			if (linenum != 0)
1062 				linenum++;
1063 		} else
1064 		{
1065 			/*
1066 			 * Read the previous line and save the
1067 			 * starting position of that line in linepos.
1068 			 */
1069 			pos = back_raw_line(pos, &line, &line_len);
1070 			linepos = pos;
1071 			if (linenum != 0)
1072 				linenum--;
1073 		}
1074 
1075 		if (pos == NULL_POSITION)
1076 		{
1077 			/*
1078 			 * Reached EOF/BOF without a match.
1079 			 */
1080 			if (pendpos != NULL)
1081 				*pendpos = oldpos;
1082 			return (matches);
1083 		}
1084 
1085 		/*
1086 		 * If we're using line numbers, we might as well
1087 		 * remember the information we have now (the position
1088 		 * and line number of the current line).
1089 		 * Don't do it for every line because it slows down
1090 		 * the search.  Remember the line number only if
1091 		 * we're "far" from the last place we remembered it.
1092 		 */
1093 		if (linenums && abs((int)(pos - oldpos)) > 1024)
1094 			add_lnum(linenum, pos);
1095 		oldpos = pos;
1096 
1097 		/*
1098 		 * If it's a caseless search, convert the line to lowercase.
1099 		 * If we're doing backspace processing, delete backspaces.
1100 		 */
1101 		cvt_ops = get_cvt_ops();
1102 		cvt_text(line, line, &line_len, cvt_ops);
1103 
1104 		/*
1105 		 * Test the next line to see if we have a match.
1106 		 * We are successful if we either want a match and got one,
1107 		 * or if we want a non-match and got one.
1108 		 */
1109 		line_match = match_pattern(line, line_len, &sp, &ep, 0);
1110 		line_match = (!(search_type & SRCH_NO_MATCH) && line_match) ||
1111 				((search_type & SRCH_NO_MATCH) && !line_match);
1112 		if (!line_match)
1113 			continue;
1114 		/*
1115 		 * Got a match.
1116 		 */
1117 		if (search_type & SRCH_FIND_ALL)
1118 		{
1119 #if HILITE_SEARCH
1120 			/*
1121 			 * We are supposed to find all matches in the range.
1122 			 * Just add the matches in this line to the
1123 			 * hilite list and keep searching.
1124 			 */
1125 			if (line_match)
1126 				hilite_line(linepos, line, line_len, sp, ep, cvt_ops);
1127 #endif
1128 		} else if (--matches <= 0)
1129 		{
1130 			/*
1131 			 * Found the one match we're looking for.
1132 			 * Return it.
1133 			 */
1134 #if HILITE_SEARCH
1135 			if (hilite_search == OPT_ON)
1136 			{
1137 				/*
1138 				 * Clear the hilite list and add only
1139 				 * the matches in this one line.
1140 				 */
1141 				clr_hilite();
1142 				if (line_match)
1143 					hilite_line(linepos, line, line_len, sp, ep, cvt_ops);
1144 			}
1145 #endif
1146 			if (plinepos != NULL)
1147 				*plinepos = linepos;
1148 			return (0);
1149 		}
1150 	}
1151 }
1152 
1153  /*
1154  * search for a pattern in history. If found, compile that pattern.
1155  */
1156 	static int
1157 hist_pattern(search_type)
1158 	int search_type;
1159 {
1160 #if CMD_HISTORY
1161 	char *pattern;
1162 
1163 	set_mlist(ml_search, 0);
1164 	pattern = cmd_lastpattern();
1165 	if (pattern == NULL)
1166 		return (0);
1167 
1168 	if (caseless == OPT_ONPLUS)
1169 		cvt_text(pattern, pattern, (int *)NULL, CVT_TO_LC);
1170 
1171 	if (compile_pattern(pattern, search_type) < 0)
1172 		return (0);
1173 
1174 	is_ucase_pattern = is_ucase(pattern);
1175 	if (is_ucase_pattern && caseless != OPT_ONPLUS)
1176 		is_caseless = 0;
1177 	else
1178 		is_caseless = caseless;
1179 
1180 #if HILITE_SEARCH
1181 	if (hilite_search == OPT_ONPLUS && !hide_hilite)
1182 		hilite_screen();
1183 #endif
1184 
1185 	return (1);
1186 #else /* CMD_HISTORY */
1187 	return (0);
1188 #endif /* CMD_HISTORY */
1189 }
1190 
1191 /*
1192  * Search for the n-th occurrence of a specified pattern,
1193  * either forward or backward.
1194  * Return the number of matches not yet found in this file
1195  * (that is, n minus the number of matches found).
1196  * Return -1 if the search should be aborted.
1197  * Caller may continue the search in another file
1198  * if less than n matches are found in this file.
1199  */
1200 	public int
1201 search(search_type, pattern, n)
1202 	int search_type;
1203 	char *pattern;
1204 	int n;
1205 {
1206 	POSITION pos;
1207 	int ucase;
1208 
1209 	if (pattern == NULL || *pattern == '\0')
1210 	{
1211 		/*
1212 		 * A null pattern means use the previously compiled pattern.
1213 		 */
1214 		if (!prev_pattern() && !hist_pattern(search_type))
1215 		{
1216 			error("No previous regular expression", NULL_PARG);
1217 			return (-1);
1218 		}
1219 		if ((search_type & SRCH_NO_REGEX) !=
1220 		    (last_search_type & SRCH_NO_REGEX))
1221 		{
1222 			error("Please re-enter search pattern", NULL_PARG);
1223 			return -1;
1224 		}
1225 #if HILITE_SEARCH
1226 		if (hilite_search == OPT_ON)
1227 		{
1228 			/*
1229 			 * Erase the highlights currently on screen.
1230 			 * If the search fails, we'll redisplay them later.
1231 			 */
1232 			repaint_hilite(0);
1233 		}
1234 		if (hilite_search == OPT_ONPLUS && hide_hilite)
1235 		{
1236 			/*
1237 			 * Highlight any matches currently on screen,
1238 			 * before we actually start the search.
1239 			 */
1240 			hide_hilite = 0;
1241 			hilite_screen();
1242 		}
1243 		hide_hilite = 0;
1244 #endif
1245 	} else
1246 	{
1247 		/*
1248 		 * Compile the pattern.
1249 		 */
1250 		ucase = is_ucase(pattern);
1251 		if (caseless == OPT_ONPLUS)
1252 			cvt_text(pattern, pattern, (int *)NULL, CVT_TO_LC);
1253 		if (compile_pattern(pattern, search_type) < 0)
1254 			return (-1);
1255 		/*
1256 		 * Ignore case if -I is set OR
1257 		 * -i is set AND the pattern is all lowercase.
1258 		 */
1259 		is_ucase_pattern = ucase;
1260 		if (is_ucase_pattern && caseless != OPT_ONPLUS)
1261 			is_caseless = 0;
1262 		else
1263 			is_caseless = caseless;
1264 #if HILITE_SEARCH
1265 		if (hilite_search)
1266 		{
1267 			/*
1268 			 * Erase the highlights currently on screen.
1269 			 * Also permanently delete them from the hilite list.
1270 			 */
1271 			repaint_hilite(0);
1272 			hide_hilite = 0;
1273 			clr_hilite();
1274 		}
1275 		if (hilite_search == OPT_ONPLUS)
1276 		{
1277 			/*
1278 			 * Highlight any matches currently on screen,
1279 			 * before we actually start the search.
1280 			 */
1281 			hilite_screen();
1282 		}
1283 #endif
1284 	}
1285 
1286 	/*
1287 	 * Figure out where to start the search.
1288 	 */
1289 	pos = search_pos(search_type);
1290 	if (pos == NULL_POSITION)
1291 	{
1292 		/*
1293 		 * Can't find anyplace to start searching from.
1294 		 */
1295 		if (search_type & SRCH_PAST_EOF)
1296 			return (n);
1297 		/* repaint(); -- why was this here? */
1298 		error("Nothing to search", NULL_PARG);
1299 		return (-1);
1300 	}
1301 
1302 	n = search_range(pos, NULL_POSITION, search_type, n, -1,
1303 			&pos, (POSITION*)NULL);
1304 	if (n != 0)
1305 	{
1306 		/*
1307 		 * Search was unsuccessful.
1308 		 */
1309 #if HILITE_SEARCH
1310 		if (hilite_search == OPT_ON && n > 0)
1311 			/*
1312 			 * Redisplay old hilites.
1313 			 */
1314 			repaint_hilite(1);
1315 #endif
1316 		return (n);
1317 	}
1318 
1319 	if (!(search_type & SRCH_NO_MOVE))
1320 	{
1321 		/*
1322 		 * Go to the matching line.
1323 		 */
1324 		jump_loc(pos, jump_sline);
1325 	}
1326 
1327 #if HILITE_SEARCH
1328 	if (hilite_search == OPT_ON)
1329 		/*
1330 		 * Display new hilites in the matching line.
1331 		 */
1332 		repaint_hilite(1);
1333 #endif
1334 	return (0);
1335 }
1336 
1337 
1338 #if HILITE_SEARCH
1339 /*
1340  * Prepare hilites in a given range of the file.
1341  *
1342  * The pair (prep_startpos,prep_endpos) delimits a contiguous region
1343  * of the file that has been "prepared"; that is, scanned for matches for
1344  * the current search pattern, and hilites have been created for such matches.
1345  * If prep_startpos == NULL_POSITION, the prep region is empty.
1346  * If prep_endpos == NULL_POSITION, the prep region extends to EOF.
1347  * prep_hilite asks that the range (spos,epos) be covered by the prep region.
1348  */
1349 	public void
1350 prep_hilite(spos, epos, maxlines)
1351 	POSITION spos;
1352 	POSITION epos;
1353 	int maxlines;
1354 {
1355 	POSITION nprep_startpos = prep_startpos;
1356 	POSITION nprep_endpos = prep_endpos;
1357 	POSITION new_epos;
1358 	POSITION max_epos;
1359 	int result;
1360 	int i;
1361 /*
1362  * Search beyond where we're asked to search, so the prep region covers
1363  * more than we need.  Do one big search instead of a bunch of small ones.
1364  */
1365 #define	SEARCH_MORE (3*size_linebuf)
1366 
1367 	if (!prev_pattern())
1368 		return;
1369 
1370 	/*
1371 	 * If we're limited to a max number of lines, figure out the
1372 	 * file position we should stop at.
1373 	 */
1374 	if (maxlines < 0)
1375 		max_epos = NULL_POSITION;
1376 	else
1377 	{
1378 		max_epos = spos;
1379 		for (i = 0;  i < maxlines;  i++)
1380 			max_epos = forw_raw_line(max_epos, (char **)NULL, (int *)NULL);
1381 	}
1382 
1383 	/*
1384 	 * Find two ranges:
1385 	 * The range that we need to search (spos,epos); and the range that
1386 	 * the "prep" region will then cover (nprep_startpos,nprep_endpos).
1387 	 */
1388 
1389 	if (prep_startpos == NULL_POSITION ||
1390 	    (epos != NULL_POSITION && epos < prep_startpos) ||
1391 	    spos > prep_endpos)
1392 	{
1393 		/*
1394 		 * New range is not contiguous with old prep region.
1395 		 * Discard the old prep region and start a new one.
1396 		 */
1397 		clr_hilite();
1398 		if (epos != NULL_POSITION)
1399 			epos += SEARCH_MORE;
1400 		nprep_startpos = spos;
1401 	} else
1402 	{
1403 		/*
1404 		 * New range partially or completely overlaps old prep region.
1405 		 */
1406 		if (epos == NULL_POSITION)
1407 		{
1408 			/*
1409 			 * New range goes to end of file.
1410 			 */
1411 			;
1412 		} else if (epos > prep_endpos)
1413 		{
1414 			/*
1415 			 * New range ends after old prep region.
1416 			 * Extend prep region to end at end of new range.
1417 			 */
1418 			epos += SEARCH_MORE;
1419 		} else /* (epos <= prep_endpos) */
1420 		{
1421 			/*
1422 			 * New range ends within old prep region.
1423 			 * Truncate search to end at start of old prep region.
1424 			 */
1425 			epos = prep_startpos;
1426 		}
1427 
1428 		if (spos < prep_startpos)
1429 		{
1430 			/*
1431 			 * New range starts before old prep region.
1432 			 * Extend old prep region backwards to start at
1433 			 * start of new range.
1434 			 */
1435 			if (spos < SEARCH_MORE)
1436 				spos = 0;
1437 			else
1438 				spos -= SEARCH_MORE;
1439 			nprep_startpos = spos;
1440 		} else /* (spos >= prep_startpos) */
1441 		{
1442 			/*
1443 			 * New range starts within or after old prep region.
1444 			 * Trim search to start at end of old prep region.
1445 			 */
1446 			spos = prep_endpos;
1447 		}
1448 	}
1449 
1450 	if (epos != NULL_POSITION && max_epos != NULL_POSITION &&
1451 	    epos > max_epos)
1452 		/*
1453 		 * Don't go past the max position we're allowed.
1454 		 */
1455 		epos = max_epos;
1456 
1457 	if (epos == NULL_POSITION || epos > spos)
1458 	{
1459 		result = search_range(spos, epos, SRCH_FORW|SRCH_FIND_ALL, 0,
1460 				maxlines, (POSITION*)NULL, &new_epos);
1461 		if (result < 0)
1462 			return;
1463 		if (prep_endpos == NULL_POSITION || new_epos > prep_endpos)
1464 			nprep_endpos = new_epos;
1465 	}
1466 	prep_startpos = nprep_startpos;
1467 	prep_endpos = nprep_endpos;
1468 }
1469 #endif
1470 
1471 /*
1472  * Simple pattern matching function.
1473  * It supports no metacharacters like *, etc.
1474  */
1475 	static int
1476 match(pattern, pattern_len, buf, buf_len, pfound, pend)
1477 	char *pattern;
1478 	int pattern_len;
1479 	char *buf;
1480 	int buf_len;
1481 	char **pfound, **pend;
1482 {
1483 	register char *pp, *lp;
1484 	register char *pattern_end = pattern + pattern_len;
1485 	register char *buf_end = buf + buf_len;
1486 
1487 	for ( ;  buf < buf_end;  buf++)
1488 	{
1489 		for (pp = pattern, lp = buf;  *pp == *lp;  pp++, lp++)
1490 			if (pp == pattern_end || lp == buf_end)
1491 				break;
1492 		if (pp == pattern_end)
1493 		{
1494 			if (pfound != NULL)
1495 				*pfound = buf;
1496 			if (pend != NULL)
1497 				*pend = lp;
1498 			return (1);
1499 		}
1500 	}
1501 	return (0);
1502 }
1503 
1504 #if HAVE_V8_REGCOMP
1505 /*
1506  * This function is called by the V8 regcomp to report
1507  * errors in regular expressions.
1508  */
1509 	void
1510 regerror(s)
1511 	char *s;
1512 {
1513 	PARG parg;
1514 
1515 	parg.p_string = s;
1516 	error("%s", &parg);
1517 }
1518 #endif
1519 
1520