xref: /freebsd/contrib/less/search.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /* $FreeBSD$ */
2 /*
3  * Copyright (C) 1984-2007  Mark Nudelman
4  *
5  * You may distribute under the terms of either the GNU General Public
6  * License or the Less License, as specified in the README file.
7  *
8  * For more information about less, or for information on how to
9  * contact the author, see the README file.
10  */
11 
12 
13 /*
14  * Routines to search a file for a pattern.
15  */
16 
17 #include "less.h"
18 #include "position.h"
19 #include "charset.h"
20 
21 #define	MINPOS(a,b)	(((a) < (b)) ? (a) : (b))
22 #define	MAXPOS(a,b)	(((a) > (b)) ? (a) : (b))
23 
24 #if HAVE_POSIX_REGCOMP
25 #include <regex.h>
26 #ifdef REG_EXTENDED
27 #define	REGCOMP_FLAG	(less_is_more ? 0 : REG_EXTENDED)
28 #else
29 #define	REGCOMP_FLAG	0
30 #endif
31 #endif
32 #if HAVE_PCRE
33 #include <pcre.h>
34 #endif
35 #if HAVE_RE_COMP
36 char *re_comp();
37 int re_exec();
38 #endif
39 #if HAVE_REGCMP
40 char *regcmp();
41 char *regex();
42 extern char *__loc1;
43 #endif
44 #if HAVE_V8_REGCOMP
45 #include "regexp.h"
46 #endif
47 
48 static int match();
49 
50 extern int sigs;
51 extern int how_search;
52 extern int caseless;
53 extern int linenums;
54 extern int sc_height;
55 extern int jump_sline;
56 extern int bs_mode;
57 extern int less_is_more;
58 extern int ctldisp;
59 extern int status_col;
60 extern void * constant ml_search;
61 extern POSITION start_attnpos;
62 extern POSITION end_attnpos;
63 #if HILITE_SEARCH
64 extern int hilite_search;
65 extern int screen_trashed;
66 extern int size_linebuf;
67 extern int squished;
68 extern int can_goto_line;
69 extern int utf_mode;
70 static int hide_hilite;
71 static int oldbot;
72 static POSITION prep_startpos;
73 static POSITION prep_endpos;
74 
75 struct hilite
76 {
77 	struct hilite *hl_next;
78 	POSITION hl_startpos;
79 	POSITION hl_endpos;
80 };
81 static struct hilite hilite_anchor = { NULL, NULL_POSITION, NULL_POSITION };
82 #define	hl_first	hl_next
83 #endif
84 
85 /*
86  * These are the static variables that represent the "remembered"
87  * search pattern.
88  */
89 #if HAVE_POSIX_REGCOMP
90 static regex_t *regpattern = NULL;
91 #endif
92 #if HAVE_PCRE
93 pcre *regpattern = NULL;
94 #endif
95 #if HAVE_RE_COMP
96 int re_pattern = 0;
97 #endif
98 #if HAVE_REGCMP
99 static char *cpattern = NULL;
100 #endif
101 #if HAVE_V8_REGCOMP
102 static struct regexp *regpattern = NULL;
103 #endif
104 
105 static int is_caseless;
106 static int is_ucase_pattern;
107 static int last_search_type;
108 static char *last_pattern = NULL;
109 
110 #define	CVT_TO_LC	01	/* Convert upper-case to lower-case */
111 #define	CVT_BS		02	/* Do backspace processing */
112 #define	CVT_CRLF	04	/* Remove CR after LF */
113 #define	CVT_ANSI	010	/* Remove ANSI escape sequences */
114 
115 /*
116  * Get the length of a buffer needed to convert a string.
117  */
118 	static int
119 cvt_length(len, ops)
120 	int len;
121 	int ops;
122 {
123 	if (utf_mode)
124 		/*
125 		 * Just copying a string in UTF-8 mode can cause it to grow
126 		 * in length.
127 		 * Six output bytes for one input byte is the worst case
128 		 * (and unfortunately is far more than is needed in any
129 		 * non-pathological situation, so this is very wasteful).
130 		 */
131 		len *= 6;
132 	return len + 1;
133 }
134 
135 /*
136  * Convert text.  Perform one or more of these transformations:
137  */
138 	static void
139 cvt_text(odst, osrc, lenp, ops)
140 	char *odst;
141 	char *osrc;
142 	int *lenp;
143 	int ops;
144 {
145 	char *dst;
146 	char *src;
147 	register char *src_end;
148 	LWCHAR ch;
149 
150 	if (lenp != NULL)
151 		src_end = osrc + *lenp;
152 	else
153 		src_end = osrc + strlen(osrc);
154 
155 	for (src = osrc, dst = odst;  src < src_end;  )
156 	{
157 		ch = step_char(&src, +1, src_end);
158 		if ((ops & CVT_TO_LC) && IS_UPPER(ch))
159 		{
160 			/* Convert uppercase to lowercase. */
161 			put_wchar(&dst, TO_LOWER(ch));
162 		} else if ((ops & CVT_BS) && ch == '\b' && dst > odst)
163 		{
164 			/* Delete backspace and preceding char. */
165 			do {
166 				dst--;
167 			} while (dst > odst &&
168 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
169 		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
170 		{
171 			/* Skip to end of ANSI escape sequence. */
172 			while (src + 1 != src_end)
173 				if (!is_ansi_middle(*++src))
174 					break;
175 		} else
176 			/* Just copy. */
177 			put_wchar(&dst, ch);
178 	}
179 	if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r')
180 		dst--;
181 	*dst = '\0';
182 	if (lenp != NULL)
183 		*lenp = dst - odst;
184 }
185 
186 /*
187  * Determine which conversions to perform.
188  */
189 	static int
190 get_cvt_ops()
191 {
192 	int ops = 0;
193 	if (is_caseless || bs_mode == BS_SPECIAL)
194 	{
195 		if (is_caseless)
196 			ops |= CVT_TO_LC;
197 		if (bs_mode == BS_SPECIAL)
198 			ops |= CVT_BS;
199 		if (bs_mode != BS_CONTROL)
200 			ops |= CVT_CRLF;
201 	} else if (bs_mode != BS_CONTROL)
202 	{
203 		ops |= CVT_CRLF;
204 	}
205 	if (ctldisp == OPT_ONPLUS)
206 		ops |= CVT_ANSI;
207 	return (ops);
208 }
209 
210 /*
211  * Are there any uppercase letters in this string?
212  */
213 	static int
214 is_ucase(str)
215 	char *str;
216 {
217 	char *str_end = str + strlen(str);
218 	LWCHAR ch;
219 
220 	while (str < str_end)
221 	{
222 		ch = step_char(&str, +1, str_end);
223 		if (IS_UPPER(ch))
224 			return (1);
225 	}
226 	return (0);
227 }
228 
229 /*
230  * Is there a previous (remembered) search pattern?
231  */
232 	static int
233 prev_pattern()
234 {
235 	if (last_search_type & SRCH_NO_REGEX)
236 		return (last_pattern != NULL);
237 #if HAVE_POSIX_REGCOMP
238 	return (regpattern != NULL);
239 #endif
240 #if HAVE_PCRE
241 	return (regpattern != NULL);
242 #endif
243 #if HAVE_RE_COMP
244 	return (re_pattern != 0);
245 #endif
246 #if HAVE_REGCMP
247 	return (cpattern != NULL);
248 #endif
249 #if HAVE_V8_REGCOMP
250 	return (regpattern != NULL);
251 #endif
252 #if NO_REGEX
253 	return (last_pattern != NULL);
254 #endif
255 }
256 
257 #if HILITE_SEARCH
258 /*
259  * Repaint the hilites currently displayed on the screen.
260  * Repaint each line which contains highlighted text.
261  * If on==0, force all hilites off.
262  */
263 	public void
264 repaint_hilite(on)
265 	int on;
266 {
267 	int slinenum;
268 	POSITION pos;
269 	POSITION epos;
270 	int save_hide_hilite;
271 
272 	if (squished)
273 		repaint();
274 
275 	save_hide_hilite = hide_hilite;
276 	if (!on)
277 	{
278 		if (hide_hilite)
279 			return;
280 		hide_hilite = 1;
281 	}
282 
283 	if (!can_goto_line)
284 	{
285 		repaint();
286 		hide_hilite = save_hide_hilite;
287 		return;
288 	}
289 
290 	for (slinenum = TOP;  slinenum < TOP + sc_height-1;  slinenum++)
291 	{
292 		pos = position(slinenum);
293 		if (pos == NULL_POSITION)
294 			continue;
295 		epos = position(slinenum+1);
296 #if 0
297 		/*
298 		 * If any character in the line is highlighted,
299 		 * repaint the line.
300 		 *
301 		 * {{ This doesn't work -- if line is drawn with highlights
302 		 * which should be erased (e.g. toggle -i with status column),
303 		 * we must redraw the line even if it has no highlights.
304 		 * For now, just repaint every line. }}
305 		 */
306 		if (is_hilited(pos, epos, 1, NULL))
307 #endif
308 		{
309 			(void) forw_line(pos);
310 			goto_line(slinenum);
311 			put_line();
312 		}
313 	}
314 	if (!oldbot)
315 		lower_left();
316 	hide_hilite = save_hide_hilite;
317 }
318 
319 /*
320  * Clear the attn hilite.
321  */
322 	public void
323 clear_attn()
324 {
325 	int slinenum;
326 	POSITION old_start_attnpos;
327 	POSITION old_end_attnpos;
328 	POSITION pos;
329 	POSITION epos;
330 	int moved = 0;
331 
332 	if (start_attnpos == NULL_POSITION)
333 		return;
334 	old_start_attnpos = start_attnpos;
335 	old_end_attnpos = end_attnpos;
336 	start_attnpos = end_attnpos = NULL_POSITION;
337 
338 	if (!can_goto_line)
339 	{
340 		repaint();
341 		return;
342 	}
343 	if (squished)
344 		repaint();
345 
346 	for (slinenum = TOP;  slinenum < TOP + sc_height-1;  slinenum++)
347 	{
348 		pos = position(slinenum);
349 		if (pos == NULL_POSITION)
350 			continue;
351 		epos = position(slinenum+1);
352 		if (pos < old_end_attnpos &&
353 		     (epos == NULL_POSITION || epos > old_start_attnpos))
354 		{
355 			(void) forw_line(pos);
356 			goto_line(slinenum);
357 			put_line();
358 			moved = 1;
359 		}
360 	}
361 	if (moved)
362 		lower_left();
363 }
364 #endif
365 
366 /*
367  * Hide search string highlighting.
368  */
369 	public void
370 undo_search()
371 {
372 	if (!prev_pattern())
373 	{
374 		error("No previous regular expression", NULL_PARG);
375 		return;
376 	}
377 #if HILITE_SEARCH
378 	hide_hilite = !hide_hilite;
379 	repaint_hilite(1);
380 #endif
381 }
382 
383 /*
384  * Compile a search pattern, for future use by match_pattern.
385  */
386 	static int
387 compile_pattern2(pattern, search_type)
388 	char *pattern;
389 	int search_type;
390 {
391 	if ((search_type & SRCH_NO_REGEX) == 0)
392 	{
393 #if HAVE_POSIX_REGCOMP
394 		regex_t *s = (regex_t *) ecalloc(1, sizeof(regex_t));
395 		if (regcomp(s, pattern, REGCOMP_FLAG))
396 		{
397 			free(s);
398 			error("Invalid pattern", NULL_PARG);
399 			return (-1);
400 		}
401 		if (regpattern != NULL)
402 			regfree(regpattern);
403 		regpattern = s;
404 #endif
405 #if HAVE_PCRE
406 		pcre *comp;
407 		const char *errstring;
408 		int erroffset;
409 		PARG parg;
410 		comp = pcre_compile(pattern, 0,
411 				&errstring, &erroffset, NULL);
412 		if (comp == NULL)
413 		{
414 			parg.p_string = (char *) errstring;
415 			error("%s", &parg);
416 			return (-1);
417 		}
418 		regpattern = comp;
419 #endif
420 #if HAVE_RE_COMP
421 		PARG parg;
422 		if ((parg.p_string = re_comp(pattern)) != NULL)
423 		{
424 			error("%s", &parg);
425 			return (-1);
426 		}
427 		re_pattern = 1;
428 #endif
429 #if HAVE_REGCMP
430 		char *s;
431 		if ((s = regcmp(pattern, 0)) == NULL)
432 		{
433 			error("Invalid pattern", NULL_PARG);
434 			return (-1);
435 		}
436 		if (cpattern != NULL)
437 			free(cpattern);
438 		cpattern = s;
439 #endif
440 #if HAVE_V8_REGCOMP
441 		struct regexp *s;
442 		if ((s = regcomp(pattern)) == NULL)
443 		{
444 			/*
445 			 * regcomp has already printed an error message
446 			 * via regerror().
447 			 */
448 			return (-1);
449 		}
450 		if (regpattern != NULL)
451 			free(regpattern);
452 		regpattern = s;
453 #endif
454 	}
455 
456 	if (last_pattern != NULL)
457 		free(last_pattern);
458 	last_pattern = (char *) calloc(1, strlen(pattern)+1);
459 	if (last_pattern != NULL)
460 		strcpy(last_pattern, pattern);
461 
462 	last_search_type = search_type;
463 	return (0);
464 }
465 
466 /*
467  * Like compile_pattern, but convert the pattern to lowercase if necessary.
468  */
469 	static int
470 compile_pattern(pattern, search_type)
471 	char *pattern;
472 	int search_type;
473 {
474 	char *cvt_pattern;
475 	int result;
476 
477 	if (caseless != OPT_ONPLUS)
478 		cvt_pattern = pattern;
479 	else
480 	{
481 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
482 		cvt_text(cvt_pattern, pattern, (int *)NULL, CVT_TO_LC);
483 	}
484 	result = compile_pattern2(cvt_pattern, search_type);
485 	if (cvt_pattern != pattern)
486 		free(cvt_pattern);
487 	return (result);
488 }
489 
490 /*
491  * Forget that we have a compiled pattern.
492  */
493 	static void
494 uncompile_pattern()
495 {
496 #if HAVE_POSIX_REGCOMP
497 	if (regpattern != NULL)
498 		regfree(regpattern);
499 	regpattern = NULL;
500 #endif
501 #if HAVE_PCRE
502 	if (regpattern != NULL)
503 		pcre_free(regpattern);
504 	regpattern = NULL;
505 #endif
506 #if HAVE_RE_COMP
507 	re_pattern = 0;
508 #endif
509 #if HAVE_REGCMP
510 	if (cpattern != NULL)
511 		free(cpattern);
512 	cpattern = NULL;
513 #endif
514 #if HAVE_V8_REGCOMP
515 	if (regpattern != NULL)
516 		free(regpattern);
517 	regpattern = NULL;
518 #endif
519 	last_pattern = NULL;
520 }
521 
522 /*
523  * Perform a pattern match with the previously compiled pattern.
524  * Set sp and ep to the start and end of the matched string.
525  */
526 	static int
527 match_pattern(line, line_len, sp, ep, notbol)
528 	char *line;
529 	int line_len;
530 	char **sp;
531 	char **ep;
532 	int notbol;
533 {
534 	int matched;
535 
536 	if (last_search_type & SRCH_NO_REGEX)
537 		return (match(last_pattern, strlen(last_pattern), line, line_len, sp, ep));
538 
539 #if HAVE_POSIX_REGCOMP
540 	{
541 		regmatch_t rm;
542 		int flags = (notbol) ? REG_NOTBOL : 0;
543 		matched = !regexec(regpattern, line, 1, &rm, flags);
544 		if (!matched)
545 			return (0);
546 #ifndef __WATCOMC__
547 		*sp = line + rm.rm_so;
548 		*ep = line + rm.rm_eo;
549 #else
550 		*sp = rm.rm_sp;
551 		*ep = rm.rm_ep;
552 #endif
553 	}
554 #endif
555 #if HAVE_PCRE
556 	{
557 		int flags = (notbol) ? PCRE_NOTBOL : 0;
558 		int ovector[3];
559 		matched = pcre_exec(regpattern, NULL, line, line_len,
560 			0, flags, ovector, 3) >= 0;
561 		if (!matched)
562 			return (0);
563 		*sp = line + ovector[0];
564 		*ep = line + ovector[1];
565 	}
566 #endif
567 #if HAVE_RE_COMP
568 	matched = (re_exec(line) == 1);
569 	/*
570 	 * re_exec doesn't seem to provide a way to get the matched string.
571 	 */
572 	*sp = *ep = NULL;
573 #endif
574 #if HAVE_REGCMP
575 	*ep = regex(cpattern, line);
576 	matched = (*ep != NULL);
577 	if (!matched)
578 		return (0);
579 	*sp = __loc1;
580 #endif
581 #if HAVE_V8_REGCOMP
582 #if HAVE_REGEXEC2
583 	matched = regexec2(regpattern, line, notbol);
584 #else
585 	matched = regexec(regpattern, line);
586 #endif
587 	if (!matched)
588 		return (0);
589 	*sp = regpattern->startp[0];
590 	*ep = regpattern->endp[0];
591 #endif
592 #if NO_REGEX
593 	matched = match(last_pattern, strlen(last_pattern), line, line_len, sp, ep);
594 #endif
595 	return (matched);
596 }
597 
598 #if HILITE_SEARCH
599 /*
600  * Clear the hilite list.
601  */
602 	public void
603 clr_hilite()
604 {
605 	struct hilite *hl;
606 	struct hilite *nexthl;
607 
608 	for (hl = hilite_anchor.hl_first;  hl != NULL;  hl = nexthl)
609 	{
610 		nexthl = hl->hl_next;
611 		free((void*)hl);
612 	}
613 	hilite_anchor.hl_first = NULL;
614 	prep_startpos = prep_endpos = NULL_POSITION;
615 }
616 
617 /*
618  * Should any characters in a specified range be highlighted?
619  */
620 	static int
621 is_hilited_range(pos, epos)
622 	POSITION pos;
623 	POSITION epos;
624 {
625 	struct hilite *hl;
626 
627 	/*
628 	 * Look at each highlight and see if any part of it falls in the range.
629 	 */
630 	for (hl = hilite_anchor.hl_first;  hl != NULL;  hl = hl->hl_next)
631 	{
632 		if (hl->hl_endpos > pos &&
633 		    (epos == NULL_POSITION || epos > hl->hl_startpos))
634 			return (1);
635 	}
636 	return (0);
637 }
638 
639 /*
640  * Should any characters in a specified range be highlighted?
641  * If nohide is nonzero, don't consider hide_hilite.
642  */
643 	public int
644 is_hilited(pos, epos, nohide, p_matches)
645 	POSITION pos;
646 	POSITION epos;
647 	int nohide;
648 	int *p_matches;
649 {
650 	int match;
651 
652 	if (p_matches != NULL)
653 		*p_matches = 0;
654 
655 	if (!status_col &&
656 	    start_attnpos != NULL_POSITION &&
657 	    pos < end_attnpos &&
658 	     (epos == NULL_POSITION || epos > start_attnpos))
659 		/*
660 		 * The attn line overlaps this range.
661 		 */
662 		return (1);
663 
664 	match = is_hilited_range(pos, epos);
665 	if (!match)
666 		return (0);
667 
668 	if (p_matches != NULL)
669 		/*
670 		 * Report matches, even if we're hiding highlights.
671 		 */
672 		*p_matches = 1;
673 
674 	if (hilite_search == 0)
675 		/*
676 		 * Not doing highlighting.
677 		 */
678 		return (0);
679 
680 	if (!nohide && hide_hilite)
681 		/*
682 		 * Highlighting is hidden.
683 		 */
684 		return (0);
685 
686 	return (1);
687 }
688 
689 /*
690  * Add a new hilite to a hilite list.
691  */
692 	static void
693 add_hilite(anchor, hl)
694 	struct hilite *anchor;
695 	struct hilite *hl;
696 {
697 	struct hilite *ihl;
698 
699 	/*
700 	 * Hilites are sorted in the list; find where new one belongs.
701 	 * Insert new one after ihl.
702 	 */
703 	for (ihl = anchor;  ihl->hl_next != NULL;  ihl = ihl->hl_next)
704 	{
705 		if (ihl->hl_next->hl_startpos > hl->hl_startpos)
706 			break;
707 	}
708 
709 	/*
710 	 * Truncate hilite so it doesn't overlap any existing ones
711 	 * above and below it.
712 	 */
713 	if (ihl != anchor)
714 		hl->hl_startpos = MAXPOS(hl->hl_startpos, ihl->hl_endpos);
715 	if (ihl->hl_next != NULL)
716 		hl->hl_endpos = MINPOS(hl->hl_endpos, ihl->hl_next->hl_startpos);
717 	if (hl->hl_startpos >= hl->hl_endpos)
718 	{
719 		/*
720 		 * Hilite was truncated out of existence.
721 		 */
722 		free(hl);
723 		return;
724 	}
725 	hl->hl_next = ihl->hl_next;
726 	ihl->hl_next = hl;
727 }
728 
729 /*
730  * Adjust hl_startpos & hl_endpos to account for processing by cvt_text.
731  */
732 	static void
733 adj_hilite(anchor, linepos, cvt_ops)
734 	struct hilite *anchor;
735 	POSITION linepos;
736 	int cvt_ops;
737 {
738 	char *line;
739 	char *oline;
740 	int line_len;
741 	char *line_end;
742 	struct hilite *hl;
743 	int checkstart;
744 	POSITION opos;
745 	POSITION npos;
746 	LWCHAR ch;
747 	int ncwidth;
748 
749 	/*
750 	 * The line was already scanned and hilites were added (in hilite_line).
751 	 * But it was assumed that each char position in the line
752 	 * correponds to one char position in the file.
753 	 * This may not be true if cvt_text modified the line.
754 	 * Get the raw line again.  Look at each character.
755 	 */
756 	(void) forw_raw_line(linepos, &line, &line_len);
757 	line_end = line + line_len;
758 	opos = npos = linepos;
759 	hl = anchor->hl_first;
760 	checkstart = TRUE;
761 	while (hl != NULL)
762 	{
763 		/*
764 		 * See if we need to adjust the current hl_startpos or
765 		 * hl_endpos.  After adjusting startpos[i], move to endpos[i].
766 		 * After adjusting endpos[i], move to startpos[i+1].
767 		 * The hilite list must be sorted thus:
768 		 * startpos[0] < endpos[0] <= startpos[1] < endpos[1] <= etc.
769 		 */
770 		if (checkstart && hl->hl_startpos == opos)
771 		{
772 			hl->hl_startpos = npos;
773 			checkstart = FALSE;
774 			continue; /* {{ not really necessary }} */
775 		} else if (!checkstart && hl->hl_endpos == opos)
776 		{
777 			hl->hl_endpos = npos;
778 			checkstart = TRUE;
779 			hl = hl->hl_next;
780 			continue; /* {{ necessary }} */
781 		}
782 		if (line == line_end)
783 			break;
784 
785 		/* Get the next char from the line. */
786 		oline = line;
787 		ch = step_char(&line, +1, line_end);
788 		ncwidth = line - oline;
789 		npos += ncwidth;
790 
791 		/* Figure out how this char was processed by cvt_text. */
792 		if ((cvt_ops & CVT_BS) && ch == '\b')
793 		{
794 			/* Skip the backspace and the following char. */
795 			oline = line;
796 			ch = step_char(&line, +1, line_end);
797 			ncwidth = line - oline;
798 			npos += ncwidth;
799 		} else if ((cvt_ops & CVT_TO_LC) && IS_UPPER(ch))
800 		{
801 			/* Converted uppercase to lower.
802 			 * Note that this may have changed the number of bytes
803 			 * that the character occupies. */
804 			char dbuf[6];
805 			char *dst = dbuf;
806 			put_wchar(&dst, TO_LOWER(ch));
807 			opos += dst - dbuf;
808 		} else if ((cvt_ops & CVT_ANSI) && IS_CSI_START(ch))
809 		{
810 			/* Skip to end of ANSI escape sequence. */
811 			while (line < line_end)
812 			{
813 				npos++;
814 				if (!is_ansi_middle(*++line))
815 					break;
816 			}
817 		} else
818 		{
819 			/* Ordinary unprocessed character. */
820 			opos += ncwidth;
821 		}
822 	}
823 }
824 
825 /*
826  * Make a hilite for each string in a physical line which matches
827  * the current pattern.
828  * sp,ep delimit the first match already found.
829  */
830 	static void
831 hilite_line(linepos, line, line_len, sp, ep, cvt_ops)
832 	POSITION linepos;
833 	char *line;
834 	int line_len;
835 	char *sp;
836 	char *ep;
837 	int cvt_ops;
838 {
839 	char *searchp;
840 	char *line_end = line + line_len;
841 	struct hilite *hl;
842 	struct hilite hilites;
843 
844 	if (sp == NULL || ep == NULL)
845 		return;
846 	/*
847 	 * sp and ep delimit the first match in the line.
848 	 * Mark the corresponding file positions, then
849 	 * look for further matches and mark them.
850 	 * {{ This technique, of calling match_pattern on subsequent
851 	 *    substrings of the line, may mark more than is correct
852 	 *    if the pattern starts with "^".  This bug is fixed
853 	 *    for those regex functions that accept a notbol parameter
854 	 *    (currently POSIX, PCRE and V8-with-regexec2). }}
855 	 */
856 	searchp = line;
857 	/*
858 	 * Put the hilites into a temporary list until they're adjusted.
859 	 */
860 	hilites.hl_first = NULL;
861 	do {
862 		if (ep > sp)
863 		{
864 			/*
865 			 * Assume that each char position in the "line"
866 			 * buffer corresponds to one char position in the file.
867 			 * This is not quite true; we need to adjust later.
868 			 */
869 			hl = (struct hilite *) ecalloc(1, sizeof(struct hilite));
870 			hl->hl_startpos = linepos + (sp-line);
871 			hl->hl_endpos = linepos + (ep-line);
872 			add_hilite(&hilites, hl);
873 		}
874 		/*
875 		 * If we matched more than zero characters,
876 		 * move to the first char after the string we matched.
877 		 * If we matched zero, just move to the next char.
878 		 */
879 		if (ep > searchp)
880 			searchp = ep;
881 		else if (searchp != line_end)
882 			searchp++;
883 		else /* end of line */
884 			break;
885 	} while (match_pattern(searchp, line_end - searchp, &sp, &ep, 1));
886 
887 	/*
888 	 * If there were backspaces in the original line, they
889 	 * were removed, and hl_startpos/hl_endpos are not correct.
890 	 * {{ This is very ugly. }}
891 	 */
892 	adj_hilite(&hilites, linepos, cvt_ops);
893 
894 	/*
895 	 * Now put the hilites into the real list.
896 	 */
897 	while ((hl = hilites.hl_next) != NULL)
898 	{
899 		hilites.hl_next = hl->hl_next;
900 		add_hilite(&hilite_anchor, hl);
901 	}
902 }
903 #endif
904 
905 /*
906  * Change the caseless-ness of searches.
907  * Updates the internal search state to reflect a change in the -i flag.
908  */
909 	public void
910 chg_caseless()
911 {
912 	if (!is_ucase_pattern)
913 		/*
914 		 * Pattern did not have uppercase.
915 		 * Just set the search caselessness to the global caselessness.
916 		 */
917 		is_caseless = caseless;
918 	else
919 		/*
920 		 * Pattern did have uppercase.
921 		 * Discard the pattern; we can't change search caselessness now.
922 		 */
923 		uncompile_pattern();
924 }
925 
926 #if HILITE_SEARCH
927 /*
928  * Find matching text which is currently on screen and highlight it.
929  */
930 	static void
931 hilite_screen()
932 {
933 	struct scrpos scrpos;
934 
935 	get_scrpos(&scrpos);
936 	if (scrpos.pos == NULL_POSITION)
937 		return;
938 	prep_hilite(scrpos.pos, position(BOTTOM_PLUS_ONE), -1);
939 	repaint_hilite(1);
940 }
941 
942 /*
943  * Change highlighting parameters.
944  */
945 	public void
946 chg_hilite()
947 {
948 	/*
949 	 * Erase any highlights currently on screen.
950 	 */
951 	clr_hilite();
952 	hide_hilite = 0;
953 
954 	if (hilite_search == OPT_ONPLUS)
955 		/*
956 		 * Display highlights.
957 		 */
958 		hilite_screen();
959 }
960 #endif
961 
962 /*
963  * Figure out where to start a search.
964  */
965 	static POSITION
966 search_pos(search_type)
967 	int search_type;
968 {
969 	POSITION pos;
970 	int linenum;
971 
972 	if (empty_screen())
973 	{
974 		/*
975 		 * Start at the beginning (or end) of the file.
976 		 * The empty_screen() case is mainly for
977 		 * command line initiated searches;
978 		 * for example, "+/xyz" on the command line.
979 		 * Also for multi-file (SRCH_PAST_EOF) searches.
980 		 */
981 		if (search_type & SRCH_FORW)
982 		{
983 			return (ch_zero());
984 		} else
985 		{
986 			pos = ch_length();
987 			if (pos == NULL_POSITION)
988 			{
989 				(void) ch_end_seek();
990 				pos = ch_length();
991 			}
992 			return (pos);
993 		}
994 	}
995 	if (how_search)
996 	{
997 		/*
998 		 * Search does not include current screen.
999 		 */
1000 		if (search_type & SRCH_FORW)
1001 			linenum = BOTTOM_PLUS_ONE;
1002 		else
1003 			linenum = TOP;
1004 		pos = position(linenum);
1005 	} else
1006 	{
1007 		/*
1008 		 * Search includes current screen.
1009 		 * It starts at the jump target (if searching backwards),
1010 		 * or at the jump target plus one (if forwards).
1011 		 */
1012 		linenum = adjsline(jump_sline);
1013 		pos = position(linenum);
1014 		if (search_type & SRCH_FORW)
1015 		{
1016 			pos = forw_raw_line(pos, (char **)NULL, (int *)NULL);
1017 			while (pos == NULL_POSITION)
1018 			{
1019 				if (++linenum >= sc_height)
1020 					break;
1021 				pos = position(linenum);
1022 			}
1023 		} else
1024 		{
1025 			while (pos == NULL_POSITION)
1026 			{
1027 				if (--linenum < 0)
1028 					break;
1029 				pos = position(linenum);
1030 			}
1031 		}
1032 	}
1033 	return (pos);
1034 }
1035 
1036 /*
1037  * Search a subset of the file, specified by start/end position.
1038  */
1039 	static int
1040 search_range(pos, endpos, search_type, matches, maxlines, plinepos, pendpos)
1041 	POSITION pos;
1042 	POSITION endpos;
1043 	int search_type;
1044 	int matches;
1045 	int maxlines;
1046 	POSITION *plinepos;
1047 	POSITION *pendpos;
1048 {
1049 	char *line;
1050 	char *cline;
1051 	int line_len;
1052 	LINENUM linenum;
1053 	char *sp, *ep;
1054 	int line_match;
1055 	int cvt_ops;
1056 	POSITION linepos, oldpos;
1057 
1058 	linenum = find_linenum(pos);
1059 	oldpos = pos;
1060 	for (;;)
1061 	{
1062 		/*
1063 		 * Get lines until we find a matching one or until
1064 		 * we hit end-of-file (or beginning-of-file if we're
1065 		 * going backwards), or until we hit the end position.
1066 		 */
1067 		if (ABORT_SIGS())
1068 		{
1069 			/*
1070 			 * A signal aborts the search.
1071 			 */
1072 			return (-1);
1073 		}
1074 
1075 		if ((endpos != NULL_POSITION && pos >= endpos) || maxlines == 0)
1076 		{
1077 			/*
1078 			 * Reached end position without a match.
1079 			 */
1080 			if (pendpos != NULL)
1081 				*pendpos = pos;
1082 			return (matches);
1083 		}
1084 		if (maxlines > 0)
1085 			maxlines--;
1086 
1087 		if (search_type & SRCH_FORW)
1088 		{
1089 			/*
1090 			 * Read the next line, and save the
1091 			 * starting position of that line in linepos.
1092 			 */
1093 			linepos = pos;
1094 			pos = forw_raw_line(pos, &line, &line_len);
1095 			if (linenum != 0)
1096 				linenum++;
1097 		} else
1098 		{
1099 			/*
1100 			 * Read the previous line and save the
1101 			 * starting position of that line in linepos.
1102 			 */
1103 			pos = back_raw_line(pos, &line, &line_len);
1104 			linepos = pos;
1105 			if (linenum != 0)
1106 				linenum--;
1107 		}
1108 
1109 		if (pos == NULL_POSITION)
1110 		{
1111 			/*
1112 			 * Reached EOF/BOF without a match.
1113 			 */
1114 			if (pendpos != NULL)
1115 				*pendpos = oldpos;
1116 			return (matches);
1117 		}
1118 
1119 		/*
1120 		 * If we're using line numbers, we might as well
1121 		 * remember the information we have now (the position
1122 		 * and line number of the current line).
1123 		 * Don't do it for every line because it slows down
1124 		 * the search.  Remember the line number only if
1125 		 * we're "far" from the last place we remembered it.
1126 		 */
1127 		if (linenums && abs((int)(pos - oldpos)) > 1024)
1128 			add_lnum(linenum, pos);
1129 		oldpos = pos;
1130 
1131 		/*
1132 		 * If it's a caseless search, convert the line to lowercase.
1133 		 * If we're doing backspace processing, delete backspaces.
1134 		 */
1135 		cvt_ops = get_cvt_ops();
1136 		cline = calloc(1, cvt_length(line_len, cvt_ops));
1137 		cvt_text(cline, line, &line_len, cvt_ops);
1138 
1139 		/*
1140 		 * Test the next line to see if we have a match.
1141 		 * We are successful if we either want a match and got one,
1142 		 * or if we want a non-match and got one.
1143 		 */
1144 		line_match = match_pattern(cline, line_len, &sp, &ep, 0);
1145 		line_match = (!(search_type & SRCH_NO_MATCH) && line_match) ||
1146 				((search_type & SRCH_NO_MATCH) && !line_match);
1147 		if (!line_match)
1148 		{
1149 			free(cline);
1150 			continue;
1151 		}
1152 		/*
1153 		 * Got a match.
1154 		 */
1155 		if (search_type & SRCH_FIND_ALL)
1156 		{
1157 #if HILITE_SEARCH
1158 			/*
1159 			 * We are supposed to find all matches in the range.
1160 			 * Just add the matches in this line to the
1161 			 * hilite list and keep searching.
1162 			 */
1163 			if (line_match)
1164 				hilite_line(linepos, cline, line_len, sp, ep, cvt_ops);
1165 #endif
1166 			free(cline);
1167 		} else if (--matches <= 0)
1168 		{
1169 			/*
1170 			 * Found the one match we're looking for.
1171 			 * Return it.
1172 			 */
1173 #if HILITE_SEARCH
1174 			if (hilite_search == OPT_ON)
1175 			{
1176 				/*
1177 				 * Clear the hilite list and add only
1178 				 * the matches in this one line.
1179 				 */
1180 				clr_hilite();
1181 				if (line_match)
1182 					hilite_line(linepos, cline, line_len, sp, ep, cvt_ops);
1183 			}
1184 #endif
1185 			free(cline);
1186 			if (plinepos != NULL)
1187 				*plinepos = linepos;
1188 			return (0);
1189 		}
1190 	}
1191 }
1192 
1193  /*
1194  * search for a pattern in history. If found, compile that pattern.
1195  */
1196 	static int
1197 hist_pattern(search_type)
1198 	int search_type;
1199 {
1200 #if CMD_HISTORY
1201 	char *pattern;
1202 
1203 	set_mlist(ml_search, 0);
1204 	pattern = cmd_lastpattern();
1205 	if (pattern == NULL)
1206 		return (0);
1207 
1208 	if (compile_pattern(pattern, search_type) < 0)
1209 		return (0);
1210 
1211 	is_ucase_pattern = is_ucase(pattern);
1212 	if (is_ucase_pattern && caseless != OPT_ONPLUS)
1213 		is_caseless = 0;
1214 	else
1215 		is_caseless = caseless;
1216 
1217 #if HILITE_SEARCH
1218 	if (hilite_search == OPT_ONPLUS && !hide_hilite)
1219 		hilite_screen();
1220 #endif
1221 
1222 	return (1);
1223 #else /* CMD_HISTORY */
1224 	return (0);
1225 #endif /* CMD_HISTORY */
1226 }
1227 
1228 /*
1229  * Search for the n-th occurrence of a specified pattern,
1230  * either forward or backward.
1231  * Return the number of matches not yet found in this file
1232  * (that is, n minus the number of matches found).
1233  * Return -1 if the search should be aborted.
1234  * Caller may continue the search in another file
1235  * if less than n matches are found in this file.
1236  */
1237 	public int
1238 search(search_type, pattern, n)
1239 	int search_type;
1240 	char *pattern;
1241 	int n;
1242 {
1243 	POSITION pos;
1244 	int result;
1245 
1246 	if (pattern == NULL || *pattern == '\0')
1247 	{
1248 		/*
1249 		 * A null pattern means use the previously compiled pattern.
1250 		 */
1251 		if (!prev_pattern() && !hist_pattern(search_type))
1252 		{
1253 			error("No previous regular expression", NULL_PARG);
1254 			return (-1);
1255 		}
1256 		if ((search_type & SRCH_NO_REGEX) !=
1257 		    (last_search_type & SRCH_NO_REGEX))
1258 		{
1259 			error("Please re-enter search pattern", NULL_PARG);
1260 			return -1;
1261 		}
1262 #if HILITE_SEARCH
1263 		if (hilite_search == OPT_ON)
1264 		{
1265 			/*
1266 			 * Erase the highlights currently on screen.
1267 			 * If the search fails, we'll redisplay them later.
1268 			 */
1269 			repaint_hilite(0);
1270 		}
1271 		if (hilite_search == OPT_ONPLUS && hide_hilite)
1272 		{
1273 			/*
1274 			 * Highlight any matches currently on screen,
1275 			 * before we actually start the search.
1276 			 */
1277 			hide_hilite = 0;
1278 			hilite_screen();
1279 		}
1280 		hide_hilite = 0;
1281 #endif
1282 	} else
1283 	{
1284 		/*
1285 		 * Compile the pattern.
1286 		 */
1287 		if (compile_pattern(pattern, search_type) < 0)
1288 			return (-1);
1289 		/*
1290 		 * Ignore case if -I is set OR
1291 		 * -i is set AND the pattern is all lowercase.
1292 		 */
1293 		is_ucase_pattern = is_ucase(pattern);
1294 		if (is_ucase_pattern && caseless != OPT_ONPLUS)
1295 			is_caseless = 0;
1296 		else
1297 			is_caseless = caseless;
1298 #if HILITE_SEARCH
1299 		if (hilite_search)
1300 		{
1301 			/*
1302 			 * Erase the highlights currently on screen.
1303 			 * Also permanently delete them from the hilite list.
1304 			 */
1305 			repaint_hilite(0);
1306 			hide_hilite = 0;
1307 			clr_hilite();
1308 		}
1309 		if (hilite_search == OPT_ONPLUS)
1310 		{
1311 			/*
1312 			 * Highlight any matches currently on screen,
1313 			 * before we actually start the search.
1314 			 */
1315 			hilite_screen();
1316 		}
1317 #endif
1318 	}
1319 
1320 	/*
1321 	 * Figure out where to start the search.
1322 	 */
1323 	pos = search_pos(search_type);
1324 	if (pos == NULL_POSITION)
1325 	{
1326 		/*
1327 		 * Can't find anyplace to start searching from.
1328 		 */
1329 		if (search_type & SRCH_PAST_EOF)
1330 			return (n);
1331 		/* repaint(); -- why was this here? */
1332 		error("Nothing to search", NULL_PARG);
1333 		return (-1);
1334 	}
1335 
1336 	n = search_range(pos, NULL_POSITION, search_type, n, -1,
1337 			&pos, (POSITION*)NULL);
1338 	if (n != 0)
1339 	{
1340 		/*
1341 		 * Search was unsuccessful.
1342 		 */
1343 #if HILITE_SEARCH
1344 		if (hilite_search == OPT_ON && n > 0)
1345 			/*
1346 			 * Redisplay old hilites.
1347 			 */
1348 			repaint_hilite(1);
1349 #endif
1350 		return (n);
1351 	}
1352 
1353 	if (!(search_type & SRCH_NO_MOVE))
1354 	{
1355 		/*
1356 		 * Go to the matching line.
1357 		 */
1358 		jump_loc(pos, jump_sline);
1359 	}
1360 
1361 #if HILITE_SEARCH
1362 	if (hilite_search == OPT_ON)
1363 		/*
1364 		 * Display new hilites in the matching line.
1365 		 */
1366 		repaint_hilite(1);
1367 #endif
1368 	return (0);
1369 }
1370 
1371 
1372 #if HILITE_SEARCH
1373 /*
1374  * Prepare hilites in a given range of the file.
1375  *
1376  * The pair (prep_startpos,prep_endpos) delimits a contiguous region
1377  * of the file that has been "prepared"; that is, scanned for matches for
1378  * the current search pattern, and hilites have been created for such matches.
1379  * If prep_startpos == NULL_POSITION, the prep region is empty.
1380  * If prep_endpos == NULL_POSITION, the prep region extends to EOF.
1381  * prep_hilite asks that the range (spos,epos) be covered by the prep region.
1382  */
1383 	public void
1384 prep_hilite(spos, epos, maxlines)
1385 	POSITION spos;
1386 	POSITION epos;
1387 	int maxlines;
1388 {
1389 	POSITION nprep_startpos = prep_startpos;
1390 	POSITION nprep_endpos = prep_endpos;
1391 	POSITION new_epos;
1392 	POSITION max_epos;
1393 	int result;
1394 	int i;
1395 /*
1396  * Search beyond where we're asked to search, so the prep region covers
1397  * more than we need.  Do one big search instead of a bunch of small ones.
1398  */
1399 #define	SEARCH_MORE (3*size_linebuf)
1400 
1401 	if (!prev_pattern())
1402 		return;
1403 
1404 	/*
1405 	 * If we're limited to a max number of lines, figure out the
1406 	 * file position we should stop at.
1407 	 */
1408 	if (maxlines < 0)
1409 		max_epos = NULL_POSITION;
1410 	else
1411 	{
1412 		max_epos = spos;
1413 		for (i = 0;  i < maxlines;  i++)
1414 			max_epos = forw_raw_line(max_epos, (char **)NULL, (int *)NULL);
1415 	}
1416 
1417 	/*
1418 	 * Find two ranges:
1419 	 * The range that we need to search (spos,epos); and the range that
1420 	 * the "prep" region will then cover (nprep_startpos,nprep_endpos).
1421 	 */
1422 
1423 	if (prep_startpos == NULL_POSITION ||
1424 	    (epos != NULL_POSITION && epos < prep_startpos) ||
1425 	    spos > prep_endpos)
1426 	{
1427 		/*
1428 		 * New range is not contiguous with old prep region.
1429 		 * Discard the old prep region and start a new one.
1430 		 */
1431 		clr_hilite();
1432 		if (epos != NULL_POSITION)
1433 			epos += SEARCH_MORE;
1434 		nprep_startpos = spos;
1435 	} else
1436 	{
1437 		/*
1438 		 * New range partially or completely overlaps old prep region.
1439 		 */
1440 		if (epos == NULL_POSITION)
1441 		{
1442 			/*
1443 			 * New range goes to end of file.
1444 			 */
1445 			;
1446 		} else if (epos > prep_endpos)
1447 		{
1448 			/*
1449 			 * New range ends after old prep region.
1450 			 * Extend prep region to end at end of new range.
1451 			 */
1452 			epos += SEARCH_MORE;
1453 		} else /* (epos <= prep_endpos) */
1454 		{
1455 			/*
1456 			 * New range ends within old prep region.
1457 			 * Truncate search to end at start of old prep region.
1458 			 */
1459 			epos = prep_startpos;
1460 		}
1461 
1462 		if (spos < prep_startpos)
1463 		{
1464 			/*
1465 			 * New range starts before old prep region.
1466 			 * Extend old prep region backwards to start at
1467 			 * start of new range.
1468 			 */
1469 			if (spos < SEARCH_MORE)
1470 				spos = 0;
1471 			else
1472 				spos -= SEARCH_MORE;
1473 			nprep_startpos = spos;
1474 		} else /* (spos >= prep_startpos) */
1475 		{
1476 			/*
1477 			 * New range starts within or after old prep region.
1478 			 * Trim search to start at end of old prep region.
1479 			 */
1480 			spos = prep_endpos;
1481 		}
1482 	}
1483 
1484 	if (epos != NULL_POSITION && max_epos != NULL_POSITION &&
1485 	    epos > max_epos)
1486 		/*
1487 		 * Don't go past the max position we're allowed.
1488 		 */
1489 		epos = max_epos;
1490 
1491 	if (epos == NULL_POSITION || epos > spos)
1492 	{
1493 		result = search_range(spos, epos, SRCH_FORW|SRCH_FIND_ALL, 0,
1494 				maxlines, (POSITION*)NULL, &new_epos);
1495 		if (result < 0)
1496 			return;
1497 		if (prep_endpos == NULL_POSITION || new_epos > prep_endpos)
1498 			nprep_endpos = new_epos;
1499 	}
1500 	prep_startpos = nprep_startpos;
1501 	prep_endpos = nprep_endpos;
1502 }
1503 #endif
1504 
1505 /*
1506  * Simple pattern matching function.
1507  * It supports no metacharacters like *, etc.
1508  */
1509 	static int
1510 match(pattern, pattern_len, buf, buf_len, pfound, pend)
1511 	char *pattern;
1512 	int pattern_len;
1513 	char *buf;
1514 	int buf_len;
1515 	char **pfound, **pend;
1516 {
1517 	register char *pp, *lp;
1518 	register char *pattern_end = pattern + pattern_len;
1519 	register char *buf_end = buf + buf_len;
1520 
1521 	for ( ;  buf < buf_end;  buf++)
1522 	{
1523 		for (pp = pattern, lp = buf;  *pp == *lp;  pp++, lp++)
1524 			if (pp == pattern_end || lp == buf_end)
1525 				break;
1526 		if (pp == pattern_end)
1527 		{
1528 			if (pfound != NULL)
1529 				*pfound = buf;
1530 			if (pend != NULL)
1531 				*pend = lp;
1532 			return (1);
1533 		}
1534 	}
1535 	return (0);
1536 }
1537 
1538 #if HAVE_V8_REGCOMP
1539 /*
1540  * This function is called by the V8 regcomp to report
1541  * errors in regular expressions.
1542  */
1543 	void
1544 regerror(s)
1545 	char *s;
1546 {
1547 	PARG parg;
1548 
1549 	parg.p_string = s;
1550 	error("%s", &parg);
1551 }
1552 #endif
1553 
1554