xref: /freebsd/contrib/less/pattern.c (revision 6c05f3a74f30934ee60919cc97e16ec69b542b06)
1 /*
2  * Copyright (C) 1984-2024  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to do pattern matching.
12  */
13 
14 #include "less.h"
15 
16 extern int caseless;
17 extern int is_caseless;
18 extern int utf_mode;
19 
20 /*
21  * Compile a search pattern, for future use by match_pattern.
22  */
23 static int compile_pattern2(constant char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
24 {
25 	if (search_type & SRCH_NO_REGEX)
26 		return (0);
27   {
28 #if HAVE_GNU_REGEX
29 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30 		ecalloc(1, sizeof(struct re_pattern_buffer));
31 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
32 	if (re_compile_pattern(pattern, strlen(pattern), comp))
33 	{
34 		free(comp);
35 		if (show_error)
36 			error("Invalid pattern", NULL_PARG);
37 		return (-1);
38 	}
39 	if (*comp_pattern != NULL)
40 	{
41 		regfree(*comp_pattern);
42 		free(*comp_pattern);
43 	}
44 	*comp_pattern = comp;
45 #endif
46 #if HAVE_POSIX_REGCOMP
47 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
48 	if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
49 	{
50 		free(comp);
51 		if (show_error)
52 			error("Invalid pattern", NULL_PARG);
53 		return (-1);
54 	}
55 	if (*comp_pattern != NULL)
56 	{
57 		regfree(*comp_pattern);
58 		free(*comp_pattern);
59 	}
60 	*comp_pattern = comp;
61 #endif
62 #if HAVE_PCRE
63 	constant char *errstring;
64 	int erroffset;
65 	PARG parg;
66 	pcre *comp = pcre_compile(pattern,
67 			((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
68 			(is_caseless ? PCRE_CASELESS : 0),
69 			&errstring, &erroffset, NULL);
70 	if (comp == NULL)
71 	{
72 		parg.p_string = (char *) errstring;
73 		if (show_error)
74 			error("%s", &parg);
75 		return (-1);
76 	}
77 	*comp_pattern = comp;
78 #endif
79 #if HAVE_PCRE2
80 	int errcode;
81 	PCRE2_SIZE erroffset;
82 	PARG parg;
83 	pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
84 			(is_caseless ? PCRE2_CASELESS : 0),
85 			&errcode, &erroffset, NULL);
86 	if (comp == NULL)
87 	{
88 		if (show_error)
89 		{
90 			char msg[160];
91 			pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
92 			parg.p_string = msg;
93 			error("%s", &parg);
94 		}
95 		return (-1);
96 	}
97 	*comp_pattern = comp;
98 #endif
99 #if HAVE_RE_COMP
100 	PARG parg;
101 	if ((parg.p_string = re_comp(pattern)) != NULL)
102 	{
103 		if (show_error)
104 			error("%s", &parg);
105 		return (-1);
106 	}
107 	*comp_pattern = 1;
108 #endif
109 #if HAVE_REGCMP
110 	char *comp;
111 	if ((comp = regcmp(pattern, 0)) == NULL)
112 	{
113 		if (show_error)
114 			error("Invalid pattern", NULL_PARG);
115 		return (-1);
116 	}
117 	if (comp_pattern != NULL)
118 		free(*comp_pattern);
119 	*comp_pattern = comp;
120 #endif
121 #if HAVE_V8_REGCOMP
122 	struct regexp *comp;
123 	reg_show_error = show_error;
124 	comp = regcomp(pattern);
125 	reg_show_error = 1;
126 	if (comp == NULL)
127 	{
128 		/*
129 		 * regcomp has already printed an error message
130 		 * via regerror().
131 		 */
132 		return (-1);
133 	}
134 	if (*comp_pattern != NULL)
135 		free(*comp_pattern);
136 	*comp_pattern = comp;
137 #endif
138   }
139 	return (0);
140 }
141 
142 /*
143  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
144  */
145 public int compile_pattern(constant char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
146 {
147 	int result;
148 
149 	if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
150 	{
151 		result = compile_pattern2(pattern, search_type, comp_pattern, show_error);
152 	} else
153 	{
154 		char *cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
155 		cvt_text(cvt_pattern, pattern, NULL, NULL, CVT_TO_LC);
156 		result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
157 		free(cvt_pattern);
158 	}
159 	return (result);
160 }
161 
162 /*
163  * Forget that we have a compiled pattern.
164  */
165 public void uncompile_pattern(PATTERN_TYPE *pattern)
166 {
167 #if HAVE_GNU_REGEX
168 	if (*pattern != NULL)
169 	{
170 		regfree(*pattern);
171 		free(*pattern);
172 	}
173 	*pattern = NULL;
174 #endif
175 #if HAVE_POSIX_REGCOMP
176 	if (*pattern != NULL)
177 	{
178 		regfree(*pattern);
179 		free(*pattern);
180 	}
181 	*pattern = NULL;
182 #endif
183 #if HAVE_PCRE
184 	if (*pattern != NULL)
185 		pcre_free(*pattern);
186 	*pattern = NULL;
187 #endif
188 #if HAVE_PCRE2
189 	if (*pattern != NULL)
190 		pcre2_code_free(*pattern);
191 	*pattern = NULL;
192 #endif
193 #if HAVE_RE_COMP
194 	*pattern = 0;
195 #endif
196 #if HAVE_REGCMP
197 	if (*pattern != NULL)
198 		free(*pattern);
199 	*pattern = NULL;
200 #endif
201 #if HAVE_V8_REGCOMP
202 	if (*pattern != NULL)
203 		free(*pattern);
204 	*pattern = NULL;
205 #endif
206 }
207 
208 #if 0
209 /*
210  * Can a pattern be successfully compiled?
211  */
212 public int valid_pattern(char *pattern)
213 {
214 	PATTERN_TYPE comp_pattern;
215 	int result;
216 
217 	SET_NULL_PATTERN(comp_pattern);
218 	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
219 	if (result != 0)
220 		return (0);
221 	uncompile_pattern(&comp_pattern);
222 	return (1);
223 }
224 #endif
225 
226 /*
227  * Is a compiled pattern null?
228  */
229 public lbool is_null_pattern(PATTERN_TYPE pattern)
230 {
231 #if HAVE_GNU_REGEX
232 	return (pattern == NULL);
233 #endif
234 #if HAVE_POSIX_REGCOMP
235 	return (pattern == NULL);
236 #endif
237 #if HAVE_PCRE
238 	return (pattern == NULL);
239 #endif
240 #if HAVE_PCRE2
241 	return (pattern == NULL);
242 #endif
243 #if HAVE_RE_COMP
244 	return (pattern == 0);
245 #endif
246 #if HAVE_REGCMP
247 	return (pattern == NULL);
248 #endif
249 #if HAVE_V8_REGCOMP
250 	return (pattern == NULL);
251 #endif
252 #if NO_REGEX
253 	return (pattern == NULL);
254 #endif
255 }
256 /*
257  * Simple pattern matching function.
258  * It supports no metacharacters like *, etc.
259  */
260 static int match(constant char *pattern, size_t pattern_len, constant char *buf, int buf_len, constant char ***sp, constant char ***ep, int nsubs)
261 {
262 	constant char *pp;
263 	constant char *lp;
264 	constant char *pattern_end = pattern + pattern_len;
265 	constant char *buf_end = buf + buf_len;
266 
267 	(void) nsubs;
268 	for ( ;  buf < buf_end;  buf++)
269 	{
270 		for (pp = pattern, lp = buf;  ;  pp++, lp++)
271 		{
272 			char cp = *pp;
273 			char cl = *lp;
274 			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
275 				cp = ASCII_TO_LOWER(cp);
276 			if (cp != cl)
277 				break;
278 			if (pp == pattern_end || lp == buf_end)
279 				break;
280 		}
281 		if (pp == pattern_end)
282 		{
283 			*(*sp)++ = buf;
284 			*(*ep)++ = lp;
285 			return (1);
286 		}
287 	}
288 	**sp = **ep = NULL;
289 	return (0);
290 }
291 
292 /*
293  * Perform a pattern match with the previously compiled pattern.
294  * Set sp[0] and ep[0] to the start and end of the matched string.
295  * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
296  * Subpatterns are defined by parentheses in the regex language.
297  */
298 static int match_pattern1(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t aline_len, constant char **sp, constant char **ep, int nsp, int notbol, int search_type)
299 {
300 	int matched;
301 	int line_len = (int) aline_len; /*{{type-issue}}*/
302 
303 #if NO_REGEX
304 	search_type |= SRCH_NO_REGEX;
305 #endif
306 	if (search_type & SRCH_NO_REGEX)
307 		matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
308 	else
309 	{
310 #if HAVE_GNU_REGEX
311 	{
312 		struct re_registers search_regs;
313 		pattern->not_bol = notbol;
314 		pattern->regs_allocated = REGS_UNALLOCATED;
315 		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
316 		if (matched)
317 		{
318 			*sp++ = line + search_regs.start[0];
319 			*ep++ = line + search_regs.end[0];
320 		}
321 	}
322 #endif
323 #if HAVE_POSIX_REGCOMP
324 	{
325 		#define RM_COUNT (NUM_SEARCH_COLORS+2)
326 		regmatch_t rm[RM_COUNT];
327 		int flags = (notbol) ? REG_NOTBOL : 0;
328 #ifdef REG_STARTEND
329 		flags |= REG_STARTEND;
330 		rm[0].rm_so = 0;
331 		rm[0].rm_eo = line_len;
332 #endif
333 		matched = !regexec(pattern, line, RM_COUNT, rm, flags);
334 		if (matched)
335 		{
336 			int i;
337 			int ecount;
338 			for (ecount = RM_COUNT;  ecount > 0;  ecount--)
339 				if (rm[ecount-1].rm_so >= 0)
340 					break;
341 			if (ecount >= nsp)
342 				ecount = nsp-1;
343 			for (i = 0;  i < ecount;  i++)
344 			{
345 				if (rm[i].rm_so < 0)
346 				{
347 					*sp++ = *ep++ = line;
348 				} else
349 				{
350 #ifndef __WATCOMC__
351 					*sp++ = line + rm[i].rm_so;
352 					*ep++ = line + rm[i].rm_eo;
353 #else
354 					*sp++ = rm[i].rm_sp;
355 					*ep++ = rm[i].rm_ep;
356 #endif
357 				}
358 			}
359 		}
360 	}
361 #endif
362 #if HAVE_PCRE
363 	{
364 		#define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
365 		int ovector[OVECTOR_COUNT];
366 		int flags = (notbol) ? PCRE_NOTBOL : 0;
367 		int i;
368 		int ecount;
369 		int mcount = pcre_exec(pattern, NULL, line, line_len,
370 			0, flags, ovector, OVECTOR_COUNT);
371 		matched = (mcount > 0);
372 		ecount = nsp-1;
373 		if (ecount > mcount) ecount = mcount;
374 		for (i = 0;  i < ecount*2; )
375 		{
376 			if (ovector[i] < 0 || ovector[i+1] < 0)
377 			{
378 				*sp++ = *ep++ = line;
379 				i += 2;
380 			} else
381 			{
382 				*sp++ = line + ovector[i++];
383 				*ep++ = line + ovector[i++];
384 			}
385 		}
386 	}
387 #endif
388 #if HAVE_PCRE2
389 	{
390 		int flags = (notbol) ? PCRE2_NOTBOL : 0;
391 		pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
392 		int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
393 			0, flags, md, NULL);
394 		matched = (mcount > 0);
395 		if (matched)
396 		{
397 			PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
398 			int i;
399 			int ecount = nsp-1;
400 			if (ecount > mcount) ecount = mcount;
401 			for (i = 0;  i < ecount*2; )
402 			{
403 				if (ovector[i] < 0 || ovector[i+1] < 0)
404 				{
405 					*sp++ = *ep++ = line;
406 					i += 2;
407 				} else
408 				{
409 					*sp++ = line + ovector[i++];
410 					*ep++ = line + ovector[i++];
411 				}
412 			}
413 		}
414 		pcre2_match_data_free(md);
415 	}
416 #endif
417 #if HAVE_RE_COMP
418 	matched = (re_exec(line) == 1);
419 	/*
420 	 * re_exec doesn't seem to provide a way to get the matched string.
421 	 */
422 #endif
423 #if HAVE_REGCMP
424 	matched = ((*ep++ = regex(pattern, line)) != NULL);
425 	if (matched)
426 		*sp++ = __loc1;
427 #endif
428 #if HAVE_V8_REGCOMP
429 #if HAVE_REGEXEC2
430 	matched = regexec2(pattern, line, notbol);
431 #else
432 	matched = regexec(pattern, line);
433 #endif
434 	if (matched)
435 	{
436 		*sp++ = pattern->startp[0];
437 		*ep++ = pattern->endp[0];
438 	}
439 #endif
440 	}
441 	*sp = *ep = NULL;
442 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
443 			((search_type & SRCH_NO_MATCH) && !matched);
444 	return (matched);
445 }
446 
447 public int match_pattern(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t line_len, constant char **sp, constant char **ep, int nsp, int notbol, int search_type)
448 {
449 	int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
450 	int i;
451 	for (i = 1;  i <= NUM_SEARCH_COLORS;  i++)
452 	{
453 		if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
454 			matched = 0;
455 	}
456 	return matched;
457 }
458 
459 /*
460  * Return the name of the pattern matching library.
461  */
462 public constant char * pattern_lib_name(void)
463 {
464 #if HAVE_GNU_REGEX
465 	return ("GNU");
466 #else
467 #if HAVE_POSIX_REGCOMP
468 	return ("POSIX");
469 #else
470 #if HAVE_PCRE2
471 	return ("PCRE2");
472 #else
473 #if HAVE_PCRE
474 	return ("PCRE");
475 #else
476 #if HAVE_RE_COMP
477 	return ("BSD");
478 #else
479 #if HAVE_REGCMP
480 	return ("V8");
481 #else
482 #if HAVE_V8_REGCOMP
483 	return ("Spencer V8");
484 #else
485 	return ("no");
486 #endif
487 #endif
488 #endif
489 #endif
490 #endif
491 #endif
492 #endif
493 }
494