xref: /freebsd/contrib/less/pattern.c (revision ebacd8013fe5f7fdf9f6a5b286f6680dd2891036)
1 /*
2  * Copyright (C) 1984-2022  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to do pattern matching.
12  */
13 
14 #include "less.h"
15 
16 extern int caseless;
17 extern int is_caseless;
18 extern int utf_mode;
19 
20 /*
21  * Compile a search pattern, for future use by match_pattern.
22  */
23 	static int
24 compile_pattern2(pattern, search_type, comp_pattern, show_error)
25 	char *pattern;
26 	int search_type;
27 	PATTERN_TYPE *comp_pattern;
28 	int show_error;
29 {
30 	if (search_type & SRCH_NO_REGEX)
31 		return (0);
32   {
33 #if HAVE_GNU_REGEX
34 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
35 		ecalloc(1, sizeof(struct re_pattern_buffer));
36 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
37 	if (re_compile_pattern(pattern, strlen(pattern), comp))
38 	{
39 		free(comp);
40 		if (show_error)
41 			error("Invalid pattern", NULL_PARG);
42 		return (-1);
43 	}
44 	if (*comp_pattern != NULL)
45 	{
46 		regfree(*comp_pattern);
47 		free(*comp_pattern);
48 	}
49 	*comp_pattern = comp;
50 #endif
51 #if HAVE_POSIX_REGCOMP
52 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
53 	if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
54 	{
55 		free(comp);
56 		if (show_error)
57 			error("Invalid pattern", NULL_PARG);
58 		return (-1);
59 	}
60 	if (*comp_pattern != NULL)
61 	{
62 		regfree(*comp_pattern);
63 		free(*comp_pattern);
64 	}
65 	*comp_pattern = comp;
66 #endif
67 #if HAVE_PCRE
68 	constant char *errstring;
69 	int erroffset;
70 	PARG parg;
71 	pcre *comp = pcre_compile(pattern,
72 			((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
73 			(is_caseless ? PCRE_CASELESS : 0),
74 			&errstring, &erroffset, NULL);
75 	if (comp == NULL)
76 	{
77 		parg.p_string = (char *) errstring;
78 		if (show_error)
79 			error("%s", &parg);
80 		return (-1);
81 	}
82 	*comp_pattern = comp;
83 #endif
84 #if HAVE_PCRE2
85 	int errcode;
86 	PCRE2_SIZE erroffset;
87 	PARG parg;
88 	pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
89 			(is_caseless ? PCRE2_CASELESS : 0),
90 			&errcode, &erroffset, NULL);
91 	if (comp == NULL)
92 	{
93 		if (show_error)
94 		{
95 			char msg[160];
96 			pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
97 			parg.p_string = msg;
98 			error("%s", &parg);
99 		}
100 		return (-1);
101 	}
102 	*comp_pattern = comp;
103 #endif
104 #if HAVE_RE_COMP
105 	PARG parg;
106 	if ((parg.p_string = re_comp(pattern)) != NULL)
107 	{
108 		if (show_error)
109 			error("%s", &parg);
110 		return (-1);
111 	}
112 	*comp_pattern = 1;
113 #endif
114 #if HAVE_REGCMP
115 	char *comp;
116 	if ((comp = regcmp(pattern, 0)) == NULL)
117 	{
118 		if (show_error)
119 			error("Invalid pattern", NULL_PARG);
120 		return (-1);
121 	}
122 	if (comp_pattern != NULL)
123 		free(*comp_pattern);
124 	*comp_pattern = comp;
125 #endif
126 #if HAVE_V8_REGCOMP
127 	struct regexp *comp;
128 	reg_show_error = show_error;
129 	comp = regcomp(pattern);
130 	reg_show_error = 1;
131 	if (comp == NULL)
132 	{
133 		/*
134 		 * regcomp has already printed an error message
135 		 * via regerror().
136 		 */
137 		return (-1);
138 	}
139 	if (*comp_pattern != NULL)
140 		free(*comp_pattern);
141 	*comp_pattern = comp;
142 #endif
143   }
144 	return (0);
145 }
146 
147 /*
148  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
149  */
150 	public int
151 compile_pattern(pattern, search_type, show_error, comp_pattern)
152 	char *pattern;
153 	int search_type;
154 	int show_error;
155 	PATTERN_TYPE *comp_pattern;
156 {
157 	char *cvt_pattern;
158 	int result;
159 
160 	if (caseless != OPT_ONPLUS || re_handles_caseless)
161 		cvt_pattern = pattern;
162 	else
163 	{
164 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
165 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
166 	}
167 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
168 	if (cvt_pattern != pattern)
169 		free(cvt_pattern);
170 	return (result);
171 }
172 
173 /*
174  * Forget that we have a compiled pattern.
175  */
176 	public void
177 uncompile_pattern(pattern)
178 	PATTERN_TYPE *pattern;
179 {
180 #if HAVE_GNU_REGEX
181 	if (*pattern != NULL)
182 	{
183 		regfree(*pattern);
184 		free(*pattern);
185 	}
186 	*pattern = NULL;
187 #endif
188 #if HAVE_POSIX_REGCOMP
189 	if (*pattern != NULL)
190 	{
191 		regfree(*pattern);
192 		free(*pattern);
193 	}
194 	*pattern = NULL;
195 #endif
196 #if HAVE_PCRE
197 	if (*pattern != NULL)
198 		pcre_free(*pattern);
199 	*pattern = NULL;
200 #endif
201 #if HAVE_PCRE2
202 	if (*pattern != NULL)
203 		pcre2_code_free(*pattern);
204 	*pattern = NULL;
205 #endif
206 #if HAVE_RE_COMP
207 	*pattern = 0;
208 #endif
209 #if HAVE_REGCMP
210 	if (*pattern != NULL)
211 		free(*pattern);
212 	*pattern = NULL;
213 #endif
214 #if HAVE_V8_REGCOMP
215 	if (*pattern != NULL)
216 		free(*pattern);
217 	*pattern = NULL;
218 #endif
219 }
220 
#if 0
/*
 * Test whether a pattern compiles.
 * The pattern is compiled into a scratch slot without reporting errors
 * and discarded again on success.
 * Returns 1 if it compiled, 0 if not.
 */
	public int
valid_pattern(pattern)
	char *pattern;
{
	PATTERN_TYPE scratch;

	SET_NULL_PATTERN(scratch);
	if (compile_pattern2(pattern, 0, &scratch, 0) != 0)
		return (0);
	uncompile_pattern(&scratch);
	return (1);
}
#endif
240 
241 /*
242  * Is a compiled pattern null?
243  */
244 	public int
245 is_null_pattern(pattern)
246 	PATTERN_TYPE pattern;
247 {
248 #if HAVE_GNU_REGEX
249 	return (pattern == NULL);
250 #endif
251 #if HAVE_POSIX_REGCOMP
252 	return (pattern == NULL);
253 #endif
254 #if HAVE_PCRE
255 	return (pattern == NULL);
256 #endif
257 #if HAVE_PCRE2
258 	return (pattern == NULL);
259 #endif
260 #if HAVE_RE_COMP
261 	return (pattern == 0);
262 #endif
263 #if HAVE_REGCMP
264 	return (pattern == NULL);
265 #endif
266 #if HAVE_V8_REGCOMP
267 	return (pattern == NULL);
268 #endif
269 #if NO_REGEX
270 	return (pattern == NULL);
271 #endif
272 }
273 
274 /*
275  * Simple pattern matching function.
276  * It supports no metacharacters like *, etc.
277  */
278 	static int
279 match(pattern, pattern_len, buf, buf_len, pfound, pend)
280 	char *pattern;
281 	int pattern_len;
282 	char *buf;
283 	int buf_len;
284 	char **pfound, **pend;
285 {
286 	char *pp, *lp;
287 	char *pattern_end = pattern + pattern_len;
288 	char *buf_end = buf + buf_len;
289 
290 	for ( ;  buf < buf_end;  buf++)
291 	{
292 		for (pp = pattern, lp = buf;  ;  pp++, lp++)
293 		{
294 			char cp = *pp;
295 			char cl = *lp;
296 			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
297 				cp = ASCII_TO_LOWER(cp);
298 			if (cp != cl)
299 				break;
300 			if (pp == pattern_end || lp == buf_end)
301 				break;
302 		}
303 		if (pp == pattern_end)
304 		{
305 			if (pfound != NULL)
306 				*pfound = buf;
307 			if (pend != NULL)
308 				*pend = lp;
309 			return (1);
310 		}
311 	}
312 	return (0);
313 }
314 
/*
 * Perform a pattern match with the previously compiled pattern.
 * Set sp and ep to the start and end of the matched string.
 *
 *	pattern		Compiled pattern; used only when a regex search is done.
 *	tpattern	Pattern text; used only for the plain-text search
 *			(SRCH_NO_REGEX set, or a NO_REGEX build).
 *	line, line_len	Text to search and its length.
 *	sp, ep		Out: start and end of the match.  Both are set to
 *			NULL on entry and stay NULL if nothing matched or
 *			the library cannot report match positions.
 *	notbol		If nonzero, tells the libraries that support it that
 *			the start of line is not a beginning-of-line.
 *	search_type	SRCH_* flags.
 *
 * Returns 1 if the line satisfies the search and 0 otherwise; with
 * SRCH_NO_MATCH set the sense is inverted (1 when the pattern did NOT match).
 */
	public int
match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
	PATTERN_TYPE pattern;
	char *tpattern;
	char *line;
	int line_len;
	char **sp;
	char **ep;
	int notbol;
	int search_type;
{
	int matched;

	*sp = *ep = NULL;
#if NO_REGEX
	/* Without a regex library every search is a plain-text search. */
	search_type |= SRCH_NO_REGEX;
#endif
	if (search_type & SRCH_NO_REGEX)
		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
	else
	{
#if HAVE_GNU_REGEX
	{
		/*
		 * NOTE(review): with REGS_UNALLOCATED the library presumably
		 * allocates search_regs.start/end itself; nothing here frees
		 * them -- confirm against the GNU regex documentation.
		 */
		struct re_registers search_regs;
		pattern->not_bol = notbol;
		pattern->regs_allocated = REGS_UNALLOCATED;
		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
		if (matched)
		{
			*sp = line + search_regs.start[0];
			*ep = line + search_regs.end[0];
		}
	}
#endif
#if HAVE_POSIX_REGCOMP
	{
		regmatch_t rm;
		int flags = (notbol) ? REG_NOTBOL : 0;
#ifdef REG_STARTEND
		/* Pass the range to search explicitly via rm, using line_len. */
		flags |= REG_STARTEND;
		rm.rm_so = 0;
		rm.rm_eo = line_len;
#endif
		/* regexec returns 0 on a match. */
		matched = !regexec(pattern, line, 1, &rm, flags);
		if (matched)
		{
#ifndef __WATCOMC__
			*sp = line + rm.rm_so;
			*ep = line + rm.rm_eo;
#else
			/* This compiler's regmatch_t carries pointers (rm_sp/rm_ep). */
			*sp = rm.rm_sp;
			*ep = rm.rm_ep;
#endif
		}
	}
#endif
#if HAVE_PCRE
	{
		int flags = (notbol) ? PCRE_NOTBOL : 0;
		int ovector[3];
		/* A negative result from pcre_exec is treated as "no match". */
		matched = pcre_exec(pattern, NULL, line, line_len,
			0, flags, ovector, 3) >= 0;
		if (matched)
		{
			*sp = line + ovector[0];
			*ep = line + ovector[1];
		}
	}
#endif
#if HAVE_PCRE2
	{
		int flags = (notbol) ? PCRE2_NOTBOL : 0;
		/* Match data is created for this call and freed below. */
		pcre2_match_data *md = pcre2_match_data_create(3, NULL);
		matched = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
			0, flags, md, NULL) >= 0;
		if (matched)
		{
			PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
			*sp = line + ovector[0];
			*ep = line + ovector[1];
		}
		pcre2_match_data_free(md);
	}
#endif
#if HAVE_RE_COMP
	/* The pattern argument is not used; re_exec takes only the line. */
	matched = (re_exec(line) == 1);
	/*
	 * re_exec doesn't seem to provide a way to get the matched string.
	 */
	*sp = *ep = NULL;
#endif
#if HAVE_REGCMP
	/* regex returns the end of the match, or NULL; the start is in __loc1. */
	*ep = regex(pattern, line);
	matched = (*ep != NULL);
	if (matched)
		*sp = __loc1;
#endif
#if HAVE_V8_REGCOMP
#if HAVE_REGEXEC2
	matched = regexec2(pattern, line, notbol);
#else
	/* Plain regexec takes no notbol argument, so notbol is ignored here. */
	matched = regexec(pattern, line);
#endif
	if (matched)
	{
		*sp = pattern->startp[0];
		*ep = pattern->endp[0];
	}
#endif
	}
	/* Invert the result when the search asks for non-matching lines. */
	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
			((search_type & SRCH_NO_MATCH) && !matched);
	return (matched);
}
433 
434 /*
435  * Return the name of the pattern matching library.
436  */
437 	public char *
438 pattern_lib_name(VOID_PARAM)
439 {
440 #if HAVE_GNU_REGEX
441 	return ("GNU");
442 #else
443 #if HAVE_POSIX_REGCOMP
444 	return ("POSIX");
445 #else
446 #if HAVE_PCRE2
447 	return ("PCRE2");
448 #else
449 #if HAVE_PCRE
450 	return ("PCRE");
451 #else
452 #if HAVE_RE_COMP
453 	return ("BSD");
454 #else
455 #if HAVE_REGCMP
456 	return ("V8");
457 #else
458 #if HAVE_V8_REGCOMP
459 	return ("Spencer V8");
460 #else
461 	return ("no");
462 #endif
463 #endif
464 #endif
465 #endif
466 #endif
467 #endif
468 #endif
469 }
470