xref: /freebsd/contrib/less/pattern.c (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1 /*
2  * Copyright (C) 1984-2020  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to do pattern matching.
12  */
13 
14 #include "less.h"
15 
16 extern int caseless;
17 extern int utf_mode;
18 
19 /*
20  * Compile a search pattern, for future use by match_pattern.
21  */
22 	static int
23 compile_pattern2(pattern, search_type, comp_pattern, show_error)
24 	char *pattern;
25 	int search_type;
26 	PATTERN_TYPE *comp_pattern;
27 	int show_error;
28 {
29 	if (search_type & SRCH_NO_REGEX)
30 		return (0);
31   {
32 #if HAVE_GNU_REGEX
33 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
34 		ecalloc(1, sizeof(struct re_pattern_buffer));
35 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
36 	if (re_compile_pattern(pattern, strlen(pattern), comp))
37 	{
38 		free(comp);
39 		if (show_error)
40 			error("Invalid pattern", NULL_PARG);
41 		return (-1);
42 	}
43 	if (*comp_pattern != NULL)
44 	{
45 		regfree(*comp_pattern);
46 		free(*comp_pattern);
47 	}
48 	*comp_pattern = comp;
49 #endif
50 #if HAVE_POSIX_REGCOMP
51 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
52 	if (regcomp(comp, pattern, REGCOMP_FLAG))
53 	{
54 		free(comp);
55 		if (show_error)
56 			error("Invalid pattern", NULL_PARG);
57 		return (-1);
58 	}
59 	if (*comp_pattern != NULL)
60 	{
61 		regfree(*comp_pattern);
62 		free(*comp_pattern);
63 	}
64 	*comp_pattern = comp;
65 #endif
66 #if HAVE_PCRE
67 	constant char *errstring;
68 	int erroffset;
69 	PARG parg;
70 	pcre *comp = pcre_compile(pattern,
71 			(utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0,
72 			&errstring, &erroffset, NULL);
73 	if (comp == NULL)
74 	{
75 		parg.p_string = (char *) errstring;
76 		if (show_error)
77 			error("%s", &parg);
78 		return (-1);
79 	}
80 	*comp_pattern = comp;
81 #endif
82 #if HAVE_PCRE2
83 	int errcode;
84 	PCRE2_SIZE erroffset;
85 	PARG parg;
86 	pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
87 			0, &errcode, &erroffset, NULL);
88 	if (comp == NULL)
89 	{
90 		if (show_error)
91 		{
92 			char msg[160];
93 			pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
94 			parg.p_string = msg;
95 			error("%s", &parg);
96 		}
97 		return (-1);
98 	}
99 	*comp_pattern = comp;
100 #endif
101 #if HAVE_RE_COMP
102 	PARG parg;
103 	if ((parg.p_string = re_comp(pattern)) != NULL)
104 	{
105 		if (show_error)
106 			error("%s", &parg);
107 		return (-1);
108 	}
109 	*comp_pattern = 1;
110 #endif
111 #if HAVE_REGCMP
112 	char *comp;
113 	if ((comp = regcmp(pattern, 0)) == NULL)
114 	{
115 		if (show_error)
116 			error("Invalid pattern", NULL_PARG);
117 		return (-1);
118 	}
119 	if (comp_pattern != NULL)
120 		free(*comp_pattern);
121 	*comp_pattern = comp;
122 #endif
123 #if HAVE_V8_REGCOMP
124 	struct regexp *comp;
125 	reg_show_error = show_error;
126 	comp = regcomp(pattern);
127 	reg_show_error = 1;
128 	if (comp == NULL)
129 	{
130 		/*
131 		 * regcomp has already printed an error message
132 		 * via regerror().
133 		 */
134 		return (-1);
135 	}
136 	if (*comp_pattern != NULL)
137 		free(*comp_pattern);
138 	*comp_pattern = comp;
139 #endif
140   }
141 	return (0);
142 }
143 
144 /*
145  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
146  */
147 	public int
148 compile_pattern(pattern, search_type, comp_pattern)
149 	char *pattern;
150 	int search_type;
151 	PATTERN_TYPE *comp_pattern;
152 {
153 	char *cvt_pattern;
154 	int result;
155 
156 	if (caseless != OPT_ONPLUS)
157 		cvt_pattern = pattern;
158 	else
159 	{
160 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
161 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
162 	}
163 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, 1);
164 	if (cvt_pattern != pattern)
165 		free(cvt_pattern);
166 	return (result);
167 }
168 
169 /*
170  * Forget that we have a compiled pattern.
171  */
172 	public void
173 uncompile_pattern(pattern)
174 	PATTERN_TYPE *pattern;
175 {
176 #if HAVE_GNU_REGEX
177 	if (*pattern != NULL)
178 	{
179 		regfree(*pattern);
180 		free(*pattern);
181 	}
182 	*pattern = NULL;
183 #endif
184 #if HAVE_POSIX_REGCOMP
185 	if (*pattern != NULL)
186 	{
187 		regfree(*pattern);
188 		free(*pattern);
189 	}
190 	*pattern = NULL;
191 #endif
192 #if HAVE_PCRE
193 	if (*pattern != NULL)
194 		pcre_free(*pattern);
195 	*pattern = NULL;
196 #endif
197 #if HAVE_PCRE2
198 	if (*pattern != NULL)
199 		pcre2_code_free(*pattern);
200 	*pattern = NULL;
201 #endif
202 #if HAVE_RE_COMP
203 	*pattern = 0;
204 #endif
205 #if HAVE_REGCMP
206 	if (*pattern != NULL)
207 		free(*pattern);
208 	*pattern = NULL;
209 #endif
210 #if HAVE_V8_REGCOMP
211 	if (*pattern != NULL)
212 		free(*pattern);
213 	*pattern = NULL;
214 #endif
215 }
216 
217 /*
218  * Can a pattern be successfully compiled?
219  */
220 	public int
221 valid_pattern(pattern)
222 	char *pattern;
223 {
224 	PATTERN_TYPE comp_pattern;
225 	int result;
226 
227 	CLEAR_PATTERN(comp_pattern);
228 	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
229 	if (result != 0)
230 		return (0);
231 	uncompile_pattern(&comp_pattern);
232 	return (1);
233 }
234 
235 /*
236  * Is a compiled pattern null?
237  */
238 	public int
239 is_null_pattern(pattern)
240 	PATTERN_TYPE pattern;
241 {
242 #if HAVE_GNU_REGEX
243 	return (pattern == NULL);
244 #endif
245 #if HAVE_POSIX_REGCOMP
246 	return (pattern == NULL);
247 #endif
248 #if HAVE_PCRE
249 	return (pattern == NULL);
250 #endif
251 #if HAVE_PCRE2
252 	return (pattern == NULL);
253 #endif
254 #if HAVE_RE_COMP
255 	return (pattern == 0);
256 #endif
257 #if HAVE_REGCMP
258 	return (pattern == NULL);
259 #endif
260 #if HAVE_V8_REGCOMP
261 	return (pattern == NULL);
262 #endif
263 #if NO_REGEX
264 	return (pattern == NULL);
265 #endif
266 }
267 
268 /*
269  * Simple pattern matching function.
270  * It supports no metacharacters like *, etc.
271  */
272 	static int
273 match(pattern, pattern_len, buf, buf_len, pfound, pend)
274 	char *pattern;
275 	int pattern_len;
276 	char *buf;
277 	int buf_len;
278 	char **pfound, **pend;
279 {
280 	char *pp, *lp;
281 	char *pattern_end = pattern + pattern_len;
282 	char *buf_end = buf + buf_len;
283 
284 	for ( ;  buf < buf_end;  buf++)
285 	{
286 		for (pp = pattern, lp = buf;  ;  pp++, lp++)
287 		{
288 			char cp = *pp;
289 			char cl = *lp;
290 			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
291 				cp = ASCII_TO_LOWER(cp);
292 			if (cp != cl)
293 				break;
294 			if (pp == pattern_end || lp == buf_end)
295 				break;
296 		}
297 		if (pp == pattern_end)
298 		{
299 			if (pfound != NULL)
300 				*pfound = buf;
301 			if (pend != NULL)
302 				*pend = lp;
303 			return (1);
304 		}
305 	}
306 	return (0);
307 }
308 
309 /*
310  * Perform a pattern match with the previously compiled pattern.
311  * Set sp and ep to the start and end of the matched string.
312  */
313 	public int
314 match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
315 	PATTERN_TYPE pattern;
316 	char *tpattern;
317 	char *line;
318 	int line_len;
319 	char **sp;
320 	char **ep;
321 	int notbol;
322 	int search_type;
323 {
324 	int matched;
325 
326 	*sp = *ep = NULL;
327 #if NO_REGEX
328 	search_type |= SRCH_NO_REGEX;
329 #endif
330 	if (search_type & SRCH_NO_REGEX)
331 		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
332 	else
333 	{
334 #if HAVE_GNU_REGEX
335 	{
336 		struct re_registers search_regs;
337 		pattern->not_bol = notbol;
338 		pattern->regs_allocated = REGS_UNALLOCATED;
339 		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
340 		if (matched)
341 		{
342 			*sp = line + search_regs.start[0];
343 			*ep = line + search_regs.end[0];
344 		}
345 	}
346 #endif
347 #if HAVE_POSIX_REGCOMP
348 	{
349 		regmatch_t rm;
350 		int flags = (notbol) ? REG_NOTBOL : 0;
351 #ifdef REG_STARTEND
352 		flags |= REG_STARTEND;
353 		rm.rm_so = 0;
354 		rm.rm_eo = line_len;
355 #endif
356 		matched = !regexec(pattern, line, 1, &rm, flags);
357 		if (matched)
358 		{
359 #ifndef __WATCOMC__
360 			*sp = line + rm.rm_so;
361 			*ep = line + rm.rm_eo;
362 #else
363 			*sp = rm.rm_sp;
364 			*ep = rm.rm_ep;
365 #endif
366 		}
367 	}
368 #endif
369 #if HAVE_PCRE
370 	{
371 		int flags = (notbol) ? PCRE_NOTBOL : 0;
372 		int ovector[3];
373 		matched = pcre_exec(pattern, NULL, line, line_len,
374 			0, flags, ovector, 3) >= 0;
375 		if (matched)
376 		{
377 			*sp = line + ovector[0];
378 			*ep = line + ovector[1];
379 		}
380 	}
381 #endif
382 #if HAVE_PCRE2
383 	{
384 		int flags = (notbol) ? PCRE2_NOTBOL : 0;
385 		pcre2_match_data *md = pcre2_match_data_create(3, NULL);
386 		matched = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
387 			0, flags, md, NULL) >= 0;
388 		if (matched)
389 		{
390 			PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
391 			*sp = line + ovector[0];
392 			*ep = line + ovector[1];
393 		}
394 		pcre2_match_data_free(md);
395 	}
396 #endif
397 #if HAVE_RE_COMP
398 	matched = (re_exec(line) == 1);
399 	/*
400 	 * re_exec doesn't seem to provide a way to get the matched string.
401 	 */
402 	*sp = *ep = NULL;
403 #endif
404 #if HAVE_REGCMP
405 	*ep = regex(pattern, line);
406 	matched = (*ep != NULL);
407 	if (matched)
408 		*sp = __loc1;
409 #endif
410 #if HAVE_V8_REGCOMP
411 #if HAVE_REGEXEC2
412 	matched = regexec2(pattern, line, notbol);
413 #else
414 	matched = regexec(pattern, line);
415 #endif
416 	if (matched)
417 	{
418 		*sp = pattern->startp[0];
419 		*ep = pattern->endp[0];
420 	}
421 #endif
422 	}
423 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
424 			((search_type & SRCH_NO_MATCH) && !matched);
425 	return (matched);
426 }
427 
428 /*
429  * Return the name of the pattern matching library.
430  */
431 	public char *
432 pattern_lib_name(VOID_PARAM)
433 {
434 #if HAVE_GNU_REGEX
435 	return ("GNU");
436 #else
437 #if HAVE_POSIX_REGCOMP
438 	return ("POSIX");
439 #else
440 #if HAVE_PCRE2
441 	return ("PCRE2");
442 #else
443 #if HAVE_PCRE
444 	return ("PCRE");
445 #else
446 #if HAVE_RE_COMP
447 	return ("BSD");
448 #else
449 #if HAVE_REGCMP
450 	return ("V8");
451 #else
452 #if HAVE_V8_REGCOMP
453 	return ("Spencer V8");
454 #else
455 	return ("no");
456 #endif
457 #endif
458 #endif
459 #endif
460 #endif
461 #endif
462 #endif
463 }
464