xref: /freebsd/contrib/less/pattern.c (revision 6829dae12bb055451fa467da4589c43bd03b1e64)
1 /*
2  * Copyright (C) 1984-2017  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to do pattern matching.
12  */
13 
14 #include "less.h"
15 
16 extern int caseless;
17 
18 /*
19  * Compile a search pattern, for future use by match_pattern.
20  */
21 	static int
22 compile_pattern2(pattern, search_type, comp_pattern, show_error)
23 	char *pattern;
24 	int search_type;
25 	PATTERN_TYPE *comp_pattern;
26 	int show_error;
27 {
28 	if (search_type & SRCH_NO_REGEX)
29 		return (0);
30   {
31 #if HAVE_GNU_REGEX
32 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
33 		ecalloc(1, sizeof(struct re_pattern_buffer));
34 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
35 	if (re_compile_pattern(pattern, strlen(pattern), comp))
36 	{
37 		free(comp);
38 		if (show_error)
39 			error("Invalid pattern", NULL_PARG);
40 		return (-1);
41 	}
42 	if (*comp_pattern != NULL)
43 	{
44 		regfree(*comp_pattern);
45 		free(*comp_pattern);
46 	}
47 	*comp_pattern = comp;
48 #endif
49 #if HAVE_POSIX_REGCOMP
50 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
51 	if (regcomp(comp, pattern, REGCOMP_FLAG))
52 	{
53 		free(comp);
54 		if (show_error)
55 			error("Invalid pattern", NULL_PARG);
56 		return (-1);
57 	}
58 	if (*comp_pattern != NULL)
59 	{
60 		regfree(*comp_pattern);
61 		free(*comp_pattern);
62 	}
63 	*comp_pattern = comp;
64 #endif
65 #if HAVE_PCRE
66 	pcre *comp;
67 	constant char *errstring;
68 	int erroffset;
69 	PARG parg;
70 	comp = pcre_compile(pattern, 0,
71 			&errstring, &erroffset, NULL);
72 	if (comp == NULL)
73 	{
74 		parg.p_string = (char *) errstring;
75 		if (show_error)
76 			error("%s", &parg);
77 		return (-1);
78 	}
79 	*comp_pattern = comp;
80 #endif
81 #if HAVE_RE_COMP
82 	PARG parg;
83 	if ((parg.p_string = re_comp(pattern)) != NULL)
84 	{
85 		if (show_error)
86 			error("%s", &parg);
87 		return (-1);
88 	}
89 	*comp_pattern = 1;
90 #endif
91 #if HAVE_REGCMP
92 	char *comp;
93 	if ((comp = regcmp(pattern, 0)) == NULL)
94 	{
95 		if (show_error)
96 			error("Invalid pattern", NULL_PARG);
97 		return (-1);
98 	}
99 	if (comp_pattern != NULL)
100 		free(*comp_pattern);
101 	*comp_pattern = comp;
102 #endif
103 #if HAVE_V8_REGCOMP
104 	struct regexp *comp;
105 	reg_show_error = show_error;
106 	comp = regcomp(pattern);
107 	reg_show_error = 1;
108 	if (comp == NULL)
109 	{
110 		/*
111 		 * regcomp has already printed an error message
112 		 * via regerror().
113 		 */
114 		return (-1);
115 	}
116 	if (*comp_pattern != NULL)
117 		free(*comp_pattern);
118 	*comp_pattern = comp;
119 #endif
120   }
121 	return (0);
122 }
123 
124 /*
125  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
126  */
127 	public int
128 compile_pattern(pattern, search_type, comp_pattern)
129 	char *pattern;
130 	int search_type;
131 	PATTERN_TYPE *comp_pattern;
132 {
133 	char *cvt_pattern;
134 	int result;
135 
136 	if (caseless != OPT_ONPLUS)
137 		cvt_pattern = pattern;
138 	else
139 	{
140 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
141 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
142 	}
143 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, 1);
144 	if (cvt_pattern != pattern)
145 		free(cvt_pattern);
146 	return (result);
147 }
148 
149 /*
150  * Forget that we have a compiled pattern.
151  */
152 	public void
153 uncompile_pattern(pattern)
154 	PATTERN_TYPE *pattern;
155 {
156 #if HAVE_GNU_REGEX
157 	if (*pattern != NULL)
158 	{
159 		regfree(*pattern);
160 		free(*pattern);
161 	}
162 	*pattern = NULL;
163 #endif
164 #if HAVE_POSIX_REGCOMP
165 	if (*pattern != NULL)
166 	{
167 		regfree(*pattern);
168 		free(*pattern);
169 	}
170 	*pattern = NULL;
171 #endif
172 #if HAVE_PCRE
173 	if (*pattern != NULL)
174 		pcre_free(*pattern);
175 	*pattern = NULL;
176 #endif
177 #if HAVE_RE_COMP
178 	*pattern = 0;
179 #endif
180 #if HAVE_REGCMP
181 	if (*pattern != NULL)
182 		free(*pattern);
183 	*pattern = NULL;
184 #endif
185 #if HAVE_V8_REGCOMP
186 	if (*pattern != NULL)
187 		free(*pattern);
188 	*pattern = NULL;
189 #endif
190 }
191 
192 /*
193  * Can a pattern be successfully compiled?
194  */
195 	public int
196 valid_pattern(pattern)
197 	char *pattern;
198 {
199 	PATTERN_TYPE comp_pattern;
200 	int result;
201 
202 	CLEAR_PATTERN(comp_pattern);
203 	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
204 	if (result != 0)
205 		return (0);
206 	uncompile_pattern(&comp_pattern);
207 	return (1);
208 }
209 
210 /*
211  * Is a compiled pattern null?
212  */
213 	public int
214 is_null_pattern(pattern)
215 	PATTERN_TYPE pattern;
216 {
217 #if HAVE_GNU_REGEX
218 	return (pattern == NULL);
219 #endif
220 #if HAVE_POSIX_REGCOMP
221 	return (pattern == NULL);
222 #endif
223 #if HAVE_PCRE
224 	return (pattern == NULL);
225 #endif
226 #if HAVE_RE_COMP
227 	return (pattern == 0);
228 #endif
229 #if HAVE_REGCMP
230 	return (pattern == NULL);
231 #endif
232 #if HAVE_V8_REGCOMP
233 	return (pattern == NULL);
234 #endif
235 #if NO_REGEX
236 	return (pattern == NULL);
237 #endif
238 }
239 
240 /*
241  * Simple pattern matching function.
242  * It supports no metacharacters like *, etc.
243  */
244 	static int
245 match(pattern, pattern_len, buf, buf_len, pfound, pend)
246 	char *pattern;
247 	int pattern_len;
248 	char *buf;
249 	int buf_len;
250 	char **pfound, **pend;
251 {
252 	char *pp, *lp;
253 	char *pattern_end = pattern + pattern_len;
254 	char *buf_end = buf + buf_len;
255 
256 	for ( ;  buf < buf_end;  buf++)
257 	{
258 		for (pp = pattern, lp = buf;  ;  pp++, lp++)
259 		{
260 			char cp = *pp;
261 			char cl = *lp;
262 			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
263 				cp = ASCII_TO_LOWER(cp);
264 			if (cp != cl)
265 				break;
266 			if (pp == pattern_end || lp == buf_end)
267 				break;
268 		}
269 		if (pp == pattern_end)
270 		{
271 			if (pfound != NULL)
272 				*pfound = buf;
273 			if (pend != NULL)
274 				*pend = lp;
275 			return (1);
276 		}
277 	}
278 	return (0);
279 }
280 
281 /*
282  * Perform a pattern match with the previously compiled pattern.
283  * Set sp and ep to the start and end of the matched string.
284  */
285 	public int
286 match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
287 	PATTERN_TYPE pattern;
288 	char *tpattern;
289 	char *line;
290 	int line_len;
291 	char **sp;
292 	char **ep;
293 	int notbol;
294 	int search_type;
295 {
296 	int matched;
297 
298 	*sp = *ep = NULL;
299 #if NO_REGEX
300 	search_type |= SRCH_NO_REGEX;
301 #endif
302 	if (search_type & SRCH_NO_REGEX)
303 		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
304 	else
305 	{
306 #if HAVE_GNU_REGEX
307 	{
308 		struct re_registers search_regs;
309 		pattern->not_bol = notbol;
310 		pattern->regs_allocated = REGS_UNALLOCATED;
311 		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
312 		if (matched)
313 		{
314 			*sp = line + search_regs.start[0];
315 			*ep = line + search_regs.end[0];
316 		}
317 	}
318 #endif
319 #if HAVE_POSIX_REGCOMP
320 	{
321 		regmatch_t rm;
322 		int flags = (notbol) ? REG_NOTBOL : 0;
323 #ifdef REG_STARTEND
324 		flags |= REG_STARTEND;
325 		rm.rm_so = 0;
326 		rm.rm_eo = line_len;
327 #endif
328 		matched = !regexec(pattern, line, 1, &rm, flags);
329 		if (matched)
330 		{
331 #ifndef __WATCOMC__
332 			*sp = line + rm.rm_so;
333 			*ep = line + rm.rm_eo;
334 #else
335 			*sp = rm.rm_sp;
336 			*ep = rm.rm_ep;
337 #endif
338 		}
339 	}
340 #endif
341 #if HAVE_PCRE
342 	{
343 		int flags = (notbol) ? PCRE_NOTBOL : 0;
344 		int ovector[3];
345 		matched = pcre_exec(pattern, NULL, line, line_len,
346 			0, flags, ovector, 3) >= 0;
347 		if (matched)
348 		{
349 			*sp = line + ovector[0];
350 			*ep = line + ovector[1];
351 		}
352 	}
353 #endif
354 #if HAVE_RE_COMP
355 	matched = (re_exec(line) == 1);
356 	/*
357 	 * re_exec doesn't seem to provide a way to get the matched string.
358 	 */
359 	*sp = *ep = NULL;
360 #endif
361 #if HAVE_REGCMP
362 	*ep = regex(pattern, line);
363 	matched = (*ep != NULL);
364 	if (matched)
365 		*sp = __loc1;
366 #endif
367 #if HAVE_V8_REGCOMP
368 #if HAVE_REGEXEC2
369 	matched = regexec2(pattern, line, notbol);
370 #else
371 	matched = regexec(pattern, line);
372 #endif
373 	if (matched)
374 	{
375 		*sp = pattern->startp[0];
376 		*ep = pattern->endp[0];
377 	}
378 #endif
379 	}
380 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
381 			((search_type & SRCH_NO_MATCH) && !matched);
382 	return (matched);
383 }
384 
385