xref: /freebsd/contrib/less/pattern.c (revision 0b3105a37d7adcadcb720112fed4dc4e8040be99)
1 /*
2  * Copyright (C) 1984-2015  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to do pattern matching.
12  */
13 
14 #include "less.h"
15 #include "pattern.h"
16 
17 extern int caseless;
18 
19 /*
20  * Compile a search pattern, for future use by match_pattern.
21  */
22 	static int
23 compile_pattern2(pattern, search_type, comp_pattern, show_error)
24 	char *pattern;
25 	int search_type;
26 	void **comp_pattern;
27 	int show_error;
28 {
29 	if (search_type & SRCH_NO_REGEX)
30 		return (0);
31   {
32 #if HAVE_GNU_REGEX
33 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
34 		ecalloc(1, sizeof(struct re_pattern_buffer));
35 	struct re_pattern_buffer **pcomp =
36 		(struct re_pattern_buffer **) comp_pattern;
37 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
38 	if (re_compile_pattern(pattern, strlen(pattern), comp))
39 	{
40 		free(comp);
41 		if (show_error)
42 			error("Invalid pattern", NULL_PARG);
43 		return (-1);
44 	}
45 	if (*pcomp != NULL)
46 		regfree(*pcomp);
47 	*pcomp = comp;
48 #endif
49 #if HAVE_POSIX_REGCOMP
50 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
51 	regex_t **pcomp = (regex_t **) comp_pattern;
52 	if (regcomp(comp, pattern, REGCOMP_FLAG))
53 	{
54 		free(comp);
55 		if (show_error)
56 			error("Invalid pattern", NULL_PARG);
57 		return (-1);
58 	}
59 	if (*pcomp != NULL)
60 		regfree(*pcomp);
61 	*pcomp = comp;
62 #endif
63 #if HAVE_PCRE
64 	pcre *comp;
65 	pcre **pcomp = (pcre **) comp_pattern;
66 	constant char *errstring;
67 	int erroffset;
68 	PARG parg;
69 	comp = pcre_compile(pattern, 0,
70 			&errstring, &erroffset, NULL);
71 	if (comp == NULL)
72 	{
73 		parg.p_string = (char *) errstring;
74 		if (show_error)
75 			error("%s", &parg);
76 		return (-1);
77 	}
78 	*pcomp = comp;
79 #endif
80 #if HAVE_RE_COMP
81 	PARG parg;
82 	int *pcomp = (int *) comp_pattern;
83 	if ((parg.p_string = re_comp(pattern)) != NULL)
84 	{
85 		if (show_error)
86 			error("%s", &parg);
87 		return (-1);
88 	}
89 	*pcomp = 1;
90 #endif
91 #if HAVE_REGCMP
92 	char *comp;
93 	char **pcomp = (char **) comp_pattern;
94 	if ((comp = regcmp(pattern, 0)) == NULL)
95 	{
96 		if (show_error)
97 			error("Invalid pattern", NULL_PARG);
98 		return (-1);
99 	}
100 	if (pcomp != NULL)
101 		free(*pcomp);
102 	*pcomp = comp;
103 #endif
104 #if HAVE_V8_REGCOMP
105 	struct regexp *comp;
106 	struct regexp **pcomp = (struct regexp **) comp_pattern;
107 	reg_show_error = show_error;
108 	comp = regcomp(pattern);
109 	reg_show_error = 1;
110 	if (comp == NULL)
111 	{
112 		/*
113 		 * regcomp has already printed an error message
114 		 * via regerror().
115 		 */
116 		return (-1);
117 	}
118 	if (*pcomp != NULL)
119 		free(*pcomp);
120 	*pcomp = comp;
121 #endif
122   }
123 	return (0);
124 }
125 
126 /*
127  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
128  */
129 	public int
130 compile_pattern(pattern, search_type, comp_pattern)
131 	char *pattern;
132 	int search_type;
133 	void **comp_pattern;
134 {
135 	char *cvt_pattern;
136 	int result;
137 
138 	if (caseless != OPT_ONPLUS)
139 		cvt_pattern = pattern;
140 	else
141 	{
142 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
143 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
144 	}
145 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, 1);
146 	if (cvt_pattern != pattern)
147 		free(cvt_pattern);
148 	return (result);
149 }
150 
151 /*
152  * Forget that we have a compiled pattern.
153  */
154 	public void
155 uncompile_pattern(pattern)
156 	void **pattern;
157 {
158 #if HAVE_GNU_REGEX
159 	struct re_pattern_buffer **pcomp = (struct re_pattern_buffer **) pattern;
160 	if (*pcomp != NULL)
161 		regfree(*pcomp);
162 	*pcomp = NULL;
163 #endif
164 #if HAVE_POSIX_REGCOMP
165 	regex_t **pcomp = (regex_t **) pattern;
166 	if (*pcomp != NULL)
167 		regfree(*pcomp);
168 	*pcomp = NULL;
169 #endif
170 #if HAVE_PCRE
171 	pcre **pcomp = (pcre **) pattern;
172 	if (*pcomp != NULL)
173 		pcre_free(*pcomp);
174 	*pcomp = NULL;
175 #endif
176 #if HAVE_RE_COMP
177 	int *pcomp = (int *) pattern;
178 	*pcomp = 0;
179 #endif
180 #if HAVE_REGCMP
181 	char **pcomp = (char **) pattern;
182 	if (*pcomp != NULL)
183 		free(*pcomp);
184 	*pcomp = NULL;
185 #endif
186 #if HAVE_V8_REGCOMP
187 	struct regexp **pcomp = (struct regexp **) pattern;
188 	if (*pcomp != NULL)
189 		free(*pcomp);
190 	*pcomp = NULL;
191 #endif
192 }
193 
194 /*
195  * Can a pattern be successfully compiled?
196  */
197 	public int
198 valid_pattern(pattern)
199 	char *pattern;
200 {
201 	void *comp_pattern;
202 	int result;
203 
204 	CLEAR_PATTERN(comp_pattern);
205 	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
206 	if (result != 0)
207 		return (0);
208 	uncompile_pattern(&comp_pattern);
209 	return (1);
210 }
211 
212 /*
213  * Is a compiled pattern null?
214  */
215 	public int
216 is_null_pattern(pattern)
217 	void *pattern;
218 {
219 #if HAVE_GNU_REGEX
220 	return (pattern == NULL);
221 #endif
222 #if HAVE_POSIX_REGCOMP
223 	return (pattern == NULL);
224 #endif
225 #if HAVE_PCRE
226 	return (pattern == NULL);
227 #endif
228 #if HAVE_RE_COMP
229 	return (pattern == 0);
230 #endif
231 #if HAVE_REGCMP
232 	return (pattern == NULL);
233 #endif
234 #if HAVE_V8_REGCOMP
235 	return (pattern == NULL);
236 #endif
237 #if NO_REGEX
238 	return (pattern == NULL);
239 #endif
240 }
241 
242 /*
243  * Simple pattern matching function.
244  * It supports no metacharacters like *, etc.
245  */
246 	static int
247 match(pattern, pattern_len, buf, buf_len, pfound, pend)
248 	char *pattern;
249 	int pattern_len;
250 	char *buf;
251 	int buf_len;
252 	char **pfound, **pend;
253 {
254 	register char *pp, *lp;
255 	register char *pattern_end = pattern + pattern_len;
256 	register char *buf_end = buf + buf_len;
257 
258 	for ( ;  buf < buf_end;  buf++)
259 	{
260 		for (pp = pattern, lp = buf;  ;  pp++, lp++)
261 		{
262 			char cp = *pp;
263 			char cl = *lp;
264 			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
265 				cp = ASCII_TO_LOWER(cp);
266 			if (cp != cl)
267 				break;
268 			if (pp == pattern_end || lp == buf_end)
269 				break;
270 		}
271 		if (pp == pattern_end)
272 		{
273 			if (pfound != NULL)
274 				*pfound = buf;
275 			if (pend != NULL)
276 				*pend = lp;
277 			return (1);
278 		}
279 	}
280 	return (0);
281 }
282 
283 /*
284  * Perform a pattern match with the previously compiled pattern.
285  * Set sp and ep to the start and end of the matched string.
286  */
287 	public int
288 match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
289 	void *pattern;
290 	char *tpattern;
291 	char *line;
292 	int line_len;
293 	char **sp;
294 	char **ep;
295 	int notbol;
296 	int search_type;
297 {
298 	int matched;
299 #if HAVE_GNU_REGEX
300 	struct re_pattern_buffer *spattern = (struct re_pattern_buffer *) pattern;
301 #endif
302 #if HAVE_POSIX_REGCOMP
303 	regex_t *spattern = (regex_t *) pattern;
304 #endif
305 #if HAVE_PCRE
306 	pcre *spattern = (pcre *) pattern;
307 #endif
308 #if HAVE_RE_COMP
309 	int spattern = (int) pattern;
310 #endif
311 #if HAVE_REGCMP
312 	char *spattern = (char *) pattern;
313 #endif
314 #if HAVE_V8_REGCOMP
315 	struct regexp *spattern = (struct regexp *) pattern;
316 #endif
317 
318 	*sp = *ep = NULL;
319 #if NO_REGEX
320 	search_type |= SRCH_NO_REGEX;
321 #endif
322 	if (search_type & SRCH_NO_REGEX)
323 		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
324 	else
325 	{
326 #if HAVE_GNU_REGEX
327 	{
328 		struct re_registers search_regs;
329 		spattern->not_bol = notbol;
330 		spattern->regs_allocated = REGS_UNALLOCATED;
331 		matched = re_search(spattern, line, line_len, 0, line_len, &search_regs) >= 0;
332 		if (matched)
333 		{
334 			*sp = line + search_regs.start[0];
335 			*ep = line + search_regs.end[0];
336 		}
337 	}
338 #endif
339 #if HAVE_POSIX_REGCOMP
340 	{
341 		regmatch_t rm;
342 		int flags = (notbol) ? REG_NOTBOL : 0;
343 #ifdef REG_STARTEND
344 		flags |= REG_STARTEND;
345 		rm.rm_so = 0;
346 		rm.rm_eo = line_len;
347 #endif
348 		matched = !regexec(spattern, line, 1, &rm, flags);
349 		if (matched)
350 		{
351 #ifndef __WATCOMC__
352 			*sp = line + rm.rm_so;
353 			*ep = line + rm.rm_eo;
354 #else
355 			*sp = rm.rm_sp;
356 			*ep = rm.rm_ep;
357 #endif
358 		}
359 	}
360 #endif
361 #if HAVE_PCRE
362 	{
363 		int flags = (notbol) ? PCRE_NOTBOL : 0;
364 		int ovector[3];
365 		matched = pcre_exec(spattern, NULL, line, line_len,
366 			0, flags, ovector, 3) >= 0;
367 		if (matched)
368 		{
369 			*sp = line + ovector[0];
370 			*ep = line + ovector[1];
371 		}
372 	}
373 #endif
374 #if HAVE_RE_COMP
375 	matched = (re_exec(line) == 1);
376 	/*
377 	 * re_exec doesn't seem to provide a way to get the matched string.
378 	 */
379 	*sp = *ep = NULL;
380 #endif
381 #if HAVE_REGCMP
382 	*ep = regex(spattern, line);
383 	matched = (*ep != NULL);
384 	if (matched)
385 		*sp = __loc1;
386 #endif
387 #if HAVE_V8_REGCOMP
388 #if HAVE_REGEXEC2
389 	matched = regexec2(spattern, line, notbol);
390 #else
391 	matched = regexec(spattern, line);
392 #endif
393 	if (matched)
394 	{
395 		*sp = spattern->startp[0];
396 		*ep = spattern->endp[0];
397 	}
398 #endif
399 	}
400 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
401 			((search_type & SRCH_NO_MATCH) && !matched);
402 	return (matched);
403 }
404 
405