xref: /freebsd/contrib/less/pattern.c (revision 39ee7a7a6bdd1557b1c3532abf60d139798ac88b)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 /*
11  * Routines to do pattern matching.
12  */
13 
14 #include "less.h"
15 #include "pattern.h"
16 
17 extern int caseless;
18 
19 /*
20  * Compile a search pattern, for future use by match_pattern.
21  */
22 	static int
23 compile_pattern2(pattern, search_type, comp_pattern)
24 	char *pattern;
25 	int search_type;
26 	void **comp_pattern;
27 {
28 	if (search_type & SRCH_NO_REGEX)
29 		return (0);
30   {
31 #if HAVE_GNU_REGEX
32 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
33 		ecalloc(1, sizeof(struct re_pattern_buffer));
34 	struct re_pattern_buffer **pcomp =
35 		(struct re_pattern_buffer **) comp_pattern;
36 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
37 	if (re_compile_pattern(pattern, strlen(pattern), comp))
38 	{
39 		free(comp);
40 		error("Invalid pattern", NULL_PARG);
41 		return (-1);
42 	}
43 	if (*pcomp != NULL)
44 		regfree(*pcomp);
45 	*pcomp = comp;
46 #endif
47 #if HAVE_POSIX_REGCOMP
48 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
49 	regex_t **pcomp = (regex_t **) comp_pattern;
50 	if (regcomp(comp, pattern, REGCOMP_FLAG))
51 	{
52 		free(comp);
53 		error("Invalid pattern", NULL_PARG);
54 		return (-1);
55 	}
56 	if (*pcomp != NULL)
57 		regfree(*pcomp);
58 	*pcomp = comp;
59 #endif
60 #if HAVE_PCRE
61 	pcre *comp;
62 	pcre **pcomp = (pcre **) comp_pattern;
63 	constant char *errstring;
64 	int erroffset;
65 	PARG parg;
66 	comp = pcre_compile(pattern, 0,
67 			&errstring, &erroffset, NULL);
68 	if (comp == NULL)
69 	{
70 		parg.p_string = (char *) errstring;
71 		error("%s", &parg);
72 		return (-1);
73 	}
74 	*pcomp = comp;
75 #endif
76 #if HAVE_RE_COMP
77 	PARG parg;
78 	int *pcomp = (int *) comp_pattern;
79 	if ((parg.p_string = re_comp(pattern)) != NULL)
80 	{
81 		error("%s", &parg);
82 		return (-1);
83 	}
84 	*pcomp = 1;
85 #endif
86 #if HAVE_REGCMP
87 	char *comp;
88 	char **pcomp = (char **) comp_pattern;
89 	if ((comp = regcmp(pattern, 0)) == NULL)
90 	{
91 		error("Invalid pattern", NULL_PARG);
92 		return (-1);
93 	}
94 	if (pcomp != NULL)
95 		free(*pcomp);
96 	*pcomp = comp;
97 #endif
98 #if HAVE_V8_REGCOMP
99 	struct regexp *comp;
100 	struct regexp **pcomp = (struct regexp **) comp_pattern;
101 	if ((comp = regcomp(pattern)) == NULL)
102 	{
103 		/*
104 		 * regcomp has already printed an error message
105 		 * via regerror().
106 		 */
107 		return (-1);
108 	}
109 	if (*pcomp != NULL)
110 		free(*pcomp);
111 	*pcomp = comp;
112 #endif
113   }
114 	return (0);
115 }
116 
117 /*
118  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
119  */
120 	public int
121 compile_pattern(pattern, search_type, comp_pattern)
122 	char *pattern;
123 	int search_type;
124 	void **comp_pattern;
125 {
126 	char *cvt_pattern;
127 	int result;
128 
129 	if (caseless != OPT_ONPLUS)
130 		cvt_pattern = pattern;
131 	else
132 	{
133 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
134 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
135 	}
136 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern);
137 	if (cvt_pattern != pattern)
138 		free(cvt_pattern);
139 	return (result);
140 }
141 
142 /*
143  * Forget that we have a compiled pattern.
144  */
145 	public void
146 uncompile_pattern(pattern)
147 	void **pattern;
148 {
149 #if HAVE_GNU_REGEX
150 	struct re_pattern_buffer **pcomp = (struct re_pattern_buffer **) pattern;
151 	if (*pcomp != NULL)
152 		regfree(*pcomp);
153 	*pcomp = NULL;
154 #endif
155 #if HAVE_POSIX_REGCOMP
156 	regex_t **pcomp = (regex_t **) pattern;
157 	if (*pcomp != NULL)
158 		regfree(*pcomp);
159 	*pcomp = NULL;
160 #endif
161 #if HAVE_PCRE
162 	pcre **pcomp = (pcre **) pattern;
163 	if (*pcomp != NULL)
164 		pcre_free(*pcomp);
165 	*pcomp = NULL;
166 #endif
167 #if HAVE_RE_COMP
168 	int *pcomp = (int *) pattern;
169 	*pcomp = 0;
170 #endif
171 #if HAVE_REGCMP
172 	char **pcomp = (char **) pattern;
173 	if (*pcomp != NULL)
174 		free(*pcomp);
175 	*pcomp = NULL;
176 #endif
177 #if HAVE_V8_REGCOMP
178 	struct regexp **pcomp = (struct regexp **) pattern;
179 	if (*pcomp != NULL)
180 		free(*pcomp);
181 	*pcomp = NULL;
182 #endif
183 }
184 
185 /*
186  * Is a compiled pattern null?
187  */
188 	public int
189 is_null_pattern(pattern)
190 	void *pattern;
191 {
192 #if HAVE_GNU_REGEX
193 	return (pattern == NULL);
194 #endif
195 #if HAVE_POSIX_REGCOMP
196 	return (pattern == NULL);
197 #endif
198 #if HAVE_PCRE
199 	return (pattern == NULL);
200 #endif
201 #if HAVE_RE_COMP
202 	return (pattern == 0);
203 #endif
204 #if HAVE_REGCMP
205 	return (pattern == NULL);
206 #endif
207 #if HAVE_V8_REGCOMP
208 	return (pattern == NULL);
209 #endif
210 }
211 
/*
 * Simple pattern matching function.
 * It supports no metacharacters like *, etc.
 *
 * Looks for the first occurrence of the pattern_len bytes at "pattern"
 * within the buf_len bytes at "buf".
 *
 * Returns 1 if an occurrence is found, 0 otherwise.  On success, if
 * pfound is non-NULL *pfound is set to the start of the occurrence,
 * and if pend is non-NULL *pend is set to the byte just past it.
 * An empty pattern matches at the start of any non-empty buffer.
 */
	static int
match(pattern, pattern_len, buf, buf_len, pfound, pend)
	char *pattern;
	int pattern_len;
	char *buf;
	int buf_len;
	char **pfound, **pend;
{
	char *pp, *lp;
	char *pattern_end = pattern + pattern_len;
	char *buf_end = buf + buf_len;

	for ( ;  buf < buf_end;  buf++)
	{
		/*
		 * Check both limits before dereferencing, so that no byte
		 * beyond pattern_len or buf_len is ever read; neither the
		 * pattern nor the buffer needs to be NUL-terminated.
		 */
		for (pp = pattern, lp = buf;
		     pp != pattern_end && lp != buf_end && *pp == *lp;
		     pp++, lp++)
			continue;
		if (pp == pattern_end)
		{
			/* The whole pattern was consumed: match at buf. */
			if (pfound != NULL)
				*pfound = buf;
			if (pend != NULL)
				*pend = lp;
			return (1);
		}
	}
	return (0);
}
244 
245 /*
246  * Perform a pattern match with the previously compiled pattern.
247  * Set sp and ep to the start and end of the matched string.
248  */
249 	public int
250 match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
251 	void *pattern;
252 	char *tpattern;
253 	char *line;
254 	int line_len;
255 	char **sp;
256 	char **ep;
257 	int notbol;
258 	int search_type;
259 {
260 	int matched;
261 #if HAVE_GNU_REGEX
262 	struct re_pattern_buffer *spattern = (struct re_pattern_buffer *) pattern;
263 #endif
264 #if HAVE_POSIX_REGCOMP
265 	regex_t *spattern = (regex_t *) pattern;
266 #endif
267 #if HAVE_PCRE
268 	pcre *spattern = (pcre *) pattern;
269 #endif
270 #if HAVE_RE_COMP
271 	int spattern = (int) pattern;
272 #endif
273 #if HAVE_REGCMP
274 	char *spattern = (char *) pattern;
275 #endif
276 #if HAVE_V8_REGCOMP
277 	struct regexp *spattern = (struct regexp *) pattern;
278 #endif
279 
280 #if NO_REGEX
281 	search_type |= SRCH_NO_REGEX;
282 #endif
283 	if (search_type & SRCH_NO_REGEX)
284 		matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
285 	else
286 	{
287 #if HAVE_GNU_REGEX
288 	{
289 		struct re_registers search_regs;
290 		regoff_t *starts = (regoff_t *) ecalloc(1, sizeof (regoff_t));
291 		regoff_t *ends = (regoff_t *) ecalloc(1, sizeof (regoff_t));
292 		spattern->not_bol = notbol;
293 		re_set_registers(spattern, &search_regs, 1, starts, ends);
294 		matched = re_search(spattern, line, line_len, 0, line_len, &search_regs) >= 0;
295 		if (matched)
296 		{
297 			*sp = line + search_regs.start[0];
298 			*ep = line + search_regs.end[0];
299 		}
300 		free(starts);
301 		free(ends);
302 	}
303 #endif
304 #if HAVE_POSIX_REGCOMP
305 	{
306 		regmatch_t rm;
307 		int flags = (notbol) ? REG_NOTBOL : 0;
308 		matched = !regexec(spattern, line, 1, &rm, flags);
309 		if (matched)
310 		{
311 #ifndef __WATCOMC__
312 			*sp = line + rm.rm_so;
313 			*ep = line + rm.rm_eo;
314 #else
315 			*sp = rm.rm_sp;
316 			*ep = rm.rm_ep;
317 #endif
318 		}
319 	}
320 #endif
321 #if HAVE_PCRE
322 	{
323 		int flags = (notbol) ? PCRE_NOTBOL : 0;
324 		int ovector[3];
325 		matched = pcre_exec(spattern, NULL, line, line_len,
326 			0, flags, ovector, 3) >= 0;
327 		if (matched)
328 		{
329 			*sp = line + ovector[0];
330 			*ep = line + ovector[1];
331 		}
332 	}
333 #endif
334 #if HAVE_RE_COMP
335 	matched = (re_exec(line) == 1);
336 	/*
337 	 * re_exec doesn't seem to provide a way to get the matched string.
338 	 */
339 	*sp = *ep = NULL;
340 #endif
341 #if HAVE_REGCMP
342 	*ep = regex(spattern, line);
343 	matched = (*ep != NULL);
344 	if (matched)
345 		*sp = __loc1;
346 #endif
347 #if HAVE_V8_REGCOMP
348 #if HAVE_REGEXEC2
349 	matched = regexec2(spattern, line, notbol);
350 #else
351 	matched = regexec(spattern, line);
352 #endif
353 	if (matched)
354 	{
355 		*sp = spattern->startp[0];
356 		*ep = spattern->endp[0];
357 	}
358 #endif
359 	}
360 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
361 			((search_type & SRCH_NO_MATCH) && !matched);
362 	return (matched);
363 }
364 
365