1 /*
2 * Copyright (C) 1984-2023 Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information, see the README file.
8 */
9
10 /*
11 * Routines to do pattern matching.
12 */
13
14 #include "less.h"
15
16 extern int caseless;
17 extern int is_caseless;
18 extern int utf_mode;
19
20 /*
21 * Compile a search pattern, for future use by match_pattern.
22 */
compile_pattern2(char * pattern,int search_type,PATTERN_TYPE * comp_pattern,int show_error)23 static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
24 {
25 if (search_type & SRCH_NO_REGEX)
26 return (0);
27 {
28 #if HAVE_GNU_REGEX
29 struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30 ecalloc(1, sizeof(struct re_pattern_buffer));
31 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
32 if (re_compile_pattern(pattern, strlen(pattern), comp))
33 {
34 free(comp);
35 if (show_error)
36 error("Invalid pattern", NULL_PARG);
37 return (-1);
38 }
39 if (*comp_pattern != NULL)
40 {
41 regfree(*comp_pattern);
42 free(*comp_pattern);
43 }
44 *comp_pattern = comp;
45 #endif
46 #if HAVE_POSIX_REGCOMP
47 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
48 if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
49 {
50 free(comp);
51 if (show_error)
52 error("Invalid pattern", NULL_PARG);
53 return (-1);
54 }
55 if (*comp_pattern != NULL)
56 {
57 regfree(*comp_pattern);
58 free(*comp_pattern);
59 }
60 *comp_pattern = comp;
61 #endif
62 #if HAVE_PCRE
63 constant char *errstring;
64 int erroffset;
65 PARG parg;
66 pcre *comp = pcre_compile(pattern,
67 ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
68 (is_caseless ? PCRE_CASELESS : 0),
69 &errstring, &erroffset, NULL);
70 if (comp == NULL)
71 {
72 parg.p_string = (char *) errstring;
73 if (show_error)
74 error("%s", &parg);
75 return (-1);
76 }
77 *comp_pattern = comp;
78 #endif
79 #if HAVE_PCRE2
80 int errcode;
81 PCRE2_SIZE erroffset;
82 PARG parg;
83 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
84 (is_caseless ? PCRE2_CASELESS : 0),
85 &errcode, &erroffset, NULL);
86 if (comp == NULL)
87 {
88 if (show_error)
89 {
90 char msg[160];
91 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
92 parg.p_string = msg;
93 error("%s", &parg);
94 }
95 return (-1);
96 }
97 *comp_pattern = comp;
98 #endif
99 #if HAVE_RE_COMP
100 PARG parg;
101 if ((parg.p_string = re_comp(pattern)) != NULL)
102 {
103 if (show_error)
104 error("%s", &parg);
105 return (-1);
106 }
107 *comp_pattern = 1;
108 #endif
109 #if HAVE_REGCMP
110 char *comp;
111 if ((comp = regcmp(pattern, 0)) == NULL)
112 {
113 if (show_error)
114 error("Invalid pattern", NULL_PARG);
115 return (-1);
116 }
117 if (comp_pattern != NULL)
118 free(*comp_pattern);
119 *comp_pattern = comp;
120 #endif
121 #if HAVE_V8_REGCOMP
122 struct regexp *comp;
123 reg_show_error = show_error;
124 comp = regcomp(pattern);
125 reg_show_error = 1;
126 if (comp == NULL)
127 {
128 /*
129 * regcomp has already printed an error message
130 * via regerror().
131 */
132 return (-1);
133 }
134 if (*comp_pattern != NULL)
135 free(*comp_pattern);
136 *comp_pattern = comp;
137 #endif
138 }
139 return (0);
140 }
141
142 /*
143 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
144 */
compile_pattern(char * pattern,int search_type,int show_error,PATTERN_TYPE * comp_pattern)145 public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
146 {
147 char *cvt_pattern;
148 int result;
149
150 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
151 cvt_pattern = pattern;
152 else
153 {
154 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
155 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
156 }
157 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
158 if (cvt_pattern != pattern)
159 free(cvt_pattern);
160 return (result);
161 }
162
163 /*
164 * Forget that we have a compiled pattern.
165 */
uncompile_pattern(PATTERN_TYPE * pattern)166 public void uncompile_pattern(PATTERN_TYPE *pattern)
167 {
168 #if HAVE_GNU_REGEX
169 if (*pattern != NULL)
170 {
171 regfree(*pattern);
172 free(*pattern);
173 }
174 *pattern = NULL;
175 #endif
176 #if HAVE_POSIX_REGCOMP
177 if (*pattern != NULL)
178 {
179 regfree(*pattern);
180 free(*pattern);
181 }
182 *pattern = NULL;
183 #endif
184 #if HAVE_PCRE
185 if (*pattern != NULL)
186 pcre_free(*pattern);
187 *pattern = NULL;
188 #endif
189 #if HAVE_PCRE2
190 if (*pattern != NULL)
191 pcre2_code_free(*pattern);
192 *pattern = NULL;
193 #endif
194 #if HAVE_RE_COMP
195 *pattern = 0;
196 #endif
197 #if HAVE_REGCMP
198 if (*pattern != NULL)
199 free(*pattern);
200 *pattern = NULL;
201 #endif
202 #if HAVE_V8_REGCOMP
203 if (*pattern != NULL)
204 free(*pattern);
205 *pattern = NULL;
206 #endif
207 }
208
#if 0
/*
 * Report whether a pattern compiles: returns 1 if it does, 0 if not.
 * The trial compilation is silent and is discarded afterwards.
 * (Currently compiled out.)
 */
public int valid_pattern(char *pattern)
{
	PATTERN_TYPE probe;

	SET_NULL_PATTERN(probe);
	if (compile_pattern2(pattern, 0, &probe, 0) != 0)
		return (0);
	uncompile_pattern(&probe);
	return (1);
}
#endif
226
227 /*
228 * Is a compiled pattern null?
229 */
is_null_pattern(PATTERN_TYPE pattern)230 public int is_null_pattern(PATTERN_TYPE pattern)
231 {
232 #if HAVE_GNU_REGEX
233 return (pattern == NULL);
234 #endif
235 #if HAVE_POSIX_REGCOMP
236 return (pattern == NULL);
237 #endif
238 #if HAVE_PCRE
239 return (pattern == NULL);
240 #endif
241 #if HAVE_PCRE2
242 return (pattern == NULL);
243 #endif
244 #if HAVE_RE_COMP
245 return (pattern == 0);
246 #endif
247 #if HAVE_REGCMP
248 return (pattern == NULL);
249 #endif
250 #if HAVE_V8_REGCOMP
251 return (pattern == NULL);
252 #endif
253 #if NO_REGEX
254 return (pattern == NULL);
255 #endif
256 }
257 /*
258 * Simple pattern matching function.
259 * It supports no metacharacters like *, etc.
260 */
match(char * pattern,int pattern_len,char * buf,int buf_len,char *** sp,char *** ep,int nsubs)261 static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
262 {
263 char *pp, *lp;
264 char *pattern_end = pattern + pattern_len;
265 char *buf_end = buf + buf_len;
266
267 for ( ; buf < buf_end; buf++)
268 {
269 for (pp = pattern, lp = buf; ; pp++, lp++)
270 {
271 char cp = *pp;
272 char cl = *lp;
273 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
274 cp = ASCII_TO_LOWER(cp);
275 if (cp != cl)
276 break;
277 if (pp == pattern_end || lp == buf_end)
278 break;
279 }
280 if (pp == pattern_end)
281 {
282 *(*sp)++ = buf;
283 *(*ep)++ = lp;
284 return (1);
285 }
286 }
287 **sp = **ep = NULL;
288 return (0);
289 }
290
291 /*
292 * Perform a pattern match with the previously compiled pattern.
293 * Set sp[0] and ep[0] to the start and end of the matched string.
294 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
295 * Subpatterns are defined by parentheses in the regex language.
296 */
match_pattern1(PATTERN_TYPE pattern,char * tpattern,char * line,int line_len,char ** sp,char ** ep,int nsp,int notbol,int search_type)297 static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
298 {
299 int matched;
300
301 #if NO_REGEX
302 search_type |= SRCH_NO_REGEX;
303 #endif
304 if (search_type & SRCH_NO_REGEX)
305 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
306 else
307 {
308 #if HAVE_GNU_REGEX
309 {
310 struct re_registers search_regs;
311 pattern->not_bol = notbol;
312 pattern->regs_allocated = REGS_UNALLOCATED;
313 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
314 if (matched)
315 {
316 *sp++ = line + search_regs.start[0];
317 *ep++ = line + search_regs.end[0];
318 }
319 }
320 #endif
321 #if HAVE_POSIX_REGCOMP
322 {
323 #define RM_COUNT (NUM_SEARCH_COLORS+2)
324 regmatch_t rm[RM_COUNT];
325 int flags = (notbol) ? REG_NOTBOL : 0;
326 #ifdef REG_STARTEND
327 flags |= REG_STARTEND;
328 rm[0].rm_so = 0;
329 rm[0].rm_eo = line_len;
330 #endif
331 matched = !regexec(pattern, line, RM_COUNT, rm, flags);
332 if (matched)
333 {
334 int i;
335 int ecount;
336 for (ecount = RM_COUNT; ecount > 0; ecount--)
337 if (rm[ecount-1].rm_so >= 0)
338 break;
339 if (ecount >= nsp)
340 ecount = nsp-1;
341 for (i = 0; i < ecount; i++)
342 {
343 if (rm[i].rm_so < 0)
344 {
345 *sp++ = *ep++ = line;
346 } else
347 {
348 #ifndef __WATCOMC__
349 *sp++ = line + rm[i].rm_so;
350 *ep++ = line + rm[i].rm_eo;
351 #else
352 *sp++ = rm[i].rm_sp;
353 *ep++ = rm[i].rm_ep;
354 #endif
355 }
356 }
357 }
358 }
359 #endif
360 #if HAVE_PCRE
361 {
362 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
363 int ovector[OVECTOR_COUNT];
364 int flags = (notbol) ? PCRE_NOTBOL : 0;
365 int i;
366 int ecount;
367 int mcount = pcre_exec(pattern, NULL, line, line_len,
368 0, flags, ovector, OVECTOR_COUNT);
369 matched = (mcount > 0);
370 ecount = nsp-1;
371 if (ecount > mcount) ecount = mcount;
372 for (i = 0; i < ecount*2; )
373 {
374 if (ovector[i] < 0 || ovector[i+1] < 0)
375 {
376 *sp++ = *ep++ = line;
377 i += 2;
378 } else
379 {
380 *sp++ = line + ovector[i++];
381 *ep++ = line + ovector[i++];
382 }
383 }
384 }
385 #endif
386 #if HAVE_PCRE2
387 {
388 int flags = (notbol) ? PCRE2_NOTBOL : 0;
389 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
390 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
391 0, flags, md, NULL);
392 matched = (mcount > 0);
393 if (matched)
394 {
395 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
396 int i;
397 int ecount = nsp-1;
398 if (ecount > mcount) ecount = mcount;
399 for (i = 0; i < ecount*2; )
400 {
401 if (ovector[i] < 0 || ovector[i+1] < 0)
402 {
403 *sp++ = *ep++ = line;
404 i += 2;
405 } else
406 {
407 *sp++ = line + ovector[i++];
408 *ep++ = line + ovector[i++];
409 }
410 }
411 }
412 pcre2_match_data_free(md);
413 }
414 #endif
415 #if HAVE_RE_COMP
416 matched = (re_exec(line) == 1);
417 /*
418 * re_exec doesn't seem to provide a way to get the matched string.
419 */
420 #endif
421 #if HAVE_REGCMP
422 matched = ((*ep++ = regex(pattern, line)) != NULL);
423 if (matched)
424 *sp++ = __loc1;
425 #endif
426 #if HAVE_V8_REGCOMP
427 #if HAVE_REGEXEC2
428 matched = regexec2(pattern, line, notbol);
429 #else
430 matched = regexec(pattern, line);
431 #endif
432 if (matched)
433 {
434 *sp++ = pattern->startp[0];
435 *ep++ = pattern->endp[0];
436 }
437 #endif
438 }
439 *sp = *ep = NULL;
440 matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
441 ((search_type & SRCH_NO_MATCH) && !matched);
442 return (matched);
443 }
444
match_pattern(PATTERN_TYPE pattern,char * tpattern,char * line,int line_len,char ** sp,char ** ep,int nsp,int notbol,int search_type)445 public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
446 {
447 int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
448 int i;
449 for (i = 1; i <= NUM_SEARCH_COLORS; i++)
450 {
451 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
452 matched = 0;
453 }
454 return matched;
455 }
456
457 /*
458 * Return the name of the pattern matching library.
459 */
pattern_lib_name(void)460 public char * pattern_lib_name(void)
461 {
462 #if HAVE_GNU_REGEX
463 return ("GNU");
464 #else
465 #if HAVE_POSIX_REGCOMP
466 return ("POSIX");
467 #else
468 #if HAVE_PCRE2
469 return ("PCRE2");
470 #else
471 #if HAVE_PCRE
472 return ("PCRE");
473 #else
474 #if HAVE_RE_COMP
475 return ("BSD");
476 #else
477 #if HAVE_REGCMP
478 return ("V8");
479 #else
480 #if HAVE_V8_REGCOMP
481 return ("Spencer V8");
482 #else
483 return ("no");
484 #endif
485 #endif
486 #endif
487 #endif
488 #endif
489 #endif
490 #endif
491 }
492