pattern.c (95270f73baf6fa95ae529bc2eb6a61f5c79f32c0) pattern.c (d713e0891ff9ab8246245c3206851d486ecfdd37)
1/*
1/*
2 * Copyright (C) 1984-2022 Mark Nudelman
2 * Copyright (C) 1984-2023 Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information, see the README file.
8 */
9
10/*

--- 4 unchanged lines hidden (view full) ---

15
16extern int caseless;
17extern int is_caseless;
18extern int utf_mode;
19
20/*
21 * Compile a search pattern, for future use by match_pattern.
22 */
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information, see the README file.
8 */
9
10/*

--- 4 unchanged lines hidden (view full) ---

15
16extern int caseless;
17extern int is_caseless;
18extern int utf_mode;
19
20/*
21 * Compile a search pattern, for future use by match_pattern.
22 */
23 static int
24compile_pattern2(pattern, search_type, comp_pattern, show_error)
25 char *pattern;
26 int search_type;
27 PATTERN_TYPE *comp_pattern;
28 int show_error;
23static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
29{
30 if (search_type & SRCH_NO_REGEX)
31 return (0);
32 {
33#if HAVE_GNU_REGEX
34 struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
35 ecalloc(1, sizeof(struct re_pattern_buffer));
36 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);

--- 105 unchanged lines hidden (view full) ---

142#endif
143 }
144 return (0);
145}
146
147/*
148 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
149 */
24{
25 if (search_type & SRCH_NO_REGEX)
26 return (0);
27 {
28#if HAVE_GNU_REGEX
29 struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30 ecalloc(1, sizeof(struct re_pattern_buffer));
31 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);

--- 105 unchanged lines hidden (view full) ---

137#endif
138 }
139 return (0);
140}
141
142/*
143 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
144 */
150 public int
151compile_pattern(pattern, search_type, show_error, comp_pattern)
152 char *pattern;
153 int search_type;
154 int show_error;
155 PATTERN_TYPE *comp_pattern;
145public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
156{
157 char *cvt_pattern;
158 int result;
159
146{
147 char *cvt_pattern;
148 int result;
149
160 if (caseless != OPT_ONPLUS || re_handles_caseless)
150 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
161 cvt_pattern = pattern;
162 else
163 {
164 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
165 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
166 }
167 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
168 if (cvt_pattern != pattern)
169 free(cvt_pattern);
170 return (result);
171}
172
173/*
174 * Forget that we have a compiled pattern.
175 */
151 cvt_pattern = pattern;
152 else
153 {
154 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
155 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
156 }
157 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
158 if (cvt_pattern != pattern)
159 free(cvt_pattern);
160 return (result);
161}
162
163/*
164 * Forget that we have a compiled pattern.
165 */
176 public void
177uncompile_pattern(pattern)
178 PATTERN_TYPE *pattern;
166public void uncompile_pattern(PATTERN_TYPE *pattern)
179{
180#if HAVE_GNU_REGEX
181 if (*pattern != NULL)
182 {
183 regfree(*pattern);
184 free(*pattern);
185 }
186 *pattern = NULL;

--- 30 unchanged lines hidden (view full) ---

217 *pattern = NULL;
218#endif
219}
220
221#if 0
222/*
223 * Can a pattern be successfully compiled?
224 */
167{
168#if HAVE_GNU_REGEX
169 if (*pattern != NULL)
170 {
171 regfree(*pattern);
172 free(*pattern);
173 }
174 *pattern = NULL;

--- 30 unchanged lines hidden (view full) ---

205 *pattern = NULL;
206#endif
207}
208
209#if 0
210/*
211 * Can a pattern be successfully compiled?
212 */
225 public int
226valid_pattern(pattern)
227 char *pattern;
213public int valid_pattern(char *pattern)
228{
229 PATTERN_TYPE comp_pattern;
230 int result;
231
232 SET_NULL_PATTERN(comp_pattern);
233 result = compile_pattern2(pattern, 0, &comp_pattern, 0);
234 if (result != 0)
235 return (0);
236 uncompile_pattern(&comp_pattern);
237 return (1);
238}
239#endif
240
241/*
242 * Is a compiled pattern null?
243 */
214{
215 PATTERN_TYPE comp_pattern;
216 int result;
217
218 SET_NULL_PATTERN(comp_pattern);
219 result = compile_pattern2(pattern, 0, &comp_pattern, 0);
220 if (result != 0)
221 return (0);
222 uncompile_pattern(&comp_pattern);
223 return (1);
224}
225#endif
226
227/*
228 * Is a compiled pattern null?
229 */
244 public int
245is_null_pattern(pattern)
246 PATTERN_TYPE pattern;
230public int is_null_pattern(PATTERN_TYPE pattern)
247{
248#if HAVE_GNU_REGEX
249 return (pattern == NULL);
250#endif
251#if HAVE_POSIX_REGCOMP
252 return (pattern == NULL);
253#endif
254#if HAVE_PCRE

--- 10 unchanged lines hidden (view full) ---

265#endif
266#if HAVE_V8_REGCOMP
267 return (pattern == NULL);
268#endif
269#if NO_REGEX
270 return (pattern == NULL);
271#endif
272}
231{
232#if HAVE_GNU_REGEX
233 return (pattern == NULL);
234#endif
235#if HAVE_POSIX_REGCOMP
236 return (pattern == NULL);
237#endif
238#if HAVE_PCRE

--- 10 unchanged lines hidden (view full) ---

249#endif
250#if HAVE_V8_REGCOMP
251 return (pattern == NULL);
252#endif
253#if NO_REGEX
254 return (pattern == NULL);
255#endif
256}
273
274/*
275 * Simple pattern matching function.
276 * It supports no metacharacters like *, etc.
277 */
257/*
258 * Simple pattern matching function.
259 * It supports no metacharacters like *, etc.
260 */
278 static int
279match(pattern, pattern_len, buf, buf_len, pfound, pend)
280 char *pattern;
281 int pattern_len;
282 char *buf;
283 int buf_len;
284 char **pfound, **pend;
261static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
285{
286 char *pp, *lp;
287 char *pattern_end = pattern + pattern_len;
288 char *buf_end = buf + buf_len;
289
290 for ( ; buf < buf_end; buf++)
291 {
292 for (pp = pattern, lp = buf; ; pp++, lp++)

--- 4 unchanged lines hidden (view full) ---

297 cp = ASCII_TO_LOWER(cp);
298 if (cp != cl)
299 break;
300 if (pp == pattern_end || lp == buf_end)
301 break;
302 }
303 if (pp == pattern_end)
304 {
262{
263 char *pp, *lp;
264 char *pattern_end = pattern + pattern_len;
265 char *buf_end = buf + buf_len;
266
267 for ( ; buf < buf_end; buf++)
268 {
269 for (pp = pattern, lp = buf; ; pp++, lp++)

--- 4 unchanged lines hidden (view full) ---

274 cp = ASCII_TO_LOWER(cp);
275 if (cp != cl)
276 break;
277 if (pp == pattern_end || lp == buf_end)
278 break;
279 }
280 if (pp == pattern_end)
281 {
305 if (pfound != NULL)
306 *pfound = buf;
307 if (pend != NULL)
308 *pend = lp;
282 *(*sp)++ = buf;
283 *(*ep)++ = lp;
309 return (1);
310 }
311 }
284 return (1);
285 }
286 }
287 **sp = **ep = NULL;
312 return (0);
313}
314
315/*
316 * Perform a pattern match with the previously compiled pattern.
288 return (0);
289}
290
291/*
292 * Perform a pattern match with the previously compiled pattern.
317 * Set sp and ep to the start and end of the matched string.
293 * Set sp[0] and ep[0] to the start and end of the matched string.
294 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
295 * Subpatterns are defined by parentheses in the regex language.
318 */
296 */
319 public int
320match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
321 PATTERN_TYPE pattern;
322 char *tpattern;
323 char *line;
324 int line_len;
325 char **sp;
326 char **ep;
327 int notbol;
328 int search_type;
297static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
329{
330 int matched;
331
298{
299 int matched;
300
332 *sp = *ep = NULL;
333#if NO_REGEX
334 search_type |= SRCH_NO_REGEX;
335#endif
336 if (search_type & SRCH_NO_REGEX)
301#if NO_REGEX
302 search_type |= SRCH_NO_REGEX;
303#endif
304 if (search_type & SRCH_NO_REGEX)
337 matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
305 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
338 else
339 {
340#if HAVE_GNU_REGEX
341 {
342 struct re_registers search_regs;
343 pattern->not_bol = notbol;
344 pattern->regs_allocated = REGS_UNALLOCATED;
345 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
346 if (matched)
347 {
306 else
307 {
308#if HAVE_GNU_REGEX
309 {
310 struct re_registers search_regs;
311 pattern->not_bol = notbol;
312 pattern->regs_allocated = REGS_UNALLOCATED;
313 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
314 if (matched)
315 {
348 *sp = line + search_regs.start[0];
349 *ep = line + search_regs.end[0];
316 *sp++ = line + search_regs.start[0];
317 *ep++ = line + search_regs.end[0];
350 }
351 }
352#endif
353#if HAVE_POSIX_REGCOMP
354 {
318 }
319 }
320#endif
321#if HAVE_POSIX_REGCOMP
322 {
355 regmatch_t rm;
323 #define RM_COUNT (NUM_SEARCH_COLORS+2)
324 regmatch_t rm[RM_COUNT];
356 int flags = (notbol) ? REG_NOTBOL : 0;
357#ifdef REG_STARTEND
358 flags |= REG_STARTEND;
325 int flags = (notbol) ? REG_NOTBOL : 0;
326#ifdef REG_STARTEND
327 flags |= REG_STARTEND;
359 rm.rm_so = 0;
360 rm.rm_eo = line_len;
328 rm[0].rm_so = 0;
329 rm[0].rm_eo = line_len;
361#endif
330#endif
362 matched = !regexec(pattern, line, 1, &rm, flags);
331 matched = !regexec(pattern, line, RM_COUNT, rm, flags);
363 if (matched)
364 {
332 if (matched)
333 {
334 int i;
335 int ecount;
336 for (ecount = RM_COUNT; ecount > 0; ecount--)
337 if (rm[ecount-1].rm_so >= 0)
338 break;
339 if (ecount >= nsp)
340 ecount = nsp-1;
341 for (i = 0; i < ecount; i++)
342 {
343 if (rm[i].rm_so < 0)
344 {
345 *sp++ = *ep++ = line;
346 } else
347 {
365#ifndef __WATCOMC__
348#ifndef __WATCOMC__
366 *sp = line + rm.rm_so;
367 *ep = line + rm.rm_eo;
349 *sp++ = line + rm[i].rm_so;
350 *ep++ = line + rm[i].rm_eo;
368#else
351#else
369 *sp = rm.rm_sp;
370 *ep = rm.rm_ep;
352 *sp++ = rm[i].rm_sp;
353 *ep++ = rm[i].rm_ep;
371#endif
354#endif
355 }
356 }
372 }
373 }
374#endif
375#if HAVE_PCRE
376 {
357 }
358 }
359#endif
360#if HAVE_PCRE
361 {
362 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
363 int ovector[OVECTOR_COUNT];
377 int flags = (notbol) ? PCRE_NOTBOL : 0;
364 int flags = (notbol) ? PCRE_NOTBOL : 0;
378 int ovector[3];
379 matched = pcre_exec(pattern, NULL, line, line_len,
380 0, flags, ovector, 3) >= 0;
381 if (matched)
365 int i;
366 int ecount;
367 int mcount = pcre_exec(pattern, NULL, line, line_len,
368 0, flags, ovector, OVECTOR_COUNT);
369 matched = (mcount > 0);
370 ecount = nsp-1;
371 if (ecount > mcount) ecount = mcount;
372 for (i = 0; i < ecount*2; )
382 {
373 {
383 *sp = line + ovector[0];
384 *ep = line + ovector[1];
374 if (ovector[i] < 0 || ovector[i+1] < 0)
375 {
376 *sp++ = *ep++ = line;
377 i += 2;
378 } else
379 {
380 *sp++ = line + ovector[i++];
381 *ep++ = line + ovector[i++];
382 }
385 }
386 }
387#endif
388#if HAVE_PCRE2
389 {
390 int flags = (notbol) ? PCRE2_NOTBOL : 0;
383 }
384 }
385#endif
386#if HAVE_PCRE2
387 {
388 int flags = (notbol) ? PCRE2_NOTBOL : 0;
391 pcre2_match_data *md = pcre2_match_data_create(3, NULL);
392 matched = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
393 0, flags, md, NULL) >= 0;
389 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
390 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
391 0, flags, md, NULL);
392 matched = (mcount > 0);
394 if (matched)
395 {
396 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
393 if (matched)
394 {
395 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
397 *sp = line + ovector[0];
398 *ep = line + ovector[1];
396 int i;
397 int ecount = nsp-1;
398 if (ecount > mcount) ecount = mcount;
399 for (i = 0; i < ecount*2; )
400 {
401 if (ovector[i] < 0 || ovector[i+1] < 0)
402 {
403 *sp++ = *ep++ = line;
404 i += 2;
405 } else
406 {
407 *sp++ = line + ovector[i++];
408 *ep++ = line + ovector[i++];
409 }
410 }
399 }
400 pcre2_match_data_free(md);
401 }
402#endif
403#if HAVE_RE_COMP
404 matched = (re_exec(line) == 1);
405 /*
406 * re_exec doesn't seem to provide a way to get the matched string.
407 */
411 }
412 pcre2_match_data_free(md);
413 }
414#endif
415#if HAVE_RE_COMP
416 matched = (re_exec(line) == 1);
417 /*
418 * re_exec doesn't seem to provide a way to get the matched string.
419 */
408 *sp = *ep = NULL;
409#endif
410#if HAVE_REGCMP
420#endif
421#if HAVE_REGCMP
411 *ep = regex(pattern, line);
412 matched = (*ep != NULL);
422 matched = ((*ep++ = regex(pattern, line)) != NULL);
413 if (matched)
423 if (matched)
414 *sp = __loc1;
424 *sp++ = __loc1;
415#endif
416#if HAVE_V8_REGCOMP
417#if HAVE_REGEXEC2
418 matched = regexec2(pattern, line, notbol);
419#else
420 matched = regexec(pattern, line);
421#endif
422 if (matched)
423 {
425#endif
426#if HAVE_V8_REGCOMP
427#if HAVE_REGEXEC2
428 matched = regexec2(pattern, line, notbol);
429#else
430 matched = regexec(pattern, line);
431#endif
432 if (matched)
433 {
424 *sp = pattern->startp[0];
425 *ep = pattern->endp[0];
434 *sp++ = pattern->startp[0];
435 *ep++ = pattern->endp[0];
426 }
427#endif
428 }
436 }
437#endif
438 }
439 *sp = *ep = NULL;
429 matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
430 ((search_type & SRCH_NO_MATCH) && !matched);
431 return (matched);
432}
433
440 matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
441 ((search_type & SRCH_NO_MATCH) && !matched);
442 return (matched);
443}
444
445public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
446{
447 int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
448 int i;
449 for (i = 1; i <= NUM_SEARCH_COLORS; i++)
450 {
451 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
452 matched = 0;
453 }
454 return matched;
455}
456
434/*
435 * Return the name of the pattern matching library.
436 */
457/*
458 * Return the name of the pattern matching library.
459 */
437 public char *
438pattern_lib_name(VOID_PARAM)
460public char * pattern_lib_name(void)
439{
440#if HAVE_GNU_REGEX
441 return ("GNU");
442#else
443#if HAVE_POSIX_REGCOMP
444 return ("POSIX");
445#else
446#if HAVE_PCRE2

--- 23 unchanged lines hidden ---
461{
462#if HAVE_GNU_REGEX
463 return ("GNU");
464#else
465#if HAVE_POSIX_REGCOMP
466 return ("POSIX");
467#else
468#if HAVE_PCRE2

--- 23 unchanged lines hidden ---