1 /* 2 * Copyright (C) 1984-2025 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information, see the README file. 8 */ 9 10 /* 11 * Routines to do pattern matching. 12 */ 13 14 #include "less.h" 15 16 extern int caseless; 17 extern int is_caseless; 18 extern int utf_mode; 19 20 /* 21 * Compile a search pattern, for future use by match_pattern. 22 */ 23 static int compile_pattern2(constant char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error) 24 { 25 if (search_type & SRCH_NO_REGEX) 26 return (0); 27 { 28 #if HAVE_GNU_REGEX 29 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 30 ecalloc(1, sizeof(struct re_pattern_buffer)); 31 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 32 if (re_compile_pattern(pattern, strlen(pattern), comp)) 33 { 34 free(comp); 35 if (show_error) 36 error("Invalid pattern", NULL_PARG); 37 return (-1); 38 } 39 if (*comp_pattern != NULL) 40 { 41 regfree(*comp_pattern); 42 free(*comp_pattern); 43 } 44 *comp_pattern = comp; 45 #endif 46 #if HAVE_POSIX_REGCOMP 47 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 48 if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0))) 49 { 50 free(comp); 51 if (show_error) 52 error("Invalid pattern", NULL_PARG); 53 return (-1); 54 } 55 if (*comp_pattern != NULL) 56 { 57 regfree(*comp_pattern); 58 free(*comp_pattern); 59 } 60 *comp_pattern = comp; 61 #endif 62 #if HAVE_PCRE 63 constant char *errstring; 64 int erroffset; 65 PARG parg; 66 pcre *comp = pcre_compile(pattern, 67 ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) | 68 (is_caseless ? PCRE_CASELESS : 0), 69 &errstring, &erroffset, NULL); 70 if (comp == NULL) 71 { 72 parg.p_string = (char *) errstring; 73 if (show_error) 74 error("%s", &parg); 75 return (-1); 76 } 77 *comp_pattern = comp; 78 #endif 79 #if HAVE_PCRE2 80 int errcode; 81 PCRE2_SIZE erroffset; 82 PARG parg; 83 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern), 84 ((utf_mode) ? PCRE2_UTF | PCRE2_NO_UTF_CHECK : 0) | 85 (is_caseless ? PCRE2_CASELESS : 0), 86 &errcode, &erroffset, NULL); 87 if (comp == NULL) 88 { 89 if (show_error) 90 { 91 char msg[160]; 92 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg)); 93 parg.p_string = msg; 94 error("%s", &parg); 95 } 96 return (-1); 97 } 98 *comp_pattern = comp; 99 #endif 100 #if HAVE_RE_COMP 101 PARG parg; 102 if ((parg.p_string = re_comp(pattern)) != NULL) 103 { 104 if (show_error) 105 error("%s", &parg); 106 return (-1); 107 } 108 *comp_pattern = 1; 109 #endif 110 #if HAVE_REGCMP 111 char *comp; 112 if ((comp = regcmp(pattern, 0)) == NULL) 113 { 114 if (show_error) 115 error("Invalid pattern", NULL_PARG); 116 return (-1); 117 } 118 if (comp_pattern != NULL) 119 free(*comp_pattern); 120 *comp_pattern = comp; 121 #endif 122 #if HAVE_V8_REGCOMP 123 struct regexp *comp; 124 reg_show_error = show_error; 125 comp = regcomp(pattern); 126 reg_show_error = 1; 127 if (comp == NULL) 128 { 129 /* 130 * regcomp has already printed an error message 131 * via regerror(). 132 */ 133 return (-1); 134 } 135 if (*comp_pattern != NULL) 136 free(*comp_pattern); 137 *comp_pattern = comp; 138 #endif 139 } 140 return (0); 141 } 142 143 /* 144 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 145 */ 146 public int compile_pattern(constant char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern) 147 { 148 int result; 149 150 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX))) 151 { 152 result = compile_pattern2(pattern, search_type, comp_pattern, show_error); 153 } else 154 { 155 char *cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 156 cvt_text(cvt_pattern, pattern, NULL, NULL, CVT_TO_LC); 157 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error); 158 free(cvt_pattern); 159 } 160 return (result); 161 } 162 163 /* 164 * Forget that we have a compiled pattern. 165 */ 166 public void uncompile_pattern(PATTERN_TYPE *pattern) 167 { 168 #if HAVE_GNU_REGEX 169 if (*pattern != NULL) 170 { 171 regfree(*pattern); 172 free(*pattern); 173 } 174 *pattern = NULL; 175 #endif 176 #if HAVE_POSIX_REGCOMP 177 if (*pattern != NULL) 178 { 179 regfree(*pattern); 180 free(*pattern); 181 } 182 *pattern = NULL; 183 #endif 184 #if HAVE_PCRE 185 if (*pattern != NULL) 186 pcre_free(*pattern); 187 *pattern = NULL; 188 #endif 189 #if HAVE_PCRE2 190 if (*pattern != NULL) 191 pcre2_code_free(*pattern); 192 *pattern = NULL; 193 #endif 194 #if HAVE_RE_COMP 195 *pattern = 0; 196 #endif 197 #if HAVE_REGCMP 198 if (*pattern != NULL) 199 free(*pattern); 200 *pattern = NULL; 201 #endif 202 #if HAVE_V8_REGCOMP 203 if (*pattern != NULL) 204 free(*pattern); 205 *pattern = NULL; 206 #endif 207 } 208 209 #if 0 210 /* 211 * Can a pattern be successfully compiled? 212 */ 213 public int valid_pattern(char *pattern) 214 { 215 PATTERN_TYPE comp_pattern; 216 int result; 217 218 SET_NULL_PATTERN(comp_pattern); 219 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 220 if (result != 0) 221 return (0); 222 uncompile_pattern(&comp_pattern); 223 return (1); 224 } 225 #endif 226 227 /* 228 * Is a compiled pattern null? 229 */ 230 public lbool is_null_pattern(PATTERN_TYPE pattern) 231 { 232 #if HAVE_GNU_REGEX 233 return (pattern == NULL); 234 #endif 235 #if HAVE_POSIX_REGCOMP 236 return (pattern == NULL); 237 #endif 238 #if HAVE_PCRE 239 return (pattern == NULL); 240 #endif 241 #if HAVE_PCRE2 242 return (pattern == NULL); 243 #endif 244 #if HAVE_RE_COMP 245 return (pattern == 0); 246 #endif 247 #if HAVE_REGCMP 248 return (pattern == NULL); 249 #endif 250 #if HAVE_V8_REGCOMP 251 return (pattern == NULL); 252 #endif 253 #if NO_REGEX 254 return (pattern == NULL); 255 #endif 256 } 257 /* 258 * Simple pattern matching function. 259 * It supports no metacharacters like *, etc. 260 */ 261 static int match(constant char *pattern, size_t pattern_len, constant char *buf, int buf_len, constant char ***sp, constant char ***ep, int nsubs) 262 { 263 constant char *pp; 264 constant char *lp; 265 constant char *pattern_end = pattern + pattern_len; 266 constant char *buf_end = buf + buf_len; 267 268 (void) nsubs; 269 for ( ; buf < buf_end; buf++) 270 { 271 for (pp = pattern, lp = buf; ; pp++, lp++) 272 { 273 char cp = *pp; 274 char cl = *lp; 275 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 276 cp = ASCII_TO_LOWER(cp); 277 if (cp != cl) 278 break; 279 if (pp == pattern_end || lp == buf_end) 280 break; 281 } 282 if (pp == pattern_end) 283 { 284 *(*sp)++ = buf; 285 *(*ep)++ = lp; 286 return (1); 287 } 288 } 289 **sp = **ep = NULL; 290 return (0); 291 } 292 293 /* 294 * Perform a pattern match with the previously compiled pattern. 295 * Set sp[0] and ep[0] to the start and end of the matched string. 296 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern. 297 * Subpatterns are defined by parentheses in the regex language. 298 */ 299 static lbool match_pattern1(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t aline_len, size_t line_off, constant char **sp, constant char **ep, int nsp, int notbol, int search_type) 300 { 301 int matched; 302 int line_len = (int) aline_len; /*{{type-issue}}*/ 303 304 #if NO_REGEX 305 search_type |= SRCH_NO_REGEX; 306 #endif 307 if (search_type & SRCH_NO_REGEX) 308 matched = match(tpattern, strlen(tpattern), line + line_off, line_len - line_off, &sp, &ep, nsp); 309 else 310 { 311 #if HAVE_GNU_REGEX 312 { 313 struct re_registers search_regs; 314 pattern->not_bol = notbol; 315 pattern->regs_allocated = REGS_UNALLOCATED; 316 matched = re_search(pattern, line, line_len, line_off, line_len - line_off, &search_regs) >= 0; 317 if (matched) 318 { 319 *sp++ = line + search_regs.start[0]; 320 *ep++ = line + search_regs.end[0]; 321 } 322 } 323 #endif 324 #if HAVE_POSIX_REGCOMP 325 { 326 #define RM_COUNT (NUM_SEARCH_COLORS+2) 327 regmatch_t rm[RM_COUNT]; 328 int flags = (notbol) ? REG_NOTBOL : 0; 329 #ifdef REG_STARTEND 330 flags |= REG_STARTEND; 331 rm[0].rm_so = line_off; 332 rm[0].rm_eo = line_len; 333 #else 334 line += line_off; 335 #endif 336 matched = !regexec(pattern, line, RM_COUNT, rm, flags); 337 if (matched) 338 { 339 int i; 340 int ecount; 341 for (ecount = RM_COUNT; ecount > 0; ecount--) 342 if (rm[ecount-1].rm_so >= 0) 343 break; 344 if (ecount >= nsp) 345 ecount = nsp-1; 346 for (i = 0; i < ecount; i++) 347 { 348 if (rm[i].rm_so < 0) 349 { 350 *sp++ = *ep++ = line; 351 } else 352 { 353 #ifndef __WATCOMC__ 354 *sp++ = line + rm[i].rm_so; 355 *ep++ = line + rm[i].rm_eo; 356 #else 357 *sp++ = rm[i].rm_sp; 358 *ep++ = rm[i].rm_ep; 359 #endif 360 } 361 } 362 } 363 } 364 #endif 365 #if HAVE_PCRE 366 { 367 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3) 368 int ovector[OVECTOR_COUNT]; 369 int flags = (notbol) ? PCRE_NOTBOL : 0; 370 int i; 371 int ecount; 372 int mcount = pcre_exec(pattern, NULL, line, line_len, 373 line_off, flags, ovector, OVECTOR_COUNT); 374 matched = (mcount > 0); 375 ecount = nsp-1; 376 if (ecount > mcount) ecount = mcount; 377 for (i = 0; i < ecount*2; ) 378 { 379 if (ovector[i] < 0 || ovector[i+1] < 0) 380 { 381 *sp++ = *ep++ = line; 382 i += 2; 383 } else 384 { 385 *sp++ = line + ovector[i++]; 386 *ep++ = line + ovector[i++]; 387 } 388 } 389 } 390 #endif 391 #if HAVE_PCRE2 392 { 393 int flags = (notbol) ? PCRE2_NOTBOL : 0; 394 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL); 395 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 396 line_off, flags, md, NULL); 397 matched = (mcount > 0); 398 if (matched) 399 { 400 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); 401 int i; 402 int ecount = nsp-1; 403 if (ecount > mcount) ecount = mcount; 404 for (i = 0; i < ecount*2; ) 405 { 406 if (ovector[i] < 0 || ovector[i+1] < 0) 407 { 408 *sp++ = *ep++ = line; 409 i += 2; 410 } else 411 { 412 *sp++ = line + ovector[i++]; 413 *ep++ = line + ovector[i++]; 414 } 415 } 416 } 417 pcre2_match_data_free(md); 418 } 419 #endif 420 #if HAVE_RE_COMP 421 matched = (re_exec(line + line_off) == 1); 422 /* 423 * re_exec doesn't seem to provide a way to get the matched string. 424 */ 425 #endif 426 #if HAVE_REGCMP 427 matched = ((*ep++ = regex(pattern, line + line_off)) != NULL); 428 if (matched) 429 *sp++ = __loc1; 430 #endif 431 #if HAVE_V8_REGCOMP 432 #if HAVE_REGEXEC2 433 matched = regexec2(pattern, line + line_off, notbol); 434 #else 435 matched = regexec(pattern, line + line_off); 436 #endif 437 if (matched) 438 { 439 *sp++ = pattern->startp[0]; 440 *ep++ = pattern->endp[0]; 441 } 442 #endif 443 } 444 *sp = *ep = NULL; 445 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 446 ((search_type & SRCH_NO_MATCH) && !matched); 447 return (matched != 0); 448 } 449 450 /* 451 * Return TRUE if the match satisfies all SUBSEARCH conditions. 452 */ 453 static lbool subsearch_ok(constant char **sp, constant char **ep, int search_type) 454 { 455 int i; 456 for (i = 1; i <= NUM_SEARCH_COLORS; i++) 457 { 458 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i]) 459 return FALSE; 460 } 461 return TRUE; 462 } 463 464 public lbool match_pattern(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t line_len, size_t line_off, constant char **sp, constant char **ep, int nsp, int notbol, int search_type) 465 { 466 for (;;) 467 { 468 size_t mlen; 469 lbool matched = match_pattern1(pattern, tpattern, line, line_len, line_off, sp, ep, nsp, notbol, search_type); 470 if (!matched || subsearch_ok(sp, ep, search_type)) 471 return matched; 472 mlen = ep[0] - line; 473 line += mlen; 474 line_len -= mlen; 475 notbol = 1; 476 } 477 } 478 479 /* 480 * Return the name of the pattern matching library. 481 */ 482 public constant char * pattern_lib_name(void) 483 { 484 #if HAVE_GNU_REGEX 485 return ("GNU"); 486 #else 487 #if HAVE_POSIX_REGCOMP 488 return ("POSIX"); 489 #else 490 #if HAVE_PCRE2 491 return ("PCRE2"); 492 #else 493 #if HAVE_PCRE 494 return ("PCRE"); 495 #else 496 #if HAVE_RE_COMP 497 return ("BSD"); 498 #else 499 #if HAVE_REGCMP 500 return ("V8"); 501 #else 502 #if HAVE_V8_REGCOMP 503 return ("Spencer V8"); 504 #else 505 return ("no"); 506 #endif 507 #endif 508 #endif 509 #endif 510 #endif 511 #endif 512 #endif 513 } 514