1 /* 2 * Copyright (C) 1984-2024 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information, see the README file. 8 */ 9 10 /* 11 * Routines to do pattern matching. 12 */ 13 14 #include "less.h" 15 16 extern int caseless; 17 extern int is_caseless; 18 extern int utf_mode; 19 20 /* 21 * Compile a search pattern, for future use by match_pattern. 22 */ 23 static int compile_pattern2(constant char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error) 24 { 25 if (search_type & SRCH_NO_REGEX) 26 return (0); 27 { 28 #if HAVE_GNU_REGEX 29 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 30 ecalloc(1, sizeof(struct re_pattern_buffer)); 31 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 32 if (re_compile_pattern(pattern, strlen(pattern), comp)) 33 { 34 free(comp); 35 if (show_error) 36 error("Invalid pattern", NULL_PARG); 37 return (-1); 38 } 39 if (*comp_pattern != NULL) 40 { 41 regfree(*comp_pattern); 42 free(*comp_pattern); 43 } 44 *comp_pattern = comp; 45 #endif 46 #if HAVE_POSIX_REGCOMP 47 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 48 if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0))) 49 { 50 free(comp); 51 if (show_error) 52 error("Invalid pattern", NULL_PARG); 53 return (-1); 54 } 55 if (*comp_pattern != NULL) 56 { 57 regfree(*comp_pattern); 58 free(*comp_pattern); 59 } 60 *comp_pattern = comp; 61 #endif 62 #if HAVE_PCRE 63 constant char *errstring; 64 int erroffset; 65 PARG parg; 66 pcre *comp = pcre_compile(pattern, 67 ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) | 68 (is_caseless ? PCRE_CASELESS : 0), 69 &errstring, &erroffset, NULL); 70 if (comp == NULL) 71 { 72 parg.p_string = (char *) errstring; 73 if (show_error) 74 error("%s", &parg); 75 return (-1); 76 } 77 *comp_pattern = comp; 78 #endif 79 #if HAVE_PCRE2 80 int errcode; 81 PCRE2_SIZE erroffset; 82 PARG parg; 83 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern), 84 (is_caseless ? PCRE2_CASELESS : 0), 85 &errcode, &erroffset, NULL); 86 if (comp == NULL) 87 { 88 if (show_error) 89 { 90 char msg[160]; 91 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg)); 92 parg.p_string = msg; 93 error("%s", &parg); 94 } 95 return (-1); 96 } 97 *comp_pattern = comp; 98 #endif 99 #if HAVE_RE_COMP 100 PARG parg; 101 if ((parg.p_string = re_comp(pattern)) != NULL) 102 { 103 if (show_error) 104 error("%s", &parg); 105 return (-1); 106 } 107 *comp_pattern = 1; 108 #endif 109 #if HAVE_REGCMP 110 char *comp; 111 if ((comp = regcmp(pattern, 0)) == NULL) 112 { 113 if (show_error) 114 error("Invalid pattern", NULL_PARG); 115 return (-1); 116 } 117 if (comp_pattern != NULL) 118 free(*comp_pattern); 119 *comp_pattern = comp; 120 #endif 121 #if HAVE_V8_REGCOMP 122 struct regexp *comp; 123 reg_show_error = show_error; 124 comp = regcomp(pattern); 125 reg_show_error = 1; 126 if (comp == NULL) 127 { 128 /* 129 * regcomp has already printed an error message 130 * via regerror(). 131 */ 132 return (-1); 133 } 134 if (*comp_pattern != NULL) 135 free(*comp_pattern); 136 *comp_pattern = comp; 137 #endif 138 } 139 return (0); 140 } 141 142 /* 143 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 144 */ 145 public int compile_pattern(constant char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern) 146 { 147 int result; 148 149 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX))) 150 { 151 result = compile_pattern2(pattern, search_type, comp_pattern, show_error); 152 } else 153 { 154 char *cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 155 cvt_text(cvt_pattern, pattern, NULL, NULL, CVT_TO_LC); 156 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error); 157 free(cvt_pattern); 158 } 159 return (result); 160 } 161 162 /* 163 * Forget that we have a compiled pattern. 164 */ 165 public void uncompile_pattern(PATTERN_TYPE *pattern) 166 { 167 #if HAVE_GNU_REGEX 168 if (*pattern != NULL) 169 { 170 regfree(*pattern); 171 free(*pattern); 172 } 173 *pattern = NULL; 174 #endif 175 #if HAVE_POSIX_REGCOMP 176 if (*pattern != NULL) 177 { 178 regfree(*pattern); 179 free(*pattern); 180 } 181 *pattern = NULL; 182 #endif 183 #if HAVE_PCRE 184 if (*pattern != NULL) 185 pcre_free(*pattern); 186 *pattern = NULL; 187 #endif 188 #if HAVE_PCRE2 189 if (*pattern != NULL) 190 pcre2_code_free(*pattern); 191 *pattern = NULL; 192 #endif 193 #if HAVE_RE_COMP 194 *pattern = 0; 195 #endif 196 #if HAVE_REGCMP 197 if (*pattern != NULL) 198 free(*pattern); 199 *pattern = NULL; 200 #endif 201 #if HAVE_V8_REGCOMP 202 if (*pattern != NULL) 203 free(*pattern); 204 *pattern = NULL; 205 #endif 206 } 207 208 #if 0 209 /* 210 * Can a pattern be successfully compiled? 211 */ 212 public int valid_pattern(char *pattern) 213 { 214 PATTERN_TYPE comp_pattern; 215 int result; 216 217 SET_NULL_PATTERN(comp_pattern); 218 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 219 if (result != 0) 220 return (0); 221 uncompile_pattern(&comp_pattern); 222 return (1); 223 } 224 #endif 225 226 /* 227 * Is a compiled pattern null? 228 */ 229 public lbool is_null_pattern(PATTERN_TYPE pattern) 230 { 231 #if HAVE_GNU_REGEX 232 return (pattern == NULL); 233 #endif 234 #if HAVE_POSIX_REGCOMP 235 return (pattern == NULL); 236 #endif 237 #if HAVE_PCRE 238 return (pattern == NULL); 239 #endif 240 #if HAVE_PCRE2 241 return (pattern == NULL); 242 #endif 243 #if HAVE_RE_COMP 244 return (pattern == 0); 245 #endif 246 #if HAVE_REGCMP 247 return (pattern == NULL); 248 #endif 249 #if HAVE_V8_REGCOMP 250 return (pattern == NULL); 251 #endif 252 #if NO_REGEX 253 return (pattern == NULL); 254 #endif 255 } 256 /* 257 * Simple pattern matching function. 258 * It supports no metacharacters like *, etc. 259 */ 260 static int match(constant char *pattern, size_t pattern_len, constant char *buf, int buf_len, constant char ***sp, constant char ***ep, int nsubs) 261 { 262 constant char *pp; 263 constant char *lp; 264 constant char *pattern_end = pattern + pattern_len; 265 constant char *buf_end = buf + buf_len; 266 267 (void) nsubs; 268 for ( ; buf < buf_end; buf++) 269 { 270 for (pp = pattern, lp = buf; ; pp++, lp++) 271 { 272 char cp = *pp; 273 char cl = *lp; 274 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 275 cp = ASCII_TO_LOWER(cp); 276 if (cp != cl) 277 break; 278 if (pp == pattern_end || lp == buf_end) 279 break; 280 } 281 if (pp == pattern_end) 282 { 283 *(*sp)++ = buf; 284 *(*ep)++ = lp; 285 return (1); 286 } 287 } 288 **sp = **ep = NULL; 289 return (0); 290 } 291 292 /* 293 * Perform a pattern match with the previously compiled pattern. 294 * Set sp[0] and ep[0] to the start and end of the matched string. 295 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern. 296 * Subpatterns are defined by parentheses in the regex language. 297 */ 298 static int match_pattern1(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t aline_len, constant char **sp, constant char **ep, int nsp, int notbol, int search_type) 299 { 300 int matched; 301 int line_len = (int) aline_len; /*{{type-issue}}*/ 302 303 #if NO_REGEX 304 search_type |= SRCH_NO_REGEX; 305 #endif 306 if (search_type & SRCH_NO_REGEX) 307 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp); 308 else 309 { 310 #if HAVE_GNU_REGEX 311 { 312 struct re_registers search_regs; 313 pattern->not_bol = notbol; 314 pattern->regs_allocated = REGS_UNALLOCATED; 315 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 316 if (matched) 317 { 318 *sp++ = line + search_regs.start[0]; 319 *ep++ = line + search_regs.end[0]; 320 } 321 } 322 #endif 323 #if HAVE_POSIX_REGCOMP 324 { 325 #define RM_COUNT (NUM_SEARCH_COLORS+2) 326 regmatch_t rm[RM_COUNT]; 327 int flags = (notbol) ? REG_NOTBOL : 0; 328 #ifdef REG_STARTEND 329 flags |= REG_STARTEND; 330 rm[0].rm_so = 0; 331 rm[0].rm_eo = line_len; 332 #endif 333 matched = !regexec(pattern, line, RM_COUNT, rm, flags); 334 if (matched) 335 { 336 int i; 337 int ecount; 338 for (ecount = RM_COUNT; ecount > 0; ecount--) 339 if (rm[ecount-1].rm_so >= 0) 340 break; 341 if (ecount >= nsp) 342 ecount = nsp-1; 343 for (i = 0; i < ecount; i++) 344 { 345 if (rm[i].rm_so < 0) 346 { 347 *sp++ = *ep++ = line; 348 } else 349 { 350 #ifndef __WATCOMC__ 351 *sp++ = line + rm[i].rm_so; 352 *ep++ = line + rm[i].rm_eo; 353 #else 354 *sp++ = rm[i].rm_sp; 355 *ep++ = rm[i].rm_ep; 356 #endif 357 } 358 } 359 } 360 } 361 #endif 362 #if HAVE_PCRE 363 { 364 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3) 365 int ovector[OVECTOR_COUNT]; 366 int flags = (notbol) ? PCRE_NOTBOL : 0; 367 int i; 368 int ecount; 369 int mcount = pcre_exec(pattern, NULL, line, line_len, 370 0, flags, ovector, OVECTOR_COUNT); 371 matched = (mcount > 0); 372 ecount = nsp-1; 373 if (ecount > mcount) ecount = mcount; 374 for (i = 0; i < ecount*2; ) 375 { 376 if (ovector[i] < 0 || ovector[i+1] < 0) 377 { 378 *sp++ = *ep++ = line; 379 i += 2; 380 } else 381 { 382 *sp++ = line + ovector[i++]; 383 *ep++ = line + ovector[i++]; 384 } 385 } 386 } 387 #endif 388 #if HAVE_PCRE2 389 { 390 int flags = (notbol) ? PCRE2_NOTBOL : 0; 391 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL); 392 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 393 0, flags, md, NULL); 394 matched = (mcount > 0); 395 if (matched) 396 { 397 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); 398 int i; 399 int ecount = nsp-1; 400 if (ecount > mcount) ecount = mcount; 401 for (i = 0; i < ecount*2; ) 402 { 403 if (ovector[i] < 0 || ovector[i+1] < 0) 404 { 405 *sp++ = *ep++ = line; 406 i += 2; 407 } else 408 { 409 *sp++ = line + ovector[i++]; 410 *ep++ = line + ovector[i++]; 411 } 412 } 413 } 414 pcre2_match_data_free(md); 415 } 416 #endif 417 #if HAVE_RE_COMP 418 matched = (re_exec(line) == 1); 419 /* 420 * re_exec doesn't seem to provide a way to get the matched string. 421 */ 422 #endif 423 #if HAVE_REGCMP 424 matched = ((*ep++ = regex(pattern, line)) != NULL); 425 if (matched) 426 *sp++ = __loc1; 427 #endif 428 #if HAVE_V8_REGCOMP 429 #if HAVE_REGEXEC2 430 matched = regexec2(pattern, line, notbol); 431 #else 432 matched = regexec(pattern, line); 433 #endif 434 if (matched) 435 { 436 *sp++ = pattern->startp[0]; 437 *ep++ = pattern->endp[0]; 438 } 439 #endif 440 } 441 *sp = *ep = NULL; 442 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 443 ((search_type & SRCH_NO_MATCH) && !matched); 444 return (matched); 445 } 446 447 public int match_pattern(PATTERN_TYPE pattern, constant char *tpattern, constant char *line, size_t line_len, constant char **sp, constant char **ep, int nsp, int notbol, int search_type) 448 { 449 int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type); 450 int i; 451 for (i = 1; i <= NUM_SEARCH_COLORS; i++) 452 { 453 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i]) 454 matched = 0; 455 } 456 return matched; 457 } 458 459 /* 460 * Return the name of the pattern matching library. 461 */ 462 public constant char * pattern_lib_name(void) 463 { 464 #if HAVE_GNU_REGEX 465 return ("GNU"); 466 #else 467 #if HAVE_POSIX_REGCOMP 468 return ("POSIX"); 469 #else 470 #if HAVE_PCRE2 471 return ("PCRE2"); 472 #else 473 #if HAVE_PCRE 474 return ("PCRE"); 475 #else 476 #if HAVE_RE_COMP 477 return ("BSD"); 478 #else 479 #if HAVE_REGCMP 480 return ("V8"); 481 #else 482 #if HAVE_V8_REGCOMP 483 return ("Spencer V8"); 484 #else 485 return ("no"); 486 #endif 487 #endif 488 #endif 489 #endif 490 #endif 491 #endif 492 #endif 493 } 494