1 /* 2 * Copyright (C) 1984-2023 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information, see the README file. 8 */ 9 10 /* 11 * Routines to do pattern matching. 12 */ 13 14 #include "less.h" 15 16 extern int caseless; 17 extern int is_caseless; 18 extern int utf_mode; 19 20 /* 21 * Compile a search pattern, for future use by match_pattern. 22 */ 23 static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error) 24 { 25 if (search_type & SRCH_NO_REGEX) 26 return (0); 27 { 28 #if HAVE_GNU_REGEX 29 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 30 ecalloc(1, sizeof(struct re_pattern_buffer)); 31 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 32 if (re_compile_pattern(pattern, strlen(pattern), comp)) 33 { 34 free(comp); 35 if (show_error) 36 error("Invalid pattern", NULL_PARG); 37 return (-1); 38 } 39 if (*comp_pattern != NULL) 40 { 41 regfree(*comp_pattern); 42 free(*comp_pattern); 43 } 44 *comp_pattern = comp; 45 #endif 46 #if HAVE_POSIX_REGCOMP 47 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 48 if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0))) 49 { 50 free(comp); 51 if (show_error) 52 error("Invalid pattern", NULL_PARG); 53 return (-1); 54 } 55 if (*comp_pattern != NULL) 56 { 57 regfree(*comp_pattern); 58 free(*comp_pattern); 59 } 60 *comp_pattern = comp; 61 #endif 62 #if HAVE_PCRE 63 constant char *errstring; 64 int erroffset; 65 PARG parg; 66 pcre *comp = pcre_compile(pattern, 67 ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) | 68 (is_caseless ? PCRE_CASELESS : 0), 69 &errstring, &erroffset, NULL); 70 if (comp == NULL) 71 { 72 parg.p_string = (char *) errstring; 73 if (show_error) 74 error("%s", &parg); 75 return (-1); 76 } 77 *comp_pattern = comp; 78 #endif 79 #if HAVE_PCRE2 80 int errcode; 81 PCRE2_SIZE erroffset; 82 PARG parg; 83 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern), 84 (is_caseless ? PCRE2_CASELESS : 0), 85 &errcode, &erroffset, NULL); 86 if (comp == NULL) 87 { 88 if (show_error) 89 { 90 char msg[160]; 91 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg)); 92 parg.p_string = msg; 93 error("%s", &parg); 94 } 95 return (-1); 96 } 97 *comp_pattern = comp; 98 #endif 99 #if HAVE_RE_COMP 100 PARG parg; 101 if ((parg.p_string = re_comp(pattern)) != NULL) 102 { 103 if (show_error) 104 error("%s", &parg); 105 return (-1); 106 } 107 *comp_pattern = 1; 108 #endif 109 #if HAVE_REGCMP 110 char *comp; 111 if ((comp = regcmp(pattern, 0)) == NULL) 112 { 113 if (show_error) 114 error("Invalid pattern", NULL_PARG); 115 return (-1); 116 } 117 if (comp_pattern != NULL) 118 free(*comp_pattern); 119 *comp_pattern = comp; 120 #endif 121 #if HAVE_V8_REGCOMP 122 struct regexp *comp; 123 reg_show_error = show_error; 124 comp = regcomp(pattern); 125 reg_show_error = 1; 126 if (comp == NULL) 127 { 128 /* 129 * regcomp has already printed an error message 130 * via regerror(). 131 */ 132 return (-1); 133 } 134 if (*comp_pattern != NULL) 135 free(*comp_pattern); 136 *comp_pattern = comp; 137 #endif 138 } 139 return (0); 140 } 141 142 /* 143 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 144 */ 145 public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern) 146 { 147 char *cvt_pattern; 148 int result; 149 150 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX))) 151 cvt_pattern = pattern; 152 else 153 { 154 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 155 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 156 } 157 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error); 158 if (cvt_pattern != pattern) 159 free(cvt_pattern); 160 return (result); 161 } 162 163 /* 164 * Forget that we have a compiled pattern. 165 */ 166 public void uncompile_pattern(PATTERN_TYPE *pattern) 167 { 168 #if HAVE_GNU_REGEX 169 if (*pattern != NULL) 170 { 171 regfree(*pattern); 172 free(*pattern); 173 } 174 *pattern = NULL; 175 #endif 176 #if HAVE_POSIX_REGCOMP 177 if (*pattern != NULL) 178 { 179 regfree(*pattern); 180 free(*pattern); 181 } 182 *pattern = NULL; 183 #endif 184 #if HAVE_PCRE 185 if (*pattern != NULL) 186 pcre_free(*pattern); 187 *pattern = NULL; 188 #endif 189 #if HAVE_PCRE2 190 if (*pattern != NULL) 191 pcre2_code_free(*pattern); 192 *pattern = NULL; 193 #endif 194 #if HAVE_RE_COMP 195 *pattern = 0; 196 #endif 197 #if HAVE_REGCMP 198 if (*pattern != NULL) 199 free(*pattern); 200 *pattern = NULL; 201 #endif 202 #if HAVE_V8_REGCOMP 203 if (*pattern != NULL) 204 free(*pattern); 205 *pattern = NULL; 206 #endif 207 } 208 209 #if 0 210 /* 211 * Can a pattern be successfully compiled? 212 */ 213 public int valid_pattern(char *pattern) 214 { 215 PATTERN_TYPE comp_pattern; 216 int result; 217 218 SET_NULL_PATTERN(comp_pattern); 219 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 220 if (result != 0) 221 return (0); 222 uncompile_pattern(&comp_pattern); 223 return (1); 224 } 225 #endif 226 227 /* 228 * Is a compiled pattern null? 229 */ 230 public int is_null_pattern(PATTERN_TYPE pattern) 231 { 232 #if HAVE_GNU_REGEX 233 return (pattern == NULL); 234 #endif 235 #if HAVE_POSIX_REGCOMP 236 return (pattern == NULL); 237 #endif 238 #if HAVE_PCRE 239 return (pattern == NULL); 240 #endif 241 #if HAVE_PCRE2 242 return (pattern == NULL); 243 #endif 244 #if HAVE_RE_COMP 245 return (pattern == 0); 246 #endif 247 #if HAVE_REGCMP 248 return (pattern == NULL); 249 #endif 250 #if HAVE_V8_REGCOMP 251 return (pattern == NULL); 252 #endif 253 #if NO_REGEX 254 return (pattern == NULL); 255 #endif 256 } 257 /* 258 * Simple pattern matching function. 259 * It supports no metacharacters like *, etc. 260 */ 261 static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs) 262 { 263 char *pp, *lp; 264 char *pattern_end = pattern + pattern_len; 265 char *buf_end = buf + buf_len; 266 267 for ( ; buf < buf_end; buf++) 268 { 269 for (pp = pattern, lp = buf; ; pp++, lp++) 270 { 271 char cp = *pp; 272 char cl = *lp; 273 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 274 cp = ASCII_TO_LOWER(cp); 275 if (cp != cl) 276 break; 277 if (pp == pattern_end || lp == buf_end) 278 break; 279 } 280 if (pp == pattern_end) 281 { 282 *(*sp)++ = buf; 283 *(*ep)++ = lp; 284 return (1); 285 } 286 } 287 **sp = **ep = NULL; 288 return (0); 289 } 290 291 /* 292 * Perform a pattern match with the previously compiled pattern. 293 * Set sp[0] and ep[0] to the start and end of the matched string. 294 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern. 295 * Subpatterns are defined by parentheses in the regex language. 296 */ 297 static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type) 298 { 299 int matched; 300 301 #if NO_REGEX 302 search_type |= SRCH_NO_REGEX; 303 #endif 304 if (search_type & SRCH_NO_REGEX) 305 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp); 306 else 307 { 308 #if HAVE_GNU_REGEX 309 { 310 struct re_registers search_regs; 311 pattern->not_bol = notbol; 312 pattern->regs_allocated = REGS_UNALLOCATED; 313 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 314 if (matched) 315 { 316 *sp++ = line + search_regs.start[0]; 317 *ep++ = line + search_regs.end[0]; 318 } 319 } 320 #endif 321 #if HAVE_POSIX_REGCOMP 322 { 323 #define RM_COUNT (NUM_SEARCH_COLORS+2) 324 regmatch_t rm[RM_COUNT]; 325 int flags = (notbol) ? REG_NOTBOL : 0; 326 #ifdef REG_STARTEND 327 flags |= REG_STARTEND; 328 rm[0].rm_so = 0; 329 rm[0].rm_eo = line_len; 330 #endif 331 matched = !regexec(pattern, line, RM_COUNT, rm, flags); 332 if (matched) 333 { 334 int i; 335 int ecount; 336 for (ecount = RM_COUNT; ecount > 0; ecount--) 337 if (rm[ecount-1].rm_so >= 0) 338 break; 339 if (ecount >= nsp) 340 ecount = nsp-1; 341 for (i = 0; i < ecount; i++) 342 { 343 if (rm[i].rm_so < 0) 344 { 345 *sp++ = *ep++ = line; 346 } else 347 { 348 #ifndef __WATCOMC__ 349 *sp++ = line + rm[i].rm_so; 350 *ep++ = line + rm[i].rm_eo; 351 #else 352 *sp++ = rm[i].rm_sp; 353 *ep++ = rm[i].rm_ep; 354 #endif 355 } 356 } 357 } 358 } 359 #endif 360 #if HAVE_PCRE 361 { 362 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3) 363 int ovector[OVECTOR_COUNT]; 364 int flags = (notbol) ? PCRE_NOTBOL : 0; 365 int i; 366 int ecount; 367 int mcount = pcre_exec(pattern, NULL, line, line_len, 368 0, flags, ovector, OVECTOR_COUNT); 369 matched = (mcount > 0); 370 ecount = nsp-1; 371 if (ecount > mcount) ecount = mcount; 372 for (i = 0; i < ecount*2; ) 373 { 374 if (ovector[i] < 0 || ovector[i+1] < 0) 375 { 376 *sp++ = *ep++ = line; 377 i += 2; 378 } else 379 { 380 *sp++ = line + ovector[i++]; 381 *ep++ = line + ovector[i++]; 382 } 383 } 384 } 385 #endif 386 #if HAVE_PCRE2 387 { 388 int flags = (notbol) ? PCRE2_NOTBOL : 0; 389 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL); 390 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 391 0, flags, md, NULL); 392 matched = (mcount > 0); 393 if (matched) 394 { 395 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); 396 int i; 397 int ecount = nsp-1; 398 if (ecount > mcount) ecount = mcount; 399 for (i = 0; i < ecount*2; ) 400 { 401 if (ovector[i] < 0 || ovector[i+1] < 0) 402 { 403 *sp++ = *ep++ = line; 404 i += 2; 405 } else 406 { 407 *sp++ = line + ovector[i++]; 408 *ep++ = line + ovector[i++]; 409 } 410 } 411 } 412 pcre2_match_data_free(md); 413 } 414 #endif 415 #if HAVE_RE_COMP 416 matched = (re_exec(line) == 1); 417 /* 418 * re_exec doesn't seem to provide a way to get the matched string. 419 */ 420 #endif 421 #if HAVE_REGCMP 422 matched = ((*ep++ = regex(pattern, line)) != NULL); 423 if (matched) 424 *sp++ = __loc1; 425 #endif 426 #if HAVE_V8_REGCOMP 427 #if HAVE_REGEXEC2 428 matched = regexec2(pattern, line, notbol); 429 #else 430 matched = regexec(pattern, line); 431 #endif 432 if (matched) 433 { 434 *sp++ = pattern->startp[0]; 435 *ep++ = pattern->endp[0]; 436 } 437 #endif 438 } 439 *sp = *ep = NULL; 440 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 441 ((search_type & SRCH_NO_MATCH) && !matched); 442 return (matched); 443 } 444 445 public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type) 446 { 447 int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type); 448 int i; 449 for (i = 1; i <= NUM_SEARCH_COLORS; i++) 450 { 451 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i]) 452 matched = 0; 453 } 454 return matched; 455 } 456 457 /* 458 * Return the name of the pattern matching library. 459 */ 460 public char * pattern_lib_name(void) 461 { 462 #if HAVE_GNU_REGEX 463 return ("GNU"); 464 #else 465 #if HAVE_POSIX_REGCOMP 466 return ("POSIX"); 467 #else 468 #if HAVE_PCRE2 469 return ("PCRE2"); 470 #else 471 #if HAVE_PCRE 472 return ("PCRE"); 473 #else 474 #if HAVE_RE_COMP 475 return ("BSD"); 476 #else 477 #if HAVE_REGCMP 478 return ("V8"); 479 #else 480 #if HAVE_V8_REGCOMP 481 return ("Spencer V8"); 482 #else 483 return ("no"); 484 #endif 485 #endif 486 #endif 487 #endif 488 #endif 489 #endif 490 #endif 491 } 492