1 /* 2 * Copyright (C) 1984-2022 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information, see the README file. 8 */ 9 10 /* 11 * Routines to do pattern matching. 12 */ 13 14 #include "less.h" 15 16 extern int caseless; 17 extern int is_caseless; 18 extern int utf_mode; 19 20 /* 21 * Compile a search pattern, for future use by match_pattern. 22 */ 23 static int 24 compile_pattern2(pattern, search_type, comp_pattern, show_error) 25 char *pattern; 26 int search_type; 27 PATTERN_TYPE *comp_pattern; 28 int show_error; 29 { 30 if (search_type & SRCH_NO_REGEX) 31 return (0); 32 { 33 #if HAVE_GNU_REGEX 34 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 35 ecalloc(1, sizeof(struct re_pattern_buffer)); 36 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 37 if (re_compile_pattern(pattern, strlen(pattern), comp)) 38 { 39 free(comp); 40 if (show_error) 41 error("Invalid pattern", NULL_PARG); 42 return (-1); 43 } 44 if (*comp_pattern != NULL) 45 { 46 regfree(*comp_pattern); 47 free(*comp_pattern); 48 } 49 *comp_pattern = comp; 50 #endif 51 #if HAVE_POSIX_REGCOMP 52 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 53 if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0))) 54 { 55 free(comp); 56 if (show_error) 57 error("Invalid pattern", NULL_PARG); 58 return (-1); 59 } 60 if (*comp_pattern != NULL) 61 { 62 regfree(*comp_pattern); 63 free(*comp_pattern); 64 } 65 *comp_pattern = comp; 66 #endif 67 #if HAVE_PCRE 68 constant char *errstring; 69 int erroffset; 70 PARG parg; 71 pcre *comp = pcre_compile(pattern, 72 ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) | 73 (is_caseless ? PCRE_CASELESS : 0), 74 &errstring, &erroffset, NULL); 75 if (comp == NULL) 76 { 77 parg.p_string = (char *) errstring; 78 if (show_error) 79 error("%s", &parg); 80 return (-1); 81 } 82 *comp_pattern = comp; 83 #endif 84 #if HAVE_PCRE2 85 int errcode; 86 PCRE2_SIZE erroffset; 87 PARG parg; 88 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern), 89 (is_caseless ? PCRE2_CASELESS : 0), 90 &errcode, &erroffset, NULL); 91 if (comp == NULL) 92 { 93 if (show_error) 94 { 95 char msg[160]; 96 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg)); 97 parg.p_string = msg; 98 error("%s", &parg); 99 } 100 return (-1); 101 } 102 *comp_pattern = comp; 103 #endif 104 #if HAVE_RE_COMP 105 PARG parg; 106 if ((parg.p_string = re_comp(pattern)) != NULL) 107 { 108 if (show_error) 109 error("%s", &parg); 110 return (-1); 111 } 112 *comp_pattern = 1; 113 #endif 114 #if HAVE_REGCMP 115 char *comp; 116 if ((comp = regcmp(pattern, 0)) == NULL) 117 { 118 if (show_error) 119 error("Invalid pattern", NULL_PARG); 120 return (-1); 121 } 122 if (comp_pattern != NULL) 123 free(*comp_pattern); 124 *comp_pattern = comp; 125 #endif 126 #if HAVE_V8_REGCOMP 127 struct regexp *comp; 128 reg_show_error = show_error; 129 comp = regcomp(pattern); 130 reg_show_error = 1; 131 if (comp == NULL) 132 { 133 /* 134 * regcomp has already printed an error message 135 * via regerror(). 136 */ 137 return (-1); 138 } 139 if (*comp_pattern != NULL) 140 free(*comp_pattern); 141 *comp_pattern = comp; 142 #endif 143 } 144 return (0); 145 } 146 147 /* 148 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 149 */ 150 public int 151 compile_pattern(pattern, search_type, show_error, comp_pattern) 152 char *pattern; 153 int search_type; 154 int show_error; 155 PATTERN_TYPE *comp_pattern; 156 { 157 char *cvt_pattern; 158 int result; 159 160 if (caseless != OPT_ONPLUS || re_handles_caseless) 161 cvt_pattern = pattern; 162 else 163 { 164 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 165 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 166 } 167 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error); 168 if (cvt_pattern != pattern) 169 free(cvt_pattern); 170 return (result); 171 } 172 173 /* 174 * Forget that we have a compiled pattern. 175 */ 176 public void 177 uncompile_pattern(pattern) 178 PATTERN_TYPE *pattern; 179 { 180 #if HAVE_GNU_REGEX 181 if (*pattern != NULL) 182 { 183 regfree(*pattern); 184 free(*pattern); 185 } 186 *pattern = NULL; 187 #endif 188 #if HAVE_POSIX_REGCOMP 189 if (*pattern != NULL) 190 { 191 regfree(*pattern); 192 free(*pattern); 193 } 194 *pattern = NULL; 195 #endif 196 #if HAVE_PCRE 197 if (*pattern != NULL) 198 pcre_free(*pattern); 199 *pattern = NULL; 200 #endif 201 #if HAVE_PCRE2 202 if (*pattern != NULL) 203 pcre2_code_free(*pattern); 204 *pattern = NULL; 205 #endif 206 #if HAVE_RE_COMP 207 *pattern = 0; 208 #endif 209 #if HAVE_REGCMP 210 if (*pattern != NULL) 211 free(*pattern); 212 *pattern = NULL; 213 #endif 214 #if HAVE_V8_REGCOMP 215 if (*pattern != NULL) 216 free(*pattern); 217 *pattern = NULL; 218 #endif 219 } 220 221 #if 0 222 /* 223 * Can a pattern be successfully compiled? 224 */ 225 public int 226 valid_pattern(pattern) 227 char *pattern; 228 { 229 PATTERN_TYPE comp_pattern; 230 int result; 231 232 SET_NULL_PATTERN(comp_pattern); 233 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 234 if (result != 0) 235 return (0); 236 uncompile_pattern(&comp_pattern); 237 return (1); 238 } 239 #endif 240 241 /* 242 * Is a compiled pattern null? 243 */ 244 public int 245 is_null_pattern(pattern) 246 PATTERN_TYPE pattern; 247 { 248 #if HAVE_GNU_REGEX 249 return (pattern == NULL); 250 #endif 251 #if HAVE_POSIX_REGCOMP 252 return (pattern == NULL); 253 #endif 254 #if HAVE_PCRE 255 return (pattern == NULL); 256 #endif 257 #if HAVE_PCRE2 258 return (pattern == NULL); 259 #endif 260 #if HAVE_RE_COMP 261 return (pattern == 0); 262 #endif 263 #if HAVE_REGCMP 264 return (pattern == NULL); 265 #endif 266 #if HAVE_V8_REGCOMP 267 return (pattern == NULL); 268 #endif 269 #if NO_REGEX 270 return (pattern == NULL); 271 #endif 272 } 273 274 /* 275 * Simple pattern matching function. 276 * It supports no metacharacters like *, etc. 277 */ 278 static int 279 match(pattern, pattern_len, buf, buf_len, pfound, pend) 280 char *pattern; 281 int pattern_len; 282 char *buf; 283 int buf_len; 284 char **pfound, **pend; 285 { 286 char *pp, *lp; 287 char *pattern_end = pattern + pattern_len; 288 char *buf_end = buf + buf_len; 289 290 for ( ; buf < buf_end; buf++) 291 { 292 for (pp = pattern, lp = buf; ; pp++, lp++) 293 { 294 char cp = *pp; 295 char cl = *lp; 296 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 297 cp = ASCII_TO_LOWER(cp); 298 if (cp != cl) 299 break; 300 if (pp == pattern_end || lp == buf_end) 301 break; 302 } 303 if (pp == pattern_end) 304 { 305 if (pfound != NULL) 306 *pfound = buf; 307 if (pend != NULL) 308 *pend = lp; 309 return (1); 310 } 311 } 312 return (0); 313 } 314 315 /* 316 * Perform a pattern match with the previously compiled pattern. 317 * Set sp and ep to the start and end of the matched string. 318 */ 319 public int 320 match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type) 321 PATTERN_TYPE pattern; 322 char *tpattern; 323 char *line; 324 int line_len; 325 char **sp; 326 char **ep; 327 int notbol; 328 int search_type; 329 { 330 int matched; 331 332 *sp = *ep = NULL; 333 #if NO_REGEX 334 search_type |= SRCH_NO_REGEX; 335 #endif 336 if (search_type & SRCH_NO_REGEX) 337 matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep); 338 else 339 { 340 #if HAVE_GNU_REGEX 341 { 342 struct re_registers search_regs; 343 pattern->not_bol = notbol; 344 pattern->regs_allocated = REGS_UNALLOCATED; 345 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 346 if (matched) 347 { 348 *sp = line + search_regs.start[0]; 349 *ep = line + search_regs.end[0]; 350 } 351 } 352 #endif 353 #if HAVE_POSIX_REGCOMP 354 { 355 regmatch_t rm; 356 int flags = (notbol) ? REG_NOTBOL : 0; 357 #ifdef REG_STARTEND 358 flags |= REG_STARTEND; 359 rm.rm_so = 0; 360 rm.rm_eo = line_len; 361 #endif 362 matched = !regexec(pattern, line, 1, &rm, flags); 363 if (matched) 364 { 365 #ifndef __WATCOMC__ 366 *sp = line + rm.rm_so; 367 *ep = line + rm.rm_eo; 368 #else 369 *sp = rm.rm_sp; 370 *ep = rm.rm_ep; 371 #endif 372 } 373 } 374 #endif 375 #if HAVE_PCRE 376 { 377 int flags = (notbol) ? PCRE_NOTBOL : 0; 378 int ovector[3]; 379 matched = pcre_exec(pattern, NULL, line, line_len, 380 0, flags, ovector, 3) >= 0; 381 if (matched) 382 { 383 *sp = line + ovector[0]; 384 *ep = line + ovector[1]; 385 } 386 } 387 #endif 388 #if HAVE_PCRE2 389 { 390 int flags = (notbol) ? PCRE2_NOTBOL : 0; 391 pcre2_match_data *md = pcre2_match_data_create(3, NULL); 392 matched = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 393 0, flags, md, NULL) >= 0; 394 if (matched) 395 { 396 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); 397 *sp = line + ovector[0]; 398 *ep = line + ovector[1]; 399 } 400 pcre2_match_data_free(md); 401 } 402 #endif 403 #if HAVE_RE_COMP 404 matched = (re_exec(line) == 1); 405 /* 406 * re_exec doesn't seem to provide a way to get the matched string. 407 */ 408 *sp = *ep = NULL; 409 #endif 410 #if HAVE_REGCMP 411 *ep = regex(pattern, line); 412 matched = (*ep != NULL); 413 if (matched) 414 *sp = __loc1; 415 #endif 416 #if HAVE_V8_REGCOMP 417 #if HAVE_REGEXEC2 418 matched = regexec2(pattern, line, notbol); 419 #else 420 matched = regexec(pattern, line); 421 #endif 422 if (matched) 423 { 424 *sp = pattern->startp[0]; 425 *ep = pattern->endp[0]; 426 } 427 #endif 428 } 429 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 430 ((search_type & SRCH_NO_MATCH) && !matched); 431 return (matched); 432 } 433 434 /* 435 * Return the name of the pattern matching library. 436 */ 437 public char * 438 pattern_lib_name(VOID_PARAM) 439 { 440 #if HAVE_GNU_REGEX 441 return ("GNU"); 442 #else 443 #if HAVE_POSIX_REGCOMP 444 return ("POSIX"); 445 #else 446 #if HAVE_PCRE2 447 return ("PCRE2"); 448 #else 449 #if HAVE_PCRE 450 return ("PCRE"); 451 #else 452 #if HAVE_RE_COMP 453 return ("BSD"); 454 #else 455 #if HAVE_REGCMP 456 return ("V8"); 457 #else 458 #if HAVE_V8_REGCOMP 459 return ("Spencer V8"); 460 #else 461 return ("no"); 462 #endif 463 #endif 464 #endif 465 #endif 466 #endif 467 #endif 468 #endif 469 } 470