1 /* 2 * Copyright (C) 1984-2021 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information, see the README file. 8 */ 9 10 /* 11 * Routines to do pattern matching. 12 */ 13 14 #include "less.h" 15 16 extern int caseless; 17 extern int utf_mode; 18 19 /* 20 * Compile a search pattern, for future use by match_pattern. 21 */ 22 static int 23 compile_pattern2(pattern, search_type, comp_pattern, show_error) 24 char *pattern; 25 int search_type; 26 PATTERN_TYPE *comp_pattern; 27 int show_error; 28 { 29 if (search_type & SRCH_NO_REGEX) 30 return (0); 31 { 32 #if HAVE_GNU_REGEX 33 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 34 ecalloc(1, sizeof(struct re_pattern_buffer)); 35 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 36 if (re_compile_pattern(pattern, strlen(pattern), comp)) 37 { 38 free(comp); 39 if (show_error) 40 error("Invalid pattern", NULL_PARG); 41 return (-1); 42 } 43 if (*comp_pattern != NULL) 44 { 45 regfree(*comp_pattern); 46 free(*comp_pattern); 47 } 48 *comp_pattern = comp; 49 #endif 50 #if HAVE_POSIX_REGCOMP 51 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 52 if (regcomp(comp, pattern, REGCOMP_FLAG)) 53 { 54 free(comp); 55 if (show_error) 56 error("Invalid pattern", NULL_PARG); 57 return (-1); 58 } 59 if (*comp_pattern != NULL) 60 { 61 regfree(*comp_pattern); 62 free(*comp_pattern); 63 } 64 *comp_pattern = comp; 65 #endif 66 #if HAVE_PCRE 67 constant char *errstring; 68 int erroffset; 69 PARG parg; 70 pcre *comp = pcre_compile(pattern, 71 (utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0, 72 &errstring, &erroffset, NULL); 73 if (comp == NULL) 74 { 75 parg.p_string = (char *) errstring; 76 if (show_error) 77 error("%s", &parg); 78 return (-1); 79 } 80 *comp_pattern = comp; 81 #endif 82 #if HAVE_PCRE2 83 int errcode; 84 PCRE2_SIZE erroffset; 85 PARG parg; 86 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern), 87 0, &errcode, &erroffset, NULL); 88 if (comp == NULL) 89 { 90 if (show_error) 91 { 92 char msg[160]; 93 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg)); 94 parg.p_string = msg; 95 error("%s", &parg); 96 } 97 return (-1); 98 } 99 *comp_pattern = comp; 100 #endif 101 #if HAVE_RE_COMP 102 PARG parg; 103 if ((parg.p_string = re_comp(pattern)) != NULL) 104 { 105 if (show_error) 106 error("%s", &parg); 107 return (-1); 108 } 109 *comp_pattern = 1; 110 #endif 111 #if HAVE_REGCMP 112 char *comp; 113 if ((comp = regcmp(pattern, 0)) == NULL) 114 { 115 if (show_error) 116 error("Invalid pattern", NULL_PARG); 117 return (-1); 118 } 119 if (comp_pattern != NULL) 120 free(*comp_pattern); 121 *comp_pattern = comp; 122 #endif 123 #if HAVE_V8_REGCOMP 124 struct regexp *comp; 125 reg_show_error = show_error; 126 comp = regcomp(pattern); 127 reg_show_error = 1; 128 if (comp == NULL) 129 { 130 /* 131 * regcomp has already printed an error message 132 * via regerror(). 133 */ 134 return (-1); 135 } 136 if (*comp_pattern != NULL) 137 free(*comp_pattern); 138 *comp_pattern = comp; 139 #endif 140 } 141 return (0); 142 } 143 144 /* 145 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 146 */ 147 public int 148 compile_pattern(pattern, search_type, show_error, comp_pattern) 149 char *pattern; 150 int search_type; 151 int show_error; 152 PATTERN_TYPE *comp_pattern; 153 { 154 char *cvt_pattern; 155 int result; 156 157 if (caseless != OPT_ONPLUS) 158 cvt_pattern = pattern; 159 else 160 { 161 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 162 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 163 } 164 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error); 165 if (cvt_pattern != pattern) 166 free(cvt_pattern); 167 return (result); 168 } 169 170 /* 171 * Forget that we have a compiled pattern. 172 */ 173 public void 174 uncompile_pattern(pattern) 175 PATTERN_TYPE *pattern; 176 { 177 #if HAVE_GNU_REGEX 178 if (*pattern != NULL) 179 { 180 regfree(*pattern); 181 free(*pattern); 182 } 183 *pattern = NULL; 184 #endif 185 #if HAVE_POSIX_REGCOMP 186 if (*pattern != NULL) 187 { 188 regfree(*pattern); 189 free(*pattern); 190 } 191 *pattern = NULL; 192 #endif 193 #if HAVE_PCRE 194 if (*pattern != NULL) 195 pcre_free(*pattern); 196 *pattern = NULL; 197 #endif 198 #if HAVE_PCRE2 199 if (*pattern != NULL) 200 pcre2_code_free(*pattern); 201 *pattern = NULL; 202 #endif 203 #if HAVE_RE_COMP 204 *pattern = 0; 205 #endif 206 #if HAVE_REGCMP 207 if (*pattern != NULL) 208 free(*pattern); 209 *pattern = NULL; 210 #endif 211 #if HAVE_V8_REGCOMP 212 if (*pattern != NULL) 213 free(*pattern); 214 *pattern = NULL; 215 #endif 216 } 217 218 #if 0 219 /* 220 * Can a pattern be successfully compiled? 221 */ 222 public int 223 valid_pattern(pattern) 224 char *pattern; 225 { 226 PATTERN_TYPE comp_pattern; 227 int result; 228 229 SET_NULL_PATTERN(comp_pattern); 230 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 231 if (result != 0) 232 return (0); 233 uncompile_pattern(&comp_pattern); 234 return (1); 235 } 236 #endif 237 238 /* 239 * Is a compiled pattern null? 240 */ 241 public int 242 is_null_pattern(pattern) 243 PATTERN_TYPE pattern; 244 { 245 #if HAVE_GNU_REGEX 246 return (pattern == NULL); 247 #endif 248 #if HAVE_POSIX_REGCOMP 249 return (pattern == NULL); 250 #endif 251 #if HAVE_PCRE 252 return (pattern == NULL); 253 #endif 254 #if HAVE_PCRE2 255 return (pattern == NULL); 256 #endif 257 #if HAVE_RE_COMP 258 return (pattern == 0); 259 #endif 260 #if HAVE_REGCMP 261 return (pattern == NULL); 262 #endif 263 #if HAVE_V8_REGCOMP 264 return (pattern == NULL); 265 #endif 266 #if NO_REGEX 267 return (pattern == NULL); 268 #endif 269 } 270 271 /* 272 * Simple pattern matching function. 273 * It supports no metacharacters like *, etc. 274 */ 275 static int 276 match(pattern, pattern_len, buf, buf_len, pfound, pend) 277 char *pattern; 278 int pattern_len; 279 char *buf; 280 int buf_len; 281 char **pfound, **pend; 282 { 283 char *pp, *lp; 284 char *pattern_end = pattern + pattern_len; 285 char *buf_end = buf + buf_len; 286 287 for ( ; buf < buf_end; buf++) 288 { 289 for (pp = pattern, lp = buf; ; pp++, lp++) 290 { 291 char cp = *pp; 292 char cl = *lp; 293 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 294 cp = ASCII_TO_LOWER(cp); 295 if (cp != cl) 296 break; 297 if (pp == pattern_end || lp == buf_end) 298 break; 299 } 300 if (pp == pattern_end) 301 { 302 if (pfound != NULL) 303 *pfound = buf; 304 if (pend != NULL) 305 *pend = lp; 306 return (1); 307 } 308 } 309 return (0); 310 } 311 312 /* 313 * Perform a pattern match with the previously compiled pattern. 314 * Set sp and ep to the start and end of the matched string. 315 */ 316 public int 317 match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type) 318 PATTERN_TYPE pattern; 319 char *tpattern; 320 char *line; 321 int line_len; 322 char **sp; 323 char **ep; 324 int notbol; 325 int search_type; 326 { 327 int matched; 328 329 *sp = *ep = NULL; 330 #if NO_REGEX 331 search_type |= SRCH_NO_REGEX; 332 #endif 333 if (search_type & SRCH_NO_REGEX) 334 matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep); 335 else 336 { 337 #if HAVE_GNU_REGEX 338 { 339 struct re_registers search_regs; 340 pattern->not_bol = notbol; 341 pattern->regs_allocated = REGS_UNALLOCATED; 342 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 343 if (matched) 344 { 345 *sp = line + search_regs.start[0]; 346 *ep = line + search_regs.end[0]; 347 } 348 } 349 #endif 350 #if HAVE_POSIX_REGCOMP 351 { 352 regmatch_t rm; 353 int flags = (notbol) ? REG_NOTBOL : 0; 354 #ifdef REG_STARTEND 355 flags |= REG_STARTEND; 356 rm.rm_so = 0; 357 rm.rm_eo = line_len; 358 #endif 359 matched = !regexec(pattern, line, 1, &rm, flags); 360 if (matched) 361 { 362 #ifndef __WATCOMC__ 363 *sp = line + rm.rm_so; 364 *ep = line + rm.rm_eo; 365 #else 366 *sp = rm.rm_sp; 367 *ep = rm.rm_ep; 368 #endif 369 } 370 } 371 #endif 372 #if HAVE_PCRE 373 { 374 int flags = (notbol) ? PCRE_NOTBOL : 0; 375 int ovector[3]; 376 matched = pcre_exec(pattern, NULL, line, line_len, 377 0, flags, ovector, 3) >= 0; 378 if (matched) 379 { 380 *sp = line + ovector[0]; 381 *ep = line + ovector[1]; 382 } 383 } 384 #endif 385 #if HAVE_PCRE2 386 { 387 int flags = (notbol) ? PCRE2_NOTBOL : 0; 388 pcre2_match_data *md = pcre2_match_data_create(3, NULL); 389 matched = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 390 0, flags, md, NULL) >= 0; 391 if (matched) 392 { 393 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); 394 *sp = line + ovector[0]; 395 *ep = line + ovector[1]; 396 } 397 pcre2_match_data_free(md); 398 } 399 #endif 400 #if HAVE_RE_COMP 401 matched = (re_exec(line) == 1); 402 /* 403 * re_exec doesn't seem to provide a way to get the matched string. 404 */ 405 *sp = *ep = NULL; 406 #endif 407 #if HAVE_REGCMP 408 *ep = regex(pattern, line); 409 matched = (*ep != NULL); 410 if (matched) 411 *sp = __loc1; 412 #endif 413 #if HAVE_V8_REGCOMP 414 #if HAVE_REGEXEC2 415 matched = regexec2(pattern, line, notbol); 416 #else 417 matched = regexec(pattern, line); 418 #endif 419 if (matched) 420 { 421 *sp = pattern->startp[0]; 422 *ep = pattern->endp[0]; 423 } 424 #endif 425 } 426 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 427 ((search_type & SRCH_NO_MATCH) && !matched); 428 return (matched); 429 } 430 431 /* 432 * Return the name of the pattern matching library. 433 */ 434 public char * 435 pattern_lib_name(VOID_PARAM) 436 { 437 #if HAVE_GNU_REGEX 438 return ("GNU"); 439 #else 440 #if HAVE_POSIX_REGCOMP 441 return ("POSIX"); 442 #else 443 #if HAVE_PCRE2 444 return ("PCRE2"); 445 #else 446 #if HAVE_PCRE 447 return ("PCRE"); 448 #else 449 #if HAVE_RE_COMP 450 return ("BSD"); 451 #else 452 #if HAVE_REGCMP 453 return ("V8"); 454 #else 455 #if HAVE_V8_REGCOMP 456 return ("Spencer V8"); 457 #else 458 return ("no"); 459 #endif 460 #endif 461 #endif 462 #endif 463 #endif 464 #endif 465 #endif 466 } 467