1 /* 2 * Copyright (C) 1984-2020 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information, see the README file. 8 */ 9 10 /* 11 * Routines to do pattern matching. 12 */ 13 14 #include "less.h" 15 16 extern int caseless; 17 extern int utf_mode; 18 19 /* 20 * Compile a search pattern, for future use by match_pattern. 21 */ 22 static int 23 compile_pattern2(pattern, search_type, comp_pattern, show_error) 24 char *pattern; 25 int search_type; 26 PATTERN_TYPE *comp_pattern; 27 int show_error; 28 { 29 if (search_type & SRCH_NO_REGEX) 30 return (0); 31 { 32 #if HAVE_GNU_REGEX 33 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 34 ecalloc(1, sizeof(struct re_pattern_buffer)); 35 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 36 if (re_compile_pattern(pattern, strlen(pattern), comp)) 37 { 38 free(comp); 39 if (show_error) 40 error("Invalid pattern", NULL_PARG); 41 return (-1); 42 } 43 if (*comp_pattern != NULL) 44 { 45 regfree(*comp_pattern); 46 free(*comp_pattern); 47 } 48 *comp_pattern = comp; 49 #endif 50 #if HAVE_POSIX_REGCOMP 51 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 52 if (regcomp(comp, pattern, REGCOMP_FLAG)) 53 { 54 free(comp); 55 if (show_error) 56 error("Invalid pattern", NULL_PARG); 57 return (-1); 58 } 59 if (*comp_pattern != NULL) 60 { 61 regfree(*comp_pattern); 62 free(*comp_pattern); 63 } 64 *comp_pattern = comp; 65 #endif 66 #if HAVE_PCRE 67 constant char *errstring; 68 int erroffset; 69 PARG parg; 70 pcre *comp = pcre_compile(pattern, 71 (utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0, 72 &errstring, &erroffset, NULL); 73 if (comp == NULL) 74 { 75 parg.p_string = (char *) errstring; 76 if (show_error) 77 error("%s", &parg); 78 return (-1); 79 } 80 *comp_pattern = comp; 81 #endif 82 #if HAVE_PCRE2 83 int errcode; 84 PCRE2_SIZE erroffset; 85 PARG parg; 86 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern), 87 0, &errcode, &erroffset, NULL); 88 if (comp == NULL) 89 { 90 if (show_error) 91 { 92 char msg[160]; 93 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg)); 94 parg.p_string = msg; 95 error("%s", &parg); 96 } 97 return (-1); 98 } 99 *comp_pattern = comp; 100 #endif 101 #if HAVE_RE_COMP 102 PARG parg; 103 if ((parg.p_string = re_comp(pattern)) != NULL) 104 { 105 if (show_error) 106 error("%s", &parg); 107 return (-1); 108 } 109 *comp_pattern = 1; 110 #endif 111 #if HAVE_REGCMP 112 char *comp; 113 if ((comp = regcmp(pattern, 0)) == NULL) 114 { 115 if (show_error) 116 error("Invalid pattern", NULL_PARG); 117 return (-1); 118 } 119 if (comp_pattern != NULL) 120 free(*comp_pattern); 121 *comp_pattern = comp; 122 #endif 123 #if HAVE_V8_REGCOMP 124 struct regexp *comp; 125 reg_show_error = show_error; 126 comp = regcomp(pattern); 127 reg_show_error = 1; 128 if (comp == NULL) 129 { 130 /* 131 * regcomp has already printed an error message 132 * via regerror(). 133 */ 134 return (-1); 135 } 136 if (*comp_pattern != NULL) 137 free(*comp_pattern); 138 *comp_pattern = comp; 139 #endif 140 } 141 return (0); 142 } 143 144 /* 145 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 146 */ 147 public int 148 compile_pattern(pattern, search_type, comp_pattern) 149 char *pattern; 150 int search_type; 151 PATTERN_TYPE *comp_pattern; 152 { 153 char *cvt_pattern; 154 int result; 155 156 if (caseless != OPT_ONPLUS) 157 cvt_pattern = pattern; 158 else 159 { 160 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 161 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 162 } 163 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, 1); 164 if (cvt_pattern != pattern) 165 free(cvt_pattern); 166 return (result); 167 } 168 169 /* 170 * Forget that we have a compiled pattern. 171 */ 172 public void 173 uncompile_pattern(pattern) 174 PATTERN_TYPE *pattern; 175 { 176 #if HAVE_GNU_REGEX 177 if (*pattern != NULL) 178 { 179 regfree(*pattern); 180 free(*pattern); 181 } 182 *pattern = NULL; 183 #endif 184 #if HAVE_POSIX_REGCOMP 185 if (*pattern != NULL) 186 { 187 regfree(*pattern); 188 free(*pattern); 189 } 190 *pattern = NULL; 191 #endif 192 #if HAVE_PCRE 193 if (*pattern != NULL) 194 pcre_free(*pattern); 195 *pattern = NULL; 196 #endif 197 #if HAVE_PCRE2 198 if (*pattern != NULL) 199 pcre2_code_free(*pattern); 200 *pattern = NULL; 201 #endif 202 #if HAVE_RE_COMP 203 *pattern = 0; 204 #endif 205 #if HAVE_REGCMP 206 if (*pattern != NULL) 207 free(*pattern); 208 *pattern = NULL; 209 #endif 210 #if HAVE_V8_REGCOMP 211 if (*pattern != NULL) 212 free(*pattern); 213 *pattern = NULL; 214 #endif 215 } 216 217 /* 218 * Can a pattern be successfully compiled? 219 */ 220 public int 221 valid_pattern(pattern) 222 char *pattern; 223 { 224 PATTERN_TYPE comp_pattern; 225 int result; 226 227 CLEAR_PATTERN(comp_pattern); 228 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 229 if (result != 0) 230 return (0); 231 uncompile_pattern(&comp_pattern); 232 return (1); 233 } 234 235 /* 236 * Is a compiled pattern null? 237 */ 238 public int 239 is_null_pattern(pattern) 240 PATTERN_TYPE pattern; 241 { 242 #if HAVE_GNU_REGEX 243 return (pattern == NULL); 244 #endif 245 #if HAVE_POSIX_REGCOMP 246 return (pattern == NULL); 247 #endif 248 #if HAVE_PCRE 249 return (pattern == NULL); 250 #endif 251 #if HAVE_PCRE2 252 return (pattern == NULL); 253 #endif 254 #if HAVE_RE_COMP 255 return (pattern == 0); 256 #endif 257 #if HAVE_REGCMP 258 return (pattern == NULL); 259 #endif 260 #if HAVE_V8_REGCOMP 261 return (pattern == NULL); 262 #endif 263 #if NO_REGEX 264 return (pattern == NULL); 265 #endif 266 } 267 268 /* 269 * Simple pattern matching function. 270 * It supports no metacharacters like *, etc. 271 */ 272 static int 273 match(pattern, pattern_len, buf, buf_len, pfound, pend) 274 char *pattern; 275 int pattern_len; 276 char *buf; 277 int buf_len; 278 char **pfound, **pend; 279 { 280 char *pp, *lp; 281 char *pattern_end = pattern + pattern_len; 282 char *buf_end = buf + buf_len; 283 284 for ( ; buf < buf_end; buf++) 285 { 286 for (pp = pattern, lp = buf; ; pp++, lp++) 287 { 288 char cp = *pp; 289 char cl = *lp; 290 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 291 cp = ASCII_TO_LOWER(cp); 292 if (cp != cl) 293 break; 294 if (pp == pattern_end || lp == buf_end) 295 break; 296 } 297 if (pp == pattern_end) 298 { 299 if (pfound != NULL) 300 *pfound = buf; 301 if (pend != NULL) 302 *pend = lp; 303 return (1); 304 } 305 } 306 return (0); 307 } 308 309 /* 310 * Perform a pattern match with the previously compiled pattern. 311 * Set sp and ep to the start and end of the matched string. 312 */ 313 public int 314 match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type) 315 PATTERN_TYPE pattern; 316 char *tpattern; 317 char *line; 318 int line_len; 319 char **sp; 320 char **ep; 321 int notbol; 322 int search_type; 323 { 324 int matched; 325 326 *sp = *ep = NULL; 327 #if NO_REGEX 328 search_type |= SRCH_NO_REGEX; 329 #endif 330 if (search_type & SRCH_NO_REGEX) 331 matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep); 332 else 333 { 334 #if HAVE_GNU_REGEX 335 { 336 struct re_registers search_regs; 337 pattern->not_bol = notbol; 338 pattern->regs_allocated = REGS_UNALLOCATED; 339 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 340 if (matched) 341 { 342 *sp = line + search_regs.start[0]; 343 *ep = line + search_regs.end[0]; 344 } 345 } 346 #endif 347 #if HAVE_POSIX_REGCOMP 348 { 349 regmatch_t rm; 350 int flags = (notbol) ? REG_NOTBOL : 0; 351 #ifdef REG_STARTEND 352 flags |= REG_STARTEND; 353 rm.rm_so = 0; 354 rm.rm_eo = line_len; 355 #endif 356 matched = !regexec(pattern, line, 1, &rm, flags); 357 if (matched) 358 { 359 #ifndef __WATCOMC__ 360 *sp = line + rm.rm_so; 361 *ep = line + rm.rm_eo; 362 #else 363 *sp = rm.rm_sp; 364 *ep = rm.rm_ep; 365 #endif 366 } 367 } 368 #endif 369 #if HAVE_PCRE 370 { 371 int flags = (notbol) ? PCRE_NOTBOL : 0; 372 int ovector[3]; 373 matched = pcre_exec(pattern, NULL, line, line_len, 374 0, flags, ovector, 3) >= 0; 375 if (matched) 376 { 377 *sp = line + ovector[0]; 378 *ep = line + ovector[1]; 379 } 380 } 381 #endif 382 #if HAVE_PCRE2 383 { 384 int flags = (notbol) ? PCRE2_NOTBOL : 0; 385 pcre2_match_data *md = pcre2_match_data_create(3, NULL); 386 matched = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 387 0, flags, md, NULL) >= 0; 388 if (matched) 389 { 390 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); 391 *sp = line + ovector[0]; 392 *ep = line + ovector[1]; 393 } 394 pcre2_match_data_free(md); 395 } 396 #endif 397 #if HAVE_RE_COMP 398 matched = (re_exec(line) == 1); 399 /* 400 * re_exec doesn't seem to provide a way to get the matched string. 401 */ 402 *sp = *ep = NULL; 403 #endif 404 #if HAVE_REGCMP 405 *ep = regex(pattern, line); 406 matched = (*ep != NULL); 407 if (matched) 408 *sp = __loc1; 409 #endif 410 #if HAVE_V8_REGCOMP 411 #if HAVE_REGEXEC2 412 matched = regexec2(pattern, line, notbol); 413 #else 414 matched = regexec(pattern, line); 415 #endif 416 if (matched) 417 { 418 *sp = pattern->startp[0]; 419 *ep = pattern->endp[0]; 420 } 421 #endif 422 } 423 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 424 ((search_type & SRCH_NO_MATCH) && !matched); 425 return (matched); 426 } 427 428 /* 429 * Return the name of the pattern matching library. 430 */ 431 public char * 432 pattern_lib_name(VOID_PARAM) 433 { 434 #if HAVE_GNU_REGEX 435 return ("GNU"); 436 #else 437 #if HAVE_POSIX_REGCOMP 438 return ("POSIX"); 439 #else 440 #if HAVE_PCRE2 441 return ("PCRE2"); 442 #else 443 #if HAVE_PCRE 444 return ("PCRE"); 445 #else 446 #if HAVE_RE_COMP 447 return ("BSD"); 448 #else 449 #if HAVE_REGCMP 450 return ("V8"); 451 #else 452 #if HAVE_V8_REGCOMP 453 return ("Spencer V8"); 454 #else 455 return ("no"); 456 #endif 457 #endif 458 #endif 459 #endif 460 #endif 461 #endif 462 #endif 463 } 464