pattern.c (95270f73baf6fa95ae529bc2eb6a61f5c79f32c0) | pattern.c (d713e0891ff9ab8246245c3206851d486ecfdd37) |
---|---|
1/* | 1/* |
2 * Copyright (C) 1984-2022 Mark Nudelman | 2 * Copyright (C) 1984-2023 Mark Nudelman |
3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information, see the README file. 8 */ 9 10/* --- 4 unchanged lines hidden (view full) --- 15 16extern int caseless; 17extern int is_caseless; 18extern int utf_mode; 19 20/* 21 * Compile a search pattern, for future use by match_pattern. 22 */ | 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information, see the README file. 8 */ 9 10/* --- 4 unchanged lines hidden (view full) --- 15 16extern int caseless; 17extern int is_caseless; 18extern int utf_mode; 19 20/* 21 * Compile a search pattern, for future use by match_pattern. 22 */ |
23 static int 24compile_pattern2(pattern, search_type, comp_pattern, show_error) 25 char *pattern; 26 int search_type; 27 PATTERN_TYPE *comp_pattern; 28 int show_error; | 23static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error) |
29{ 30 if (search_type & SRCH_NO_REGEX) 31 return (0); 32 { 33#if HAVE_GNU_REGEX 34 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 35 ecalloc(1, sizeof(struct re_pattern_buffer)); 36 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); --- 105 unchanged lines hidden (view full) --- 142#endif 143 } 144 return (0); 145} 146 147/* 148 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 149 */ | 24{ 25 if (search_type & SRCH_NO_REGEX) 26 return (0); 27 { 28#if HAVE_GNU_REGEX 29 struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 30 ecalloc(1, sizeof(struct re_pattern_buffer)); 31 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); --- 105 unchanged lines hidden (view full) --- 137#endif 138 } 139 return (0); 140} 141 142/* 143 * Like compile_pattern2, but convert the pattern to lowercase if necessary. 144 */ |
150 public int 151compile_pattern(pattern, search_type, show_error, comp_pattern) 152 char *pattern; 153 int search_type; 154 int show_error; 155 PATTERN_TYPE *comp_pattern; | 145public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern) |
156{ 157 char *cvt_pattern; 158 int result; 159 | 146{ 147 char *cvt_pattern; 148 int result; 149 |
160 if (caseless != OPT_ONPLUS || re_handles_caseless) | 150 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX))) |
161 cvt_pattern = pattern; 162 else 163 { 164 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 165 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 166 } 167 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error); 168 if (cvt_pattern != pattern) 169 free(cvt_pattern); 170 return (result); 171} 172 173/* 174 * Forget that we have a compiled pattern. 175 */ | 151 cvt_pattern = pattern; 152 else 153 { 154 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 155 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 156 } 157 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error); 158 if (cvt_pattern != pattern) 159 free(cvt_pattern); 160 return (result); 161} 162 163/* 164 * Forget that we have a compiled pattern. 165 */ |
176 public void 177uncompile_pattern(pattern) 178 PATTERN_TYPE *pattern; | 166public void uncompile_pattern(PATTERN_TYPE *pattern) |
179{ 180#if HAVE_GNU_REGEX 181 if (*pattern != NULL) 182 { 183 regfree(*pattern); 184 free(*pattern); 185 } 186 *pattern = NULL; --- 30 unchanged lines hidden (view full) --- 217 *pattern = NULL; 218#endif 219} 220 221#if 0 222/* 223 * Can a pattern be successfully compiled? 224 */ | 167{ 168#if HAVE_GNU_REGEX 169 if (*pattern != NULL) 170 { 171 regfree(*pattern); 172 free(*pattern); 173 } 174 *pattern = NULL; --- 30 unchanged lines hidden (view full) --- 205 *pattern = NULL; 206#endif 207} 208 209#if 0 210/* 211 * Can a pattern be successfully compiled? 212 */ |
225 public int 226valid_pattern(pattern) 227 char *pattern; | 213public int valid_pattern(char *pattern) |
228{ 229 PATTERN_TYPE comp_pattern; 230 int result; 231 232 SET_NULL_PATTERN(comp_pattern); 233 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 234 if (result != 0) 235 return (0); 236 uncompile_pattern(&comp_pattern); 237 return (1); 238} 239#endif 240 241/* 242 * Is a compiled pattern null? 243 */ | 214{ 215 PATTERN_TYPE comp_pattern; 216 int result; 217 218 SET_NULL_PATTERN(comp_pattern); 219 result = compile_pattern2(pattern, 0, &comp_pattern, 0); 220 if (result != 0) 221 return (0); 222 uncompile_pattern(&comp_pattern); 223 return (1); 224} 225#endif 226 227/* 228 * Is a compiled pattern null? 229 */ |
244 public int 245is_null_pattern(pattern) 246 PATTERN_TYPE pattern; | 230public int is_null_pattern(PATTERN_TYPE pattern) |
247{ 248#if HAVE_GNU_REGEX 249 return (pattern == NULL); 250#endif 251#if HAVE_POSIX_REGCOMP 252 return (pattern == NULL); 253#endif 254#if HAVE_PCRE --- 10 unchanged lines hidden (view full) --- 265#endif 266#if HAVE_V8_REGCOMP 267 return (pattern == NULL); 268#endif 269#if NO_REGEX 270 return (pattern == NULL); 271#endif 272} | 231{ 232#if HAVE_GNU_REGEX 233 return (pattern == NULL); 234#endif 235#if HAVE_POSIX_REGCOMP 236 return (pattern == NULL); 237#endif 238#if HAVE_PCRE --- 10 unchanged lines hidden (view full) --- 249#endif 250#if HAVE_V8_REGCOMP 251 return (pattern == NULL); 252#endif 253#if NO_REGEX 254 return (pattern == NULL); 255#endif 256} |
273 | |
274/* 275 * Simple pattern matching function. 276 * It supports no metacharacters like *, etc. 277 */ | 257/* 258 * Simple pattern matching function. 259 * It supports no metacharacters like *, etc. 260 */ |
278 static int 279match(pattern, pattern_len, buf, buf_len, pfound, pend) 280 char *pattern; 281 int pattern_len; 282 char *buf; 283 int buf_len; 284 char **pfound, **pend; | 261static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs) |
285{ 286 char *pp, *lp; 287 char *pattern_end = pattern + pattern_len; 288 char *buf_end = buf + buf_len; 289 290 for ( ; buf < buf_end; buf++) 291 { 292 for (pp = pattern, lp = buf; ; pp++, lp++) --- 4 unchanged lines hidden (view full) --- 297 cp = ASCII_TO_LOWER(cp); 298 if (cp != cl) 299 break; 300 if (pp == pattern_end || lp == buf_end) 301 break; 302 } 303 if (pp == pattern_end) 304 { | 262{ 263 char *pp, *lp; 264 char *pattern_end = pattern + pattern_len; 265 char *buf_end = buf + buf_len; 266 267 for ( ; buf < buf_end; buf++) 268 { 269 for (pp = pattern, lp = buf; ; pp++, lp++) --- 4 unchanged lines hidden (view full) --- 274 cp = ASCII_TO_LOWER(cp); 275 if (cp != cl) 276 break; 277 if (pp == pattern_end || lp == buf_end) 278 break; 279 } 280 if (pp == pattern_end) 281 { |
305 if (pfound != NULL) 306 *pfound = buf; 307 if (pend != NULL) 308 *pend = lp; | 282 *(*sp)++ = buf; 283 *(*ep)++ = lp; |
309 return (1); 310 } 311 } | 284 return (1); 285 } 286 } |
287 **sp = **ep = NULL; |
|
312 return (0); 313} 314 315/* 316 * Perform a pattern match with the previously compiled pattern. | 288 return (0); 289} 290 291/* 292 * Perform a pattern match with the previously compiled pattern. |
317 * Set sp and ep to the start and end of the matched string. | 293 * Set sp[0] and ep[0] to the start and end of the matched string. 294 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern. 295 * Subpatterns are defined by parentheses in the regex language. |
318 */ | 296 */ |
319 public int 320match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type) 321 PATTERN_TYPE pattern; 322 char *tpattern; 323 char *line; 324 int line_len; 325 char **sp; 326 char **ep; 327 int notbol; 328 int search_type; | 297static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type) |
329{ 330 int matched; 331 | 298{ 299 int matched; 300 |
332 *sp = *ep = NULL; | |
333#if NO_REGEX 334 search_type |= SRCH_NO_REGEX; 335#endif 336 if (search_type & SRCH_NO_REGEX) | 301#if NO_REGEX 302 search_type |= SRCH_NO_REGEX; 303#endif 304 if (search_type & SRCH_NO_REGEX) |
337 matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep); | 305 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp); |
338 else 339 { 340#if HAVE_GNU_REGEX 341 { 342 struct re_registers search_regs; 343 pattern->not_bol = notbol; 344 pattern->regs_allocated = REGS_UNALLOCATED; 345 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 346 if (matched) 347 { | 306 else 307 { 308#if HAVE_GNU_REGEX 309 { 310 struct re_registers search_regs; 311 pattern->not_bol = notbol; 312 pattern->regs_allocated = REGS_UNALLOCATED; 313 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 314 if (matched) 315 { |
348 *sp = line + search_regs.start[0]; 349 *ep = line + search_regs.end[0]; | 316 *sp++ = line + search_regs.start[0]; 317 *ep++ = line + search_regs.end[0]; |
350 } 351 } 352#endif 353#if HAVE_POSIX_REGCOMP 354 { | 318 } 319 } 320#endif 321#if HAVE_POSIX_REGCOMP 322 { |
355 regmatch_t rm; | 323 #define RM_COUNT (NUM_SEARCH_COLORS+2) 324 regmatch_t rm[RM_COUNT]; |
356 int flags = (notbol) ? REG_NOTBOL : 0; 357#ifdef REG_STARTEND 358 flags |= REG_STARTEND; | 325 int flags = (notbol) ? REG_NOTBOL : 0; 326#ifdef REG_STARTEND 327 flags |= REG_STARTEND; |
359 rm.rm_so = 0; 360 rm.rm_eo = line_len; | 328 rm[0].rm_so = 0; 329 rm[0].rm_eo = line_len; |
361#endif | 330#endif |
362 matched = !regexec(pattern, line, 1, &rm, flags); | 331 matched = !regexec(pattern, line, RM_COUNT, rm, flags); |
363 if (matched) 364 { | 332 if (matched) 333 { |
334 int i; 335 int ecount; 336 for (ecount = RM_COUNT; ecount > 0; ecount--) 337 if (rm[ecount-1].rm_so >= 0) 338 break; 339 if (ecount >= nsp) 340 ecount = nsp-1; 341 for (i = 0; i < ecount; i++) 342 { 343 if (rm[i].rm_so < 0) 344 { 345 *sp++ = *ep++ = line; 346 } else 347 { |
|
365#ifndef __WATCOMC__ | 348#ifndef __WATCOMC__ |
366 *sp = line + rm.rm_so; 367 *ep = line + rm.rm_eo; | 349 *sp++ = line + rm[i].rm_so; 350 *ep++ = line + rm[i].rm_eo; |
368#else | 351#else |
369 *sp = rm.rm_sp; 370 *ep = rm.rm_ep; | 352 *sp++ = rm[i].rm_sp; 353 *ep++ = rm[i].rm_ep; |
371#endif | 354#endif |
355 } 356 } |
|
372 } 373 } 374#endif 375#if HAVE_PCRE 376 { | 357 } 358 } 359#endif 360#if HAVE_PCRE 361 { |
362 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3) 363 int ovector[OVECTOR_COUNT]; |
|
377 int flags = (notbol) ? PCRE_NOTBOL : 0; | 364 int flags = (notbol) ? PCRE_NOTBOL : 0; |
378 int ovector[3]; 379 matched = pcre_exec(pattern, NULL, line, line_len, 380 0, flags, ovector, 3) >= 0; 381 if (matched) | 365 int i; 366 int ecount; 367 int mcount = pcre_exec(pattern, NULL, line, line_len, 368 0, flags, ovector, OVECTOR_COUNT); 369 matched = (mcount > 0); 370 ecount = nsp-1; 371 if (ecount > mcount) ecount = mcount; 372 for (i = 0; i < ecount*2; ) |
382 { | 373 { |
383 *sp = line + ovector[0]; 384 *ep = line + ovector[1]; | 374 if (ovector[i] < 0 || ovector[i+1] < 0) 375 { 376 *sp++ = *ep++ = line; 377 i += 2; 378 } else 379 { 380 *sp++ = line + ovector[i++]; 381 *ep++ = line + ovector[i++]; 382 } |
385 } 386 } 387#endif 388#if HAVE_PCRE2 389 { 390 int flags = (notbol) ? PCRE2_NOTBOL : 0; | 383 } 384 } 385#endif 386#if HAVE_PCRE2 387 { 388 int flags = (notbol) ? PCRE2_NOTBOL : 0; |
391 pcre2_match_data *md = pcre2_match_data_create(3, NULL); 392 matched = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 393 0, flags, md, NULL) >= 0; | 389 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL); 390 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 391 0, flags, md, NULL); 392 matched = (mcount > 0); |
394 if (matched) 395 { 396 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); | 393 if (matched) 394 { 395 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); |
397 *sp = line + ovector[0]; 398 *ep = line + ovector[1]; | 396 int i; 397 int ecount = nsp-1; 398 if (ecount > mcount) ecount = mcount; 399 for (i = 0; i < ecount*2; ) 400 { 401 if (ovector[i] < 0 || ovector[i+1] < 0) 402 { 403 *sp++ = *ep++ = line; 404 i += 2; 405 } else 406 { 407 *sp++ = line + ovector[i++]; 408 *ep++ = line + ovector[i++]; 409 } 410 } |
399 } 400 pcre2_match_data_free(md); 401 } 402#endif 403#if HAVE_RE_COMP 404 matched = (re_exec(line) == 1); 405 /* 406 * re_exec doesn't seem to provide a way to get the matched string. 407 */ | 411 } 412 pcre2_match_data_free(md); 413 } 414#endif 415#if HAVE_RE_COMP 416 matched = (re_exec(line) == 1); 417 /* 418 * re_exec doesn't seem to provide a way to get the matched string. 419 */ |
408 *sp = *ep = NULL; | |
409#endif 410#if HAVE_REGCMP | 420#endif 421#if HAVE_REGCMP |
411 *ep = regex(pattern, line); 412 matched = (*ep != NULL); | 422 matched = ((*ep++ = regex(pattern, line)) != NULL); |
413 if (matched) | 423 if (matched) |
414 *sp = __loc1; | 424 *sp++ = __loc1; |
415#endif 416#if HAVE_V8_REGCOMP 417#if HAVE_REGEXEC2 418 matched = regexec2(pattern, line, notbol); 419#else 420 matched = regexec(pattern, line); 421#endif 422 if (matched) 423 { | 425#endif 426#if HAVE_V8_REGCOMP 427#if HAVE_REGEXEC2 428 matched = regexec2(pattern, line, notbol); 429#else 430 matched = regexec(pattern, line); 431#endif 432 if (matched) 433 { |
424 *sp = pattern->startp[0]; 425 *ep = pattern->endp[0]; | 434 *sp++ = pattern->startp[0]; 435 *ep++ = pattern->endp[0]; |
426 } 427#endif 428 } | 436 } 437#endif 438 } |
439 *sp = *ep = NULL; |
|
429 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 430 ((search_type & SRCH_NO_MATCH) && !matched); 431 return (matched); 432} 433 | 440 matched = (!(search_type & SRCH_NO_MATCH) && matched) || 441 ((search_type & SRCH_NO_MATCH) && !matched); 442 return (matched); 443} 444 |
445public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type) 446{ 447 int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type); 448 int i; 449 for (i = 1; i <= NUM_SEARCH_COLORS; i++) 450 { 451 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i]) 452 matched = 0; 453 } 454 return matched; 455} 456 |
|
434/* 435 * Return the name of the pattern matching library. 436 */ | 457/* 458 * Return the name of the pattern matching library. 459 */ |
437 public char * 438pattern_lib_name(VOID_PARAM) | 460public char * pattern_lib_name(void) |
439{ 440#if HAVE_GNU_REGEX 441 return ("GNU"); 442#else 443#if HAVE_POSIX_REGCOMP 444 return ("POSIX"); 445#else 446#if HAVE_PCRE2 --- 23 unchanged lines hidden --- | 461{ 462#if HAVE_GNU_REGEX 463 return ("GNU"); 464#else 465#if HAVE_POSIX_REGCOMP 466 return ("POSIX"); 467#else 468#if HAVE_PCRE2 --- 23 unchanged lines hidden --- |