1 /* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ 2 /* $FreeBSD$ */ 3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4 5 /*- 6 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 7 * 8 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 9 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 10 * Copyright (C) 2017 Kyle Evans <kevans@FreeBSD.org> 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/stat.h> 39 #include <sys/types.h> 40 41 #include <ctype.h> 42 #include <err.h> 43 #include <errno.h> 44 #include <fnmatch.h> 45 #include <fts.h> 46 #include <libgen.h> 47 #include <stdbool.h> 48 #include <stdio.h> 49 #include <stdlib.h> 50 #include <string.h> 51 #include <unistd.h> 52 #include <wchar.h> 53 #include <wctype.h> 54 55 #ifndef WITHOUT_FASTMATCH 56 #include "fastmatch.h" 57 #endif 58 #include "grep.h" 59 60 static bool first_match = true; 61 62 /* 63 * Parsing context; used to hold things like matches made and 64 * other useful bits 65 */ 66 struct parsec { 67 regmatch_t matches[MAX_MATCHES]; /* Matches made */ 68 struct str ln; /* Current line */ 69 size_t lnstart; /* Position in line */ 70 size_t matchidx; /* Latest match index */ 71 int printed; /* Metadata printed? */ 72 bool binary; /* Binary file? */ 73 }; 74 75 #ifdef WITH_INTERNAL_NOSPEC 76 static int litexec(const struct pat *pat, const char *string, 77 size_t nmatch, regmatch_t pmatch[]); 78 #endif 79 static int procline(struct parsec *pc); 80 static void printline(struct parsec *pc, int sep); 81 static void printline_metadata(struct str *line, int sep); 82 83 bool 84 file_matching(const char *fname) 85 { 86 char *fname_base, *fname_buf; 87 bool ret; 88 89 ret = finclude ? false : true; 90 fname_buf = strdup(fname); 91 if (fname_buf == NULL) 92 err(2, "strdup"); 93 fname_base = basename(fname_buf); 94 95 for (unsigned int i = 0; i < fpatterns; ++i) { 96 if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 97 fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 98 if (fpattern[i].mode == EXCL_PAT) { 99 ret = false; 100 break; 101 } else 102 ret = true; 103 } 104 } 105 free(fname_buf); 106 return (ret); 107 } 108 109 static inline bool 110 dir_matching(const char *dname) 111 { 112 bool ret; 113 114 ret = dinclude ? false : true; 115 116 for (unsigned int i = 0; i < dpatterns; ++i) { 117 if (dname != NULL && 118 fnmatch(dpattern[i].pat, dname, 0) == 0) { 119 if (dpattern[i].mode == EXCL_PAT) 120 return (false); 121 else 122 ret = true; 123 } 124 } 125 return (ret); 126 } 127 128 /* 129 * Processes a directory when a recursive search is performed with 130 * the -R option. Each appropriate file is passed to procfile(). 131 */ 132 int 133 grep_tree(char **argv) 134 { 135 FTS *fts; 136 FTSENT *p; 137 int c, fts_flags; 138 bool ok; 139 const char *wd[] = { ".", NULL }; 140 141 c = fts_flags = 0; 142 143 switch(linkbehave) { 144 case LINK_EXPLICIT: 145 fts_flags = FTS_COMFOLLOW; 146 break; 147 case LINK_SKIP: 148 fts_flags = FTS_PHYSICAL; 149 break; 150 default: 151 fts_flags = FTS_LOGICAL; 152 153 } 154 155 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 156 157 fts = fts_open((argv[0] == NULL) ? 158 __DECONST(char * const *, wd) : argv, fts_flags, NULL); 159 if (fts == NULL) 160 err(2, "fts_open"); 161 while ((p = fts_read(fts)) != NULL) { 162 switch (p->fts_info) { 163 case FTS_DNR: 164 /* FALLTHROUGH */ 165 case FTS_ERR: 166 file_err = true; 167 if(!sflag) 168 warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); 169 break; 170 case FTS_D: 171 /* FALLTHROUGH */ 172 case FTS_DP: 173 if (dexclude || dinclude) 174 if (!dir_matching(p->fts_name) || 175 !dir_matching(p->fts_path)) 176 fts_set(fts, p, FTS_SKIP); 177 break; 178 case FTS_DC: 179 /* Print a warning for recursive directory loop */ 180 warnx("warning: %s: recursive directory loop", 181 p->fts_path); 182 break; 183 default: 184 /* Check for file exclusion/inclusion */ 185 ok = true; 186 if (fexclude || finclude) 187 ok &= file_matching(p->fts_path); 188 189 if (ok) 190 c += procfile(p->fts_path); 191 break; 192 } 193 } 194 195 fts_close(fts); 196 return (c); 197 } 198 199 /* 200 * Opens a file and processes it. Each file is processed line-by-line 201 * passing the lines to procline(). 202 */ 203 int 204 procfile(const char *fn) 205 { 206 struct parsec pc; 207 long long tail; 208 struct file *f; 209 struct stat sb; 210 struct str *ln; 211 mode_t s; 212 int c, last_outed, t; 213 bool doctx, printmatch, same_file; 214 215 if (strcmp(fn, "-") == 0) { 216 fn = label != NULL ? label : getstr(1); 217 f = grep_open(NULL); 218 } else { 219 if (!stat(fn, &sb)) { 220 /* Check if we need to process the file */ 221 s = sb.st_mode & S_IFMT; 222 if (s == S_IFDIR && dirbehave == DIR_SKIP) 223 return (0); 224 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 225 || s == S_IFSOCK) && devbehave == DEV_SKIP) 226 return (0); 227 } 228 f = grep_open(fn); 229 } 230 if (f == NULL) { 231 file_err = true; 232 if (!sflag) 233 warn("%s", fn); 234 return (0); 235 } 236 237 /* Convenience */ 238 ln = &pc.ln; 239 pc.ln.file = grep_malloc(strlen(fn) + 1); 240 strcpy(pc.ln.file, fn); 241 pc.ln.line_no = 0; 242 pc.ln.len = 0; 243 pc.ln.boff = 0; 244 pc.ln.off = -1; 245 pc.binary = f->binary; 246 pc.printed = 0; 247 tail = 0; 248 last_outed = 0; 249 same_file = false; 250 doctx = false; 251 printmatch = true; 252 if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag || 253 lflag || Lflag) 254 printmatch = false; 255 if (printmatch && (Aflag != 0 || Bflag != 0)) 256 doctx = true; 257 mcount = mlimit; 258 259 for (c = 0; c == 0 || !(lflag || qflag); ) { 260 /* Reset per-line statistics */ 261 pc.printed = 0; 262 pc.matchidx = 0; 263 pc.lnstart = 0; 264 pc.ln.boff = 0; 265 pc.ln.off += pc.ln.len + 1; 266 if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL || 267 pc.ln.len == 0) 268 break; 269 270 if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol) 271 --pc.ln.len; 272 pc.ln.line_no++; 273 274 /* Return if we need to skip a binary file */ 275 if (pc.binary && binbehave == BINFILE_SKIP) { 276 grep_close(f); 277 free(pc.ln.file); 278 free(f); 279 return (0); 280 } 281 282 if ((t = procline(&pc)) == 0) 283 ++c; 284 285 /* Deal with any -B context or context separators */ 286 if (t == 0 && doctx) { 287 if (!first_match && (!same_file || last_outed > 0)) 288 printf("--\n"); 289 if (Bflag > 0) 290 printqueue(); 291 tail = Aflag; 292 } 293 /* Print the matching line, but only if not quiet/binary */ 294 if (t == 0 && printmatch) { 295 printline(&pc, ':'); 296 while (pc.matchidx >= MAX_MATCHES) { 297 /* Reset matchidx and try again */ 298 pc.matchidx = 0; 299 if (procline(&pc) == 0) 300 printline(&pc, ':'); 301 else 302 break; 303 } 304 first_match = false; 305 same_file = true; 306 last_outed = 0; 307 } 308 if (t != 0 && doctx) { 309 /* Deal with any -A context */ 310 if (tail > 0) { 311 grep_printline(&pc.ln, '-'); 312 tail--; 313 if (Bflag > 0) 314 clearqueue(); 315 } else { 316 /* 317 * Enqueue non-matching lines for -B context. 318 * If we're not actually doing -B context or if 319 * the enqueue resulted in a line being rotated 320 * out, then go ahead and increment last_outed 321 * to signify a gap between context/match. 322 */ 323 if (Bflag == 0 || (Bflag > 0 && enqueue(ln))) 324 ++last_outed; 325 } 326 } 327 328 /* Count the matches if we have a match limit */ 329 if (t == 0 && mflag) { 330 --mcount; 331 if (mflag && mcount <= 0) 332 break; 333 } 334 335 } 336 if (Bflag > 0) 337 clearqueue(); 338 grep_close(f); 339 340 if (cflag) { 341 if (!hflag) 342 printf("%s:", pc.ln.file); 343 printf("%u\n", c); 344 } 345 if (lflag && !qflag && c != 0) 346 printf("%s%c", fn, nullflag ? 0 : '\n'); 347 if (Lflag && !qflag && c == 0) 348 printf("%s%c", fn, nullflag ? 0 : '\n'); 349 if (c && !cflag && !lflag && !Lflag && 350 binbehave == BINFILE_BIN && f->binary && !qflag) 351 printf(getstr(8), fn); 352 353 free(pc.ln.file); 354 free(f); 355 return (c); 356 } 357 358 #ifdef WITH_INTERNAL_NOSPEC 359 /* 360 * Internal implementation of literal string search within a string, modeled 361 * after regexec(3), for use when the regex(3) implementation doesn't offer 362 * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD 363 * config, but in other scenarios such as building against libgnuregex or on 364 * some non-FreeBSD OSes. 365 */ 366 static int 367 litexec(const struct pat *pat, const char *string, size_t nmatch, 368 regmatch_t pmatch[]) 369 { 370 char *(*strstr_fn)(const char *, const char *); 371 char *sub, *subject; 372 const char *search; 373 size_t idx, n, ofs, stringlen; 374 375 if (cflags & REG_ICASE) 376 strstr_fn = strcasestr; 377 else 378 strstr_fn = strstr; 379 idx = 0; 380 ofs = pmatch[0].rm_so; 381 stringlen = pmatch[0].rm_eo; 382 if (ofs >= stringlen) 383 return (REG_NOMATCH); 384 subject = strndup(string, stringlen); 385 if (subject == NULL) 386 return (REG_ESPACE); 387 for (n = 0; ofs < stringlen;) { 388 search = (subject + ofs); 389 if ((unsigned long)pat->len > strlen(search)) 390 break; 391 sub = strstr_fn(search, pat->pat); 392 /* 393 * Ignoring the empty string possibility due to context: grep optimizes 394 * for empty patterns and will never reach this point. 395 */ 396 if (sub == NULL) 397 break; 398 ++n; 399 /* Fill in pmatch if necessary */ 400 if (nmatch > 0) { 401 pmatch[idx].rm_so = ofs + (sub - search); 402 pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len; 403 if (++idx == nmatch) 404 break; 405 ofs = pmatch[idx].rm_so + 1; 406 } else 407 /* We only needed to know if we match or not */ 408 break; 409 } 410 free(subject); 411 if (n > 0 && nmatch > 0) 412 for (n = idx; n < nmatch; ++n) 413 pmatch[n].rm_so = pmatch[n].rm_eo = -1; 414 415 return (n > 0 ? 0 : REG_NOMATCH); 416 } 417 #endif /* WITH_INTERNAL_NOSPEC */ 418 419 #define iswword(x) (iswalnum((x)) || (x) == L'_') 420 421 /* 422 * Processes a line comparing it with the specified patterns. Each pattern 423 * is looped to be compared along with the full string, saving each and every 424 * match, which is necessary to colorize the output and to count the 425 * matches. The matching lines are passed to printline() to display the 426 * appropriate output. 427 */ 428 static int 429 procline(struct parsec *pc) 430 { 431 regmatch_t pmatch, lastmatch, chkmatch; 432 wchar_t wbegin, wend; 433 size_t st, nst; 434 unsigned int i; 435 int c = 0, r = 0, lastmatches = 0, leflags = eflags; 436 size_t startm = 0, matchidx; 437 unsigned int retry; 438 439 matchidx = pc->matchidx; 440 441 /* Special case: empty pattern with -w flag, check first character */ 442 if (matchall && wflag) { 443 if (pc->ln.len == 0) 444 return (0); 445 wend = L' '; 446 if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend)) 447 return (1); 448 else 449 return (0); 450 } else if (matchall) 451 return (0); 452 453 st = pc->lnstart; 454 nst = 0; 455 /* Initialize to avoid a false positive warning from GCC. */ 456 lastmatch.rm_so = lastmatch.rm_eo = 0; 457 458 /* Loop to process the whole line */ 459 while (st <= pc->ln.len) { 460 lastmatches = 0; 461 startm = matchidx; 462 retry = 0; 463 if (st > 0 && pc->ln.dat[st - 1] != fileeol) 464 leflags |= REG_NOTBOL; 465 /* Loop to compare with all the patterns */ 466 for (i = 0; i < patterns; i++) { 467 pmatch.rm_so = st; 468 pmatch.rm_eo = pc->ln.len; 469 #ifdef WITH_INTERNAL_NOSPEC 470 if (grepbehave == GREP_FIXED) 471 r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch); 472 else 473 #endif 474 #ifndef WITHOUT_FASTMATCH 475 if (fg_pattern[i].pattern) 476 r = fastexec(&fg_pattern[i], 477 pc->ln.dat, 1, &pmatch, leflags); 478 else 479 #endif 480 r = regexec(&r_pattern[i], pc->ln.dat, 1, 481 &pmatch, leflags); 482 if (r != 0) 483 continue; 484 /* Check for full match */ 485 if (xflag && (pmatch.rm_so != 0 || 486 (size_t)pmatch.rm_eo != pc->ln.len)) 487 continue; 488 /* Check for whole word match */ 489 #ifndef WITHOUT_FASTMATCH 490 if (wflag || fg_pattern[i].word) { 491 #else 492 if (wflag) { 493 #endif 494 wbegin = wend = L' '; 495 if (pmatch.rm_so != 0 && 496 sscanf(&pc->ln.dat[pmatch.rm_so - 1], 497 "%lc", &wbegin) != 1) 498 r = REG_NOMATCH; 499 else if ((size_t)pmatch.rm_eo != 500 pc->ln.len && 501 sscanf(&pc->ln.dat[pmatch.rm_eo], 502 "%lc", &wend) != 1) 503 r = REG_NOMATCH; 504 else if (iswword(wbegin) || 505 iswword(wend)) 506 r = REG_NOMATCH; 507 /* 508 * If we're doing whole word matching and we 509 * matched once, then we should try the pattern 510 * again after advancing just past the start of 511 * the earliest match. This allows the pattern 512 * to match later on in the line and possibly 513 * still match a whole word. 514 */ 515 if (r == REG_NOMATCH && 516 (retry == pc->lnstart || 517 (unsigned int)pmatch.rm_so + 1 < retry)) 518 retry = pmatch.rm_so + 1; 519 if (r == REG_NOMATCH) 520 continue; 521 } 522 lastmatches++; 523 lastmatch = pmatch; 524 525 if (matchidx == 0) 526 c++; 527 528 /* 529 * Replace previous match if the new one is earlier 530 * and/or longer. This will lead to some amount of 531 * extra work if -o/--color are specified, but it's 532 * worth it from a correctness point of view. 533 */ 534 if (matchidx > startm) { 535 chkmatch = pc->matches[matchidx - 1]; 536 if (pmatch.rm_so < chkmatch.rm_so || 537 (pmatch.rm_so == chkmatch.rm_so && 538 (pmatch.rm_eo - pmatch.rm_so) > 539 (chkmatch.rm_eo - chkmatch.rm_so))) { 540 pc->matches[matchidx - 1] = pmatch; 541 nst = pmatch.rm_eo; 542 } 543 } else { 544 /* Advance as normal if not */ 545 pc->matches[matchidx++] = pmatch; 546 nst = pmatch.rm_eo; 547 } 548 /* avoid excessive matching - skip further patterns */ 549 if ((color == NULL && !oflag) || qflag || lflag || 550 matchidx >= MAX_MATCHES) { 551 pc->lnstart = nst; 552 lastmatches = 0; 553 break; 554 } 555 } 556 557 /* 558 * Advance to just past the start of the earliest match, try 559 * again just in case we still have a chance to match later in 560 * the string. 561 */ 562 if (lastmatches == 0 && retry > pc->lnstart) { 563 st = retry; 564 continue; 565 } 566 567 /* One pass if we are not recording matches */ 568 if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag)) 569 break; 570 571 /* If we didn't have any matches or REG_NOSUB set */ 572 if (lastmatches == 0 || (cflags & REG_NOSUB)) 573 nst = pc->ln.len; 574 575 if (lastmatches == 0) 576 /* No matches */ 577 break; 578 else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo) 579 /* Zero-length match -- advance one more so we don't get stuck */ 580 nst++; 581 582 /* Advance st based on previous matches */ 583 st = nst; 584 pc->lnstart = st; 585 } 586 587 /* Reflect the new matchidx in the context */ 588 pc->matchidx = matchidx; 589 if (vflag) 590 c = !c; 591 return (c ? 0 : 1); 592 } 593 594 /* 595 * Safe malloc() for internal use. 596 */ 597 void * 598 grep_malloc(size_t size) 599 { 600 void *ptr; 601 602 if ((ptr = malloc(size)) == NULL) 603 err(2, "malloc"); 604 return (ptr); 605 } 606 607 /* 608 * Safe calloc() for internal use. 609 */ 610 void * 611 grep_calloc(size_t nmemb, size_t size) 612 { 613 void *ptr; 614 615 if ((ptr = calloc(nmemb, size)) == NULL) 616 err(2, "calloc"); 617 return (ptr); 618 } 619 620 /* 621 * Safe realloc() for internal use. 622 */ 623 void * 624 grep_realloc(void *ptr, size_t size) 625 { 626 627 if ((ptr = realloc(ptr, size)) == NULL) 628 err(2, "realloc"); 629 return (ptr); 630 } 631 632 /* 633 * Safe strdup() for internal use. 634 */ 635 char * 636 grep_strdup(const char *str) 637 { 638 char *ret; 639 640 if ((ret = strdup(str)) == NULL) 641 err(2, "strdup"); 642 return (ret); 643 } 644 645 /* 646 * Print an entire line as-is, there are no inline matches to consider. This is 647 * used for printing context. 648 */ 649 void grep_printline(struct str *line, int sep) { 650 printline_metadata(line, sep); 651 fwrite(line->dat, line->len, 1, stdout); 652 putchar(fileeol); 653 } 654 655 static void 656 printline_metadata(struct str *line, int sep) 657 { 658 bool printsep; 659 660 printsep = false; 661 if (!hflag) { 662 if (!nullflag) { 663 fputs(line->file, stdout); 664 printsep = true; 665 } else { 666 printf("%s", line->file); 667 putchar(0); 668 } 669 } 670 if (nflag) { 671 if (printsep) 672 putchar(sep); 673 printf("%d", line->line_no); 674 printsep = true; 675 } 676 if (bflag) { 677 if (printsep) 678 putchar(sep); 679 printf("%lld", (long long)(line->off + line->boff)); 680 printsep = true; 681 } 682 if (printsep) 683 putchar(sep); 684 } 685 686 /* 687 * Prints a matching line according to the command line options. 688 */ 689 static void 690 printline(struct parsec *pc, int sep) 691 { 692 size_t a = 0; 693 size_t i, matchidx; 694 regmatch_t match; 695 696 /* If matchall, everything matches but don't actually print for -o */ 697 if (oflag && matchall) 698 return; 699 700 matchidx = pc->matchidx; 701 702 /* --color and -o */ 703 if ((oflag || color) && matchidx > 0) { 704 /* Only print metadata once per line if --color */ 705 if (!oflag && pc->printed == 0) 706 printline_metadata(&pc->ln, sep); 707 for (i = 0; i < matchidx; i++) { 708 match = pc->matches[i]; 709 /* Don't output zero length matches */ 710 if (match.rm_so == match.rm_eo) 711 continue; 712 /* 713 * Metadata is printed on a per-line basis, so every 714 * match gets file metadata with the -o flag. 715 */ 716 if (oflag) { 717 pc->ln.boff = match.rm_so; 718 printline_metadata(&pc->ln, sep); 719 } else 720 fwrite(pc->ln.dat + a, match.rm_so - a, 1, 721 stdout); 722 if (color) 723 fprintf(stdout, "\33[%sm\33[K", color); 724 fwrite(pc->ln.dat + match.rm_so, 725 match.rm_eo - match.rm_so, 1, stdout); 726 if (color) 727 fprintf(stdout, "\33[m\33[K"); 728 a = match.rm_eo; 729 if (oflag) 730 putchar('\n'); 731 } 732 if (!oflag) { 733 if (pc->ln.len - a > 0) 734 fwrite(pc->ln.dat + a, pc->ln.len - a, 1, 735 stdout); 736 putchar('\n'); 737 } 738 } else 739 grep_printline(&pc->ln, sep); 740 pc->printed++; 741 } 742