1 /* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ 2 /* $FreeBSD$ */ 3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4 5 /*- 6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 8 * Copyright (C) 2017 Kyle Evans <kevans@FreeBSD.org> 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include <sys/stat.h> 37 #include <sys/types.h> 38 39 #include <ctype.h> 40 #include <err.h> 41 #include <errno.h> 42 #include <fnmatch.h> 43 #include <fts.h> 44 #include <libgen.h> 45 #include <stdbool.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 #include <wchar.h> 51 #include <wctype.h> 52 53 #ifndef WITHOUT_FASTMATCH 54 #include "fastmatch.h" 55 #endif 56 #include "grep.h" 57 58 static bool first_match = true; 59 60 /* 61 * Parsing context; used to hold things like matches made and 62 * other useful bits 63 */ 64 struct parsec { 65 regmatch_t matches[MAX_MATCHES]; /* Matches made */ 66 struct str ln; /* Current line */ 67 size_t lnstart; /* Position in line */ 68 size_t matchidx; /* Latest match index */ 69 int printed; /* Metadata printed? */ 70 bool binary; /* Binary file? */ 71 }; 72 73 #ifdef WITH_INTERNAL_NOSPEC 74 static int litexec(const struct pat *pat, const char *string, 75 size_t nmatch, regmatch_t pmatch[]); 76 #endif 77 static int procline(struct parsec *pc); 78 static void printline(struct parsec *pc, int sep); 79 static void printline_metadata(struct str *line, int sep); 80 81 bool 82 file_matching(const char *fname) 83 { 84 char *fname_base, *fname_buf; 85 bool ret; 86 87 ret = finclude ? false : true; 88 fname_buf = strdup(fname); 89 if (fname_buf == NULL) 90 err(2, "strdup"); 91 fname_base = basename(fname_buf); 92 93 for (unsigned int i = 0; i < fpatterns; ++i) { 94 if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 95 fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 96 if (fpattern[i].mode == EXCL_PAT) { 97 ret = false; 98 break; 99 } else 100 ret = true; 101 } 102 } 103 free(fname_buf); 104 return (ret); 105 } 106 107 static inline bool 108 dir_matching(const char *dname) 109 { 110 bool ret; 111 112 ret = dinclude ? false : true; 113 114 for (unsigned int i = 0; i < dpatterns; ++i) { 115 if (dname != NULL && 116 fnmatch(dpattern[i].pat, dname, 0) == 0) { 117 if (dpattern[i].mode == EXCL_PAT) 118 return (false); 119 else 120 ret = true; 121 } 122 } 123 return (ret); 124 } 125 126 /* 127 * Processes a directory when a recursive search is performed with 128 * the -R option. Each appropriate file is passed to procfile(). 129 */ 130 int 131 grep_tree(char **argv) 132 { 133 FTS *fts; 134 FTSENT *p; 135 int c, fts_flags; 136 bool ok; 137 const char *wd[] = { ".", NULL }; 138 139 c = fts_flags = 0; 140 141 switch(linkbehave) { 142 case LINK_EXPLICIT: 143 fts_flags = FTS_COMFOLLOW; 144 break; 145 case LINK_SKIP: 146 fts_flags = FTS_PHYSICAL; 147 break; 148 default: 149 fts_flags = FTS_LOGICAL; 150 151 } 152 153 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 154 155 fts = fts_open((argv[0] == NULL) ? 156 __DECONST(char * const *, wd) : argv, fts_flags, NULL); 157 if (fts == NULL) 158 err(2, "fts_open"); 159 while ((p = fts_read(fts)) != NULL) { 160 switch (p->fts_info) { 161 case FTS_DNR: 162 /* FALLTHROUGH */ 163 case FTS_ERR: 164 file_err = true; 165 if(!sflag) 166 warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); 167 break; 168 case FTS_D: 169 /* FALLTHROUGH */ 170 case FTS_DP: 171 if (dexclude || dinclude) 172 if (!dir_matching(p->fts_name) || 173 !dir_matching(p->fts_path)) 174 fts_set(fts, p, FTS_SKIP); 175 break; 176 case FTS_DC: 177 /* Print a warning for recursive directory loop */ 178 warnx("warning: %s: recursive directory loop", 179 p->fts_path); 180 break; 181 default: 182 /* Check for file exclusion/inclusion */ 183 ok = true; 184 if (fexclude || finclude) 185 ok &= file_matching(p->fts_path); 186 187 if (ok) 188 c += procfile(p->fts_path); 189 break; 190 } 191 } 192 193 fts_close(fts); 194 return (c); 195 } 196 197 /* 198 * Opens a file and processes it. Each file is processed line-by-line 199 * passing the lines to procline(). 200 */ 201 int 202 procfile(const char *fn) 203 { 204 struct parsec pc; 205 long long tail; 206 struct file *f; 207 struct stat sb; 208 struct str *ln; 209 mode_t s; 210 int c, last_outed, t; 211 bool doctx, printmatch, same_file; 212 213 if (strcmp(fn, "-") == 0) { 214 fn = label != NULL ? label : getstr(1); 215 f = grep_open(NULL); 216 } else { 217 if (!stat(fn, &sb)) { 218 /* Check if we need to process the file */ 219 s = sb.st_mode & S_IFMT; 220 if (s == S_IFDIR && dirbehave == DIR_SKIP) 221 return (0); 222 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 223 || s == S_IFSOCK) && devbehave == DEV_SKIP) 224 return (0); 225 } 226 f = grep_open(fn); 227 } 228 if (f == NULL) { 229 file_err = true; 230 if (!sflag) 231 warn("%s", fn); 232 return (0); 233 } 234 235 /* Convenience */ 236 ln = &pc.ln; 237 pc.ln.file = grep_malloc(strlen(fn) + 1); 238 strcpy(pc.ln.file, fn); 239 pc.ln.line_no = 0; 240 pc.ln.len = 0; 241 pc.ln.boff = 0; 242 pc.ln.off = -1; 243 pc.binary = f->binary; 244 pc.printed = 0; 245 tail = 0; 246 last_outed = 0; 247 same_file = false; 248 doctx = false; 249 printmatch = true; 250 if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag || 251 lflag || Lflag) 252 printmatch = false; 253 if (printmatch && (Aflag != 0 || Bflag != 0)) 254 doctx = true; 255 mcount = mlimit; 256 257 for (c = 0; c == 0 || !(lflag || qflag); ) { 258 /* Reset per-line statistics */ 259 pc.printed = 0; 260 pc.matchidx = 0; 261 pc.lnstart = 0; 262 pc.ln.boff = 0; 263 pc.ln.off += pc.ln.len + 1; 264 if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL || 265 pc.ln.len == 0) 266 break; 267 268 if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol) 269 --pc.ln.len; 270 pc.ln.line_no++; 271 272 /* Return if we need to skip a binary file */ 273 if (pc.binary && binbehave == BINFILE_SKIP) { 274 grep_close(f); 275 free(pc.ln.file); 276 free(f); 277 return (0); 278 } 279 280 if ((t = procline(&pc)) == 0) 281 ++c; 282 283 /* Deal with any -B context or context separators */ 284 if (t == 0 && doctx) { 285 if (!first_match && (!same_file || last_outed > 0)) 286 printf("--\n"); 287 if (Bflag > 0) 288 printqueue(); 289 tail = Aflag; 290 } 291 /* Print the matching line, but only if not quiet/binary */ 292 if (t == 0 && printmatch) { 293 printline(&pc, ':'); 294 while (pc.matchidx >= MAX_MATCHES) { 295 /* Reset matchidx and try again */ 296 pc.matchidx = 0; 297 if (procline(&pc) == 0) 298 printline(&pc, ':'); 299 else 300 break; 301 } 302 first_match = false; 303 same_file = true; 304 last_outed = 0; 305 } 306 if (t != 0 && doctx) { 307 /* Deal with any -A context */ 308 if (tail > 0) { 309 grep_printline(&pc.ln, '-'); 310 tail--; 311 if (Bflag > 0) 312 clearqueue(); 313 } else { 314 /* 315 * Enqueue non-matching lines for -B context. 316 * If we're not actually doing -B context or if 317 * the enqueue resulted in a line being rotated 318 * out, then go ahead and increment last_outed 319 * to signify a gap between context/match. 320 */ 321 if (Bflag == 0 || (Bflag > 0 && enqueue(ln))) 322 ++last_outed; 323 } 324 } 325 326 /* Count the matches if we have a match limit */ 327 if (t == 0 && mflag) { 328 --mcount; 329 if (mflag && mcount <= 0) 330 break; 331 } 332 333 } 334 if (Bflag > 0) 335 clearqueue(); 336 grep_close(f); 337 338 if (cflag) { 339 if (!hflag) 340 printf("%s:", pc.ln.file); 341 printf("%u\n", c); 342 } 343 if (lflag && !qflag && c != 0) 344 printf("%s%c", fn, nullflag ? 0 : '\n'); 345 if (Lflag && !qflag && c == 0) 346 printf("%s%c", fn, nullflag ? 0 : '\n'); 347 if (c && !cflag && !lflag && !Lflag && 348 binbehave == BINFILE_BIN && f->binary && !qflag) 349 printf(getstr(8), fn); 350 351 free(pc.ln.file); 352 free(f); 353 return (c); 354 } 355 356 #ifdef WITH_INTERNAL_NOSPEC 357 /* 358 * Internal implementation of literal string search within a string, modeled 359 * after regexec(3), for use when the regex(3) implementation doesn't offer 360 * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD 361 * config, but in other scenarios such as building against libgnuregex or on 362 * some non-FreeBSD OSes. 363 */ 364 static int 365 litexec(const struct pat *pat, const char *string, size_t nmatch, 366 regmatch_t pmatch[]) 367 { 368 char *(*strstr_fn)(const char *, const char *); 369 char *sub, *subject; 370 const char *search; 371 size_t idx, n, ofs, stringlen; 372 373 if (cflags & REG_ICASE) 374 strstr_fn = strcasestr; 375 else 376 strstr_fn = strstr; 377 idx = 0; 378 ofs = pmatch[0].rm_so; 379 stringlen = pmatch[0].rm_eo; 380 if (ofs >= stringlen) 381 return (REG_NOMATCH); 382 subject = strndup(string, stringlen); 383 if (subject == NULL) 384 return (REG_ESPACE); 385 for (n = 0; ofs < stringlen;) { 386 search = (subject + ofs); 387 if ((unsigned long)pat->len > strlen(search)) 388 break; 389 sub = strstr_fn(search, pat->pat); 390 /* 391 * Ignoring the empty string possibility due to context: grep optimizes 392 * for empty patterns and will never reach this point. 393 */ 394 if (sub == NULL) 395 break; 396 ++n; 397 /* Fill in pmatch if necessary */ 398 if (nmatch > 0) { 399 pmatch[idx].rm_so = ofs + (sub - search); 400 pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len; 401 if (++idx == nmatch) 402 break; 403 ofs = pmatch[idx].rm_so + 1; 404 } else 405 /* We only needed to know if we match or not */ 406 break; 407 } 408 free(subject); 409 if (n > 0 && nmatch > 0) 410 for (n = idx; n < nmatch; ++n) 411 pmatch[n].rm_so = pmatch[n].rm_eo = -1; 412 413 return (n > 0 ? 0 : REG_NOMATCH); 414 } 415 #endif /* WITH_INTERNAL_NOSPEC */ 416 417 #define iswword(x) (iswalnum((x)) || (x) == L'_') 418 419 /* 420 * Processes a line comparing it with the specified patterns. Each pattern 421 * is looped to be compared along with the full string, saving each and every 422 * match, which is necessary to colorize the output and to count the 423 * matches. The matching lines are passed to printline() to display the 424 * appropriate output. 425 */ 426 static int 427 procline(struct parsec *pc) 428 { 429 regmatch_t pmatch, lastmatch, chkmatch; 430 wchar_t wbegin, wend; 431 size_t st, nst; 432 unsigned int i; 433 int c = 0, r = 0, lastmatches = 0, leflags = eflags; 434 size_t startm = 0, matchidx; 435 unsigned int retry; 436 437 matchidx = pc->matchidx; 438 439 /* Special case: empty pattern with -w flag, check first character */ 440 if (matchall && wflag) { 441 if (pc->ln.len == 0) 442 return (0); 443 wend = L' '; 444 if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend)) 445 return (1); 446 else 447 return (0); 448 } else if (matchall) 449 return (0); 450 451 st = pc->lnstart; 452 nst = 0; 453 /* Initialize to avoid a false positive warning from GCC. */ 454 lastmatch.rm_so = lastmatch.rm_eo = 0; 455 456 /* Loop to process the whole line */ 457 while (st <= pc->ln.len) { 458 lastmatches = 0; 459 startm = matchidx; 460 retry = 0; 461 if (st > 0 && pc->ln.dat[st - 1] != fileeol) 462 leflags |= REG_NOTBOL; 463 /* Loop to compare with all the patterns */ 464 for (i = 0; i < patterns; i++) { 465 pmatch.rm_so = st; 466 pmatch.rm_eo = pc->ln.len; 467 #ifdef WITH_INTERNAL_NOSPEC 468 if (grepbehave == GREP_FIXED) 469 r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch); 470 else 471 #endif 472 #ifndef WITHOUT_FASTMATCH 473 if (fg_pattern[i].pattern) 474 r = fastexec(&fg_pattern[i], 475 pc->ln.dat, 1, &pmatch, leflags); 476 else 477 #endif 478 r = regexec(&r_pattern[i], pc->ln.dat, 1, 479 &pmatch, leflags); 480 if (r != 0) 481 continue; 482 /* Check for full match */ 483 if (xflag && (pmatch.rm_so != 0 || 484 (size_t)pmatch.rm_eo != pc->ln.len)) 485 continue; 486 /* Check for whole word match */ 487 #ifndef WITHOUT_FASTMATCH 488 if (wflag || fg_pattern[i].word) { 489 #else 490 if (wflag) { 491 #endif 492 wbegin = wend = L' '; 493 if (pmatch.rm_so != 0 && 494 sscanf(&pc->ln.dat[pmatch.rm_so - 1], 495 "%lc", &wbegin) != 1) 496 r = REG_NOMATCH; 497 else if ((size_t)pmatch.rm_eo != 498 pc->ln.len && 499 sscanf(&pc->ln.dat[pmatch.rm_eo], 500 "%lc", &wend) != 1) 501 r = REG_NOMATCH; 502 else if (iswword(wbegin) || 503 iswword(wend)) 504 r = REG_NOMATCH; 505 /* 506 * If we're doing whole word matching and we 507 * matched once, then we should try the pattern 508 * again after advancing just past the start of 509 * the earliest match. This allows the pattern 510 * to match later on in the line and possibly 511 * still match a whole word. 512 */ 513 if (r == REG_NOMATCH && 514 (retry == pc->lnstart || 515 (unsigned int)pmatch.rm_so + 1 < retry)) 516 retry = pmatch.rm_so + 1; 517 if (r == REG_NOMATCH) 518 continue; 519 } 520 lastmatches++; 521 lastmatch = pmatch; 522 523 if (matchidx == 0) 524 c++; 525 526 /* 527 * Replace previous match if the new one is earlier 528 * and/or longer. This will lead to some amount of 529 * extra work if -o/--color are specified, but it's 530 * worth it from a correctness point of view. 531 */ 532 if (matchidx > startm) { 533 chkmatch = pc->matches[matchidx - 1]; 534 if (pmatch.rm_so < chkmatch.rm_so || 535 (pmatch.rm_so == chkmatch.rm_so && 536 (pmatch.rm_eo - pmatch.rm_so) > 537 (chkmatch.rm_eo - chkmatch.rm_so))) { 538 pc->matches[matchidx - 1] = pmatch; 539 nst = pmatch.rm_eo; 540 } 541 } else { 542 /* Advance as normal if not */ 543 pc->matches[matchidx++] = pmatch; 544 nst = pmatch.rm_eo; 545 } 546 /* avoid excessive matching - skip further patterns */ 547 if ((color == NULL && !oflag) || qflag || lflag || 548 matchidx >= MAX_MATCHES) { 549 pc->lnstart = nst; 550 lastmatches = 0; 551 break; 552 } 553 } 554 555 /* 556 * Advance to just past the start of the earliest match, try 557 * again just in case we still have a chance to match later in 558 * the string. 559 */ 560 if (lastmatches == 0 && retry > pc->lnstart) { 561 st = retry; 562 continue; 563 } 564 565 /* One pass if we are not recording matches */ 566 if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag)) 567 break; 568 569 /* If we didn't have any matches or REG_NOSUB set */ 570 if (lastmatches == 0 || (cflags & REG_NOSUB)) 571 nst = pc->ln.len; 572 573 if (lastmatches == 0) 574 /* No matches */ 575 break; 576 else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo) 577 /* Zero-length match -- advance one more so we don't get stuck */ 578 nst++; 579 580 /* Advance st based on previous matches */ 581 st = nst; 582 pc->lnstart = st; 583 } 584 585 /* Reflect the new matchidx in the context */ 586 pc->matchidx = matchidx; 587 if (vflag) 588 c = !c; 589 return (c ? 0 : 1); 590 } 591 592 /* 593 * Safe malloc() for internal use. 594 */ 595 void * 596 grep_malloc(size_t size) 597 { 598 void *ptr; 599 600 if ((ptr = malloc(size)) == NULL) 601 err(2, "malloc"); 602 return (ptr); 603 } 604 605 /* 606 * Safe calloc() for internal use. 607 */ 608 void * 609 grep_calloc(size_t nmemb, size_t size) 610 { 611 void *ptr; 612 613 if ((ptr = calloc(nmemb, size)) == NULL) 614 err(2, "calloc"); 615 return (ptr); 616 } 617 618 /* 619 * Safe realloc() for internal use. 620 */ 621 void * 622 grep_realloc(void *ptr, size_t size) 623 { 624 625 if ((ptr = realloc(ptr, size)) == NULL) 626 err(2, "realloc"); 627 return (ptr); 628 } 629 630 /* 631 * Safe strdup() for internal use. 632 */ 633 char * 634 grep_strdup(const char *str) 635 { 636 char *ret; 637 638 if ((ret = strdup(str)) == NULL) 639 err(2, "strdup"); 640 return (ret); 641 } 642 643 /* 644 * Print an entire line as-is, there are no inline matches to consider. This is 645 * used for printing context. 646 */ 647 void grep_printline(struct str *line, int sep) { 648 printline_metadata(line, sep); 649 fwrite(line->dat, line->len, 1, stdout); 650 putchar(fileeol); 651 } 652 653 static void 654 printline_metadata(struct str *line, int sep) 655 { 656 bool printsep; 657 658 printsep = false; 659 if (!hflag) { 660 if (!nullflag) { 661 fputs(line->file, stdout); 662 printsep = true; 663 } else { 664 printf("%s", line->file); 665 putchar(0); 666 } 667 } 668 if (nflag) { 669 if (printsep) 670 putchar(sep); 671 printf("%d", line->line_no); 672 printsep = true; 673 } 674 if (bflag) { 675 if (printsep) 676 putchar(sep); 677 printf("%lld", (long long)(line->off + line->boff)); 678 printsep = true; 679 } 680 if (printsep) 681 putchar(sep); 682 } 683 684 /* 685 * Prints a matching line according to the command line options. 686 */ 687 static void 688 printline(struct parsec *pc, int sep) 689 { 690 size_t a = 0; 691 size_t i, matchidx; 692 regmatch_t match; 693 694 /* If matchall, everything matches but don't actually print for -o */ 695 if (oflag && matchall) 696 return; 697 698 matchidx = pc->matchidx; 699 700 /* --color and -o */ 701 if ((oflag || color) && matchidx > 0) { 702 /* Only print metadata once per line if --color */ 703 if (!oflag && pc->printed == 0) 704 printline_metadata(&pc->ln, sep); 705 for (i = 0; i < matchidx; i++) { 706 match = pc->matches[i]; 707 /* Don't output zero length matches */ 708 if (match.rm_so == match.rm_eo) 709 continue; 710 /* 711 * Metadata is printed on a per-line basis, so every 712 * match gets file metadata with the -o flag. 713 */ 714 if (oflag) { 715 pc->ln.boff = match.rm_so; 716 printline_metadata(&pc->ln, sep); 717 } else 718 fwrite(pc->ln.dat + a, match.rm_so - a, 1, 719 stdout); 720 if (color) 721 fprintf(stdout, "\33[%sm\33[K", color); 722 fwrite(pc->ln.dat + match.rm_so, 723 match.rm_eo - match.rm_so, 1, stdout); 724 if (color) 725 fprintf(stdout, "\33[m\33[K"); 726 a = match.rm_eo; 727 if (oflag) 728 putchar('\n'); 729 } 730 if (!oflag) { 731 if (pc->ln.len - a > 0) 732 fwrite(pc->ln.dat + a, pc->ln.len - a, 1, 733 stdout); 734 putchar('\n'); 735 } 736 } else 737 grep_printline(&pc->ln, sep); 738 pc->printed++; 739 } 740