1 /* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ 2 /* $FreeBSD$ */ 3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4 5 /*- 6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 8 * Copyright (C) 2017 Kyle Evans <kevans@FreeBSD.org> 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include <sys/stat.h> 37 #include <sys/types.h> 38 39 #include <ctype.h> 40 #include <err.h> 41 #include <errno.h> 42 #include <fnmatch.h> 43 #include <fts.h> 44 #include <libgen.h> 45 #include <stdbool.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 #include <wchar.h> 51 #include <wctype.h> 52 53 #ifndef WITHOUT_FASTMATCH 54 #include "fastmatch.h" 55 #endif 56 #include "grep.h" 57 58 static bool first_match = true; 59 60 /* 61 * Parsing context; used to hold things like matches made and 62 * other useful bits 63 */ 64 struct parsec { 65 regmatch_t matches[MAX_MATCHES]; /* Matches made */ 66 struct str ln; /* Current line */ 67 size_t lnstart; /* Position in line */ 68 size_t matchidx; /* Latest match index */ 69 int printed; /* Metadata printed? */ 70 bool binary; /* Binary file? */ 71 }; 72 73 74 static int procline(struct parsec *pc); 75 static void printline(struct parsec *pc, int sep); 76 static void printline_metadata(struct str *line, int sep); 77 78 bool 79 file_matching(const char *fname) 80 { 81 char *fname_base, *fname_buf; 82 bool ret; 83 84 ret = finclude ? false : true; 85 fname_buf = strdup(fname); 86 if (fname_buf == NULL) 87 err(2, "strdup"); 88 fname_base = basename(fname_buf); 89 90 for (unsigned int i = 0; i < fpatterns; ++i) { 91 if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 92 fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 93 if (fpattern[i].mode == EXCL_PAT) { 94 ret = false; 95 break; 96 } else 97 ret = true; 98 } 99 } 100 free(fname_buf); 101 return (ret); 102 } 103 104 static inline bool 105 dir_matching(const char *dname) 106 { 107 bool ret; 108 109 ret = dinclude ? false : true; 110 111 for (unsigned int i = 0; i < dpatterns; ++i) { 112 if (dname != NULL && 113 fnmatch(dpattern[i].pat, dname, 0) == 0) { 114 if (dpattern[i].mode == EXCL_PAT) 115 return (false); 116 else 117 ret = true; 118 } 119 } 120 return (ret); 121 } 122 123 /* 124 * Processes a directory when a recursive search is performed with 125 * the -R option. Each appropriate file is passed to procfile(). 126 */ 127 int 128 grep_tree(char **argv) 129 { 130 FTS *fts; 131 FTSENT *p; 132 int c, fts_flags; 133 bool ok; 134 const char *wd[] = { ".", NULL }; 135 136 c = fts_flags = 0; 137 138 switch(linkbehave) { 139 case LINK_EXPLICIT: 140 fts_flags = FTS_COMFOLLOW; 141 break; 142 case LINK_SKIP: 143 fts_flags = FTS_PHYSICAL; 144 break; 145 default: 146 fts_flags = FTS_LOGICAL; 147 148 } 149 150 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 151 152 fts = fts_open((argv[0] == NULL) ? 153 __DECONST(char * const *, wd) : argv, fts_flags, NULL); 154 if (fts == NULL) 155 err(2, "fts_open"); 156 while ((p = fts_read(fts)) != NULL) { 157 switch (p->fts_info) { 158 case FTS_DNR: 159 /* FALLTHROUGH */ 160 case FTS_ERR: 161 file_err = true; 162 if(!sflag) 163 warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); 164 break; 165 case FTS_D: 166 /* FALLTHROUGH */ 167 case FTS_DP: 168 if (dexclude || dinclude) 169 if (!dir_matching(p->fts_name) || 170 !dir_matching(p->fts_path)) 171 fts_set(fts, p, FTS_SKIP); 172 break; 173 case FTS_DC: 174 /* Print a warning for recursive directory loop */ 175 warnx("warning: %s: recursive directory loop", 176 p->fts_path); 177 break; 178 default: 179 /* Check for file exclusion/inclusion */ 180 ok = true; 181 if (fexclude || finclude) 182 ok &= file_matching(p->fts_path); 183 184 if (ok) 185 c += procfile(p->fts_path); 186 break; 187 } 188 } 189 190 fts_close(fts); 191 return (c); 192 } 193 194 /* 195 * Opens a file and processes it. Each file is processed line-by-line 196 * passing the lines to procline(). 197 */ 198 int 199 procfile(const char *fn) 200 { 201 struct parsec pc; 202 long long tail; 203 struct file *f; 204 struct stat sb; 205 struct str *ln; 206 mode_t s; 207 int c, last_outed, t; 208 bool doctx, printmatch, same_file; 209 210 if (strcmp(fn, "-") == 0) { 211 fn = label != NULL ? label : getstr(1); 212 f = grep_open(NULL); 213 } else { 214 if (!stat(fn, &sb)) { 215 /* Check if we need to process the file */ 216 s = sb.st_mode & S_IFMT; 217 if (s == S_IFDIR && dirbehave == DIR_SKIP) 218 return (0); 219 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 220 || s == S_IFSOCK) && devbehave == DEV_SKIP) 221 return (0); 222 } 223 f = grep_open(fn); 224 } 225 if (f == NULL) { 226 file_err = true; 227 if (!sflag) 228 warn("%s", fn); 229 return (0); 230 } 231 232 /* Convenience */ 233 ln = &pc.ln; 234 pc.ln.file = grep_malloc(strlen(fn) + 1); 235 strcpy(pc.ln.file, fn); 236 pc.ln.line_no = 0; 237 pc.ln.len = 0; 238 pc.ln.boff = 0; 239 pc.ln.off = -1; 240 pc.binary = f->binary; 241 pc.printed = 0; 242 tail = 0; 243 last_outed = 0; 244 same_file = false; 245 doctx = false; 246 printmatch = true; 247 if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag || 248 lflag || Lflag) 249 printmatch = false; 250 if (printmatch && (Aflag != 0 || Bflag != 0)) 251 doctx = true; 252 mcount = mlimit; 253 254 for (c = 0; c == 0 || !(lflag || qflag); ) { 255 /* Reset per-line statistics */ 256 pc.printed = 0; 257 pc.matchidx = 0; 258 pc.lnstart = 0; 259 pc.ln.boff = 0; 260 pc.ln.off += pc.ln.len + 1; 261 if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL || 262 pc.ln.len == 0) { 263 if (pc.ln.line_no == 0 && matchall) 264 /* 265 * An empty file with an empty pattern and the 266 * -w flag does not match 267 */ 268 exit(matchall && wflag ? 1 : 0); 269 else 270 break; 271 } 272 273 if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol) 274 --pc.ln.len; 275 pc.ln.line_no++; 276 277 /* Return if we need to skip a binary file */ 278 if (pc.binary && binbehave == BINFILE_SKIP) { 279 grep_close(f); 280 free(pc.ln.file); 281 free(f); 282 return (0); 283 } 284 285 if ((t = procline(&pc)) == 0) 286 ++c; 287 288 /* Deal with any -B context or context separators */ 289 if (t == 0 && doctx) { 290 if (!first_match && (!same_file || last_outed > 0)) 291 printf("--\n"); 292 if (Bflag > 0) 293 printqueue(); 294 tail = Aflag; 295 } 296 /* Print the matching line, but only if not quiet/binary */ 297 if (t == 0 && printmatch) { 298 printline(&pc, ':'); 299 while (pc.matchidx >= MAX_MATCHES) { 300 /* Reset matchidx and try again */ 301 pc.matchidx = 0; 302 if (procline(&pc) == 0) 303 printline(&pc, ':'); 304 else 305 break; 306 } 307 first_match = false; 308 same_file = true; 309 last_outed = 0; 310 } 311 if (t != 0 && doctx) { 312 /* Deal with any -A context */ 313 if (tail > 0) { 314 grep_printline(&pc.ln, '-'); 315 tail--; 316 if (Bflag > 0) 317 clearqueue(); 318 } else { 319 /* 320 * Enqueue non-matching lines for -B context. 321 * If we're not actually doing -B context or if 322 * the enqueue resulted in a line being rotated 323 * out, then go ahead and increment last_outed 324 * to signify a gap between context/match. 325 */ 326 if (Bflag == 0 || (Bflag > 0 && enqueue(ln))) 327 ++last_outed; 328 } 329 } 330 331 /* Count the matches if we have a match limit */ 332 if (t == 0 && mflag) { 333 --mcount; 334 if (mflag && mcount <= 0) 335 break; 336 } 337 338 } 339 if (Bflag > 0) 340 clearqueue(); 341 grep_close(f); 342 343 if (cflag) { 344 if (!hflag) 345 printf("%s:", pc.ln.file); 346 printf("%u\n", c); 347 } 348 if (lflag && !qflag && c != 0) 349 printf("%s%c", fn, nullflag ? 0 : '\n'); 350 if (Lflag && !qflag && c == 0) 351 printf("%s%c", fn, nullflag ? 0 : '\n'); 352 if (c && !cflag && !lflag && !Lflag && 353 binbehave == BINFILE_BIN && f->binary && !qflag) 354 printf(getstr(8), fn); 355 356 free(pc.ln.file); 357 free(f); 358 return (c); 359 } 360 361 #define iswword(x) (iswalnum((x)) || (x) == L'_') 362 363 /* 364 * Processes a line comparing it with the specified patterns. Each pattern 365 * is looped to be compared along with the full string, saving each and every 366 * match, which is necessary to colorize the output and to count the 367 * matches. The matching lines are passed to printline() to display the 368 * appropriate output. 369 */ 370 static int 371 procline(struct parsec *pc) 372 { 373 regmatch_t pmatch, lastmatch, chkmatch; 374 wchar_t wbegin, wend; 375 size_t st, nst; 376 unsigned int i; 377 int c = 0, r = 0, lastmatches = 0, leflags = eflags; 378 size_t startm = 0, matchidx; 379 unsigned int retry; 380 381 matchidx = pc->matchidx; 382 383 /* Special case: empty pattern with -w flag, check first character */ 384 if (matchall && wflag) { 385 if (pc->ln.len == 0) 386 return (0); 387 wend = L' '; 388 if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend)) 389 return (1); 390 else 391 return (0); 392 } else if (matchall) 393 return (0); 394 395 st = pc->lnstart; 396 nst = 0; 397 /* Initialize to avoid a false positive warning from GCC. */ 398 lastmatch.rm_so = lastmatch.rm_eo = 0; 399 400 /* Loop to process the whole line */ 401 while (st <= pc->ln.len) { 402 lastmatches = 0; 403 startm = matchidx; 404 retry = 0; 405 if (st > 0 && pc->ln.dat[st - 1] != fileeol) 406 leflags |= REG_NOTBOL; 407 /* Loop to compare with all the patterns */ 408 for (i = 0; i < patterns; i++) { 409 pmatch.rm_so = st; 410 pmatch.rm_eo = pc->ln.len; 411 #ifndef WITHOUT_FASTMATCH 412 if (fg_pattern[i].pattern) 413 r = fastexec(&fg_pattern[i], 414 pc->ln.dat, 1, &pmatch, leflags); 415 else 416 #endif 417 r = regexec(&r_pattern[i], pc->ln.dat, 1, 418 &pmatch, leflags); 419 if (r != 0) 420 continue; 421 /* Check for full match */ 422 if (xflag && (pmatch.rm_so != 0 || 423 (size_t)pmatch.rm_eo != pc->ln.len)) 424 continue; 425 /* Check for whole word match */ 426 #ifndef WITHOUT_FASTMATCH 427 if (wflag || fg_pattern[i].word) { 428 #else 429 if (wflag) { 430 #endif 431 wbegin = wend = L' '; 432 if (pmatch.rm_so != 0 && 433 sscanf(&pc->ln.dat[pmatch.rm_so - 1], 434 "%lc", &wbegin) != 1) 435 r = REG_NOMATCH; 436 else if ((size_t)pmatch.rm_eo != 437 pc->ln.len && 438 sscanf(&pc->ln.dat[pmatch.rm_eo], 439 "%lc", &wend) != 1) 440 r = REG_NOMATCH; 441 else if (iswword(wbegin) || 442 iswword(wend)) 443 r = REG_NOMATCH; 444 /* 445 * If we're doing whole word matching and we 446 * matched once, then we should try the pattern 447 * again after advancing just past the start of 448 * the earliest match. This allows the pattern 449 * to match later on in the line and possibly 450 * still match a whole word. 451 */ 452 if (r == REG_NOMATCH && 453 (retry == pc->lnstart || 454 pmatch.rm_so + 1 < retry)) 455 retry = pmatch.rm_so + 1; 456 if (r == REG_NOMATCH) 457 continue; 458 } 459 lastmatches++; 460 lastmatch = pmatch; 461 462 if (matchidx == 0) 463 c++; 464 465 /* 466 * Replace previous match if the new one is earlier 467 * and/or longer. This will lead to some amount of 468 * extra work if -o/--color are specified, but it's 469 * worth it from a correctness point of view. 470 */ 471 if (matchidx > startm) { 472 chkmatch = pc->matches[matchidx - 1]; 473 if (pmatch.rm_so < chkmatch.rm_so || 474 (pmatch.rm_so == chkmatch.rm_so && 475 (pmatch.rm_eo - pmatch.rm_so) > 476 (chkmatch.rm_eo - chkmatch.rm_so))) { 477 pc->matches[matchidx - 1] = pmatch; 478 nst = pmatch.rm_eo; 479 } 480 } else { 481 /* Advance as normal if not */ 482 pc->matches[matchidx++] = pmatch; 483 nst = pmatch.rm_eo; 484 } 485 /* avoid excessive matching - skip further patterns */ 486 if ((color == NULL && !oflag) || qflag || lflag || 487 matchidx >= MAX_MATCHES) { 488 pc->lnstart = nst; 489 lastmatches = 0; 490 break; 491 } 492 } 493 494 /* 495 * Advance to just past the start of the earliest match, try 496 * again just in case we still have a chance to match later in 497 * the string. 498 */ 499 if (lastmatches == 0 && retry > pc->lnstart) { 500 st = retry; 501 continue; 502 } 503 504 /* One pass if we are not recording matches */ 505 if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag)) 506 break; 507 508 /* If we didn't have any matches or REG_NOSUB set */ 509 if (lastmatches == 0 || (cflags & REG_NOSUB)) 510 nst = pc->ln.len; 511 512 if (lastmatches == 0) 513 /* No matches */ 514 break; 515 else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo) 516 /* Zero-length match -- advance one more so we don't get stuck */ 517 nst++; 518 519 /* Advance st based on previous matches */ 520 st = nst; 521 pc->lnstart = st; 522 } 523 524 /* Reflect the new matchidx in the context */ 525 pc->matchidx = matchidx; 526 if (vflag) 527 c = !c; 528 return (c ? 0 : 1); 529 } 530 531 /* 532 * Safe malloc() for internal use. 533 */ 534 void * 535 grep_malloc(size_t size) 536 { 537 void *ptr; 538 539 if ((ptr = malloc(size)) == NULL) 540 err(2, "malloc"); 541 return (ptr); 542 } 543 544 /* 545 * Safe calloc() for internal use. 546 */ 547 void * 548 grep_calloc(size_t nmemb, size_t size) 549 { 550 void *ptr; 551 552 if ((ptr = calloc(nmemb, size)) == NULL) 553 err(2, "calloc"); 554 return (ptr); 555 } 556 557 /* 558 * Safe realloc() for internal use. 559 */ 560 void * 561 grep_realloc(void *ptr, size_t size) 562 { 563 564 if ((ptr = realloc(ptr, size)) == NULL) 565 err(2, "realloc"); 566 return (ptr); 567 } 568 569 /* 570 * Safe strdup() for internal use. 571 */ 572 char * 573 grep_strdup(const char *str) 574 { 575 char *ret; 576 577 if ((ret = strdup(str)) == NULL) 578 err(2, "strdup"); 579 return (ret); 580 } 581 582 /* 583 * Print an entire line as-is, there are no inline matches to consider. This is 584 * used for printing context. 585 */ 586 void grep_printline(struct str *line, int sep) { 587 printline_metadata(line, sep); 588 fwrite(line->dat, line->len, 1, stdout); 589 putchar(fileeol); 590 } 591 592 static void 593 printline_metadata(struct str *line, int sep) 594 { 595 bool printsep; 596 597 printsep = false; 598 if (!hflag) { 599 if (!nullflag) { 600 fputs(line->file, stdout); 601 printsep = true; 602 } else { 603 printf("%s", line->file); 604 putchar(0); 605 } 606 } 607 if (nflag) { 608 if (printsep) 609 putchar(sep); 610 printf("%d", line->line_no); 611 printsep = true; 612 } 613 if (bflag) { 614 if (printsep) 615 putchar(sep); 616 printf("%lld", (long long)(line->off + line->boff)); 617 printsep = true; 618 } 619 if (printsep) 620 putchar(sep); 621 } 622 623 /* 624 * Prints a matching line according to the command line options. 625 */ 626 static void 627 printline(struct parsec *pc, int sep) 628 { 629 size_t a = 0; 630 size_t i, matchidx; 631 regmatch_t match; 632 633 /* If matchall, everything matches but don't actually print for -o */ 634 if (oflag && matchall) 635 return; 636 637 matchidx = pc->matchidx; 638 639 /* --color and -o */ 640 if ((oflag || color) && matchidx > 0) { 641 /* Only print metadata once per line if --color */ 642 if (!oflag && pc->printed == 0) 643 printline_metadata(&pc->ln, sep); 644 for (i = 0; i < matchidx; i++) { 645 match = pc->matches[i]; 646 /* Don't output zero length matches */ 647 if (match.rm_so == match.rm_eo) 648 continue; 649 /* 650 * Metadata is printed on a per-line basis, so every 651 * match gets file metadata with the -o flag. 652 */ 653 if (oflag) { 654 pc->ln.boff = match.rm_so; 655 printline_metadata(&pc->ln, sep); 656 } else 657 fwrite(pc->ln.dat + a, match.rm_so - a, 1, 658 stdout); 659 if (color) 660 fprintf(stdout, "\33[%sm\33[K", color); 661 fwrite(pc->ln.dat + match.rm_so, 662 match.rm_eo - match.rm_so, 1, stdout); 663 if (color) 664 fprintf(stdout, "\33[m\33[K"); 665 a = match.rm_eo; 666 if (oflag) 667 putchar('\n'); 668 } 669 if (!oflag) { 670 if (pc->ln.len - a > 0) 671 fwrite(pc->ln.dat + a, pc->ln.len - a, 1, 672 stdout); 673 putchar('\n'); 674 } 675 } else 676 grep_printline(&pc->ln, sep); 677 pc->printed++; 678 } 679