1 /* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ 2 /* $FreeBSD$ */ 3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4 5 /*- 6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 8 * Copyright (C) 2017 Kyle Evans <kevans@FreeBSD.org> 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include <sys/stat.h> 37 #include <sys/types.h> 38 39 #include <ctype.h> 40 #include <err.h> 41 #include <errno.h> 42 #include <fnmatch.h> 43 #include <fts.h> 44 #include <libgen.h> 45 #include <stdbool.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 #include <wchar.h> 51 #include <wctype.h> 52 53 #ifndef WITHOUT_FASTMATCH 54 #include "fastmatch.h" 55 #endif 56 #include "grep.h" 57 58 static bool first_match = true; 59 60 /* 61 * Parsing context; used to hold things like matches made and 62 * other useful bits 63 */ 64 struct parsec { 65 regmatch_t matches[MAX_MATCHES]; /* Matches made */ 66 struct str ln; /* Current line */ 67 size_t lnstart; /* Position in line */ 68 size_t matchidx; /* Latest match index */ 69 int printed; /* Metadata printed? */ 70 bool binary; /* Binary file? */ 71 }; 72 73 74 static int procline(struct parsec *pc); 75 static void printline(struct parsec *pc, int sep); 76 static void printline_metadata(struct str *line, int sep); 77 78 bool 79 file_matching(const char *fname) 80 { 81 char *fname_base, *fname_buf; 82 bool ret; 83 84 ret = finclude ? false : true; 85 fname_buf = strdup(fname); 86 if (fname_buf == NULL) 87 err(2, "strdup"); 88 fname_base = basename(fname_buf); 89 90 for (unsigned int i = 0; i < fpatterns; ++i) { 91 if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 92 fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 93 if (fpattern[i].mode == EXCL_PAT) { 94 ret = false; 95 break; 96 } else 97 ret = true; 98 } 99 } 100 free(fname_buf); 101 return (ret); 102 } 103 104 static inline bool 105 dir_matching(const char *dname) 106 { 107 bool ret; 108 109 ret = dinclude ? false : true; 110 111 for (unsigned int i = 0; i < dpatterns; ++i) { 112 if (dname != NULL && 113 fnmatch(dpattern[i].pat, dname, 0) == 0) { 114 if (dpattern[i].mode == EXCL_PAT) 115 return (false); 116 else 117 ret = true; 118 } 119 } 120 return (ret); 121 } 122 123 /* 124 * Processes a directory when a recursive search is performed with 125 * the -R option. Each appropriate file is passed to procfile(). 126 */ 127 int 128 grep_tree(char **argv) 129 { 130 FTS *fts; 131 FTSENT *p; 132 int c, fts_flags; 133 bool ok; 134 const char *wd[] = { ".", NULL }; 135 136 c = fts_flags = 0; 137 138 switch(linkbehave) { 139 case LINK_EXPLICIT: 140 fts_flags = FTS_COMFOLLOW; 141 break; 142 case LINK_SKIP: 143 fts_flags = FTS_PHYSICAL; 144 break; 145 default: 146 fts_flags = FTS_LOGICAL; 147 148 } 149 150 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 151 152 fts = fts_open((argv[0] == NULL) ? 153 __DECONST(char * const *, wd) : argv, fts_flags, NULL); 154 if (fts == NULL) 155 err(2, "fts_open"); 156 while ((p = fts_read(fts)) != NULL) { 157 switch (p->fts_info) { 158 case FTS_DNR: 159 /* FALLTHROUGH */ 160 case FTS_ERR: 161 file_err = true; 162 if(!sflag) 163 warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); 164 break; 165 case FTS_D: 166 /* FALLTHROUGH */ 167 case FTS_DP: 168 if (dexclude || dinclude) 169 if (!dir_matching(p->fts_name) || 170 !dir_matching(p->fts_path)) 171 fts_set(fts, p, FTS_SKIP); 172 break; 173 case FTS_DC: 174 /* Print a warning for recursive directory loop */ 175 warnx("warning: %s: recursive directory loop", 176 p->fts_path); 177 break; 178 default: 179 /* Check for file exclusion/inclusion */ 180 ok = true; 181 if (fexclude || finclude) 182 ok &= file_matching(p->fts_path); 183 184 if (ok) 185 c += procfile(p->fts_path); 186 break; 187 } 188 } 189 190 fts_close(fts); 191 return (c); 192 } 193 194 /* 195 * Opens a file and processes it. Each file is processed line-by-line 196 * passing the lines to procline(). 197 */ 198 int 199 procfile(const char *fn) 200 { 201 struct parsec pc; 202 long long tail; 203 struct file *f; 204 struct stat sb; 205 struct str *ln; 206 mode_t s; 207 int c, last_outed, t; 208 bool doctx, printmatch, same_file; 209 210 if (strcmp(fn, "-") == 0) { 211 fn = label != NULL ? label : getstr(1); 212 f = grep_open(NULL); 213 } else { 214 if (!stat(fn, &sb)) { 215 /* Check if we need to process the file */ 216 s = sb.st_mode & S_IFMT; 217 if (s == S_IFDIR && dirbehave == DIR_SKIP) 218 return (0); 219 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 220 || s == S_IFSOCK) && devbehave == DEV_SKIP) 221 return (0); 222 } 223 f = grep_open(fn); 224 } 225 if (f == NULL) { 226 file_err = true; 227 if (!sflag) 228 warn("%s", fn); 229 return (0); 230 } 231 232 /* Convenience */ 233 ln = &pc.ln; 234 pc.ln.file = grep_malloc(strlen(fn) + 1); 235 strcpy(pc.ln.file, fn); 236 pc.ln.line_no = 0; 237 pc.ln.len = 0; 238 pc.ln.boff = 0; 239 pc.ln.off = -1; 240 pc.binary = f->binary; 241 pc.printed = 0; 242 tail = 0; 243 last_outed = 0; 244 same_file = false; 245 doctx = false; 246 printmatch = true; 247 if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag || 248 lflag || Lflag) 249 printmatch = false; 250 if (printmatch && (Aflag != 0 || Bflag != 0)) 251 doctx = true; 252 mcount = mlimit; 253 254 for (c = 0; c == 0 || !(lflag || qflag); ) { 255 /* Reset per-line statistics */ 256 pc.printed = 0; 257 pc.matchidx = 0; 258 pc.lnstart = 0; 259 pc.ln.boff = 0; 260 pc.ln.off += pc.ln.len + 1; 261 if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL || 262 pc.ln.len == 0) 263 break; 264 265 if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol) 266 --pc.ln.len; 267 pc.ln.line_no++; 268 269 /* Return if we need to skip a binary file */ 270 if (pc.binary && binbehave == BINFILE_SKIP) { 271 grep_close(f); 272 free(pc.ln.file); 273 free(f); 274 return (0); 275 } 276 277 if ((t = procline(&pc)) == 0) 278 ++c; 279 280 /* Deal with any -B context or context separators */ 281 if (t == 0 && doctx) { 282 if (!first_match && (!same_file || last_outed > 0)) 283 printf("--\n"); 284 if (Bflag > 0) 285 printqueue(); 286 tail = Aflag; 287 } 288 /* Print the matching line, but only if not quiet/binary */ 289 if (t == 0 && printmatch) { 290 printline(&pc, ':'); 291 while (pc.matchidx >= MAX_MATCHES) { 292 /* Reset matchidx and try again */ 293 pc.matchidx = 0; 294 if (procline(&pc) == 0) 295 printline(&pc, ':'); 296 else 297 break; 298 } 299 first_match = false; 300 same_file = true; 301 last_outed = 0; 302 } 303 if (t != 0 && doctx) { 304 /* Deal with any -A context */ 305 if (tail > 0) { 306 grep_printline(&pc.ln, '-'); 307 tail--; 308 if (Bflag > 0) 309 clearqueue(); 310 } else { 311 /* 312 * Enqueue non-matching lines for -B context. 313 * If we're not actually doing -B context or if 314 * the enqueue resulted in a line being rotated 315 * out, then go ahead and increment last_outed 316 * to signify a gap between context/match. 317 */ 318 if (Bflag == 0 || (Bflag > 0 && enqueue(ln))) 319 ++last_outed; 320 } 321 } 322 323 /* Count the matches if we have a match limit */ 324 if (t == 0 && mflag) { 325 --mcount; 326 if (mflag && mcount <= 0) 327 break; 328 } 329 330 } 331 if (Bflag > 0) 332 clearqueue(); 333 grep_close(f); 334 335 if (cflag) { 336 if (!hflag) 337 printf("%s:", pc.ln.file); 338 printf("%u\n", c); 339 } 340 if (lflag && !qflag && c != 0) 341 printf("%s%c", fn, nullflag ? 0 : '\n'); 342 if (Lflag && !qflag && c == 0) 343 printf("%s%c", fn, nullflag ? 0 : '\n'); 344 if (c && !cflag && !lflag && !Lflag && 345 binbehave == BINFILE_BIN && f->binary && !qflag) 346 printf(getstr(8), fn); 347 348 free(pc.ln.file); 349 free(f); 350 return (c); 351 } 352 353 #define iswword(x) (iswalnum((x)) || (x) == L'_') 354 355 /* 356 * Processes a line comparing it with the specified patterns. Each pattern 357 * is looped to be compared along with the full string, saving each and every 358 * match, which is necessary to colorize the output and to count the 359 * matches. The matching lines are passed to printline() to display the 360 * appropriate output. 361 */ 362 static int 363 procline(struct parsec *pc) 364 { 365 regmatch_t pmatch, lastmatch, chkmatch; 366 wchar_t wbegin, wend; 367 size_t st, nst; 368 unsigned int i; 369 int c = 0, r = 0, lastmatches = 0, leflags = eflags; 370 size_t startm = 0, matchidx; 371 unsigned int retry; 372 373 matchidx = pc->matchidx; 374 375 /* Special case: empty pattern with -w flag, check first character */ 376 if (matchall && wflag) { 377 if (pc->ln.len == 0) 378 return (0); 379 wend = L' '; 380 if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend)) 381 return (1); 382 else 383 return (0); 384 } else if (matchall) 385 return (0); 386 387 st = pc->lnstart; 388 nst = 0; 389 /* Initialize to avoid a false positive warning from GCC. */ 390 lastmatch.rm_so = lastmatch.rm_eo = 0; 391 392 /* Loop to process the whole line */ 393 while (st <= pc->ln.len) { 394 lastmatches = 0; 395 startm = matchidx; 396 retry = 0; 397 if (st > 0 && pc->ln.dat[st - 1] != fileeol) 398 leflags |= REG_NOTBOL; 399 /* Loop to compare with all the patterns */ 400 for (i = 0; i < patterns; i++) { 401 pmatch.rm_so = st; 402 pmatch.rm_eo = pc->ln.len; 403 #ifndef WITHOUT_FASTMATCH 404 if (fg_pattern[i].pattern) 405 r = fastexec(&fg_pattern[i], 406 pc->ln.dat, 1, &pmatch, leflags); 407 else 408 #endif 409 r = regexec(&r_pattern[i], pc->ln.dat, 1, 410 &pmatch, leflags); 411 if (r != 0) 412 continue; 413 /* Check for full match */ 414 if (xflag && (pmatch.rm_so != 0 || 415 (size_t)pmatch.rm_eo != pc->ln.len)) 416 continue; 417 /* Check for whole word match */ 418 #ifndef WITHOUT_FASTMATCH 419 if (wflag || fg_pattern[i].word) { 420 #else 421 if (wflag) { 422 #endif 423 wbegin = wend = L' '; 424 if (pmatch.rm_so != 0 && 425 sscanf(&pc->ln.dat[pmatch.rm_so - 1], 426 "%lc", &wbegin) != 1) 427 r = REG_NOMATCH; 428 else if ((size_t)pmatch.rm_eo != 429 pc->ln.len && 430 sscanf(&pc->ln.dat[pmatch.rm_eo], 431 "%lc", &wend) != 1) 432 r = REG_NOMATCH; 433 else if (iswword(wbegin) || 434 iswword(wend)) 435 r = REG_NOMATCH; 436 /* 437 * If we're doing whole word matching and we 438 * matched once, then we should try the pattern 439 * again after advancing just past the start of 440 * the earliest match. This allows the pattern 441 * to match later on in the line and possibly 442 * still match a whole word. 443 */ 444 if (r == REG_NOMATCH && 445 (retry == pc->lnstart || 446 pmatch.rm_so + 1 < retry)) 447 retry = pmatch.rm_so + 1; 448 if (r == REG_NOMATCH) 449 continue; 450 } 451 lastmatches++; 452 lastmatch = pmatch; 453 454 if (matchidx == 0) 455 c++; 456 457 /* 458 * Replace previous match if the new one is earlier 459 * and/or longer. This will lead to some amount of 460 * extra work if -o/--color are specified, but it's 461 * worth it from a correctness point of view. 462 */ 463 if (matchidx > startm) { 464 chkmatch = pc->matches[matchidx - 1]; 465 if (pmatch.rm_so < chkmatch.rm_so || 466 (pmatch.rm_so == chkmatch.rm_so && 467 (pmatch.rm_eo - pmatch.rm_so) > 468 (chkmatch.rm_eo - chkmatch.rm_so))) { 469 pc->matches[matchidx - 1] = pmatch; 470 nst = pmatch.rm_eo; 471 } 472 } else { 473 /* Advance as normal if not */ 474 pc->matches[matchidx++] = pmatch; 475 nst = pmatch.rm_eo; 476 } 477 /* avoid excessive matching - skip further patterns */ 478 if ((color == NULL && !oflag) || qflag || lflag || 479 matchidx >= MAX_MATCHES) { 480 pc->lnstart = nst; 481 lastmatches = 0; 482 break; 483 } 484 } 485 486 /* 487 * Advance to just past the start of the earliest match, try 488 * again just in case we still have a chance to match later in 489 * the string. 490 */ 491 if (lastmatches == 0 && retry > pc->lnstart) { 492 st = retry; 493 continue; 494 } 495 496 /* One pass if we are not recording matches */ 497 if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag)) 498 break; 499 500 /* If we didn't have any matches or REG_NOSUB set */ 501 if (lastmatches == 0 || (cflags & REG_NOSUB)) 502 nst = pc->ln.len; 503 504 if (lastmatches == 0) 505 /* No matches */ 506 break; 507 else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo) 508 /* Zero-length match -- advance one more so we don't get stuck */ 509 nst++; 510 511 /* Advance st based on previous matches */ 512 st = nst; 513 pc->lnstart = st; 514 } 515 516 /* Reflect the new matchidx in the context */ 517 pc->matchidx = matchidx; 518 if (vflag) 519 c = !c; 520 return (c ? 0 : 1); 521 } 522 523 /* 524 * Safe malloc() for internal use. 525 */ 526 void * 527 grep_malloc(size_t size) 528 { 529 void *ptr; 530 531 if ((ptr = malloc(size)) == NULL) 532 err(2, "malloc"); 533 return (ptr); 534 } 535 536 /* 537 * Safe calloc() for internal use. 538 */ 539 void * 540 grep_calloc(size_t nmemb, size_t size) 541 { 542 void *ptr; 543 544 if ((ptr = calloc(nmemb, size)) == NULL) 545 err(2, "calloc"); 546 return (ptr); 547 } 548 549 /* 550 * Safe realloc() for internal use. 551 */ 552 void * 553 grep_realloc(void *ptr, size_t size) 554 { 555 556 if ((ptr = realloc(ptr, size)) == NULL) 557 err(2, "realloc"); 558 return (ptr); 559 } 560 561 /* 562 * Safe strdup() for internal use. 563 */ 564 char * 565 grep_strdup(const char *str) 566 { 567 char *ret; 568 569 if ((ret = strdup(str)) == NULL) 570 err(2, "strdup"); 571 return (ret); 572 } 573 574 /* 575 * Print an entire line as-is, there are no inline matches to consider. This is 576 * used for printing context. 577 */ 578 void grep_printline(struct str *line, int sep) { 579 printline_metadata(line, sep); 580 fwrite(line->dat, line->len, 1, stdout); 581 putchar(fileeol); 582 } 583 584 static void 585 printline_metadata(struct str *line, int sep) 586 { 587 bool printsep; 588 589 printsep = false; 590 if (!hflag) { 591 if (!nullflag) { 592 fputs(line->file, stdout); 593 printsep = true; 594 } else { 595 printf("%s", line->file); 596 putchar(0); 597 } 598 } 599 if (nflag) { 600 if (printsep) 601 putchar(sep); 602 printf("%d", line->line_no); 603 printsep = true; 604 } 605 if (bflag) { 606 if (printsep) 607 putchar(sep); 608 printf("%lld", (long long)(line->off + line->boff)); 609 printsep = true; 610 } 611 if (printsep) 612 putchar(sep); 613 } 614 615 /* 616 * Prints a matching line according to the command line options. 617 */ 618 static void 619 printline(struct parsec *pc, int sep) 620 { 621 size_t a = 0; 622 size_t i, matchidx; 623 regmatch_t match; 624 625 /* If matchall, everything matches but don't actually print for -o */ 626 if (oflag && matchall) 627 return; 628 629 matchidx = pc->matchidx; 630 631 /* --color and -o */ 632 if ((oflag || color) && matchidx > 0) { 633 /* Only print metadata once per line if --color */ 634 if (!oflag && pc->printed == 0) 635 printline_metadata(&pc->ln, sep); 636 for (i = 0; i < matchidx; i++) { 637 match = pc->matches[i]; 638 /* Don't output zero length matches */ 639 if (match.rm_so == match.rm_eo) 640 continue; 641 /* 642 * Metadata is printed on a per-line basis, so every 643 * match gets file metadata with the -o flag. 644 */ 645 if (oflag) { 646 pc->ln.boff = match.rm_so; 647 printline_metadata(&pc->ln, sep); 648 } else 649 fwrite(pc->ln.dat + a, match.rm_so - a, 1, 650 stdout); 651 if (color) 652 fprintf(stdout, "\33[%sm\33[K", color); 653 fwrite(pc->ln.dat + match.rm_so, 654 match.rm_eo - match.rm_so, 1, stdout); 655 if (color) 656 fprintf(stdout, "\33[m\33[K"); 657 a = match.rm_eo; 658 if (oflag) 659 putchar('\n'); 660 } 661 if (!oflag) { 662 if (pc->ln.len - a > 0) 663 fwrite(pc->ln.dat + a, pc->ln.len - a, 1, 664 stdout); 665 putchar('\n'); 666 } 667 } else 668 grep_printline(&pc->ln, sep); 669 pc->printed++; 670 } 671