/*	$OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $	*/

/*-
 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/stat.h>
#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <libgen.h>
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "grep.h"

#ifndef WITHOUT_NLS
#include <nl_types.h>
/* Message catalogue handle; opened in main() with catopen("grep", ...). */
nl_catd	 catalog;
#endif

/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.
 *
 * The numbered comments give each entry's array index; the code in this
 * file requests messages by that number through getstr().
 * NOTE(review): getstr() is declared outside this file -- confirm that it
 * falls back to errstr[n].
 */
const char	*errstr[] = {
	"",
/* 1*/	"(standard input)",
/* 2*/	"cannot read bzip2 compressed file",
/* 3*/	"unknown %s option",
/* 4*/	"usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
/* 5*/	"\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
/* 6*/	"\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
/* 7*/	"\t[--null] [pattern] [file ...]\n",
/* 8*/	"Binary file %s matches\n",
/* 9*/	"%s (BSD grep) %s\n",
};

/* Flags passed to regcomp() and regexec() */
int		 cflags = 0;
int		 eflags = REG_STARTEND;

/* Shortcut for matching all cases like empty regex */
bool		 matchall;

/*
 * Searching patterns: `patterns' entries are in use out of `pattern_sz'
 * allocated slots in `pattern'.  r_pattern and fg_pattern are allocated
 * in main() with one element per pattern.
 */
unsigned int	 patterns, pattern_sz;
char		**pattern;
regex_t		*r_pattern;
fastgrep_t	*fg_pattern;

/* Filename exclusion/inclusion patterns (used/allocated counts and arrays) */
unsigned int	 fpatterns, fpattern_sz;
unsigned int	 dpatterns, dpattern_sz;
struct epat	*dpattern, *fpattern;

/* For regex errors */
char		 re_error[RE_ERROR_BUF + 1];

/* Command-line flags */
unsigned long long Aflag;	/* -A x: print x lines trailing each match */
unsigned long long Bflag;	/* -B x: print x lines leading each match */
bool	 Hflag;		/* -H: always print file name */
bool	 Lflag;		/* -L: only show names of files with no matches */
bool	 bflag;		/* -b: show block numbers for each match */
bool	 cflag;		/* -c: only show a count of matching lines */
bool	 hflag;		/* -h: don't print filename headers */
bool	 iflag;		/* -i: ignore case */
bool	 lflag;		/* -l: only show names of files with matches */
bool	 mflag;		/* -m x: stop reading the files after x matches */
unsigned long long mcount;	/* count for -m */
bool	 nflag;		/* -n: show line numbers in front of matching lines */
bool	 oflag;		/* -o: print only matching part */
bool	 qflag;		/* -q: quiet mode (don't output anything) */
bool	 sflag;		/* -s: silent mode (ignore errors) */
bool	 vflag;		/* -v: only show non-matching lines */
bool	 wflag;		/* -w: pattern must start and end on word boundaries */
bool	 xflag;		/* -x: pattern must match entire line */
bool	 lbflag;	/* --line-buffered */
bool	 nullflag;	/* --null */
char	*label;		/* --label */
const char *color;	/* --color */
int	 grepbehave = GREP_BASIC;	/* -EFGP: type of the regex */
int	 binbehave = BINFILE_BIN;	/* -aIU: handling of binary files */
int	 filebehave = FILE_STDIO;	/* -JZ: normal, gzip or bzip2 file */
int	 devbehave = DEV_READ;		/* -D: handling of devices */
int	 dirbehave = DIR_READ;		/* -dRr: handling of directories */
int	 linkbehave = LINK_READ;	/* -OpS: handling of symlinks */

bool	 dexclude, dinclude;	/* --exclude-dir and --include-dir */
bool	 fexclude, finclude;	/* --exclude and --include */

/*
 * Codes returned by getopt_long() for options that only have a long form.
 * Numbering starts at CHAR_MAX + 1 so that no code can collide with a
 * single-character option.
 */
enum {
	BIN_OPT = CHAR_MAX + 1,
	COLOR_OPT,
	HELP_OPT,
	MMAP_OPT,
	LINEBUF_OPT,
	LABEL_OPT,
	NULL_OPT,
	R_EXCLUDE_OPT,
	R_INCLUDE_OPT,
	R_DEXCLUDE_OPT,
	R_DINCLUDE_OPT
};

static inline const char	*init_color(const char *);

/* Housekeeping */
bool	 first = true;	/* flag whether we are processing the first match */
bool	 prev;		/* flag whether or not the previous line matched */
int	 tail;		/* lines left to print */
bool	 notfound;	/* file not found */

/* Program name; main() inspects its first characters to select a mode. */
extern char	*__progname;

/* 152 * Prints usage information and returns 2. 153 */ 154 static void 155 usage(void) 156 { 157 fprintf(stderr, getstr(4), __progname); 158 fprintf(stderr, "%s", getstr(5)); 159 fprintf(stderr, "%s", getstr(5)); 160 fprintf(stderr, "%s", getstr(6)); 161 fprintf(stderr, "%s", getstr(7)); 162 exit(2); 163 } 164 165 static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy"; 166 167 struct option long_options[] = 168 { 169 {"binary-files", required_argument, NULL, BIN_OPT}, 170 {"help", no_argument, NULL, HELP_OPT}, 171 {"mmap", no_argument, NULL, MMAP_OPT}, 172 {"line-buffered", no_argument, NULL, LINEBUF_OPT}, 173 {"label", required_argument, NULL, LABEL_OPT}, 174 {"null", no_argument, NULL, NULL_OPT}, 175 {"color", optional_argument, NULL, COLOR_OPT}, 176 {"colour", optional_argument, NULL, COLOR_OPT}, 177 {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, 178 {"include", required_argument, NULL, R_INCLUDE_OPT}, 179 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, 180 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, 181 {"after-context", required_argument, NULL, 'A'}, 182 {"text", no_argument, NULL, 'a'}, 183 {"before-context", required_argument, NULL, 'B'}, 184 {"byte-offset", no_argument, NULL, 'b'}, 185 {"context", optional_argument, NULL, 'C'}, 186 {"count", no_argument, NULL, 'c'}, 187 {"devices", required_argument, NULL, 'D'}, 188 {"directories", required_argument, NULL, 'd'}, 189 {"extended-regexp", no_argument, NULL, 'E'}, 190 {"regexp", required_argument, NULL, 'e'}, 191 {"fixed-strings", no_argument, NULL, 'F'}, 192 {"file", required_argument, NULL, 'f'}, 193 {"basic-regexp", no_argument, NULL, 'G'}, 194 {"no-filename", no_argument, NULL, 'h'}, 195 {"with-filename", no_argument, NULL, 'H'}, 196 {"ignore-case", no_argument, NULL, 'i'}, 197 {"bz2decompress", no_argument, NULL, 'J'}, 198 {"files-with-matches", no_argument, NULL, 'l'}, 199 {"files-without-match", no_argument, NULL, 'L'}, 200 {"max-count", 
required_argument, NULL, 'm'}, 201 {"line-number", no_argument, NULL, 'n'}, 202 {"only-matching", no_argument, NULL, 'o'}, 203 {"quiet", no_argument, NULL, 'q'}, 204 {"silent", no_argument, NULL, 'q'}, 205 {"recursive", no_argument, NULL, 'r'}, 206 {"no-messages", no_argument, NULL, 's'}, 207 {"binary", no_argument, NULL, 'U'}, 208 {"unix-byte-offsets", no_argument, NULL, 'u'}, 209 {"invert-match", no_argument, NULL, 'v'}, 210 {"version", no_argument, NULL, 'V'}, 211 {"word-regexp", no_argument, NULL, 'w'}, 212 {"line-regexp", no_argument, NULL, 'x'}, 213 {"decompress", no_argument, NULL, 'Z'}, 214 {NULL, no_argument, NULL, 0} 215 }; 216 217 /* 218 * Adds a searching pattern to the internal array. 219 */ 220 static void 221 add_pattern(char *pat, size_t len) 222 { 223 224 /* Check if we can do a shortcut */ 225 if (len == 0 || matchall) { 226 matchall = true; 227 return; 228 } 229 /* Increase size if necessary */ 230 if (patterns == pattern_sz) { 231 pattern_sz *= 2; 232 pattern = grep_realloc(pattern, ++pattern_sz * 233 sizeof(*pattern)); 234 } 235 if (len > 0 && pat[len - 1] == '\n') 236 --len; 237 /* pat may not be NUL-terminated */ 238 pattern[patterns] = grep_malloc(len + 1); 239 memcpy(pattern[patterns], pat, len); 240 pattern[patterns][len] = '\0'; 241 ++patterns; 242 } 243 244 /* 245 * Adds a file include/exclude pattern to the internal array. 246 */ 247 static void 248 add_fpattern(const char *pat, int mode) 249 { 250 251 /* Increase size if necessary */ 252 if (fpatterns == fpattern_sz) { 253 fpattern_sz *= 2; 254 fpattern = grep_realloc(fpattern, ++fpattern_sz * 255 sizeof(struct epat)); 256 } 257 fpattern[fpatterns].pat = grep_strdup(pat); 258 fpattern[fpatterns].mode = mode; 259 ++fpatterns; 260 } 261 262 /* 263 * Adds a directory include/exclude pattern to the internal array. 
264 */ 265 static void 266 add_dpattern(const char *pat, int mode) 267 { 268 269 /* Increase size if necessary */ 270 if (dpatterns == dpattern_sz) { 271 dpattern_sz *= 2; 272 dpattern = grep_realloc(dpattern, ++dpattern_sz * 273 sizeof(struct epat)); 274 } 275 dpattern[dpatterns].pat = grep_strdup(pat); 276 dpattern[dpatterns].mode = mode; 277 ++dpatterns; 278 } 279 280 /* 281 * Reads searching patterns from a file and adds them with add_pattern(). 282 */ 283 static void 284 read_patterns(const char *fn) 285 { 286 FILE *f; 287 char *line; 288 size_t len; 289 290 if ((f = fopen(fn, "r")) == NULL) 291 err(2, "%s", fn); 292 while ((line = fgetln(f, &len)) != NULL) 293 add_pattern(line, *line == '\n' ? 0 : len); 294 if (ferror(f)) 295 err(2, "%s", fn); 296 fclose(f); 297 } 298 299 static inline const char * 300 init_color(const char *d) 301 { 302 char *c; 303 304 c = getenv("GREP_COLOR"); 305 return (c != NULL ? c : d); 306 } 307 308 int 309 main(int argc, char *argv[]) 310 { 311 char **aargv, **eargv, *eopts; 312 char *ep; 313 unsigned long long l; 314 unsigned int aargc, eargc, i; 315 int c, lastc, needpattern, newarg, prevoptind; 316 317 setlocale(LC_ALL, ""); 318 319 #ifndef WITHOUT_NLS 320 catalog = catopen("grep", NL_CAT_LOCALE); 321 #endif 322 323 /* Check what is the program name of the binary. In this 324 way we can have all the funcionalities in one binary 325 without the need of scripting and using ugly hacks. 
*/ 326 switch (__progname[0]) { 327 case 'e': 328 grepbehave = GREP_EXTENDED; 329 break; 330 case 'f': 331 grepbehave = GREP_FIXED; 332 break; 333 case 'g': 334 grepbehave = GREP_BASIC; 335 break; 336 case 'z': 337 filebehave = FILE_GZIP; 338 switch(__progname[1]) { 339 case 'e': 340 grepbehave = GREP_EXTENDED; 341 break; 342 case 'f': 343 grepbehave = GREP_FIXED; 344 break; 345 case 'g': 346 grepbehave = GREP_BASIC; 347 break; 348 } 349 break; 350 } 351 352 lastc = '\0'; 353 newarg = 1; 354 prevoptind = 1; 355 needpattern = 1; 356 357 eopts = getenv("GREP_OPTIONS"); 358 359 /* support for extra arguments in GREP_OPTIONS */ 360 eargc = 0; 361 if (eopts != NULL) { 362 char *str; 363 364 /* make an estimation of how many extra arguments we have */ 365 for (unsigned int j = 0; j < strlen(eopts); j++) 366 if (eopts[j] == ' ') 367 eargc++; 368 369 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); 370 371 eargc = 0; 372 /* parse extra arguments */ 373 while ((str = strsep(&eopts, " ")) != NULL) 374 eargv[eargc++] = grep_strdup(str); 375 376 aargv = (char **)grep_calloc(eargc + argc + 1, 377 sizeof(char *)); 378 379 aargv[0] = argv[0]; 380 for (i = 0; i < eargc; i++) 381 aargv[i + 1] = eargv[i]; 382 for (int j = 1; j < argc; j++, i++) 383 aargv[i + 1] = argv[j]; 384 385 aargc = eargc + argc; 386 } else { 387 aargv = argv; 388 aargc = argc; 389 } 390 391 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != 392 -1)) { 393 switch (c) { 394 case '0': case '1': case '2': case '3': case '4': 395 case '5': case '6': case '7': case '8': case '9': 396 if (newarg || !isdigit(lastc)) 397 Aflag = 0; 398 else if (Aflag > LLONG_MAX / 10) { 399 errno = ERANGE; 400 err(2, NULL); 401 } 402 Aflag = Bflag = (Aflag * 10) + (c - '0'); 403 break; 404 case 'C': 405 if (optarg == NULL) { 406 Aflag = Bflag = 2; 407 break; 408 } 409 /* FALLTHROUGH */ 410 case 'A': 411 /* FALLTHROUGH */ 412 case 'B': 413 errno = 0; 414 l = strtoull(optarg, &ep, 10); 415 if (((errno == 
ERANGE) && (l == ULLONG_MAX)) || 416 ((errno == EINVAL) && (l == 0))) 417 err(2, NULL); 418 else if (ep[0] != '\0') { 419 errno = EINVAL; 420 err(2, NULL); 421 } 422 if (c == 'A') 423 Aflag = l; 424 else if (c == 'B') 425 Bflag = l; 426 else 427 Aflag = Bflag = l; 428 break; 429 case 'a': 430 binbehave = BINFILE_TEXT; 431 break; 432 case 'b': 433 bflag = true; 434 break; 435 case 'c': 436 cflag = true; 437 break; 438 case 'D': 439 if (strcasecmp(optarg, "skip") == 0) 440 devbehave = DEV_SKIP; 441 else if (strcasecmp(optarg, "read") == 0) 442 devbehave = DEV_READ; 443 else 444 errx(2, getstr(3), "--devices"); 445 break; 446 case 'd': 447 if (strcasecmp("recurse", optarg) == 0) { 448 Hflag = true; 449 dirbehave = DIR_RECURSE; 450 } else if (strcasecmp("skip", optarg) == 0) 451 dirbehave = DIR_SKIP; 452 else if (strcasecmp("read", optarg) == 0) 453 dirbehave = DIR_READ; 454 else 455 errx(2, getstr(3), "--directories"); 456 break; 457 case 'E': 458 grepbehave = GREP_EXTENDED; 459 break; 460 case 'e': 461 add_pattern(optarg, strlen(optarg)); 462 needpattern = 0; 463 break; 464 case 'F': 465 grepbehave = GREP_FIXED; 466 break; 467 case 'f': 468 read_patterns(optarg); 469 needpattern = 0; 470 break; 471 case 'G': 472 grepbehave = GREP_BASIC; 473 break; 474 case 'H': 475 Hflag = true; 476 break; 477 case 'h': 478 Hflag = false; 479 hflag = true; 480 break; 481 case 'I': 482 binbehave = BINFILE_SKIP; 483 break; 484 case 'i': 485 case 'y': 486 iflag = true; 487 cflags |= REG_ICASE; 488 break; 489 case 'J': 490 filebehave = FILE_BZIP; 491 break; 492 case 'L': 493 lflag = false; 494 Lflag = true; 495 break; 496 case 'l': 497 Lflag = false; 498 lflag = true; 499 break; 500 case 'm': 501 mflag = true; 502 errno = 0; 503 mcount = strtoull(optarg, &ep, 10); 504 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) || 505 ((errno == EINVAL) && (mcount == 0))) 506 err(2, NULL); 507 else if (ep[0] != '\0') { 508 errno = EINVAL; 509 err(2, NULL); 510 } 511 break; 512 case 'n': 513 nflag = 
true; 514 break; 515 case 'O': 516 linkbehave = LINK_EXPLICIT; 517 break; 518 case 'o': 519 oflag = true; 520 break; 521 case 'p': 522 linkbehave = LINK_SKIP; 523 break; 524 case 'q': 525 qflag = true; 526 break; 527 case 'S': 528 linkbehave = LINK_READ; 529 break; 530 case 'R': 531 case 'r': 532 dirbehave = DIR_RECURSE; 533 Hflag = true; 534 break; 535 case 's': 536 sflag = true; 537 break; 538 case 'U': 539 binbehave = BINFILE_BIN; 540 break; 541 case 'u': 542 case MMAP_OPT: 543 /* noop, compatibility */ 544 break; 545 case 'V': 546 printf(getstr(9), __progname, VERSION); 547 exit(0); 548 case 'v': 549 vflag = true; 550 break; 551 case 'w': 552 wflag = true; 553 break; 554 case 'x': 555 xflag = true; 556 break; 557 case 'Z': 558 filebehave = FILE_GZIP; 559 break; 560 case BIN_OPT: 561 if (strcasecmp("binary", optarg) == 0) 562 binbehave = BINFILE_BIN; 563 else if (strcasecmp("without-match", optarg) == 0) 564 binbehave = BINFILE_SKIP; 565 else if (strcasecmp("text", optarg) == 0) 566 binbehave = BINFILE_TEXT; 567 else 568 errx(2, getstr(3), "--binary-files"); 569 break; 570 case COLOR_OPT: 571 color = NULL; 572 if (optarg == NULL || strcasecmp("auto", optarg) == 0 || 573 strcasecmp("tty", optarg) == 0 || 574 strcasecmp("if-tty", optarg) == 0) { 575 char *term; 576 577 term = getenv("TERM"); 578 if (isatty(STDOUT_FILENO) && term != NULL && 579 strcasecmp(term, "dumb") != 0) 580 color = init_color("01;31"); 581 } else if (strcasecmp("always", optarg) == 0 || 582 strcasecmp("yes", optarg) == 0 || 583 strcasecmp("force", optarg) == 0) { 584 color = init_color("01;31"); 585 } else if (strcasecmp("never", optarg) != 0 && 586 strcasecmp("none", optarg) != 0 && 587 strcasecmp("no", optarg) != 0) 588 errx(2, getstr(3), "--color"); 589 break; 590 case LABEL_OPT: 591 label = optarg; 592 break; 593 case LINEBUF_OPT: 594 lbflag = true; 595 break; 596 case NULL_OPT: 597 nullflag = true; 598 break; 599 case R_INCLUDE_OPT: 600 finclude = true; 601 add_fpattern(optarg, INCL_PAT); 
602 break; 603 case R_EXCLUDE_OPT: 604 fexclude = true; 605 add_fpattern(optarg, EXCL_PAT); 606 break; 607 case R_DINCLUDE_OPT: 608 dinclude = true; 609 add_dpattern(optarg, INCL_PAT); 610 break; 611 case R_DEXCLUDE_OPT: 612 dexclude = true; 613 add_dpattern(optarg, EXCL_PAT); 614 break; 615 case HELP_OPT: 616 default: 617 usage(); 618 } 619 lastc = c; 620 newarg = optind != prevoptind; 621 prevoptind = optind; 622 } 623 aargc -= optind; 624 aargv += optind; 625 626 /* Fail if we don't have any pattern */ 627 if (aargc == 0 && needpattern) 628 usage(); 629 630 /* Process patterns from command line */ 631 if (aargc != 0 && needpattern) { 632 add_pattern(*aargv, strlen(*aargv)); 633 --aargc; 634 ++aargv; 635 } 636 637 switch (grepbehave) { 638 case GREP_FIXED: 639 case GREP_BASIC: 640 break; 641 case GREP_EXTENDED: 642 cflags |= REG_EXTENDED; 643 break; 644 default: 645 /* NOTREACHED */ 646 usage(); 647 } 648 649 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); 650 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); 651 /* 652 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance. 653 * Optimizations should be done there. 654 */ 655 /* Check if cheating is allowed (always is for fgrep). 
*/ 656 if (grepbehave == GREP_FIXED) { 657 for (i = 0; i < patterns; ++i) 658 fgrepcomp(&fg_pattern[i], pattern[i]); 659 } else { 660 for (i = 0; i < patterns; ++i) { 661 if (fastcomp(&fg_pattern[i], pattern[i])) { 662 /* Fall back to full regex library */ 663 c = regcomp(&r_pattern[i], pattern[i], cflags); 664 if (c != 0) { 665 regerror(c, &r_pattern[i], re_error, 666 RE_ERROR_BUF); 667 errx(2, "%s", re_error); 668 } 669 } 670 } 671 } 672 673 if (lbflag) 674 setlinebuf(stdout); 675 676 if ((aargc == 0 || aargc == 1) && !Hflag) 677 hflag = true; 678 679 if (aargc == 0) 680 exit(!procfile("-")); 681 682 if (dirbehave == DIR_RECURSE) 683 c = grep_tree(aargv); 684 else 685 for (c = 0; aargc--; ++aargv) { 686 if ((finclude || fexclude) && !file_matching(*aargv)) 687 continue; 688 c+= procfile(*aargv); 689 } 690 691 #ifndef WITHOUT_NLS 692 catclose(catalog); 693 #endif 694 695 /* Find out the correct return value according to the 696 results and the command line option. */ 697 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1)); 698 } 699