1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * grep - pattern matching program - combined grep, egrep, and fgrep. 29 * Based on MKS grep command, with XCU & Solaris mods. 30 */ 31 32 /* 33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved. 34 * 35 */ 36 37 /* 38 * Copyright 2020 Peter Tribble. 39 * Copyright 2018 RackTop Systems. 40 * Copyright 2018 Nexenta Systems, Inc. 41 * Copyright 2013 Damian Bogel. All rights reserved. 42 */ 43 44 #include <string.h> 45 #include <stdlib.h> 46 #include <ctype.h> 47 #include <stdarg.h> 48 #include <regex.h> 49 #include <limits.h> 50 #include <sys/types.h> 51 #include <sys/stat.h> 52 #include <fcntl.h> 53 #include <stdio.h> 54 #include <locale.h> 55 #include <wchar.h> 56 #include <errno.h> 57 #include <unistd.h> 58 #include <wctype.h> 59 #include <ftw.h> 60 #include <sys/param.h> 61 62 #define STDIN_FILENAME gettext("(standard input)") 63 64 #define BSIZE 512 /* Size of block for -b */ 65 #define BUFSIZE 8192 /* Input buffer size */ 66 #define MAX_DEPTH 1000 /* how deep to recurse */ 67 68 #define AFTER 1 /* 'After' Context */ 69 #define BEFORE 2 /* 'Before' Context */ 70 #define CONTEXT (AFTER|BEFORE) /* Full Context */ 71 72 #define M_CSETSIZE 256 /* singlebyte chars */ 73 static int bmglen; /* length of BMG pattern */ 74 static char *bmgpat; /* BMG pattern */ 75 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */ 76 77 typedef struct _PATTERN { 78 char *pattern; /* original pattern */ 79 wchar_t *wpattern; /* wide, lowercased pattern */ 80 struct _PATTERN *next; 81 regex_t re; /* compiled pattern */ 82 } PATTERN; 83 84 static PATTERN *patterns; 85 static char errstr[128]; /* regerror string buffer */ 86 static int regflags = 0; /* regcomp options */ 87 static int matched = 0; /* return of the grep() */ 88 static int errors = 0; /* count of errors */ 89 static uchar_t fgrep = 0; /* Invoked as fgrep */ 90 static uchar_t egrep = 0; /* Invoked as egrep */ 91 static boolean_t nvflag = B_TRUE; /* Print matching lines */ 92 static uchar_t cflag; /* Count of matches */ 93 static uchar_t iflag; /* Case insensitve matching */ 94 static uchar_t Hflag; /* Precede lines by file name */ 95 static uchar_t hflag; /* Suppress printing of filename */ 96 static uchar_t lflag; /* Print file names of matches */ 97 static uchar_t nflag; /* Precede lines by line number */ 98 static uchar_t rflag; /* Search directories recursively */ 99 static uchar_t bflag; /* Precede matches by block number */ 100 static uchar_t sflag; /* Suppress file error messages */ 101 static uchar_t qflag; /* Suppress standard output */ 102 static uchar_t wflag; /* Search for expression as a word */ 103 static uchar_t xflag; /* Anchoring */ 104 static uchar_t Eflag; /* Egrep or -E flag */ 105 static uchar_t Fflag; /* Fgrep or -F flag */ 106 static uchar_t Rflag; /* Like rflag, but follow symlinks */ 107 static uchar_t outfn; /* Put out file name */ 108 static uchar_t conflag; /* show context of matches */ 109 static char *cmdname; 110 111 static int use_wchar, use_bmg, mblocale; 112 113 static size_t outbuflen, prntbuflen, conbuflen; 114 static unsigned long conalen, conblen, conmatches; 115 static char *prntbuf, *conbuf; 116 static wchar_t *outline; 117 118 static void addfile(const char *fn); 119 static void addpattern(char *s); 120 static void fixpatterns(void); 121 static void usage(void); 122 static int grep(int, const char *); 123 static void bmgcomp(char *, int); 124 static char *bmgexec(char *, char *); 125 static int recursive(const char *, const struct stat *, int, struct FTW *); 126 static void process_path(const char *); 127 static void process_file(const char *, int); 128 129 /* 130 * mainline for grep 131 */ 132 int 133 main(int argc, char **argv) 134 { 135 char *ap, *test; 136 int c; 137 int fflag = 0; 138 int i, n_pattern = 0, n_file = 0; 139 char **pattern_list = NULL; 140 char **file_list = NULL; 141 142 (void) setlocale(LC_ALL, ""); 143 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 144 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 145 #endif 146 (void) textdomain(TEXT_DOMAIN); 147 148 /* 149 * true if this is running on the multibyte locale 150 */ 151 mblocale = (MB_CUR_MAX > 1); 152 /* 153 * Skip leading slashes 154 */ 155 cmdname = argv[0]; 156 if (ap = strrchr(cmdname, '/')) 157 cmdname = ap + 1; 158 159 ap = cmdname; 160 /* 161 * Detect egrep/fgrep via command name, map to -E and -F options. 162 */ 163 if (*ap == 'e' || *ap == 'E') { 164 regflags |= REG_EXTENDED; 165 egrep++; 166 } else { 167 if (*ap == 'f' || *ap == 'F') { 168 fgrep++; 169 } 170 } 171 172 /* check for non-standard "-line-count" option */ 173 for (i = 1; i < argc; i++) { 174 if (strcmp(argv[i], "--") == 0) 175 break; 176 177 /* isdigit() check prevents negative arguments */ 178 if ((argv[i][0] == '-') && isdigit(argv[i][1])) { 179 if (strlen(&argv[i][1]) != 180 strspn(&argv[i][1], "0123456789")) { 181 (void) fprintf(stderr, gettext( 182 "%s: Bad number flag\n"), argv[0]); 183 usage(); 184 } 185 186 errno = 0; 187 conalen = conblen = strtoul(&argv[i][1], (char **)NULL, 188 10); 189 190 if (errno != 0 || conalen >= ULONG_MAX) { 191 (void) fprintf(stderr, gettext( 192 "%s: Bad context argument\n"), argv[0]); 193 } else if (conalen) 194 conflag = CONTEXT; 195 196 while (i < argc) { 197 argv[i] = argv[i + 1]; 198 i++; 199 } 200 argc--; 201 } 202 } 203 204 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIRA:B:C:")) != EOF) { 205 unsigned long tval; 206 switch (c) { 207 case 'v': /* POSIX: negate matches */ 208 nvflag = B_FALSE; 209 break; 210 211 case 'c': /* POSIX: write count */ 212 cflag++; 213 break; 214 215 case 'i': /* POSIX: ignore case */ 216 iflag++; 217 regflags |= REG_ICASE; 218 break; 219 220 case 'l': /* POSIX: Write filenames only */ 221 lflag++; 222 break; 223 224 case 'n': /* POSIX: Write line numbers */ 225 nflag++; 226 break; 227 228 case 'r': /* Solaris: search recursively */ 229 rflag++; 230 break; 231 232 case 'b': /* Solaris: Write file block numbers */ 233 bflag++; 234 break; 235 236 case 's': /* POSIX: No error msgs for files */ 237 sflag++; 238 break; 239 240 case 'e': /* POSIX: pattern list */ 241 n_pattern++; 242 pattern_list = realloc(pattern_list, 243 sizeof (char *) * n_pattern); 244 if (pattern_list == NULL) { 245 (void) fprintf(stderr, 246 gettext("%s: out of memory\n"), 247 cmdname); 248 exit(2); 249 } 250 *(pattern_list + n_pattern - 1) = optarg; 251 break; 252 253 case 'f': /* POSIX: pattern file */ 254 fflag = 1; 255 n_file++; 256 file_list = realloc(file_list, 257 sizeof (char *) * n_file); 258 if (file_list == NULL) { 259 (void) fprintf(stderr, 260 gettext("%s: out of memory\n"), 261 cmdname); 262 exit(2); 263 } 264 *(file_list + n_file - 1) = optarg; 265 break; 266 267 /* based on options order h or H is set as in GNU grep */ 268 case 'h': /* Solaris: suppress printing of file name */ 269 hflag = 1; 270 Hflag = 0; 271 break; 272 /* Solaris: precede every match with file name */ 273 case 'H': 274 Hflag = 1; 275 hflag = 0; 276 break; 277 278 case 'q': /* POSIX: quiet: status only */ 279 qflag++; 280 break; 281 282 case 'w': /* Solaris: treat pattern as word */ 283 wflag++; 284 break; 285 286 case 'x': /* POSIX: full line matches */ 287 xflag++; 288 break; 289 290 case 'E': /* POSIX: Extended RE's */ 291 regflags |= REG_EXTENDED; 292 Eflag++; 293 break; 294 295 case 'F': /* POSIX: strings, not RE's */ 296 Fflag++; 297 break; 298 299 case 'R': /* Solaris: like rflag, but follow symlinks */ 300 Rflag++; 301 rflag++; 302 break; 303 304 case 'A': /* print N lines after each match */ 305 errno = 0; 306 conalen = strtoul(optarg, &test, 10); 307 /* *test will be non-null if optarg is negative */ 308 if (errno != 0 || *test != '\0' || 309 conalen >= ULONG_MAX) { 310 (void) fprintf(stderr, gettext( 311 "%s: Bad context argument: %s\n"), 312 argv[0], optarg); 313 exit(2); 314 } 315 if (conalen) 316 conflag |= AFTER; 317 else 318 conflag &= ~AFTER; 319 break; 320 case 'B': /* print N lines before each match */ 321 errno = 0; 322 conblen = strtoul(optarg, &test, 10); 323 /* *test will be non-null if optarg is negative */ 324 if (errno != 0 || *test != '\0' || 325 conblen >= ULONG_MAX) { 326 (void) fprintf(stderr, gettext( 327 "%s: Bad context argument: %s\n"), 328 argv[0], optarg); 329 exit(2); 330 } 331 if (conblen) 332 conflag |= BEFORE; 333 else 334 conflag &= ~BEFORE; 335 break; 336 case 'C': /* print N lines around each match */ 337 errno = 0; 338 tval = strtoul(optarg, &test, 10); 339 /* *test will be non-null if optarg is negative */ 340 if (errno != 0 || *test != '\0' || tval >= ULONG_MAX) { 341 (void) fprintf(stderr, gettext( 342 "%s: Bad context argument: %s\n"), 343 argv[0], optarg); 344 exit(2); 345 } 346 if (tval) { 347 if ((conflag & BEFORE) == 0) 348 conblen = tval; 349 if ((conflag & AFTER) == 0) 350 conalen = tval; 351 conflag = CONTEXT; 352 } 353 break; 354 355 default: 356 usage(); 357 } 358 } 359 /* 360 * If we're invoked as egrep or fgrep we need to do some checks 361 */ 362 363 if (egrep || fgrep) { 364 /* 365 * Use of -E or -F with egrep or fgrep is illegal 366 */ 367 if (Eflag || Fflag) 368 usage(); 369 /* 370 * Don't allow use of wflag with egrep / fgrep 371 */ 372 if (wflag) 373 usage(); 374 /* 375 * For Solaris the -s flag is equivalent to XCU -q 376 */ 377 if (sflag) 378 qflag++; 379 /* 380 * done with above checks - set the appropriate flags 381 */ 382 if (egrep) 383 Eflag++; 384 else /* Else fgrep */ 385 Fflag++; 386 } 387 388 if (wflag && (Eflag || Fflag)) { 389 /* 390 * -w cannot be specified with grep -F 391 */ 392 usage(); 393 } 394 395 /* 396 * -E and -F flags are mutually exclusive - check for this 397 */ 398 if (Eflag && Fflag) 399 usage(); 400 401 /* 402 * -l overrides -H like in GNU grep 403 */ 404 if (lflag) 405 Hflag = 0; 406 407 /* 408 * -c, -l and -q flags are mutually exclusive 409 * We have -c override -l like in Solaris. 410 * -q overrides -l & -c programmatically in grep() function. 411 */ 412 if (cflag && lflag) 413 lflag = 0; 414 415 argv += optind - 1; 416 argc -= optind - 1; 417 418 /* 419 * Now handling -e and -f option 420 */ 421 if (pattern_list) { 422 for (i = 0; i < n_pattern; i++) { 423 addpattern(pattern_list[i]); 424 } 425 free(pattern_list); 426 } 427 if (file_list) { 428 for (i = 0; i < n_file; i++) { 429 addfile(file_list[i]); 430 } 431 free(file_list); 432 } 433 434 /* 435 * No -e or -f? Make sure there is one more arg, use it as the pattern. 436 */ 437 if (patterns == NULL && !fflag) { 438 if (argc < 2) 439 usage(); 440 addpattern(argv[1]); 441 argc--; 442 argv++; 443 } 444 445 /* 446 * If -x flag is not specified or -i flag is specified 447 * with fgrep in a multibyte locale, need to use 448 * the wide character APIs. Otherwise, byte-oriented 449 * process will be done. 450 */ 451 use_wchar = Fflag && mblocale && (!xflag || iflag); 452 453 /* 454 * Compile Patterns and also decide if BMG can be used 455 */ 456 fixpatterns(); 457 458 /* Process all files: stdin, or rest of arg list */ 459 if (argc < 2) { 460 matched = grep(0, STDIN_FILENAME); 461 } else { 462 if (Hflag || (argc > 2 && hflag == 0)) 463 outfn = 1; /* Print filename on match line */ 464 for (argv++; *argv != NULL; argv++) { 465 process_path(*argv); 466 } 467 } 468 /* 469 * Return() here is used instead of exit 470 */ 471 472 (void) fflush(stdout); 473 474 if (errors) 475 return (2); 476 return (matched ? 0 : 1); 477 } 478 479 static void 480 process_path(const char *path) 481 { 482 struct stat st; 483 int walkflags = FTW_CHDIR; 484 char *buf = NULL; 485 486 if (rflag) { 487 if (stat(path, &st) != -1 && 488 (st.st_mode & S_IFMT) == S_IFDIR) { 489 if (!hflag) 490 outfn = 1; /* Print filename unless -h */ 491 492 /* 493 * Add trailing slash if arg 494 * is directory, to resolve symlinks. 495 */ 496 if (path[strlen(path) - 1] != '/') { 497 (void) asprintf(&buf, "%s/", path); 498 if (buf != NULL) 499 path = buf; 500 } 501 502 /* 503 * Search through subdirs if path is directory. 504 * Don't follow symlinks if Rflag is not set. 505 */ 506 if (!Rflag) 507 walkflags |= FTW_PHYS; 508 509 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) { 510 if (!sflag) 511 (void) fprintf(stderr, 512 gettext("%s: can't open \"%s\"\n"), 513 cmdname, path); 514 errors = 1; 515 } 516 return; 517 } 518 } 519 process_file(path, 0); 520 } 521 522 /* 523 * Read and process all files in directory recursively. 524 */ 525 static int 526 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw) 527 { 528 /* 529 * Process files and follow symlinks if Rflag set. 530 */ 531 if (info != FTW_F) { 532 /* Report broken symlinks and unreadable files */ 533 if (!sflag && 534 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) { 535 (void) fprintf(stderr, 536 gettext("%s: can't open \"%s\"\n"), cmdname, name); 537 } 538 return (0); 539 } 540 541 542 /* Skip devices and pipes if Rflag is not set */ 543 if (!Rflag && !S_ISREG(statp->st_mode)) 544 return (0); 545 /* Pass offset to relative name from FTW_CHDIR */ 546 process_file(name, ftw->base); 547 return (0); 548 } 549 550 /* 551 * Opens file and call grep function. 552 */ 553 static void 554 process_file(const char *name, int base) 555 { 556 int fd; 557 558 if ((fd = open(name + base, O_RDONLY)) == -1) { 559 errors = 1; 560 if (!sflag) /* Silent mode */ 561 (void) fprintf(stderr, gettext( 562 "%s: can't open \"%s\"\n"), 563 cmdname, name); 564 return; 565 } 566 matched |= grep(fd, name); 567 (void) close(fd); 568 569 if (ferror(stdout)) { 570 (void) fprintf(stderr, gettext( 571 "%s: error writing to stdout\n"), 572 cmdname); 573 (void) fflush(stdout); 574 exit(2); 575 } 576 577 } 578 579 /* 580 * Add a file of strings to the pattern list. 581 */ 582 static void 583 addfile(const char *fn) 584 { 585 FILE *fp; 586 char *inbuf; 587 char *bufp; 588 size_t bufsiz, buflen, bufused; 589 590 /* 591 * Open the pattern file 592 */ 593 if ((fp = fopen(fn, "r")) == NULL) { 594 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), 595 cmdname, fn); 596 exit(2); 597 } 598 bufsiz = BUFSIZE; 599 if ((inbuf = malloc(bufsiz)) == NULL) { 600 (void) fprintf(stderr, 601 gettext("%s: out of memory\n"), cmdname); 602 exit(2); 603 } 604 bufp = inbuf; 605 bufused = 0; 606 /* 607 * Read in the file, reallocing as we need more memory 608 */ 609 while (fgets(bufp, bufsiz - bufused, fp) != NULL) { 610 buflen = strlen(bufp); 611 bufused += buflen; 612 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') { 613 /* 614 * if this line does not fit to the buffer, 615 * realloc larger buffer 616 */ 617 bufsiz += BUFSIZE; 618 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) { 619 (void) fprintf(stderr, 620 gettext("%s: out of memory\n"), 621 cmdname); 622 exit(2); 623 } 624 bufp = inbuf + bufused; 625 continue; 626 } 627 if (bufp[buflen - 1] == '\n') { 628 bufp[--buflen] = '\0'; 629 } 630 addpattern(inbuf); 631 632 bufp = inbuf; 633 bufused = 0; 634 } 635 free(inbuf); 636 free(prntbuf); 637 free(conbuf); 638 (void) fclose(fp); 639 } 640 641 /* 642 * Add a string to the pattern list. 643 */ 644 static void 645 addpattern(char *s) 646 { 647 PATTERN *pp; 648 char *wordbuf; 649 char *np; 650 651 for (; ; ) { 652 np = strchr(s, '\n'); 653 if (np != NULL) 654 *np = '\0'; 655 if ((pp = malloc(sizeof (PATTERN))) == NULL) { 656 (void) fprintf(stderr, gettext( 657 "%s: out of memory\n"), 658 cmdname); 659 exit(2); 660 } 661 if (wflag) { 662 /* 663 * Solaris wflag support: Add '<' '>' to pattern to 664 * select it as a word. Doesn't make sense with -F 665 * but we're Libertarian. 666 */ 667 size_t slen, wordlen; 668 669 slen = strlen(s); 670 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */ 671 if ((wordbuf = malloc(wordlen)) == NULL) { 672 (void) fprintf(stderr, 673 gettext("%s: out of memory\n"), 674 cmdname); 675 exit(2); 676 } 677 (void) strcpy(wordbuf, "\\<"); 678 (void) strcpy(wordbuf + 2, s); 679 (void) strcpy(wordbuf + 2 + slen, "\\>"); 680 } else { 681 if ((wordbuf = strdup(s)) == NULL) { 682 (void) fprintf(stderr, 683 gettext("%s: out of memory\n"), 684 cmdname); 685 exit(2); 686 } 687 } 688 pp->pattern = wordbuf; 689 pp->next = patterns; 690 patterns = pp; 691 if (np == NULL) 692 break; 693 s = np + 1; 694 } 695 } 696 697 /* 698 * Fix patterns. 699 * Must do after all arguments read, in case later -i option. 700 */ 701 static void 702 fixpatterns(void) 703 { 704 PATTERN *pp; 705 int rv, fix_pattern, npatterns; 706 707 /* 708 * Fix the specified pattern if -x is specified. 709 */ 710 fix_pattern = !Fflag && xflag; 711 712 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) { 713 npatterns++; 714 if (fix_pattern) { 715 char *cp, *cq; 716 size_t plen, nplen; 717 718 plen = strlen(pp->pattern); 719 /* '^' pattern '$' */ 720 nplen = 1 + plen + 1 + 1; 721 if ((cp = malloc(nplen)) == NULL) { 722 (void) fprintf(stderr, 723 gettext("%s: out of memory\n"), 724 cmdname); 725 exit(2); 726 } 727 cq = cp; 728 *cq++ = '^'; 729 cq = strcpy(cq, pp->pattern) + plen; 730 *cq++ = '$'; 731 *cq = '\0'; 732 free(pp->pattern); 733 pp->pattern = cp; 734 } 735 736 if (Fflag) { 737 if (use_wchar) { 738 /* 739 * Fflag && mblocale && iflag 740 * Fflag && mblocale && !xflag 741 */ 742 size_t n; 743 n = strlen(pp->pattern) + 1; 744 if ((pp->wpattern = 745 malloc(sizeof (wchar_t) * n)) == NULL) { 746 (void) fprintf(stderr, 747 gettext("%s: out of memory\n"), 748 cmdname); 749 exit(2); 750 } 751 if (mbstowcs(pp->wpattern, pp->pattern, n) == 752 (size_t)-1) { 753 (void) fprintf(stderr, 754 gettext("%s: failed to convert " 755 "\"%s\" to wide-characters\n"), 756 cmdname, pp->pattern); 757 exit(2); 758 } 759 if (iflag) { 760 wchar_t *wp; 761 for (wp = pp->wpattern; *wp != L'\0'; 762 wp++) { 763 *wp = towlower((wint_t)*wp); 764 } 765 } 766 free(pp->pattern); 767 } else { 768 /* 769 * Fflag && mblocale && !iflag 770 * Fflag && !mblocale && iflag 771 * Fflag && !mblocale && !iflag 772 */ 773 if (iflag) { 774 unsigned char *cp; 775 for (cp = (unsigned char *)pp->pattern; 776 *cp != '\0'; cp++) { 777 *cp = tolower(*cp); 778 } 779 } 780 } 781 /* 782 * fgrep: No regular expressions. 783 */ 784 continue; 785 } 786 787 /* 788 * For non-fgrep, compile the regular expression, 789 * give an informative error message, and exit if 790 * it didn't compile. 791 */ 792 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { 793 (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); 794 (void) fprintf(stderr, 795 gettext("%s: RE error in %s: %s\n"), 796 cmdname, pp->pattern, errstr); 797 exit(2); 798 } 799 free(pp->pattern); 800 } 801 802 /* 803 * Decide if we are able to run the Boyer-Moore-Gosper algorithm. 804 * Use the Boyer-Moore-Gosper algorithm if: 805 * - fgrep (Fflag) 806 * - singlebyte locale (!mblocale) 807 * - no ignoring case (!iflag) 808 * - no printing line numbers (!nflag) 809 * - no negating the output (nvflag) 810 * - only one pattern (npatterns == 1) 811 * - non zero length pattern (strlen(patterns->pattern) != 0) 812 * - no context required (conflag == 0) 813 * 814 * It's guaranteed patterns->pattern is still alive 815 * when Fflag && !mblocale. 816 */ 817 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && 818 (npatterns == 1) && (strlen(patterns->pattern) != 0) && 819 conflag == 0; 820 } 821 822 /* 823 * Search a newline from the beginning of the string 824 */ 825 static char * 826 find_nl(const char *ptr, size_t len) 827 { 828 while (len-- != 0) { 829 if (*ptr++ == '\n') { 830 return ((char *)--ptr); 831 } 832 } 833 return (NULL); 834 } 835 836 /* 837 * Search a newline from the end of the string 838 */ 839 static char * 840 rfind_nl(const char *ptr, size_t len) 841 { 842 const char *uptr = ptr + len; 843 while (len--) { 844 if (*--uptr == '\n') { 845 return ((char *)uptr); 846 } 847 } 848 return (NULL); 849 } 850 851 /* 852 * Duplicate the specified string converting each character 853 * into a lower case. 854 */ 855 static char * 856 istrdup(const char *s1) 857 { 858 static size_t ibuflen = 0; 859 static char *ibuf = NULL; 860 size_t slen; 861 char *p; 862 863 slen = strlen(s1); 864 if (slen >= ibuflen) { 865 /* ibuf does not fit to s1 */ 866 ibuflen = slen + 1; 867 ibuf = realloc(ibuf, ibuflen); 868 if (ibuf == NULL) { 869 (void) fprintf(stderr, 870 gettext("%s: out of memory\n"), cmdname); 871 exit(2); 872 } 873 } 874 p = ibuf; 875 do { 876 *p++ = tolower(*s1); 877 } while (*s1++ != '\0'); 878 return (ibuf); 879 } 880 881 /* 882 * Do grep on a single file. 883 * Return true in any lines matched. 884 * 885 * We have two strategies: 886 * The fast one is used when we have a single pattern with 887 * a string known to occur in the pattern. We can then 888 * do a BMG match on the whole buffer. 889 * This is an order of magnitude faster. 890 * Otherwise we split the buffer into lines, 891 * and check for a match on each line. 892 */ 893 static int 894 grep(int fd, const char *fn) 895 { 896 PATTERN *pp; 897 off_t data_len; /* length of the data chunk */ 898 off_t line_len; /* length of the current line */ 899 off_t line_offset; /* current line's offset from the beginning */ 900 off_t blkoffset; /* line_offset but context-compatible */ 901 long long lineno, linenum; 902 long long matches = 0; /* Number of matching lines */ 903 long long conacnt = 0, conbcnt = 0; /* context line count */ 904 int newlinep; /* 0 if the last line of file has no newline */ 905 char *ptr, *ptrend, *prntptr, *prntptrend; 906 char *nextptr = NULL, *nextend = NULL; 907 char *conptr = NULL, *conptrend = NULL; 908 char *matchptr = NULL; 909 int conaprnt = 0, conbprnt = 0, lastmatch = 0; 910 boolean_t nearmatch; /* w/in N+1 of last match */ 911 boolean_t havematch = B_FALSE; /* have a match in context */ 912 size_t prntlen; 913 914 if (patterns == NULL) 915 return (0); /* no patterns to match -- just return */ 916 917 pp = patterns; 918 919 if (use_bmg) { 920 bmgcomp(pp->pattern, strlen(pp->pattern)); 921 } 922 923 if (use_wchar && outline == NULL) { 924 outbuflen = BUFSIZE + 1; 925 outline = malloc(sizeof (wchar_t) * outbuflen); 926 if (outline == NULL) { 927 (void) fprintf(stderr, gettext("%s: out of memory\n"), 928 cmdname); 929 exit(2); 930 } 931 } 932 933 if (prntbuf == NULL) { 934 prntbuflen = BUFSIZE; 935 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { 936 (void) fprintf(stderr, gettext("%s: out of memory\n"), 937 cmdname); 938 exit(2); 939 } 940 } 941 942 if (conflag != 0 && (conbuf == NULL)) { 943 conbuflen = BUFSIZE; 944 if ((conbuf = malloc(BUFSIZE+1)) == NULL) { 945 (void) fprintf(stderr, gettext("%s: out of memory\n"), 946 cmdname); 947 exit(2); 948 } 949 } 950 951 nearmatch = (conmatches != 0); 952 blkoffset = line_offset = 0; 953 lineno = 0; 954 linenum = 1; 955 newlinep = 1; 956 data_len = 0; 957 for (; ; ) { 958 long count; 959 off_t offset = 0; 960 char separate; 961 boolean_t last_ctx = B_FALSE, eof = B_FALSE; 962 963 if (data_len == 0) { 964 /* 965 * If no data in the buffer, reset ptr 966 */ 967 ptr = prntbuf; 968 if (conflag != 0 && conptr == NULL) { 969 conptr = conbuf; 970 conptrend = conptr - 1; 971 } 972 } 973 if (ptr == prntbuf) { 974 /* 975 * The current data chunk starts from prntbuf. 976 * This means either the buffer has no data 977 * or the buffer has no newline. 978 * So, read more data from input. 979 */ 980 count = read(fd, ptr + data_len, prntbuflen - data_len); 981 if (count < 0) { 982 /* read error */ 983 if (cflag) { 984 if (outfn && !rflag) { 985 (void) fprintf(stdout, 986 "%s:", fn); 987 } 988 if (!qflag && !rflag) { 989 (void) fprintf(stdout, "%lld\n", 990 matches); 991 } 992 } 993 return (0); 994 } else if (count == 0) { 995 /* no new data */ 996 eof = B_TRUE; 997 998 if (data_len == 0) { 999 /* end of file already reached */ 1000 if (conflag != 0) { 1001 if (conptrend >= conptr) 1002 *conptrend = '\n'; 1003 last_ctx = B_TRUE; 1004 goto L_next_line; 1005 } else { 1006 goto out; 1007 } 1008 } 1009 /* last line of file has no newline */ 1010 ptrend = ptr + data_len; 1011 newlinep = 0; 1012 goto L_start_process; 1013 } 1014 offset = data_len; 1015 data_len += count; 1016 } 1017 1018 /* 1019 * Look for newline in the chunk 1020 * between ptr + offset and ptr + data_len - offset. 1021 */ 1022 ptrend = find_nl(ptr + offset, data_len - offset); 1023 if (ptrend == NULL) { 1024 /* no newline found in this chunk */ 1025 if (ptr > prntbuf) { 1026 /* 1027 * Move remaining data to the beginning 1028 * of the buffer. 1029 * Remaining data lie from ptr for 1030 * data_len bytes. 1031 */ 1032 (void) memmove(prntbuf, ptr, data_len); 1033 } 1034 if (data_len == prntbuflen) { 1035 /* 1036 * Not enough room in the buffer 1037 */ 1038 if (prntbuflen > SIZE_MAX - BUFSIZE) { 1039 (void) fprintf(stderr, 1040 gettext("%s: buflen would" 1041 " overflow\n"), 1042 cmdname); 1043 exit(2); 1044 } 1045 1046 prntbuflen += BUFSIZE; 1047 prntbuf = realloc(prntbuf, prntbuflen + 1); 1048 if (prntbuf == NULL) { 1049 (void) fprintf(stderr, 1050 gettext("%s: out of memory\n"), 1051 cmdname); 1052 exit(2); 1053 } 1054 } 1055 ptr = prntbuf; 1056 /* read the next input */ 1057 continue; 1058 } 1059 L_start_process: 1060 1061 /* 1062 * Beginning of the chunk: ptr 1063 * End of the chunk: ptr + data_len 1064 * Beginning of the line: ptr 1065 * End of the line: ptrend 1066 * 1067 * conptr: Beginning of the context. 1068 * conptrend: If context is empty, conptr - 1 (invalid memory). 1069 * Otherwise, Last newline in the context. 1070 */ 1071 1072 if (use_bmg) { 1073 /* 1074 * Use Boyer-Moore-Gosper algorithm to find out if 1075 * this chunk (not this line) contains the specified 1076 * pattern. If not, restart from the last line 1077 * of this chunk. 1078 */ 1079 char *bline; 1080 bline = bmgexec(ptr, ptr + data_len); 1081 if (bline == NULL) { 1082 /* 1083 * No pattern found in this chunk. 1084 * Need to find the last line 1085 * in this chunk. 1086 */ 1087 ptrend = rfind_nl(ptr, data_len); 1088 1089 /* 1090 * When this chunk does not contain newline, 1091 * ptrend becomes NULL, which should happen 1092 * when the last line of file does not end 1093 * with a newline. At such a point, 1094 * newlinep should have been set to 0. 1095 * Therefore, just after jumping to 1096 * L_skip_line, the main for-loop quits, 1097 * and the line_len value won't be 1098 * used. 1099 */ 1100 line_len = ptrend - ptr; 1101 goto L_skip_line; 1102 } 1103 if (bline > ptrend) { 1104 /* 1105 * Pattern found not in the first line 1106 * of this chunk. 1107 * Discard the first line. 1108 */ 1109 line_len = ptrend - ptr; 1110 goto L_skip_line; 1111 } 1112 /* 1113 * Pattern found in the first line of this chunk. 1114 * Using this result. 1115 */ 1116 *ptrend = '\0'; 1117 line_len = ptrend - ptr; 1118 1119 /* 1120 * before jumping to L_next_line, 1121 * need to handle xflag if specified 1122 */ 1123 if (xflag && (line_len != bmglen || 1124 strcmp(bmgpat, ptr) != 0)) { 1125 /* didn't match */ 1126 pp = NULL; 1127 } else { 1128 pp = patterns; /* to make it happen */ 1129 } 1130 goto L_next_line; 1131 } 1132 lineno++; 1133 /* 1134 * Line starts from ptr and ends at ptrend. 1135 * line_len will be the length of the line. 1136 */ 1137 *ptrend = '\0'; 1138 line_len = ptrend - ptr; 1139 1140 /* 1141 * From now, the process will be performed based 1142 * on the line from ptr to ptrend. 1143 */ 1144 if (use_wchar) { 1145 size_t len; 1146 1147 if (line_len >= outbuflen) { 1148 outbuflen = line_len + 1; 1149 outline = realloc(outline, 1150 sizeof (wchar_t) * outbuflen); 1151 if (outline == NULL) { 1152 (void) fprintf(stderr, 1153 gettext("%s: out of memory\n"), 1154 cmdname); 1155 exit(2); 1156 } 1157 } 1158 1159 len = mbstowcs(outline, ptr, line_len); 1160 if (len == (size_t)-1) { 1161 (void) fprintf(stderr, gettext( 1162 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1163 cmdname, fn, lineno); 1164 /* never match a line with invalid sequence */ 1165 goto L_skip_line; 1166 } 1167 outline[len] = L'\0'; 1168 1169 if (iflag) { 1170 wchar_t *cp; 1171 for (cp = outline; *cp != '\0'; cp++) { 1172 *cp = towlower((wint_t)*cp); 1173 } 1174 } 1175 1176 if (xflag) { 1177 for (pp = patterns; pp; pp = pp->next) { 1178 if (outline[0] == pp->wpattern[0] && 1179 wcscmp(outline, 1180 pp->wpattern) == 0) { 1181 /* matched */ 1182 break; 1183 } 1184 } 1185 } else { 1186 for (pp = patterns; pp; pp = pp->next) { 1187 if (wcswcs(outline, pp->wpattern) 1188 != NULL) { 1189 /* matched */ 1190 break; 1191 } 1192 } 1193 } 1194 } else if (Fflag) { 1195 /* fgrep in byte-oriented handling */ 1196 char *fptr; 1197 if (iflag) { 1198 fptr = istrdup(ptr); 1199 } else { 1200 fptr = ptr; 1201 } 1202 if (xflag) { 1203 /* fgrep -x */ 1204 for (pp = patterns; pp; pp = pp->next) { 1205 if (fptr[0] == pp->pattern[0] && 1206 strcmp(fptr, pp->pattern) == 0) { 1207 /* matched */ 1208 break; 1209 } 1210 } 1211 } else { 1212 for (pp = patterns; pp; pp = pp->next) { 1213 if (strstr(fptr, pp->pattern) != NULL) { 1214 /* matched */ 1215 break; 1216 } 1217 } 1218 } 1219 } else { 1220 /* grep or egrep */ 1221 for (pp = patterns; pp; pp = pp->next) { 1222 int rv; 1223 1224 rv = regexec(&pp->re, ptr, 0, NULL, 0); 1225 if (rv == REG_OK) { 1226 /* matched */ 1227 break; 1228 } 1229 1230 switch (rv) { 1231 case REG_NOMATCH: 1232 break; 1233 case REG_ECHAR: 1234 (void) fprintf(stderr, gettext( 1235 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1236 cmdname, fn, lineno); 1237 break; 1238 default: 1239 (void) regerror(rv, &pp->re, errstr, 1240 sizeof (errstr)); 1241 (void) fprintf(stderr, gettext( 1242 "%s: input file \"%s\": line %lld: %s\n"), 1243 cmdname, fn, lineno, errstr); 1244 exit(2); 1245 } 1246 } 1247 } 1248 1249 /* 1250 * Context is set up as follows: 1251 * For a 'Before' context, we maintain a set of pointers 1252 * containing 'N' lines of context. If the current number of 1253 * lines contained is greater than N, and N isn't a match, the 1254 * start pointer is moved forward to the next newline. 1255 * 1256 * If we ever find a match, we print out immediately. 1257 * 'nearmatch' tells us if we're within N+1 lines of the last 1258 * match ; if we are, and we find another match, we don't 1259 * separate the matches. 'nearmatch' becomes false when 1260 * a line gets rotated out of the context. 1261 * 1262 * For an 'After' context, we simply wait until we've found a 1263 * match, then create a context N+1 lines big. If we don't find 1264 * a match within the context, we print out the current context. 1265 * Otherwise, we save a reference to the new matching line, 1266 * print out the other context, and reset our context pointers 1267 * to the new matching line. 1268 * 1269 * 'nearmatch' becomes false when we find a non-matching line 1270 * that isn't a part of any context. 1271 * 1272 * A full-context is implemented as a combination of the 1273 * 'Before' and 'After' context logic. Before we find a match, 1274 * we follow the Before logic. When we find a match, we 1275 * follow the After logic. 'nearmatch' is handled by the Before 1276 * logic. 1277 */ 1278 1279 if (conflag == 0) 1280 goto L_next_line; 1281 1282 /* Do we have room to add this line to the context buffer? */ 1283 while ((line_len + 1) > (conbuflen - 1284 ((conptrend >= conptr) ? conptrend - conbuf : 0))) { 1285 char *oldconbuf = conbuf; 1286 char *oldconptr = conptr; 1287 long tmp = matchptr - conptr; 1288 1289 if (conbuflen > SIZE_MAX - BUFSIZE) { 1290 (void) fprintf(stderr, 1291 gettext("%s: buflen would overflow\n"), 1292 cmdname); 1293 exit(2); 1294 } 1295 1296 conbuflen += BUFSIZE; 1297 conbuf = realloc(conbuf, conbuflen + 1); 1298 if (conbuf == NULL) { 1299 (void) fprintf(stderr, 1300 gettext("%s: out of memory\n"), 1301 cmdname); 1302 exit(2); 1303 } 1304 1305 conptr = conbuf + (conptr - oldconbuf); 1306 conptrend = conptr + (conptrend - oldconptr); 1307 if (matchptr) 1308 matchptr = conptr + tmp; 1309 } 1310 (void) memcpy(conptrend + 1, ptr, line_len); 1311 conptrend += line_len + 1; 1312 *conptrend = '\n'; 1313 1314 if (nvflag == (pp != NULL)) { 1315 /* matched */ 1316 if (havematch) { 1317 if ((conflag & AFTER) != 0) { 1318 conaprnt = 1; 1319 nextend = conptrend; 1320 conptrend = conptr + lastmatch; 1321 nextptr = conptrend + 1; 1322 *nextend = '\n'; 1323 } 1324 } else { 1325 if (conflag == AFTER) { 1326 conptr = conptrend - (line_len); 1327 linenum = lineno; 1328 } 1329 blkoffset = line_offset - 1330 (conptrend - conptr - line_len); 1331 } 1332 1333 if (conflag == BEFORE) 1334 conbprnt = 1; 1335 1336 lastmatch = conptrend - conptr; 1337 havematch = B_TRUE; 1338 goto L_next_line; 1339 } 1340 1341 if (!havematch) { 1342 if ((conflag & BEFORE) != 0) { 1343 if (conbcnt >= conblen) { 1344 char *tmp = conptr; 1345 conptr = find_nl(conptr, 1346 conptrend - conptr) + 1; 1347 if (bflag) 1348 blkoffset += conptr - tmp; 1349 linenum++; 1350 nearmatch = B_TRUE; 1351 } else { 1352 conbcnt++; 1353 } 1354 } 1355 if (conflag == AFTER) 1356 nearmatch = B_TRUE; 1357 } else { 1358 if (++conacnt >= conalen && !conaprnt && conalen) 1359 conaprnt = 1; 1360 else 1361 lastmatch = conptrend - conptr; 1362 } 1363 1364 L_next_line: 1365 /* 1366 * Here, if pp points to non-NULL, something has been matched 1367 * to the pattern. 1368 */ 1369 if (!last_ctx && nvflag == (pp != NULL)) { 1370 matches++; 1371 if (!nextend) 1372 matchptr = (conflag != 0) ? conptrend : ptrend; 1373 } 1374 1375 /* 1376 * Set up some print context so that we can treat 1377 * single-line matches as a zero-N context. 1378 * Apply CLI flags to each line of the context. 1379 * 1380 * For context, we only print if we both have a match and are 1381 * either at the end of the data stream, or we've previously 1382 * declared that we want to print for a particular context. 1383 */ 1384 if (havematch && (eof || conaprnt || conbprnt)) { 1385 1386 /* 1387 * We'd normally do this earlier, but we had to 1388 * escape early because we reached the end of the data. 1389 */ 1390 if (eof && nextptr) 1391 conptrend = nextend; 1392 1393 prntlen = conptrend - conptr + 1; 1394 prntptr = conptr; 1395 if (conmatches++ && nearmatch && !cflag) 1396 (void) fwrite("--\n", 1, 3, stdout); 1397 } else if (conflag == 0 && nvflag == (pp != NULL)) { 1398 *ptrend = '\n'; 1399 prntlen = line_len + 1; 1400 prntptr = ptr; 1401 linenum = lineno; 1402 blkoffset = line_offset; 1403 } else if (eof) { 1404 /* No match and no more data */ 1405 goto out; 1406 } else { 1407 /* No match, or we're not done building context */ 1408 goto L_skip_line; 1409 } 1410 1411 prntptrend = prntptr - 1; 1412 while ((prntptrend = find_nl(prntptrend + 1, 1413 prntlen)) != NULL) { 1414 1415 /* 1416 * GNU grep uses '-' for context lines and ':' for 1417 * matching lines, so replicate that here. 1418 */ 1419 if (prntptrend == matchptr) { 1420 if (eof && nextptr) { 1421 matchptr = nextend; 1422 nextptr = NULL; 1423 } else { 1424 matchptr = NULL; 1425 } 1426 separate = ':'; 1427 } else { 1428 separate = '-'; 1429 } 1430 1431 /* 1432 * Handle q, l, and c flags. 1433 */ 1434 if (qflag) { 1435 /* no need to continue */ 1436 /* 1437 * End of this line is ptrend. 1438 * We have read up to ptr + data_len. 1439 */ 1440 off_t pos; 1441 pos = ptr + data_len - (ptrend + 1); 1442 (void) lseek(fd, -pos, SEEK_CUR); 1443 exit(0); 1444 } 1445 if (lflag) { 1446 (void) printf("%s\n", fn); 1447 goto out; 1448 } 1449 if (!cflag) { 1450 if (Hflag || outfn) { 1451 (void) printf("%s%c", fn, separate); 1452 } 1453 if (bflag) { 1454 (void) printf("%lld%c", (offset_t) 1455 (blkoffset / BSIZE), separate); 1456 } 1457 if (nflag) { 1458 (void) printf("%lld%c", linenum, 1459 separate); 1460 } 1461 (void) fwrite(prntptr, 1, 1462 prntptrend - prntptr + 1, stdout); 1463 } 1464 if (ferror(stdout)) { 1465 return (0); 1466 } 1467 linenum++; 1468 prntlen -= prntptrend - prntptr + 1; 1469 blkoffset += prntptrend - prntptr + 1; 1470 prntptr = prntptrend + 1; 1471 } 1472 1473 if (eof) 1474 goto out; 1475 1476 /* 1477 * Update context buffer and variables post-print 1478 */ 1479 if (conflag != 0) { 1480 conptr = conbuf; 1481 conaprnt = conbprnt = 0; 1482 nearmatch = B_FALSE; 1483 conacnt = conbcnt = 0; 1484 1485 if (nextptr) { 1486 (void) memmove(conbuf, nextptr, 1487 nextend - nextptr + 1); 1488 blkoffset += nextptr - conptrend - 1; 1489 conptrend = conptr + (nextend - nextptr); 1490 matchptr = conptrend; 1491 linenum = lineno; 1492 lastmatch = conptrend - conptr; 1493 havematch = B_TRUE; 1494 } else { 1495 conptrend = conptr - 1; 1496 conacnt = 0; 1497 lastmatch = 0; 1498 havematch = B_FALSE; 1499 } 1500 nextptr = nextend = NULL; 1501 } 1502 1503 L_skip_line: 1504 if (!newlinep) 1505 break; 1506 1507 data_len -= line_len + 1; 1508 line_offset += line_len + 1; 1509 ptr = ptrend + 1; 1510 } 1511 1512 out: 1513 if (cflag) { 1514 if (Hflag || outfn) { 1515 (void) printf("%s:", fn); 1516 } 1517 if (!qflag) { 1518 (void) printf("%lld\n", matches); 1519 } 1520 } 1521 return (matches != 0); 1522 } 1523 1524 /* 1525 * usage message for grep 1526 */ 1527 static void 1528 usage(void) 1529 { 1530 (void) fprintf(stderr, gettext("usage: %5s"), cmdname); 1531 if (!egrep && !fgrep) 1532 (void) fprintf(stderr, gettext(" [-E|-F]")); 1533 (void) fprintf(stderr, gettext(" [-bchHilnqrRsvx] [-A num] [-B num] " 1534 "[-C num|-num]\n [-e pattern_list]... " 1535 "[-f pattern_file]... [pattern_list] [file]...\n")); 1536 exit(2); 1537 /* NOTREACHED */ 1538 } 1539 1540 /* 1541 * Compile literal pattern into BMG tables 1542 */ 1543 static void 1544 bmgcomp(char *pat, int len) 1545 { 1546 int i; 1547 int tlen; 1548 unsigned char *uc = (unsigned char *)pat; 1549 1550 bmglen = len; 1551 bmgpat = pat; 1552 1553 for (i = 0; i < M_CSETSIZE; i++) { 1554 bmgtab[i] = len; 1555 } 1556 1557 len--; 1558 for (tlen = len, i = 0; i <= len; i++, tlen--) { 1559 bmgtab[*uc++] = tlen; 1560 } 1561 } 1562 1563 /* 1564 * BMG search. 1565 */ 1566 static char * 1567 bmgexec(char *str, char *end) 1568 { 1569 int t; 1570 char *k, *s, *p; 1571 1572 k = str + bmglen - 1; 1573 if (bmglen == 1) { 1574 return (memchr(str, bmgpat[0], end - str)); 1575 } 1576 for (; ; ) { 1577 /* inner loop, should be most optimized */ 1578 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) { 1579 k += t; 1580 } 1581 if (k >= end) { 1582 return (NULL); 1583 } 1584 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) { 1585 if (p == bmgpat) { 1586 return (s); 1587 } 1588 } 1589 k++; 1590 } 1591 /* NOTREACHED */ 1592 } 1593