1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * grep - pattern matching program - combined grep, egrep, and fgrep. 29 * Based on MKS grep command, with XCU & Solaris mods. 30 */ 31 32 /* 33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved. 34 * 35 */ 36 37 /* 38 * Copyright 2018 RackTop Systems. 39 * Copyright 2018 Nexenta Systems, Inc. 40 * Copyright 2013 Damian Bogel. All rights reserved. 41 */ 42 43 #include <string.h> 44 #include <stdlib.h> 45 #include <ctype.h> 46 #include <stdarg.h> 47 #include <regex.h> 48 #include <limits.h> 49 #include <sys/types.h> 50 #include <sys/stat.h> 51 #include <fcntl.h> 52 #include <stdio.h> 53 #include <locale.h> 54 #include <wchar.h> 55 #include <errno.h> 56 #include <unistd.h> 57 #include <wctype.h> 58 #include <ftw.h> 59 #include <sys/param.h> 60 61 #define STDIN_FILENAME gettext("(standard input)") 62 63 #define BSIZE 512 /* Size of block for -b */ 64 #define BUFSIZE 8192 /* Input buffer size */ 65 #define MAX_DEPTH 1000 /* how deep to recurse */ 66 67 #define AFTER 1 /* 'After' Context */ 68 #define BEFORE 2 /* 'Before' Context */ 69 #define CONTEXT (AFTER|BEFORE) /* Full Context */ 70 71 #define M_CSETSIZE 256 /* singlebyte chars */ 72 static int bmglen; /* length of BMG pattern */ 73 static char *bmgpat; /* BMG pattern */ 74 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */ 75 76 typedef struct _PATTERN { 77 char *pattern; /* original pattern */ 78 wchar_t *wpattern; /* wide, lowercased pattern */ 79 struct _PATTERN *next; 80 regex_t re; /* compiled pattern */ 81 } PATTERN; 82 83 static PATTERN *patterns; 84 static char errstr[128]; /* regerror string buffer */ 85 static int regflags = 0; /* regcomp options */ 86 static int matched = 0; /* return of the grep() */ 87 static int errors = 0; /* count of errors */ 88 static uchar_t fgrep = 0; /* Invoked as fgrep */ 89 static uchar_t egrep = 0; /* Invoked as egrep */ 90 static boolean_t nvflag = B_TRUE; /* Print matching lines */ 91 static uchar_t cflag; /* Count of matches */ 92 static uchar_t iflag; /* Case insensitve matching */ 93 static uchar_t Hflag; /* Precede lines by file name */ 94 static uchar_t hflag; /* Supress printing of filename */ 95 static uchar_t lflag; /* Print file names of matches */ 96 static uchar_t nflag; /* Precede lines by line number */ 97 static uchar_t rflag; /* Search directories recursively */ 98 static uchar_t bflag; /* Preccede matches by block number */ 99 static uchar_t sflag; /* Suppress file error messages */ 100 static uchar_t qflag; /* Suppress standard output */ 101 static uchar_t wflag; /* Search for expression as a word */ 102 static uchar_t xflag; /* Anchoring */ 103 static uchar_t Eflag; /* Egrep or -E flag */ 104 static uchar_t Fflag; /* Fgrep or -F flag */ 105 static uchar_t Rflag; /* Like rflag, but follow symlinks */ 106 static uchar_t outfn; /* Put out file name */ 107 static uchar_t conflag; /* show context of matches */ 108 static char *cmdname; 109 110 static int use_wchar, use_bmg, mblocale; 111 112 static size_t outbuflen, prntbuflen, conbuflen; 113 static unsigned long conalen, conblen, conmatches; 114 static char *prntbuf, *conbuf; 115 static wchar_t *outline; 116 117 static void addfile(const char *fn); 118 static void addpattern(char *s); 119 static void fixpatterns(void); 120 static void usage(void); 121 static int grep(int, const char *); 122 static void bmgcomp(char *, int); 123 static char *bmgexec(char *, char *); 124 static int recursive(const char *, const struct stat *, int, struct FTW *); 125 static void process_path(const char *); 126 static void process_file(const char *, int); 127 128 /* 129 * mainline for grep 130 */ 131 int 132 main(int argc, char **argv) 133 { 134 char *ap, *test; 135 int c; 136 int fflag = 0; 137 int i, n_pattern = 0, n_file = 0; 138 char **pattern_list = NULL; 139 char **file_list = NULL; 140 141 (void) setlocale(LC_ALL, ""); 142 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 143 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 144 #endif 145 (void) textdomain(TEXT_DOMAIN); 146 147 /* 148 * true if this is running on the multibyte locale 149 */ 150 mblocale = (MB_CUR_MAX > 1); 151 /* 152 * Skip leading slashes 153 */ 154 cmdname = argv[0]; 155 if (ap = strrchr(cmdname, '/')) 156 cmdname = ap + 1; 157 158 ap = cmdname; 159 /* 160 * Detect egrep/fgrep via command name, map to -E and -F options. 161 */ 162 if (*ap == 'e' || *ap == 'E') { 163 regflags |= REG_EXTENDED; 164 egrep++; 165 } else { 166 if (*ap == 'f' || *ap == 'F') { 167 fgrep++; 168 } 169 } 170 171 /* check for non-standard "-line-count" option */ 172 for (i = 1; i < argc; i++) { 173 if (strcmp(argv[i], "--") == 0) 174 break; 175 176 /* isdigit() check prevents negative arguments */ 177 if ((argv[i][0] == '-') && isdigit(argv[i][1])) { 178 if (strlen(&argv[i][1]) != 179 strspn(&argv[i][1], "0123456789")) { 180 (void) fprintf(stderr, gettext( 181 "%s: Bad number flag\n"), argv[0]); 182 usage(); 183 } 184 185 errno = 0; 186 conalen = conblen = strtoul(&argv[i][1], (char **)NULL, 187 10); 188 189 if (errno != 0 || conalen >= ULONG_MAX) { 190 (void) fprintf(stderr, gettext( 191 "%s: Bad context argument\n"), argv[0]); 192 } else if (conalen) 193 conflag = CONTEXT; 194 195 while (i < argc) { 196 argv[i] = argv[i + 1]; 197 i++; 198 } 199 argc--; 200 } 201 } 202 203 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIRA:B:C:")) != EOF) { 204 unsigned long tval; 205 switch (c) { 206 case 'v': /* POSIX: negate matches */ 207 nvflag = B_FALSE; 208 break; 209 210 case 'c': /* POSIX: write count */ 211 cflag++; 212 break; 213 214 case 'i': /* POSIX: ignore case */ 215 iflag++; 216 regflags |= REG_ICASE; 217 break; 218 219 case 'l': /* POSIX: Write filenames only */ 220 lflag++; 221 break; 222 223 case 'n': /* POSIX: Write line numbers */ 224 nflag++; 225 break; 226 227 case 'r': /* Solaris: search recursively */ 228 rflag++; 229 break; 230 231 case 'b': /* Solaris: Write file block numbers */ 232 bflag++; 233 break; 234 235 case 's': /* POSIX: No error msgs for files */ 236 sflag++; 237 break; 238 239 case 'e': /* POSIX: pattern list */ 240 n_pattern++; 241 pattern_list = realloc(pattern_list, 242 sizeof (char *) * n_pattern); 243 if (pattern_list == NULL) { 244 (void) fprintf(stderr, 245 gettext("%s: out of memory\n"), 246 cmdname); 247 exit(2); 248 } 249 *(pattern_list + n_pattern - 1) = optarg; 250 break; 251 252 case 'f': /* POSIX: pattern file */ 253 fflag = 1; 254 n_file++; 255 file_list = realloc(file_list, 256 sizeof (char *) * n_file); 257 if (file_list == NULL) { 258 (void) fprintf(stderr, 259 gettext("%s: out of memory\n"), 260 cmdname); 261 exit(2); 262 } 263 *(file_list + n_file - 1) = optarg; 264 break; 265 266 /* based on options order h or H is set as in GNU grep */ 267 case 'h': /* Solaris: supress printing of file name */ 268 hflag = 1; 269 Hflag = 0; 270 break; 271 /* Solaris: precede every matching with file name */ 272 case 'H': 273 Hflag = 1; 274 hflag = 0; 275 break; 276 277 case 'q': /* POSIX: quiet: status only */ 278 qflag++; 279 break; 280 281 case 'w': /* Solaris: treat pattern as word */ 282 wflag++; 283 break; 284 285 case 'x': /* POSIX: full line matches */ 286 xflag++; 287 break; 288 289 case 'E': /* POSIX: Extended RE's */ 290 regflags |= REG_EXTENDED; 291 Eflag++; 292 break; 293 294 case 'F': /* POSIX: strings, not RE's */ 295 Fflag++; 296 break; 297 298 case 'R': /* Solaris: like rflag, but follow symlinks */ 299 Rflag++; 300 rflag++; 301 break; 302 303 case 'A': /* print N lines after each match */ 304 errno = 0; 305 conalen = strtoul(optarg, &test, 10); 306 /* *test will be non-null if optarg is negative */ 307 if (errno != 0 || *test != '\0' || 308 conalen >= ULONG_MAX) { 309 (void) fprintf(stderr, gettext( 310 "%s: Bad context argument: %s\n"), 311 argv[0], optarg); 312 exit(2); 313 } 314 if (conalen) 315 conflag |= AFTER; 316 else 317 conflag &= ~AFTER; 318 break; 319 case 'B': /* print N lines before each match */ 320 errno = 0; 321 conblen = strtoul(optarg, &test, 10); 322 /* *test will be non-null if optarg is negative */ 323 if (errno != 0 || *test != '\0' || 324 conblen >= ULONG_MAX) { 325 (void) fprintf(stderr, gettext( 326 "%s: Bad context argument: %s\n"), 327 argv[0], optarg); 328 exit(2); 329 } 330 if (conblen) 331 conflag |= BEFORE; 332 else 333 conflag &= ~BEFORE; 334 break; 335 case 'C': /* print N lines around each match */ 336 errno = 0; 337 tval = strtoul(optarg, &test, 10); 338 /* *test will be non-null if optarg is negative */ 339 if (errno != 0 || *test != '\0' || tval >= ULONG_MAX) { 340 (void) fprintf(stderr, gettext( 341 "%s: Bad context argument: %s\n"), 342 argv[0], optarg); 343 exit(2); 344 } 345 if (tval) { 346 if ((conflag & BEFORE) == 0) 347 conblen = tval; 348 if ((conflag & AFTER) == 0) 349 conalen = tval; 350 conflag = CONTEXT; 351 } 352 break; 353 354 default: 355 usage(); 356 } 357 } 358 /* 359 * If we're invoked as egrep or fgrep we need to do some checks 360 */ 361 362 if (egrep || fgrep) { 363 /* 364 * Use of -E or -F with egrep or fgrep is illegal 365 */ 366 if (Eflag || Fflag) 367 usage(); 368 /* 369 * Don't allow use of wflag with egrep / fgrep 370 */ 371 if (wflag) 372 usage(); 373 /* 374 * For Solaris the -s flag is equivalent to XCU -q 375 */ 376 if (sflag) 377 qflag++; 378 /* 379 * done with above checks - set the appropriate flags 380 */ 381 if (egrep) 382 Eflag++; 383 else /* Else fgrep */ 384 Fflag++; 385 } 386 387 if (wflag && (Eflag || Fflag)) { 388 /* 389 * -w cannot be specified with grep -F 390 */ 391 usage(); 392 } 393 394 /* 395 * -E and -F flags are mutually exclusive - check for this 396 */ 397 if (Eflag && Fflag) 398 usage(); 399 400 /* 401 * -l overrides -H like in GNU grep 402 */ 403 if (lflag) 404 Hflag = 0; 405 406 /* 407 * -c, -l and -q flags are mutually exclusive 408 * We have -c override -l like in Solaris. 409 * -q overrides -l & -c programmatically in grep() function. 410 */ 411 if (cflag && lflag) 412 lflag = 0; 413 414 argv += optind - 1; 415 argc -= optind - 1; 416 417 /* 418 * Now handling -e and -f option 419 */ 420 if (pattern_list) { 421 for (i = 0; i < n_pattern; i++) { 422 addpattern(pattern_list[i]); 423 } 424 free(pattern_list); 425 } 426 if (file_list) { 427 for (i = 0; i < n_file; i++) { 428 addfile(file_list[i]); 429 } 430 free(file_list); 431 } 432 433 /* 434 * No -e or -f? Make sure there is one more arg, use it as the pattern. 435 */ 436 if (patterns == NULL && !fflag) { 437 if (argc < 2) 438 usage(); 439 addpattern(argv[1]); 440 argc--; 441 argv++; 442 } 443 444 /* 445 * If -x flag is not specified or -i flag is specified 446 * with fgrep in a multibyte locale, need to use 447 * the wide character APIs. Otherwise, byte-oriented 448 * process will be done. 449 */ 450 use_wchar = Fflag && mblocale && (!xflag || iflag); 451 452 /* 453 * Compile Patterns and also decide if BMG can be used 454 */ 455 fixpatterns(); 456 457 /* Process all files: stdin, or rest of arg list */ 458 if (argc < 2) { 459 matched = grep(0, STDIN_FILENAME); 460 } else { 461 if (Hflag || (argc > 2 && hflag == 0)) 462 outfn = 1; /* Print filename on match line */ 463 for (argv++; *argv != NULL; argv++) { 464 process_path(*argv); 465 } 466 } 467 /* 468 * Return() here is used instead of exit 469 */ 470 471 (void) fflush(stdout); 472 473 if (errors) 474 return (2); 475 return (matched ? 0 : 1); 476 } 477 478 static void 479 process_path(const char *path) 480 { 481 struct stat st; 482 int walkflags = FTW_CHDIR; 483 char *buf = NULL; 484 485 if (rflag) { 486 if (stat(path, &st) != -1 && 487 (st.st_mode & S_IFMT) == S_IFDIR) { 488 outfn = 1; /* Print filename */ 489 490 /* 491 * Add trailing slash if arg 492 * is directory, to resolve symlinks. 493 */ 494 if (path[strlen(path) - 1] != '/') { 495 (void) asprintf(&buf, "%s/", path); 496 if (buf != NULL) 497 path = buf; 498 } 499 500 /* 501 * Search through subdirs if path is directory. 502 * Don't follow symlinks if Rflag is not set. 503 */ 504 if (!Rflag) 505 walkflags |= FTW_PHYS; 506 507 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) { 508 if (!sflag) 509 (void) fprintf(stderr, 510 gettext("%s: can't open \"%s\"\n"), 511 cmdname, path); 512 errors = 1; 513 } 514 return; 515 } 516 } 517 process_file(path, 0); 518 } 519 520 /* 521 * Read and process all files in directory recursively. 522 */ 523 static int 524 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw) 525 { 526 /* 527 * Process files and follow symlinks if Rflag set. 528 */ 529 if (info != FTW_F) { 530 /* Report broken symlinks and unreadable files */ 531 if (!sflag && 532 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) { 533 (void) fprintf(stderr, 534 gettext("%s: can't open \"%s\"\n"), cmdname, name); 535 } 536 return (0); 537 } 538 539 540 /* Skip devices and pipes if Rflag is not set */ 541 if (!Rflag && !S_ISREG(statp->st_mode)) 542 return (0); 543 /* Pass offset to relative name from FTW_CHDIR */ 544 process_file(name, ftw->base); 545 return (0); 546 } 547 548 /* 549 * Opens file and call grep function. 550 */ 551 static void 552 process_file(const char *name, int base) 553 { 554 int fd; 555 556 if ((fd = open(name + base, O_RDONLY)) == -1) { 557 errors = 1; 558 if (!sflag) /* Silent mode */ 559 (void) fprintf(stderr, gettext( 560 "%s: can't open \"%s\"\n"), 561 cmdname, name); 562 return; 563 } 564 matched |= grep(fd, name); 565 (void) close(fd); 566 567 if (ferror(stdout)) { 568 (void) fprintf(stderr, gettext( 569 "%s: error writing to stdout\n"), 570 cmdname); 571 (void) fflush(stdout); 572 exit(2); 573 } 574 575 } 576 577 /* 578 * Add a file of strings to the pattern list. 579 */ 580 static void 581 addfile(const char *fn) 582 { 583 FILE *fp; 584 char *inbuf; 585 char *bufp; 586 size_t bufsiz, buflen, bufused; 587 588 /* 589 * Open the pattern file 590 */ 591 if ((fp = fopen(fn, "r")) == NULL) { 592 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), 593 cmdname, fn); 594 exit(2); 595 } 596 bufsiz = BUFSIZE; 597 if ((inbuf = malloc(bufsiz)) == NULL) { 598 (void) fprintf(stderr, 599 gettext("%s: out of memory\n"), cmdname); 600 exit(2); 601 } 602 bufp = inbuf; 603 bufused = 0; 604 /* 605 * Read in the file, reallocing as we need more memory 606 */ 607 while (fgets(bufp, bufsiz - bufused, fp) != NULL) { 608 buflen = strlen(bufp); 609 bufused += buflen; 610 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') { 611 /* 612 * if this line does not fit to the buffer, 613 * realloc larger buffer 614 */ 615 bufsiz += BUFSIZE; 616 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) { 617 (void) fprintf(stderr, 618 gettext("%s: out of memory\n"), 619 cmdname); 620 exit(2); 621 } 622 bufp = inbuf + bufused; 623 continue; 624 } 625 if (bufp[buflen - 1] == '\n') { 626 bufp[--buflen] = '\0'; 627 } 628 addpattern(inbuf); 629 630 bufp = inbuf; 631 bufused = 0; 632 } 633 free(inbuf); 634 free(prntbuf); 635 free(conbuf); 636 (void) fclose(fp); 637 } 638 639 /* 640 * Add a string to the pattern list. 641 */ 642 static void 643 addpattern(char *s) 644 { 645 PATTERN *pp; 646 char *wordbuf; 647 char *np; 648 649 for (; ; ) { 650 np = strchr(s, '\n'); 651 if (np != NULL) 652 *np = '\0'; 653 if ((pp = malloc(sizeof (PATTERN))) == NULL) { 654 (void) fprintf(stderr, gettext( 655 "%s: out of memory\n"), 656 cmdname); 657 exit(2); 658 } 659 if (wflag) { 660 /* 661 * Solaris wflag support: Add '<' '>' to pattern to 662 * select it as a word. Doesn't make sense with -F 663 * but we're Libertarian. 664 */ 665 size_t slen, wordlen; 666 667 slen = strlen(s); 668 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */ 669 if ((wordbuf = malloc(wordlen)) == NULL) { 670 (void) fprintf(stderr, 671 gettext("%s: out of memory\n"), 672 cmdname); 673 exit(2); 674 } 675 (void) strcpy(wordbuf, "\\<"); 676 (void) strcpy(wordbuf + 2, s); 677 (void) strcpy(wordbuf + 2 + slen, "\\>"); 678 } else { 679 if ((wordbuf = strdup(s)) == NULL) { 680 (void) fprintf(stderr, 681 gettext("%s: out of memory\n"), 682 cmdname); 683 exit(2); 684 } 685 } 686 pp->pattern = wordbuf; 687 pp->next = patterns; 688 patterns = pp; 689 if (np == NULL) 690 break; 691 s = np + 1; 692 } 693 } 694 695 /* 696 * Fix patterns. 697 * Must do after all arguments read, in case later -i option. 698 */ 699 static void 700 fixpatterns(void) 701 { 702 PATTERN *pp; 703 int rv, fix_pattern, npatterns; 704 705 /* 706 * Fix the specified pattern if -x is specified. 707 */ 708 fix_pattern = !Fflag && xflag; 709 710 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) { 711 npatterns++; 712 if (fix_pattern) { 713 char *cp, *cq; 714 size_t plen, nplen; 715 716 plen = strlen(pp->pattern); 717 /* '^' pattern '$' */ 718 nplen = 1 + plen + 1 + 1; 719 if ((cp = malloc(nplen)) == NULL) { 720 (void) fprintf(stderr, 721 gettext("%s: out of memory\n"), 722 cmdname); 723 exit(2); 724 } 725 cq = cp; 726 *cq++ = '^'; 727 cq = strcpy(cq, pp->pattern) + plen; 728 *cq++ = '$'; 729 *cq = '\0'; 730 free(pp->pattern); 731 pp->pattern = cp; 732 } 733 734 if (Fflag) { 735 if (use_wchar) { 736 /* 737 * Fflag && mblocale && iflag 738 * Fflag && mblocale && !xflag 739 */ 740 size_t n; 741 n = strlen(pp->pattern) + 1; 742 if ((pp->wpattern = 743 malloc(sizeof (wchar_t) * n)) == NULL) { 744 (void) fprintf(stderr, 745 gettext("%s: out of memory\n"), 746 cmdname); 747 exit(2); 748 } 749 if (mbstowcs(pp->wpattern, pp->pattern, n) == 750 (size_t)-1) { 751 (void) fprintf(stderr, 752 gettext("%s: failed to convert " 753 "\"%s\" to wide-characters\n"), 754 cmdname, pp->pattern); 755 exit(2); 756 } 757 if (iflag) { 758 wchar_t *wp; 759 for (wp = pp->wpattern; *wp != L'\0'; 760 wp++) { 761 *wp = towlower((wint_t)*wp); 762 } 763 } 764 free(pp->pattern); 765 } else { 766 /* 767 * Fflag && mblocale && !iflag 768 * Fflag && !mblocale && iflag 769 * Fflag && !mblocale && !iflag 770 */ 771 if (iflag) { 772 unsigned char *cp; 773 for (cp = (unsigned char *)pp->pattern; 774 *cp != '\0'; cp++) { 775 *cp = tolower(*cp); 776 } 777 } 778 } 779 /* 780 * fgrep: No regular expressions. 781 */ 782 continue; 783 } 784 785 /* 786 * For non-fgrep, compile the regular expression, 787 * give an informative error message, and exit if 788 * it didn't compile. 789 */ 790 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { 791 (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); 792 (void) fprintf(stderr, 793 gettext("%s: RE error in %s: %s\n"), 794 cmdname, pp->pattern, errstr); 795 exit(2); 796 } 797 free(pp->pattern); 798 } 799 800 /* 801 * Decide if we are able to run the Boyer-Moore-Gosper algorithm. 802 * Use the Boyer-Moore-Gosper algorithm if: 803 * - fgrep (Fflag) 804 * - singlebyte locale (!mblocale) 805 * - no ignoring case (!iflag) 806 * - no printing line numbers (!nflag) 807 * - no negating the output (nvflag) 808 * - only one pattern (npatterns == 1) 809 * - non zero length pattern (strlen(patterns->pattern) != 0) 810 * - no context required (conflag == 0) 811 * 812 * It's guaranteed patterns->pattern is still alive 813 * when Fflag && !mblocale. 814 */ 815 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && 816 (npatterns == 1) && (strlen(patterns->pattern) != 0) && 817 conflag == 0; 818 } 819 820 /* 821 * Search a newline from the beginning of the string 822 */ 823 static char * 824 find_nl(const char *ptr, size_t len) 825 { 826 while (len-- != 0) { 827 if (*ptr++ == '\n') { 828 return ((char *)--ptr); 829 } 830 } 831 return (NULL); 832 } 833 834 /* 835 * Search a newline from the end of the string 836 */ 837 static char * 838 rfind_nl(const char *ptr, size_t len) 839 { 840 const char *uptr = ptr + len; 841 while (len--) { 842 if (*--uptr == '\n') { 843 return ((char *)uptr); 844 } 845 } 846 return (NULL); 847 } 848 849 /* 850 * Duplicate the specified string converting each character 851 * into a lower case. 852 */ 853 static char * 854 istrdup(const char *s1) 855 { 856 static size_t ibuflen = 0; 857 static char *ibuf = NULL; 858 size_t slen; 859 char *p; 860 861 slen = strlen(s1); 862 if (slen >= ibuflen) { 863 /* ibuf does not fit to s1 */ 864 ibuflen = slen + 1; 865 ibuf = realloc(ibuf, ibuflen); 866 if (ibuf == NULL) { 867 (void) fprintf(stderr, 868 gettext("%s: out of memory\n"), cmdname); 869 exit(2); 870 } 871 } 872 p = ibuf; 873 do { 874 *p++ = tolower(*s1); 875 } while (*s1++ != '\0'); 876 return (ibuf); 877 } 878 879 /* 880 * Do grep on a single file. 881 * Return true in any lines matched. 882 * 883 * We have two strategies: 884 * The fast one is used when we have a single pattern with 885 * a string known to occur in the pattern. We can then 886 * do a BMG match on the whole buffer. 887 * This is an order of magnitude faster. 888 * Otherwise we split the buffer into lines, 889 * and check for a match on each line. 890 */ 891 static int 892 grep(int fd, const char *fn) 893 { 894 PATTERN *pp; 895 off_t data_len; /* length of the data chunk */ 896 off_t line_len; /* length of the current line */ 897 off_t line_offset; /* current line's offset from the beginning */ 898 off_t blkoffset; /* line_offset but context-compatible */ 899 long long lineno, linenum; 900 long long matches = 0; /* Number of matching lines */ 901 long long conacnt = 0, conbcnt = 0; /* context line count */ 902 int newlinep; /* 0 if the last line of file has no newline */ 903 char *ptr, *ptrend, *prntptr, *prntptrend; 904 char *nextptr = NULL, *nextend = NULL; 905 char *conptr = NULL, *conptrend = NULL; 906 char *matchptr = NULL; 907 int conaprnt = 0, conbprnt = 0, lastmatch = 0; 908 boolean_t nearmatch; /* w/in N+1 of last match */ 909 boolean_t havematch = B_FALSE; /* have a match in context */ 910 size_t prntlen; 911 912 if (patterns == NULL) 913 return (0); /* no patterns to match -- just return */ 914 915 pp = patterns; 916 917 if (use_bmg) { 918 bmgcomp(pp->pattern, strlen(pp->pattern)); 919 } 920 921 if (use_wchar && outline == NULL) { 922 outbuflen = BUFSIZE + 1; 923 outline = malloc(sizeof (wchar_t) * outbuflen); 924 if (outline == NULL) { 925 (void) fprintf(stderr, gettext("%s: out of memory\n"), 926 cmdname); 927 exit(2); 928 } 929 } 930 931 if (prntbuf == NULL) { 932 prntbuflen = BUFSIZE; 933 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { 934 (void) fprintf(stderr, gettext("%s: out of memory\n"), 935 cmdname); 936 exit(2); 937 } 938 } 939 940 if (conflag != 0 && (conbuf == NULL)) { 941 conbuflen = BUFSIZE; 942 if ((conbuf = malloc(BUFSIZE+1)) == NULL) { 943 (void) fprintf(stderr, gettext("%s: out of memory\n"), 944 cmdname); 945 exit(2); 946 } 947 } 948 949 nearmatch = (conmatches != 0); 950 blkoffset = line_offset = 0; 951 lineno = 0; 952 linenum = 1; 953 newlinep = 1; 954 data_len = 0; 955 for (; ; ) { 956 long count; 957 off_t offset = 0; 958 char separate; 959 boolean_t last_ctx = B_FALSE, eof = B_FALSE; 960 961 if (data_len == 0) { 962 /* 963 * If no data in the buffer, reset ptr 964 */ 965 ptr = prntbuf; 966 if (conflag != 0 && conptr == NULL) { 967 conptr = conbuf; 968 conptrend = conptr - 1; 969 } 970 } 971 if (ptr == prntbuf) { 972 /* 973 * The current data chunk starts from prntbuf. 974 * This means either the buffer has no data 975 * or the buffer has no newline. 976 * So, read more data from input. 977 */ 978 count = read(fd, ptr + data_len, prntbuflen - data_len); 979 if (count < 0) { 980 /* read error */ 981 if (cflag) { 982 if (outfn && !rflag) { 983 (void) fprintf(stdout, 984 "%s:", fn); 985 } 986 if (!qflag && !rflag) { 987 (void) fprintf(stdout, "%lld\n", 988 matches); 989 } 990 } 991 return (0); 992 } else if (count == 0) { 993 /* no new data */ 994 eof = B_TRUE; 995 996 if (data_len == 0) { 997 /* end of file already reached */ 998 if (conflag != 0) { 999 if (conptrend >= conptr) 1000 *conptrend = '\n'; 1001 last_ctx = B_TRUE; 1002 goto L_next_line; 1003 } else { 1004 goto out; 1005 } 1006 } 1007 /* last line of file has no newline */ 1008 ptrend = ptr + data_len; 1009 newlinep = 0; 1010 goto L_start_process; 1011 } 1012 offset = data_len; 1013 data_len += count; 1014 } 1015 1016 /* 1017 * Look for newline in the chunk 1018 * between ptr + offset and ptr + data_len - offset. 1019 */ 1020 ptrend = find_nl(ptr + offset, data_len - offset); 1021 if (ptrend == NULL) { 1022 /* no newline found in this chunk */ 1023 if (ptr > prntbuf) { 1024 /* 1025 * Move remaining data to the beginning 1026 * of the buffer. 1027 * Remaining data lie from ptr for 1028 * data_len bytes. 1029 */ 1030 (void) memmove(prntbuf, ptr, data_len); 1031 } 1032 if (data_len == prntbuflen) { 1033 /* 1034 * Not enough room in the buffer 1035 */ 1036 if (prntbuflen > SIZE_MAX - BUFSIZE) { 1037 (void) fprintf(stderr, 1038 gettext("%s: buflen would" 1039 " overflow\n"), 1040 cmdname); 1041 exit(2); 1042 } 1043 1044 prntbuflen += BUFSIZE; 1045 prntbuf = realloc(prntbuf, prntbuflen + 1); 1046 if (prntbuf == NULL) { 1047 (void) fprintf(stderr, 1048 gettext("%s: out of memory\n"), 1049 cmdname); 1050 exit(2); 1051 } 1052 } 1053 ptr = prntbuf; 1054 /* read the next input */ 1055 continue; 1056 } 1057 L_start_process: 1058 1059 /* 1060 * Beginning of the chunk: ptr 1061 * End of the chunk: ptr + data_len 1062 * Beginning of the line: ptr 1063 * End of the line: ptrend 1064 * 1065 * conptr: Beginning of the context. 1066 * conptrend: If context is empty, conptr - 1 (invalid memory). 1067 * Otherwise, Last newline in the context. 1068 */ 1069 1070 if (use_bmg) { 1071 /* 1072 * Use Boyer-Moore-Gosper algorithm to find out if 1073 * this chunk (not this line) contains the specified 1074 * pattern. If not, restart from the last line 1075 * of this chunk. 1076 */ 1077 char *bline; 1078 bline = bmgexec(ptr, ptr + data_len); 1079 if (bline == NULL) { 1080 /* 1081 * No pattern found in this chunk. 1082 * Need to find the last line 1083 * in this chunk. 1084 */ 1085 ptrend = rfind_nl(ptr, data_len); 1086 1087 /* 1088 * When this chunk does not contain newline, 1089 * ptrend becomes NULL, which should happen 1090 * when the last line of file does not end 1091 * with a newline. At such a point, 1092 * newlinep should have been set to 0. 1093 * Therefore, just after jumping to 1094 * L_skip_line, the main for-loop quits, 1095 * and the line_len value won't be 1096 * used. 1097 */ 1098 line_len = ptrend - ptr; 1099 goto L_skip_line; 1100 } 1101 if (bline > ptrend) { 1102 /* 1103 * Pattern found not in the first line 1104 * of this chunk. 1105 * Discard the first line. 1106 */ 1107 line_len = ptrend - ptr; 1108 goto L_skip_line; 1109 } 1110 /* 1111 * Pattern found in the first line of this chunk. 1112 * Using this result. 1113 */ 1114 *ptrend = '\0'; 1115 line_len = ptrend - ptr; 1116 1117 /* 1118 * before jumping to L_next_line, 1119 * need to handle xflag if specified 1120 */ 1121 if (xflag && (line_len != bmglen || 1122 strcmp(bmgpat, ptr) != 0)) { 1123 /* didn't match */ 1124 pp = NULL; 1125 } else { 1126 pp = patterns; /* to make it happen */ 1127 } 1128 goto L_next_line; 1129 } 1130 lineno++; 1131 /* 1132 * Line starts from ptr and ends at ptrend. 1133 * line_len will be the length of the line. 1134 */ 1135 *ptrend = '\0'; 1136 line_len = ptrend - ptr; 1137 1138 /* 1139 * From now, the process will be performed based 1140 * on the line from ptr to ptrend. 1141 */ 1142 if (use_wchar) { 1143 size_t len; 1144 1145 if (line_len >= outbuflen) { 1146 outbuflen = line_len + 1; 1147 outline = realloc(outline, 1148 sizeof (wchar_t) * outbuflen); 1149 if (outline == NULL) { 1150 (void) fprintf(stderr, 1151 gettext("%s: out of memory\n"), 1152 cmdname); 1153 exit(2); 1154 } 1155 } 1156 1157 len = mbstowcs(outline, ptr, line_len); 1158 if (len == (size_t)-1) { 1159 (void) fprintf(stderr, gettext( 1160 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1161 cmdname, fn, lineno); 1162 /* never match a line with invalid sequence */ 1163 goto L_skip_line; 1164 } 1165 outline[len] = L'\0'; 1166 1167 if (iflag) { 1168 wchar_t *cp; 1169 for (cp = outline; *cp != '\0'; cp++) { 1170 *cp = towlower((wint_t)*cp); 1171 } 1172 } 1173 1174 if (xflag) { 1175 for (pp = patterns; pp; pp = pp->next) { 1176 if (outline[0] == pp->wpattern[0] && 1177 wcscmp(outline, 1178 pp->wpattern) == 0) { 1179 /* matched */ 1180 break; 1181 } 1182 } 1183 } else { 1184 for (pp = patterns; pp; pp = pp->next) { 1185 if (wcswcs(outline, pp->wpattern) 1186 != NULL) { 1187 /* matched */ 1188 break; 1189 } 1190 } 1191 } 1192 } else if (Fflag) { 1193 /* fgrep in byte-oriented handling */ 1194 char *fptr; 1195 if (iflag) { 1196 fptr = istrdup(ptr); 1197 } else { 1198 fptr = ptr; 1199 } 1200 if (xflag) { 1201 /* fgrep -x */ 1202 for (pp = patterns; pp; pp = pp->next) { 1203 if (fptr[0] == pp->pattern[0] && 1204 strcmp(fptr, pp->pattern) == 0) { 1205 /* matched */ 1206 break; 1207 } 1208 } 1209 } else { 1210 for (pp = patterns; pp; pp = pp->next) { 1211 if (strstr(fptr, pp->pattern) != NULL) { 1212 /* matched */ 1213 break; 1214 } 1215 } 1216 } 1217 } else { 1218 /* grep or egrep */ 1219 for (pp = patterns; pp; pp = pp->next) { 1220 int rv; 1221 1222 rv = regexec(&pp->re, ptr, 0, NULL, 0); 1223 if (rv == REG_OK) { 1224 /* matched */ 1225 break; 1226 } 1227 1228 switch (rv) { 1229 case REG_NOMATCH: 1230 break; 1231 case REG_ECHAR: 1232 (void) fprintf(stderr, gettext( 1233 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1234 cmdname, fn, lineno); 1235 break; 1236 default: 1237 (void) regerror(rv, &pp->re, errstr, 1238 sizeof (errstr)); 1239 (void) fprintf(stderr, gettext( 1240 "%s: input file \"%s\": line %lld: %s\n"), 1241 cmdname, fn, lineno, errstr); 1242 exit(2); 1243 } 1244 } 1245 } 1246 1247 /* 1248 * Context is set up as follows: 1249 * For a 'Before' context, we maintain a set of pointers 1250 * containing 'N' lines of context. If the current number of 1251 * lines contained is greater than N, and N isn't a match, the 1252 * start pointer is moved forward to the next newline. 1253 * 1254 * If we ever find a match, we print out immediately. 1255 * 'nearmatch' tells us if we're within N+1 lines of the last 1256 * match ; if we are, and we find another match, we don't 1257 * separate the matches. 'nearmatch' becomes false when 1258 * a line gets rotated out of the context. 1259 * 1260 * For an 'After' context, we simply wait until we've found a 1261 * match, then create a context N+1 lines big. If we don't find 1262 * a match within the context, we print out the current context. 1263 * Otherwise, we save a reference to the new matching line, 1264 * print out the other context, and reset our context pointers 1265 * to the new matching line. 1266 * 1267 * 'nearmatch' becomes false when we find a non-matching line 1268 * that isn't a part of any context. 1269 * 1270 * A full-context is implemented as a combination of the 1271 * 'Before' and 'After' context logic. Before we find a match, 1272 * we follow the Before logic. When we find a match, we 1273 * follow the After logic. 'nearmatch' is handled by the Before 1274 * logic. 1275 */ 1276 1277 if (conflag == 0) 1278 goto L_next_line; 1279 1280 /* Do we have room to add this line to the context buffer? */ 1281 while ((line_len + 1) > (conbuflen - 1282 ((conptrend >= conptr) ? conptrend - conbuf : 0))) { 1283 char *oldconbuf = conbuf; 1284 char *oldconptr = conptr; 1285 long tmp = matchptr - conptr; 1286 1287 if (conbuflen > SIZE_MAX - BUFSIZE) { 1288 (void) fprintf(stderr, 1289 gettext("%s: buflen would overflow\n"), 1290 cmdname); 1291 exit(2); 1292 } 1293 1294 conbuflen += BUFSIZE; 1295 conbuf = realloc(conbuf, conbuflen + 1); 1296 if (conbuf == NULL) { 1297 (void) fprintf(stderr, 1298 gettext("%s: out of memory\n"), 1299 cmdname); 1300 exit(2); 1301 } 1302 1303 conptr = conbuf + (conptr - oldconbuf); 1304 conptrend = conptr + (conptrend - oldconptr); 1305 if (matchptr) 1306 matchptr = conptr + tmp; 1307 } 1308 (void) memcpy(conptrend + 1, ptr, line_len); 1309 conptrend += line_len + 1; 1310 *conptrend = '\n'; 1311 1312 if (nvflag == (pp != NULL)) { 1313 /* matched */ 1314 if (havematch) { 1315 if ((conflag & AFTER) != 0) { 1316 conaprnt = 1; 1317 nextend = conptrend; 1318 conptrend = conptr + lastmatch; 1319 nextptr = conptrend + 1; 1320 *nextend = '\n'; 1321 } 1322 } else { 1323 if (conflag == AFTER) { 1324 conptr = conptrend - (line_len); 1325 linenum = lineno; 1326 } 1327 blkoffset = line_offset - 1328 (conptrend - conptr - line_len); 1329 } 1330 1331 if (conflag == BEFORE) 1332 conbprnt = 1; 1333 1334 lastmatch = conptrend - conptr; 1335 havematch = B_TRUE; 1336 goto L_next_line; 1337 } 1338 1339 if (!havematch) { 1340 if ((conflag & BEFORE) != 0) { 1341 if (conbcnt >= conblen) { 1342 char *tmp = conptr; 1343 conptr = find_nl(conptr, 1344 conptrend - conptr) + 1; 1345 if (bflag) 1346 blkoffset += conptr - tmp; 1347 linenum++; 1348 nearmatch = B_TRUE; 1349 } else { 1350 conbcnt++; 1351 } 1352 } 1353 if (conflag == AFTER) 1354 nearmatch = B_TRUE; 1355 } else { 1356 if (++conacnt >= conalen && !conaprnt && conalen) 1357 conaprnt = 1; 1358 else 1359 lastmatch = conptrend - conptr; 1360 } 1361 1362 L_next_line: 1363 /* 1364 * Here, if pp points to non-NULL, something has been matched 1365 * to the pattern. 1366 */ 1367 if (!last_ctx && nvflag == (pp != NULL)) { 1368 matches++; 1369 if (!nextend) 1370 matchptr = (conflag != 0) ? conptrend : ptrend; 1371 } 1372 1373 /* 1374 * Set up some print context so that we can treat 1375 * single-line matches as a zero-N context. 1376 * Apply CLI flags to each line of the context. 1377 * 1378 * For context, we only print if we both have a match and are 1379 * either at the end of the data stream, or we've previously 1380 * declared that we want to print for a particular context. 1381 */ 1382 if (havematch && (eof || conaprnt || conbprnt)) { 1383 1384 /* 1385 * We'd normally do this earlier, but we had to 1386 * escape early because we reached the end of the data. 1387 */ 1388 if (eof && nextptr) 1389 conptrend = nextend; 1390 1391 prntlen = conptrend - conptr + 1; 1392 prntptr = conptr; 1393 if (conmatches++ && nearmatch && !cflag) 1394 (void) fwrite("--\n", 1, 3, stdout); 1395 } else if (conflag == 0 && nvflag == (pp != NULL)) { 1396 *ptrend = '\n'; 1397 prntlen = line_len + 1; 1398 prntptr = ptr; 1399 linenum = lineno; 1400 blkoffset = line_offset; 1401 } else if (eof) { 1402 /* No match and no more data */ 1403 goto out; 1404 } else { 1405 /* No match, or we're not done building context */ 1406 goto L_skip_line; 1407 } 1408 1409 prntptrend = prntptr - 1; 1410 while ((prntptrend = find_nl(prntptrend + 1, 1411 prntlen)) != NULL) { 1412 1413 /* 1414 * GNU grep uses '-' for context lines and ':' for 1415 * matching lines, so replicate that here. 1416 */ 1417 if (prntptrend == matchptr) { 1418 if (eof && nextptr) { 1419 matchptr = nextend; 1420 nextptr = NULL; 1421 } else { 1422 matchptr = NULL; 1423 } 1424 separate = ':'; 1425 } else { 1426 separate = '-'; 1427 } 1428 1429 /* 1430 * Handle q, l, and c flags. 1431 */ 1432 if (qflag) { 1433 /* no need to continue */ 1434 /* 1435 * End of this line is ptrend. 1436 * We have read up to ptr + data_len. 1437 */ 1438 off_t pos; 1439 pos = ptr + data_len - (ptrend + 1); 1440 (void) lseek(fd, -pos, SEEK_CUR); 1441 exit(0); 1442 } 1443 if (lflag) { 1444 (void) printf("%s\n", fn); 1445 goto out; 1446 } 1447 if (!cflag) { 1448 if (Hflag || outfn) { 1449 (void) printf("%s%c", fn, separate); 1450 } 1451 if (bflag) { 1452 (void) printf("%lld%c", (offset_t) 1453 (blkoffset / BSIZE), separate); 1454 } 1455 if (nflag) { 1456 (void) printf("%lld%c", linenum, 1457 separate); 1458 } 1459 (void) fwrite(prntptr, 1, 1460 prntptrend - prntptr + 1, stdout); 1461 } 1462 if (ferror(stdout)) { 1463 return (0); 1464 } 1465 linenum++; 1466 prntlen -= prntptrend - prntptr + 1; 1467 blkoffset += prntptrend - prntptr + 1; 1468 prntptr = prntptrend + 1; 1469 } 1470 1471 if (eof) 1472 goto out; 1473 1474 /* 1475 * Update context buffer and variables post-print 1476 */ 1477 if (conflag != 0) { 1478 conptr = conbuf; 1479 conaprnt = conbprnt = 0; 1480 nearmatch = B_FALSE; 1481 conacnt = conbcnt = 0; 1482 1483 if (nextptr) { 1484 (void) memmove(conbuf, nextptr, 1485 nextend - nextptr + 1); 1486 blkoffset += nextptr - conptrend - 1; 1487 conptrend = conptr + (nextend - nextptr); 1488 matchptr = conptrend; 1489 linenum = lineno; 1490 lastmatch = conptrend - conptr; 1491 havematch = B_TRUE; 1492 } else { 1493 conptrend = conptr - 1; 1494 conacnt = 0; 1495 lastmatch = 0; 1496 havematch = B_FALSE; 1497 } 1498 nextptr = nextend = NULL; 1499 } 1500 1501 L_skip_line: 1502 if (!newlinep) 1503 break; 1504 1505 data_len -= line_len + 1; 1506 line_offset += line_len + 1; 1507 ptr = ptrend + 1; 1508 } 1509 1510 out: 1511 if (cflag) { 1512 if (Hflag || outfn) { 1513 (void) printf("%s:", fn); 1514 } 1515 if (!qflag) { 1516 (void) printf("%lld\n", matches); 1517 } 1518 } 1519 return (matches != 0); 1520 } 1521 1522 /* 1523 * usage message for grep 1524 */ 1525 static void 1526 usage(void) 1527 { 1528 (void) fprintf(stderr, gettext("usage: %5s"), cmdname); 1529 if (!egrep && !fgrep) 1530 (void) fprintf(stderr, gettext(" [-E|-F]")); 1531 (void) fprintf(stderr, gettext(" [-bchHilnqrRsvx] [-A num] [-B num] " 1532 "[-C num|-num]\n [-e pattern_list]... " 1533 "[-f pattern_file]... [pattern_list] [file]...\n")); 1534 exit(2); 1535 /* NOTREACHED */ 1536 } 1537 1538 /* 1539 * Compile literal pattern into BMG tables 1540 */ 1541 static void 1542 bmgcomp(char *pat, int len) 1543 { 1544 int i; 1545 int tlen; 1546 unsigned char *uc = (unsigned char *)pat; 1547 1548 bmglen = len; 1549 bmgpat = pat; 1550 1551 for (i = 0; i < M_CSETSIZE; i++) { 1552 bmgtab[i] = len; 1553 } 1554 1555 len--; 1556 for (tlen = len, i = 0; i <= len; i++, tlen--) { 1557 bmgtab[*uc++] = tlen; 1558 } 1559 } 1560 1561 /* 1562 * BMG search. 1563 */ 1564 static char * 1565 bmgexec(char *str, char *end) 1566 { 1567 int t; 1568 char *k, *s, *p; 1569 1570 k = str + bmglen - 1; 1571 if (bmglen == 1) { 1572 return (memchr(str, bmgpat[0], end - str)); 1573 } 1574 for (; ; ) { 1575 /* inner loop, should be most optimized */ 1576 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) { 1577 k += t; 1578 } 1579 if (k >= end) { 1580 return (NULL); 1581 } 1582 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) { 1583 if (p == bmgpat) { 1584 return (s); 1585 } 1586 } 1587 k++; 1588 } 1589 /* NOTREACHED */ 1590 } 1591