1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * grep - pattern matching program - combined grep, egrep, and fgrep. 29 * Based on MKS grep command, with XCU & Solaris mods. 30 */ 31 32 /* 33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved. 34 * 35 */ 36 37 /* 38 * Copyright 2018 Nexenta Systems, Inc. 39 * Copyright 2013 Damian Bogel. All rights reserved. 40 */ 41 42 #include <string.h> 43 #include <stdlib.h> 44 #include <ctype.h> 45 #include <stdarg.h> 46 #include <regex.h> 47 #include <limits.h> 48 #include <sys/types.h> 49 #include <sys/stat.h> 50 #include <fcntl.h> 51 #include <stdio.h> 52 #include <locale.h> 53 #include <wchar.h> 54 #include <errno.h> 55 #include <unistd.h> 56 #include <wctype.h> 57 #include <ftw.h> 58 #include <sys/param.h> 59 60 #define STDIN_FILENAME gettext("(standard input)") 61 62 #define BSIZE 512 /* Size of block for -b */ 63 #define BUFSIZE 8192 /* Input buffer size */ 64 #define MAX_DEPTH 1000 /* how deep to recurse */ 65 66 #define AFTER 1 /* 'After' Context */ 67 #define BEFORE 2 /* 'Before' Context */ 68 #define CONTEXT (AFTER|BEFORE) /* Full Context */ 69 70 #define M_CSETSIZE 256 /* singlebyte chars */ 71 static int bmglen; /* length of BMG pattern */ 72 static char *bmgpat; /* BMG pattern */ 73 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */ 74 75 typedef struct _PATTERN { 76 char *pattern; /* original pattern */ 77 wchar_t *wpattern; /* wide, lowercased pattern */ 78 struct _PATTERN *next; 79 regex_t re; /* compiled pattern */ 80 } PATTERN; 81 82 static PATTERN *patterns; 83 static char errstr[128]; /* regerror string buffer */ 84 static int regflags = 0; /* regcomp options */ 85 static int matched = 0; /* return of the grep() */ 86 static int errors = 0; /* count of errors */ 87 static uchar_t fgrep = 0; /* Invoked as fgrep */ 88 static uchar_t egrep = 0; /* Invoked as egrep */ 89 static boolean_t nvflag = B_TRUE; /* Print matching lines */ 90 static uchar_t cflag; /* Count of matches */ 91 static uchar_t iflag; /* Case insensitve matching */ 92 static uchar_t Hflag; /* Precede lines by file name */ 93 static uchar_t hflag; /* Supress printing of filename */ 94 static uchar_t lflag; /* Print file names of matches */ 95 static uchar_t nflag; /* Precede lines by line number */ 96 static uchar_t rflag; /* Search directories recursively */ 97 static uchar_t bflag; /* Preccede matches by block number */ 98 static uchar_t sflag; /* Suppress file error messages */ 99 static uchar_t qflag; /* Suppress standard output */ 100 static uchar_t wflag; /* Search for expression as a word */ 101 static uchar_t xflag; /* Anchoring */ 102 static uchar_t Eflag; /* Egrep or -E flag */ 103 static uchar_t Fflag; /* Fgrep or -F flag */ 104 static uchar_t Rflag; /* Like rflag, but follow symlinks */ 105 static uchar_t outfn; /* Put out file name */ 106 static uchar_t conflag; /* show context of matches */ 107 static char *cmdname; 108 109 static int use_wchar, use_bmg, mblocale; 110 111 static size_t outbuflen, prntbuflen, conbuflen; 112 static unsigned long conalen, conblen, conmatches; 113 static char *prntbuf, *conbuf; 114 static wchar_t *outline; 115 116 static void addfile(const char *fn); 117 static void addpattern(char *s); 118 static void fixpatterns(void); 119 static void usage(void); 120 static int grep(int, const char *); 121 static void bmgcomp(char *, int); 122 static char *bmgexec(char *, char *); 123 static int recursive(const char *, const struct stat *, int, struct FTW *); 124 static void process_path(const char *); 125 static void process_file(const char *, int); 126 127 /* 128 * mainline for grep 129 */ 130 int 131 main(int argc, char **argv) 132 { 133 char *ap, *test; 134 int c; 135 int fflag = 0; 136 int i, n_pattern = 0, n_file = 0; 137 char **pattern_list = NULL; 138 char **file_list = NULL; 139 140 (void) setlocale(LC_ALL, ""); 141 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 142 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 143 #endif 144 (void) textdomain(TEXT_DOMAIN); 145 146 /* 147 * true if this is running on the multibyte locale 148 */ 149 mblocale = (MB_CUR_MAX > 1); 150 /* 151 * Skip leading slashes 152 */ 153 cmdname = argv[0]; 154 if (ap = strrchr(cmdname, '/')) 155 cmdname = ap + 1; 156 157 ap = cmdname; 158 /* 159 * Detect egrep/fgrep via command name, map to -E and -F options. 160 */ 161 if (*ap == 'e' || *ap == 'E') { 162 regflags |= REG_EXTENDED; 163 egrep++; 164 } else { 165 if (*ap == 'f' || *ap == 'F') { 166 fgrep++; 167 } 168 } 169 170 /* check for non-standard "-line-count" option */ 171 for (i = 1; i < argc; i++) { 172 if (strcmp(argv[i], "--") == 0) 173 break; 174 175 /* isdigit() check prevents negative arguments */ 176 if ((argv[i][0] == '-') && isdigit(argv[i][1])) { 177 if (strlen(&argv[i][1]) != 178 strspn(&argv[i][1], "0123456789")) { 179 (void) fprintf(stderr, gettext( 180 "%s: Bad number flag\n"), argv[0]); 181 usage(); 182 } 183 184 errno = 0; 185 conalen = conblen = strtoul(&argv[i][1], (char **)NULL, 186 10); 187 188 if (errno != 0 || conalen >= ULONG_MAX) { 189 (void) fprintf(stderr, gettext( 190 "%s: Bad context argument\n"), argv[0]); 191 } else if (conalen) 192 conflag = CONTEXT; 193 194 while (i < argc) { 195 argv[i] = argv[i + 1]; 196 i++; 197 } 198 argc--; 199 } 200 } 201 202 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIRA:B:C:")) != EOF) { 203 unsigned long tval; 204 switch (c) { 205 case 'v': /* POSIX: negate matches */ 206 nvflag = B_FALSE; 207 break; 208 209 case 'c': /* POSIX: write count */ 210 cflag++; 211 break; 212 213 case 'i': /* POSIX: ignore case */ 214 iflag++; 215 regflags |= REG_ICASE; 216 break; 217 218 case 'l': /* POSIX: Write filenames only */ 219 lflag++; 220 break; 221 222 case 'n': /* POSIX: Write line numbers */ 223 nflag++; 224 break; 225 226 case 'r': /* Solaris: search recursively */ 227 rflag++; 228 break; 229 230 case 'b': /* Solaris: Write file block numbers */ 231 bflag++; 232 break; 233 234 case 's': /* POSIX: No error msgs for files */ 235 sflag++; 236 break; 237 238 case 'e': /* POSIX: pattern list */ 239 n_pattern++; 240 pattern_list = realloc(pattern_list, 241 sizeof (char *) * n_pattern); 242 if (pattern_list == NULL) { 243 (void) fprintf(stderr, 244 gettext("%s: out of memory\n"), 245 cmdname); 246 exit(2); 247 } 248 *(pattern_list + n_pattern - 1) = optarg; 249 break; 250 251 case 'f': /* POSIX: pattern file */ 252 fflag = 1; 253 n_file++; 254 file_list = realloc(file_list, 255 sizeof (char *) * n_file); 256 if (file_list == NULL) { 257 (void) fprintf(stderr, 258 gettext("%s: out of memory\n"), 259 cmdname); 260 exit(2); 261 } 262 *(file_list + n_file - 1) = optarg; 263 break; 264 265 /* based on options order h or H is set as in GNU grep */ 266 case 'h': /* Solaris: supress printing of file name */ 267 hflag = 1; 268 Hflag = 0; 269 break; 270 /* Solaris: precede every matching with file name */ 271 case 'H': 272 Hflag = 1; 273 hflag = 0; 274 break; 275 276 case 'q': /* POSIX: quiet: status only */ 277 qflag++; 278 break; 279 280 case 'w': /* Solaris: treat pattern as word */ 281 wflag++; 282 break; 283 284 case 'x': /* POSIX: full line matches */ 285 xflag++; 286 break; 287 288 case 'E': /* POSIX: Extended RE's */ 289 regflags |= REG_EXTENDED; 290 Eflag++; 291 break; 292 293 case 'F': /* POSIX: strings, not RE's */ 294 Fflag++; 295 break; 296 297 case 'R': /* Solaris: like rflag, but follow symlinks */ 298 Rflag++; 299 rflag++; 300 break; 301 302 case 'A': /* print N lines after each match */ 303 errno = 0; 304 conalen = strtoul(optarg, &test, 10); 305 /* *test will be non-null if optarg is negative */ 306 if (errno != 0 || *test != '\0' || 307 conalen >= ULONG_MAX) { 308 (void) fprintf(stderr, gettext( 309 "%s: Bad context argument: %s\n"), 310 argv[0], optarg); 311 exit(2); 312 } 313 if (conalen) 314 conflag |= AFTER; 315 else 316 conflag &= ~AFTER; 317 break; 318 case 'B': /* print N lines before each match */ 319 errno = 0; 320 conblen = strtoul(optarg, &test, 10); 321 /* *test will be non-null if optarg is negative */ 322 if (errno != 0 || *test != '\0' || 323 conblen >= ULONG_MAX) { 324 (void) fprintf(stderr, gettext( 325 "%s: Bad context argument: %s\n"), 326 argv[0], optarg); 327 exit(2); 328 } 329 if (conblen) 330 conflag |= BEFORE; 331 else 332 conflag &= ~BEFORE; 333 break; 334 case 'C': /* print N lines around each match */ 335 errno = 0; 336 tval = strtoul(optarg, &test, 10); 337 /* *test will be non-null if optarg is negative */ 338 if (errno != 0 || *test != '\0' || tval >= ULONG_MAX) { 339 (void) fprintf(stderr, gettext( 340 "%s: Bad context argument: %s\n"), 341 argv[0], optarg); 342 exit(2); 343 } 344 if (tval) { 345 if ((conflag & BEFORE) == 0) 346 conblen = tval; 347 if ((conflag & AFTER) == 0) 348 conalen = tval; 349 conflag = CONTEXT; 350 } 351 break; 352 353 default: 354 usage(); 355 } 356 } 357 /* 358 * If we're invoked as egrep or fgrep we need to do some checks 359 */ 360 361 if (egrep || fgrep) { 362 /* 363 * Use of -E or -F with egrep or fgrep is illegal 364 */ 365 if (Eflag || Fflag) 366 usage(); 367 /* 368 * Don't allow use of wflag with egrep / fgrep 369 */ 370 if (wflag) 371 usage(); 372 /* 373 * For Solaris the -s flag is equivalent to XCU -q 374 */ 375 if (sflag) 376 qflag++; 377 /* 378 * done with above checks - set the appropriate flags 379 */ 380 if (egrep) 381 Eflag++; 382 else /* Else fgrep */ 383 Fflag++; 384 } 385 386 if (wflag && (Eflag || Fflag)) { 387 /* 388 * -w cannot be specified with grep -F 389 */ 390 usage(); 391 } 392 393 /* 394 * -E and -F flags are mutually exclusive - check for this 395 */ 396 if (Eflag && Fflag) 397 usage(); 398 399 /* 400 * -l overrides -H like in GNU grep 401 */ 402 if (lflag) 403 Hflag = 0; 404 405 /* 406 * -c, -l and -q flags are mutually exclusive 407 * We have -c override -l like in Solaris. 408 * -q overrides -l & -c programmatically in grep() function. 409 */ 410 if (cflag && lflag) 411 lflag = 0; 412 413 argv += optind - 1; 414 argc -= optind - 1; 415 416 /* 417 * Now handling -e and -f option 418 */ 419 if (pattern_list) { 420 for (i = 0; i < n_pattern; i++) { 421 addpattern(pattern_list[i]); 422 } 423 free(pattern_list); 424 } 425 if (file_list) { 426 for (i = 0; i < n_file; i++) { 427 addfile(file_list[i]); 428 } 429 free(file_list); 430 } 431 432 /* 433 * No -e or -f? Make sure there is one more arg, use it as the pattern. 434 */ 435 if (patterns == NULL && !fflag) { 436 if (argc < 2) 437 usage(); 438 addpattern(argv[1]); 439 argc--; 440 argv++; 441 } 442 443 /* 444 * If -x flag is not specified or -i flag is specified 445 * with fgrep in a multibyte locale, need to use 446 * the wide character APIs. Otherwise, byte-oriented 447 * process will be done. 448 */ 449 use_wchar = Fflag && mblocale && (!xflag || iflag); 450 451 /* 452 * Compile Patterns and also decide if BMG can be used 453 */ 454 fixpatterns(); 455 456 /* Process all files: stdin, or rest of arg list */ 457 if (argc < 2) { 458 matched = grep(0, STDIN_FILENAME); 459 } else { 460 if (Hflag || (argc > 2 && hflag == 0)) 461 outfn = 1; /* Print filename on match line */ 462 for (argv++; *argv != NULL; argv++) { 463 process_path(*argv); 464 } 465 } 466 /* 467 * Return() here is used instead of exit 468 */ 469 470 (void) fflush(stdout); 471 472 if (errors) 473 return (2); 474 return (matched ? 0 : 1); 475 } 476 477 static void 478 process_path(const char *path) 479 { 480 struct stat st; 481 int walkflags = FTW_CHDIR; 482 char *buf = NULL; 483 484 if (rflag) { 485 if (stat(path, &st) != -1 && 486 (st.st_mode & S_IFMT) == S_IFDIR) { 487 outfn = 1; /* Print filename */ 488 489 /* 490 * Add trailing slash if arg 491 * is directory, to resolve symlinks. 492 */ 493 if (path[strlen(path) - 1] != '/') { 494 (void) asprintf(&buf, "%s/", path); 495 if (buf != NULL) 496 path = buf; 497 } 498 499 /* 500 * Search through subdirs if path is directory. 501 * Don't follow symlinks if Rflag is not set. 502 */ 503 if (!Rflag) 504 walkflags |= FTW_PHYS; 505 506 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) { 507 if (!sflag) 508 (void) fprintf(stderr, 509 gettext("%s: can't open \"%s\"\n"), 510 cmdname, path); 511 errors = 1; 512 } 513 return; 514 } 515 } 516 process_file(path, 0); 517 } 518 519 /* 520 * Read and process all files in directory recursively. 521 */ 522 static int 523 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw) 524 { 525 /* 526 * Process files and follow symlinks if Rflag set. 527 */ 528 if (info != FTW_F) { 529 /* Report broken symlinks and unreadable files */ 530 if (!sflag && 531 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) { 532 (void) fprintf(stderr, 533 gettext("%s: can't open \"%s\"\n"), cmdname, name); 534 } 535 return (0); 536 } 537 538 539 /* Skip devices and pipes if Rflag is not set */ 540 if (!Rflag && !S_ISREG(statp->st_mode)) 541 return (0); 542 /* Pass offset to relative name from FTW_CHDIR */ 543 process_file(name, ftw->base); 544 return (0); 545 } 546 547 /* 548 * Opens file and call grep function. 549 */ 550 static void 551 process_file(const char *name, int base) 552 { 553 int fd; 554 555 if ((fd = open(name + base, O_RDONLY)) == -1) { 556 errors = 1; 557 if (!sflag) /* Silent mode */ 558 (void) fprintf(stderr, gettext( 559 "%s: can't open \"%s\"\n"), 560 cmdname, name); 561 return; 562 } 563 matched |= grep(fd, name); 564 (void) close(fd); 565 566 if (ferror(stdout)) { 567 (void) fprintf(stderr, gettext( 568 "%s: error writing to stdout\n"), 569 cmdname); 570 (void) fflush(stdout); 571 exit(2); 572 } 573 574 } 575 576 /* 577 * Add a file of strings to the pattern list. 578 */ 579 static void 580 addfile(const char *fn) 581 { 582 FILE *fp; 583 char *inbuf; 584 char *bufp; 585 size_t bufsiz, buflen, bufused; 586 587 /* 588 * Open the pattern file 589 */ 590 if ((fp = fopen(fn, "r")) == NULL) { 591 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), 592 cmdname, fn); 593 exit(2); 594 } 595 bufsiz = BUFSIZE; 596 if ((inbuf = malloc(bufsiz)) == NULL) { 597 (void) fprintf(stderr, 598 gettext("%s: out of memory\n"), cmdname); 599 exit(2); 600 } 601 bufp = inbuf; 602 bufused = 0; 603 /* 604 * Read in the file, reallocing as we need more memory 605 */ 606 while (fgets(bufp, bufsiz - bufused, fp) != NULL) { 607 buflen = strlen(bufp); 608 bufused += buflen; 609 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') { 610 /* 611 * if this line does not fit to the buffer, 612 * realloc larger buffer 613 */ 614 bufsiz += BUFSIZE; 615 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) { 616 (void) fprintf(stderr, 617 gettext("%s: out of memory\n"), 618 cmdname); 619 exit(2); 620 } 621 bufp = inbuf + bufused; 622 continue; 623 } 624 if (bufp[buflen - 1] == '\n') { 625 bufp[--buflen] = '\0'; 626 } 627 addpattern(inbuf); 628 629 bufp = inbuf; 630 bufused = 0; 631 } 632 free(inbuf); 633 free(prntbuf); 634 free(conbuf); 635 (void) fclose(fp); 636 } 637 638 /* 639 * Add a string to the pattern list. 640 */ 641 static void 642 addpattern(char *s) 643 { 644 PATTERN *pp; 645 char *wordbuf; 646 char *np; 647 648 for (; ; ) { 649 np = strchr(s, '\n'); 650 if (np != NULL) 651 *np = '\0'; 652 if ((pp = malloc(sizeof (PATTERN))) == NULL) { 653 (void) fprintf(stderr, gettext( 654 "%s: out of memory\n"), 655 cmdname); 656 exit(2); 657 } 658 if (wflag) { 659 /* 660 * Solaris wflag support: Add '<' '>' to pattern to 661 * select it as a word. Doesn't make sense with -F 662 * but we're Libertarian. 663 */ 664 size_t slen, wordlen; 665 666 slen = strlen(s); 667 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */ 668 if ((wordbuf = malloc(wordlen)) == NULL) { 669 (void) fprintf(stderr, 670 gettext("%s: out of memory\n"), 671 cmdname); 672 exit(2); 673 } 674 (void) strcpy(wordbuf, "\\<"); 675 (void) strcpy(wordbuf + 2, s); 676 (void) strcpy(wordbuf + 2 + slen, "\\>"); 677 } else { 678 if ((wordbuf = strdup(s)) == NULL) { 679 (void) fprintf(stderr, 680 gettext("%s: out of memory\n"), 681 cmdname); 682 exit(2); 683 } 684 } 685 pp->pattern = wordbuf; 686 pp->next = patterns; 687 patterns = pp; 688 if (np == NULL) 689 break; 690 s = np + 1; 691 } 692 } 693 694 /* 695 * Fix patterns. 696 * Must do after all arguments read, in case later -i option. 697 */ 698 static void 699 fixpatterns(void) 700 { 701 PATTERN *pp; 702 int rv, fix_pattern, npatterns; 703 704 /* 705 * Fix the specified pattern if -x is specified. 706 */ 707 fix_pattern = !Fflag && xflag; 708 709 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) { 710 npatterns++; 711 if (fix_pattern) { 712 char *cp, *cq; 713 size_t plen, nplen; 714 715 plen = strlen(pp->pattern); 716 /* '^' pattern '$' */ 717 nplen = 1 + plen + 1 + 1; 718 if ((cp = malloc(nplen)) == NULL) { 719 (void) fprintf(stderr, 720 gettext("%s: out of memory\n"), 721 cmdname); 722 exit(2); 723 } 724 cq = cp; 725 *cq++ = '^'; 726 cq = strcpy(cq, pp->pattern) + plen; 727 *cq++ = '$'; 728 *cq = '\0'; 729 free(pp->pattern); 730 pp->pattern = cp; 731 } 732 733 if (Fflag) { 734 if (use_wchar) { 735 /* 736 * Fflag && mblocale && iflag 737 * Fflag && mblocale && !xflag 738 */ 739 size_t n; 740 n = strlen(pp->pattern) + 1; 741 if ((pp->wpattern = 742 malloc(sizeof (wchar_t) * n)) == NULL) { 743 (void) fprintf(stderr, 744 gettext("%s: out of memory\n"), 745 cmdname); 746 exit(2); 747 } 748 if (mbstowcs(pp->wpattern, pp->pattern, n) == 749 (size_t)-1) { 750 (void) fprintf(stderr, 751 gettext("%s: failed to convert " 752 "\"%s\" to wide-characters\n"), 753 cmdname, pp->pattern); 754 exit(2); 755 } 756 if (iflag) { 757 wchar_t *wp; 758 for (wp = pp->wpattern; *wp != L'\0'; 759 wp++) { 760 *wp = towlower((wint_t)*wp); 761 } 762 } 763 free(pp->pattern); 764 } else { 765 /* 766 * Fflag && mblocale && !iflag 767 * Fflag && !mblocale && iflag 768 * Fflag && !mblocale && !iflag 769 */ 770 if (iflag) { 771 unsigned char *cp; 772 for (cp = (unsigned char *)pp->pattern; 773 *cp != '\0'; cp++) { 774 *cp = tolower(*cp); 775 } 776 } 777 } 778 /* 779 * fgrep: No regular expressions. 780 */ 781 continue; 782 } 783 784 /* 785 * For non-fgrep, compile the regular expression, 786 * give an informative error message, and exit if 787 * it didn't compile. 788 */ 789 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { 790 (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); 791 (void) fprintf(stderr, 792 gettext("%s: RE error in %s: %s\n"), 793 cmdname, pp->pattern, errstr); 794 exit(2); 795 } 796 free(pp->pattern); 797 } 798 799 /* 800 * Decide if we are able to run the Boyer-Moore-Gosper algorithm. 801 * Use the Boyer-Moore-Gosper algorithm if: 802 * - fgrep (Fflag) 803 * - singlebyte locale (!mblocale) 804 * - no ignoring case (!iflag) 805 * - no printing line numbers (!nflag) 806 * - no negating the output (nvflag) 807 * - only one pattern (npatterns == 1) 808 * - non zero length pattern (strlen(patterns->pattern) != 0) 809 * - no context required (conflag == 0) 810 * 811 * It's guaranteed patterns->pattern is still alive 812 * when Fflag && !mblocale. 813 */ 814 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && 815 (npatterns == 1) && (strlen(patterns->pattern) != 0) && 816 conflag == 0; 817 } 818 819 /* 820 * Search a newline from the beginning of the string 821 */ 822 static char * 823 find_nl(const char *ptr, size_t len) 824 { 825 while (len-- != 0) { 826 if (*ptr++ == '\n') { 827 return ((char *)--ptr); 828 } 829 } 830 return (NULL); 831 } 832 833 /* 834 * Search a newline from the end of the string 835 */ 836 static char * 837 rfind_nl(const char *ptr, size_t len) 838 { 839 const char *uptr = ptr + len; 840 while (len--) { 841 if (*--uptr == '\n') { 842 return ((char *)uptr); 843 } 844 } 845 return (NULL); 846 } 847 848 /* 849 * Duplicate the specified string converting each character 850 * into a lower case. 851 */ 852 static char * 853 istrdup(const char *s1) 854 { 855 static size_t ibuflen = 0; 856 static char *ibuf = NULL; 857 size_t slen; 858 char *p; 859 860 slen = strlen(s1); 861 if (slen >= ibuflen) { 862 /* ibuf does not fit to s1 */ 863 ibuflen = slen + 1; 864 ibuf = realloc(ibuf, ibuflen); 865 if (ibuf == NULL) { 866 (void) fprintf(stderr, 867 gettext("%s: out of memory\n"), cmdname); 868 exit(2); 869 } 870 } 871 p = ibuf; 872 do { 873 *p++ = tolower(*s1); 874 } while (*s1++ != '\0'); 875 return (ibuf); 876 } 877 878 /* 879 * Do grep on a single file. 880 * Return true in any lines matched. 881 * 882 * We have two strategies: 883 * The fast one is used when we have a single pattern with 884 * a string known to occur in the pattern. We can then 885 * do a BMG match on the whole buffer. 886 * This is an order of magnitude faster. 887 * Otherwise we split the buffer into lines, 888 * and check for a match on each line. 889 */ 890 static int 891 grep(int fd, const char *fn) 892 { 893 PATTERN *pp; 894 off_t data_len; /* length of the data chunk */ 895 off_t line_len; /* length of the current line */ 896 off_t line_offset; /* current line's offset from the beginning */ 897 off_t blkoffset; /* line_offset but context-compatible */ 898 long long lineno, linenum; 899 long long matches = 0; /* Number of matching lines */ 900 long long conacnt = 0, conbcnt = 0; /* context line count */ 901 int newlinep; /* 0 if the last line of file has no newline */ 902 char *ptr, *ptrend, *prntptr, *prntptrend; 903 char *nextptr = NULL, *nextend = NULL; 904 char *conptr = NULL, *conptrend = NULL; 905 char *matchptr = NULL; 906 int conaprnt = 0, conbprnt = 0, lastmatch = 0; 907 boolean_t nearmatch; /* w/in N+1 of last match */ 908 boolean_t havematch = B_FALSE; /* have a match in context */ 909 size_t prntlen; 910 911 if (patterns == NULL) 912 return (0); /* no patterns to match -- just return */ 913 914 pp = patterns; 915 916 if (use_bmg) { 917 bmgcomp(pp->pattern, strlen(pp->pattern)); 918 } 919 920 if (use_wchar && outline == NULL) { 921 outbuflen = BUFSIZE + 1; 922 outline = malloc(sizeof (wchar_t) * outbuflen); 923 if (outline == NULL) { 924 (void) fprintf(stderr, gettext("%s: out of memory\n"), 925 cmdname); 926 exit(2); 927 } 928 } 929 930 if (prntbuf == NULL) { 931 prntbuflen = BUFSIZE; 932 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { 933 (void) fprintf(stderr, gettext("%s: out of memory\n"), 934 cmdname); 935 exit(2); 936 } 937 } 938 939 if (conflag != 0 && (conbuf == NULL)) { 940 conbuflen = BUFSIZE; 941 if ((conbuf = malloc(BUFSIZE+1)) == NULL) { 942 (void) fprintf(stderr, gettext("%s: out of memory\n"), 943 cmdname); 944 exit(2); 945 } 946 } 947 948 nearmatch = (conmatches != 0); 949 blkoffset = line_offset = 0; 950 lineno = 0; 951 linenum = 1; 952 newlinep = 1; 953 data_len = 0; 954 for (; ; ) { 955 long count; 956 off_t offset = 0; 957 char separate; 958 boolean_t last_ctx = B_FALSE, eof = B_FALSE; 959 960 if (data_len == 0) { 961 /* 962 * If no data in the buffer, reset ptr 963 */ 964 ptr = prntbuf; 965 if (conflag != 0 && conptr == NULL) { 966 conptr = conbuf; 967 conptrend = conptr - 1; 968 } 969 } 970 if (ptr == prntbuf) { 971 /* 972 * The current data chunk starts from prntbuf. 973 * This means either the buffer has no data 974 * or the buffer has no newline. 975 * So, read more data from input. 976 */ 977 count = read(fd, ptr + data_len, prntbuflen - data_len); 978 if (count < 0) { 979 /* read error */ 980 if (cflag) { 981 if (outfn && !rflag) { 982 (void) fprintf(stdout, 983 "%s:", fn); 984 } 985 if (!qflag && !rflag) { 986 (void) fprintf(stdout, "%lld\n", 987 matches); 988 } 989 } 990 return (0); 991 } else if (count == 0) { 992 /* no new data */ 993 eof = B_TRUE; 994 995 if (data_len == 0) { 996 /* end of file already reached */ 997 if (conflag != 0) { 998 if (conptrend >= conptr) 999 *conptrend = '\n'; 1000 last_ctx = B_TRUE; 1001 goto L_next_line; 1002 } else { 1003 goto out; 1004 } 1005 } 1006 /* last line of file has no newline */ 1007 ptrend = ptr + data_len; 1008 newlinep = 0; 1009 goto L_start_process; 1010 } 1011 offset = data_len; 1012 data_len += count; 1013 } 1014 1015 /* 1016 * Look for newline in the chunk 1017 * between ptr + offset and ptr + data_len - offset. 1018 */ 1019 ptrend = find_nl(ptr + offset, data_len - offset); 1020 if (ptrend == NULL) { 1021 /* no newline found in this chunk */ 1022 if (ptr > prntbuf) { 1023 /* 1024 * Move remaining data to the beginning 1025 * of the buffer. 1026 * Remaining data lie from ptr for 1027 * data_len bytes. 1028 */ 1029 (void) memmove(prntbuf, ptr, data_len); 1030 } 1031 if (data_len == prntbuflen) { 1032 /* 1033 * Not enough room in the buffer 1034 */ 1035 if (prntbuflen > SIZE_MAX - BUFSIZE) { 1036 (void) fprintf(stderr, 1037 gettext("%s: buflen would" 1038 " overflow\n"), 1039 cmdname); 1040 exit(2); 1041 } 1042 1043 prntbuflen += BUFSIZE; 1044 prntbuf = realloc(prntbuf, prntbuflen + 1); 1045 if (prntbuf == NULL) { 1046 (void) fprintf(stderr, 1047 gettext("%s: out of memory\n"), 1048 cmdname); 1049 exit(2); 1050 } 1051 } 1052 ptr = prntbuf; 1053 /* read the next input */ 1054 continue; 1055 } 1056 L_start_process: 1057 1058 /* 1059 * Beginning of the chunk: ptr 1060 * End of the chunk: ptr + data_len 1061 * Beginning of the line: ptr 1062 * End of the line: ptrend 1063 * 1064 * conptr: Beginning of the context. 1065 * conptrend: If context is empty, conptr - 1 (invalid memory). 1066 * Otherwise, Last newline in the context. 1067 */ 1068 1069 if (use_bmg) { 1070 /* 1071 * Use Boyer-Moore-Gosper algorithm to find out if 1072 * this chunk (not this line) contains the specified 1073 * pattern. If not, restart from the last line 1074 * of this chunk. 1075 */ 1076 char *bline; 1077 bline = bmgexec(ptr, ptr + data_len); 1078 if (bline == NULL) { 1079 /* 1080 * No pattern found in this chunk. 1081 * Need to find the last line 1082 * in this chunk. 1083 */ 1084 ptrend = rfind_nl(ptr, data_len); 1085 1086 /* 1087 * When this chunk does not contain newline, 1088 * ptrend becomes NULL, which should happen 1089 * when the last line of file does not end 1090 * with a newline. At such a point, 1091 * newlinep should have been set to 0. 1092 * Therefore, just after jumping to 1093 * L_skip_line, the main for-loop quits, 1094 * and the line_len value won't be 1095 * used. 1096 */ 1097 line_len = ptrend - ptr; 1098 goto L_skip_line; 1099 } 1100 if (bline > ptrend) { 1101 /* 1102 * Pattern found not in the first line 1103 * of this chunk. 1104 * Discard the first line. 1105 */ 1106 line_len = ptrend - ptr; 1107 goto L_skip_line; 1108 } 1109 /* 1110 * Pattern found in the first line of this chunk. 1111 * Using this result. 1112 */ 1113 *ptrend = '\0'; 1114 line_len = ptrend - ptr; 1115 1116 /* 1117 * before jumping to L_next_line, 1118 * need to handle xflag if specified 1119 */ 1120 if (xflag && (line_len != bmglen || 1121 strcmp(bmgpat, ptr) != 0)) { 1122 /* didn't match */ 1123 pp = NULL; 1124 } else { 1125 pp = patterns; /* to make it happen */ 1126 } 1127 goto L_next_line; 1128 } 1129 lineno++; 1130 /* 1131 * Line starts from ptr and ends at ptrend. 1132 * line_len will be the length of the line. 1133 */ 1134 *ptrend = '\0'; 1135 line_len = ptrend - ptr; 1136 1137 /* 1138 * From now, the process will be performed based 1139 * on the line from ptr to ptrend. 1140 */ 1141 if (use_wchar) { 1142 size_t len; 1143 1144 if (line_len >= outbuflen) { 1145 outbuflen = line_len + 1; 1146 outline = realloc(outline, 1147 sizeof (wchar_t) * outbuflen); 1148 if (outline == NULL) { 1149 (void) fprintf(stderr, 1150 gettext("%s: out of memory\n"), 1151 cmdname); 1152 exit(2); 1153 } 1154 } 1155 1156 len = mbstowcs(outline, ptr, line_len); 1157 if (len == (size_t)-1) { 1158 (void) fprintf(stderr, gettext( 1159 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1160 cmdname, fn, lineno); 1161 /* never match a line with invalid sequence */ 1162 goto L_skip_line; 1163 } 1164 outline[len] = L'\0'; 1165 1166 if (iflag) { 1167 wchar_t *cp; 1168 for (cp = outline; *cp != '\0'; cp++) { 1169 *cp = towlower((wint_t)*cp); 1170 } 1171 } 1172 1173 if (xflag) { 1174 for (pp = patterns; pp; pp = pp->next) { 1175 if (outline[0] == pp->wpattern[0] && 1176 wcscmp(outline, 1177 pp->wpattern) == 0) { 1178 /* matched */ 1179 break; 1180 } 1181 } 1182 } else { 1183 for (pp = patterns; pp; pp = pp->next) { 1184 if (wcswcs(outline, pp->wpattern) 1185 != NULL) { 1186 /* matched */ 1187 break; 1188 } 1189 } 1190 } 1191 } else if (Fflag) { 1192 /* fgrep in byte-oriented handling */ 1193 char *fptr; 1194 if (iflag) { 1195 fptr = istrdup(ptr); 1196 } else { 1197 fptr = ptr; 1198 } 1199 if (xflag) { 1200 /* fgrep -x */ 1201 for (pp = patterns; pp; pp = pp->next) { 1202 if (fptr[0] == pp->pattern[0] && 1203 strcmp(fptr, pp->pattern) == 0) { 1204 /* matched */ 1205 break; 1206 } 1207 } 1208 } else { 1209 for (pp = patterns; pp; pp = pp->next) { 1210 if (strstr(fptr, pp->pattern) != NULL) { 1211 /* matched */ 1212 break; 1213 } 1214 } 1215 } 1216 } else { 1217 /* grep or egrep */ 1218 for (pp = patterns; pp; pp = pp->next) { 1219 int rv; 1220 1221 rv = regexec(&pp->re, ptr, 0, NULL, 0); 1222 if (rv == REG_OK) { 1223 /* matched */ 1224 break; 1225 } 1226 1227 switch (rv) { 1228 case REG_NOMATCH: 1229 break; 1230 case REG_ECHAR: 1231 (void) fprintf(stderr, gettext( 1232 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1233 cmdname, fn, lineno); 1234 break; 1235 default: 1236 (void) regerror(rv, &pp->re, errstr, 1237 sizeof (errstr)); 1238 (void) fprintf(stderr, gettext( 1239 "%s: input file \"%s\": line %lld: %s\n"), 1240 cmdname, fn, lineno, errstr); 1241 exit(2); 1242 } 1243 } 1244 } 1245 1246 /* 1247 * Context is set up as follows: 1248 * For a 'Before' context, we maintain a set of pointers 1249 * containing 'N' lines of context. If the current number of 1250 * lines contained is greater than N, and N isn't a match, the 1251 * start pointer is moved forward to the next newline. 1252 * 1253 * If we ever find a match, we print out immediately. 1254 * 'nearmatch' tells us if we're within N+1 lines of the last 1255 * match ; if we are, and we find another match, we don't 1256 * separate the matches. 'nearmatch' becomes false when 1257 * a line gets rotated out of the context. 1258 * 1259 * For an 'After' context, we simply wait until we've found a 1260 * match, then create a context N+1 lines big. If we don't find 1261 * a match within the context, we print out the current context. 1262 * Otherwise, we save a reference to the new matching line, 1263 * print out the other context, and reset our context pointers 1264 * to the new matching line. 1265 * 1266 * 'nearmatch' becomes false when we find a non-matching line 1267 * that isn't a part of any context. 1268 * 1269 * A full-context is implemented as a combination of the 1270 * 'Before' and 'After' context logic. Before we find a match, 1271 * we follow the Before logic. When we find a match, we 1272 * follow the After logic. 'nearmatch' is handled by the Before 1273 * logic. 1274 */ 1275 1276 if (conflag == 0) 1277 goto L_next_line; 1278 1279 /* Do we have room to add this line to the context buffer? */ 1280 if ((line_len + 1) > (conbuflen - 1281 (conptrend >= conptr) ? conptrend - conbuf : 0)) { 1282 char *oldconbuf = conbuf; 1283 char *oldconptr = conptr; 1284 long tmp = matchptr - conptr; 1285 1286 if (conbuflen > SIZE_MAX - BUFSIZE) { 1287 (void) fprintf(stderr, 1288 gettext("%s: buflen would overflow\n"), 1289 cmdname); 1290 exit(2); 1291 } 1292 1293 conbuflen += BUFSIZE; 1294 conbuf = realloc(conbuf, conbuflen + 1); 1295 if (conbuf == NULL) { 1296 (void) fprintf(stderr, 1297 gettext("%s: out of memory\n"), 1298 cmdname); 1299 exit(2); 1300 } 1301 1302 conptr = conbuf + (conptr - oldconbuf); 1303 conptrend = conptr + (conptrend - oldconptr); 1304 if (matchptr) 1305 matchptr = conptr + tmp; 1306 } 1307 (void) memcpy(conptrend + 1, ptr, line_len); 1308 conptrend += line_len + 1; 1309 *conptrend = '\n'; 1310 1311 if (nvflag == (pp != NULL)) { 1312 /* matched */ 1313 if (havematch) { 1314 if ((conflag & AFTER) != 0) { 1315 conaprnt = 1; 1316 nextend = conptrend; 1317 conptrend = conptr + lastmatch; 1318 nextptr = conptrend + 1; 1319 *nextend = '\n'; 1320 } 1321 } else { 1322 if (conflag == AFTER) { 1323 conptr = conptrend - (line_len); 1324 linenum = lineno; 1325 } 1326 blkoffset = line_offset - 1327 (conptrend - conptr - line_len); 1328 } 1329 1330 if (conflag == BEFORE) 1331 conbprnt = 1; 1332 1333 lastmatch = conptrend - conptr; 1334 havematch = B_TRUE; 1335 goto L_next_line; 1336 } 1337 1338 if (!havematch) { 1339 if ((conflag & BEFORE) != 0) { 1340 if (conbcnt >= conblen) { 1341 char *tmp = conptr; 1342 conptr = find_nl(conptr, 1343 conptrend - conptr) + 1; 1344 if (bflag) 1345 blkoffset += conptr - tmp; 1346 linenum++; 1347 nearmatch = B_TRUE; 1348 } else { 1349 conbcnt++; 1350 } 1351 } 1352 if (conflag == AFTER) 1353 nearmatch = B_TRUE; 1354 } else { 1355 if (++conacnt >= conalen && !conaprnt && conalen) 1356 conaprnt = 1; 1357 else 1358 lastmatch = conptrend - conptr; 1359 } 1360 1361 L_next_line: 1362 /* 1363 * Here, if pp points to non-NULL, something has been matched 1364 * to the pattern. 1365 */ 1366 if (!last_ctx && nvflag == (pp != NULL)) { 1367 matches++; 1368 if (!nextend) 1369 matchptr = (conflag != 0) ? conptrend : ptrend; 1370 } 1371 1372 /* 1373 * Set up some print context so that we can treat 1374 * single-line matches as a zero-N context. 1375 * Apply CLI flags to each line of the context. 1376 * 1377 * For context, we only print if we both have a match and are 1378 * either at the end of the data stream, or we've previously 1379 * declared that we want to print for a particular context. 1380 */ 1381 if (havematch && (eof || conaprnt || conbprnt)) { 1382 1383 /* 1384 * We'd normally do this earlier, but we had to 1385 * escape early because we reached the end of the data. 1386 */ 1387 if (eof && nextptr) 1388 conptrend = nextend; 1389 1390 prntlen = conptrend - conptr + 1; 1391 prntptr = conptr; 1392 if (conmatches++ && nearmatch && !cflag) 1393 (void) fwrite("--\n", 1, 3, stdout); 1394 } else if (conflag == 0 && nvflag == (pp != NULL)) { 1395 *ptrend = '\n'; 1396 prntlen = line_len + 1; 1397 prntptr = ptr; 1398 linenum = lineno; 1399 blkoffset = line_offset; 1400 } else if (eof) { 1401 /* No match and no more data */ 1402 goto out; 1403 } else { 1404 /* No match, or we're not done building context */ 1405 goto L_skip_line; 1406 } 1407 1408 prntptrend = prntptr - 1; 1409 while ((prntptrend = find_nl(prntptrend + 1, 1410 prntlen)) != NULL) { 1411 1412 /* 1413 * GNU grep uses '-' for context lines and ':' for 1414 * matching lines, so replicate that here. 1415 */ 1416 if (prntptrend == matchptr) { 1417 if (eof && nextptr) { 1418 matchptr = nextend; 1419 nextptr = NULL; 1420 } else { 1421 matchptr = NULL; 1422 } 1423 separate = ':'; 1424 } else { 1425 separate = '-'; 1426 } 1427 1428 /* 1429 * Handle q, l, and c flags. 1430 */ 1431 if (qflag) { 1432 /* no need to continue */ 1433 /* 1434 * End of this line is ptrend. 1435 * We have read up to ptr + data_len. 1436 */ 1437 off_t pos; 1438 pos = ptr + data_len - (ptrend + 1); 1439 (void) lseek(fd, -pos, SEEK_CUR); 1440 exit(0); 1441 } 1442 if (lflag) { 1443 (void) printf("%s\n", fn); 1444 goto out; 1445 } 1446 if (!cflag) { 1447 if (Hflag || outfn) { 1448 (void) printf("%s%c", fn, separate); 1449 } 1450 if (bflag) { 1451 (void) printf("%lld%c", (offset_t) 1452 (blkoffset / BSIZE), separate); 1453 } 1454 if (nflag) { 1455 (void) printf("%lld%c", linenum, 1456 separate); 1457 } 1458 (void) fwrite(prntptr, 1, 1459 prntptrend - prntptr + 1, stdout); 1460 } 1461 if (ferror(stdout)) { 1462 return (0); 1463 } 1464 linenum++; 1465 prntlen -= prntptrend - prntptr + 1; 1466 blkoffset += prntptrend - prntptr + 1; 1467 prntptr = prntptrend + 1; 1468 } 1469 1470 if (eof) 1471 goto out; 1472 1473 /* 1474 * Update context buffer and variables post-print 1475 */ 1476 if (conflag != 0) { 1477 conptr = conbuf; 1478 conaprnt = conbprnt = 0; 1479 nearmatch = B_FALSE; 1480 conacnt = conbcnt = 0; 1481 1482 if (nextptr) { 1483 (void) memmove(conbuf, nextptr, 1484 nextend - nextptr + 1); 1485 blkoffset += nextptr - conptrend - 1; 1486 conptrend = conptr + (nextend - nextptr); 1487 matchptr = conptrend; 1488 linenum = lineno; 1489 lastmatch = conptrend - conptr; 1490 havematch = B_TRUE; 1491 } else { 1492 conptrend = conptr - 1; 1493 conacnt = 0; 1494 lastmatch = 0; 1495 havematch = B_FALSE; 1496 } 1497 nextptr = nextend = NULL; 1498 } 1499 1500 L_skip_line: 1501 if (!newlinep) 1502 break; 1503 1504 data_len -= line_len + 1; 1505 line_offset += line_len + 1; 1506 ptr = ptrend + 1; 1507 } 1508 1509 out: 1510 if (cflag) { 1511 if (Hflag || outfn) { 1512 (void) printf("%s:", fn); 1513 } 1514 if (!qflag) { 1515 (void) printf("%lld\n", matches); 1516 } 1517 } 1518 return (matches != 0); 1519 } 1520 1521 /* 1522 * usage message for grep 1523 */ 1524 static void 1525 usage(void) 1526 { 1527 (void) fprintf(stderr, gettext("usage: %5s"), cmdname); 1528 if (!egrep && !fgrep) 1529 (void) fprintf(stderr, gettext(" [-E|-F]")); 1530 (void) fprintf(stderr, gettext(" [-bchHilnqrRsvx] [-A num] [-B num] " 1531 "[-C num|-num]\n [-e pattern_list]... " 1532 "[-f pattern_file]... [pattern_list] [file]...\n")); 1533 exit(2); 1534 /* NOTREACHED */ 1535 } 1536 1537 /* 1538 * Compile literal pattern into BMG tables 1539 */ 1540 static void 1541 bmgcomp(char *pat, int len) 1542 { 1543 int i; 1544 int tlen; 1545 unsigned char *uc = (unsigned char *)pat; 1546 1547 bmglen = len; 1548 bmgpat = pat; 1549 1550 for (i = 0; i < M_CSETSIZE; i++) { 1551 bmgtab[i] = len; 1552 } 1553 1554 len--; 1555 for (tlen = len, i = 0; i <= len; i++, tlen--) { 1556 bmgtab[*uc++] = tlen; 1557 } 1558 } 1559 1560 /* 1561 * BMG search. 1562 */ 1563 static char * 1564 bmgexec(char *str, char *end) 1565 { 1566 int t; 1567 char *k, *s, *p; 1568 1569 k = str + bmglen - 1; 1570 if (bmglen == 1) { 1571 return (memchr(str, bmgpat[0], end - str)); 1572 } 1573 for (; ; ) { 1574 /* inner loop, should be most optimized */ 1575 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) { 1576 k += t; 1577 } 1578 if (k >= end) { 1579 return (NULL); 1580 } 1581 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) { 1582 if (p == bmgpat) { 1583 return (s); 1584 } 1585 } 1586 k++; 1587 } 1588 /* NOTREACHED */ 1589 } 1590