1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * grep - pattern matching program - combined grep, egrep, and fgrep. 29 * Based on MKS grep command, with XCU & Solaris mods. 30 */ 31 32 /* 33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved. 34 * 35 */ 36 37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ 38 39 #include <string.h> 40 #include <stdlib.h> 41 #include <ctype.h> 42 #include <stdarg.h> 43 #include <regex.h> 44 #include <limits.h> 45 #include <sys/types.h> 46 #include <sys/stat.h> 47 #include <fcntl.h> 48 #include <stdio.h> 49 #include <locale.h> 50 #include <wchar.h> 51 #include <errno.h> 52 #include <unistd.h> 53 #include <wctype.h> 54 #include <ftw.h> 55 #include <sys/param.h> 56 57 #define BSIZE 512 /* Size of block for -b */ 58 #define BUFSIZE 8192 /* Input buffer size */ 59 #define MAX_DEPTH 1000 /* how deep to recurse */ 60 61 #define M_CSETSIZE 256 /* singlebyte chars */ 62 static int bmglen; /* length of BMG pattern */ 63 static char *bmgpat; /* BMG pattern */ 64 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */ 65 66 typedef struct _PATTERN { 67 char *pattern; /* original pattern */ 68 wchar_t *wpattern; /* wide, lowercased pattern */ 69 struct _PATTERN *next; 70 regex_t re; /* compiled pattern */ 71 } PATTERN; 72 73 static PATTERN *patterns; 74 static char errstr[128]; /* regerror string buffer */ 75 static int regflags = 0; /* regcomp options */ 76 static int matched = 0; /* return of the grep() */ 77 static int errors = 0; /* count of errors */ 78 static uchar_t fgrep = 0; /* Invoked as fgrep */ 79 static uchar_t egrep = 0; /* Invoked as egrep */ 80 static uchar_t nvflag = 1; /* Print matching lines */ 81 static uchar_t cflag; /* Count of matches */ 82 static uchar_t iflag; /* Case insensitve matching */ 83 static uchar_t hflag; /* Supress printing of filename */ 84 static uchar_t lflag; /* Print file names of matches */ 85 static uchar_t nflag; /* Precede lines by line number */ 86 static uchar_t rflag; /* Search directories recursively */ 87 static uchar_t bflag; /* Preccede matches by block number */ 88 static uchar_t sflag; /* Suppress file error messages */ 89 static uchar_t qflag; /* Suppress standard output */ 90 static uchar_t wflag; /* Search for expression as a word */ 91 static uchar_t xflag; /* Anchoring */ 92 static uchar_t Eflag; /* Egrep or -E flag */ 93 static uchar_t Fflag; /* Fgrep or -F flag */ 94 static uchar_t Rflag; /* Like rflag, but follow symlinks */ 95 static uchar_t outfn; /* Put out file name */ 96 static char *cmdname; 97 98 static int use_wchar, use_bmg, mblocale; 99 100 static size_t outbuflen, prntbuflen; 101 static char *prntbuf; 102 static wchar_t *outline; 103 104 static void addfile(const char *fn); 105 static void addpattern(char *s); 106 static void fixpatterns(void); 107 static void usage(void); 108 static int grep(int, const char *); 109 static void bmgcomp(char *, int); 110 static char *bmgexec(char *, char *); 111 static int recursive(const char *, const struct stat *, int, struct FTW *); 112 static void process_path(const char *); 113 static void process_file(const char *, int); 114 115 /* 116 * mainline for grep 117 */ 118 int 119 main(int argc, char **argv) 120 { 121 char *ap; 122 int c; 123 int fflag = 0; 124 int i, n_pattern = 0, n_file = 0; 125 char **pattern_list = NULL; 126 char **file_list = NULL; 127 128 (void) setlocale(LC_ALL, ""); 129 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 130 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 131 #endif 132 (void) textdomain(TEXT_DOMAIN); 133 134 /* 135 * true if this is running on the multibyte locale 136 */ 137 mblocale = (MB_CUR_MAX > 1); 138 /* 139 * Skip leading slashes 140 */ 141 cmdname = argv[0]; 142 if (ap = strrchr(cmdname, '/')) 143 cmdname = ap + 1; 144 145 ap = cmdname; 146 /* 147 * Detect egrep/fgrep via command name, map to -E and -F options. 148 */ 149 if (*ap == 'e' || *ap == 'E') { 150 regflags |= REG_EXTENDED; 151 egrep++; 152 } else { 153 if (*ap == 'f' || *ap == 'F') { 154 fgrep++; 155 } 156 } 157 158 while ((c = getopt(argc, argv, "vwchilnrbse:f:qxEFIR")) != EOF) { 159 switch (c) { 160 case 'v': /* POSIX: negate matches */ 161 nvflag = 0; 162 break; 163 164 case 'c': /* POSIX: write count */ 165 cflag++; 166 break; 167 168 case 'i': /* POSIX: ignore case */ 169 iflag++; 170 regflags |= REG_ICASE; 171 break; 172 173 case 'l': /* POSIX: Write filenames only */ 174 lflag++; 175 break; 176 177 case 'n': /* POSIX: Write line numbers */ 178 nflag++; 179 break; 180 181 case 'r': /* Solaris: search recursively */ 182 rflag++; 183 break; 184 185 case 'b': /* Solaris: Write file block numbers */ 186 bflag++; 187 break; 188 189 case 's': /* POSIX: No error msgs for files */ 190 sflag++; 191 break; 192 193 case 'e': /* POSIX: pattern list */ 194 n_pattern++; 195 pattern_list = realloc(pattern_list, 196 sizeof (char *) * n_pattern); 197 if (pattern_list == NULL) { 198 (void) fprintf(stderr, 199 gettext("%s: out of memory\n"), 200 cmdname); 201 exit(2); 202 } 203 *(pattern_list + n_pattern - 1) = optarg; 204 break; 205 206 case 'f': /* POSIX: pattern file */ 207 fflag = 1; 208 n_file++; 209 file_list = realloc(file_list, 210 sizeof (char *) * n_file); 211 if (file_list == NULL) { 212 (void) fprintf(stderr, 213 gettext("%s: out of memory\n"), 214 cmdname); 215 exit(2); 216 } 217 *(file_list + n_file - 1) = optarg; 218 break; 219 case 'h': /* Solaris: supress printing of file name */ 220 hflag = 1; 221 break; 222 223 case 'q': /* POSIX: quiet: status only */ 224 qflag++; 225 break; 226 227 case 'w': /* Solaris: treat pattern as word */ 228 wflag++; 229 break; 230 231 case 'x': /* POSIX: full line matches */ 232 xflag++; 233 regflags |= REG_ANCHOR; 234 break; 235 236 case 'E': /* POSIX: Extended RE's */ 237 regflags |= REG_EXTENDED; 238 Eflag++; 239 break; 240 241 case 'F': /* POSIX: strings, not RE's */ 242 Fflag++; 243 break; 244 245 case 'R': /* Solaris: like rflag, but follow symlinks */ 246 Rflag++; 247 rflag++; 248 break; 249 250 default: 251 usage(); 252 } 253 } 254 /* 255 * If we're invoked as egrep or fgrep we need to do some checks 256 */ 257 258 if (egrep || fgrep) { 259 /* 260 * Use of -E or -F with egrep or fgrep is illegal 261 */ 262 if (Eflag || Fflag) 263 usage(); 264 /* 265 * Don't allow use of wflag with egrep / fgrep 266 */ 267 if (wflag) 268 usage(); 269 /* 270 * For Solaris the -s flag is equivalent to XCU -q 271 */ 272 if (sflag) 273 qflag++; 274 /* 275 * done with above checks - set the appropriate flags 276 */ 277 if (egrep) 278 Eflag++; 279 else /* Else fgrep */ 280 Fflag++; 281 } 282 283 if (wflag && (Eflag || Fflag)) { 284 /* 285 * -w cannot be specified with grep -F 286 */ 287 usage(); 288 } 289 290 /* 291 * -E and -F flags are mutually exclusive - check for this 292 */ 293 if (Eflag && Fflag) 294 usage(); 295 296 /* 297 * -c, -l and -q flags are mutually exclusive 298 * We have -c override -l like in Solaris. 299 * -q overrides -l & -c programmatically in grep() function. 300 */ 301 if (cflag && lflag) 302 lflag = 0; 303 304 argv += optind - 1; 305 argc -= optind - 1; 306 307 /* 308 * Now handling -e and -f option 309 */ 310 if (pattern_list) { 311 for (i = 0; i < n_pattern; i++) { 312 addpattern(pattern_list[i]); 313 } 314 free(pattern_list); 315 } 316 if (file_list) { 317 for (i = 0; i < n_file; i++) { 318 addfile(file_list[i]); 319 } 320 free(file_list); 321 } 322 323 /* 324 * No -e or -f? Make sure there is one more arg, use it as the pattern. 325 */ 326 if (patterns == NULL && !fflag) { 327 if (argc < 2) 328 usage(); 329 addpattern(argv[1]); 330 argc--; 331 argv++; 332 } 333 334 /* 335 * If -x flag is not specified or -i flag is specified 336 * with fgrep in a multibyte locale, need to use 337 * the wide character APIs. Otherwise, byte-oriented 338 * process will be done. 339 */ 340 use_wchar = Fflag && mblocale && (!xflag || iflag); 341 342 /* 343 * Compile Patterns and also decide if BMG can be used 344 */ 345 fixpatterns(); 346 347 /* Process all files: stdin, or rest of arg list */ 348 if (argc < 2) { 349 matched = grep(0, gettext("(standard input)")); 350 } else { 351 if (argc > 2 && hflag == 0) 352 outfn = 1; /* Print filename on match line */ 353 for (argv++; *argv != NULL; argv++) { 354 process_path(*argv); 355 } 356 } 357 /* 358 * Return() here is used instead of exit 359 */ 360 361 (void) fflush(stdout); 362 363 if (errors) 364 return (2); 365 return (matched ? 0 : 1); 366 } 367 368 static void 369 process_path(const char *path) 370 { 371 struct stat st; 372 int walkflags = FTW_CHDIR; 373 char *buf = NULL; 374 375 if (rflag) { 376 if (stat(path, &st) != -1 && 377 (st.st_mode & S_IFMT) == S_IFDIR) { 378 outfn = 1; /* Print filename */ 379 380 /* 381 * Add trailing slash if arg 382 * is directory, to resolve symlinks. 383 */ 384 if (path[strlen(path) - 1] != '/') { 385 (void) asprintf(&buf, "%s/", path); 386 if (buf != NULL) 387 path = buf; 388 } 389 390 /* 391 * Search through subdirs if path is directory. 392 * Don't follow symlinks if Rflag is not set. 393 */ 394 if (!Rflag) 395 walkflags |= FTW_PHYS; 396 397 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) { 398 if (!sflag) 399 (void) fprintf(stderr, 400 gettext("%s: can't open \"%s\"\n"), 401 cmdname, path); 402 errors = 1; 403 } 404 return; 405 } 406 } 407 process_file(path, 0); 408 } 409 410 /* 411 * Read and process all files in directory recursively. 412 */ 413 static int 414 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw) 415 { 416 /* 417 * Process files and follow symlinks if Rflag set. 418 */ 419 if (info != FTW_F) { 420 /* Report broken symlinks and unreadable files */ 421 if (!sflag && 422 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) { 423 (void) fprintf(stderr, 424 gettext("%s: can't open \"%s\"\n"), cmdname, name); 425 } 426 return (0); 427 } 428 429 430 /* Skip devices and pipes if Rflag is not set */ 431 if (!Rflag && !S_ISREG(statp->st_mode)) 432 return (0); 433 /* Pass offset to relative name from FTW_CHDIR */ 434 process_file(name, ftw->base); 435 return (0); 436 } 437 438 /* 439 * Opens file and call grep function. 440 */ 441 static void 442 process_file(const char *name, int base) 443 { 444 int fd; 445 446 if ((fd = open(name + base, O_RDONLY)) == -1) { 447 errors = 1; 448 if (!sflag) /* Silent mode */ 449 (void) fprintf(stderr, gettext( 450 "%s: can't open \"%s\"\n"), 451 cmdname, name); 452 return; 453 } 454 matched |= grep(fd, name); 455 (void) close(fd); 456 457 if (ferror(stdout)) { 458 (void) fprintf(stderr, gettext( 459 "%s: error writing to stdout\n"), 460 cmdname); 461 (void) fflush(stdout); 462 exit(2); 463 } 464 465 } 466 467 /* 468 * Add a file of strings to the pattern list. 469 */ 470 static void 471 addfile(const char *fn) 472 { 473 FILE *fp; 474 char *inbuf; 475 char *bufp; 476 size_t bufsiz, buflen, bufused; 477 478 /* 479 * Open the pattern file 480 */ 481 if ((fp = fopen(fn, "r")) == NULL) { 482 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), 483 cmdname, fn); 484 exit(2); 485 } 486 bufsiz = BUFSIZE; 487 if ((inbuf = malloc(bufsiz)) == NULL) { 488 (void) fprintf(stderr, 489 gettext("%s: out of memory\n"), cmdname); 490 exit(2); 491 } 492 bufp = inbuf; 493 bufused = 0; 494 /* 495 * Read in the file, reallocing as we need more memory 496 */ 497 while (fgets(bufp, bufsiz - bufused, fp) != NULL) { 498 buflen = strlen(bufp); 499 bufused += buflen; 500 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') { 501 /* 502 * if this line does not fit to the buffer, 503 * realloc larger buffer 504 */ 505 bufsiz += BUFSIZE; 506 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) { 507 (void) fprintf(stderr, 508 gettext("%s: out of memory\n"), 509 cmdname); 510 exit(2); 511 } 512 bufp = inbuf + bufused; 513 continue; 514 } 515 if (bufp[buflen - 1] == '\n') { 516 bufp[--buflen] = '\0'; 517 } 518 addpattern(inbuf); 519 520 bufp = inbuf; 521 bufused = 0; 522 } 523 free(inbuf); 524 (void) fclose(fp); 525 } 526 527 /* 528 * Add a string to the pattern list. 529 */ 530 static void 531 addpattern(char *s) 532 { 533 PATTERN *pp; 534 char *wordbuf; 535 char *np; 536 537 for (; ; ) { 538 np = strchr(s, '\n'); 539 if (np != NULL) 540 *np = '\0'; 541 if ((pp = malloc(sizeof (PATTERN))) == NULL) { 542 (void) fprintf(stderr, gettext( 543 "%s: out of memory\n"), 544 cmdname); 545 exit(2); 546 } 547 if (wflag) { 548 /* 549 * Solaris wflag support: Add '<' '>' to pattern to 550 * select it as a word. Doesn't make sense with -F 551 * but we're Libertarian. 552 */ 553 size_t slen, wordlen; 554 555 slen = strlen(s); 556 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */ 557 if ((wordbuf = malloc(wordlen)) == NULL) { 558 (void) fprintf(stderr, 559 gettext("%s: out of memory\n"), 560 cmdname); 561 exit(2); 562 } 563 (void) strcpy(wordbuf, "\\<"); 564 (void) strcpy(wordbuf + 2, s); 565 (void) strcpy(wordbuf + 2 + slen, "\\>"); 566 } else { 567 if ((wordbuf = strdup(s)) == NULL) { 568 (void) fprintf(stderr, 569 gettext("%s: out of memory\n"), 570 cmdname); 571 exit(2); 572 } 573 } 574 pp->pattern = wordbuf; 575 pp->next = patterns; 576 patterns = pp; 577 if (np == NULL) 578 break; 579 s = np + 1; 580 } 581 } 582 583 /* 584 * Fix patterns. 585 * Must do after all arguments read, in case later -i option. 586 */ 587 static void 588 fixpatterns(void) 589 { 590 PATTERN *pp; 591 int rv, fix_pattern, npatterns; 592 593 /* 594 * As REG_ANCHOR flag is not supported in the current Solaris, 595 * need to fix the specified pattern if -x is specified with 596 * grep or egrep 597 */ 598 fix_pattern = !Fflag && xflag; 599 600 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) { 601 npatterns++; 602 if (fix_pattern) { 603 char *cp, *cq; 604 size_t plen, nplen; 605 606 plen = strlen(pp->pattern); 607 /* '^' pattern '$' */ 608 nplen = 1 + plen + 1 + 1; 609 if ((cp = malloc(nplen)) == NULL) { 610 (void) fprintf(stderr, 611 gettext("%s: out of memory\n"), 612 cmdname); 613 exit(2); 614 } 615 cq = cp; 616 *cq++ = '^'; 617 cq = strcpy(cq, pp->pattern) + plen; 618 *cq++ = '$'; 619 *cq = '\0'; 620 free(pp->pattern); 621 pp->pattern = cp; 622 } 623 624 if (Fflag) { 625 if (use_wchar) { 626 /* 627 * Fflag && mblocale && iflag 628 * Fflag && mblocale && !xflag 629 */ 630 size_t n; 631 n = strlen(pp->pattern) + 1; 632 if ((pp->wpattern = 633 malloc(sizeof (wchar_t) * n)) == NULL) { 634 (void) fprintf(stderr, 635 gettext("%s: out of memory\n"), 636 cmdname); 637 exit(2); 638 } 639 if (mbstowcs(pp->wpattern, pp->pattern, n) == 640 (size_t)-1) { 641 (void) fprintf(stderr, 642 gettext("%s: failed to convert " 643 "\"%s\" to wide-characters\n"), 644 cmdname, pp->pattern); 645 exit(2); 646 } 647 if (iflag) { 648 wchar_t *wp; 649 for (wp = pp->wpattern; *wp != L'\0'; 650 wp++) { 651 *wp = towlower((wint_t)*wp); 652 } 653 } 654 free(pp->pattern); 655 } else { 656 /* 657 * Fflag && mblocale && !iflag 658 * Fflag && !mblocale && iflag 659 * Fflag && !mblocale && !iflag 660 */ 661 if (iflag) { 662 unsigned char *cp; 663 for (cp = (unsigned char *)pp->pattern; 664 *cp != '\0'; cp++) { 665 *cp = tolower(*cp); 666 } 667 } 668 } 669 /* 670 * fgrep: No regular expressions. 671 */ 672 continue; 673 } 674 675 /* 676 * For non-fgrep, compile the regular expression, 677 * give an informative error message, and exit if 678 * it didn't compile. 679 */ 680 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { 681 (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); 682 (void) fprintf(stderr, 683 gettext("%s: RE error in %s: %s\n"), 684 cmdname, pp->pattern, errstr); 685 exit(2); 686 } 687 free(pp->pattern); 688 } 689 690 /* 691 * Decide if we are able to run the Boyer-Moore-Gosper algorithm. 692 * Use the Boyer-Moore-Gosper algorithm if: 693 * - fgrep (Fflag) 694 * - singlebyte locale (!mblocale) 695 * - no ignoring case (!iflag) 696 * - no printing line numbers (!nflag) 697 * - no negating the output (nvflag) 698 * - only one pattern (npatterns == 1) 699 * - non zero length pattern (strlen(patterns->pattern) != 0) 700 * 701 * It's guaranteed patterns->pattern is still alive 702 * when Fflag && !mblocale. 703 */ 704 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && 705 (npatterns == 1) && (strlen(patterns->pattern) != 0); 706 } 707 708 /* 709 * Search a newline from the beginning of the string 710 */ 711 static char * 712 find_nl(const char *ptr, size_t len) 713 { 714 while (len-- != 0) { 715 if (*ptr++ == '\n') { 716 return ((char *)--ptr); 717 } 718 } 719 return (NULL); 720 } 721 722 /* 723 * Search a newline from the end of the string 724 */ 725 static char * 726 rfind_nl(const char *ptr, size_t len) 727 { 728 const char *uptr = ptr + len; 729 while (len--) { 730 if (*--uptr == '\n') { 731 return ((char *)uptr); 732 } 733 } 734 return (NULL); 735 } 736 737 /* 738 * Duplicate the specified string converting each character 739 * into a lower case. 740 */ 741 static char * 742 istrdup(const char *s1) 743 { 744 static size_t ibuflen = 0; 745 static char *ibuf = NULL; 746 size_t slen; 747 char *p; 748 749 slen = strlen(s1); 750 if (slen >= ibuflen) { 751 /* ibuf does not fit to s1 */ 752 ibuflen = slen + 1; 753 ibuf = realloc(ibuf, ibuflen); 754 if (ibuf == NULL) { 755 (void) fprintf(stderr, 756 gettext("%s: out of memory\n"), cmdname); 757 exit(2); 758 } 759 } 760 p = ibuf; 761 do { 762 *p++ = tolower(*s1); 763 } while (*s1++ != '\0'); 764 return (ibuf); 765 } 766 767 /* 768 * Do grep on a single file. 769 * Return true in any lines matched. 770 * 771 * We have two strategies: 772 * The fast one is used when we have a single pattern with 773 * a string known to occur in the pattern. We can then 774 * do a BMG match on the whole buffer. 775 * This is an order of magnitude faster. 776 * Otherwise we split the buffer into lines, 777 * and check for a match on each line. 778 */ 779 static int 780 grep(int fd, const char *fn) 781 { 782 PATTERN *pp; 783 off_t data_len; /* length of the data chunk */ 784 off_t line_len; /* length of the current line */ 785 off_t line_offset; /* current line's offset from the beginning */ 786 long long lineno; 787 long long matches = 0; /* Number of matching lines */ 788 int newlinep; /* 0 if the last line of file has no newline */ 789 char *ptr, *ptrend; 790 791 792 if (patterns == NULL) 793 return (0); /* no patterns to match -- just return */ 794 795 pp = patterns; 796 797 if (use_bmg) { 798 bmgcomp(pp->pattern, strlen(pp->pattern)); 799 } 800 801 if (use_wchar && outline == NULL) { 802 outbuflen = BUFSIZE + 1; 803 outline = malloc(sizeof (wchar_t) * outbuflen); 804 if (outline == NULL) { 805 (void) fprintf(stderr, gettext("%s: out of memory\n"), 806 cmdname); 807 exit(2); 808 } 809 } 810 811 if (prntbuf == NULL) { 812 prntbuflen = BUFSIZE; 813 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { 814 (void) fprintf(stderr, gettext("%s: out of memory\n"), 815 cmdname); 816 exit(2); 817 } 818 } 819 820 line_offset = 0; 821 lineno = 0; 822 newlinep = 1; 823 data_len = 0; 824 for (; ; ) { 825 long count; 826 off_t offset = 0; 827 828 if (data_len == 0) { 829 /* 830 * If no data in the buffer, reset ptr 831 */ 832 ptr = prntbuf; 833 } 834 if (ptr == prntbuf) { 835 /* 836 * The current data chunk starts from prntbuf. 837 * This means either the buffer has no data 838 * or the buffer has no newline. 839 * So, read more data from input. 840 */ 841 count = read(fd, ptr + data_len, prntbuflen - data_len); 842 if (count < 0) { 843 /* read error */ 844 if (cflag) { 845 if (outfn && !rflag) { 846 (void) fprintf(stdout, 847 "%s:", fn); 848 } 849 if (!qflag && !rflag) { 850 (void) fprintf(stdout, "%lld\n", 851 matches); 852 } 853 } 854 return (0); 855 } else if (count == 0) { 856 /* no new data */ 857 if (data_len == 0) { 858 /* end of file already reached */ 859 break; 860 } 861 /* last line of file has no newline */ 862 ptrend = ptr + data_len; 863 newlinep = 0; 864 goto L_start_process; 865 } 866 offset = data_len; 867 data_len += count; 868 } 869 870 /* 871 * Look for newline in the chunk 872 * between ptr + offset and ptr + data_len - offset. 873 */ 874 ptrend = find_nl(ptr + offset, data_len - offset); 875 if (ptrend == NULL) { 876 /* no newline found in this chunk */ 877 if (ptr > prntbuf) { 878 /* 879 * Move remaining data to the beginning 880 * of the buffer. 881 * Remaining data lie from ptr for 882 * data_len bytes. 883 */ 884 (void) memmove(prntbuf, ptr, data_len); 885 } 886 if (data_len == prntbuflen) { 887 /* 888 * No enough room in the buffer 889 */ 890 prntbuflen += BUFSIZE; 891 prntbuf = realloc(prntbuf, prntbuflen + 1); 892 if (prntbuf == NULL) { 893 (void) fprintf(stderr, 894 gettext("%s: out of memory\n"), 895 cmdname); 896 exit(2); 897 } 898 } 899 ptr = prntbuf; 900 /* read the next input */ 901 continue; 902 } 903 L_start_process: 904 905 /* 906 * Beginning of the chunk: ptr 907 * End of the chunk: ptr + data_len 908 * Beginning of the line: ptr 909 * End of the line: ptrend 910 */ 911 912 if (use_bmg) { 913 /* 914 * Use Boyer-Moore-Gosper algorithm to find out if 915 * this chunk (not this line) contains the specified 916 * pattern. If not, restart from the last line 917 * of this chunk. 918 */ 919 char *bline; 920 bline = bmgexec(ptr, ptr + data_len); 921 if (bline == NULL) { 922 /* 923 * No pattern found in this chunk. 924 * Need to find the last line 925 * in this chunk. 926 */ 927 ptrend = rfind_nl(ptr, data_len); 928 929 /* 930 * When this chunk does not contain newline, 931 * ptrend becomes NULL, which should happen 932 * when the last line of file does not end 933 * with a newline. At such a point, 934 * newlinep should have been set to 0. 935 * Therefore, just after jumping to 936 * L_skip_line, the main for-loop quits, 937 * and the line_len value won't be 938 * used. 939 */ 940 line_len = ptrend - ptr; 941 goto L_skip_line; 942 } 943 if (bline > ptrend) { 944 /* 945 * Pattern found not in the first line 946 * of this chunk. 947 * Discard the first line. 948 */ 949 line_len = ptrend - ptr; 950 goto L_skip_line; 951 } 952 /* 953 * Pattern found in the first line of this chunk. 954 * Using this result. 955 */ 956 *ptrend = '\0'; 957 line_len = ptrend - ptr; 958 959 /* 960 * before jumping to L_next_line, 961 * need to handle xflag if specified 962 */ 963 if (xflag && (line_len != bmglen || 964 strcmp(bmgpat, ptr) != 0)) { 965 /* didn't match */ 966 pp = NULL; 967 } else { 968 pp = patterns; /* to make it happen */ 969 } 970 goto L_next_line; 971 } 972 lineno++; 973 /* 974 * Line starts from ptr and ends at ptrend. 975 * line_len will be the length of the line. 976 */ 977 *ptrend = '\0'; 978 line_len = ptrend - ptr; 979 980 /* 981 * From now, the process will be performed based 982 * on the line from ptr to ptrend. 983 */ 984 if (use_wchar) { 985 size_t len; 986 987 if (line_len >= outbuflen) { 988 outbuflen = line_len + 1; 989 outline = realloc(outline, 990 sizeof (wchar_t) * outbuflen); 991 if (outline == NULL) { 992 (void) fprintf(stderr, 993 gettext("%s: out of memory\n"), 994 cmdname); 995 exit(2); 996 } 997 } 998 999 len = mbstowcs(outline, ptr, line_len); 1000 if (len == (size_t)-1) { 1001 (void) fprintf(stderr, gettext( 1002 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1003 cmdname, fn, lineno); 1004 /* never match a line with invalid sequence */ 1005 goto L_skip_line; 1006 } 1007 outline[len] = L'\0'; 1008 1009 if (iflag) { 1010 wchar_t *cp; 1011 for (cp = outline; *cp != '\0'; cp++) { 1012 *cp = towlower((wint_t)*cp); 1013 } 1014 } 1015 1016 if (xflag) { 1017 for (pp = patterns; pp; pp = pp->next) { 1018 if (outline[0] == pp->wpattern[0] && 1019 wcscmp(outline, 1020 pp->wpattern) == 0) { 1021 /* matched */ 1022 break; 1023 } 1024 } 1025 } else { 1026 for (pp = patterns; pp; pp = pp->next) { 1027 if (wcswcs(outline, pp->wpattern) 1028 != NULL) { 1029 /* matched */ 1030 break; 1031 } 1032 } 1033 } 1034 } else if (Fflag) { 1035 /* fgrep in byte-oriented handling */ 1036 char *fptr; 1037 if (iflag) { 1038 fptr = istrdup(ptr); 1039 } else { 1040 fptr = ptr; 1041 } 1042 if (xflag) { 1043 /* fgrep -x */ 1044 for (pp = patterns; pp; pp = pp->next) { 1045 if (fptr[0] == pp->pattern[0] && 1046 strcmp(fptr, pp->pattern) == 0) { 1047 /* matched */ 1048 break; 1049 } 1050 } 1051 } else { 1052 for (pp = patterns; pp; pp = pp->next) { 1053 if (strstr(fptr, pp->pattern) != NULL) { 1054 /* matched */ 1055 break; 1056 } 1057 } 1058 } 1059 } else { 1060 /* grep or egrep */ 1061 for (pp = patterns; pp; pp = pp->next) { 1062 int rv; 1063 1064 rv = regexec(&pp->re, ptr, 0, NULL, 0); 1065 if (rv == REG_OK) { 1066 /* matched */ 1067 break; 1068 } 1069 1070 switch (rv) { 1071 case REG_NOMATCH: 1072 break; 1073 case REG_ECHAR: 1074 (void) fprintf(stderr, gettext( 1075 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1076 cmdname, fn, lineno); 1077 break; 1078 default: 1079 (void) regerror(rv, &pp->re, errstr, 1080 sizeof (errstr)); 1081 (void) fprintf(stderr, gettext( 1082 "%s: input file \"%s\": line %lld: %s\n"), 1083 cmdname, fn, lineno, errstr); 1084 exit(2); 1085 } 1086 } 1087 } 1088 1089 L_next_line: 1090 /* 1091 * Here, if pp points to non-NULL, something has been matched 1092 * to the pattern. 1093 */ 1094 if (nvflag == (pp != NULL)) { 1095 matches++; 1096 /* 1097 * Handle q, l, and c flags. 1098 */ 1099 if (qflag) { 1100 /* no need to continue */ 1101 /* 1102 * End of this line is ptrend. 1103 * We have read up to ptr + data_len. 1104 */ 1105 off_t pos; 1106 pos = ptr + data_len - (ptrend + 1); 1107 (void) lseek(fd, -pos, SEEK_CUR); 1108 exit(0); 1109 } 1110 if (lflag) { 1111 (void) printf("%s\n", fn); 1112 break; 1113 } 1114 if (!cflag) { 1115 if (outfn) { 1116 (void) printf("%s:", fn); 1117 } 1118 if (bflag) { 1119 (void) printf("%lld:", (offset_t) 1120 (line_offset / BSIZE)); 1121 } 1122 if (nflag) { 1123 (void) printf("%lld:", lineno); 1124 } 1125 *ptrend = '\n'; 1126 (void) fwrite(ptr, 1, line_len + 1, stdout); 1127 } 1128 if (ferror(stdout)) { 1129 return (0); 1130 } 1131 } 1132 L_skip_line: 1133 if (!newlinep) 1134 break; 1135 1136 data_len -= line_len + 1; 1137 line_offset += line_len + 1; 1138 ptr = ptrend + 1; 1139 } 1140 1141 if (cflag) { 1142 if (outfn) { 1143 (void) printf("%s:", fn); 1144 } 1145 if (!qflag) { 1146 (void) printf("%lld\n", matches); 1147 } 1148 } 1149 return (matches != 0); 1150 } 1151 1152 /* 1153 * usage message for grep 1154 */ 1155 static void 1156 usage(void) 1157 { 1158 if (egrep || fgrep) { 1159 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1160 (void) fprintf(stderr, 1161 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] " 1162 "pattern_list [file ...]\n")); 1163 1164 (void) fprintf(stderr, "\t%s", cmdname); 1165 (void) fprintf(stderr, 1166 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] " 1167 "[-e pattern_list]... " 1168 "[-f pattern_file]... [file...]\n")); 1169 } else { 1170 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1171 (void) fprintf(stderr, 1172 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] " 1173 "pattern_list [file ...]\n")); 1174 1175 (void) fprintf(stderr, "\t%s", cmdname); 1176 (void) fprintf(stderr, 1177 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] " 1178 "[-e pattern_list]... " 1179 "[-f pattern_file]... [file...]\n")); 1180 1181 (void) fprintf(stderr, "\t%s", cmdname); 1182 (void) fprintf(stderr, 1183 gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] " 1184 "pattern_list [file ...]\n")); 1185 1186 (void) fprintf(stderr, "\t%s", cmdname); 1187 (void) fprintf(stderr, 1188 gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] " 1189 "[-e pattern_list]... " 1190 "[-f pattern_file]... [file...]\n")); 1191 1192 (void) fprintf(stderr, "\t%s", cmdname); 1193 (void) fprintf(stderr, 1194 gettext(" -F [-c|-l|-q] [-r|-R] [-bhinsvx] " 1195 "pattern_list [file ...]\n")); 1196 1197 (void) fprintf(stderr, "\t%s", cmdname); 1198 (void) fprintf(stderr, 1199 gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... " 1200 "[-f pattern_file]... [file...]\n")); 1201 } 1202 exit(2); 1203 /* NOTREACHED */ 1204 } 1205 1206 /* 1207 * Compile literal pattern into BMG tables 1208 */ 1209 static void 1210 bmgcomp(char *pat, int len) 1211 { 1212 int i; 1213 int tlen; 1214 unsigned char *uc = (unsigned char *)pat; 1215 1216 bmglen = len; 1217 bmgpat = pat; 1218 1219 for (i = 0; i < M_CSETSIZE; i++) { 1220 bmgtab[i] = len; 1221 } 1222 1223 len--; 1224 for (tlen = len, i = 0; i <= len; i++, tlen--) { 1225 bmgtab[*uc++] = tlen; 1226 } 1227 } 1228 1229 /* 1230 * BMG search. 1231 */ 1232 static char * 1233 bmgexec(char *str, char *end) 1234 { 1235 int t; 1236 char *k, *s, *p; 1237 1238 k = str + bmglen - 1; 1239 if (bmglen == 1) { 1240 return (memchr(str, bmgpat[0], end - str)); 1241 } 1242 for (; ; ) { 1243 /* inner loop, should be most optimized */ 1244 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) { 1245 k += t; 1246 } 1247 if (k >= end) { 1248 return (NULL); 1249 } 1250 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) { 1251 if (p == bmgpat) { 1252 return (s); 1253 } 1254 } 1255 k++; 1256 } 1257 /* NOTREACHED */ 1258 } 1259