/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * grep - pattern matching program - combined grep, egrep, and fgrep.
 * Based on MKS grep command, with XCU & Solaris mods.
 */

/*
 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
 *
 */

/* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */

/*
 * Copyright 2013 Damian Bogel. All rights reserved.
 */

#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdarg.h>
#include <regex.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <locale.h>
#include <wchar.h>
#include <errno.h>
#include <unistd.h>
#include <wctype.h>
#include <ftw.h>
#include <sys/param.h>

/* File name reported for matches when the input is standard input */
#define	STDIN_FILENAME	gettext("(standard input)")

#define	BSIZE		512		/* Size of block for -b */
#define	BUFSIZE		8192		/* Input buffer size and growth step */
#define	MAX_DEPTH	1000		/* depth argument passed to nftw() */

#define	M_CSETSIZE	256		/* singlebyte chars */
static int	bmglen;			/* length of BMG pattern */
static char	*bmgpat;		/* BMG pattern */
static int	bmgtab[M_CSETSIZE];	/* BMG delta1 table */

/*
 * One entry of the singly linked pattern list.  addpattern() pushes new
 * entries on the front of the list; fixpatterns() later fills in either
 * wpattern or re depending on the matching mode.
 */
typedef	struct	_PATTERN	{
	char	*pattern;		/* original pattern */
	wchar_t	*wpattern;		/* wide pattern, lowercased if -i */
	struct	_PATTERN	*next;
	regex_t	re;			/* compiled pattern */
} PATTERN;

static PATTERN	*patterns;		/* head of the pattern list */
static char	errstr[128];		/* regerror string buffer */
static int	regflags = 0;		/* regcomp options */
static int	matched = 0;		/* nonzero once grep() found a match */
static int	errors = 0;		/* set to 1 on file errors: exit 2 */
static uchar_t	fgrep = 0;		/* Invoked as fgrep */
static uchar_t	egrep = 0;		/* Invoked as egrep */
static uchar_t	nvflag = 1;		/* Print matching lines; 0 with -v */
static uchar_t	cflag;			/* Print only a count of matches */
static uchar_t	iflag;			/* Case insensitive matching */
static uchar_t	Hflag;			/* Precede lines by file name */
static uchar_t	hflag;			/* Suppress printing of filename */
static uchar_t	lflag;			/* Print file names of matches */
static uchar_t	nflag;			/* Precede lines by line number */
static uchar_t	rflag;			/* Search directories recursively */
static uchar_t	bflag;			/* Precede matches by block number */
static uchar_t	sflag;			/* Suppress file error messages */
static uchar_t	qflag;			/* Suppress standard output */
static uchar_t wflag; /* Search for expression as a word */ 98 static uchar_t xflag; /* Anchoring */ 99 static uchar_t Eflag; /* Egrep or -E flag */ 100 static uchar_t Fflag; /* Fgrep or -F flag */ 101 static uchar_t Rflag; /* Like rflag, but follow symlinks */ 102 static uchar_t outfn; /* Put out file name */ 103 static char *cmdname; 104 105 static int use_wchar, use_bmg, mblocale; 106 107 static size_t outbuflen, prntbuflen; 108 static char *prntbuf; 109 static wchar_t *outline; 110 111 static void addfile(const char *fn); 112 static void addpattern(char *s); 113 static void fixpatterns(void); 114 static void usage(void); 115 static int grep(int, const char *); 116 static void bmgcomp(char *, int); 117 static char *bmgexec(char *, char *); 118 static int recursive(const char *, const struct stat *, int, struct FTW *); 119 static void process_path(const char *); 120 static void process_file(const char *, int); 121 122 /* 123 * mainline for grep 124 */ 125 int 126 main(int argc, char **argv) 127 { 128 char *ap; 129 int c; 130 int fflag = 0; 131 int i, n_pattern = 0, n_file = 0; 132 char **pattern_list = NULL; 133 char **file_list = NULL; 134 135 (void) setlocale(LC_ALL, ""); 136 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 137 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 138 #endif 139 (void) textdomain(TEXT_DOMAIN); 140 141 /* 142 * true if this is running on the multibyte locale 143 */ 144 mblocale = (MB_CUR_MAX > 1); 145 /* 146 * Skip leading slashes 147 */ 148 cmdname = argv[0]; 149 if (ap = strrchr(cmdname, '/')) 150 cmdname = ap + 1; 151 152 ap = cmdname; 153 /* 154 * Detect egrep/fgrep via command name, map to -E and -F options. 
155 */ 156 if (*ap == 'e' || *ap == 'E') { 157 regflags |= REG_EXTENDED; 158 egrep++; 159 } else { 160 if (*ap == 'f' || *ap == 'F') { 161 fgrep++; 162 } 163 } 164 165 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) { 166 switch (c) { 167 case 'v': /* POSIX: negate matches */ 168 nvflag = 0; 169 break; 170 171 case 'c': /* POSIX: write count */ 172 cflag++; 173 break; 174 175 case 'i': /* POSIX: ignore case */ 176 iflag++; 177 regflags |= REG_ICASE; 178 break; 179 180 case 'l': /* POSIX: Write filenames only */ 181 lflag++; 182 break; 183 184 case 'n': /* POSIX: Write line numbers */ 185 nflag++; 186 break; 187 188 case 'r': /* Solaris: search recursively */ 189 rflag++; 190 break; 191 192 case 'b': /* Solaris: Write file block numbers */ 193 bflag++; 194 break; 195 196 case 's': /* POSIX: No error msgs for files */ 197 sflag++; 198 break; 199 200 case 'e': /* POSIX: pattern list */ 201 n_pattern++; 202 pattern_list = realloc(pattern_list, 203 sizeof (char *) * n_pattern); 204 if (pattern_list == NULL) { 205 (void) fprintf(stderr, 206 gettext("%s: out of memory\n"), 207 cmdname); 208 exit(2); 209 } 210 *(pattern_list + n_pattern - 1) = optarg; 211 break; 212 213 case 'f': /* POSIX: pattern file */ 214 fflag = 1; 215 n_file++; 216 file_list = realloc(file_list, 217 sizeof (char *) * n_file); 218 if (file_list == NULL) { 219 (void) fprintf(stderr, 220 gettext("%s: out of memory\n"), 221 cmdname); 222 exit(2); 223 } 224 *(file_list + n_file - 1) = optarg; 225 break; 226 227 /* based on options order h or H is set as in GNU grep */ 228 case 'h': /* Solaris: supress printing of file name */ 229 hflag = 1; 230 Hflag = 0; 231 break; 232 /* Solaris: precede every matching with file name */ 233 case 'H': 234 Hflag = 1; 235 hflag = 0; 236 break; 237 238 case 'q': /* POSIX: quiet: status only */ 239 qflag++; 240 break; 241 242 case 'w': /* Solaris: treat pattern as word */ 243 wflag++; 244 break; 245 246 case 'x': /* POSIX: full line matches */ 247 xflag++; 
248 regflags |= REG_ANCHOR; 249 break; 250 251 case 'E': /* POSIX: Extended RE's */ 252 regflags |= REG_EXTENDED; 253 Eflag++; 254 break; 255 256 case 'F': /* POSIX: strings, not RE's */ 257 Fflag++; 258 break; 259 260 case 'R': /* Solaris: like rflag, but follow symlinks */ 261 Rflag++; 262 rflag++; 263 break; 264 265 default: 266 usage(); 267 } 268 } 269 /* 270 * If we're invoked as egrep or fgrep we need to do some checks 271 */ 272 273 if (egrep || fgrep) { 274 /* 275 * Use of -E or -F with egrep or fgrep is illegal 276 */ 277 if (Eflag || Fflag) 278 usage(); 279 /* 280 * Don't allow use of wflag with egrep / fgrep 281 */ 282 if (wflag) 283 usage(); 284 /* 285 * For Solaris the -s flag is equivalent to XCU -q 286 */ 287 if (sflag) 288 qflag++; 289 /* 290 * done with above checks - set the appropriate flags 291 */ 292 if (egrep) 293 Eflag++; 294 else /* Else fgrep */ 295 Fflag++; 296 } 297 298 if (wflag && (Eflag || Fflag)) { 299 /* 300 * -w cannot be specified with grep -F 301 */ 302 usage(); 303 } 304 305 /* 306 * -E and -F flags are mutually exclusive - check for this 307 */ 308 if (Eflag && Fflag) 309 usage(); 310 311 /* 312 * -l overrides -H like in GNU grep 313 */ 314 if (lflag) 315 Hflag = 0; 316 317 /* 318 * -c, -l and -q flags are mutually exclusive 319 * We have -c override -l like in Solaris. 320 * -q overrides -l & -c programmatically in grep() function. 321 */ 322 if (cflag && lflag) 323 lflag = 0; 324 325 argv += optind - 1; 326 argc -= optind - 1; 327 328 /* 329 * Now handling -e and -f option 330 */ 331 if (pattern_list) { 332 for (i = 0; i < n_pattern; i++) { 333 addpattern(pattern_list[i]); 334 } 335 free(pattern_list); 336 } 337 if (file_list) { 338 for (i = 0; i < n_file; i++) { 339 addfile(file_list[i]); 340 } 341 free(file_list); 342 } 343 344 /* 345 * No -e or -f? Make sure there is one more arg, use it as the pattern. 
346 */ 347 if (patterns == NULL && !fflag) { 348 if (argc < 2) 349 usage(); 350 addpattern(argv[1]); 351 argc--; 352 argv++; 353 } 354 355 /* 356 * If -x flag is not specified or -i flag is specified 357 * with fgrep in a multibyte locale, need to use 358 * the wide character APIs. Otherwise, byte-oriented 359 * process will be done. 360 */ 361 use_wchar = Fflag && mblocale && (!xflag || iflag); 362 363 /* 364 * Compile Patterns and also decide if BMG can be used 365 */ 366 fixpatterns(); 367 368 /* Process all files: stdin, or rest of arg list */ 369 if (argc < 2) { 370 matched = grep(0, STDIN_FILENAME); 371 } else { 372 if (Hflag || (argc > 2 && hflag == 0)) 373 outfn = 1; /* Print filename on match line */ 374 for (argv++; *argv != NULL; argv++) { 375 process_path(*argv); 376 } 377 } 378 /* 379 * Return() here is used instead of exit 380 */ 381 382 (void) fflush(stdout); 383 384 if (errors) 385 return (2); 386 return (matched ? 0 : 1); 387 } 388 389 static void 390 process_path(const char *path) 391 { 392 struct stat st; 393 int walkflags = FTW_CHDIR; 394 char *buf = NULL; 395 396 if (rflag) { 397 if (stat(path, &st) != -1 && 398 (st.st_mode & S_IFMT) == S_IFDIR) { 399 outfn = 1; /* Print filename */ 400 401 /* 402 * Add trailing slash if arg 403 * is directory, to resolve symlinks. 404 */ 405 if (path[strlen(path) - 1] != '/') { 406 (void) asprintf(&buf, "%s/", path); 407 if (buf != NULL) 408 path = buf; 409 } 410 411 /* 412 * Search through subdirs if path is directory. 413 * Don't follow symlinks if Rflag is not set. 414 */ 415 if (!Rflag) 416 walkflags |= FTW_PHYS; 417 418 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) { 419 if (!sflag) 420 (void) fprintf(stderr, 421 gettext("%s: can't open \"%s\"\n"), 422 cmdname, path); 423 errors = 1; 424 } 425 return; 426 } 427 } 428 process_file(path, 0); 429 } 430 431 /* 432 * Read and process all files in directory recursively. 
433 */ 434 static int 435 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw) 436 { 437 /* 438 * Process files and follow symlinks if Rflag set. 439 */ 440 if (info != FTW_F) { 441 /* Report broken symlinks and unreadable files */ 442 if (!sflag && 443 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) { 444 (void) fprintf(stderr, 445 gettext("%s: can't open \"%s\"\n"), cmdname, name); 446 } 447 return (0); 448 } 449 450 451 /* Skip devices and pipes if Rflag is not set */ 452 if (!Rflag && !S_ISREG(statp->st_mode)) 453 return (0); 454 /* Pass offset to relative name from FTW_CHDIR */ 455 process_file(name, ftw->base); 456 return (0); 457 } 458 459 /* 460 * Opens file and call grep function. 461 */ 462 static void 463 process_file(const char *name, int base) 464 { 465 int fd; 466 467 if ((fd = open(name + base, O_RDONLY)) == -1) { 468 errors = 1; 469 if (!sflag) /* Silent mode */ 470 (void) fprintf(stderr, gettext( 471 "%s: can't open \"%s\"\n"), 472 cmdname, name); 473 return; 474 } 475 matched |= grep(fd, name); 476 (void) close(fd); 477 478 if (ferror(stdout)) { 479 (void) fprintf(stderr, gettext( 480 "%s: error writing to stdout\n"), 481 cmdname); 482 (void) fflush(stdout); 483 exit(2); 484 } 485 486 } 487 488 /* 489 * Add a file of strings to the pattern list. 
490 */ 491 static void 492 addfile(const char *fn) 493 { 494 FILE *fp; 495 char *inbuf; 496 char *bufp; 497 size_t bufsiz, buflen, bufused; 498 499 /* 500 * Open the pattern file 501 */ 502 if ((fp = fopen(fn, "r")) == NULL) { 503 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), 504 cmdname, fn); 505 exit(2); 506 } 507 bufsiz = BUFSIZE; 508 if ((inbuf = malloc(bufsiz)) == NULL) { 509 (void) fprintf(stderr, 510 gettext("%s: out of memory\n"), cmdname); 511 exit(2); 512 } 513 bufp = inbuf; 514 bufused = 0; 515 /* 516 * Read in the file, reallocing as we need more memory 517 */ 518 while (fgets(bufp, bufsiz - bufused, fp) != NULL) { 519 buflen = strlen(bufp); 520 bufused += buflen; 521 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') { 522 /* 523 * if this line does not fit to the buffer, 524 * realloc larger buffer 525 */ 526 bufsiz += BUFSIZE; 527 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) { 528 (void) fprintf(stderr, 529 gettext("%s: out of memory\n"), 530 cmdname); 531 exit(2); 532 } 533 bufp = inbuf + bufused; 534 continue; 535 } 536 if (bufp[buflen - 1] == '\n') { 537 bufp[--buflen] = '\0'; 538 } 539 addpattern(inbuf); 540 541 bufp = inbuf; 542 bufused = 0; 543 } 544 free(inbuf); 545 (void) fclose(fp); 546 } 547 548 /* 549 * Add a string to the pattern list. 550 */ 551 static void 552 addpattern(char *s) 553 { 554 PATTERN *pp; 555 char *wordbuf; 556 char *np; 557 558 for (; ; ) { 559 np = strchr(s, '\n'); 560 if (np != NULL) 561 *np = '\0'; 562 if ((pp = malloc(sizeof (PATTERN))) == NULL) { 563 (void) fprintf(stderr, gettext( 564 "%s: out of memory\n"), 565 cmdname); 566 exit(2); 567 } 568 if (wflag) { 569 /* 570 * Solaris wflag support: Add '<' '>' to pattern to 571 * select it as a word. Doesn't make sense with -F 572 * but we're Libertarian. 
573 */ 574 size_t slen, wordlen; 575 576 slen = strlen(s); 577 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */ 578 if ((wordbuf = malloc(wordlen)) == NULL) { 579 (void) fprintf(stderr, 580 gettext("%s: out of memory\n"), 581 cmdname); 582 exit(2); 583 } 584 (void) strcpy(wordbuf, "\\<"); 585 (void) strcpy(wordbuf + 2, s); 586 (void) strcpy(wordbuf + 2 + slen, "\\>"); 587 } else { 588 if ((wordbuf = strdup(s)) == NULL) { 589 (void) fprintf(stderr, 590 gettext("%s: out of memory\n"), 591 cmdname); 592 exit(2); 593 } 594 } 595 pp->pattern = wordbuf; 596 pp->next = patterns; 597 patterns = pp; 598 if (np == NULL) 599 break; 600 s = np + 1; 601 } 602 } 603 604 /* 605 * Fix patterns. 606 * Must do after all arguments read, in case later -i option. 607 */ 608 static void 609 fixpatterns(void) 610 { 611 PATTERN *pp; 612 int rv, fix_pattern, npatterns; 613 614 /* 615 * As REG_ANCHOR flag is not supported in the current Solaris, 616 * need to fix the specified pattern if -x is specified with 617 * grep or egrep 618 */ 619 fix_pattern = !Fflag && xflag; 620 621 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) { 622 npatterns++; 623 if (fix_pattern) { 624 char *cp, *cq; 625 size_t plen, nplen; 626 627 plen = strlen(pp->pattern); 628 /* '^' pattern '$' */ 629 nplen = 1 + plen + 1 + 1; 630 if ((cp = malloc(nplen)) == NULL) { 631 (void) fprintf(stderr, 632 gettext("%s: out of memory\n"), 633 cmdname); 634 exit(2); 635 } 636 cq = cp; 637 *cq++ = '^'; 638 cq = strcpy(cq, pp->pattern) + plen; 639 *cq++ = '$'; 640 *cq = '\0'; 641 free(pp->pattern); 642 pp->pattern = cp; 643 } 644 645 if (Fflag) { 646 if (use_wchar) { 647 /* 648 * Fflag && mblocale && iflag 649 * Fflag && mblocale && !xflag 650 */ 651 size_t n; 652 n = strlen(pp->pattern) + 1; 653 if ((pp->wpattern = 654 malloc(sizeof (wchar_t) * n)) == NULL) { 655 (void) fprintf(stderr, 656 gettext("%s: out of memory\n"), 657 cmdname); 658 exit(2); 659 } 660 if (mbstowcs(pp->wpattern, pp->pattern, n) == 661 
(size_t)-1) { 662 (void) fprintf(stderr, 663 gettext("%s: failed to convert " 664 "\"%s\" to wide-characters\n"), 665 cmdname, pp->pattern); 666 exit(2); 667 } 668 if (iflag) { 669 wchar_t *wp; 670 for (wp = pp->wpattern; *wp != L'\0'; 671 wp++) { 672 *wp = towlower((wint_t)*wp); 673 } 674 } 675 free(pp->pattern); 676 } else { 677 /* 678 * Fflag && mblocale && !iflag 679 * Fflag && !mblocale && iflag 680 * Fflag && !mblocale && !iflag 681 */ 682 if (iflag) { 683 unsigned char *cp; 684 for (cp = (unsigned char *)pp->pattern; 685 *cp != '\0'; cp++) { 686 *cp = tolower(*cp); 687 } 688 } 689 } 690 /* 691 * fgrep: No regular expressions. 692 */ 693 continue; 694 } 695 696 /* 697 * For non-fgrep, compile the regular expression, 698 * give an informative error message, and exit if 699 * it didn't compile. 700 */ 701 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { 702 (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); 703 (void) fprintf(stderr, 704 gettext("%s: RE error in %s: %s\n"), 705 cmdname, pp->pattern, errstr); 706 exit(2); 707 } 708 free(pp->pattern); 709 } 710 711 /* 712 * Decide if we are able to run the Boyer-Moore-Gosper algorithm. 713 * Use the Boyer-Moore-Gosper algorithm if: 714 * - fgrep (Fflag) 715 * - singlebyte locale (!mblocale) 716 * - no ignoring case (!iflag) 717 * - no printing line numbers (!nflag) 718 * - no negating the output (nvflag) 719 * - only one pattern (npatterns == 1) 720 * - non zero length pattern (strlen(patterns->pattern) != 0) 721 * 722 * It's guaranteed patterns->pattern is still alive 723 * when Fflag && !mblocale. 
724 */ 725 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && 726 (npatterns == 1) && (strlen(patterns->pattern) != 0); 727 } 728 729 /* 730 * Search a newline from the beginning of the string 731 */ 732 static char * 733 find_nl(const char *ptr, size_t len) 734 { 735 while (len-- != 0) { 736 if (*ptr++ == '\n') { 737 return ((char *)--ptr); 738 } 739 } 740 return (NULL); 741 } 742 743 /* 744 * Search a newline from the end of the string 745 */ 746 static char * 747 rfind_nl(const char *ptr, size_t len) 748 { 749 const char *uptr = ptr + len; 750 while (len--) { 751 if (*--uptr == '\n') { 752 return ((char *)uptr); 753 } 754 } 755 return (NULL); 756 } 757 758 /* 759 * Duplicate the specified string converting each character 760 * into a lower case. 761 */ 762 static char * 763 istrdup(const char *s1) 764 { 765 static size_t ibuflen = 0; 766 static char *ibuf = NULL; 767 size_t slen; 768 char *p; 769 770 slen = strlen(s1); 771 if (slen >= ibuflen) { 772 /* ibuf does not fit to s1 */ 773 ibuflen = slen + 1; 774 ibuf = realloc(ibuf, ibuflen); 775 if (ibuf == NULL) { 776 (void) fprintf(stderr, 777 gettext("%s: out of memory\n"), cmdname); 778 exit(2); 779 } 780 } 781 p = ibuf; 782 do { 783 *p++ = tolower(*s1); 784 } while (*s1++ != '\0'); 785 return (ibuf); 786 } 787 788 /* 789 * Do grep on a single file. 790 * Return true in any lines matched. 791 * 792 * We have two strategies: 793 * The fast one is used when we have a single pattern with 794 * a string known to occur in the pattern. We can then 795 * do a BMG match on the whole buffer. 796 * This is an order of magnitude faster. 797 * Otherwise we split the buffer into lines, 798 * and check for a match on each line. 
799 */ 800 static int 801 grep(int fd, const char *fn) 802 { 803 PATTERN *pp; 804 off_t data_len; /* length of the data chunk */ 805 off_t line_len; /* length of the current line */ 806 off_t line_offset; /* current line's offset from the beginning */ 807 long long lineno; 808 long long matches = 0; /* Number of matching lines */ 809 int newlinep; /* 0 if the last line of file has no newline */ 810 char *ptr, *ptrend; 811 812 813 if (patterns == NULL) 814 return (0); /* no patterns to match -- just return */ 815 816 pp = patterns; 817 818 if (use_bmg) { 819 bmgcomp(pp->pattern, strlen(pp->pattern)); 820 } 821 822 if (use_wchar && outline == NULL) { 823 outbuflen = BUFSIZE + 1; 824 outline = malloc(sizeof (wchar_t) * outbuflen); 825 if (outline == NULL) { 826 (void) fprintf(stderr, gettext("%s: out of memory\n"), 827 cmdname); 828 exit(2); 829 } 830 } 831 832 if (prntbuf == NULL) { 833 prntbuflen = BUFSIZE; 834 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { 835 (void) fprintf(stderr, gettext("%s: out of memory\n"), 836 cmdname); 837 exit(2); 838 } 839 } 840 841 line_offset = 0; 842 lineno = 0; 843 newlinep = 1; 844 data_len = 0; 845 for (; ; ) { 846 long count; 847 off_t offset = 0; 848 849 if (data_len == 0) { 850 /* 851 * If no data in the buffer, reset ptr 852 */ 853 ptr = prntbuf; 854 } 855 if (ptr == prntbuf) { 856 /* 857 * The current data chunk starts from prntbuf. 858 * This means either the buffer has no data 859 * or the buffer has no newline. 860 * So, read more data from input. 
861 */ 862 count = read(fd, ptr + data_len, prntbuflen - data_len); 863 if (count < 0) { 864 /* read error */ 865 if (cflag) { 866 if (outfn && !rflag) { 867 (void) fprintf(stdout, 868 "%s:", fn); 869 } 870 if (!qflag && !rflag) { 871 (void) fprintf(stdout, "%lld\n", 872 matches); 873 } 874 } 875 return (0); 876 } else if (count == 0) { 877 /* no new data */ 878 if (data_len == 0) { 879 /* end of file already reached */ 880 break; 881 } 882 /* last line of file has no newline */ 883 ptrend = ptr + data_len; 884 newlinep = 0; 885 goto L_start_process; 886 } 887 offset = data_len; 888 data_len += count; 889 } 890 891 /* 892 * Look for newline in the chunk 893 * between ptr + offset and ptr + data_len - offset. 894 */ 895 ptrend = find_nl(ptr + offset, data_len - offset); 896 if (ptrend == NULL) { 897 /* no newline found in this chunk */ 898 if (ptr > prntbuf) { 899 /* 900 * Move remaining data to the beginning 901 * of the buffer. 902 * Remaining data lie from ptr for 903 * data_len bytes. 904 */ 905 (void) memmove(prntbuf, ptr, data_len); 906 } 907 if (data_len == prntbuflen) { 908 /* 909 * No enough room in the buffer 910 */ 911 prntbuflen += BUFSIZE; 912 prntbuf = realloc(prntbuf, prntbuflen + 1); 913 if (prntbuf == NULL) { 914 (void) fprintf(stderr, 915 gettext("%s: out of memory\n"), 916 cmdname); 917 exit(2); 918 } 919 } 920 ptr = prntbuf; 921 /* read the next input */ 922 continue; 923 } 924 L_start_process: 925 926 /* 927 * Beginning of the chunk: ptr 928 * End of the chunk: ptr + data_len 929 * Beginning of the line: ptr 930 * End of the line: ptrend 931 */ 932 933 if (use_bmg) { 934 /* 935 * Use Boyer-Moore-Gosper algorithm to find out if 936 * this chunk (not this line) contains the specified 937 * pattern. If not, restart from the last line 938 * of this chunk. 939 */ 940 char *bline; 941 bline = bmgexec(ptr, ptr + data_len); 942 if (bline == NULL) { 943 /* 944 * No pattern found in this chunk. 945 * Need to find the last line 946 * in this chunk. 
947 */ 948 ptrend = rfind_nl(ptr, data_len); 949 950 /* 951 * When this chunk does not contain newline, 952 * ptrend becomes NULL, which should happen 953 * when the last line of file does not end 954 * with a newline. At such a point, 955 * newlinep should have been set to 0. 956 * Therefore, just after jumping to 957 * L_skip_line, the main for-loop quits, 958 * and the line_len value won't be 959 * used. 960 */ 961 line_len = ptrend - ptr; 962 goto L_skip_line; 963 } 964 if (bline > ptrend) { 965 /* 966 * Pattern found not in the first line 967 * of this chunk. 968 * Discard the first line. 969 */ 970 line_len = ptrend - ptr; 971 goto L_skip_line; 972 } 973 /* 974 * Pattern found in the first line of this chunk. 975 * Using this result. 976 */ 977 *ptrend = '\0'; 978 line_len = ptrend - ptr; 979 980 /* 981 * before jumping to L_next_line, 982 * need to handle xflag if specified 983 */ 984 if (xflag && (line_len != bmglen || 985 strcmp(bmgpat, ptr) != 0)) { 986 /* didn't match */ 987 pp = NULL; 988 } else { 989 pp = patterns; /* to make it happen */ 990 } 991 goto L_next_line; 992 } 993 lineno++; 994 /* 995 * Line starts from ptr and ends at ptrend. 996 * line_len will be the length of the line. 997 */ 998 *ptrend = '\0'; 999 line_len = ptrend - ptr; 1000 1001 /* 1002 * From now, the process will be performed based 1003 * on the line from ptr to ptrend. 
1004 */ 1005 if (use_wchar) { 1006 size_t len; 1007 1008 if (line_len >= outbuflen) { 1009 outbuflen = line_len + 1; 1010 outline = realloc(outline, 1011 sizeof (wchar_t) * outbuflen); 1012 if (outline == NULL) { 1013 (void) fprintf(stderr, 1014 gettext("%s: out of memory\n"), 1015 cmdname); 1016 exit(2); 1017 } 1018 } 1019 1020 len = mbstowcs(outline, ptr, line_len); 1021 if (len == (size_t)-1) { 1022 (void) fprintf(stderr, gettext( 1023 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1024 cmdname, fn, lineno); 1025 /* never match a line with invalid sequence */ 1026 goto L_skip_line; 1027 } 1028 outline[len] = L'\0'; 1029 1030 if (iflag) { 1031 wchar_t *cp; 1032 for (cp = outline; *cp != '\0'; cp++) { 1033 *cp = towlower((wint_t)*cp); 1034 } 1035 } 1036 1037 if (xflag) { 1038 for (pp = patterns; pp; pp = pp->next) { 1039 if (outline[0] == pp->wpattern[0] && 1040 wcscmp(outline, 1041 pp->wpattern) == 0) { 1042 /* matched */ 1043 break; 1044 } 1045 } 1046 } else { 1047 for (pp = patterns; pp; pp = pp->next) { 1048 if (wcswcs(outline, pp->wpattern) 1049 != NULL) { 1050 /* matched */ 1051 break; 1052 } 1053 } 1054 } 1055 } else if (Fflag) { 1056 /* fgrep in byte-oriented handling */ 1057 char *fptr; 1058 if (iflag) { 1059 fptr = istrdup(ptr); 1060 } else { 1061 fptr = ptr; 1062 } 1063 if (xflag) { 1064 /* fgrep -x */ 1065 for (pp = patterns; pp; pp = pp->next) { 1066 if (fptr[0] == pp->pattern[0] && 1067 strcmp(fptr, pp->pattern) == 0) { 1068 /* matched */ 1069 break; 1070 } 1071 } 1072 } else { 1073 for (pp = patterns; pp; pp = pp->next) { 1074 if (strstr(fptr, pp->pattern) != NULL) { 1075 /* matched */ 1076 break; 1077 } 1078 } 1079 } 1080 } else { 1081 /* grep or egrep */ 1082 for (pp = patterns; pp; pp = pp->next) { 1083 int rv; 1084 1085 rv = regexec(&pp->re, ptr, 0, NULL, 0); 1086 if (rv == REG_OK) { 1087 /* matched */ 1088 break; 1089 } 1090 1091 switch (rv) { 1092 case REG_NOMATCH: 1093 break; 1094 case REG_ECHAR: 1095 (void) 
fprintf(stderr, gettext( 1096 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1097 cmdname, fn, lineno); 1098 break; 1099 default: 1100 (void) regerror(rv, &pp->re, errstr, 1101 sizeof (errstr)); 1102 (void) fprintf(stderr, gettext( 1103 "%s: input file \"%s\": line %lld: %s\n"), 1104 cmdname, fn, lineno, errstr); 1105 exit(2); 1106 } 1107 } 1108 } 1109 1110 L_next_line: 1111 /* 1112 * Here, if pp points to non-NULL, something has been matched 1113 * to the pattern. 1114 */ 1115 if (nvflag == (pp != NULL)) { 1116 matches++; 1117 /* 1118 * Handle q, l, and c flags. 1119 */ 1120 if (qflag) { 1121 /* no need to continue */ 1122 /* 1123 * End of this line is ptrend. 1124 * We have read up to ptr + data_len. 1125 */ 1126 off_t pos; 1127 pos = ptr + data_len - (ptrend + 1); 1128 (void) lseek(fd, -pos, SEEK_CUR); 1129 exit(0); 1130 } 1131 if (lflag) { 1132 (void) printf("%s\n", fn); 1133 break; 1134 } 1135 if (!cflag) { 1136 if (Hflag || outfn) { 1137 (void) printf("%s:", fn); 1138 } 1139 if (bflag) { 1140 (void) printf("%lld:", (offset_t) 1141 (line_offset / BSIZE)); 1142 } 1143 if (nflag) { 1144 (void) printf("%lld:", lineno); 1145 } 1146 *ptrend = '\n'; 1147 (void) fwrite(ptr, 1, line_len + 1, stdout); 1148 } 1149 if (ferror(stdout)) { 1150 return (0); 1151 } 1152 } 1153 L_skip_line: 1154 if (!newlinep) 1155 break; 1156 1157 data_len -= line_len + 1; 1158 line_offset += line_len + 1; 1159 ptr = ptrend + 1; 1160 } 1161 1162 if (cflag) { 1163 if (Hflag || outfn) { 1164 (void) printf("%s:", fn); 1165 } 1166 if (!qflag) { 1167 (void) printf("%lld\n", matches); 1168 } 1169 } 1170 return (matches != 0); 1171 } 1172 1173 /* 1174 * usage message for grep 1175 */ 1176 static void 1177 usage(void) 1178 { 1179 if (egrep || fgrep) { 1180 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1181 (void) fprintf(stderr, 1182 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1183 "pattern_list [file ...]\n")); 1184 1185 (void) fprintf(stderr, "\t%s", cmdname); 1186 
(void) fprintf(stderr, 1187 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1188 "[-e pattern_list]... " 1189 "[-f pattern_file]... [file...]\n")); 1190 } else { 1191 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1192 (void) fprintf(stderr, 1193 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] " 1194 "pattern_list [file ...]\n")); 1195 1196 (void) fprintf(stderr, "\t%s", cmdname); 1197 (void) fprintf(stderr, 1198 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] " 1199 "[-e pattern_list]... " 1200 "[-f pattern_file]... [file...]\n")); 1201 1202 (void) fprintf(stderr, "\t%s", cmdname); 1203 (void) fprintf(stderr, 1204 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1205 "pattern_list [file ...]\n")); 1206 1207 (void) fprintf(stderr, "\t%s", cmdname); 1208 (void) fprintf(stderr, 1209 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1210 "[-e pattern_list]... " 1211 "[-f pattern_file]... [file...]\n")); 1212 1213 (void) fprintf(stderr, "\t%s", cmdname); 1214 (void) fprintf(stderr, 1215 gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] " 1216 "pattern_list [file ...]\n")); 1217 1218 (void) fprintf(stderr, "\t%s", cmdname); 1219 (void) fprintf(stderr, 1220 gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... " 1221 "[-f pattern_file]... [file...]\n")); 1222 } 1223 exit(2); 1224 /* NOTREACHED */ 1225 } 1226 1227 /* 1228 * Compile literal pattern into BMG tables 1229 */ 1230 static void 1231 bmgcomp(char *pat, int len) 1232 { 1233 int i; 1234 int tlen; 1235 unsigned char *uc = (unsigned char *)pat; 1236 1237 bmglen = len; 1238 bmgpat = pat; 1239 1240 for (i = 0; i < M_CSETSIZE; i++) { 1241 bmgtab[i] = len; 1242 } 1243 1244 len--; 1245 for (tlen = len, i = 0; i <= len; i++, tlen--) { 1246 bmgtab[*uc++] = tlen; 1247 } 1248 } 1249 1250 /* 1251 * BMG search. 
1252 */ 1253 static char * 1254 bmgexec(char *str, char *end) 1255 { 1256 int t; 1257 char *k, *s, *p; 1258 1259 k = str + bmglen - 1; 1260 if (bmglen == 1) { 1261 return (memchr(str, bmgpat[0], end - str)); 1262 } 1263 for (; ; ) { 1264 /* inner loop, should be most optimized */ 1265 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) { 1266 k += t; 1267 } 1268 if (k >= end) { 1269 return (NULL); 1270 } 1271 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) { 1272 if (p == bmgpat) { 1273 return (s); 1274 } 1275 } 1276 k++; 1277 } 1278 /* NOTREACHED */ 1279 } 1280