1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * grep - pattern matching program - combined grep, egrep, and fgrep. 29 * Based on MKS grep command, with XCU & Solaris mods. 30 */ 31 32 /* 33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved. 34 * 35 */ 36 37 /* Copyright 2017 Nexenta Systems, Inc. All rights reserved. */ 38 39 /* 40 * Copyright 2013 Damian Bogel. All rights reserved. 41 */ 42 43 #include <string.h> 44 #include <stdlib.h> 45 #include <ctype.h> 46 #include <stdarg.h> 47 #include <regex.h> 48 #include <limits.h> 49 #include <sys/types.h> 50 #include <sys/stat.h> 51 #include <fcntl.h> 52 #include <stdio.h> 53 #include <locale.h> 54 #include <wchar.h> 55 #include <errno.h> 56 #include <unistd.h> 57 #include <wctype.h> 58 #include <ftw.h> 59 #include <sys/param.h> 60 61 #define STDIN_FILENAME gettext("(standard input)") 62 63 #define BSIZE 512 /* Size of block for -b */ 64 #define BUFSIZE 8192 /* Input buffer size */ 65 #define MAX_DEPTH 1000 /* how deep to recurse */ 66 67 #define AFTER 1 /* 'After' Context */ 68 #define BEFORE 2 /* 'Before' Context */ 69 #define CONTEXT (AFTER|BEFORE) /* Full Context */ 70 71 #define M_CSETSIZE 256 /* singlebyte chars */ 72 static int bmglen; /* length of BMG pattern */ 73 static char *bmgpat; /* BMG pattern */ 74 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */ 75 76 typedef struct _PATTERN { 77 char *pattern; /* original pattern */ 78 wchar_t *wpattern; /* wide, lowercased pattern */ 79 struct _PATTERN *next; 80 regex_t re; /* compiled pattern */ 81 } PATTERN; 82 83 static PATTERN *patterns; 84 static char errstr[128]; /* regerror string buffer */ 85 static int regflags = 0; /* regcomp options */ 86 static int matched = 0; /* return of the grep() */ 87 static int errors = 0; /* count of errors */ 88 static uchar_t fgrep = 0; /* Invoked as fgrep */ 89 static uchar_t egrep = 0; /* Invoked as egrep */ 90 static boolean_t nvflag = B_TRUE; /* Print matching lines */ 91 static uchar_t cflag; /* Count of matches */ 92 static uchar_t iflag; /* Case insensitve matching */ 93 static uchar_t Hflag; /* Precede lines by file name */ 94 static uchar_t hflag; /* Supress printing of filename */ 95 static uchar_t lflag; /* Print file names of matches */ 96 static uchar_t nflag; /* Precede lines by line number */ 97 static uchar_t rflag; /* Search directories recursively */ 98 static uchar_t bflag; /* Preccede matches by block number */ 99 static uchar_t sflag; /* Suppress file error messages */ 100 static uchar_t qflag; /* Suppress standard output */ 101 static uchar_t wflag; /* Search for expression as a word */ 102 static uchar_t xflag; /* Anchoring */ 103 static uchar_t Eflag; /* Egrep or -E flag */ 104 static uchar_t Fflag; /* Fgrep or -F flag */ 105 static uchar_t Rflag; /* Like rflag, but follow symlinks */ 106 static uchar_t outfn; /* Put out file name */ 107 static uchar_t conflag; /* show context of matches */ 108 static char *cmdname; 109 110 static int use_wchar, use_bmg, mblocale; 111 112 static size_t outbuflen, prntbuflen, conbuflen; 113 static unsigned long conalen, conblen, conmatches; 114 static char *prntbuf, *conbuf; 115 static wchar_t *outline; 116 117 static void addfile(const char *fn); 118 static void addpattern(char *s); 119 static void fixpatterns(void); 120 static void usage(void); 121 static int grep(int, const char *); 122 static void bmgcomp(char *, int); 123 static char *bmgexec(char *, char *); 124 static int recursive(const char *, const struct stat *, int, struct FTW *); 125 static void process_path(const char *); 126 static void process_file(const char *, int); 127 128 /* 129 * mainline for grep 130 */ 131 int 132 main(int argc, char **argv) 133 { 134 char *ap, *test; 135 int c; 136 int fflag = 0; 137 int i, n_pattern = 0, n_file = 0; 138 char **pattern_list = NULL; 139 char **file_list = NULL; 140 141 (void) setlocale(LC_ALL, ""); 142 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 143 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 144 #endif 145 (void) textdomain(TEXT_DOMAIN); 146 147 /* 148 * true if this is running on the multibyte locale 149 */ 150 mblocale = (MB_CUR_MAX > 1); 151 /* 152 * Skip leading slashes 153 */ 154 cmdname = argv[0]; 155 if (ap = strrchr(cmdname, '/')) 156 cmdname = ap + 1; 157 158 ap = cmdname; 159 /* 160 * Detect egrep/fgrep via command name, map to -E and -F options. 161 */ 162 if (*ap == 'e' || *ap == 'E') { 163 regflags |= REG_EXTENDED; 164 egrep++; 165 } else { 166 if (*ap == 'f' || *ap == 'F') { 167 fgrep++; 168 } 169 } 170 171 /* check for non-standard "-line-count" option */ 172 for (i = 1; i < argc; i++) { 173 if (strcmp(argv[i], "--") == 0) 174 break; 175 176 /* isdigit() check prevents negative arguments */ 177 if ((argv[i][0] == '-') && isdigit(argv[i][1])) { 178 if (strlen(&argv[i][1]) != 179 strspn(&argv[i][1], "0123456789")) { 180 (void) fprintf(stderr, gettext( 181 "%s: Bad number flag\n"), argv[0]); 182 usage(); 183 } 184 185 errno = 0; 186 conalen = conblen = strtoul(&argv[i][1], (char **)NULL, 187 10); 188 189 if (errno != 0 || conalen >= ULONG_MAX) { 190 (void) fprintf(stderr, gettext( 191 "%s: Bad context argument\n"), argv[0]); 192 } else if (conalen) 193 conflag = CONTEXT; 194 195 while (i < argc) { 196 argv[i] = argv[i + 1]; 197 i++; 198 } 199 argc--; 200 } 201 } 202 203 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIRA:B:C:")) != EOF) { 204 unsigned long tval; 205 switch (c) { 206 case 'v': /* POSIX: negate matches */ 207 nvflag = B_FALSE; 208 break; 209 210 case 'c': /* POSIX: write count */ 211 cflag++; 212 break; 213 214 case 'i': /* POSIX: ignore case */ 215 iflag++; 216 regflags |= REG_ICASE; 217 break; 218 219 case 'l': /* POSIX: Write filenames only */ 220 lflag++; 221 break; 222 223 case 'n': /* POSIX: Write line numbers */ 224 nflag++; 225 break; 226 227 case 'r': /* Solaris: search recursively */ 228 rflag++; 229 break; 230 231 case 'b': /* Solaris: Write file block numbers */ 232 bflag++; 233 break; 234 235 case 's': /* POSIX: No error msgs for files */ 236 sflag++; 237 break; 238 239 case 'e': /* POSIX: pattern list */ 240 n_pattern++; 241 pattern_list = realloc(pattern_list, 242 sizeof (char *) * n_pattern); 243 if (pattern_list == NULL) { 244 (void) fprintf(stderr, 245 gettext("%s: out of memory\n"), 246 cmdname); 247 exit(2); 248 } 249 *(pattern_list + n_pattern - 1) = optarg; 250 break; 251 252 case 'f': /* POSIX: pattern file */ 253 fflag = 1; 254 n_file++; 255 file_list = realloc(file_list, 256 sizeof (char *) * n_file); 257 if (file_list == NULL) { 258 (void) fprintf(stderr, 259 gettext("%s: out of memory\n"), 260 cmdname); 261 exit(2); 262 } 263 *(file_list + n_file - 1) = optarg; 264 break; 265 266 /* based on options order h or H is set as in GNU grep */ 267 case 'h': /* Solaris: supress printing of file name */ 268 hflag = 1; 269 Hflag = 0; 270 break; 271 /* Solaris: precede every matching with file name */ 272 case 'H': 273 Hflag = 1; 274 hflag = 0; 275 break; 276 277 case 'q': /* POSIX: quiet: status only */ 278 qflag++; 279 break; 280 281 case 'w': /* Solaris: treat pattern as word */ 282 wflag++; 283 break; 284 285 case 'x': /* POSIX: full line matches */ 286 xflag++; 287 regflags |= REG_ANCHOR; 288 break; 289 290 case 'E': /* POSIX: Extended RE's */ 291 regflags |= REG_EXTENDED; 292 Eflag++; 293 break; 294 295 case 'F': /* POSIX: strings, not RE's */ 296 Fflag++; 297 break; 298 299 case 'R': /* Solaris: like rflag, but follow symlinks */ 300 Rflag++; 301 rflag++; 302 break; 303 304 case 'A': /* print N lines after each match */ 305 errno = 0; 306 conalen = strtoul(optarg, &test, 10); 307 /* *test will be non-null if optarg is negative */ 308 if (errno != 0 || *test != '\0' || 309 conalen >= ULONG_MAX) { 310 (void) fprintf(stderr, gettext( 311 "%s: Bad context argument: %s\n"), 312 argv[0], optarg); 313 exit(2); 314 } 315 if (conalen) 316 conflag |= AFTER; 317 else 318 conflag &= ~AFTER; 319 break; 320 case 'B': /* print N lines before each match */ 321 errno = 0; 322 conblen = strtoul(optarg, &test, 10); 323 /* *test will be non-null if optarg is negative */ 324 if (errno != 0 || *test != '\0' || 325 conblen >= ULONG_MAX) { 326 (void) fprintf(stderr, gettext( 327 "%s: Bad context argument: %s\n"), 328 argv[0], optarg); 329 exit(2); 330 } 331 if (conblen) 332 conflag |= BEFORE; 333 else 334 conflag &= ~BEFORE; 335 break; 336 case 'C': /* print N lines around each match */ 337 errno = 0; 338 tval = strtoul(optarg, &test, 10); 339 /* *test will be non-null if optarg is negative */ 340 if (errno != 0 || *test != '\0' || tval >= ULONG_MAX) { 341 (void) fprintf(stderr, gettext( 342 "%s: Bad context argument: %s\n"), 343 argv[0], optarg); 344 exit(2); 345 } 346 if (tval) { 347 if ((conflag & BEFORE) == 0) 348 conblen = tval; 349 if ((conflag & AFTER) == 0) 350 conalen = tval; 351 conflag = CONTEXT; 352 } 353 break; 354 355 default: 356 usage(); 357 } 358 } 359 /* 360 * If we're invoked as egrep or fgrep we need to do some checks 361 */ 362 363 if (egrep || fgrep) { 364 /* 365 * Use of -E or -F with egrep or fgrep is illegal 366 */ 367 if (Eflag || Fflag) 368 usage(); 369 /* 370 * Don't allow use of wflag with egrep / fgrep 371 */ 372 if (wflag) 373 usage(); 374 /* 375 * For Solaris the -s flag is equivalent to XCU -q 376 */ 377 if (sflag) 378 qflag++; 379 /* 380 * done with above checks - set the appropriate flags 381 */ 382 if (egrep) 383 Eflag++; 384 else /* Else fgrep */ 385 Fflag++; 386 } 387 388 if (wflag && (Eflag || Fflag)) { 389 /* 390 * -w cannot be specified with grep -F 391 */ 392 usage(); 393 } 394 395 /* 396 * -E and -F flags are mutually exclusive - check for this 397 */ 398 if (Eflag && Fflag) 399 usage(); 400 401 /* 402 * -l overrides -H like in GNU grep 403 */ 404 if (lflag) 405 Hflag = 0; 406 407 /* 408 * -c, -l and -q flags are mutually exclusive 409 * We have -c override -l like in Solaris. 410 * -q overrides -l & -c programmatically in grep() function. 411 */ 412 if (cflag && lflag) 413 lflag = 0; 414 415 argv += optind - 1; 416 argc -= optind - 1; 417 418 /* 419 * Now handling -e and -f option 420 */ 421 if (pattern_list) { 422 for (i = 0; i < n_pattern; i++) { 423 addpattern(pattern_list[i]); 424 } 425 free(pattern_list); 426 } 427 if (file_list) { 428 for (i = 0; i < n_file; i++) { 429 addfile(file_list[i]); 430 } 431 free(file_list); 432 } 433 434 /* 435 * No -e or -f? Make sure there is one more arg, use it as the pattern. 436 */ 437 if (patterns == NULL && !fflag) { 438 if (argc < 2) 439 usage(); 440 addpattern(argv[1]); 441 argc--; 442 argv++; 443 } 444 445 /* 446 * If -x flag is not specified or -i flag is specified 447 * with fgrep in a multibyte locale, need to use 448 * the wide character APIs. Otherwise, byte-oriented 449 * process will be done. 450 */ 451 use_wchar = Fflag && mblocale && (!xflag || iflag); 452 453 /* 454 * Compile Patterns and also decide if BMG can be used 455 */ 456 fixpatterns(); 457 458 /* Process all files: stdin, or rest of arg list */ 459 if (argc < 2) { 460 matched = grep(0, STDIN_FILENAME); 461 } else { 462 if (Hflag || (argc > 2 && hflag == 0)) 463 outfn = 1; /* Print filename on match line */ 464 for (argv++; *argv != NULL; argv++) { 465 process_path(*argv); 466 } 467 } 468 /* 469 * Return() here is used instead of exit 470 */ 471 472 (void) fflush(stdout); 473 474 if (errors) 475 return (2); 476 return (matched ? 0 : 1); 477 } 478 479 static void 480 process_path(const char *path) 481 { 482 struct stat st; 483 int walkflags = FTW_CHDIR; 484 char *buf = NULL; 485 486 if (rflag) { 487 if (stat(path, &st) != -1 && 488 (st.st_mode & S_IFMT) == S_IFDIR) { 489 outfn = 1; /* Print filename */ 490 491 /* 492 * Add trailing slash if arg 493 * is directory, to resolve symlinks. 494 */ 495 if (path[strlen(path) - 1] != '/') { 496 (void) asprintf(&buf, "%s/", path); 497 if (buf != NULL) 498 path = buf; 499 } 500 501 /* 502 * Search through subdirs if path is directory. 503 * Don't follow symlinks if Rflag is not set. 504 */ 505 if (!Rflag) 506 walkflags |= FTW_PHYS; 507 508 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) { 509 if (!sflag) 510 (void) fprintf(stderr, 511 gettext("%s: can't open \"%s\"\n"), 512 cmdname, path); 513 errors = 1; 514 } 515 return; 516 } 517 } 518 process_file(path, 0); 519 } 520 521 /* 522 * Read and process all files in directory recursively. 523 */ 524 static int 525 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw) 526 { 527 /* 528 * Process files and follow symlinks if Rflag set. 529 */ 530 if (info != FTW_F) { 531 /* Report broken symlinks and unreadable files */ 532 if (!sflag && 533 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) { 534 (void) fprintf(stderr, 535 gettext("%s: can't open \"%s\"\n"), cmdname, name); 536 } 537 return (0); 538 } 539 540 541 /* Skip devices and pipes if Rflag is not set */ 542 if (!Rflag && !S_ISREG(statp->st_mode)) 543 return (0); 544 /* Pass offset to relative name from FTW_CHDIR */ 545 process_file(name, ftw->base); 546 return (0); 547 } 548 549 /* 550 * Opens file and call grep function. 551 */ 552 static void 553 process_file(const char *name, int base) 554 { 555 int fd; 556 557 if ((fd = open(name + base, O_RDONLY)) == -1) { 558 errors = 1; 559 if (!sflag) /* Silent mode */ 560 (void) fprintf(stderr, gettext( 561 "%s: can't open \"%s\"\n"), 562 cmdname, name); 563 return; 564 } 565 matched |= grep(fd, name); 566 (void) close(fd); 567 568 if (ferror(stdout)) { 569 (void) fprintf(stderr, gettext( 570 "%s: error writing to stdout\n"), 571 cmdname); 572 (void) fflush(stdout); 573 exit(2); 574 } 575 576 } 577 578 /* 579 * Add a file of strings to the pattern list. 580 */ 581 static void 582 addfile(const char *fn) 583 { 584 FILE *fp; 585 char *inbuf; 586 char *bufp; 587 size_t bufsiz, buflen, bufused; 588 589 /* 590 * Open the pattern file 591 */ 592 if ((fp = fopen(fn, "r")) == NULL) { 593 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), 594 cmdname, fn); 595 exit(2); 596 } 597 bufsiz = BUFSIZE; 598 if ((inbuf = malloc(bufsiz)) == NULL) { 599 (void) fprintf(stderr, 600 gettext("%s: out of memory\n"), cmdname); 601 exit(2); 602 } 603 bufp = inbuf; 604 bufused = 0; 605 /* 606 * Read in the file, reallocing as we need more memory 607 */ 608 while (fgets(bufp, bufsiz - bufused, fp) != NULL) { 609 buflen = strlen(bufp); 610 bufused += buflen; 611 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') { 612 /* 613 * if this line does not fit to the buffer, 614 * realloc larger buffer 615 */ 616 bufsiz += BUFSIZE; 617 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) { 618 (void) fprintf(stderr, 619 gettext("%s: out of memory\n"), 620 cmdname); 621 exit(2); 622 } 623 bufp = inbuf + bufused; 624 continue; 625 } 626 if (bufp[buflen - 1] == '\n') { 627 bufp[--buflen] = '\0'; 628 } 629 addpattern(inbuf); 630 631 bufp = inbuf; 632 bufused = 0; 633 } 634 free(inbuf); 635 free(prntbuf); 636 free(conbuf); 637 (void) fclose(fp); 638 } 639 640 /* 641 * Add a string to the pattern list. 642 */ 643 static void 644 addpattern(char *s) 645 { 646 PATTERN *pp; 647 char *wordbuf; 648 char *np; 649 650 for (; ; ) { 651 np = strchr(s, '\n'); 652 if (np != NULL) 653 *np = '\0'; 654 if ((pp = malloc(sizeof (PATTERN))) == NULL) { 655 (void) fprintf(stderr, gettext( 656 "%s: out of memory\n"), 657 cmdname); 658 exit(2); 659 } 660 if (wflag) { 661 /* 662 * Solaris wflag support: Add '<' '>' to pattern to 663 * select it as a word. Doesn't make sense with -F 664 * but we're Libertarian. 665 */ 666 size_t slen, wordlen; 667 668 slen = strlen(s); 669 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */ 670 if ((wordbuf = malloc(wordlen)) == NULL) { 671 (void) fprintf(stderr, 672 gettext("%s: out of memory\n"), 673 cmdname); 674 exit(2); 675 } 676 (void) strcpy(wordbuf, "\\<"); 677 (void) strcpy(wordbuf + 2, s); 678 (void) strcpy(wordbuf + 2 + slen, "\\>"); 679 } else { 680 if ((wordbuf = strdup(s)) == NULL) { 681 (void) fprintf(stderr, 682 gettext("%s: out of memory\n"), 683 cmdname); 684 exit(2); 685 } 686 } 687 pp->pattern = wordbuf; 688 pp->next = patterns; 689 patterns = pp; 690 if (np == NULL) 691 break; 692 s = np + 1; 693 } 694 } 695 696 /* 697 * Fix patterns. 698 * Must do after all arguments read, in case later -i option. 699 */ 700 static void 701 fixpatterns(void) 702 { 703 PATTERN *pp; 704 int rv, fix_pattern, npatterns; 705 706 /* 707 * As REG_ANCHOR flag is not supported in the current Solaris, 708 * need to fix the specified pattern if -x is specified with 709 * grep or egrep 710 */ 711 fix_pattern = !Fflag && xflag; 712 713 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) { 714 npatterns++; 715 if (fix_pattern) { 716 char *cp, *cq; 717 size_t plen, nplen; 718 719 plen = strlen(pp->pattern); 720 /* '^' pattern '$' */ 721 nplen = 1 + plen + 1 + 1; 722 if ((cp = malloc(nplen)) == NULL) { 723 (void) fprintf(stderr, 724 gettext("%s: out of memory\n"), 725 cmdname); 726 exit(2); 727 } 728 cq = cp; 729 *cq++ = '^'; 730 cq = strcpy(cq, pp->pattern) + plen; 731 *cq++ = '$'; 732 *cq = '\0'; 733 free(pp->pattern); 734 pp->pattern = cp; 735 } 736 737 if (Fflag) { 738 if (use_wchar) { 739 /* 740 * Fflag && mblocale && iflag 741 * Fflag && mblocale && !xflag 742 */ 743 size_t n; 744 n = strlen(pp->pattern) + 1; 745 if ((pp->wpattern = 746 malloc(sizeof (wchar_t) * n)) == NULL) { 747 (void) fprintf(stderr, 748 gettext("%s: out of memory\n"), 749 cmdname); 750 exit(2); 751 } 752 if (mbstowcs(pp->wpattern, pp->pattern, n) == 753 (size_t)-1) { 754 (void) fprintf(stderr, 755 gettext("%s: failed to convert " 756 "\"%s\" to wide-characters\n"), 757 cmdname, pp->pattern); 758 exit(2); 759 } 760 if (iflag) { 761 wchar_t *wp; 762 for (wp = pp->wpattern; *wp != L'\0'; 763 wp++) { 764 *wp = towlower((wint_t)*wp); 765 } 766 } 767 free(pp->pattern); 768 } else { 769 /* 770 * Fflag && mblocale && !iflag 771 * Fflag && !mblocale && iflag 772 * Fflag && !mblocale && !iflag 773 */ 774 if (iflag) { 775 unsigned char *cp; 776 for (cp = (unsigned char *)pp->pattern; 777 *cp != '\0'; cp++) { 778 *cp = tolower(*cp); 779 } 780 } 781 } 782 /* 783 * fgrep: No regular expressions. 784 */ 785 continue; 786 } 787 788 /* 789 * For non-fgrep, compile the regular expression, 790 * give an informative error message, and exit if 791 * it didn't compile. 792 */ 793 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { 794 (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); 795 (void) fprintf(stderr, 796 gettext("%s: RE error in %s: %s\n"), 797 cmdname, pp->pattern, errstr); 798 exit(2); 799 } 800 free(pp->pattern); 801 } 802 803 /* 804 * Decide if we are able to run the Boyer-Moore-Gosper algorithm. 805 * Use the Boyer-Moore-Gosper algorithm if: 806 * - fgrep (Fflag) 807 * - singlebyte locale (!mblocale) 808 * - no ignoring case (!iflag) 809 * - no printing line numbers (!nflag) 810 * - no negating the output (nvflag) 811 * - only one pattern (npatterns == 1) 812 * - non zero length pattern (strlen(patterns->pattern) != 0) 813 * - no context required (conflag == 0) 814 * 815 * It's guaranteed patterns->pattern is still alive 816 * when Fflag && !mblocale. 817 */ 818 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && 819 (npatterns == 1) && (strlen(patterns->pattern) != 0) && 820 conflag == 0; 821 } 822 823 /* 824 * Search a newline from the beginning of the string 825 */ 826 static char * 827 find_nl(const char *ptr, size_t len) 828 { 829 while (len-- != 0) { 830 if (*ptr++ == '\n') { 831 return ((char *)--ptr); 832 } 833 } 834 return (NULL); 835 } 836 837 /* 838 * Search a newline from the end of the string 839 */ 840 static char * 841 rfind_nl(const char *ptr, size_t len) 842 { 843 const char *uptr = ptr + len; 844 while (len--) { 845 if (*--uptr == '\n') { 846 return ((char *)uptr); 847 } 848 } 849 return (NULL); 850 } 851 852 /* 853 * Duplicate the specified string converting each character 854 * into a lower case. 855 */ 856 static char * 857 istrdup(const char *s1) 858 { 859 static size_t ibuflen = 0; 860 static char *ibuf = NULL; 861 size_t slen; 862 char *p; 863 864 slen = strlen(s1); 865 if (slen >= ibuflen) { 866 /* ibuf does not fit to s1 */ 867 ibuflen = slen + 1; 868 ibuf = realloc(ibuf, ibuflen); 869 if (ibuf == NULL) { 870 (void) fprintf(stderr, 871 gettext("%s: out of memory\n"), cmdname); 872 exit(2); 873 } 874 } 875 p = ibuf; 876 do { 877 *p++ = tolower(*s1); 878 } while (*s1++ != '\0'); 879 return (ibuf); 880 } 881 882 /* 883 * Do grep on a single file. 884 * Return true in any lines matched. 885 * 886 * We have two strategies: 887 * The fast one is used when we have a single pattern with 888 * a string known to occur in the pattern. We can then 889 * do a BMG match on the whole buffer. 890 * This is an order of magnitude faster. 891 * Otherwise we split the buffer into lines, 892 * and check for a match on each line. 893 */ 894 static int 895 grep(int fd, const char *fn) 896 { 897 PATTERN *pp; 898 off_t data_len; /* length of the data chunk */ 899 off_t line_len; /* length of the current line */ 900 off_t line_offset; /* current line's offset from the beginning */ 901 off_t blkoffset; /* line_offset but context-compatible */ 902 long long lineno, linenum; 903 long long matches = 0; /* Number of matching lines */ 904 long long conacnt = 0, conbcnt = 0; /* context line count */ 905 int newlinep; /* 0 if the last line of file has no newline */ 906 char *ptr, *ptrend, *prntptr, *prntptrend; 907 char *nextptr = NULL, *nextend = NULL; 908 char *conptr = NULL, *conptrend = NULL; 909 char *matchptr = NULL; 910 int conaprnt = 0, conbprnt = 0, lastmatch = 0; 911 boolean_t nearmatch; /* w/in N+1 of last match */ 912 boolean_t havematch = B_FALSE; /* have a match in context */ 913 size_t prntlen; 914 915 if (patterns == NULL) 916 return (0); /* no patterns to match -- just return */ 917 918 pp = patterns; 919 920 if (use_bmg) { 921 bmgcomp(pp->pattern, strlen(pp->pattern)); 922 } 923 924 if (use_wchar && outline == NULL) { 925 outbuflen = BUFSIZE + 1; 926 outline = malloc(sizeof (wchar_t) * outbuflen); 927 if (outline == NULL) { 928 (void) fprintf(stderr, gettext("%s: out of memory\n"), 929 cmdname); 930 exit(2); 931 } 932 } 933 934 if (prntbuf == NULL) { 935 prntbuflen = BUFSIZE; 936 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { 937 (void) fprintf(stderr, gettext("%s: out of memory\n"), 938 cmdname); 939 exit(2); 940 } 941 } 942 943 if (conflag != 0 && (conbuf == NULL)) { 944 conbuflen = BUFSIZE; 945 if ((conbuf = malloc(BUFSIZE+1)) == NULL) { 946 (void) fprintf(stderr, gettext("%s: out of memory\n"), 947 cmdname); 948 exit(2); 949 } 950 } 951 952 nearmatch = (conmatches != 0); 953 blkoffset = line_offset = 0; 954 lineno = 0; 955 linenum = 1; 956 newlinep = 1; 957 data_len = 0; 958 for (; ; ) { 959 long count; 960 off_t offset = 0; 961 char separate; 962 boolean_t last_ctx = B_FALSE, eof = B_FALSE; 963 964 if (data_len == 0) { 965 /* 966 * If no data in the buffer, reset ptr 967 */ 968 ptr = prntbuf; 969 if (conflag != 0 && conptr == NULL) { 970 conptr = conbuf; 971 conptrend = conptr - 1; 972 } 973 } 974 if (ptr == prntbuf) { 975 /* 976 * The current data chunk starts from prntbuf. 977 * This means either the buffer has no data 978 * or the buffer has no newline. 979 * So, read more data from input. 980 */ 981 count = read(fd, ptr + data_len, prntbuflen - data_len); 982 if (count < 0) { 983 /* read error */ 984 if (cflag) { 985 if (outfn && !rflag) { 986 (void) fprintf(stdout, 987 "%s:", fn); 988 } 989 if (!qflag && !rflag) { 990 (void) fprintf(stdout, "%lld\n", 991 matches); 992 } 993 } 994 return (0); 995 } else if (count == 0) { 996 /* no new data */ 997 eof = B_TRUE; 998 999 if (data_len == 0) { 1000 /* end of file already reached */ 1001 if (conflag != 0) { 1002 if (conptrend >= conptr) 1003 *conptrend = '\n'; 1004 last_ctx = B_TRUE; 1005 goto L_next_line; 1006 } else { 1007 goto out; 1008 } 1009 } 1010 /* last line of file has no newline */ 1011 ptrend = ptr + data_len; 1012 newlinep = 0; 1013 goto L_start_process; 1014 } 1015 offset = data_len; 1016 data_len += count; 1017 } 1018 1019 /* 1020 * Look for newline in the chunk 1021 * between ptr + offset and ptr + data_len - offset. 1022 */ 1023 ptrend = find_nl(ptr + offset, data_len - offset); 1024 if (ptrend == NULL) { 1025 /* no newline found in this chunk */ 1026 if (ptr > prntbuf) { 1027 /* 1028 * Move remaining data to the beginning 1029 * of the buffer. 1030 * Remaining data lie from ptr for 1031 * data_len bytes. 1032 */ 1033 (void) memmove(prntbuf, ptr, data_len); 1034 } 1035 if (data_len == prntbuflen) { 1036 /* 1037 * Not enough room in the buffer 1038 */ 1039 if (prntbuflen > SIZE_MAX - BUFSIZE) { 1040 (void) fprintf(stderr, 1041 gettext("%s: buflen would" 1042 " overflow\n"), 1043 cmdname); 1044 exit(2); 1045 } 1046 1047 prntbuflen += BUFSIZE; 1048 prntbuf = realloc(prntbuf, prntbuflen + 1); 1049 if (prntbuf == NULL) { 1050 (void) fprintf(stderr, 1051 gettext("%s: out of memory\n"), 1052 cmdname); 1053 exit(2); 1054 } 1055 } 1056 ptr = prntbuf; 1057 /* read the next input */ 1058 continue; 1059 } 1060 L_start_process: 1061 1062 /* 1063 * Beginning of the chunk: ptr 1064 * End of the chunk: ptr + data_len 1065 * Beginning of the line: ptr 1066 * End of the line: ptrend 1067 * 1068 * conptr: Beginning of the context. 1069 * conptrend: If context is empty, conptr - 1 (invalid memory). 1070 * Otherwise, Last newline in the context. 1071 */ 1072 1073 if (use_bmg) { 1074 /* 1075 * Use Boyer-Moore-Gosper algorithm to find out if 1076 * this chunk (not this line) contains the specified 1077 * pattern. If not, restart from the last line 1078 * of this chunk. 1079 */ 1080 char *bline; 1081 bline = bmgexec(ptr, ptr + data_len); 1082 if (bline == NULL) { 1083 /* 1084 * No pattern found in this chunk. 1085 * Need to find the last line 1086 * in this chunk. 1087 */ 1088 ptrend = rfind_nl(ptr, data_len); 1089 1090 /* 1091 * When this chunk does not contain newline, 1092 * ptrend becomes NULL, which should happen 1093 * when the last line of file does not end 1094 * with a newline. At such a point, 1095 * newlinep should have been set to 0. 1096 * Therefore, just after jumping to 1097 * L_skip_line, the main for-loop quits, 1098 * and the line_len value won't be 1099 * used. 1100 */ 1101 line_len = ptrend - ptr; 1102 goto L_skip_line; 1103 } 1104 if (bline > ptrend) { 1105 /* 1106 * Pattern found not in the first line 1107 * of this chunk. 1108 * Discard the first line. 1109 */ 1110 line_len = ptrend - ptr; 1111 goto L_skip_line; 1112 } 1113 /* 1114 * Pattern found in the first line of this chunk. 1115 * Using this result. 1116 */ 1117 *ptrend = '\0'; 1118 line_len = ptrend - ptr; 1119 1120 /* 1121 * before jumping to L_next_line, 1122 * need to handle xflag if specified 1123 */ 1124 if (xflag && (line_len != bmglen || 1125 strcmp(bmgpat, ptr) != 0)) { 1126 /* didn't match */ 1127 pp = NULL; 1128 } else { 1129 pp = patterns; /* to make it happen */ 1130 } 1131 goto L_next_line; 1132 } 1133 lineno++; 1134 /* 1135 * Line starts from ptr and ends at ptrend. 1136 * line_len will be the length of the line. 1137 */ 1138 *ptrend = '\0'; 1139 line_len = ptrend - ptr; 1140 1141 /* 1142 * From now, the process will be performed based 1143 * on the line from ptr to ptrend. 1144 */ 1145 if (use_wchar) { 1146 size_t len; 1147 1148 if (line_len >= outbuflen) { 1149 outbuflen = line_len + 1; 1150 outline = realloc(outline, 1151 sizeof (wchar_t) * outbuflen); 1152 if (outline == NULL) { 1153 (void) fprintf(stderr, 1154 gettext("%s: out of memory\n"), 1155 cmdname); 1156 exit(2); 1157 } 1158 } 1159 1160 len = mbstowcs(outline, ptr, line_len); 1161 if (len == (size_t)-1) { 1162 (void) fprintf(stderr, gettext( 1163 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1164 cmdname, fn, lineno); 1165 /* never match a line with invalid sequence */ 1166 goto L_skip_line; 1167 } 1168 outline[len] = L'\0'; 1169 1170 if (iflag) { 1171 wchar_t *cp; 1172 for (cp = outline; *cp != '\0'; cp++) { 1173 *cp = towlower((wint_t)*cp); 1174 } 1175 } 1176 1177 if (xflag) { 1178 for (pp = patterns; pp; pp = pp->next) { 1179 if (outline[0] == pp->wpattern[0] && 1180 wcscmp(outline, 1181 pp->wpattern) == 0) { 1182 /* matched */ 1183 break; 1184 } 1185 } 1186 } else { 1187 for (pp = patterns; pp; pp = pp->next) { 1188 if (wcswcs(outline, pp->wpattern) 1189 != NULL) { 1190 /* matched */ 1191 break; 1192 } 1193 } 1194 } 1195 } else if (Fflag) { 1196 /* fgrep in byte-oriented handling */ 1197 char *fptr; 1198 if (iflag) { 1199 fptr = istrdup(ptr); 1200 } else { 1201 fptr = ptr; 1202 } 1203 if (xflag) { 1204 /* fgrep -x */ 1205 for (pp = patterns; pp; pp = pp->next) { 1206 if (fptr[0] == pp->pattern[0] && 1207 strcmp(fptr, pp->pattern) == 0) { 1208 /* matched */ 1209 break; 1210 } 1211 } 1212 } else { 1213 for (pp = patterns; pp; pp = pp->next) { 1214 if (strstr(fptr, pp->pattern) != NULL) { 1215 /* matched */ 1216 break; 1217 } 1218 } 1219 } 1220 } else { 1221 /* grep or egrep */ 1222 for (pp = patterns; pp; pp = pp->next) { 1223 int rv; 1224 1225 rv = regexec(&pp->re, ptr, 0, NULL, 0); 1226 if (rv == REG_OK) { 1227 /* matched */ 1228 break; 1229 } 1230 1231 switch (rv) { 1232 case REG_NOMATCH: 1233 break; 1234 case REG_ECHAR: 1235 (void) fprintf(stderr, gettext( 1236 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 1237 cmdname, fn, lineno); 1238 break; 1239 default: 1240 (void) regerror(rv, &pp->re, errstr, 1241 sizeof (errstr)); 1242 (void) fprintf(stderr, gettext( 1243 "%s: input file \"%s\": line %lld: %s\n"), 1244 cmdname, fn, lineno, errstr); 1245 exit(2); 1246 } 1247 } 1248 } 1249 1250 /* 1251 * Context is set up as follows: 1252 * For a 'Before' context, we maintain a set of pointers 1253 * containing 'N' lines of context. If the current number of 1254 * lines contained is greater than N, and N isn't a match, the 1255 * start pointer is moved forward to the next newline. 1256 * 1257 * If we ever find a match, we print out immediately. 1258 * 'nearmatch' tells us if we're within N+1 lines of the last 1259 * match ; if we are, and we find another match, we don't 1260 * separate the matches. 'nearmatch' becomes false when 1261 * a line gets rotated out of the context. 1262 * 1263 * For an 'After' context, we simply wait until we've found a 1264 * match, then create a context N+1 lines big. If we don't find 1265 * a match within the context, we print out the current context. 1266 * Otherwise, we save a reference to the new matching line, 1267 * print out the other context, and reset our context pointers 1268 * to the new matching line. 1269 * 1270 * 'nearmatch' becomes false when we find a non-matching line 1271 * that isn't a part of any context. 1272 * 1273 * A full-context is implemented as a combination of the 1274 * 'Before' and 'After' context logic. Before we find a match, 1275 * we follow the Before logic. When we find a match, we 1276 * follow the After logic. 'nearmatch' is handled by the Before 1277 * logic. 1278 */ 1279 1280 if (conflag == 0) 1281 goto L_next_line; 1282 1283 /* Do we have room to add this line to the context buffer? */ 1284 if ((line_len + 1) > (conbuflen - 1285 (conptrend >= conptr) ? conptrend - conbuf : 0)) { 1286 char *oldconbuf = conbuf; 1287 char *oldconptr = conptr; 1288 long tmp = matchptr - conptr; 1289 1290 if (conbuflen > SIZE_MAX - BUFSIZE) { 1291 (void) fprintf(stderr, 1292 gettext("%s: buflen would overflow\n"), 1293 cmdname); 1294 exit(2); 1295 } 1296 1297 conbuflen += BUFSIZE; 1298 conbuf = realloc(conbuf, conbuflen + 1); 1299 if (conbuf == NULL) { 1300 (void) fprintf(stderr, 1301 gettext("%s: out of memory\n"), 1302 cmdname); 1303 exit(2); 1304 } 1305 1306 conptr = conbuf + (conptr - oldconbuf); 1307 conptrend = conptr + (conptrend - oldconptr); 1308 if (matchptr) 1309 matchptr = conptr + tmp; 1310 } 1311 (void) memcpy(conptrend + 1, ptr, line_len); 1312 conptrend += line_len + 1; 1313 *conptrend = '\n'; 1314 1315 if (nvflag == (pp != NULL)) { 1316 /* matched */ 1317 if (havematch) { 1318 if ((conflag & AFTER) != 0) { 1319 conaprnt = 1; 1320 nextend = conptrend; 1321 conptrend = conptr + lastmatch; 1322 nextptr = conptrend + 1; 1323 *nextend = '\n'; 1324 } 1325 } else { 1326 if (conflag == AFTER) { 1327 conptr = conptrend - (line_len); 1328 linenum = lineno; 1329 } 1330 blkoffset = line_offset - 1331 (conptrend - conptr - line_len); 1332 } 1333 1334 if (conflag == BEFORE) 1335 conbprnt = 1; 1336 1337 lastmatch = conptrend - conptr; 1338 havematch = B_TRUE; 1339 goto L_next_line; 1340 } 1341 1342 if (!havematch) { 1343 if ((conflag & BEFORE) != 0) { 1344 if (conbcnt >= conblen) { 1345 char *tmp = conptr; 1346 conptr = find_nl(conptr, 1347 conptrend - conptr) + 1; 1348 if (bflag) 1349 blkoffset += conptr - tmp; 1350 linenum++; 1351 nearmatch = B_TRUE; 1352 } else { 1353 conbcnt++; 1354 } 1355 } 1356 if (conflag == AFTER) 1357 nearmatch = B_TRUE; 1358 } else { 1359 if (++conacnt >= conalen && !conaprnt && conalen) 1360 conaprnt = 1; 1361 else 1362 lastmatch = conptrend - conptr; 1363 } 1364 1365 L_next_line: 1366 /* 1367 * Here, if pp points to non-NULL, something has been matched 1368 * to the pattern. 1369 */ 1370 if (!last_ctx && nvflag == (pp != NULL)) { 1371 matches++; 1372 if (!nextend) 1373 matchptr = (conflag != 0) ? conptrend : ptrend; 1374 } 1375 1376 /* 1377 * Set up some print context so that we can treat 1378 * single-line matches as a zero-N context. 1379 * Apply CLI flags to each line of the context. 1380 * 1381 * For context, we only print if we both have a match and are 1382 * either at the end of the data stream, or we've previously 1383 * declared that we want to print for a particular context. 1384 */ 1385 if (havematch && (eof || conaprnt || conbprnt)) { 1386 1387 /* 1388 * We'd normally do this earlier, but we had to 1389 * escape early because we reached the end of the data. 1390 */ 1391 if (eof && nextptr) 1392 conptrend = nextend; 1393 1394 prntlen = conptrend - conptr + 1; 1395 prntptr = conptr; 1396 if (conmatches++ && nearmatch && !cflag) 1397 (void) fwrite("--\n", 1, 3, stdout); 1398 } else if (conflag == 0 && nvflag == (pp != NULL)) { 1399 *ptrend = '\n'; 1400 prntlen = line_len + 1; 1401 prntptr = ptr; 1402 linenum = lineno; 1403 blkoffset = line_offset; 1404 } else if (eof) { 1405 /* No match and no more data */ 1406 goto out; 1407 } else { 1408 /* No match, or we're not done building context */ 1409 goto L_skip_line; 1410 } 1411 1412 prntptrend = prntptr - 1; 1413 while ((prntptrend = find_nl(prntptrend + 1, 1414 prntlen)) != NULL) { 1415 1416 /* 1417 * GNU grep uses '-' for context lines and ':' for 1418 * matching lines, so replicate that here. 1419 */ 1420 if (prntptrend == matchptr) { 1421 if (eof && nextptr) { 1422 matchptr = nextend; 1423 nextptr = NULL; 1424 } else { 1425 matchptr = NULL; 1426 } 1427 separate = ':'; 1428 } else { 1429 separate = '-'; 1430 } 1431 1432 /* 1433 * Handle q, l, and c flags. 1434 */ 1435 if (qflag) { 1436 /* no need to continue */ 1437 /* 1438 * End of this line is ptrend. 1439 * We have read up to ptr + data_len. 1440 */ 1441 off_t pos; 1442 pos = ptr + data_len - (ptrend + 1); 1443 (void) lseek(fd, -pos, SEEK_CUR); 1444 exit(0); 1445 } 1446 if (lflag) { 1447 (void) printf("%s\n", fn); 1448 goto out; 1449 } 1450 if (!cflag) { 1451 if (Hflag || outfn) { 1452 (void) printf("%s%c", fn, separate); 1453 } 1454 if (bflag) { 1455 (void) printf("%lld%c", (offset_t) 1456 (blkoffset / BSIZE), separate); 1457 } 1458 if (nflag) { 1459 (void) printf("%lld%c", linenum, 1460 separate); 1461 } 1462 (void) fwrite(prntptr, 1, 1463 prntptrend - prntptr + 1, stdout); 1464 } 1465 if (ferror(stdout)) { 1466 return (0); 1467 } 1468 linenum++; 1469 prntlen -= prntptrend - prntptr + 1; 1470 blkoffset += prntptrend - prntptr + 1; 1471 prntptr = prntptrend + 1; 1472 } 1473 1474 if (eof) 1475 goto out; 1476 1477 /* 1478 * Update context buffer and variables post-print 1479 */ 1480 if (conflag != 0) { 1481 conptr = conbuf; 1482 conaprnt = conbprnt = 0; 1483 nearmatch = B_FALSE; 1484 conacnt = conbcnt = 0; 1485 1486 if (nextptr) { 1487 (void) memmove(conbuf, nextptr, 1488 nextend - nextptr + 1); 1489 blkoffset += nextptr - conptrend - 1; 1490 conptrend = conptr + (nextend - nextptr); 1491 matchptr = conptrend; 1492 linenum = lineno; 1493 lastmatch = conptrend - conptr; 1494 havematch = B_TRUE; 1495 } else { 1496 conptrend = conptr - 1; 1497 conacnt = 0; 1498 lastmatch = 0; 1499 havematch = B_FALSE; 1500 } 1501 nextptr = nextend = NULL; 1502 } 1503 1504 L_skip_line: 1505 if (!newlinep) 1506 break; 1507 1508 data_len -= line_len + 1; 1509 line_offset += line_len + 1; 1510 ptr = ptrend + 1; 1511 } 1512 1513 out: 1514 if (cflag) { 1515 if (Hflag || outfn) { 1516 (void) printf("%s:", fn); 1517 } 1518 if (!qflag) { 1519 (void) printf("%lld\n", matches); 1520 } 1521 } 1522 return (matches != 0); 1523 } 1524 1525 /* 1526 * usage message for grep 1527 */ 1528 static void 1529 usage(void) 1530 { 1531 if (egrep || fgrep) { 1532 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1533 (void) fprintf(stderr, 1534 gettext(" [-c|-l|-q] [-r|-R] " 1535 "[-A num] [-B num] [-C num|-num] " 1536 "[-bhHinsvx] pattern_list [file ...]\n")); 1537 1538 (void) fprintf(stderr, "\t%s", cmdname); 1539 (void) fprintf(stderr, 1540 gettext(" [-c|-l|-q] [-r|-R] " 1541 "[-A num] [-B num] [-C num|-num] " 1542 "[-bhHinsvx] [-e pattern_list]... " 1543 "[-f pattern_file]... [file...]\n")); 1544 } else { 1545 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1546 (void) fprintf(stderr, 1547 gettext(" [-c|-l|-q] [-r|-R] " 1548 "[-A num] [-B num] [-C num|-num] " 1549 "[-bhHinsvx] pattern_list [file ...]\n")); 1550 1551 (void) fprintf(stderr, "\t%s", cmdname); 1552 (void) fprintf(stderr, 1553 gettext(" [-c|-l|-q] [-r|-R] " 1554 "[-A num] [-B num] [-C num|-num] " 1555 "[-bhHinsvx] [-e pattern_list]... " 1556 "[-f pattern_file]... [file...]\n")); 1557 1558 (void) fprintf(stderr, "\t%s", cmdname); 1559 (void) fprintf(stderr, 1560 gettext(" -E [-c|-l|-q] [-r|-R] " 1561 "[-A num] [-B num] [-C num|-num] " 1562 "[-bhHinsvx] pattern_list [file ...]\n")); 1563 1564 (void) fprintf(stderr, "\t%s", cmdname); 1565 (void) fprintf(stderr, 1566 gettext(" -E [-c|-l|-q] [-r|-R] " 1567 "[-A num] [-B num] [-C num|-num] " 1568 "[-bhHinsvx] [-e pattern_list]... " 1569 "[-f pattern_file]... [file...]\n")); 1570 1571 (void) fprintf(stderr, "\t%s", cmdname); 1572 (void) fprintf(stderr, 1573 gettext(" -F [-c|-l|-q] [-r|-R] " 1574 "[-A num] [-B num] [-C num|-num] " 1575 "[-bhHinsvx] pattern_list [file ...]\n")); 1576 1577 (void) fprintf(stderr, "\t%s", cmdname); 1578 (void) fprintf(stderr, 1579 gettext(" -F [-c|-l|-q] " 1580 "[-A num] [-B num] [-C num|-num] " 1581 "[-bhHinsvx] [-e pattern_list]... " 1582 "[-f pattern_file]... [file...]\n")); 1583 } 1584 exit(2); 1585 /* NOTREACHED */ 1586 } 1587 1588 /* 1589 * Compile literal pattern into BMG tables 1590 */ 1591 static void 1592 bmgcomp(char *pat, int len) 1593 { 1594 int i; 1595 int tlen; 1596 unsigned char *uc = (unsigned char *)pat; 1597 1598 bmglen = len; 1599 bmgpat = pat; 1600 1601 for (i = 0; i < M_CSETSIZE; i++) { 1602 bmgtab[i] = len; 1603 } 1604 1605 len--; 1606 for (tlen = len, i = 0; i <= len; i++, tlen--) { 1607 bmgtab[*uc++] = tlen; 1608 } 1609 } 1610 1611 /* 1612 * BMG search. 1613 */ 1614 static char * 1615 bmgexec(char *str, char *end) 1616 { 1617 int t; 1618 char *k, *s, *p; 1619 1620 k = str + bmglen - 1; 1621 if (bmglen == 1) { 1622 return (memchr(str, bmgpat[0], end - str)); 1623 } 1624 for (; ; ) { 1625 /* inner loop, should be most optimized */ 1626 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) { 1627 k += t; 1628 } 1629 if (k >= end) { 1630 return (NULL); 1631 } 1632 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) { 1633 if (p == bmgpat) { 1634 return (s); 1635 } 1636 } 1637 k++; 1638 } 1639 /* NOTREACHED */ 1640 } 1641