1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * grep - pattern matching program - combined grep, egrep, and fgrep. 31 * Based on MKS grep command, with XCU & Solaris mods. 32 */ 33 34 /* 35 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved. 36 * 37 */ 38 39 #include <string.h> 40 #include <stdlib.h> 41 #include <ctype.h> 42 #include <stdarg.h> 43 #include <regex.h> 44 #include <limits.h> 45 #include <sys/types.h> 46 #include <sys/stat.h> 47 #include <fcntl.h> 48 #include <stdio.h> 49 #include <locale.h> 50 #include <wchar.h> 51 #include <errno.h> 52 #include <unistd.h> 53 #include <wctype.h> 54 55 #define BSIZE 512 /* Size of block for -b */ 56 #define BUFSIZE 8192 /* Input buffer size */ 57 58 #define M_CSETSIZE 256 /* singlebyte chars */ 59 static int bmglen; /* length of BMG pattern */ 60 static char *bmgpat; /* BMG pattern */ 61 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */ 62 63 typedef struct _PATTERN { 64 char *pattern; /* original pattern */ 65 wchar_t *wpattern; /* wide, lowercased pattern */ 66 struct _PATTERN *next; 67 regex_t re; /* compiled pattern */ 68 } PATTERN; 69 70 static PATTERN *patterns; 71 static char errstr[128]; /* regerror string buffer */ 72 static int regflags = 0; /* regcomp options */ 73 static uchar_t fgrep = 0; /* Invoked as fgrep */ 74 static uchar_t egrep = 0; /* Invoked as egrep */ 75 static uchar_t nvflag = 1; /* Print matching lines */ 76 static uchar_t cflag; /* Count of matches */ 77 static uchar_t iflag; /* Case insensitve matching */ 78 static uchar_t hflag; /* Supress printing of filename */ 79 static uchar_t lflag; /* Print file names of matches */ 80 static uchar_t nflag; /* Precede lines by line number */ 81 static uchar_t bflag; /* Preccede matches by block number */ 82 static uchar_t sflag; /* Suppress file error messages */ 83 static uchar_t qflag; /* Suppress standard output */ 84 static uchar_t wflag; /* Search for expression as a word */ 85 static uchar_t xflag; /* Anchoring */ 86 static uchar_t Eflag; /* Egrep or -E flag */ 87 static uchar_t Fflag; /* Fgrep or -F flag */ 88 static uchar_t outfn; /* Put out file name */ 89 static char *cmdname; 90 91 static int use_wchar, use_bmg, mblocale; 92 93 static size_t outbuflen, prntbuflen; 94 static char *prntbuf; 95 static wchar_t *outline; 96 97 static void addfile(char *fn); 98 static void addpattern(char *s); 99 static void fixpatterns(void); 100 static void usage(void); 101 static int grep(int, char *); 102 static void bmgcomp(char *, int); 103 static char *bmgexec(char *, char *); 104 105 /* 106 * mainline for grep 107 */ 108 int 109 main(int argc, char **argv) 110 { 111 char *ap; 112 int matched = 0; 113 int c; 114 int fflag = 0; 115 int errors = 0; 116 int i, n_pattern = 0, n_file = 0; 117 char **pattern_list = NULL; 118 char **file_list = NULL; 119 120 (void) setlocale(LC_ALL, ""); 121 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 122 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 123 #endif 124 (void) textdomain(TEXT_DOMAIN); 125 126 /* 127 * true if this is running on the multibyte locale 128 */ 129 mblocale = (MB_CUR_MAX > 1); 130 /* 131 * Skip leading slashes 132 */ 133 cmdname = argv[0]; 134 if (ap = strrchr(cmdname, '/')) 135 cmdname = ap + 1; 136 137 ap = cmdname; 138 /* 139 * Detect egrep/fgrep via command name, map to -E and -F options. 140 */ 141 if (*ap == 'e' || *ap == 'E') { 142 regflags |= REG_EXTENDED; 143 egrep++; 144 } else { 145 if (*ap == 'f' || *ap == 'F') { 146 fgrep++; 147 } 148 } 149 150 while ((c = getopt(argc, argv, "vwchilnbse:f:qxEFI")) != EOF) { 151 switch (c) { 152 case 'v': /* POSIX: negate matches */ 153 nvflag = 0; 154 break; 155 156 case 'c': /* POSIX: write count */ 157 cflag++; 158 break; 159 160 case 'i': /* POSIX: ignore case */ 161 iflag++; 162 regflags |= REG_ICASE; 163 break; 164 165 case 'l': /* POSIX: Write filenames only */ 166 lflag++; 167 break; 168 169 case 'n': /* POSIX: Write line numbers */ 170 nflag++; 171 break; 172 173 case 'b': /* Solaris: Write file block numbers */ 174 bflag++; 175 break; 176 177 case 's': /* POSIX: No error msgs for files */ 178 sflag++; 179 break; 180 181 case 'e': /* POSIX: pattern list */ 182 n_pattern++; 183 pattern_list = realloc(pattern_list, 184 sizeof (char *) * n_pattern); 185 if (pattern_list == NULL) { 186 (void) fprintf(stderr, 187 gettext("%s: out of memory\n"), 188 cmdname); 189 exit(2); 190 } 191 *(pattern_list + n_pattern - 1) = optarg; 192 break; 193 194 case 'f': /* POSIX: pattern file */ 195 fflag = 1; 196 n_file++; 197 file_list = realloc(file_list, 198 sizeof (char *) * n_file); 199 if (file_list == NULL) { 200 (void) fprintf(stderr, 201 gettext("%s: out of memory\n"), 202 cmdname); 203 exit(2); 204 } 205 *(file_list + n_file - 1) = optarg; 206 break; 207 case 'h': /* Solaris: supress printing of file name */ 208 hflag = 1; 209 break; 210 211 case 'q': /* POSIX: quiet: status only */ 212 qflag++; 213 break; 214 215 case 'w': /* Solaris: treat pattern as word */ 216 wflag++; 217 break; 218 219 case 'x': /* POSIX: full line matches */ 220 xflag++; 221 regflags |= REG_ANCHOR; 222 break; 223 224 case 'E': /* POSIX: Extended RE's */ 225 regflags |= REG_EXTENDED; 226 Eflag++; 227 break; 228 229 case 'F': /* POSIX: strings, not RE's */ 230 Fflag++; 231 break; 232 233 default: 234 usage(); 235 } 236 } 237 /* 238 * If we're invoked as egrep or fgrep we need to do some checks 239 */ 240 241 if (egrep || fgrep) { 242 /* 243 * Use of -E or -F with egrep or fgrep is illegal 244 */ 245 if (Eflag || Fflag) 246 usage(); 247 /* 248 * Don't allow use of wflag with egrep / fgrep 249 */ 250 if (wflag) 251 usage(); 252 /* 253 * For Solaris the -s flag is equivalent to XCU -q 254 */ 255 if (sflag) 256 qflag++; 257 /* 258 * done with above checks - set the appropriate flags 259 */ 260 if (egrep) 261 Eflag++; 262 else /* Else fgrep */ 263 Fflag++; 264 } 265 266 if (wflag && (Eflag || Fflag)) { 267 /* 268 * -w cannot be specified with grep -F 269 */ 270 usage(); 271 } 272 273 /* 274 * -E and -F flags are mutually exclusive - check for this 275 */ 276 if (Eflag && Fflag) 277 usage(); 278 279 /* 280 * -c, -l and -q flags are mutually exclusive 281 * We have -c override -l like in Solaris. 282 * -q overrides -l & -c programmatically in grep() function. 283 */ 284 if (cflag && lflag) 285 lflag = 0; 286 287 argv += optind - 1; 288 argc -= optind - 1; 289 290 /* 291 * Now handling -e and -f option 292 */ 293 if (pattern_list) { 294 for (i = 0; i < n_pattern; i++) { 295 addpattern(pattern_list[i]); 296 } 297 free(pattern_list); 298 } 299 if (file_list) { 300 for (i = 0; i < n_file; i++) { 301 addfile(file_list[i]); 302 } 303 free(file_list); 304 } 305 306 /* 307 * No -e or -f? Make sure there is one more arg, use it as the pattern. 308 */ 309 if (patterns == NULL && !fflag) { 310 if (argc < 2) 311 usage(); 312 addpattern(argv[1]); 313 argc--; 314 argv++; 315 } 316 317 /* 318 * If -x flag is not specified or -i flag is specified 319 * with fgrep in a multibyte locale, need to use 320 * the wide character APIs. Otherwise, byte-oriented 321 * process will be done. 322 */ 323 use_wchar = Fflag && mblocale && (!xflag || iflag); 324 325 /* 326 * Compile Patterns and also decide if BMG can be used 327 */ 328 fixpatterns(); 329 330 /* Process all files: stdin, or rest of arg list */ 331 if (argc < 2) { 332 matched = grep(0, gettext("(standard input)")); 333 } else { 334 if (argc > 2 && hflag == 0) 335 outfn = 1; /* Print filename on match line */ 336 for (argv++; *argv != NULL; argv++) { 337 int fd; 338 339 if ((fd = open(*argv, O_RDONLY)) == -1) { 340 errors = 1; 341 if (sflag) 342 continue; 343 (void) fprintf(stderr, gettext( 344 "%s: can't open \"%s\"\n"), 345 cmdname, *argv); 346 continue; 347 } 348 matched |= grep(fd, *argv); 349 (void) close(fd); 350 if (ferror(stdout)) 351 break; 352 } 353 } 354 /* 355 * Return() here is used instead of exit 356 */ 357 358 (void) fflush(stdout); 359 360 if (errors) 361 return (2); 362 return (matched ? 0 : 1); 363 } 364 365 /* 366 * Add a file of strings to the pattern list. 367 */ 368 static void 369 addfile(char *fn) 370 { 371 FILE *fp; 372 char *inbuf; 373 char *bufp; 374 size_t bufsiz, buflen, bufused; 375 376 /* 377 * Open the pattern file 378 */ 379 if ((fp = fopen(fn, "r")) == NULL) { 380 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"), 381 cmdname, fn); 382 exit(2); 383 } 384 bufsiz = BUFSIZE; 385 if ((inbuf = malloc(bufsiz)) == NULL) { 386 (void) fprintf(stderr, 387 gettext("%s: out of memory\n"), cmdname); 388 exit(2); 389 } 390 bufp = inbuf; 391 bufused = 0; 392 /* 393 * Read in the file, reallocing as we need more memory 394 */ 395 while (fgets(bufp, bufsiz - bufused, fp) != NULL) { 396 buflen = strlen(bufp); 397 bufused += buflen; 398 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') { 399 /* 400 * if this line does not fit to the buffer, 401 * realloc larger buffer 402 */ 403 bufsiz += BUFSIZE; 404 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) { 405 (void) fprintf(stderr, 406 gettext("%s: out of memory\n"), 407 cmdname); 408 exit(2); 409 } 410 bufp = inbuf + bufused; 411 continue; 412 } 413 if (bufp[buflen - 1] == '\n') { 414 bufp[--buflen] = '\0'; 415 } 416 addpattern(inbuf); 417 418 bufp = inbuf; 419 bufused = 0; 420 } 421 free(inbuf); 422 (void) fclose(fp); 423 } 424 425 /* 426 * Add a string to the pattern list. 427 */ 428 static void 429 addpattern(char *s) 430 { 431 PATTERN *pp; 432 char *wordbuf; 433 char *np; 434 435 for (; ; ) { 436 np = strchr(s, '\n'); 437 if (np != NULL) 438 *np = '\0'; 439 if ((pp = malloc(sizeof (PATTERN))) == NULL) { 440 (void) fprintf(stderr, gettext( 441 "%s: out of memory\n"), 442 cmdname); 443 exit(2); 444 } 445 if (wflag) { 446 /* 447 * Solaris wflag support: Add '<' '>' to pattern to 448 * select it as a word. Doesn't make sense with -F 449 * but we're Libertarian. 450 */ 451 size_t slen, wordlen; 452 453 slen = strlen(s); 454 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */ 455 if ((wordbuf = malloc(wordlen)) == NULL) { 456 (void) fprintf(stderr, 457 gettext("%s: out of memory\n"), 458 cmdname); 459 exit(2); 460 } 461 (void) strcpy(wordbuf, "\\<"); 462 (void) strcpy(wordbuf + 2, s); 463 (void) strcpy(wordbuf + 2 + slen, "\\>"); 464 } else { 465 if ((wordbuf = strdup(s)) == NULL) { 466 (void) fprintf(stderr, 467 gettext("%s: out of memory\n"), 468 cmdname); 469 exit(2); 470 } 471 } 472 pp->pattern = wordbuf; 473 pp->next = patterns; 474 patterns = pp; 475 if (np == NULL) 476 break; 477 s = np + 1; 478 } 479 } 480 481 /* 482 * Fix patterns. 483 * Must do after all arguments read, in case later -i option. 484 */ 485 static void 486 fixpatterns(void) 487 { 488 PATTERN *pp; 489 int rv, fix_pattern, npatterns; 490 491 /* 492 * As REG_ANCHOR flag is not supported in the current Solaris, 493 * need to fix the specified pattern if -x is specified with 494 * grep or egrep 495 */ 496 fix_pattern = !Fflag && xflag; 497 498 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) { 499 npatterns++; 500 if (fix_pattern) { 501 char *cp, *cq; 502 size_t plen, nplen; 503 504 plen = strlen(pp->pattern); 505 /* '^' pattern '$' */ 506 nplen = 1 + plen + 1 + 1; 507 if ((cp = malloc(nplen)) == NULL) { 508 (void) fprintf(stderr, 509 gettext("%s: out of memory\n"), 510 cmdname); 511 exit(2); 512 } 513 cq = cp; 514 *cq++ = '^'; 515 cq = strcpy(cq, pp->pattern) + plen; 516 *cq++ = '$'; 517 *cq = '\0'; 518 free(pp->pattern); 519 pp->pattern = cp; 520 } 521 522 if (Fflag) { 523 if (use_wchar) { 524 /* 525 * Fflag && mblocale && iflag 526 * Fflag && mblocale && !xflag 527 */ 528 size_t n; 529 n = strlen(pp->pattern) + 1; 530 if ((pp->wpattern = 531 malloc(sizeof (wchar_t) * n)) == NULL) { 532 (void) fprintf(stderr, 533 gettext("%s: out of memory\n"), 534 cmdname); 535 exit(2); 536 } 537 if (mbstowcs(pp->wpattern, pp->pattern, n) == 538 (size_t)-1) { 539 (void) fprintf(stderr, 540 gettext("%s: failed to convert " 541 "\"%s\" to wide-characters\n"), 542 cmdname, pp->pattern); 543 exit(2); 544 } 545 if (iflag) { 546 wchar_t *wp; 547 for (wp = pp->wpattern; *wp != L'\0'; 548 wp++) { 549 *wp = towlower((wint_t)*wp); 550 } 551 } 552 free(pp->pattern); 553 } else { 554 /* 555 * Fflag && mblocale && !iflag 556 * Fflag && !mblocale && iflag 557 * Fflag && !mblocale && !iflag 558 */ 559 if (iflag) { 560 unsigned char *cp; 561 for (cp = (unsigned char *)pp->pattern; 562 *cp != '\0'; cp++) { 563 *cp = tolower(*cp); 564 } 565 } 566 } 567 /* 568 * fgrep: No regular expressions. 569 */ 570 continue; 571 } 572 573 /* 574 * For non-fgrep, compile the regular expression, 575 * give an informative error message, and exit if 576 * it didn't compile. 577 */ 578 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) { 579 (void) regerror(rv, &pp->re, errstr, sizeof (errstr)); 580 (void) fprintf(stderr, 581 gettext("%s: RE error in %s: %s\n"), 582 cmdname, pp->pattern, errstr); 583 exit(2); 584 } 585 free(pp->pattern); 586 } 587 588 /* 589 * Decide if we are able to run the Boyer-Moore-Gosper algorithm. 590 * Use the Boyer-Moore-Gosper algorithm if: 591 * - fgrep (Fflag) 592 * - singlebyte locale (!mblocale) 593 * - no ignoring case (!iflag) 594 * - no printing line numbers (!nflag) 595 * - no negating the output (nvflag) 596 * - only one pattern (npatterns == 1) 597 * - non zero length pattern (strlen(patterns->pattern) != 0) 598 * 599 * It's guaranteed patterns->pattern is still alive 600 * when Fflag && !mblocale. 601 */ 602 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag && 603 (npatterns == 1) && (strlen(patterns->pattern) != 0); 604 } 605 606 /* 607 * Search a newline from the beginning of the string 608 */ 609 static char * 610 find_nl(const char *ptr, size_t len) 611 { 612 while (len-- != 0) { 613 if (*ptr++ == '\n') { 614 return ((char *)--ptr); 615 } 616 } 617 return (NULL); 618 } 619 620 /* 621 * Search a newline from the end of the string 622 */ 623 static char * 624 rfind_nl(const char *ptr, size_t len) 625 { 626 const char *uptr = ptr + len; 627 while (len--) { 628 if (*--uptr == '\n') { 629 return ((char *)uptr); 630 } 631 } 632 return (NULL); 633 } 634 635 /* 636 * Duplicate the specified string converting each character 637 * into a lower case. 638 */ 639 static char * 640 istrdup(const char *s1) 641 { 642 static size_t ibuflen = 0; 643 static char *ibuf = NULL; 644 size_t slen; 645 char *p; 646 647 slen = strlen(s1); 648 if (slen >= ibuflen) { 649 /* ibuf does not fit to s1 */ 650 ibuflen = slen + 1; 651 ibuf = realloc(ibuf, ibuflen); 652 if (ibuf == NULL) { 653 (void) fprintf(stderr, 654 gettext("%s: out of memory\n"), cmdname); 655 exit(2); 656 } 657 } 658 p = ibuf; 659 do { 660 *p++ = tolower(*s1); 661 } while (*s1++ != '\0'); 662 return (ibuf); 663 } 664 665 /* 666 * Do grep on a single file. 667 * Return true in any lines matched. 668 * 669 * We have two strategies: 670 * The fast one is used when we have a single pattern with 671 * a string known to occur in the pattern. We can then 672 * do a BMG match on the whole buffer. 673 * This is an order of magnitude faster. 674 * Otherwise we split the buffer into lines, 675 * and check for a match on each line. 676 */ 677 static int 678 grep(int fd, char *fn) 679 { 680 PATTERN *pp; 681 off_t data_len; /* length of the data chunk */ 682 off_t line_len; /* length of the current line */ 683 off_t line_offset; /* current line's offset from the beginning */ 684 long long lineno; 685 long long matches = 0; /* Number of matching lines */ 686 int newlinep; /* 0 if the last line of file has no newline */ 687 char *ptr, *ptrend; 688 689 690 if (patterns == NULL) 691 return (0); /* no patterns to match -- just return */ 692 693 pp = patterns; 694 695 if (use_bmg) { 696 bmgcomp(pp->pattern, strlen(pp->pattern)); 697 } 698 699 if (use_wchar && outline == NULL) { 700 outbuflen = BUFSIZE + 1; 701 outline = malloc(sizeof (wchar_t) * outbuflen); 702 if (outline == NULL) { 703 (void) fprintf(stderr, gettext("%s: out of memory\n"), 704 cmdname); 705 exit(2); 706 } 707 } 708 709 if (prntbuf == NULL) { 710 prntbuflen = BUFSIZE; 711 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) { 712 (void) fprintf(stderr, gettext("%s: out of memory\n"), 713 cmdname); 714 exit(2); 715 } 716 } 717 718 line_offset = 0; 719 lineno = 0; 720 newlinep = 1; 721 data_len = 0; 722 for (; ; ) { 723 long count; 724 off_t offset = 0; 725 726 if (data_len == 0) { 727 /* 728 * If no data in the buffer, reset ptr 729 */ 730 ptr = prntbuf; 731 } 732 if (ptr == prntbuf) { 733 /* 734 * The current data chunk starts from prntbuf. 735 * This means either the buffer has no data 736 * or the buffer has no newline. 737 * So, read more data from input. 738 */ 739 count = read(fd, ptr + data_len, prntbuflen - data_len); 740 if (count < 0) { 741 /* read error */ 742 if (cflag) { 743 if (outfn) { 744 (void) fprintf(stdout, 745 "%s:", fn); 746 } 747 if (!qflag) { 748 (void) fprintf(stdout, "%lld\n", 749 matches); 750 } 751 } 752 return (0); 753 } else if (count == 0) { 754 /* no new data */ 755 if (data_len == 0) { 756 /* end of file already reached */ 757 break; 758 } 759 /* last line of file has no newline */ 760 ptrend = ptr + data_len; 761 newlinep = 0; 762 goto L_start_process; 763 } 764 offset = data_len; 765 data_len += count; 766 } 767 768 /* 769 * Look for newline in the chunk 770 * between ptr + offset and ptr + data_len - offset. 771 */ 772 ptrend = find_nl(ptr + offset, data_len - offset); 773 if (ptrend == NULL) { 774 /* no newline found in this chunk */ 775 if (ptr > prntbuf) { 776 /* 777 * Move remaining data to the beginning 778 * of the buffer. 779 * Remaining data lie from ptr for 780 * data_len bytes. 781 */ 782 (void) memmove(prntbuf, ptr, data_len); 783 } 784 if (data_len == prntbuflen) { 785 /* 786 * No enough room in the buffer 787 */ 788 prntbuflen += BUFSIZE; 789 prntbuf = realloc(prntbuf, prntbuflen + 1); 790 if (prntbuf == NULL) { 791 (void) fprintf(stderr, 792 gettext("%s: out of memory\n"), 793 cmdname); 794 exit(2); 795 } 796 } 797 ptr = prntbuf; 798 /* read the next input */ 799 continue; 800 } 801 L_start_process: 802 803 /* 804 * Beginning of the chunk: ptr 805 * End of the chunk: ptr + data_len 806 * Beginning of the line: ptr 807 * End of the line: ptrend 808 */ 809 810 if (use_bmg) { 811 /* 812 * Use Boyer-Moore-Gosper algorithm to find out if 813 * this chunk (not this line) contains the specified 814 * pattern. If not, restart from the last line 815 * of this chunk. 816 */ 817 char *bline; 818 bline = bmgexec(ptr, ptr + data_len); 819 if (bline == NULL) { 820 /* 821 * No pattern found in this chunk. 822 * Need to find the last line 823 * in this chunk. 824 */ 825 ptrend = rfind_nl(ptr, data_len); 826 827 /* 828 * When this chunk does not contain newline, 829 * ptrend becomes NULL, which should happen 830 * when the last line of file does not end 831 * with a newline. At such a point, 832 * newlinep should have been set to 0. 833 * Therefore, just after jumping to 834 * L_skip_line, the main for-loop quits, 835 * and the line_len value won't be 836 * used. 837 */ 838 line_len = ptrend - ptr; 839 goto L_skip_line; 840 } 841 if (bline > ptrend) { 842 /* 843 * Pattern found not in the first line 844 * of this chunk. 845 * Discard the first line. 846 */ 847 line_len = ptrend - ptr; 848 goto L_skip_line; 849 } 850 /* 851 * Pattern found in the first line of this chunk. 852 * Using this result. 853 */ 854 *ptrend = '\0'; 855 line_len = ptrend - ptr; 856 857 /* 858 * before jumping to L_next_line, 859 * need to handle xflag if specified 860 */ 861 if (xflag && (line_len != bmglen || 862 strcmp(bmgpat, ptr) != 0)) { 863 /* didn't match */ 864 pp = NULL; 865 } else { 866 pp = patterns; /* to make it happen */ 867 } 868 goto L_next_line; 869 } 870 lineno++; 871 /* 872 * Line starts from ptr and ends at ptrend. 873 * line_len will be the length of the line. 874 */ 875 *ptrend = '\0'; 876 line_len = ptrend - ptr; 877 878 /* 879 * From now, the process will be performed based 880 * on the line from ptr to ptrend. 881 */ 882 if (use_wchar) { 883 size_t len; 884 885 if (line_len >= outbuflen) { 886 outbuflen = line_len + 1; 887 outline = realloc(outline, 888 sizeof (wchar_t) * outbuflen); 889 if (outline == NULL) { 890 (void) fprintf(stderr, 891 gettext("%s: out of memory\n"), 892 cmdname); 893 exit(2); 894 } 895 } 896 897 len = mbstowcs(outline, ptr, line_len); 898 if (len == (size_t)-1) { 899 (void) fprintf(stderr, gettext( 900 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 901 cmdname, fn, lineno); 902 /* never match a line with invalid sequence */ 903 goto L_skip_line; 904 } 905 outline[len] = L'\0'; 906 907 if (iflag) { 908 wchar_t *cp; 909 for (cp = outline; *cp != '\0'; cp++) { 910 *cp = towlower((wint_t)*cp); 911 } 912 } 913 914 if (xflag) { 915 for (pp = patterns; pp; pp = pp->next) { 916 if (outline[0] == pp->wpattern[0] && 917 wcscmp(outline, 918 pp->wpattern) == 0) { 919 /* matched */ 920 break; 921 } 922 } 923 } else { 924 for (pp = patterns; pp; pp = pp->next) { 925 if (wcswcs(outline, pp->wpattern) 926 != NULL) { 927 /* matched */ 928 break; 929 } 930 } 931 } 932 } else if (Fflag) { 933 /* fgrep in byte-oriented handling */ 934 char *fptr; 935 if (iflag) { 936 fptr = istrdup(ptr); 937 } else { 938 fptr = ptr; 939 } 940 if (xflag) { 941 /* fgrep -x */ 942 for (pp = patterns; pp; pp = pp->next) { 943 if (fptr[0] == pp->pattern[0] && 944 strcmp(fptr, pp->pattern) == 0) { 945 /* matched */ 946 break; 947 } 948 } 949 } else { 950 for (pp = patterns; pp; pp = pp->next) { 951 if (strstr(fptr, pp->pattern) != NULL) { 952 /* matched */ 953 break; 954 } 955 } 956 } 957 } else { 958 /* grep or egrep */ 959 for (pp = patterns; pp; pp = pp->next) { 960 int rv; 961 962 rv = regexec(&pp->re, ptr, 0, NULL, 0); 963 if (rv == REG_OK) { 964 /* matched */ 965 break; 966 } 967 968 switch (rv) { 969 case REG_NOMATCH: 970 break; 971 case REG_ECHAR: 972 (void) fprintf(stderr, gettext( 973 "%s: input file \"%s\": line %lld: invalid multibyte character\n"), 974 cmdname, fn, lineno); 975 break; 976 default: 977 (void) regerror(rv, &pp->re, errstr, 978 sizeof (errstr)); 979 (void) fprintf(stderr, gettext( 980 "%s: input file \"%s\": line %lld: %s\n"), 981 cmdname, fn, lineno, errstr); 982 exit(2); 983 } 984 } 985 } 986 987 L_next_line: 988 /* 989 * Here, if pp points to non-NULL, something has been matched 990 * to the pattern. 991 */ 992 if (nvflag == (pp != NULL)) { 993 matches++; 994 /* 995 * Handle q, l, and c flags. 996 */ 997 if (qflag) { 998 /* no need to continue */ 999 /* 1000 * End of this line is ptrend. 1001 * We have read up to ptr + data_len. 1002 */ 1003 off_t pos; 1004 pos = ptr + data_len - (ptrend + 1); 1005 (void) lseek(fd, -pos, SEEK_CUR); 1006 exit(0); 1007 } 1008 if (lflag) { 1009 (void) printf("%s\n", fn); 1010 break; 1011 } 1012 if (!cflag) { 1013 if (outfn) { 1014 (void) printf("%s:", fn); 1015 } 1016 if (bflag) { 1017 (void) printf("%lld:", (offset_t) 1018 (line_offset / BSIZE)); 1019 } 1020 if (nflag) { 1021 (void) printf("%lld:", lineno); 1022 } 1023 *ptrend = '\n'; 1024 (void) fwrite(ptr, 1, line_len + 1, stdout); 1025 } 1026 if (ferror(stdout)) { 1027 return (0); 1028 } 1029 } 1030 L_skip_line: 1031 if (!newlinep) 1032 break; 1033 1034 data_len -= line_len + 1; 1035 line_offset += line_len + 1; 1036 ptr = ptrend + 1; 1037 } 1038 1039 if (cflag) { 1040 if (outfn) { 1041 (void) printf("%s:", fn); 1042 } 1043 if (!qflag) { 1044 (void) printf("%lld\n", matches); 1045 } 1046 } 1047 return (matches != 0); 1048 } 1049 1050 /* 1051 * usage message for grep 1052 */ 1053 static void 1054 usage(void) 1055 { 1056 if (egrep || fgrep) { 1057 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1058 (void) fprintf(stderr, 1059 gettext(" [-c|-l|-q] [-bhinsvx] " 1060 "pattern_list [file ...]\n")); 1061 1062 (void) fprintf(stderr, "\t%s", cmdname); 1063 (void) fprintf(stderr, 1064 gettext(" [-c|-l|-q] [-bhinsvx] [-e pattern_list]... " 1065 "[-f pattern_file]... [file...]\n")); 1066 } else { 1067 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname); 1068 (void) fprintf(stderr, 1069 gettext(" [-c|-l|-q] [-bhinsvwx] " 1070 "pattern_list [file ...]\n")); 1071 1072 (void) fprintf(stderr, "\t%s", cmdname); 1073 (void) fprintf(stderr, 1074 gettext(" [-c|-l|-q] [-bhinsvwx] [-e pattern_list]... " 1075 "[-f pattern_file]... [file...]\n")); 1076 1077 (void) fprintf(stderr, "\t%s", cmdname); 1078 (void) fprintf(stderr, 1079 gettext(" -E [-c|-l|-q] [-bhinsvx] " 1080 "pattern_list [file ...]\n")); 1081 1082 (void) fprintf(stderr, "\t%s", cmdname); 1083 (void) fprintf(stderr, 1084 gettext(" -E [-c|-l|-q] [-bhinsvx] [-e pattern_list]... " 1085 "[-f pattern_file]... [file...]\n")); 1086 1087 (void) fprintf(stderr, "\t%s", cmdname); 1088 (void) fprintf(stderr, 1089 gettext(" -F [-c|-l|-q] [-bhinsvx] " 1090 "pattern_list [file ...]\n")); 1091 1092 (void) fprintf(stderr, "\t%s", cmdname); 1093 (void) fprintf(stderr, 1094 gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... " 1095 "[-f pattern_file]... [file...]\n")); 1096 } 1097 exit(2); 1098 /* NOTREACHED */ 1099 } 1100 1101 /* 1102 * Compile literal pattern into BMG tables 1103 */ 1104 static void 1105 bmgcomp(char *pat, int len) 1106 { 1107 int i; 1108 int tlen; 1109 unsigned char *uc = (unsigned char *)pat; 1110 1111 bmglen = len; 1112 bmgpat = pat; 1113 1114 for (i = 0; i < M_CSETSIZE; i++) { 1115 bmgtab[i] = len; 1116 } 1117 1118 len--; 1119 for (tlen = len, i = 0; i <= len; i++, tlen--) { 1120 bmgtab[*uc++] = tlen; 1121 } 1122 } 1123 1124 /* 1125 * BMG search. 1126 */ 1127 static char * 1128 bmgexec(char *str, char *end) 1129 { 1130 int t; 1131 char *k, *s, *p; 1132 1133 k = str + bmglen - 1; 1134 if (bmglen == 1) { 1135 return (memchr(str, bmgpat[0], end - str)); 1136 } 1137 for (; ; ) { 1138 /* inner loop, should be most optimized */ 1139 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) { 1140 k += t; 1141 } 1142 if (k >= end) { 1143 return (NULL); 1144 } 1145 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) { 1146 if (p == bmgpat) { 1147 return (s); 1148 } 1149 } 1150 k++; 1151 } 1152 /* NOTREACHED */ 1153 } 1154