1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * grep - pattern matching program - combined grep, egrep, and fgrep.
29 * Based on MKS grep command, with XCU & Solaris mods.
30 */
31
32 /*
33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
34 *
35 */
36
37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38
39 /*
40 * Copyright 2013 Damian Bogel. All rights reserved.
41 */
42
43 #include <string.h>
44 #include <stdlib.h>
45 #include <ctype.h>
46 #include <stdarg.h>
47 #include <regex.h>
48 #include <limits.h>
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 #include <fcntl.h>
52 #include <stdio.h>
53 #include <locale.h>
54 #include <wchar.h>
55 #include <errno.h>
56 #include <unistd.h>
57 #include <wctype.h>
58 #include <ftw.h>
59 #include <sys/param.h>
60
61 #define STDIN_FILENAME gettext("(standard input)")
62
63 #define BSIZE 512 /* Size of block for -b */
64 #define BUFSIZE 8192 /* Input buffer size */
65 #define MAX_DEPTH 1000 /* how deep to recurse */
66
67 #define M_CSETSIZE 256 /* singlebyte chars */
68 static int bmglen; /* length of BMG pattern */
69 static char *bmgpat; /* BMG pattern */
70 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */
71
72 typedef struct _PATTERN {
73 char *pattern; /* original pattern */
74 wchar_t *wpattern; /* wide, lowercased pattern */
75 struct _PATTERN *next;
76 regex_t re; /* compiled pattern */
77 } PATTERN;
78
79 static PATTERN *patterns;
80 static char errstr[128]; /* regerror string buffer */
81 static int regflags = 0; /* regcomp options */
82 static int matched = 0; /* return of the grep() */
83 static int errors = 0; /* count of errors */
84 static uchar_t fgrep = 0; /* Invoked as fgrep */
85 static uchar_t egrep = 0; /* Invoked as egrep */
86 static uchar_t nvflag = 1; /* Print matching lines */
87 static uchar_t cflag; /* Count of matches */
88 static uchar_t iflag; /* Case insensitve matching */
89 static uchar_t Hflag; /* Precede lines by file name */
90 static uchar_t hflag; /* Supress printing of filename */
91 static uchar_t lflag; /* Print file names of matches */
92 static uchar_t nflag; /* Precede lines by line number */
93 static uchar_t rflag; /* Search directories recursively */
94 static uchar_t bflag; /* Preccede matches by block number */
95 static uchar_t sflag; /* Suppress file error messages */
96 static uchar_t qflag; /* Suppress standard output */
97 static uchar_t wflag; /* Search for expression as a word */
98 static uchar_t xflag; /* Anchoring */
99 static uchar_t Eflag; /* Egrep or -E flag */
100 static uchar_t Fflag; /* Fgrep or -F flag */
101 static uchar_t Rflag; /* Like rflag, but follow symlinks */
102 static uchar_t outfn; /* Put out file name */
103 static char *cmdname;
104
105 static int use_wchar, use_bmg, mblocale;
106
107 static size_t outbuflen, prntbuflen;
108 static char *prntbuf;
109 static wchar_t *outline;
110
111 static void addfile(const char *fn);
112 static void addpattern(char *s);
113 static void fixpatterns(void);
114 static void usage(void);
115 static int grep(int, const char *);
116 static void bmgcomp(char *, int);
117 static char *bmgexec(char *, char *);
118 static int recursive(const char *, const struct stat *, int, struct FTW *);
119 static void process_path(const char *);
120 static void process_file(const char *, int);
121
122 /*
123 * mainline for grep
124 */
125 int
main(int argc,char ** argv)126 main(int argc, char **argv)
127 {
128 char *ap;
129 int c;
130 int fflag = 0;
131 int i, n_pattern = 0, n_file = 0;
132 char **pattern_list = NULL;
133 char **file_list = NULL;
134
135 (void) setlocale(LC_ALL, "");
136 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
137 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
138 #endif
139 (void) textdomain(TEXT_DOMAIN);
140
141 /*
142 * true if this is running on the multibyte locale
143 */
144 mblocale = (MB_CUR_MAX > 1);
145 /*
146 * Skip leading slashes
147 */
148 cmdname = argv[0];
149 if (ap = strrchr(cmdname, '/'))
150 cmdname = ap + 1;
151
152 ap = cmdname;
153 /*
154 * Detect egrep/fgrep via command name, map to -E and -F options.
155 */
156 if (*ap == 'e' || *ap == 'E') {
157 regflags |= REG_EXTENDED;
158 egrep++;
159 } else {
160 if (*ap == 'f' || *ap == 'F') {
161 fgrep++;
162 }
163 }
164
165 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
166 switch (c) {
167 case 'v': /* POSIX: negate matches */
168 nvflag = 0;
169 break;
170
171 case 'c': /* POSIX: write count */
172 cflag++;
173 break;
174
175 case 'i': /* POSIX: ignore case */
176 iflag++;
177 regflags |= REG_ICASE;
178 break;
179
180 case 'l': /* POSIX: Write filenames only */
181 lflag++;
182 break;
183
184 case 'n': /* POSIX: Write line numbers */
185 nflag++;
186 break;
187
188 case 'r': /* Solaris: search recursively */
189 rflag++;
190 break;
191
192 case 'b': /* Solaris: Write file block numbers */
193 bflag++;
194 break;
195
196 case 's': /* POSIX: No error msgs for files */
197 sflag++;
198 break;
199
200 case 'e': /* POSIX: pattern list */
201 n_pattern++;
202 pattern_list = realloc(pattern_list,
203 sizeof (char *) * n_pattern);
204 if (pattern_list == NULL) {
205 (void) fprintf(stderr,
206 gettext("%s: out of memory\n"),
207 cmdname);
208 exit(2);
209 }
210 *(pattern_list + n_pattern - 1) = optarg;
211 break;
212
213 case 'f': /* POSIX: pattern file */
214 fflag = 1;
215 n_file++;
216 file_list = realloc(file_list,
217 sizeof (char *) * n_file);
218 if (file_list == NULL) {
219 (void) fprintf(stderr,
220 gettext("%s: out of memory\n"),
221 cmdname);
222 exit(2);
223 }
224 *(file_list + n_file - 1) = optarg;
225 break;
226
227 /* based on options order h or H is set as in GNU grep */
228 case 'h': /* Solaris: supress printing of file name */
229 hflag = 1;
230 Hflag = 0;
231 break;
232 /* Solaris: precede every matching with file name */
233 case 'H':
234 Hflag = 1;
235 hflag = 0;
236 break;
237
238 case 'q': /* POSIX: quiet: status only */
239 qflag++;
240 break;
241
242 case 'w': /* Solaris: treat pattern as word */
243 wflag++;
244 break;
245
246 case 'x': /* POSIX: full line matches */
247 xflag++;
248 regflags |= REG_ANCHOR;
249 break;
250
251 case 'E': /* POSIX: Extended RE's */
252 regflags |= REG_EXTENDED;
253 Eflag++;
254 break;
255
256 case 'F': /* POSIX: strings, not RE's */
257 Fflag++;
258 break;
259
260 case 'R': /* Solaris: like rflag, but follow symlinks */
261 Rflag++;
262 rflag++;
263 break;
264
265 default:
266 usage();
267 }
268 }
269 /*
270 * If we're invoked as egrep or fgrep we need to do some checks
271 */
272
273 if (egrep || fgrep) {
274 /*
275 * Use of -E or -F with egrep or fgrep is illegal
276 */
277 if (Eflag || Fflag)
278 usage();
279 /*
280 * Don't allow use of wflag with egrep / fgrep
281 */
282 if (wflag)
283 usage();
284 /*
285 * For Solaris the -s flag is equivalent to XCU -q
286 */
287 if (sflag)
288 qflag++;
289 /*
290 * done with above checks - set the appropriate flags
291 */
292 if (egrep)
293 Eflag++;
294 else /* Else fgrep */
295 Fflag++;
296 }
297
298 if (wflag && (Eflag || Fflag)) {
299 /*
300 * -w cannot be specified with grep -F
301 */
302 usage();
303 }
304
305 /*
306 * -E and -F flags are mutually exclusive - check for this
307 */
308 if (Eflag && Fflag)
309 usage();
310
311 /*
312 * -l overrides -H like in GNU grep
313 */
314 if (lflag)
315 Hflag = 0;
316
317 /*
318 * -c, -l and -q flags are mutually exclusive
319 * We have -c override -l like in Solaris.
320 * -q overrides -l & -c programmatically in grep() function.
321 */
322 if (cflag && lflag)
323 lflag = 0;
324
325 argv += optind - 1;
326 argc -= optind - 1;
327
328 /*
329 * Now handling -e and -f option
330 */
331 if (pattern_list) {
332 for (i = 0; i < n_pattern; i++) {
333 addpattern(pattern_list[i]);
334 }
335 free(pattern_list);
336 }
337 if (file_list) {
338 for (i = 0; i < n_file; i++) {
339 addfile(file_list[i]);
340 }
341 free(file_list);
342 }
343
344 /*
345 * No -e or -f? Make sure there is one more arg, use it as the pattern.
346 */
347 if (patterns == NULL && !fflag) {
348 if (argc < 2)
349 usage();
350 addpattern(argv[1]);
351 argc--;
352 argv++;
353 }
354
355 /*
356 * If -x flag is not specified or -i flag is specified
357 * with fgrep in a multibyte locale, need to use
358 * the wide character APIs. Otherwise, byte-oriented
359 * process will be done.
360 */
361 use_wchar = Fflag && mblocale && (!xflag || iflag);
362
363 /*
364 * Compile Patterns and also decide if BMG can be used
365 */
366 fixpatterns();
367
368 /* Process all files: stdin, or rest of arg list */
369 if (argc < 2) {
370 matched = grep(0, STDIN_FILENAME);
371 } else {
372 if (Hflag || (argc > 2 && hflag == 0))
373 outfn = 1; /* Print filename on match line */
374 for (argv++; *argv != NULL; argv++) {
375 process_path(*argv);
376 }
377 }
378 /*
379 * Return() here is used instead of exit
380 */
381
382 (void) fflush(stdout);
383
384 if (errors)
385 return (2);
386 return (matched ? 0 : 1);
387 }
388
389 static void
process_path(const char * path)390 process_path(const char *path)
391 {
392 struct stat st;
393 int walkflags = FTW_CHDIR;
394 char *buf = NULL;
395
396 if (rflag) {
397 if (stat(path, &st) != -1 &&
398 (st.st_mode & S_IFMT) == S_IFDIR) {
399 outfn = 1; /* Print filename */
400
401 /*
402 * Add trailing slash if arg
403 * is directory, to resolve symlinks.
404 */
405 if (path[strlen(path) - 1] != '/') {
406 (void) asprintf(&buf, "%s/", path);
407 if (buf != NULL)
408 path = buf;
409 }
410
411 /*
412 * Search through subdirs if path is directory.
413 * Don't follow symlinks if Rflag is not set.
414 */
415 if (!Rflag)
416 walkflags |= FTW_PHYS;
417
418 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
419 if (!sflag)
420 (void) fprintf(stderr,
421 gettext("%s: can't open \"%s\"\n"),
422 cmdname, path);
423 errors = 1;
424 }
425 return;
426 }
427 }
428 process_file(path, 0);
429 }
430
431 /*
432 * Read and process all files in directory recursively.
433 */
434 static int
recursive(const char * name,const struct stat * statp,int info,struct FTW * ftw)435 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
436 {
437 /*
438 * Process files and follow symlinks if Rflag set.
439 */
440 if (info != FTW_F) {
441 /* Report broken symlinks and unreadable files */
442 if (!sflag &&
443 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
444 (void) fprintf(stderr,
445 gettext("%s: can't open \"%s\"\n"), cmdname, name);
446 }
447 return (0);
448 }
449
450
451 /* Skip devices and pipes if Rflag is not set */
452 if (!Rflag && !S_ISREG(statp->st_mode))
453 return (0);
454 /* Pass offset to relative name from FTW_CHDIR */
455 process_file(name, ftw->base);
456 return (0);
457 }
458
459 /*
460 * Opens file and call grep function.
461 */
462 static void
process_file(const char * name,int base)463 process_file(const char *name, int base)
464 {
465 int fd;
466
467 if ((fd = open(name + base, O_RDONLY)) == -1) {
468 errors = 1;
469 if (!sflag) /* Silent mode */
470 (void) fprintf(stderr, gettext(
471 "%s: can't open \"%s\"\n"),
472 cmdname, name);
473 return;
474 }
475 matched |= grep(fd, name);
476 (void) close(fd);
477
478 if (ferror(stdout)) {
479 (void) fprintf(stderr, gettext(
480 "%s: error writing to stdout\n"),
481 cmdname);
482 (void) fflush(stdout);
483 exit(2);
484 }
485
486 }
487
488 /*
489 * Add a file of strings to the pattern list.
490 */
491 static void
addfile(const char * fn)492 addfile(const char *fn)
493 {
494 FILE *fp;
495 char *inbuf;
496 char *bufp;
497 size_t bufsiz, buflen, bufused;
498
499 /*
500 * Open the pattern file
501 */
502 if ((fp = fopen(fn, "r")) == NULL) {
503 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
504 cmdname, fn);
505 exit(2);
506 }
507 bufsiz = BUFSIZE;
508 if ((inbuf = malloc(bufsiz)) == NULL) {
509 (void) fprintf(stderr,
510 gettext("%s: out of memory\n"), cmdname);
511 exit(2);
512 }
513 bufp = inbuf;
514 bufused = 0;
515 /*
516 * Read in the file, reallocing as we need more memory
517 */
518 while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
519 buflen = strlen(bufp);
520 bufused += buflen;
521 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
522 /*
523 * if this line does not fit to the buffer,
524 * realloc larger buffer
525 */
526 bufsiz += BUFSIZE;
527 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
528 (void) fprintf(stderr,
529 gettext("%s: out of memory\n"),
530 cmdname);
531 exit(2);
532 }
533 bufp = inbuf + bufused;
534 continue;
535 }
536 if (bufp[buflen - 1] == '\n') {
537 bufp[--buflen] = '\0';
538 }
539 addpattern(inbuf);
540
541 bufp = inbuf;
542 bufused = 0;
543 }
544 free(inbuf);
545 (void) fclose(fp);
546 }
547
548 /*
549 * Add a string to the pattern list.
550 */
551 static void
addpattern(char * s)552 addpattern(char *s)
553 {
554 PATTERN *pp;
555 char *wordbuf;
556 char *np;
557
558 for (; ; ) {
559 np = strchr(s, '\n');
560 if (np != NULL)
561 *np = '\0';
562 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
563 (void) fprintf(stderr, gettext(
564 "%s: out of memory\n"),
565 cmdname);
566 exit(2);
567 }
568 if (wflag) {
569 /*
570 * Solaris wflag support: Add '<' '>' to pattern to
571 * select it as a word. Doesn't make sense with -F
572 * but we're Libertarian.
573 */
574 size_t slen, wordlen;
575
576 slen = strlen(s);
577 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
578 if ((wordbuf = malloc(wordlen)) == NULL) {
579 (void) fprintf(stderr,
580 gettext("%s: out of memory\n"),
581 cmdname);
582 exit(2);
583 }
584 (void) strcpy(wordbuf, "\\<");
585 (void) strcpy(wordbuf + 2, s);
586 (void) strcpy(wordbuf + 2 + slen, "\\>");
587 } else {
588 if ((wordbuf = strdup(s)) == NULL) {
589 (void) fprintf(stderr,
590 gettext("%s: out of memory\n"),
591 cmdname);
592 exit(2);
593 }
594 }
595 pp->pattern = wordbuf;
596 pp->next = patterns;
597 patterns = pp;
598 if (np == NULL)
599 break;
600 s = np + 1;
601 }
602 }
603
604 /*
605 * Fix patterns.
606 * Must do after all arguments read, in case later -i option.
607 */
608 static void
fixpatterns(void)609 fixpatterns(void)
610 {
611 PATTERN *pp;
612 int rv, fix_pattern, npatterns;
613
614 /*
615 * As REG_ANCHOR flag is not supported in the current Solaris,
616 * need to fix the specified pattern if -x is specified with
617 * grep or egrep
618 */
619 fix_pattern = !Fflag && xflag;
620
621 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
622 npatterns++;
623 if (fix_pattern) {
624 char *cp, *cq;
625 size_t plen, nplen;
626
627 plen = strlen(pp->pattern);
628 /* '^' pattern '$' */
629 nplen = 1 + plen + 1 + 1;
630 if ((cp = malloc(nplen)) == NULL) {
631 (void) fprintf(stderr,
632 gettext("%s: out of memory\n"),
633 cmdname);
634 exit(2);
635 }
636 cq = cp;
637 *cq++ = '^';
638 cq = strcpy(cq, pp->pattern) + plen;
639 *cq++ = '$';
640 *cq = '\0';
641 free(pp->pattern);
642 pp->pattern = cp;
643 }
644
645 if (Fflag) {
646 if (use_wchar) {
647 /*
648 * Fflag && mblocale && iflag
649 * Fflag && mblocale && !xflag
650 */
651 size_t n;
652 n = strlen(pp->pattern) + 1;
653 if ((pp->wpattern =
654 malloc(sizeof (wchar_t) * n)) == NULL) {
655 (void) fprintf(stderr,
656 gettext("%s: out of memory\n"),
657 cmdname);
658 exit(2);
659 }
660 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
661 (size_t)-1) {
662 (void) fprintf(stderr,
663 gettext("%s: failed to convert "
664 "\"%s\" to wide-characters\n"),
665 cmdname, pp->pattern);
666 exit(2);
667 }
668 if (iflag) {
669 wchar_t *wp;
670 for (wp = pp->wpattern; *wp != L'\0';
671 wp++) {
672 *wp = towlower((wint_t)*wp);
673 }
674 }
675 free(pp->pattern);
676 } else {
677 /*
678 * Fflag && mblocale && !iflag
679 * Fflag && !mblocale && iflag
680 * Fflag && !mblocale && !iflag
681 */
682 if (iflag) {
683 unsigned char *cp;
684 for (cp = (unsigned char *)pp->pattern;
685 *cp != '\0'; cp++) {
686 *cp = tolower(*cp);
687 }
688 }
689 }
690 /*
691 * fgrep: No regular expressions.
692 */
693 continue;
694 }
695
696 /*
697 * For non-fgrep, compile the regular expression,
698 * give an informative error message, and exit if
699 * it didn't compile.
700 */
701 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
702 (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
703 (void) fprintf(stderr,
704 gettext("%s: RE error in %s: %s\n"),
705 cmdname, pp->pattern, errstr);
706 exit(2);
707 }
708 free(pp->pattern);
709 }
710
711 /*
712 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
713 * Use the Boyer-Moore-Gosper algorithm if:
714 * - fgrep (Fflag)
715 * - singlebyte locale (!mblocale)
716 * - no ignoring case (!iflag)
717 * - no printing line numbers (!nflag)
718 * - no negating the output (nvflag)
719 * - only one pattern (npatterns == 1)
720 * - non zero length pattern (strlen(patterns->pattern) != 0)
721 *
722 * It's guaranteed patterns->pattern is still alive
723 * when Fflag && !mblocale.
724 */
725 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
726 (npatterns == 1) && (strlen(patterns->pattern) != 0);
727 }
728
729 /*
730 * Search a newline from the beginning of the string
731 */
732 static char *
find_nl(const char * ptr,size_t len)733 find_nl(const char *ptr, size_t len)
734 {
735 while (len-- != 0) {
736 if (*ptr++ == '\n') {
737 return ((char *)--ptr);
738 }
739 }
740 return (NULL);
741 }
742
743 /*
744 * Search a newline from the end of the string
745 */
746 static char *
rfind_nl(const char * ptr,size_t len)747 rfind_nl(const char *ptr, size_t len)
748 {
749 const char *uptr = ptr + len;
750 while (len--) {
751 if (*--uptr == '\n') {
752 return ((char *)uptr);
753 }
754 }
755 return (NULL);
756 }
757
758 /*
759 * Duplicate the specified string converting each character
760 * into a lower case.
761 */
762 static char *
istrdup(const char * s1)763 istrdup(const char *s1)
764 {
765 static size_t ibuflen = 0;
766 static char *ibuf = NULL;
767 size_t slen;
768 char *p;
769
770 slen = strlen(s1);
771 if (slen >= ibuflen) {
772 /* ibuf does not fit to s1 */
773 ibuflen = slen + 1;
774 ibuf = realloc(ibuf, ibuflen);
775 if (ibuf == NULL) {
776 (void) fprintf(stderr,
777 gettext("%s: out of memory\n"), cmdname);
778 exit(2);
779 }
780 }
781 p = ibuf;
782 do {
783 *p++ = tolower(*s1);
784 } while (*s1++ != '\0');
785 return (ibuf);
786 }
787
788 /*
789 * Do grep on a single file.
790 * Return true in any lines matched.
791 *
792 * We have two strategies:
793 * The fast one is used when we have a single pattern with
794 * a string known to occur in the pattern. We can then
795 * do a BMG match on the whole buffer.
796 * This is an order of magnitude faster.
797 * Otherwise we split the buffer into lines,
798 * and check for a match on each line.
799 */
800 static int
grep(int fd,const char * fn)801 grep(int fd, const char *fn)
802 {
803 PATTERN *pp;
804 off_t data_len; /* length of the data chunk */
805 off_t line_len; /* length of the current line */
806 off_t line_offset; /* current line's offset from the beginning */
807 long long lineno;
808 long long matches = 0; /* Number of matching lines */
809 int newlinep; /* 0 if the last line of file has no newline */
810 char *ptr, *ptrend;
811
812
813 if (patterns == NULL)
814 return (0); /* no patterns to match -- just return */
815
816 pp = patterns;
817
818 if (use_bmg) {
819 bmgcomp(pp->pattern, strlen(pp->pattern));
820 }
821
822 if (use_wchar && outline == NULL) {
823 outbuflen = BUFSIZE + 1;
824 outline = malloc(sizeof (wchar_t) * outbuflen);
825 if (outline == NULL) {
826 (void) fprintf(stderr, gettext("%s: out of memory\n"),
827 cmdname);
828 exit(2);
829 }
830 }
831
832 if (prntbuf == NULL) {
833 prntbuflen = BUFSIZE;
834 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
835 (void) fprintf(stderr, gettext("%s: out of memory\n"),
836 cmdname);
837 exit(2);
838 }
839 }
840
841 line_offset = 0;
842 lineno = 0;
843 newlinep = 1;
844 data_len = 0;
845 for (; ; ) {
846 long count;
847 off_t offset = 0;
848
849 if (data_len == 0) {
850 /*
851 * If no data in the buffer, reset ptr
852 */
853 ptr = prntbuf;
854 }
855 if (ptr == prntbuf) {
856 /*
857 * The current data chunk starts from prntbuf.
858 * This means either the buffer has no data
859 * or the buffer has no newline.
860 * So, read more data from input.
861 */
862 count = read(fd, ptr + data_len, prntbuflen - data_len);
863 if (count < 0) {
864 /* read error */
865 if (cflag) {
866 if (outfn && !rflag) {
867 (void) fprintf(stdout,
868 "%s:", fn);
869 }
870 if (!qflag && !rflag) {
871 (void) fprintf(stdout, "%lld\n",
872 matches);
873 }
874 }
875 return (0);
876 } else if (count == 0) {
877 /* no new data */
878 if (data_len == 0) {
879 /* end of file already reached */
880 break;
881 }
882 /* last line of file has no newline */
883 ptrend = ptr + data_len;
884 newlinep = 0;
885 goto L_start_process;
886 }
887 offset = data_len;
888 data_len += count;
889 }
890
891 /*
892 * Look for newline in the chunk
893 * between ptr + offset and ptr + data_len - offset.
894 */
895 ptrend = find_nl(ptr + offset, data_len - offset);
896 if (ptrend == NULL) {
897 /* no newline found in this chunk */
898 if (ptr > prntbuf) {
899 /*
900 * Move remaining data to the beginning
901 * of the buffer.
902 * Remaining data lie from ptr for
903 * data_len bytes.
904 */
905 (void) memmove(prntbuf, ptr, data_len);
906 }
907 if (data_len == prntbuflen) {
908 /*
909 * No enough room in the buffer
910 */
911 prntbuflen += BUFSIZE;
912 prntbuf = realloc(prntbuf, prntbuflen + 1);
913 if (prntbuf == NULL) {
914 (void) fprintf(stderr,
915 gettext("%s: out of memory\n"),
916 cmdname);
917 exit(2);
918 }
919 }
920 ptr = prntbuf;
921 /* read the next input */
922 continue;
923 }
924 L_start_process:
925
926 /*
927 * Beginning of the chunk: ptr
928 * End of the chunk: ptr + data_len
929 * Beginning of the line: ptr
930 * End of the line: ptrend
931 */
932
933 if (use_bmg) {
934 /*
935 * Use Boyer-Moore-Gosper algorithm to find out if
936 * this chunk (not this line) contains the specified
937 * pattern. If not, restart from the last line
938 * of this chunk.
939 */
940 char *bline;
941 bline = bmgexec(ptr, ptr + data_len);
942 if (bline == NULL) {
943 /*
944 * No pattern found in this chunk.
945 * Need to find the last line
946 * in this chunk.
947 */
948 ptrend = rfind_nl(ptr, data_len);
949
950 /*
951 * When this chunk does not contain newline,
952 * ptrend becomes NULL, which should happen
953 * when the last line of file does not end
954 * with a newline. At such a point,
955 * newlinep should have been set to 0.
956 * Therefore, just after jumping to
957 * L_skip_line, the main for-loop quits,
958 * and the line_len value won't be
959 * used.
960 */
961 line_len = ptrend - ptr;
962 goto L_skip_line;
963 }
964 if (bline > ptrend) {
965 /*
966 * Pattern found not in the first line
967 * of this chunk.
968 * Discard the first line.
969 */
970 line_len = ptrend - ptr;
971 goto L_skip_line;
972 }
973 /*
974 * Pattern found in the first line of this chunk.
975 * Using this result.
976 */
977 *ptrend = '\0';
978 line_len = ptrend - ptr;
979
980 /*
981 * before jumping to L_next_line,
982 * need to handle xflag if specified
983 */
984 if (xflag && (line_len != bmglen ||
985 strcmp(bmgpat, ptr) != 0)) {
986 /* didn't match */
987 pp = NULL;
988 } else {
989 pp = patterns; /* to make it happen */
990 }
991 goto L_next_line;
992 }
993 lineno++;
994 /*
995 * Line starts from ptr and ends at ptrend.
996 * line_len will be the length of the line.
997 */
998 *ptrend = '\0';
999 line_len = ptrend - ptr;
1000
1001 /*
1002 * From now, the process will be performed based
1003 * on the line from ptr to ptrend.
1004 */
1005 if (use_wchar) {
1006 size_t len;
1007
1008 if (line_len >= outbuflen) {
1009 outbuflen = line_len + 1;
1010 outline = realloc(outline,
1011 sizeof (wchar_t) * outbuflen);
1012 if (outline == NULL) {
1013 (void) fprintf(stderr,
1014 gettext("%s: out of memory\n"),
1015 cmdname);
1016 exit(2);
1017 }
1018 }
1019
1020 len = mbstowcs(outline, ptr, line_len);
1021 if (len == (size_t)-1) {
1022 (void) fprintf(stderr, gettext(
1023 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1024 cmdname, fn, lineno);
1025 /* never match a line with invalid sequence */
1026 goto L_skip_line;
1027 }
1028 outline[len] = L'\0';
1029
1030 if (iflag) {
1031 wchar_t *cp;
1032 for (cp = outline; *cp != '\0'; cp++) {
1033 *cp = towlower((wint_t)*cp);
1034 }
1035 }
1036
1037 if (xflag) {
1038 for (pp = patterns; pp; pp = pp->next) {
1039 if (outline[0] == pp->wpattern[0] &&
1040 wcscmp(outline,
1041 pp->wpattern) == 0) {
1042 /* matched */
1043 break;
1044 }
1045 }
1046 } else {
1047 for (pp = patterns; pp; pp = pp->next) {
1048 if (wcswcs(outline, pp->wpattern)
1049 != NULL) {
1050 /* matched */
1051 break;
1052 }
1053 }
1054 }
1055 } else if (Fflag) {
1056 /* fgrep in byte-oriented handling */
1057 char *fptr;
1058 if (iflag) {
1059 fptr = istrdup(ptr);
1060 } else {
1061 fptr = ptr;
1062 }
1063 if (xflag) {
1064 /* fgrep -x */
1065 for (pp = patterns; pp; pp = pp->next) {
1066 if (fptr[0] == pp->pattern[0] &&
1067 strcmp(fptr, pp->pattern) == 0) {
1068 /* matched */
1069 break;
1070 }
1071 }
1072 } else {
1073 for (pp = patterns; pp; pp = pp->next) {
1074 if (strstr(fptr, pp->pattern) != NULL) {
1075 /* matched */
1076 break;
1077 }
1078 }
1079 }
1080 } else {
1081 /* grep or egrep */
1082 for (pp = patterns; pp; pp = pp->next) {
1083 int rv;
1084
1085 rv = regexec(&pp->re, ptr, 0, NULL, 0);
1086 if (rv == REG_OK) {
1087 /* matched */
1088 break;
1089 }
1090
1091 switch (rv) {
1092 case REG_NOMATCH:
1093 break;
1094 case REG_ECHAR:
1095 (void) fprintf(stderr, gettext(
1096 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1097 cmdname, fn, lineno);
1098 break;
1099 default:
1100 (void) regerror(rv, &pp->re, errstr,
1101 sizeof (errstr));
1102 (void) fprintf(stderr, gettext(
1103 "%s: input file \"%s\": line %lld: %s\n"),
1104 cmdname, fn, lineno, errstr);
1105 exit(2);
1106 }
1107 }
1108 }
1109
1110 L_next_line:
1111 /*
1112 * Here, if pp points to non-NULL, something has been matched
1113 * to the pattern.
1114 */
1115 if (nvflag == (pp != NULL)) {
1116 matches++;
1117 /*
1118 * Handle q, l, and c flags.
1119 */
1120 if (qflag) {
1121 /* no need to continue */
1122 /*
1123 * End of this line is ptrend.
1124 * We have read up to ptr + data_len.
1125 */
1126 off_t pos;
1127 pos = ptr + data_len - (ptrend + 1);
1128 (void) lseek(fd, -pos, SEEK_CUR);
1129 exit(0);
1130 }
1131 if (lflag) {
1132 (void) printf("%s\n", fn);
1133 break;
1134 }
1135 if (!cflag) {
1136 if (Hflag || outfn) {
1137 (void) printf("%s:", fn);
1138 }
1139 if (bflag) {
1140 (void) printf("%lld:", (offset_t)
1141 (line_offset / BSIZE));
1142 }
1143 if (nflag) {
1144 (void) printf("%lld:", lineno);
1145 }
1146 *ptrend = '\n';
1147 (void) fwrite(ptr, 1, line_len + 1, stdout);
1148 }
1149 if (ferror(stdout)) {
1150 return (0);
1151 }
1152 }
1153 L_skip_line:
1154 if (!newlinep)
1155 break;
1156
1157 data_len -= line_len + 1;
1158 line_offset += line_len + 1;
1159 ptr = ptrend + 1;
1160 }
1161
1162 if (cflag) {
1163 if (Hflag || outfn) {
1164 (void) printf("%s:", fn);
1165 }
1166 if (!qflag) {
1167 (void) printf("%lld\n", matches);
1168 }
1169 }
1170 return (matches != 0);
1171 }
1172
1173 /*
1174 * usage message for grep
1175 */
1176 static void
usage(void)1177 usage(void)
1178 {
1179 if (egrep || fgrep) {
1180 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1181 (void) fprintf(stderr,
1182 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1183 "pattern_list [file ...]\n"));
1184
1185 (void) fprintf(stderr, "\t%s", cmdname);
1186 (void) fprintf(stderr,
1187 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1188 "[-e pattern_list]... "
1189 "[-f pattern_file]... [file...]\n"));
1190 } else {
1191 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1192 (void) fprintf(stderr,
1193 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1194 "pattern_list [file ...]\n"));
1195
1196 (void) fprintf(stderr, "\t%s", cmdname);
1197 (void) fprintf(stderr,
1198 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1199 "[-e pattern_list]... "
1200 "[-f pattern_file]... [file...]\n"));
1201
1202 (void) fprintf(stderr, "\t%s", cmdname);
1203 (void) fprintf(stderr,
1204 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1205 "pattern_list [file ...]\n"));
1206
1207 (void) fprintf(stderr, "\t%s", cmdname);
1208 (void) fprintf(stderr,
1209 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1210 "[-e pattern_list]... "
1211 "[-f pattern_file]... [file...]\n"));
1212
1213 (void) fprintf(stderr, "\t%s", cmdname);
1214 (void) fprintf(stderr,
1215 gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1216 "pattern_list [file ...]\n"));
1217
1218 (void) fprintf(stderr, "\t%s", cmdname);
1219 (void) fprintf(stderr,
1220 gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
1221 "[-f pattern_file]... [file...]\n"));
1222 }
1223 exit(2);
1224 /* NOTREACHED */
1225 }
1226
1227 /*
1228 * Compile literal pattern into BMG tables
1229 */
1230 static void
bmgcomp(char * pat,int len)1231 bmgcomp(char *pat, int len)
1232 {
1233 int i;
1234 int tlen;
1235 unsigned char *uc = (unsigned char *)pat;
1236
1237 bmglen = len;
1238 bmgpat = pat;
1239
1240 for (i = 0; i < M_CSETSIZE; i++) {
1241 bmgtab[i] = len;
1242 }
1243
1244 len--;
1245 for (tlen = len, i = 0; i <= len; i++, tlen--) {
1246 bmgtab[*uc++] = tlen;
1247 }
1248 }
1249
1250 /*
1251 * BMG search.
1252 */
1253 static char *
bmgexec(char * str,char * end)1254 bmgexec(char *str, char *end)
1255 {
1256 int t;
1257 char *k, *s, *p;
1258
1259 k = str + bmglen - 1;
1260 if (bmglen == 1) {
1261 return (memchr(str, bmgpat[0], end - str));
1262 }
1263 for (; ; ) {
1264 /* inner loop, should be most optimized */
1265 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1266 k += t;
1267 }
1268 if (k >= end) {
1269 return (NULL);
1270 }
1271 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1272 if (p == bmgpat) {
1273 return (s);
1274 }
1275 }
1276 k++;
1277 }
1278 /* NOTREACHED */
1279 }
1280