xref: /titanic_41/usr/src/cmd/grep_xpg4/grep.c (revision b59b3655028aeb11832ad679eb5c2577c2b75235)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * grep - pattern matching program - combined grep, egrep, and fgrep.
29  *	Based on MKS grep command, with XCU & Solaris mods.
30  */
31 
32 /*
33  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
34  *
35  */
36 
37 /* Copyright 2012 Nexenta Systems, Inc.  All rights reserved. */
38 
39 /*
40  * Copyright 2013 Damian Bogel. All rights reserved.
41  */
42 
43 #include <string.h>
44 #include <stdlib.h>
45 #include <ctype.h>
46 #include <stdarg.h>
47 #include <regex.h>
48 #include <limits.h>
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 #include <fcntl.h>
52 #include <stdio.h>
53 #include <locale.h>
54 #include <wchar.h>
55 #include <errno.h>
56 #include <unistd.h>
57 #include <wctype.h>
58 #include <ftw.h>
59 #include <sys/param.h>
60 
/* Name passed to grep() in place of a file name when reading fd 0. */
#define	STDIN_FILENAME gettext("(standard input)")

#define	BSIZE		512		/* Size of block for -b */
#define	BUFSIZE		8192		/* Input buffer size */
#define	MAX_DEPTH	1000		/* depth argument handed to nftw() */

#define	M_CSETSIZE	256		/* singlebyte chars */
static int	bmglen;			/* length of BMG pattern */
static char	*bmgpat;		/* BMG pattern */
static int	bmgtab[M_CSETSIZE];	/* BMG delta1 table */

/*
 * One node of the singly linked list of patterns to search for.
 * addpattern() pushes new nodes onto the front of the list.
 */
typedef	struct	_PATTERN	{
	char	*pattern;		/* original pattern */
	wchar_t	*wpattern;		/* wide pattern, lowercased for -i */
	struct	_PATTERN	*next;
	regex_t	re;			/* compiled pattern */
} PATTERN;

static PATTERN	*patterns;		/* head of the pattern list */
static char	errstr[128];		/* regerror string buffer */
static int	regflags = 0;		/* regcomp options */
static int	matched = 0;		/* return of the grep() */
static int	errors = 0;		/* count of errors */
static uchar_t	fgrep = 0;		/* Invoked as fgrep */
static uchar_t	egrep = 0;		/* Invoked as egrep */
static uchar_t	nvflag = 1;		/* Print matching lines (0 with -v) */
static uchar_t	cflag;			/* Count of matches */
static uchar_t	iflag;			/* Case insensitive matching */
static uchar_t	Hflag;			/* Precede lines by file name */
static uchar_t	hflag;			/* Suppress printing of filename */
static uchar_t	lflag;			/* Print file names of matches */
static uchar_t	nflag;			/* Precede lines by line number */
static uchar_t	rflag;			/* Search directories recursively */
static uchar_t	bflag;			/* Precede matches by block number */
static uchar_t	sflag;			/* Suppress file error messages */
static uchar_t	qflag;			/* Suppress standard output */
static uchar_t	wflag;			/* Search for expression as a word */
static uchar_t	xflag;			/* Anchoring */
static uchar_t	Eflag;			/* Egrep or -E flag */
static uchar_t	Fflag;			/* Fgrep or -F flag */
static uchar_t	Rflag;			/* Like rflag, but follow symlinks */
static uchar_t	outfn;			/* Put out file name */
static char	*cmdname;		/* last component of argv[0] */

/*
 * use_wchar: fgrep matching goes through the wide character APIs.
 * use_bmg:   the Boyer-Moore-Gosper fast path is usable.
 * mblocale:  the current locale is a multibyte locale.
 */
static int	use_wchar, use_bmg, mblocale;

static size_t	outbuflen, prntbuflen;	/* capacities of outline and prntbuf */
static char	*prntbuf;		/* input buffer used by grep() */
static wchar_t	*outline;		/* wide copy of the current line */

static void	addfile(const char *fn);
static void	addpattern(char *s);
static void	fixpatterns(void);
static void	usage(void);
static int	grep(int, const char *);
static void	bmgcomp(char *, int);
static char	*bmgexec(char *, char *);
static int	recursive(const char *, const struct stat *, int, struct FTW *);
static void	process_path(const char *);
static void	process_file(const char *, int);
121 
122 /*
123  * mainline for grep
124  */
125 int
126 main(int argc, char **argv)
127 {
128 	char	*ap;
129 	int	c;
130 	int	fflag = 0;
131 	int	i, n_pattern = 0, n_file = 0;
132 	char	**pattern_list = NULL;
133 	char	**file_list = NULL;
134 
135 	(void) setlocale(LC_ALL, "");
136 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
137 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
138 #endif
139 	(void) textdomain(TEXT_DOMAIN);
140 
141 	/*
142 	 * true if this is running on the multibyte locale
143 	 */
144 	mblocale = (MB_CUR_MAX > 1);
145 	/*
146 	 * Skip leading slashes
147 	 */
148 	cmdname = argv[0];
149 	if (ap = strrchr(cmdname, '/'))
150 		cmdname = ap + 1;
151 
152 	ap = cmdname;
153 	/*
154 	 * Detect egrep/fgrep via command name, map to -E and -F options.
155 	 */
156 	if (*ap == 'e' || *ap == 'E') {
157 		regflags |= REG_EXTENDED;
158 		egrep++;
159 	} else {
160 		if (*ap == 'f' || *ap == 'F') {
161 			fgrep++;
162 		}
163 	}
164 
165 	while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
166 		switch (c) {
167 		case 'v':	/* POSIX: negate matches */
168 			nvflag = 0;
169 			break;
170 
171 		case 'c':	/* POSIX: write count */
172 			cflag++;
173 			break;
174 
175 		case 'i':	/* POSIX: ignore case */
176 			iflag++;
177 			regflags |= REG_ICASE;
178 			break;
179 
180 		case 'l':	/* POSIX: Write filenames only */
181 			lflag++;
182 			break;
183 
184 		case 'n':	/* POSIX: Write line numbers */
185 			nflag++;
186 			break;
187 
188 		case 'r':	/* Solaris: search recursively */
189 			rflag++;
190 			break;
191 
192 		case 'b':	/* Solaris: Write file block numbers */
193 			bflag++;
194 			break;
195 
196 		case 's':	/* POSIX: No error msgs for files */
197 			sflag++;
198 			break;
199 
200 		case 'e':	/* POSIX: pattern list */
201 			n_pattern++;
202 			pattern_list = realloc(pattern_list,
203 			    sizeof (char *) * n_pattern);
204 			if (pattern_list == NULL) {
205 				(void) fprintf(stderr,
206 				    gettext("%s: out of memory\n"),
207 				    cmdname);
208 				exit(2);
209 			}
210 			*(pattern_list + n_pattern - 1) = optarg;
211 			break;
212 
213 		case 'f':	/* POSIX: pattern file */
214 			fflag = 1;
215 			n_file++;
216 			file_list = realloc(file_list,
217 			    sizeof (char *) * n_file);
218 			if (file_list == NULL) {
219 				(void) fprintf(stderr,
220 				    gettext("%s: out of memory\n"),
221 				    cmdname);
222 				exit(2);
223 			}
224 			*(file_list + n_file - 1) = optarg;
225 			break;
226 
227 		/* based on options order h or H is set as in GNU grep */
228 		case 'h':	/* Solaris: supress printing of file name */
229 			hflag = 1;
230 			Hflag = 0;
231 			break;
232 		/* Solaris: precede every matching with file name */
233 		case 'H':
234 			Hflag = 1;
235 			hflag = 0;
236 			break;
237 
238 		case 'q':	/* POSIX: quiet: status only */
239 			qflag++;
240 			break;
241 
242 		case 'w':	/* Solaris: treat pattern as word */
243 			wflag++;
244 			break;
245 
246 		case 'x':	/* POSIX: full line matches */
247 			xflag++;
248 			regflags |= REG_ANCHOR;
249 			break;
250 
251 		case 'E':	/* POSIX: Extended RE's */
252 			regflags |= REG_EXTENDED;
253 			Eflag++;
254 			break;
255 
256 		case 'F':	/* POSIX: strings, not RE's */
257 			Fflag++;
258 			break;
259 
260 		case 'R':	/* Solaris: like rflag, but follow symlinks */
261 			Rflag++;
262 			rflag++;
263 			break;
264 
265 		default:
266 			usage();
267 		}
268 	}
269 	/*
270 	 * If we're invoked as egrep or fgrep we need to do some checks
271 	 */
272 
273 	if (egrep || fgrep) {
274 		/*
275 		 * Use of -E or -F with egrep or fgrep is illegal
276 		 */
277 		if (Eflag || Fflag)
278 			usage();
279 		/*
280 		 * Don't allow use of wflag with egrep / fgrep
281 		 */
282 		if (wflag)
283 			usage();
284 		/*
285 		 * For Solaris the -s flag is equivalent to XCU -q
286 		 */
287 		if (sflag)
288 			qflag++;
289 		/*
290 		 * done with above checks - set the appropriate flags
291 		 */
292 		if (egrep)
293 			Eflag++;
294 		else			/* Else fgrep */
295 			Fflag++;
296 	}
297 
298 	if (wflag && (Eflag || Fflag)) {
299 		/*
300 		 * -w cannot be specified with grep -F
301 		 */
302 		usage();
303 	}
304 
305 	/*
306 	 * -E and -F flags are mutually exclusive - check for this
307 	 */
308 	if (Eflag && Fflag)
309 		usage();
310 
311 	/*
312 	 * -l overrides -H like in GNU grep
313 	 */
314 	if (lflag)
315 		Hflag = 0;
316 
317 	/*
318 	 * -c, -l and -q flags are mutually exclusive
319 	 * We have -c override -l like in Solaris.
320 	 * -q overrides -l & -c programmatically in grep() function.
321 	 */
322 	if (cflag && lflag)
323 		lflag = 0;
324 
325 	argv += optind - 1;
326 	argc -= optind - 1;
327 
328 	/*
329 	 * Now handling -e and -f option
330 	 */
331 	if (pattern_list) {
332 		for (i = 0; i < n_pattern; i++) {
333 			addpattern(pattern_list[i]);
334 		}
335 		free(pattern_list);
336 	}
337 	if (file_list) {
338 		for (i = 0; i < n_file; i++) {
339 			addfile(file_list[i]);
340 		}
341 		free(file_list);
342 	}
343 
344 	/*
345 	 * No -e or -f?  Make sure there is one more arg, use it as the pattern.
346 	 */
347 	if (patterns == NULL && !fflag) {
348 		if (argc < 2)
349 			usage();
350 		addpattern(argv[1]);
351 		argc--;
352 		argv++;
353 	}
354 
355 	/*
356 	 * If -x flag is not specified or -i flag is specified
357 	 * with fgrep in a multibyte locale, need to use
358 	 * the wide character APIs.  Otherwise, byte-oriented
359 	 * process will be done.
360 	 */
361 	use_wchar = Fflag && mblocale && (!xflag || iflag);
362 
363 	/*
364 	 * Compile Patterns and also decide if BMG can be used
365 	 */
366 	fixpatterns();
367 
368 	/* Process all files: stdin, or rest of arg list */
369 	if (argc < 2) {
370 		matched = grep(0, STDIN_FILENAME);
371 	} else {
372 		if (Hflag || (argc > 2 && hflag == 0))
373 			outfn = 1;	/* Print filename on match line */
374 		for (argv++; *argv != NULL; argv++) {
375 			process_path(*argv);
376 		}
377 	}
378 	/*
379 	 * Return() here is used instead of exit
380 	 */
381 
382 	(void) fflush(stdout);
383 
384 	if (errors)
385 		return (2);
386 	return (matched ? 0 : 1);
387 }
388 
389 static void
390 process_path(const char *path)
391 {
392 	struct	stat st;
393 	int	walkflags = FTW_CHDIR;
394 	char	*buf = NULL;
395 
396 	if (rflag) {
397 		if (stat(path, &st) != -1 &&
398 		    (st.st_mode & S_IFMT) == S_IFDIR) {
399 			outfn = 1; /* Print filename */
400 
401 			/*
402 			 * Add trailing slash if arg
403 			 * is directory, to resolve symlinks.
404 			 */
405 			if (path[strlen(path) - 1] != '/') {
406 				(void) asprintf(&buf, "%s/", path);
407 				if (buf != NULL)
408 					path = buf;
409 			}
410 
411 			/*
412 			 * Search through subdirs if path is directory.
413 			 * Don't follow symlinks if Rflag is not set.
414 			 */
415 			if (!Rflag)
416 				walkflags |= FTW_PHYS;
417 
418 			if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
419 				if (!sflag)
420 					(void) fprintf(stderr,
421 					    gettext("%s: can't open \"%s\"\n"),
422 					    cmdname, path);
423 				errors = 1;
424 			}
425 			return;
426 		}
427 	}
428 	process_file(path, 0);
429 }
430 
431 /*
432  * Read and process all files in directory recursively.
433  */
434 static int
435 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
436 {
437 	/*
438 	 * Process files and follow symlinks if Rflag set.
439 	 */
440 	if (info != FTW_F) {
441 		/* Report broken symlinks and unreadable files */
442 		if (!sflag &&
443 		    (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
444 			(void) fprintf(stderr,
445 			    gettext("%s: can't open \"%s\"\n"), cmdname, name);
446 		}
447 		return (0);
448 	}
449 
450 
451 	/* Skip devices and pipes if Rflag is not set */
452 	if (!Rflag && !S_ISREG(statp->st_mode))
453 		return (0);
454 	/* Pass offset to relative name from FTW_CHDIR */
455 	process_file(name, ftw->base);
456 	return (0);
457 }
458 
459 /*
460  * Opens file and call grep function.
461  */
462 static void
463 process_file(const char *name, int base)
464 {
465 	int fd;
466 
467 	if ((fd = open(name + base, O_RDONLY)) == -1) {
468 		errors = 1;
469 		if (!sflag) /* Silent mode */
470 			(void) fprintf(stderr, gettext(
471 			    "%s: can't open \"%s\"\n"),
472 			    cmdname, name);
473 		return;
474 	}
475 	matched |= grep(fd, name);
476 	(void) close(fd);
477 
478 	if (ferror(stdout)) {
479 		(void) fprintf(stderr, gettext(
480 		    "%s: error writing to stdout\n"),
481 		    cmdname);
482 		(void) fflush(stdout);
483 		exit(2);
484 	}
485 
486 }
487 
488 /*
489  * Add a file of strings to the pattern list.
490  */
491 static void
492 addfile(const char *fn)
493 {
494 	FILE	*fp;
495 	char	*inbuf;
496 	char	*bufp;
497 	size_t	bufsiz, buflen, bufused;
498 
499 	/*
500 	 * Open the pattern file
501 	 */
502 	if ((fp = fopen(fn, "r")) == NULL) {
503 		(void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
504 		    cmdname, fn);
505 		exit(2);
506 	}
507 	bufsiz = BUFSIZE;
508 	if ((inbuf = malloc(bufsiz)) == NULL) {
509 		(void) fprintf(stderr,
510 		    gettext("%s: out of memory\n"), cmdname);
511 		exit(2);
512 	}
513 	bufp = inbuf;
514 	bufused = 0;
515 	/*
516 	 * Read in the file, reallocing as we need more memory
517 	 */
518 	while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
519 		buflen = strlen(bufp);
520 		bufused += buflen;
521 		if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
522 			/*
523 			 * if this line does not fit to the buffer,
524 			 * realloc larger buffer
525 			 */
526 			bufsiz += BUFSIZE;
527 			if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
528 				(void) fprintf(stderr,
529 				    gettext("%s: out of memory\n"),
530 				    cmdname);
531 				exit(2);
532 			}
533 			bufp = inbuf + bufused;
534 			continue;
535 		}
536 		if (bufp[buflen - 1] == '\n') {
537 			bufp[--buflen] = '\0';
538 		}
539 		addpattern(inbuf);
540 
541 		bufp = inbuf;
542 		bufused = 0;
543 	}
544 	free(inbuf);
545 	(void) fclose(fp);
546 }
547 
548 /*
549  * Add a string to the pattern list.
550  */
551 static void
552 addpattern(char *s)
553 {
554 	PATTERN	*pp;
555 	char	*wordbuf;
556 	char	*np;
557 
558 	for (; ; ) {
559 		np = strchr(s, '\n');
560 		if (np != NULL)
561 			*np = '\0';
562 		if ((pp = malloc(sizeof (PATTERN))) == NULL) {
563 			(void) fprintf(stderr, gettext(
564 			    "%s: out of memory\n"),
565 			    cmdname);
566 			exit(2);
567 		}
568 		if (wflag) {
569 			/*
570 			 * Solaris wflag support: Add '<' '>' to pattern to
571 			 * select it as a word. Doesn't make sense with -F
572 			 * but we're Libertarian.
573 			 */
574 			size_t	slen, wordlen;
575 
576 			slen = strlen(s);
577 			wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
578 			if ((wordbuf = malloc(wordlen)) == NULL) {
579 				(void) fprintf(stderr,
580 				    gettext("%s: out of memory\n"),
581 				    cmdname);
582 				exit(2);
583 			}
584 			(void) strcpy(wordbuf, "\\<");
585 			(void) strcpy(wordbuf + 2, s);
586 			(void) strcpy(wordbuf + 2 + slen, "\\>");
587 		} else {
588 			if ((wordbuf = strdup(s)) == NULL) {
589 				(void) fprintf(stderr,
590 				    gettext("%s: out of memory\n"),
591 				    cmdname);
592 				exit(2);
593 			}
594 		}
595 		pp->pattern = wordbuf;
596 		pp->next = patterns;
597 		patterns = pp;
598 		if (np == NULL)
599 			break;
600 		s = np + 1;
601 	}
602 }
603 
604 /*
605  * Fix patterns.
606  * Must do after all arguments read, in case later -i option.
607  */
608 static void
609 fixpatterns(void)
610 {
611 	PATTERN	*pp;
612 	int	rv, fix_pattern, npatterns;
613 
614 	/*
615 	 * As REG_ANCHOR flag is not supported in the current Solaris,
616 	 * need to fix the specified pattern if -x is specified with
617 	 * grep or egrep
618 	 */
619 	fix_pattern = !Fflag && xflag;
620 
621 	for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
622 		npatterns++;
623 		if (fix_pattern) {
624 			char	*cp, *cq;
625 			size_t	plen, nplen;
626 
627 			plen = strlen(pp->pattern);
628 			/* '^' pattern '$' */
629 			nplen = 1 + plen + 1 + 1;
630 			if ((cp = malloc(nplen)) == NULL) {
631 				(void) fprintf(stderr,
632 				    gettext("%s: out of memory\n"),
633 				    cmdname);
634 				exit(2);
635 			}
636 			cq = cp;
637 			*cq++ = '^';
638 			cq = strcpy(cq, pp->pattern) + plen;
639 			*cq++ = '$';
640 			*cq = '\0';
641 			free(pp->pattern);
642 			pp->pattern = cp;
643 		}
644 
645 		if (Fflag) {
646 			if (use_wchar) {
647 				/*
648 				 * Fflag && mblocale && iflag
649 				 * Fflag && mblocale && !xflag
650 				 */
651 				size_t	n;
652 				n = strlen(pp->pattern) + 1;
653 				if ((pp->wpattern =
654 				    malloc(sizeof (wchar_t) * n)) == NULL) {
655 					(void) fprintf(stderr,
656 					    gettext("%s: out of memory\n"),
657 					    cmdname);
658 					exit(2);
659 				}
660 				if (mbstowcs(pp->wpattern, pp->pattern, n) ==
661 				    (size_t)-1) {
662 					(void) fprintf(stderr,
663 					    gettext("%s: failed to convert "
664 					    "\"%s\" to wide-characters\n"),
665 					    cmdname, pp->pattern);
666 					exit(2);
667 				}
668 				if (iflag) {
669 					wchar_t	*wp;
670 					for (wp = pp->wpattern; *wp != L'\0';
671 					    wp++) {
672 						*wp = towlower((wint_t)*wp);
673 					}
674 				}
675 				free(pp->pattern);
676 			} else {
677 				/*
678 				 * Fflag && mblocale && !iflag
679 				 * Fflag && !mblocale && iflag
680 				 * Fflag && !mblocale && !iflag
681 				 */
682 				if (iflag) {
683 					unsigned char	*cp;
684 					for (cp = (unsigned char *)pp->pattern;
685 					    *cp != '\0'; cp++) {
686 						*cp = tolower(*cp);
687 					}
688 				}
689 			}
690 			/*
691 			 * fgrep: No regular expressions.
692 			 */
693 			continue;
694 		}
695 
696 		/*
697 		 * For non-fgrep, compile the regular expression,
698 		 * give an informative error message, and exit if
699 		 * it didn't compile.
700 		 */
701 		if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
702 			(void) regerror(rv, &pp->re, errstr, sizeof (errstr));
703 			(void) fprintf(stderr,
704 			    gettext("%s: RE error in %s: %s\n"),
705 			    cmdname, pp->pattern, errstr);
706 			exit(2);
707 		}
708 		free(pp->pattern);
709 	}
710 
711 	/*
712 	 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
713 	 * Use the Boyer-Moore-Gosper algorithm if:
714 	 * - fgrep			(Fflag)
715 	 * - singlebyte locale		(!mblocale)
716 	 * - no ignoring case		(!iflag)
717 	 * - no printing line numbers	(!nflag)
718 	 * - no negating the output	(nvflag)
719 	 * - only one pattern		(npatterns == 1)
720 	 * - non zero length pattern	(strlen(patterns->pattern) != 0)
721 	 *
722 	 * It's guaranteed patterns->pattern is still alive
723 	 * when Fflag && !mblocale.
724 	 */
725 	use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
726 	    (npatterns == 1) && (strlen(patterns->pattern) != 0);
727 }
728 
/*
 * Locate the first newline in the len bytes starting at ptr.
 * Returns a pointer to it, or NULL when there is none.
 */
static char *
find_nl(const char *ptr, size_t len)
{
	return (memchr(ptr, '\n', len));
}
742 
/*
 * Locate the last newline in the len bytes starting at ptr.
 * Returns a pointer to it, or NULL when there is none.
 */
static char *
rfind_nl(const char *ptr, size_t len)
{
	size_t	remaining;

	/* Walk backwards; ptr[remaining - 1] is the byte being examined. */
	for (remaining = len; remaining != 0; remaining--) {
		if (ptr[remaining - 1] == '\n') {
			return ((char *)&ptr[remaining - 1]);
		}
	}
	return (NULL);
}
757 
758 /*
759  * Duplicate the specified string converting each character
760  * into a lower case.
761  */
762 static char *
763 istrdup(const char *s1)
764 {
765 	static size_t	ibuflen = 0;
766 	static char	*ibuf = NULL;
767 	size_t	slen;
768 	char	*p;
769 
770 	slen = strlen(s1);
771 	if (slen >= ibuflen) {
772 		/* ibuf does not fit to s1 */
773 		ibuflen = slen + 1;
774 		ibuf = realloc(ibuf, ibuflen);
775 		if (ibuf == NULL) {
776 			(void) fprintf(stderr,
777 			    gettext("%s: out of memory\n"), cmdname);
778 			exit(2);
779 		}
780 	}
781 	p = ibuf;
782 	do {
783 		*p++ = tolower(*s1);
784 	} while (*s1++ != '\0');
785 	return (ibuf);
786 }
787 
/*
 * Do grep on a single file.
 * Return true if any lines matched.
 *
 * fd is the descriptor to read from; fn is the name used in output
 * and diagnostics.  Returns 0 when there are no patterns, when read()
 * fails, or when stdout is in an error state after a line has been
 * handled.  With -q the process exits with status 0 as soon as a line
 * is selected.
 *
 * We have two strategies:
 * The fast one is used when we have a single pattern with
 * a string known to occur in the pattern. We can then
 * do a BMG match on the whole buffer.
 * This is an order of magnitude faster.
 * Otherwise we split the buffer into lines,
 * and check for a match on each line.
 */
static int
grep(int fd, const char *fn)
{
	PATTERN *pp;
	off_t	data_len;	/* length of the data chunk */
	off_t	line_len;	/* length of the current line */
	off_t	line_offset;	/* current line's offset from the beginning */
	long long	lineno;
	long long	matches = 0;	/* Number of matching lines */
	int	newlinep;	/* 0 if the last line of file has no newline */
	char	*ptr, *ptrend;


	if (patterns == NULL)
		return (0);	/* no patterns to match -- just return */

	pp = patterns;

	if (use_bmg) {
		bmgcomp(pp->pattern, strlen(pp->pattern));
	}

	/* The wide line buffer is allocated once and kept across calls. */
	if (use_wchar && outline == NULL) {
		outbuflen = BUFSIZE + 1;
		outline = malloc(sizeof (wchar_t) * outbuflen);
		if (outline == NULL) {
			(void) fprintf(stderr, gettext("%s: out of memory\n"),
			    cmdname);
			exit(2);
		}
	}

	/*
	 * The input buffer is likewise kept across calls.  It always has
	 * one byte more than prntbuflen, which leaves room for the
	 * terminator stored at ptrend when the data fills the buffer.
	 */
	if (prntbuf == NULL) {
		prntbuflen = BUFSIZE;
		if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
			(void) fprintf(stderr, gettext("%s: out of memory\n"),
			    cmdname);
			exit(2);
		}
	}

	line_offset = 0;
	lineno = 0;
	newlinep = 1;
	data_len = 0;
	for (; ; ) {
		long	count;
		off_t	offset = 0;	/* where the newly read data begins */

		if (data_len == 0) {
			/*
			 * If no data in the buffer, reset ptr
			 */
			ptr = prntbuf;
		}
		if (ptr == prntbuf) {
			/*
			 * The current data chunk starts from prntbuf.
			 * This means either the buffer has no data
			 * or the buffer has no newline.
			 * So, read more data from input.
			 */
			count = read(fd, ptr + data_len, prntbuflen - data_len);
			if (count < 0) {
				/* read error */
				if (cflag) {
					if (outfn && !rflag) {
						(void) fprintf(stdout,
						    "%s:", fn);
					}
					if (!qflag && !rflag) {
						(void) fprintf(stdout, "%lld\n",
						    matches);
					}
				}
				return (0);
			} else if (count == 0) {
				/* no new data */
				if (data_len == 0) {
					/* end of file already reached */
					break;
				}
				/* last line of file has no newline */
				ptrend = ptr + data_len;
				newlinep = 0;
				goto L_start_process;
			}
			offset = data_len;
			data_len += count;
		}

		/*
		 * Look for newline in the data_len - offset bytes
		 * starting at ptr + offset, that is, only in the part of
		 * the chunk that has not been searched yet.
		 */
		ptrend = find_nl(ptr + offset, data_len - offset);
		if (ptrend == NULL) {
			/* no newline found in this chunk */
			if (ptr > prntbuf) {
				/*
				 * Move remaining data to the beginning
				 * of the buffer.
				 * Remaining data lie from ptr for
				 * data_len bytes.
				 */
				(void) memmove(prntbuf, ptr, data_len);
			}
			if (data_len == prntbuflen) {
				/*
				 * Not enough room in the buffer
				 */
				prntbuflen += BUFSIZE;
				prntbuf = realloc(prntbuf, prntbuflen + 1);
				if (prntbuf == NULL) {
					(void) fprintf(stderr,
					    gettext("%s: out of memory\n"),
					    cmdname);
					exit(2);
				}
			}
			ptr = prntbuf;
			/* read the next input */
			continue;
		}
L_start_process:

		/*
		 * Beginning of the chunk:	ptr
		 * End of the chunk:		ptr + data_len
		 * Beginning of the line:	ptr
		 * End of the line:		ptrend
		 */

		if (use_bmg) {
			/*
			 * Use Boyer-Moore-Gosper algorithm to find out if
			 * this chunk (not this line) contains the specified
			 * pattern.  If not, restart from the last line
			 * of this chunk.
			 */
			char	*bline;
			bline = bmgexec(ptr, ptr + data_len);
			if (bline == NULL) {
				/*
				 * No pattern found in this chunk.
				 * Need to find the last line
				 * in this chunk.
				 */
				ptrend = rfind_nl(ptr, data_len);

				/*
				 * When this chunk does not contain newline,
				 * ptrend becomes NULL, which should happen
				 * when the last line of file does not end
				 * with a newline.  At such a point,
				 * newlinep should have been set to 0.
				 * Therefore, just after jumping to
				 * L_skip_line, the main for-loop quits,
				 * and the line_len value won't be
				 * used.
				 */
				line_len = ptrend - ptr;
				goto L_skip_line;
			}
			if (bline > ptrend) {
				/*
				 * Pattern found not in the first line
				 * of this chunk.
				 * Discard the first line.
				 */
				line_len = ptrend - ptr;
				goto L_skip_line;
			}
			/*
			 * Pattern found in the first line of this chunk.
			 * Using this result.
			 */
			*ptrend = '\0';
			line_len = ptrend - ptr;

			/*
			 * before jumping to L_next_line,
			 * need to handle xflag if specified
			 */
			if (xflag && (line_len != bmglen ||
			    strcmp(bmgpat, ptr) != 0)) {
				/* didn't match */
				pp = NULL;
			} else {
				pp = patterns; /* to make it happen */
			}
			goto L_next_line;
		}
		lineno++;
		/*
		 * Line starts from ptr and ends at ptrend.
		 * line_len will be the length of the line.
		 */
		*ptrend = '\0';
		line_len = ptrend - ptr;

		/*
		 * From now, the process will be performed based
		 * on the line from ptr to ptrend.
		 * Each of the loops below leaves pp pointing at the
		 * pattern that matched, or NULL when none did.
		 */
		if (use_wchar) {
			size_t	len;

			if (line_len >= outbuflen) {
				outbuflen = line_len + 1;
				outline = realloc(outline,
				    sizeof (wchar_t) * outbuflen);
				if (outline == NULL) {
					(void) fprintf(stderr,
					    gettext("%s: out of memory\n"),
					    cmdname);
					exit(2);
				}
			}

			len = mbstowcs(outline, ptr, line_len);
			if (len == (size_t)-1) {
				(void) fprintf(stderr, gettext(
	"%s: input file \"%s\": line %lld: invalid multibyte character\n"),
				    cmdname, fn, lineno);
				/* never match a line with invalid sequence */
				goto L_skip_line;
			}
			outline[len] = L'\0';

			if (iflag) {
				wchar_t	*cp;
				for (cp = outline; *cp != '\0'; cp++) {
					*cp = towlower((wint_t)*cp);
				}
			}

			if (xflag) {
				for (pp = patterns; pp; pp = pp->next) {
					if (outline[0] == pp->wpattern[0] &&
					    wcscmp(outline,
					    pp->wpattern) == 0) {
						/* matched */
						break;
					}
				}
			} else {
				for (pp = patterns; pp; pp = pp->next) {
					if (wcswcs(outline, pp->wpattern)
					    != NULL) {
						/* matched */
						break;
					}
				}
			}
		} else if (Fflag) {
			/* fgrep in byte-oriented handling */
			char	*fptr;
			if (iflag) {
				fptr = istrdup(ptr);
			} else {
				fptr = ptr;
			}
			if (xflag) {
				/* fgrep -x */
				for (pp = patterns; pp; pp = pp->next) {
					if (fptr[0] == pp->pattern[0] &&
					    strcmp(fptr, pp->pattern) == 0) {
						/* matched */
						break;
					}
				}
			} else {
				for (pp = patterns; pp; pp = pp->next) {
					if (strstr(fptr, pp->pattern) != NULL) {
						/* matched */
						break;
					}
				}
			}
		} else {
			/* grep or egrep */
			for (pp = patterns; pp; pp = pp->next) {
				int	rv;

				rv = regexec(&pp->re, ptr, 0, NULL, 0);
				if (rv == REG_OK) {
					/* matched */
					break;
				}

				switch (rv) {
				case REG_NOMATCH:
					break;
				case REG_ECHAR:
					(void) fprintf(stderr, gettext(
	    "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
					    cmdname, fn, lineno);
					break;
				default:
					(void) regerror(rv, &pp->re, errstr,
					    sizeof (errstr));
					(void) fprintf(stderr, gettext(
	    "%s: input file \"%s\": line %lld: %s\n"),
					    cmdname, fn, lineno, errstr);
					exit(2);
				}
			}
		}

L_next_line:
		/*
		 * Here, if pp points to non-NULL, something has been matched
		 * to the pattern.
		 * nvflag is 1 normally and 0 with -v, so the test below
		 * selects matching lines normally and non-matching lines
		 * with -v.
		 */
		if (nvflag == (pp != NULL)) {
			matches++;
			/*
			 * Handle q, l, and c flags.
			 */
			if (qflag) {
				/* no need to continue */
				/*
				 * End of this line is ptrend.
				 * We have read up to ptr + data_len.
				 * Seek back over the bytes that were read
				 * beyond the end of this line.
				 */
				off_t	pos;
				pos = ptr + data_len - (ptrend + 1);
				(void) lseek(fd, -pos, SEEK_CUR);
				exit(0);
			}
			if (lflag) {
				(void) printf("%s\n", fn);
				break;
			}
			if (!cflag) {
				if (Hflag || outfn) {
					(void) printf("%s:", fn);
				}
				if (bflag) {
					(void) printf("%lld:", (offset_t)
					    (line_offset / BSIZE));
				}
				if (nflag) {
					(void) printf("%lld:", lineno);
				}
				/*
				 * Put a newline where the terminator was
				 * stored, so that the line is written with
				 * a trailing newline.
				 */
				*ptrend = '\n';
				(void) fwrite(ptr, 1, line_len + 1, stdout);
			}
			if (ferror(stdout)) {
				return (0);
			}
		}
L_skip_line:
		if (!newlinep)
			break;

		/* Step over the line just handled and its newline. */
		data_len -= line_len + 1;
		line_offset += line_len + 1;
		ptr = ptrend + 1;
	}

	if (cflag) {
		if (Hflag || outfn) {
			(void) printf("%s:", fn);
		}
		if (!qflag) {
			(void) printf("%lld\n", matches);
		}
	}
	return (matches != 0);
}
1172 
1173 /*
1174  * usage message for grep
1175  */
1176 static void
1177 usage(void)
1178 {
1179 	if (egrep || fgrep) {
1180 		(void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1181 		(void) fprintf(stderr,
1182 		    gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1183 		    "pattern_list [file ...]\n"));
1184 
1185 		(void) fprintf(stderr, "\t%s", cmdname);
1186 		(void) fprintf(stderr,
1187 		    gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1188 		    "[-e pattern_list]... "
1189 		    "[-f pattern_file]... [file...]\n"));
1190 	} else {
1191 		(void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1192 		(void) fprintf(stderr,
1193 		    gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1194 		    "pattern_list [file ...]\n"));
1195 
1196 		(void) fprintf(stderr, "\t%s", cmdname);
1197 		(void) fprintf(stderr,
1198 		    gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1199 		    "[-e pattern_list]... "
1200 		    "[-f pattern_file]... [file...]\n"));
1201 
1202 		(void) fprintf(stderr, "\t%s", cmdname);
1203 		(void) fprintf(stderr,
1204 		    gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1205 		    "pattern_list [file ...]\n"));
1206 
1207 		(void) fprintf(stderr, "\t%s", cmdname);
1208 		(void) fprintf(stderr,
1209 		    gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1210 		    "[-e pattern_list]... "
1211 		    "[-f pattern_file]... [file...]\n"));
1212 
1213 		(void) fprintf(stderr, "\t%s", cmdname);
1214 		(void) fprintf(stderr,
1215 		    gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1216 		    "pattern_list [file ...]\n"));
1217 
1218 		(void) fprintf(stderr, "\t%s", cmdname);
1219 		(void) fprintf(stderr,
1220 		    gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
1221 		    "[-f pattern_file]... [file...]\n"));
1222 	}
1223 	exit(2);
1224 	/* NOTREACHED */
1225 }
1226 
1227 /*
1228  * Compile literal pattern into BMG tables
1229  */
1230 static void
1231 bmgcomp(char *pat, int len)
1232 {
1233 	int	i;
1234 	int	tlen;
1235 	unsigned char	*uc = (unsigned char *)pat;
1236 
1237 	bmglen = len;
1238 	bmgpat = pat;
1239 
1240 	for (i = 0; i < M_CSETSIZE; i++) {
1241 		bmgtab[i] = len;
1242 	}
1243 
1244 	len--;
1245 	for (tlen = len, i = 0; i <= len; i++, tlen--) {
1246 		bmgtab[*uc++] = tlen;
1247 	}
1248 }
1249 
1250 /*
1251  * BMG search.
1252  */
1253 static char *
1254 bmgexec(char *str, char *end)
1255 {
1256 	int	t;
1257 	char	*k, *s, *p;
1258 
1259 	k = str + bmglen - 1;
1260 	if (bmglen == 1) {
1261 		return (memchr(str, bmgpat[0], end - str));
1262 	}
1263 	for (; ; ) {
1264 		/* inner loop, should be most optimized */
1265 		while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1266 			k += t;
1267 		}
1268 		if (k >= end) {
1269 			return (NULL);
1270 		}
1271 		for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1272 			if (p == bmgpat) {
1273 				return (s);
1274 			}
1275 		}
1276 		k++;
1277 	}
1278 	/* NOTREACHED */
1279 }
1280