xref: /titanic_52/usr/src/cmd/grep_xpg4/grep.c (revision 3f7d54a6b84904c8f4d8daa4c7b577bede7df8b9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * grep - pattern matching program - combined grep, egrep, and fgrep.
31  *	Based on MKS grep command, with XCU & Solaris mods.
32  */
33 
34 /*
35  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
36  *
37  */
38 
39 #include <string.h>
40 #include <stdlib.h>
41 #include <ctype.h>
42 #include <stdarg.h>
43 #include <regex.h>
44 #include <limits.h>
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <locale.h>
50 #include <wchar.h>
51 #include <errno.h>
52 #include <unistd.h>
53 #include <wctype.h>
54 
55 #define	BSIZE		512		/* Size of block for -b */
56 #define	BUFSIZE		8192		/* Input buffer size */
57 
58 #define	M_CSETSIZE	256		/* singlebyte chars */
59 static int	bmglen;			/* length of BMG pattern */
60 static char	*bmgpat;		/* BMG pattern */
61 static int	bmgtab[M_CSETSIZE];	/* BMG delta1 table */
62 
63 typedef	struct	_PATTERN	{
64 	char	*pattern;		/* original pattern */
65 	wchar_t	*wpattern;		/* wide, lowercased pattern */
66 	struct	_PATTERN	*next;
67 	regex_t	re;			/* compiled pattern */
68 } PATTERN;
69 
70 static PATTERN	*patterns;
71 static char	errstr[128];		/* regerror string buffer */
72 static int	regflags = 0;		/* regcomp options */
73 static uchar_t	fgrep = 0;		/* Invoked as fgrep */
74 static uchar_t	egrep = 0;		/* Invoked as egrep */
75 static uchar_t	nvflag = 1;		/* Print matching lines */
76 static uchar_t	cflag;			/* Count of matches */
77 static uchar_t	iflag;			/* Case insensitve matching */
78 static uchar_t	hflag;			/* Supress printing of filename */
79 static uchar_t	lflag;			/* Print file names of matches */
80 static uchar_t	nflag;			/* Precede lines by line number */
81 static uchar_t	bflag;			/* Preccede matches by block number */
82 static uchar_t	sflag;			/* Suppress file error messages */
83 static uchar_t	qflag;			/* Suppress standard output */
84 static uchar_t	wflag;			/* Search for expression as a word */
85 static uchar_t	xflag;			/* Anchoring */
86 static uchar_t	Eflag;			/* Egrep or -E flag */
87 static uchar_t	Fflag;			/* Fgrep or -F flag */
88 static uchar_t	outfn;			/* Put out file name */
89 static char	*cmdname;
90 
91 static int	use_wchar, use_bmg, mblocale;
92 
93 static size_t	outbuflen, prntbuflen;
94 static char	*prntbuf;
95 static wchar_t	*outline;
96 
97 static void	addfile(char *fn);
98 static void	addpattern(char *s);
99 static void	fixpatterns(void);
100 static void	usage(void);
101 static int	grep(int, char *);
102 static void	bmgcomp(char *, int);
103 static char	*bmgexec(char *, char *);
104 
105 /*
106  * mainline for grep
107  */
108 int
109 main(int argc, char **argv)
110 {
111 	char	*ap;
112 	int	matched = 0;
113 	int	c;
114 	int	fflag = 0;
115 	int	errors = 0;
116 	int	i, n_pattern = 0, n_file = 0;
117 	char	**pattern_list = NULL;
118 	char	**file_list = NULL;
119 
120 	(void) setlocale(LC_ALL, "");
121 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
122 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
123 #endif
124 	(void) textdomain(TEXT_DOMAIN);
125 
126 	/*
127 	 * true if this is running on the multibyte locale
128 	 */
129 	mblocale = (MB_CUR_MAX > 1);
130 	/*
131 	 * Skip leading slashes
132 	 */
133 	cmdname = argv[0];
134 	if (ap = strrchr(cmdname, '/'))
135 		cmdname = ap + 1;
136 
137 	ap = cmdname;
138 	/*
139 	 * Detect egrep/fgrep via command name, map to -E and -F options.
140 	 */
141 	if (*ap == 'e' || *ap == 'E') {
142 		regflags |= REG_EXTENDED;
143 		egrep++;
144 	} else {
145 		if (*ap == 'f' || *ap == 'F') {
146 			fgrep++;
147 		}
148 	}
149 
150 	while ((c = getopt(argc, argv, "vwchilnbse:f:qxEFI")) != EOF) {
151 		switch (c) {
152 		case 'v':	/* POSIX: negate matches */
153 			nvflag = 0;
154 			break;
155 
156 		case 'c':	/* POSIX: write count */
157 			cflag++;
158 			break;
159 
160 		case 'i':	/* POSIX: ignore case */
161 			iflag++;
162 			regflags |= REG_ICASE;
163 			break;
164 
165 		case 'l':	/* POSIX: Write filenames only */
166 			lflag++;
167 			break;
168 
169 		case 'n':	/* POSIX: Write line numbers */
170 			nflag++;
171 			break;
172 
173 		case 'b':	/* Solaris: Write file block numbers */
174 			bflag++;
175 			break;
176 
177 		case 's':	/* POSIX: No error msgs for files */
178 			sflag++;
179 			break;
180 
181 		case 'e':	/* POSIX: pattern list */
182 			n_pattern++;
183 			pattern_list = realloc(pattern_list,
184 			    sizeof (char *) * n_pattern);
185 			if (pattern_list == NULL) {
186 				(void) fprintf(stderr,
187 				    gettext("%s: out of memory\n"),
188 				    cmdname);
189 				exit(2);
190 			}
191 			*(pattern_list + n_pattern - 1) = optarg;
192 			break;
193 
194 		case 'f':	/* POSIX: pattern file */
195 			fflag = 1;
196 			n_file++;
197 			file_list = realloc(file_list,
198 			    sizeof (char *) * n_file);
199 			if (file_list == NULL) {
200 				(void) fprintf(stderr,
201 				    gettext("%s: out of memory\n"),
202 				    cmdname);
203 				exit(2);
204 			}
205 			*(file_list + n_file - 1) = optarg;
206 			break;
207 		case 'h':	/* Solaris: supress printing of file name */
208 			hflag = 1;
209 			break;
210 
211 		case 'q':	/* POSIX: quiet: status only */
212 			qflag++;
213 			break;
214 
215 		case 'w':	/* Solaris: treat pattern as word */
216 			wflag++;
217 			break;
218 
219 		case 'x':	/* POSIX: full line matches */
220 			xflag++;
221 			regflags |= REG_ANCHOR;
222 			break;
223 
224 		case 'E':	/* POSIX: Extended RE's */
225 			regflags |= REG_EXTENDED;
226 			Eflag++;
227 			break;
228 
229 		case 'F':	/* POSIX: strings, not RE's */
230 			Fflag++;
231 			break;
232 
233 		default:
234 			usage();
235 		}
236 	}
237 	/*
238 	 * If we're invoked as egrep or fgrep we need to do some checks
239 	 */
240 
241 	if (egrep || fgrep) {
242 		/*
243 		 * Use of -E or -F with egrep or fgrep is illegal
244 		 */
245 		if (Eflag || Fflag)
246 			usage();
247 		/*
248 		 * Don't allow use of wflag with egrep / fgrep
249 		 */
250 		if (wflag)
251 			usage();
252 		/*
253 		 * For Solaris the -s flag is equivalent to XCU -q
254 		 */
255 		if (sflag)
256 			qflag++;
257 		/*
258 		 * done with above checks - set the appropriate flags
259 		 */
260 		if (egrep)
261 			Eflag++;
262 		else			/* Else fgrep */
263 			Fflag++;
264 	}
265 
266 	if (wflag && (Eflag || Fflag)) {
267 		/*
268 		 * -w cannot be specified with grep -F
269 		 */
270 		usage();
271 	}
272 
273 	/*
274 	 * -E and -F flags are mutually exclusive - check for this
275 	 */
276 	if (Eflag && Fflag)
277 		usage();
278 
279 	/*
280 	 * -c, -l and -q flags are mutually exclusive
281 	 * We have -c override -l like in Solaris.
282 	 * -q overrides -l & -c programmatically in grep() function.
283 	 */
284 	if (cflag && lflag)
285 		lflag = 0;
286 
287 	argv += optind - 1;
288 	argc -= optind - 1;
289 
290 	/*
291 	 * Now handling -e and -f option
292 	 */
293 	if (pattern_list) {
294 		for (i = 0; i < n_pattern; i++) {
295 			addpattern(pattern_list[i]);
296 		}
297 		free(pattern_list);
298 	}
299 	if (file_list) {
300 		for (i = 0; i < n_file; i++) {
301 			addfile(file_list[i]);
302 		}
303 		free(file_list);
304 	}
305 
306 	/*
307 	 * No -e or -f?  Make sure there is one more arg, use it as the pattern.
308 	 */
309 	if (patterns == NULL && !fflag) {
310 		if (argc < 2)
311 			usage();
312 		addpattern(argv[1]);
313 		argc--;
314 		argv++;
315 	}
316 
317 	/*
318 	 * If -x flag is not specified or -i flag is specified
319 	 * with fgrep in a multibyte locale, need to use
320 	 * the wide character APIs.  Otherwise, byte-oriented
321 	 * process will be done.
322 	 */
323 	use_wchar = Fflag && mblocale && (!xflag || iflag);
324 
325 	/*
326 	 * Compile Patterns and also decide if BMG can be used
327 	 */
328 	fixpatterns();
329 
330 	/* Process all files: stdin, or rest of arg list */
331 	if (argc < 2) {
332 		matched = grep(0, gettext("(standard input)"));
333 	} else {
334 		if (argc > 2 && hflag == 0)
335 			outfn = 1;	/* Print filename on match line */
336 		for (argv++; *argv != NULL; argv++) {
337 			int	fd;
338 
339 			if ((fd = open(*argv, O_RDONLY)) == -1) {
340 				errors = 1;
341 				if (sflag)
342 					continue;
343 				(void) fprintf(stderr, gettext(
344 				    "%s: can't open \"%s\"\n"),
345 				    cmdname, *argv);
346 				continue;
347 			}
348 			matched |= grep(fd, *argv);
349 			(void) close(fd);
350 			if (ferror(stdout))
351 				break;
352 		}
353 	}
354 	/*
355 	 * Return() here is used instead of exit
356 	 */
357 
358 	(void) fflush(stdout);
359 
360 	if (errors)
361 		return (2);
362 	return (matched ? 0 : 1);
363 }
364 
365 /*
366  * Add a file of strings to the pattern list.
367  */
368 static void
369 addfile(char *fn)
370 {
371 	FILE	*fp;
372 	char	*inbuf;
373 	char	*bufp;
374 	size_t	bufsiz, buflen, bufused;
375 
376 	/*
377 	 * Open the pattern file
378 	 */
379 	if ((fp = fopen(fn, "r")) == NULL) {
380 		(void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
381 		    cmdname, fn);
382 		exit(2);
383 	}
384 	bufsiz = BUFSIZE;
385 	if ((inbuf = malloc(bufsiz)) == NULL) {
386 		(void) fprintf(stderr,
387 		    gettext("%s: out of memory\n"), cmdname);
388 		exit(2);
389 	}
390 	bufp = inbuf;
391 	bufused = 0;
392 	/*
393 	 * Read in the file, reallocing as we need more memory
394 	 */
395 	while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
396 		buflen = strlen(bufp);
397 		bufused += buflen;
398 		if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
399 			/*
400 			 * if this line does not fit to the buffer,
401 			 * realloc larger buffer
402 			 */
403 			bufsiz += BUFSIZE;
404 			if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
405 				(void) fprintf(stderr,
406 				    gettext("%s: out of memory\n"),
407 				    cmdname);
408 				exit(2);
409 			}
410 			bufp = inbuf + bufused;
411 			continue;
412 		}
413 		if (bufp[buflen - 1] == '\n') {
414 			bufp[--buflen] = '\0';
415 		}
416 		addpattern(inbuf);
417 
418 		bufp = inbuf;
419 		bufused = 0;
420 	}
421 	free(inbuf);
422 	(void) fclose(fp);
423 }
424 
425 /*
426  * Add a string to the pattern list.
427  */
428 static void
429 addpattern(char *s)
430 {
431 	PATTERN	*pp;
432 	char	*wordbuf;
433 	char	*np;
434 
435 	for (; ; ) {
436 		np = strchr(s, '\n');
437 		if (np != NULL)
438 			*np = '\0';
439 		if ((pp = malloc(sizeof (PATTERN))) == NULL) {
440 			(void) fprintf(stderr, gettext(
441 			    "%s: out of memory\n"),
442 			    cmdname);
443 			exit(2);
444 		}
445 		if (wflag) {
446 			/*
447 			 * Solaris wflag support: Add '<' '>' to pattern to
448 			 * select it as a word. Doesn't make sense with -F
449 			 * but we're Libertarian.
450 			 */
451 			size_t	slen, wordlen;
452 
453 			slen = strlen(s);
454 			wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
455 			if ((wordbuf = malloc(wordlen)) == NULL) {
456 				(void) fprintf(stderr,
457 				    gettext("%s: out of memory\n"),
458 				    cmdname);
459 				exit(2);
460 			}
461 			(void) strcpy(wordbuf, "\\<");
462 			(void) strcpy(wordbuf + 2, s);
463 			(void) strcpy(wordbuf + 2 + slen, "\\>");
464 		} else {
465 			if ((wordbuf = strdup(s)) == NULL) {
466 				(void) fprintf(stderr,
467 				    gettext("%s: out of memory\n"),
468 				    cmdname);
469 				exit(2);
470 			}
471 		}
472 		pp->pattern = wordbuf;
473 		pp->next = patterns;
474 		patterns = pp;
475 		if (np == NULL)
476 			break;
477 		s = np + 1;
478 	}
479 }
480 
481 /*
482  * Fix patterns.
483  * Must do after all arguments read, in case later -i option.
484  */
485 static void
486 fixpatterns(void)
487 {
488 	PATTERN	*pp;
489 	int	rv, fix_pattern, npatterns;
490 
491 	/*
492 	 * As REG_ANCHOR flag is not supported in the current Solaris,
493 	 * need to fix the specified pattern if -x is specified with
494 	 * grep or egrep
495 	 */
496 	fix_pattern = !Fflag && xflag;
497 
498 	for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
499 		npatterns++;
500 		if (fix_pattern) {
501 			char	*cp, *cq;
502 			size_t	plen, nplen;
503 
504 			plen = strlen(pp->pattern);
505 			/* '^' pattern '$' */
506 			nplen = 1 + plen + 1 + 1;
507 			if ((cp = malloc(nplen)) == NULL) {
508 				(void) fprintf(stderr,
509 				    gettext("%s: out of memory\n"),
510 				    cmdname);
511 				exit(2);
512 			}
513 			cq = cp;
514 			*cq++ = '^';
515 			cq = strcpy(cq, pp->pattern) + plen;
516 			*cq++ = '$';
517 			*cq = '\0';
518 			free(pp->pattern);
519 			pp->pattern = cp;
520 		}
521 
522 		if (Fflag) {
523 			if (use_wchar) {
524 				/*
525 				 * Fflag && mblocale && iflag
526 				 * Fflag && mblocale && !xflag
527 				 */
528 				size_t	n;
529 				n = strlen(pp->pattern) + 1;
530 				if ((pp->wpattern =
531 					malloc(sizeof (wchar_t) * n)) == NULL) {
532 					(void) fprintf(stderr,
533 					    gettext("%s: out of memory\n"),
534 					    cmdname);
535 					exit(2);
536 				}
537 				if (mbstowcs(pp->wpattern, pp->pattern, n) ==
538 				    (size_t)-1) {
539 					(void) fprintf(stderr,
540 					    gettext("%s: failed to convert "
541 						"\"%s\" to wide-characters\n"),
542 					    cmdname, pp->pattern);
543 					exit(2);
544 				}
545 				if (iflag) {
546 					wchar_t	*wp;
547 					for (wp = pp->wpattern; *wp != L'\0';
548 					    wp++) {
549 						*wp = towlower((wint_t)*wp);
550 					}
551 				}
552 				free(pp->pattern);
553 			} else {
554 				/*
555 				 * Fflag && mblocale && !iflag
556 				 * Fflag && !mblocale && iflag
557 				 * Fflag && !mblocale && !iflag
558 				 */
559 				if (iflag) {
560 					unsigned char	*cp;
561 					for (cp = (unsigned char *)pp->pattern;
562 					    *cp != '\0'; cp++) {
563 						*cp = tolower(*cp);
564 					}
565 				}
566 			}
567 			/*
568 			 * fgrep: No regular expressions.
569 			 */
570 			continue;
571 		}
572 
573 		/*
574 		 * For non-fgrep, compile the regular expression,
575 		 * give an informative error message, and exit if
576 		 * it didn't compile.
577 		 */
578 		if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
579 			(void) regerror(rv, &pp->re, errstr, sizeof (errstr));
580 			(void) fprintf(stderr,
581 			    gettext("%s: RE error in %s: %s\n"),
582 				cmdname, pp->pattern, errstr);
583 			exit(2);
584 		}
585 		free(pp->pattern);
586 	}
587 
588 	/*
589 	 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
590 	 * Use the Boyer-Moore-Gosper algorithm if:
591 	 * - fgrep			(Fflag)
592 	 * - singlebyte locale		(!mblocale)
593 	 * - no ignoring case		(!iflag)
594 	 * - no printing line numbers	(!nflag)
595 	 * - no negating the output	(nvflag)
596 	 * - only one pattern		(npatterns == 1)
597 	 * - non zero length pattern	(strlen(patterns->pattern) != 0)
598 	 *
599 	 * It's guaranteed patterns->pattern is still alive
600 	 * when Fflag && !mblocale.
601 	 */
602 	use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
603 	    (npatterns == 1) && (strlen(patterns->pattern) != 0);
604 }
605 
606 /*
607  * Search a newline from the beginning of the string
608  */
609 static char *
610 find_nl(const char *ptr, size_t len)
611 {
612 	while (len-- != 0) {
613 		if (*ptr++ == '\n') {
614 			return ((char *)--ptr);
615 		}
616 	}
617 	return (NULL);
618 }
619 
620 /*
621  * Search a newline from the end of the string
622  */
623 static char *
624 rfind_nl(const char *ptr, size_t len)
625 {
626 	const char	*uptr = ptr + len;
627 	while (len--) {
628 		if (*--uptr == '\n') {
629 			return ((char *)uptr);
630 		}
631 	}
632 	return (NULL);
633 }
634 
635 /*
636  * Duplicate the specified string converting each character
637  * into a lower case.
638  */
639 static char *
640 istrdup(const char *s1)
641 {
642 	static size_t	ibuflen = 0;
643 	static char	*ibuf = NULL;
644 	size_t	slen;
645 	char	*p;
646 
647 	slen = strlen(s1);
648 	if (slen >= ibuflen) {
649 		/* ibuf does not fit to s1 */
650 		ibuflen = slen + 1;
651 		ibuf = realloc(ibuf, ibuflen);
652 		if (ibuf == NULL) {
653 			(void) fprintf(stderr,
654 			    gettext("%s: out of memory\n"), cmdname);
655 			exit(2);
656 		}
657 	}
658 	p = ibuf;
659 	do {
660 		*p++ = tolower(*s1);
661 	} while (*s1++ != '\0');
662 	return (ibuf);
663 }
664 
665 /*
666  * Do grep on a single file.
667  * Return true in any lines matched.
668  *
669  * We have two strategies:
670  * The fast one is used when we have a single pattern with
671  * a string known to occur in the pattern. We can then
672  * do a BMG match on the whole buffer.
673  * This is an order of magnitude faster.
674  * Otherwise we split the buffer into lines,
675  * and check for a match on each line.
676  */
677 static int
678 grep(int fd, char *fn)
679 {
680 	PATTERN *pp;
681 	off_t	data_len;	/* length of the data chunk */
682 	off_t	line_len;	/* length of the current line */
683 	off_t	line_offset;	/* current line's offset from the beginning */
684 	long long	lineno;
685 	long long	matches = 0;	/* Number of matching lines */
686 	int	newlinep;	/* 0 if the last line of file has no newline */
687 	char	*ptr, *ptrend;
688 
689 
690 	if (patterns == NULL)
691 		return (0);	/* no patterns to match -- just return */
692 
693 	pp = patterns;
694 
695 	if (use_bmg) {
696 		bmgcomp(pp->pattern, strlen(pp->pattern));
697 	}
698 
699 	if (use_wchar && outline == NULL) {
700 		outbuflen = BUFSIZE + 1;
701 		outline = malloc(sizeof (wchar_t) * outbuflen);
702 		if (outline == NULL) {
703 			(void) fprintf(stderr, gettext("%s: out of memory\n"),
704 			    cmdname);
705 			exit(2);
706 		}
707 	}
708 
709 	if (prntbuf == NULL) {
710 		prntbuflen = BUFSIZE;
711 		if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
712 			(void) fprintf(stderr, gettext("%s: out of memory\n"),
713 			    cmdname);
714 			exit(2);
715 		}
716 	}
717 
718 	line_offset = 0;
719 	lineno = 0;
720 	newlinep = 1;
721 	data_len = 0;
722 	for (; ; ) {
723 		long	count;
724 		off_t	offset = 0;
725 
726 		if (data_len == 0) {
727 			/*
728 			 * If no data in the buffer, reset ptr
729 			 */
730 			ptr = prntbuf;
731 		}
732 		if (ptr == prntbuf) {
733 			/*
734 			 * The current data chunk starts from prntbuf.
735 			 * This means either the buffer has no data
736 			 * or the buffer has no newline.
737 			 * So, read more data from input.
738 			 */
739 			count = read(fd, ptr + data_len, prntbuflen - data_len);
740 			if (count < 0) {
741 				/* read error */
742 				if (cflag) {
743 					if (outfn) {
744 						(void) fprintf(stdout,
745 						    "%s:", fn);
746 					}
747 					if (!qflag) {
748 						(void) fprintf(stdout, "%lld\n",
749 						    matches);
750 					}
751 				}
752 				return (0);
753 			} else if (count == 0) {
754 				/* no new data */
755 				if (data_len == 0) {
756 					/* end of file already reached */
757 					break;
758 				}
759 				/* last line of file has no newline */
760 				ptrend = ptr + data_len;
761 				newlinep = 0;
762 				goto L_start_process;
763 			}
764 			offset = data_len;
765 			data_len += count;
766 		}
767 
768 		/*
769 		 * Look for newline in the chunk
770 		 * between ptr + offset and ptr + data_len - offset.
771 		 */
772 		ptrend = find_nl(ptr + offset, data_len - offset);
773 		if (ptrend == NULL) {
774 			/* no newline found in this chunk */
775 			if (ptr > prntbuf) {
776 				/*
777 				 * Move remaining data to the beginning
778 				 * of the buffer.
779 				 * Remaining data lie from ptr for
780 				 * data_len bytes.
781 				 */
782 				(void) memmove(prntbuf, ptr, data_len);
783 			}
784 			if (data_len == prntbuflen) {
785 				/*
786 				 * No enough room in the buffer
787 				 */
788 				prntbuflen += BUFSIZE;
789 				prntbuf = realloc(prntbuf, prntbuflen + 1);
790 				if (prntbuf == NULL) {
791 					(void) fprintf(stderr,
792 					    gettext("%s: out of memory\n"),
793 					    cmdname);
794 					exit(2);
795 				}
796 			}
797 			ptr = prntbuf;
798 			/* read the next input */
799 			continue;
800 		}
801 L_start_process:
802 
803 		/*
804 		 * Beginning of the chunk:	ptr
805 		 * End of the chunk:		ptr + data_len
806 		 * Beginning of the line:	ptr
807 		 * End of the line:		ptrend
808 		 */
809 
810 		if (use_bmg) {
811 			/*
812 			 * Use Boyer-Moore-Gosper algorithm to find out if
813 			 * this chunk (not this line) contains the specified
814 			 * pattern.  If not, restart from the last line
815 			 * of this chunk.
816 			 */
817 			char	*bline;
818 			bline = bmgexec(ptr, ptr + data_len);
819 			if (bline == NULL) {
820 				/*
821 				 * No pattern found in this chunk.
822 				 * Need to find the last line
823 				 * in this chunk.
824 				 */
825 				ptrend = rfind_nl(ptr, data_len);
826 
827 				/*
828 				 * When this chunk does not contain newline,
829 				 * ptrend becomes NULL, which should happen
830 				 * when the last line of file does not end
831 				 * with a newline.  At such a point,
832 				 * newlinep should have been set to 0.
833 				 * Therefore, just after jumping to
834 				 * L_skip_line, the main for-loop quits,
835 				 * and the line_len value won't be
836 				 * used.
837 				 */
838 				line_len = ptrend - ptr;
839 				goto L_skip_line;
840 			}
841 			if (bline > ptrend) {
842 				/*
843 				 * Pattern found not in the first line
844 				 * of this chunk.
845 				 * Discard the first line.
846 				 */
847 				line_len = ptrend - ptr;
848 				goto L_skip_line;
849 			}
850 			/*
851 			 * Pattern found in the first line of this chunk.
852 			 * Using this result.
853 			 */
854 			*ptrend = '\0';
855 			line_len = ptrend - ptr;
856 
857 			/*
858 			 * before jumping to L_next_line,
859 			 * need to handle xflag if specified
860 			 */
861 			if (xflag && (line_len != bmglen ||
862 				strcmp(bmgpat, ptr) != 0)) {
863 				/* didn't match */
864 				pp = NULL;
865 			} else {
866 				pp = patterns; /* to make it happen */
867 			}
868 			goto L_next_line;
869 		}
870 		lineno++;
871 		/*
872 		 * Line starts from ptr and ends at ptrend.
873 		 * line_len will be the length of the line.
874 		 */
875 		*ptrend = '\0';
876 		line_len = ptrend - ptr;
877 
878 		/*
879 		 * From now, the process will be performed based
880 		 * on the line from ptr to ptrend.
881 		 */
882 		if (use_wchar) {
883 			size_t	len;
884 
885 			if (line_len >= outbuflen) {
886 				outbuflen = line_len + 1;
887 				outline = realloc(outline,
888 				    sizeof (wchar_t) * outbuflen);
889 				if (outline == NULL) {
890 					(void) fprintf(stderr,
891 					    gettext("%s: out of memory\n"),
892 					    cmdname);
893 					exit(2);
894 				}
895 			}
896 
897 			len = mbstowcs(outline, ptr, line_len);
898 			if (len == (size_t)-1) {
899 				(void) fprintf(stderr, gettext(
900 	"%s: input file \"%s\": line %lld: invalid multibyte character\n"),
901 				    cmdname, fn, lineno);
902 				/* never match a line with invalid sequence */
903 				goto L_skip_line;
904 			}
905 			outline[len] = L'\0';
906 
907 			if (iflag) {
908 				wchar_t	*cp;
909 				for (cp = outline; *cp != '\0'; cp++) {
910 					*cp = towlower((wint_t)*cp);
911 				}
912 			}
913 
914 			if (xflag) {
915 				for (pp = patterns; pp; pp = pp->next) {
916 					if (outline[0] == pp->wpattern[0] &&
917 					    wcscmp(outline,
918 						pp->wpattern) == 0) {
919 						/* matched */
920 						break;
921 					}
922 				}
923 			} else {
924 				for (pp = patterns; pp; pp = pp->next) {
925 					if (wcswcs(outline, pp->wpattern)
926 					    != NULL) {
927 						/* matched */
928 						break;
929 					}
930 				}
931 			}
932 		} else if (Fflag) {
933 			/* fgrep in byte-oriented handling */
934 			char	*fptr;
935 			if (iflag) {
936 				fptr = istrdup(ptr);
937 			} else {
938 				fptr = ptr;
939 			}
940 			if (xflag) {
941 				/* fgrep -x */
942 				for (pp = patterns; pp; pp = pp->next) {
943 					if (fptr[0] == pp->pattern[0] &&
944 					    strcmp(fptr, pp->pattern) == 0) {
945 						/* matched */
946 						break;
947 					}
948 				}
949 			} else {
950 				for (pp = patterns; pp; pp = pp->next) {
951 					if (strstr(fptr, pp->pattern) != NULL) {
952 						/* matched */
953 						break;
954 					}
955 				}
956 			}
957 		} else {
958 			/* grep or egrep */
959 			for (pp = patterns; pp; pp = pp->next) {
960 				int	rv;
961 
962 				rv = regexec(&pp->re, ptr, 0, NULL, 0);
963 				if (rv == REG_OK) {
964 					/* matched */
965 					break;
966 				}
967 
968 				switch (rv) {
969 				case REG_NOMATCH:
970 					break;
971 				case REG_ECHAR:
972 					(void) fprintf(stderr, gettext(
973 	    "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
974 					    cmdname, fn, lineno);
975 					break;
976 				default:
977 					(void) regerror(rv, &pp->re, errstr,
978 					    sizeof (errstr));
979 					(void) fprintf(stderr, gettext(
980 	    "%s: input file \"%s\": line %lld: %s\n"),
981 					    cmdname, fn, lineno, errstr);
982 					exit(2);
983 				}
984 			}
985 		}
986 
987 L_next_line:
988 		/*
989 		 * Here, if pp points to non-NULL, something has been matched
990 		 * to the pattern.
991 		 */
992 		if (nvflag == (pp != NULL)) {
993 			matches++;
994 			/*
995 			 * Handle q, l, and c flags.
996 			 */
997 			if (qflag) {
998 				/* no need to continue */
999 				/*
1000 				 * End of this line is ptrend.
1001 				 * We have read up to ptr + data_len.
1002 				 */
1003 				off_t	pos;
1004 				pos = ptr + data_len - (ptrend + 1);
1005 				(void) lseek(fd, -pos, SEEK_CUR);
1006 				exit(0);
1007 			}
1008 			if (lflag) {
1009 				(void) printf("%s\n", fn);
1010 				break;
1011 			}
1012 			if (!cflag) {
1013 				if (outfn) {
1014 					(void) printf("%s:", fn);
1015 				}
1016 				if (bflag) {
1017 					(void) printf("%lld:", (offset_t)
1018 					    (line_offset / BSIZE));
1019 				}
1020 				if (nflag) {
1021 					(void) printf("%lld:", lineno);
1022 				}
1023 				*ptrend = '\n';
1024 				(void) fwrite(ptr, 1, line_len + 1, stdout);
1025 			}
1026 			if (ferror(stdout)) {
1027 				return (0);
1028 			}
1029 		}
1030 L_skip_line:
1031 		if (!newlinep)
1032 			break;
1033 
1034 		data_len -= line_len + 1;
1035 		line_offset += line_len + 1;
1036 		ptr = ptrend + 1;
1037 	}
1038 
1039 	if (cflag) {
1040 		if (outfn) {
1041 			(void) printf("%s:", fn);
1042 		}
1043 		if (!qflag) {
1044 			(void) printf("%lld\n", matches);
1045 		}
1046 	}
1047 	return (matches != 0);
1048 }
1049 
1050 /*
1051  * usage message for grep
1052  */
1053 static void
1054 usage(void)
1055 {
1056 	if (egrep || fgrep) {
1057 		(void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1058 		(void) fprintf(stderr,
1059 		    gettext(" [-c|-l|-q] [-bhinsvx] "
1060 			"pattern_list [file ...]\n"));
1061 
1062 		(void) fprintf(stderr, "\t%s", cmdname);
1063 		(void) fprintf(stderr,
1064 		    gettext(" [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1065 			"[-f pattern_file]... [file...]\n"));
1066 	} else {
1067 		(void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1068 		(void) fprintf(stderr,
1069 		    gettext(" [-c|-l|-q] [-bhinsvwx] "
1070 			"pattern_list [file ...]\n"));
1071 
1072 		(void) fprintf(stderr, "\t%s", cmdname);
1073 		(void) fprintf(stderr,
1074 		    gettext(" [-c|-l|-q] [-bhinsvwx] [-e pattern_list]... "
1075 			"[-f pattern_file]... [file...]\n"));
1076 
1077 		(void) fprintf(stderr, "\t%s", cmdname);
1078 		(void) fprintf(stderr,
1079 		    gettext(" -E [-c|-l|-q] [-bhinsvx] "
1080 			"pattern_list [file ...]\n"));
1081 
1082 		(void) fprintf(stderr, "\t%s", cmdname);
1083 		(void) fprintf(stderr,
1084 		    gettext(" -E [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1085 			"[-f pattern_file]... [file...]\n"));
1086 
1087 		(void) fprintf(stderr, "\t%s", cmdname);
1088 		(void) fprintf(stderr,
1089 		    gettext(" -F [-c|-l|-q] [-bhinsvx] "
1090 			"pattern_list [file ...]\n"));
1091 
1092 		(void) fprintf(stderr, "\t%s", cmdname);
1093 		(void) fprintf(stderr,
1094 		    gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1095 			"[-f pattern_file]... [file...]\n"));
1096 	}
1097 	exit(2);
1098 	/* NOTREACHED */
1099 }
1100 
1101 /*
1102  * Compile literal pattern into BMG tables
1103  */
1104 static void
1105 bmgcomp(char *pat, int len)
1106 {
1107 	int	i;
1108 	int	tlen;
1109 	unsigned char	*uc = (unsigned char *)pat;
1110 
1111 	bmglen = len;
1112 	bmgpat = pat;
1113 
1114 	for (i = 0; i < M_CSETSIZE; i++) {
1115 		bmgtab[i] = len;
1116 	}
1117 
1118 	len--;
1119 	for (tlen = len, i = 0; i <= len; i++, tlen--) {
1120 		bmgtab[*uc++] = tlen;
1121 	}
1122 }
1123 
1124 /*
1125  * BMG search.
1126  */
1127 static char *
1128 bmgexec(char *str, char *end)
1129 {
1130 	int	t;
1131 	char	*k, *s, *p;
1132 
1133 	k = str + bmglen - 1;
1134 	if (bmglen == 1) {
1135 		return (memchr(str, bmgpat[0], end - str));
1136 	}
1137 	for (; ; ) {
1138 		/* inner loop, should be most optimized */
1139 		while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1140 			k += t;
1141 		}
1142 		if (k >= end) {
1143 			return (NULL);
1144 		}
1145 		for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1146 			if (p == bmgpat) {
1147 				return (s);
1148 			}
1149 		}
1150 		k++;
1151 	}
1152 	/* NOTREACHED */
1153 }
1154