xref: /titanic_54/usr/src/cmd/grep_xpg4/grep.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate /*
30*7c478bd9Sstevel@tonic-gate  * grep - pattern matching program - combined grep, egrep, and fgrep.
31*7c478bd9Sstevel@tonic-gate  *	Based on MKS grep command, with XCU & Solaris mods.
32*7c478bd9Sstevel@tonic-gate  */
33*7c478bd9Sstevel@tonic-gate 
34*7c478bd9Sstevel@tonic-gate /*
35*7c478bd9Sstevel@tonic-gate  * Copyright 1985, 1992 by Mortice Kern Systems Inc.  All rights reserved.
36*7c478bd9Sstevel@tonic-gate  *
37*7c478bd9Sstevel@tonic-gate  */
38*7c478bd9Sstevel@tonic-gate 
39*7c478bd9Sstevel@tonic-gate #include <string.h>
40*7c478bd9Sstevel@tonic-gate #include <stdlib.h>
41*7c478bd9Sstevel@tonic-gate #include <ctype.h>
42*7c478bd9Sstevel@tonic-gate #include <stdarg.h>
43*7c478bd9Sstevel@tonic-gate #include <regex.h>
44*7c478bd9Sstevel@tonic-gate #include <limits.h>
45*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
46*7c478bd9Sstevel@tonic-gate #include <sys/stat.h>
47*7c478bd9Sstevel@tonic-gate #include <fcntl.h>
48*7c478bd9Sstevel@tonic-gate #include <stdio.h>
49*7c478bd9Sstevel@tonic-gate #include <locale.h>
50*7c478bd9Sstevel@tonic-gate #include <wchar.h>
51*7c478bd9Sstevel@tonic-gate #include <errno.h>
52*7c478bd9Sstevel@tonic-gate #include <unistd.h>
53*7c478bd9Sstevel@tonic-gate #include <wctype.h>
54*7c478bd9Sstevel@tonic-gate 
/*
 * Sizing constants.
 */
#define	BSIZE		512		/* block size used for -b output */
#define	BUFSIZE		8192		/* size of the input buffer */
#define	M_CSETSIZE	256		/* number of single-byte characters */

/*
 * State of the Boyer-Moore-Gosper (BMG) fixed-string matcher.
 */
static int	bmglen;			/* length of the BMG pattern */
static char	*bmgpat;		/* the BMG pattern itself */
static int	bmgtab[M_CSETSIZE];	/* BMG delta1 table, one per byte */
62*7c478bd9Sstevel@tonic-gate 
/*
 * One search pattern.  Patterns are kept on a singly linked list.
 */
typedef	struct	_PATTERN	{
	char	*pattern;		/* pattern text as supplied */
	wchar_t	*wpattern;		/* wide-character, lowercased copy */
	struct	_PATTERN	*next;	/* next pattern on the list */
	regex_t	re;			/* compiled regular expression */
} PATTERN;
69*7c478bd9Sstevel@tonic-gate 
70*7c478bd9Sstevel@tonic-gate static PATTERN	*patterns;
71*7c478bd9Sstevel@tonic-gate static char	errstr[128];		/* regerror string buffer */
72*7c478bd9Sstevel@tonic-gate static int	regflags = 0;		/* regcomp options */
73*7c478bd9Sstevel@tonic-gate static uchar_t	fgrep = 0;		/* Invoked as fgrep */
74*7c478bd9Sstevel@tonic-gate static uchar_t	egrep = 0;		/* Invoked as egrep */
75*7c478bd9Sstevel@tonic-gate static uchar_t	nvflag = 1;		/* Print matching lines */
76*7c478bd9Sstevel@tonic-gate static uchar_t	cflag;			/* Count of matches */
77*7c478bd9Sstevel@tonic-gate static uchar_t	iflag;			/* Case insensitve matching */
78*7c478bd9Sstevel@tonic-gate static uchar_t	hflag;			/* Supress printing of filename */
79*7c478bd9Sstevel@tonic-gate static uchar_t	lflag;			/* Print file names of matches */
80*7c478bd9Sstevel@tonic-gate static uchar_t	nflag;			/* Precede lines by line number */
81*7c478bd9Sstevel@tonic-gate static uchar_t	bflag;			/* Preccede matches by block number */
82*7c478bd9Sstevel@tonic-gate static uchar_t	sflag;			/* Suppress file error messages */
83*7c478bd9Sstevel@tonic-gate static uchar_t	qflag;			/* Suppress standard output */
84*7c478bd9Sstevel@tonic-gate static uchar_t	wflag;			/* Search for expression as a word */
85*7c478bd9Sstevel@tonic-gate static uchar_t	xflag;			/* Anchoring */
86*7c478bd9Sstevel@tonic-gate static uchar_t	Eflag;			/* Egrep or -E flag */
87*7c478bd9Sstevel@tonic-gate static uchar_t	Fflag;			/* Fgrep or -F flag */
88*7c478bd9Sstevel@tonic-gate static uchar_t	outfn;			/* Put out file name */
89*7c478bd9Sstevel@tonic-gate static char	*cmdname;
90*7c478bd9Sstevel@tonic-gate 
91*7c478bd9Sstevel@tonic-gate static int	use_wchar, use_bmg, mblocale;
92*7c478bd9Sstevel@tonic-gate 
93*7c478bd9Sstevel@tonic-gate static size_t	outbuflen, prntbuflen;
94*7c478bd9Sstevel@tonic-gate static char	*prntbuf;
95*7c478bd9Sstevel@tonic-gate static wchar_t	*outline;
96*7c478bd9Sstevel@tonic-gate 
/*
 * Forward declarations of the file-local routines.
 */
static void	addfile(char *fn);	/* add patterns read from file fn */
static void	addpattern(char *s);	/* add pattern(s) held in string s */
static void	fixpatterns(void);	/* prepare/compile all patterns */
static void	usage(void);
static int	grep(int, char *);	/* search one open file; main() */
					/* passes a descriptor and a name */
/* NOTE(review): BMG matcher entry points; argument meaning not shown here */
static void	bmgcomp(char *, int);
static char	*bmgexec(char *, char *);
104*7c478bd9Sstevel@tonic-gate 
105*7c478bd9Sstevel@tonic-gate /*
106*7c478bd9Sstevel@tonic-gate  * mainline for grep
107*7c478bd9Sstevel@tonic-gate  */
108*7c478bd9Sstevel@tonic-gate int
109*7c478bd9Sstevel@tonic-gate main(int argc, char **argv)
110*7c478bd9Sstevel@tonic-gate {
111*7c478bd9Sstevel@tonic-gate 	char	*ap;
112*7c478bd9Sstevel@tonic-gate 	int	matched = 0;
113*7c478bd9Sstevel@tonic-gate 	int	c;
114*7c478bd9Sstevel@tonic-gate 	int	fflag = 0;
115*7c478bd9Sstevel@tonic-gate 	int	errors = 0;
116*7c478bd9Sstevel@tonic-gate 	int	i, n_pattern = 0, n_file = 0;
117*7c478bd9Sstevel@tonic-gate 	char	**pattern_list = NULL;
118*7c478bd9Sstevel@tonic-gate 	char	**file_list = NULL;
119*7c478bd9Sstevel@tonic-gate 
120*7c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_ALL, "");
121*7c478bd9Sstevel@tonic-gate #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
122*7c478bd9Sstevel@tonic-gate #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
123*7c478bd9Sstevel@tonic-gate #endif
124*7c478bd9Sstevel@tonic-gate 	(void) textdomain(TEXT_DOMAIN);
125*7c478bd9Sstevel@tonic-gate 
126*7c478bd9Sstevel@tonic-gate 	/*
127*7c478bd9Sstevel@tonic-gate 	 * true if this is running on the multibyte locale
128*7c478bd9Sstevel@tonic-gate 	 */
129*7c478bd9Sstevel@tonic-gate 	mblocale = (MB_CUR_MAX > 1);
130*7c478bd9Sstevel@tonic-gate 	/*
131*7c478bd9Sstevel@tonic-gate 	 * Skip leading slashes
132*7c478bd9Sstevel@tonic-gate 	 */
133*7c478bd9Sstevel@tonic-gate 	cmdname = argv[0];
134*7c478bd9Sstevel@tonic-gate 	if (ap = strrchr(cmdname, '/'))
135*7c478bd9Sstevel@tonic-gate 		cmdname = ap + 1;
136*7c478bd9Sstevel@tonic-gate 
137*7c478bd9Sstevel@tonic-gate 	ap = cmdname;
138*7c478bd9Sstevel@tonic-gate 	/*
139*7c478bd9Sstevel@tonic-gate 	 * Detect egrep/fgrep via command name, map to -E and -F options.
140*7c478bd9Sstevel@tonic-gate 	 */
141*7c478bd9Sstevel@tonic-gate 	if (*ap == 'e' || *ap == 'E') {
142*7c478bd9Sstevel@tonic-gate 		regflags |= REG_EXTENDED;
143*7c478bd9Sstevel@tonic-gate 		egrep++;
144*7c478bd9Sstevel@tonic-gate 	} else {
145*7c478bd9Sstevel@tonic-gate 		if (*ap == 'f' || *ap == 'F') {
146*7c478bd9Sstevel@tonic-gate 			fgrep++;
147*7c478bd9Sstevel@tonic-gate 		}
148*7c478bd9Sstevel@tonic-gate 	}
149*7c478bd9Sstevel@tonic-gate 
150*7c478bd9Sstevel@tonic-gate 	while ((c = getopt(argc, argv, "vwchilnbse:f:qxEFI")) != EOF) {
151*7c478bd9Sstevel@tonic-gate 		switch (c) {
152*7c478bd9Sstevel@tonic-gate 		case 'v':	/* POSIX: negate matches */
153*7c478bd9Sstevel@tonic-gate 			nvflag = 0;
154*7c478bd9Sstevel@tonic-gate 			break;
155*7c478bd9Sstevel@tonic-gate 
156*7c478bd9Sstevel@tonic-gate 		case 'c':	/* POSIX: write count */
157*7c478bd9Sstevel@tonic-gate 			cflag++;
158*7c478bd9Sstevel@tonic-gate 			break;
159*7c478bd9Sstevel@tonic-gate 
160*7c478bd9Sstevel@tonic-gate 		case 'i':	/* POSIX: ignore case */
161*7c478bd9Sstevel@tonic-gate 			iflag++;
162*7c478bd9Sstevel@tonic-gate 			regflags |= REG_ICASE;
163*7c478bd9Sstevel@tonic-gate 			break;
164*7c478bd9Sstevel@tonic-gate 
165*7c478bd9Sstevel@tonic-gate 		case 'l':	/* POSIX: Write filenames only */
166*7c478bd9Sstevel@tonic-gate 			lflag++;
167*7c478bd9Sstevel@tonic-gate 			break;
168*7c478bd9Sstevel@tonic-gate 
169*7c478bd9Sstevel@tonic-gate 		case 'n':	/* POSIX: Write line numbers */
170*7c478bd9Sstevel@tonic-gate 			nflag++;
171*7c478bd9Sstevel@tonic-gate 			break;
172*7c478bd9Sstevel@tonic-gate 
173*7c478bd9Sstevel@tonic-gate 		case 'b':	/* Solaris: Write file block numbers */
174*7c478bd9Sstevel@tonic-gate 			bflag++;
175*7c478bd9Sstevel@tonic-gate 			break;
176*7c478bd9Sstevel@tonic-gate 
177*7c478bd9Sstevel@tonic-gate 		case 's':	/* POSIX: No error msgs for files */
178*7c478bd9Sstevel@tonic-gate 			sflag++;
179*7c478bd9Sstevel@tonic-gate 			break;
180*7c478bd9Sstevel@tonic-gate 
181*7c478bd9Sstevel@tonic-gate 		case 'e':	/* POSIX: pattern list */
182*7c478bd9Sstevel@tonic-gate 			n_pattern++;
183*7c478bd9Sstevel@tonic-gate 			pattern_list = realloc(pattern_list,
184*7c478bd9Sstevel@tonic-gate 			    sizeof (char *) * n_pattern);
185*7c478bd9Sstevel@tonic-gate 			if (pattern_list == NULL) {
186*7c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr,
187*7c478bd9Sstevel@tonic-gate 				    gettext("%s: out of memory\n"),
188*7c478bd9Sstevel@tonic-gate 				    cmdname);
189*7c478bd9Sstevel@tonic-gate 				exit(2);
190*7c478bd9Sstevel@tonic-gate 			}
191*7c478bd9Sstevel@tonic-gate 			*(pattern_list + n_pattern - 1) = optarg;
192*7c478bd9Sstevel@tonic-gate 			break;
193*7c478bd9Sstevel@tonic-gate 
194*7c478bd9Sstevel@tonic-gate 		case 'f':	/* POSIX: pattern file */
195*7c478bd9Sstevel@tonic-gate 			fflag = 1;
196*7c478bd9Sstevel@tonic-gate 			n_file++;
197*7c478bd9Sstevel@tonic-gate 			file_list = realloc(file_list,
198*7c478bd9Sstevel@tonic-gate 			    sizeof (char *) * n_file);
199*7c478bd9Sstevel@tonic-gate 			if (file_list == NULL) {
200*7c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr,
201*7c478bd9Sstevel@tonic-gate 				    gettext("%s: out of memory\n"),
202*7c478bd9Sstevel@tonic-gate 				    cmdname);
203*7c478bd9Sstevel@tonic-gate 				exit(2);
204*7c478bd9Sstevel@tonic-gate 			}
205*7c478bd9Sstevel@tonic-gate 			*(file_list + n_file - 1) = optarg;
206*7c478bd9Sstevel@tonic-gate 			break;
207*7c478bd9Sstevel@tonic-gate 		case 'h':	/* Solaris: supress printing of file name */
208*7c478bd9Sstevel@tonic-gate 			hflag = 1;
209*7c478bd9Sstevel@tonic-gate 			break;
210*7c478bd9Sstevel@tonic-gate 
211*7c478bd9Sstevel@tonic-gate 		case 'q':	/* POSIX: quiet: status only */
212*7c478bd9Sstevel@tonic-gate 			qflag++;
213*7c478bd9Sstevel@tonic-gate 			break;
214*7c478bd9Sstevel@tonic-gate 
215*7c478bd9Sstevel@tonic-gate 		case 'w':	/* Solaris: treat pattern as word */
216*7c478bd9Sstevel@tonic-gate 			wflag++;
217*7c478bd9Sstevel@tonic-gate 			break;
218*7c478bd9Sstevel@tonic-gate 
219*7c478bd9Sstevel@tonic-gate 		case 'x':	/* POSIX: full line matches */
220*7c478bd9Sstevel@tonic-gate 			xflag++;
221*7c478bd9Sstevel@tonic-gate 			regflags |= REG_ANCHOR;
222*7c478bd9Sstevel@tonic-gate 			break;
223*7c478bd9Sstevel@tonic-gate 
224*7c478bd9Sstevel@tonic-gate 		case 'E':	/* POSIX: Extended RE's */
225*7c478bd9Sstevel@tonic-gate 			regflags |= REG_EXTENDED;
226*7c478bd9Sstevel@tonic-gate 			Eflag++;
227*7c478bd9Sstevel@tonic-gate 			break;
228*7c478bd9Sstevel@tonic-gate 
229*7c478bd9Sstevel@tonic-gate 		case 'F':	/* POSIX: strings, not RE's */
230*7c478bd9Sstevel@tonic-gate 			Fflag++;
231*7c478bd9Sstevel@tonic-gate 			break;
232*7c478bd9Sstevel@tonic-gate 
233*7c478bd9Sstevel@tonic-gate 		default:
234*7c478bd9Sstevel@tonic-gate 			usage();
235*7c478bd9Sstevel@tonic-gate 		}
236*7c478bd9Sstevel@tonic-gate 	}
237*7c478bd9Sstevel@tonic-gate 	/*
238*7c478bd9Sstevel@tonic-gate 	 * If we're invoked as egrep or fgrep we need to do some checks
239*7c478bd9Sstevel@tonic-gate 	 */
240*7c478bd9Sstevel@tonic-gate 
241*7c478bd9Sstevel@tonic-gate 	if (egrep || fgrep) {
242*7c478bd9Sstevel@tonic-gate 		/*
243*7c478bd9Sstevel@tonic-gate 		 * Use of -E or -F with egrep or fgrep is illegal
244*7c478bd9Sstevel@tonic-gate 		 */
245*7c478bd9Sstevel@tonic-gate 		if (Eflag || Fflag)
246*7c478bd9Sstevel@tonic-gate 			usage();
247*7c478bd9Sstevel@tonic-gate 		/*
248*7c478bd9Sstevel@tonic-gate 		 * Don't allow use of wflag with egrep / fgrep
249*7c478bd9Sstevel@tonic-gate 		 */
250*7c478bd9Sstevel@tonic-gate 		if (wflag)
251*7c478bd9Sstevel@tonic-gate 			usage();
252*7c478bd9Sstevel@tonic-gate 		/*
253*7c478bd9Sstevel@tonic-gate 		 * For Solaris the -s flag is equivalent to XCU -q
254*7c478bd9Sstevel@tonic-gate 		 */
255*7c478bd9Sstevel@tonic-gate 		if (sflag)
256*7c478bd9Sstevel@tonic-gate 			qflag++;
257*7c478bd9Sstevel@tonic-gate 		/*
258*7c478bd9Sstevel@tonic-gate 		 * done with above checks - set the appropriate flags
259*7c478bd9Sstevel@tonic-gate 		 */
260*7c478bd9Sstevel@tonic-gate 		if (egrep)
261*7c478bd9Sstevel@tonic-gate 			Eflag++;
262*7c478bd9Sstevel@tonic-gate 		else			/* Else fgrep */
263*7c478bd9Sstevel@tonic-gate 			Fflag++;
264*7c478bd9Sstevel@tonic-gate 	}
265*7c478bd9Sstevel@tonic-gate 
266*7c478bd9Sstevel@tonic-gate 	if (wflag && (Eflag || Fflag)) {
267*7c478bd9Sstevel@tonic-gate 		/*
268*7c478bd9Sstevel@tonic-gate 		 * -w cannot be specified with grep -F
269*7c478bd9Sstevel@tonic-gate 		 */
270*7c478bd9Sstevel@tonic-gate 		usage();
271*7c478bd9Sstevel@tonic-gate 	}
272*7c478bd9Sstevel@tonic-gate 
273*7c478bd9Sstevel@tonic-gate 	/*
274*7c478bd9Sstevel@tonic-gate 	 * -E and -F flags are mutually exclusive - check for this
275*7c478bd9Sstevel@tonic-gate 	 */
276*7c478bd9Sstevel@tonic-gate 	if (Eflag && Fflag)
277*7c478bd9Sstevel@tonic-gate 		usage();
278*7c478bd9Sstevel@tonic-gate 
279*7c478bd9Sstevel@tonic-gate 	/*
280*7c478bd9Sstevel@tonic-gate 	 * -c, -l and -q flags are mutually exclusive
281*7c478bd9Sstevel@tonic-gate 	 * We have -c override -l like in Solaris.
282*7c478bd9Sstevel@tonic-gate 	 * -q overrides -l & -c programmatically in grep() function.
283*7c478bd9Sstevel@tonic-gate 	 */
284*7c478bd9Sstevel@tonic-gate 	if (cflag && lflag)
285*7c478bd9Sstevel@tonic-gate 		lflag = 0;
286*7c478bd9Sstevel@tonic-gate 
287*7c478bd9Sstevel@tonic-gate 	argv += optind - 1;
288*7c478bd9Sstevel@tonic-gate 	argc -= optind - 1;
289*7c478bd9Sstevel@tonic-gate 
290*7c478bd9Sstevel@tonic-gate 	/*
291*7c478bd9Sstevel@tonic-gate 	 * Now handling -e and -f option
292*7c478bd9Sstevel@tonic-gate 	 */
293*7c478bd9Sstevel@tonic-gate 	if (pattern_list) {
294*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < n_pattern; i++) {
295*7c478bd9Sstevel@tonic-gate 			addpattern(pattern_list[i]);
296*7c478bd9Sstevel@tonic-gate 		}
297*7c478bd9Sstevel@tonic-gate 		free(pattern_list);
298*7c478bd9Sstevel@tonic-gate 	}
299*7c478bd9Sstevel@tonic-gate 	if (file_list) {
300*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < n_file; i++) {
301*7c478bd9Sstevel@tonic-gate 			addfile(file_list[i]);
302*7c478bd9Sstevel@tonic-gate 		}
303*7c478bd9Sstevel@tonic-gate 		free(file_list);
304*7c478bd9Sstevel@tonic-gate 	}
305*7c478bd9Sstevel@tonic-gate 
306*7c478bd9Sstevel@tonic-gate 	/*
307*7c478bd9Sstevel@tonic-gate 	 * No -e or -f?  Make sure there is one more arg, use it as the pattern.
308*7c478bd9Sstevel@tonic-gate 	 */
309*7c478bd9Sstevel@tonic-gate 	if (patterns == NULL && !fflag) {
310*7c478bd9Sstevel@tonic-gate 		if (argc < 2)
311*7c478bd9Sstevel@tonic-gate 			usage();
312*7c478bd9Sstevel@tonic-gate 		addpattern(argv[1]);
313*7c478bd9Sstevel@tonic-gate 		argc--;
314*7c478bd9Sstevel@tonic-gate 		argv++;
315*7c478bd9Sstevel@tonic-gate 	}
316*7c478bd9Sstevel@tonic-gate 
317*7c478bd9Sstevel@tonic-gate 	/*
318*7c478bd9Sstevel@tonic-gate 	 * If -x flag is not specified or -i flag is specified
319*7c478bd9Sstevel@tonic-gate 	 * with fgrep in a multibyte locale, need to use
320*7c478bd9Sstevel@tonic-gate 	 * the wide character APIs.  Otherwise, byte-oriented
321*7c478bd9Sstevel@tonic-gate 	 * process will be done.
322*7c478bd9Sstevel@tonic-gate 	 */
323*7c478bd9Sstevel@tonic-gate 	use_wchar = Fflag && mblocale && (!xflag || iflag);
324*7c478bd9Sstevel@tonic-gate 
325*7c478bd9Sstevel@tonic-gate 	/*
326*7c478bd9Sstevel@tonic-gate 	 * Compile Patterns and also decide if BMG can be used
327*7c478bd9Sstevel@tonic-gate 	 */
328*7c478bd9Sstevel@tonic-gate 	fixpatterns();
329*7c478bd9Sstevel@tonic-gate 
330*7c478bd9Sstevel@tonic-gate 	/* Process all files: stdin, or rest of arg list */
331*7c478bd9Sstevel@tonic-gate 	if (argc < 2) {
332*7c478bd9Sstevel@tonic-gate 		matched = grep(0, gettext("(standard input)"));
333*7c478bd9Sstevel@tonic-gate 	} else {
334*7c478bd9Sstevel@tonic-gate 		if (argc > 2 && hflag == 0)
335*7c478bd9Sstevel@tonic-gate 			outfn = 1;	/* Print filename on match line */
336*7c478bd9Sstevel@tonic-gate 		for (argv++; *argv != NULL; argv++) {
337*7c478bd9Sstevel@tonic-gate 			int	fd;
338*7c478bd9Sstevel@tonic-gate 
339*7c478bd9Sstevel@tonic-gate 			if ((fd = open(*argv, O_RDONLY)) == -1) {
340*7c478bd9Sstevel@tonic-gate 				errors = 1;
341*7c478bd9Sstevel@tonic-gate 				if (sflag)
342*7c478bd9Sstevel@tonic-gate 					continue;
343*7c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr, gettext(
344*7c478bd9Sstevel@tonic-gate 				    "%s: can't open \"%s\"\n"),
345*7c478bd9Sstevel@tonic-gate 				    cmdname, *argv);
346*7c478bd9Sstevel@tonic-gate 				continue;
347*7c478bd9Sstevel@tonic-gate 			}
348*7c478bd9Sstevel@tonic-gate 			matched |= grep(fd, *argv);
349*7c478bd9Sstevel@tonic-gate 			(void) close(fd);
350*7c478bd9Sstevel@tonic-gate 			if (ferror(stdout))
351*7c478bd9Sstevel@tonic-gate 				break;
352*7c478bd9Sstevel@tonic-gate 		}
353*7c478bd9Sstevel@tonic-gate 	}
354*7c478bd9Sstevel@tonic-gate 	/*
355*7c478bd9Sstevel@tonic-gate 	 * Return() here is used instead of exit
356*7c478bd9Sstevel@tonic-gate 	 */
357*7c478bd9Sstevel@tonic-gate 
358*7c478bd9Sstevel@tonic-gate 	(void) fflush(stdout);
359*7c478bd9Sstevel@tonic-gate 
360*7c478bd9Sstevel@tonic-gate 	if (errors)
361*7c478bd9Sstevel@tonic-gate 		return (2);
362*7c478bd9Sstevel@tonic-gate 	return (matched ? 0 : 1);
363*7c478bd9Sstevel@tonic-gate }
364*7c478bd9Sstevel@tonic-gate 
365*7c478bd9Sstevel@tonic-gate /*
366*7c478bd9Sstevel@tonic-gate  * Add a file of strings to the pattern list.
367*7c478bd9Sstevel@tonic-gate  */
368*7c478bd9Sstevel@tonic-gate static void
369*7c478bd9Sstevel@tonic-gate addfile(char *fn)
370*7c478bd9Sstevel@tonic-gate {
371*7c478bd9Sstevel@tonic-gate 	FILE	*fp;
372*7c478bd9Sstevel@tonic-gate 	char	*inbuf;
373*7c478bd9Sstevel@tonic-gate 	char	*bufp;
374*7c478bd9Sstevel@tonic-gate 	size_t	bufsiz, buflen, bufused;
375*7c478bd9Sstevel@tonic-gate 
376*7c478bd9Sstevel@tonic-gate 	/*
377*7c478bd9Sstevel@tonic-gate 	 * Open the pattern file
378*7c478bd9Sstevel@tonic-gate 	 */
379*7c478bd9Sstevel@tonic-gate 	if ((fp = fopen(fn, "r")) == NULL) {
380*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
381*7c478bd9Sstevel@tonic-gate 		    cmdname, fn);
382*7c478bd9Sstevel@tonic-gate 		exit(2);
383*7c478bd9Sstevel@tonic-gate 	}
384*7c478bd9Sstevel@tonic-gate 	bufsiz = BUFSIZE;
385*7c478bd9Sstevel@tonic-gate 	if ((inbuf = malloc(bufsiz)) == NULL) {
386*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
387*7c478bd9Sstevel@tonic-gate 		    gettext("%s: out of memory\n"), cmdname);
388*7c478bd9Sstevel@tonic-gate 		exit(2);
389*7c478bd9Sstevel@tonic-gate 	}
390*7c478bd9Sstevel@tonic-gate 	bufp = inbuf;
391*7c478bd9Sstevel@tonic-gate 	bufused = 0;
392*7c478bd9Sstevel@tonic-gate 	/*
393*7c478bd9Sstevel@tonic-gate 	 * Read in the file, reallocing as we need more memory
394*7c478bd9Sstevel@tonic-gate 	 */
395*7c478bd9Sstevel@tonic-gate 	while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
396*7c478bd9Sstevel@tonic-gate 		buflen = strlen(bufp);
397*7c478bd9Sstevel@tonic-gate 		bufused += buflen;
398*7c478bd9Sstevel@tonic-gate 		if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
399*7c478bd9Sstevel@tonic-gate 			/*
400*7c478bd9Sstevel@tonic-gate 			 * if this line does not fit to the buffer,
401*7c478bd9Sstevel@tonic-gate 			 * realloc larger buffer
402*7c478bd9Sstevel@tonic-gate 			 */
403*7c478bd9Sstevel@tonic-gate 			bufsiz += BUFSIZE;
404*7c478bd9Sstevel@tonic-gate 			if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
405*7c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr,
406*7c478bd9Sstevel@tonic-gate 				    gettext("%s: out of memory\n"),
407*7c478bd9Sstevel@tonic-gate 				    cmdname);
408*7c478bd9Sstevel@tonic-gate 				exit(2);
409*7c478bd9Sstevel@tonic-gate 			}
410*7c478bd9Sstevel@tonic-gate 			bufp = inbuf + bufused;
411*7c478bd9Sstevel@tonic-gate 			continue;
412*7c478bd9Sstevel@tonic-gate 		}
413*7c478bd9Sstevel@tonic-gate 		if (bufp[buflen - 1] == '\n') {
414*7c478bd9Sstevel@tonic-gate 			bufp[--buflen] = '\0';
415*7c478bd9Sstevel@tonic-gate 		}
416*7c478bd9Sstevel@tonic-gate 		addpattern(inbuf);
417*7c478bd9Sstevel@tonic-gate 
418*7c478bd9Sstevel@tonic-gate 		bufp = inbuf;
419*7c478bd9Sstevel@tonic-gate 		bufused = 0;
420*7c478bd9Sstevel@tonic-gate 	}
421*7c478bd9Sstevel@tonic-gate 	free(inbuf);
422*7c478bd9Sstevel@tonic-gate 	(void) fclose(fp);
423*7c478bd9Sstevel@tonic-gate }
424*7c478bd9Sstevel@tonic-gate 
425*7c478bd9Sstevel@tonic-gate /*
426*7c478bd9Sstevel@tonic-gate  * Add a string to the pattern list.
427*7c478bd9Sstevel@tonic-gate  */
428*7c478bd9Sstevel@tonic-gate static void
429*7c478bd9Sstevel@tonic-gate addpattern(char *s)
430*7c478bd9Sstevel@tonic-gate {
431*7c478bd9Sstevel@tonic-gate 	PATTERN	*pp;
432*7c478bd9Sstevel@tonic-gate 	char	*wordbuf;
433*7c478bd9Sstevel@tonic-gate 	char	*np;
434*7c478bd9Sstevel@tonic-gate 
435*7c478bd9Sstevel@tonic-gate 	for (; ; ) {
436*7c478bd9Sstevel@tonic-gate 		np = strchr(s, '\n');
437*7c478bd9Sstevel@tonic-gate 		if (np != NULL)
438*7c478bd9Sstevel@tonic-gate 			*np = '\0';
439*7c478bd9Sstevel@tonic-gate 		if ((pp = malloc(sizeof (PATTERN))) == NULL) {
440*7c478bd9Sstevel@tonic-gate 			(void) fprintf(stderr, gettext(
441*7c478bd9Sstevel@tonic-gate 			    "%s: out of memory\n"),
442*7c478bd9Sstevel@tonic-gate 			    cmdname);
443*7c478bd9Sstevel@tonic-gate 			exit(2);
444*7c478bd9Sstevel@tonic-gate 		}
445*7c478bd9Sstevel@tonic-gate 		if (wflag) {
446*7c478bd9Sstevel@tonic-gate 			/*
447*7c478bd9Sstevel@tonic-gate 			 * Solaris wflag support: Add '<' '>' to pattern to
448*7c478bd9Sstevel@tonic-gate 			 * select it as a word. Doesn't make sense with -F
449*7c478bd9Sstevel@tonic-gate 			 * but we're Libertarian.
450*7c478bd9Sstevel@tonic-gate 			 */
451*7c478bd9Sstevel@tonic-gate 			size_t	slen, wordlen;
452*7c478bd9Sstevel@tonic-gate 
453*7c478bd9Sstevel@tonic-gate 			slen = strlen(s);
454*7c478bd9Sstevel@tonic-gate 			wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
455*7c478bd9Sstevel@tonic-gate 			if ((wordbuf = malloc(wordlen)) == NULL) {
456*7c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr,
457*7c478bd9Sstevel@tonic-gate 				    gettext("%s: out of memory\n"),
458*7c478bd9Sstevel@tonic-gate 				    cmdname);
459*7c478bd9Sstevel@tonic-gate 				exit(2);
460*7c478bd9Sstevel@tonic-gate 			}
461*7c478bd9Sstevel@tonic-gate 			(void) strcpy(wordbuf, "\\<");
462*7c478bd9Sstevel@tonic-gate 			(void) strcpy(wordbuf + 2, s);
463*7c478bd9Sstevel@tonic-gate 			(void) strcpy(wordbuf + 2 + slen, "\\>");
464*7c478bd9Sstevel@tonic-gate 		} else {
465*7c478bd9Sstevel@tonic-gate 			if ((wordbuf = strdup(s)) == NULL) {
466*7c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr,
467*7c478bd9Sstevel@tonic-gate 				    gettext("%s: out of memory\n"),
468*7c478bd9Sstevel@tonic-gate 				    cmdname);
469*7c478bd9Sstevel@tonic-gate 				exit(2);
470*7c478bd9Sstevel@tonic-gate 			}
471*7c478bd9Sstevel@tonic-gate 		}
472*7c478bd9Sstevel@tonic-gate 		pp->pattern = wordbuf;
473*7c478bd9Sstevel@tonic-gate 		pp->next = patterns;
474*7c478bd9Sstevel@tonic-gate 		patterns = pp;
475*7c478bd9Sstevel@tonic-gate 		if (np == NULL)
476*7c478bd9Sstevel@tonic-gate 			break;
477*7c478bd9Sstevel@tonic-gate 		s = np + 1;
478*7c478bd9Sstevel@tonic-gate 	}
479*7c478bd9Sstevel@tonic-gate }
480*7c478bd9Sstevel@tonic-gate 
481*7c478bd9Sstevel@tonic-gate /*
482*7c478bd9Sstevel@tonic-gate  * Fix patterns.
483*7c478bd9Sstevel@tonic-gate  * Must do after all arguments read, in case later -i option.
484*7c478bd9Sstevel@tonic-gate  */
485*7c478bd9Sstevel@tonic-gate static void
486*7c478bd9Sstevel@tonic-gate fixpatterns(void)
487*7c478bd9Sstevel@tonic-gate {
488*7c478bd9Sstevel@tonic-gate 	PATTERN	*pp;
489*7c478bd9Sstevel@tonic-gate 	int	rv, fix_pattern, npatterns;
490*7c478bd9Sstevel@tonic-gate 
491*7c478bd9Sstevel@tonic-gate 	/*
492*7c478bd9Sstevel@tonic-gate 	 * As REG_ANCHOR flag is not supported in the current Solaris,
493*7c478bd9Sstevel@tonic-gate 	 * need to fix the specified pattern if -x is specified with
494*7c478bd9Sstevel@tonic-gate 	 * grep or egrep
495*7c478bd9Sstevel@tonic-gate 	 */
496*7c478bd9Sstevel@tonic-gate 	fix_pattern = !Fflag && xflag;
497*7c478bd9Sstevel@tonic-gate 
498*7c478bd9Sstevel@tonic-gate 	for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
499*7c478bd9Sstevel@tonic-gate 		npatterns++;
500*7c478bd9Sstevel@tonic-gate 		if (fix_pattern) {
501*7c478bd9Sstevel@tonic-gate 			char	*cp, *cq;
502*7c478bd9Sstevel@tonic-gate 			size_t	plen, nplen;
503*7c478bd9Sstevel@tonic-gate 
504*7c478bd9Sstevel@tonic-gate 			plen = strlen(pp->pattern);
505*7c478bd9Sstevel@tonic-gate 			/* '^' pattern '$' */
506*7c478bd9Sstevel@tonic-gate 			nplen = 1 + plen + 1 + 1;
507*7c478bd9Sstevel@tonic-gate 			if ((cp = malloc(nplen)) == NULL) {
508*7c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr,
509*7c478bd9Sstevel@tonic-gate 				    gettext("%s: out of memory\n"),
510*7c478bd9Sstevel@tonic-gate 				    cmdname);
511*7c478bd9Sstevel@tonic-gate 				exit(2);
512*7c478bd9Sstevel@tonic-gate 			}
513*7c478bd9Sstevel@tonic-gate 			cq = cp;
514*7c478bd9Sstevel@tonic-gate 			*cq++ = '^';
515*7c478bd9Sstevel@tonic-gate 			cq = strcpy(cq, pp->pattern) + plen;
516*7c478bd9Sstevel@tonic-gate 			*cq++ = '$';
517*7c478bd9Sstevel@tonic-gate 			*cq = '\0';
518*7c478bd9Sstevel@tonic-gate 			free(pp->pattern);
519*7c478bd9Sstevel@tonic-gate 			pp->pattern = cp;
520*7c478bd9Sstevel@tonic-gate 		}
521*7c478bd9Sstevel@tonic-gate 
522*7c478bd9Sstevel@tonic-gate 		if (Fflag) {
523*7c478bd9Sstevel@tonic-gate 			if (use_wchar) {
524*7c478bd9Sstevel@tonic-gate 				/*
525*7c478bd9Sstevel@tonic-gate 				 * Fflag && mblocale && iflag
526*7c478bd9Sstevel@tonic-gate 				 * Fflag && mblocale && !xflag
527*7c478bd9Sstevel@tonic-gate 				 */
528*7c478bd9Sstevel@tonic-gate 				size_t	n;
529*7c478bd9Sstevel@tonic-gate 				n = strlen(pp->pattern) + 1;
530*7c478bd9Sstevel@tonic-gate 				if ((pp->wpattern =
531*7c478bd9Sstevel@tonic-gate 					malloc(sizeof (wchar_t) * n)) == NULL) {
532*7c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
533*7c478bd9Sstevel@tonic-gate 					    gettext("%s: out of memory\n"),
534*7c478bd9Sstevel@tonic-gate 					    cmdname);
535*7c478bd9Sstevel@tonic-gate 					exit(2);
536*7c478bd9Sstevel@tonic-gate 				}
537*7c478bd9Sstevel@tonic-gate 				if (mbstowcs(pp->wpattern, pp->pattern, n) ==
538*7c478bd9Sstevel@tonic-gate 				    (size_t)-1) {
539*7c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
540*7c478bd9Sstevel@tonic-gate 					    gettext("%s: failed to convert "
541*7c478bd9Sstevel@tonic-gate 						"\"%s\" to wide-characters\n"),
542*7c478bd9Sstevel@tonic-gate 					    cmdname, pp->pattern);
543*7c478bd9Sstevel@tonic-gate 					exit(2);
544*7c478bd9Sstevel@tonic-gate 				}
545*7c478bd9Sstevel@tonic-gate 				if (iflag) {
546*7c478bd9Sstevel@tonic-gate 					wchar_t	*wp;
547*7c478bd9Sstevel@tonic-gate 					for (wp = pp->wpattern; *wp != L'\0';
548*7c478bd9Sstevel@tonic-gate 					    wp++) {
549*7c478bd9Sstevel@tonic-gate 						*wp = towlower((wint_t)*wp);
550*7c478bd9Sstevel@tonic-gate 					}
551*7c478bd9Sstevel@tonic-gate 				}
552*7c478bd9Sstevel@tonic-gate 				free(pp->pattern);
553*7c478bd9Sstevel@tonic-gate 			} else {
554*7c478bd9Sstevel@tonic-gate 				/*
555*7c478bd9Sstevel@tonic-gate 				 * Fflag && mblocale && !iflag
556*7c478bd9Sstevel@tonic-gate 				 * Fflag && !mblocale && iflag
557*7c478bd9Sstevel@tonic-gate 				 * Fflag && !mblocale && !iflag
558*7c478bd9Sstevel@tonic-gate 				 */
559*7c478bd9Sstevel@tonic-gate 				if (iflag) {
560*7c478bd9Sstevel@tonic-gate 					unsigned char	*cp;
561*7c478bd9Sstevel@tonic-gate 					for (cp = (unsigned char *)pp->pattern;
562*7c478bd9Sstevel@tonic-gate 					    *cp != '\0'; cp++) {
563*7c478bd9Sstevel@tonic-gate 						*cp = tolower(*cp);
564*7c478bd9Sstevel@tonic-gate 					}
565*7c478bd9Sstevel@tonic-gate 				}
566*7c478bd9Sstevel@tonic-gate 			}
567*7c478bd9Sstevel@tonic-gate 			/*
568*7c478bd9Sstevel@tonic-gate 			 * fgrep: No regular expressions.
569*7c478bd9Sstevel@tonic-gate 			 */
570*7c478bd9Sstevel@tonic-gate 			continue;
571*7c478bd9Sstevel@tonic-gate 		}
572*7c478bd9Sstevel@tonic-gate 
573*7c478bd9Sstevel@tonic-gate 		/*
574*7c478bd9Sstevel@tonic-gate 		 * For non-fgrep, compile the regular expression,
575*7c478bd9Sstevel@tonic-gate 		 * give an informative error message, and exit if
576*7c478bd9Sstevel@tonic-gate 		 * it didn't compile.
577*7c478bd9Sstevel@tonic-gate 		 */
578*7c478bd9Sstevel@tonic-gate 		if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
579*7c478bd9Sstevel@tonic-gate 			(void) regerror(rv, &pp->re, errstr, sizeof (errstr));
580*7c478bd9Sstevel@tonic-gate 			(void) fprintf(stderr,
581*7c478bd9Sstevel@tonic-gate 			    gettext("%s: RE error in %s: %s\n"),
582*7c478bd9Sstevel@tonic-gate 				cmdname, pp->pattern, errstr);
583*7c478bd9Sstevel@tonic-gate 			exit(2);
584*7c478bd9Sstevel@tonic-gate 		}
585*7c478bd9Sstevel@tonic-gate 		free(pp->pattern);
586*7c478bd9Sstevel@tonic-gate 	}
587*7c478bd9Sstevel@tonic-gate 
588*7c478bd9Sstevel@tonic-gate 	/*
589*7c478bd9Sstevel@tonic-gate 	 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
590*7c478bd9Sstevel@tonic-gate 	 * Use the Boyer-Moore-Gosper algorithm if:
591*7c478bd9Sstevel@tonic-gate 	 * - fgrep			(Fflag)
592*7c478bd9Sstevel@tonic-gate 	 * - singlebyte locale		(!mblocale)
593*7c478bd9Sstevel@tonic-gate 	 * - no ignoring case		(!iflag)
594*7c478bd9Sstevel@tonic-gate 	 * - no printing line numbers	(!nflag)
595*7c478bd9Sstevel@tonic-gate 	 * - no negating the output	(nvflag)
596*7c478bd9Sstevel@tonic-gate 	 * - only one pattern		(npatterns == 1)
597*7c478bd9Sstevel@tonic-gate 	 * - non zero length pattern	(strlen(patterns->pattern) != 0)
598*7c478bd9Sstevel@tonic-gate 	 *
599*7c478bd9Sstevel@tonic-gate 	 * It's guaranteed patterns->pattern is still alive
600*7c478bd9Sstevel@tonic-gate 	 * when Fflag && !mblocale.
601*7c478bd9Sstevel@tonic-gate 	 */
602*7c478bd9Sstevel@tonic-gate 	use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
603*7c478bd9Sstevel@tonic-gate 	    (npatterns == 1) && (strlen(patterns->pattern) != 0);
604*7c478bd9Sstevel@tonic-gate }
605*7c478bd9Sstevel@tonic-gate 
/*
 * Locate the first newline in the len bytes starting at ptr.
 * Returns a pointer to that newline, or NULL when the range
 * holds none (including when len is 0).
 */
static char *
find_nl(const char *ptr, size_t len)
{
	return (memchr(ptr, '\n', len));
}
619*7c478bd9Sstevel@tonic-gate 
/*
 * Locate the last newline in the len bytes starting at ptr,
 * scanning backwards from ptr + len.
 * Returns a pointer to that newline, or NULL when the range
 * holds none (including when len is 0).
 */
static char *
rfind_nl(const char *ptr, size_t len)
{
	const char	*cur;

	for (cur = ptr + len; cur != ptr; ) {
		if (*--cur == '\n') {
			return ((char *)cur);
		}
	}
	return (NULL);
}
634*7c478bd9Sstevel@tonic-gate 
635*7c478bd9Sstevel@tonic-gate /*
636*7c478bd9Sstevel@tonic-gate  * Duplicate the specified string converting each character
637*7c478bd9Sstevel@tonic-gate  * into a lower case.
638*7c478bd9Sstevel@tonic-gate  */
639*7c478bd9Sstevel@tonic-gate static char *
640*7c478bd9Sstevel@tonic-gate istrdup(const char *s1)
641*7c478bd9Sstevel@tonic-gate {
642*7c478bd9Sstevel@tonic-gate 	static size_t	ibuflen = 0;
643*7c478bd9Sstevel@tonic-gate 	static char	*ibuf = NULL;
644*7c478bd9Sstevel@tonic-gate 	size_t	slen;
645*7c478bd9Sstevel@tonic-gate 	char	*p;
646*7c478bd9Sstevel@tonic-gate 
647*7c478bd9Sstevel@tonic-gate 	slen = strlen(s1);
648*7c478bd9Sstevel@tonic-gate 	if (slen >= ibuflen) {
649*7c478bd9Sstevel@tonic-gate 		/* ibuf does not fit to s1 */
650*7c478bd9Sstevel@tonic-gate 		ibuflen = slen + 1;
651*7c478bd9Sstevel@tonic-gate 		ibuf = realloc(ibuf, ibuflen);
652*7c478bd9Sstevel@tonic-gate 		if (ibuf == NULL) {
653*7c478bd9Sstevel@tonic-gate 			(void) fprintf(stderr,
654*7c478bd9Sstevel@tonic-gate 			    gettext("%s: out of memory\n"), cmdname);
655*7c478bd9Sstevel@tonic-gate 			exit(2);
656*7c478bd9Sstevel@tonic-gate 		}
657*7c478bd9Sstevel@tonic-gate 	}
658*7c478bd9Sstevel@tonic-gate 	p = ibuf;
659*7c478bd9Sstevel@tonic-gate 	do {
660*7c478bd9Sstevel@tonic-gate 		*p++ = tolower(*s1);
661*7c478bd9Sstevel@tonic-gate 	} while (*s1++ != '\0');
662*7c478bd9Sstevel@tonic-gate 	return (ibuf);
663*7c478bd9Sstevel@tonic-gate }
664*7c478bd9Sstevel@tonic-gate 
665*7c478bd9Sstevel@tonic-gate /*
666*7c478bd9Sstevel@tonic-gate  * Do grep on a single file.
667*7c478bd9Sstevel@tonic-gate  * Return true in any lines matched.
668*7c478bd9Sstevel@tonic-gate  *
669*7c478bd9Sstevel@tonic-gate  * We have two strategies:
670*7c478bd9Sstevel@tonic-gate  * The fast one is used when we have a single pattern with
671*7c478bd9Sstevel@tonic-gate  * a string known to occur in the pattern. We can then
672*7c478bd9Sstevel@tonic-gate  * do a BMG match on the whole buffer.
673*7c478bd9Sstevel@tonic-gate  * This is an order of magnitude faster.
674*7c478bd9Sstevel@tonic-gate  * Otherwise we split the buffer into lines,
675*7c478bd9Sstevel@tonic-gate  * and check for a match on each line.
676*7c478bd9Sstevel@tonic-gate  */
677*7c478bd9Sstevel@tonic-gate static int
678*7c478bd9Sstevel@tonic-gate grep(int fd, char *fn)
679*7c478bd9Sstevel@tonic-gate {
680*7c478bd9Sstevel@tonic-gate 	PATTERN *pp;
681*7c478bd9Sstevel@tonic-gate 	off_t	data_len;	/* length of the data chunk */
682*7c478bd9Sstevel@tonic-gate 	off_t	line_len;	/* length of the current line */
683*7c478bd9Sstevel@tonic-gate 	off_t	line_offset;	/* current line's offset from the beginning */
684*7c478bd9Sstevel@tonic-gate 	long long	lineno;
685*7c478bd9Sstevel@tonic-gate 	long long	matches = 0;	/* Number of matching lines */
686*7c478bd9Sstevel@tonic-gate 	int	newlinep;	/* 0 if the last line of file has no newline */
687*7c478bd9Sstevel@tonic-gate 	char	*ptr, *ptrend;
688*7c478bd9Sstevel@tonic-gate 
689*7c478bd9Sstevel@tonic-gate 
690*7c478bd9Sstevel@tonic-gate 	if (patterns == NULL)
691*7c478bd9Sstevel@tonic-gate 		return (0);	/* no patterns to match -- just return */
692*7c478bd9Sstevel@tonic-gate 
693*7c478bd9Sstevel@tonic-gate 	pp = patterns;
694*7c478bd9Sstevel@tonic-gate 
695*7c478bd9Sstevel@tonic-gate 	if (use_bmg) {
696*7c478bd9Sstevel@tonic-gate 		bmgcomp(pp->pattern, strlen(pp->pattern));
697*7c478bd9Sstevel@tonic-gate 	}
698*7c478bd9Sstevel@tonic-gate 
699*7c478bd9Sstevel@tonic-gate 	if (use_wchar && outline == NULL) {
700*7c478bd9Sstevel@tonic-gate 		outbuflen = BUFSIZE + 1;
701*7c478bd9Sstevel@tonic-gate 		outline = malloc(sizeof (wchar_t) * outbuflen);
702*7c478bd9Sstevel@tonic-gate 		if (outline == NULL) {
703*7c478bd9Sstevel@tonic-gate 			(void) fprintf(stderr, gettext("%s: out of memory\n"),
704*7c478bd9Sstevel@tonic-gate 			    cmdname);
705*7c478bd9Sstevel@tonic-gate 			exit(2);
706*7c478bd9Sstevel@tonic-gate 		}
707*7c478bd9Sstevel@tonic-gate 	}
708*7c478bd9Sstevel@tonic-gate 
709*7c478bd9Sstevel@tonic-gate 	if (prntbuf == NULL) {
710*7c478bd9Sstevel@tonic-gate 		prntbuflen = BUFSIZE;
711*7c478bd9Sstevel@tonic-gate 		if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
712*7c478bd9Sstevel@tonic-gate 			(void) fprintf(stderr, gettext("%s: out of memory\n"),
713*7c478bd9Sstevel@tonic-gate 			    cmdname);
714*7c478bd9Sstevel@tonic-gate 			exit(2);
715*7c478bd9Sstevel@tonic-gate 		}
716*7c478bd9Sstevel@tonic-gate 	}
717*7c478bd9Sstevel@tonic-gate 
718*7c478bd9Sstevel@tonic-gate 	line_offset = 0;
719*7c478bd9Sstevel@tonic-gate 	lineno = 0;
720*7c478bd9Sstevel@tonic-gate 	newlinep = 1;
721*7c478bd9Sstevel@tonic-gate 	data_len = 0;
722*7c478bd9Sstevel@tonic-gate 	for (; ; ) {
723*7c478bd9Sstevel@tonic-gate 		long	count;
724*7c478bd9Sstevel@tonic-gate 		off_t	offset = 0;
725*7c478bd9Sstevel@tonic-gate 
726*7c478bd9Sstevel@tonic-gate 		if (data_len == 0) {
727*7c478bd9Sstevel@tonic-gate 			/*
728*7c478bd9Sstevel@tonic-gate 			 * If no data in the buffer, reset ptr
729*7c478bd9Sstevel@tonic-gate 			 */
730*7c478bd9Sstevel@tonic-gate 			ptr = prntbuf;
731*7c478bd9Sstevel@tonic-gate 		}
732*7c478bd9Sstevel@tonic-gate 		if (ptr == prntbuf) {
733*7c478bd9Sstevel@tonic-gate 			/*
734*7c478bd9Sstevel@tonic-gate 			 * The current data chunk starts from prntbuf.
735*7c478bd9Sstevel@tonic-gate 			 * This means either the buffer has no data
736*7c478bd9Sstevel@tonic-gate 			 * or the buffer has no newline.
737*7c478bd9Sstevel@tonic-gate 			 * So, read more data from input.
738*7c478bd9Sstevel@tonic-gate 			 */
739*7c478bd9Sstevel@tonic-gate 			count = read(fd, ptr + data_len, prntbuflen - data_len);
740*7c478bd9Sstevel@tonic-gate 			if (count < 0) {
741*7c478bd9Sstevel@tonic-gate 				/* read error */
742*7c478bd9Sstevel@tonic-gate 				if (cflag) {
743*7c478bd9Sstevel@tonic-gate 					if (outfn) {
744*7c478bd9Sstevel@tonic-gate 						(void) fprintf(stdout,
745*7c478bd9Sstevel@tonic-gate 						    "%s:", fn);
746*7c478bd9Sstevel@tonic-gate 					}
747*7c478bd9Sstevel@tonic-gate 					if (!qflag) {
748*7c478bd9Sstevel@tonic-gate 						(void) fprintf(stdout, "%lld\n",
749*7c478bd9Sstevel@tonic-gate 						    matches);
750*7c478bd9Sstevel@tonic-gate 					}
751*7c478bd9Sstevel@tonic-gate 				}
752*7c478bd9Sstevel@tonic-gate 				return (0);
753*7c478bd9Sstevel@tonic-gate 			} else if (count == 0) {
754*7c478bd9Sstevel@tonic-gate 				/* no new data */
755*7c478bd9Sstevel@tonic-gate 				if (data_len == 0) {
756*7c478bd9Sstevel@tonic-gate 					/* end of file already reached */
757*7c478bd9Sstevel@tonic-gate 					break;
758*7c478bd9Sstevel@tonic-gate 				}
759*7c478bd9Sstevel@tonic-gate 				/* last line of file has no newline */
760*7c478bd9Sstevel@tonic-gate 				ptrend = ptr + data_len;
761*7c478bd9Sstevel@tonic-gate 				newlinep = 0;
762*7c478bd9Sstevel@tonic-gate 				goto L_start_process;
763*7c478bd9Sstevel@tonic-gate 			}
764*7c478bd9Sstevel@tonic-gate 			offset = data_len;
765*7c478bd9Sstevel@tonic-gate 			data_len += count;
766*7c478bd9Sstevel@tonic-gate 		}
767*7c478bd9Sstevel@tonic-gate 
768*7c478bd9Sstevel@tonic-gate 		/*
769*7c478bd9Sstevel@tonic-gate 		 * Look for newline in the chunk
770*7c478bd9Sstevel@tonic-gate 		 * between ptr + offset and ptr + data_len - offset.
771*7c478bd9Sstevel@tonic-gate 		 */
772*7c478bd9Sstevel@tonic-gate 		ptrend = find_nl(ptr + offset, data_len - offset);
773*7c478bd9Sstevel@tonic-gate 		if (ptrend == NULL) {
774*7c478bd9Sstevel@tonic-gate 			/* no newline found in this chunk */
775*7c478bd9Sstevel@tonic-gate 			if (ptr > prntbuf) {
776*7c478bd9Sstevel@tonic-gate 				/*
777*7c478bd9Sstevel@tonic-gate 				 * Move remaining data to the beginning
778*7c478bd9Sstevel@tonic-gate 				 * of the buffer.
779*7c478bd9Sstevel@tonic-gate 				 * Remaining data lie from ptr for
780*7c478bd9Sstevel@tonic-gate 				 * data_len bytes.
781*7c478bd9Sstevel@tonic-gate 				 */
782*7c478bd9Sstevel@tonic-gate 				(void) memmove(prntbuf, ptr, data_len);
783*7c478bd9Sstevel@tonic-gate 			}
784*7c478bd9Sstevel@tonic-gate 			if (data_len == prntbuflen) {
785*7c478bd9Sstevel@tonic-gate 				/*
786*7c478bd9Sstevel@tonic-gate 				 * No enough room in the buffer
787*7c478bd9Sstevel@tonic-gate 				 */
788*7c478bd9Sstevel@tonic-gate 				prntbuflen += BUFSIZE;
789*7c478bd9Sstevel@tonic-gate 				prntbuf = realloc(prntbuf, prntbuflen + 1);
790*7c478bd9Sstevel@tonic-gate 				if (prntbuf == NULL) {
791*7c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
792*7c478bd9Sstevel@tonic-gate 					    gettext("%s: out of memory\n"),
793*7c478bd9Sstevel@tonic-gate 					    cmdname);
794*7c478bd9Sstevel@tonic-gate 					exit(2);
795*7c478bd9Sstevel@tonic-gate 				}
796*7c478bd9Sstevel@tonic-gate 			}
797*7c478bd9Sstevel@tonic-gate 			ptr = prntbuf;
798*7c478bd9Sstevel@tonic-gate 			/* read the next input */
799*7c478bd9Sstevel@tonic-gate 			continue;
800*7c478bd9Sstevel@tonic-gate 		}
801*7c478bd9Sstevel@tonic-gate L_start_process:
802*7c478bd9Sstevel@tonic-gate 
803*7c478bd9Sstevel@tonic-gate 		/*
804*7c478bd9Sstevel@tonic-gate 		 * Beginning of the chunk:	ptr
805*7c478bd9Sstevel@tonic-gate 		 * End of the chunk:		ptr + data_len
806*7c478bd9Sstevel@tonic-gate 		 * Beginning of the line:	ptr
807*7c478bd9Sstevel@tonic-gate 		 * End of the line:		ptrend
808*7c478bd9Sstevel@tonic-gate 		 */
809*7c478bd9Sstevel@tonic-gate 
810*7c478bd9Sstevel@tonic-gate 		if (use_bmg) {
811*7c478bd9Sstevel@tonic-gate 			/*
812*7c478bd9Sstevel@tonic-gate 			 * Use Boyer-Moore-Gosper algorithm to find out if
813*7c478bd9Sstevel@tonic-gate 			 * this chunk (not this line) contains the specified
814*7c478bd9Sstevel@tonic-gate 			 * pattern.  If not, restart from the last line
815*7c478bd9Sstevel@tonic-gate 			 * of this chunk.
816*7c478bd9Sstevel@tonic-gate 			 */
817*7c478bd9Sstevel@tonic-gate 			char	*bline;
818*7c478bd9Sstevel@tonic-gate 			bline = bmgexec(ptr, ptr + data_len);
819*7c478bd9Sstevel@tonic-gate 			if (bline == NULL) {
820*7c478bd9Sstevel@tonic-gate 				/*
821*7c478bd9Sstevel@tonic-gate 				 * No pattern found in this chunk.
822*7c478bd9Sstevel@tonic-gate 				 * Need to find the last line
823*7c478bd9Sstevel@tonic-gate 				 * in this chunk.
824*7c478bd9Sstevel@tonic-gate 				 */
825*7c478bd9Sstevel@tonic-gate 				ptrend = rfind_nl(ptr, data_len);
826*7c478bd9Sstevel@tonic-gate 
827*7c478bd9Sstevel@tonic-gate 				/*
828*7c478bd9Sstevel@tonic-gate 				 * When this chunk does not contain newline,
829*7c478bd9Sstevel@tonic-gate 				 * ptrend becomes NULL, which should happen
830*7c478bd9Sstevel@tonic-gate 				 * when the last line of file does not end
831*7c478bd9Sstevel@tonic-gate 				 * with a newline.  At such a point,
832*7c478bd9Sstevel@tonic-gate 				 * newlinep should have been set to 0.
833*7c478bd9Sstevel@tonic-gate 				 * Therefore, just after jumping to
834*7c478bd9Sstevel@tonic-gate 				 * L_skip_line, the main for-loop quits,
835*7c478bd9Sstevel@tonic-gate 				 * and the line_len value won't be
836*7c478bd9Sstevel@tonic-gate 				 * used.
837*7c478bd9Sstevel@tonic-gate 				 */
838*7c478bd9Sstevel@tonic-gate 				line_len = ptrend - ptr;
839*7c478bd9Sstevel@tonic-gate 				goto L_skip_line;
840*7c478bd9Sstevel@tonic-gate 			}
841*7c478bd9Sstevel@tonic-gate 			if (bline > ptrend) {
842*7c478bd9Sstevel@tonic-gate 				/*
843*7c478bd9Sstevel@tonic-gate 				 * Pattern found not in the first line
844*7c478bd9Sstevel@tonic-gate 				 * of this chunk.
845*7c478bd9Sstevel@tonic-gate 				 * Discard the first line.
846*7c478bd9Sstevel@tonic-gate 				 */
847*7c478bd9Sstevel@tonic-gate 				line_len = ptrend - ptr;
848*7c478bd9Sstevel@tonic-gate 				goto L_skip_line;
849*7c478bd9Sstevel@tonic-gate 			}
850*7c478bd9Sstevel@tonic-gate 			/*
851*7c478bd9Sstevel@tonic-gate 			 * Pattern found in the first line of this chunk.
852*7c478bd9Sstevel@tonic-gate 			 * Using this result.
853*7c478bd9Sstevel@tonic-gate 			 */
854*7c478bd9Sstevel@tonic-gate 			*ptrend = '\0';
855*7c478bd9Sstevel@tonic-gate 			line_len = ptrend - ptr;
856*7c478bd9Sstevel@tonic-gate 
857*7c478bd9Sstevel@tonic-gate 			/*
858*7c478bd9Sstevel@tonic-gate 			 * before jumping to L_next_line,
859*7c478bd9Sstevel@tonic-gate 			 * need to handle xflag if specified
860*7c478bd9Sstevel@tonic-gate 			 */
861*7c478bd9Sstevel@tonic-gate 			if (xflag && (line_len != bmglen ||
862*7c478bd9Sstevel@tonic-gate 				strcmp(bmgpat, ptr) != 0)) {
863*7c478bd9Sstevel@tonic-gate 				/* didn't match */
864*7c478bd9Sstevel@tonic-gate 				pp = NULL;
865*7c478bd9Sstevel@tonic-gate 			} else {
866*7c478bd9Sstevel@tonic-gate 				pp = patterns; /* to make it happen */
867*7c478bd9Sstevel@tonic-gate 			}
868*7c478bd9Sstevel@tonic-gate 			goto L_next_line;
869*7c478bd9Sstevel@tonic-gate 		}
870*7c478bd9Sstevel@tonic-gate 		lineno++;
871*7c478bd9Sstevel@tonic-gate 		/*
872*7c478bd9Sstevel@tonic-gate 		 * Line starts from ptr and ends at ptrend.
873*7c478bd9Sstevel@tonic-gate 		 * line_len will be the length of the line.
874*7c478bd9Sstevel@tonic-gate 		 */
875*7c478bd9Sstevel@tonic-gate 		*ptrend = '\0';
876*7c478bd9Sstevel@tonic-gate 		line_len = ptrend - ptr;
877*7c478bd9Sstevel@tonic-gate 
878*7c478bd9Sstevel@tonic-gate 		/*
879*7c478bd9Sstevel@tonic-gate 		 * From now, the process will be performed based
880*7c478bd9Sstevel@tonic-gate 		 * on the line from ptr to ptrend.
881*7c478bd9Sstevel@tonic-gate 		 */
882*7c478bd9Sstevel@tonic-gate 		if (use_wchar) {
883*7c478bd9Sstevel@tonic-gate 			size_t	len;
884*7c478bd9Sstevel@tonic-gate 
885*7c478bd9Sstevel@tonic-gate 			if (line_len >= outbuflen) {
886*7c478bd9Sstevel@tonic-gate 				outbuflen = line_len + 1;
887*7c478bd9Sstevel@tonic-gate 				outline = realloc(outline,
888*7c478bd9Sstevel@tonic-gate 				    sizeof (wchar_t) * outbuflen);
889*7c478bd9Sstevel@tonic-gate 				if (outline == NULL) {
890*7c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr,
891*7c478bd9Sstevel@tonic-gate 					    gettext("%s: out of memory\n"),
892*7c478bd9Sstevel@tonic-gate 					    cmdname);
893*7c478bd9Sstevel@tonic-gate 					exit(2);
894*7c478bd9Sstevel@tonic-gate 				}
895*7c478bd9Sstevel@tonic-gate 			}
896*7c478bd9Sstevel@tonic-gate 
897*7c478bd9Sstevel@tonic-gate 			len = mbstowcs(outline, ptr, line_len);
898*7c478bd9Sstevel@tonic-gate 			if (len == (size_t)-1) {
899*7c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr, gettext(
900*7c478bd9Sstevel@tonic-gate 	"%s: input file \"%s\": line %lld: invalid multibyte character\n"),
901*7c478bd9Sstevel@tonic-gate 				    cmdname, fn, lineno);
902*7c478bd9Sstevel@tonic-gate 				/* never match a line with invalid sequence */
903*7c478bd9Sstevel@tonic-gate 				goto L_skip_line;
904*7c478bd9Sstevel@tonic-gate 			}
905*7c478bd9Sstevel@tonic-gate 			outline[len] = L'\0';
906*7c478bd9Sstevel@tonic-gate 
907*7c478bd9Sstevel@tonic-gate 			if (iflag) {
908*7c478bd9Sstevel@tonic-gate 				wchar_t	*cp;
909*7c478bd9Sstevel@tonic-gate 				for (cp = outline; *cp != '\0'; cp++) {
910*7c478bd9Sstevel@tonic-gate 					*cp = towlower((wint_t)*cp);
911*7c478bd9Sstevel@tonic-gate 				}
912*7c478bd9Sstevel@tonic-gate 			}
913*7c478bd9Sstevel@tonic-gate 
914*7c478bd9Sstevel@tonic-gate 			if (xflag) {
915*7c478bd9Sstevel@tonic-gate 				for (pp = patterns; pp; pp = pp->next) {
916*7c478bd9Sstevel@tonic-gate 					if (outline[0] == pp->wpattern[0] &&
917*7c478bd9Sstevel@tonic-gate 					    wcscmp(outline,
918*7c478bd9Sstevel@tonic-gate 						pp->wpattern) == 0) {
919*7c478bd9Sstevel@tonic-gate 						/* matched */
920*7c478bd9Sstevel@tonic-gate 						break;
921*7c478bd9Sstevel@tonic-gate 					}
922*7c478bd9Sstevel@tonic-gate 				}
923*7c478bd9Sstevel@tonic-gate 			} else {
924*7c478bd9Sstevel@tonic-gate 				for (pp = patterns; pp; pp = pp->next) {
925*7c478bd9Sstevel@tonic-gate 					if (wcswcs(outline, pp->wpattern)
926*7c478bd9Sstevel@tonic-gate 					    != NULL) {
927*7c478bd9Sstevel@tonic-gate 						/* matched */
928*7c478bd9Sstevel@tonic-gate 						break;
929*7c478bd9Sstevel@tonic-gate 					}
930*7c478bd9Sstevel@tonic-gate 				}
931*7c478bd9Sstevel@tonic-gate 			}
932*7c478bd9Sstevel@tonic-gate 		} else if (Fflag) {
933*7c478bd9Sstevel@tonic-gate 			/* fgrep in byte-oriented handling */
934*7c478bd9Sstevel@tonic-gate 			char	*fptr;
935*7c478bd9Sstevel@tonic-gate 			if (iflag) {
936*7c478bd9Sstevel@tonic-gate 				fptr = istrdup(ptr);
937*7c478bd9Sstevel@tonic-gate 			} else {
938*7c478bd9Sstevel@tonic-gate 				fptr = ptr;
939*7c478bd9Sstevel@tonic-gate 			}
940*7c478bd9Sstevel@tonic-gate 			if (xflag) {
941*7c478bd9Sstevel@tonic-gate 				/* fgrep -x */
942*7c478bd9Sstevel@tonic-gate 				for (pp = patterns; pp; pp = pp->next) {
943*7c478bd9Sstevel@tonic-gate 					if (fptr[0] == pp->pattern[0] &&
944*7c478bd9Sstevel@tonic-gate 					    strcmp(fptr, pp->pattern) == 0) {
945*7c478bd9Sstevel@tonic-gate 						/* matched */
946*7c478bd9Sstevel@tonic-gate 						break;
947*7c478bd9Sstevel@tonic-gate 					}
948*7c478bd9Sstevel@tonic-gate 				}
949*7c478bd9Sstevel@tonic-gate 			} else {
950*7c478bd9Sstevel@tonic-gate 				for (pp = patterns; pp; pp = pp->next) {
951*7c478bd9Sstevel@tonic-gate 					if (strstr(fptr, pp->pattern) != NULL) {
952*7c478bd9Sstevel@tonic-gate 						/* matched */
953*7c478bd9Sstevel@tonic-gate 						break;
954*7c478bd9Sstevel@tonic-gate 					}
955*7c478bd9Sstevel@tonic-gate 				}
956*7c478bd9Sstevel@tonic-gate 			}
957*7c478bd9Sstevel@tonic-gate 		} else {
958*7c478bd9Sstevel@tonic-gate 			/* grep or egrep */
959*7c478bd9Sstevel@tonic-gate 			for (pp = patterns; pp; pp = pp->next) {
960*7c478bd9Sstevel@tonic-gate 				int	rv;
961*7c478bd9Sstevel@tonic-gate 
962*7c478bd9Sstevel@tonic-gate 				rv = regexec(&pp->re, ptr, 0, NULL, 0);
963*7c478bd9Sstevel@tonic-gate 				if (rv == REG_OK) {
964*7c478bd9Sstevel@tonic-gate 					/* matched */
965*7c478bd9Sstevel@tonic-gate 					break;
966*7c478bd9Sstevel@tonic-gate 				}
967*7c478bd9Sstevel@tonic-gate 
968*7c478bd9Sstevel@tonic-gate 				switch (rv) {
969*7c478bd9Sstevel@tonic-gate 				case REG_NOMATCH:
970*7c478bd9Sstevel@tonic-gate 					break;
971*7c478bd9Sstevel@tonic-gate 				case REG_ECHAR:
972*7c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr, gettext(
973*7c478bd9Sstevel@tonic-gate 	    "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
974*7c478bd9Sstevel@tonic-gate 					    cmdname, fn, lineno);
975*7c478bd9Sstevel@tonic-gate 					break;
976*7c478bd9Sstevel@tonic-gate 				default:
977*7c478bd9Sstevel@tonic-gate 					(void) regerror(rv, &pp->re, errstr,
978*7c478bd9Sstevel@tonic-gate 					    sizeof (errstr));
979*7c478bd9Sstevel@tonic-gate 					(void) fprintf(stderr, gettext(
980*7c478bd9Sstevel@tonic-gate 	    "%s: input file \"%s\": line %lld: %s\n"),
981*7c478bd9Sstevel@tonic-gate 					    cmdname, fn, lineno, errstr);
982*7c478bd9Sstevel@tonic-gate 					exit(2);
983*7c478bd9Sstevel@tonic-gate 				}
984*7c478bd9Sstevel@tonic-gate 			}
985*7c478bd9Sstevel@tonic-gate 		}
986*7c478bd9Sstevel@tonic-gate 
987*7c478bd9Sstevel@tonic-gate L_next_line:
988*7c478bd9Sstevel@tonic-gate 		/*
989*7c478bd9Sstevel@tonic-gate 		 * Here, if pp points to non-NULL, something has been matched
990*7c478bd9Sstevel@tonic-gate 		 * to the pattern.
991*7c478bd9Sstevel@tonic-gate 		 */
992*7c478bd9Sstevel@tonic-gate 		if (nvflag == (pp != NULL)) {
993*7c478bd9Sstevel@tonic-gate 			matches++;
994*7c478bd9Sstevel@tonic-gate 			/*
995*7c478bd9Sstevel@tonic-gate 			 * Handle q, l, and c flags.
996*7c478bd9Sstevel@tonic-gate 			 */
997*7c478bd9Sstevel@tonic-gate 			if (qflag) {
998*7c478bd9Sstevel@tonic-gate 				/* no need to continue */
999*7c478bd9Sstevel@tonic-gate 				/*
1000*7c478bd9Sstevel@tonic-gate 				 * End of this line is ptrend.
1001*7c478bd9Sstevel@tonic-gate 				 * We have read up to ptr + data_len.
1002*7c478bd9Sstevel@tonic-gate 				 */
1003*7c478bd9Sstevel@tonic-gate 				off_t	pos;
1004*7c478bd9Sstevel@tonic-gate 				pos = ptr + data_len - (ptrend + 1);
1005*7c478bd9Sstevel@tonic-gate 				(void) lseek(fd, -pos, SEEK_CUR);
1006*7c478bd9Sstevel@tonic-gate 				exit(0);
1007*7c478bd9Sstevel@tonic-gate 			}
1008*7c478bd9Sstevel@tonic-gate 			if (lflag) {
1009*7c478bd9Sstevel@tonic-gate 				(void) printf("%s\n", fn);
1010*7c478bd9Sstevel@tonic-gate 				break;
1011*7c478bd9Sstevel@tonic-gate 			}
1012*7c478bd9Sstevel@tonic-gate 			if (!cflag) {
1013*7c478bd9Sstevel@tonic-gate 				if (outfn) {
1014*7c478bd9Sstevel@tonic-gate 					(void) printf("%s:", fn);
1015*7c478bd9Sstevel@tonic-gate 				}
1016*7c478bd9Sstevel@tonic-gate 				if (bflag) {
1017*7c478bd9Sstevel@tonic-gate 					(void) printf("%lld:", (offset_t)
1018*7c478bd9Sstevel@tonic-gate 					    (line_offset / BSIZE));
1019*7c478bd9Sstevel@tonic-gate 				}
1020*7c478bd9Sstevel@tonic-gate 				if (nflag) {
1021*7c478bd9Sstevel@tonic-gate 					(void) printf("%lld:", lineno);
1022*7c478bd9Sstevel@tonic-gate 				}
1023*7c478bd9Sstevel@tonic-gate 				*ptrend = '\n';
1024*7c478bd9Sstevel@tonic-gate 				(void) fwrite(ptr, 1, line_len + 1, stdout);
1025*7c478bd9Sstevel@tonic-gate 			}
1026*7c478bd9Sstevel@tonic-gate 			if (ferror(stdout)) {
1027*7c478bd9Sstevel@tonic-gate 				return (0);
1028*7c478bd9Sstevel@tonic-gate 			}
1029*7c478bd9Sstevel@tonic-gate 		}
1030*7c478bd9Sstevel@tonic-gate L_skip_line:
1031*7c478bd9Sstevel@tonic-gate 		if (!newlinep)
1032*7c478bd9Sstevel@tonic-gate 			break;
1033*7c478bd9Sstevel@tonic-gate 
1034*7c478bd9Sstevel@tonic-gate 		data_len -= line_len + 1;
1035*7c478bd9Sstevel@tonic-gate 		line_offset += line_len + 1;
1036*7c478bd9Sstevel@tonic-gate 		ptr = ptrend + 1;
1037*7c478bd9Sstevel@tonic-gate 	}
1038*7c478bd9Sstevel@tonic-gate 
1039*7c478bd9Sstevel@tonic-gate 	if (cflag) {
1040*7c478bd9Sstevel@tonic-gate 		if (outfn) {
1041*7c478bd9Sstevel@tonic-gate 			(void) printf("%s:", fn);
1042*7c478bd9Sstevel@tonic-gate 		}
1043*7c478bd9Sstevel@tonic-gate 		if (!qflag) {
1044*7c478bd9Sstevel@tonic-gate 			(void) printf("%lld\n", matches);
1045*7c478bd9Sstevel@tonic-gate 		}
1046*7c478bd9Sstevel@tonic-gate 	}
1047*7c478bd9Sstevel@tonic-gate 	return (matches != 0);
1048*7c478bd9Sstevel@tonic-gate }
1049*7c478bd9Sstevel@tonic-gate 
1050*7c478bd9Sstevel@tonic-gate /*
1051*7c478bd9Sstevel@tonic-gate  * usage message for grep
1052*7c478bd9Sstevel@tonic-gate  */
1053*7c478bd9Sstevel@tonic-gate static void
1054*7c478bd9Sstevel@tonic-gate usage(void)
1055*7c478bd9Sstevel@tonic-gate {
1056*7c478bd9Sstevel@tonic-gate 	if (egrep || fgrep) {
1057*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1058*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
1059*7c478bd9Sstevel@tonic-gate 		    gettext(" [-c|-l|-q] [-bhinsvx] "
1060*7c478bd9Sstevel@tonic-gate 			"pattern_list [file ...]\n"));
1061*7c478bd9Sstevel@tonic-gate 
1062*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "\t%s", cmdname);
1063*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
1064*7c478bd9Sstevel@tonic-gate 		    gettext(" [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1065*7c478bd9Sstevel@tonic-gate 			"[-f pattern_file]... [file...]\n"));
1066*7c478bd9Sstevel@tonic-gate 	} else {
1067*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1068*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
1069*7c478bd9Sstevel@tonic-gate 		    gettext(" [-c|-l|-q] [-bhinsvwx] "
1070*7c478bd9Sstevel@tonic-gate 			"pattern_list [file ...]\n"));
1071*7c478bd9Sstevel@tonic-gate 
1072*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "\t%s", cmdname);
1073*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
1074*7c478bd9Sstevel@tonic-gate 		    gettext(" [-c|-l|-q] [-bhinsvwx] [-e pattern_list]... "
1075*7c478bd9Sstevel@tonic-gate 			"[-f pattern_file]... [file...]\n"));
1076*7c478bd9Sstevel@tonic-gate 
1077*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "\t%s", cmdname);
1078*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
1079*7c478bd9Sstevel@tonic-gate 		    gettext(" -E [-c|-l|-q] [-bhinsvx] "
1080*7c478bd9Sstevel@tonic-gate 			"pattern_list [file ...]\n"));
1081*7c478bd9Sstevel@tonic-gate 
1082*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "\t%s", cmdname);
1083*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
1084*7c478bd9Sstevel@tonic-gate 		    gettext(" -E [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1085*7c478bd9Sstevel@tonic-gate 			"[-f pattern_file]... [file...]\n"));
1086*7c478bd9Sstevel@tonic-gate 
1087*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "\t%s", cmdname);
1088*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
1089*7c478bd9Sstevel@tonic-gate 		    gettext(" -F [-c|-l|-q] [-bhinsvx] "
1090*7c478bd9Sstevel@tonic-gate 			"pattern_list [file ...]\n"));
1091*7c478bd9Sstevel@tonic-gate 
1092*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "\t%s", cmdname);
1093*7c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr,
1094*7c478bd9Sstevel@tonic-gate 		    gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1095*7c478bd9Sstevel@tonic-gate 			"[-f pattern_file]... [file...]\n"));
1096*7c478bd9Sstevel@tonic-gate 	}
1097*7c478bd9Sstevel@tonic-gate 	exit(2);
1098*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
1099*7c478bd9Sstevel@tonic-gate }
1100*7c478bd9Sstevel@tonic-gate 
1101*7c478bd9Sstevel@tonic-gate /*
1102*7c478bd9Sstevel@tonic-gate  * Compile literal pattern into BMG tables
1103*7c478bd9Sstevel@tonic-gate  */
1104*7c478bd9Sstevel@tonic-gate static void
1105*7c478bd9Sstevel@tonic-gate bmgcomp(char *pat, int len)
1106*7c478bd9Sstevel@tonic-gate {
1107*7c478bd9Sstevel@tonic-gate 	int	i;
1108*7c478bd9Sstevel@tonic-gate 	int	tlen;
1109*7c478bd9Sstevel@tonic-gate 	unsigned char	*uc = (unsigned char *)pat;
1110*7c478bd9Sstevel@tonic-gate 
1111*7c478bd9Sstevel@tonic-gate 	bmglen = len;
1112*7c478bd9Sstevel@tonic-gate 	bmgpat = pat;
1113*7c478bd9Sstevel@tonic-gate 
1114*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < M_CSETSIZE; i++) {
1115*7c478bd9Sstevel@tonic-gate 		bmgtab[i] = len;
1116*7c478bd9Sstevel@tonic-gate 	}
1117*7c478bd9Sstevel@tonic-gate 
1118*7c478bd9Sstevel@tonic-gate 	len--;
1119*7c478bd9Sstevel@tonic-gate 	for (tlen = len, i = 0; i <= len; i++, tlen--) {
1120*7c478bd9Sstevel@tonic-gate 		bmgtab[*uc++] = tlen;
1121*7c478bd9Sstevel@tonic-gate 	}
1122*7c478bd9Sstevel@tonic-gate }
1123*7c478bd9Sstevel@tonic-gate 
1124*7c478bd9Sstevel@tonic-gate /*
1125*7c478bd9Sstevel@tonic-gate  * BMG search.
1126*7c478bd9Sstevel@tonic-gate  */
1127*7c478bd9Sstevel@tonic-gate static char *
1128*7c478bd9Sstevel@tonic-gate bmgexec(char *str, char *end)
1129*7c478bd9Sstevel@tonic-gate {
1130*7c478bd9Sstevel@tonic-gate 	int	t;
1131*7c478bd9Sstevel@tonic-gate 	char	*k, *s, *p;
1132*7c478bd9Sstevel@tonic-gate 
1133*7c478bd9Sstevel@tonic-gate 	k = str + bmglen - 1;
1134*7c478bd9Sstevel@tonic-gate 	if (bmglen == 1) {
1135*7c478bd9Sstevel@tonic-gate 		return (memchr(str, bmgpat[0], end - str));
1136*7c478bd9Sstevel@tonic-gate 	}
1137*7c478bd9Sstevel@tonic-gate 	for (; ; ) {
1138*7c478bd9Sstevel@tonic-gate 		/* inner loop, should be most optimized */
1139*7c478bd9Sstevel@tonic-gate 		while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1140*7c478bd9Sstevel@tonic-gate 			k += t;
1141*7c478bd9Sstevel@tonic-gate 		}
1142*7c478bd9Sstevel@tonic-gate 		if (k >= end) {
1143*7c478bd9Sstevel@tonic-gate 			return (NULL);
1144*7c478bd9Sstevel@tonic-gate 		}
1145*7c478bd9Sstevel@tonic-gate 		for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1146*7c478bd9Sstevel@tonic-gate 			if (p == bmgpat) {
1147*7c478bd9Sstevel@tonic-gate 				return (s);
1148*7c478bd9Sstevel@tonic-gate 			}
1149*7c478bd9Sstevel@tonic-gate 		}
1150*7c478bd9Sstevel@tonic-gate 		k++;
1151*7c478bd9Sstevel@tonic-gate 	}
1152*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
1153*7c478bd9Sstevel@tonic-gate }
1154