xref: /illumos-gate/usr/src/cmd/grep/grep.c (revision f498645a3eecf2ddd304b4ea9c7f1b4c155ff79e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 /*
36  * grep -- print lines matching (or not matching) a pattern
37  *
38  *	status returns:
39  *		0 - ok, and some matches
40  *		1 - ok, but no matches
41  *		2 - some error
42  */
43 
44 #include <sys/types.h>
45 
46 #include <ctype.h>
47 #include <fcntl.h>
48 #include <locale.h>
49 #include <memory.h>
50 #include <regexpr.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <unistd.h>
55 
/*
 * Diagnostic text for regular expression compilation failures.
 *
 * regerr() translates a regerrno value into an index into this table; the
 * index and the regerrno value that selects it are noted beside each entry.
 * Codes that regerr() does not recognize select the "Unknown" entry.  The
 * table is NULL-terminated.
 */
static const char *errstr[] = {
	/*  0: regerrno 11 */
	"Range endpoint too large.",
	/*  1: regerrno 16 */
	"Bad number.",
	/*  2: regerrno 25 */
	"``\\digit'' out of range.",
	/*  3: regerrno 41 */
	"No remembered search string.",
	/*  4: regerrno 42 */
	"\\( \\) imbalance.",
	/*  5: regerrno 43 */
	"Too many \\(.",
	/*  6: regerrno 44 */
	"More than 2 numbers given in \\{ \\}.",
	/*  7: regerrno 45 */
	"} expected after \\.",
	/*  8: regerrno 46 */
	"First number exceeds second in \\{ \\}.",
	/*  9: regerrno 49 */
	"[ ] imbalance.",
	/* 10: regerrno 50 */
	"Regular expression overflow.",
	/* 11: regerrno 67 */
	"Illegal byte sequence.",
	/* 12: any other code */
	"Unknown regexp error code!!",
	/* 13: end-of-table marker */
	NULL
};
72 
/*
 * Print a localized diagnostic on stderr.  The format is looked up with
 * gettext() and must consume exactly one argument (callers with nothing to
 * print pass a dummy argument).
 */
#define	errmsg(msg, arg)	(void) fprintf(stderr, gettext(msg), arg)

#define	BLKSIZE	512	/* divisor used for the -b block number */
#define	GBUFSIZ	8192	/* bytes requested per read(); buffer growth step */

/*
 * Command line option flags; each is incremented by main() when the
 * corresponding option letter is seen.
 */
static int	hflag;		/* -h */
static int	bflag;		/* -b */
static int	lflag;		/* -l */
static int	cflag;		/* -c */
static int	nflag;		/* -n */
static int	sflag;		/* -s */
static int	vflag;		/* -v */
static int	iflag;		/* -i and -y */
static int	wflag;		/* -w */
static int	errflg;		/* getopt() reported a bad option */

/*
 * Result bookkeeping.
 */
static int		nfile;	/* number of file operands */
static int		nsucc;	/* 0: no match yet, 1: matched, 2: error */
static long long	lnum;	/* number of the line being examined */
static long long	tln;	/* matches counted in the current file (-c) */

/*
 * Input state shared between execute() and succeed().
 */
static int	temp;			/* descriptor being searched */
static char	*prntbuf = NULL;	/* raw input buffer */
static char	*linebuf;		/* lower-cased copy of a line (-i) */
static long	fw_lPrntBufLen = 0;	/* allocated size of both buffers */
static char	*ptr, *ptrend;		/* start and end of the current line */
static int	nlflag;			/* current line ended with a newline */

static char	*expbuf;		/* compiled regular expression */

static void	execute(char *file);
static void	regerr(int err);
static int	succeed(char *f);
102 
103 int
104 main(int argc, char **argv)
105 {
106 	int	c;
107 	char	*arg;
108 	extern int	optind;
109 
110 	(void) setlocale(LC_ALL, "");
111 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
112 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
113 #endif
114 	(void) textdomain(TEXT_DOMAIN);
115 
116 	while ((c = getopt(argc, argv, "hblcnsviyw")) != -1)
117 		switch (c) {
118 		case 'h':
119 			hflag++;
120 			break;
121 		case 'v':
122 			vflag++;
123 			break;
124 		case 'c':
125 			cflag++;
126 			break;
127 		case 'n':
128 			nflag++;
129 			break;
130 		case 'b':
131 			bflag++;
132 			break;
133 		case 's':
134 			sflag++;
135 			break;
136 		case 'l':
137 			lflag++;
138 			break;
139 		case 'y':
140 		case 'i':
141 			iflag++;
142 			break;
143 		case 'w':
144 			wflag++;
145 			break;
146 		case '?':
147 			errflg++;
148 		}
149 
150 	if (errflg || (optind >= argc)) {
151 		errmsg("Usage: grep -hblcnsviw pattern file . . .\n",
152 		    (char *)NULL);
153 		exit(2);
154 	}
155 
156 	argv = &argv[optind];
157 	argc -= optind;
158 	nfile = argc - 1;
159 
160 	if (strrchr(*argv, '\n') != NULL)
161 		regerr(41);
162 
163 	if (iflag) {
164 		for (arg = *argv; *arg != NULL; ++arg)
165 			*arg = (char)tolower((int)((unsigned char)*arg));
166 	}
167 
168 	if (wflag) {
169 		unsigned int	wordlen;
170 		char		*wordbuf;
171 
172 		wordlen = strlen(*argv) + 5; /* '\\' '<' *argv '\\' '>' '\0' */
173 		if ((wordbuf = malloc(wordlen)) == NULL) {
174 			errmsg("grep: Out of memory for word\n", (char *)NULL);
175 			exit(2);
176 		}
177 
178 		(void) strcpy(wordbuf, "\\<");
179 		(void) strcat(wordbuf, *argv);
180 		(void) strcat(wordbuf, "\\>");
181 		*argv = wordbuf;
182 	}
183 
184 	expbuf = compile(*argv, (char *)0, (char *)0);
185 	if (regerrno)
186 		regerr(regerrno);
187 
188 	if (--argc == 0)
189 		execute(NULL);
190 	else
191 		while (argc-- > 0)
192 			execute(*++argv);
193 
194 	return (nsucc == 2 ? 2 : (nsucc == 0 ? 1 : 0));
195 }
196 
197 static void
198 execute(char *file)
199 {
200 	char	*lbuf, *p;
201 	long	count;
202 	long	offset = 0;
203 	char	*next_ptr = NULL;
204 	long	next_count = 0;
205 
206 	tln = 0;
207 
208 	if (prntbuf == NULL) {
209 		fw_lPrntBufLen = GBUFSIZ + 1;
210 		if ((prntbuf = malloc(fw_lPrntBufLen)) == NULL) {
211 			exit(2); /* out of memory - BAIL */
212 		}
213 		if ((linebuf = malloc(fw_lPrntBufLen)) == NULL) {
214 			exit(2); /* out of memory - BAIL */
215 		}
216 	}
217 
218 	if (file == NULL)
219 		temp = 0;
220 	else if ((temp = open(file, O_RDONLY)) == -1) {
221 		if (!sflag)
222 			errmsg("grep: can't open %s\n", file);
223 		nsucc = 2;
224 		return;
225 	}
226 
227 	/* read in first block of bytes */
228 	if ((count = read(temp, prntbuf, GBUFSIZ)) <= 0) {
229 		(void) close(temp);
230 
231 		if (cflag) {
232 			if (nfile > 1 && !hflag && file)
233 				(void) fprintf(stdout, "%s:", file);
234 			(void) fprintf(stdout, "%lld\n", tln);
235 		}
236 		return;
237 	}
238 
239 	lnum = 0;
240 	ptr = prntbuf;
241 	for (;;) {
242 		/* look for next newline */
243 		if ((ptrend = memchr(ptr + offset, '\n', count)) == NULL) {
244 			offset += count;
245 
246 			/*
247 			 * shift unused data to the beginning of the buffer
248 			 */
249 			if (ptr > prntbuf) {
250 				(void) memmove(prntbuf, ptr, offset);
251 				ptr = prntbuf;
252 			}
253 
254 			/*
255 			 * re-allocate a larger buffer if this one is full
256 			 */
257 			if (offset + GBUFSIZ > fw_lPrntBufLen) {
258 				/*
259 				 * allocate a new buffer and preserve the
260 				 * contents...
261 				 */
262 				fw_lPrntBufLen += GBUFSIZ;
263 				if ((prntbuf = realloc(prntbuf,
264 				    fw_lPrntBufLen)) == NULL)
265 					exit(2);
266 
267 				/*
268 				 * set up a bigger linebuffer (this is only used
269 				 * for case insensitive operations). Contents do
270 				 * not have to be preserved.
271 				 */
272 				free(linebuf);
273 				if ((linebuf = malloc(fw_lPrntBufLen)) == NULL)
274 					exit(2);
275 
276 				ptr = prntbuf;
277 			}
278 
279 			p = prntbuf + offset;
280 			if ((count = read(temp, p, GBUFSIZ)) > 0)
281 				continue;
282 
283 			if (offset == 0)
284 				/* end of file already reached */
285 				break;
286 
287 			/* last line of file has no newline */
288 			ptrend = ptr + offset;
289 			nlflag = 0;
290 		} else {
291 			next_ptr = ptrend + 1;
292 			next_count = offset + count - (next_ptr - ptr);
293 			nlflag = 1;
294 		}
295 		lnum++;
296 		*ptrend = '\0';
297 
298 		if (iflag) {
299 			/*
300 			 * Make a lower case copy of the record
301 			 */
302 			p = ptr;
303 			for (lbuf = linebuf; p < ptrend; )
304 				*lbuf++ = (char)tolower((int)
305 				    (unsigned char)*p++);
306 			*lbuf = '\0';
307 			lbuf = linebuf;
308 		} else
309 			/*
310 			 * Use record as is
311 			 */
312 			lbuf = ptr;
313 
314 		/* lflag only once */
315 		if ((step(lbuf, expbuf) ^ vflag) && succeed(file) == 1)
316 			break;
317 
318 		if (!nlflag)
319 			break;
320 
321 		ptr = next_ptr;
322 		count = next_count;
323 		offset = 0;
324 	}
325 	(void) close(temp);
326 
327 	if (cflag) {
328 		if (nfile > 1 && !hflag && file)
329 			(void) fprintf(stdout, "%s:", file);
330 		(void) fprintf(stdout, "%lld\n", tln);
331 	}
332 }
333 
334 static int
335 succeed(char *f)
336 {
337 	int nchars;
338 	nsucc = (nsucc == 2) ? 2 : 1;
339 
340 	if (f == NULL)
341 		f = "<stdin>";
342 
343 	if (cflag) {
344 		tln++;
345 		return (0);
346 	}
347 
348 	if (lflag) {
349 		(void) fprintf(stdout, "%s\n", f);
350 		return (1);
351 	}
352 
353 	if (nfile > 1 && !hflag)
354 		/* print filename */
355 		(void) fprintf(stdout, "%s:", f);
356 
357 	if (bflag)
358 		/* print block number */
359 		(void) fprintf(stdout, "%lld:", (offset_t)
360 		    ((lseek(temp, (off_t)0, SEEK_CUR) - 1) / BLKSIZE));
361 
362 	if (nflag)
363 		/* print line number */
364 		(void) fprintf(stdout, "%lld:", lnum);
365 
366 	if (nlflag) {
367 		/* newline at end of line */
368 		*ptrend = '\n';
369 		nchars = ptrend - ptr + 1;
370 	} else {
371 		/* don't write sentinel \0 */
372 		nchars = ptrend - ptr;
373 	}
374 
375 	(void) fwrite(ptr, 1, nchars, stdout);
376 	return (0);
377 }
378 
379 static void
380 regerr(int err)
381 {
382 	errmsg("grep: RE error %d: ", err);
383 	switch (err) {
384 		case 11:
385 			err = 0;
386 			break;
387 		case 16:
388 			err = 1;
389 			break;
390 		case 25:
391 			err = 2;
392 			break;
393 		case 41:
394 			err = 3;
395 			break;
396 		case 42:
397 			err = 4;
398 			break;
399 		case 43:
400 			err = 5;
401 			break;
402 		case 44:
403 			err = 6;
404 			break;
405 		case 45:
406 			err = 7;
407 			break;
408 		case 46:
409 			err = 8;
410 			break;
411 		case 49:
412 			err = 9;
413 			break;
414 		case 50:
415 			err = 10;
416 			break;
417 		case 67:
418 			err = 11;
419 			break;
420 		default:
421 			err = 12;
422 			break;
423 	}
424 
425 	errmsg("%s\n", gettext(errstr[err]));
426 	exit(2);
427 }
428