xref: /illumos-gate/usr/src/cmd/grep/grep.c (revision edb348833aaacfa1176e502ad38875fd0b2717ab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 /* Copyright 2012 Nexenta Systems, Inc.  All rights reserved. */
34 
35 /*
36  * grep -- print lines matching (or not matching) a pattern
37  *
38  *	status returns:
39  *		0 - ok, and some matches
40  *		1 - ok, but no matches
41  *		2 - some error
42  */
43 
44 #include <sys/types.h>
45 
46 #include <ctype.h>
47 #include <fcntl.h>
48 #include <locale.h>
49 #include <memory.h>
50 #include <regexpr.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <unistd.h>
55 
/*
 * Diagnostic text for regular expression compilation failures.
 *
 * regerr() translates the error code it is given into an index into this
 * table; the code that selects each slot is noted beside it.  Slot 12 is the
 * fallback for any code regerr() does not recognise, and the table is
 * NULL-terminated.
 */
static const char *errstr[] = {
	[0]  = "Range endpoint too large.",		/* code 11 */
	[1]  = "Bad number.",				/* code 16 */
	[2]  = "``\\digit'' out of range.",		/* code 25 */
	[3]  = "No remembered search string.",		/* code 41 */
	[4]  = "\\( \\) imbalance.",			/* code 42 */
	[5]  = "Too many \\(.",				/* code 43 */
	[6]  = "More than 2 numbers given in \\{ \\}.",	/* code 44 */
	[7]  = "} expected after \\.",			/* code 45 */
	[8]  = "First number exceeds second in \\{ \\}.", /* code 46 */
	[9]  = "[ ] imbalance.",			/* code 49 */
	[10] = "Regular expression overflow.",		/* code 50 */
	[11] = "Illegal byte sequence.",		/* code 67 */
	[12] = "Unknown regexp error code!!",		/* anything else */
	[13] = NULL
};
72 
/* Print a one-argument diagnostic on stderr, translating the format string. */
#define	errmsg(msg, arg)	(void) fprintf(stderr, gettext(msg), arg)
#define	BLKSIZE	512	/* divisor used to turn a file offset into a -b block */
#define	GBUFSIZ	8192	/* bytes requested per read(); buffer growth step */

/*
 * Input state shared between execute() and succeed().
 */
static int	temp;			/* descriptor being read */
static char	*prntbuf = NULL;	/* raw input buffer */
static char	*linebuf;		/* lower-cased copy of a line, for -i */
static long	fw_lPrntBufLen = 0;	/* allocated size of both buffers */
static char	*ptr, *ptrend;		/* start / end of the current line */
static int	nlflag;			/* current line ended in a newline */
static char	*expbuf;		/* compiled pattern from compile() */

/*
 * Counters.
 */
static long long	lnum;		/* current line number in this file */
static long long	tln;		/* selected-line count for -c */
static int	nfile;			/* number of file operands */
static int	nsucc;			/* 0: no match, 1: matched, 2: error */

/*
 * Command line options; each is incremented when its letter is seen, and
 * errflg is incremented for an unrecognised option.
 */
static int	nflag, bflag, lflag, cflag, vflag;
static int	sflag, iflag, wflag, hflag, qflag;
static int	errflg;

static void	execute(char *);
static void	regerr(int);
static int	succeed(char *);
103 
104 int
105 main(int argc, char **argv)
106 {
107 	int	c;
108 	char	*arg;
109 	extern int	optind;
110 
111 	(void) setlocale(LC_ALL, "");
112 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
113 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
114 #endif
115 	(void) textdomain(TEXT_DOMAIN);
116 
117 	while ((c = getopt(argc, argv, "hqblcnsviyw")) != -1)
118 		switch (c) {
119 		case 'h':
120 			hflag++;
121 			break;
122 		case 'q':	/* POSIX: quiet: status only */
123 			qflag++;
124 			break;
125 		case 'v':
126 			vflag++;
127 			break;
128 		case 'c':
129 			cflag++;
130 			break;
131 		case 'n':
132 			nflag++;
133 			break;
134 		case 'b':
135 			bflag++;
136 			break;
137 		case 's':
138 			sflag++;
139 			break;
140 		case 'l':
141 			lflag++;
142 			break;
143 		case 'y':
144 		case 'i':
145 			iflag++;
146 			break;
147 		case 'w':
148 			wflag++;
149 			break;
150 		case '?':
151 			errflg++;
152 		}
153 
154 	if (errflg || (optind >= argc)) {
155 		errmsg("Usage: grep [-c|-l|-q] -hbnsviw pattern file . . .\n",
156 		    (char *)NULL);
157 		exit(2);
158 	}
159 
160 	argv = &argv[optind];
161 	argc -= optind;
162 	nfile = argc - 1;
163 
164 	if (strrchr(*argv, '\n') != NULL)
165 		regerr(41);
166 
167 	if (iflag) {
168 		for (arg = *argv; *arg != NULL; ++arg)
169 			*arg = (char)tolower((int)((unsigned char)*arg));
170 	}
171 
172 	if (wflag) {
173 		unsigned int	wordlen;
174 		char		*wordbuf;
175 
176 		wordlen = strlen(*argv) + 5; /* '\\' '<' *argv '\\' '>' '\0' */
177 		if ((wordbuf = malloc(wordlen)) == NULL) {
178 			errmsg("grep: Out of memory for word\n", (char *)NULL);
179 			exit(2);
180 		}
181 
182 		(void) strcpy(wordbuf, "\\<");
183 		(void) strcat(wordbuf, *argv);
184 		(void) strcat(wordbuf, "\\>");
185 		*argv = wordbuf;
186 	}
187 
188 	expbuf = compile(*argv, (char *)0, (char *)0);
189 	if (regerrno)
190 		regerr(regerrno);
191 
192 	if (--argc == 0)
193 		execute(NULL);
194 	else
195 		while (argc-- > 0)
196 			execute(*++argv);
197 
198 	return (nsucc == 2 ? 2 : (nsucc == 0 ? 1 : 0));
199 }
200 
201 static void
202 execute(char *file)
203 {
204 	char	*lbuf, *p;
205 	long	count;
206 	long	offset = 0;
207 	char	*next_ptr = NULL;
208 	long	next_count = 0;
209 
210 	tln = 0;
211 
212 	if (prntbuf == NULL) {
213 		fw_lPrntBufLen = GBUFSIZ + 1;
214 		if ((prntbuf = malloc(fw_lPrntBufLen)) == NULL) {
215 			exit(2); /* out of memory - BAIL */
216 		}
217 		if ((linebuf = malloc(fw_lPrntBufLen)) == NULL) {
218 			exit(2); /* out of memory - BAIL */
219 		}
220 	}
221 
222 	if (file == NULL)
223 		temp = 0;
224 	else if ((temp = open(file, O_RDONLY)) == -1) {
225 		if (!sflag)
226 			errmsg("grep: can't open %s\n", file);
227 		nsucc = 2;
228 		return;
229 	}
230 
231 	/* read in first block of bytes */
232 	if ((count = read(temp, prntbuf, GBUFSIZ)) <= 0) {
233 		(void) close(temp);
234 
235 		if (cflag && !qflag) {
236 			if (nfile > 1 && !hflag && file)
237 				(void) fprintf(stdout, "%s:", file);
238 			(void) fprintf(stdout, "%lld\n", tln);
239 		}
240 		return;
241 	}
242 
243 	lnum = 0;
244 	ptr = prntbuf;
245 	for (;;) {
246 		/* look for next newline */
247 		if ((ptrend = memchr(ptr + offset, '\n', count)) == NULL) {
248 			offset += count;
249 
250 			/*
251 			 * shift unused data to the beginning of the buffer
252 			 */
253 			if (ptr > prntbuf) {
254 				(void) memmove(prntbuf, ptr, offset);
255 				ptr = prntbuf;
256 			}
257 
258 			/*
259 			 * re-allocate a larger buffer if this one is full
260 			 */
261 			if (offset + GBUFSIZ > fw_lPrntBufLen) {
262 				/*
263 				 * allocate a new buffer and preserve the
264 				 * contents...
265 				 */
266 				fw_lPrntBufLen += GBUFSIZ;
267 				if ((prntbuf = realloc(prntbuf,
268 				    fw_lPrntBufLen)) == NULL)
269 					exit(2);
270 
271 				/*
272 				 * set up a bigger linebuffer (this is only used
273 				 * for case insensitive operations). Contents do
274 				 * not have to be preserved.
275 				 */
276 				free(linebuf);
277 				if ((linebuf = malloc(fw_lPrntBufLen)) == NULL)
278 					exit(2);
279 
280 				ptr = prntbuf;
281 			}
282 
283 			p = prntbuf + offset;
284 			if ((count = read(temp, p, GBUFSIZ)) > 0)
285 				continue;
286 
287 			if (offset == 0)
288 				/* end of file already reached */
289 				break;
290 
291 			/* last line of file has no newline */
292 			ptrend = ptr + offset;
293 			nlflag = 0;
294 		} else {
295 			next_ptr = ptrend + 1;
296 			next_count = offset + count - (next_ptr - ptr);
297 			nlflag = 1;
298 		}
299 		lnum++;
300 		*ptrend = '\0';
301 
302 		if (iflag) {
303 			/*
304 			 * Make a lower case copy of the record
305 			 */
306 			p = ptr;
307 			for (lbuf = linebuf; p < ptrend; )
308 				*lbuf++ = (char)tolower((int)
309 				    (unsigned char)*p++);
310 			*lbuf = '\0';
311 			lbuf = linebuf;
312 		} else
313 			/*
314 			 * Use record as is
315 			 */
316 			lbuf = ptr;
317 
318 		/* lflag only once */
319 		if ((step(lbuf, expbuf) ^ vflag) && succeed(file) == 1)
320 			break;
321 
322 		if (!nlflag)
323 			break;
324 
325 		ptr = next_ptr;
326 		count = next_count;
327 		offset = 0;
328 	}
329 	(void) close(temp);
330 
331 	if (cflag && !qflag) {
332 		if (nfile > 1 && !hflag && file)
333 			(void) fprintf(stdout, "%s:", file);
334 		(void) fprintf(stdout, "%lld\n", tln);
335 	}
336 }
337 
338 static int
339 succeed(char *f)
340 {
341 	int nchars;
342 	nsucc = (nsucc == 2) ? 2 : 1;
343 
344 	if (f == NULL)
345 		f = "<stdin>";
346 
347 	if (qflag) {
348 		/* no need to continue */
349 		return (1);
350 	}
351 
352 	if (cflag) {
353 		tln++;
354 		return (0);
355 	}
356 
357 	if (lflag) {
358 		(void) fprintf(stdout, "%s\n", f);
359 		return (1);
360 	}
361 
362 	if (nfile > 1 && !hflag)
363 		/* print filename */
364 		(void) fprintf(stdout, "%s:", f);
365 
366 	if (bflag)
367 		/* print block number */
368 		(void) fprintf(stdout, "%lld:", (offset_t)
369 		    ((lseek(temp, (off_t)0, SEEK_CUR) - 1) / BLKSIZE));
370 
371 	if (nflag)
372 		/* print line number */
373 		(void) fprintf(stdout, "%lld:", lnum);
374 
375 	if (nlflag) {
376 		/* newline at end of line */
377 		*ptrend = '\n';
378 		nchars = ptrend - ptr + 1;
379 	} else {
380 		/* don't write sentinel \0 */
381 		nchars = ptrend - ptr;
382 	}
383 
384 	(void) fwrite(ptr, 1, nchars, stdout);
385 	return (0);
386 }
387 
388 static void
389 regerr(int err)
390 {
391 	errmsg("grep: RE error %d: ", err);
392 	switch (err) {
393 		case 11:
394 			err = 0;
395 			break;
396 		case 16:
397 			err = 1;
398 			break;
399 		case 25:
400 			err = 2;
401 			break;
402 		case 41:
403 			err = 3;
404 			break;
405 		case 42:
406 			err = 4;
407 			break;
408 		case 43:
409 			err = 5;
410 			break;
411 		case 44:
412 			err = 6;
413 			break;
414 		case 45:
415 			err = 7;
416 			break;
417 		case 46:
418 			err = 8;
419 			break;
420 		case 49:
421 			err = 9;
422 			break;
423 		case 50:
424 			err = 10;
425 			break;
426 		case 67:
427 			err = 11;
428 			break;
429 		default:
430 			err = 12;
431 			break;
432 	}
433 
434 	errmsg("%s\n", gettext(errstr[err]));
435 	exit(2);
436 }
437