xref: /illumos-gate/usr/src/cmd/grep/grep.c (revision 7e0955bbb1c326d78038afe0d108c8ae4934a78a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 /* Copyright 2012 Nexenta Systems, Inc.  All rights reserved. */
34 
35 /*
36  * grep -- print lines matching (or not matching) a pattern
37  *
38  *	status returns:
39  *		0 - ok, and some matches
40  *		1 - ok, but no matches
41  *		2 - some error
42  */
43 
44 #include <sys/types.h>
45 
46 #include <ctype.h>
47 #include <fcntl.h>
48 #include <locale.h>
49 #include <memory.h>
50 #include <regexpr.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <unistd.h>
55 #include <ftw.h>
56 #include <limits.h>
57 #include <sys/param.h>
58 
/*
 * Human-readable text for regular-expression errors.  regerr() translates
 * the numeric error code into an index into this table; the code that
 * selects each entry is noted alongside it.  The final NULL terminates
 * the table.
 */
static const char *errstr[] = {
	"Range endpoint too large.",			/* code 11 */
	"Bad number.",					/* code 16 */
	"``\\digit'' out of range.",			/* code 25 */
	"No remembered search string.",			/* code 41 */
	"\\( \\) imbalance.",				/* code 42 */
	"Too many \\(.",				/* code 43 */
	"More than 2 numbers given in \\{ \\}.",	/* code 44 */
	"} expected after \\.",				/* code 45 */
	"First number exceeds second in \\{ \\}.",	/* code 46 */
	"[ ] imbalance.",				/* code 49 */
	"Regular expression overflow.",			/* code 50 */
	"Illegal byte sequence.",			/* code 67 */
	"Unknown regexp error code!!",			/* any other code */
	NULL
};
75 
/*
 * errmsg -- write a localized diagnostic to stderr.  `msg' is a printf-style
 * format that is passed through gettext(); `arg' is its single argument.
 */
#define	errmsg(msg, arg)	(void) fprintf(stderr, gettext(msg), arg)

/* divisor used when turning a file offset into a block number for -b */
#define	BLKSIZE	512
/* number of bytes requested per read(), and the buffer growth increment */
#define	GBUFSIZ	8192
/* depth argument handed to nftw() for recursive searches */
#define	MAX_DEPTH	1000
80 
/*
 * Command-line option flags; each is incremented by main() when the
 * corresponding option letter is seen.
 */
static int	bflag;		/* -b: prefix output lines with a block number */
static int	cflag;		/* -c: print a count of selected lines */
static int	hflag;		/* -h: never prefix output with the file name */
static int	iflag;		/* -i, -y: compare in lower case */
static int	lflag;		/* -l: print the file name once, not the lines */
static int	nflag;		/* -n: prefix output lines with a line number */
static int	qflag;		/* -q: no output, exit status only */
static int	rflag;		/* -r or -R: descend into directories */
static int	Rflag;		/* -R: as -r, without FTW_PHYS */
static int	sflag;		/* -s: suppress "can't open" messages */
static int	vflag;		/* -v: select lines that do not match */
static int	wflag;		/* -w: wrap the pattern in \< and \> */
static int	errflg;		/* set when getopt() reports a bad option */

/*
 * Input buffering.
 */
static int	temp;			/* descriptor of the current input */
static char	*prntbuf = NULL;	/* raw input buffer */
static char	*linebuf;		/* lower-cased copy of a line (for -i) */
static long	fw_lPrntBufLen = 0;	/* allocated size of both buffers */
static char	*ptr, *ptrend;		/* start and end of the current line */
static int	nlflag;			/* current line ended with a newline */

/*
 * Matching state and bookkeeping.
 */
static char	*expbuf;		/* compiled regular expression */
static long long	lnum;		/* line number within current input */
static long long	tln;		/* selected-line count for -c */
static int	nfile;			/* number of file operands */
static int	nsucc;			/* 0: no match, 1: match, 2: error */
static int	outfn = 0;		/* set once a directory is searched */

static void	execute(const char *file, int base);
static void	regerr(int err);
static void	prepare(const char *path);
static int	recursive(const char *name, const struct stat *statp,
		    int info, struct FTW *ftw);
static int	succeed(const char *f);
112 
113 int
114 main(int argc, char **argv)
115 {
116 	int	c;
117 	char	*arg;
118 	extern int	optind;
119 
120 	(void) setlocale(LC_ALL, "");
121 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
122 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
123 #endif
124 	(void) textdomain(TEXT_DOMAIN);
125 
126 	while ((c = getopt(argc, argv, "hqblcnRrsviyw")) != -1)
127 		switch (c) {
128 		case 'h':
129 			hflag++;
130 			break;
131 		case 'q':	/* POSIX: quiet: status only */
132 			qflag++;
133 			break;
134 		case 'v':
135 			vflag++;
136 			break;
137 		case 'c':
138 			cflag++;
139 			break;
140 		case 'n':
141 			nflag++;
142 			break;
143 		case 'R':
144 			Rflag++;
145 			/* FALLTHROUGH */
146 		case 'r':
147 			rflag++;
148 			break;
149 		case 'b':
150 			bflag++;
151 			break;
152 		case 's':
153 			sflag++;
154 			break;
155 		case 'l':
156 			lflag++;
157 			break;
158 		case 'y':
159 		case 'i':
160 			iflag++;
161 			break;
162 		case 'w':
163 			wflag++;
164 			break;
165 		case '?':
166 			errflg++;
167 		}
168 
169 	if (errflg || (optind >= argc)) {
170 		errmsg("Usage: grep [-c|-l|-q] [-r|-R] -hbnsviw "
171 		    "pattern file . . .\n",
172 		    (char *)NULL);
173 		exit(2);
174 	}
175 
176 	argv = &argv[optind];
177 	argc -= optind;
178 	nfile = argc - 1;
179 
180 	if (strrchr(*argv, '\n') != NULL)
181 		regerr(41);
182 
183 	if (iflag) {
184 		for (arg = *argv; *arg != NULL; ++arg)
185 			*arg = (char)tolower((int)((unsigned char)*arg));
186 	}
187 
188 	if (wflag) {
189 		unsigned int	wordlen;
190 		char		*wordbuf;
191 
192 		wordlen = strlen(*argv) + 5; /* '\\' '<' *argv '\\' '>' '\0' */
193 		if ((wordbuf = malloc(wordlen)) == NULL) {
194 			errmsg("grep: Out of memory for word\n", (char *)NULL);
195 			exit(2);
196 		}
197 
198 		(void) strcpy(wordbuf, "\\<");
199 		(void) strcat(wordbuf, *argv);
200 		(void) strcat(wordbuf, "\\>");
201 		*argv = wordbuf;
202 	}
203 
204 	expbuf = compile(*argv, (char *)0, (char *)0);
205 	if (regerrno)
206 		regerr(regerrno);
207 
208 	if (--argc == 0)
209 		execute(NULL, 0);
210 	else
211 		while (argc-- > 0)
212 			prepare(*++argv);
213 
214 	return (nsucc == 2 ? 2 : (nsucc == 0 ? 1 : 0));
215 }
216 
217 static void
218 prepare(const char *path)
219 {
220 	struct	stat st;
221 	int	walkflags = FTW_CHDIR;
222 	char	*buf = NULL;
223 
224 	if (rflag) {
225 		if (stat(path, &st) != -1 &&
226 		    (st.st_mode & S_IFMT) == S_IFDIR) {
227 			outfn = 1;
228 
229 			/*
230 			 * Add trailing slash if arg
231 			 * is directory, to resolve symlinks.
232 			 */
233 			if (path[strlen(path) - 1] != '/') {
234 				(void) asprintf(&buf, "%s/", path);
235 				if (buf != NULL)
236 					path = buf;
237 			}
238 
239 			/*
240 			 * Search through subdirs if path is directory.
241 			 * Don't follow symlinks if Rflag is not set.
242 			 */
243 			if (!Rflag)
244 				walkflags |= FTW_PHYS;
245 
246 			if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
247 				if (!sflag)
248 					errmsg("grep: can't open %s\n", path);
249 				nsucc = 2;
250 			}
251 			return;
252 		}
253 	}
254 	execute(path, 0);
255 }
256 
257 static int
258 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
259 {
260 	/*
261 	 * process files and follow symlinks if Rflag set.
262 	 */
263 	if (info != FTW_F) {
264 		if (!sflag &&
265 		    (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
266 			/* report broken symlinks and unreadable files */
267 			errmsg("grep: can't open %s\n", name);
268 		}
269 		return (0);
270 	}
271 
272 	/* skip devices and pipes if Rflag is not set */
273 	if (!Rflag && !S_ISREG(statp->st_mode))
274 		return (0);
275 
276 	/* pass offset to relative name from FTW_CHDIR */
277 	execute(name, ftw->base);
278 	return (0);
279 }
280 
281 static void
282 execute(const char *file, int base)
283 {
284 	char	*lbuf, *p;
285 	long	count;
286 	long	offset = 0;
287 	char	*next_ptr = NULL;
288 	long	next_count = 0;
289 
290 	tln = 0;
291 
292 	if (prntbuf == NULL) {
293 		fw_lPrntBufLen = GBUFSIZ + 1;
294 		if ((prntbuf = malloc(fw_lPrntBufLen)) == NULL) {
295 			exit(2); /* out of memory - BAIL */
296 		}
297 		if ((linebuf = malloc(fw_lPrntBufLen)) == NULL) {
298 			exit(2); /* out of memory - BAIL */
299 		}
300 	}
301 
302 	if (file == NULL)
303 		temp = 0;
304 	else if ((temp = open(file + base, O_RDONLY)) == -1) {
305 		if (!sflag)
306 			errmsg("grep: can't open %s\n", file);
307 		nsucc = 2;
308 		return;
309 	}
310 
311 	/* read in first block of bytes */
312 	if ((count = read(temp, prntbuf, GBUFSIZ)) <= 0) {
313 		(void) close(temp);
314 
315 		if (cflag && !qflag) {
316 			if (nfile > 1 && !hflag && file)
317 				(void) fprintf(stdout, "%s:", file);
318 			if (!rflag)
319 			(void) fprintf(stdout, "%lld\n", tln);
320 		}
321 		return;
322 	}
323 
324 	lnum = 0;
325 	ptr = prntbuf;
326 	for (;;) {
327 		/* look for next newline */
328 		if ((ptrend = memchr(ptr + offset, '\n', count)) == NULL) {
329 			offset += count;
330 
331 			/*
332 			 * shift unused data to the beginning of the buffer
333 			 */
334 			if (ptr > prntbuf) {
335 				(void) memmove(prntbuf, ptr, offset);
336 				ptr = prntbuf;
337 			}
338 
339 			/*
340 			 * re-allocate a larger buffer if this one is full
341 			 */
342 			if (offset + GBUFSIZ > fw_lPrntBufLen) {
343 				/*
344 				 * allocate a new buffer and preserve the
345 				 * contents...
346 				 */
347 				fw_lPrntBufLen += GBUFSIZ;
348 				if ((prntbuf = realloc(prntbuf,
349 				    fw_lPrntBufLen)) == NULL)
350 					exit(2);
351 
352 				/*
353 				 * set up a bigger linebuffer (this is only used
354 				 * for case insensitive operations). Contents do
355 				 * not have to be preserved.
356 				 */
357 				free(linebuf);
358 				if ((linebuf = malloc(fw_lPrntBufLen)) == NULL)
359 					exit(2);
360 
361 				ptr = prntbuf;
362 			}
363 
364 			p = prntbuf + offset;
365 			if ((count = read(temp, p, GBUFSIZ)) > 0)
366 				continue;
367 
368 			if (offset == 0)
369 				/* end of file already reached */
370 				break;
371 
372 			/* last line of file has no newline */
373 			ptrend = ptr + offset;
374 			nlflag = 0;
375 		} else {
376 			next_ptr = ptrend + 1;
377 			next_count = offset + count - (next_ptr - ptr);
378 			nlflag = 1;
379 		}
380 		lnum++;
381 		*ptrend = '\0';
382 
383 		if (iflag) {
384 			/*
385 			 * Make a lower case copy of the record
386 			 */
387 			p = ptr;
388 			for (lbuf = linebuf; p < ptrend; )
389 				*lbuf++ = (char)tolower((int)
390 				    (unsigned char)*p++);
391 			*lbuf = '\0';
392 			lbuf = linebuf;
393 		} else
394 			/*
395 			 * Use record as is
396 			 */
397 			lbuf = ptr;
398 
399 		/* lflag only once */
400 		if ((step(lbuf, expbuf) ^ vflag) && succeed(file) == 1)
401 			break;
402 
403 		if (!nlflag)
404 			break;
405 
406 		ptr = next_ptr;
407 		count = next_count;
408 		offset = 0;
409 	}
410 	(void) close(temp);
411 
412 	if (cflag && !qflag) {
413 		if (!hflag && file && (nfile > 1 ||
414 		    (rflag && outfn)))
415 			(void) fprintf(stdout, "%s:", file);
416 		(void) fprintf(stdout, "%lld\n", tln);
417 	}
418 }
419 
420 static int
421 succeed(const char *f)
422 {
423 	int nchars;
424 	nsucc = (nsucc == 2) ? 2 : 1;
425 
426 	if (f == NULL)
427 		f = "<stdin>";
428 
429 	if (qflag) {
430 		/* no need to continue */
431 		return (1);
432 	}
433 
434 	if (cflag) {
435 		tln++;
436 		return (0);
437 	}
438 
439 	if (lflag) {
440 		(void) fprintf(stdout, "%s\n", f);
441 		return (1);
442 	}
443 
444 	if (!hflag && (nfile > 1 || (rflag && outfn))) {
445 		/* print filename */
446 		(void) fprintf(stdout, "%s:", f);
447 	}
448 
449 	if (bflag)
450 		/* print block number */
451 		(void) fprintf(stdout, "%lld:", (offset_t)
452 		    ((lseek(temp, (off_t)0, SEEK_CUR) - 1) / BLKSIZE));
453 
454 	if (nflag)
455 		/* print line number */
456 		(void) fprintf(stdout, "%lld:", lnum);
457 
458 	if (nlflag) {
459 		/* newline at end of line */
460 		*ptrend = '\n';
461 		nchars = ptrend - ptr + 1;
462 	} else {
463 		/* don't write sentinel \0 */
464 		nchars = ptrend - ptr;
465 	}
466 
467 	(void) fwrite(ptr, 1, nchars, stdout);
468 	return (0);
469 }
470 
471 static void
472 regerr(int err)
473 {
474 	errmsg("grep: RE error %d: ", err);
475 	switch (err) {
476 		case 11:
477 			err = 0;
478 			break;
479 		case 16:
480 			err = 1;
481 			break;
482 		case 25:
483 			err = 2;
484 			break;
485 		case 41:
486 			err = 3;
487 			break;
488 		case 42:
489 			err = 4;
490 			break;
491 		case 43:
492 			err = 5;
493 			break;
494 		case 44:
495 			err = 6;
496 			break;
497 		case 45:
498 			err = 7;
499 			break;
500 		case 46:
501 			err = 8;
502 			break;
503 		case 49:
504 			err = 9;
505 			break;
506 		case 50:
507 			err = 10;
508 			break;
509 		case 67:
510 			err = 11;
511 			break;
512 		default:
513 			err = 12;
514 			break;
515 	}
516 
517 	errmsg("%s\n", gettext(errstr[err]));
518 	exit(2);
519 }
520