xref: /illumos-gate/usr/src/cmd/grep/grep.c (revision 52244c0958bdf281ca42932b449f644b4decfdc2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
31 /*	  All Rights Reserved	*/
32 
33 /* Copyright 2012 Nexenta Systems, Inc.  All rights reserved. */
34 
35 /*
36  * Copyright 2013 Damian Bogel. All rights reserved.
37  */
38 
39 /*
40  * grep -- print lines matching (or not matching) a pattern
41  *
42  *	status returns:
43  *		0 - ok, and some matches
44  *		1 - ok, but no matches
45  *		2 - some error
46  */
47 
48 #include <sys/types.h>
49 
50 #include <ctype.h>
51 #include <fcntl.h>
52 #include <locale.h>
53 #include <memory.h>
54 #include <regexpr.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <ftw.h>
60 #include <limits.h>
61 #include <sys/param.h>
62 
63 static const char *errstr[] = {
64 	"Range endpoint too large.",
65 	"Bad number.",
66 	"``\\digit'' out of range.",
67 	"No remembered search string.",
68 	"\\( \\) imbalance.",
69 	"Too many \\(.",
70 	"More than 2 numbers given in \\{ \\}.",
71 	"} expected after \\.",
72 	"First number exceeds second in \\{ \\}.",
73 	"[ ] imbalance.",
74 	"Regular expression overflow.",
75 	"Illegal byte sequence.",
76 	"Unknown regexp error code!!",
77 	NULL
78 };
79 
80 #define	STDIN_FILENAME	gettext("(standard input)")
81 
82 #define	errmsg(msg, arg)	(void) fprintf(stderr, gettext(msg), arg)
83 #define	BLKSIZE	512
84 #define	GBUFSIZ	8192
85 #define	MAX_DEPTH	1000
86 
87 static int	temp;	/* descriptor of the file being searched (0 for stdin) */
88 static long long	lnum;	/* number of the current line within the file */
89 static char	*linebuf;	/* lower-cased copy of the current line (iflag) */
90 static char	*prntbuf = NULL;	/* input buffer, allocated by execute() */
91 static long	fw_lPrntBufLen = 0;	/* allocated size of prntbuf and linebuf */
92 static int	nflag;	/* -n: prefix each line with its line number */
93 static int	bflag;	/* -b: prefix each line with a block number */
94 static int	lflag;	/* -l: print only the names of matching files */
95 static int	cflag;	/* -c: print only a count of selected lines */
96 static int	rflag;	/* -r or -R: descend into directory operands */
97 static int	Rflag;	/* -R: as -r, without FTW_PHYS (see prepare()) */
98 static int	vflag;	/* -v: select lines that do not match */
99 static int	sflag;	/* -s: suppress "can't open" messages */
100 static int	iflag;	/* -i or -y: ignore case */
101 static int	wflag;	/* -w: wrap the pattern in \< and \> */
102 static int	hflag;	/* -h: never print file names; cleared by -H */
103 static int 	Hflag;	/* -H: always print file names; cleared by -h, -l */
104 static int	qflag;	/* -q: no output, exit status only */
105 static int	errflg;	/* number of option errors reported by getopt() */
106 static int	nfile;	/* number of file operands after the pattern */
107 static long long	tln;	/* selected-line count for the current file (-c) */
108 static int	nsucc;	/* 0: nothing selected, 1: selected, 2: error */
109 static int	outfn = 0;	/* set once a directory operand is walked */
110 static int	nlflag;	/* 1 if the current line ended in a newline */
111 static char	*ptr, *ptrend;	/* start and terminator of the current line */
112 static char	*expbuf;	/* compiled pattern returned by compile() */
113 
114 static void	execute(const char *, int);
115 static void	regerr(int);
116 static void	prepare(const char *);
117 static int	recursive(const char *, const struct stat *, int, struct FTW *);
118 static int	succeed(const char *);
119 
120 int
121 main(int argc, char **argv)
122 {
123 	int	c;
124 	char	*arg;
125 	extern int	optind;
126 
127 	(void) setlocale(LC_ALL, "");
128 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
129 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
130 #endif
131 	(void) textdomain(TEXT_DOMAIN);
132 
133 	while ((c = getopt(argc, argv, "hHqblcnRrsviyw")) != -1)
134 		switch (c) {
135 		/* based on options order h or H is set as in GNU grep */
136 		case 'h':
137 			hflag++;
138 			Hflag = 0; /* h excludes H */
139 			break;
140 		case 'H':
141 			if (!lflag) /* H is excluded by l */
142 				Hflag++;
143 			hflag = 0; /* H excludes h */
144 			break;
145 		case 'q':	/* POSIX: quiet: status only */
146 			qflag++;
147 			break;
148 		case 'v':
149 			vflag++;
150 			break;
151 		case 'c':
152 			cflag++;
153 			break;
154 		case 'n':
155 			nflag++;
156 			break;
157 		case 'R':
158 			Rflag++;
159 			/* FALLTHROUGH */
160 		case 'r':
161 			rflag++;
162 			break;
163 		case 'b':
164 			bflag++;
165 			break;
166 		case 's':
167 			sflag++;
168 			break;
169 		case 'l':
170 			lflag++;
171 			Hflag = 0; /* l excludes H */
172 			break;
173 		case 'y':
174 		case 'i':
175 			iflag++;
176 			break;
177 		case 'w':
178 			wflag++;
179 			break;
180 		case '?':
181 			errflg++;
182 		}
183 
184 	if (errflg || (optind >= argc)) {
185 		errmsg("Usage: grep [-c|-l|-q] [-r|-R] -hHbnsviw "
186 		    "pattern file . . .\n",
187 		    (char *)NULL);
188 		exit(2);
189 	}
190 
191 	argv = &argv[optind];
192 	argc -= optind;
193 	nfile = argc - 1;
194 
195 	if (strrchr(*argv, '\n') != NULL)
196 		regerr(41);
197 
198 	if (iflag) {
199 		for (arg = *argv; *arg != NULL; ++arg)
200 			*arg = (char)tolower((int)((unsigned char)*arg));
201 	}
202 
203 	if (wflag) {
204 		unsigned int	wordlen;
205 		char		*wordbuf;
206 
207 		wordlen = strlen(*argv) + 5; /* '\\' '<' *argv '\\' '>' '\0' */
208 		if ((wordbuf = malloc(wordlen)) == NULL) {
209 			errmsg("grep: Out of memory for word\n", (char *)NULL);
210 			exit(2);
211 		}
212 
213 		(void) strcpy(wordbuf, "\\<");
214 		(void) strcat(wordbuf, *argv);
215 		(void) strcat(wordbuf, "\\>");
216 		*argv = wordbuf;
217 	}
218 
219 	expbuf = compile(*argv, (char *)0, (char *)0);
220 	if (regerrno)
221 		regerr(regerrno);
222 
223 	if (--argc == 0)
224 		execute(NULL, 0);
225 	else
226 		while (argc-- > 0)
227 			prepare(*++argv);
228 
229 	return (nsucc == 2 ? 2 : (nsucc == 0 ? 1 : 0));
230 }
231 
232 static void
233 prepare(const char *path)
234 {
235 	struct	stat st;
236 	int	walkflags = FTW_CHDIR;
237 	char	*buf = NULL;
238 
239 	if (rflag) {
240 		if (stat(path, &st) != -1 &&
241 		    (st.st_mode & S_IFMT) == S_IFDIR) {
242 			outfn = 1;
243 
244 			/*
245 			 * Add trailing slash if arg
246 			 * is directory, to resolve symlinks.
247 			 */
248 			if (path[strlen(path) - 1] != '/') {
249 				(void) asprintf(&buf, "%s/", path);
250 				if (buf != NULL)
251 					path = buf;
252 			}
253 
254 			/*
255 			 * Search through subdirs if path is directory.
256 			 * Don't follow symlinks if Rflag is not set.
257 			 */
258 			if (!Rflag)
259 				walkflags |= FTW_PHYS;
260 
261 			if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
262 				if (!sflag)
263 					errmsg("grep: can't open %s\n", path);
264 				nsucc = 2;
265 			}
266 			return;
267 		}
268 	}
269 	execute(path, 0);
270 }
271 
272 static int
273 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
274 {
275 	/*
276 	 * process files and follow symlinks if Rflag set.
277 	 */
278 	if (info != FTW_F) {
279 		if (!sflag &&
280 		    (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
281 			/* report broken symlinks and unreadable files */
282 			errmsg("grep: can't open %s\n", name);
283 		}
284 		return (0);
285 	}
286 
287 	/* skip devices and pipes if Rflag is not set */
288 	if (!Rflag && !S_ISREG(statp->st_mode))
289 		return (0);
290 
291 	/* pass offset to relative name from FTW_CHDIR */
292 	execute(name, ftw->base);
293 	return (0);
294 }
295 
296 static void
297 execute(const char *file, int base)
298 {
299 	char	*lbuf, *p;
300 	long	count;
301 	long	offset = 0;
302 	char	*next_ptr = NULL;
303 	long	next_count = 0;
304 
305 	tln = 0;
306 
307 	if (prntbuf == NULL) {
308 		fw_lPrntBufLen = GBUFSIZ + 1;
309 		if ((prntbuf = malloc(fw_lPrntBufLen)) == NULL) {
310 			exit(2); /* out of memory - BAIL */
311 		}
312 		if ((linebuf = malloc(fw_lPrntBufLen)) == NULL) {
313 			exit(2); /* out of memory - BAIL */
314 		}
315 	}
316 
317 	if (file == NULL) {
318 		temp = 0;
319 		file = STDIN_FILENAME;
320 	} else if ((temp = open(file + base, O_RDONLY)) == -1) {
321 		if (!sflag)
322 			errmsg("grep: can't open %s\n", file);
323 		nsucc = 2;
324 		return;
325 	}
326 
327 	/* read in first block of bytes */
328 	if ((count = read(temp, prntbuf, GBUFSIZ)) <= 0) {
329 		(void) close(temp);
330 
331 		if (cflag && !qflag) {
332 			if (Hflag || (nfile > 1 && !hflag))
333 				(void) fprintf(stdout, "%s:", file);
334 			if (!rflag)
335 			(void) fprintf(stdout, "%lld\n", tln);
336 		}
337 		return;
338 	}
339 
340 	lnum = 0;
341 	ptr = prntbuf;
342 	for (;;) {
343 		/* look for next newline */
344 		if ((ptrend = memchr(ptr + offset, '\n', count)) == NULL) {
345 			offset += count;
346 
347 			/*
348 			 * shift unused data to the beginning of the buffer
349 			 */
350 			if (ptr > prntbuf) {
351 				(void) memmove(prntbuf, ptr, offset);
352 				ptr = prntbuf;
353 			}
354 
355 			/*
356 			 * re-allocate a larger buffer if this one is full
357 			 */
358 			if (offset + GBUFSIZ > fw_lPrntBufLen) {
359 				/*
360 				 * allocate a new buffer and preserve the
361 				 * contents...
362 				 */
363 				fw_lPrntBufLen += GBUFSIZ;
364 				if ((prntbuf = realloc(prntbuf,
365 				    fw_lPrntBufLen)) == NULL)
366 					exit(2);
367 
368 				/*
369 				 * set up a bigger linebuffer (this is only used
370 				 * for case insensitive operations). Contents do
371 				 * not have to be preserved.
372 				 */
373 				free(linebuf);
374 				if ((linebuf = malloc(fw_lPrntBufLen)) == NULL)
375 					exit(2);
376 
377 				ptr = prntbuf;
378 			}
379 
380 			p = prntbuf + offset;
381 			if ((count = read(temp, p, GBUFSIZ)) > 0)
382 				continue;
383 
384 			if (offset == 0)
385 				/* end of file already reached */
386 				break;
387 
388 			/* last line of file has no newline */
389 			ptrend = ptr + offset;
390 			nlflag = 0;
391 		} else {
392 			next_ptr = ptrend + 1;
393 			next_count = offset + count - (next_ptr - ptr);
394 			nlflag = 1;
395 		}
396 		lnum++;
397 		*ptrend = '\0';
398 
399 		if (iflag) {
400 			/*
401 			 * Make a lower case copy of the record
402 			 */
403 			p = ptr;
404 			for (lbuf = linebuf; p < ptrend; )
405 				*lbuf++ = (char)tolower((int)
406 				    (unsigned char)*p++);
407 			*lbuf = '\0';
408 			lbuf = linebuf;
409 		} else
410 			/*
411 			 * Use record as is
412 			 */
413 			lbuf = ptr;
414 
415 		/* lflag only once */
416 		if ((step(lbuf, expbuf) ^ vflag) && succeed(file) == 1)
417 			break;
418 
419 		if (!nlflag)
420 			break;
421 
422 		ptr = next_ptr;
423 		count = next_count;
424 		offset = 0;
425 	}
426 	(void) close(temp);
427 
428 	if (cflag && !qflag) {
429 		if (Hflag || (!hflag && ((nfile > 1) ||
430 		    (rflag && outfn))))
431 			(void) fprintf(stdout, "%s:", file);
432 		(void) fprintf(stdout, "%lld\n", tln);
433 	}
434 }
435 
436 static int
437 succeed(const char *f)
438 {
439 	int nchars;
440 	nsucc = (nsucc == 2) ? 2 : 1;
441 
442 	if (qflag) {
443 		/* no need to continue */
444 		return (1);
445 	}
446 
447 	if (cflag) {
448 		tln++;
449 		return (0);
450 	}
451 
452 	if (lflag) {
453 		(void) fprintf(stdout, "%s\n", f);
454 		return (1);
455 	}
456 
457 	if (Hflag || (!hflag && (nfile > 1 || (rflag && outfn)))) {
458 		/* print filename */
459 		(void) fprintf(stdout, "%s:", f);
460 	}
461 
462 	if (bflag)
463 		/* print block number */
464 		(void) fprintf(stdout, "%lld:", (offset_t)
465 		    ((lseek(temp, (off_t)0, SEEK_CUR) - 1) / BLKSIZE));
466 
467 	if (nflag)
468 		/* print line number */
469 		(void) fprintf(stdout, "%lld:", lnum);
470 
471 	if (nlflag) {
472 		/* newline at end of line */
473 		*ptrend = '\n';
474 		nchars = ptrend - ptr + 1;
475 	} else {
476 		/* don't write sentinel \0 */
477 		nchars = ptrend - ptr;
478 	}
479 
480 	(void) fwrite(ptr, 1, nchars, stdout);
481 	return (0);
482 }
483 
484 static void
485 regerr(int err)
486 {
487 	errmsg("grep: RE error %d: ", err);
488 	switch (err) {
489 		case 11:
490 			err = 0;
491 			break;
492 		case 16:
493 			err = 1;
494 			break;
495 		case 25:
496 			err = 2;
497 			break;
498 		case 41:
499 			err = 3;
500 			break;
501 		case 42:
502 			err = 4;
503 			break;
504 		case 43:
505 			err = 5;
506 			break;
507 		case 44:
508 			err = 6;
509 			break;
510 		case 45:
511 			err = 7;
512 			break;
513 		case 46:
514 			err = 8;
515 			break;
516 		case 49:
517 			err = 9;
518 			break;
519 		case 50:
520 			err = 10;
521 			break;
522 		case 67:
523 			err = 11;
524 			break;
525 		default:
526 			err = 12;
527 			break;
528 	}
529 
530 	errmsg("%s\n", gettext(errstr[err]));
531 	exit(2);
532 }
533