xref: /freebsd/usr.bin/grep/util.c (revision d841ecb30ddcc9855e75434c714eeecd5b4b714b)
1 /*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/stat.h>
34 #include <sys/types.h>
35 
36 #include <ctype.h>
37 #include <err.h>
38 #include <errno.h>
39 #include <fnmatch.h>
40 #include <fts.h>
41 #include <libgen.h>
42 #include <stdbool.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <wchar.h>
48 #include <wctype.h>
49 
50 #include "grep.h"
51 
52 static int	 linesqueued;	/* lines handed to enqueue() since the counter was last zeroed */
53 static int	 procline(struct str *l, int);	/* forward declaration; defined later in this file */
54 
55 bool
56 file_matching(const char *fname)
57 {
58 	char *fname_base;
59 	bool ret;
60 
61 	ret = finclude ? false : true;
62 	fname_base = basename(fname);
63 
64 	for (unsigned int i = 0; i < fpatterns; ++i) {
65 		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
66 		    fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
67 			if (fpattern[i].mode == EXCL_PAT)
68 				return (false);
69 			else
70 				ret = true;
71 		}
72 	}
73 	return (ret);
74 }
75 
76 static inline bool
77 dir_matching(const char *dname)
78 {
79 	bool ret;
80 
81 	ret = dinclude ? false : true;
82 
83 	for (unsigned int i = 0; i < dpatterns; ++i) {
84 		if (dname != NULL &&
85 		    fnmatch(dname, dpattern[i].pat, 0) == 0) {
86 			if (dpattern[i].mode == EXCL_PAT)
87 				return (false);
88 			else
89 				ret = true;
90 		}
91 	}
92 	return (ret);
93 }
94 
95 /*
96  * Processes a directory when a recursive search is performed with
97  * the -R option.  Each appropriate file is passed to procfile().
98  */
99 int
100 grep_tree(char **argv)
101 {
102 	FTS *fts;
103 	FTSENT *p;
104 	char *d, *dir = NULL;
105 	int c, fts_flags;
106 	bool ok;
107 
108 	c = fts_flags = 0;
109 
110 	switch(linkbehave) {
111 	case LINK_EXPLICIT:
112 		fts_flags = FTS_COMFOLLOW;
113 		break;
114 	case LINK_SKIP:
115 		fts_flags = FTS_PHYSICAL;
116 		break;
117 	default:
118 		fts_flags = FTS_LOGICAL;
119 
120 	}
121 
122 	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
123 
124 	if (!(fts = fts_open(argv, fts_flags, NULL)))
125 		err(2, "fts_open");
126 	while ((p = fts_read(fts)) != NULL) {
127 		switch (p->fts_info) {
128 		case FTS_DNR:
129 			/* FALLTHROUGH */
130 		case FTS_ERR:
131 			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
132 			break;
133 		case FTS_D:
134 			/* FALLTHROUGH */
135 		case FTS_DP:
136 			break;
137 		case FTS_DC:
138 			/* Print a warning for recursive directory loop */
139 			warnx("warning: %s: recursive directory loop",
140 				p->fts_path);
141 			break;
142 		default:
143 			/* Check for file exclusion/inclusion */
144 			ok = true;
145 			if (dexclude || dinclude) {
146 				if ((d = strrchr(p->fts_path, '/')) != NULL) {
147 					dir = grep_malloc(sizeof(char) *
148 					    (d - p->fts_path + 1));
149 					memcpy(dir, p->fts_path,
150 					    d - p->fts_path);
151 					dir[d - p->fts_path] = '\0';
152 				}
153 				ok = dir_matching(dir);
154 				free(dir);
155 				dir = NULL;
156 			}
157 			if (fexclude || finclude)
158 				ok &= file_matching(p->fts_path);
159 
160 			if (ok)
161 				c += procfile(p->fts_path);
162 			break;
163 		}
164 	}
165 
166 	fts_close(fts);
167 	return (c);
168 }
169 
170 /*
171  * Opens a file and processes it.  Each file is processed line-by-line
172  * passing the lines to procline().
173  */
174 int
175 procfile(const char *fn)
176 {
177 	struct file *f;
178 	struct stat sb;
179 	struct str ln;
180 	mode_t s;
181 	int c, t;
182 
183 	if (mflag && (mcount <= 0))
184 		return (0);
185 
186 	if (strcmp(fn, "-") == 0) {
187 		fn = label != NULL ? label : getstr(1);
188 		f = grep_open(NULL);
189 	} else {
190 		if (!stat(fn, &sb)) {
191 			/* Check if we need to process the file */
192 			s = sb.st_mode & S_IFMT;
193 			if (s == S_IFDIR && dirbehave == DIR_SKIP)
194 				return (0);
195 			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
196 				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
197 					return (0);
198 		}
199 		f = grep_open(fn);
200 	}
201 	if (f == NULL) {
202 		if (!sflag)
203 			warn("%s", fn);
204 		if (errno == ENOENT)
205 			notfound = true;
206 		return (0);
207 	}
208 
209 	ln.file = grep_malloc(strlen(fn) + 1);
210 	strcpy(ln.file, fn);
211 	ln.line_no = 0;
212 	ln.len = 0;
213 	linesqueued = 0;
214 	tail = 0;
215 	ln.off = -1;
216 
217 	for (c = 0;  c == 0 || !(lflag || qflag); ) {
218 		ln.off += ln.len + 1;
219 		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
220 			if (ln.line_no == 0 && matchall)
221 				exit(0);
222 			else
223 				break;
224 		}
225 		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
226 			--ln.len;
227 		ln.line_no++;
228 
229 		/* Return if we need to skip a binary file */
230 		if (f->binary && binbehave == BINFILE_SKIP) {
231 			grep_close(f);
232 			free(ln.file);
233 			free(f);
234 			return (0);
235 		}
236 		/* Process the file line-by-line */
237 		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
238 			enqueue(&ln);
239 			linesqueued++;
240 		}
241 		c += t;
242 
243 		/* Count the matches if we have a match limit */
244 		if (mflag) {
245 			mcount -= t;
246 			if (mcount <= 0)
247 				break;
248 		}
249 	}
250 	if (Bflag > 0)
251 		clearqueue();
252 	grep_close(f);
253 
254 	if (cflag) {
255 		if (!hflag)
256 			printf("%s:", ln.file);
257 		printf("%u\n", c);
258 	}
259 	if (lflag && !qflag && c != 0)
260 		printf("%s\n", fn);
261 	if (Lflag && !qflag && c == 0)
262 		printf("%s\n", fn);
263 	if (c && !cflag && !lflag && !Lflag &&
264 	    binbehave == BINFILE_BIN && f->binary && !qflag)
265 		printf(getstr(8), fn);
266 
267 	free(ln.file);
268 	free(f);
269 	return (c);
270 }
271 
272 #define iswword(x)	(iswalnum((x)) || (x) == L'_')
273 
274 /*
275  * Processes a line comparing it with the specified patterns.  Each pattern
276  * is looped to be compared along with the full string, saving each and every
277  * match, which is necessary to colorize the output and to count the
278  * matches.  The matching lines are passed to printline() to display the
279  * appropriate output.
280  */
281 static int
282 procline(struct str *l, int nottext)
283 {
284 	regmatch_t matches[MAX_LINE_MATCHES];
285 	regmatch_t pmatch;
286 	size_t st = 0;
287 	unsigned int i;
288 	int c = 0, m = 0, r = 0;
289 
290 	if (!matchall) {
291 		/* Loop to process the whole line */
292 		while (st <= l->len) {
293 			pmatch.rm_so = st;
294 			pmatch.rm_eo = l->len;
295 
296 			/* Loop to compare with all the patterns */
297 			for (i = 0; i < patterns; i++) {
298 /*
299  * XXX: grep_search() is a workaround for speed up and should be
300  * removed in the future.  See fastgrep.c.
301  */
302 				if (fg_pattern[i].pattern) {
303 					r = grep_search(&fg_pattern[i],
304 					    (unsigned char *)l->dat,
305 					    l->len, &pmatch);
306 					r = (r == 0) ? 0 : REG_NOMATCH;
307 					st = pmatch.rm_eo;
308 				} else {
309 					r = regexec(&r_pattern[i], l->dat, 1,
310 					    &pmatch, eflags);
311 					r = (r == 0) ? 0 : REG_NOMATCH;
312 					st = pmatch.rm_eo;
313 				}
314 				if (r == REG_NOMATCH)
315 					continue;
316 				/* Check for full match */
317 				if (r == 0 && xflag)
318 					if (pmatch.rm_so != 0 ||
319 					    (size_t)pmatch.rm_eo != l->len)
320 						r = REG_NOMATCH;
321 				/* Check for whole word match */
322 				if (r == 0 && fg_pattern[i].word &&
323 				    pmatch.rm_so != 0) {
324 					wint_t wbegin, wend;
325 
326 					wbegin = wend = L' ';
327 					if (pmatch.rm_so != 0 &&
328 					    sscanf(&l->dat[pmatch.rm_so - 1],
329 					    "%lc", &wbegin) != 1)
330 						r = REG_NOMATCH;
331 					else if ((size_t)pmatch.rm_eo != l->len &&
332 					    sscanf(&l->dat[pmatch.rm_eo],
333 					    "%lc", &wend) != 1)
334 						r = REG_NOMATCH;
335 					else if (iswword(wbegin) || iswword(wend))
336 						r = REG_NOMATCH;
337 				}
338 				if (r == 0) {
339 					if (m == 0)
340 						c++;
341 					if (m < MAX_LINE_MATCHES)
342 						matches[m++] = pmatch;
343 					/* matches - skip further patterns */
344 					if ((color != NULL && !oflag) || qflag || lflag)
345 						break;
346 				}
347 			}
348 
349 			if (vflag) {
350 				c = !c;
351 				break;
352 			}
353 			/* One pass if we are not recording matches */
354 			if ((color != NULL && !oflag) || qflag || lflag)
355 				break;
356 
357 			if (st == (size_t)pmatch.rm_so)
358 				break; 	/* No matches */
359 		}
360 	} else
361 		c = !vflag;
362 
363 	if (c && binbehave == BINFILE_BIN && nottext)
364 		return (c); /* Binary file */
365 
366 	/* Dealing with the context */
367 	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
368 		if (c) {
369 			if (!first && !prev && !tail && Aflag)
370 				printf("--\n");
371 			tail = Aflag;
372 			if (Bflag > 0) {
373 				if (!first && !prev)
374 					printf("--\n");
375 				printqueue();
376 			}
377 			linesqueued = 0;
378 			printline(l, ':', matches, m);
379 		} else {
380 			printline(l, '-', matches, m);
381 			tail--;
382 		}
383 	}
384 
385 	if (c) {
386 		prev = true;
387 		first = false;
388 	} else
389 		prev = false;
390 
391 	return (c);
392 }
393 
/*
 * malloc() wrapper: never returns NULL.  If the allocation fails the
 * program is terminated through err() with exit status 2.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
406 
/*
 * calloc() wrapper: never returns NULL.  If the allocation fails the
 * program is terminated through err() with exit status 2.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
419 
/*
 * realloc() wrapper: never returns NULL.  If the reallocation fails the
 * program is terminated through err() with exit status 2.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
431 
/*
 * strdup() wrapper: never returns NULL.  If the duplication fails the
 * program is terminated through err() with exit status 2.  The caller
 * releases the copy with free().
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
444 
445 /*
446  * Prints a matching line according to the command line options.
447  */
448 void
449 printline(struct str *line, int sep, regmatch_t *matches, int m)
450 {
451 	size_t a = 0;
452 	int i, n = 0;
453 
454 	if (!hflag) {
455 		if (nullflag == 0)
456 			fputs(line->file, stdout);
457 		else {
458 			printf("%s", line->file);
459 			putchar(0);
460 		}
461 		++n;
462 	}
463 	if (nflag) {
464 		if (n > 0)
465 			putchar(sep);
466 		printf("%d", line->line_no);
467 		++n;
468 	}
469 	if (bflag) {
470 		if (n > 0)
471 			putchar(sep);
472 		printf("%lld", (long long)line->off);
473 		++n;
474 	}
475 	if (n)
476 		putchar(sep);
477 	/* --color and -o */
478 	if ((oflag || color) && m > 0) {
479 		for (i = 0; i < m; i++) {
480 			if (!oflag)
481 				fwrite(line->dat + a, matches[i].rm_so - a, 1,
482 				    stdout);
483 			if (color)
484 				fprintf(stdout, "\33[%sm\33[K", color);
485 
486 				fwrite(line->dat + matches[i].rm_so,
487 				    matches[i].rm_eo - matches[i].rm_so, 1,
488 				    stdout);
489 			if (color)
490 				fprintf(stdout, "\33[m\33[K");
491 			a = matches[i].rm_eo;
492 			if (oflag)
493 				putchar('\n');
494 		}
495 		if (!oflag) {
496 			if (line->len - a > 0)
497 				fwrite(line->dat + a, line->len - a, 1, stdout);
498 			putchar('\n');
499 		}
500 	} else {
501 		fwrite(line->dat, line->len, 1, stdout);
502 		putchar('\n');
503 	}
504 }
505