xref: /freebsd/usr.bin/grep/util.c (revision 4c9ffb13dd74159bd3ed7e1c4c706dbd15a70df2)
1 /*	$NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $	*/
2 /*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
3 
4 /*-
5  * SPDX-License-Identifier: BSD-2-Clause
6  *
7  * Copyright (c) 1999 James Howard and Dag-Erling Smørgrav
8  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
9  * Copyright (C) 2017 Kyle Evans <kevans@FreeBSD.org>
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 
38 #include <ctype.h>
39 #include <err.h>
40 #include <errno.h>
41 #include <fnmatch.h>
42 #include <fts.h>
43 #include <libgen.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 #include <wchar.h>
50 #include <wctype.h>
51 
52 #include "grep.h"
53 
/*
 * True until the first matching line has been printed.  procmatch_match()
 * consults it so that no "--" context separator is emitted before any output.
 */
static bool	 first_match = true;
55 
/*
 * Per-file bookkeeping used while deciding how matching lines and the
 * context lines around them are written out.
 */
struct mprintc {
	long long	tail;		/* Trailing context lines still to print */
	int		last_outed;	/* Lines passed over since the last output */
	bool		doctx;		/* Context output is enabled */
	bool		printmatch;	/* Matching lines are printed */
	bool		same_file;	/* A match was already printed for this file */
};
66 
/* Forward declarations for the file-local helpers defined further down. */
static bool procline(struct parsec *pc);
static bool procmatches(struct mprintc *mc, struct parsec *pc, bool matched);
static void procmatch_match(struct mprintc *mc, struct parsec *pc);
static void procmatch_nomatch(struct mprintc *mc, struct parsec *pc);
static bool printline(struct parsec *pc, int sep, size_t *last_out);
static void printline_metadata(struct str *line, int sep);
#ifdef WITH_INTERNAL_NOSPEC
static int litexec(const struct pat *pat, const char *string,
    size_t nmatch, regmatch_t pmatch[]);
#endif
77 
78 bool
file_matching(const char * fname)79 file_matching(const char *fname)
80 {
81 	char *fname_base, *fname_buf;
82 	bool ret;
83 
84 	ret = finclude ? false : true;
85 	fname_buf = strdup(fname);
86 	if (fname_buf == NULL)
87 		err(2, "strdup");
88 	fname_base = basename(fname_buf);
89 
90 	for (unsigned int i = 0; i < fpatterns; ++i) {
91 		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
92 		    fnmatch(fpattern[i].pat, fname_base, 0) == 0)
93 			/*
94 			 * The last pattern matched wins exclusion/inclusion
95 			 * rights, so we can't reasonably bail out early here.
96 			 */
97 			ret = (fpattern[i].mode != EXCL_PAT);
98 	}
99 	free(fname_buf);
100 	return (ret);
101 }
102 
103 static inline bool
dir_matching(const char * dname)104 dir_matching(const char *dname)
105 {
106 	bool ret;
107 
108 	ret = dinclude ? false : true;
109 
110 	for (unsigned int i = 0; i < dpatterns; ++i) {
111 		if (dname != NULL && fnmatch(dpattern[i].pat, dname, 0) == 0)
112 			/*
113 			 * The last pattern matched wins exclusion/inclusion
114 			 * rights, so we can't reasonably bail out early here.
115 			 */
116 			ret = (dpattern[i].mode != EXCL_PAT);
117 	}
118 	return (ret);
119 }
120 
121 /*
122  * Processes a directory when a recursive search is performed with
123  * the -R option.  Each appropriate file is passed to procfile().
124  */
125 bool
grep_tree(char ** argv)126 grep_tree(char **argv)
127 {
128 	FTS *fts;
129 	FTSENT *p;
130 	int fts_flags;
131 	bool matched, ok;
132 	const char *wd[] = { ".", NULL };
133 
134 	matched = false;
135 
136 	/* This switch effectively initializes 'fts_flags' */
137 	switch(linkbehave) {
138 	case LINK_EXPLICIT:
139 		fts_flags = FTS_COMFOLLOW | FTS_PHYSICAL;
140 		break;
141 	case LINK_SKIP:
142 		fts_flags = FTS_PHYSICAL;
143 		break;
144 	default:
145 		fts_flags = FTS_LOGICAL | FTS_NOSTAT;
146 	}
147 
148 	fts_flags |= FTS_NOCHDIR;
149 
150 	fts = fts_open((argv[0] == NULL) ?
151 	    __DECONST(char * const *, wd) : argv, fts_flags, NULL);
152 	if (fts == NULL)
153 		err(2, "fts_open");
154 	while (errno = 0, (p = fts_read(fts)) != NULL) {
155 		switch (p->fts_info) {
156 		case FTS_DNR:
157 		case FTS_ERR:
158 		case FTS_NS:
159 			file_err = true;
160 			if(!sflag)
161 				warnc(p->fts_errno, "%s", p->fts_path);
162 			break;
163 		case FTS_D:
164 			if (dexclude || dinclude)
165 				if (!dir_matching(p->fts_name) ||
166 				    !dir_matching(p->fts_path))
167 					fts_set(fts, p, FTS_SKIP);
168 			break;
169 		case FTS_DC:
170 			/* Print a warning for recursive directory loop */
171 			warnx("warning: %s: recursive directory loop",
172 			    p->fts_path);
173 			break;
174 		case FTS_DP:
175 			break;
176 		case FTS_SL:
177 			/*
178 			 * Skip symlinks for LINK_EXPLICIT and
179 			 * LINK_SKIP.  Note that due to FTS_COMFOLLOW,
180 			 * symlinks on the command line are followed
181 			 * for LINK_EXPLICIT and not reported as
182 			 * symlinks.
183 			 */
184 			break;
185 		default:
186 			/* Check for file exclusion/inclusion */
187 			ok = true;
188 			if (fexclude || finclude)
189 				ok &= file_matching(p->fts_path);
190 
191 			if (ok && procfile(p->fts_path))
192 				matched = true;
193 			break;
194 		}
195 	}
196 	if (errno != 0)
197 		err(2, "fts_read");
198 
199 	fts_close(fts);
200 	return (matched);
201 }
202 
203 static void
procmatch_match(struct mprintc * mc,struct parsec * pc)204 procmatch_match(struct mprintc *mc, struct parsec *pc)
205 {
206 
207 	if (mc->doctx) {
208 		if (!first_match && (!mc->same_file || mc->last_outed > 0))
209 			printf("--\n");
210 		if (Bflag > 0)
211 			printqueue();
212 		mc->tail = Aflag;
213 	}
214 
215 	/* Print the matching line, but only if not quiet/binary */
216 	if (mc->printmatch) {
217 		size_t last_out;
218 		bool terminated;
219 
220 		last_out = 0;
221 		terminated = printline(pc, ':', &last_out);
222 		while (pc->matchidx >= MAX_MATCHES) {
223 			/* Reset matchidx and try again */
224 			pc->matchidx = 0;
225 			if (procline(pc) == !vflag)
226 				terminated = printline(pc, ':', &last_out);
227 			else
228 				break;
229 		}
230 
231 		/*
232 		 * The above loop processes the entire line as long as we keep
233 		 * hitting the maximum match count.  At this point, we know
234 		 * that there's nothing left to be printed and can terminate the
235 		 * line.
236 		 */
237 		if (!terminated)
238 			printline(pc, ':', &last_out);
239 
240 		first_match = false;
241 		mc->same_file = true;
242 		mc->last_outed = 0;
243 	}
244 }
245 
246 static void
procmatch_nomatch(struct mprintc * mc,struct parsec * pc)247 procmatch_nomatch(struct mprintc *mc, struct parsec *pc)
248 {
249 
250 	/* Deal with any -A context as needed */
251 	if (mc->tail > 0) {
252 		grep_printline(&pc->ln, '-');
253 		mc->tail--;
254 		if (Bflag > 0)
255 			clearqueue();
256 	} else if (Bflag == 0 || (Bflag > 0 && enqueue(&pc->ln)))
257 		/*
258 		 * Enqueue non-matching lines for -B context. If we're not
259 		 * actually doing -B context or if the enqueue resulted in a
260 		 * line being rotated out, then go ahead and increment
261 		 * last_outed to signify a gap between context/match.
262 		 */
263 		++mc->last_outed;
264 }
265 
266 /*
267  * Process any matches in the current parsing context, return a boolean
268  * indicating whether we should halt any further processing or not. 'true' to
269  * continue processing, 'false' to halt.
270  */
271 static bool
procmatches(struct mprintc * mc,struct parsec * pc,bool matched)272 procmatches(struct mprintc *mc, struct parsec *pc, bool matched)
273 {
274 
275 	if (mflag && mcount <= 0) {
276 		/*
277 		 * We already hit our match count, but we need to keep dumping
278 		 * lines until we've lost our tail.
279 		 */
280 		grep_printline(&pc->ln, '-');
281 		mc->tail--;
282 		return (mc->tail != 0);
283 	}
284 
285 	/*
286 	 * XXX TODO: This should loop over pc->matches and handle things on a
287 	 * line-by-line basis, setting up a `struct str` as needed.
288 	 */
289 	/* Deal with any -B context or context separators */
290 	if (matched) {
291 		procmatch_match(mc, pc);
292 
293 		/* Count the matches if we have a match limit */
294 		if (mflag) {
295 			/* XXX TODO: Decrement by number of matched lines */
296 			mcount -= 1;
297 			if (mcount <= 0)
298 				return (mc->tail != 0);
299 		}
300 	} else if (mc->doctx)
301 		procmatch_nomatch(mc, pc);
302 
303 	return (true);
304 }
305 
306 /*
307  * Opens a file and processes it.  Each file is processed line-by-line
308  * passing the lines to procline().
309  */
310 bool
procfile(const char * fn)311 procfile(const char *fn)
312 {
313 	struct parsec pc;
314 	struct mprintc mc;
315 	struct file *f;
316 	struct stat sb;
317 	mode_t s;
318 	int lines;
319 	bool line_matched;
320 
321 	if (strcmp(fn, "-") == 0) {
322 		fn = label != NULL ? label : errstr[1];
323 		f = grep_open(NULL);
324 	} else {
325 		if (stat(fn, &sb) == 0) {
326 			/* Check if we need to process the file */
327 			s = sb.st_mode & S_IFMT;
328 			if (dirbehave == DIR_SKIP && s == S_IFDIR)
329 				return (false);
330 			if (devbehave == DEV_SKIP && (s == S_IFIFO ||
331 			    s == S_IFCHR || s == S_IFBLK || s == S_IFSOCK))
332 				return (false);
333 		}
334 		f = grep_open(fn);
335 	}
336 	if (f == NULL) {
337 		file_err = true;
338 		if (!sflag)
339 			warn("%s", fn);
340 		return (false);
341 	}
342 
343 	pc.ln.file = grep_strdup(fn);
344 	pc.ln.line_no = 0;
345 	pc.ln.len = 0;
346 	pc.ln.boff = 0;
347 	pc.ln.off = -1;
348 	pc.binary = f->binary;
349 	pc.cntlines = false;
350 	memset(&mc, 0, sizeof(mc));
351 	mc.printmatch = true;
352 	if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag ||
353 	    lflag || Lflag)
354 		mc.printmatch = false;
355 	if (mc.printmatch && (Aflag != 0 || Bflag != 0))
356 		mc.doctx = true;
357 	if (mc.printmatch && (Aflag != 0 || Bflag != 0 || mflag || nflag))
358 		pc.cntlines = true;
359 	mcount = mlimit;
360 
361 	for (lines = 0; lines == 0 || !(lflag || qflag); ) {
362 		/*
363 		 * XXX TODO: We need to revisit this in a chunking world. We're
364 		 * not going to be doing per-line statistics because of the
365 		 * overhead involved. procmatches can figure that stuff out as
366 		 * needed. */
367 		/* Reset per-line statistics */
368 		pc.printed = 0;
369 		pc.matchidx = 0;
370 		pc.lnstart = 0;
371 		pc.ln.boff = 0;
372 		pc.ln.off += pc.ln.len + 1;
373 		/* XXX TODO: Grab a chunk */
374 		if ((pc.ln.dat = grep_fgetln(f, &pc)) == NULL ||
375 		    pc.ln.len == 0)
376 			break;
377 
378 		if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol)
379 			--pc.ln.len;
380 		pc.ln.line_no++;
381 
382 		/* Return if we need to skip a binary file */
383 		if (pc.binary && binbehave == BINFILE_SKIP) {
384 			grep_close(f);
385 			free(pc.ln.file);
386 			free(f);
387 			return (0);
388 		}
389 
390 		if (mflag && mcount <= 0) {
391 			/*
392 			 * Short-circuit, already hit match count and now we're
393 			 * just picking up any remaining pieces.
394 			 */
395 			if (!procmatches(&mc, &pc, false))
396 				break;
397 			continue;
398 		}
399 		line_matched = procline(&pc) == !vflag;
400 		if (line_matched)
401 			++lines;
402 
403 		/* Halt processing if we hit our match limit */
404 		if (!procmatches(&mc, &pc, line_matched))
405 			break;
406 	}
407 	if (Bflag > 0)
408 		clearqueue();
409 	grep_close(f);
410 
411 	if (cflag && !qflag) {
412 		if (!hflag)
413 			printf("%s:", pc.ln.file);
414 		printf("%u\n", lines);
415 	}
416 	if (lflag && !qflag && lines != 0)
417 		printf("%s%c", fn, nullflag ? 0 : '\n');
418 	if (Lflag && !qflag && lines == 0)
419 		printf("%s%c", fn, nullflag ? 0 : '\n');
420 	if (lines != 0 && !cflag && !lflag && !Lflag &&
421 	    binbehave == BINFILE_BIN && f->binary && !qflag)
422 		printf(errstr[7], fn);
423 
424 	free(pc.ln.file);
425 	free(f);
426 	return (lines != 0);
427 }
428 
#ifdef WITH_INTERNAL_NOSPEC
/*
 * Internal implementation of literal string search within a string, modeled
 * after regexec(3), for use when the regex(3) implementation doesn't offer
 * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD
 * config, but in other scenarios such as building against libgnuregex or on
 * some non-FreeBSD OSes.
 *
 * On entry pmatch[0].rm_so and pmatch[0].rm_eo give the range of `string`
 * to search.  On success up to nmatch entries of pmatch are filled with
 * match offsets and the remaining entries are set to -1.
 *
 * Returns 0 on a match, REG_NOMATCH when nothing matched and REG_ESPACE
 * when the working copy of the string could not be allocated.
 */
static int
litexec(const struct pat *pat, const char *string, size_t nmatch,
    regmatch_t pmatch[])
{
	char *(*strstr_fn)(const char *, const char *);
	char *sub, *subject;
	const char *search;
	size_t idx, ofs, stringlen;
	bool found;

	if (cflags & REG_ICASE)
		strstr_fn = strcasestr;
	else
		strstr_fn = strstr;
	idx = 0;
	found = false;
	ofs = pmatch[0].rm_so;
	stringlen = pmatch[0].rm_eo;
	if (ofs >= stringlen)
		return (REG_NOMATCH);
	/* Work on a copy terminated at the end of the search range. */
	subject = strndup(string, stringlen);
	if (subject == NULL)
		return (REG_ESPACE);
	while (ofs < stringlen) {
		search = (subject + ofs);
		if ((unsigned long)pat->len > strlen(search))
			break;
		sub = strstr_fn(search, pat->pat);
		/*
		 * Ignoring the empty string possibility due to context: grep optimizes
		 * for empty patterns and will never reach this point.
		 */
		if (sub == NULL)
			break;
		found = true;
		/* We only needed to know if we match or not */
		if (nmatch == 0)
			break;
		/* Fill in pmatch */
		pmatch[idx].rm_so = ofs + (sub - search);
		pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len;
		/*
		 * Resume one byte past the start of the match just recorded.
		 * This must be read before idx advances: the next entry has
		 * not been written yet.
		 */
		ofs = pmatch[idx].rm_so + 1;
		if (++idx == nmatch)
			break;
	}
	free(subject);
	if (!found)
		return (REG_NOMATCH);

	/* Mark the unused tail of pmatch as "no match". */
	for (; idx < nmatch; ++idx)
		pmatch[idx].rm_so = pmatch[idx].rm_eo = -1;
	return (0);
}
#endif /* WITH_INTERNAL_NOSPEC */
489 
/* Nonzero when the wide character x is alphanumeric or an underscore. */
#define iswword(x)	((x) == L'_' || iswalnum((x)))
491 
492 /*
493  * Processes a line comparing it with the specified patterns.  Each pattern
494  * is looped to be compared along with the full string, saving each and every
495  * match, which is necessary to colorize the output and to count the
496  * matches.  The matching lines are passed to printline() to display the
497  * appropriate output.
498  */
499 static bool
procline(struct parsec * pc)500 procline(struct parsec *pc)
501 {
502 	regmatch_t pmatch, lastmatch, chkmatch;
503 	wchar_t wbegin, wend;
504 	size_t st, nst;
505 	unsigned int i;
506 	int r = 0, leflags = eflags;
507 	size_t startm = 0, matchidx;
508 	unsigned int retry;
509 	bool lastmatched, matched;
510 
511 	matchidx = pc->matchidx;
512 
513 	/* Null pattern shortcuts. */
514 	if (matchall) {
515 		if (xflag && pc->ln.len == 0) {
516 			/* Matches empty lines (-x). */
517 			return (true);
518 		} else if (!wflag && !xflag) {
519 			/* Matches every line (no -w or -x). */
520 			return (true);
521 		}
522 
523 		/*
524 		 * If we only have the NULL pattern, whether we match or not
525 		 * depends on if we got here with -w or -x.  If either is set,
526 		 * the answer is no.  If we have other patterns, we'll defer
527 		 * to them.
528 		 */
529 		if (patterns == 0) {
530 			return (!(wflag || xflag));
531 		}
532 	} else if (patterns == 0) {
533 		/* Pattern file with no patterns. */
534 		return (false);
535 	}
536 
537 	matched = false;
538 	st = pc->lnstart;
539 	nst = 0;
540 	/* Initialize to avoid a false positive warning from GCC. */
541 	lastmatch.rm_so = lastmatch.rm_eo = 0;
542 
543 	/* Loop to process the whole line */
544 	while (st <= pc->ln.len) {
545 		lastmatched = false;
546 		startm = matchidx;
547 		retry = 0;
548 		if (st > 0 && pc->ln.dat[st - 1] != fileeol)
549 			leflags |= REG_NOTBOL;
550 		/* Loop to compare with all the patterns */
551 		for (i = 0; i < patterns; i++) {
552 			pmatch.rm_so = st;
553 			pmatch.rm_eo = pc->ln.len;
554 #ifdef WITH_INTERNAL_NOSPEC
555 			if (grepbehave == GREP_FIXED)
556 				r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
557 			else
558 #endif
559 			r = regexec(&r_pattern[i], pc->ln.dat, 1, &pmatch,
560 			    leflags);
561 			if (r != 0)
562 				continue;
563 			/* Check for full match */
564 			if (xflag && (pmatch.rm_so != 0 ||
565 			    (size_t)pmatch.rm_eo != pc->ln.len))
566 				continue;
567 			/* Check for whole word match */
568 			if (wflag) {
569 				wbegin = wend = L' ';
570 				if (pmatch.rm_so != 0 &&
571 				    sscanf(&pc->ln.dat[pmatch.rm_so - 1],
572 				    "%lc", &wbegin) != 1)
573 					r = REG_NOMATCH;
574 				else if ((size_t)pmatch.rm_eo !=
575 				    pc->ln.len &&
576 				    sscanf(&pc->ln.dat[pmatch.rm_eo],
577 				    "%lc", &wend) != 1)
578 					r = REG_NOMATCH;
579 				else if (iswword(wbegin) ||
580 				    iswword(wend))
581 					r = REG_NOMATCH;
582 				/*
583 				 * If we're doing whole word matching and we
584 				 * matched once, then we should try the pattern
585 				 * again after advancing just past the start of
586 				 * the earliest match. This allows the pattern
587 				 * to  match later on in the line and possibly
588 				 * still match a whole word.
589 				 */
590 				if (r == REG_NOMATCH &&
591 				    (retry == pc->lnstart ||
592 				    (unsigned int)pmatch.rm_so + 1 < retry))
593 					retry = pmatch.rm_so + 1;
594 				if (r == REG_NOMATCH)
595 					continue;
596 			}
597 			lastmatched = true;
598 			lastmatch = pmatch;
599 
600 			if (matchidx == 0)
601 				matched = true;
602 
603 			/*
604 			 * Replace previous match if the new one is earlier
605 			 * and/or longer. This will lead to some amount of
606 			 * extra work if -o/--color are specified, but it's
607 			 * worth it from a correctness point of view.
608 			 */
609 			if (matchidx > startm) {
610 				chkmatch = pc->matches[matchidx - 1];
611 				if (pmatch.rm_so < chkmatch.rm_so ||
612 				    (pmatch.rm_so == chkmatch.rm_so &&
613 				    (pmatch.rm_eo - pmatch.rm_so) >
614 				    (chkmatch.rm_eo - chkmatch.rm_so))) {
615 					pc->matches[matchidx - 1] = pmatch;
616 					nst = pmatch.rm_eo;
617 				}
618 			} else {
619 				/* Advance as normal if not */
620 				pc->matches[matchidx++] = pmatch;
621 				nst = pmatch.rm_eo;
622 			}
623 			/* avoid excessive matching - skip further patterns */
624 			if ((color == NULL && !oflag) || qflag || lflag ||
625 			    matchidx >= MAX_MATCHES) {
626 				pc->lnstart = nst;
627 				lastmatched = false;
628 				break;
629 			}
630 		}
631 
632 		/*
633 		 * Advance to just past the start of the earliest match, try
634 		 * again just in case we still have a chance to match later in
635 		 * the string.
636 		 */
637 		if (!lastmatched && retry > pc->lnstart) {
638 			st = retry;
639 			continue;
640 		}
641 
642 		/* XXX TODO: We will need to keep going, since we're chunky */
643 		/* One pass if we are not recording matches */
644 		if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag))
645 			break;
646 
647 		/* If we didn't have any matches or REG_NOSUB set */
648 		if (!lastmatched || (cflags & REG_NOSUB))
649 			nst = pc->ln.len;
650 
651 		if (!lastmatched)
652 			/* No matches */
653 			break;
654 		else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo)
655 			/* Zero-length match -- advance one more so we don't get stuck */
656 			nst++;
657 
658 		/* Advance st based on previous matches */
659 		st = nst;
660 		pc->lnstart = st;
661 	}
662 
663 	/* Reflect the new matchidx in the context */
664 	pc->matchidx = matchidx;
665 	return matched;
666 }
667 
/*
 * malloc() wrapper for internal use.  A zero-byte request yields NULL;
 * an allocation failure terminates the program with exit status 2.
 */
void *
grep_malloc(size_t size)
{
	void *mem = NULL;

	if (size != 0) {
		mem = malloc(size);
		if (mem == NULL)
			err(2, "malloc");
	}
	return (mem);
}
682 
/*
 * calloc() wrapper for internal use.  Returns NULL when either nmemb or
 * size is zero; an allocation failure terminates the program with exit
 * status 2.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = NULL;

	if (nmemb != 0 && size != 0) {
		mem = calloc(nmemb, size);
		if (mem == NULL)
			err(2, "calloc");
	}
	return (mem);
}
697 
/*
 * realloc() wrapper for internal use.
 *
 * A zero-byte request releases ptr and returns NULL, matching what
 * grep_malloc() and grep_calloc() return for empty requests.  realloc()
 * itself may return NULL for a zero size without having failed, which must
 * not be reported as an allocation failure.  For any other size an
 * allocation failure terminates the program with exit status 2.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *grown;

	if (size == 0) {
		free(ptr);
		return (NULL);
	}

	grown = realloc(ptr, size);
	if (grown == NULL)
		err(2, "realloc");
	return (grown);
}
709 
/*
 * strdup() wrapper for internal use.  An allocation failure terminates
 * the program with exit status 2; the caller owns the returned copy.
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
722 
723 /*
724  * Print an entire line as-is, there are no inline matches to consider. This is
725  * used for printing context.
726  */
grep_printline(struct str * line,int sep)727 void grep_printline(struct str *line, int sep) {
728 	printline_metadata(line, sep);
729 	fwrite(line->dat, line->len, 1, stdout);
730 	putchar(fileeol);
731 }
732 
733 static void
printline_metadata(struct str * line,int sep)734 printline_metadata(struct str *line, int sep)
735 {
736 	bool printsep;
737 
738 	printsep = false;
739 	if (!hflag) {
740 		if (!nullflag) {
741 			fputs(line->file, stdout);
742 			printsep = true;
743 		} else {
744 			printf("%s", line->file);
745 			putchar(0);
746 		}
747 	}
748 	if (nflag) {
749 		if (printsep)
750 			putchar(sep);
751 		printf("%d", line->line_no);
752 		printsep = true;
753 	}
754 	if (bflag) {
755 		if (printsep)
756 			putchar(sep);
757 		printf("%lld", (long long)(line->off + line->boff));
758 		printsep = true;
759 	}
760 	if (printsep)
761 		putchar(sep);
762 }
763 
764 /*
765  * Prints a matching line according to the command line options.  We need
766  * *last_out to be populated on entry in case this is just a continuation of
767  * matches within the same line.
768  *
769  * Returns true if the line was terminated, false if it was not.
770  */
771 static bool
printline(struct parsec * pc,int sep,size_t * last_out)772 printline(struct parsec *pc, int sep, size_t *last_out)
773 {
774 	size_t a = *last_out;
775 	size_t i, matchidx;
776 	regmatch_t match;
777 	bool terminated;
778 
779 	/*
780 	 * Nearly all paths below will terminate the line by default, but it is
781 	 * avoided in some circumstances in case we don't have the full context
782 	 * available here.
783 	 */
784 	terminated = true;
785 
786 	/* If matchall, everything matches but don't actually print for -o */
787 	if (oflag && matchall)
788 		return (terminated);
789 
790 	matchidx = pc->matchidx;
791 
792 	/* --color and -o */
793 	if ((oflag || color) && (pc->printed > 0 || matchidx > 0)) {
794 		/* Only print metadata once per line if --color */
795 		if (!oflag && pc->printed == 0) {
796 			printline_metadata(&pc->ln, sep);
797 		}
798 		for (i = 0; i < matchidx; i++) {
799 			match = pc->matches[i];
800 			/* Don't output zero length matches */
801 			if (match.rm_so == match.rm_eo)
802 				continue;
803 			/*
804 			 * Metadata is printed on a per-line basis, so every
805 			 * match gets file metadata with the -o flag.
806 			 */
807 			if (oflag) {
808 				pc->ln.boff = match.rm_so;
809 				printline_metadata(&pc->ln, sep);
810 			} else {
811 				fwrite(pc->ln.dat + a, match.rm_so - a, 1,
812 				    stdout);
813 			}
814 			if (color)
815 				fprintf(stdout, "\33[%sm\33[K", color);
816 			fwrite(pc->ln.dat + match.rm_so,
817 			    match.rm_eo - match.rm_so, 1, stdout);
818 			if (color)
819 				fprintf(stdout, "\33[m\33[K");
820 			a = match.rm_eo;
821 			if (oflag)
822 				putchar('\n');
823 		}
824 
825 		/*
826 		 * Don't terminate if we reached the match limit; we may have
827 		 * other matches on this line to process.
828 		 */
829 		*last_out = a;
830 		if (!oflag && matchidx != MAX_MATCHES) {
831 			if (pc->ln.len - a > 0) {
832 				fwrite(pc->ln.dat + a, pc->ln.len - a, 1,
833 				    stdout);
834 				*last_out = pc->ln.len;
835 			}
836 			putchar('\n');
837 		} else if (!oflag) {
838 			/*
839 			 * -o is terminated on every match output, so this
840 			 * branch is only designed to capture MAX_MATCHES in a
841 			 * line which may be a signal to us for a lack of
842 			 * context.  The caller will know more and call us again
843 			 * to terminate if it needs to.
844 			 */
845 			terminated = false;
846 		}
847 	} else
848 		grep_printline(&pc->ln, sep);
849 	pc->printed++;
850 	return (terminated);
851 }
852