xref: /freebsd/usr.bin/grep/file.c (revision e1e636193db45630c7881246d25902e57c43d24e)
1  /*	$NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $	*/
2  /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
3  
4  /*-
5   * SPDX-License-Identifier: BSD-2-Clause
6   *
7   * Copyright (c) 1999 James Howard and Dag-Erling Smørgrav
8   * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
9   * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
10   * All rights reserved.
11   *
12   * Redistribution and use in source and binary forms, with or without
13   * modification, are permitted provided that the following conditions
14   * are met:
15   * 1. Redistributions of source code must retain the above copyright
16   *    notice, this list of conditions and the following disclaimer.
17   * 2. Redistributions in binary form must reproduce the above copyright
18   *    notice, this list of conditions and the following disclaimer in the
19   *    documentation and/or other materials provided with the distribution.
20   *
21   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24   * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31   * SUCH DAMAGE.
32   */
33  
34  #include <sys/param.h>
35  #include <sys/mman.h>
36  #include <sys/stat.h>
37  #include <sys/types.h>
38  
39  #include <err.h>
40  #include <errno.h>
41  #include <fcntl.h>
42  #include <stddef.h>
43  #include <stdlib.h>
44  #include <string.h>
45  #include <unistd.h>
46  #include <wchar.h>
47  #include <wctype.h>
48  
49  #include "grep.h"
50  
/* Size in bytes of the malloc'd read buffer and of each read(2) request. */
#define	MAXBUFSIZ	(32 * 1024)
/* Slack added to the line buffer request while a line spans several refills. */
#define	LNBUFBUMP	80

/*
 * Read buffer shared by all files: either a MAXBUFSIZ-byte allocation or,
 * when filebehave is FILE_MMAP, the mapping of the whole file.
 */
static char *buffer;
/* Next unconsumed byte inside buffer. */
static char *bufpos;
/* Number of unconsumed bytes available starting at bufpos. */
static size_t bufrem;
/* Length of the mapping; set in grep_open() and handed to munmap(). */
static size_t fsiz;

/* Line buffer returned by grep_fgetln(); grown on demand, freed on close. */
static char *lnbuf;
/* Current allocated size of lnbuf in bytes. */
static size_t lnbuflen;
61  
62  static inline int
63  grep_refill(struct file *f)
64  {
65  	ssize_t nr;
66  
67  	if (filebehave == FILE_MMAP)
68  		return (0);
69  
70  	bufpos = buffer;
71  	bufrem = 0;
72  
73  	nr = read(f->fd, buffer, MAXBUFSIZ);
74  	if (nr < 0 && errno == EISDIR)
75  		nr = 0;
76  	if (nr < 0)
77  		return (-1);
78  
79  	bufrem = nr;
80  	return (0);
81  }
82  
83  static inline int
84  grep_lnbufgrow(size_t newlen)
85  {
86  
87  	if (lnbuflen < newlen) {
88  		lnbuf = grep_realloc(lnbuf, newlen);
89  		lnbuflen = newlen;
90  	}
91  
92  	return (0);
93  }
94  
95  char *
96  grep_fgetln(struct file *f, struct parsec *pc)
97  {
98  	char *p;
99  	size_t len;
100  	size_t off;
101  	ptrdiff_t diff;
102  
103  	/* Fill the buffer, if necessary */
104  	if (bufrem == 0 && grep_refill(f) != 0)
105  		goto error;
106  
107  	if (bufrem == 0) {
108  		/* Return zero length to indicate EOF */
109  		pc->ln.len= 0;
110  		return (bufpos);
111  	}
112  
113  	/* Look for a newline in the remaining part of the buffer */
114  	if ((p = memchr(bufpos, fileeol, bufrem)) != NULL) {
115  		++p; /* advance over newline */
116  		len = p - bufpos;
117  		if (grep_lnbufgrow(len + 1))
118  			goto error;
119  		memcpy(lnbuf, bufpos, len);
120  		bufrem -= len;
121  		bufpos = p;
122  		pc->ln.len = len;
123  		lnbuf[len] = '\0';
124  		return (lnbuf);
125  	}
126  
127  	/* We have to copy the current buffered data to the line buffer */
128  	for (len = bufrem, off = 0; ; len += bufrem) {
129  		/* Make sure there is room for more data */
130  		if (grep_lnbufgrow(len + LNBUFBUMP))
131  			goto error;
132  		memcpy(lnbuf + off, bufpos, len - off);
133  		/* With FILE_MMAP, this is EOF; there's no more to refill */
134  		if (filebehave == FILE_MMAP) {
135  			bufrem -= len;
136  			break;
137  		}
138  		off = len;
139  		/* Fetch more to try and find EOL/EOF */
140  		if (grep_refill(f) != 0)
141  			goto error;
142  		if (bufrem == 0)
143  			/* EOF: return partial line */
144  			break;
145  		if ((p = memchr(bufpos, fileeol, bufrem)) == NULL)
146  			continue;
147  		/* got it: finish up the line (like code above) */
148  		++p;
149  		diff = p - bufpos;
150  		len += diff;
151  		if (grep_lnbufgrow(len + 1))
152  		    goto error;
153  		memcpy(lnbuf + off, bufpos, diff);
154  		bufrem -= diff;
155  		bufpos = p;
156  		break;
157  	}
158  	pc->ln.len = len;
159  	lnbuf[len] = '\0';
160  	return (lnbuf);
161  
162  error:
163  	pc->ln.len = 0;
164  	return (NULL);
165  }
166  
167  /*
168   * Opens a file for processing.
169   */
170  struct file *
171  grep_open(const char *path)
172  {
173  	struct file *f;
174  
175  	f = grep_malloc(sizeof *f);
176  	memset(f, 0, sizeof *f);
177  	if (path == NULL) {
178  		/* Processing stdin implies --line-buffered. */
179  		lbflag = true;
180  		f->fd = STDIN_FILENO;
181  	} else if ((f->fd = open(path, O_RDONLY)) == -1)
182  		goto error1;
183  
184  	if (filebehave == FILE_MMAP) {
185  		struct stat st;
186  
187  		if (fstat(f->fd, &st) == -1 || !S_ISREG(st.st_mode))
188  			filebehave = FILE_STDIO;
189  		else {
190  			int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC;
191  #ifdef MAP_PREFAULT_READ
192  			flags |= MAP_PREFAULT_READ;
193  #endif
194  			fsiz = st.st_size;
195  			buffer = mmap(NULL, fsiz, PROT_READ, flags,
196  			     f->fd, (off_t)0);
197  			if (buffer == MAP_FAILED)
198  				filebehave = FILE_STDIO;
199  			else {
200  				bufrem = st.st_size;
201  				bufpos = buffer;
202  				madvise(buffer, st.st_size, MADV_SEQUENTIAL);
203  			}
204  		}
205  	}
206  
207  	if ((buffer == NULL) || (buffer == MAP_FAILED))
208  		buffer = grep_malloc(MAXBUFSIZ);
209  
210  	/* Fill read buffer, also catches errors early */
211  	if (bufrem == 0 && grep_refill(f) != 0)
212  		goto error2;
213  
214  	/* Check for binary stuff, if necessary */
215  	if (binbehave != BINFILE_TEXT && fileeol != '\0' &&
216  	    memchr(bufpos, '\0', bufrem) != NULL)
217  		f->binary = true;
218  
219  	return (f);
220  
221  error2:
222  	close(f->fd);
223  error1:
224  	free(f);
225  	return (NULL);
226  }
227  
228  /*
229   * Closes a file.
230   */
231  void
232  grep_close(struct file *f)
233  {
234  
235  	close(f->fd);
236  
237  	/* Reset read buffer and line buffer */
238  	if (filebehave == FILE_MMAP) {
239  		munmap(buffer, fsiz);
240  		buffer = NULL;
241  	}
242  	bufpos = buffer;
243  	bufrem = 0;
244  
245  	free(lnbuf);
246  	lnbuf = NULL;
247  	lnbuflen = 0;
248  }
249