xref: /freebsd/usr.bin/grep/file.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*	$NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $	*/
2 /*	$FreeBSD$	*/
3 /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
4 
5 /*-
6  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8  * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 
40 #include <bzlib.h>
41 #include <err.h>
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <stddef.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <wchar.h>
49 #include <wctype.h>
50 #include <zlib.h>
51 
52 #include "grep.h"
53 
54 #define	MAXBUFSIZ	(32 * 1024)
55 #define	LNBUFBUMP	80
56 
57 static gzFile gzbufdesc;
58 static BZFILE* bzbufdesc;
59 
60 static unsigned char buffer[MAXBUFSIZ];
61 static unsigned char *bufpos;
62 static size_t bufrem;
63 
64 static unsigned char *lnbuf;
65 static size_t lnbuflen;
66 
67 static inline int
68 grep_refill(struct file *f)
69 {
70 	ssize_t nr;
71 	int bzerr;
72 
73 	bufpos = buffer;
74 	bufrem = 0;
75 
76 	if (filebehave == FILE_GZIP)
77 		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
78 	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
79 		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
80 		switch (bzerr) {
81 		case BZ_OK:
82 		case BZ_STREAM_END:
83 			/* No problem, nr will be okay */
84 			break;
85 		case BZ_DATA_ERROR_MAGIC:
86 			/*
87 			 * As opposed to gzread(), which simply returns the
88 			 * plain file data, if it is not in the correct
89 			 * compressed format, BZ2_bzRead() instead aborts.
90 			 *
91 			 * So, just restart at the beginning of the file again,
92 			 * and use plain reads from now on.
93 			 */
94 			BZ2_bzReadClose(&bzerr, bzbufdesc);
95 			bzbufdesc = NULL;
96 			if (lseek(f->fd, 0, SEEK_SET) == -1)
97 				return (-1);
98 			nr = read(f->fd, buffer, MAXBUFSIZ);
99 			break;
100 		default:
101 			/* Make sure we exit with an error */
102 			nr = -1;
103 		}
104 	} else
105 		nr = read(f->fd, buffer, MAXBUFSIZ);
106 
107 	if (nr < 0)
108 		return (-1);
109 
110 	bufrem = nr;
111 	return (0);
112 }
113 
114 static inline int
115 grep_lnbufgrow(size_t newlen)
116 {
117 
118 	if (lnbuflen < newlen) {
119 		lnbuf = grep_realloc(lnbuf, newlen);
120 		lnbuflen = newlen;
121 	}
122 
123 	return (0);
124 }
125 
126 char *
127 grep_fgetln(struct file *f, size_t *lenp)
128 {
129 	unsigned char *p;
130 	char *ret;
131 	size_t len;
132 	size_t off;
133 	ptrdiff_t diff;
134 
135 	/* Fill the buffer, if necessary */
136 	if (bufrem == 0 && grep_refill(f) != 0)
137 		goto error;
138 
139 	if (bufrem == 0) {
140 		/* Return zero length to indicate EOF */
141 		*lenp = 0;
142 		return (bufpos);
143 	}
144 
145 	/* Look for a newline in the remaining part of the buffer */
146 	if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
147 		++p; /* advance over newline */
148 		ret = bufpos;
149 		len = p - bufpos;
150 		bufrem -= len;
151 		bufpos = p;
152 		*lenp = len;
153 		return (ret);
154 	}
155 
156 	/* We have to copy the current buffered data to the line buffer */
157 	for (len = bufrem, off = 0; ; len += bufrem) {
158 		/* Make sure there is room for more data */
159 		if (grep_lnbufgrow(len + LNBUFBUMP))
160 			goto error;
161 		memcpy(lnbuf + off, bufpos, len - off);
162 		off = len;
163 		if (grep_refill(f) != 0)
164 			goto error;
165 		if (bufrem == 0)
166 			/* EOF: return partial line */
167 			break;
168 		if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
169 			continue;
170 		/* got it: finish up the line (like code above) */
171 		++p;
172 		diff = p - bufpos;
173 		len += diff;
174 		if (grep_lnbufgrow(len))
175 		    goto error;
176 		memcpy(lnbuf + off, bufpos, diff);
177 		bufrem -= diff;
178 		bufpos = p;
179 		break;
180 	}
181 	*lenp = len;
182 	return (lnbuf);
183 
184 error:
185 	*lenp = 0;
186 	return (NULL);
187 }
188 
189 static inline struct file *
190 grep_file_init(struct file *f)
191 {
192 
193 	if (filebehave == FILE_GZIP &&
194 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
195 		goto error;
196 
197 	if (filebehave == FILE_BZIP &&
198 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
199 		goto error;
200 
201 	/* Fill read buffer, also catches errors early */
202 	if (grep_refill(f) != 0)
203 		goto error;
204 
205 	/* Check for binary stuff, if necessary */
206 	if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
207 		f->binary = true;
208 
209 	return (f);
210 error:
211 	close(f->fd);
212 	free(f);
213 	return (NULL);
214 }
215 
216 /*
217  * Opens a file for processing.
218  */
219 struct file *
220 grep_open(const char *path)
221 {
222 	struct file *f;
223 
224 	f = grep_malloc(sizeof *f);
225 	memset(f, 0, sizeof *f);
226 	if (path == NULL) {
227 		/* Processing stdin implies --line-buffered. */
228 		lbflag = true;
229 		f->fd = STDIN_FILENO;
230 	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
231 		free(f);
232 		return (NULL);
233 	}
234 
235 	return (grep_file_init(f));
236 }
237 
238 /*
239  * Closes a file.
240  */
241 void
242 grep_close(struct file *f)
243 {
244 
245 	close(f->fd);
246 
247 	/* Reset read buffer and line buffer */
248 	bufpos = buffer;
249 	bufrem = 0;
250 
251 	free(lnbuf);
252 	lnbuf = NULL;
253 	lnbuflen = 0;
254 }
255