xref: /freebsd/usr.bin/grep/file.c (revision a3cf0ef5a295c885c895fabfd56470c0d1db322d)
1 /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
6  * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 
38 #include <bzlib.h>
39 #include <err.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <stddef.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 #include <wchar.h>
47 #include <wctype.h>
48 #include <zlib.h>
49 
50 #include "grep.h"
51 
52 #define	MAXBUFSIZ	(32 * 1024)
53 #define	LNBUFBUMP	80
54 
55 static gzFile gzbufdesc;
56 static BZFILE* bzbufdesc;
57 
58 static unsigned char buffer[MAXBUFSIZ];
59 static unsigned char *bufpos;
60 static size_t bufrem;
61 
62 static unsigned char *lnbuf;
63 static size_t lnbuflen;
64 
65 static inline int
66 grep_refill(struct file *f)
67 {
68 	ssize_t nr;
69 	int bzerr;
70 
71 	bufpos = buffer;
72 	bufrem = 0;
73 
74 	if (filebehave == FILE_GZIP)
75 		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
76 	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
77 		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
78 		switch (bzerr) {
79 		case BZ_OK:
80 		case BZ_STREAM_END:
81 			/* No problem, nr will be okay */
82 			break;
83 		case BZ_DATA_ERROR_MAGIC:
84 			/*
85 			 * As opposed to gzread(), which simply returns the
86 			 * plain file data, if it is not in the correct
87 			 * compressed format, BZ2_bzRead() instead aborts.
88 			 *
89 			 * So, just restart at the beginning of the file again,
90 			 * and use plain reads from now on.
91 			 */
92 			BZ2_bzReadClose(&bzerr, bzbufdesc);
93 			bzbufdesc = NULL;
94 			if (lseek(f->fd, 0, SEEK_SET) == -1)
95 				return (-1);
96 			nr = read(f->fd, buffer, MAXBUFSIZ);
97 			break;
98 		default:
99 			/* Make sure we exit with an error */
100 			nr = -1;
101 		}
102 	} else
103 		nr = read(f->fd, buffer, MAXBUFSIZ);
104 
105 	if (nr < 0)
106 		return (-1);
107 
108 	bufrem = nr;
109 	return (0);
110 }
111 
112 static inline int
113 grep_lnbufgrow(size_t newlen)
114 {
115 
116 	if (lnbuflen < newlen) {
117 		lnbuf = grep_realloc(lnbuf, newlen);
118 		lnbuflen = newlen;
119 	}
120 
121 	return (0);
122 }
123 
124 char *
125 grep_fgetln(struct file *f, size_t *lenp)
126 {
127 	unsigned char *p;
128 	char *ret;
129 	size_t len;
130 	size_t off;
131 	ptrdiff_t diff;
132 
133 	/* Fill the buffer, if necessary */
134 	if (bufrem == 0 && grep_refill(f) != 0)
135 		goto error;
136 
137 	if (bufrem == 0) {
138 		/* Return zero length to indicate EOF */
139 		*lenp = 0;
140 		return (bufpos);
141 	}
142 
143 	/* Look for a newline in the remaining part of the buffer */
144 	if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
145 		++p; /* advance over newline */
146 		ret = bufpos;
147 		len = p - bufpos;
148 		bufrem -= len;
149 		bufpos = p;
150 		*lenp = len;
151 		return (ret);
152 	}
153 
154 	/* We have to copy the current buffered data to the line buffer */
155 	for (len = bufrem, off = 0; ; len += bufrem) {
156 		/* Make sure there is room for more data */
157 		if (grep_lnbufgrow(len + LNBUFBUMP))
158 			goto error;
159 		memcpy(lnbuf + off, bufpos, len - off);
160 		off = len;
161 		if (grep_refill(f) != 0)
162 			goto error;
163 		if (bufrem == 0)
164 			/* EOF: return partial line */
165 			break;
166 		if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
167 			continue;
168 		/* got it: finish up the line (like code above) */
169 		++p;
170 		diff = p - bufpos;
171 		len += diff;
172 		if (grep_lnbufgrow(len))
173 		    goto error;
174 		memcpy(lnbuf + off, bufpos, diff);
175 		bufrem -= diff;
176 		bufpos = p;
177 		break;
178 	}
179 	*lenp = len;
180 	return (lnbuf);
181 
182 error:
183 	*lenp = 0;
184 	return (NULL);
185 }
186 
187 static inline struct file *
188 grep_file_init(struct file *f)
189 {
190 
191 	if (filebehave == FILE_GZIP &&
192 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
193 		goto error;
194 
195 	if (filebehave == FILE_BZIP &&
196 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
197 		goto error;
198 
199 	/* Fill read buffer, also catches errors early */
200 	if (grep_refill(f) != 0)
201 		goto error;
202 
203 	/* Check for binary stuff, if necessary */
204 	if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
205 		f->binary = true;
206 
207 	return (f);
208 error:
209 	close(f->fd);
210 	free(f);
211 	return (NULL);
212 }
213 
214 /*
215  * Opens a file for processing.
216  */
217 struct file *
218 grep_open(const char *path)
219 {
220 	struct file *f;
221 
222 	f = grep_malloc(sizeof *f);
223 	memset(f, 0, sizeof *f);
224 	if (path == NULL) {
225 		/* Processing stdin implies --line-buffered. */
226 		lbflag = true;
227 		f->fd = STDIN_FILENO;
228 	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
229 		free(f);
230 		return (NULL);
231 	}
232 
233 	return (grep_file_init(f));
234 }
235 
236 /*
237  * Closes a file.
238  */
239 void
240 grep_close(struct file *f)
241 {
242 
243 	close(f->fd);
244 
245 	/* Reset read buffer and line buffer */
246 	bufpos = buffer;
247 	bufrem = 0;
248 
249 	free(lnbuf);
250 	lnbuf = NULL;
251 	lnbuflen = 0;
252 }
253