xref: /freebsd/usr.bin/grep/file.c (revision 9b1f97079c5255ef7ea87fa302faccaf93b4bd2a)
1 /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5  * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 
37 #include <bzlib.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <wchar.h>
45 #include <wctype.h>
46 #include <zlib.h>
47 
48 #include "grep.h"
49 
/*
 * Name of the file currently being read.  Set by grep_open() and
 * grep_stdin_open(); grep_fgetln() passes it to stat().
 */
static char	 fname[MAXPATHLEN];

/* Cap used by grep_fgetln() when sizing the pre-read buffer. */
#define		 MAXBUFSIZ	(16 * 1024)
/* Factor applied to the file size before comparing it with MAXBUFSIZ. */
#define		 PREREAD_M	0.2

/* Some global variables for the buffering and reading. */
static char	*lnbuf;			/* line handed back by grep_fgetln() */
static size_t	 lnbuflen;		/* allocated size of lnbuf */
static unsigned char *binbuf;		/* pre-read buffer */
static int	 binbufsiz;		/* number of bytes stored in binbuf */
unsigned char	*binbufptr;		/* next unread byte of binbuf; NULL until filled */
static int	 bzerr;			/* status reported by the last BZ2_bzRead() */

/*
 * True for a control character that is not whitespace, backspace or NUL.
 * The argument is expanded several times, so it must have no side effects.
 */
#define iswbinary(ch)	(!iswspace((ch)) && iswcntrl((ch)) && \
			    ((ch) != L'\b') && ((ch) != L'\0'))
65 
66 /*
67  * Returns a single character according to the file type.
68  * Returns -1 on failure.
69  */
70 static inline int
71 grep_fgetc(struct file *f)
72 {
73 	unsigned char c;
74 
75 	switch (filebehave) {
76 	case FILE_STDIO:
77 		return (getc_unlocked(f->f));
78 	case FILE_GZIP:
79 		return (gzgetc(f->gzf));
80 	case FILE_BZIP:
81 		BZ2_bzRead(&bzerr, f->bzf, &c, 1);
82 		if (bzerr == BZ_STREAM_END)
83 			return (-1);
84 		else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK)
85 			errx(2, "%s", getstr(2));
86 		return (c);
87 	}
88 	return (-1);
89 }
90 
91 /*
92  * Returns true if the file position is a EOF, returns false
93  * otherwise.
94  */
95 static inline int
96 grep_feof(struct file *f)
97 {
98 
99 	switch (filebehave) {
100 	case FILE_STDIO:
101 		return (feof_unlocked(f->f));
102 	case FILE_GZIP:
103 		return (gzeof(f->gzf));
104 	case FILE_BZIP:
105 		return (bzerr == BZ_STREAM_END);
106 	}
107 	return (1);
108 }
109 
110 /*
111  * At the first call, fills in an internal buffer and checks if the given
112  * file is a binary file and sets the binary flag accordingly.  Then returns
113  * a single line and sets len to the length of the returned line.
114  * At any other call returns a single line either from the internal buffer
115  * or from the file if the buffer is exhausted and sets len to the length
116  * of the line.
117  */
118 char *
119 grep_fgetln(struct file *f, size_t *len)
120 {
121 	struct stat st;
122 	size_t bufsiz, i = 0;
123 	int ch = 0;
124 
125 	/* Fill in the buffer if it is empty. */
126 	if (binbufptr == NULL) {
127 
128 		/* Only pre-read to the buffer if we need the binary check. */
129 		if (binbehave != BINFILE_TEXT) {
130 			if (f->stdin)
131 				st.st_size = MAXBUFSIZ;
132 			else if (stat(fname, &st) != 0)
133 				err(2, NULL);
134 			/* no need to allocate buffer. */
135 			if (st.st_size == 0)
136 				return (NULL);
137 
138 			bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ?
139 			    (st.st_size / 2) : MAXBUFSIZ;
140 
141 			binbuf = grep_malloc(sizeof(char) * bufsiz);
142 
143 			while (i < bufsiz) {
144 				ch = grep_fgetc(f);
145 				if (ch == EOF)
146 					break;
147 				binbuf[i++] = ch;
148 				if ((ch == '\n') && lbflag)
149 					break;
150 			}
151 
152 			f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ?
153 			    '\0' : '\200', i - 1) != NULL;
154 		}
155 		binbufsiz = i;
156 		binbufptr = binbuf;
157 	}
158 
159 	/* Read a line whether from the buffer or from the file itself. */
160 	for (i = 0; !(grep_feof(f) &&
161 	    (binbufptr == &binbuf[binbufsiz])); i++) {
162 		if (binbufptr == &binbuf[binbufsiz]) {
163 			ch = grep_fgetc(f);
164 		} else {
165 			ch = binbufptr[0];
166 			binbufptr++;
167 		}
168 		if (i >= lnbuflen) {
169 			lnbuflen *= 2;
170 			lnbuf = grep_realloc(lnbuf, ++lnbuflen);
171 		}
172 		if ((ch == '\n') || (ch == EOF)) {
173 			lnbuf[i] = '\0';
174 			break;
175 		} else
176 			lnbuf[i] = ch;
177 	}
178 	if (grep_feof(f) && (i == 0) && (ch != '\n'))
179 		return (NULL);
180 	*len = i;
181 	return (lnbuf);
182 }
183 
184 /*
185  * Opens the standard input for processing.
186  */
187 struct file *
188 grep_stdin_open(void)
189 {
190 	struct file *f;
191 
192 	/* Processing stdin implies --line-buffered for tail -f to work. */
193 	lbflag = true;
194 
195 	snprintf(fname, sizeof fname, "%s", getstr(1));
196 
197 	f = grep_malloc(sizeof *f);
198 
199 	binbuf = NULL;
200 	if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) {
201 		flockfile(f->f);
202 		f->stdin = true;
203 		return (f);
204 	}
205 
206 	free(f);
207 	return (NULL);
208 }
209 
210 /*
211  * Opens a normal, a gzipped or a bzip2 compressed file for processing.
212  */
213 struct file *
214 grep_open(const char *path)
215 {
216 	struct file *f;
217 
218 	snprintf(fname, sizeof fname, "%s", path);
219 
220 	f = grep_malloc(sizeof *f);
221 
222 	binbuf = NULL;
223 	f->stdin = false;
224 	switch (filebehave) {
225 	case FILE_STDIO:
226 		if ((f->f = fopen(path, "r")) != NULL) {
227 			flockfile(f->f);
228 			return (f);
229 		}
230 		break;
231 	case FILE_GZIP:
232 		if ((f->gzf = gzopen(fname, "r")) != NULL)
233 			return (f);
234 		break;
235 	case FILE_BZIP:
236 		if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL)
237 			return (f);
238 		break;
239 	}
240 
241 	free(f);
242 	return (NULL);
243 }
244 
245 /*
246  * Closes a normal, a gzipped or a bzip2 compressed file.
247  */
248 void
249 grep_close(struct file *f)
250 {
251 
252 	switch (filebehave) {
253 	case FILE_STDIO:
254 		funlockfile(f->f);
255 		fclose(f->f);
256 		break;
257 	case FILE_GZIP:
258 		gzclose(f->gzf);
259 		break;
260 	case FILE_BZIP:
261 		BZ2_bzclose(f->bzf);
262 		break;
263 	}
264 
265 	/* Reset read buffer for the file we are closing */
266 	binbufptr = NULL;
267 	free(binbuf);
268 }
269