1 /* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */ 2 /* $FreeBSD$ */ 3 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 4 5 /*- 6 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 7 * 8 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 9 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 10 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com> 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/mman.h> 40 #include <sys/stat.h> 41 #include <sys/types.h> 42 43 #include <err.h> 44 #include <errno.h> 45 #include <fcntl.h> 46 #include <stddef.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 #include <wchar.h> 51 #include <wctype.h> 52 #include <zlib.h> 53 54 #ifndef WITHOUT_LZMA 55 #include <lzma.h> 56 #endif 57 58 #ifndef WITHOUT_BZIP2 59 #include <bzlib.h> 60 #endif 61 62 #include "grep.h" 63 64 #define MAXBUFSIZ (32 * 1024) 65 #define LNBUFBUMP 80 66 67 static gzFile gzbufdesc; 68 #ifndef WITHOUT_LZMA 69 static lzma_stream lstrm = LZMA_STREAM_INIT; 70 static lzma_action laction; 71 static uint8_t lin_buf[MAXBUFSIZ]; 72 #endif 73 #ifndef WITHOUT_BZIP2 74 static BZFILE* bzbufdesc; 75 #endif 76 77 static unsigned char *buffer; 78 static unsigned char *bufpos; 79 static size_t bufrem; 80 static size_t fsiz; 81 82 static unsigned char *lnbuf; 83 static size_t lnbuflen; 84 85 static inline int 86 grep_refill(struct file *f) 87 { 88 ssize_t nr; 89 90 if (filebehave == FILE_MMAP) 91 return (0); 92 93 bufpos = buffer; 94 bufrem = 0; 95 96 if (filebehave == FILE_GZIP) { 97 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); 98 #ifndef WITHOUT_BZIP2 99 } else if (filebehave == FILE_BZIP && bzbufdesc != NULL) { 100 int bzerr; 101 102 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); 103 switch (bzerr) { 104 case BZ_OK: 105 case BZ_STREAM_END: 106 /* No problem, nr will be okay */ 107 break; 108 case BZ_DATA_ERROR_MAGIC: 109 /* 110 * As opposed to gzread(), which simply returns the 111 * plain file data, if it is not in the correct 112 * compressed format, BZ2_bzRead() instead aborts. 113 * 114 * So, just restart at the beginning of the file again, 115 * and use plain reads from now on. 116 */ 117 BZ2_bzReadClose(&bzerr, bzbufdesc); 118 bzbufdesc = NULL; 119 if (lseek(f->fd, 0, SEEK_SET) == -1) 120 return (-1); 121 nr = read(f->fd, buffer, MAXBUFSIZ); 122 break; 123 default: 124 /* Make sure we exit with an error */ 125 nr = -1; 126 } 127 #endif 128 #ifndef WITHOUT_LZMA 129 } else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) { 130 lzma_ret ret; 131 lstrm.next_out = buffer; 132 133 do { 134 if (lstrm.avail_in == 0) { 135 lstrm.next_in = lin_buf; 136 nr = read(f->fd, lin_buf, MAXBUFSIZ); 137 138 if (nr < 0) 139 return (-1); 140 else if (nr == 0) 141 laction = LZMA_FINISH; 142 143 lstrm.avail_in = nr; 144 } 145 146 ret = lzma_code(&lstrm, laction); 147 148 if (ret != LZMA_OK && ret != LZMA_STREAM_END) 149 return (-1); 150 151 if (lstrm.avail_out == 0 || ret == LZMA_STREAM_END) { 152 bufrem = MAXBUFSIZ - lstrm.avail_out; 153 lstrm.next_out = buffer; 154 lstrm.avail_out = MAXBUFSIZ; 155 } 156 } while (bufrem == 0 && ret != LZMA_STREAM_END); 157 158 return (0); 159 #endif /* WIHTOUT_LZMA */ 160 } else 161 nr = read(f->fd, buffer, MAXBUFSIZ); 162 163 if (nr < 0) 164 return (-1); 165 166 bufrem = nr; 167 return (0); 168 } 169 170 static inline int 171 grep_lnbufgrow(size_t newlen) 172 { 173 174 if (lnbuflen < newlen) { 175 lnbuf = grep_realloc(lnbuf, newlen); 176 lnbuflen = newlen; 177 } 178 179 return (0); 180 } 181 182 char * 183 grep_fgetln(struct file *f, size_t *lenp) 184 { 185 unsigned char *p; 186 char *ret; 187 size_t len; 188 size_t off; 189 ptrdiff_t diff; 190 191 /* Fill the buffer, if necessary */ 192 if (bufrem == 0 && grep_refill(f) != 0) 193 goto error; 194 195 if (bufrem == 0) { 196 /* Return zero length to indicate EOF */ 197 *lenp = 0; 198 return (bufpos); 199 } 200 201 /* Look for a newline in the remaining part of the buffer */ 202 if ((p = memchr(bufpos, fileeol, bufrem)) != NULL) { 203 ++p; /* advance over newline */ 204 ret = bufpos; 205 len = p - bufpos; 206 bufrem -= len; 207 bufpos = p; 208 *lenp = len; 209 return (ret); 210 } 211 212 /* We have to copy the current buffered data to the line buffer */ 213 for (len = bufrem, off = 0; ; len += bufrem) { 214 /* Make sure there is room for more data */ 215 if (grep_lnbufgrow(len + LNBUFBUMP)) 216 goto error; 217 memcpy(lnbuf + off, bufpos, len - off); 218 /* With FILE_MMAP, this is EOF; there's no more to refill */ 219 if (filebehave == FILE_MMAP) { 220 bufrem -= len; 221 break; 222 } 223 off = len; 224 /* Fetch more to try and find EOL/EOF */ 225 if (grep_refill(f) != 0) 226 goto error; 227 if (bufrem == 0) 228 /* EOF: return partial line */ 229 break; 230 if ((p = memchr(bufpos, fileeol, bufrem)) == NULL) 231 continue; 232 /* got it: finish up the line (like code above) */ 233 ++p; 234 diff = p - bufpos; 235 len += diff; 236 if (grep_lnbufgrow(len)) 237 goto error; 238 memcpy(lnbuf + off, bufpos, diff); 239 bufrem -= diff; 240 bufpos = p; 241 break; 242 } 243 *lenp = len; 244 return (lnbuf); 245 246 error: 247 *lenp = 0; 248 return (NULL); 249 } 250 251 /* 252 * Opens a file for processing. 253 */ 254 struct file * 255 grep_open(const char *path) 256 { 257 struct file *f; 258 259 f = grep_malloc(sizeof *f); 260 memset(f, 0, sizeof *f); 261 if (path == NULL) { 262 /* Processing stdin implies --line-buffered. */ 263 lbflag = true; 264 f->fd = STDIN_FILENO; 265 } else if ((f->fd = open(path, O_RDONLY)) == -1) 266 goto error1; 267 268 if (filebehave == FILE_MMAP) { 269 struct stat st; 270 271 if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) || 272 (!S_ISREG(st.st_mode))) 273 filebehave = FILE_STDIO; 274 else { 275 int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC; 276 #ifdef MAP_PREFAULT_READ 277 flags |= MAP_PREFAULT_READ; 278 #endif 279 fsiz = st.st_size; 280 buffer = mmap(NULL, fsiz, PROT_READ, flags, 281 f->fd, (off_t)0); 282 if (buffer == MAP_FAILED) 283 filebehave = FILE_STDIO; 284 else { 285 bufrem = st.st_size; 286 bufpos = buffer; 287 madvise(buffer, st.st_size, MADV_SEQUENTIAL); 288 } 289 } 290 } 291 292 if ((buffer == NULL) || (buffer == MAP_FAILED)) 293 buffer = grep_malloc(MAXBUFSIZ); 294 295 if (filebehave == FILE_GZIP && 296 (gzbufdesc = gzdopen(f->fd, "r")) == NULL) 297 goto error2; 298 299 #ifndef WITHOUT_BZIP2 300 if (filebehave == FILE_BZIP && 301 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) 302 goto error2; 303 #endif 304 #ifndef WITHOUT_LZMA 305 else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) { 306 lzma_ret ret; 307 308 ret = (filebehave == FILE_XZ) ? 309 lzma_stream_decoder(&lstrm, UINT64_MAX, 310 LZMA_CONCATENATED) : 311 lzma_alone_decoder(&lstrm, UINT64_MAX); 312 313 if (ret != LZMA_OK) 314 goto error2; 315 316 lstrm.avail_in = 0; 317 lstrm.avail_out = MAXBUFSIZ; 318 laction = LZMA_RUN; 319 } 320 #endif 321 322 /* Fill read buffer, also catches errors early */ 323 if (bufrem == 0 && grep_refill(f) != 0) 324 goto error2; 325 326 /* Check for binary stuff, if necessary */ 327 if (binbehave != BINFILE_TEXT && fileeol != '\0' && 328 memchr(bufpos, '\0', bufrem) != NULL) 329 f->binary = true; 330 331 return (f); 332 333 error2: 334 close(f->fd); 335 error1: 336 free(f); 337 return (NULL); 338 } 339 340 /* 341 * Closes a file. 342 */ 343 void 344 grep_close(struct file *f) 345 { 346 347 close(f->fd); 348 349 /* Reset read buffer and line buffer */ 350 if (filebehave == FILE_MMAP) { 351 munmap(buffer, fsiz); 352 buffer = NULL; 353 } 354 bufpos = buffer; 355 bufrem = 0; 356 357 free(lnbuf); 358 lnbuf = NULL; 359 lnbuflen = 0; 360 } 361