1 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 2 3 /*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 5 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 6 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/stat.h> 37 38 #include <bzlib.h> 39 #include <err.h> 40 #include <errno.h> 41 #include <fcntl.h> 42 #include <stddef.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <unistd.h> 46 #include <wchar.h> 47 #include <wctype.h> 48 #include <zlib.h> 49 50 #include "grep.h" 51 52 #define MAXBUFSIZ (32 * 1024) 53 #define LNBUFBUMP 80 54 55 static gzFile gzbufdesc; 56 static BZFILE* bzbufdesc; 57 58 static unsigned char buffer[MAXBUFSIZ]; 59 static unsigned char *bufpos; 60 static size_t bufrem; 61 62 static unsigned char *lnbuf; 63 static size_t lnbuflen; 64 65 static inline int 66 grep_refill(struct file *f) 67 { 68 ssize_t nr; 69 int bzerr; 70 71 bufpos = buffer; 72 bufrem = 0; 73 74 if (filebehave == FILE_GZIP) 75 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); 76 else if (filebehave == FILE_BZIP && bzbufdesc != NULL) { 77 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); 78 switch (bzerr) { 79 case BZ_OK: 80 case BZ_STREAM_END: 81 /* No problem, nr will be okay */ 82 break; 83 case BZ_DATA_ERROR_MAGIC: 84 /* 85 * As opposed to gzread(), which simply returns the 86 * plain file data, if it is not in the correct 87 * compressed format, BZ2_bzRead() instead aborts. 88 * 89 * So, just restart at the beginning of the file again, 90 * and use plain reads from now on. 91 */ 92 BZ2_bzReadClose(&bzerr, bzbufdesc); 93 bzbufdesc = NULL; 94 if (lseek(f->fd, 0, SEEK_SET) == -1) 95 return (-1); 96 nr = read(f->fd, buffer, MAXBUFSIZ); 97 break; 98 default: 99 /* Make sure we exit with an error */ 100 nr = -1; 101 } 102 } else 103 nr = read(f->fd, buffer, MAXBUFSIZ); 104 105 if (nr < 0) 106 return (-1); 107 108 bufrem = nr; 109 return (0); 110 } 111 112 static inline int 113 grep_lnbufgrow(size_t newlen) 114 { 115 116 if (lnbuflen < newlen) { 117 lnbuf = grep_realloc(lnbuf, newlen); 118 lnbuflen = newlen; 119 } 120 121 return (0); 122 } 123 124 char * 125 grep_fgetln(struct file *f, size_t *lenp) 126 { 127 unsigned char *p; 128 char *ret; 129 size_t len; 130 size_t off; 131 ptrdiff_t diff; 132 133 /* Fill the buffer, if necessary */ 134 if (bufrem == 0 && grep_refill(f) != 0) 135 goto error; 136 137 if (bufrem == 0) { 138 /* Return zero length to indicate EOF */ 139 *lenp = 0; 140 return (bufpos); 141 } 142 143 /* Look for a newline in the remaining part of the buffer */ 144 if ((p = memchr(bufpos, '\n', bufrem)) != NULL) { 145 ++p; /* advance over newline */ 146 ret = bufpos; 147 len = p - bufpos; 148 bufrem -= len; 149 bufpos = p; 150 *lenp = len; 151 return (ret); 152 } 153 154 /* We have to copy the current buffered data to the line buffer */ 155 for (len = bufrem, off = 0; ; len += bufrem) { 156 /* Make sure there is room for more data */ 157 if (grep_lnbufgrow(len + LNBUFBUMP)) 158 goto error; 159 memcpy(lnbuf + off, bufpos, len - off); 160 off = len; 161 if (grep_refill(f) != 0) 162 goto error; 163 if (bufrem == 0) 164 /* EOF: return partial line */ 165 break; 166 if ((p = memchr(bufpos, '\n', bufrem)) == NULL) 167 continue; 168 /* got it: finish up the line (like code above) */ 169 ++p; 170 diff = p - bufpos; 171 len += diff; 172 if (grep_lnbufgrow(len)) 173 goto error; 174 memcpy(lnbuf + off, bufpos, diff); 175 bufrem -= diff; 176 bufpos = p; 177 break; 178 } 179 *lenp = len; 180 return (lnbuf); 181 182 error: 183 *lenp = 0; 184 return (NULL); 185 } 186 187 static inline struct file * 188 grep_file_init(struct file *f) 189 { 190 191 if (filebehave == FILE_GZIP && 192 (gzbufdesc = gzdopen(f->fd, "r")) == NULL) 193 goto error; 194 195 if (filebehave == FILE_BZIP && 196 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) 197 goto error; 198 199 /* Fill read buffer, also catches errors early */ 200 if (grep_refill(f) != 0) 201 goto error; 202 203 /* Check for binary stuff, if necessary */ 204 if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL) 205 f->binary = true; 206 207 return (f); 208 error: 209 close(f->fd); 210 free(f); 211 return (NULL); 212 } 213 214 /* 215 * Opens a file for processing. 216 */ 217 struct file * 218 grep_open(const char *path) 219 { 220 struct file *f; 221 222 f = grep_malloc(sizeof *f); 223 memset(f, 0, sizeof *f); 224 if (path == NULL) { 225 /* Processing stdin implies --line-buffered. */ 226 lbflag = true; 227 f->fd = STDIN_FILENO; 228 } else if ((f->fd = open(path, O_RDONLY)) == -1) { 229 free(f); 230 return (NULL); 231 } 232 233 return (grep_file_init(f)); 234 } 235 236 /* 237 * Closes a file. 238 */ 239 void 240 grep_close(struct file *f) 241 { 242 243 close(f->fd); 244 245 /* Reset read buffer and line buffer */ 246 bufpos = buffer; 247 bufrem = 0; 248 249 free(lnbuf); 250 lnbuf = NULL; 251 lnbuflen = 0; 252 } 253