1b66a823bSGabor Kovesdan /* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */ 2b66a823bSGabor Kovesdan /* $FreeBSD$ */ 34dc88ebeSGabor Kovesdan /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 44dc88ebeSGabor Kovesdan 54dc88ebeSGabor Kovesdan /*- 61de7b4b8SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 71de7b4b8SPedro F. Giffuni * 8a0ef9ad6SDag-Erling Smørgrav * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 93ed1008bSGabor Kovesdan * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 103ed1008bSGabor Kovesdan * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com> 114dc88ebeSGabor Kovesdan * All rights reserved. 124dc88ebeSGabor Kovesdan * 134dc88ebeSGabor Kovesdan * Redistribution and use in source and binary forms, with or without 144dc88ebeSGabor Kovesdan * modification, are permitted provided that the following conditions 154dc88ebeSGabor Kovesdan * are met: 164dc88ebeSGabor Kovesdan * 1. Redistributions of source code must retain the above copyright 174dc88ebeSGabor Kovesdan * notice, this list of conditions and the following disclaimer. 184dc88ebeSGabor Kovesdan * 2. Redistributions in binary form must reproduce the above copyright 194dc88ebeSGabor Kovesdan * notice, this list of conditions and the following disclaimer in the 204dc88ebeSGabor Kovesdan * documentation and/or other materials provided with the distribution. 214dc88ebeSGabor Kovesdan * 224dc88ebeSGabor Kovesdan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 234dc88ebeSGabor Kovesdan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 244dc88ebeSGabor Kovesdan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 254dc88ebeSGabor Kovesdan * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 264dc88ebeSGabor Kovesdan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 274dc88ebeSGabor Kovesdan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 284dc88ebeSGabor Kovesdan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 294dc88ebeSGabor Kovesdan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 304dc88ebeSGabor Kovesdan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 314dc88ebeSGabor Kovesdan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 324dc88ebeSGabor Kovesdan * SUCH DAMAGE. 334dc88ebeSGabor Kovesdan */ 344dc88ebeSGabor Kovesdan 354dc88ebeSGabor Kovesdan #include <sys/cdefs.h> 364dc88ebeSGabor Kovesdan __FBSDID("$FreeBSD$"); 374dc88ebeSGabor Kovesdan 384dc88ebeSGabor Kovesdan #include <sys/param.h> 39f20f6f3fSGabor Kovesdan #include <sys/mman.h> 404dc88ebeSGabor Kovesdan #include <sys/stat.h> 41f20f6f3fSGabor Kovesdan #include <sys/types.h> 424dc88ebeSGabor Kovesdan 434dc88ebeSGabor Kovesdan #include <err.h> 444dc88ebeSGabor Kovesdan #include <errno.h> 453ed1008bSGabor Kovesdan #include <fcntl.h> 463ed1008bSGabor Kovesdan #include <stddef.h> 474dc88ebeSGabor Kovesdan #include <stdlib.h> 484dc88ebeSGabor Kovesdan #include <string.h> 494dc88ebeSGabor Kovesdan #include <unistd.h> 504dc88ebeSGabor Kovesdan #include <wchar.h> 514dc88ebeSGabor Kovesdan #include <wctype.h> 52afbbd357SGabor Kovesdan 534dc88ebeSGabor Kovesdan #include "grep.h" 544dc88ebeSGabor Kovesdan 553ed1008bSGabor Kovesdan #define MAXBUFSIZ (32 * 1024) 563ed1008bSGabor Kovesdan #define LNBUFBUMP 80 574dc88ebeSGabor Kovesdan 5886ce5365SAlex Richardson static char *buffer; 5986ce5365SAlex Richardson static char *bufpos; 603ed1008bSGabor Kovesdan static size_t bufrem; 61f20f6f3fSGabor Kovesdan static size_t fsiz; 623ed1008bSGabor Kovesdan 6386ce5365SAlex Richardson static char *lnbuf; 644dc88ebeSGabor Kovesdan static size_t lnbuflen; 654dc88ebeSGabor Kovesdan 6659218eb7SGabor Kovesdan static inline int 673ed1008bSGabor Kovesdan grep_refill(struct file *f) 684dc88ebeSGabor Kovesdan { 693ed1008bSGabor Kovesdan ssize_t nr; 704dc88ebeSGabor Kovesdan 71f20f6f3fSGabor Kovesdan if (filebehave == FILE_MMAP) 72f20f6f3fSGabor Kovesdan return (0); 73f20f6f3fSGabor Kovesdan 743ed1008bSGabor Kovesdan bufpos = buffer; 753ed1008bSGabor Kovesdan bufrem = 0; 764dc88ebeSGabor Kovesdan 773ed1008bSGabor Kovesdan nr = read(f->fd, buffer, MAXBUFSIZ); 783ed1008bSGabor Kovesdan if (nr < 0) 793ed1008bSGabor Kovesdan return (-1); 803ed1008bSGabor Kovesdan 813ed1008bSGabor Kovesdan bufrem = nr; 823ed1008bSGabor Kovesdan return (0); 834dc88ebeSGabor Kovesdan } 844dc88ebeSGabor Kovesdan 853ed1008bSGabor Kovesdan static inline int 863ed1008bSGabor Kovesdan grep_lnbufgrow(size_t newlen) 874dc88ebeSGabor Kovesdan { 884dc88ebeSGabor Kovesdan 893ed1008bSGabor Kovesdan if (lnbuflen < newlen) { 903ed1008bSGabor Kovesdan lnbuf = grep_realloc(lnbuf, newlen); 913ed1008bSGabor Kovesdan lnbuflen = newlen; 924dc88ebeSGabor Kovesdan } 934dc88ebeSGabor Kovesdan 943ed1008bSGabor Kovesdan return (0); 953ed1008bSGabor Kovesdan } 963ed1008bSGabor Kovesdan 973ed1008bSGabor Kovesdan char * 98bd60b9b4SKyle Evans grep_fgetln(struct file *f, struct parsec *pc) 993ed1008bSGabor Kovesdan { 10086ce5365SAlex Richardson char *p; 1013ed1008bSGabor Kovesdan size_t len; 1023ed1008bSGabor Kovesdan size_t off; 1033ed1008bSGabor Kovesdan ptrdiff_t diff; 1043ed1008bSGabor Kovesdan 1053ed1008bSGabor Kovesdan /* Fill the buffer, if necessary */ 1063ed1008bSGabor Kovesdan if (bufrem == 0 && grep_refill(f) != 0) 1073ed1008bSGabor Kovesdan goto error; 1083ed1008bSGabor Kovesdan 1093ed1008bSGabor Kovesdan if (bufrem == 0) { 1103ed1008bSGabor Kovesdan /* Return zero length to indicate EOF */ 111bd60b9b4SKyle Evans pc->ln.len= 0; 1123ed1008bSGabor Kovesdan return (bufpos); 1133ed1008bSGabor Kovesdan } 1143ed1008bSGabor Kovesdan 115d1a920b4SKyle Evans /* Look for a newline in the remaining part of the buffer */ 1165ee1ea02SEd Maste if ((p = memchr(bufpos, fileeol, bufrem)) != NULL) { 1173ed1008bSGabor Kovesdan ++p; /* advance over newline */ 1183ed1008bSGabor Kovesdan len = p - bufpos; 11981c3f641SAlex Richardson if (grep_lnbufgrow(len + 1)) 12081c3f641SAlex Richardson goto error; 12181c3f641SAlex Richardson memcpy(lnbuf, bufpos, len); 1223ed1008bSGabor Kovesdan bufrem -= len; 1233ed1008bSGabor Kovesdan bufpos = p; 124bd60b9b4SKyle Evans pc->ln.len = len; 12581c3f641SAlex Richardson lnbuf[len] = '\0'; 12681c3f641SAlex Richardson return (lnbuf); 1273ed1008bSGabor Kovesdan } 1283ed1008bSGabor Kovesdan 1293ed1008bSGabor Kovesdan /* We have to copy the current buffered data to the line buffer */ 1303ed1008bSGabor Kovesdan for (len = bufrem, off = 0; ; len += bufrem) { 1313ed1008bSGabor Kovesdan /* Make sure there is room for more data */ 1323ed1008bSGabor Kovesdan if (grep_lnbufgrow(len + LNBUFBUMP)) 1333ed1008bSGabor Kovesdan goto error; 1343ed1008bSGabor Kovesdan memcpy(lnbuf + off, bufpos, len - off); 1359a145202SEd Maste /* With FILE_MMAP, this is EOF; there's no more to refill */ 1369a145202SEd Maste if (filebehave == FILE_MMAP) { 1379a145202SEd Maste bufrem -= len; 1389a145202SEd Maste break; 1399a145202SEd Maste } 1403ed1008bSGabor Kovesdan off = len; 1419a145202SEd Maste /* Fetch more to try and find EOL/EOF */ 1423ed1008bSGabor Kovesdan if (grep_refill(f) != 0) 1433ed1008bSGabor Kovesdan goto error; 1443ed1008bSGabor Kovesdan if (bufrem == 0) 1453ed1008bSGabor Kovesdan /* EOF: return partial line */ 1463ed1008bSGabor Kovesdan break; 1479a145202SEd Maste if ((p = memchr(bufpos, fileeol, bufrem)) == NULL) 1483ed1008bSGabor Kovesdan continue; 1493ed1008bSGabor Kovesdan /* got it: finish up the line (like code above) */ 1503ed1008bSGabor Kovesdan ++p; 1513ed1008bSGabor Kovesdan diff = p - bufpos; 1523ed1008bSGabor Kovesdan len += diff; 15381c3f641SAlex Richardson if (grep_lnbufgrow(len + 1)) 1543ed1008bSGabor Kovesdan goto error; 1553ed1008bSGabor Kovesdan memcpy(lnbuf + off, bufpos, diff); 1563ed1008bSGabor Kovesdan bufrem -= diff; 1573ed1008bSGabor Kovesdan bufpos = p; 1583ed1008bSGabor Kovesdan break; 1593ed1008bSGabor Kovesdan } 160bd60b9b4SKyle Evans pc->ln.len = len; 16181c3f641SAlex Richardson lnbuf[len] = '\0'; 1623ed1008bSGabor Kovesdan return (lnbuf); 1633ed1008bSGabor Kovesdan 1643ed1008bSGabor Kovesdan error: 165bd60b9b4SKyle Evans pc->ln.len = 0; 1663ed1008bSGabor Kovesdan return (NULL); 1673ed1008bSGabor Kovesdan } 1683ed1008bSGabor Kovesdan 1694dc88ebeSGabor Kovesdan /* 1703ed1008bSGabor Kovesdan * Opens a file for processing. 1714dc88ebeSGabor Kovesdan */ 1724dc88ebeSGabor Kovesdan struct file * 1734dc88ebeSGabor Kovesdan grep_open(const char *path) 1744dc88ebeSGabor Kovesdan { 1754dc88ebeSGabor Kovesdan struct file *f; 1764dc88ebeSGabor Kovesdan 1774dc88ebeSGabor Kovesdan f = grep_malloc(sizeof *f); 1783ed1008bSGabor Kovesdan memset(f, 0, sizeof *f); 1793ed1008bSGabor Kovesdan if (path == NULL) { 1803ed1008bSGabor Kovesdan /* Processing stdin implies --line-buffered. */ 1813ed1008bSGabor Kovesdan lbflag = true; 1823ed1008bSGabor Kovesdan f->fd = STDIN_FILENO; 183f20f6f3fSGabor Kovesdan } else if ((f->fd = open(path, O_RDONLY)) == -1) 184f20f6f3fSGabor Kovesdan goto error1; 185f20f6f3fSGabor Kovesdan 186f20f6f3fSGabor Kovesdan if (filebehave == FILE_MMAP) { 187f20f6f3fSGabor Kovesdan struct stat st; 188f20f6f3fSGabor Kovesdan 189*b6175849SKyle Evans if (fstat(f->fd, &st) == -1 || !S_ISREG(st.st_mode)) 190f20f6f3fSGabor Kovesdan filebehave = FILE_STDIO; 191f20f6f3fSGabor Kovesdan else { 192f20f6f3fSGabor Kovesdan int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC; 193f20f6f3fSGabor Kovesdan #ifdef MAP_PREFAULT_READ 194f20f6f3fSGabor Kovesdan flags |= MAP_PREFAULT_READ; 195f20f6f3fSGabor Kovesdan #endif 196f20f6f3fSGabor Kovesdan fsiz = st.st_size; 197f20f6f3fSGabor Kovesdan buffer = mmap(NULL, fsiz, PROT_READ, flags, 198f20f6f3fSGabor Kovesdan f->fd, (off_t)0); 199f20f6f3fSGabor Kovesdan if (buffer == MAP_FAILED) 200f20f6f3fSGabor Kovesdan filebehave = FILE_STDIO; 201f20f6f3fSGabor Kovesdan else { 202f20f6f3fSGabor Kovesdan bufrem = st.st_size; 203f20f6f3fSGabor Kovesdan bufpos = buffer; 204f20f6f3fSGabor Kovesdan madvise(buffer, st.st_size, MADV_SEQUENTIAL); 205f20f6f3fSGabor Kovesdan } 206f20f6f3fSGabor Kovesdan } 2074dc88ebeSGabor Kovesdan } 2084dc88ebeSGabor Kovesdan 209f20f6f3fSGabor Kovesdan if ((buffer == NULL) || (buffer == MAP_FAILED)) 210f20f6f3fSGabor Kovesdan buffer = grep_malloc(MAXBUFSIZ); 211f20f6f3fSGabor Kovesdan 212f20f6f3fSGabor Kovesdan /* Fill read buffer, also catches errors early */ 213f20f6f3fSGabor Kovesdan if (bufrem == 0 && grep_refill(f) != 0) 214f20f6f3fSGabor Kovesdan goto error2; 215f20f6f3fSGabor Kovesdan 216f20f6f3fSGabor Kovesdan /* Check for binary stuff, if necessary */ 2175ee1ea02SEd Maste if (binbehave != BINFILE_TEXT && fileeol != '\0' && 2185ee1ea02SEd Maste memchr(bufpos, '\0', bufrem) != NULL) 219f20f6f3fSGabor Kovesdan f->binary = true; 220f20f6f3fSGabor Kovesdan 221f20f6f3fSGabor Kovesdan return (f); 222f20f6f3fSGabor Kovesdan 223f20f6f3fSGabor Kovesdan error2: 224f20f6f3fSGabor Kovesdan close(f->fd); 225f20f6f3fSGabor Kovesdan error1: 226f20f6f3fSGabor Kovesdan free(f); 227f20f6f3fSGabor Kovesdan return (NULL); 2283ed1008bSGabor Kovesdan } 2293ed1008bSGabor Kovesdan 2304dc88ebeSGabor Kovesdan /* 2313ed1008bSGabor Kovesdan * Closes a file. 2324dc88ebeSGabor Kovesdan */ 2334dc88ebeSGabor Kovesdan void 2344dc88ebeSGabor Kovesdan grep_close(struct file *f) 2354dc88ebeSGabor Kovesdan { 2364dc88ebeSGabor Kovesdan 2373ed1008bSGabor Kovesdan close(f->fd); 2384dc88ebeSGabor Kovesdan 2393ed1008bSGabor Kovesdan /* Reset read buffer and line buffer */ 240f20f6f3fSGabor Kovesdan if (filebehave == FILE_MMAP) { 241f20f6f3fSGabor Kovesdan munmap(buffer, fsiz); 242f20f6f3fSGabor Kovesdan buffer = NULL; 243f20f6f3fSGabor Kovesdan } 2443ed1008bSGabor Kovesdan bufpos = buffer; 2453ed1008bSGabor Kovesdan bufrem = 0; 2463ed1008bSGabor Kovesdan 2473ed1008bSGabor Kovesdan free(lnbuf); 2483ed1008bSGabor Kovesdan lnbuf = NULL; 2493ed1008bSGabor Kovesdan lnbuflen = 0; 2504dc88ebeSGabor Kovesdan } 251