1caf54c4fSMartin Matuska /*- 2caf54c4fSMartin Matuska * Copyright (c) 2008 Tim Kientzle 36c95142eSMartin Matuska * Copyright (c) 2010 Joerg Sonnenberger 4caf54c4fSMartin Matuska * All rights reserved. 5caf54c4fSMartin Matuska * 6caf54c4fSMartin Matuska * Redistribution and use in source and binary forms, with or without 7caf54c4fSMartin Matuska * modification, are permitted provided that the following conditions 8caf54c4fSMartin Matuska * are met: 9caf54c4fSMartin Matuska * 1. Redistributions of source code must retain the above copyright 10caf54c4fSMartin Matuska * notice, this list of conditions and the following disclaimer 11caf54c4fSMartin Matuska * in this position and unchanged. 12caf54c4fSMartin Matuska * 2. Redistributions in binary form must reproduce the above copyright 13caf54c4fSMartin Matuska * notice, this list of conditions and the following disclaimer in the 14caf54c4fSMartin Matuska * documentation and/or other materials provided with the distribution. 15caf54c4fSMartin Matuska * 16caf54c4fSMartin Matuska * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 17caf54c4fSMartin Matuska * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18caf54c4fSMartin Matuska * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19caf54c4fSMartin Matuska * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 20caf54c4fSMartin Matuska * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21caf54c4fSMartin Matuska * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22caf54c4fSMartin Matuska * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23caf54c4fSMartin Matuska * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24caf54c4fSMartin Matuska * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25caf54c4fSMartin Matuska * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26caf54c4fSMartin Matuska */ 27caf54c4fSMartin Matuska 28caf54c4fSMartin Matuska #include "lafe_platform.h" 29caf54c4fSMartin Matuska #include <errno.h> 30caf54c4fSMartin Matuska #include <stdio.h> 31caf54c4fSMartin Matuska #include <stdlib.h> 32caf54c4fSMartin Matuska #include <string.h> 33caf54c4fSMartin Matuska 34caf54c4fSMartin Matuska #include "err.h" 35caf54c4fSMartin Matuska #include "line_reader.h" 36caf54c4fSMartin Matuska 37caf54c4fSMartin Matuska #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__BORLANDC__) 38caf54c4fSMartin Matuska #define strdup _strdup 39caf54c4fSMartin Matuska #endif 40caf54c4fSMartin Matuska 41caf54c4fSMartin Matuska /* 42caf54c4fSMartin Matuska * Read lines from file and do something with each one. If option_null 43caf54c4fSMartin Matuska * is set, lines are terminated with zero bytes; otherwise, they're 44caf54c4fSMartin Matuska * terminated with newlines. 45caf54c4fSMartin Matuska * 46caf54c4fSMartin Matuska * This uses a self-sizing buffer to handle arbitrarily-long lines. 47caf54c4fSMartin Matuska */ 48caf54c4fSMartin Matuska struct lafe_line_reader { 49caf54c4fSMartin Matuska FILE *f; 50*fae5c36eSMartin Matuska char *buff, *buff_end, *line_start, *line_end; 51caf54c4fSMartin Matuska char *pathname; 52caf54c4fSMartin Matuska size_t buff_length; 53caf54c4fSMartin Matuska int nullSeparator; /* Lines separated by null, not CR/CRLF/etc. */ 54caf54c4fSMartin Matuska }; 55caf54c4fSMartin Matuska 56caf54c4fSMartin Matuska struct lafe_line_reader * 57caf54c4fSMartin Matuska lafe_line_reader(const char *pathname, int nullSeparator) 58caf54c4fSMartin Matuska { 59caf54c4fSMartin Matuska struct lafe_line_reader *lr; 60caf54c4fSMartin Matuska 61caf54c4fSMartin Matuska lr = calloc(1, sizeof(*lr)); 62caf54c4fSMartin Matuska if (lr == NULL) 63caf54c4fSMartin Matuska lafe_errc(1, ENOMEM, "Can't open %s", pathname); 64caf54c4fSMartin Matuska 65caf54c4fSMartin Matuska lr->nullSeparator = nullSeparator; 66caf54c4fSMartin Matuska lr->pathname = strdup(pathname); 67caf54c4fSMartin Matuska 68caf54c4fSMartin Matuska if (strcmp(pathname, "-") == 0) 69caf54c4fSMartin Matuska lr->f = stdin; 70caf54c4fSMartin Matuska else 71caf54c4fSMartin Matuska lr->f = fopen(pathname, "r"); 72caf54c4fSMartin Matuska if (lr->f == NULL) 73caf54c4fSMartin Matuska lafe_errc(1, errno, "Couldn't open %s", pathname); 74caf54c4fSMartin Matuska lr->buff_length = 8192; 756c95142eSMartin Matuska lr->line_start = lr->line_end = lr->buff_end = lr->buff = NULL; 76caf54c4fSMartin Matuska 77caf54c4fSMartin Matuska return (lr); 78caf54c4fSMartin Matuska } 79caf54c4fSMartin Matuska 806c95142eSMartin Matuska static void 816c95142eSMartin Matuska lafe_line_reader_find_eol(struct lafe_line_reader *lr) 826c95142eSMartin Matuska { 836c95142eSMartin Matuska 846c95142eSMartin Matuska lr->line_end += strcspn(lr->line_end, 856c95142eSMartin Matuska lr->nullSeparator ? "" : "\x0d\x0a"); 866c95142eSMartin Matuska *lr->line_end = '\0'; /* Noop if line_end == buff_end */ 876c95142eSMartin Matuska } 886c95142eSMartin Matuska 89caf54c4fSMartin Matuska const char * 90caf54c4fSMartin Matuska lafe_line_reader_next(struct lafe_line_reader *lr) 91caf54c4fSMartin Matuska { 92caf54c4fSMartin Matuska size_t bytes_wanted, bytes_read, new_buff_size; 93caf54c4fSMartin Matuska char *line_start, *p; 94caf54c4fSMartin Matuska 95caf54c4fSMartin Matuska for (;;) { 96caf54c4fSMartin Matuska /* If there's a line in the buffer, return it immediately. */ 97caf54c4fSMartin Matuska while (lr->line_end < lr->buff_end) { 98caf54c4fSMartin Matuska line_start = lr->line_start; 996c95142eSMartin Matuska lr->line_start = ++lr->line_end; 1006c95142eSMartin Matuska lafe_line_reader_find_eol(lr); 1016c95142eSMartin Matuska 1026c95142eSMartin Matuska if (lr->nullSeparator || line_start[0] != '\0') 103caf54c4fSMartin Matuska return (line_start); 104caf54c4fSMartin Matuska } 105caf54c4fSMartin Matuska 106caf54c4fSMartin Matuska /* If we're at end-of-file, process the final data. */ 107caf54c4fSMartin Matuska if (lr->f == NULL) { 1086c95142eSMartin Matuska if (lr->line_start == lr->buff_end) 1096c95142eSMartin Matuska return (NULL); /* No more text */ 110caf54c4fSMartin Matuska line_start = lr->line_start; 1116c95142eSMartin Matuska lr->line_start = lr->buff_end; 112caf54c4fSMartin Matuska return (line_start); 113caf54c4fSMartin Matuska } 114caf54c4fSMartin Matuska 115caf54c4fSMartin Matuska /* Buffer only has part of a line. */ 116caf54c4fSMartin Matuska if (lr->line_start > lr->buff) { 117caf54c4fSMartin Matuska /* Move a leftover fractional line to the beginning. */ 118caf54c4fSMartin Matuska memmove(lr->buff, lr->line_start, 119caf54c4fSMartin Matuska lr->buff_end - lr->line_start); 120caf54c4fSMartin Matuska lr->buff_end -= lr->line_start - lr->buff; 121caf54c4fSMartin Matuska lr->line_end -= lr->line_start - lr->buff; 122caf54c4fSMartin Matuska lr->line_start = lr->buff; 123caf54c4fSMartin Matuska } else { 124caf54c4fSMartin Matuska /* Line is too big; enlarge the buffer. */ 125caf54c4fSMartin Matuska new_buff_size = lr->buff_length * 2; 126caf54c4fSMartin Matuska if (new_buff_size <= lr->buff_length) 127caf54c4fSMartin Matuska lafe_errc(1, ENOMEM, 128caf54c4fSMartin Matuska "Line too long in %s", lr->pathname); 129caf54c4fSMartin Matuska lr->buff_length = new_buff_size; 1306c95142eSMartin Matuska /* 1316c95142eSMartin Matuska * Allocate one extra byte to allow terminating 1326c95142eSMartin Matuska * the buffer. 1336c95142eSMartin Matuska */ 1346c95142eSMartin Matuska p = realloc(lr->buff, new_buff_size + 1); 135caf54c4fSMartin Matuska if (p == NULL) 136caf54c4fSMartin Matuska lafe_errc(1, ENOMEM, 137caf54c4fSMartin Matuska "Line too long in %s", lr->pathname); 138caf54c4fSMartin Matuska lr->buff_end = p + (lr->buff_end - lr->buff); 139caf54c4fSMartin Matuska lr->line_end = p + (lr->line_end - lr->buff); 140caf54c4fSMartin Matuska lr->line_start = lr->buff = p; 141caf54c4fSMartin Matuska } 142caf54c4fSMartin Matuska 143caf54c4fSMartin Matuska /* Get some more data into the buffer. */ 144caf54c4fSMartin Matuska bytes_wanted = lr->buff + lr->buff_length - lr->buff_end; 145caf54c4fSMartin Matuska bytes_read = fread(lr->buff_end, 1, bytes_wanted, lr->f); 146caf54c4fSMartin Matuska lr->buff_end += bytes_read; 1476c95142eSMartin Matuska *lr->buff_end = '\0'; /* Always terminate buffer */ 1486c95142eSMartin Matuska lafe_line_reader_find_eol(lr); 149caf54c4fSMartin Matuska 150caf54c4fSMartin Matuska if (ferror(lr->f)) 151caf54c4fSMartin Matuska lafe_errc(1, errno, "Can't read %s", lr->pathname); 152caf54c4fSMartin Matuska if (feof(lr->f)) { 153caf54c4fSMartin Matuska if (lr->f != stdin) 154caf54c4fSMartin Matuska fclose(lr->f); 155caf54c4fSMartin Matuska lr->f = NULL; 156caf54c4fSMartin Matuska } 157caf54c4fSMartin Matuska } 158caf54c4fSMartin Matuska } 159caf54c4fSMartin Matuska 160caf54c4fSMartin Matuska void 161caf54c4fSMartin Matuska lafe_line_reader_free(struct lafe_line_reader *lr) 162caf54c4fSMartin Matuska { 163caf54c4fSMartin Matuska free(lr->buff); 164caf54c4fSMartin Matuska free(lr->pathname); 165caf54c4fSMartin Matuska free(lr); 166caf54c4fSMartin Matuska } 167