xref: /freebsd/contrib/libarchive/libarchive_fe/line_reader.c (revision b9128a37faafede823eb456aa65a11ac69997284)
1caf54c4fSMartin Matuska /*-
2caf54c4fSMartin Matuska  * Copyright (c) 2008 Tim Kientzle
36c95142eSMartin Matuska  * Copyright (c) 2010 Joerg Sonnenberger
4caf54c4fSMartin Matuska  * All rights reserved.
5caf54c4fSMartin Matuska  *
6caf54c4fSMartin Matuska  * Redistribution and use in source and binary forms, with or without
7caf54c4fSMartin Matuska  * modification, are permitted provided that the following conditions
8caf54c4fSMartin Matuska  * are met:
9caf54c4fSMartin Matuska  * 1. Redistributions of source code must retain the above copyright
10caf54c4fSMartin Matuska  *    notice, this list of conditions and the following disclaimer
11caf54c4fSMartin Matuska  *    in this position and unchanged.
12caf54c4fSMartin Matuska  * 2. Redistributions in binary form must reproduce the above copyright
13caf54c4fSMartin Matuska  *    notice, this list of conditions and the following disclaimer in the
14caf54c4fSMartin Matuska  *    documentation and/or other materials provided with the distribution.
15caf54c4fSMartin Matuska  *
16caf54c4fSMartin Matuska  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17caf54c4fSMartin Matuska  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18caf54c4fSMartin Matuska  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19caf54c4fSMartin Matuska  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20caf54c4fSMartin Matuska  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21caf54c4fSMartin Matuska  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22caf54c4fSMartin Matuska  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23caf54c4fSMartin Matuska  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24caf54c4fSMartin Matuska  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25caf54c4fSMartin Matuska  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26caf54c4fSMartin Matuska  */
27caf54c4fSMartin Matuska 
28caf54c4fSMartin Matuska #include "lafe_platform.h"
29caf54c4fSMartin Matuska #include <errno.h>
30caf54c4fSMartin Matuska #include <stdio.h>
31caf54c4fSMartin Matuska #include <stdlib.h>
32caf54c4fSMartin Matuska #include <string.h>
33caf54c4fSMartin Matuska 
34caf54c4fSMartin Matuska #include "err.h"
35caf54c4fSMartin Matuska #include "line_reader.h"
36caf54c4fSMartin Matuska 
37caf54c4fSMartin Matuska #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__BORLANDC__)
38caf54c4fSMartin Matuska #define strdup _strdup
39caf54c4fSMartin Matuska #endif
40caf54c4fSMartin Matuska 
/*
 * State for reading a file one line at a time.
 *
 * When nullSeparator is set, lines are separated by zero bytes;
 * otherwise a CR or LF ends a line.
 *
 * The buffer resizes itself, so lines of arbitrary length are handled.
 */
struct lafe_line_reader {
	/* Input stream; reset to NULL once end-of-file has been seen. */
	FILE *f;
	/* Start of the heap-allocated read buffer (NULL until first read). */
	char *buff;
	/* One past the last byte of data read; a '\0' is stored here. */
	char *buff_end;
	/* Start of the next line to hand out. */
	char *line_start;
	/* Scan position: the line's terminator, or buff_end if none yet. */
	char *line_end;
	/* Private copy of the file name, used in error messages. */
	char *pathname;
	/* Nominal buffer capacity; allocations add one terminator byte. */
	size_t buff_length;
	int nullSeparator; /* Lines separated by null, not CR/CRLF/etc. */
};
55caf54c4fSMartin Matuska 
56caf54c4fSMartin Matuska struct lafe_line_reader *
57caf54c4fSMartin Matuska lafe_line_reader(const char *pathname, int nullSeparator)
58caf54c4fSMartin Matuska {
59caf54c4fSMartin Matuska 	struct lafe_line_reader *lr;
60caf54c4fSMartin Matuska 
61caf54c4fSMartin Matuska 	lr = calloc(1, sizeof(*lr));
62caf54c4fSMartin Matuska 	if (lr == NULL)
63caf54c4fSMartin Matuska 		lafe_errc(1, ENOMEM, "Can't open %s", pathname);
64caf54c4fSMartin Matuska 
65caf54c4fSMartin Matuska 	lr->nullSeparator = nullSeparator;
66caf54c4fSMartin Matuska 	lr->pathname = strdup(pathname);
67caf54c4fSMartin Matuska 
68caf54c4fSMartin Matuska 	if (strcmp(pathname, "-") == 0)
69caf54c4fSMartin Matuska 		lr->f = stdin;
70caf54c4fSMartin Matuska 	else
71caf54c4fSMartin Matuska 		lr->f = fopen(pathname, "r");
72caf54c4fSMartin Matuska 	if (lr->f == NULL)
73caf54c4fSMartin Matuska 		lafe_errc(1, errno, "Couldn't open %s", pathname);
74caf54c4fSMartin Matuska 	lr->buff_length = 8192;
756c95142eSMartin Matuska 	lr->line_start = lr->line_end = lr->buff_end = lr->buff = NULL;
76caf54c4fSMartin Matuska 
77caf54c4fSMartin Matuska 	return (lr);
78caf54c4fSMartin Matuska }
79caf54c4fSMartin Matuska 
806c95142eSMartin Matuska static void
816c95142eSMartin Matuska lafe_line_reader_find_eol(struct lafe_line_reader *lr)
826c95142eSMartin Matuska {
836c95142eSMartin Matuska 
846c95142eSMartin Matuska 	lr->line_end += strcspn(lr->line_end,
856c95142eSMartin Matuska 	    lr->nullSeparator ? "" : "\x0d\x0a");
866c95142eSMartin Matuska 	*lr->line_end = '\0'; /* Noop if line_end == buff_end */
876c95142eSMartin Matuska }
886c95142eSMartin Matuska 
89caf54c4fSMartin Matuska const char *
90caf54c4fSMartin Matuska lafe_line_reader_next(struct lafe_line_reader *lr)
91caf54c4fSMartin Matuska {
92caf54c4fSMartin Matuska 	size_t bytes_wanted, bytes_read, new_buff_size;
93caf54c4fSMartin Matuska 	char *line_start, *p;
94caf54c4fSMartin Matuska 
95caf54c4fSMartin Matuska 	for (;;) {
96caf54c4fSMartin Matuska 		/* If there's a line in the buffer, return it immediately. */
97caf54c4fSMartin Matuska 		while (lr->line_end < lr->buff_end) {
98caf54c4fSMartin Matuska 			line_start = lr->line_start;
996c95142eSMartin Matuska 			lr->line_start = ++lr->line_end;
1006c95142eSMartin Matuska 			lafe_line_reader_find_eol(lr);
1016c95142eSMartin Matuska 
1026c95142eSMartin Matuska 			if (lr->nullSeparator || line_start[0] != '\0')
103caf54c4fSMartin Matuska 				return (line_start);
104caf54c4fSMartin Matuska 		}
105caf54c4fSMartin Matuska 
106caf54c4fSMartin Matuska 		/* If we're at end-of-file, process the final data. */
107caf54c4fSMartin Matuska 		if (lr->f == NULL) {
1086c95142eSMartin Matuska 			if (lr->line_start == lr->buff_end)
1096c95142eSMartin Matuska 				return (NULL); /* No more text */
110caf54c4fSMartin Matuska 			line_start = lr->line_start;
1116c95142eSMartin Matuska 			lr->line_start = lr->buff_end;
112caf54c4fSMartin Matuska 			return (line_start);
113caf54c4fSMartin Matuska 		}
114caf54c4fSMartin Matuska 
115caf54c4fSMartin Matuska 		/* Buffer only has part of a line. */
116caf54c4fSMartin Matuska 		if (lr->line_start > lr->buff) {
117caf54c4fSMartin Matuska 			/* Move a leftover fractional line to the beginning. */
118caf54c4fSMartin Matuska 			memmove(lr->buff, lr->line_start,
119caf54c4fSMartin Matuska 			    lr->buff_end - lr->line_start);
120caf54c4fSMartin Matuska 			lr->buff_end -= lr->line_start - lr->buff;
121caf54c4fSMartin Matuska 			lr->line_end -= lr->line_start - lr->buff;
122caf54c4fSMartin Matuska 			lr->line_start = lr->buff;
123caf54c4fSMartin Matuska 		} else {
124caf54c4fSMartin Matuska 			/* Line is too big; enlarge the buffer. */
125caf54c4fSMartin Matuska 			new_buff_size = lr->buff_length * 2;
126caf54c4fSMartin Matuska 			if (new_buff_size <= lr->buff_length)
127caf54c4fSMartin Matuska 				lafe_errc(1, ENOMEM,
128caf54c4fSMartin Matuska 				    "Line too long in %s", lr->pathname);
129caf54c4fSMartin Matuska 			lr->buff_length = new_buff_size;
1306c95142eSMartin Matuska 			/*
1316c95142eSMartin Matuska 			 * Allocate one extra byte to allow terminating
1326c95142eSMartin Matuska 			 * the buffer.
1336c95142eSMartin Matuska 			 */
1346c95142eSMartin Matuska 			p = realloc(lr->buff, new_buff_size + 1);
135caf54c4fSMartin Matuska 			if (p == NULL)
136caf54c4fSMartin Matuska 				lafe_errc(1, ENOMEM,
137caf54c4fSMartin Matuska 				    "Line too long in %s", lr->pathname);
138caf54c4fSMartin Matuska 			lr->buff_end = p + (lr->buff_end - lr->buff);
139caf54c4fSMartin Matuska 			lr->line_end = p + (lr->line_end - lr->buff);
140caf54c4fSMartin Matuska 			lr->line_start = lr->buff = p;
141caf54c4fSMartin Matuska 		}
142caf54c4fSMartin Matuska 
143caf54c4fSMartin Matuska 		/* Get some more data into the buffer. */
144caf54c4fSMartin Matuska 		bytes_wanted = lr->buff + lr->buff_length - lr->buff_end;
145caf54c4fSMartin Matuska 		bytes_read = fread(lr->buff_end, 1, bytes_wanted, lr->f);
146caf54c4fSMartin Matuska 		lr->buff_end += bytes_read;
1476c95142eSMartin Matuska 		*lr->buff_end = '\0'; /* Always terminate buffer */
1486c95142eSMartin Matuska 		lafe_line_reader_find_eol(lr);
149caf54c4fSMartin Matuska 
150caf54c4fSMartin Matuska 		if (ferror(lr->f))
151caf54c4fSMartin Matuska 			lafe_errc(1, errno, "Can't read %s", lr->pathname);
152caf54c4fSMartin Matuska 		if (feof(lr->f)) {
153caf54c4fSMartin Matuska 			if (lr->f != stdin)
154caf54c4fSMartin Matuska 				fclose(lr->f);
155caf54c4fSMartin Matuska 			lr->f = NULL;
156caf54c4fSMartin Matuska 		}
157caf54c4fSMartin Matuska 	}
158caf54c4fSMartin Matuska }
159caf54c4fSMartin Matuska 
160caf54c4fSMartin Matuska void
161caf54c4fSMartin Matuska lafe_line_reader_free(struct lafe_line_reader *lr)
162caf54c4fSMartin Matuska {
163caf54c4fSMartin Matuska 	free(lr->buff);
164caf54c4fSMartin Matuska 	free(lr->pathname);
165caf54c4fSMartin Matuska 	free(lr);
166caf54c4fSMartin Matuska }
167