xref: /freebsd/contrib/libarchive/libarchive_fe/line_reader.c (revision 39ee7a7a6bdd1557b1c3532abf60d139798ac88b)
1 /*-
2  * Copyright (c) 2008 Tim Kientzle
3  * Copyright (c) 2010 Joerg Sonnenberger
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer
11  *    in this position and unchanged.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "lafe_platform.h"
29 __FBSDID("$FreeBSD$");
30 
31 #include <errno.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 
36 #include "err.h"
37 #include "line_reader.h"
38 
39 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__BORLANDC__)
40 #define strdup _strdup
41 #endif
42 
43 /*
44  * Read lines from file and do something with each one.  If option_null
45  * is set, lines are terminated with zero bytes; otherwise, they're
46  * terminated with newlines.
47  *
48  * This uses a self-sizing buffer to handle arbitrarily-long lines.
49  */
struct lafe_line_reader {
	/* Input stream; set to NULL by the reader once end-of-file is seen. */
	FILE *f;
	/* Start of the heap buffer holding input (NULL until first read). */
	char *buff;
	/* One past the last byte of input currently held in the buffer. */
	char *buff_end;
	/* Start of the line that will be handed out next. */
	char *line_start;
	/* Terminator located for that line, or buff_end if none found yet. */
	char *line_end;
	/* Not referenced by the code visible in this file. */
	char *p;
	/* Private copy of the file name, used in error messages. */
	char *pathname;
	/* Buffer capacity, not counting the extra terminating byte. */
	size_t buff_length;
	/* Nonzero: lines end at NUL bytes instead of CR/LF. */
	int nullSeparator;
	/* Not referenced by the code visible in this file. */
	int ret;
};
58 
59 struct lafe_line_reader *
60 lafe_line_reader(const char *pathname, int nullSeparator)
61 {
62 	struct lafe_line_reader *lr;
63 
64 	lr = calloc(1, sizeof(*lr));
65 	if (lr == NULL)
66 		lafe_errc(1, ENOMEM, "Can't open %s", pathname);
67 
68 	lr->nullSeparator = nullSeparator;
69 	lr->pathname = strdup(pathname);
70 
71 	if (strcmp(pathname, "-") == 0)
72 		lr->f = stdin;
73 	else
74 		lr->f = fopen(pathname, "r");
75 	if (lr->f == NULL)
76 		lafe_errc(1, errno, "Couldn't open %s", pathname);
77 	lr->buff_length = 8192;
78 	lr->line_start = lr->line_end = lr->buff_end = lr->buff = NULL;
79 
80 	return (lr);
81 }
82 
83 static void
84 lafe_line_reader_find_eol(struct lafe_line_reader *lr)
85 {
86 
87 	lr->line_end += strcspn(lr->line_end,
88 	    lr->nullSeparator ? "" : "\x0d\x0a");
89 	*lr->line_end = '\0'; /* Noop if line_end == buff_end */
90 }
91 
92 const char *
93 lafe_line_reader_next(struct lafe_line_reader *lr)
94 {
95 	size_t bytes_wanted, bytes_read, new_buff_size;
96 	char *line_start, *p;
97 
98 	for (;;) {
99 		/* If there's a line in the buffer, return it immediately. */
100 		while (lr->line_end < lr->buff_end) {
101 			line_start = lr->line_start;
102 			lr->line_start = ++lr->line_end;
103 			lafe_line_reader_find_eol(lr);
104 
105 			if (lr->nullSeparator || line_start[0] != '\0')
106 				return (line_start);
107 		}
108 
109 		/* If we're at end-of-file, process the final data. */
110 		if (lr->f == NULL) {
111 			if (lr->line_start == lr->buff_end)
112 				return (NULL); /* No more text */
113 			line_start = lr->line_start;
114 			lr->line_start = lr->buff_end;
115 			return (line_start);
116 		}
117 
118 		/* Buffer only has part of a line. */
119 		if (lr->line_start > lr->buff) {
120 			/* Move a leftover fractional line to the beginning. */
121 			memmove(lr->buff, lr->line_start,
122 			    lr->buff_end - lr->line_start);
123 			lr->buff_end -= lr->line_start - lr->buff;
124 			lr->line_end -= lr->line_start - lr->buff;
125 			lr->line_start = lr->buff;
126 		} else {
127 			/* Line is too big; enlarge the buffer. */
128 			new_buff_size = lr->buff_length * 2;
129 			if (new_buff_size <= lr->buff_length)
130 				lafe_errc(1, ENOMEM,
131 				    "Line too long in %s", lr->pathname);
132 			lr->buff_length = new_buff_size;
133 			/*
134 			 * Allocate one extra byte to allow terminating
135 			 * the buffer.
136 			 */
137 			p = realloc(lr->buff, new_buff_size + 1);
138 			if (p == NULL)
139 				lafe_errc(1, ENOMEM,
140 				    "Line too long in %s", lr->pathname);
141 			lr->buff_end = p + (lr->buff_end - lr->buff);
142 			lr->line_end = p + (lr->line_end - lr->buff);
143 			lr->line_start = lr->buff = p;
144 		}
145 
146 		/* Get some more data into the buffer. */
147 		bytes_wanted = lr->buff + lr->buff_length - lr->buff_end;
148 		bytes_read = fread(lr->buff_end, 1, bytes_wanted, lr->f);
149 		lr->buff_end += bytes_read;
150 		*lr->buff_end = '\0'; /* Always terminate buffer */
151 		lafe_line_reader_find_eol(lr);
152 
153 		if (ferror(lr->f))
154 			lafe_errc(1, errno, "Can't read %s", lr->pathname);
155 		if (feof(lr->f)) {
156 			if (lr->f != stdin)
157 				fclose(lr->f);
158 			lr->f = NULL;
159 		}
160 	}
161 }
162 
163 void
164 lafe_line_reader_free(struct lafe_line_reader *lr)
165 {
166 	free(lr->buff);
167 	free(lr->pathname);
168 	free(lr);
169 }
170