xref: /freebsd/contrib/libarchive/libarchive/archive_read_support_filter_gzip.c (revision bd66c1b43e33540205dbc1187c2f2a15c58b57ba)
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_STDLIB_H
32 #include <stdlib.h>
33 #endif
34 #ifdef HAVE_STRING_H
35 #include <string.h>
36 #endif
37 #ifdef HAVE_LIMITS_H
38 #include <limits.h>
39 #endif
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #ifdef HAVE_ZLIB_H
44 #include <zlib.h>
45 #endif
46 
47 #include "archive.h"
48 #include "archive_entry.h"
49 #include "archive_endian.h"
50 #include "archive_private.h"
51 #include "archive_read_private.h"
52 
53 #ifdef HAVE_ZLIB_H
54 struct private_data {
55 	z_stream	 stream;
56 	char		 in_stream;
57 	unsigned char	*out_block;
58 	size_t		 out_block_size;
59 	int64_t		 total_out;
60 	unsigned long	 crc;
61 	uint32_t	 mtime;
62 	char		*name;
63 	char		 eof; /* True = found end of compressed data. */
64 };
65 
66 /* Gzip Filter. */
67 static ssize_t	gzip_filter_read(struct archive_read_filter *, const void **);
68 static int	gzip_filter_close(struct archive_read_filter *);
69 #endif
70 
/*
 * Note that we can detect gzip archives even if we can't decompress
 * them.  (In fact, we like detecting them because we can give better
 * error messages.)  So the bid framework here gets compiled even
 * if zlib is unavailable.
 *
 * If zlib is unavailable, gzip_bidder_init() instead tries to fire up
 * an external "gzip -d" program through __archive_read_program().
 */
81 static int	gzip_bidder_bid(struct archive_read_filter_bidder *,
82 		    struct archive_read_filter *);
83 static int	gzip_bidder_init(struct archive_read_filter *);
84 
85 #if ARCHIVE_VERSION_NUMBER < 4000000
86 /* Deprecated; remove in libarchive 4.0 */
/*
 * Legacy entry point: forwards to archive_read_support_filter_gzip()
 * and returns its result unchanged.
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	int status;

	status = archive_read_support_filter_gzip(a);
	return (status);
}
92 #endif
93 
94 static const struct archive_read_filter_bidder_vtable
95 gzip_bidder_vtable = {
96 	.bid = gzip_bidder_bid,
97 	.init = gzip_bidder_init,
98 };
99 
100 int
archive_read_support_filter_gzip(struct archive * _a)101 archive_read_support_filter_gzip(struct archive *_a)
102 {
103 	struct archive_read *a = (struct archive_read *)_a;
104 
105 	if (__archive_read_register_bidder(a, NULL, "gzip",
106 				&gzip_bidder_vtable) != ARCHIVE_OK)
107 		return (ARCHIVE_FATAL);
108 
109 	/* Signal the extent of gzip support with the return value here. */
110 #if HAVE_ZLIB_H
111 	return (ARCHIVE_OK);
112 #else
113 	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
114 	    "Using external gzip program");
115 	return (ARCHIVE_WARN);
116 #endif
117 }
118 
/*
 * Upper bounds on how much read-ahead is tolerated while hunting for
 * the NUL that ends the optional filename and comment fields.
 */
#define MAX_FILENAME_LENGTH (1024 * 1024L)
#define MAX_COMMENT_LENGTH (1024 * 1024L)

/*
 * Step over one NUL-terminated header field that begins at offset
 * "len".  *pp / *pavail describe the current read-ahead window and are
 * updated on success.  More data is requested one byte at a time
 * whenever the window is too short, unless the window already exceeds
 * "limit" bytes.
 *
 * Returns the offset just past the terminating NUL, or zero if the
 * limit was exceeded or the read-ahead failed.
 */
static ssize_t
skip_header_string(struct archive_read_filter *filter,
    const unsigned char **pp, ssize_t *pavail, ssize_t len, long limit)
{
	const unsigned char *p = *pp;
	ssize_t avail = *pavail;

	for (;;) {
		++len;
		if (avail < len) {
			if (avail > limit)
				return (0);
			p = __archive_read_filter_ahead(filter, len, &avail);
		}
		if (p == NULL)
			return (0);
		if (p[len - 1] == 0)
			break;
	}

	*pp = p;
	*pavail = avail;
	return (len);
}

/*
 * Read and verify the gzip header without consuming it.
 *
 * Returns zero if the header couldn't be validated, else the number
 * of bytes in the header.  If pbits is non-NULL, it receives the
 * count of bits verified, suitable for use by the bidder.  If state
 * is non-NULL (zlib builds only), the header's mod time and optional
 * filename are recorded in it.
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits,
#ifdef HAVE_ZLIB_H
	       struct private_data *state
#else
	       void *state
#endif
	      )
{
	const unsigned char *hdr;
	ssize_t have, off;
	int verified;
	int flags;
#ifndef HAVE_ZLIB_H
	(void)state; /* UNUSED */
#endif

	/* The first ten bytes have a fixed layout. */
	off = 10;
	hdr = __archive_read_filter_ahead(filter, off, &have);
	if (hdr == NULL || have == 0)
		return (0);

	/* Magic number, then method 0x08: only deflate is supported. */
	if (memcmp(hdr, "\x1F\x8B\x08", 3) != 0)
		return (0);
	/* The three reserved flag bits must all be clear. */
	if ((hdr[3] & 0xE0) != 0)
		return (0);
	verified = 24 + 3;
	flags = hdr[3];

	/* Bytes 4-7 are mod time in little endian. */
#ifdef HAVE_ZLIB_H
	if (state)
		state->mtime = archive_le32dec(hdr + 4);
#endif
	/* Byte 8 is deflate flags. */
	/* XXXX TODO: return deflate flags back to consume_header for use
	   in initializing the decompressor. */
	/* Byte 9 is OS. */

	/* Optional extra data:  2 byte length plus variable body. */
	if (flags & 4) {
		int extra_len;

		hdr = __archive_read_filter_ahead(filter, off + 2, &have);
		if (hdr == NULL)
			return (0);
		extra_len = ((int)hdr[off + 1] << 8) | (int)hdr[off];
		off += extra_len;
		off += 2;
	}

	/* Null-terminated optional filename. */
	if (flags & 8) {
#ifdef HAVE_ZLIB_H
		const ssize_t name_off = off;
#endif
		off = skip_header_string(filter, &hdr, &have, off,
		    MAX_FILENAME_LENGTH);
		if (off == 0)
			return (0);
#ifdef HAVE_ZLIB_H
		if (state) {
			/* Reset the name in case of repeat header reads. */
			free(state->name);
			state->name = strdup((const char *)&hdr[name_off]);
		}
#endif
	}

	/* Null-terminated optional comment. */
	if (flags & 16) {
		off = skip_header_string(filter, &hdr, &have, off,
		    MAX_COMMENT_LENGTH);
		if (off == 0)
			return (0);
	}

	/* Optional header CRC */
	if (flags & 2) {
		hdr = __archive_read_filter_ahead(filter, off + 2, &have);
		if (hdr == NULL)
			return (0);
		/*
		 * XXX TODO: Compute the header CRC and compare it with the
		 * little-endian 16-bit value stored at hdr[off]; reject the
		 * header on mismatch and credit 16 more verified bits on
		 * a match.
		 */
		off += 2;
	}

	if (pbits != NULL)
		*pbits = verified;
	return (off);
}
240 
/*
 * Bid on a stream.  The bid is the number of header bits that
 * peek_at_header() verified, or zero if it rejected the header.
 */
static int
gzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	int verified_bits;

	(void)self; /* UNUSED */

	if (peek_at_header(filter, &verified_bits, NULL) == 0)
		return (0);
	return (verified_bits);
}
256 
257 #ifndef HAVE_ZLIB_H
258 
259 /*
260  * If we don't have the library on this system, we can't do the
261  * decompression directly.  We can, however, try to run "gzip -d"
262  * in case that's available.
263  */
264 static int
gzip_bidder_init(struct archive_read_filter * self)265 gzip_bidder_init(struct archive_read_filter *self)
266 {
267 	int r;
268 
269 	r = __archive_read_program(self, "gzip -d");
270 	/* Note: We set the format here even if __archive_read_program()
271 	 * above fails.  We do, after all, know what the format is
272 	 * even if we weren't able to read it. */
273 	self->code = ARCHIVE_FILTER_GZIP;
274 	self->name = "gzip";
275 	return (r);
276 }
277 
278 #else
279 
280 static int
gzip_read_header(struct archive_read_filter * self,struct archive_entry * entry)281 gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
282 {
283 	struct private_data *state;
284 
285 	state = (struct private_data *)self->data;
286 
287 	/* A mtime of 0 is considered invalid/missing. */
288 	if (state->mtime != 0)
289 		archive_entry_set_mtime(entry, state->mtime, 0);
290 
291 	/* If the name is available, extract it. */
292 	if (state->name)
293 		archive_entry_set_pathname(entry, state->name);
294 
295 	return (ARCHIVE_OK);
296 }
297 
298 static const struct archive_read_filter_vtable
299 gzip_reader_vtable = {
300 	.read = gzip_filter_read,
301 	.close = gzip_filter_close,
302 #ifdef HAVE_ZLIB_H
303 	.read_header = gzip_read_header,
304 #endif
305 };
306 
307 /*
308  * Initialize the filter object.
309  */
310 static int
gzip_bidder_init(struct archive_read_filter * self)311 gzip_bidder_init(struct archive_read_filter *self)
312 {
313 	struct private_data *state;
314 	static const size_t out_block_size = 64 * 1024;
315 	void *out_block;
316 
317 	self->code = ARCHIVE_FILTER_GZIP;
318 	self->name = "gzip";
319 
320 	state = calloc(1, sizeof(*state));
321 	out_block = malloc(out_block_size);
322 	if (state == NULL || out_block == NULL) {
323 		free(out_block);
324 		free(state);
325 		archive_set_error(&self->archive->archive, ENOMEM,
326 		    "Can't allocate data for gzip decompression");
327 		return (ARCHIVE_FATAL);
328 	}
329 
330 	self->data = state;
331 	state->out_block_size = out_block_size;
332 	state->out_block = out_block;
333 	self->vtable = &gzip_reader_vtable;
334 
335 	state->in_stream = 0; /* We're not actually within a stream yet. */
336 
337 	return (ARCHIVE_OK);
338 }
339 
340 static int
consume_header(struct archive_read_filter * self)341 consume_header(struct archive_read_filter *self)
342 {
343 	struct private_data *state;
344 	ssize_t avail;
345 	size_t len;
346 	int ret;
347 
348 	state = (struct private_data *)self->data;
349 
350 	/* If this is a real header, consume it. */
351 	len = peek_at_header(self->upstream, NULL, state);
352 	if (len == 0)
353 		return (ARCHIVE_EOF);
354 	__archive_read_filter_consume(self->upstream, len);
355 
356 	/* Initialize CRC accumulator. */
357 	state->crc = crc32(0L, NULL, 0);
358 
359 	/* Initialize compression library. */
360 	state->stream.next_in = (unsigned char *)(uintptr_t)
361 	    __archive_read_filter_ahead(self->upstream, 1, &avail);
362 	state->stream.avail_in = (uInt)avail;
363 	ret = inflateInit2(&(state->stream),
364 	    -15 /* Don't check for zlib header */);
365 
366 	/* Decipher the error code. */
367 	switch (ret) {
368 	case Z_OK:
369 		state->in_stream = 1;
370 		return (ARCHIVE_OK);
371 	case Z_STREAM_ERROR:
372 		archive_set_error(&self->archive->archive,
373 		    ARCHIVE_ERRNO_MISC,
374 		    "Internal error initializing compression library: "
375 		    "invalid setup parameter");
376 		break;
377 	case Z_MEM_ERROR:
378 		archive_set_error(&self->archive->archive, ENOMEM,
379 		    "Internal error initializing compression library: "
380 		    "out of memory");
381 		break;
382 	case Z_VERSION_ERROR:
383 		archive_set_error(&self->archive->archive,
384 		    ARCHIVE_ERRNO_MISC,
385 		    "Internal error initializing compression library: "
386 		    "invalid library version");
387 		break;
388 	default:
389 		archive_set_error(&self->archive->archive,
390 		    ARCHIVE_ERRNO_MISC,
391 		    "Internal error initializing compression library: "
392 		    " Zlib error %d", ret);
393 		break;
394 	}
395 	return (ARCHIVE_FATAL);
396 }
397 
398 static int
consume_trailer(struct archive_read_filter * self)399 consume_trailer(struct archive_read_filter *self)
400 {
401 	struct private_data *state;
402 	const unsigned char *p;
403 	ssize_t avail;
404 
405 	state = (struct private_data *)self->data;
406 
407 	state->in_stream = 0;
408 	switch (inflateEnd(&(state->stream))) {
409 	case Z_OK:
410 		break;
411 	default:
412 		archive_set_error(&self->archive->archive,
413 		    ARCHIVE_ERRNO_MISC,
414 		    "Failed to clean up gzip decompressor");
415 		return (ARCHIVE_FATAL);
416 	}
417 
418 	/* GZip trailer is a fixed 8 byte structure. */
419 	p = __archive_read_filter_ahead(self->upstream, 8, &avail);
420 	if (p == NULL || avail == 0)
421 		return (ARCHIVE_FATAL);
422 
423 	/* XXX TODO: Verify the length and CRC. */
424 
425 	/* We've verified the trailer, so consume it now. */
426 	__archive_read_filter_consume(self->upstream, 8);
427 
428 	return (ARCHIVE_OK);
429 }
430 
431 static ssize_t
gzip_filter_read(struct archive_read_filter * self,const void ** p)432 gzip_filter_read(struct archive_read_filter *self, const void **p)
433 {
434 	struct private_data *state;
435 	size_t decompressed;
436 	ssize_t avail_in, max_in;
437 	int ret;
438 
439 	state = (struct private_data *)self->data;
440 
441 	/* Empty our output buffer. */
442 	state->stream.next_out = state->out_block;
443 	state->stream.avail_out = (uInt)state->out_block_size;
444 
445 	/* Try to fill the output buffer. */
446 	while (state->stream.avail_out > 0 && !state->eof) {
447 		/* If we're not in a stream, read a header
448 		 * and initialize the decompression library. */
449 		if (!state->in_stream) {
450 			ret = consume_header(self);
451 			if (ret == ARCHIVE_EOF) {
452 				state->eof = 1;
453 				break;
454 			}
455 			if (ret < ARCHIVE_OK)
456 				return (ret);
457 		}
458 
459 		/* Peek at the next available data. */
460 		/* ZLib treats stream.next_in as const but doesn't declare
461 		 * it so, hence this ugly cast. */
462 		state->stream.next_in = (unsigned char *)(uintptr_t)
463 		    __archive_read_filter_ahead(self->upstream, 1, &avail_in);
464 		if (state->stream.next_in == NULL) {
465 			archive_set_error(&self->archive->archive,
466 			    ARCHIVE_ERRNO_MISC,
467 			    "truncated gzip input");
468 			return (ARCHIVE_FATAL);
469 		}
470 		if (UINT_MAX >= SSIZE_MAX)
471 			max_in = SSIZE_MAX;
472 		else
473 			max_in = UINT_MAX;
474 		if (avail_in > max_in)
475 			avail_in = max_in;
476 		state->stream.avail_in = (uInt)avail_in;
477 
478 		/* Decompress and consume some of that data. */
479 		ret = inflate(&(state->stream), 0);
480 		switch (ret) {
481 		case Z_OK: /* Decompressor made some progress. */
482 			__archive_read_filter_consume(self->upstream,
483 			    avail_in - state->stream.avail_in);
484 			break;
485 		case Z_STREAM_END: /* Found end of stream. */
486 			__archive_read_filter_consume(self->upstream,
487 			    avail_in - state->stream.avail_in);
488 			/* Consume the stream trailer; release the
489 			 * decompression library. */
490 			ret = consume_trailer(self);
491 			if (ret < ARCHIVE_OK)
492 				return (ret);
493 			break;
494 		default:
495 			/* Return an error. */
496 			archive_set_error(&self->archive->archive,
497 			    ARCHIVE_ERRNO_MISC,
498 			    "gzip decompression failed");
499 			return (ARCHIVE_FATAL);
500 		}
501 	}
502 
503 	/* We've read as much as we can. */
504 	decompressed = state->stream.next_out - state->out_block;
505 	state->total_out += decompressed;
506 	if (decompressed == 0)
507 		*p = NULL;
508 	else
509 		*p = state->out_block;
510 	return (decompressed);
511 }
512 
513 /*
514  * Clean up the decompressor.
515  */
516 static int
gzip_filter_close(struct archive_read_filter * self)517 gzip_filter_close(struct archive_read_filter *self)
518 {
519 	struct private_data *state;
520 	int ret;
521 
522 	state = (struct private_data *)self->data;
523 	ret = ARCHIVE_OK;
524 
525 	if (state->in_stream) {
526 		switch (inflateEnd(&(state->stream))) {
527 		case Z_OK:
528 			break;
529 		default:
530 			archive_set_error(&(self->archive->archive),
531 			    ARCHIVE_ERRNO_MISC,
532 			    "Failed to clean up gzip compressor");
533 			ret = ARCHIVE_FATAL;
534 		}
535 	}
536 
537 	free(state->name);
538 	free(state->out_block);
539 	free(state);
540 	return (ret);
541 }
542 
543 #endif /* HAVE_ZLIB_H */
544