1 /*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_STDLIB_H
32 #include <stdlib.h>
33 #endif
34 #ifdef HAVE_STRING_H
35 #include <string.h>
36 #endif
37 #ifdef HAVE_LIMITS_H
38 #include <limits.h>
39 #endif
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #ifdef HAVE_ZLIB_H
44 #include <zlib.h>
45 #endif
46
47 #include "archive.h"
48 #include "archive_entry.h"
49 #include "archive_endian.h"
50 #include "archive_private.h"
51 #include "archive_read_private.h"
52
53 #ifdef HAVE_ZLIB_H
54 struct private_data {
55 z_stream stream;
56 char in_stream;
57 unsigned char *out_block;
58 size_t out_block_size;
59 int64_t total_out;
60 unsigned long crc;
61 uint32_t mtime;
62 char *name;
63 char eof; /* True = found end of compressed data. */
64 };
65
/* Gzip filter callbacks (installed through gzip_reader_vtable). */
static ssize_t	gzip_filter_read(struct archive_read_filter *self,
		    const void **p);
static int	gzip_filter_close(struct archive_read_filter *self);
69 #endif
70
/*
 * Note that we can detect gzip archives even if we can't decompress
 * them.  (In fact, we like detecting them because we can give better
 * error messages.)  So the bid framework here gets compiled even
 * if zlib is unavailable.
 *
 * When zlib is unavailable, gzip_bidder_init() falls back to running
 * an external "gzip -d" program to do the decompression.
 */
static int	gzip_bidder_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	gzip_bidder_init(struct archive_read_filter *self);
84
85 #if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Deprecated; remove in libarchive 4.0.
 *
 * Legacy name that simply forwards to archive_read_support_filter_gzip()
 * and returns whatever that function returns.
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	return (archive_read_support_filter_gzip(a));
}
92 #endif
93
94 static const struct archive_read_filter_bidder_vtable
95 gzip_bidder_vtable = {
96 .bid = gzip_bidder_bid,
97 .init = gzip_bidder_init,
98 };
99
100 int
archive_read_support_filter_gzip(struct archive * _a)101 archive_read_support_filter_gzip(struct archive *_a)
102 {
103 struct archive_read *a = (struct archive_read *)_a;
104
105 if (__archive_read_register_bidder(a, NULL, "gzip",
106 &gzip_bidder_vtable) != ARCHIVE_OK)
107 return (ARCHIVE_FATAL);
108
109 /* Signal the extent of gzip support with the return value here. */
110 #if HAVE_ZLIB_H
111 return (ARCHIVE_OK);
112 #else
113 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
114 "Using external gzip program");
115 return (ARCHIVE_WARN);
116 #endif
117 }
118
/*
 * Upper bounds, in bytes (terminating NUL included), accepted for the
 * optional filename and comment fields of a gzip header.
 */
#define MAX_FILENAME_LENGTH (1024 * 1024L)
#define MAX_COMMENT_LENGTH (1024 * 1024L)

/*
 * Scan a NUL-terminated header field that begins at offset `start`
 * from the current read position.
 *
 * *pp and *pavail are the most recent read-ahead pointer and byte count;
 * both are refreshed whenever more input has to be requested.  `limit`
 * caps the field length (terminator included).
 *
 * Returns the offset just past the terminating NUL, or zero if the
 * field exceeds `limit` or the input ends before a NUL is found.
 */
static ssize_t
skip_header_string(struct archive_read_filter *filter,
    const unsigned char **pp, ssize_t *pavail, ssize_t start, ssize_t limit)
{
	const unsigned char *p = *pp;
	ssize_t len = start;

	do {
		++len;
		/*
		 * Enforce the cap on the field itself.  Without this,
		 * a field that is already entirely buffered would never
		 * hit the refill-time check below.
		 */
		if (len - start > limit)
			return (0);
		if (*pavail < len) {
			if (*pavail > limit)
				return (0);
			p = __archive_read_filter_ahead(filter, len, pavail);
		}
		if (p == NULL)
			return (0);
	} while (p[len - 1] != 0);

	*pp = p;
	return (len);
}

/*
 * Read and verify the header.
 *
 * Returns zero if the header couldn't be validated, else returns
 * number of bytes in header.  If pbits is non-NULL, it receives a
 * count of bits verified, suitable for use by bidder.  In zlib builds,
 * a non-NULL state receives the header's mtime and, when present, a
 * heap copy of the filename (replacing any previous one).
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits,
#ifdef HAVE_ZLIB_H
	       struct private_data *state
#else
	       void *state
#endif
	      )
{
	const unsigned char *p;
	ssize_t avail, len;
	int bits = 0;
	int header_flags;
#ifndef HAVE_ZLIB_H
	(void)state; /* UNUSED */
#endif

	/* Start by looking at the first ten bytes of the header, which
	 * is all fixed layout. */
	len = 10;
	p = __archive_read_filter_ahead(filter, len, &avail);
	if (p == NULL || avail == 0)
		return (0);
	/* We only support deflation- third byte must be 0x08. */
	if (memcmp(p, "\x1F\x8B\x08", 3) != 0)
		return (0);
	bits += 24;
	if ((p[3] & 0xE0) != 0)	/* No reserved flags set. */
		return (0);
	bits += 3;
	header_flags = p[3];
	/* Bytes 4-7 are mod time in little endian. */
#ifdef HAVE_ZLIB_H
	if (state)
		state->mtime = archive_le32dec(p + 4);
#endif
	/* Byte 8 is deflate flags. */
	/* XXXX TODO: return deflate flags back to consume_header for use
	   in initializing the decompressor. */
	/* Byte 9 is OS. */

	/* Optional extra data: 2 byte length plus variable body. */
	if (header_flags & 4) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		/* Only the length bytes are requested here; the body is
		 * skipped by adding its size to the header length. */
		len += ((int)p[len + 1] << 8) | (int)p[len];
		len += 2;
	}

	/* Null-terminated optional filename. */
	if (header_flags & 8) {
		ssize_t file_start = len;

		len = skip_header_string(filter, &p, &avail, file_start,
		    MAX_FILENAME_LENGTH);
		if (len == 0)
			return (0);
#ifdef HAVE_ZLIB_H
		if (state) {
			/* Reset the name in case of repeat header reads. */
			free(state->name);
			state->name = strdup((const char *)&p[file_start]);
		}
#endif
	}

	/* Null-terminated optional comment. */
	if (header_flags & 16) {
		len = skip_header_string(filter, &p, &avail, len,
		    MAX_COMMENT_LENGTH);
		if (len == 0)
			return (0);
	}

	/* Optional header CRC */
	if (header_flags & 2) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		/* XXX TODO: Compute the header CRC, compare it with the
		 * stored 16-bit value and, on a match, credit 16 more
		 * verified bits.  For now the field is only skipped. */
		len += 2;
	}

	if (pbits != NULL)
		*pbits = bits;
	return (len);
}
240
241 /*
242 * Bidder just verifies the header and returns the number of verified bits.
243 */
244 static int
gzip_bidder_bid(struct archive_read_filter_bidder * self,struct archive_read_filter * filter)245 gzip_bidder_bid(struct archive_read_filter_bidder *self,
246 struct archive_read_filter *filter)
247 {
248 int bits_checked;
249
250 (void)self; /* UNUSED */
251
252 if (peek_at_header(filter, &bits_checked, NULL))
253 return (bits_checked);
254 return (0);
255 }
256
257 #ifndef HAVE_ZLIB_H
258
259 /*
260 * If we don't have the library on this system, we can't do the
261 * decompression directly. We can, however, try to run "gzip -d"
262 * in case that's available.
263 */
264 static int
gzip_bidder_init(struct archive_read_filter * self)265 gzip_bidder_init(struct archive_read_filter *self)
266 {
267 int r;
268
269 r = __archive_read_program(self, "gzip -d");
270 /* Note: We set the format here even if __archive_read_program()
271 * above fails. We do, after all, know what the format is
272 * even if we weren't able to read it. */
273 self->code = ARCHIVE_FILTER_GZIP;
274 self->name = "gzip";
275 return (r);
276 }
277
278 #else
279
280 static int
gzip_read_header(struct archive_read_filter * self,struct archive_entry * entry)281 gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
282 {
283 struct private_data *state;
284
285 state = (struct private_data *)self->data;
286
287 /* A mtime of 0 is considered invalid/missing. */
288 if (state->mtime != 0)
289 archive_entry_set_mtime(entry, state->mtime, 0);
290
291 /* If the name is available, extract it. */
292 if (state->name)
293 archive_entry_set_pathname(entry, state->name);
294
295 return (ARCHIVE_OK);
296 }
297
298 static const struct archive_read_filter_vtable
299 gzip_reader_vtable = {
300 .read = gzip_filter_read,
301 .close = gzip_filter_close,
302 #ifdef HAVE_ZLIB_H
303 .read_header = gzip_read_header,
304 #endif
305 };
306
307 /*
308 * Initialize the filter object.
309 */
310 static int
gzip_bidder_init(struct archive_read_filter * self)311 gzip_bidder_init(struct archive_read_filter *self)
312 {
313 struct private_data *state;
314 static const size_t out_block_size = 64 * 1024;
315 void *out_block;
316
317 self->code = ARCHIVE_FILTER_GZIP;
318 self->name = "gzip";
319
320 state = calloc(1, sizeof(*state));
321 out_block = malloc(out_block_size);
322 if (state == NULL || out_block == NULL) {
323 free(out_block);
324 free(state);
325 archive_set_error(&self->archive->archive, ENOMEM,
326 "Can't allocate data for gzip decompression");
327 return (ARCHIVE_FATAL);
328 }
329
330 self->data = state;
331 state->out_block_size = out_block_size;
332 state->out_block = out_block;
333 self->vtable = &gzip_reader_vtable;
334
335 state->in_stream = 0; /* We're not actually within a stream yet. */
336
337 return (ARCHIVE_OK);
338 }
339
340 static int
consume_header(struct archive_read_filter * self)341 consume_header(struct archive_read_filter *self)
342 {
343 struct private_data *state;
344 ssize_t avail;
345 size_t len;
346 int ret;
347
348 state = (struct private_data *)self->data;
349
350 /* If this is a real header, consume it. */
351 len = peek_at_header(self->upstream, NULL, state);
352 if (len == 0)
353 return (ARCHIVE_EOF);
354 __archive_read_filter_consume(self->upstream, len);
355
356 /* Initialize CRC accumulator. */
357 state->crc = crc32(0L, NULL, 0);
358
359 /* Initialize compression library. */
360 state->stream.next_in = (unsigned char *)(uintptr_t)
361 __archive_read_filter_ahead(self->upstream, 1, &avail);
362 state->stream.avail_in = (uInt)avail;
363 ret = inflateInit2(&(state->stream),
364 -15 /* Don't check for zlib header */);
365
366 /* Decipher the error code. */
367 switch (ret) {
368 case Z_OK:
369 state->in_stream = 1;
370 return (ARCHIVE_OK);
371 case Z_STREAM_ERROR:
372 archive_set_error(&self->archive->archive,
373 ARCHIVE_ERRNO_MISC,
374 "Internal error initializing compression library: "
375 "invalid setup parameter");
376 break;
377 case Z_MEM_ERROR:
378 archive_set_error(&self->archive->archive, ENOMEM,
379 "Internal error initializing compression library: "
380 "out of memory");
381 break;
382 case Z_VERSION_ERROR:
383 archive_set_error(&self->archive->archive,
384 ARCHIVE_ERRNO_MISC,
385 "Internal error initializing compression library: "
386 "invalid library version");
387 break;
388 default:
389 archive_set_error(&self->archive->archive,
390 ARCHIVE_ERRNO_MISC,
391 "Internal error initializing compression library: "
392 " Zlib error %d", ret);
393 break;
394 }
395 return (ARCHIVE_FATAL);
396 }
397
398 static int
consume_trailer(struct archive_read_filter * self)399 consume_trailer(struct archive_read_filter *self)
400 {
401 struct private_data *state;
402 const unsigned char *p;
403 ssize_t avail;
404
405 state = (struct private_data *)self->data;
406
407 state->in_stream = 0;
408 switch (inflateEnd(&(state->stream))) {
409 case Z_OK:
410 break;
411 default:
412 archive_set_error(&self->archive->archive,
413 ARCHIVE_ERRNO_MISC,
414 "Failed to clean up gzip decompressor");
415 return (ARCHIVE_FATAL);
416 }
417
418 /* GZip trailer is a fixed 8 byte structure. */
419 p = __archive_read_filter_ahead(self->upstream, 8, &avail);
420 if (p == NULL || avail == 0)
421 return (ARCHIVE_FATAL);
422
423 /* XXX TODO: Verify the length and CRC. */
424
425 /* We've verified the trailer, so consume it now. */
426 __archive_read_filter_consume(self->upstream, 8);
427
428 return (ARCHIVE_OK);
429 }
430
431 static ssize_t
gzip_filter_read(struct archive_read_filter * self,const void ** p)432 gzip_filter_read(struct archive_read_filter *self, const void **p)
433 {
434 struct private_data *state;
435 size_t decompressed;
436 ssize_t avail_in, max_in;
437 int ret;
438
439 state = (struct private_data *)self->data;
440
441 /* Empty our output buffer. */
442 state->stream.next_out = state->out_block;
443 state->stream.avail_out = (uInt)state->out_block_size;
444
445 /* Try to fill the output buffer. */
446 while (state->stream.avail_out > 0 && !state->eof) {
447 /* If we're not in a stream, read a header
448 * and initialize the decompression library. */
449 if (!state->in_stream) {
450 ret = consume_header(self);
451 if (ret == ARCHIVE_EOF) {
452 state->eof = 1;
453 break;
454 }
455 if (ret < ARCHIVE_OK)
456 return (ret);
457 }
458
459 /* Peek at the next available data. */
460 /* ZLib treats stream.next_in as const but doesn't declare
461 * it so, hence this ugly cast. */
462 state->stream.next_in = (unsigned char *)(uintptr_t)
463 __archive_read_filter_ahead(self->upstream, 1, &avail_in);
464 if (state->stream.next_in == NULL) {
465 archive_set_error(&self->archive->archive,
466 ARCHIVE_ERRNO_MISC,
467 "truncated gzip input");
468 return (ARCHIVE_FATAL);
469 }
470 if (UINT_MAX >= SSIZE_MAX)
471 max_in = SSIZE_MAX;
472 else
473 max_in = UINT_MAX;
474 if (avail_in > max_in)
475 avail_in = max_in;
476 state->stream.avail_in = (uInt)avail_in;
477
478 /* Decompress and consume some of that data. */
479 ret = inflate(&(state->stream), 0);
480 switch (ret) {
481 case Z_OK: /* Decompressor made some progress. */
482 __archive_read_filter_consume(self->upstream,
483 avail_in - state->stream.avail_in);
484 break;
485 case Z_STREAM_END: /* Found end of stream. */
486 __archive_read_filter_consume(self->upstream,
487 avail_in - state->stream.avail_in);
488 /* Consume the stream trailer; release the
489 * decompression library. */
490 ret = consume_trailer(self);
491 if (ret < ARCHIVE_OK)
492 return (ret);
493 break;
494 default:
495 /* Return an error. */
496 archive_set_error(&self->archive->archive,
497 ARCHIVE_ERRNO_MISC,
498 "gzip decompression failed");
499 return (ARCHIVE_FATAL);
500 }
501 }
502
503 /* We've read as much as we can. */
504 decompressed = state->stream.next_out - state->out_block;
505 state->total_out += decompressed;
506 if (decompressed == 0)
507 *p = NULL;
508 else
509 *p = state->out_block;
510 return (decompressed);
511 }
512
513 /*
514 * Clean up the decompressor.
515 */
516 static int
gzip_filter_close(struct archive_read_filter * self)517 gzip_filter_close(struct archive_read_filter *self)
518 {
519 struct private_data *state;
520 int ret;
521
522 state = (struct private_data *)self->data;
523 ret = ARCHIVE_OK;
524
525 if (state->in_stream) {
526 switch (inflateEnd(&(state->stream))) {
527 case Z_OK:
528 break;
529 default:
530 archive_set_error(&(self->archive->archive),
531 ARCHIVE_ERRNO_MISC,
532 "Failed to clean up gzip compressor");
533 ret = ARCHIVE_FATAL;
534 }
535 }
536
537 free(state->name);
538 free(state->out_block);
539 free(state);
540 return (ret);
541 }
542
543 #endif /* HAVE_ZLIB_H */
544