1 /*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_time_private.h"
47 #include "archive_endian.h"
48
49
/* Bounds on the length of a single LZ match. */
#define MAXMATCH		256	/* Longest match length. */
#define MINMATCH		3	/* Shortest match length. */
/*
 * Layout of the literal table:
 *
 *   +0              +256                      +510
 *   +---------------+-------------------------+
 *   | literal code  |       match length      |
 *   |   0 ... 255   |  MINMATCH ... MAXMATCH  |
 *   +---------------+-------------------------+
 *    <---          LT_BITLEN_SIZE         --->
 */
/* Literal table size: every byte value plus every possible match length. */
#define LT_BITLEN_SIZE		((UCHAR_MAX + 1) + (MAXMATCH - MINMATCH + 1))
/*
 * Position table size.
 * Note: this is used for both the position table and the pre-literal table.
 */
#define PT_BITLEN_SIZE		(3 + 16)
66
/*
 * Bit stream reader.
 */
#define CACHE_TYPE		uint64_t
#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
struct lzh_br {
	/* Cache buffer. */
	CACHE_TYPE	 cache_buffer;
	/* Number of bits currently available in cache_buffer. */
	int		 cache_avail;
};

/*
 * Huffman coding.
 *
 * An index table is used because it is faster than searching a Huffman
 * coding tree, which is a binary tree.  However, a large index table
 * causes many L1 cache read misses.
 */
#define HTBL_BITS		10

/* One node of the binary tree used for codes longer than the index table. */
struct htree_t {
	uint16_t	 left;
	uint16_t	 right;
};

struct huffman {
	int		 len_size;
	int		 len_avail;
	int		 len_bits;
	int		 freq[17];
	unsigned char	*bitlen;

	int		 max_bits;
	int		 shift_bits;
	int		 tbl_bits;
	int		 tree_used;
	int		 tree_avail;
	/* Direct access table. */
	uint16_t	*tbl;
	/* Binary tree table for extra bits over the direct access. */
	struct htree_t	*tree;
};

/*
 * State of the LZH decoder.
 */
struct lzh_dec {
	/* Decoding status. */
	int		 state;

	/*
	 * Window onto the last 8Ki(lh5), 32Ki(lh6) or 64Ki(lh7) bytes of
	 * decoded data.
	 */
	int		 w_size;
	int		 w_mask;
	/* Window buffer, which is a loop buffer. */
	unsigned char	*w_buff;
	/* The insert position to the window. */
	int		 w_pos;
	/* The position where we can copy decoded code from the window. */
	int		 copy_pos;
	/* How many bytes of decoded code we can copy from the window. */
	int		 copy_len;

	/* Bit stream reader. */
	struct lzh_br	 br;

	/*
	 * Huffman tables.
	 * NOTE(review): presumably the literal (lt) and position (pt)
	 * tables, cf. LT_BITLEN_SIZE / PT_BITLEN_SIZE -- confirm.
	 */
	struct huffman	 lt, pt;

	int		 blocks_avail;
	int		 pos_pt_len_size;
	int		 pos_pt_len_bits;
	int		 literal_pt_len_size;
	int		 literal_pt_len_bits;
	int		 reading_position;
	int		 loop;
	int		 error;
};

/*
 * Stream handle passed to lzh_decode_init(), lzh_decode() and
 * lzh_decode_free(); `ds` points at the decoder state above.
 */
struct lzh_stream {
	const unsigned char	*next_in;
	int			 avail_in;
	int64_t			 total_in;
	const unsigned char	*ref_ptr;
	int			 avail_out;
	int64_t			 total_out;
	struct lzh_dec		*ds;
};
148
149 struct lha {
150 /* entry_bytes_remaining is the number of bytes we expect. */
151 int64_t entry_offset;
152 int64_t entry_bytes_remaining;
153 int64_t entry_unconsumed;
154 uint16_t entry_crc_calculated;
155
156 size_t header_size; /* header size */
157 unsigned char level; /* header level */
158 char method[3]; /* compress type */
159 int64_t compsize; /* compressed data size */
160 int64_t origsize; /* original file size */
161 int setflag;
162 #define BIRTHTIME_IS_SET 1
163 #define ATIME_IS_SET 2
164 #define UNIX_MODE_IS_SET 4
165 #define CRC_IS_SET 8
166 int64_t birthtime;
167 uint32_t birthtime_tv_nsec;
168 int64_t mtime;
169 uint32_t mtime_tv_nsec;
170 int64_t atime;
171 uint32_t atime_tv_nsec;
172 mode_t mode;
173 int64_t uid;
174 int64_t gid;
175 struct archive_string uname;
176 struct archive_string gname;
177 uint16_t header_crc;
178 uint16_t crc;
179 /* dirname and filename could be in different codepages */
180 struct archive_string_conv *sconv_dir;
181 struct archive_string_conv *sconv_fname;
182 struct archive_string_conv *opt_sconv;
183
184 struct archive_string dirname;
185 struct archive_string filename;
186 struct archive_wstring ws;
187
188 unsigned char dos_attr;
189
190 /* Flag to mark progress that an archive was read their first header.*/
191 char found_first_header;
192 /* Flag to mark that indicates an empty directory. */
193 char directory;
194
195 /* Flags to mark progress of decompression. */
196 char decompress_init;
197 char end_of_entry;
198 char end_of_entry_cleanup;
199 char entry_is_compressed;
200
201 char format_name[64];
202
203 struct lzh_stream strm;
204 };
205
/*
 * Offsets of the members shared by every LHA header level, and the
 * smallest number of bytes that must be available to inspect them.
 */
#define H_METHOD_OFFSET	2	/* Compress type ("-l??-"). */
#define H_ATTR_OFFSET	19	/* DOS attribute. */
#define H_LEVEL_OFFSET	20	/* Header level. */
#define H_SIZE		22	/* Minimum header size. */
213
/* Format callbacks registered by archive_read_support_format_lha(). */
static int	archive_read_format_lha_bid(struct archive_read *, int);
static int	archive_read_format_lha_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_lha_read_header(struct archive_read *,
		    struct archive_entry *);
static int	archive_read_format_lha_read_data(struct archive_read *,
		    const void **, size_t *, int64_t *);
static int	archive_read_format_lha_read_data_skip(struct archive_read *);
static int	archive_read_format_lha_cleanup(struct archive_read *);

/* Header parsing helpers. */
static void	lha_replace_path_separator(struct lha *,
		    struct archive_entry *);
static int	lha_read_file_header_0(struct archive_read *, struct lha *);
static int	lha_read_file_header_1(struct archive_read *, struct lha *);
static int	lha_read_file_header_2(struct archive_read *, struct lha *);
static int	lha_read_file_header_3(struct archive_read *, struct lha *);
static int	lha_read_file_extended_header(struct archive_read *,
		    struct lha *, uint16_t *, int, uint64_t, size_t *);
static size_t	lha_check_header_format(const void *);
static int	lha_skip_sfx(struct archive_read *);
static unsigned char	lha_calcsum(unsigned char, const void *,
		    int, size_t);
static int	lha_parse_linkname(struct archive_wstring *,
		    struct archive_wstring *);

/* Entry data readers and CRC16. */
static int	lha_read_data_none(struct archive_read *, const void **,
		    size_t *, int64_t *);
static int	lha_read_data_lzh(struct archive_read *, const void **,
		    size_t *, int64_t *);
static void	lha_crc16_init(void);
static uint16_t	lha_crc16(uint16_t, const void *, size_t);

/* LZH decoder. */
static int	lzh_decode_init(struct lzh_stream *, const char *);
static void	lzh_decode_free(struct lzh_stream *);
static int	lzh_decode(struct lzh_stream *, int);
static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
static int	lzh_huffman_init(struct huffman *, size_t, int);
static void	lzh_huffman_free(struct huffman *);
static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
static int	lzh_make_fake_table(struct huffman *, uint16_t);
static int	lzh_make_huffman_table(struct huffman *);
static inline int lzh_decode_huffman(struct huffman *, unsigned);
static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
255
256
257 int
archive_read_support_format_lha(struct archive * _a)258 archive_read_support_format_lha(struct archive *_a)
259 {
260 struct archive_read *a = (struct archive_read *)_a;
261 struct lha *lha;
262 int r;
263
264 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
265 ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
266
267 lha = calloc(1, sizeof(*lha));
268 if (lha == NULL) {
269 archive_set_error(&a->archive, ENOMEM,
270 "Can't allocate lha data");
271 return (ARCHIVE_FATAL);
272 }
273 archive_string_init(&lha->ws);
274
275 r = __archive_read_register_format(a,
276 lha,
277 "lha",
278 archive_read_format_lha_bid,
279 archive_read_format_lha_options,
280 archive_read_format_lha_read_header,
281 archive_read_format_lha_read_data,
282 archive_read_format_lha_read_data_skip,
283 NULL,
284 archive_read_format_lha_cleanup,
285 NULL,
286 NULL);
287
288 if (r != ARCHIVE_OK)
289 free(lha);
290 return (ARCHIVE_OK);
291 }
292
293 static size_t
lha_check_header_format(const void * h)294 lha_check_header_format(const void *h)
295 {
296 const unsigned char *p = h;
297 size_t next_skip_bytes;
298
299 switch (p[H_METHOD_OFFSET+3]) {
300 /*
301 * "-lh0-" ... "-lh7-" "-lhd-"
302 * "-lzs-" "-lz5-"
303 */
304 case '0': case '1': case '2': case '3':
305 case '4': case '5': case '6': case '7':
306 case 'd':
307 case 's':
308 next_skip_bytes = 4;
309
310 /* b0 == 0 means the end of an LHa archive file. */
311 if (p[0] == 0)
312 break;
313 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
314 || p[H_METHOD_OFFSET+4] != '-')
315 break;
316
317 if (p[H_METHOD_OFFSET+2] == 'h') {
318 /* "-lh?-" */
319 if (p[H_METHOD_OFFSET+3] == 's')
320 break;
321 if (p[H_LEVEL_OFFSET] == 0)
322 return (0);
323 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
324 return (0);
325 }
326 if (p[H_METHOD_OFFSET+2] == 'z') {
327 /* LArc extensions: -lzs-,-lz4- and -lz5- */
328 if (p[H_LEVEL_OFFSET] != 0)
329 break;
330 if (p[H_METHOD_OFFSET+3] == 's'
331 || p[H_METHOD_OFFSET+3] == '4'
332 || p[H_METHOD_OFFSET+3] == '5')
333 return (0);
334 }
335 break;
336 case 'h': next_skip_bytes = 1; break;
337 case 'z': next_skip_bytes = 1; break;
338 case 'l': next_skip_bytes = 2; break;
339 case '-': next_skip_bytes = 3; break;
340 default : next_skip_bytes = 4; break;
341 }
342
343 return (next_skip_bytes);
344 }
345
346 static int
archive_read_format_lha_bid(struct archive_read * a,int best_bid)347 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
348 {
349 const char *p;
350 const void *buff;
351 ssize_t bytes_avail, offset, window;
352 size_t next;
353
354 /* If there's already a better bid than we can ever
355 make, don't bother testing. */
356 if (best_bid > 30)
357 return (-1);
358
359 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
360 return (-1);
361
362 if (lha_check_header_format(p) == 0)
363 return (30);
364
365 if (p[0] == 'M' && p[1] == 'Z') {
366 /* PE file */
367 offset = 0;
368 window = 4096;
369 while (offset < (1024 * 20)) {
370 buff = __archive_read_ahead(a, offset + window,
371 &bytes_avail);
372 if (buff == NULL) {
373 /* Remaining bytes are less than window. */
374 window >>= 1;
375 if (window < (H_SIZE + 3))
376 return (0);
377 continue;
378 }
379 p = (const char *)buff + offset;
380 while (p + H_SIZE < (const char *)buff + bytes_avail) {
381 if ((next = lha_check_header_format(p)) == 0)
382 return (30);
383 p += next;
384 }
385 offset = p - (const char *)buff;
386 }
387 }
388 return (0);
389 }
390
391 static int
archive_read_format_lha_options(struct archive_read * a,const char * key,const char * val)392 archive_read_format_lha_options(struct archive_read *a,
393 const char *key, const char *val)
394 {
395 struct lha *lha;
396 int ret = ARCHIVE_FAILED;
397
398 lha = (struct lha *)(a->format->data);
399 if (strcmp(key, "hdrcharset") == 0) {
400 if (val == NULL || val[0] == 0)
401 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
402 "lha: hdrcharset option needs a character-set name");
403 else {
404 lha->opt_sconv =
405 archive_string_conversion_from_charset(
406 &a->archive, val, 0);
407 if (lha->opt_sconv != NULL)
408 ret = ARCHIVE_OK;
409 else
410 ret = ARCHIVE_FATAL;
411 }
412 return (ret);
413 }
414
415 /* Note: The "warn" return is just to inform the options
416 * supervisor that we didn't handle it. It will generate
417 * a suitable error if no one used this option. */
418 return (ARCHIVE_WARN);
419 }
420
421 static int
lha_skip_sfx(struct archive_read * a)422 lha_skip_sfx(struct archive_read *a)
423 {
424 const void *h;
425 const char *p, *q;
426 size_t next, skip;
427 ssize_t bytes, window;
428
429 window = 4096;
430 for (;;) {
431 h = __archive_read_ahead(a, window, &bytes);
432 if (h == NULL) {
433 /* Remaining bytes are less than window. */
434 window >>= 1;
435 if (window < (H_SIZE + 3))
436 goto fatal;
437 continue;
438 }
439 if (bytes < H_SIZE)
440 goto fatal;
441 p = h;
442 q = p + bytes;
443
444 /*
445 * Scan ahead until we find something that looks
446 * like the lha header.
447 */
448 while (p + H_SIZE < q) {
449 if ((next = lha_check_header_format(p)) == 0) {
450 skip = p - (const char *)h;
451 __archive_read_consume(a, skip);
452 return (ARCHIVE_OK);
453 }
454 p += next;
455 }
456 skip = p - (const char *)h;
457 __archive_read_consume(a, skip);
458 }
459 fatal:
460 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
461 "Couldn't find out LHa header");
462 return (ARCHIVE_FATAL);
463 }
464
465 static int
truncated_error(struct archive_read * a)466 truncated_error(struct archive_read *a)
467 {
468 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
469 "Truncated LHa header");
470 return (ARCHIVE_FATAL);
471 }
472
473 static int
archive_read_format_lha_read_header(struct archive_read * a,struct archive_entry * entry)474 archive_read_format_lha_read_header(struct archive_read *a,
475 struct archive_entry *entry)
476 {
477 struct archive_wstring linkname;
478 struct archive_wstring pathname;
479 struct lha *lha;
480 const unsigned char *p;
481 const char *signature;
482 int err;
483 struct archive_mstring conv_buffer;
484 const wchar_t *conv_buffer_p;
485
486 lha_crc16_init();
487
488 a->archive.archive_format = ARCHIVE_FORMAT_LHA;
489 if (a->archive.archive_format_name == NULL)
490 a->archive.archive_format_name = "lha";
491
492 lha = (struct lha *)(a->format->data);
493 lha->decompress_init = 0;
494 lha->end_of_entry = 0;
495 lha->end_of_entry_cleanup = 0;
496 lha->entry_unconsumed = 0;
497
498 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
499 /*
500 * LHa archiver added 0 to the tail of its archive file as
501 * the mark of the end of the archive.
502 */
503 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
504 if (signature == NULL || signature[0] == 0)
505 return (ARCHIVE_EOF);
506 return (truncated_error(a));
507 }
508
509 signature = (const char *)p;
510 if (lha->found_first_header == 0 &&
511 signature[0] == 'M' && signature[1] == 'Z') {
512 /* This is an executable? Must be self-extracting... */
513 err = lha_skip_sfx(a);
514 if (err < ARCHIVE_WARN)
515 return (err);
516
517 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
518 return (truncated_error(a));
519 signature = (const char *)p;
520 }
521 /* signature[0] == 0 means the end of an LHa archive file. */
522 if (signature[0] == 0)
523 return (ARCHIVE_EOF);
524
525 /*
526 * Check the header format and method type.
527 */
528 if (lha_check_header_format(p) != 0) {
529 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
530 "Bad LHa file");
531 return (ARCHIVE_FATAL);
532 }
533
534 /* We've found the first header. */
535 lha->found_first_header = 1;
536 /* Set a default value and common data */
537 lha->header_size = 0;
538 lha->level = p[H_LEVEL_OFFSET];
539 lha->method[0] = p[H_METHOD_OFFSET+1];
540 lha->method[1] = p[H_METHOD_OFFSET+2];
541 lha->method[2] = p[H_METHOD_OFFSET+3];
542 if (memcmp(lha->method, "lhd", 3) == 0)
543 lha->directory = 1;
544 else
545 lha->directory = 0;
546 if (memcmp(lha->method, "lh0", 3) == 0 ||
547 memcmp(lha->method, "lz4", 3) == 0)
548 lha->entry_is_compressed = 0;
549 else
550 lha->entry_is_compressed = 1;
551
552 lha->compsize = 0;
553 lha->origsize = 0;
554 lha->setflag = 0;
555 lha->birthtime = 0;
556 lha->birthtime_tv_nsec = 0;
557 lha->mtime = 0;
558 lha->mtime_tv_nsec = 0;
559 lha->atime = 0;
560 lha->atime_tv_nsec = 0;
561 lha->mode = (lha->directory)? 0777 : 0666;
562 lha->uid = 0;
563 lha->gid = 0;
564 archive_string_empty(&lha->dirname);
565 archive_string_empty(&lha->filename);
566 lha->dos_attr = 0;
567 if (lha->opt_sconv != NULL) {
568 lha->sconv_dir = lha->opt_sconv;
569 lha->sconv_fname = lha->opt_sconv;
570 } else {
571 lha->sconv_dir = NULL;
572 lha->sconv_fname = NULL;
573 }
574
575 switch (p[H_LEVEL_OFFSET]) {
576 case 0:
577 err = lha_read_file_header_0(a, lha);
578 break;
579 case 1:
580 err = lha_read_file_header_1(a, lha);
581 break;
582 case 2:
583 err = lha_read_file_header_2(a, lha);
584 break;
585 case 3:
586 err = lha_read_file_header_3(a, lha);
587 break;
588 default:
589 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
590 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
591 err = ARCHIVE_FATAL;
592 break;
593 }
594 if (err < ARCHIVE_WARN)
595 return (err);
596
597
598 if (!lha->directory && archive_strlen(&lha->filename) == 0)
599 /* The filename has not been set */
600 return (truncated_error(a));
601
602 /*
603 * Make a pathname from a dirname and a filename, after converting to Unicode.
604 * This is because codepages might differ between dirname and filename.
605 */
606 archive_string_init(&pathname);
607 archive_string_init(&linkname);
608 archive_string_init(&conv_buffer.aes_mbs);
609 archive_string_init(&conv_buffer.aes_mbs_in_locale);
610 archive_string_init(&conv_buffer.aes_utf8);
611 archive_string_init(&conv_buffer.aes_wcs);
612 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
613 archive_set_error(&a->archive,
614 ARCHIVE_ERRNO_FILE_FORMAT,
615 "Pathname cannot be converted "
616 "from %s to Unicode.",
617 archive_string_conversion_charset_name(lha->sconv_dir));
618 err = ARCHIVE_FATAL;
619 } else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
620 err = ARCHIVE_FATAL;
621 if (err == ARCHIVE_FATAL) {
622 archive_mstring_clean(&conv_buffer);
623 archive_wstring_free(&pathname);
624 archive_wstring_free(&linkname);
625 return (err);
626 }
627 archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
628
629 archive_string_empty(&conv_buffer.aes_mbs);
630 archive_string_empty(&conv_buffer.aes_mbs_in_locale);
631 archive_string_empty(&conv_buffer.aes_utf8);
632 archive_wstring_empty(&conv_buffer.aes_wcs);
633 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
634 archive_set_error(&a->archive,
635 ARCHIVE_ERRNO_FILE_FORMAT,
636 "Pathname cannot be converted "
637 "from %s to Unicode.",
638 archive_string_conversion_charset_name(lha->sconv_fname));
639 err = ARCHIVE_FATAL;
640 }
641 else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
642 err = ARCHIVE_FATAL;
643 if (err == ARCHIVE_FATAL) {
644 archive_mstring_clean(&conv_buffer);
645 archive_wstring_free(&pathname);
646 archive_wstring_free(&linkname);
647 return (err);
648 }
649 archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
650 archive_mstring_clean(&conv_buffer);
651
652 if ((lha->mode & AE_IFMT) == AE_IFLNK) {
653 /*
654 * Extract the symlink-name if it's included in the pathname.
655 */
656 if (!lha_parse_linkname(&linkname, &pathname)) {
657 /* We couldn't get the symlink-name. */
658 archive_set_error(&a->archive,
659 ARCHIVE_ERRNO_FILE_FORMAT,
660 "Unknown symlink-name");
661 archive_wstring_free(&pathname);
662 archive_wstring_free(&linkname);
663 return (ARCHIVE_FAILED);
664 }
665 } else {
666 /*
667 * Make sure a file-type is set.
668 * The mode has been overridden if it is in the extended data.
669 */
670 lha->mode = (lha->mode & ~AE_IFMT) |
671 ((lha->directory)? AE_IFDIR: AE_IFREG);
672 }
673 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
674 (lha->dos_attr & 1) != 0)
675 lha->mode &= ~(0222);/* read only. */
676
677 /*
678 * Set basic file parameters.
679 */
680 archive_entry_copy_pathname_w(entry, pathname.s);
681 archive_wstring_free(&pathname);
682 if (archive_strlen(&linkname) > 0) {
683 archive_entry_copy_symlink_w(entry, linkname.s);
684 } else
685 archive_entry_set_symlink(entry, NULL);
686 archive_wstring_free(&linkname);
687 /*
688 * When a header level is 0, there is a possibility that
689 * a pathname and a symlink has '\' character, a directory
690 * separator in DOS/Windows. So we should convert it to '/'.
691 */
692 if (p[H_LEVEL_OFFSET] == 0)
693 lha_replace_path_separator(lha, entry);
694
695 archive_entry_set_mode(entry, lha->mode);
696 archive_entry_set_uid(entry, lha->uid);
697 archive_entry_set_gid(entry, lha->gid);
698 if (archive_strlen(&lha->uname) > 0)
699 archive_entry_set_uname(entry, lha->uname.s);
700 if (archive_strlen(&lha->gname) > 0)
701 archive_entry_set_gname(entry, lha->gname.s);
702 if (lha->setflag & BIRTHTIME_IS_SET) {
703 archive_entry_set_birthtime(entry, lha->birthtime,
704 lha->birthtime_tv_nsec);
705 archive_entry_set_ctime(entry, lha->birthtime,
706 lha->birthtime_tv_nsec);
707 } else {
708 archive_entry_unset_birthtime(entry);
709 archive_entry_unset_ctime(entry);
710 }
711 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
712 if (lha->setflag & ATIME_IS_SET)
713 archive_entry_set_atime(entry, lha->atime,
714 lha->atime_tv_nsec);
715 else
716 archive_entry_unset_atime(entry);
717 if (lha->directory || archive_entry_symlink(entry) != NULL)
718 archive_entry_unset_size(entry);
719 else
720 archive_entry_set_size(entry, lha->origsize);
721
722 /*
723 * Prepare variables used to read a file content.
724 */
725 lha->entry_bytes_remaining = lha->compsize;
726 if (lha->entry_bytes_remaining < 0) {
727 archive_set_error(&a->archive,
728 ARCHIVE_ERRNO_FILE_FORMAT,
729 "Invalid LHa entry size");
730 return (ARCHIVE_FATAL);
731 }
732 lha->entry_offset = 0;
733 lha->entry_crc_calculated = 0;
734
735 /*
736 * This file does not have a content.
737 */
738 if (lha->directory || lha->compsize == 0)
739 lha->end_of_entry = 1;
740
741 snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-",
742 lha->method[0], lha->method[1], lha->method[2]);
743 a->archive.archive_format_name = lha->format_name;
744
745 return (err);
746 }
747
748 /*
749 * Replace a DOS path separator '\' by a character '/'.
750 * Some multi-byte character set have a character '\' in its second byte.
751 */
752 static void
lha_replace_path_separator(struct lha * lha,struct archive_entry * entry)753 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
754 {
755 const wchar_t *wp;
756 size_t i;
757
758 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
759 archive_wstrcpy(&(lha->ws), wp);
760 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
761 if (lha->ws.s[i] == L'\\')
762 lha->ws.s[i] = L'/';
763 }
764 archive_entry_copy_pathname_w(entry, lha->ws.s);
765 }
766
767 if ((wp = archive_entry_symlink_w(entry)) != NULL) {
768 archive_wstrcpy(&(lha->ws), wp);
769 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
770 if (lha->ws.s[i] == L'\\')
771 lha->ws.s[i] = L'/';
772 }
773 archive_entry_copy_symlink_w(entry, lha->ws.s);
774 }
775 }
776
777 /*
778 * Header 0 format
779 *
780 * +0 +1 +2 +7 +11
781 * +---------------+----------+----------------+-------------------+
782 * |header size(*1)|header sum|compression type|compressed size(*2)|
783 * +---------------+----------+----------------+-------------------+
784 * <---------------------(*1)----------*
785 *
786 * +11 +15 +17 +19 +20 +21
787 * +-----------------+---------+---------+--------------+----------------+
788 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
789 * +-----------------+---------+---------+--------------+----------------+
790 * *--------------------------------(*1)---------------------------------*
791 *
792 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4)
793 * +---------------+---------+----------+----------------+------------------+
794 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data |
795 * +---------------+---------+----------+----------------+------------------+
796 * <--(*3)-> <------(*2)------>
797 * *----------------------(*1)-------------------------->
798 *
799 */
800 #define H0_HEADER_SIZE_OFFSET 0
801 #define H0_HEADER_SUM_OFFSET 1
802 #define H0_COMP_SIZE_OFFSET 7
803 #define H0_ORIG_SIZE_OFFSET 11
804 #define H0_DOS_TIME_OFFSET 15
805 #define H0_NAME_LEN_OFFSET 21
806 #define H0_FILE_NAME_OFFSET 22
807 #define H0_FIXED_SIZE 24
808 static int
lha_read_file_header_0(struct archive_read * a,struct lha * lha)809 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
810 {
811 const unsigned char *p;
812 int extdsize, namelen;
813 unsigned char headersum, sum_calculated;
814
815 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
816 return (truncated_error(a));
817 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
818 headersum = p[H0_HEADER_SUM_OFFSET];
819 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
820 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
821 lha->mtime = dos_to_unix(archive_le32dec(p + H0_DOS_TIME_OFFSET));
822 namelen = p[H0_NAME_LEN_OFFSET];
823 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
824 if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
825 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
826 "Invalid LHa header");
827 return (ARCHIVE_FATAL);
828 }
829 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
830 return (truncated_error(a));
831
832 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
833 /* When extdsize == -2, A CRC16 value is not present in the header. */
834 if (extdsize >= 0) {
835 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
836 lha->setflag |= CRC_IS_SET;
837 }
838 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
839
840 /* Read an extended header */
841 if (extdsize > 0) {
842 /* This extended data is set by 'LHa for UNIX' only.
843 * Maybe fixed size.
844 */
845 p += H0_FILE_NAME_OFFSET + namelen + 2;
846 if (p[0] == 'U' && extdsize == 12) {
847 /* p[1] is a minor version. */
848 lha->mtime = archive_le32dec(&p[2]);
849 lha->mode = archive_le16dec(&p[6]);
850 lha->uid = archive_le16dec(&p[8]);
851 lha->gid = archive_le16dec(&p[10]);
852 lha->setflag |= UNIX_MODE_IS_SET;
853 }
854 }
855 __archive_read_consume(a, lha->header_size);
856
857 if (sum_calculated != headersum) {
858 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
859 "LHa header sum error");
860 return (ARCHIVE_FATAL);
861 }
862
863 return (ARCHIVE_OK);
864 }
865
866 /*
867 * Header 1 format
868 *
869 * +0 +1 +2 +7 +11
870 * +---------------+----------+----------------+-------------+
871 * |header size(*1)|header sum|compression type|skip size(*2)|
872 * +---------------+----------+----------------+-------------+
873 * <---------------(*1)----------*
874 *
875 * +11 +15 +17 +19 +20 +21
876 * +-----------------+---------+---------+--------------+----------------+
877 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
878 * +-----------------+---------+---------+--------------+----------------+
879 * *-------------------------------(*1)----------------------------------*
880 *
881 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4)
882 * +---------------+---------+----------+-----------+-----------+
883 * |name length(*3)|file name|file CRC16| creator |padding(*4)|
884 * +---------------+---------+----------+-----------+-----------+
885 * <--(*3)->
886 * *----------------------------(*1)----------------------------*
887 *
888 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5)
889 * +----------------+---------------------+------------------------+
890 * |next header size| extended header(*5) | compressed data |
891 * +----------------+---------------------+------------------------+
892 * *------(*1)-----> <--------------------(*2)-------------------->
893 */
894 #define H1_HEADER_SIZE_OFFSET 0
895 #define H1_HEADER_SUM_OFFSET 1
896 #define H1_COMP_SIZE_OFFSET 7
897 #define H1_ORIG_SIZE_OFFSET 11
898 #define H1_DOS_TIME_OFFSET 15
899 #define H1_NAME_LEN_OFFSET 21
900 #define H1_FILE_NAME_OFFSET 22
901 #define H1_FIXED_SIZE 27
902 static int
lha_read_file_header_1(struct archive_read * a,struct lha * lha)903 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
904 {
905 const unsigned char *p;
906 size_t extdsize;
907 int i, err, err2;
908 int namelen, padding;
909 unsigned char headersum, sum_calculated;
910
911 err = ARCHIVE_OK;
912
913 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
914 return (truncated_error(a));
915
916 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
917 headersum = p[H1_HEADER_SUM_OFFSET];
918 /* Note: An extended header size is included in a compsize. */
919 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
920 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
921 lha->mtime = dos_to_unix(archive_le32dec(p + H1_DOS_TIME_OFFSET));
922 namelen = p[H1_NAME_LEN_OFFSET];
923 /* Calculate a padding size. The result will be normally 0 only(?) */
924 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
925
926 if (namelen > 230 || padding < 0)
927 goto invalid;
928
929 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
930 return (truncated_error(a));
931
932 for (i = 0; i < namelen; i++) {
933 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
934 goto invalid;/* Invalid filename. */
935 }
936 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
937 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
938 lha->setflag |= CRC_IS_SET;
939
940 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
941 /* Consume used bytes but not include `next header size' data
942 * since it will be consumed in lha_read_file_extended_header(). */
943 __archive_read_consume(a, lha->header_size - 2);
944
945 /* Read extended headers */
946 err2 = lha_read_file_extended_header(a, lha, NULL, 2,
947 (uint64_t)(lha->compsize + 2), &extdsize);
948 if (err2 < ARCHIVE_WARN)
949 return (err2);
950 if (err2 < err)
951 err = err2;
952 /* Get a real compressed file size. */
953 lha->compsize -= extdsize - 2;
954
955 if (lha->compsize < 0)
956 goto invalid; /* Invalid compressed file size */
957
958 if (sum_calculated != headersum) {
959 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
960 "LHa header sum error");
961 return (ARCHIVE_FATAL);
962 }
963 return (err);
964 invalid:
965 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
966 "Invalid LHa header");
967 return (ARCHIVE_FATAL);
968 }
969
970 /*
971 * Header 2 format
972 *
973 * +0 +2 +7 +11 +15
974 * +---------------+----------------+-------------------+-----------------+
975 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
976 * +---------------+----------------+-------------------+-----------------+
977 * <--------------------------------(*1)---------------------------------*
978 *
979 * +15 +19 +20 +21 +23 +24
980 * +-----------------+------------+----------------+----------+-----------+
 * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator |
982 * +-----------------+------------+----------------+----------+-----------+
983 * *---------------------------------(*1)---------------------------------*
984 *
985 * +24 +26 +26+(*3) +26+(*3)+(*4)
986 * +----------------+-------------------+-------------+-------------------+
987 * |next header size|extended header(*3)| padding(*4) | compressed data |
988 * +----------------+-------------------+-------------+-------------------+
989 * *--------------------------(*1)-------------------> <------(*2)------->
990 *
991 */
992 #define H2_HEADER_SIZE_OFFSET 0
993 #define H2_COMP_SIZE_OFFSET 7
994 #define H2_ORIG_SIZE_OFFSET 11
995 #define H2_TIME_OFFSET 15
996 #define H2_CRC_OFFSET 21
997 #define H2_FIXED_SIZE 24
998 static int
lha_read_file_header_2(struct archive_read * a,struct lha * lha)999 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1000 {
1001 const unsigned char *p;
1002 size_t extdsize;
1003 int err, padding;
1004 uint16_t header_crc;
1005
1006 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1007 return (truncated_error(a));
1008
1009 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1010 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1011 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1012 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1013 lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1014 lha->setflag |= CRC_IS_SET;
1015
1016 if (lha->header_size < H2_FIXED_SIZE) {
1017 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1018 "Invalid LHa header size");
1019 return (ARCHIVE_FATAL);
1020 }
1021
1022 header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1023 __archive_read_consume(a, H2_FIXED_SIZE);
1024
1025 /* Read extended headers */
1026 err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1027 lha->header_size - H2_FIXED_SIZE, &extdsize);
1028 if (err < ARCHIVE_WARN)
1029 return (err);
1030
1031 /* Calculate a padding size. The result will be normally 0 or 1. */
1032 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1033 if (padding > 0) {
1034 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1035 return (truncated_error(a));
1036 header_crc = lha_crc16(header_crc, p, padding);
1037 __archive_read_consume(a, padding);
1038 }
1039
1040 if (header_crc != lha->header_crc) {
1041 #ifndef DONT_FAIL_ON_CRC_ERROR
1042 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1043 "LHa header CRC error");
1044 return (ARCHIVE_FATAL);
1045 #endif
1046 }
1047 return (err);
1048 }
1049
1050 /*
1051 * Header 3 format
1052 *
1053 * +0 +2 +7 +11 +15
1054 * +------------+----------------+-------------------+-----------------+
1055 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1056 * +------------+----------------+-------------------+-----------------+
1057 * <-------------------------------(*1)-------------------------------*
1058 *
1059 * +15 +19 +20 +21 +23 +24
1060 * +-----------------+------------+----------------+----------+-----------+
1061 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator |
1062 * +-----------------+------------+----------------+----------+-----------+
1063 * *--------------------------------(*1)----------------------------------*
1064 *
1065 * +24 +28 +32 +32+(*3)
1066 * +---------------+----------------+-------------------+-----------------+
1067 * |header size(*1)|next header size|extended header(*3)| compressed data |
1068 * +---------------+----------------+-------------------+-----------------+
1069 * *------------------------(*1)-----------------------> <------(*2)----->
1070 *
1071 */
1072 #define H3_FIELD_LEN_OFFSET 0
1073 #define H3_COMP_SIZE_OFFSET 7
1074 #define H3_ORIG_SIZE_OFFSET 11
1075 #define H3_TIME_OFFSET 15
1076 #define H3_CRC_OFFSET 21
1077 #define H3_HEADER_SIZE_OFFSET 24
1078 #define H3_FIXED_SIZE 28
1079 static int
lha_read_file_header_3(struct archive_read * a,struct lha * lha)1080 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1081 {
1082 const unsigned char *p;
1083 size_t extdsize;
1084 int err;
1085 uint16_t header_crc;
1086
1087 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1088 return (truncated_error(a));
1089
1090 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1091 goto invalid;
1092 lha->header_size = archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1093 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1094 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1095 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1096 lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1097 lha->setflag |= CRC_IS_SET;
1098
1099 if (lha->header_size < H3_FIXED_SIZE + 4)
1100 goto invalid;
1101 header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1102 __archive_read_consume(a, H3_FIXED_SIZE);
1103
1104 /* Read extended headers */
1105 err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1106 lha->header_size - H3_FIXED_SIZE, &extdsize);
1107 if (err < ARCHIVE_WARN)
1108 return (err);
1109
1110 if (header_crc != lha->header_crc) {
1111 #ifndef DONT_FAIL_ON_CRC_ERROR
1112 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1113 "LHa header CRC error");
1114 return (ARCHIVE_FATAL);
1115 #endif
1116 }
1117 return (err);
1118 invalid:
1119 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1120 "Invalid LHa header");
1121 return (ARCHIVE_FATAL);
1122 }
1123
/*
 * Extended header format
 *
 * +0             +2        +3  -- used in header 1 and 2
 * +0             +4        +5  -- used in header 3
 * +--------------+---------+-------------------+--------------+--
 * |ex-header size|header id|        data       |ex-header size| .......
 * +--------------+---------+-------------------+--------------+--
 * <-------------( ex-header size)------------> <-- next extended header --*
 *
 * If the ex-header size is zero, it is the mark of the end of extended
 * headers.
 *
 */
/*
 * Read the chain of extended headers and store what they describe
 * into `lha'.
 *
 *  a                 Reader the headers are fetched from and consumed on.
 *  lha               Receives the decoded attributes.
 *  crc               If not NULL, a running header CRC16.  It is updated
 *                    with each extended header and with the terminating
 *                    zero size field; for EXT_HEADER_CRC the two bytes
 *                    holding the stored CRC are fed in as zeros.
 *  sizefield_length  Width in bytes of every "ex-header size" field.
 *                    sizeof(uint16_t) selects a 16-bit decode, any other
 *                    value a 32-bit decode.
 *  limitsize         Upper bound for the running *total_size.
 *  total_size        Output.  Initialized to sizefield_length, then
 *                    increased by the size of each extended header read.
 *
 * Returns ARCHIVE_OK when the terminating zero-size field is reached,
 * the result of truncated_error() when read-ahead fails, and
 * ARCHIVE_FATAL for malformed data or when a charset conversion
 * object cannot be obtained.
 */
static int
lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
    uint16_t *crc, int sizefield_length, uint64_t limitsize, size_t *total_size)
{
	const void *h;
	const unsigned char *extdheader;
	size_t extdsize;
	size_t datasize;
	unsigned int i;
	unsigned char extdtype;

#define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
#define EXT_FILENAME		0x01		/* Filename */
#define EXT_DIRECTORY		0x02		/* Directory name */
#define EXT_DOS_ATTR		0x40		/* MS-DOS attribute */
#define EXT_TIMESTAMP		0x41		/* Windows time stamp */
#define EXT_FILESIZE		0x42		/* Large file size */
#define EXT_TIMEZONE		0x43		/* Time zone */
#define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename */
#define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name */
#define EXT_CODEPAGE		0x46		/* Codepage */
#define EXT_UNIX_MODE		0x50		/* File permission */
#define EXT_UNIX_GID_UID	0x51		/* gid,uid */
#define EXT_UNIX_GNAME		0x52		/* Group name */
#define EXT_UNIX_UNAME		0x53		/* User name */
#define EXT_UNIX_MTIME		0x54		/* Modified time */
#define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
#define EXT_NEW_ATTR		0xff		/* new attribute */

	/* The running total starts at the width of one size field. */
	*total_size = sizefield_length;

	for (;;) {
		/* Read an extended header size. */
		if ((h =
		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
			return (truncated_error(a));
		/* Check if the size is the zero indicates the end of the
		 * extended header. */
		if (sizefield_length == sizeof(uint16_t))
			extdsize = archive_le16dec(h);
		else
			extdsize = archive_le32dec(h);
		if (extdsize == 0) {
			/* End of extended header */
			if (crc != NULL)
				*crc = lha_crc16(*crc, h, sizefield_length);
			__archive_read_consume(a, sizefield_length);
			return (ARCHIVE_OK);
		}

		/* Sanity check to the extended header size: it must fit
		 * under limitsize and be large enough to hold the size
		 * field plus the one-byte type. */
		if (((uint64_t)*total_size + extdsize) > limitsize ||
		    extdsize <= (size_t)sizefield_length)
			goto invalid;

		/* Read the extended header. */
		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
			return (truncated_error(a));
		*total_size += extdsize;

		extdheader = (const unsigned char *)h;
		/* Get the extended header type. */
		extdtype = extdheader[sizefield_length];
		/* Calculate an extended data size. */
		datasize = extdsize - (1 + sizefield_length);
		/* Skip an extended header size field and type field. */
		extdheader += sizefield_length + 1;

		/* Every header except EXT_HEADER_CRC goes into the CRC
		 * verbatim; EXT_HEADER_CRC is handled in its own case. */
		if (crc != NULL && extdtype != EXT_HEADER_CRC)
			*crc = lha_crc16(*crc, h, extdsize);
		switch (extdtype) {
		case EXT_HEADER_CRC:
			/* We only use a header CRC. Following data will not
			 * be used. */
			if (datasize >= 2) {
				lha->header_crc = archive_le16dec(extdheader);
				if (crc != NULL) {
					static const char zeros[2] = {0, 0};
					/* Size and type fields first. */
					*crc = lha_crc16(*crc, h,
					    extdsize - datasize);
					/* CRC value itself as zero */
					*crc = lha_crc16(*crc, zeros, 2);
					/* Then whatever follows the CRC. */
					*crc = lha_crc16(*crc,
					    extdheader+2, datasize - 2);
				}
			}
			break;
		case EXT_FILENAME:
			if (datasize == 0) {
				/* maybe directory header */
				archive_string_empty(&lha->filename);
				break;
			}
			/* A name starting with NUL is rejected. */
			if (extdheader[0] == '\0')
				goto invalid;
			archive_strncpy(&lha->filename,
			    (const char *)extdheader, datasize);
			break;
		case EXT_UTF16_FILENAME:
			if (datasize == 0) {
				/* maybe directory header */
				archive_string_empty(&lha->filename);
				break;
			} else if (datasize & 1) {
				/* UTF-16 characters take always 2 or 4 bytes */
				goto invalid;
			}
			if (extdheader[0] == '\0')
				goto invalid;
			/* Keep the raw UTF-16 bytes in the string. */
			archive_string_empty(&lha->filename);
			archive_array_append(&lha->filename,
				(const char *)extdheader, datasize);
			/* Setup a string conversion for a filename. */
			lha->sconv_fname =
			    archive_string_conversion_from_charset(&a->archive,
			        "UTF-16LE", 1);
			if (lha->sconv_fname == NULL)
				return (ARCHIVE_FATAL);
			break;
		case EXT_DIRECTORY:
			if (datasize == 0 || extdheader[0] == '\0')
				/* No directory name data: treated as
				 * invalid. */
				goto invalid;

			archive_strncpy(&lha->dirname,
			    (const char *)extdheader, datasize);
			/*
			 * Convert directory delimiter from 0xFF
			 * to '/' for local system.
			 */
			for (i = 0; i < lha->dirname.length; i++) {
				if ((unsigned char)lha->dirname.s[i] == 0xFF)
					lha->dirname.s[i] = '/';
			}
			/* Is last character directory separator? */
			if (lha->dirname.s[lha->dirname.length-1] != '/')
				/* invalid directory data */
				goto invalid;
			break;
		case EXT_UTF16_DIRECTORY:
			/* UTF-16 characters take always 2 or 4 bytes */
			if (datasize == 0 || (datasize & 1) ||
			    extdheader[0] == '\0') {
				/* No usable directory name data: treated
				 * as invalid. */
				goto invalid;
			}

			/* Keep the raw UTF-16 bytes in the string. */
			archive_string_empty(&lha->dirname);
			archive_array_append(&lha->dirname,
				(const char *)extdheader, datasize);
			lha->sconv_dir =
			    archive_string_conversion_from_charset(&a->archive,
			        "UTF-16LE", 1);
			if (lha->sconv_dir == NULL)
				return (ARCHIVE_FATAL);
			else {
				/*
				 * Convert directory delimiter from 0xFFFF
				 * to '/' for local system.
				 */
				uint16_t dirSep;
				uint16_t d = 1;
				/* Pick the value whose in-memory bytes are
				 * 0x2F 0x00 ('/' in UTF-16LE) for this
				 * host's byte order. */
				if (archive_be16dec(&d) == 1)
					dirSep = 0x2F00;
				else
					dirSep = 0x002F;

				/* UTF-16LE character */
				uint16_t *utf16name =
				    (uint16_t *)lha->dirname.s;
				for (i = 0; i < lha->dirname.length / 2; i++) {
					if (utf16name[i] == 0xFFFF) {
						utf16name[i] = dirSep;
					}
				}
				/* Is last character directory separator? */
				if (utf16name[lha->dirname.length / 2 - 1] !=
				    dirSep) {
					/* invalid directory data */
					goto invalid;
				}
			}
			break;
		case EXT_DOS_ATTR:
			/* Only the low byte of the 16-bit value is kept. */
			if (datasize == 2)
				lha->dos_attr = (unsigned char)
				    (archive_le16dec(extdheader) & 0xff);
			break;
		case EXT_TIMESTAMP:
			/* Three 64-bit values converted by ntfs_to_unix():
			 * birth time, modification time, access time. */
			if (datasize == (sizeof(uint64_t) * 3)) {
				ntfs_to_unix(archive_le64dec(extdheader),
					&lha->birthtime,
					&lha->birthtime_tv_nsec);
				extdheader += sizeof(uint64_t);
				ntfs_to_unix(archive_le64dec(extdheader),
					&lha->mtime,
					&lha->mtime_tv_nsec);
				extdheader += sizeof(uint64_t);
				ntfs_to_unix(archive_le64dec(extdheader),
					&lha->atime,
					&lha->atime_tv_nsec);
				lha->setflag |= BIRTHTIME_IS_SET |
				    ATIME_IS_SET;
			}
			break;
		case EXT_FILESIZE:
			/* Two 64-bit values: compressed size, then
			 * original size.  Negative results are rejected. */
			if (datasize == sizeof(uint64_t) * 2) {
				lha->compsize = archive_le64dec(extdheader);
				extdheader += sizeof(uint64_t);
				lha->origsize = archive_le64dec(extdheader);
				if (lha->compsize < 0 || lha->origsize < 0)
					goto invalid;
			}
			break;
		case EXT_CODEPAGE:
			/* Get an archived filename charset from codepage.
			 * This overwrites the charset specified by
			 * hdrcharset option. */
			if (datasize == sizeof(uint32_t)) {
				struct archive_string cp;
				const char *charset;

				archive_string_init(&cp);
				switch (archive_le32dec(extdheader)) {
				case 65001: /* UTF-8 */
					charset = "UTF-8";
					break;
				default:
					/* Any other codepage is named
					 * "CP<number>". */
					archive_string_sprintf(&cp, "CP%d",
					    (int)archive_le32dec(extdheader));
					charset = cp.s;
					break;
				}
				lha->sconv_dir =
				    archive_string_conversion_from_charset(
					&(a->archive), charset, 1);
				lha->sconv_fname =
				    archive_string_conversion_from_charset(
					&(a->archive), charset, 1);
				/* `charset' may point into `cp'; free it only
				 * after both lookups are done. */
				archive_string_free(&cp);
				if (lha->sconv_dir == NULL)
					return (ARCHIVE_FATAL);
				if (lha->sconv_fname == NULL)
					return (ARCHIVE_FATAL);
			}
			break;
		case EXT_UNIX_MODE:
			if (datasize == sizeof(uint16_t)) {
				lha->mode = archive_le16dec(extdheader);
				lha->setflag |= UNIX_MODE_IS_SET;
			}
			break;
		case EXT_UNIX_GID_UID:
			/* 16-bit gid followed by 16-bit uid. */
			if (datasize == (sizeof(uint16_t) * 2)) {
				lha->gid = archive_le16dec(extdheader);
				lha->uid = archive_le16dec(extdheader+2);
			}
			break;
		case EXT_UNIX_GNAME:
			if (datasize > 0)
				archive_strncpy(&lha->gname,
				    (const char *)extdheader, datasize);
			break;
		case EXT_UNIX_UNAME:
			if (datasize > 0)
				archive_strncpy(&lha->uname,
				    (const char *)extdheader, datasize);
			break;
		case EXT_UNIX_MTIME:
			if (datasize == sizeof(uint32_t))
				lha->mtime = archive_le32dec(extdheader);
			break;
		case EXT_OS2_NEW_ATTR:
			/* This extended header is OS/2 depend. */
			if (datasize == 16) {
				lha->dos_attr = (unsigned char)
				    (archive_le16dec(extdheader) & 0xff);
				lha->mode = archive_le16dec(extdheader+2);
				lha->gid = archive_le16dec(extdheader+4);
				lha->uid = archive_le16dec(extdheader+6);
				lha->birthtime = archive_le32dec(extdheader+8);
				lha->atime = archive_le32dec(extdheader+12);
				lha->setflag |= UNIX_MODE_IS_SET
				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
			}
			break;
		case EXT_NEW_ATTR:
			/* Same fields as above (minus the DOS attribute),
			 * each stored as a 32-bit value. */
			if (datasize == 20) {
				lha->mode = (mode_t)archive_le32dec(extdheader);
				lha->gid = archive_le32dec(extdheader+4);
				lha->uid = archive_le32dec(extdheader+8);
				lha->birthtime = archive_le32dec(extdheader+12);
				lha->atime = archive_le32dec(extdheader+16);
				lha->setflag |= UNIX_MODE_IS_SET
				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
			}
			break;
		case EXT_TIMEZONE:		/* Not supported */
			break;
		default:
			/* Unknown header types are skipped. */
			break;
		}

		/* Done with this extended header; move to the next one. */
		__archive_read_consume(a, extdsize);
	}
invalid:
	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
	    "Invalid extended LHa header");
	return (ARCHIVE_FATAL);
}
1448
1449 static int
lha_end_of_entry(struct archive_read * a)1450 lha_end_of_entry(struct archive_read *a)
1451 {
1452 struct lha *lha = (struct lha *)(a->format->data);
1453 int r = ARCHIVE_EOF;
1454
1455 if (!lha->end_of_entry_cleanup) {
1456 if ((lha->setflag & CRC_IS_SET) &&
1457 lha->crc != lha->entry_crc_calculated) {
1458 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1459 "LHa data CRC error");
1460 r = ARCHIVE_WARN;
1461 }
1462
1463 /* End-of-entry cleanup done. */
1464 lha->end_of_entry_cleanup = 1;
1465 }
1466 return (r);
1467 }
1468
1469 static int
archive_read_format_lha_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1470 archive_read_format_lha_read_data(struct archive_read *a,
1471 const void **buff, size_t *size, int64_t *offset)
1472 {
1473 struct lha *lha = (struct lha *)(a->format->data);
1474 int r;
1475
1476 if (lha->entry_unconsumed) {
1477 /* Consume as much as the decompressor actually used. */
1478 __archive_read_consume(a, lha->entry_unconsumed);
1479 lha->entry_unconsumed = 0;
1480 }
1481 if (lha->end_of_entry) {
1482 *offset = lha->entry_offset;
1483 *size = 0;
1484 *buff = NULL;
1485 return (lha_end_of_entry(a));
1486 }
1487
1488 if (lha->entry_is_compressed)
1489 r = lha_read_data_lzh(a, buff, size, offset);
1490 else
1491 /* No compression. */
1492 r = lha_read_data_none(a, buff, size, offset);
1493 return (r);
1494 }
1495
1496 /*
1497 * Read a file content in no compression.
1498 *
1499 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1500 * lha->end_of_entry if it consumes all of the data.
1501 */
1502 static int
lha_read_data_none(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1503 lha_read_data_none(struct archive_read *a, const void **buff,
1504 size_t *size, int64_t *offset)
1505 {
1506 struct lha *lha = (struct lha *)(a->format->data);
1507 ssize_t bytes_avail;
1508
1509 if (lha->entry_bytes_remaining == 0) {
1510 *buff = NULL;
1511 *size = 0;
1512 *offset = lha->entry_offset;
1513 lha->end_of_entry = 1;
1514 return (ARCHIVE_OK);
1515 }
1516 /*
1517 * Note: '1' here is a performance optimization.
1518 * Recall that the decompression layer returns a count of
1519 * available bytes; asking for more than that forces the
1520 * decompressor to combine reads by copying data.
1521 */
1522 *buff = __archive_read_ahead(a, 1, &bytes_avail);
1523 if (bytes_avail <= 0) {
1524 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1525 "Truncated LHa file data");
1526 return (ARCHIVE_FATAL);
1527 }
1528 if (bytes_avail > lha->entry_bytes_remaining)
1529 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1530 lha->entry_crc_calculated =
1531 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1532 *size = bytes_avail;
1533 *offset = lha->entry_offset;
1534 lha->entry_offset += bytes_avail;
1535 lha->entry_bytes_remaining -= bytes_avail;
1536 if (lha->entry_bytes_remaining == 0)
1537 lha->end_of_entry = 1;
1538 lha->entry_unconsumed = bytes_avail;
1539 return (ARCHIVE_OK);
1540 }
1541
1542 /*
1543 * Read a file content in LZHUFF encoding.
1544 *
1545 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1546 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1547 * all of the data.
1548 */
1549 static int
lha_read_data_lzh(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1550 lha_read_data_lzh(struct archive_read *a, const void **buff,
1551 size_t *size, int64_t *offset)
1552 {
1553 struct lha *lha = (struct lha *)(a->format->data);
1554 ssize_t bytes_avail;
1555 int r;
1556
1557 /* If we haven't yet read any data, initialize the decompressor. */
1558 if (!lha->decompress_init) {
1559 r = lzh_decode_init(&(lha->strm), lha->method);
1560 switch (r) {
1561 case ARCHIVE_OK:
1562 break;
1563 case ARCHIVE_FAILED:
1564 /* Unsupported compression. */
1565 *buff = NULL;
1566 *size = 0;
1567 *offset = 0;
1568 archive_set_error(&a->archive,
1569 ARCHIVE_ERRNO_FILE_FORMAT,
1570 "Unsupported lzh compression method -%c%c%c-",
1571 lha->method[0], lha->method[1], lha->method[2]);
1572 /* We know compressed size; just skip it. */
1573 archive_read_format_lha_read_data_skip(a);
1574 return (ARCHIVE_WARN);
1575 default:
1576 archive_set_error(&a->archive, ENOMEM,
1577 "Couldn't allocate memory "
1578 "for lzh decompression");
1579 return (ARCHIVE_FATAL);
1580 }
1581 /* We've initialized decompression for this stream. */
1582 lha->decompress_init = 1;
1583 lha->strm.avail_out = 0;
1584 lha->strm.total_out = 0;
1585 }
1586
1587 /*
1588 * Note: '1' here is a performance optimization.
1589 * Recall that the decompression layer returns a count of
1590 * available bytes; asking for more than that forces the
1591 * decompressor to combine reads by copying data.
1592 */
1593 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1594 if (bytes_avail <= 0) {
1595 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1596 "Truncated LHa file body");
1597 return (ARCHIVE_FATAL);
1598 }
1599 if (bytes_avail > lha->entry_bytes_remaining)
1600 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1601
1602 lha->strm.avail_in = (int)bytes_avail;
1603 lha->strm.total_in = 0;
1604 lha->strm.avail_out = 0;
1605
1606 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1607 switch (r) {
1608 case ARCHIVE_OK:
1609 break;
1610 case ARCHIVE_EOF:
1611 lha->end_of_entry = 1;
1612 break;
1613 default:
1614 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1615 "Bad lzh data");
1616 return (ARCHIVE_FAILED);
1617 }
1618 lha->entry_unconsumed = lha->strm.total_in;
1619 lha->entry_bytes_remaining -= lha->strm.total_in;
1620
1621 if (lha->strm.avail_out) {
1622 *offset = lha->entry_offset;
1623 *size = lha->strm.avail_out;
1624 *buff = lha->strm.ref_ptr;
1625 lha->entry_crc_calculated =
1626 lha_crc16(lha->entry_crc_calculated, *buff, *size);
1627 lha->entry_offset += *size;
1628 } else {
1629 *offset = lha->entry_offset;
1630 *size = 0;
1631 *buff = NULL;
1632 if (lha->end_of_entry)
1633 return (lha_end_of_entry(a));
1634 }
1635 return (ARCHIVE_OK);
1636 }
1637
1638 /*
1639 * Skip a file content.
1640 */
1641 static int
archive_read_format_lha_read_data_skip(struct archive_read * a)1642 archive_read_format_lha_read_data_skip(struct archive_read *a)
1643 {
1644 struct lha *lha;
1645 int64_t bytes_skipped;
1646
1647 lha = (struct lha *)(a->format->data);
1648
1649 if (lha->entry_unconsumed) {
1650 /* Consume as much as the decompressor actually used. */
1651 __archive_read_consume(a, lha->entry_unconsumed);
1652 lha->entry_unconsumed = 0;
1653 }
1654
1655 /* if we've already read to end of data, we're done. */
1656 if (lha->end_of_entry_cleanup)
1657 return (ARCHIVE_OK);
1658
1659 /*
1660 * If the length is at the beginning, we can skip the
1661 * compressed data much more quickly.
1662 */
1663 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1664 if (bytes_skipped < 0)
1665 return (ARCHIVE_FATAL);
1666
1667 /* This entry is finished and done. */
1668 lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1669 return (ARCHIVE_OK);
1670 }
1671
1672 static int
archive_read_format_lha_cleanup(struct archive_read * a)1673 archive_read_format_lha_cleanup(struct archive_read *a)
1674 {
1675 struct lha *lha = (struct lha *)(a->format->data);
1676
1677 lzh_decode_free(&(lha->strm));
1678 archive_string_free(&(lha->dirname));
1679 archive_string_free(&(lha->filename));
1680 archive_string_free(&(lha->uname));
1681 archive_string_free(&(lha->gname));
1682 archive_wstring_free(&(lha->ws));
1683 free(lha);
1684 (a->format->data) = NULL;
1685 return (ARCHIVE_OK);
1686 }
1687
1688 /*
1689 * 'LHa for UNIX' utility has archived a symbolic-link name after
1690 * a pathname with '|' character.
1691 * This function extracts the symbolic-link name from the pathname.
1692 *
1693 * example.
1694 * 1. a symbolic-name is 'aaa/bb/cc'
1695 * 2. a filename is 'xxx/bbb'
1696 * then an archived pathname is 'xxx/bbb|aaa/bb/cc'
1697 */
1698 static int
lha_parse_linkname(struct archive_wstring * linkname,struct archive_wstring * pathname)1699 lha_parse_linkname(struct archive_wstring *linkname,
1700 struct archive_wstring *pathname)
1701 {
1702 wchar_t * linkptr;
1703 size_t symlen;
1704
1705 linkptr = wcschr(pathname->s, L'|');
1706 if (linkptr != NULL) {
1707 symlen = wcslen(linkptr + 1);
1708 archive_wstrncpy(linkname, linkptr+1, symlen);
1709
1710 *linkptr = 0;
1711 pathname->length = wcslen(pathname->s);
1712
1713 return (1);
1714 }
1715 return (0);
1716 }
1717
/*
 * Add `size' bytes, starting `offset' bytes into `pp', to the 8-bit
 * checksum `sum' (arithmetic wraps modulo 256) and return the result.
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	const unsigned char *bytes = (const unsigned char *)pp + offset;
	size_t k;

	for (k = 0; k < size; k++)
		sum = (unsigned char)(sum + bytes[k]);
	return (sum);
}
1728
/*
 * CRC16 lookup tables.  Row 0 maps one input byte to its CRC
 * contribution; row 1 is row 0 advanced by one more (zero) byte, which
 * lets two bytes be folded in at once.
 */
static uint16_t crc16tbl[2][256];

/*
 * Fill crc16tbl.  Only the first call does any work.
 */
static void
lha_crc16_init(void)
{
	static int tables_ready = 0;
	unsigned int byte, bit;

	if (tables_ready)
		return;
	tables_ready = 1;

	/* Row 0: bitwise CRC of each byte value, polynomial 0xA001
	 * applied while shifting right. */
	for (byte = 0; byte < 256; byte++) {
		uint16_t rem = (uint16_t)byte;

		for (bit = 0; bit < 8; bit++) {
			if (rem & 1)
				rem = (uint16_t)((rem >> 1) ^ 0xA001);
			else
				rem = (uint16_t)(rem >> 1);
		}
		crc16tbl[0][byte] = rem;
	}

	/* Row 1: each row-0 value pushed through one more table step. */
	for (byte = 0; byte < 256; byte++) {
		const uint16_t first = crc16tbl[0][byte];

		crc16tbl[1][byte] =
		    (uint16_t)((first >> 8) ^ crc16tbl[0][first & 0xff]);
	}
}
1753
/*
 * Update the CRC16 value `crc' with the `len' bytes at `pp' and return
 * the new value.  With len == 0 the input CRC is returned unchanged.
 *
 * The lookups go through crc16tbl, the table that lha_crc16_init()
 * fills in.
 */
static uint16_t
lha_crc16(uint16_t crc, const void *pp, size_t len)
{
	const unsigned char *p = (const unsigned char *)pp;
	const uint16_t *buff;
	/* Byte-order probe: u.c[0] holds 1 when the most significant
	 * byte is stored first. */
	const union {
		uint32_t i;
		char c[4];
	} u = { 0x01020304 };

	if (len == 0)
		return crc;

	/* Process unaligned address: take one byte by itself when the
	 * pointer is odd, so the 16-bit reads below start on an even
	 * address. */
	if (((uintptr_t)p) & (uintptr_t)0x1) {
		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
		len--;
	}
	buff = (const uint16_t *)p;
	/*
	 * Modern C compiler such as GCC does not unroll automatically yet
	 * without unrolling pragma, and Clang is so. So we should
	 * unroll this loop for its performance.
	 *
	 * Each iteration handles 8 bytes as four 16-bit words.
	 */
	for (;len >= 8; len -= 8) {
		/* This if statement expects compiler optimization will
		 * remove the statement which will not be executed. */
#undef bswap16
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1400  /* Visual Studio */
#  define bswap16(x) _byteswap_ushort(x)
#elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
/* GCC 4.8 and later has __builtin_bswap16() */
#  define bswap16(x) __builtin_bswap16(x)
#elif defined(__clang__) && __has_builtin(__builtin_bswap16)
/* Newer clang versions have __builtin_bswap16() */
#  define bswap16(x) __builtin_bswap16(x)
#else
#  define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
#endif
#define CRC16W	do { 	\
		if(u.c[0] == 1) { /* Big endian */		\
			crc ^= bswap16(*buff); buff++;		\
		} else						\
			crc ^= *buff++;				\
		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
} while (0)
		CRC16W;
		CRC16W;
		CRC16W;
		CRC16W;
#undef CRC16W
#undef bswap16
	}

	/* Finish the remaining (fewer than 8) bytes one at a time. */
	p = (const unsigned char *)buff;
	for (;len; len--) {
		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
	}
	return crc;
}
1817
1818 /*
1819 * Initialize LZHUF decoder.
1820 *
1821 * Returns ARCHIVE_OK if initialization was successful.
1822 * Returns ARCHIVE_FAILED if method is unsupported.
1823 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1824 * error occurred.
1825 */
1826 static int
lzh_decode_init(struct lzh_stream * strm,const char * method)1827 lzh_decode_init(struct lzh_stream *strm, const char *method)
1828 {
1829 struct lzh_dec *ds;
1830 int w_bits, w_size;
1831
1832 if (strm->ds == NULL) {
1833 strm->ds = calloc(1, sizeof(*strm->ds));
1834 if (strm->ds == NULL)
1835 return (ARCHIVE_FATAL);
1836 }
1837 ds = strm->ds;
1838 ds->error = ARCHIVE_FAILED;
1839 if (method == NULL || method[0] != 'l' || method[1] != 'h')
1840 return (ARCHIVE_FAILED);
1841 switch (method[2]) {
1842 case '5':
1843 w_bits = 13;/* 8KiB for window */
1844 break;
1845 case '6':
1846 w_bits = 15;/* 32KiB for window */
1847 break;
1848 case '7':
1849 w_bits = 16;/* 64KiB for window */
1850 break;
1851 default:
1852 return (ARCHIVE_FAILED);/* Not supported. */
1853 }
1854 ds->error = ARCHIVE_FATAL;
1855 /* Expand a window size up to 128 KiB for decompressing process
1856 * performance whatever its original window size is. */
1857 ds->w_size = 1U << 17;
1858 ds->w_mask = ds->w_size -1;
1859 if (ds->w_buff == NULL) {
1860 ds->w_buff = malloc(ds->w_size);
1861 if (ds->w_buff == NULL)
1862 return (ARCHIVE_FATAL);
1863 }
1864 w_size = 1U << w_bits;
1865 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1866 ds->w_pos = 0;
1867 ds->state = 0;
1868 ds->pos_pt_len_size = w_bits + 1;
1869 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1870 ds->literal_pt_len_size = PT_BITLEN_SIZE;
1871 ds->literal_pt_len_bits = 5;
1872 ds->br.cache_buffer = 0;
1873 ds->br.cache_avail = 0;
1874
1875 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1876 != ARCHIVE_OK)
1877 return (ARCHIVE_FATAL);
1878 ds->lt.len_bits = 9;
1879 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1880 != ARCHIVE_OK)
1881 return (ARCHIVE_FATAL);
1882 ds->error = 0;
1883
1884 return (ARCHIVE_OK);
1885 }
1886
1887 /*
1888 * Release LZHUF decoder.
1889 */
1890 static void
lzh_decode_free(struct lzh_stream * strm)1891 lzh_decode_free(struct lzh_stream *strm)
1892 {
1893
1894 if (strm->ds == NULL)
1895 return;
1896 free(strm->ds->w_buff);
1897 lzh_huffman_free(&(strm->ds->lt));
1898 lzh_huffman_free(&(strm->ds->pt));
1899 free(strm->ds);
1900 strm->ds = NULL;
1901 }
1902
/*
 * Bit stream reader.
 *
 * These macros operate on a bit cache with two members:
 * `cache_buffer' (the cached bits) and `cache_avail' (how many of the
 * low-order bits of cache_buffer are still unread).
 */
/* Check that the cache buffer has enough bits.
 * `n' is parenthesized so that any expression may be passed. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Get compressed data by bit: the next `n' unread bits, without
 * consuming them.  Requires at least `n' bits in the cache. */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[n])
/* Same, for when fewer than `n' bits are cached: the available bits
 * are shifted up and the missing low bits read as zero. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[n])
/* Read ahead to make sure the cache buffer has enough compressed data we
 * will use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : we met that strm->next_in is empty, we have to get following
 *          bytes. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
/*  True  : the cache buffer has some bits as much as we need.
 *  False : there are no enough bits in the cache buffer to be used,
 *          we have to get following bytes if we could. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Notify how many bits we consumed. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
/* Give `n' previously consumed bits back to the cache. */
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))

/* cache_masks[n] keeps the low `n' bits of a 16-bit value; entries
 * past 16 all keep the full 16 bits. */
static const uint16_t cache_masks[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000F, 0x001F, 0x003F, 0x007F,
	0x00FF, 0x01FF, 0x03FF, 0x07FF,
	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
1939
/*
 * Shift away used bits in the cache data and fill it up with following bits.
 * Call this when cache buffer does not have enough bits you need.
 *
 * Bytes are taken from strm->next_in, with strm->avail_in decreased
 * accordingly, and appended below the bits already cached.
 *
 * Returns 1 if the cache buffer is full.
 * Returns 0 if the cache buffer is not full; input buffer is empty.
 */
static int
lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
{
	/* Number of free bits in the cache. */
	int n = CACHE_BITS - br->cache_avail;

	for (;;) {
		/* Number of whole bytes that still fit. */
		const int x = n >> 3;
		if (strm->avail_in >= x) {
			/* Enough input to top up in one step: the common
			 * byte counts are unrolled. */
			switch (x) {
			case 8:
				/* All eight bytes are replaced, so the old
				 * cache contents are not kept. */
				br->cache_buffer =
				    ((uint64_t)strm->next_in[0]) << 56 |
				    ((uint64_t)strm->next_in[1]) << 48 |
				    ((uint64_t)strm->next_in[2]) << 40 |
				    ((uint64_t)strm->next_in[3]) << 32 |
				    ((uint32_t)strm->next_in[4]) << 24 |
				    ((uint32_t)strm->next_in[5]) << 16 |
				    ((uint32_t)strm->next_in[6]) << 8 |
				     (uint32_t)strm->next_in[7];
				strm->next_in += 8;
				strm->avail_in -= 8;
				br->cache_avail += 8 * 8;
				return (1);
			case 7:
				/* Keep the cached bits above the seven
				 * new bytes. */
				br->cache_buffer =
		 		   (br->cache_buffer << 56) |
				    ((uint64_t)strm->next_in[0]) << 48 |
				    ((uint64_t)strm->next_in[1]) << 40 |
				    ((uint64_t)strm->next_in[2]) << 32 |
				    ((uint64_t)strm->next_in[3]) << 24 |
				    ((uint64_t)strm->next_in[4]) << 16 |
				    ((uint64_t)strm->next_in[5]) << 8 |
				     (uint64_t)strm->next_in[6];
				strm->next_in += 7;
				strm->avail_in -= 7;
				br->cache_avail += 7 * 8;
				return (1);
			case 6:
				/* Keep the cached bits above the six
				 * new bytes. */
				br->cache_buffer =
		 		   (br->cache_buffer << 48) |
				    ((uint64_t)strm->next_in[0]) << 40 |
				    ((uint64_t)strm->next_in[1]) << 32 |
				    ((uint64_t)strm->next_in[2]) << 24 |
				    ((uint64_t)strm->next_in[3]) << 16 |
				    ((uint64_t)strm->next_in[4]) << 8 |
				     (uint64_t)strm->next_in[5];
				strm->next_in += 6;
				strm->avail_in -= 6;
				br->cache_avail += 6 * 8;
				return (1);
			case 0:
				/* We have enough compressed data in
				 * the cache buffer.*/
				return (1);
			default:
				/* Other counts fall through to the
				 * byte-at-a-time path below. */
				break;
			}
		}
		if (strm->avail_in == 0) {
			/* There is not enough compressed data to fill up the
			 * cache buffer. */
			return (0);
		}
		/* Append a single byte and go around again. */
		br->cache_buffer =
		   (br->cache_buffer << 8) | *strm->next_in++;
		strm->avail_in--;
		br->cache_avail += 8;
		n -= 8;
	}
}
2017
/*
 * Decode LZHUF.
 *
 * 1. Returns ARCHIVE_OK if the output buffer or the input buffer is empty.
 *    Please set up an available buffer and call this function again.
 * 2. Returns ARCHIVE_EOF if decompression has been completed.
 * 3. Returns ARCHIVE_FAILED if an error occurred; the compressed data
 *    is broken or you did not set the 'last' flag properly.
 * 4. The 'last' flag is very important: you must set the flag to 1 if
 *    there is no more input data. The lha compressed data format does not
 *    provide a way to know that the compressed data has really finished.
 *    Note: the lha command utility checks whether the total size of the
 *    output bytes has reached the uncompressed size recorded in its
 *    header; it does not mind whether the decoding process finished
 *    properly.
 *    GNU ZIP can decompress another compressed file made by SCO LZH
 *    compress. It handles EOF as null, filling the read buffer with zeros
 *    until the decoding process meets 2 bytes of zeros when reading the
 *    size of the next chunk, so the zeros are treated as the mark of the
 *    end of the data although the zeros are dummies, not file data.
 */
/* The two halves of the decoder state machine; lzh_decode() picks one
 * of them according to ds->state. */
static int	lzh_read_blocks(struct lzh_stream *, int);
static int	lzh_decode_blocks(struct lzh_stream *, int);
/*
 * Decoder states. States below ST_GET_LITERAL are handled by
 * lzh_read_blocks(), the others by lzh_decode_blocks().
 */
#define ST_RD_BLOCK		0	/* Read the 16-bit block count. */
#define ST_RD_PT_1		1	/* Read the number of pt bit lengths. */
#define ST_RD_PT_2		2	/* Single-code pt table, or validate
					 * the count and reset pt.freq. */
#define ST_RD_PT_3		3	/* Read the first three pt bit lengths
					 * and the 2-bit zero run after them. */
#define ST_RD_PT_4		4	/* Read the remaining pt bit lengths
					 * and build the pt table. */
#define ST_RD_LITERAL_1		5	/* Read the number of literal bit
					 * lengths. */
#define ST_RD_LITERAL_2		6	/* Single-code literal table, or
					 * validate the count and reset
					 * lt.freq. */
#define ST_RD_LITERAL_3		7	/* Decode the literal bit lengths and
					 * build the literal table. */
#define ST_RD_POS_DATA_1	8	/* Switch pt over to the position table
					 * and read it (back to ST_RD_PT_1). */
#define ST_GET_LITERAL		9	/* Decode a literal or a match length. */
#define ST_GET_POS_1		10	/* Decode the position code of a match. */
#define ST_GET_POS_2		11	/* Read the extra bits of the position. */
#define ST_COPY_DATA		12	/* Copy the match inside the window. */
2053
2054 static int
lzh_decode(struct lzh_stream * strm,int last)2055 lzh_decode(struct lzh_stream *strm, int last)
2056 {
2057 struct lzh_dec *ds = strm->ds;
2058 int avail_in;
2059 int r;
2060
2061 if (ds->error)
2062 return (ds->error);
2063
2064 avail_in = strm->avail_in;
2065 do {
2066 if (ds->state < ST_GET_LITERAL)
2067 r = lzh_read_blocks(strm, last);
2068 else
2069 r = lzh_decode_blocks(strm, last);
2070 } while (r == 100);
2071 strm->total_in += avail_in - strm->avail_in;
2072 return (r);
2073 }
2074
2075 static void
lzh_emit_window(struct lzh_stream * strm,size_t s)2076 lzh_emit_window(struct lzh_stream *strm, size_t s)
2077 {
2078 strm->ref_ptr = strm->ds->w_buff;
2079 strm->avail_out = (int)s;
2080 strm->total_out += s;
2081 }
2082
/*
 * Read the header of a group of blocks: the 16-bit block count, the
 * table (pt) used to decode the literal bit lengths, the literal table
 * (lt) itself and finally the position table (stored in pt again).
 *
 * This is a resumable state machine driven by ds->state. Whenever the
 * input runs dry, the state to resume at is recorded and the function
 * returns so the caller can supply more data.
 *
 * Returns:
 *  ARCHIVE_OK     - more input is needed, or (with `last' set) the
 *                   remaining window contents were emitted.
 *  ARCHIVE_EOF    - `last' is set and nothing remains to be decoded.
 *  ARCHIVE_FAILED - broken or truncated data; also stored in ds->error.
 *  100            - all tables are ready (state ST_GET_LITERAL); the
 *                   caller should continue with lzh_decode_blocks().
 */
static int
lzh_read_blocks(struct lzh_stream *strm, int last)
{
	struct lzh_dec *ds = strm->ds;
	struct lzh_br *br = &(ds->br);
	int c = 0, i;
	unsigned rbits;

	for (;;) {
		switch (ds->state) {
		case ST_RD_BLOCK:
			/*
			 * Read a block number indicates how many blocks
			 * we will handle. The block is composed of a
			 * literal and a match, sometimes a literal only
			 * in particular, there are no reference data at
			 * the beginning of the decompression.
			 */
			if (!lzh_br_read_ahead_0(strm, br, 16)) {
				if (!last)
					/* We need following data. */
					return (ARCHIVE_OK);
				if (lzh_br_has(br, 8)) {
					/*
					 * It seems there are extra bits.
					 *  1. Compressed data is broken.
					 *  2. `last' flag does not properly
					 *     set.
					 */
					goto failed;
				}
				if (ds->w_pos > 0) {
					/* Flush what is left in the window;
					 * the next call reports the EOF. */
					lzh_emit_window(strm, ds->w_pos);
					ds->w_pos = 0;
					return (ARCHIVE_OK);
				}
				/* End of compressed data; we have completely
				 * handled all compressed data. */
				return (ARCHIVE_EOF);
			}
			ds->blocks_avail = lzh_br_bits(br, 16);
			if (ds->blocks_avail == 0)
				goto failed;
			lzh_br_consume(br, 16);
			/*
			 * Read a literal table compressed in huffman
			 * coding.
			 */
			ds->pt.len_size = ds->literal_pt_len_size;
			ds->pt.len_bits = ds->literal_pt_len_bits;
			ds->reading_position = 0;
			/* FALL THROUGH */
		case ST_RD_PT_1:
			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
			 * used in reading both a literal table and a
			 * position table. */
			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_PT_1;
				return (ARCHIVE_OK);
			}
			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
			lzh_br_consume(br, ds->pt.len_bits);
			/* FALL THROUGH */
		case ST_RD_PT_2:
			if (ds->pt.len_avail == 0) {
				/* There is no bitlen; the next field names
				 * the only code of the table. */
				if (!lzh_br_read_ahead(strm, br,
				    ds->pt.len_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->state = ST_RD_PT_2;
					return (ARCHIVE_OK);
				}
				if (!lzh_make_fake_table(&(ds->pt),
				    lzh_br_bits(br, ds->pt.len_bits)))
					goto failed;/* Invalid data. */
				lzh_br_consume(br, ds->pt.len_bits);
				/* pt is complete: go on with whatever
				 * comes after the table just read. */
				if (ds->reading_position)
					ds->state = ST_GET_LITERAL;
				else
					ds->state = ST_RD_LITERAL_1;
				break;
			} else if (ds->pt.len_avail > ds->pt.len_size)
				goto failed;/* Invalid data. */
			ds->loop = 0;
			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
			/* The 2-bit zero-run field after the third bit
			 * length is read only when at least three lengths
			 * follow and pt does not have the position table's
			 * size; otherwise read all lengths in one go. */
			if (ds->pt.len_avail < 3 ||
			    ds->pt.len_size == ds->pos_pt_len_size) {
				ds->state = ST_RD_PT_4;
				break;
			}
			/* FALL THROUGH */
		case ST_RD_PT_3:
			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
			if (ds->loop < 3) {
				if (ds->loop < 0 || last)
					goto failed;/* Invalid data. */
				/* Not completed, get following data. */
				ds->state = ST_RD_PT_3;
				return (ARCHIVE_OK);
			}
			/* There are some null in bitlen of the literal. */
			if (!lzh_br_read_ahead(strm, br, 2)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_PT_3;
				return (ARCHIVE_OK);
			}
			c = lzh_br_bits(br, 2);
			lzh_br_consume(br, 2);
			if (c > ds->pt.len_avail - 3)
				goto failed;/* Invalid data. */
			/* Entries 3 .. 3+c-1 have a zero bit length. */
			for (i = 3; c-- > 0 ;)
				ds->pt.bitlen[i++] = 0;
			ds->loop = i;
			/* FALL THROUGH */
		case ST_RD_PT_4:
			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
			    ds->pt.len_avail);
			if (ds->loop < ds->pt.len_avail) {
				if (ds->loop < 0 || last)
					goto failed;/* Invalid data. */
				/* Not completed, get following data. */
				ds->state = ST_RD_PT_4;
				return (ARCHIVE_OK);
			}
			if (!lzh_make_huffman_table(&(ds->pt)))
				goto failed;/* Invalid data */
			if (ds->reading_position) {
				/* That was the position table; every table
				 * is ready now. */
				ds->state = ST_GET_LITERAL;
				break;
			}
			/* FALL THROUGH */
		case ST_RD_LITERAL_1:
			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_LITERAL_1;
				return (ARCHIVE_OK);
			}
			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
			lzh_br_consume(br, ds->lt.len_bits);
			/* FALL THROUGH */
		case ST_RD_LITERAL_2:
			if (ds->lt.len_avail == 0) {
				/* There is no bitlen; the next field names
				 * the only code of the table. */
				if (!lzh_br_read_ahead(strm, br,
				    ds->lt.len_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->state = ST_RD_LITERAL_2;
					return (ARCHIVE_OK);
				}
				if (!lzh_make_fake_table(&(ds->lt),
				    lzh_br_bits(br, ds->lt.len_bits)))
					goto failed;/* Invalid data */
				lzh_br_consume(br, ds->lt.len_bits);
				ds->state = ST_RD_POS_DATA_1;
				break;
			} else if (ds->lt.len_avail > ds->lt.len_size)
				goto failed;/* Invalid data */
			ds->loop = 0;
			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
			/* FALL THROUGH */
		case ST_RD_LITERAL_3:
			/* Decode the literal bit lengths with the pt table;
			 * ds->loop holds the index to resume at. */
			i = ds->loop;
			while (i < ds->lt.len_avail) {
				if (!lzh_br_read_ahead(strm, br,
				     ds->pt.max_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->loop = i;
					ds->state = ST_RD_LITERAL_3;
					return (ARCHIVE_OK);
				}
				rbits = lzh_br_bits(br, ds->pt.max_bits);
				c = lzh_decode_huffman(&(ds->pt), rbits);
				if (c > 2) {
					/* Note: 'c' will never be more than
					 * eighteen since it's limited by
					 * PT_BITLEN_SIZE, which is being set
					 * to ds->pt.len_size through
					 * ds->literal_pt_len_size. */
					lzh_br_consume(br, ds->pt.bitlen[c]);
					c -= 2;
					ds->lt.freq[c]++;
					ds->lt.bitlen[i++] = c;
				} else if (c == 0) {
					/* A single zero bit length. */
					lzh_br_consume(br, ds->pt.bitlen[c]);
					ds->lt.bitlen[i++] = 0;
				} else {
					/* c == 1 or c == 2: a run of zero bit
					 * lengths whose length follows in 4
					 * (c == 1) or 9 (c == 2) bits. */
					int n = (c == 1)?4:9;
					if (!lzh_br_read_ahead(strm, br,
					     ds->pt.bitlen[c] + n)) {
						if (last) /* Truncated data. */
							goto failed;
						ds->loop = i;
						ds->state = ST_RD_LITERAL_3;
						return (ARCHIVE_OK);
					}
					lzh_br_consume(br, ds->pt.bitlen[c]);
					c = lzh_br_bits(br, n);
					lzh_br_consume(br, n);
					/* Run length is biased by 3 for the
					 * 4-bit form and by 20 for the 9-bit
					 * form. */
					c += (n == 4)?3:20;
					if (i + c > ds->lt.len_avail)
						goto failed;/* Invalid data */
					memset(&(ds->lt.bitlen[i]), 0, c);
					i += c;
				}
			}
			if (i > ds->lt.len_avail ||
			    !lzh_make_huffman_table(&(ds->lt)))
				goto failed;/* Invalid data */
			/* FALL THROUGH */
		case ST_RD_POS_DATA_1:
			/*
			 * Read a position table compressed in huffman
			 * coding.
			 */
			ds->pt.len_size = ds->pos_pt_len_size;
			ds->pt.len_bits = ds->pos_pt_len_bits;
			ds->reading_position = 1;
			ds->state = ST_RD_PT_1;
			break;
		case ST_GET_LITERAL:
			/* Hand over to lzh_decode_blocks(). */
			return (100);
		}
	}
failed:
	/* Remember the failure so that later calls fail immediately. */
	return (ds->error = ARCHIVE_FAILED);
}
2317
/*
 * Decode the literals and matches of the current group of blocks into
 * the sliding window (ds->w_buff), using the literal table (lt) and the
 * position table (pt).
 *
 * Frequently used state (the bit reader, the window position, the
 * number of remaining blocks, ...) is kept in local copies and written
 * back to `ds' only on the exit paths.
 *
 * Returns:
 *  ARCHIVE_OK     - the input is exhausted or a full window has been
 *                   emitted; call again.
 *  ARCHIVE_FAILED - over-read or truncated data; also stored in
 *                   ds->error.
 *  100            - every block of the group has been decoded (state
 *                   ST_RD_BLOCK); the caller should read the next group.
 */
static int
lzh_decode_blocks(struct lzh_stream *strm, int last)
{
	struct lzh_dec *ds = strm->ds;
	struct lzh_br bre = ds->br;	/* Local copy of the bit reader. */
	struct huffman *lt = &(ds->lt);
	struct huffman *pt = &(ds->pt);
	unsigned char *w_buff = ds->w_buff;
	unsigned char *lt_bitlen = lt->bitlen;
	unsigned char *pt_bitlen = pt->bitlen;
	int blocks_avail = ds->blocks_avail, c = 0;
	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
	int state = ds->state;

	for (;;) {
		switch (state) {
		case ST_GET_LITERAL:
			for (;;) {
				if (blocks_avail == 0) {
					/* We have decoded all blocks.
					 * Let's handle next blocks. */
					ds->state = ST_RD_BLOCK;
					ds->br = bre;
					ds->blocks_avail = 0;
					ds->w_pos = w_pos;
					ds->copy_pos = 0;
					return (100);
				}

				/* lzh_br_read_ahead() always tries to fill the
				 * cache buffer up. In specific situation we
				 * are close to the end of the data, the cache
				 * buffer will not be full and thus we have to
				 * determine if the cache buffer has some bits
				 * as much as we need after lzh_br_read_ahead()
				 * failed. */
				if (!lzh_br_read_ahead(strm, &bre,
				    lt_max_bits)) {
					if (!last)
						goto next_data;
					/* Remaining bits are less than
					 * maximum bits(lt.max_bits) but maybe
					 * it still remains as much as we need,
					 * so we should try to use it with
					 * dummy bits. */
					c = lzh_decode_huffman(lt,
					      lzh_br_bits_forced(&bre,
					        lt_max_bits));
					lzh_br_consume(&bre, lt_bitlen[c]);
					if (!lzh_br_has(&bre, 0))
						goto failed;/* Over read. */
				} else {
					c = lzh_decode_huffman(lt,
					      lzh_br_bits(&bre, lt_max_bits));
					lzh_br_consume(&bre, lt_bitlen[c]);
				}
				blocks_avail--;
				if (c > UCHAR_MAX)
					/* Current block is a match data. */
					break;
				/*
				 * 'c' is exactly a literal code.
				 */
				/* Save a decoded code to reference it
				 * afterward. */
				w_buff[w_pos] = c;
				if (++w_pos >= w_size) {
					/* The window is full: hand it to the
					 * caller and start over at 0. */
					w_pos = 0;
					lzh_emit_window(strm, w_size);
					goto next_data;
				}
			}
			/* 'c' is the length of a match pattern we have
			 * already extracted, which has be stored in
			 * window(ds->w_buff). */
			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
			/* FALL THROUGH */
		case ST_GET_POS_1:
			/*
			 * Get a reference position.
			 */
			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
				if (!last) {
					/* Keep the match length for the
					 * next call. */
					state = ST_GET_POS_1;
					ds->copy_len = copy_len;
					goto next_data;
				}
				/* End of input: decode with dummy bits, as
				 * done for the literal above. */
				copy_pos = lzh_decode_huffman(pt,
				    lzh_br_bits_forced(&bre, pt_max_bits));
				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
				if (!lzh_br_has(&bre, 0))
					goto failed;/* Over read. */
			} else {
				copy_pos = lzh_decode_huffman(pt,
				    lzh_br_bits(&bre, pt_max_bits));
				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
			}
			/* FALL THROUGH */
		case ST_GET_POS_2:
			if (copy_pos > 1) {
				/* We need an additional adjustment number to
				 * the position. */
				int p = copy_pos - 1;
				if (!lzh_br_read_ahead(strm, &bre, p)) {
					if (last)
						goto failed;/* Truncated data.*/
					state = ST_GET_POS_2;
					ds->copy_len = copy_len;
					ds->copy_pos = copy_pos;
					goto next_data;
				}
				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
				lzh_br_consume(&bre, p);
			}
			/* The position is actually a distance from the last
			 * code we had extracted and thus we have to convert
			 * it to a position of the window. */
			copy_pos = (w_pos - copy_pos - 1) & w_mask;
			/* FALL THROUGH */
		case ST_COPY_DATA:
			/*
			 * Copy `copy_len' bytes as extracted data from
			 * the window into the output buffer.
			 */
			for (;;) {
				int l;

				/* Limit this pass so that neither the source
				 * nor the destination runs past the end of
				 * the window. */
				l = copy_len;
				if (copy_pos > w_pos) {
					if (l > w_size - copy_pos)
						l = w_size - copy_pos;
				} else {
					if (l > w_size - w_pos)
						l = w_size - w_pos;
				}
				if ((copy_pos + l < w_pos)
				    || (w_pos + l < copy_pos)) {
					/* No overlap. */
					memcpy(w_buff + w_pos,
					    w_buff + copy_pos, l);
				} else {
					/* The regions may overlap: copy byte
					 * by byte in ascending order so that
					 * bytes written earlier in this pass
					 * can serve as source. */
					const unsigned char *s;
					unsigned char *d;
					int li;

					d = w_buff + w_pos;
					s = w_buff + copy_pos;
					for (li = 0; li < l-1;) {
						d[li] = s[li];li++;
						d[li] = s[li];li++;
					}
					if (li < l)
						d[li] = s[li];
				}
				w_pos += l;
				if (w_pos == w_size) {
					/* The window is full: emit it and
					 * remember how much of the match is
					 * still to be copied. */
					w_pos = 0;
					lzh_emit_window(strm, w_size);
					if (copy_len <= l)
						state = ST_GET_LITERAL;
					else {
						state = ST_COPY_DATA;
						ds->copy_len = copy_len - l;
						ds->copy_pos =
						    (copy_pos + l) & w_mask;
					}
					goto next_data;
				}
				if (copy_len <= l)
					/* A copy of current pattern ended. */
					break;
				copy_len -= l;
				copy_pos = (copy_pos + l) & w_mask;
			}
			state = ST_GET_LITERAL;
			break;
		}
	}
failed:
	/* Remember the failure so that later calls fail immediately. */
	return (ds->error = ARCHIVE_FAILED);
next_data:
	/* Write the locally cached state back before returning. */
	ds->br = bre;
	ds->blocks_avail = blocks_avail;
	ds->state = state;
	ds->w_pos = w_pos;
	return (ARCHIVE_OK);
}
2507
2508 static int
lzh_huffman_init(struct huffman * hf,size_t len_size,int tbl_bits)2509 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2510 {
2511 int bits;
2512
2513 if (hf->bitlen == NULL) {
2514 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2515 if (hf->bitlen == NULL)
2516 return (ARCHIVE_FATAL);
2517 }
2518 if (hf->tbl == NULL) {
2519 if (tbl_bits < HTBL_BITS)
2520 bits = tbl_bits;
2521 else
2522 bits = HTBL_BITS;
2523 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2524 if (hf->tbl == NULL)
2525 return (ARCHIVE_FATAL);
2526 }
2527 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2528 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2529 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2530 if (hf->tree == NULL)
2531 return (ARCHIVE_FATAL);
2532 }
2533 hf->len_size = (int)len_size;
2534 hf->tbl_bits = tbl_bits;
2535 return (ARCHIVE_OK);
2536 }
2537
2538 static void
lzh_huffman_free(struct huffman * hf)2539 lzh_huffman_free(struct huffman *hf)
2540 {
2541 free(hf->bitlen);
2542 free(hf->tbl);
2543 free(hf->tree);
2544 }
2545
/*
 * Lookup table for bit lengths of seven and above, used by
 * lzh_read_pt_bitlen(). Such a length is stored as the bits 111 followed
 * by a run of ones that is closed by a zero. The table is indexed by the
 * ten bits that follow the leading 111 and yields the bit length:
 *
 *   0xxxxxxxxx -> 7,  10xxxxxxxx -> 8,  110xxxxxxx -> 9, ...
 *   1111111110 -> 16
 *
 * The last entry (all ones, no closing zero) is 0 and marks invalid data.
 */
static const char bitlen_tbl[0x400] = {
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0
};
2612 static int
lzh_read_pt_bitlen(struct lzh_stream * strm,int start,int end)2613 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2614 {
2615 struct lzh_dec *ds = strm->ds;
2616 struct lzh_br *br = &(ds->br);
2617 int c, i;
2618
2619 for (i = start; i < end; ) {
2620 /*
2621 * bit pattern the number we need
2622 * 000 -> 0
2623 * 001 -> 1
2624 * 010 -> 2
2625 * ...
2626 * 110 -> 6
2627 * 1110 -> 7
2628 * 11110 -> 8
2629 * ...
2630 * 1111111111110 -> 16
2631 */
2632 if (!lzh_br_read_ahead(strm, br, 3))
2633 return (i);
2634 if ((c = lzh_br_bits(br, 3)) == 7) {
2635 if (!lzh_br_read_ahead(strm, br, 13))
2636 return (i);
2637 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2638 if (c)
2639 lzh_br_consume(br, c - 3);
2640 else
2641 return (-1);/* Invalid data. */
2642 } else
2643 lzh_br_consume(br, 3);
2644 ds->pt.bitlen[i++] = c;
2645 ds->pt.freq[c]++;
2646 }
2647 return (i);
2648 }
2649
2650 static int
lzh_make_fake_table(struct huffman * hf,uint16_t c)2651 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2652 {
2653 if (c >= hf->len_size)
2654 return (0);
2655 hf->tbl[0] = c;
2656 hf->max_bits = 0;
2657 hf->shift_bits = 0;
2658 hf->bitlen[hf->tbl[0]] = 0;
2659 return (1);
2660 }
2661
2662 /*
2663 * Make a huffman coding table.
2664 */
2665 static int
lzh_make_huffman_table(struct huffman * hf)2666 lzh_make_huffman_table(struct huffman *hf)
2667 {
2668 uint16_t *tbl;
2669 const unsigned char *bitlen;
2670 int bitptn[17], weight[17];
2671 int i, maxbits = 0, ptn, tbl_size, w;
2672 int diffbits, len_avail;
2673
2674 /*
2675 * Initialize bit patterns.
2676 */
2677 ptn = 0;
2678 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2679 bitptn[i] = ptn;
2680 weight[i] = w;
2681 if (hf->freq[i]) {
2682 ptn += hf->freq[i] * w;
2683 maxbits = i;
2684 }
2685 }
2686 if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2687 return (0);/* Invalid */
2688
2689 hf->max_bits = maxbits;
2690
2691 /*
2692 * Cut out extra bits which we won't house in the table.
2693 * This preparation reduces the same calculation in the for-loop
2694 * making the table.
2695 */
2696 if (maxbits < 16) {
2697 int ebits = 16 - maxbits;
2698 for (i = 1; i <= maxbits; i++) {
2699 bitptn[i] >>= ebits;
2700 weight[i] >>= ebits;
2701 }
2702 }
2703 if (maxbits > HTBL_BITS) {
2704 unsigned htbl_max;
2705 uint16_t *p;
2706
2707 diffbits = maxbits - HTBL_BITS;
2708 for (i = 1; i <= HTBL_BITS; i++) {
2709 bitptn[i] >>= diffbits;
2710 weight[i] >>= diffbits;
2711 }
2712 htbl_max = bitptn[HTBL_BITS] +
2713 weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2714 p = &(hf->tbl[htbl_max]);
2715 while (p < &hf->tbl[1U<<HTBL_BITS])
2716 *p++ = 0;
2717 } else
2718 diffbits = 0;
2719 hf->shift_bits = diffbits;
2720
2721 /*
2722 * Make the table.
2723 */
2724 tbl_size = 1 << HTBL_BITS;
2725 tbl = hf->tbl;
2726 bitlen = hf->bitlen;
2727 len_avail = hf->len_avail;
2728 hf->tree_used = 0;
2729 for (i = 0; i < len_avail; i++) {
2730 uint16_t *p;
2731 int len, cnt;
2732 uint16_t bit;
2733 int extlen;
2734 struct htree_t *ht;
2735
2736 if (bitlen[i] == 0)
2737 continue;
2738 /* Get a bit pattern */
2739 len = bitlen[i];
2740 ptn = bitptn[len];
2741 cnt = weight[len];
2742 if (len <= HTBL_BITS) {
2743 /* Calculate next bit pattern */
2744 if ((bitptn[len] = ptn + cnt) > tbl_size)
2745 return (0);/* Invalid */
2746 /* Update the table */
2747 p = &(tbl[ptn]);
2748 if (cnt > 7) {
2749 uint16_t *pc;
2750
2751 cnt -= 8;
2752 pc = &p[cnt];
2753 pc[0] = (uint16_t)i;
2754 pc[1] = (uint16_t)i;
2755 pc[2] = (uint16_t)i;
2756 pc[3] = (uint16_t)i;
2757 pc[4] = (uint16_t)i;
2758 pc[5] = (uint16_t)i;
2759 pc[6] = (uint16_t)i;
2760 pc[7] = (uint16_t)i;
2761 if (cnt > 7) {
2762 cnt -= 8;
2763 memcpy(&p[cnt], pc,
2764 8 * sizeof(uint16_t));
2765 pc = &p[cnt];
2766 while (cnt > 15) {
2767 cnt -= 16;
2768 memcpy(&p[cnt], pc,
2769 16 * sizeof(uint16_t));
2770 }
2771 }
2772 if (cnt)
2773 memcpy(p, pc, cnt * sizeof(uint16_t));
2774 } else {
2775 while (cnt > 1) {
2776 p[--cnt] = (uint16_t)i;
2777 p[--cnt] = (uint16_t)i;
2778 }
2779 if (cnt)
2780 p[--cnt] = (uint16_t)i;
2781 }
2782 continue;
2783 }
2784
2785 /*
2786 * A bit length is too big to be housed to a direct table,
2787 * so we use a tree model for its extra bits.
2788 */
2789 bitptn[len] = ptn + cnt;
2790 bit = 1U << (diffbits -1);
2791 extlen = len - HTBL_BITS;
2792
2793 p = &(tbl[ptn >> diffbits]);
2794 if (*p == 0) {
2795 *p = len_avail + hf->tree_used;
2796 ht = &(hf->tree[hf->tree_used++]);
2797 if (hf->tree_used > hf->tree_avail)
2798 return (0);/* Invalid */
2799 ht->left = 0;
2800 ht->right = 0;
2801 } else {
2802 if (*p < len_avail ||
2803 *p >= (len_avail + hf->tree_used))
2804 return (0);/* Invalid */
2805 ht = &(hf->tree[*p - len_avail]);
2806 }
2807 while (--extlen > 0) {
2808 if (ptn & bit) {
2809 if (ht->left < len_avail) {
2810 ht->left = len_avail + hf->tree_used;
2811 ht = &(hf->tree[hf->tree_used++]);
2812 if (hf->tree_used > hf->tree_avail)
2813 return (0);/* Invalid */
2814 ht->left = 0;
2815 ht->right = 0;
2816 } else {
2817 ht = &(hf->tree[ht->left - len_avail]);
2818 }
2819 } else {
2820 if (ht->right < len_avail) {
2821 ht->right = len_avail + hf->tree_used;
2822 ht = &(hf->tree[hf->tree_used++]);
2823 if (hf->tree_used > hf->tree_avail)
2824 return (0);/* Invalid */
2825 ht->left = 0;
2826 ht->right = 0;
2827 } else {
2828 ht = &(hf->tree[ht->right - len_avail]);
2829 }
2830 }
2831 bit >>= 1;
2832 }
2833 if (ptn & bit) {
2834 if (ht->left != 0)
2835 return (0);/* Invalid */
2836 ht->left = (uint16_t)i;
2837 } else {
2838 if (ht->right != 0)
2839 return (0);/* Invalid */
2840 ht->right = (uint16_t)i;
2841 }
2842 }
2843 return (1);
2844 }
2845
2846 static int
lzh_decode_huffman_tree(struct huffman * hf,unsigned rbits,int c)2847 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2848 {
2849 struct htree_t *ht;
2850 int extlen;
2851
2852 ht = hf->tree;
2853 extlen = hf->shift_bits;
2854 while (c >= hf->len_avail) {
2855 c -= hf->len_avail;
2856 if (extlen-- <= 0 || c >= hf->tree_used)
2857 return (0);
2858 if (rbits & (1U << extlen))
2859 c = ht[c].left;
2860 else
2861 c = ht[c].right;
2862 }
2863 return (c);
2864 }
2865
2866 static inline int
lzh_decode_huffman(struct huffman * hf,unsigned rbits)2867 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2868 {
2869 int c;
2870 /*
2871 * At first search an index table for a bit pattern.
2872 * If it fails, search a huffman tree for.
2873 */
2874 c = hf->tbl[rbits >> hf->shift_bits];
2875 if (c < hf->len_avail || hf->len_avail == 0)
2876 return (c);
2877 /* This bit pattern needs to be found out at a huffman tree. */
2878 return (lzh_decode_huffman_tree(hf, rbits, c));
2879 }
2880