1 /*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_time_private.h"
47 #include "archive_endian.h"
48
49
/* Bounds of the match lengths the coder can emit. */
#define MAXMATCH		256	/* Maximum match length. */
#define MINMATCH		3	/* Minimum match length. */

/*
 * Layout of the literal table:
 *
 * +0              +256                      +510
 * +---------------+-------------------------+
 * | literal code  |       match length      |
 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
 * +---------------+-------------------------+
 *  <---          LT_BITLEN_SIZE         --->
 */
/* Number of entries in the literal table. */
#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
/*
 * Number of entries in the position table.
 * Note: this size is used for both the position table and the
 * pre-literal table.
 */
#define PT_BITLEN_SIZE		(3 + 16)
66
/*
 * Complete state of one LZH decoder instance: the sliding window,
 * the bit reader and the two Huffman tables.
 */
struct lzh_dec {
	/* Current decoding status. */
	int			 state;

	/*
	 * Sliding window over the most recently decoded bytes:
	 * 8Ki for lh5, 32Ki for lh6 and 64Ki for lh7.
	 */
	int			 w_size;
	int			 w_mask;
	/* Storage for the window; it is used as a loop (ring) buffer. */
	unsigned char		*w_buff;
	/* Position in the window where the next byte is inserted. */
	int			 w_pos;
	/* Position in the window from which decoded bytes can be copied. */
	int			 copy_pos;
	/* Number of bytes that can be copied from the window. */
	int			 copy_len;

	/*
	 * Bit stream reader.
	 */
	struct lzh_br {
#define CACHE_TYPE		uint64_t
#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
		/* Bits fetched from the input but not consumed yet. */
		CACHE_TYPE	 cache_buffer;
		/* How many bits of cache_buffer are available. */
		int		 cache_avail;
	} br;

	/*
	 * Huffman coding.
	 */
	struct huffman {
		int		 len_size;
		int		 len_avail;
		int		 len_bits;
		int		 freq[17];
		unsigned char	*bitlen;

		/*
		 * Codes are resolved through an index table, which is
		 * faster than walking a binary Huffman tree.  The table
		 * is kept small because a large one causes many L1 cache
		 * read misses.
		 */
#define HTBL_BITS	10
		int		 max_bits;
		int		 shift_bits;
		int		 tbl_bits;
		int		 tree_used;
		int		 tree_avail;
		/* Direct access table. */
		uint16_t	*tbl;
		/*
		 * Binary tree used for the extra bits that do not fit
		 * into the direct access table.
		 */
		struct htree_t {
			uint16_t left;
			uint16_t right;
		}		*tree;
	} lt, pt;

	/*
	 * Block and table bookkeeping; these fields are driven by the
	 * decoder code, which is outside this part of the file.
	 */
	int			 blocks_avail;
	int			 pos_pt_len_size;
	int			 pos_pt_len_bits;
	int			 literal_pt_len_size;
	int			 literal_pt_len_bits;
	int			 reading_position;
	int			 loop;
	int			 error;
};
138
/*
 * Stream handle passed to the lzh_* decoder functions.
 * NOTE(review): the field roles below are inferred from their names
 * (zlib-like next/avail/total triplets); the code that drives them is
 * outside this part of the file -- confirm against lzh_decode().
 */
struct lzh_stream {
	/* Input side. */
	const unsigned char	*next_in;
	int			 avail_in;
	int64_t			 total_in;
	/* Output side. */
	const unsigned char	*ref_ptr;
	int			 avail_out;
	int64_t			 total_out;
	/* Decoder state attached to this stream. */
	struct lzh_dec		*ds;
};
148
149 struct lha {
150 /* entry_bytes_remaining is the number of bytes we expect. */
151 int64_t entry_offset;
152 int64_t entry_bytes_remaining;
153 int64_t entry_unconsumed;
154 uint16_t entry_crc_calculated;
155
156 size_t header_size; /* header size */
157 unsigned char level; /* header level */
158 char method[3]; /* compress type */
159 int64_t compsize; /* compressed data size */
160 int64_t origsize; /* original file size */
161 int setflag;
162 #define BIRTHTIME_IS_SET 1
163 #define ATIME_IS_SET 2
164 #define UNIX_MODE_IS_SET 4
165 #define CRC_IS_SET 8
166 int64_t birthtime;
167 uint32_t birthtime_tv_nsec;
168 int64_t mtime;
169 uint32_t mtime_tv_nsec;
170 int64_t atime;
171 uint32_t atime_tv_nsec;
172 mode_t mode;
173 int64_t uid;
174 int64_t gid;
175 struct archive_string uname;
176 struct archive_string gname;
177 uint16_t header_crc;
178 uint16_t crc;
179 /* dirname and filename could be in different codepages */
180 struct archive_string_conv *sconv_dir;
181 struct archive_string_conv *sconv_fname;
182 struct archive_string_conv *opt_sconv;
183
184 struct archive_string dirname;
185 struct archive_string filename;
186 struct archive_wstring ws;
187
188 unsigned char dos_attr;
189
190 /* Flag to mark progress that an archive was read their first header.*/
191 char found_first_header;
192 /* Flag to mark that indicates an empty directory. */
193 char directory;
194
195 /* Flags to mark progress of decompression. */
196 char decompress_init;
197 char end_of_entry;
198 char end_of_entry_cleanup;
199 char entry_is_compressed;
200
201 char format_name[64];
202
203 struct lzh_stream strm;
204 };
205
/*
 * Offsets of the members shared by every LHA header level, and the
 * number of bytes needed to inspect them.
 */
#define H_METHOD_OFFSET	2	/* Compress type. */
#define H_ATTR_OFFSET	19	/* DOS attribute. */
#define H_LEVEL_OFFSET	20	/* Header Level. */
#define H_SIZE		22	/* Minimum header size. */
213
/* Format callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_lha_bid(struct archive_read *, int);
static int	archive_read_format_lha_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_lha_read_header(struct archive_read *,
		    struct archive_entry *);
static int	archive_read_format_lha_read_data(struct archive_read *,
		    const void **, size_t *, int64_t *);
static int	archive_read_format_lha_read_data_skip(struct archive_read *);
static int	archive_read_format_lha_cleanup(struct archive_read *);

/* LHA header handling. */
static void	lha_replace_path_separator(struct lha *,
		    struct archive_entry *);
static int	lha_read_file_header_0(struct archive_read *, struct lha *);
static int	lha_read_file_header_1(struct archive_read *, struct lha *);
static int	lha_read_file_header_2(struct archive_read *, struct lha *);
static int	lha_read_file_header_3(struct archive_read *, struct lha *);
static int	lha_read_file_extended_header(struct archive_read *,
		    struct lha *, uint16_t *, int, uint64_t, size_t *);
static size_t	lha_check_header_format(const void *);
static int	lha_skip_sfx(struct archive_read *);
static unsigned char	lha_calcsum(unsigned char, const void *,
		    int, size_t);
static int	lha_parse_linkname(struct archive_wstring *,
		    struct archive_wstring *);

/* Entry data readers and CRC16. */
static int	lha_read_data_none(struct archive_read *, const void **,
		    size_t *, int64_t *);
static int	lha_read_data_lzh(struct archive_read *, const void **,
		    size_t *, int64_t *);
static void	lha_crc16_init(void);
static uint16_t	lha_crc16(uint16_t, const void *, size_t);

/* LZH decoder. */
static int	lzh_decode_init(struct lzh_stream *, const char *);
static void	lzh_decode_free(struct lzh_stream *);
static int	lzh_decode(struct lzh_stream *, int);
static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
static int	lzh_huffman_init(struct huffman *, size_t, int);
static void	lzh_huffman_free(struct huffman *);
static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
static int	lzh_make_fake_table(struct huffman *, uint16_t);
static int	lzh_make_huffman_table(struct huffman *);
static inline int lzh_decode_huffman(struct huffman *, unsigned);
static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
255
256
257 int
archive_read_support_format_lha(struct archive * _a)258 archive_read_support_format_lha(struct archive *_a)
259 {
260 struct archive_read *a = (struct archive_read *)_a;
261 struct lha *lha;
262 int r;
263
264 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
265 ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
266
267 lha = calloc(1, sizeof(*lha));
268 if (lha == NULL) {
269 archive_set_error(&a->archive, ENOMEM,
270 "Can't allocate lha data");
271 return (ARCHIVE_FATAL);
272 }
273 archive_string_init(&lha->ws);
274
275 r = __archive_read_register_format(a,
276 lha,
277 "lha",
278 archive_read_format_lha_bid,
279 archive_read_format_lha_options,
280 archive_read_format_lha_read_header,
281 archive_read_format_lha_read_data,
282 archive_read_format_lha_read_data_skip,
283 NULL,
284 archive_read_format_lha_cleanup,
285 NULL,
286 NULL);
287
288 if (r != ARCHIVE_OK)
289 free(lha);
290 return (ARCHIVE_OK);
291 }
292
293 static size_t
lha_check_header_format(const void * h)294 lha_check_header_format(const void *h)
295 {
296 const unsigned char *p = h;
297 size_t next_skip_bytes;
298
299 switch (p[H_METHOD_OFFSET+3]) {
300 /*
301 * "-lh0-" ... "-lh7-" "-lhd-"
302 * "-lzs-" "-lz5-"
303 */
304 case '0': case '1': case '2': case '3':
305 case '4': case '5': case '6': case '7':
306 case 'd':
307 case 's':
308 next_skip_bytes = 4;
309
310 /* b0 == 0 means the end of an LHa archive file. */
311 if (p[0] == 0)
312 break;
313 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
314 || p[H_METHOD_OFFSET+4] != '-')
315 break;
316
317 if (p[H_METHOD_OFFSET+2] == 'h') {
318 /* "-lh?-" */
319 if (p[H_METHOD_OFFSET+3] == 's')
320 break;
321 if (p[H_LEVEL_OFFSET] == 0)
322 return (0);
323 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
324 return (0);
325 }
326 if (p[H_METHOD_OFFSET+2] == 'z') {
327 /* LArc extensions: -lzs-,-lz4- and -lz5- */
328 if (p[H_LEVEL_OFFSET] != 0)
329 break;
330 if (p[H_METHOD_OFFSET+3] == 's'
331 || p[H_METHOD_OFFSET+3] == '4'
332 || p[H_METHOD_OFFSET+3] == '5')
333 return (0);
334 }
335 break;
336 case 'h': next_skip_bytes = 1; break;
337 case 'z': next_skip_bytes = 1; break;
338 case 'l': next_skip_bytes = 2; break;
339 case '-': next_skip_bytes = 3; break;
340 default : next_skip_bytes = 4; break;
341 }
342
343 return (next_skip_bytes);
344 }
345
346 static int
archive_read_format_lha_bid(struct archive_read * a,int best_bid)347 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
348 {
349 const char *p;
350 const void *buff;
351 ssize_t bytes_avail, offset, window;
352 size_t next;
353
354 /* If there's already a better bid than we can ever
355 make, don't bother testing. */
356 if (best_bid > 30)
357 return (-1);
358
359 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
360 return (-1);
361
362 if (lha_check_header_format(p) == 0)
363 return (30);
364
365 if (p[0] == 'M' && p[1] == 'Z') {
366 /* PE file */
367 offset = 0;
368 window = 4096;
369 while (offset < (1024 * 20)) {
370 buff = __archive_read_ahead(a, offset + window,
371 &bytes_avail);
372 if (buff == NULL) {
373 /* Remaining bytes are less than window. */
374 window >>= 1;
375 if (window < (H_SIZE + 3))
376 return (0);
377 continue;
378 }
379 p = (const char *)buff + offset;
380 while (p + H_SIZE < (const char *)buff + bytes_avail) {
381 if ((next = lha_check_header_format(p)) == 0)
382 return (30);
383 p += next;
384 }
385 offset = p - (const char *)buff;
386 }
387 }
388 return (0);
389 }
390
391 static int
archive_read_format_lha_options(struct archive_read * a,const char * key,const char * val)392 archive_read_format_lha_options(struct archive_read *a,
393 const char *key, const char *val)
394 {
395 struct lha *lha;
396 int ret = ARCHIVE_FAILED;
397
398 lha = (struct lha *)(a->format->data);
399 if (strcmp(key, "hdrcharset") == 0) {
400 if (val == NULL || val[0] == 0)
401 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
402 "lha: hdrcharset option needs a character-set name");
403 else {
404 lha->opt_sconv =
405 archive_string_conversion_from_charset(
406 &a->archive, val, 0);
407 if (lha->opt_sconv != NULL)
408 ret = ARCHIVE_OK;
409 else
410 ret = ARCHIVE_FATAL;
411 }
412 return (ret);
413 }
414
415 /* Note: The "warn" return is just to inform the options
416 * supervisor that we didn't handle it. It will generate
417 * a suitable error if no one used this option. */
418 return (ARCHIVE_WARN);
419 }
420
421 static int
lha_skip_sfx(struct archive_read * a)422 lha_skip_sfx(struct archive_read *a)
423 {
424 const void *h;
425 const char *p, *q;
426 size_t next, skip;
427 ssize_t bytes, window;
428
429 window = 4096;
430 for (;;) {
431 h = __archive_read_ahead(a, window, &bytes);
432 if (h == NULL) {
433 /* Remaining bytes are less than window. */
434 window >>= 1;
435 if (window < (H_SIZE + 3))
436 goto fatal;
437 continue;
438 }
439 if (bytes < H_SIZE)
440 goto fatal;
441 p = h;
442 q = p + bytes;
443
444 /*
445 * Scan ahead until we find something that looks
446 * like the lha header.
447 */
448 while (p + H_SIZE < q) {
449 if ((next = lha_check_header_format(p)) == 0) {
450 skip = p - (const char *)h;
451 __archive_read_consume(a, skip);
452 return (ARCHIVE_OK);
453 }
454 p += next;
455 }
456 skip = p - (const char *)h;
457 __archive_read_consume(a, skip);
458 }
459 fatal:
460 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
461 "Couldn't find out LHa header");
462 return (ARCHIVE_FATAL);
463 }
464
465 static int
truncated_error(struct archive_read * a)466 truncated_error(struct archive_read *a)
467 {
468 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
469 "Truncated LHa header");
470 return (ARCHIVE_FATAL);
471 }
472
473 static int
archive_read_format_lha_read_header(struct archive_read * a,struct archive_entry * entry)474 archive_read_format_lha_read_header(struct archive_read *a,
475 struct archive_entry *entry)
476 {
477 struct archive_wstring linkname;
478 struct archive_wstring pathname;
479 struct lha *lha;
480 const unsigned char *p;
481 const char *signature;
482 int err;
483 struct archive_mstring conv_buffer;
484 const wchar_t *conv_buffer_p;
485
486 lha_crc16_init();
487
488 a->archive.archive_format = ARCHIVE_FORMAT_LHA;
489 if (a->archive.archive_format_name == NULL)
490 a->archive.archive_format_name = "lha";
491
492 lha = (struct lha *)(a->format->data);
493 lha->decompress_init = 0;
494 lha->end_of_entry = 0;
495 lha->end_of_entry_cleanup = 0;
496 lha->entry_unconsumed = 0;
497
498 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
499 /*
500 * LHa archiver added 0 to the tail of its archive file as
501 * the mark of the end of the archive.
502 */
503 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
504 if (signature == NULL || signature[0] == 0)
505 return (ARCHIVE_EOF);
506 return (truncated_error(a));
507 }
508
509 signature = (const char *)p;
510 if (lha->found_first_header == 0 &&
511 signature[0] == 'M' && signature[1] == 'Z') {
512 /* This is an executable? Must be self-extracting... */
513 err = lha_skip_sfx(a);
514 if (err < ARCHIVE_WARN)
515 return (err);
516
517 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
518 return (truncated_error(a));
519 signature = (const char *)p;
520 }
521 /* signature[0] == 0 means the end of an LHa archive file. */
522 if (signature[0] == 0)
523 return (ARCHIVE_EOF);
524
525 /*
526 * Check the header format and method type.
527 */
528 if (lha_check_header_format(p) != 0) {
529 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
530 "Bad LHa file");
531 return (ARCHIVE_FATAL);
532 }
533
534 /* We've found the first header. */
535 lha->found_first_header = 1;
536 /* Set a default value and common data */
537 lha->header_size = 0;
538 lha->level = p[H_LEVEL_OFFSET];
539 lha->method[0] = p[H_METHOD_OFFSET+1];
540 lha->method[1] = p[H_METHOD_OFFSET+2];
541 lha->method[2] = p[H_METHOD_OFFSET+3];
542 if (memcmp(lha->method, "lhd", 3) == 0)
543 lha->directory = 1;
544 else
545 lha->directory = 0;
546 if (memcmp(lha->method, "lh0", 3) == 0 ||
547 memcmp(lha->method, "lz4", 3) == 0)
548 lha->entry_is_compressed = 0;
549 else
550 lha->entry_is_compressed = 1;
551
552 lha->compsize = 0;
553 lha->origsize = 0;
554 lha->setflag = 0;
555 lha->birthtime = 0;
556 lha->birthtime_tv_nsec = 0;
557 lha->mtime = 0;
558 lha->mtime_tv_nsec = 0;
559 lha->atime = 0;
560 lha->atime_tv_nsec = 0;
561 lha->mode = (lha->directory)? 0777 : 0666;
562 lha->uid = 0;
563 lha->gid = 0;
564 archive_string_empty(&lha->dirname);
565 archive_string_empty(&lha->filename);
566 lha->dos_attr = 0;
567 if (lha->opt_sconv != NULL) {
568 lha->sconv_dir = lha->opt_sconv;
569 lha->sconv_fname = lha->opt_sconv;
570 } else {
571 lha->sconv_dir = NULL;
572 lha->sconv_fname = NULL;
573 }
574
575 switch (p[H_LEVEL_OFFSET]) {
576 case 0:
577 err = lha_read_file_header_0(a, lha);
578 break;
579 case 1:
580 err = lha_read_file_header_1(a, lha);
581 break;
582 case 2:
583 err = lha_read_file_header_2(a, lha);
584 break;
585 case 3:
586 err = lha_read_file_header_3(a, lha);
587 break;
588 default:
589 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
590 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
591 err = ARCHIVE_FATAL;
592 break;
593 }
594 if (err < ARCHIVE_WARN)
595 return (err);
596
597
598 if (!lha->directory && archive_strlen(&lha->filename) == 0)
599 /* The filename has not been set */
600 return (truncated_error(a));
601
602 /*
603 * Make a pathname from a dirname and a filename, after converting to Unicode.
604 * This is because codepages might differ between dirname and filename.
605 */
606 archive_string_init(&pathname);
607 archive_string_init(&linkname);
608 archive_string_init(&conv_buffer.aes_mbs);
609 archive_string_init(&conv_buffer.aes_mbs_in_locale);
610 archive_string_init(&conv_buffer.aes_utf8);
611 archive_string_init(&conv_buffer.aes_wcs);
612 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
613 archive_set_error(&a->archive,
614 ARCHIVE_ERRNO_FILE_FORMAT,
615 "Pathname cannot be converted "
616 "from %s to Unicode",
617 archive_string_conversion_charset_name(lha->sconv_dir));
618 err = ARCHIVE_FATAL;
619 } else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
620 err = ARCHIVE_FATAL;
621 if (err == ARCHIVE_FATAL) {
622 archive_mstring_clean(&conv_buffer);
623 archive_wstring_free(&pathname);
624 archive_wstring_free(&linkname);
625 return (err);
626 }
627 archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
628
629 archive_string_empty(&conv_buffer.aes_mbs);
630 archive_string_empty(&conv_buffer.aes_mbs_in_locale);
631 archive_string_empty(&conv_buffer.aes_utf8);
632 archive_wstring_empty(&conv_buffer.aes_wcs);
633 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
634 archive_set_error(&a->archive,
635 ARCHIVE_ERRNO_FILE_FORMAT,
636 "Pathname cannot be converted "
637 "from %s to Unicode",
638 archive_string_conversion_charset_name(lha->sconv_fname));
639 err = ARCHIVE_FATAL;
640 }
641 else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
642 err = ARCHIVE_FATAL;
643 if (err == ARCHIVE_FATAL) {
644 archive_mstring_clean(&conv_buffer);
645 archive_wstring_free(&pathname);
646 archive_wstring_free(&linkname);
647 return (err);
648 }
649 archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
650 archive_mstring_clean(&conv_buffer);
651
652 if ((lha->mode & AE_IFMT) == AE_IFLNK) {
653 /*
654 * Extract the symlink-name if it's included in the pathname.
655 */
656 if (!lha_parse_linkname(&linkname, &pathname)) {
657 /* We couldn't get the symlink-name. */
658 archive_set_error(&a->archive,
659 ARCHIVE_ERRNO_FILE_FORMAT,
660 "Unknown symlink-name");
661 archive_wstring_free(&pathname);
662 archive_wstring_free(&linkname);
663 return (ARCHIVE_FAILED);
664 }
665 } else {
666 /*
667 * Make sure a file-type is set.
668 * The mode has been overridden if it is in the extended data.
669 */
670 lha->mode = (lha->mode & ~AE_IFMT) |
671 ((lha->directory)? AE_IFDIR: AE_IFREG);
672 }
673 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
674 (lha->dos_attr & 1) != 0)
675 lha->mode &= ~(0222);/* read only. */
676
677 /*
678 * Set basic file parameters.
679 */
680 archive_entry_copy_pathname_w(entry, pathname.s);
681 archive_wstring_free(&pathname);
682 if (archive_strlen(&linkname) > 0) {
683 archive_entry_copy_symlink_w(entry, linkname.s);
684 } else
685 archive_entry_set_symlink(entry, NULL);
686 archive_wstring_free(&linkname);
687 /*
688 * When a header level is 0, there is a possibility that
689 * a pathname and a symlink has '\' character, a directory
690 * separator in DOS/Windows. So we should convert it to '/'.
691 */
692 if (lha->level == 0)
693 lha_replace_path_separator(lha, entry);
694
695 archive_entry_set_mode(entry, lha->mode);
696 archive_entry_set_uid(entry, lha->uid);
697 archive_entry_set_gid(entry, lha->gid);
698 if (archive_strlen(&lha->uname) > 0)
699 archive_entry_set_uname(entry, lha->uname.s);
700 if (archive_strlen(&lha->gname) > 0)
701 archive_entry_set_gname(entry, lha->gname.s);
702 if (lha->setflag & BIRTHTIME_IS_SET) {
703 archive_entry_set_birthtime(entry, lha->birthtime,
704 lha->birthtime_tv_nsec);
705 archive_entry_set_ctime(entry, lha->birthtime,
706 lha->birthtime_tv_nsec);
707 } else {
708 archive_entry_unset_birthtime(entry);
709 archive_entry_unset_ctime(entry);
710 }
711 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
712 if (lha->setflag & ATIME_IS_SET)
713 archive_entry_set_atime(entry, lha->atime,
714 lha->atime_tv_nsec);
715 else
716 archive_entry_unset_atime(entry);
717 if (lha->directory || archive_entry_symlink(entry) != NULL)
718 archive_entry_unset_size(entry);
719 else
720 archive_entry_set_size(entry, lha->origsize);
721
722 /*
723 * Prepare variables used to read a file content.
724 */
725 lha->entry_bytes_remaining = lha->compsize;
726 if (lha->entry_bytes_remaining < 0) {
727 archive_set_error(&a->archive,
728 ARCHIVE_ERRNO_FILE_FORMAT,
729 "Invalid LHa entry size");
730 return (ARCHIVE_FATAL);
731 }
732 lha->entry_offset = 0;
733 lha->entry_crc_calculated = 0;
734
735 /*
736 * This file does not have a content.
737 */
738 if (lha->directory || lha->compsize == 0)
739 lha->end_of_entry = 1;
740
741 snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-",
742 lha->method[0], lha->method[1], lha->method[2]);
743 a->archive.archive_format_name = lha->format_name;
744
745 return (err);
746 }
747
748 /*
749 * Replace a DOS path separator '\' by a character '/'.
750 * Some multi-byte character set have a character '\' in its second byte.
751 */
752 static void
lha_replace_path_separator(struct lha * lha,struct archive_entry * entry)753 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
754 {
755 const wchar_t *wp;
756 size_t i;
757
758 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
759 archive_wstrcpy(&(lha->ws), wp);
760 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
761 if (lha->ws.s[i] == L'\\')
762 lha->ws.s[i] = L'/';
763 }
764 archive_entry_copy_pathname_w(entry, lha->ws.s);
765 }
766
767 if ((wp = archive_entry_symlink_w(entry)) != NULL) {
768 archive_wstrcpy(&(lha->ws), wp);
769 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
770 if (lha->ws.s[i] == L'\\')
771 lha->ws.s[i] = L'/';
772 }
773 archive_entry_copy_symlink_w(entry, lha->ws.s);
774 }
775 }
776
777 /*
778 * Header 0 format
779 *
780 * +0 +1 +2 +7 +11
781 * +---------------+----------+----------------+-------------------+
782 * |header size(*1)|header sum|compression type|compressed size(*2)|
783 * +---------------+----------+----------------+-------------------+
784 * <---------------------(*1)----------*
785 *
786 * +11 +15 +17 +19 +20 +21
787 * +-----------------+---------+---------+--------------+----------------+
788 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
789 * +-----------------+---------+---------+--------------+----------------+
790 * *--------------------------------(*1)---------------------------------*
791 *
792 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4)
793 * +---------------+---------+----------+----------------+------------------+
794 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data |
795 * +---------------+---------+----------+----------------+------------------+
796 * <--(*3)-> <------(*2)------>
797 * *----------------------(*1)-------------------------->
798 *
799 */
800 #define H0_HEADER_SIZE_OFFSET 0
801 #define H0_HEADER_SUM_OFFSET 1
802 #define H0_COMP_SIZE_OFFSET 7
803 #define H0_ORIG_SIZE_OFFSET 11
804 #define H0_DOS_TIME_OFFSET 15
805 #define H0_NAME_LEN_OFFSET 21
806 #define H0_FILE_NAME_OFFSET 22
807 #define H0_FIXED_SIZE 24
808 static int
lha_read_file_header_0(struct archive_read * a,struct lha * lha)809 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
810 {
811 const unsigned char *p;
812 int extdsize, namelen;
813 unsigned char headersum, sum_calculated;
814
815 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
816 return (truncated_error(a));
817 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
818 headersum = p[H0_HEADER_SUM_OFFSET];
819 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
820 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
821 lha->mtime = dos_to_unix(archive_le32dec(p + H0_DOS_TIME_OFFSET));
822 namelen = p[H0_NAME_LEN_OFFSET];
823 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
824 if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
825 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
826 "Invalid LHa header");
827 return (ARCHIVE_FATAL);
828 }
829 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
830 return (truncated_error(a));
831
832 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
833 /* When extdsize == -2, A CRC16 value is not present in the header. */
834 if (extdsize >= 0) {
835 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
836 lha->setflag |= CRC_IS_SET;
837 }
838 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
839
840 /* Read an extended header */
841 if (extdsize > 0) {
842 /* This extended data is set by 'LHa for UNIX' only.
843 * Maybe fixed size.
844 */
845 p += H0_FILE_NAME_OFFSET + namelen + 2;
846 if (p[0] == 'U' && extdsize == 12) {
847 /* p[1] is a minor version. */
848 lha->mtime = archive_le32dec(&p[2]);
849 lha->mode = archive_le16dec(&p[6]);
850 lha->uid = archive_le16dec(&p[8]);
851 lha->gid = archive_le16dec(&p[10]);
852 lha->setflag |= UNIX_MODE_IS_SET;
853 }
854 }
855 __archive_read_consume(a, lha->header_size);
856
857 if (sum_calculated != headersum) {
858 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
859 "LHa header sum error");
860 return (ARCHIVE_FATAL);
861 }
862
863 return (ARCHIVE_OK);
864 }
865
866 /*
867 * Header 1 format
868 *
869 * +0 +1 +2 +7 +11
870 * +---------------+----------+----------------+-------------+
871 * |header size(*1)|header sum|compression type|skip size(*2)|
872 * +---------------+----------+----------------+-------------+
873 * <---------------(*1)----------*
874 *
875 * +11 +15 +17 +19 +20 +21
876 * +-----------------+---------+---------+--------------+----------------+
877 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
878 * +-----------------+---------+---------+--------------+----------------+
879 * *-------------------------------(*1)----------------------------------*
880 *
881 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4)
882 * +---------------+---------+----------+-----------+-----------+
883 * |name length(*3)|file name|file CRC16| creator |padding(*4)|
884 * +---------------+---------+----------+-----------+-----------+
885 * <--(*3)->
886 * *----------------------------(*1)----------------------------*
887 *
888 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5)
889 * +----------------+---------------------+------------------------+
890 * |next header size| extended header(*5) | compressed data |
891 * +----------------+---------------------+------------------------+
892 * *------(*1)-----> <--------------------(*2)-------------------->
893 */
894 #define H1_HEADER_SIZE_OFFSET 0
895 #define H1_HEADER_SUM_OFFSET 1
896 #define H1_COMP_SIZE_OFFSET 7
897 #define H1_ORIG_SIZE_OFFSET 11
898 #define H1_DOS_TIME_OFFSET 15
899 #define H1_NAME_LEN_OFFSET 21
900 #define H1_FILE_NAME_OFFSET 22
901 #define H1_FIXED_SIZE 27
902 static int
lha_read_file_header_1(struct archive_read * a,struct lha * lha)903 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
904 {
905 const unsigned char *p;
906 size_t extdsize;
907 int i, err, err2;
908 int namelen, padding;
909 unsigned char headersum, sum_calculated;
910
911 err = ARCHIVE_OK;
912
913 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
914 return (truncated_error(a));
915
916 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
917 headersum = p[H1_HEADER_SUM_OFFSET];
918 /* Note: An extended header size is included in a compsize. */
919 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
920 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
921 lha->mtime = dos_to_unix(archive_le32dec(p + H1_DOS_TIME_OFFSET));
922 namelen = p[H1_NAME_LEN_OFFSET];
923 /* Calculate a padding size. The result will be normally 0 only(?) */
924 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
925
926 if (namelen > 230 || padding < 0)
927 goto invalid;
928
929 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
930 return (truncated_error(a));
931
932 for (i = 0; i < namelen; i++) {
933 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
934 goto invalid;/* Invalid filename. */
935 }
936 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
937 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
938 lha->setflag |= CRC_IS_SET;
939
940 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
941 /* Consume used bytes but not include `next header size' data
942 * since it will be consumed in lha_read_file_extended_header(). */
943 __archive_read_consume(a, lha->header_size - 2);
944
945 /* Read extended headers */
946 err2 = lha_read_file_extended_header(a, lha, NULL, 2,
947 (uint64_t)(lha->compsize + 2), &extdsize);
948 if (err2 < ARCHIVE_WARN)
949 return (err2);
950 if (err2 < err)
951 err = err2;
952 /* Get a real compressed file size. */
953 lha->compsize -= extdsize - 2;
954
955 if (lha->compsize < 0)
956 goto invalid; /* Invalid compressed file size */
957
958 if (sum_calculated != headersum) {
959 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
960 "LHa header sum error");
961 return (ARCHIVE_FATAL);
962 }
963 return (err);
964 invalid:
965 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
966 "Invalid LHa header");
967 return (ARCHIVE_FATAL);
968 }
969
/*
 * Header 2 format
 *
 * +0              +2               +7                  +11               +15
 * +---------------+----------------+-------------------+-----------------+
 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
 * +---------------+----------------+-------------------+-----------------+
 *  <--------------------------------(*1)---------------------------------*
 *
 * +15               +19          +20              +21        +23         +24
 * +-----------------+------------+----------------+----------+-----------+
 * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
 * +-----------------+------------+----------------+----------+-----------+
 * *---------------------------------(*1)---------------------------------*
 *
 * +24              +26                 +26+(*3)      +26+(*3)+(*4)
 * +----------------+-------------------+-------------+-------------------+
 * |next header size|extended header(*3)| padding(*4) |  compressed data  |
 * +----------------+-------------------+-------------+-------------------+
 * *--------------------------(*1)-------------------> <------(*2)------->
 *
 */
992 #define H2_HEADER_SIZE_OFFSET 0
993 #define H2_COMP_SIZE_OFFSET 7
994 #define H2_ORIG_SIZE_OFFSET 11
995 #define H2_TIME_OFFSET 15
996 #define H2_CRC_OFFSET 21
997 #define H2_FIXED_SIZE 24
998 static int
lha_read_file_header_2(struct archive_read * a,struct lha * lha)999 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1000 {
1001 const unsigned char *p;
1002 size_t extdsize;
1003 int err, padding;
1004 uint16_t header_crc;
1005
1006 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1007 return (truncated_error(a));
1008
1009 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1010 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1011 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1012 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1013 lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1014 lha->setflag |= CRC_IS_SET;
1015
1016 if (lha->header_size < H2_FIXED_SIZE) {
1017 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1018 "Invalid LHa header size");
1019 return (ARCHIVE_FATAL);
1020 }
1021
1022 header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1023 __archive_read_consume(a, H2_FIXED_SIZE);
1024
1025 /* Read extended headers */
1026 err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1027 lha->header_size - H2_FIXED_SIZE, &extdsize);
1028 if (err < ARCHIVE_WARN)
1029 return (err);
1030
1031 /* Calculate a padding size. The result will be normally 0 or 1. */
1032 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1033 if (padding > 0) {
1034 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1035 return (truncated_error(a));
1036 header_crc = lha_crc16(header_crc, p, padding);
1037 __archive_read_consume(a, padding);
1038 }
1039
1040 if (header_crc != lha->header_crc) {
1041 #ifndef DONT_FAIL_ON_CRC_ERROR
1042 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1043 "LHa header CRC error");
1044 return (ARCHIVE_FATAL);
1045 #endif
1046 }
1047 return (err);
1048 }
1049
1050 /*
1051 * Header 3 format
1052 *
1053 * +0 +2 +7 +11 +15
1054 * +------------+----------------+-------------------+-----------------+
1055 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1056 * +------------+----------------+-------------------+-----------------+
1057 * <-------------------------------(*1)-------------------------------*
1058 *
1059 * +15 +19 +20 +21 +23 +24
1060 * +-----------------+------------+----------------+----------+-----------+
1061 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator |
1062 * +-----------------+------------+----------------+----------+-----------+
1063 * *--------------------------------(*1)----------------------------------*
1064 *
1065 * +24 +28 +32 +32+(*3)
1066 * +---------------+----------------+-------------------+-----------------+
1067 * |header size(*1)|next header size|extended header(*3)| compressed data |
1068 * +---------------+----------------+-------------------+-----------------+
1069 * *------------------------(*1)-----------------------> <------(*2)----->
1070 *
1071 */
1072 #define H3_FIELD_LEN_OFFSET 0
1073 #define H3_COMP_SIZE_OFFSET 7
1074 #define H3_ORIG_SIZE_OFFSET 11
1075 #define H3_TIME_OFFSET 15
1076 #define H3_CRC_OFFSET 21
1077 #define H3_HEADER_SIZE_OFFSET 24
1078 #define H3_FIXED_SIZE 28
1079 static int
lha_read_file_header_3(struct archive_read * a,struct lha * lha)1080 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1081 {
1082 const unsigned char *p;
1083 size_t extdsize;
1084 int err;
1085 uint16_t header_crc;
1086
1087 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1088 return (truncated_error(a));
1089
1090 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1091 goto invalid;
1092 lha->header_size = archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1093 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1094 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1095 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1096 lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1097 lha->setflag |= CRC_IS_SET;
1098
1099 if (lha->header_size < H3_FIXED_SIZE + 4)
1100 goto invalid;
1101 header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1102 __archive_read_consume(a, H3_FIXED_SIZE);
1103
1104 /* Reject rediculously large header */
1105 if (lha->header_size > 65536) {
1106 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1107 "LHa header size too large");
1108 return (ARCHIVE_FATAL);
1109 }
1110
1111 /* Read extended headers */
1112 err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1113 lha->header_size - H3_FIXED_SIZE, &extdsize);
1114 if (err < ARCHIVE_WARN)
1115 return (err);
1116
1117 if (header_crc != lha->header_crc) {
1118 #ifndef DONT_FAIL_ON_CRC_ERROR
1119 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1120 "LHa header CRC error");
1121 return (ARCHIVE_FATAL);
1122 #endif
1123 }
1124 return (err);
1125 invalid:
1126 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1127 "Invalid LHa header");
1128 return (ARCHIVE_FATAL);
1129 }
1130
1131 /*
1132 * Extended header format
1133 *
1134 * +0 +2 +3 -- used in header 1 and 2
1135 * +0 +4 +5 -- used in header 3
1136 * +--------------+---------+-------------------+--------------+--
1137 * |ex-header size|header id| data |ex-header size| .......
1138 * +--------------+---------+-------------------+--------------+--
1139 * <-------------( ex-header size)------------> <-- next extended header --*
1140 *
 * If the ex-header size is zero, it marks the end of the extended
 * headers.
1143 *
1144 */
/*
 * Read the chain of extended headers and store what they carry in `lha'.
 *
 * a                 Reader the header bytes are pulled from.
 * lha               Destination for the decoded attributes.
 * crc               Running header CRC to update, or NULL to skip CRC
 *                   accumulation entirely.
 * sizefield_length  Width in bytes of each "ex-header size" field; a
 *                   width equal to sizeof(uint16_t) is decoded as 16-bit,
 *                   any other value as 32-bit.
 * limitsize         Upper bound for *total_size plus the next header size.
 * total_size        Out: starts at sizefield_length and grows by the size
 *                   of every extended header that is read.
 *
 * Returns ARCHIVE_OK once the zero-size terminator has been consumed,
 * the result of truncated_error() when input runs out, and ARCHIVE_FATAL
 * for a malformed header or a failed charset-conversion setup.
 */
static int
lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
    uint16_t *crc, int sizefield_length, uint64_t limitsize, size_t *total_size)
{
	const void *h;
	const unsigned char *extdheader;
	size_t	extdsize;
	size_t	datasize;
	unsigned int i;
	unsigned char extdtype;

#define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
#define EXT_FILENAME		0x01		/* Filename 		    */
#define EXT_DIRECTORY		0x02		/* Directory name	    */
#define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
#define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
#define EXT_FILESIZE		0x42		/* Large file size	    */
#define EXT_TIMEZONE		0x43		/* Time zone		    */
#define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
#define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
#define EXT_CODEPAGE		0x46		/* Codepage		    */
#define EXT_UNIX_MODE		0x50		/* File permission	    */
#define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
#define EXT_UNIX_GNAME		0x52		/* Group name		    */
#define EXT_UNIX_UNAME		0x53		/* User name		    */
#define EXT_UNIX_MTIME		0x54		/* Modified time	    */
#define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
#define EXT_NEW_ATTR		0xff		/* new attribute	    */

	/* The size field that introduces the chain is counted up front. */
	*total_size = sizefield_length;

	for (;;) {
		/* Read an extended header size. */
		if ((h =
		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
			return (truncated_error(a));
		/* Decode the size; a value of zero marks the end of the
		 * extended headers. */
		if (sizefield_length == sizeof(uint16_t))
			extdsize = archive_le16dec(h);
		else
			extdsize = archive_le32dec(h);
		if (extdsize == 0) {
			/* End of extended headers: the terminator itself is
			 * still part of the CRC. */
			if (crc != NULL)
				*crc = lha_crc16(*crc, h, sizefield_length);
			__archive_read_consume(a, sizefield_length);
			return (ARCHIVE_OK);
		}

		/* Sanity check to the extended header size: it must stay
		 * within limitsize and be large enough to hold the size
		 * field plus the one-byte type. */
		if (((uint64_t)*total_size + extdsize) > limitsize ||
		    extdsize <= (size_t)sizefield_length)
			goto invalid;

		/* Read the extended header. */
		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
			return (truncated_error(a));
		*total_size += extdsize;

		extdheader = (const unsigned char *)h;
		/* Get the extended header type. */
		extdtype = extdheader[sizefield_length];
		/* Calculate an extended data size. */
		datasize = extdsize - (1 + sizefield_length);
		/* Skip an extended header size field and type field. */
		extdheader += sizefield_length + 1;

		/* Every header except the CRC header is hashed as-is; the
		 * CRC header is handled in its own case below. */
		if (crc != NULL && extdtype != EXT_HEADER_CRC)
			*crc = lha_crc16(*crc, h, extdsize);
		switch (extdtype) {
		case EXT_HEADER_CRC:
			/* We only use a header CRC. Following data will not
			 * be used. */
			if (datasize >= 2) {
				lha->header_crc = archive_le16dec(extdheader);
				if (crc != NULL) {
					static const char zeros[2] = {0, 0};
					/* Hash the size and type fields. */
					*crc = lha_crc16(*crc, h,
					    extdsize - datasize);
					/* CRC value itself as zero */
					*crc = lha_crc16(*crc, zeros, 2);
					/* Hash whatever follows the CRC. */
					*crc = lha_crc16(*crc,
					    extdheader+2, datasize - 2);
				}
			}
			break;
		case EXT_FILENAME:
			if (datasize == 0) {
				/* maybe directory header */
				archive_string_empty(&lha->filename);
				break;
			}
			/* A name starting with NUL is rejected. */
			if (extdheader[0] == '\0')
				goto invalid;
			archive_strncpy(&lha->filename,
			    (const char *)extdheader, datasize);
			break;
		case EXT_UTF16_FILENAME:
			if (datasize == 0) {
				/* maybe directory header */
				archive_string_empty(&lha->filename);
				break;
			} else if (datasize & 1) {
				/* UTF-16 characters take always 2 or 4 bytes */
				goto invalid;
			}
			if (extdheader[0] == '\0')
				goto invalid;
			/* Store the raw UTF-16 bytes; they are converted
			 * later through sconv_fname. */
			archive_string_empty(&lha->filename);
			archive_array_append(&lha->filename,
				(const char *)extdheader, datasize);
			/* Setup a string conversion for a filename. */
			lha->sconv_fname =
			    archive_string_conversion_from_charset(&a->archive,
			        "UTF-16LE", 1);
			if (lha->sconv_fname == NULL)
				return (ARCHIVE_FATAL);
			break;
		case EXT_DIRECTORY:
			if (datasize == 0 || extdheader[0] == '\0')
				/* No usable directory name: reject. */
				goto invalid;

			archive_strncpy(&lha->dirname,
		  	    (const char *)extdheader, datasize);
			/*
			 * Convert directory delimiter from 0xFF
			 * to '/' for local system.
			 */
			for (i = 0; i < lha->dirname.length; i++) {
				if ((unsigned char)lha->dirname.s[i] == 0xFF)
					lha->dirname.s[i] = '/';
			}
			/* Is last character directory separator? */
			if (lha->dirname.s[lha->dirname.length-1] != '/')
				/* invalid directory data */
				goto invalid;
			break;
		case EXT_UTF16_DIRECTORY:
			/* UTF-16 characters take always 2 or 4 bytes */
			if (datasize == 0 || (datasize & 1) ||
			    extdheader[0] == '\0') {
				/* No usable directory name: reject. */
				goto invalid;
			}

			archive_string_empty(&lha->dirname);
			archive_array_append(&lha->dirname,
				(const char *)extdheader, datasize);
			lha->sconv_dir =
			    archive_string_conversion_from_charset(&a->archive,
			        "UTF-16LE", 1);
			if (lha->sconv_dir == NULL)
				return (ARCHIVE_FATAL);
			else {
				/*
				 * Convert directory delimiter from 0xFFFF
				 * to '/' for local system.
				 */
				uint16_t dirSep;
				uint16_t d = 1;
				/* Pick the host-order value whose in-memory
				 * bytes are 0x2F 0x00 (UTF-16LE '/'): the
				 * probe reads 1 only on a big-endian host. */
				if (archive_be16dec(&d) == 1)
					dirSep = 0x2F00;
				else
					dirSep = 0x002F;

				/* UTF-16LE character */
				uint16_t *utf16name =
				    (uint16_t *)lha->dirname.s;
				for (i = 0; i < lha->dirname.length / 2; i++) {
					if (utf16name[i] == 0xFFFF) {
						utf16name[i] = dirSep;
					}
				}
				/* Is last character directory separator? */
				if (utf16name[lha->dirname.length / 2 - 1] !=
				    dirSep) {
					/* invalid directory data */
					goto invalid;
				}
			}
			break;
		case EXT_DOS_ATTR:
			/* Only the low byte of the 16-bit value is kept. */
			if (datasize == 2)
				lha->dos_attr = (unsigned char)
				    (archive_le16dec(extdheader) & 0xff);
			break;
		case EXT_TIMESTAMP:
			/* Three 64-bit stamps, stored in the order
			 * birthtime, mtime, atime. */
			if (datasize == (sizeof(uint64_t) * 3)) {
				ntfs_to_unix(archive_le64dec(extdheader),
					&lha->birthtime,
					&lha->birthtime_tv_nsec);
				extdheader += sizeof(uint64_t);
				ntfs_to_unix(archive_le64dec(extdheader),
					&lha->mtime,
					&lha->mtime_tv_nsec);
				extdheader += sizeof(uint64_t);
				ntfs_to_unix(archive_le64dec(extdheader),
					&lha->atime,
					&lha->atime_tv_nsec);
				lha->setflag |= BIRTHTIME_IS_SET |
				    ATIME_IS_SET;
			}
			break;
		case EXT_FILESIZE:
			/* 64-bit compressed and original sizes replace the
			 * values from the fixed header; negative results
			 * are rejected. */
			if (datasize == sizeof(uint64_t) * 2) {
				lha->compsize = archive_le64dec(extdheader);
				extdheader += sizeof(uint64_t);
				lha->origsize = archive_le64dec(extdheader);
				if (lha->compsize < 0 || lha->origsize < 0)
					goto invalid;
			}
			break;
		case EXT_CODEPAGE:
			/* Get an archived filename charset from codepage.
			 * This overwrites the charset specified by
			 * hdrcharset option. */
			if (datasize == sizeof(uint32_t)) {
				struct archive_string cp;
				const char *charset;

				archive_string_init(&cp);
				switch (archive_le32dec(extdheader)) {
				case 65001: /* UTF-8 */
					charset = "UTF-8";
					break;
				default:
					/* Any other codepage is named
					 * "CP<number>". */
					archive_string_sprintf(&cp, "CP%d",
					    (int)archive_le32dec(extdheader));
					charset = cp.s;
					break;
				}
				lha->sconv_dir =
				    archive_string_conversion_from_charset(
					&(a->archive), charset, 1);
				lha->sconv_fname =
				    archive_string_conversion_from_charset(
					&(a->archive), charset, 1);
				/* cp is released before the NULL checks so
				 * neither failure path leaks it. */
				archive_string_free(&cp);
				if (lha->sconv_dir == NULL)
					return (ARCHIVE_FATAL);
				if (lha->sconv_fname == NULL)
					return (ARCHIVE_FATAL);
			}
			break;
		case EXT_UNIX_MODE:
			if (datasize == sizeof(uint16_t)) {
				lha->mode = archive_le16dec(extdheader);
				lha->setflag |= UNIX_MODE_IS_SET;
			}
			break;
		case EXT_UNIX_GID_UID:
			/* gid comes first, then uid. */
			if (datasize == (sizeof(uint16_t) * 2)) {
				lha->gid = archive_le16dec(extdheader);
				lha->uid = archive_le16dec(extdheader+2);
			}
			break;
		case EXT_UNIX_GNAME:
			if (datasize > 0)
				archive_strncpy(&lha->gname,
				    (const char *)extdheader, datasize);
			break;
		case EXT_UNIX_UNAME:
			if (datasize > 0)
				archive_strncpy(&lha->uname,
				    (const char *)extdheader, datasize);
			break;
		case EXT_UNIX_MTIME:
			if (datasize == sizeof(uint32_t))
				lha->mtime = archive_le32dec(extdheader);
			break;
		case EXT_OS2_NEW_ATTR:
			/* This extended header is OS/2 depend. */
			if (datasize == 16) {
				lha->dos_attr = (unsigned char)
				    (archive_le16dec(extdheader) & 0xff);
				lha->mode = archive_le16dec(extdheader+2);
				lha->gid = archive_le16dec(extdheader+4);
				lha->uid = archive_le16dec(extdheader+6);
				lha->birthtime = archive_le32dec(extdheader+8);
				lha->atime = archive_le32dec(extdheader+12);
				lha->setflag |= UNIX_MODE_IS_SET
				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
			}
			break;
		case EXT_NEW_ATTR:
			/* Same fields as the OS/2 variant but 32 bits wide
			 * and without the DOS attribute. */
			if (datasize == 20) {
				lha->mode = (mode_t)archive_le32dec(extdheader);
				lha->gid = archive_le32dec(extdheader+4);
				lha->uid = archive_le32dec(extdheader+8);
				lha->birthtime = archive_le32dec(extdheader+12);
				lha->atime = archive_le32dec(extdheader+16);
				lha->setflag |= UNIX_MODE_IS_SET
				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
			}
			break;
		case EXT_TIMEZONE:		/* Not supported */
			break;
		default:
			/* Unknown header types are skipped silently. */
			break;
		}

		/* Release this header and move on to the next one. */
		__archive_read_consume(a, extdsize);
	}
invalid:
	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
	    "Invalid extended LHa header");
	return (ARCHIVE_FATAL);
}
1455
1456 static int
lha_end_of_entry(struct archive_read * a)1457 lha_end_of_entry(struct archive_read *a)
1458 {
1459 struct lha *lha = (struct lha *)(a->format->data);
1460 int r = ARCHIVE_EOF;
1461
1462 if (!lha->end_of_entry_cleanup) {
1463 if ((lha->setflag & CRC_IS_SET) &&
1464 lha->crc != lha->entry_crc_calculated) {
1465 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1466 "LHa data CRC error");
1467 r = ARCHIVE_WARN;
1468 }
1469
1470 /* End-of-entry cleanup done. */
1471 lha->end_of_entry_cleanup = 1;
1472 }
1473 return (r);
1474 }
1475
1476 static int
archive_read_format_lha_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1477 archive_read_format_lha_read_data(struct archive_read *a,
1478 const void **buff, size_t *size, int64_t *offset)
1479 {
1480 struct lha *lha = (struct lha *)(a->format->data);
1481 int r;
1482
1483 if (lha->entry_unconsumed) {
1484 /* Consume as much as the decompressor actually used. */
1485 __archive_read_consume(a, lha->entry_unconsumed);
1486 lha->entry_unconsumed = 0;
1487 }
1488 if (lha->end_of_entry) {
1489 *offset = lha->entry_offset;
1490 *size = 0;
1491 *buff = NULL;
1492 return (lha_end_of_entry(a));
1493 }
1494
1495 if (lha->entry_is_compressed)
1496 r = lha_read_data_lzh(a, buff, size, offset);
1497 else
1498 /* No compression. */
1499 r = lha_read_data_none(a, buff, size, offset);
1500 return (r);
1501 }
1502
1503 /*
1504 * Read a file content in no compression.
1505 *
1506 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1507 * lha->end_of_entry if it consumes all of the data.
1508 */
1509 static int
lha_read_data_none(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1510 lha_read_data_none(struct archive_read *a, const void **buff,
1511 size_t *size, int64_t *offset)
1512 {
1513 struct lha *lha = (struct lha *)(a->format->data);
1514 ssize_t bytes_avail;
1515
1516 if (lha->entry_bytes_remaining == 0) {
1517 *buff = NULL;
1518 *size = 0;
1519 *offset = lha->entry_offset;
1520 lha->end_of_entry = 1;
1521 return (ARCHIVE_OK);
1522 }
1523 /*
1524 * Note: '1' here is a performance optimization.
1525 * Recall that the decompression layer returns a count of
1526 * available bytes; asking for more than that forces the
1527 * decompressor to combine reads by copying data.
1528 */
1529 *buff = __archive_read_ahead(a, 1, &bytes_avail);
1530 if (bytes_avail <= 0) {
1531 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1532 "Truncated LHa file data");
1533 return (ARCHIVE_FATAL);
1534 }
1535 if (bytes_avail > lha->entry_bytes_remaining)
1536 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1537 lha->entry_crc_calculated =
1538 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1539 *size = bytes_avail;
1540 *offset = lha->entry_offset;
1541 lha->entry_offset += bytes_avail;
1542 lha->entry_bytes_remaining -= bytes_avail;
1543 if (lha->entry_bytes_remaining == 0)
1544 lha->end_of_entry = 1;
1545 lha->entry_unconsumed = bytes_avail;
1546 return (ARCHIVE_OK);
1547 }
1548
1549 /*
1550 * Read a file content in LZHUFF encoding.
1551 *
1552 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1553 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1554 * all of the data.
1555 */
1556 static int
lha_read_data_lzh(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1557 lha_read_data_lzh(struct archive_read *a, const void **buff,
1558 size_t *size, int64_t *offset)
1559 {
1560 struct lha *lha = (struct lha *)(a->format->data);
1561 ssize_t bytes_avail;
1562 int r;
1563
1564 /* If we haven't yet read any data, initialize the decompressor. */
1565 if (!lha->decompress_init) {
1566 r = lzh_decode_init(&(lha->strm), lha->method);
1567 switch (r) {
1568 case ARCHIVE_OK:
1569 break;
1570 case ARCHIVE_FAILED:
1571 /* Unsupported compression. */
1572 *buff = NULL;
1573 *size = 0;
1574 *offset = 0;
1575 archive_set_error(&a->archive,
1576 ARCHIVE_ERRNO_FILE_FORMAT,
1577 "Unsupported lzh compression method -%c%c%c-",
1578 lha->method[0], lha->method[1], lha->method[2]);
1579 /* We know compressed size; just skip it. */
1580 archive_read_format_lha_read_data_skip(a);
1581 return (ARCHIVE_WARN);
1582 default:
1583 archive_set_error(&a->archive, ENOMEM,
1584 "Couldn't allocate memory "
1585 "for lzh decompression");
1586 return (ARCHIVE_FATAL);
1587 }
1588 /* We've initialized decompression for this stream. */
1589 lha->decompress_init = 1;
1590 lha->strm.avail_out = 0;
1591 lha->strm.total_out = 0;
1592 }
1593
1594 /*
1595 * Note: '1' here is a performance optimization.
1596 * Recall that the decompression layer returns a count of
1597 * available bytes; asking for more than that forces the
1598 * decompressor to combine reads by copying data.
1599 */
1600 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1601 if (bytes_avail <= 0) {
1602 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1603 "Truncated LHa file body");
1604 return (ARCHIVE_FATAL);
1605 }
1606 if (bytes_avail > lha->entry_bytes_remaining)
1607 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1608
1609 lha->strm.avail_in = (int)bytes_avail;
1610 lha->strm.total_in = 0;
1611 lha->strm.avail_out = 0;
1612
1613 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1614 switch (r) {
1615 case ARCHIVE_OK:
1616 break;
1617 case ARCHIVE_EOF:
1618 lha->end_of_entry = 1;
1619 break;
1620 default:
1621 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1622 "Bad lzh data");
1623 return (ARCHIVE_FAILED);
1624 }
1625 lha->entry_unconsumed = lha->strm.total_in;
1626 lha->entry_bytes_remaining -= lha->strm.total_in;
1627
1628 if (lha->strm.avail_out) {
1629 *offset = lha->entry_offset;
1630 *size = lha->strm.avail_out;
1631 *buff = lha->strm.ref_ptr;
1632 lha->entry_crc_calculated =
1633 lha_crc16(lha->entry_crc_calculated, *buff, *size);
1634 lha->entry_offset += *size;
1635 } else {
1636 *offset = lha->entry_offset;
1637 *size = 0;
1638 *buff = NULL;
1639 if (lha->end_of_entry)
1640 return (lha_end_of_entry(a));
1641 }
1642 return (ARCHIVE_OK);
1643 }
1644
1645 /*
1646 * Skip a file content.
1647 */
1648 static int
archive_read_format_lha_read_data_skip(struct archive_read * a)1649 archive_read_format_lha_read_data_skip(struct archive_read *a)
1650 {
1651 struct lha *lha;
1652 int64_t bytes_skipped;
1653
1654 lha = (struct lha *)(a->format->data);
1655
1656 if (lha->entry_unconsumed) {
1657 /* Consume as much as the decompressor actually used. */
1658 __archive_read_consume(a, lha->entry_unconsumed);
1659 lha->entry_unconsumed = 0;
1660 }
1661
1662 /* if we've already read to end of data, we're done. */
1663 if (lha->end_of_entry_cleanup)
1664 return (ARCHIVE_OK);
1665
1666 /*
1667 * If the length is at the beginning, we can skip the
1668 * compressed data much more quickly.
1669 */
1670 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1671 if (bytes_skipped < 0)
1672 return (ARCHIVE_FATAL);
1673
1674 /* This entry is finished and done. */
1675 lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1676 return (ARCHIVE_OK);
1677 }
1678
1679 static int
archive_read_format_lha_cleanup(struct archive_read * a)1680 archive_read_format_lha_cleanup(struct archive_read *a)
1681 {
1682 struct lha *lha = (struct lha *)(a->format->data);
1683
1684 lzh_decode_free(&(lha->strm));
1685 archive_string_free(&(lha->dirname));
1686 archive_string_free(&(lha->filename));
1687 archive_string_free(&(lha->uname));
1688 archive_string_free(&(lha->gname));
1689 archive_wstring_free(&(lha->ws));
1690 free(lha);
1691 (a->format->data) = NULL;
1692 return (ARCHIVE_OK);
1693 }
1694
1695 /*
1696 * 'LHa for UNIX' utility has archived a symbolic-link name after
1697 * a pathname with '|' character.
1698 * This function extracts the symbolic-link name from the pathname.
1699 *
1700 * example.
1701 * 1. a symbolic-name is 'aaa/bb/cc'
1702 * 2. a filename is 'xxx/bbb'
1703 * then an archived pathname is 'xxx/bbb|aaa/bb/cc'
1704 */
1705 static int
lha_parse_linkname(struct archive_wstring * linkname,struct archive_wstring * pathname)1706 lha_parse_linkname(struct archive_wstring *linkname,
1707 struct archive_wstring *pathname)
1708 {
1709 wchar_t * linkptr;
1710 size_t symlen;
1711
1712 linkptr = wcschr(pathname->s, L'|');
1713 if (linkptr != NULL) {
1714 symlen = wcslen(linkptr + 1);
1715 archive_wstrncpy(linkname, linkptr+1, symlen);
1716
1717 *linkptr = 0;
1718 pathname->length = wcslen(pathname->s);
1719
1720 return (1);
1721 }
1722 return (0);
1723 }
1724
/*
 * Add `size' bytes, starting `offset' bytes into `pp', to the 8-bit
 * checksum `sum' and return the result (arithmetic wraps modulo 256).
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	const unsigned char *cur = (const unsigned char *)pp + offset;
	const unsigned char *const end = cur + size;

	while (cur != end) {
		sum = (unsigned char)(sum + *cur);
		cur++;
	}
	return (sum);
}
1735
/*
 * CRC-16 lookup tables (reflected polynomial 0xA001).
 * crc16tbl[0] is the plain one-byte table; crc16tbl[1] is derived from
 * it so that lha_crc16() can process two bytes per lookup pair.
 */
static uint16_t crc16tbl[2][256];

/*
 * Fill crc16tbl on the first call; later calls return immediately.
 */
static void
lha_crc16_init(void)
{
	static int table_ready = 0;
	unsigned int byte, bit;

	if (table_ready != 0)
		return;
	table_ready = 1;

	/* Bit-by-bit CRC of every possible byte value. */
	for (byte = 0; byte < 256; byte++) {
		uint16_t reg = (uint16_t)byte;
		for (bit = 0; bit < 8; bit++) {
			if (reg & 1)
				reg = (uint16_t)((reg >> 1) ^ 0xA001);
			else
				reg = (uint16_t)(reg >> 1);
		}
		crc16tbl[0][byte] = reg;
	}

	/* Second table: the first one advanced by one more byte. */
	for (byte = 0; byte < 256; byte++) {
		const uint16_t one = crc16tbl[0][byte];
		crc16tbl[1][byte] =
		    (uint16_t)((one >> 8) ^ crc16tbl[0][one & 0xff]);
	}
}
1760
1761 static uint16_t
lha_crc16(uint16_t crc,const void * pp,size_t len)1762 lha_crc16(uint16_t crc, const void *pp, size_t len)
1763 {
1764 const unsigned char *p = (const unsigned char *)pp;
1765 const uint16_t *buff;
1766 const union {
1767 uint32_t i;
1768 char c[4];
1769 } u = { 0x01020304 };
1770
1771 if (len == 0)
1772 return crc;
1773
1774 /* Process unaligned address. */
1775 if (((uintptr_t)p) & (uintptr_t)0x1) {
1776 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1777 len--;
1778 }
1779 buff = (const uint16_t *)p;
1780 /*
1781 * Modern C compiler such as GCC does not unroll automatically yet
1782 * without unrolling pragma, and Clang is so. So we should
1783 * unroll this loop for its performance.
1784 */
1785 for (;len >= 8; len -= 8) {
1786 /* This if statement expects compiler optimization will
1787 * remove the statement which will not be executed. */
1788 #undef bswap16
1789 #ifndef __has_builtin
1790 #define __has_builtin(x) 0
1791 #endif
1792 #if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */
1793 # define bswap16(x) _byteswap_ushort(x)
1794 #elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1795 /* GCC 4.8 and later has __builtin_bswap16() */
1796 # define bswap16(x) __builtin_bswap16(x)
1797 #elif defined(__clang__) && __has_builtin(__builtin_bswap16)
1798 /* Newer clang versions have __builtin_bswap16() */
1799 # define bswap16(x) __builtin_bswap16(x)
1800 #else
1801 # define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1802 #endif
1803 #define CRC16W do { \
1804 if(u.c[0] == 1) { /* Big endian */ \
1805 crc ^= bswap16(*buff); buff++; \
1806 } else \
1807 crc ^= *buff++; \
1808 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1809 } while (0)
1810 CRC16W;
1811 CRC16W;
1812 CRC16W;
1813 CRC16W;
1814 #undef CRC16W
1815 #undef bswap16
1816 }
1817
1818 p = (const unsigned char *)buff;
1819 for (;len; len--) {
1820 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1821 }
1822 return crc;
1823 }
1824
1825 /*
1826 * Initialize LZHUF decoder.
1827 *
1828 * Returns ARCHIVE_OK if initialization was successful.
1829 * Returns ARCHIVE_FAILED if method is unsupported.
1830 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1831 * error occurred.
1832 */
1833 static int
lzh_decode_init(struct lzh_stream * strm,const char * method)1834 lzh_decode_init(struct lzh_stream *strm, const char *method)
1835 {
1836 struct lzh_dec *ds;
1837 int w_bits, w_size;
1838
1839 if (strm->ds == NULL) {
1840 strm->ds = calloc(1, sizeof(*strm->ds));
1841 if (strm->ds == NULL)
1842 return (ARCHIVE_FATAL);
1843 }
1844 ds = strm->ds;
1845 ds->error = ARCHIVE_FAILED;
1846 if (method == NULL || method[0] != 'l' || method[1] != 'h')
1847 return (ARCHIVE_FAILED);
1848 switch (method[2]) {
1849 case '5':
1850 w_bits = 13;/* 8KiB for window */
1851 break;
1852 case '6':
1853 w_bits = 15;/* 32KiB for window */
1854 break;
1855 case '7':
1856 w_bits = 16;/* 64KiB for window */
1857 break;
1858 default:
1859 return (ARCHIVE_FAILED);/* Not supported. */
1860 }
1861 ds->error = ARCHIVE_FATAL;
1862 /* Expand a window size up to 128 KiB for decompressing process
1863 * performance whatever its original window size is. */
1864 ds->w_size = 1U << 17;
1865 ds->w_mask = ds->w_size -1;
1866 if (ds->w_buff == NULL) {
1867 ds->w_buff = malloc(ds->w_size);
1868 if (ds->w_buff == NULL)
1869 return (ARCHIVE_FATAL);
1870 }
1871 w_size = 1U << w_bits;
1872 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1873 ds->w_pos = 0;
1874 ds->state = 0;
1875 ds->pos_pt_len_size = w_bits + 1;
1876 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1877 ds->literal_pt_len_size = PT_BITLEN_SIZE;
1878 ds->literal_pt_len_bits = 5;
1879 ds->br.cache_buffer = 0;
1880 ds->br.cache_avail = 0;
1881
1882 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1883 != ARCHIVE_OK)
1884 return (ARCHIVE_FATAL);
1885 ds->lt.len_bits = 9;
1886 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1887 != ARCHIVE_OK)
1888 return (ARCHIVE_FATAL);
1889 ds->error = 0;
1890
1891 return (ARCHIVE_OK);
1892 }
1893
1894 /*
1895 * Release LZHUF decoder.
1896 */
1897 static void
lzh_decode_free(struct lzh_stream * strm)1898 lzh_decode_free(struct lzh_stream *strm)
1899 {
1900
1901 if (strm->ds == NULL)
1902 return;
1903 free(strm->ds->w_buff);
1904 lzh_huffman_free(&(strm->ds->lt));
1905 lzh_huffman_free(&(strm->ds->pt));
1906 free(strm->ds);
1907 strm->ds = NULL;
1908 }
1909
/*
 * Bit stream reader.
 *
 * The macros below operate on a reader `br' that keeps unread bits in
 * br->cache_buffer, with br->cache_avail counting how many of them are
 * still valid.  Every macro argument is parenthesized in the expansion
 * so that arbitrary expressions can be passed safely.
 */
/* Check that the cache buffer has enough bits. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Get compressed data by bit. */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[n])
/* Same, for the case where fewer than n bits are cached: the available
 * bits are shifted up and the missing low bits read as zero. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[n])
/* Read ahead to make sure the cache buffer has enough compressed data we
 * will use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : we met that strm->next_in is empty, we have to get following
 *          bytes. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
/*  True  : the cache buffer has some bits as much as we need.
 *  False : there are no enough bits in the cache buffer to be used,
 *          we have to get following bytes if we could. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Notify how many bits we consumed. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))

/* cache_masks[n] keeps the low n bits of a 16-bit value; indexes above
 * 16 repeat the full 16-bit mask. */
static const uint16_t cache_masks[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000F, 0x001F, 0x003F, 0x007F,
	0x00FF, 0x01FF, 0x03FF, 0x07FF,
	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
1946
/*
 * Top up the bit cache of `br' from the input of `strm'.
 * Call this when the cache buffer does not hold as many bits as needed.
 *
 * The number of free bits is CACHE_BITS - br->cache_avail; whole bytes
 * are moved from strm->next_in into the cache until fewer than eight
 * free bits remain or the input runs dry.  strm->next_in and
 * strm->avail_in are advanced accordingly.
 *
 * Returns 1 if the cache could be filled (fewer than 8 free bits left).
 * Returns 0 if the input buffer became empty before that.
 */
static int
lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
{
	/* Free bits in the cache. */
	int n = CACHE_BITS - br->cache_avail;

	for (;;) {
		/* Whole bytes that still fit into the cache. */
		const int x = n >> 3;
		if (strm->avail_in >= x) {
			/* Enough input for a single multi-byte load. */
			switch (x) {
			case 8:
				/* Eight bytes fit: the old contents are
				 * replaced rather than shifted. */
				br->cache_buffer =
				    ((uint64_t)strm->next_in[0]) << 56 |
				    ((uint64_t)strm->next_in[1]) << 48 |
				    ((uint64_t)strm->next_in[2]) << 40 |
				    ((uint64_t)strm->next_in[3]) << 32 |
				    ((uint32_t)strm->next_in[4]) << 24 |
				    ((uint32_t)strm->next_in[5]) << 16 |
				    ((uint32_t)strm->next_in[6]) << 8 |
				     (uint32_t)strm->next_in[7];
				strm->next_in += 8;
				strm->avail_in -= 8;
				br->cache_avail += 8 * 8;
				return (1);
			case 7:
				/* Keep the cached bits on top, append
				 * seven new bytes below them. */
				br->cache_buffer =
		 		   (br->cache_buffer << 56) |
				    ((uint64_t)strm->next_in[0]) << 48 |
				    ((uint64_t)strm->next_in[1]) << 40 |
				    ((uint64_t)strm->next_in[2]) << 32 |
				    ((uint64_t)strm->next_in[3]) << 24 |
				    ((uint64_t)strm->next_in[4]) << 16 |
				    ((uint64_t)strm->next_in[5]) << 8 |
				     (uint64_t)strm->next_in[6];
				strm->next_in += 7;
				strm->avail_in -= 7;
				br->cache_avail += 7 * 8;
				return (1);
			case 6:
				/* Same, with six new bytes. */
				br->cache_buffer =
		 		   (br->cache_buffer << 48) |
				    ((uint64_t)strm->next_in[0]) << 40 |
				    ((uint64_t)strm->next_in[1]) << 32 |
				    ((uint64_t)strm->next_in[2]) << 24 |
				    ((uint64_t)strm->next_in[3]) << 16 |
				    ((uint64_t)strm->next_in[4]) << 8 |
				     (uint64_t)strm->next_in[5];
				strm->next_in += 6;
				strm->avail_in -= 6;
				br->cache_avail += 6 * 8;
				return (1);
			case 0:
				/* We have enough compressed data in
				 * the cache buffer.*/
				return (1);
			default:
				/* Other counts use the byte-wise path
				 * below. */
				break;
			}
		}
		if (strm->avail_in == 0) {
			/* There is not enough compressed data to fill up the
			 * cache buffer. */
			return (0);
		}
		/* Shift in a single byte and try again. */
		br->cache_buffer =
		   (br->cache_buffer << 8) | *strm->next_in++;
		strm->avail_in--;
		br->cache_avail += 8;
		n -= 8;
	}
}
2024
/*
 * Decode LZHUF.
 *
 * 1. Returns ARCHIVE_OK if the output buffer or the input buffer is empty.
 *    Please set up the available buffer and call this function again.
 * 2. Returns ARCHIVE_EOF if decompression has been completed.
 * 3. Returns ARCHIVE_FAILED if an error occurred; the compressed data
 *    is broken or the 'last' flag was not set properly.
 * 4. The 'last' flag is very important: you must set it to 1 when there
 *    is no more input data. The lha compressed data format does not provide
 *    a way to know that the compressed data has really ended.
 *    Note: the lha command utility checks whether the total number of output
 *    bytes has reached the uncompressed size recorded in its header; it does
 *    not care whether the decoding process finished properly.
 *    GNU ZIP can decompress a compressed file made by SCO LZH compress.
 *    It handles EOF by filling the read buffer with zeros until the decoding
 *    process meets 2 bytes of zeros when reading the size of the next chunk,
 *    so the zeros are treated as the end-of-data mark although they are
 *    dummy data, not file data.
 */
/* The two halves of the decoder state machine driven by lzh_decode(). */
static int	lzh_read_blocks(struct lzh_stream *, int);
static int	lzh_decode_blocks(struct lzh_stream *, int);

/*
 * Decoder states. lzh_decode() hands every state below ST_GET_LITERAL
 * to lzh_read_blocks() and every other state to lzh_decode_blocks().
 */
enum {
	ST_RD_BLOCK		= 0,
	ST_RD_PT_1		= 1,
	ST_RD_PT_2		= 2,
	ST_RD_PT_3		= 3,
	ST_RD_PT_4		= 4,
	ST_RD_LITERAL_1		= 5,
	ST_RD_LITERAL_2		= 6,
	ST_RD_LITERAL_3		= 7,
	ST_RD_POS_DATA_1	= 8,
	ST_GET_LITERAL		= 9,
	ST_GET_POS_1		= 10,
	ST_GET_POS_2		= 11,
	ST_COPY_DATA		= 12
};
2060
2061 static int
lzh_decode(struct lzh_stream * strm,int last)2062 lzh_decode(struct lzh_stream *strm, int last)
2063 {
2064 struct lzh_dec *ds = strm->ds;
2065 int avail_in;
2066 int r;
2067
2068 if (ds->error)
2069 return (ds->error);
2070
2071 avail_in = strm->avail_in;
2072 do {
2073 if (ds->state < ST_GET_LITERAL)
2074 r = lzh_read_blocks(strm, last);
2075 else
2076 r = lzh_decode_blocks(strm, last);
2077 } while (r == 100);
2078 strm->total_in += avail_in - strm->avail_in;
2079 return (r);
2080 }
2081
2082 static void
lzh_emit_window(struct lzh_stream * strm,size_t s)2083 lzh_emit_window(struct lzh_stream *strm, size_t s)
2084 {
2085 strm->ref_ptr = strm->ds->w_buff;
2086 strm->avail_out = (int)s;
2087 strm->total_out += s;
2088 }
2089
/*
 * Read the header that precedes a group of blocks: the 16-bit block
 * count, the bit-length table (pt) used to decode the literal table, the
 * literal table (lt) itself, and finally the position table, which is
 * read into pt again with ds->reading_position set.
 *
 * The function is resumable: whenever the input runs out and `last' is
 * zero, the current state is stored in ds->state (plus ds->loop where
 * needed) and ARCHIVE_OK is returned so the caller can supply more data.
 *
 * Returns:
 *   100            - all tables are ready (state is ST_GET_LITERAL);
 *                    lzh_decode() continues with lzh_decode_blocks().
 *   ARCHIVE_OK     - more input is needed, or the remaining window content
 *                    was handed out at the end of the data.
 *   ARCHIVE_EOF    - the end of the compressed data was reached cleanly.
 *   ARCHIVE_FAILED - invalid or truncated data; also stored in ds->error.
 */
static int
lzh_read_blocks(struct lzh_stream *strm, int last)
{
	struct lzh_dec *ds = strm->ds;
	struct lzh_br *br = &(ds->br);
	int c = 0, i;
	unsigned rbits;

	for (;;) {
		switch (ds->state) {
		case ST_RD_BLOCK:
			/*
			 * Read a block number indicates how many blocks
			 * we will handle. The block is composed of a
			 * literal and a match, sometimes a literal only
			 * in particular, there are no reference data at
			 * the beginning of the decompression.
			 */
			if (!lzh_br_read_ahead_0(strm, br, 16)) {
				if (!last)
					/* We need following data. */
					return (ARCHIVE_OK);
				if (lzh_br_has(br, 8)) {
					/*
					 * It seems there are extra bits.
					 *  1. Compressed data is broken.
					 *  2. `last' flag does not properly
					 *     set.
					 */
					goto failed;
				}
				if (ds->w_pos > 0) {
					/* Hand out what is still sitting in
					 * the window before reporting EOF on
					 * the next call. */
					lzh_emit_window(strm, ds->w_pos);
					ds->w_pos = 0;
					return (ARCHIVE_OK);
				}
				/* End of compressed data; we have completely
				 * handled all compressed data. */
				return (ARCHIVE_EOF);
			}
			ds->blocks_avail = lzh_br_bits(br, 16);
			/* A block count of zero is rejected. */
			if (ds->blocks_avail == 0)
				goto failed;
			lzh_br_consume(br, 16);
			/*
			 * Read a literal table compressed in huffman
			 * coding.
			 */
			ds->pt.len_size = ds->literal_pt_len_size;
			ds->pt.len_bits = ds->literal_pt_len_bits;
			ds->reading_position = 0;
			/* FALL THROUGH */
		case ST_RD_PT_1:
			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
			 * used in reading both a literal table and a
			 * position table. */
			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_PT_1;
				return (ARCHIVE_OK);
			}
			/* Number of bit-length entries that follow. */
			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
			lzh_br_consume(br, ds->pt.len_bits);
			/* FALL THROUGH */
		case ST_RD_PT_2:
			if (ds->pt.len_avail == 0) {
				/* There is no bitlen. */
				if (!lzh_br_read_ahead(strm, br,
				    ds->pt.len_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->state = ST_RD_PT_2;
					return (ARCHIVE_OK);
				}
				/* The next len_bits bits name the only
				 * code of this table. */
				if (!lzh_make_fake_table(&(ds->pt),
				    lzh_br_bits(br, ds->pt.len_bits)))
					goto failed;/* Invalid data. */
				lzh_br_consume(br, ds->pt.len_bits);
				/* pt was the position table if
				 * reading_position is set; otherwise it is
				 * the table for decoding the literal table. */
				if (ds->reading_position)
					ds->state = ST_GET_LITERAL;
				else
					ds->state = ST_RD_LITERAL_1;
				break;
			} else if (ds->pt.len_avail > ds->pt.len_size)
				goto failed;/* Invalid data. */
			ds->loop = 0;
			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
			/* Fewer than three entries, or a position table:
			 * skip the ST_RD_PT_3 step and read all lengths
			 * in ST_RD_PT_4. */
			if (ds->pt.len_avail < 3 ||
			    ds->pt.len_size == ds->pos_pt_len_size) {
				ds->state = ST_RD_PT_4;
				break;
			}
			/* FALL THROUGH */
		case ST_RD_PT_3:
			/* Read the first three bit lengths; ds->loop keeps
			 * the progress across calls. */
			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
			if (ds->loop < 3) {
				if (ds->loop < 0 || last)
					goto failed;/* Invalid data. */
				/* Not completed, get following data. */
				ds->state = ST_RD_PT_3;
				return (ARCHIVE_OK);
			}
			/* There are some null in bitlen of the literal. */
			if (!lzh_br_read_ahead(strm, br, 2)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_PT_3;
				return (ARCHIVE_OK);
			}
			/* A 2-bit count of zero-length entries that follow
			 * the first three. */
			c = lzh_br_bits(br, 2);
			lzh_br_consume(br, 2);
			if (c > ds->pt.len_avail - 3)
				goto failed;/* Invalid data. */
			for (i = 3; c-- > 0 ;)
				ds->pt.bitlen[i++] = 0;
			ds->loop = i;
			/* FALL THROUGH */
		case ST_RD_PT_4:
			/* Read the remaining bit lengths up to len_avail. */
			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
			    ds->pt.len_avail);
			if (ds->loop < ds->pt.len_avail) {
				if (ds->loop < 0 || last)
					goto failed;/* Invalid data. */
				/* Not completed, get following data. */
				ds->state = ST_RD_PT_4;
				return (ARCHIVE_OK);
			}
			if (!lzh_make_huffman_table(&(ds->pt)))
				goto failed;/* Invalid data */
			if (ds->reading_position) {
				/* That was the position table: every table
				 * is ready now. */
				ds->state = ST_GET_LITERAL;
				break;
			}
			/* FALL THROUGH */
		case ST_RD_LITERAL_1:
			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_LITERAL_1;
				return (ARCHIVE_OK);
			}
			/* Number of literal bit-length entries that follow. */
			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
			lzh_br_consume(br, ds->lt.len_bits);
			/* FALL THROUGH */
		case ST_RD_LITERAL_2:
			if (ds->lt.len_avail == 0) {
				/* There is no bitlen. */
				if (!lzh_br_read_ahead(strm, br,
				    ds->lt.len_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->state = ST_RD_LITERAL_2;
					return (ARCHIVE_OK);
				}
				if (!lzh_make_fake_table(&(ds->lt),
				    lzh_br_bits(br, ds->lt.len_bits)))
					goto failed;/* Invalid data */
				lzh_br_consume(br, ds->lt.len_bits);
				ds->state = ST_RD_POS_DATA_1;
				break;
			} else if (ds->lt.len_avail > ds->lt.len_size)
				goto failed;/* Invalid data */
			ds->loop = 0;
			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
			/* FALL THROUGH */
		case ST_RD_LITERAL_3:
			/* Decode the literal bit lengths with the pt table;
			 * `i' is the next entry to fill and is saved in
			 * ds->loop whenever we have to stop for input. */
			i = ds->loop;
			while (i < ds->lt.len_avail) {
				if (!lzh_br_read_ahead(strm, br,
				    ds->pt.max_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->loop = i;
					ds->state = ST_RD_LITERAL_3;
					return (ARCHIVE_OK);
				}
				rbits = lzh_br_bits(br, ds->pt.max_bits);
				c = lzh_decode_huffman(&(ds->pt), rbits);
				if (c > 2) {
					/* Note: 'c' will never be more than
					 * eighteen since it's limited by
					 * PT_BITLEN_SIZE, which is being set
					 * to ds->pt.len_size through
					 * ds->literal_pt_len_size. */
					lzh_br_consume(br, ds->pt.bitlen[c]);
					/* Codes above 2 carry the bit length
					 * plus two. */
					c -= 2;
					ds->lt.freq[c]++;
					ds->lt.bitlen[i++] = c;
				} else if (c == 0) {
					/* A single zero-length entry. */
					lzh_br_consume(br, ds->pt.bitlen[c]);
					ds->lt.bitlen[i++] = 0;
				} else {
					/* c == 1 or c == 2 */
					/* A run of zero-length entries: the
					 * run length follows in 4 bits (plus
					 * 3) or 9 bits (plus 20). */
					int n = (c == 1)?4:9;
					if (!lzh_br_read_ahead(strm, br,
					     ds->pt.bitlen[c] + n)) {
						if (last) /* Truncated data. */
							goto failed;
						ds->loop = i;
						ds->state = ST_RD_LITERAL_3;
						return (ARCHIVE_OK);
					}
					lzh_br_consume(br, ds->pt.bitlen[c]);
					c = lzh_br_bits(br, n);
					lzh_br_consume(br, n);
					c += (n == 4)?3:20;
					if (i + c > ds->lt.len_avail)
						goto failed;/* Invalid data */
					memset(&(ds->lt.bitlen[i]), 0, c);
					i += c;
				}
			}
			if (i > ds->lt.len_avail ||
			    !lzh_make_huffman_table(&(ds->lt)))
				goto failed;/* Invalid data */
			/* FALL THROUGH */
		case ST_RD_POS_DATA_1:
			/*
			 * Read a position table compressed in huffman
			 * coding.
			 */
			ds->pt.len_size = ds->pos_pt_len_size;
			ds->pt.len_bits = ds->pos_pt_len_bits;
			ds->reading_position = 1;
			/* The position table reuses the ST_RD_PT_* states. */
			ds->state = ST_RD_PT_1;
			break;
		case ST_GET_LITERAL:
			/* Tables are complete; tell lzh_decode() to switch
			 * to lzh_decode_blocks(). */
			return (100);
		}
	}
failed:
	return (ds->error = ARCHIVE_FAILED);
}
2324
/*
 * Decode the literals and matches of the current group of blocks into
 * the window buffer (ds->w_buff).
 *
 * The bit reader and the frequently used state fields are copied into
 * locals on entry. They are written back at `next_data' and, in part,
 * before returning 100. The `failed' path returns without writing the
 * local copies back.
 *
 * Returns:
 *   100            - blocks_avail reached zero; ds->state has been set to
 *                    ST_RD_BLOCK so that lzh_decode() calls
 *                    lzh_read_blocks() next.
 *   ARCHIVE_OK     - more input is needed, or the window became full and
 *                    was handed out through lzh_emit_window().
 *   ARCHIVE_FAILED - invalid or truncated data; also stored in ds->error.
 */
static int
lzh_decode_blocks(struct lzh_stream *strm, int last)
{
	struct lzh_dec *ds = strm->ds;
	struct lzh_br bre = ds->br;
	struct huffman *lt = &(ds->lt);
	struct huffman *pt = &(ds->pt);
	unsigned char *w_buff = ds->w_buff;
	unsigned char *lt_bitlen = lt->bitlen;
	unsigned char *pt_bitlen = pt->bitlen;
	int blocks_avail = ds->blocks_avail, c = 0;
	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
	int state = ds->state;

	for (;;) {
		switch (state) {
		case ST_GET_LITERAL:
			for (;;) {
				if (blocks_avail == 0) {
					/* We have decoded all blocks.
					 * Let's handle next blocks. */
					ds->state = ST_RD_BLOCK;
					ds->br = bre;
					ds->blocks_avail = 0;
					ds->w_pos = w_pos;
					ds->copy_pos = 0;
					return (100);
				}

				/* lzh_br_read_ahead() always tries to fill the
				 * cache buffer up. In specific situation we
				 * are close to the end of the data, the cache
				 * buffer will not be full and thus we have to
				 * determine if the cache buffer has some bits
				 * as much as we need after lzh_br_read_ahead()
				 * failed. */
				if (!lzh_br_read_ahead(strm, &bre,
				    lt_max_bits)) {
					if (!last)
						goto next_data;
					/* Remaining bits are less than
					 * maximum bits(lt.max_bits) but maybe
					 * it still remains as much as we need,
					 * so we should try to use it with
					 * dummy bits. */
					c = lzh_decode_huffman(lt,
					      lzh_br_bits_forced(&bre,
					        lt_max_bits));
					lzh_br_consume(&bre, lt_bitlen[c]);
					/* cache_avail went negative: the code
					 * needed more bits than were left. */
					if (!lzh_br_has(&bre, 0))
						goto failed;/* Over read. */
				} else {
					c = lzh_decode_huffman(lt,
					      lzh_br_bits(&bre, lt_max_bits));
					lzh_br_consume(&bre, lt_bitlen[c]);
				}
				blocks_avail--;
				if ((unsigned int)c > UCHAR_MAX)
					/* Current block is a match data. */
					break;
				/*
				 * 'c' is exactly a literal code.
				 */
				/* Save a decoded code to reference it
				 * afterward. */
				w_buff[w_pos] = c;
				if (++w_pos >= w_size) {
					/* The window is full: hand it out and
					 * return to the caller. */
					w_pos = 0;
					lzh_emit_window(strm, w_size);
					goto next_data;
				}
			}
			/* 'c' is the length of a match pattern we have
			 * already extracted, which has be stored in
			 * window(ds->w_buff). */
			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
			/* FALL THROUGH */
		case ST_GET_POS_1:
			/*
			 * Get a reference position.
			 */
			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
				if (!last) {
					state = ST_GET_POS_1;
					ds->copy_len = copy_len;
					goto next_data;
				}
				/* Last input: decode with zero padding, as
				 * done for the literal code above. */
				copy_pos = lzh_decode_huffman(pt,
				    lzh_br_bits_forced(&bre, pt_max_bits));
				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
				if (!lzh_br_has(&bre, 0))
					goto failed;/* Over read. */
			} else {
				copy_pos = lzh_decode_huffman(pt,
				    lzh_br_bits(&bre, pt_max_bits));
				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
			}
			/* FALL THROUGH */
		case ST_GET_POS_2:
			if (copy_pos > 1) {
				/* We need an additional adjustment number to
				 * the position. */
				int p = copy_pos - 1;
				if (!lzh_br_read_ahead(strm, &bre, p)) {
					if (last)
						goto failed;/* Truncated data.*/
					state = ST_GET_POS_2;
					ds->copy_len = copy_len;
					ds->copy_pos = copy_pos;
					goto next_data;
				}
				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
				lzh_br_consume(&bre, p);
			}
			/* The position is actually a distance from the last
			 * code we had extracted and thus we have to convert
			 * it to a position of the window. */
			copy_pos = (w_pos - copy_pos - 1) & w_mask;
			/* FALL THROUGH */
		case ST_COPY_DATA:
			/*
			 * Copy `copy_len' bytes as extracted data from
			 * the window into the output buffer.
			 */
			for (;;) {
				int l;

				/* Limit this step so that neither the source
				 * nor the destination runs past the end of
				 * the window. */
				l = copy_len;
				if (copy_pos > w_pos) {
					if (l > w_size - copy_pos)
						l = w_size - copy_pos;
				} else {
					if (l > w_size - w_pos)
						l = w_size - w_pos;
				}
				if ((copy_pos + l < w_pos)
				    || (w_pos + l < copy_pos)) {
					/* No overlap. */
					memcpy(w_buff + w_pos,
					    w_buff + copy_pos, l);
				} else {
					const unsigned char *s;
					unsigned char *d;
					int li;

					/* The ranges may overlap: copy byte by
					 * byte in ascending order, two bytes
					 * per iteration. */
					d = w_buff + w_pos;
					s = w_buff + copy_pos;
					for (li = 0; li < l-1;) {
						d[li] = s[li];li++;
						d[li] = s[li];li++;
					}
					if (li < l)
						d[li] = s[li];
				}
				w_pos += l;
				if (w_pos == w_size) {
					/* The window is full: hand it out and
					 * remember how much of the match is
					 * still to be copied. */
					w_pos = 0;
					lzh_emit_window(strm, w_size);
					if (copy_len <= l)
						state = ST_GET_LITERAL;
					else {
						state = ST_COPY_DATA;
						ds->copy_len = copy_len - l;
						ds->copy_pos =
						    (copy_pos + l) & w_mask;
					}
					goto next_data;
				}
				if (copy_len <= l)
					/* A copy of current pattern ended. */
					break;
				copy_len -= l;
				copy_pos = (copy_pos + l) & w_mask;
			}
			state = ST_GET_LITERAL;
			break;
		}
	}
failed:
	return (ds->error = ARCHIVE_FAILED);
next_data:
	/* Write the local copies back so that the next call resumes here. */
	ds->br = bre;
	ds->blocks_avail = blocks_avail;
	ds->state = state;
	ds->w_pos = w_pos;
	return (ARCHIVE_OK);
}
2514
2515 static int
lzh_huffman_init(struct huffman * hf,size_t len_size,int tbl_bits)2516 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2517 {
2518 int bits;
2519
2520 if (hf->bitlen == NULL) {
2521 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2522 if (hf->bitlen == NULL)
2523 return (ARCHIVE_FATAL);
2524 }
2525 if (hf->tbl == NULL) {
2526 if (tbl_bits < HTBL_BITS)
2527 bits = tbl_bits;
2528 else
2529 bits = HTBL_BITS;
2530 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2531 if (hf->tbl == NULL)
2532 return (ARCHIVE_FATAL);
2533 }
2534 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2535 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2536 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2537 if (hf->tree == NULL)
2538 return (ARCHIVE_FATAL);
2539 }
2540 hf->len_size = (int)len_size;
2541 hf->tbl_bits = tbl_bits;
2542 return (ARCHIVE_OK);
2543 }
2544
2545 static void
lzh_huffman_free(struct huffman * hf)2546 lzh_huffman_free(struct huffman *hf)
2547 {
2548 free(hf->bitlen);
2549 free(hf->tbl);
2550 free(hf->tree);
2551 }
2552
/*
 * Lookup table for bit lengths of 7 and above, indexed by a 10-bit value
 * (lzh_read_pt_bitlen() passes the low 10 bits of a 13-bit peek).
 * An entry is 7 plus the number of leading one bits in the index; the
 * all-ones index maps to 0, which the caller treats as invalid data.
 */
#define BITLEN_X8(v)	v, v, v, v, v, v, v, v
#define BITLEN_X16(v)	BITLEN_X8(v), BITLEN_X8(v)
#define BITLEN_X64(v)	\
	BITLEN_X16(v), BITLEN_X16(v), BITLEN_X16(v), BITLEN_X16(v)
#define BITLEN_X256(v)	\
	BITLEN_X64(v), BITLEN_X64(v), BITLEN_X64(v), BITLEN_X64(v)
static const char bitlen_tbl[0x400] = {
	BITLEN_X256(7), BITLEN_X256(7),		/* 0x000 - 0x1FF */
	BITLEN_X256(8),				/* 0x200 - 0x2FF */
	BITLEN_X64(9), BITLEN_X64(9),		/* 0x300 - 0x37F */
	BITLEN_X64(10),				/* 0x380 - 0x3BF */
	BITLEN_X16(11), BITLEN_X16(11),		/* 0x3C0 - 0x3DF */
	BITLEN_X16(12),				/* 0x3E0 - 0x3EF */
	BITLEN_X8(13),				/* 0x3F0 - 0x3F7 */
	14, 14, 14, 14,				/* 0x3F8 - 0x3FB */
	15, 15,					/* 0x3FC - 0x3FD */
	16,					/* 0x3FE */
	0					/* 0x3FF: invalid */
};
#undef BITLEN_X256
#undef BITLEN_X64
#undef BITLEN_X16
#undef BITLEN_X8
2619 static int
lzh_read_pt_bitlen(struct lzh_stream * strm,int start,int end)2620 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2621 {
2622 struct lzh_dec *ds = strm->ds;
2623 struct lzh_br *br = &(ds->br);
2624 int c, i;
2625
2626 for (i = start; i < end; ) {
2627 /*
2628 * bit pattern the number we need
2629 * 000 -> 0
2630 * 001 -> 1
2631 * 010 -> 2
2632 * ...
2633 * 110 -> 6
2634 * 1110 -> 7
2635 * 11110 -> 8
2636 * ...
2637 * 1111111111110 -> 16
2638 */
2639 if (!lzh_br_read_ahead(strm, br, 3))
2640 return (i);
2641 if ((c = lzh_br_bits(br, 3)) == 7) {
2642 if (!lzh_br_read_ahead(strm, br, 13))
2643 return (i);
2644 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2645 if (c)
2646 lzh_br_consume(br, c - 3);
2647 else
2648 return (-1);/* Invalid data. */
2649 } else
2650 lzh_br_consume(br, 3);
2651 ds->pt.bitlen[i++] = c;
2652 ds->pt.freq[c]++;
2653 }
2654 return (i);
2655 }
2656
2657 static int
lzh_make_fake_table(struct huffman * hf,uint16_t c)2658 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2659 {
2660 if (c >= hf->len_size)
2661 return (0);
2662 hf->tbl[0] = c;
2663 hf->max_bits = 0;
2664 hf->shift_bits = 0;
2665 hf->bitlen[hf->tbl[0]] = 0;
2666 return (1);
2667 }
2668
/*
 * Make a huffman coding table.
 *
 * Input:  hf->bitlen[0 .. hf->len_avail-1] (code length of each symbol)
 *         and hf->freq[1..16] (number of symbols per code length).
 * Output: hf->tbl, a direct lookup table indexed by the leading bits of
 *         a code, and hf->tree for codes longer than HTBL_BITS; also
 *         hf->max_bits, hf->shift_bits and hf->tree_used.
 *
 * Table entries below len_avail are symbols; entries at or above
 * len_avail are tree node indices offset by len_avail.
 *
 * Returns 1 on success, 0 if the lengths do not form a valid code.
 */
static int
lzh_make_huffman_table(struct huffman *hf)
{
	uint16_t *tbl;
	const unsigned char *bitlen;
	int bitptn[17], weight[17];
	int i, maxbits = 0, ptn, tbl_size, w;
	int diffbits, len_avail;

	/*
	 * Initialize bit patterns.
	 */
	/* bitptn[i] is the first pattern for codes of length i and weight[i]
	 * the share of the 16-bit code space taken by one such code. */
	ptn = 0;
	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
		bitptn[i] = ptn;
		weight[i] = w;
		if (hf->freq[i]) {
			ptn += hf->freq[i] * w;
			maxbits = i;
		}
	}
	/* The codes must fill the 16-bit code space exactly, and the longest
	 * code must not exceed what the table was set up for. */
	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
		return (0);/* Invalid */

	hf->max_bits = maxbits;

	/*
	 * Cut out extra bits which we won't house in the table.
	 * This preparation reduces the same calculation in the for-loop
	 * making the table.
	 */
	if (maxbits < 16) {
		int ebits = 16 - maxbits;
		for (i = 1; i <= maxbits; i++) {
			bitptn[i] >>= ebits;
			weight[i] >>= ebits;
		}
	}
	if (maxbits > HTBL_BITS) {
		unsigned htbl_max;
		uint16_t *p;

		/* Codes are longer than the direct table can index: scale
		 * the short lengths down to HTBL_BITS and clear the table
		 * slots that will hold tree references. */
		diffbits = maxbits - HTBL_BITS;
		for (i = 1; i <= HTBL_BITS; i++) {
			bitptn[i] >>= diffbits;
			weight[i] >>= diffbits;
		}
		htbl_max = bitptn[HTBL_BITS] +
		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
		p = &(hf->tbl[htbl_max]);
		while (p < &hf->tbl[1U<<HTBL_BITS])
			*p++ = 0;
	} else
		diffbits = 0;
	hf->shift_bits = diffbits;

	/*
	 * Make the table.
	 */
	tbl_size = 1 << HTBL_BITS;
	tbl = hf->tbl;
	bitlen = hf->bitlen;
	len_avail = hf->len_avail;
	hf->tree_used = 0;
	for (i = 0; i < len_avail; i++) {
		uint16_t *p;
		int len, cnt;
		uint16_t bit;
		int extlen;
		struct htree_t *ht;

		/* Symbols with a zero length have no code. */
		if (bitlen[i] == 0)
			continue;
		/* Get a bit pattern */
		len = bitlen[i];
		ptn = bitptn[len];
		cnt = weight[len];
		if (len <= HTBL_BITS) {
			/* Calculate next bit pattern */
			if ((bitptn[len] = ptn + cnt) > tbl_size)
				return (0);/* Invalid */
			/* Update the table */
			/* Store `i' in the cnt slots starting at tbl[ptn];
			 * large runs are filled in chunks of 8 and 16. */
			p = &(tbl[ptn]);
			if (cnt > 7) {
				uint16_t *pc;

				cnt -= 8;
				pc = &p[cnt];
				pc[0] = (uint16_t)i;
				pc[1] = (uint16_t)i;
				pc[2] = (uint16_t)i;
				pc[3] = (uint16_t)i;
				pc[4] = (uint16_t)i;
				pc[5] = (uint16_t)i;
				pc[6] = (uint16_t)i;
				pc[7] = (uint16_t)i;
				if (cnt > 7) {
					cnt -= 8;
					memcpy(&p[cnt], pc,
						8 * sizeof(uint16_t));
					pc = &p[cnt];
					while (cnt > 15) {
						cnt -= 16;
						memcpy(&p[cnt], pc,
							16 * sizeof(uint16_t));
					}
				}
				if (cnt)
					memcpy(p, pc, cnt * sizeof(uint16_t));
			} else {
				while (cnt > 1) {
					p[--cnt] = (uint16_t)i;
					p[--cnt] = (uint16_t)i;
				}
				if (cnt)
					p[--cnt] = (uint16_t)i;
			}
			continue;
		}

		/*
		 * A bit length is too big to be housed to a direct table,
		 * so we use a tree model for its extra bits.
		 */
		bitptn[len] = ptn + cnt;
		bit = 1U << (diffbits -1);
		extlen = len - HTBL_BITS;

		/* The leading HTBL_BITS bits select the table slot that
		 * holds the root node for the remaining bits. */
		p = &(tbl[ptn >> diffbits]);
		if (*p == 0) {
			/* No root yet: take a new tree node. */
			*p = len_avail + hf->tree_used;
			ht = &(hf->tree[hf->tree_used++]);
			if (hf->tree_used > hf->tree_avail)
				return (0);/* Invalid */
			ht->left = 0;
			ht->right = 0;
		} else {
			/* The slot must refer to a tree node made earlier. */
			if (*p < len_avail ||
			    *p >= (len_avail + hf->tree_used))
				return (0);/* Invalid */
			ht = &(hf->tree[*p - len_avail]);
		}
		/* Walk, creating nodes as needed, one level per extra bit
		 * except the last: a set bit goes left, a clear bit right. */
		while (--extlen > 0) {
			if (ptn & bit) {
				if (ht->left < len_avail) {
					ht->left = len_avail + hf->tree_used;
					ht = &(hf->tree[hf->tree_used++]);
					if (hf->tree_used > hf->tree_avail)
						return (0);/* Invalid */
					ht->left = 0;
					ht->right = 0;
				} else {
					ht = &(hf->tree[ht->left - len_avail]);
				}
			} else {
				if (ht->right < len_avail) {
					ht->right = len_avail + hf->tree_used;
					ht = &(hf->tree[hf->tree_used++]);
					if (hf->tree_used > hf->tree_avail)
						return (0);/* Invalid */
					ht->left = 0;
					ht->right = 0;
				} else {
					ht = &(hf->tree[ht->right - len_avail]);
				}
			}
			bit >>= 1;
		}
		/* The last bit selects the leaf, which must still be unused. */
		if (ptn & bit) {
			if (ht->left != 0)
				return (0);/* Invalid */
			ht->left = (uint16_t)i;
		} else {
			if (ht->right != 0)
				return (0);/* Invalid */
			ht->right = (uint16_t)i;
		}
	}
	return (1);
}
2852
2853 static int
lzh_decode_huffman_tree(struct huffman * hf,unsigned rbits,int c)2854 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2855 {
2856 struct htree_t *ht;
2857 int extlen;
2858
2859 ht = hf->tree;
2860 extlen = hf->shift_bits;
2861 while (c >= hf->len_avail) {
2862 c -= hf->len_avail;
2863 if (extlen-- <= 0 || c >= hf->tree_used)
2864 return (0);
2865 if (rbits & (1U << extlen))
2866 c = ht[c].left;
2867 else
2868 c = ht[c].right;
2869 }
2870 return (c);
2871 }
2872
2873 static inline int
lzh_decode_huffman(struct huffman * hf,unsigned rbits)2874 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2875 {
2876 int c;
2877 /*
2878 * At first search an index table for a bit pattern.
2879 * If it fails, search a huffman tree for.
2880 */
2881 c = hf->tbl[rbits >> hf->shift_bits];
2882 if (c < hf->len_avail || hf->len_avail == 0)
2883 return (c);
2884 /* This bit pattern needs to be found out at a huffman tree. */
2885 return (lzh_decode_huffman_tree(hf, rbits, c));
2886 }
2887