1 /*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_endian.h"
47
48
/* Longest and shortest match lengths that can be encoded. */
#define MAXMATCH		256
#define MINMATCH		3
/*
 * Layout of the literal table:
 *
 *   entries 0 .. 255      literal codes
 *   entries 256 .. 509    match lengths MINMATCH .. MAXMATCH
 *
 * LT_BITLEN_SIZE is the total number of entries in that table
 * (510 when UCHAR_MAX is 255).
 */
#define LT_BITLEN_SIZE		((UCHAR_MAX + 1) + (MAXMATCH - MINMATCH + 1))
/*
 * Position table size.
 * Note: this size is used for both the position table and the
 * pre-literal table.
 */
#define PT_BITLEN_SIZE		(3 + 16)
65
/*
 * Working state of the LZH decoder.
 */
struct lzh_dec {
	/* Decoding status. */
	int			 state;

	/*
	 * Sliding window onto the most recently decoded bytes:
	 * 8Ki (lh5), 32Ki (lh6) or 64Ki (lh7).
	 */
	int			 w_size, w_mask;
	/* Storage for the window; it is used as a loop (ring) buffer. */
	unsigned char		*w_buff;
	/* Position in the window where the next byte is inserted. */
	int			 w_pos;
	/* Position in the window from which decoded bytes can be copied. */
	int			 copy_pos;
	/* How many decoded bytes can be copied from the window. */
	int			 copy_len;

	/*
	 * Bit stream reader.
	 */
	struct lzh_br {
#define CACHE_TYPE		uint64_t
#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
		/* Bits that have been fetched but not yet used. */
		CACHE_TYPE	 cache_buffer;
		/* Number of bits currently available in cache_buffer. */
		int		 cache_avail;
	} br;

	/*
	 * Huffman coding state.  Two instances exist, `lt' and `pt'
	 * (presumably the literal and position tables -- confirm against
	 * the decoder, which is outside this part of the file).
	 */
	struct huffman {
		int		 len_size, len_avail, len_bits;
		int		 freq[17];
		unsigned char	*bitlen;

		/*
		 * Symbols are looked up through an index table, which is
		 * faster than walking a binary Huffman tree.  The table
		 * is kept small because a large index table causes many
		 * L1 cache read misses.
		 */
#define HTBL_BITS	10
		int		 max_bits, shift_bits, tbl_bits;
		int		 tree_used, tree_avail;
		/* Direct access table. */
		uint16_t	*tbl;
		/* Binary tree for the extra bits beyond the direct table. */
		struct htree_t {
			uint16_t left, right;
		} *tree;
	} lt, pt;

	int			 blocks_avail;
	int			 pos_pt_len_size, pos_pt_len_bits;
	int			 literal_pt_len_size, literal_pt_len_bits;
	int			 reading_position;
	int			 loop;
	int			 error;
};
137
/*
 * Input/output bookkeeping handed to the LZH decoder.
 * NOTE(review): the field roles below are inferred from their names;
 * confirm against lzh_decode(), which is outside this part of the file.
 */
struct lzh_stream {
	/* Presumably the next compressed byte to read ... */
	const unsigned char	*next_in;
	/* ... how many input bytes are available ... */
	int			 avail_in;
	/* ... and the running total of input bytes. */
	int64_t			 total_in;
	const unsigned char	*ref_ptr;
	/* Presumably the output counterpart of avail_in/total_in. */
	int			 avail_out;
	int64_t			 total_out;
	/* Decoder state that belongs to this stream. */
	struct lzh_dec		*ds;
};
147
148 struct lha {
149 /* entry_bytes_remaining is the number of bytes we expect. */
150 int64_t entry_offset;
151 int64_t entry_bytes_remaining;
152 int64_t entry_unconsumed;
153 uint16_t entry_crc_calculated;
154
155 size_t header_size; /* header size */
156 unsigned char level; /* header level */
157 char method[3]; /* compress type */
158 int64_t compsize; /* compressed data size */
159 int64_t origsize; /* original file size */
160 int setflag;
161 #define BIRTHTIME_IS_SET 1
162 #define ATIME_IS_SET 2
163 #define UNIX_MODE_IS_SET 4
164 #define CRC_IS_SET 8
165 time_t birthtime;
166 long birthtime_tv_nsec;
167 time_t mtime;
168 long mtime_tv_nsec;
169 time_t atime;
170 long atime_tv_nsec;
171 mode_t mode;
172 int64_t uid;
173 int64_t gid;
174 struct archive_string uname;
175 struct archive_string gname;
176 uint16_t header_crc;
177 uint16_t crc;
178 /* dirname and filename could be in different codepages */
179 struct archive_string_conv *sconv_dir;
180 struct archive_string_conv *sconv_fname;
181 struct archive_string_conv *opt_sconv;
182
183 struct archive_string dirname;
184 struct archive_string filename;
185 struct archive_wstring ws;
186
187 unsigned char dos_attr;
188
189 /* Flag to mark progress that an archive was read their first header.*/
190 char found_first_header;
191 /* Flag to mark that indicates an empty directory. */
192 char directory;
193
194 /* Flags to mark progress of decompression. */
195 char decompress_init;
196 char end_of_entry;
197 char end_of_entry_cleanup;
198 char entry_is_compressed;
199
200 char format_name[64];
201
202 struct lzh_stream strm;
203 };
204
/*
 * Offsets of the members that are common to every LHA header level.
 */
#define H_METHOD_OFFSET		2	/* Compress type ("-l??-"). */
#define H_ATTR_OFFSET		19	/* DOS attribute. */
#define H_LEVEL_OFFSET		20	/* Header level. */
#define H_SIZE			22	/* Minimum header size. */
212
/* Callbacks registered with the archive_read core. */
static int	archive_read_format_lha_bid(struct archive_read *a,
		    int best_bid);
static int	archive_read_format_lha_options(struct archive_read *a,
		    const char *key, const char *val);
static int	archive_read_format_lha_read_header(struct archive_read *a,
		    struct archive_entry *entry);
static int	archive_read_format_lha_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_lha_read_data_skip(
		    struct archive_read *a);
static int	archive_read_format_lha_cleanup(struct archive_read *a);

/* Header parsing helpers. */
static void	lha_replace_path_separator(struct lha *lha,
		    struct archive_entry *entry);
static int	lha_read_file_header_0(struct archive_read *a,
		    struct lha *lha);
static int	lha_read_file_header_1(struct archive_read *a,
		    struct lha *lha);
static int	lha_read_file_header_2(struct archive_read *a,
		    struct lha *lha);
static int	lha_read_file_header_3(struct archive_read *a,
		    struct lha *lha);
static int	lha_read_file_extended_header(struct archive_read *a,
		    struct lha *lha, uint16_t *crc, int sizefield_length,
		    uint64_t limitsize, size_t *total_size);
static size_t	lha_check_header_format(const void *h);
static int	lha_skip_sfx(struct archive_read *a);
static time_t	lha_dos_time(const unsigned char *p);
static time_t	lha_win_time(uint64_t wintime, long *ns);
static unsigned char	lha_calcsum(unsigned char sum, const void *pp,
		    int offset, size_t size);
static int	lha_parse_linkname(struct archive_wstring *linkname,
		    struct archive_wstring *pathname);

/* Entry data readers and checksum helpers. */
static int	lha_read_data_none(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	lha_read_data_lzh(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static void	lha_crc16_init(void);
static uint16_t	lha_crc16(uint16_t crc, const void *pp, size_t len);

/* LZH decoder. */
static int	lzh_decode_init(struct lzh_stream *strm, const char *method);
static void	lzh_decode_free(struct lzh_stream *strm);
static int	lzh_decode(struct lzh_stream *strm, int last);
static int	lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br);
static int	lzh_huffman_init(struct huffman *hf, size_t len_size,
		    int tbl_bits);
static void	lzh_huffman_free(struct huffman *hf);
static int	lzh_read_pt_bitlen(struct lzh_stream *strm, int start,
		    int end);
static int	lzh_make_fake_table(struct huffman *hf, uint16_t c);
static int	lzh_make_huffman_table(struct huffman *hf);
static inline int	lzh_decode_huffman(struct huffman *hf,
		    unsigned rbits);
static int	lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits,
		    int c);
256
257
258 int
archive_read_support_format_lha(struct archive * _a)259 archive_read_support_format_lha(struct archive *_a)
260 {
261 struct archive_read *a = (struct archive_read *)_a;
262 struct lha *lha;
263 int r;
264
265 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
266 ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
267
268 lha = calloc(1, sizeof(*lha));
269 if (lha == NULL) {
270 archive_set_error(&a->archive, ENOMEM,
271 "Can't allocate lha data");
272 return (ARCHIVE_FATAL);
273 }
274 archive_string_init(&lha->ws);
275
276 r = __archive_read_register_format(a,
277 lha,
278 "lha",
279 archive_read_format_lha_bid,
280 archive_read_format_lha_options,
281 archive_read_format_lha_read_header,
282 archive_read_format_lha_read_data,
283 archive_read_format_lha_read_data_skip,
284 NULL,
285 archive_read_format_lha_cleanup,
286 NULL,
287 NULL);
288
289 if (r != ARCHIVE_OK)
290 free(lha);
291 return (ARCHIVE_OK);
292 }
293
/*
 * Decide whether `h' points at something that looks like an LHa header.
 *
 * Returns 0 when the bytes look like a header.  Otherwise returns how
 * many bytes (1 to 4) the caller may skip before testing again; the
 * distance is chosen from the byte found at the last position of the
 * method ID ("-l??-").
 *
 * The caller must provide at least H_SIZE readable bytes.
 */
static size_t
lha_check_header_format(const void *h)
{
	const unsigned char *hdr = h;
	const unsigned char *method = hdr + H_METHOD_OFFSET;
	const unsigned char id = method[3];

	switch (id) {
	case 'h':
	case 'z':
		return (1);
	case 'l':
		return (2);
	case '-':
		return (3);
	/*
	 * "-lh0-" ... "-lh7-" "-lhd-"
	 * "-lzs-" "-lz5-"
	 */
	case '0': case '1': case '2': case '3':
	case '4': case '5': case '6': case '7':
	case 'd':
	case 's':
		break;
	default:
		return (4);
	}

	/* A leading zero byte means the end of an LHa archive file. */
	if (hdr[0] == 0)
		return (4);
	if (method[0] != '-' || method[1] != 'l' || method[4] != '-')
		return (4);

	if (method[2] == 'h') {
		/* "-lh?-" */
		if (id == 's')
			return (4);
		if (hdr[H_LEVEL_OFFSET] == 0)
			return (0);
		if (hdr[H_LEVEL_OFFSET] <= 3 && hdr[H_ATTR_OFFSET] == 0x20)
			return (0);
		return (4);
	}
	if (method[2] == 'z') {
		/* LArc extensions: -lzs-, -lz4- and -lz5- (level 0 only). */
		if (hdr[H_LEVEL_OFFSET] != 0)
			return (4);
		if (id == 's' || id == '4' || id == '5')
			return (0);
	}
	return (4);
}
346
347 static int
archive_read_format_lha_bid(struct archive_read * a,int best_bid)348 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
349 {
350 const char *p;
351 const void *buff;
352 ssize_t bytes_avail, offset, window;
353 size_t next;
354
355 /* If there's already a better bid than we can ever
356 make, don't bother testing. */
357 if (best_bid > 30)
358 return (-1);
359
360 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
361 return (-1);
362
363 if (lha_check_header_format(p) == 0)
364 return (30);
365
366 if (p[0] == 'M' && p[1] == 'Z') {
367 /* PE file */
368 offset = 0;
369 window = 4096;
370 while (offset < (1024 * 20)) {
371 buff = __archive_read_ahead(a, offset + window,
372 &bytes_avail);
373 if (buff == NULL) {
374 /* Remaining bytes are less than window. */
375 window >>= 1;
376 if (window < (H_SIZE + 3))
377 return (0);
378 continue;
379 }
380 p = (const char *)buff + offset;
381 while (p + H_SIZE < (const char *)buff + bytes_avail) {
382 if ((next = lha_check_header_format(p)) == 0)
383 return (30);
384 p += next;
385 }
386 offset = p - (const char *)buff;
387 }
388 }
389 return (0);
390 }
391
392 static int
archive_read_format_lha_options(struct archive_read * a,const char * key,const char * val)393 archive_read_format_lha_options(struct archive_read *a,
394 const char *key, const char *val)
395 {
396 struct lha *lha;
397 int ret = ARCHIVE_FAILED;
398
399 lha = (struct lha *)(a->format->data);
400 if (strcmp(key, "hdrcharset") == 0) {
401 if (val == NULL || val[0] == 0)
402 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
403 "lha: hdrcharset option needs a character-set name");
404 else {
405 lha->opt_sconv =
406 archive_string_conversion_from_charset(
407 &a->archive, val, 0);
408 if (lha->opt_sconv != NULL)
409 ret = ARCHIVE_OK;
410 else
411 ret = ARCHIVE_FATAL;
412 }
413 return (ret);
414 }
415
416 /* Note: The "warn" return is just to inform the options
417 * supervisor that we didn't handle it. It will generate
418 * a suitable error if no one used this option. */
419 return (ARCHIVE_WARN);
420 }
421
422 static int
lha_skip_sfx(struct archive_read * a)423 lha_skip_sfx(struct archive_read *a)
424 {
425 const void *h;
426 const char *p, *q;
427 size_t next, skip;
428 ssize_t bytes, window;
429
430 window = 4096;
431 for (;;) {
432 h = __archive_read_ahead(a, window, &bytes);
433 if (h == NULL) {
434 /* Remaining bytes are less than window. */
435 window >>= 1;
436 if (window < (H_SIZE + 3))
437 goto fatal;
438 continue;
439 }
440 if (bytes < H_SIZE)
441 goto fatal;
442 p = h;
443 q = p + bytes;
444
445 /*
446 * Scan ahead until we find something that looks
447 * like the lha header.
448 */
449 while (p + H_SIZE < q) {
450 if ((next = lha_check_header_format(p)) == 0) {
451 skip = p - (const char *)h;
452 __archive_read_consume(a, skip);
453 return (ARCHIVE_OK);
454 }
455 p += next;
456 }
457 skip = p - (const char *)h;
458 __archive_read_consume(a, skip);
459 }
460 fatal:
461 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
462 "Couldn't find out LHa header");
463 return (ARCHIVE_FATAL);
464 }
465
466 static int
truncated_error(struct archive_read * a)467 truncated_error(struct archive_read *a)
468 {
469 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
470 "Truncated LHa header");
471 return (ARCHIVE_FATAL);
472 }
473
474 static int
archive_read_format_lha_read_header(struct archive_read * a,struct archive_entry * entry)475 archive_read_format_lha_read_header(struct archive_read *a,
476 struct archive_entry *entry)
477 {
478 struct archive_wstring linkname;
479 struct archive_wstring pathname;
480 struct lha *lha;
481 const unsigned char *p;
482 const char *signature;
483 int err;
484 struct archive_mstring conv_buffer;
485 const wchar_t *conv_buffer_p;
486
487 lha_crc16_init();
488
489 a->archive.archive_format = ARCHIVE_FORMAT_LHA;
490 if (a->archive.archive_format_name == NULL)
491 a->archive.archive_format_name = "lha";
492
493 lha = (struct lha *)(a->format->data);
494 lha->decompress_init = 0;
495 lha->end_of_entry = 0;
496 lha->end_of_entry_cleanup = 0;
497 lha->entry_unconsumed = 0;
498
499 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
500 /*
501 * LHa archiver added 0 to the tail of its archive file as
502 * the mark of the end of the archive.
503 */
504 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
505 if (signature == NULL || signature[0] == 0)
506 return (ARCHIVE_EOF);
507 return (truncated_error(a));
508 }
509
510 signature = (const char *)p;
511 if (lha->found_first_header == 0 &&
512 signature[0] == 'M' && signature[1] == 'Z') {
513 /* This is an executable? Must be self-extracting... */
514 err = lha_skip_sfx(a);
515 if (err < ARCHIVE_WARN)
516 return (err);
517
518 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
519 return (truncated_error(a));
520 signature = (const char *)p;
521 }
522 /* signature[0] == 0 means the end of an LHa archive file. */
523 if (signature[0] == 0)
524 return (ARCHIVE_EOF);
525
526 /*
527 * Check the header format and method type.
528 */
529 if (lha_check_header_format(p) != 0) {
530 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
531 "Bad LHa file");
532 return (ARCHIVE_FATAL);
533 }
534
535 /* We've found the first header. */
536 lha->found_first_header = 1;
537 /* Set a default value and common data */
538 lha->header_size = 0;
539 lha->level = p[H_LEVEL_OFFSET];
540 lha->method[0] = p[H_METHOD_OFFSET+1];
541 lha->method[1] = p[H_METHOD_OFFSET+2];
542 lha->method[2] = p[H_METHOD_OFFSET+3];
543 if (memcmp(lha->method, "lhd", 3) == 0)
544 lha->directory = 1;
545 else
546 lha->directory = 0;
547 if (memcmp(lha->method, "lh0", 3) == 0 ||
548 memcmp(lha->method, "lz4", 3) == 0)
549 lha->entry_is_compressed = 0;
550 else
551 lha->entry_is_compressed = 1;
552
553 lha->compsize = 0;
554 lha->origsize = 0;
555 lha->setflag = 0;
556 lha->birthtime = 0;
557 lha->birthtime_tv_nsec = 0;
558 lha->mtime = 0;
559 lha->mtime_tv_nsec = 0;
560 lha->atime = 0;
561 lha->atime_tv_nsec = 0;
562 lha->mode = (lha->directory)? 0777 : 0666;
563 lha->uid = 0;
564 lha->gid = 0;
565 archive_string_empty(&lha->dirname);
566 archive_string_empty(&lha->filename);
567 lha->dos_attr = 0;
568 if (lha->opt_sconv != NULL) {
569 lha->sconv_dir = lha->opt_sconv;
570 lha->sconv_fname = lha->opt_sconv;
571 } else {
572 lha->sconv_dir = NULL;
573 lha->sconv_fname = NULL;
574 }
575
576 switch (p[H_LEVEL_OFFSET]) {
577 case 0:
578 err = lha_read_file_header_0(a, lha);
579 break;
580 case 1:
581 err = lha_read_file_header_1(a, lha);
582 break;
583 case 2:
584 err = lha_read_file_header_2(a, lha);
585 break;
586 case 3:
587 err = lha_read_file_header_3(a, lha);
588 break;
589 default:
590 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
591 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
592 err = ARCHIVE_FATAL;
593 break;
594 }
595 if (err < ARCHIVE_WARN)
596 return (err);
597
598
599 if (!lha->directory && archive_strlen(&lha->filename) == 0)
600 /* The filename has not been set */
601 return (truncated_error(a));
602
603 /*
604 * Make a pathname from a dirname and a filename, after converting to Unicode.
605 * This is because codepages might differ between dirname and filename.
606 */
607 archive_string_init(&pathname);
608 archive_string_init(&linkname);
609 archive_string_init(&conv_buffer.aes_mbs);
610 archive_string_init(&conv_buffer.aes_mbs_in_locale);
611 archive_string_init(&conv_buffer.aes_utf8);
612 archive_string_init(&conv_buffer.aes_wcs);
613 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
614 archive_set_error(&a->archive,
615 ARCHIVE_ERRNO_FILE_FORMAT,
616 "Pathname cannot be converted "
617 "from %s to Unicode.",
618 archive_string_conversion_charset_name(lha->sconv_dir));
619 err = ARCHIVE_FATAL;
620 } else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
621 err = ARCHIVE_FATAL;
622 if (err == ARCHIVE_FATAL) {
623 archive_mstring_clean(&conv_buffer);
624 archive_wstring_free(&pathname);
625 archive_wstring_free(&linkname);
626 return (err);
627 }
628 archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
629
630 archive_string_empty(&conv_buffer.aes_mbs);
631 archive_string_empty(&conv_buffer.aes_mbs_in_locale);
632 archive_string_empty(&conv_buffer.aes_utf8);
633 archive_wstring_empty(&conv_buffer.aes_wcs);
634 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
635 archive_set_error(&a->archive,
636 ARCHIVE_ERRNO_FILE_FORMAT,
637 "Pathname cannot be converted "
638 "from %s to Unicode.",
639 archive_string_conversion_charset_name(lha->sconv_fname));
640 err = ARCHIVE_FATAL;
641 }
642 else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
643 err = ARCHIVE_FATAL;
644 if (err == ARCHIVE_FATAL) {
645 archive_mstring_clean(&conv_buffer);
646 archive_wstring_free(&pathname);
647 archive_wstring_free(&linkname);
648 return (err);
649 }
650 archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
651 archive_mstring_clean(&conv_buffer);
652
653 if ((lha->mode & AE_IFMT) == AE_IFLNK) {
654 /*
655 * Extract the symlink-name if it's included in the pathname.
656 */
657 if (!lha_parse_linkname(&linkname, &pathname)) {
658 /* We couldn't get the symlink-name. */
659 archive_set_error(&a->archive,
660 ARCHIVE_ERRNO_FILE_FORMAT,
661 "Unknown symlink-name");
662 archive_wstring_free(&pathname);
663 archive_wstring_free(&linkname);
664 return (ARCHIVE_FAILED);
665 }
666 } else {
667 /*
668 * Make sure a file-type is set.
669 * The mode has been overridden if it is in the extended data.
670 */
671 lha->mode = (lha->mode & ~AE_IFMT) |
672 ((lha->directory)? AE_IFDIR: AE_IFREG);
673 }
674 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
675 (lha->dos_attr & 1) != 0)
676 lha->mode &= ~(0222);/* read only. */
677
678 /*
679 * Set basic file parameters.
680 */
681 archive_entry_copy_pathname_w(entry, pathname.s);
682 archive_wstring_free(&pathname);
683 if (archive_strlen(&linkname) > 0) {
684 archive_entry_copy_symlink_w(entry, linkname.s);
685 } else
686 archive_entry_set_symlink(entry, NULL);
687 archive_wstring_free(&linkname);
688 /*
689 * When a header level is 0, there is a possibility that
690 * a pathname and a symlink has '\' character, a directory
691 * separator in DOS/Windows. So we should convert it to '/'.
692 */
693 if (p[H_LEVEL_OFFSET] == 0)
694 lha_replace_path_separator(lha, entry);
695
696 archive_entry_set_mode(entry, lha->mode);
697 archive_entry_set_uid(entry, lha->uid);
698 archive_entry_set_gid(entry, lha->gid);
699 if (archive_strlen(&lha->uname) > 0)
700 archive_entry_set_uname(entry, lha->uname.s);
701 if (archive_strlen(&lha->gname) > 0)
702 archive_entry_set_gname(entry, lha->gname.s);
703 if (lha->setflag & BIRTHTIME_IS_SET) {
704 archive_entry_set_birthtime(entry, lha->birthtime,
705 lha->birthtime_tv_nsec);
706 archive_entry_set_ctime(entry, lha->birthtime,
707 lha->birthtime_tv_nsec);
708 } else {
709 archive_entry_unset_birthtime(entry);
710 archive_entry_unset_ctime(entry);
711 }
712 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
713 if (lha->setflag & ATIME_IS_SET)
714 archive_entry_set_atime(entry, lha->atime,
715 lha->atime_tv_nsec);
716 else
717 archive_entry_unset_atime(entry);
718 if (lha->directory || archive_entry_symlink(entry) != NULL)
719 archive_entry_unset_size(entry);
720 else
721 archive_entry_set_size(entry, lha->origsize);
722
723 /*
724 * Prepare variables used to read a file content.
725 */
726 lha->entry_bytes_remaining = lha->compsize;
727 if (lha->entry_bytes_remaining < 0) {
728 archive_set_error(&a->archive,
729 ARCHIVE_ERRNO_FILE_FORMAT,
730 "Invalid LHa entry size");
731 return (ARCHIVE_FATAL);
732 }
733 lha->entry_offset = 0;
734 lha->entry_crc_calculated = 0;
735
736 /*
737 * This file does not have a content.
738 */
739 if (lha->directory || lha->compsize == 0)
740 lha->end_of_entry = 1;
741
742 snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-",
743 lha->method[0], lha->method[1], lha->method[2]);
744 a->archive.archive_format_name = lha->format_name;
745
746 return (err);
747 }
748
749 /*
750 * Replace a DOS path separator '\' by a character '/'.
751 * Some multi-byte character set have a character '\' in its second byte.
752 */
753 static void
lha_replace_path_separator(struct lha * lha,struct archive_entry * entry)754 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
755 {
756 const wchar_t *wp;
757 size_t i;
758
759 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
760 archive_wstrcpy(&(lha->ws), wp);
761 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
762 if (lha->ws.s[i] == L'\\')
763 lha->ws.s[i] = L'/';
764 }
765 archive_entry_copy_pathname_w(entry, lha->ws.s);
766 }
767
768 if ((wp = archive_entry_symlink_w(entry)) != NULL) {
769 archive_wstrcpy(&(lha->ws), wp);
770 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
771 if (lha->ws.s[i] == L'\\')
772 lha->ws.s[i] = L'/';
773 }
774 archive_entry_copy_symlink_w(entry, lha->ws.s);
775 }
776 }
777
778 /*
779 * Header 0 format
780 *
781 * +0 +1 +2 +7 +11
782 * +---------------+----------+----------------+-------------------+
783 * |header size(*1)|header sum|compression type|compressed size(*2)|
784 * +---------------+----------+----------------+-------------------+
785 * <---------------------(*1)----------*
786 *
787 * +11 +15 +17 +19 +20 +21
788 * +-----------------+---------+---------+--------------+----------------+
789 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
790 * +-----------------+---------+---------+--------------+----------------+
791 * *--------------------------------(*1)---------------------------------*
792 *
793 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4)
794 * +---------------+---------+----------+----------------+------------------+
795 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data |
796 * +---------------+---------+----------+----------------+------------------+
797 * <--(*3)-> <------(*2)------>
798 * *----------------------(*1)-------------------------->
799 *
800 */
801 #define H0_HEADER_SIZE_OFFSET 0
802 #define H0_HEADER_SUM_OFFSET 1
803 #define H0_COMP_SIZE_OFFSET 7
804 #define H0_ORIG_SIZE_OFFSET 11
805 #define H0_DOS_TIME_OFFSET 15
806 #define H0_NAME_LEN_OFFSET 21
807 #define H0_FILE_NAME_OFFSET 22
808 #define H0_FIXED_SIZE 24
809 static int
lha_read_file_header_0(struct archive_read * a,struct lha * lha)810 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
811 {
812 const unsigned char *p;
813 int extdsize, namelen;
814 unsigned char headersum, sum_calculated;
815
816 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
817 return (truncated_error(a));
818 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
819 headersum = p[H0_HEADER_SUM_OFFSET];
820 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
821 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
822 lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
823 namelen = p[H0_NAME_LEN_OFFSET];
824 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
825 if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
826 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
827 "Invalid LHa header");
828 return (ARCHIVE_FATAL);
829 }
830 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
831 return (truncated_error(a));
832
833 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
834 /* When extdsize == -2, A CRC16 value is not present in the header. */
835 if (extdsize >= 0) {
836 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
837 lha->setflag |= CRC_IS_SET;
838 }
839 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
840
841 /* Read an extended header */
842 if (extdsize > 0) {
843 /* This extended data is set by 'LHa for UNIX' only.
844 * Maybe fixed size.
845 */
846 p += H0_FILE_NAME_OFFSET + namelen + 2;
847 if (p[0] == 'U' && extdsize == 12) {
848 /* p[1] is a minor version. */
849 lha->mtime = archive_le32dec(&p[2]);
850 lha->mode = archive_le16dec(&p[6]);
851 lha->uid = archive_le16dec(&p[8]);
852 lha->gid = archive_le16dec(&p[10]);
853 lha->setflag |= UNIX_MODE_IS_SET;
854 }
855 }
856 __archive_read_consume(a, lha->header_size);
857
858 if (sum_calculated != headersum) {
859 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
860 "LHa header sum error");
861 return (ARCHIVE_FATAL);
862 }
863
864 return (ARCHIVE_OK);
865 }
866
867 /*
868 * Header 1 format
869 *
870 * +0 +1 +2 +7 +11
871 * +---------------+----------+----------------+-------------+
872 * |header size(*1)|header sum|compression type|skip size(*2)|
873 * +---------------+----------+----------------+-------------+
874 * <---------------(*1)----------*
875 *
876 * +11 +15 +17 +19 +20 +21
877 * +-----------------+---------+---------+--------------+----------------+
878 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
879 * +-----------------+---------+---------+--------------+----------------+
880 * *-------------------------------(*1)----------------------------------*
881 *
882 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4)
883 * +---------------+---------+----------+-----------+-----------+
884 * |name length(*3)|file name|file CRC16| creator |padding(*4)|
885 * +---------------+---------+----------+-----------+-----------+
886 * <--(*3)->
887 * *----------------------------(*1)----------------------------*
888 *
889 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5)
890 * +----------------+---------------------+------------------------+
891 * |next header size| extended header(*5) | compressed data |
892 * +----------------+---------------------+------------------------+
893 * *------(*1)-----> <--------------------(*2)-------------------->
894 */
895 #define H1_HEADER_SIZE_OFFSET 0
896 #define H1_HEADER_SUM_OFFSET 1
897 #define H1_COMP_SIZE_OFFSET 7
898 #define H1_ORIG_SIZE_OFFSET 11
899 #define H1_DOS_TIME_OFFSET 15
900 #define H1_NAME_LEN_OFFSET 21
901 #define H1_FILE_NAME_OFFSET 22
902 #define H1_FIXED_SIZE 27
903 static int
lha_read_file_header_1(struct archive_read * a,struct lha * lha)904 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
905 {
906 const unsigned char *p;
907 size_t extdsize;
908 int i, err, err2;
909 int namelen, padding;
910 unsigned char headersum, sum_calculated;
911
912 err = ARCHIVE_OK;
913
914 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
915 return (truncated_error(a));
916
917 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
918 headersum = p[H1_HEADER_SUM_OFFSET];
919 /* Note: An extended header size is included in a compsize. */
920 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
921 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
922 lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
923 namelen = p[H1_NAME_LEN_OFFSET];
924 /* Calculate a padding size. The result will be normally 0 only(?) */
925 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
926
927 if (namelen > 230 || padding < 0)
928 goto invalid;
929
930 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
931 return (truncated_error(a));
932
933 for (i = 0; i < namelen; i++) {
934 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
935 goto invalid;/* Invalid filename. */
936 }
937 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
938 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
939 lha->setflag |= CRC_IS_SET;
940
941 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
942 /* Consume used bytes but not include `next header size' data
943 * since it will be consumed in lha_read_file_extended_header(). */
944 __archive_read_consume(a, lha->header_size - 2);
945
946 /* Read extended headers */
947 err2 = lha_read_file_extended_header(a, lha, NULL, 2,
948 (uint64_t)(lha->compsize + 2), &extdsize);
949 if (err2 < ARCHIVE_WARN)
950 return (err2);
951 if (err2 < err)
952 err = err2;
953 /* Get a real compressed file size. */
954 lha->compsize -= extdsize - 2;
955
956 if (lha->compsize < 0)
957 goto invalid; /* Invalid compressed file size */
958
959 if (sum_calculated != headersum) {
960 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
961 "LHa header sum error");
962 return (ARCHIVE_FATAL);
963 }
964 return (err);
965 invalid:
966 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
967 "Invalid LHa header");
968 return (ARCHIVE_FATAL);
969 }
970
/*
 * Header 2 format
 *
 * +0              +2               +7                  +11               +15
 * +---------------+----------------+-------------------+-----------------+
 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
 * +---------------+----------------+-------------------+-----------------+
 *  <--------------------------------(*1)---------------------------------*
 *
 * +15               +19          +20              +21        +23         +24
 * +-----------------+------------+----------------+----------+-----------+
 * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
 * +-----------------+------------+----------------+----------+-----------+
 * *---------------------------------(*1)---------------------------------*
 *
 * +24              +26                 +26+(*3)      +26+(*3)+(*4)
 * +----------------+-------------------+-------------+-------------------+
 * |next header size|extended header(*3)| padding(*4) |  compressed data  |
 * +----------------+-------------------+-------------+-------------------+
 * *--------------------------(*1)-------------------> <------(*2)------->
 *
 */
993 #define H2_HEADER_SIZE_OFFSET 0
994 #define H2_COMP_SIZE_OFFSET 7
995 #define H2_ORIG_SIZE_OFFSET 11
996 #define H2_TIME_OFFSET 15
997 #define H2_CRC_OFFSET 21
998 #define H2_FIXED_SIZE 24
999 static int
lha_read_file_header_2(struct archive_read * a,struct lha * lha)1000 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1001 {
1002 const unsigned char *p;
1003 size_t extdsize;
1004 int err, padding;
1005 uint16_t header_crc;
1006
1007 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1008 return (truncated_error(a));
1009
1010 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1011 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1012 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1013 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1014 lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1015 lha->setflag |= CRC_IS_SET;
1016
1017 if (lha->header_size < H2_FIXED_SIZE) {
1018 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1019 "Invalid LHa header size");
1020 return (ARCHIVE_FATAL);
1021 }
1022
1023 header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1024 __archive_read_consume(a, H2_FIXED_SIZE);
1025
1026 /* Read extended headers */
1027 err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1028 lha->header_size - H2_FIXED_SIZE, &extdsize);
1029 if (err < ARCHIVE_WARN)
1030 return (err);
1031
1032 /* Calculate a padding size. The result will be normally 0 or 1. */
1033 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1034 if (padding > 0) {
1035 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1036 return (truncated_error(a));
1037 header_crc = lha_crc16(header_crc, p, padding);
1038 __archive_read_consume(a, padding);
1039 }
1040
1041 if (header_crc != lha->header_crc) {
1042 #ifndef DONT_FAIL_ON_CRC_ERROR
1043 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1044 "LHa header CRC error");
1045 return (ARCHIVE_FATAL);
1046 #endif
1047 }
1048 return (err);
1049 }
1050
/*
 * Header 3 format
 *
 * +0           +2               +7                  +11               +15
 * +------------+----------------+-------------------+-----------------+
 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
 * +------------+----------------+-------------------+-----------------+
 *  <-------------------------------(*1)-------------------------------*
 *
 * +15               +19          +20              +21        +23         +24
 * +-----------------+------------+----------------+----------+-----------+
 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
 * +-----------------+------------+----------------+----------+-----------+
 * *--------------------------------(*1)----------------------------------*
 *
 * +24             +28              +32                 +32+(*3)
 * +---------------+----------------+-------------------+-----------------+
 * |header size(*1)|next header size|extended header(*3)| compressed data |
 * +---------------+----------------+-------------------+-----------------+
 * *------------------------(*1)-----------------------> <------(*2)----->
 *
 */
1073 #define H3_FIELD_LEN_OFFSET 0
1074 #define H3_COMP_SIZE_OFFSET 7
1075 #define H3_ORIG_SIZE_OFFSET 11
1076 #define H3_TIME_OFFSET 15
1077 #define H3_CRC_OFFSET 21
1078 #define H3_HEADER_SIZE_OFFSET 24
1079 #define H3_FIXED_SIZE 28
1080 static int
lha_read_file_header_3(struct archive_read * a,struct lha * lha)1081 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1082 {
1083 const unsigned char *p;
1084 size_t extdsize;
1085 int err;
1086 uint16_t header_crc;
1087
1088 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1089 return (truncated_error(a));
1090
1091 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1092 goto invalid;
1093 lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1094 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1095 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1096 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1097 lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1098 lha->setflag |= CRC_IS_SET;
1099
1100 if (lha->header_size < H3_FIXED_SIZE + 4)
1101 goto invalid;
1102 header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1103 __archive_read_consume(a, H3_FIXED_SIZE);
1104
1105 /* Read extended headers */
1106 err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1107 lha->header_size - H3_FIXED_SIZE, &extdsize);
1108 if (err < ARCHIVE_WARN)
1109 return (err);
1110
1111 if (header_crc != lha->header_crc) {
1112 #ifndef DONT_FAIL_ON_CRC_ERROR
1113 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1114 "LHa header CRC error");
1115 return (ARCHIVE_FATAL);
1116 #endif
1117 }
1118 return (err);
1119 invalid:
1120 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1121 "Invalid LHa header");
1122 return (ARCHIVE_FATAL);
1123 }
1124
/*
 * Extended header format
 *
 * +0             +2        +3  -- used in header 1 and 2
 * +0             +4        +5  -- used in header 3
 * +--------------+---------+-------------------+--------------+--
 * |ex-header size|header id|        data       |ex-header size| .......
 * +--------------+---------+-------------------+--------------+--
 * <-------------( ex-header size)------------> <-- next extended header --*
 *
 * If the ex-header size is zero, it is the mark of the end of extended
 * headers.
 *
 */
/*
 * Read the chain of extended headers that follows a file header and
 * store the recognized fields in `lha'.
 *
 * a                 Reader whose input is positioned at the first
 *                   extended header size field.
 * lha               Receives the decoded values (names, times, sizes,
 *                   ownership, string conversions, header CRC).
 * crc               Optional running CRC-16. When non-NULL it is updated
 *                   with the bytes of every record read here; for the
 *                   header CRC record the two stored CRC bytes are
 *                   replaced by zeros.
 * sizefield_length  Width in bytes of each size field: sizeof(uint16_t)
 *                   selects 16-bit little-endian sizes, anything else is
 *                   read as 32-bit little-endian.
 * limitsize         Upper bound for the accumulated size; a record that
 *                   would push *total_size past it is rejected.
 * total_size        Output. Starts at sizefield_length and grows by the
 *                   full size of every record consumed.
 *
 * Returns ARCHIVE_OK when the zero-size terminator is reached,
 * ARCHIVE_FATAL for malformed records or when a string conversion object
 * cannot be obtained, and the result of truncated_error() when the input
 * ends early.
 */
static int
lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
    uint16_t *crc, int sizefield_length, uint64_t limitsize, size_t *total_size)
{
	const void *h;
	const unsigned char *extdheader;
	size_t	extdsize;
	size_t	datasize;
	unsigned int i;
	unsigned char extdtype;

	/* Extended header type ids. */
#define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
#define EXT_FILENAME		0x01		/* Filename 		    */
#define EXT_DIRECTORY		0x02		/* Directory name	    */
#define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
#define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
#define EXT_FILESIZE		0x42		/* Large file size	    */
#define EXT_TIMEZONE		0x43		/* Time zone		    */
#define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
#define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
#define EXT_CODEPAGE		0x46		/* Codepage		    */
#define EXT_UNIX_MODE		0x50		/* File permission	    */
#define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
#define EXT_UNIX_GNAME		0x52		/* Group name		    */
#define EXT_UNIX_UNAME		0x53		/* User name		    */
#define EXT_UNIX_MTIME		0x54		/* Modified time	    */
#define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
#define EXT_NEW_ATTR		0xff		/* new attribute	    */

	/* The running total starts with the width of one size field. */
	*total_size = sizefield_length;

	for (;;) {
		/* Read an extended header size. */
		if ((h =
		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
			return (truncated_error(a));
		/* A size of zero marks the end of the extended
		 * headers. */
		if (sizefield_length == sizeof(uint16_t))
			extdsize = archive_le16dec(h);
		else
			extdsize = archive_le32dec(h);
		if (extdsize == 0) {
			/* End of extended header: the terminating size field
			 * is still part of the CRC. */
			if (crc != NULL)
				*crc = lha_crc16(*crc, h, sizefield_length);
			__archive_read_consume(a, sizefield_length);
			return (ARCHIVE_OK);
		}

		/* Sanity check of the extended header size: it must fit in
		 * the caller's limit and must be larger than the size field
		 * alone, so that a type byte is present. */
		if (((uint64_t)*total_size + extdsize) >
				    limitsize ||
		    extdsize <= (size_t)sizefield_length)
			goto invalid;

		/* Read the extended header. */
		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
			return (truncated_error(a));
		*total_size += extdsize;

		extdheader = (const unsigned char *)h;
		/* Get the extended header type. */
		extdtype = extdheader[sizefield_length];
		/* Calculate an extended data size. */
		datasize = extdsize - (1 + sizefield_length);
		/* Skip an extended header size field and type field. */
		extdheader += sizefield_length + 1;

		/* Every record except the header CRC record goes into the
		 * CRC unchanged; the CRC record is handled in its case. */
		if (crc != NULL && extdtype != EXT_HEADER_CRC)
			*crc = lha_crc16(*crc, h, extdsize);
		switch (extdtype) {
		case EXT_HEADER_CRC:
			/* We only use a header CRC. Following data will not
			 * be used. */
			/* NOTE(review): when datasize < 2 nothing of this
			 * record is added to *crc. */
			if (datasize >= 2) {
				lha->header_crc = archive_le16dec(extdheader);
				if (crc != NULL) {
					static const char zeros[2] = {0, 0};
					/* Size and type fields. */
					*crc = lha_crc16(*crc, h,
					    extdsize - datasize);
					/* CRC value itself as zero */
					*crc = lha_crc16(*crc, zeros, 2);
					/* Data following the CRC value. */
					*crc = lha_crc16(*crc,
					    extdheader+2, datasize - 2);
				}
			}
			break;
		case EXT_FILENAME:
			if (datasize == 0) {
				/* maybe directory header */
				archive_string_empty(&lha->filename);
				break;
			}
			/* A name starting with a NUL byte is rejected. */
			if (extdheader[0] == '\0')
				goto invalid;
			archive_strncpy(&lha->filename,
			    (const char *)extdheader, datasize);
			break;
		case EXT_UTF16_FILENAME:
			if (datasize == 0) {
				/* maybe directory header */
				archive_string_empty(&lha->filename);
				break;
			} else if (datasize & 1) {
				/* UTF-16 characters take always 2 or 4 bytes */
				goto invalid;
			}
			if (extdheader[0] == '\0')
				goto invalid;
			/* Store the raw UTF-16 bytes; they may contain NUL
			 * bytes, so an array append is used. */
			archive_string_empty(&lha->filename);
			archive_array_append(&lha->filename,
				(const char *)extdheader, datasize);
			/* Setup a string conversion for a filename. */
			lha->sconv_fname =
			    archive_string_conversion_from_charset(&a->archive,
			        "UTF-16LE", 1);
			if (lha->sconv_fname == NULL)
				return (ARCHIVE_FATAL);
			break;
		case EXT_DIRECTORY:
			if (datasize == 0 || extdheader[0] == '\0')
				/* no directory name data: treated as an
				 * invalid header. */
				goto invalid;

			archive_strncpy(&lha->dirname,
		  	    (const char *)extdheader, datasize);
			/*
			 * Convert directory delimiter from 0xFF
			 * to '/' for local system.
			 */
			for (i = 0; i < lha->dirname.length; i++) {
				if ((unsigned char)lha->dirname.s[i] == 0xFF)
					lha->dirname.s[i] = '/';
			}
			/* Is last character directory separator? */
			if (lha->dirname.s[lha->dirname.length-1] != '/')
				/* invalid directory data */
				goto invalid;
			break;
		case EXT_UTF16_DIRECTORY:
			/* UTF-16 characters take always 2 or 4 bytes */
			if (datasize == 0 || (datasize & 1) ||
			    extdheader[0] == '\0') {
				/* no usable directory name data: treated as
				 * an invalid header. */
				goto invalid;
			}

			archive_string_empty(&lha->dirname);
			archive_array_append(&lha->dirname,
				(const char *)extdheader, datasize);
			lha->sconv_dir =
			    archive_string_conversion_from_charset(&a->archive,
			        "UTF-16LE", 1);
			if (lha->sconv_dir == NULL)
				return (ARCHIVE_FATAL);
			else {
				/*
				 * Convert directory delimiter from 0xFFFF
				 * to '/' for local system.
				 */
				uint16_t dirSep;
				uint16_t d = 1;
				/* Host byte order probe: the separator must
				 * be stored so that its bytes in memory are
				 * 0x2F 0x00, i.e. '/' in UTF-16LE. */
				if (archive_be16dec(&d) == 1)
					dirSep = 0x2F00;
				else
					dirSep = 0x002F;

				/* UTF-16LE character */
				uint16_t *utf16name =
				    (uint16_t *)lha->dirname.s;
				for (i = 0; i < lha->dirname.length / 2; i++) {
					if (utf16name[i] == 0xFFFF) {
						utf16name[i] = dirSep;
					}
				}
				/* Is last character directory separator? */
				if (utf16name[lha->dirname.length / 2 - 1] !=
				    dirSep) {
					/* invalid directory data */
					goto invalid;
				}
			}
			break;
		case EXT_DOS_ATTR:
			/* Only the low byte of the 16-bit value is kept. */
			if (datasize == 2)
				lha->dos_attr = (unsigned char)
				    (archive_le16dec(extdheader) & 0xff);
			break;
		case EXT_TIMESTAMP:
			/* Three 64-bit Windows time stamps, stored in the
			 * order birth time, modification time, access time. */
			if (datasize == (sizeof(uint64_t) * 3)) {
				lha->birthtime = lha_win_time(
				    archive_le64dec(extdheader),
				    &lha->birthtime_tv_nsec);
				extdheader += sizeof(uint64_t);
				lha->mtime = lha_win_time(
				    archive_le64dec(extdheader),
				    &lha->mtime_tv_nsec);
				extdheader += sizeof(uint64_t);
				lha->atime = lha_win_time(
				    archive_le64dec(extdheader),
				    &lha->atime_tv_nsec);
				lha->setflag |= BIRTHTIME_IS_SET |
				    ATIME_IS_SET;
			}
			break;
		case EXT_FILESIZE:
			/* 64-bit compressed and original sizes replace the
			 * values from the basic header; values that come out
			 * negative are rejected. */
			if (datasize == sizeof(uint64_t) * 2) {
				lha->compsize = archive_le64dec(extdheader);
				extdheader += sizeof(uint64_t);
				lha->origsize = archive_le64dec(extdheader);
				if (lha->compsize < 0 || lha->origsize < 0)
					goto invalid;
			}
			break;
		case EXT_CODEPAGE:
			/* Get an archived filename charset from codepage.
			 * This overwrites the charset specified by
			 * hdrcharset option. */
			if (datasize == sizeof(uint32_t)) {
				struct archive_string cp;
				const char *charset;

				archive_string_init(&cp);
				switch (archive_le32dec(extdheader)) {
				case 65001: /* UTF-8 */
					charset = "UTF-8";
					break;
				default:
					/* Any other value becomes the charset
					 * name "CP<number>". */
					archive_string_sprintf(&cp, "CP%d",
					    (int)archive_le32dec(extdheader));
					charset = cp.s;
					break;
				}
				lha->sconv_dir =
				    archive_string_conversion_from_charset(
					&(a->archive), charset, 1);
				lha->sconv_fname =
				    archive_string_conversion_from_charset(
					&(a->archive), charset, 1);
				/* `charset' may point into cp; release it
				 * only after both lookups are done. */
				archive_string_free(&cp);
				if (lha->sconv_dir == NULL)
					return (ARCHIVE_FATAL);
				if (lha->sconv_fname == NULL)
					return (ARCHIVE_FATAL);
			}
			break;
		case EXT_UNIX_MODE:
			if (datasize == sizeof(uint16_t)) {
				lha->mode = archive_le16dec(extdheader);
				lha->setflag |= UNIX_MODE_IS_SET;
			}
			break;
		case EXT_UNIX_GID_UID:
			/* 16-bit gid followed by 16-bit uid. */
			if (datasize == (sizeof(uint16_t) * 2)) {
				lha->gid = archive_le16dec(extdheader);
				lha->uid = archive_le16dec(extdheader+2);
			}
			break;
		case EXT_UNIX_GNAME:
			if (datasize > 0)
				archive_strncpy(&lha->gname,
				    (const char *)extdheader, datasize);
			break;
		case EXT_UNIX_UNAME:
			if (datasize > 0)
				archive_strncpy(&lha->uname,
				    (const char *)extdheader, datasize);
			break;
		case EXT_UNIX_MTIME:
			if (datasize == sizeof(uint32_t))
				lha->mtime = archive_le32dec(extdheader);
			break;
		case EXT_OS2_NEW_ATTR:
			/* This extended header is OS/2 depend. */
			/* Layout read here: attr(2) mode(2) gid(2) uid(2)
			 * birth time(4) access time(4). */
			if (datasize == 16) {
				lha->dos_attr = (unsigned char)
				    (archive_le16dec(extdheader) & 0xff);
				lha->mode = archive_le16dec(extdheader+2);
				lha->gid = archive_le16dec(extdheader+4);
				lha->uid = archive_le16dec(extdheader+6);
				lha->birthtime = archive_le32dec(extdheader+8);
				lha->atime = archive_le32dec(extdheader+12);
				lha->setflag |= UNIX_MODE_IS_SET
				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
			}
			break;
		case EXT_NEW_ATTR:
			/* Layout read here: mode(4) gid(4) uid(4)
			 * birth time(4) access time(4). */
			if (datasize == 20) {
				lha->mode = (mode_t)archive_le32dec(extdheader);
				lha->gid = archive_le32dec(extdheader+4);
				lha->uid = archive_le32dec(extdheader+8);
				lha->birthtime = archive_le32dec(extdheader+12);
				lha->atime = archive_le32dec(extdheader+16);
				lha->setflag |= UNIX_MODE_IS_SET
				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
			}
			break;
		case EXT_TIMEZONE:		/* Not supported */
			break;
		default:
			/* Unknown record types are skipped. */
			break;
		}

		/* Step over this record; the next size field follows. */
		__archive_read_consume(a, extdsize);
	}
invalid:
	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
	    "Invalid extended LHa header");
	return (ARCHIVE_FATAL);
}
1449
1450 static int
lha_end_of_entry(struct archive_read * a)1451 lha_end_of_entry(struct archive_read *a)
1452 {
1453 struct lha *lha = (struct lha *)(a->format->data);
1454 int r = ARCHIVE_EOF;
1455
1456 if (!lha->end_of_entry_cleanup) {
1457 if ((lha->setflag & CRC_IS_SET) &&
1458 lha->crc != lha->entry_crc_calculated) {
1459 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1460 "LHa data CRC error");
1461 r = ARCHIVE_WARN;
1462 }
1463
1464 /* End-of-entry cleanup done. */
1465 lha->end_of_entry_cleanup = 1;
1466 }
1467 return (r);
1468 }
1469
1470 static int
archive_read_format_lha_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1471 archive_read_format_lha_read_data(struct archive_read *a,
1472 const void **buff, size_t *size, int64_t *offset)
1473 {
1474 struct lha *lha = (struct lha *)(a->format->data);
1475 int r;
1476
1477 if (lha->entry_unconsumed) {
1478 /* Consume as much as the decompressor actually used. */
1479 __archive_read_consume(a, lha->entry_unconsumed);
1480 lha->entry_unconsumed = 0;
1481 }
1482 if (lha->end_of_entry) {
1483 *offset = lha->entry_offset;
1484 *size = 0;
1485 *buff = NULL;
1486 return (lha_end_of_entry(a));
1487 }
1488
1489 if (lha->entry_is_compressed)
1490 r = lha_read_data_lzh(a, buff, size, offset);
1491 else
1492 /* No compression. */
1493 r = lha_read_data_none(a, buff, size, offset);
1494 return (r);
1495 }
1496
1497 /*
1498 * Read a file content in no compression.
1499 *
1500 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1501 * lha->end_of_entry if it consumes all of the data.
1502 */
1503 static int
lha_read_data_none(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1504 lha_read_data_none(struct archive_read *a, const void **buff,
1505 size_t *size, int64_t *offset)
1506 {
1507 struct lha *lha = (struct lha *)(a->format->data);
1508 ssize_t bytes_avail;
1509
1510 if (lha->entry_bytes_remaining == 0) {
1511 *buff = NULL;
1512 *size = 0;
1513 *offset = lha->entry_offset;
1514 lha->end_of_entry = 1;
1515 return (ARCHIVE_OK);
1516 }
1517 /*
1518 * Note: '1' here is a performance optimization.
1519 * Recall that the decompression layer returns a count of
1520 * available bytes; asking for more than that forces the
1521 * decompressor to combine reads by copying data.
1522 */
1523 *buff = __archive_read_ahead(a, 1, &bytes_avail);
1524 if (bytes_avail <= 0) {
1525 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1526 "Truncated LHa file data");
1527 return (ARCHIVE_FATAL);
1528 }
1529 if (bytes_avail > lha->entry_bytes_remaining)
1530 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1531 lha->entry_crc_calculated =
1532 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1533 *size = bytes_avail;
1534 *offset = lha->entry_offset;
1535 lha->entry_offset += bytes_avail;
1536 lha->entry_bytes_remaining -= bytes_avail;
1537 if (lha->entry_bytes_remaining == 0)
1538 lha->end_of_entry = 1;
1539 lha->entry_unconsumed = bytes_avail;
1540 return (ARCHIVE_OK);
1541 }
1542
1543 /*
1544 * Read a file content in LZHUFF encoding.
1545 *
1546 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1547 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1548 * all of the data.
1549 */
1550 static int
lha_read_data_lzh(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1551 lha_read_data_lzh(struct archive_read *a, const void **buff,
1552 size_t *size, int64_t *offset)
1553 {
1554 struct lha *lha = (struct lha *)(a->format->data);
1555 ssize_t bytes_avail;
1556 int r;
1557
1558 /* If we haven't yet read any data, initialize the decompressor. */
1559 if (!lha->decompress_init) {
1560 r = lzh_decode_init(&(lha->strm), lha->method);
1561 switch (r) {
1562 case ARCHIVE_OK:
1563 break;
1564 case ARCHIVE_FAILED:
1565 /* Unsupported compression. */
1566 *buff = NULL;
1567 *size = 0;
1568 *offset = 0;
1569 archive_set_error(&a->archive,
1570 ARCHIVE_ERRNO_FILE_FORMAT,
1571 "Unsupported lzh compression method -%c%c%c-",
1572 lha->method[0], lha->method[1], lha->method[2]);
1573 /* We know compressed size; just skip it. */
1574 archive_read_format_lha_read_data_skip(a);
1575 return (ARCHIVE_WARN);
1576 default:
1577 archive_set_error(&a->archive, ENOMEM,
1578 "Couldn't allocate memory "
1579 "for lzh decompression");
1580 return (ARCHIVE_FATAL);
1581 }
1582 /* We've initialized decompression for this stream. */
1583 lha->decompress_init = 1;
1584 lha->strm.avail_out = 0;
1585 lha->strm.total_out = 0;
1586 }
1587
1588 /*
1589 * Note: '1' here is a performance optimization.
1590 * Recall that the decompression layer returns a count of
1591 * available bytes; asking for more than that forces the
1592 * decompressor to combine reads by copying data.
1593 */
1594 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1595 if (bytes_avail <= 0) {
1596 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1597 "Truncated LHa file body");
1598 return (ARCHIVE_FATAL);
1599 }
1600 if (bytes_avail > lha->entry_bytes_remaining)
1601 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1602
1603 lha->strm.avail_in = (int)bytes_avail;
1604 lha->strm.total_in = 0;
1605 lha->strm.avail_out = 0;
1606
1607 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1608 switch (r) {
1609 case ARCHIVE_OK:
1610 break;
1611 case ARCHIVE_EOF:
1612 lha->end_of_entry = 1;
1613 break;
1614 default:
1615 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1616 "Bad lzh data");
1617 return (ARCHIVE_FAILED);
1618 }
1619 lha->entry_unconsumed = lha->strm.total_in;
1620 lha->entry_bytes_remaining -= lha->strm.total_in;
1621
1622 if (lha->strm.avail_out) {
1623 *offset = lha->entry_offset;
1624 *size = lha->strm.avail_out;
1625 *buff = lha->strm.ref_ptr;
1626 lha->entry_crc_calculated =
1627 lha_crc16(lha->entry_crc_calculated, *buff, *size);
1628 lha->entry_offset += *size;
1629 } else {
1630 *offset = lha->entry_offset;
1631 *size = 0;
1632 *buff = NULL;
1633 if (lha->end_of_entry)
1634 return (lha_end_of_entry(a));
1635 }
1636 return (ARCHIVE_OK);
1637 }
1638
1639 /*
1640 * Skip a file content.
1641 */
1642 static int
archive_read_format_lha_read_data_skip(struct archive_read * a)1643 archive_read_format_lha_read_data_skip(struct archive_read *a)
1644 {
1645 struct lha *lha;
1646 int64_t bytes_skipped;
1647
1648 lha = (struct lha *)(a->format->data);
1649
1650 if (lha->entry_unconsumed) {
1651 /* Consume as much as the decompressor actually used. */
1652 __archive_read_consume(a, lha->entry_unconsumed);
1653 lha->entry_unconsumed = 0;
1654 }
1655
1656 /* if we've already read to end of data, we're done. */
1657 if (lha->end_of_entry_cleanup)
1658 return (ARCHIVE_OK);
1659
1660 /*
1661 * If the length is at the beginning, we can skip the
1662 * compressed data much more quickly.
1663 */
1664 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1665 if (bytes_skipped < 0)
1666 return (ARCHIVE_FATAL);
1667
1668 /* This entry is finished and done. */
1669 lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1670 return (ARCHIVE_OK);
1671 }
1672
1673 static int
archive_read_format_lha_cleanup(struct archive_read * a)1674 archive_read_format_lha_cleanup(struct archive_read *a)
1675 {
1676 struct lha *lha = (struct lha *)(a->format->data);
1677
1678 lzh_decode_free(&(lha->strm));
1679 archive_string_free(&(lha->dirname));
1680 archive_string_free(&(lha->filename));
1681 archive_string_free(&(lha->uname));
1682 archive_string_free(&(lha->gname));
1683 archive_wstring_free(&(lha->ws));
1684 free(lha);
1685 (a->format->data) = NULL;
1686 return (ARCHIVE_OK);
1687 }
1688
1689 /*
1690 * 'LHa for UNIX' utility has archived a symbolic-link name after
1691 * a pathname with '|' character.
1692 * This function extracts the symbolic-link name from the pathname.
1693 *
1694 * example.
1695 * 1. a symbolic-name is 'aaa/bb/cc'
1696 * 2. a filename is 'xxx/bbb'
1697 * then an archived pathname is 'xxx/bbb|aaa/bb/cc'
1698 */
1699 static int
lha_parse_linkname(struct archive_wstring * linkname,struct archive_wstring * pathname)1700 lha_parse_linkname(struct archive_wstring *linkname,
1701 struct archive_wstring *pathname)
1702 {
1703 wchar_t * linkptr;
1704 size_t symlen;
1705
1706 linkptr = wcschr(pathname->s, L'|');
1707 if (linkptr != NULL) {
1708 symlen = wcslen(linkptr + 1);
1709 archive_wstrncpy(linkname, linkptr+1, symlen);
1710
1711 *linkptr = 0;
1712 pathname->length = wcslen(pathname->s);
1713
1714 return (1);
1715 }
1716 return (0);
1717 }
1718
/*
 * Convert an MS-DOS date/time into a Unix time.
 *
 * `p' points at four bytes: a little-endian 16-bit time word followed
 * by a little-endian 16-bit date word. The fields are handed to
 * mktime() with tm_isdst set to -1.
 */
static time_t
lha_dos_time(const unsigned char *p)
{
	const int dos_time = archive_le16dec(p);
	const int dos_date = archive_le16dec(p+2);
	struct tm when;

	memset(&when, 0, sizeof(when));

	/* Date word: 7 bits year (from 1980), 4 bits month, 5 bits day. */
	when.tm_mday = dos_date & 0x1f;
	when.tm_mon = ((dos_date >> 5) & 0x0f) - 1;	/* 0-based month. */
	when.tm_year = ((dos_date >> 9) & 0x7f) + 80;	/* Since 1900. */

	/* Time word: 5 bits hour, 6 bits minute, 5 bits seconds / 2. */
	when.tm_sec = (dos_time << 1) & 0x3e;
	when.tm_min = (dos_time >> 5) & 0x3f;
	when.tm_hour = (dos_time >> 11) & 0x1f;

	when.tm_isdst = -1;
	return (mktime(&when));
}
1739
/*
 * Convert an MS-Windows time stamp (a count of 100 ns units) into a
 * Unix time.
 *
 * Values earlier than 1970-01-01 00:00:00 (UTC) yield 0. When `ns' is
 * not NULL it receives the sub-second part in nanoseconds.
 */
static time_t
lha_win_time(uint64_t wintime, long *ns)
{
#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)

	/* Anything before the Unix epoch is clamped to it. */
	if (wintime < EPOC_TIME) {
		if (ns != NULL)
			*ns = 0;
		return (0);
	}

	wintime -= EPOC_TIME;	/* 1970-01-01 00:00:00 (UTC) */
	if (ns != NULL)
		*ns = (long)(wintime % 10000000) * 100;
	return (wintime / 10000000);
}
1757
/*
 * Add `size' bytes, starting `offset' bytes into the buffer `pp', to
 * the 8-bit checksum `sum' and return the result (modulo 256).
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	const unsigned char *cur = (const unsigned char *)pp;
	const unsigned char *end;

	cur += offset;
	end = cur + size;
	while (cur != end)
		sum += *cur++;
	return (sum);
}
1768
/*
 * CRC-16 lookup tables for the reflected polynomial 0xA001.
 *   crc16tbl[0][b]  CRC state after processing the single byte b.
 *   crc16tbl[1][b]  that state advanced by one more (zero) byte; it lets
 *                   lha_crc16() process two input bytes per step.
 */
static uint16_t crc16tbl[2][256];

/*
 * Fill crc16tbl. Only the first call does any work.
 */
static void
lha_crc16_init(void)
{
	unsigned int i;
	static int crc16init = 0;

	if (crc16init)
		return;
	crc16init = 1;

	for (i = 0; i < 256; i++) {
		unsigned int j;
		uint16_t crc = (uint16_t)i;
		/* Shift the byte through, one bit at a time. */
		for (j = 8; j; j--)
			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
		crc16tbl[0][i] = crc;
	}

	for (i = 0; i < 256; i++) {
		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
	}
}

/*
 * NOTE(review): nothing in this block needs __has_builtin any more. The
 * fallback is kept only because it used to be defined here and code
 * further down the file may test it; remove once confirmed unused.
 */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

/*
 * Update a CRC-16 with `len' bytes starting at `pp' and return the new
 * value. lha_crc16_init() must have filled the tables beforehand.
 *
 * The input is always accessed as bytes. Each 16-bit little-endian word
 * is assembled from two byte loads instead of dereferencing a
 * `uint16_t *' into the caller's buffer, so the result does not depend
 * on the buffer's alignment, on the host byte order, or on the
 * compiler's aliasing assumptions.
 */
static uint16_t
lha_crc16(uint16_t crc, const void *pp, size_t len)
{
	const unsigned char *p = (const unsigned char *)pp;

	/* One step folds two input bytes into the CRC. */
#define CRC16W	do {							\
		crc ^= (uint16_t)(p[0] | ((unsigned int)p[1] << 8));	\
		p += 2;							\
		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];	\
	} while (0)

	/* Main loop, unrolled to eight bytes per iteration. */
	for (;len >= 8; len -= 8) {
		CRC16W;
		CRC16W;
		CRC16W;
		CRC16W;
	}
#undef CRC16W

	/* Remaining bytes, one at a time. */
	for (;len; len--) {
		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
	}
	return crc;
}
1857
1858 /*
1859 * Initialize LZHUF decoder.
1860 *
1861 * Returns ARCHIVE_OK if initialization was successful.
1862 * Returns ARCHIVE_FAILED if method is unsupported.
1863 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1864 * error occurred.
1865 */
1866 static int
lzh_decode_init(struct lzh_stream * strm,const char * method)1867 lzh_decode_init(struct lzh_stream *strm, const char *method)
1868 {
1869 struct lzh_dec *ds;
1870 int w_bits, w_size;
1871
1872 if (strm->ds == NULL) {
1873 strm->ds = calloc(1, sizeof(*strm->ds));
1874 if (strm->ds == NULL)
1875 return (ARCHIVE_FATAL);
1876 }
1877 ds = strm->ds;
1878 ds->error = ARCHIVE_FAILED;
1879 if (method == NULL || method[0] != 'l' || method[1] != 'h')
1880 return (ARCHIVE_FAILED);
1881 switch (method[2]) {
1882 case '5':
1883 w_bits = 13;/* 8KiB for window */
1884 break;
1885 case '6':
1886 w_bits = 15;/* 32KiB for window */
1887 break;
1888 case '7':
1889 w_bits = 16;/* 64KiB for window */
1890 break;
1891 default:
1892 return (ARCHIVE_FAILED);/* Not supported. */
1893 }
1894 ds->error = ARCHIVE_FATAL;
1895 /* Expand a window size up to 128 KiB for decompressing process
1896 * performance whatever its original window size is. */
1897 ds->w_size = 1U << 17;
1898 ds->w_mask = ds->w_size -1;
1899 if (ds->w_buff == NULL) {
1900 ds->w_buff = malloc(ds->w_size);
1901 if (ds->w_buff == NULL)
1902 return (ARCHIVE_FATAL);
1903 }
1904 w_size = 1U << w_bits;
1905 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1906 ds->w_pos = 0;
1907 ds->state = 0;
1908 ds->pos_pt_len_size = w_bits + 1;
1909 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1910 ds->literal_pt_len_size = PT_BITLEN_SIZE;
1911 ds->literal_pt_len_bits = 5;
1912 ds->br.cache_buffer = 0;
1913 ds->br.cache_avail = 0;
1914
1915 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1916 != ARCHIVE_OK)
1917 return (ARCHIVE_FATAL);
1918 ds->lt.len_bits = 9;
1919 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1920 != ARCHIVE_OK)
1921 return (ARCHIVE_FATAL);
1922 ds->error = 0;
1923
1924 return (ARCHIVE_OK);
1925 }
1926
1927 /*
1928 * Release LZHUF decoder.
1929 */
1930 static void
lzh_decode_free(struct lzh_stream * strm)1931 lzh_decode_free(struct lzh_stream *strm)
1932 {
1933
1934 if (strm->ds == NULL)
1935 return;
1936 free(strm->ds->w_buff);
1937 lzh_huffman_free(&(strm->ds->lt));
1938 lzh_huffman_free(&(strm->ds->pt));
1939 free(strm->ds);
1940 strm->ds = NULL;
1941 }
1942
/*
 * Bit stream reader.
 *
 * These macros work on a bit cache with the members `cache_buffer' (the
 * cached bits) and `cache_avail' (how many of them are still unread).
 * Every macro argument is parenthesized in the expansion so that any
 * expression can be passed for `n'.
 */
/* Check that the cache buffer has enough bits. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Get compressed data by bit: the next n unread bits of the cache. */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[n])
/* Like lzh_br_bits() but for a cache holding fewer than n bits: the
 * available bits are shifted up to fill an n-bit value. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[n])
/* Read ahead to make sure the cache buffer has enough compressed data we
 * will use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : we met that strm->next_in is empty, we have to get following
 *          bytes. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
/*  True  : the cache buffer has as many bits as we need.
 *  False : there are not enough bits in the cache buffer to be used,
 *          we have to get following bytes if we could. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Notify how many bits we consumed. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
/* Give n bits back to the cache. */
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))

/* cache_masks[n] keeps the low n bits of a 16-bit value for n up to 16;
 * the entries after that repeat 0xFFFF. */
static const uint16_t cache_masks[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000F, 0x001F, 0x003F, 0x007F,
	0x00FF, 0x01FF, 0x03FF, 0x07FF,
	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
1979
1980 /*
1981 * Shift away used bits in the cache data and fill it up with following bits.
1982 * Call this when cache buffer does not have enough bits you need.
1983 *
1984 * Returns 1 if the cache buffer is full.
1985 * Returns 0 if the cache buffer is not full; input buffer is empty.
1986 */
1987 static int
lzh_br_fillup(struct lzh_stream * strm,struct lzh_br * br)1988 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1989 {
1990 int n = CACHE_BITS - br->cache_avail;
1991
1992 for (;;) {
1993 const int x = n >> 3;
1994 if (strm->avail_in >= x) {
1995 switch (x) {
1996 case 8:
1997 br->cache_buffer =
1998 ((uint64_t)strm->next_in[0]) << 56 |
1999 ((uint64_t)strm->next_in[1]) << 48 |
2000 ((uint64_t)strm->next_in[2]) << 40 |
2001 ((uint64_t)strm->next_in[3]) << 32 |
2002 ((uint32_t)strm->next_in[4]) << 24 |
2003 ((uint32_t)strm->next_in[5]) << 16 |
2004 ((uint32_t)strm->next_in[6]) << 8 |
2005 (uint32_t)strm->next_in[7];
2006 strm->next_in += 8;
2007 strm->avail_in -= 8;
2008 br->cache_avail += 8 * 8;
2009 return (1);
2010 case 7:
2011 br->cache_buffer =
2012 (br->cache_buffer << 56) |
2013 ((uint64_t)strm->next_in[0]) << 48 |
2014 ((uint64_t)strm->next_in[1]) << 40 |
2015 ((uint64_t)strm->next_in[2]) << 32 |
2016 ((uint64_t)strm->next_in[3]) << 24 |
2017 ((uint64_t)strm->next_in[4]) << 16 |
2018 ((uint64_t)strm->next_in[5]) << 8 |
2019 (uint64_t)strm->next_in[6];
2020 strm->next_in += 7;
2021 strm->avail_in -= 7;
2022 br->cache_avail += 7 * 8;
2023 return (1);
2024 case 6:
2025 br->cache_buffer =
2026 (br->cache_buffer << 48) |
2027 ((uint64_t)strm->next_in[0]) << 40 |
2028 ((uint64_t)strm->next_in[1]) << 32 |
2029 ((uint64_t)strm->next_in[2]) << 24 |
2030 ((uint64_t)strm->next_in[3]) << 16 |
2031 ((uint64_t)strm->next_in[4]) << 8 |
2032 (uint64_t)strm->next_in[5];
2033 strm->next_in += 6;
2034 strm->avail_in -= 6;
2035 br->cache_avail += 6 * 8;
2036 return (1);
2037 case 0:
2038 /* We have enough compressed data in
2039 * the cache buffer.*/
2040 return (1);
2041 default:
2042 break;
2043 }
2044 }
2045 if (strm->avail_in == 0) {
2046 /* There is not enough compressed data to fill up the
2047 * cache buffer. */
2048 return (0);
2049 }
2050 br->cache_buffer =
2051 (br->cache_buffer << 8) | *strm->next_in++;
2052 strm->avail_in--;
2053 br->cache_avail += 8;
2054 n -= 8;
2055 }
2056 }
2057
2058 /*
2059 * Decode LZHUF.
2060 *
2061 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
2062 * Please set available buffer and call this function again.
2063 * 2. Returns ARCHIVE_EOF if decompression has been completed.
2064 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
2065 * is broken or you do not set 'last' flag properly.
2066 * 4. 'last' flag is very important, you must set 1 to the flag if there
2067 * is no input data. The lha compressed data format does not provide how
2068 * to know the compressed data is really finished.
2069 * Note: lha command utility check if the total size of output bytes is
2070 * reached the uncompressed size recorded in its header. it does not mind
2071 * that the decoding process is properly finished.
2072 * GNU ZIP can decompress another compressed file made by SCO LZH compress.
2073 * it handles EOF as null to fill read buffer with zero until the decoding
2074 * process meet 2 bytes of zeros at reading a size of a next chunk, so the
2075 * zeros are treated as the mark of the end of the data although the zeros
2076 * is dummy, not the file data.
2077 */
2078 static int lzh_read_blocks(struct lzh_stream *, int);
2079 static int lzh_decode_blocks(struct lzh_stream *, int);
2080 #define ST_RD_BLOCK 0
2081 #define ST_RD_PT_1 1
2082 #define ST_RD_PT_2 2
2083 #define ST_RD_PT_3 3
2084 #define ST_RD_PT_4 4
2085 #define ST_RD_LITERAL_1 5
2086 #define ST_RD_LITERAL_2 6
2087 #define ST_RD_LITERAL_3 7
2088 #define ST_RD_POS_DATA_1 8
2089 #define ST_GET_LITERAL 9
2090 #define ST_GET_POS_1 10
2091 #define ST_GET_POS_2 11
2092 #define ST_COPY_DATA 12
2093
2094 static int
lzh_decode(struct lzh_stream * strm,int last)2095 lzh_decode(struct lzh_stream *strm, int last)
2096 {
2097 struct lzh_dec *ds = strm->ds;
2098 int avail_in;
2099 int r;
2100
2101 if (ds->error)
2102 return (ds->error);
2103
2104 avail_in = strm->avail_in;
2105 do {
2106 if (ds->state < ST_GET_LITERAL)
2107 r = lzh_read_blocks(strm, last);
2108 else
2109 r = lzh_decode_blocks(strm, last);
2110 } while (r == 100);
2111 strm->total_in += avail_in - strm->avail_in;
2112 return (r);
2113 }
2114
2115 static void
lzh_emit_window(struct lzh_stream * strm,size_t s)2116 lzh_emit_window(struct lzh_stream *strm, size_t s)
2117 {
2118 strm->ref_ptr = strm->ds->w_buff;
2119 strm->avail_out = (int)s;
2120 strm->total_out += s;
2121 }
2122
/*
 * Read the header of the next group of blocks: the 16-bit block count
 * followed by the bit-length tables from which the huffman tables ds->pt
 * and ds->lt are built.
 *
 * This is a resumable state machine driven by ds->state.  Whenever the
 * input runs dry and `last' is zero, the state to resume from is stored in
 * ds->state and ARCHIVE_OK is returned; the caller supplies more input and
 * calls again.
 *
 * Returns:
 *  ARCHIVE_OK     : more input is needed, or (at the end of the input) the
 *                   pending part of the window has just been emitted.
 *  ARCHIVE_EOF    : `last' is set, the input is exhausted at a block
 *                   boundary and nothing is left to emit.
 *  ARCHIVE_FAILED : invalid or truncated data; also stored in ds->error.
 *  100            : the state reached ST_GET_LITERAL; lzh_decode() then
 *                   continues with lzh_decode_blocks().
 */
static int
lzh_read_blocks(struct lzh_stream *strm, int last)
{
	struct lzh_dec *ds = strm->ds;
	struct lzh_br *br = &(ds->br);
	int c = 0, i;
	unsigned rbits;

	for (;;) {
		switch (ds->state) {
		case ST_RD_BLOCK:
			/*
			 * Read the block count, which tells how many blocks
			 * we will handle. A block is composed of a literal
			 * and a match, or sometimes of a literal only; in
			 * particular there is no reference data at the
			 * beginning of the decompression.
			 */
			if (!lzh_br_read_ahead_0(strm, br, 16)) {
				if (!last)
					/* We need following data. */
					return (ARCHIVE_OK);
				if (lzh_br_has(br, 8)) {
					/*
					 * It seems there are extra bits.
					 *  1. Compressed data is broken.
					 *  2. `last' flag is not properly
					 *     set.
					 */
					goto failed;
				}
				if (ds->w_pos > 0) {
					/* Emit what is still pending in the
					 * window before reporting EOF on
					 * the next call. */
					lzh_emit_window(strm, ds->w_pos);
					ds->w_pos = 0;
					return (ARCHIVE_OK);
				}
				/* End of compressed data; we have completely
				 * handled all compressed data. */
				return (ARCHIVE_EOF);
			}
			ds->blocks_avail = lzh_br_bits(br, 16);
			/* A block count of zero is rejected. */
			if (ds->blocks_avail == 0)
				goto failed;
			lzh_br_consume(br, 16);
			/*
			 * Read a literal table compressed in huffman
			 * coding.
			 */
			ds->pt.len_size = ds->literal_pt_len_size;
			ds->pt.len_bits = ds->literal_pt_len_bits;
			ds->reading_position = 0;
			/* FALL THROUGH */
		case ST_RD_PT_1:
			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
			 * used in reading both a literal table and a
			 * position table. */
			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_PT_1;
				return (ARCHIVE_OK);
			}
			/* Number of bit lengths which follow. */
			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
			lzh_br_consume(br, ds->pt.len_bits);
			/* FALL THROUGH */
		case ST_RD_PT_2:
			if (ds->pt.len_avail == 0) {
				/* There is no bitlen; the next len_bits bits
				 * hold the single code of a fake table. */
				if (!lzh_br_read_ahead(strm, br,
				    ds->pt.len_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->state = ST_RD_PT_2;
					return (ARCHIVE_OK);
				}
				if (!lzh_make_fake_table(&(ds->pt),
				    lzh_br_bits(br, ds->pt.len_bits)))
					goto failed;/* Invalid data. */
				lzh_br_consume(br, ds->pt.len_bits);
				/* The position table is read last; once it
				 * is done the blocks can be decoded. */
				if (ds->reading_position)
					ds->state = ST_GET_LITERAL;
				else
					ds->state = ST_RD_LITERAL_1;
				break;
			} else if (ds->pt.len_avail > ds->pt.len_size)
				goto failed;/* Invalid data. */
			ds->loop = 0;
			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
			/* ST_RD_PT_3 (the zero run after the third bit
			 * length) is skipped for short tables and for the
			 * position table. */
			if (ds->pt.len_avail < 3 ||
			    ds->pt.len_size == ds->pos_pt_len_size) {
				ds->state = ST_RD_PT_4;
				break;
			}
			/* FALL THROUGH */
		case ST_RD_PT_3:
			/* Read the first three bit lengths; ds->loop keeps
			 * the progress across calls. */
			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
			if (ds->loop < 3) {
				if (ds->loop < 0 || last)
					goto failed;/* Invalid data. */
				/* Not completed, get following data. */
				ds->state = ST_RD_PT_3;
				return (ARCHIVE_OK);
			}
			/* A 2-bit count tells how many of the following
			 * bit lengths are zero. */
			if (!lzh_br_read_ahead(strm, br, 2)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_PT_3;
				return (ARCHIVE_OK);
			}
			c = lzh_br_bits(br, 2);
			lzh_br_consume(br, 2);
			if (c > ds->pt.len_avail - 3)
				goto failed;/* Invalid data. */
			for (i = 3; c-- > 0 ;)
				ds->pt.bitlen[i++] = 0;
			ds->loop = i;
			/* FALL THROUGH */
		case ST_RD_PT_4:
			/* Read the remaining bit lengths. */
			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
			    ds->pt.len_avail);
			if (ds->loop < ds->pt.len_avail) {
				if (ds->loop < 0 || last)
					goto failed;/* Invalid data. */
				/* Not completed, get following data. */
				ds->state = ST_RD_PT_4;
				return (ARCHIVE_OK);
			}
			if (!lzh_make_huffman_table(&(ds->pt)))
				goto failed;/* Invalid data */
			if (ds->reading_position) {
				/* That was the position table. */
				ds->state = ST_GET_LITERAL;
				break;
			}
			/* FALL THROUGH */
		case ST_RD_LITERAL_1:
			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_LITERAL_1;
				return (ARCHIVE_OK);
			}
			/* Number of literal bit lengths which follow. */
			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
			lzh_br_consume(br, ds->lt.len_bits);
			/* FALL THROUGH */
		case ST_RD_LITERAL_2:
			if (ds->lt.len_avail == 0) {
				/* There is no bitlen; the next len_bits bits
				 * hold the single code of a fake table. */
				if (!lzh_br_read_ahead(strm, br,
				    ds->lt.len_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->state = ST_RD_LITERAL_2;
					return (ARCHIVE_OK);
				}
				if (!lzh_make_fake_table(&(ds->lt),
				    lzh_br_bits(br, ds->lt.len_bits)))
					goto failed;/* Invalid data */
				lzh_br_consume(br, ds->lt.len_bits);
				ds->state = ST_RD_POS_DATA_1;
				break;
			} else if (ds->lt.len_avail > ds->lt.len_size)
				goto failed;/* Invalid data */
			ds->loop = 0;
			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
			/* FALL THROUGH */
		case ST_RD_LITERAL_3:
			/*
			 * Read the literal bit lengths, each of which is
			 * coded with the ds->pt table:
			 *   code 0     : one bit length of zero
			 *   code 1     : 4-bit count + 3 zero bit lengths
			 *   code 2     : 9-bit count + 20 zero bit lengths
			 *   code c > 2 : a bit length of c - 2
			 * ds->loop keeps the progress across calls.
			 */
			i = ds->loop;
			while (i < ds->lt.len_avail) {
				if (!lzh_br_read_ahead(strm, br,
				    ds->pt.max_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->loop = i;
					ds->state = ST_RD_LITERAL_3;
					return (ARCHIVE_OK);
				}
				rbits = lzh_br_bits(br, ds->pt.max_bits);
				c = lzh_decode_huffman(&(ds->pt), rbits);
				if (c > 2) {
					/* Note: 'c' will never be more than
					 * eighteen since it's limited by
					 * PT_BITLEN_SIZE, which is being set
					 * to ds->pt.len_size through
					 * ds->literal_pt_len_size. */
					lzh_br_consume(br, ds->pt.bitlen[c]);
					c -= 2;
					ds->lt.freq[c]++;
					ds->lt.bitlen[i++] = c;
				} else if (c == 0) {
					lzh_br_consume(br, ds->pt.bitlen[c]);
					ds->lt.bitlen[i++] = 0;
				} else {
					/* c == 1 or c == 2 */
					int n = (c == 1)?4:9;
					/* Nothing is consumed until both the
					 * code and its count are available,
					 * so that this step can be redone
					 * after more input has arrived. */
					if (!lzh_br_read_ahead(strm, br,
					     ds->pt.bitlen[c] + n)) {
						if (last) /* Truncated data. */
							goto failed;
						ds->loop = i;
						ds->state = ST_RD_LITERAL_3;
						return (ARCHIVE_OK);
					}
					lzh_br_consume(br, ds->pt.bitlen[c]);
					c = lzh_br_bits(br, n);
					lzh_br_consume(br, n);
					c += (n == 4)?3:20;
					if (i + c > ds->lt.len_avail)
						goto failed;/* Invalid data */
					memset(&(ds->lt.bitlen[i]), 0, c);
					i += c;
				}
			}
			if (i > ds->lt.len_avail ||
			    !lzh_make_huffman_table(&(ds->lt)))
				goto failed;/* Invalid data */
			/* FALL THROUGH */
		case ST_RD_POS_DATA_1:
			/*
			 * Read a position table compressed in huffman
			 * coding. This reuses the ST_RD_PT_* states with
			 * the position table parameters.
			 */
			ds->pt.len_size = ds->pos_pt_len_size;
			ds->pt.len_bits = ds->pos_pt_len_bits;
			ds->reading_position = 1;
			ds->state = ST_RD_PT_1;
			break;
		case ST_GET_LITERAL:
			/* All tables are ready; let lzh_decode() switch
			 * over to lzh_decode_blocks(). */
			return (100);
		}
	}
failed:
	/* Remember the failure; lzh_decode() returns it from now on. */
	return (ds->error = ARCHIVE_FAILED);
}
2357
/*
 * Decode the literal and match codes of the current group of blocks into
 * the sliding window (ds->w_buff).
 *
 * The function works on local copies of the decoder state (bit reader,
 * window position, remaining block count, ...) and writes them back to
 * `ds' before it returns, except on failure.
 *
 * Returns:
 *  ARCHIVE_OK     : more input is needed, or the window became full and
 *                   has been handed out through lzh_emit_window().
 *  ARCHIVE_FAILED : invalid or truncated data; also stored in ds->error.
 *  100            : all blocks of the group have been decoded; the state
 *                   is set to ST_RD_BLOCK so that lzh_decode() continues
 *                   with lzh_read_blocks().
 */
static int
lzh_decode_blocks(struct lzh_stream *strm, int last)
{
	struct lzh_dec *ds = strm->ds;
	/* Local copy of the bit reader; stored back into ds->br on exit. */
	struct lzh_br bre = ds->br;
	struct huffman *lt = &(ds->lt);
	struct huffman *pt = &(ds->pt);
	unsigned char *w_buff = ds->w_buff;
	unsigned char *lt_bitlen = lt->bitlen;
	unsigned char *pt_bitlen = pt->bitlen;
	int blocks_avail = ds->blocks_avail, c = 0;
	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
	int state = ds->state;

	for (;;) {
		switch (state) {
		case ST_GET_LITERAL:
			for (;;) {
				if (blocks_avail == 0) {
					/* We have decoded all blocks.
					 * Let's handle next blocks. */
					ds->state = ST_RD_BLOCK;
					ds->br = bre;
					ds->blocks_avail = 0;
					ds->w_pos = w_pos;
					ds->copy_pos = 0;
					return (100);
				}

				/* lzh_br_read_ahead() always tries to fill the
				 * cache buffer up. In specific situation we
				 * are close to the end of the data, the cache
				 * buffer will not be full and thus we have to
				 * determine if the cache buffer has some bits
				 * as much as we need after lzh_br_read_ahead()
				 * failed. */
				if (!lzh_br_read_ahead(strm, &bre,
				    lt_max_bits)) {
					if (!last)
						goto next_data;
					/* Remaining bits are less than
					 * maximum bits(lt.max_bits) but maybe
					 * it still remains as much as we need,
					 * so we should try to use it with
					 * dummy bits. */
					c = lzh_decode_huffman(lt,
					      lzh_br_bits_forced(&bre,
					        lt_max_bits));
					lzh_br_consume(&bre, lt_bitlen[c]);
					/* A negative bit count means that
					 * the code used dummy bits. */
					if (!lzh_br_has(&bre, 0))
						goto failed;/* Over read. */
				} else {
					c = lzh_decode_huffman(lt,
					      lzh_br_bits(&bre, lt_max_bits));
					lzh_br_consume(&bre, lt_bitlen[c]);
				}
				blocks_avail--;
				if (c > UCHAR_MAX)
					/* Current block is a match data. */
					break;
				/*
				 * 'c' is exactly a literal code.
				 */
				/* Save a decoded code to reference it
				 * afterward. */
				w_buff[w_pos] = c;
				if (++w_pos >= w_size) {
					/* The window is full: hand it out
					 * and restart at its beginning. */
					w_pos = 0;
					lzh_emit_window(strm, w_size);
					goto next_data;
				}
			}
			/* 'c' is the length of a match pattern we have
			 * already extracted, which has been stored in
			 * window(ds->w_buff). */
			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
			/* FALL THROUGH */
		case ST_GET_POS_1:
			/*
			 * Get a reference position.
			 */
			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
				if (!last) {
					/* Resume here once more input is
					 * available. */
					state = ST_GET_POS_1;
					ds->copy_len = copy_len;
					goto next_data;
				}
				/* Final input: try the remaining bits padded
				 * with dummy bits, as for the literal. */
				copy_pos = lzh_decode_huffman(pt,
				    lzh_br_bits_forced(&bre, pt_max_bits));
				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
				if (!lzh_br_has(&bre, 0))
					goto failed;/* Over read. */
			} else {
				copy_pos = lzh_decode_huffman(pt,
				    lzh_br_bits(&bre, pt_max_bits));
				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
			}
			/* FALL THROUGH */
		case ST_GET_POS_2:
			if (copy_pos > 1) {
				/* We need an additional adjustment number to
				 * the position: copy_pos - 1 extra bits. */
				int p = copy_pos - 1;
				if (!lzh_br_read_ahead(strm, &bre, p)) {
					if (last)
						goto failed;/* Truncated data.*/
					state = ST_GET_POS_2;
					ds->copy_len = copy_len;
					ds->copy_pos = copy_pos;
					goto next_data;
				}
				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
				lzh_br_consume(&bre, p);
			}
			/* The position is actually a distance from the last
			 * code we had extracted and thus we have to convert
			 * it to a position of the window. */
			copy_pos = (w_pos - copy_pos - 1) & w_mask;
			/* FALL THROUGH */
		case ST_COPY_DATA:
			/*
			 * Copy `copy_len' bytes as extracted data from
			 * the window into the output buffer.
			 */
			for (;;) {
				int l;

				/* Limit this step so that neither the source
				 * nor the destination runs past the end of
				 * the window. */
				l = copy_len;
				if (copy_pos > w_pos) {
					if (l > w_size - copy_pos)
						l = w_size - copy_pos;
				} else {
					if (l > w_size - w_pos)
						l = w_size - w_pos;
				}
				if ((copy_pos + l < w_pos)
				    || (w_pos + l < copy_pos)) {
					/* No overlap. */
					memcpy(w_buff + w_pos,
					    w_buff + copy_pos, l);
				} else {
					/* The ranges may overlap: a forward
					 * byte-wise copy (two bytes per
					 * round) is used instead of
					 * memcpy(). */
					const unsigned char *s;
					unsigned char *d;
					int li;

					d = w_buff + w_pos;
					s = w_buff + copy_pos;
					for (li = 0; li < l-1;) {
						d[li] = s[li];li++;
						d[li] = s[li];li++;
					}
					if (li < l)
						d[li] = s[li];
				}
				w_pos += l;
				if (w_pos == w_size) {
					/* The window is full: hand it out
					 * and, if the match is not finished,
					 * remember where to continue. */
					w_pos = 0;
					lzh_emit_window(strm, w_size);
					if (copy_len <= l)
						state = ST_GET_LITERAL;
					else {
						state = ST_COPY_DATA;
						ds->copy_len = copy_len - l;
						ds->copy_pos =
						    (copy_pos + l) & w_mask;
					}
					goto next_data;
				}
				if (copy_len <= l)
					/* A copy of current pattern ended. */
					break;
				/* The source wrapped around; go on with the
				 * rest of the match. */
				copy_len -= l;
				copy_pos = (copy_pos + l) & w_mask;
			}
			state = ST_GET_LITERAL;
			break;
		}
	}
failed:
	/* Remember the failure; lzh_decode() returns it from now on. */
	return (ds->error = ARCHIVE_FAILED);
next_data:
	/* Store the local state back so that the next call can resume. */
	ds->br = bre;
	ds->blocks_avail = blocks_avail;
	ds->state = state;
	ds->w_pos = w_pos;
	return (ARCHIVE_OK);
}
2547
2548 static int
lzh_huffman_init(struct huffman * hf,size_t len_size,int tbl_bits)2549 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2550 {
2551 int bits;
2552
2553 if (hf->bitlen == NULL) {
2554 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2555 if (hf->bitlen == NULL)
2556 return (ARCHIVE_FATAL);
2557 }
2558 if (hf->tbl == NULL) {
2559 if (tbl_bits < HTBL_BITS)
2560 bits = tbl_bits;
2561 else
2562 bits = HTBL_BITS;
2563 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2564 if (hf->tbl == NULL)
2565 return (ARCHIVE_FATAL);
2566 }
2567 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2568 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2569 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2570 if (hf->tree == NULL)
2571 return (ARCHIVE_FATAL);
2572 }
2573 hf->len_size = (int)len_size;
2574 hf->tbl_bits = tbl_bits;
2575 return (ARCHIVE_OK);
2576 }
2577
2578 static void
lzh_huffman_free(struct huffman * hf)2579 lzh_huffman_free(struct huffman *hf)
2580 {
2581 free(hf->bitlen);
2582 free(hf->tbl);
2583 free(hf->tree);
2584 }
2585
/*
 * Lookup table for the long form of a bit length, used by
 * lzh_read_pt_bitlen() when the first three bits of a code are all ones.
 *
 * The index is the low 10 bits of a 13-bit peek, i.e. the ten bits which
 * follow those three ones.  The entry is the decoded bit length, which is
 * also the base for the number of bits to consume (entry - 3).
 * An entry of 0 (all ten bits set) marks invalid data.
 */
static const char bitlen_tbl[0x400] = {
	/* 0x000 - 0x1FF: value 7 */
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	/* 0x200 - 0x2FF: value 8 */
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	/* 0x300 - 0x37F: value 9 */
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	/* 0x380 - 0x3BF: value 10 */
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	/* 0x3C0 - 0x3DF: value 11 */
	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
	/* 0x3E0 - 0x3EF: value 12 */
	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
	/* 0x3F0 - 0x3FF: values 13 to 16, then 0 (invalid) */
	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
};
2652 static int
lzh_read_pt_bitlen(struct lzh_stream * strm,int start,int end)2653 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2654 {
2655 struct lzh_dec *ds = strm->ds;
2656 struct lzh_br *br = &(ds->br);
2657 int c, i;
2658
2659 for (i = start; i < end; ) {
2660 /*
2661 * bit pattern the number we need
2662 * 000 -> 0
2663 * 001 -> 1
2664 * 010 -> 2
2665 * ...
2666 * 110 -> 6
2667 * 1110 -> 7
2668 * 11110 -> 8
2669 * ...
2670 * 1111111111110 -> 16
2671 */
2672 if (!lzh_br_read_ahead(strm, br, 3))
2673 return (i);
2674 if ((c = lzh_br_bits(br, 3)) == 7) {
2675 if (!lzh_br_read_ahead(strm, br, 13))
2676 return (i);
2677 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2678 if (c)
2679 lzh_br_consume(br, c - 3);
2680 else
2681 return (-1);/* Invalid data. */
2682 } else
2683 lzh_br_consume(br, 3);
2684 ds->pt.bitlen[i++] = c;
2685 ds->pt.freq[c]++;
2686 }
2687 return (i);
2688 }
2689
2690 static int
lzh_make_fake_table(struct huffman * hf,uint16_t c)2691 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2692 {
2693 if (c >= hf->len_size)
2694 return (0);
2695 hf->tbl[0] = c;
2696 hf->max_bits = 0;
2697 hf->shift_bits = 0;
2698 hf->bitlen[hf->tbl[0]] = 0;
2699 return (1);
2700 }
2701
/*
 * Make a huffman coding table.
 *
 * Input:  hf->bitlen[0 .. hf->len_avail-1] (code length of each symbol,
 *         0 = symbol unused) and hf->freq[1 .. 16] (number of symbols per
 *         code length).
 * Output: hf->max_bits, hf->shift_bits, the direct lookup table hf->tbl
 *         and, for codes longer than HTBL_BITS, the tree hf->tree.
 *
 * Codes of up to HTBL_BITS bits are stored directly in hf->tbl, each symbol
 * filling every slot which starts with its code.  For longer codes the
 * table slot holds `len_avail + node index' and the remaining bits are
 * resolved through hf->tree (see lzh_decode_huffman_tree()).
 *
 * Returns 1 on success, 0 if the bit lengths do not describe a valid code.
 */
static int
lzh_make_huffman_table(struct huffman *hf)
{
	uint16_t *tbl;
	const unsigned char *bitlen;
	int bitptn[17], weight[17];
	int i, maxbits = 0, ptn, tbl_size, w;
	int diffbits, len_avail;

	/*
	 * Initialize bit patterns.
	 * On a 16-bit scale, bitptn[i] is the first code of length i and
	 * weight[i] is the span covered by one code of length i.
	 */
	ptn = 0;
	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
		bitptn[i] = ptn;
		weight[i] = w;
		if (hf->freq[i]) {
			ptn += hf->freq[i] * w;
			maxbits = i;
		}
	}
	/* The codes must cover the 16-bit code space exactly and must not
	 * be longer than the table was set up for. */
	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
		return (0);/* Invalid */

	hf->max_bits = maxbits;

	/*
	 * Cut out extra bits which we won't house in the table.
	 * This preparation reduces the same calculation in the for-loop
	 * making the table.
	 */
	if (maxbits < 16) {
		int ebits = 16 - maxbits;
		for (i = 1; i <= maxbits; i++) {
			bitptn[i] >>= ebits;
			weight[i] >>= ebits;
		}
	}
	if (maxbits > HTBL_BITS) {
		unsigned htbl_max;
		uint16_t *p;

		/* Scale the short codes down to HTBL_BITS bits, which is
		 * the width of the direct table index. */
		diffbits = maxbits - HTBL_BITS;
		for (i = 1; i <= HTBL_BITS; i++) {
			bitptn[i] >>= diffbits;
			weight[i] >>= diffbits;
		}
		/* Clear the slots behind the last direct code; these are
		 * the slots which will refer to tree nodes (0 = no node
		 * assigned yet). */
		htbl_max = bitptn[HTBL_BITS] +
		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
		p = &(hf->tbl[htbl_max]);
		while (p < &hf->tbl[1U<<HTBL_BITS])
			*p++ = 0;
	} else
		diffbits = 0;
	hf->shift_bits = diffbits;

	/*
	 * Make the table.
	 */
	tbl_size = 1 << HTBL_BITS;
	tbl = hf->tbl;
	bitlen = hf->bitlen;
	len_avail = hf->len_avail;
	hf->tree_used = 0;
	for (i = 0; i < len_avail; i++) {
		uint16_t *p;
		int len, cnt;
		uint16_t bit;
		int extlen;
		struct htree_t *ht;

		if (bitlen[i] == 0)
			continue;
		/* Get a bit pattern */
		len = bitlen[i];
		ptn = bitptn[len];
		cnt = weight[len];
		if (len <= HTBL_BITS) {
			/* Calculate next bit pattern */
			if ((bitptn[len] = ptn + cnt) > tbl_size)
				return (0);/* Invalid */
			/* Update the table: store symbol `i' into the `cnt'
			 * slots starting at `ptn'. */
			p = &(tbl[ptn]);
			if (cnt > 7) {
				uint16_t *pc;

				/* Fill the last eight slots by hand, then
				 * replicate them downwards with memcpy(). */
				cnt -= 8;
				pc = &p[cnt];
				pc[0] = (uint16_t)i;
				pc[1] = (uint16_t)i;
				pc[2] = (uint16_t)i;
				pc[3] = (uint16_t)i;
				pc[4] = (uint16_t)i;
				pc[5] = (uint16_t)i;
				pc[6] = (uint16_t)i;
				pc[7] = (uint16_t)i;
				if (cnt > 7) {
					cnt -= 8;
					memcpy(&p[cnt], pc,
						8 * sizeof(uint16_t));
					pc = &p[cnt];
					while (cnt > 15) {
						cnt -= 16;
						memcpy(&p[cnt], pc,
							16 * sizeof(uint16_t));
					}
				}
				if (cnt)
					memcpy(p, pc, cnt * sizeof(uint16_t));
			} else {
				while (cnt > 1) {
					p[--cnt] = (uint16_t)i;
					p[--cnt] = (uint16_t)i;
				}
				if (cnt)
					p[--cnt] = (uint16_t)i;
			}
			continue;
		}

		/*
		 * A bit length is too big to be housed to a direct table,
		 * so we use a tree model for its extra bits.
		 */
		bitptn[len] = ptn + cnt;
		/* Mask of the first bit below the table index. */
		bit = 1U << (diffbits -1);
		extlen = len - HTBL_BITS;

		/* Find, or create, the root node for this table slot. */
		p = &(tbl[ptn >> diffbits]);
		if (*p == 0) {
			*p = len_avail + hf->tree_used;
			ht = &(hf->tree[hf->tree_used++]);
			if (hf->tree_used > hf->tree_avail)
				return (0);/* Invalid */
			ht->left = 0;
			ht->right = 0;
		} else {
			if (*p < len_avail ||
			    *p >= (len_avail + hf->tree_used))
				return (0);/* Invalid */
			ht = &(hf->tree[*p - len_avail]);
		}
		/* Walk down one level per extra bit except the last one,
		 * creating nodes as needed; a set bit selects `left', a
		 * clear bit selects `right'. */
		while (--extlen > 0) {
			if (ptn & bit) {
				if (ht->left < len_avail) {
					ht->left = len_avail + hf->tree_used;
					ht = &(hf->tree[hf->tree_used++]);
					if (hf->tree_used > hf->tree_avail)
						return (0);/* Invalid */
					ht->left = 0;
					ht->right = 0;
				} else {
					ht = &(hf->tree[ht->left - len_avail]);
				}
			} else {
				if (ht->right < len_avail) {
					ht->right = len_avail + hf->tree_used;
					ht = &(hf->tree[hf->tree_used++]);
					if (hf->tree_used > hf->tree_avail)
						return (0);/* Invalid */
					ht->left = 0;
					ht->right = 0;
				} else {
					ht = &(hf->tree[ht->right - len_avail]);
				}
			}
			bit >>= 1;
		}
		/* The last bit selects the leaf which takes symbol `i';
		 * the leaf must still be empty. */
		if (ptn & bit) {
			if (ht->left != 0)
				return (0);/* Invalid */
			ht->left = (uint16_t)i;
		} else {
			if (ht->right != 0)
				return (0);/* Invalid */
			ht->right = (uint16_t)i;
		}
	}
	return (1);
}
2885
2886 static int
lzh_decode_huffman_tree(struct huffman * hf,unsigned rbits,int c)2887 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2888 {
2889 struct htree_t *ht;
2890 int extlen;
2891
2892 ht = hf->tree;
2893 extlen = hf->shift_bits;
2894 while (c >= hf->len_avail) {
2895 c -= hf->len_avail;
2896 if (extlen-- <= 0 || c >= hf->tree_used)
2897 return (0);
2898 if (rbits & (1U << extlen))
2899 c = ht[c].left;
2900 else
2901 c = ht[c].right;
2902 }
2903 return (c);
2904 }
2905
2906 static inline int
lzh_decode_huffman(struct huffman * hf,unsigned rbits)2907 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2908 {
2909 int c;
2910 /*
2911 * At first search an index table for a bit pattern.
2912 * If it fails, search a huffman tree for.
2913 */
2914 c = hf->tbl[rbits >> hf->shift_bits];
2915 if (c < hf->len_avail || hf->len_avail == 0)
2916 return (c);
2917 /* This bit pattern needs to be found out at a huffman tree. */
2918 return (lzh_decode_huffman_tree(hf, rbits, c));
2919 }
2920
2921