xref: /freebsd/contrib/libarchive/libarchive/archive_read_support_format_lha.c (revision bd66c1b43e33540205dbc1187c2f2a15c58b57ba)
1 /*-
2  * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_endian.h"
47 
48 
#define MAXMATCH		256	/* Maximum match length. */
#define MINMATCH		3	/* Minimum match length. */
/*
 * Literal table format:
 * +0              +256                      +510
 * +---------------+-------------------------+
 * | literal code  |       match length      |
 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
 * +---------------+-------------------------+
 *  <---          LT_BITLEN_SIZE         --->
 */
/* Literal table size: one slot per literal byte value (UCHAR_MAX + 1)
 * plus one slot per match length in MINMATCH..MAXMATCH, i.e. 510 entries
 * when UCHAR_MAX is 255. */
#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
/* Position table size.
 * Note: this is used for both the position table and the pre-literal
 * table. */
#define PT_BITLEN_SIZE		(3 + 16)
65 
/*
 * State of the LZH decoder: sliding window, bit-stream reader cache and
 * the two Huffman tables (`lt' and `pt').
 * NOTE(review): the code that drives most of these fields is outside this
 * part of the file; field comments marked "presumably" need confirming
 * against lzh_decode() and friends.
 */
struct lzh_dec {
	/* Decoding status. */
	int     		 state;

	/*
	 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
	 * data.
	 */
	int			 w_size;
	/* Presumably w_size - 1, used to wrap positions -- TODO confirm. */
	int			 w_mask;
	/* Window buffer, which is a loop buffer. */
	unsigned char		*w_buff;
	/* The insert position to the window. */
	int			 w_pos;
	/* The position where we can copy decoded code from the window. */
	int     		 copy_pos;
	/* The length how many bytes we can copy decoded code from
	 * the window. */
	int     		 copy_len;

	/*
	 * Bit stream reader.
	 */
	struct lzh_br {
/* The bit cache is a single 64-bit word; CACHE_BITS is its width in bits. */
#define CACHE_TYPE		uint64_t
#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
		/* Cache buffer. */
		CACHE_TYPE	 cache_buffer;
		/* Indicates how many bits avail in cache_buffer. */
		int		 cache_avail;
	} br;

	/*
	 * Huffman coding.
	 */
	struct huffman {
		/* Presumably: capacity of `bitlen', number of entries in
		 * use, and bit width of the length count -- TODO confirm. */
		int		 len_size;
		int		 len_avail;
		int		 len_bits;
		/* Presumably a histogram of code lengths 0..16. */
		int		 freq[17];
		/* Presumably the code length of each symbol. */
		unsigned char	*bitlen;

		/*
		 * Use a index table. It's faster than searching a huffman
		 * coding tree, which is a binary tree. But a use of a large
		 * index table causes L1 cache read miss many times.
		 */
#define HTBL_BITS	10
		int		 max_bits;
		int		 shift_bits;
		int		 tbl_bits;
		int		 tree_used;
		int		 tree_avail;
		/* Direct access table. */
		uint16_t	*tbl;
		/* Binary tree table for extra bits over the direct access. */
		struct htree_t {
			uint16_t left;
			uint16_t right;
		}		*tree;
	}			 lt, pt;	/* Literal table and position table. */

	/* Block-level decoding parameters; their exact meaning is set by
	 * code outside this part of the file. */
	int			 blocks_avail;
	int			 pos_pt_len_size;
	int			 pos_pt_len_bits;
	int			 literal_pt_len_size;
	int			 literal_pt_len_bits;
	int			 reading_position;
	int			 loop;
	int			 error;
};
137 
/*
 * I/O descriptor handed to the LZH decoder functions (lzh_decode_init(),
 * lzh_decode(), lzh_decode_free()).
 * NOTE(review): the field roles below are inferred from their names; the
 * code that reads and writes them is outside this part of the file.
 */
struct lzh_stream {
	/* Presumably the next compressed input byte and the number of
	 * input bytes available at that pointer. */
	const unsigned char	*next_in;
	int			 avail_in;
	/* Presumably the running count of consumed input bytes. */
	int64_t			 total_in;
	/* Presumably points at decoded data to hand back to the caller. */
	const unsigned char	*ref_ptr;
	int			 avail_out;
	/* Presumably the running count of produced output bytes. */
	int64_t			 total_out;
	/* Decoder state owned by this stream. */
	struct lzh_dec		*ds;
};
147 
/*
 * Per-archive state of the LHA reader.  One instance is allocated by
 * archive_read_support_format_lha() and retrieved through
 * a->format->data by the format callbacks.  Most header-derived fields
 * are reset at the start of every archive_read_format_lha_read_header()
 * call.
 */
struct lha {
	/* entry_bytes_remaining is the number of bytes we expect.	    */
	int64_t                  entry_offset;
	int64_t                  entry_bytes_remaining;
	int64_t			 entry_unconsumed;
	uint16_t		 entry_crc_calculated;

	size_t			 header_size;	/* header size		    */
	unsigned char		 level;		/* header level		    */
	/* The three method characters between the dashes of the
	 * "-xxx-" signature, e.g. "lh5"; not NUL-terminated. */
	char			 method[3];	/* compress type	    */
	int64_t			 compsize;	/* compressed data size	    */
	int64_t			 origsize;	/* original file size	    */
	/* Bit set of the *_IS_SET flags below, recording which optional
	 * values were found in the header. */
	int			 setflag;
#define BIRTHTIME_IS_SET	1
#define ATIME_IS_SET		2
#define UNIX_MODE_IS_SET	4
#define CRC_IS_SET		8
	time_t			 birthtime;
	long			 birthtime_tv_nsec;
	time_t			 mtime;
	long			 mtime_tv_nsec;
	time_t			 atime;
	long			 atime_tv_nsec;
	mode_t			 mode;
	int64_t			 uid;
	int64_t			 gid;
	struct archive_string 	 uname;
	struct archive_string 	 gname;
	uint16_t		 header_crc;
	/* CRC16 of the file data as recorded in the header. */
	uint16_t		 crc;
	/* dirname and filename could be in different codepages */
	struct archive_string_conv *sconv_dir;
	struct archive_string_conv *sconv_fname;
	/* Converter selected with the "hdrcharset" option; when set it is
	 * used for both the directory and the file name. */
	struct archive_string_conv *opt_sconv;

	struct archive_string 	 dirname;
	struct archive_string 	 filename;
	/* Scratch wide string used by lha_replace_path_separator(). */
	struct archive_wstring	 ws;

	unsigned char		 dos_attr;

	/* Set once the first header of the archive has been found. */
	char			 found_first_header;
	/* Set when the entry's method is "lhd", i.e. it is a directory. */
	char			 directory;

	/* Flags to mark progress of decompression. */
	char			 decompress_init;
	char			 end_of_entry;
	char			 end_of_entry_cleanup;
	/* Zero for the stored methods "lh0" and "lz4", non-zero otherwise. */
	char			 entry_is_compressed;

	/* Backing storage for the "lha -xxx-" format name reported
	 * through a->archive.archive_format_name. */
	char			 format_name[64];

	struct lzh_stream	 strm;
};
204 
/*
 * LHA header common member offsets: byte offsets from the start of a
 * header that are the same for every header level handled here.
 */
#define H_METHOD_OFFSET	2	/* Compress type ("-lh5-" etc., 5 bytes). */
#define H_ATTR_OFFSET	19	/* DOS attribute. */
#define H_LEVEL_OFFSET	20	/* Header Level.  */
#define H_SIZE		22	/* Minimum header size. */
212 
/*
 * Forward declarations.
 */

/* Format callbacks registered by archive_read_support_format_lha(). */
static int      archive_read_format_lha_bid(struct archive_read *, int);
static int      archive_read_format_lha_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_lha_read_header(struct archive_read *,
		    struct archive_entry *);
static int	archive_read_format_lha_read_data(struct archive_read *,
		    const void **, size_t *, int64_t *);
static int	archive_read_format_lha_read_data_skip(struct archive_read *);
static int	archive_read_format_lha_cleanup(struct archive_read *);

/* Header parsing helpers. */
static void	lha_replace_path_separator(struct lha *,
		    struct archive_entry *);
static int	lha_read_file_header_0(struct archive_read *, struct lha *);
static int	lha_read_file_header_1(struct archive_read *, struct lha *);
static int	lha_read_file_header_2(struct archive_read *, struct lha *);
static int	lha_read_file_header_3(struct archive_read *, struct lha *);
static int	lha_read_file_extended_header(struct archive_read *,
		    struct lha *, uint16_t *, int, uint64_t, size_t *);
static size_t	lha_check_header_format(const void *);
static int	lha_skip_sfx(struct archive_read *);
static time_t	lha_dos_time(const unsigned char *);
static time_t	lha_win_time(uint64_t, long *);
static unsigned char	lha_calcsum(unsigned char, const void *,
		    int, size_t);
static int	lha_parse_linkname(struct archive_wstring *,
		    struct archive_wstring *);
/* Entry data readers and CRC16. */
static int	lha_read_data_none(struct archive_read *, const void **,
		    size_t *, int64_t *);
static int	lha_read_data_lzh(struct archive_read *, const void **,
		    size_t *, int64_t *);
static void	lha_crc16_init(void);
static uint16_t lha_crc16(uint16_t, const void *, size_t);
/* LZH decoder. */
static int	lzh_decode_init(struct lzh_stream *, const char *);
static void	lzh_decode_free(struct lzh_stream *);
static int	lzh_decode(struct lzh_stream *, int);
static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
static int	lzh_huffman_init(struct huffman *, size_t, int);
static void	lzh_huffman_free(struct huffman *);
static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
static int	lzh_make_fake_table(struct huffman *, uint16_t);
static int	lzh_make_huffman_table(struct huffman *);
static inline int lzh_decode_huffman(struct huffman *, unsigned);
static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
256 
257 
258 int
archive_read_support_format_lha(struct archive * _a)259 archive_read_support_format_lha(struct archive *_a)
260 {
261 	struct archive_read *a = (struct archive_read *)_a;
262 	struct lha *lha;
263 	int r;
264 
265 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
266 	    ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
267 
268 	lha = calloc(1, sizeof(*lha));
269 	if (lha == NULL) {
270 		archive_set_error(&a->archive, ENOMEM,
271 		    "Can't allocate lha data");
272 		return (ARCHIVE_FATAL);
273 	}
274 	archive_string_init(&lha->ws);
275 
276 	r = __archive_read_register_format(a,
277 	    lha,
278 	    "lha",
279 	    archive_read_format_lha_bid,
280 	    archive_read_format_lha_options,
281 	    archive_read_format_lha_read_header,
282 	    archive_read_format_lha_read_data,
283 	    archive_read_format_lha_read_data_skip,
284 	    NULL,
285 	    archive_read_format_lha_cleanup,
286 	    NULL,
287 	    NULL);
288 
289 	if (r != ARCHIVE_OK)
290 		free(lha);
291 	return (ARCHIVE_OK);
292 }
293 
294 static size_t
lha_check_header_format(const void * h)295 lha_check_header_format(const void *h)
296 {
297 	const unsigned char *p = h;
298 	size_t next_skip_bytes;
299 
300 	switch (p[H_METHOD_OFFSET+3]) {
301 	/*
302 	 * "-lh0-" ... "-lh7-" "-lhd-"
303 	 * "-lzs-" "-lz5-"
304 	 */
305 	case '0': case '1': case '2': case '3':
306 	case '4': case '5': case '6': case '7':
307 	case 'd':
308 	case 's':
309 		next_skip_bytes = 4;
310 
311 		/* b0 == 0 means the end of an LHa archive file.	*/
312 		if (p[0] == 0)
313 			break;
314 		if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
315 		    ||  p[H_METHOD_OFFSET+4] != '-')
316 			break;
317 
318 		if (p[H_METHOD_OFFSET+2] == 'h') {
319 			/* "-lh?-" */
320 			if (p[H_METHOD_OFFSET+3] == 's')
321 				break;
322 			if (p[H_LEVEL_OFFSET] == 0)
323 				return (0);
324 			if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
325 				return (0);
326 		}
327 		if (p[H_METHOD_OFFSET+2] == 'z') {
328 			/* LArc extensions: -lzs-,-lz4- and -lz5- */
329 			if (p[H_LEVEL_OFFSET] != 0)
330 				break;
331 			if (p[H_METHOD_OFFSET+3] == 's'
332 			    || p[H_METHOD_OFFSET+3] == '4'
333 			    || p[H_METHOD_OFFSET+3] == '5')
334 				return (0);
335 		}
336 		break;
337 	case 'h': next_skip_bytes = 1; break;
338 	case 'z': next_skip_bytes = 1; break;
339 	case 'l': next_skip_bytes = 2; break;
340 	case '-': next_skip_bytes = 3; break;
341 	default : next_skip_bytes = 4; break;
342 	}
343 
344 	return (next_skip_bytes);
345 }
346 
347 static int
archive_read_format_lha_bid(struct archive_read * a,int best_bid)348 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
349 {
350 	const char *p;
351 	const void *buff;
352 	ssize_t bytes_avail, offset, window;
353 	size_t next;
354 
355 	/* If there's already a better bid than we can ever
356 	   make, don't bother testing. */
357 	if (best_bid > 30)
358 		return (-1);
359 
360 	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
361 		return (-1);
362 
363 	if (lha_check_header_format(p) == 0)
364 		return (30);
365 
366 	if (p[0] == 'M' && p[1] == 'Z') {
367 		/* PE file */
368 		offset = 0;
369 		window = 4096;
370 		while (offset < (1024 * 20)) {
371 			buff = __archive_read_ahead(a, offset + window,
372 			    &bytes_avail);
373 			if (buff == NULL) {
374 				/* Remaining bytes are less than window. */
375 				window >>= 1;
376 				if (window < (H_SIZE + 3))
377 					return (0);
378 				continue;
379 			}
380 			p = (const char *)buff + offset;
381 			while (p + H_SIZE < (const char *)buff + bytes_avail) {
382 				if ((next = lha_check_header_format(p)) == 0)
383 					return (30);
384 				p += next;
385 			}
386 			offset = p - (const char *)buff;
387 		}
388 	}
389 	return (0);
390 }
391 
392 static int
archive_read_format_lha_options(struct archive_read * a,const char * key,const char * val)393 archive_read_format_lha_options(struct archive_read *a,
394     const char *key, const char *val)
395 {
396 	struct lha *lha;
397 	int ret = ARCHIVE_FAILED;
398 
399 	lha = (struct lha *)(a->format->data);
400 	if (strcmp(key, "hdrcharset")  == 0) {
401 		if (val == NULL || val[0] == 0)
402 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
403 			    "lha: hdrcharset option needs a character-set name");
404 		else {
405 			lha->opt_sconv =
406 			    archive_string_conversion_from_charset(
407 				&a->archive, val, 0);
408 			if (lha->opt_sconv != NULL)
409 				ret = ARCHIVE_OK;
410 			else
411 				ret = ARCHIVE_FATAL;
412 		}
413 		return (ret);
414 	}
415 
416 	/* Note: The "warn" return is just to inform the options
417 	 * supervisor that we didn't handle it.  It will generate
418 	 * a suitable error if no one used this option. */
419 	return (ARCHIVE_WARN);
420 }
421 
422 static int
lha_skip_sfx(struct archive_read * a)423 lha_skip_sfx(struct archive_read *a)
424 {
425 	const void *h;
426 	const char *p, *q;
427 	size_t next, skip;
428 	ssize_t bytes, window;
429 
430 	window = 4096;
431 	for (;;) {
432 		h = __archive_read_ahead(a, window, &bytes);
433 		if (h == NULL) {
434 			/* Remaining bytes are less than window. */
435 			window >>= 1;
436 			if (window < (H_SIZE + 3))
437 				goto fatal;
438 			continue;
439 		}
440 		if (bytes < H_SIZE)
441 			goto fatal;
442 		p = h;
443 		q = p + bytes;
444 
445 		/*
446 		 * Scan ahead until we find something that looks
447 		 * like the lha header.
448 		 */
449 		while (p + H_SIZE < q) {
450 			if ((next = lha_check_header_format(p)) == 0) {
451 				skip = p - (const char *)h;
452 				__archive_read_consume(a, skip);
453 				return (ARCHIVE_OK);
454 			}
455 			p += next;
456 		}
457 		skip = p - (const char *)h;
458 		__archive_read_consume(a, skip);
459 	}
460 fatal:
461 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
462 	    "Couldn't find out LHa header");
463 	return (ARCHIVE_FATAL);
464 }
465 
466 static int
truncated_error(struct archive_read * a)467 truncated_error(struct archive_read *a)
468 {
469 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
470 	    "Truncated LHa header");
471 	return (ARCHIVE_FATAL);
472 }
473 
474 static int
archive_read_format_lha_read_header(struct archive_read * a,struct archive_entry * entry)475 archive_read_format_lha_read_header(struct archive_read *a,
476     struct archive_entry *entry)
477 {
478 	struct archive_wstring linkname;
479 	struct archive_wstring pathname;
480 	struct lha *lha;
481 	const unsigned char *p;
482 	const char *signature;
483 	int err;
484 	struct archive_mstring conv_buffer;
485 	const wchar_t *conv_buffer_p;
486 
487 	lha_crc16_init();
488 
489 	a->archive.archive_format = ARCHIVE_FORMAT_LHA;
490 	if (a->archive.archive_format_name == NULL)
491 		a->archive.archive_format_name = "lha";
492 
493 	lha = (struct lha *)(a->format->data);
494 	lha->decompress_init = 0;
495 	lha->end_of_entry = 0;
496 	lha->end_of_entry_cleanup = 0;
497 	lha->entry_unconsumed = 0;
498 
499 	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
500 		/*
501 		 * LHa archiver added 0 to the tail of its archive file as
502 		 * the mark of the end of the archive.
503 		 */
504 		signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
505 		if (signature == NULL || signature[0] == 0)
506 			return (ARCHIVE_EOF);
507 		return (truncated_error(a));
508 	}
509 
510 	signature = (const char *)p;
511 	if (lha->found_first_header == 0 &&
512 	    signature[0] == 'M' && signature[1] == 'Z') {
513                 /* This is an executable?  Must be self-extracting... 	*/
514 		err = lha_skip_sfx(a);
515 		if (err < ARCHIVE_WARN)
516 			return (err);
517 
518 		if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
519 			return (truncated_error(a));
520 		signature = (const char *)p;
521 	}
522 	/* signature[0] == 0 means the end of an LHa archive file. */
523 	if (signature[0] == 0)
524 		return (ARCHIVE_EOF);
525 
526 	/*
527 	 * Check the header format and method type.
528 	 */
529 	if (lha_check_header_format(p) != 0) {
530 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
531 		    "Bad LHa file");
532 		return (ARCHIVE_FATAL);
533 	}
534 
535 	/* We've found the first header. */
536 	lha->found_first_header = 1;
537 	/* Set a default value and common data */
538 	lha->header_size = 0;
539 	lha->level = p[H_LEVEL_OFFSET];
540 	lha->method[0] = p[H_METHOD_OFFSET+1];
541 	lha->method[1] = p[H_METHOD_OFFSET+2];
542 	lha->method[2] = p[H_METHOD_OFFSET+3];
543 	if (memcmp(lha->method, "lhd", 3) == 0)
544 		lha->directory = 1;
545 	else
546 		lha->directory = 0;
547 	if (memcmp(lha->method, "lh0", 3) == 0 ||
548 	    memcmp(lha->method, "lz4", 3) == 0)
549 		lha->entry_is_compressed = 0;
550 	else
551 		lha->entry_is_compressed = 1;
552 
553 	lha->compsize = 0;
554 	lha->origsize = 0;
555 	lha->setflag = 0;
556 	lha->birthtime = 0;
557 	lha->birthtime_tv_nsec = 0;
558 	lha->mtime = 0;
559 	lha->mtime_tv_nsec = 0;
560 	lha->atime = 0;
561 	lha->atime_tv_nsec = 0;
562 	lha->mode = (lha->directory)? 0777 : 0666;
563 	lha->uid = 0;
564 	lha->gid = 0;
565 	archive_string_empty(&lha->dirname);
566 	archive_string_empty(&lha->filename);
567 	lha->dos_attr = 0;
568 	if (lha->opt_sconv != NULL) {
569 		lha->sconv_dir = lha->opt_sconv;
570 		lha->sconv_fname = lha->opt_sconv;
571 	} else {
572 		lha->sconv_dir = NULL;
573 		lha->sconv_fname = NULL;
574 	}
575 
576 	switch (p[H_LEVEL_OFFSET]) {
577 	case 0:
578 		err = lha_read_file_header_0(a, lha);
579 		break;
580 	case 1:
581 		err = lha_read_file_header_1(a, lha);
582 		break;
583 	case 2:
584 		err = lha_read_file_header_2(a, lha);
585 		break;
586 	case 3:
587 		err = lha_read_file_header_3(a, lha);
588 		break;
589 	default:
590 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
591 		    "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
592 		err = ARCHIVE_FATAL;
593 		break;
594 	}
595 	if (err < ARCHIVE_WARN)
596 		return (err);
597 
598 
599 	if (!lha->directory && archive_strlen(&lha->filename) == 0)
600 		/* The filename has not been set */
601 		return (truncated_error(a));
602 
603 	/*
604 	 * Make a pathname from a dirname and a filename, after converting to Unicode.
605 	 * This is because codepages might differ between dirname and filename.
606 	*/
607 	archive_string_init(&pathname);
608 	archive_string_init(&linkname);
609 	archive_string_init(&conv_buffer.aes_mbs);
610 	archive_string_init(&conv_buffer.aes_mbs_in_locale);
611 	archive_string_init(&conv_buffer.aes_utf8);
612 	archive_string_init(&conv_buffer.aes_wcs);
613 	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
614 		archive_set_error(&a->archive,
615 			ARCHIVE_ERRNO_FILE_FORMAT,
616 			"Pathname cannot be converted "
617 			"from %s to Unicode.",
618 			archive_string_conversion_charset_name(lha->sconv_dir));
619 		err = ARCHIVE_FATAL;
620 	} else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
621 		err = ARCHIVE_FATAL;
622 	if (err == ARCHIVE_FATAL) {
623 		archive_mstring_clean(&conv_buffer);
624 		archive_wstring_free(&pathname);
625 		archive_wstring_free(&linkname);
626 		return (err);
627 	}
628 	archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
629 
630 	archive_string_empty(&conv_buffer.aes_mbs);
631 	archive_string_empty(&conv_buffer.aes_mbs_in_locale);
632 	archive_string_empty(&conv_buffer.aes_utf8);
633 	archive_wstring_empty(&conv_buffer.aes_wcs);
634 	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
635 		archive_set_error(&a->archive,
636 			ARCHIVE_ERRNO_FILE_FORMAT,
637 			"Pathname cannot be converted "
638 			"from %s to Unicode.",
639 			archive_string_conversion_charset_name(lha->sconv_fname));
640 		err = ARCHIVE_FATAL;
641 	}
642 	else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
643 		err = ARCHIVE_FATAL;
644 	if (err == ARCHIVE_FATAL) {
645 		archive_mstring_clean(&conv_buffer);
646 		archive_wstring_free(&pathname);
647 		archive_wstring_free(&linkname);
648 		return (err);
649 	}
650 	archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
651 	archive_mstring_clean(&conv_buffer);
652 
653 	if ((lha->mode & AE_IFMT) == AE_IFLNK) {
654 		/*
655 	 	 * Extract the symlink-name if it's included in the pathname.
656 	 	 */
657 		if (!lha_parse_linkname(&linkname, &pathname)) {
658 			/* We couldn't get the symlink-name. */
659 			archive_set_error(&a->archive,
660 		    	    ARCHIVE_ERRNO_FILE_FORMAT,
661 			    "Unknown symlink-name");
662 			archive_wstring_free(&pathname);
663 			archive_wstring_free(&linkname);
664 			return (ARCHIVE_FAILED);
665 		}
666 	} else {
667 		/*
668 		 * Make sure a file-type is set.
669 		 * The mode has been overridden if it is in the extended data.
670 		 */
671 		lha->mode = (lha->mode & ~AE_IFMT) |
672 		    ((lha->directory)? AE_IFDIR: AE_IFREG);
673 	}
674 	if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
675 	    (lha->dos_attr & 1) != 0)
676 		lha->mode &= ~(0222);/* read only. */
677 
678 	/*
679 	 * Set basic file parameters.
680 	 */
681 	archive_entry_copy_pathname_w(entry, pathname.s);
682 	archive_wstring_free(&pathname);
683 	if (archive_strlen(&linkname) > 0) {
684 		archive_entry_copy_symlink_w(entry, linkname.s);
685 	} else
686 		archive_entry_set_symlink(entry, NULL);
687 	archive_wstring_free(&linkname);
688 	/*
689 	 * When a header level is 0, there is a possibility that
690 	 * a pathname and a symlink has '\' character, a directory
691 	 * separator in DOS/Windows. So we should convert it to '/'.
692 	 */
693 	if (p[H_LEVEL_OFFSET] == 0)
694 		lha_replace_path_separator(lha, entry);
695 
696 	archive_entry_set_mode(entry, lha->mode);
697 	archive_entry_set_uid(entry, lha->uid);
698 	archive_entry_set_gid(entry, lha->gid);
699 	if (archive_strlen(&lha->uname) > 0)
700 		archive_entry_set_uname(entry, lha->uname.s);
701 	if (archive_strlen(&lha->gname) > 0)
702 		archive_entry_set_gname(entry, lha->gname.s);
703 	if (lha->setflag & BIRTHTIME_IS_SET) {
704 		archive_entry_set_birthtime(entry, lha->birthtime,
705 		    lha->birthtime_tv_nsec);
706 		archive_entry_set_ctime(entry, lha->birthtime,
707 		    lha->birthtime_tv_nsec);
708 	} else {
709 		archive_entry_unset_birthtime(entry);
710 		archive_entry_unset_ctime(entry);
711 	}
712 	archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
713 	if (lha->setflag & ATIME_IS_SET)
714 		archive_entry_set_atime(entry, lha->atime,
715 		    lha->atime_tv_nsec);
716 	else
717 		archive_entry_unset_atime(entry);
718 	if (lha->directory || archive_entry_symlink(entry) != NULL)
719 		archive_entry_unset_size(entry);
720 	else
721 		archive_entry_set_size(entry, lha->origsize);
722 
723 	/*
724 	 * Prepare variables used to read a file content.
725 	 */
726 	lha->entry_bytes_remaining = lha->compsize;
727 	if (lha->entry_bytes_remaining < 0) {
728 		archive_set_error(&a->archive,
729 		    ARCHIVE_ERRNO_FILE_FORMAT,
730 		    "Invalid LHa entry size");
731 		return (ARCHIVE_FATAL);
732 	}
733 	lha->entry_offset = 0;
734 	lha->entry_crc_calculated = 0;
735 
736 	/*
737 	 * This file does not have a content.
738 	 */
739 	if (lha->directory || lha->compsize == 0)
740 		lha->end_of_entry = 1;
741 
742 	snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-",
743 	    lha->method[0], lha->method[1], lha->method[2]);
744 	a->archive.archive_format_name = lha->format_name;
745 
746 	return (err);
747 }
748 
749 /*
750  * Replace a DOS path separator '\' by a character '/'.
751  * Some multi-byte character set have  a character '\' in its second byte.
752  */
753 static void
lha_replace_path_separator(struct lha * lha,struct archive_entry * entry)754 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
755 {
756 	const wchar_t *wp;
757 	size_t i;
758 
759 	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
760 		archive_wstrcpy(&(lha->ws), wp);
761 		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
762 			if (lha->ws.s[i] == L'\\')
763 				lha->ws.s[i] = L'/';
764 		}
765 		archive_entry_copy_pathname_w(entry, lha->ws.s);
766 	}
767 
768 	if ((wp = archive_entry_symlink_w(entry)) != NULL) {
769 		archive_wstrcpy(&(lha->ws), wp);
770 		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
771 			if (lha->ws.s[i] == L'\\')
772 				lha->ws.s[i] = L'/';
773 		}
774 		archive_entry_copy_symlink_w(entry, lha->ws.s);
775 	}
776 }
777 
778 /*
779  * Header 0 format
780  *
781  * +0              +1         +2               +7                  +11
782  * +---------------+----------+----------------+-------------------+
783  * |header size(*1)|header sum|compression type|compressed size(*2)|
784  * +---------------+----------+----------------+-------------------+
785  *                             <---------------------(*1)----------*
786  *
787  * +11               +15       +17       +19            +20              +21
788  * +-----------------+---------+---------+--------------+----------------+
789  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
790  * +-----------------+---------+---------+--------------+----------------+
791  * *--------------------------------(*1)---------------------------------*
792  *
793  * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
794  * +---------------+---------+----------+----------------+------------------+
795  * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
796  * +---------------+---------+----------+----------------+------------------+
797  *                  <--(*3)->                             <------(*2)------>
798  * *----------------------(*1)-------------------------->
799  *
800  */
/* Byte offsets of the level-0 header fields (see the layout above). */
#define H0_HEADER_SIZE_OFFSET	0
#define H0_HEADER_SUM_OFFSET	1
#define H0_COMP_SIZE_OFFSET	7
#define H0_ORIG_SIZE_OFFSET	11
#define H0_DOS_TIME_OFFSET	15
#define H0_NAME_LEN_OFFSET	21
#define H0_FILE_NAME_OFFSET	22
/* Header size with an empty file name: the 22 bytes up to and including
 * the name length plus the 2-byte file CRC16. */
#define H0_FIXED_SIZE		24
809 static int
lha_read_file_header_0(struct archive_read * a,struct lha * lha)810 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
811 {
812 	const unsigned char *p;
813 	int extdsize, namelen;
814 	unsigned char headersum, sum_calculated;
815 
816 	if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
817 		return (truncated_error(a));
818 	lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
819 	headersum = p[H0_HEADER_SUM_OFFSET];
820 	lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
821 	lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
822 	lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
823 	namelen = p[H0_NAME_LEN_OFFSET];
824 	extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
825 	if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
826 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
827 		    "Invalid LHa header");
828 		return (ARCHIVE_FATAL);
829 	}
830 	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
831 		return (truncated_error(a));
832 
833 	archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
834 	/* When extdsize == -2, A CRC16 value is not present in the header. */
835 	if (extdsize >= 0) {
836 		lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
837 		lha->setflag |= CRC_IS_SET;
838 	}
839 	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
840 
841 	/* Read an extended header */
842 	if (extdsize > 0) {
843 		/* This extended data is set by 'LHa for UNIX' only.
844 		 * Maybe fixed size.
845 		 */
846 		p += H0_FILE_NAME_OFFSET + namelen + 2;
847 		if (p[0] == 'U' && extdsize == 12) {
848 			/* p[1] is a minor version. */
849 			lha->mtime = archive_le32dec(&p[2]);
850 			lha->mode = archive_le16dec(&p[6]);
851 			lha->uid = archive_le16dec(&p[8]);
852 			lha->gid = archive_le16dec(&p[10]);
853 			lha->setflag |= UNIX_MODE_IS_SET;
854 		}
855 	}
856 	__archive_read_consume(a, lha->header_size);
857 
858 	if (sum_calculated != headersum) {
859 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
860 		    "LHa header sum error");
861 		return (ARCHIVE_FATAL);
862 	}
863 
864 	return (ARCHIVE_OK);
865 }
866 
867 /*
868  * Header 1 format
869  *
870  * +0              +1         +2               +7            +11
871  * +---------------+----------+----------------+-------------+
872  * |header size(*1)|header sum|compression type|skip size(*2)|
873  * +---------------+----------+----------------+-------------+
874  *                             <---------------(*1)----------*
875  *
876  * +11               +15       +17       +19            +20              +21
877  * +-----------------+---------+---------+--------------+----------------+
878  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
879  * +-----------------+---------+---------+--------------+----------------+
880  * *-------------------------------(*1)----------------------------------*
881  *
882  * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
883  * +---------------+---------+----------+-----------+-----------+
884  * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
885  * +---------------+---------+----------+-----------+-----------+
886  *                  <--(*3)->
887  * *----------------------------(*1)----------------------------*
888  *
889  * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
890  * +----------------+---------------------+------------------------+
891  * |next header size| extended header(*5) |     compressed data    |
892  * +----------------+---------------------+------------------------+
893  * *------(*1)-----> <--------------------(*2)-------------------->
894  */
/* Byte offsets of the level-1 header fields (see the layout above). */
#define H1_HEADER_SIZE_OFFSET	0
#define H1_HEADER_SUM_OFFSET	1
#define H1_COMP_SIZE_OFFSET	7
#define H1_ORIG_SIZE_OFFSET	11
#define H1_DOS_TIME_OFFSET	15
#define H1_NAME_LEN_OFFSET	21
#define H1_FILE_NAME_OFFSET	22
/* Header size with an empty file name and no padding: 22 bytes up to and
 * including the name length, file CRC16 (2), creator (1) and the next
 * header size (2). */
#define H1_FIXED_SIZE		27
903 static int
lha_read_file_header_1(struct archive_read * a,struct lha * lha)904 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
905 {
906 	const unsigned char *p;
907 	size_t extdsize;
908 	int i, err, err2;
909 	int namelen, padding;
910 	unsigned char headersum, sum_calculated;
911 
912 	err = ARCHIVE_OK;
913 
914 	if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
915 		return (truncated_error(a));
916 
917 	lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
918 	headersum = p[H1_HEADER_SUM_OFFSET];
919 	/* Note: An extended header size is included in a compsize. */
920 	lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
921 	lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
922 	lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
923 	namelen = p[H1_NAME_LEN_OFFSET];
924 	/* Calculate a padding size. The result will be normally 0 only(?) */
925 	padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
926 
927 	if (namelen > 230 || padding < 0)
928 		goto invalid;
929 
930 	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
931 		return (truncated_error(a));
932 
933 	for (i = 0; i < namelen; i++) {
934 		if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
935 			goto invalid;/* Invalid filename. */
936 	}
937 	archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
938 	lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
939 	lha->setflag |= CRC_IS_SET;
940 
941 	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
942 	/* Consume used bytes but not include `next header size' data
943 	 * since it will be consumed in lha_read_file_extended_header(). */
944 	__archive_read_consume(a, lha->header_size - 2);
945 
946 	/* Read extended headers */
947 	err2 = lha_read_file_extended_header(a, lha, NULL, 2,
948 	    (uint64_t)(lha->compsize + 2), &extdsize);
949 	if (err2 < ARCHIVE_WARN)
950 		return (err2);
951 	if (err2 < err)
952 		err = err2;
953 	/* Get a real compressed file size. */
954 	lha->compsize -= extdsize - 2;
955 
956 	if (lha->compsize < 0)
957 		goto invalid;	/* Invalid compressed file size */
958 
959 	if (sum_calculated != headersum) {
960 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
961 		    "LHa header sum error");
962 		return (ARCHIVE_FATAL);
963 	}
964 	return (err);
965 invalid:
966 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
967 	    "Invalid LHa header");
968 	return (ARCHIVE_FATAL);
969 }
970 
971 /*
972  * Header 2 format
973  *
974  * +0              +2               +7                  +11               +15
975  * +---------------+----------------+-------------------+-----------------+
976  * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
977  * +---------------+----------------+-------------------+-----------------+
978  *  <--------------------------------(*1)---------------------------------*
979  *
980  * +15               +19          +20              +21        +23         +24
981  * +-----------------+------------+----------------+----------+-----------+
 * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
983  * +-----------------+------------+----------------+----------+-----------+
984  * *---------------------------------(*1)---------------------------------*
985  *
986  * +24              +26                 +26+(*3)      +26+(*3)+(*4)
987  * +----------------+-------------------+-------------+-------------------+
988  * |next header size|extended header(*3)| padding(*4) |  compressed data  |
989  * +----------------+-------------------+-------------+-------------------+
990  * *--------------------------(*1)-------------------> <------(*2)------->
991  *
992  */
993 #define H2_HEADER_SIZE_OFFSET	0
994 #define H2_COMP_SIZE_OFFSET	7
995 #define H2_ORIG_SIZE_OFFSET	11
996 #define H2_TIME_OFFSET		15
997 #define H2_CRC_OFFSET		21
998 #define H2_FIXED_SIZE		24
999 static int
lha_read_file_header_2(struct archive_read * a,struct lha * lha)1000 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1001 {
1002 	const unsigned char *p;
1003 	size_t extdsize;
1004 	int err, padding;
1005 	uint16_t header_crc;
1006 
1007 	if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1008 		return (truncated_error(a));
1009 
1010 	lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1011 	lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1012 	lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1013 	lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1014 	lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1015 	lha->setflag |= CRC_IS_SET;
1016 
1017 	if (lha->header_size < H2_FIXED_SIZE) {
1018 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1019 		    "Invalid LHa header size");
1020 		return (ARCHIVE_FATAL);
1021 	}
1022 
1023 	header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1024 	__archive_read_consume(a, H2_FIXED_SIZE);
1025 
1026 	/* Read extended headers */
1027 	err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1028 		  lha->header_size - H2_FIXED_SIZE, &extdsize);
1029 	if (err < ARCHIVE_WARN)
1030 		return (err);
1031 
1032 	/* Calculate a padding size. The result will be normally 0 or 1. */
1033 	padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1034 	if (padding > 0) {
1035 		if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1036 			return (truncated_error(a));
1037 		header_crc = lha_crc16(header_crc, p, padding);
1038 		__archive_read_consume(a, padding);
1039 	}
1040 
1041 	if (header_crc != lha->header_crc) {
1042 #ifndef DONT_FAIL_ON_CRC_ERROR
1043 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1044 		    "LHa header CRC error");
1045 		return (ARCHIVE_FATAL);
1046 #endif
1047 	}
1048 	return (err);
1049 }
1050 
1051 /*
1052  * Header 3 format
1053  *
1054  * +0           +2               +7                  +11               +15
1055  * +------------+----------------+-------------------+-----------------+
1056  * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1057  * +------------+----------------+-------------------+-----------------+
1058  *  <-------------------------------(*1)-------------------------------*
1059  *
1060  * +15               +19          +20              +21        +23         +24
1061  * +-----------------+------------+----------------+----------+-----------+
1062  * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1063  * +-----------------+------------+----------------+----------+-----------+
1064  * *--------------------------------(*1)----------------------------------*
1065  *
1066  * +24             +28              +32                 +32+(*3)
1067  * +---------------+----------------+-------------------+-----------------+
1068  * |header size(*1)|next header size|extended header(*3)| compressed data |
1069  * +---------------+----------------+-------------------+-----------------+
1070  * *------------------------(*1)-----------------------> <------(*2)----->
1071  *
1072  */
1073 #define H3_FIELD_LEN_OFFSET	0
1074 #define H3_COMP_SIZE_OFFSET	7
1075 #define H3_ORIG_SIZE_OFFSET	11
1076 #define H3_TIME_OFFSET		15
1077 #define H3_CRC_OFFSET		21
1078 #define H3_HEADER_SIZE_OFFSET	24
1079 #define H3_FIXED_SIZE		28
1080 static int
lha_read_file_header_3(struct archive_read * a,struct lha * lha)1081 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1082 {
1083 	const unsigned char *p;
1084 	size_t extdsize;
1085 	int err;
1086 	uint16_t header_crc;
1087 
1088 	if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1089 		return (truncated_error(a));
1090 
1091 	if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1092 		goto invalid;
1093 	lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1094 	lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1095 	lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1096 	lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1097 	lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1098 	lha->setflag |= CRC_IS_SET;
1099 
1100 	if (lha->header_size < H3_FIXED_SIZE + 4)
1101 		goto invalid;
1102 	header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1103 	__archive_read_consume(a, H3_FIXED_SIZE);
1104 
1105 	/* Read extended headers */
1106 	err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1107 		  lha->header_size - H3_FIXED_SIZE, &extdsize);
1108 	if (err < ARCHIVE_WARN)
1109 		return (err);
1110 
1111 	if (header_crc != lha->header_crc) {
1112 #ifndef DONT_FAIL_ON_CRC_ERROR
1113 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1114 		    "LHa header CRC error");
1115 		return (ARCHIVE_FATAL);
1116 #endif
1117 	}
1118 	return (err);
1119 invalid:
1120 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1121 	    "Invalid LHa header");
1122 	return (ARCHIVE_FATAL);
1123 }
1124 
1125 /*
1126  * Extended header format
1127  *
1128  * +0             +2        +3  -- used in header 1 and 2
1129  * +0             +4        +5  -- used in header 3
1130  * +--------------+---------+-------------------+--------------+--
1131  * |ex-header size|header id|        data       |ex-header size| .......
1132  * +--------------+---------+-------------------+--------------+--
1133  *  <-------------( ex-header size)------------> <-- next extended header --*
1134  *
 * If the ex-header size is zero, it is the mark of the end of extended
 * headers.
1137  *
1138  */
1139 static int
lha_read_file_extended_header(struct archive_read * a,struct lha * lha,uint16_t * crc,int sizefield_length,uint64_t limitsize,size_t * total_size)1140 lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1141     uint16_t *crc, int sizefield_length, uint64_t limitsize, size_t *total_size)
1142 {
1143 	const void *h;
1144 	const unsigned char *extdheader;
1145 	size_t	extdsize;
1146 	size_t	datasize;
1147 	unsigned int i;
1148 	unsigned char extdtype;
1149 
1150 #define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
1151 #define EXT_FILENAME		0x01		/* Filename 		    */
1152 #define EXT_DIRECTORY		0x02		/* Directory name	    */
1153 #define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
1154 #define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
1155 #define EXT_FILESIZE		0x42		/* Large file size	    */
1156 #define EXT_TIMEZONE		0x43		/* Time zone		    */
1157 #define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
1158 #define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
1159 #define EXT_CODEPAGE		0x46		/* Codepage		    */
1160 #define EXT_UNIX_MODE		0x50		/* File permission	    */
1161 #define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
1162 #define EXT_UNIX_GNAME		0x52		/* Group name		    */
1163 #define EXT_UNIX_UNAME		0x53		/* User name		    */
1164 #define EXT_UNIX_MTIME		0x54		/* Modified time	    */
1165 #define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
1166 #define EXT_NEW_ATTR		0xff		/* new attribute	    */
1167 
1168 	*total_size = sizefield_length;
1169 
1170 	for (;;) {
1171 		/* Read an extended header size. */
1172 		if ((h =
1173 		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1174 			return (truncated_error(a));
1175 		/* Check if the size is the zero indicates the end of the
1176 		 * extended header. */
1177 		if (sizefield_length == sizeof(uint16_t))
1178 			extdsize = archive_le16dec(h);
1179 		else
1180 			extdsize = archive_le32dec(h);
1181 		if (extdsize == 0) {
1182 			/* End of extended header */
1183 			if (crc != NULL)
1184 				*crc = lha_crc16(*crc, h, sizefield_length);
1185 			__archive_read_consume(a, sizefield_length);
1186 			return (ARCHIVE_OK);
1187 		}
1188 
1189 		/* Sanity check to the extended header size. */
1190 		if (((uint64_t)*total_size + extdsize) > limitsize ||
1191 		    extdsize <= (size_t)sizefield_length)
1192 			goto invalid;
1193 
1194 		/* Read the extended header. */
1195 		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1196 			return (truncated_error(a));
1197 		*total_size += extdsize;
1198 
1199 		extdheader = (const unsigned char *)h;
1200 		/* Get the extended header type. */
1201 		extdtype = extdheader[sizefield_length];
1202 		/* Calculate an extended data size. */
1203 		datasize = extdsize - (1 + sizefield_length);
1204 		/* Skip an extended header size field and type field. */
1205 		extdheader += sizefield_length + 1;
1206 
1207 		if (crc != NULL && extdtype != EXT_HEADER_CRC)
1208 			*crc = lha_crc16(*crc, h, extdsize);
1209 		switch (extdtype) {
1210 		case EXT_HEADER_CRC:
1211 			/* We only use a header CRC. Following data will not
1212 			 * be used. */
1213 			if (datasize >= 2) {
1214 				lha->header_crc = archive_le16dec(extdheader);
1215 				if (crc != NULL) {
1216 					static const char zeros[2] = {0, 0};
1217 					*crc = lha_crc16(*crc, h,
1218 					    extdsize - datasize);
1219 					/* CRC value itself as zero */
1220 					*crc = lha_crc16(*crc, zeros, 2);
1221 					*crc = lha_crc16(*crc,
1222 					    extdheader+2, datasize - 2);
1223 				}
1224 			}
1225 			break;
1226 		case EXT_FILENAME:
1227 			if (datasize == 0) {
1228 				/* maybe directory header */
1229 				archive_string_empty(&lha->filename);
1230 				break;
1231 			}
1232 			if (extdheader[0] == '\0')
1233 				goto invalid;
1234 			archive_strncpy(&lha->filename,
1235 			    (const char *)extdheader, datasize);
1236 			break;
1237 		case EXT_UTF16_FILENAME:
1238 			if (datasize == 0) {
1239 				/* maybe directory header */
1240 				archive_string_empty(&lha->filename);
1241 				break;
1242 			} else if (datasize & 1) {
1243 				/* UTF-16 characters take always 2 or 4 bytes */
1244 				goto invalid;
1245 			}
1246 			if (extdheader[0] == '\0')
1247 				goto invalid;
1248 			archive_string_empty(&lha->filename);
1249 			archive_array_append(&lha->filename,
1250 				(const char *)extdheader, datasize);
1251 			/* Setup a string conversion for a filename. */
1252 			lha->sconv_fname =
1253 			    archive_string_conversion_from_charset(&a->archive,
1254 			        "UTF-16LE", 1);
1255 			if (lha->sconv_fname == NULL)
1256 				return (ARCHIVE_FATAL);
1257 			break;
1258 		case EXT_DIRECTORY:
1259 			if (datasize == 0 || extdheader[0] == '\0')
1260 				/* no directory name data. exit this case. */
1261 				goto invalid;
1262 
1263 			archive_strncpy(&lha->dirname,
1264 		  	    (const char *)extdheader, datasize);
1265 			/*
1266 			 * Convert directory delimiter from 0xFF
1267 			 * to '/' for local system.
1268 	 		 */
1269 			for (i = 0; i < lha->dirname.length; i++) {
1270 				if ((unsigned char)lha->dirname.s[i] == 0xFF)
1271 					lha->dirname.s[i] = '/';
1272 			}
1273 			/* Is last character directory separator? */
1274 			if (lha->dirname.s[lha->dirname.length-1] != '/')
1275 				/* invalid directory data */
1276 				goto invalid;
1277 			break;
1278 		case EXT_UTF16_DIRECTORY:
1279 			/* UTF-16 characters take always 2 or 4 bytes */
1280 			if (datasize == 0 || (datasize & 1) ||
1281 			    extdheader[0] == '\0') {
1282 				/* no directory name data. exit this case. */
1283 				goto invalid;
1284 			}
1285 
1286 			archive_string_empty(&lha->dirname);
1287 			archive_array_append(&lha->dirname,
1288 				(const char *)extdheader, datasize);
1289 			lha->sconv_dir =
1290 			    archive_string_conversion_from_charset(&a->archive,
1291 			        "UTF-16LE", 1);
1292 			if (lha->sconv_dir == NULL)
1293 				return (ARCHIVE_FATAL);
1294 			else {
1295 				/*
1296 				 * Convert directory delimiter from 0xFFFF
1297 				 * to '/' for local system.
1298 				 */
1299 				uint16_t dirSep;
1300 				uint16_t d = 1;
1301 				if (archive_be16dec(&d) == 1)
1302 					dirSep = 0x2F00;
1303 				else
1304 					dirSep = 0x002F;
1305 
1306 				/* UTF-16LE character */
1307 				uint16_t *utf16name =
1308 				    (uint16_t *)lha->dirname.s;
1309 				for (i = 0; i < lha->dirname.length / 2; i++) {
1310 					if (utf16name[i] == 0xFFFF) {
1311 						utf16name[i] = dirSep;
1312 					}
1313 				}
1314 				/* Is last character directory separator? */
1315 				if (utf16name[lha->dirname.length / 2 - 1] !=
1316 				    dirSep) {
1317 					/* invalid directory data */
1318 					goto invalid;
1319 				}
1320 			}
1321 			break;
1322 		case EXT_DOS_ATTR:
1323 			if (datasize == 2)
1324 				lha->dos_attr = (unsigned char)
1325 				    (archive_le16dec(extdheader) & 0xff);
1326 			break;
1327 		case EXT_TIMESTAMP:
1328 			if (datasize == (sizeof(uint64_t) * 3)) {
1329 				lha->birthtime = lha_win_time(
1330 				    archive_le64dec(extdheader),
1331 				    &lha->birthtime_tv_nsec);
1332 				extdheader += sizeof(uint64_t);
1333 				lha->mtime = lha_win_time(
1334 				    archive_le64dec(extdheader),
1335 				    &lha->mtime_tv_nsec);
1336 				extdheader += sizeof(uint64_t);
1337 				lha->atime = lha_win_time(
1338 				    archive_le64dec(extdheader),
1339 				    &lha->atime_tv_nsec);
1340 				lha->setflag |= BIRTHTIME_IS_SET |
1341 				    ATIME_IS_SET;
1342 			}
1343 			break;
1344 		case EXT_FILESIZE:
1345 			if (datasize == sizeof(uint64_t) * 2) {
1346 				lha->compsize = archive_le64dec(extdheader);
1347 				extdheader += sizeof(uint64_t);
1348 				lha->origsize = archive_le64dec(extdheader);
1349 				if (lha->compsize < 0 || lha->origsize < 0)
1350 					goto invalid;
1351 			}
1352 			break;
1353 		case EXT_CODEPAGE:
1354 			/* Get an archived filename charset from codepage.
1355 			 * This overwrites the charset specified by
1356 			 * hdrcharset option. */
1357 			if (datasize == sizeof(uint32_t)) {
1358 				struct archive_string cp;
1359 				const char *charset;
1360 
1361 				archive_string_init(&cp);
1362 				switch (archive_le32dec(extdheader)) {
1363 				case 65001: /* UTF-8 */
1364 					charset = "UTF-8";
1365 					break;
1366 				default:
1367 					archive_string_sprintf(&cp, "CP%d",
1368 					    (int)archive_le32dec(extdheader));
1369 					charset = cp.s;
1370 					break;
1371 				}
1372 				lha->sconv_dir =
1373 				    archive_string_conversion_from_charset(
1374 					&(a->archive), charset, 1);
1375 				lha->sconv_fname =
1376 				    archive_string_conversion_from_charset(
1377 					&(a->archive), charset, 1);
1378 				archive_string_free(&cp);
1379 				if (lha->sconv_dir == NULL)
1380 					return (ARCHIVE_FATAL);
1381 				if (lha->sconv_fname == NULL)
1382 					return (ARCHIVE_FATAL);
1383 			}
1384 			break;
1385 		case EXT_UNIX_MODE:
1386 			if (datasize == sizeof(uint16_t)) {
1387 				lha->mode = archive_le16dec(extdheader);
1388 				lha->setflag |= UNIX_MODE_IS_SET;
1389 			}
1390 			break;
1391 		case EXT_UNIX_GID_UID:
1392 			if (datasize == (sizeof(uint16_t) * 2)) {
1393 				lha->gid = archive_le16dec(extdheader);
1394 				lha->uid = archive_le16dec(extdheader+2);
1395 			}
1396 			break;
1397 		case EXT_UNIX_GNAME:
1398 			if (datasize > 0)
1399 				archive_strncpy(&lha->gname,
1400 				    (const char *)extdheader, datasize);
1401 			break;
1402 		case EXT_UNIX_UNAME:
1403 			if (datasize > 0)
1404 				archive_strncpy(&lha->uname,
1405 				    (const char *)extdheader, datasize);
1406 			break;
1407 		case EXT_UNIX_MTIME:
1408 			if (datasize == sizeof(uint32_t))
1409 				lha->mtime = archive_le32dec(extdheader);
1410 			break;
1411 		case EXT_OS2_NEW_ATTR:
1412 			/* This extended header is OS/2 depend. */
1413 			if (datasize == 16) {
1414 				lha->dos_attr = (unsigned char)
1415 				    (archive_le16dec(extdheader) & 0xff);
1416 				lha->mode = archive_le16dec(extdheader+2);
1417 				lha->gid = archive_le16dec(extdheader+4);
1418 				lha->uid = archive_le16dec(extdheader+6);
1419 				lha->birthtime = archive_le32dec(extdheader+8);
1420 				lha->atime = archive_le32dec(extdheader+12);
1421 				lha->setflag |= UNIX_MODE_IS_SET
1422 				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1423 			}
1424 			break;
1425 		case EXT_NEW_ATTR:
1426 			if (datasize == 20) {
1427 				lha->mode = (mode_t)archive_le32dec(extdheader);
1428 				lha->gid = archive_le32dec(extdheader+4);
1429 				lha->uid = archive_le32dec(extdheader+8);
1430 				lha->birthtime = archive_le32dec(extdheader+12);
1431 				lha->atime = archive_le32dec(extdheader+16);
1432 				lha->setflag |= UNIX_MODE_IS_SET
1433 				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1434 			}
1435 			break;
1436 		case EXT_TIMEZONE:		/* Not supported */
1437 			break;
1438 		default:
1439 			break;
1440 		}
1441 
1442 		__archive_read_consume(a, extdsize);
1443 	}
1444 invalid:
1445 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1446 	    "Invalid extended LHa header");
1447 	return (ARCHIVE_FATAL);
1448 }
1449 
1450 static int
lha_end_of_entry(struct archive_read * a)1451 lha_end_of_entry(struct archive_read *a)
1452 {
1453 	struct lha *lha = (struct lha *)(a->format->data);
1454 	int r = ARCHIVE_EOF;
1455 
1456 	if (!lha->end_of_entry_cleanup) {
1457 		if ((lha->setflag & CRC_IS_SET) &&
1458 		    lha->crc != lha->entry_crc_calculated) {
1459 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1460 			    "LHa data CRC error");
1461 			r = ARCHIVE_WARN;
1462 		}
1463 
1464 		/* End-of-entry cleanup done. */
1465 		lha->end_of_entry_cleanup = 1;
1466 	}
1467 	return (r);
1468 }
1469 
1470 static int
archive_read_format_lha_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1471 archive_read_format_lha_read_data(struct archive_read *a,
1472     const void **buff, size_t *size, int64_t *offset)
1473 {
1474 	struct lha *lha = (struct lha *)(a->format->data);
1475 	int r;
1476 
1477 	if (lha->entry_unconsumed) {
1478 		/* Consume as much as the decompressor actually used. */
1479 		__archive_read_consume(a, lha->entry_unconsumed);
1480 		lha->entry_unconsumed = 0;
1481 	}
1482 	if (lha->end_of_entry) {
1483 		*offset = lha->entry_offset;
1484 		*size = 0;
1485 		*buff = NULL;
1486 		return (lha_end_of_entry(a));
1487 	}
1488 
1489 	if (lha->entry_is_compressed)
1490 		r =  lha_read_data_lzh(a, buff, size, offset);
1491 	else
1492 		/* No compression. */
1493 		r =  lha_read_data_none(a, buff, size, offset);
1494 	return (r);
1495 }
1496 
1497 /*
1498  * Read a file content in no compression.
1499  *
1500  * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1501  * lha->end_of_entry if it consumes all of the data.
1502  */
1503 static int
lha_read_data_none(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1504 lha_read_data_none(struct archive_read *a, const void **buff,
1505     size_t *size, int64_t *offset)
1506 {
1507 	struct lha *lha = (struct lha *)(a->format->data);
1508 	ssize_t bytes_avail;
1509 
1510 	if (lha->entry_bytes_remaining == 0) {
1511 		*buff = NULL;
1512 		*size = 0;
1513 		*offset = lha->entry_offset;
1514 		lha->end_of_entry = 1;
1515 		return (ARCHIVE_OK);
1516 	}
1517 	/*
1518 	 * Note: '1' here is a performance optimization.
1519 	 * Recall that the decompression layer returns a count of
1520 	 * available bytes; asking for more than that forces the
1521 	 * decompressor to combine reads by copying data.
1522 	 */
1523 	*buff = __archive_read_ahead(a, 1, &bytes_avail);
1524 	if (bytes_avail <= 0) {
1525 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1526 		    "Truncated LHa file data");
1527 		return (ARCHIVE_FATAL);
1528 	}
1529 	if (bytes_avail > lha->entry_bytes_remaining)
1530 		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1531 	lha->entry_crc_calculated =
1532 	    lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1533 	*size = bytes_avail;
1534 	*offset = lha->entry_offset;
1535 	lha->entry_offset += bytes_avail;
1536 	lha->entry_bytes_remaining -= bytes_avail;
1537 	if (lha->entry_bytes_remaining == 0)
1538 		lha->end_of_entry = 1;
1539 	lha->entry_unconsumed = bytes_avail;
1540 	return (ARCHIVE_OK);
1541 }
1542 
1543 /*
1544  * Read a file content in LZHUFF encoding.
1545  *
1546  * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1547  * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1548  * all of the data.
1549  */
1550 static int
lha_read_data_lzh(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1551 lha_read_data_lzh(struct archive_read *a, const void **buff,
1552     size_t *size, int64_t *offset)
1553 {
1554 	struct lha *lha = (struct lha *)(a->format->data);
1555 	ssize_t bytes_avail;
1556 	int r;
1557 
1558 	/* If we haven't yet read any data, initialize the decompressor. */
1559 	if (!lha->decompress_init) {
1560 		r = lzh_decode_init(&(lha->strm), lha->method);
1561 		switch (r) {
1562 		case ARCHIVE_OK:
1563 			break;
1564 		case ARCHIVE_FAILED:
1565         		/* Unsupported compression. */
1566 			*buff = NULL;
1567 			*size = 0;
1568 			*offset = 0;
1569 			archive_set_error(&a->archive,
1570 			    ARCHIVE_ERRNO_FILE_FORMAT,
1571 			    "Unsupported lzh compression method -%c%c%c-",
1572 			    lha->method[0], lha->method[1], lha->method[2]);
1573 			/* We know compressed size; just skip it. */
1574 			archive_read_format_lha_read_data_skip(a);
1575 			return (ARCHIVE_WARN);
1576 		default:
1577 			archive_set_error(&a->archive, ENOMEM,
1578 			    "Couldn't allocate memory "
1579 			    "for lzh decompression");
1580 			return (ARCHIVE_FATAL);
1581 		}
1582 		/* We've initialized decompression for this stream. */
1583 		lha->decompress_init = 1;
1584 		lha->strm.avail_out = 0;
1585 		lha->strm.total_out = 0;
1586 	}
1587 
1588 	/*
1589 	 * Note: '1' here is a performance optimization.
1590 	 * Recall that the decompression layer returns a count of
1591 	 * available bytes; asking for more than that forces the
1592 	 * decompressor to combine reads by copying data.
1593 	 */
1594 	lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1595 	if (bytes_avail <= 0) {
1596 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1597 		    "Truncated LHa file body");
1598 		return (ARCHIVE_FATAL);
1599 	}
1600 	if (bytes_avail > lha->entry_bytes_remaining)
1601 		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1602 
1603 	lha->strm.avail_in = (int)bytes_avail;
1604 	lha->strm.total_in = 0;
1605 	lha->strm.avail_out = 0;
1606 
1607 	r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1608 	switch (r) {
1609 	case ARCHIVE_OK:
1610 		break;
1611 	case ARCHIVE_EOF:
1612 		lha->end_of_entry = 1;
1613 		break;
1614 	default:
1615 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1616 		    "Bad lzh data");
1617 		return (ARCHIVE_FAILED);
1618 	}
1619 	lha->entry_unconsumed = lha->strm.total_in;
1620 	lha->entry_bytes_remaining -= lha->strm.total_in;
1621 
1622 	if (lha->strm.avail_out) {
1623 		*offset = lha->entry_offset;
1624 		*size = lha->strm.avail_out;
1625 		*buff = lha->strm.ref_ptr;
1626 		lha->entry_crc_calculated =
1627 		    lha_crc16(lha->entry_crc_calculated, *buff, *size);
1628 		lha->entry_offset += *size;
1629 	} else {
1630 		*offset = lha->entry_offset;
1631 		*size = 0;
1632 		*buff = NULL;
1633 		if (lha->end_of_entry)
1634 			return (lha_end_of_entry(a));
1635 	}
1636 	return (ARCHIVE_OK);
1637 }
1638 
1639 /*
1640  * Skip a file content.
1641  */
1642 static int
archive_read_format_lha_read_data_skip(struct archive_read * a)1643 archive_read_format_lha_read_data_skip(struct archive_read *a)
1644 {
1645 	struct lha *lha;
1646 	int64_t bytes_skipped;
1647 
1648 	lha = (struct lha *)(a->format->data);
1649 
1650 	if (lha->entry_unconsumed) {
1651 		/* Consume as much as the decompressor actually used. */
1652 		__archive_read_consume(a, lha->entry_unconsumed);
1653 		lha->entry_unconsumed = 0;
1654 	}
1655 
1656 	/* if we've already read to end of data, we're done. */
1657 	if (lha->end_of_entry_cleanup)
1658 		return (ARCHIVE_OK);
1659 
1660 	/*
1661 	 * If the length is at the beginning, we can skip the
1662 	 * compressed data much more quickly.
1663 	 */
1664 	bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1665 	if (bytes_skipped < 0)
1666 		return (ARCHIVE_FATAL);
1667 
1668 	/* This entry is finished and done. */
1669 	lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1670 	return (ARCHIVE_OK);
1671 }
1672 
1673 static int
archive_read_format_lha_cleanup(struct archive_read * a)1674 archive_read_format_lha_cleanup(struct archive_read *a)
1675 {
1676 	struct lha *lha = (struct lha *)(a->format->data);
1677 
1678 	lzh_decode_free(&(lha->strm));
1679 	archive_string_free(&(lha->dirname));
1680 	archive_string_free(&(lha->filename));
1681 	archive_string_free(&(lha->uname));
1682 	archive_string_free(&(lha->gname));
1683 	archive_wstring_free(&(lha->ws));
1684 	free(lha);
1685 	(a->format->data) = NULL;
1686 	return (ARCHIVE_OK);
1687 }
1688 
1689 /*
1690  * 'LHa for UNIX' utility has archived a symbolic-link name after
1691  * a pathname with '|' character.
1692  * This function extracts the symbolic-link name from the pathname.
1693  *
1694  * example.
1695  *   1. a symbolic-name is 'aaa/bb/cc'
1696  *   2. a filename is 'xxx/bbb'
1697  *  then an archived pathname is 'xxx/bbb|aaa/bb/cc'
1698  */
1699 static int
lha_parse_linkname(struct archive_wstring * linkname,struct archive_wstring * pathname)1700 lha_parse_linkname(struct archive_wstring *linkname,
1701     struct archive_wstring *pathname)
1702 {
1703 	wchar_t *	linkptr;
1704 	size_t 	symlen;
1705 
1706 	linkptr = wcschr(pathname->s, L'|');
1707 	if (linkptr != NULL) {
1708 		symlen = wcslen(linkptr + 1);
1709 		archive_wstrncpy(linkname, linkptr+1, symlen);
1710 
1711 		*linkptr = 0;
1712 		pathname->length = wcslen(pathname->s);
1713 
1714 		return (1);
1715 	}
1716 	return (0);
1717 }
1718 
/*
 * Convert an MSDOS-style date/time into Unix-style time.
 *
 * `p' points at four bytes: a little-endian 16-bit time word followed by
 * a little-endian 16-bit date word.  The broken-down fields are passed to
 * mktime(), whose result is returned.
 */
static time_t
lha_dos_time(const unsigned char *p)
{
	struct tm tmv;
	int dos_time = archive_le16dec(p);
	int dos_date = archive_le16dec(p+2);

	memset(&tmv, 0, sizeof(tmv));

	/* Date word: bits 15-9 year since 1980, 8-5 month, 4-0 day. */
	tmv.tm_mday = dos_date & 0x1f;
	tmv.tm_mon = ((dos_date >> 5) & 0x0f) - 1;	/* 0-based month. */
	tmv.tm_year = ((dos_date >> 9) & 0x7f) + 80;	/* Since 1900.    */

	/* Time word: bits 15-11 hour, 10-5 minute, 4-0 seconds / 2. */
	tmv.tm_sec = (dos_time << 1) & 0x3e;
	tmv.tm_min = (dos_time >> 5) & 0x3f;
	tmv.tm_hour = (dos_time >> 11) & 0x1f;

	/* Let mktime() work out whether DST applies. */
	tmv.tm_isdst = -1;
	return (mktime(&tmv));
}
1739 
/*
 * Convert an MS-Windows-style date/time into Unix-style time.
 *
 * `wintime' counts 100-nanosecond units.  Values below EPOC_TIME
 * (1970-01-01 00:00:00 UTC on that scale) yield 0 seconds and 0
 * nanoseconds.  `ns' may be NULL when the sub-second part is not wanted.
 */
static time_t
lha_win_time(uint64_t wintime, long *ns)
{
#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
	uint64_t ticks;

	if (wintime < EPOC_TIME) {
		if (ns != NULL)
			*ns = 0;
		return (0);
	}

	/* Rebase onto 1970-01-01 00:00:00 (UTC). */
	ticks = wintime - EPOC_TIME;
	if (ns != NULL)
		*ns = (long)(ticks % 10000000) * 100;
	return (ticks / 10000000);
}
1757 
/*
 * Add `size' bytes, starting `offset' bytes into `pp', to the 8-bit
 * checksum `sum' (wrapping modulo 256) and return the result.
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	const unsigned char *cur = (const unsigned char *)pp + offset;
	const unsigned char *const end = cur + size;

	while (cur != end)
		sum += *cur++;
	return (sum);
}
1768 
/*
 * Lookup tables for the reflected CRC-16 with polynomial 0xA001.
 * crc16tbl[0] is the usual byte-at-a-time table; crc16tbl[1] is that
 * table advanced by one further zero byte, which lets lha_crc16() fold
 * two input bytes per step.
 */
static uint16_t crc16tbl[2][256];

/*
 * Fill crc16tbl.  Calls after the first are no-ops.
 */
static void
lha_crc16_init(void)
{
	unsigned int i;
	static int crc16init = 0;

	if (crc16init)
		return;

	for (i = 0; i < 256; i++) {
		unsigned int j;
		uint16_t crc = (uint16_t)i;
		for (j = 8; j; j--)
			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
		crc16tbl[0][i] = crc;
	}

	for (i = 0; i < 256; i++) {
		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
	}

	/* Set the flag only once both tables are complete. */
	crc16init = 1;
}

/*
 * Update the CRC-16 `crc' with the `len' bytes at `pp' and return the
 * new value.  lha_crc16_init() must have been called beforehand.
 */
static uint16_t
lha_crc16(uint16_t crc, const void *pp, size_t len)
{
	const unsigned char *p = (const unsigned char *)pp;

	if (len == 0)
		return crc;

	/*
	 * Fold two input bytes per step.  The 16-bit word is assembled
	 * from individual bytes in little-endian order, so the result is
	 * the same on any host and the buffer is only ever accessed as
	 * unsigned char, with no alignment requirement.
	 */
#define CRC16W	do {							\
		crc ^= (uint16_t)(p[0] | (p[1] << 8));			\
		p += 2;							\
		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];	\
} while (0)
	/* Manually unrolled: four 2-byte steps per iteration. */
	for (;len >= 8; len -= 8) {
		CRC16W;
		CRC16W;
		CRC16W;
		CRC16W;
	}
#undef CRC16W

	/* Remaining 0..7 bytes, one at a time. */
	for (;len; len--) {
		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
	}
	return crc;
}
1857 
1858 /*
1859  * Initialize LZHUF decoder.
1860  *
1861  * Returns ARCHIVE_OK if initialization was successful.
1862  * Returns ARCHIVE_FAILED if method is unsupported.
1863  * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1864  * error occurred.
1865  */
1866 static int
lzh_decode_init(struct lzh_stream * strm,const char * method)1867 lzh_decode_init(struct lzh_stream *strm, const char *method)
1868 {
1869 	struct lzh_dec *ds;
1870 	int w_bits, w_size;
1871 
1872 	if (strm->ds == NULL) {
1873 		strm->ds = calloc(1, sizeof(*strm->ds));
1874 		if (strm->ds == NULL)
1875 			return (ARCHIVE_FATAL);
1876 	}
1877 	ds = strm->ds;
1878 	ds->error = ARCHIVE_FAILED;
1879 	if (method == NULL || method[0] != 'l' || method[1] != 'h')
1880 		return (ARCHIVE_FAILED);
1881 	switch (method[2]) {
1882 	case '5':
1883 		w_bits = 13;/* 8KiB for window */
1884 		break;
1885 	case '6':
1886 		w_bits = 15;/* 32KiB for window */
1887 		break;
1888 	case '7':
1889 		w_bits = 16;/* 64KiB for window */
1890 		break;
1891 	default:
1892 		return (ARCHIVE_FAILED);/* Not supported. */
1893 	}
1894 	ds->error = ARCHIVE_FATAL;
1895 	/* Expand a window size up to 128 KiB for decompressing process
1896 	 * performance whatever its original window size is. */
1897 	ds->w_size = 1U << 17;
1898 	ds->w_mask = ds->w_size -1;
1899 	if (ds->w_buff == NULL) {
1900 		ds->w_buff = malloc(ds->w_size);
1901 		if (ds->w_buff == NULL)
1902 			return (ARCHIVE_FATAL);
1903 	}
1904 	w_size = 1U << w_bits;
1905 	memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1906 	ds->w_pos = 0;
1907 	ds->state = 0;
1908 	ds->pos_pt_len_size = w_bits + 1;
1909 	ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1910 	ds->literal_pt_len_size = PT_BITLEN_SIZE;
1911 	ds->literal_pt_len_bits = 5;
1912 	ds->br.cache_buffer = 0;
1913 	ds->br.cache_avail = 0;
1914 
1915 	if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1916 	    != ARCHIVE_OK)
1917 		return (ARCHIVE_FATAL);
1918 	ds->lt.len_bits = 9;
1919 	if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1920 	    != ARCHIVE_OK)
1921 		return (ARCHIVE_FATAL);
1922 	ds->error = 0;
1923 
1924 	return (ARCHIVE_OK);
1925 }
1926 
1927 /*
1928  * Release LZHUF decoder.
1929  */
1930 static void
lzh_decode_free(struct lzh_stream * strm)1931 lzh_decode_free(struct lzh_stream *strm)
1932 {
1933 
1934 	if (strm->ds == NULL)
1935 		return;
1936 	free(strm->ds->w_buff);
1937 	lzh_huffman_free(&(strm->ds->lt));
1938 	lzh_huffman_free(&(strm->ds->pt));
1939 	free(strm->ds);
1940 	strm->ds = NULL;
1941 }
1942 
/*
 * Bit stream reader.
 *
 * These macros operate on a bit cache with two members: `cache_buffer'
 * (the cached bits) and `cache_avail' (how many of them are still unread).
 * Every macro argument is parenthesized in the expansion so that
 * expressions may be passed safely.
 */
/* Check that the cache buffer has at least `n' bits. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Peek at the next `n' unread bits without consuming them.  The value is
 * narrowed to 16 bits before masking with cache_masks[n]. */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[(n)])
/* Same, for when fewer than `n' bits are cached: the unread bits are
 * shifted up and the missing low bits read as zero. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[(n)])
/* Read ahead to make sure the cache buffer has enough compressed data we
 * will use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : strm->next_in is exhausted; the following bytes are needed. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has((br), (n)) || lzh_br_fillup((strm), (br)))
/*  True  : the cache buffer holds at least as many bits as we need.
 *  False : there are not enough bits in the cache buffer,
 *          we have to get following bytes if we could. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Record that `n' bits have been consumed, or give `n' bits back. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))

/* cache_masks[n] has the low n bits set; indexes 16 to 19 all give 0xFFFF. */
static const uint16_t cache_masks[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000F, 0x001F, 0x003F, 0x007F,
	0x00FF, 0x01FF, 0x03FF, 0x07FF,
	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
1979 
1980 /*
1981  * Shift away used bits in the cache data and fill it up with following bits.
1982  * Call this when cache buffer does not have enough bits you need.
1983  *
1984  * Returns 1 if the cache buffer is full.
1985  * Returns 0 if the cache buffer is not full; input buffer is empty.
1986  */
1987 static int
lzh_br_fillup(struct lzh_stream * strm,struct lzh_br * br)1988 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1989 {
1990 	int n = CACHE_BITS - br->cache_avail;
1991 
1992 	for (;;) {
1993 		const int x = n >> 3;
1994 		if (strm->avail_in >= x) {
1995 			switch (x) {
1996 			case 8:
1997 				br->cache_buffer =
1998 				    ((uint64_t)strm->next_in[0]) << 56 |
1999 				    ((uint64_t)strm->next_in[1]) << 48 |
2000 				    ((uint64_t)strm->next_in[2]) << 40 |
2001 				    ((uint64_t)strm->next_in[3]) << 32 |
2002 				    ((uint32_t)strm->next_in[4]) << 24 |
2003 				    ((uint32_t)strm->next_in[5]) << 16 |
2004 				    ((uint32_t)strm->next_in[6]) << 8 |
2005 				     (uint32_t)strm->next_in[7];
2006 				strm->next_in += 8;
2007 				strm->avail_in -= 8;
2008 				br->cache_avail += 8 * 8;
2009 				return (1);
2010 			case 7:
2011 				br->cache_buffer =
2012 		 		   (br->cache_buffer << 56) |
2013 				    ((uint64_t)strm->next_in[0]) << 48 |
2014 				    ((uint64_t)strm->next_in[1]) << 40 |
2015 				    ((uint64_t)strm->next_in[2]) << 32 |
2016 				    ((uint64_t)strm->next_in[3]) << 24 |
2017 				    ((uint64_t)strm->next_in[4]) << 16 |
2018 				    ((uint64_t)strm->next_in[5]) << 8 |
2019 				     (uint64_t)strm->next_in[6];
2020 				strm->next_in += 7;
2021 				strm->avail_in -= 7;
2022 				br->cache_avail += 7 * 8;
2023 				return (1);
2024 			case 6:
2025 				br->cache_buffer =
2026 		 		   (br->cache_buffer << 48) |
2027 				    ((uint64_t)strm->next_in[0]) << 40 |
2028 				    ((uint64_t)strm->next_in[1]) << 32 |
2029 				    ((uint64_t)strm->next_in[2]) << 24 |
2030 				    ((uint64_t)strm->next_in[3]) << 16 |
2031 				    ((uint64_t)strm->next_in[4]) << 8 |
2032 				     (uint64_t)strm->next_in[5];
2033 				strm->next_in += 6;
2034 				strm->avail_in -= 6;
2035 				br->cache_avail += 6 * 8;
2036 				return (1);
2037 			case 0:
2038 				/* We have enough compressed data in
2039 				 * the cache buffer.*/
2040 				return (1);
2041 			default:
2042 				break;
2043 			}
2044 		}
2045 		if (strm->avail_in == 0) {
2046 			/* There is not enough compressed data to fill up the
2047 			 * cache buffer. */
2048 			return (0);
2049 		}
2050 		br->cache_buffer =
2051 		   (br->cache_buffer << 8) | *strm->next_in++;
2052 		strm->avail_in--;
2053 		br->cache_avail += 8;
2054 		n -= 8;
2055 	}
2056 }
2057 
2058 /*
2059  * Decode LZHUF.
2060  *
2061  * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
2062  *    Please set available buffer and call this function again.
2063  * 2. Returns ARCHIVE_EOF if decompression has been completed.
2064  * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
2065  *    is broken or you do not set 'last' flag properly.
2066  * 4. 'last' flag is very important, you must set 1 to the flag if there
2067  *    is no input data. The lha compressed data format does not provide how
2068  *    to know the compressed data is really finished.
2069  *    Note: lha command utility check if the total size of output bytes is
2070  *    reached the uncompressed size recorded in its header. it does not mind
2071  *    that the decoding process is properly finished.
2072  *    GNU ZIP can decompress another compressed file made by SCO LZH compress.
2073  *    it handles EOF as null to fill read buffer with zero until the decoding
2074  *    process meet 2 bytes of zeros at reading a size of a next chunk, so the
2075  *    zeros are treated as the mark of the end of the data although the zeros
2076  *    is dummy, not the file data.
2077  */
2078 static int	lzh_read_blocks(struct lzh_stream *, int);
2079 static int	lzh_decode_blocks(struct lzh_stream *, int);
2080 #define ST_RD_BLOCK		0
2081 #define ST_RD_PT_1		1
2082 #define ST_RD_PT_2		2
2083 #define ST_RD_PT_3		3
2084 #define ST_RD_PT_4		4
2085 #define ST_RD_LITERAL_1		5
2086 #define ST_RD_LITERAL_2		6
2087 #define ST_RD_LITERAL_3		7
2088 #define ST_RD_POS_DATA_1	8
2089 #define ST_GET_LITERAL		9
2090 #define ST_GET_POS_1		10
2091 #define ST_GET_POS_2		11
2092 #define ST_COPY_DATA		12
2093 
2094 static int
lzh_decode(struct lzh_stream * strm,int last)2095 lzh_decode(struct lzh_stream *strm, int last)
2096 {
2097 	struct lzh_dec *ds = strm->ds;
2098 	int avail_in;
2099 	int r;
2100 
2101 	if (ds->error)
2102 		return (ds->error);
2103 
2104 	avail_in = strm->avail_in;
2105 	do {
2106 		if (ds->state < ST_GET_LITERAL)
2107 			r = lzh_read_blocks(strm, last);
2108 		else
2109 			r = lzh_decode_blocks(strm, last);
2110 	} while (r == 100);
2111 	strm->total_in += avail_in - strm->avail_in;
2112 	return (r);
2113 }
2114 
2115 static void
lzh_emit_window(struct lzh_stream * strm,size_t s)2116 lzh_emit_window(struct lzh_stream *strm, size_t s)
2117 {
2118 	strm->ref_ptr = strm->ds->w_buff;
2119 	strm->avail_out = (int)s;
2120 	strm->total_out += s;
2121 }
2122 
/*
 * Parse the header of a group of blocks and build its Huffman tables.
 *
 * This is a resumable state machine driven by ds->state.  In order it
 * reads:
 *   ST_RD_BLOCK         the 16-bit number of blocks in the group, stored
 *                       in ds->blocks_avail (zero is rejected);
 *   ST_RD_PT_1..4       the "pt" bit-length table.  It is read twice:
 *                       first with the literal_pt_* parameters, to decode
 *                       the literal table, and then (ds->reading_position
 *                       set) with the pos_pt_* parameters, as the
 *                       position table;
 *   ST_RD_LITERAL_1..3  the literal table, whose bit lengths are decoded
 *                       with the pt table;
 *   ST_RD_POS_DATA_1    switches pt to the position parameters and goes
 *                       back to ST_RD_PT_1.
 *
 * Returns:
 *   100             state reached ST_GET_LITERAL; lzh_decode() then calls
 *                   lzh_decode_blocks().
 *   ARCHIVE_OK      more input is needed (ds->state holds the state to
 *                   resume from), or `last' is set at the start of a
 *                   group and the bytes still in the window were handed
 *                   out through lzh_emit_window().
 *   ARCHIVE_EOF     `last' is set, no further group can be read and the
 *                   window holds no pending bytes.
 *   ARCHIVE_FAILED  invalid or truncated data; also stored in ds->error.
 */
2123 static int
lzh_read_blocks(struct lzh_stream * strm,int last)2124 lzh_read_blocks(struct lzh_stream *strm, int last)
2125 {
2126 	struct lzh_dec *ds = strm->ds;
2127 	struct lzh_br *br = &(ds->br);
2128 	int c = 0, i;
2129 	unsigned rbits;
2130 
2131 	for (;;) {
2132 		switch (ds->state) {
2133 		case ST_RD_BLOCK:
2134 			/*
2135 			 * Read a block number indicates how many blocks
2136 			 * we will handle. The block is composed of a
2137 			 * literal and a match, sometimes a literal only
2138 			 * in particular, there are no reference data at
2139 			 * the beginning of the decompression.
2140 			 */
2141 			if (!lzh_br_read_ahead_0(strm, br, 16)) {
2142 				if (!last)
2143 					/* We need following data. */
2144 					return (ARCHIVE_OK);
2145 				if (lzh_br_has(br, 8)) {
2146 					/*
2147 					 * It seems there are extra bits.
2148 					 *  1. Compressed data is broken.
2149 					 *  2. `last' flag does not properly
2150 					 *     set.
2151 					 */
2152 					goto failed;
2153 				}
2154 				if (ds->w_pos > 0) {
2155 					lzh_emit_window(strm, ds->w_pos);
2156 					ds->w_pos = 0;
2157 					return (ARCHIVE_OK);
2158 				}
2159 				/* End of compressed data; we have completely
2160 				 * handled all compressed data. */
2161 				return (ARCHIVE_EOF);
2162 			}
2163 			ds->blocks_avail = lzh_br_bits(br, 16);
2164 			if (ds->blocks_avail == 0)
2165 				goto failed;
2166 			lzh_br_consume(br, 16);
2167 			/*
2168 			 * Read a literal table compressed in huffman
2169 			 * coding.
2170 			 */
2171 			ds->pt.len_size = ds->literal_pt_len_size;
2172 			ds->pt.len_bits = ds->literal_pt_len_bits;
2173 			ds->reading_position = 0;
2174 			/* FALL THROUGH */
2175 		case ST_RD_PT_1:
2176 			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2177 			 * used in reading both a literal table and a
2178 			 * position table. */
2179 			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2180 				if (last)
2181 					goto failed;/* Truncated data. */
2182 				ds->state = ST_RD_PT_1;
2183 				return (ARCHIVE_OK);
2184 			}
2185 			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2186 			lzh_br_consume(br, ds->pt.len_bits);
2187 			/* FALL THROUGH */
2188 		case ST_RD_PT_2:
2189 			if (ds->pt.len_avail == 0) {
2190 				/* There is no bitlen. */
2191 				if (!lzh_br_read_ahead(strm, br,
2192 				    ds->pt.len_bits)) {
2193 					if (last)
2194 						goto failed;/* Truncated data.*/
2195 					ds->state = ST_RD_PT_2;
2196 					return (ARCHIVE_OK);
2197 				}
2198 				if (!lzh_make_fake_table(&(ds->pt),
2199 				    lzh_br_bits(br, ds->pt.len_bits)))
2200 					goto failed;/* Invalid data. */
2201 				lzh_br_consume(br, ds->pt.len_bits);
2202 				if (ds->reading_position)
2203 					ds->state = ST_GET_LITERAL;
2204 				else
2205 					ds->state = ST_RD_LITERAL_1;
2206 				break;
2207 			} else if (ds->pt.len_avail > ds->pt.len_size)
2208 				goto failed;/* Invalid data. */
2209 			ds->loop = 0;
2210 			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
			/* With fewer than three lengths, or when this is the
			 * position table (len_size equals pos_pt_len_size),
			 * skip ST_RD_PT_3 and read every length in
			 * ST_RD_PT_4. */
2211 			if (ds->pt.len_avail < 3 ||
2212 			    ds->pt.len_size == ds->pos_pt_len_size) {
2213 				ds->state = ST_RD_PT_4;
2214 				break;
2215 			}
2216 			/* FALL THROUGH */
2217 		case ST_RD_PT_3:
2218 			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2219 			if (ds->loop < 3) {
2220 				if (ds->loop < 0 || last)
2221 					goto failed;/* Invalid data. */
2222 				/* Not completed, get following data. */
2223 				ds->state = ST_RD_PT_3;
2224 				return (ARCHIVE_OK);
2225 			}
2226 			/* There are some null in bitlen of the literal. */
2227 			if (!lzh_br_read_ahead(strm, br, 2)) {
2228 				if (last)
2229 					goto failed;/* Truncated data. */
2230 				ds->state = ST_RD_PT_3;
2231 				return (ARCHIVE_OK);
2232 			}
2233 			c = lzh_br_bits(br, 2);
2234 			lzh_br_consume(br, 2);
2235 			if (c > ds->pt.len_avail - 3)
2236 				goto failed;/* Invalid data. */
2237 			for (i = 3; c-- > 0 ;)
2238 				ds->pt.bitlen[i++] = 0;
2239 			ds->loop = i;
2240 			/* FALL THROUGH */
2241 		case ST_RD_PT_4:
2242 			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2243 			    ds->pt.len_avail);
2244 			if (ds->loop < ds->pt.len_avail) {
2245 				if (ds->loop < 0 || last)
2246 					goto failed;/* Invalid data. */
2247 				/* Not completed, get following data. */
2248 				ds->state = ST_RD_PT_4;
2249 				return (ARCHIVE_OK);
2250 			}
2251 			if (!lzh_make_huffman_table(&(ds->pt)))
2252 				goto failed;/* Invalid data */
2253 			if (ds->reading_position) {
2254 				ds->state = ST_GET_LITERAL;
2255 				break;
2256 			}
2257 			/* FALL THROUGH */
2258 		case ST_RD_LITERAL_1:
2259 			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2260 				if (last)
2261 					goto failed;/* Truncated data. */
2262 				ds->state = ST_RD_LITERAL_1;
2263 				return (ARCHIVE_OK);
2264 			}
2265 			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2266 			lzh_br_consume(br, ds->lt.len_bits);
2267 			/* FALL THROUGH */
2268 		case ST_RD_LITERAL_2:
2269 			if (ds->lt.len_avail == 0) {
2270 				/* There is no bitlen. */
2271 				if (!lzh_br_read_ahead(strm, br,
2272 				    ds->lt.len_bits)) {
2273 					if (last)
2274 						goto failed;/* Truncated data.*/
2275 					ds->state = ST_RD_LITERAL_2;
2276 					return (ARCHIVE_OK);
2277 				}
2278 				if (!lzh_make_fake_table(&(ds->lt),
2279 				    lzh_br_bits(br, ds->lt.len_bits)))
2280 					goto failed;/* Invalid data */
2281 				lzh_br_consume(br, ds->lt.len_bits);
2282 				ds->state = ST_RD_POS_DATA_1;
2283 				break;
2284 			} else if (ds->lt.len_avail > ds->lt.len_size)
2285 				goto failed;/* Invalid data */
2286 			ds->loop = 0;
2287 			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2288 			/* FALL THROUGH */
2289 		case ST_RD_LITERAL_3:
2290 			i = ds->loop;
2291 			while (i < ds->lt.len_avail) {
2292 				if (!lzh_br_read_ahead(strm, br,
2293 				    ds->pt.max_bits)) {
2294 					if (last)
2295 						goto failed;/* Truncated data.*/
2296 					ds->loop = i;
2297 					ds->state = ST_RD_LITERAL_3;
2298 					return (ARCHIVE_OK);
2299 				}
2300 				rbits = lzh_br_bits(br, ds->pt.max_bits);
2301 				c = lzh_decode_huffman(&(ds->pt), rbits);
2302 				if (c > 2) {
2303 					/* Note: 'c' will never be more than
2304 					 * eighteen since it's limited by
2305 					 * PT_BITLEN_SIZE, which is being set
2306 					 * to ds->pt.len_size through
2307 					 * ds->literal_pt_len_size. */
2308 					lzh_br_consume(br, ds->pt.bitlen[c]);
2309 					c -= 2;
2310 					ds->lt.freq[c]++;
2311 					ds->lt.bitlen[i++] = c;
2312 				} else if (c == 0) {
2313 					lzh_br_consume(br, ds->pt.bitlen[c]);
2314 					ds->lt.bitlen[i++] = 0;
2315 				} else {
2316 					/* c == 1 or c == 2 */
					/* A run of zero lengths: the code is
					 * followed by a 4-bit (c == 1) or
					 * 9-bit (c == 2) count, to which 3 or
					 * 20 is added below. */
2317 					int n = (c == 1)?4:9;
2318 					if (!lzh_br_read_ahead(strm, br,
2319 					     ds->pt.bitlen[c] + n)) {
2320 						if (last) /* Truncated data. */
2321 							goto failed;
2322 						ds->loop = i;
2323 						ds->state = ST_RD_LITERAL_3;
2324 						return (ARCHIVE_OK);
2325 					}
2326 					lzh_br_consume(br, ds->pt.bitlen[c]);
2327 					c = lzh_br_bits(br, n);
2328 					lzh_br_consume(br, n);
2329 					c += (n == 4)?3:20;
2330 					if (i + c > ds->lt.len_avail)
2331 						goto failed;/* Invalid data */
2332 					memset(&(ds->lt.bitlen[i]), 0, c);
2333 					i += c;
2334 				}
2335 			}
2336 			if (i > ds->lt.len_avail ||
2337 			    !lzh_make_huffman_table(&(ds->lt)))
2338 				goto failed;/* Invalid data */
2339 			/* FALL THROUGH */
2340 		case ST_RD_POS_DATA_1:
2341 			/*
2342 			 * Read a position table compressed in huffman
2343 			 * coding.
2344 			 */
2345 			ds->pt.len_size = ds->pos_pt_len_size;
2346 			ds->pt.len_bits = ds->pos_pt_len_bits;
2347 			ds->reading_position = 1;
2348 			ds->state = ST_RD_PT_1;
2349 			break;
2350 		case ST_GET_LITERAL:
			/* All tables are built.  100 makes lzh_decode()
			 * dispatch again, this time to
			 * lzh_decode_blocks(). */
2351 			return (100);
2352 		}
2353 	}
2354 failed:
2355 	return (ds->error = ARCHIVE_FAILED);
2356 }
2357 
/*
 * Decode literal and match codes of the current group into the window.
 *
 * The function works on local copies of the bit reader and of several
 * decoder fields; they are written back to *ds only on the way out (at
 * next_data and at the blocks_avail == 0 exit).  States:
 *   ST_GET_LITERAL  decode one code with the literal table.  Values up to
 *                   UCHAR_MAX are literals and are stored in the window;
 *                   larger values give a match length of
 *                   c - (UCHAR_MAX + 1) + MINMATCH.
 *   ST_GET_POS_1    decode the position code with the position table.
 *   ST_GET_POS_2    for codes greater than 1, read (code - 1) extra bits
 *                   to form the distance; then turn the distance into a
 *                   window index.
 *   ST_COPY_DATA    copy the match inside the window, in pieces that do
 *                   not cross the end of the window.
 *
 * Returns:
 *   100             every block of the group has been decoded; state is
 *                   set to ST_RD_BLOCK so that lzh_decode() calls
 *                   lzh_read_blocks() next.
 *   ARCHIVE_OK      input ran out while `last' is not set, or the window
 *                   became full and was handed out with
 *                   lzh_emit_window().
 *   ARCHIVE_FAILED  more bits were consumed than were available, or the
 *                   data is truncated; also stored in ds->error.
 */
2358 static int
lzh_decode_blocks(struct lzh_stream * strm,int last)2359 lzh_decode_blocks(struct lzh_stream *strm, int last)
2360 {
2361 	struct lzh_dec *ds = strm->ds;
2362 	struct lzh_br bre = ds->br;
2363 	struct huffman *lt = &(ds->lt);
2364 	struct huffman *pt = &(ds->pt);
2365 	unsigned char *w_buff = ds->w_buff;
2366 	unsigned char *lt_bitlen = lt->bitlen;
2367 	unsigned char *pt_bitlen = pt->bitlen;
2368 	int blocks_avail = ds->blocks_avail, c = 0;
2369 	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2370 	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2371 	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2372 	int state = ds->state;
2373 
2374 	for (;;) {
2375 		switch (state) {
2376 		case ST_GET_LITERAL:
2377 			for (;;) {
2378 				if (blocks_avail == 0) {
2379 					/* We have decoded all blocks.
2380 					 * Let's handle next blocks. */
2381 					ds->state = ST_RD_BLOCK;
2382 					ds->br = bre;
2383 					ds->blocks_avail = 0;
2384 					ds->w_pos = w_pos;
2385 					ds->copy_pos = 0;
					/* 100 makes lzh_decode() dispatch
					 * again, to lzh_read_blocks(). */
2386 					return (100);
2387 				}
2388 
2389 				/* lzh_br_read_ahead() always tries to fill the
2390 				 * cache buffer up. In specific situation we
2391 				 * are close to the end of the data, the cache
2392 				 * buffer will not be full and thus we have to
2393 				 * determine if the cache buffer has some bits
2394 				 * as much as we need after lzh_br_read_ahead()
2395 				 * failed. */
2396 				if (!lzh_br_read_ahead(strm, &bre,
2397 				    lt_max_bits)) {
2398 					if (!last)
2399 						goto next_data;
2400 					/* Remaining bits are less than
2401 					 * maximum bits(lt.max_bits) but maybe
2402 					 * it still remains as much as we need,
2403 					 * so we should try to use it with
2404 					 * dummy bits. */
2405 					c = lzh_decode_huffman(lt,
2406 					      lzh_br_bits_forced(&bre,
2407 					        lt_max_bits));
2408 					lzh_br_consume(&bre, lt_bitlen[c]);
2409 					if (!lzh_br_has(&bre, 0))
2410 						goto failed;/* Over read. */
2411 				} else {
2412 					c = lzh_decode_huffman(lt,
2413 					      lzh_br_bits(&bre, lt_max_bits));
2414 					lzh_br_consume(&bre, lt_bitlen[c]);
2415 				}
2416 				blocks_avail--;
2417 				if (c > UCHAR_MAX)
2418 					/* Current block is a match data. */
2419 					break;
2420 				/*
2421 				 * 'c' is exactly a literal code.
2422 				 */
2423 				/* Save a decoded code to reference it
2424 				 * afterward. */
2425 				w_buff[w_pos] = c;
2426 				if (++w_pos >= w_size) {
					/* The window is full: hand all of
					 * it out and start again at 0. */
2427 					w_pos = 0;
2428 					lzh_emit_window(strm, w_size);
2429 					goto next_data;
2430 				}
2431 			}
2432 			/* 'c' is the length of a match pattern we have
2433 			 * already extracted, which has be stored in
2434 			 * window(ds->w_buff). */
2435 			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2436 			/* FALL THROUGH */
2437 		case ST_GET_POS_1:
2438 			/*
2439 			 * Get a reference position.
2440 			 */
2441 			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2442 				if (!last) {
2443 					state = ST_GET_POS_1;
2444 					ds->copy_len = copy_len;
2445 					goto next_data;
2446 				}
2447 				copy_pos = lzh_decode_huffman(pt,
2448 				    lzh_br_bits_forced(&bre, pt_max_bits));
2449 				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2450 				if (!lzh_br_has(&bre, 0))
2451 					goto failed;/* Over read. */
2452 			} else {
2453 				copy_pos = lzh_decode_huffman(pt,
2454 				    lzh_br_bits(&bre, pt_max_bits));
2455 				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2456 			}
2457 			/* FALL THROUGH */
2458 		case ST_GET_POS_2:
2459 			if (copy_pos > 1) {
2460 				/* We need an additional adjustment number to
2461 				 * the position. */
2462 				int p = copy_pos - 1;
2463 				if (!lzh_br_read_ahead(strm, &bre, p)) {
2464 					if (last)
2465 						goto failed;/* Truncated data.*/
2466 					state = ST_GET_POS_2;
2467 					ds->copy_len = copy_len;
2468 					ds->copy_pos = copy_pos;
2469 					goto next_data;
2470 				}
2471 				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2472 				lzh_br_consume(&bre, p);
2473 			}
2474 			/* The position is actually a distance from the last
2475 			 * code we had extracted and thus we have to convert
2476 			 * it to a position of the window. */
2477 			copy_pos = (w_pos - copy_pos - 1) & w_mask;
2478 			/* FALL THROUGH */
2479 		case ST_COPY_DATA:
2480 			/*
2481 			 * Copy `copy_len' bytes as extracted data from
2482 			 * the window into the output buffer.
2483 			 */
2484 			for (;;) {
2485 				int l;
2486 
				/* Limit this piece so that neither the
				 * source nor the destination runs past the
				 * end of the window. */
2487 				l = copy_len;
2488 				if (copy_pos > w_pos) {
2489 					if (l > w_size - copy_pos)
2490 						l = w_size - copy_pos;
2491 				} else {
2492 					if (l > w_size - w_pos)
2493 						l = w_size - w_pos;
2494 				}
2495 				if ((copy_pos + l < w_pos)
2496 				    || (w_pos + l < copy_pos)) {
2497 					/* No overlap. */
2498 					memcpy(w_buff + w_pos,
2499 					    w_buff + copy_pos, l);
2500 				} else {
					/* Possible overlap: copy forward one
					 * byte at a time (unrolled by two)
					 * instead of using memcpy(). */
2501 					const unsigned char *s;
2502 					unsigned char *d;
2503 					int li;
2504 
2505 					d = w_buff + w_pos;
2506 					s = w_buff + copy_pos;
2507 					for (li = 0; li < l-1;) {
2508 						d[li] = s[li];li++;
2509 						d[li] = s[li];li++;
2510 					}
2511 					if (li < l)
2512 						d[li] = s[li];
2513 				}
2514 				w_pos += l;
2515 				if (w_pos == w_size) {
2516 					w_pos = 0;
2517 					lzh_emit_window(strm, w_size);
2518 					if (copy_len <= l)
2519 						state = ST_GET_LITERAL;
2520 					else {
						/* Save the unfinished copy
						 * so the next call resumes
						 * it. */
2521 						state = ST_COPY_DATA;
2522 						ds->copy_len = copy_len - l;
2523 						ds->copy_pos =
2524 						    (copy_pos + l) & w_mask;
2525 					}
2526 					goto next_data;
2527 				}
2528 				if (copy_len <= l)
2529 					/* A copy of current pattern ended. */
2530 					break;
2531 				copy_len -= l;
2532 				copy_pos = (copy_pos + l) & w_mask;
2533 			}
2534 			state = ST_GET_LITERAL;
2535 			break;
2536 		}
2537 	}
2538 failed:
2539 	return (ds->error = ARCHIVE_FAILED);
2540 next_data:
	/* Write the working copies back so the next call can resume. */
2541 	ds->br = bre;
2542 	ds->blocks_avail = blocks_avail;
2543 	ds->state = state;
2544 	ds->w_pos = w_pos;
2545 	return (ARCHIVE_OK);
2546 }
2547 
2548 static int
lzh_huffman_init(struct huffman * hf,size_t len_size,int tbl_bits)2549 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2550 {
2551 	int bits;
2552 
2553 	if (hf->bitlen == NULL) {
2554 		hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2555 		if (hf->bitlen == NULL)
2556 			return (ARCHIVE_FATAL);
2557 	}
2558 	if (hf->tbl == NULL) {
2559 		if (tbl_bits < HTBL_BITS)
2560 			bits = tbl_bits;
2561 		else
2562 			bits = HTBL_BITS;
2563 		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2564 		if (hf->tbl == NULL)
2565 			return (ARCHIVE_FATAL);
2566 	}
2567 	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2568 		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2569 		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2570 		if (hf->tree == NULL)
2571 			return (ARCHIVE_FATAL);
2572 	}
2573 	hf->len_size = (int)len_size;
2574 	hf->tbl_bits = tbl_bits;
2575 	return (ARCHIVE_OK);
2576 }
2577 
2578 static void
lzh_huffman_free(struct huffman * hf)2579 lzh_huffman_free(struct huffman *hf)
2580 {
2581 	free(hf->bitlen);
2582 	free(hf->tbl);
2583 	free(hf->tree);
2584 }
2585 
/*
 * Lookup table used by lzh_read_pt_bitlen() for bit lengths of 7 or more.
 * Such a length is coded as a run of 1 bits ended by a 0 bit.  The reader
 * peeks at 13 bits whose first three are already known to be 1 and uses
 * the low 10 bits as the index; the entry is the decoded length (7 to 16).
 * The last entry (all ten bits set) is 0, which the reader treats as
 * invalid data.
 */
2586 static const char bitlen_tbl[0x400] = {
2587 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2588 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2589 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2590 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2591 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2592 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2593 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2594 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2595 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2596 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2597 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2598 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2599 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2600 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2601 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2602 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2603 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2604 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2605 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2606 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2607 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2608 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2609 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2610 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2611 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2612 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2613 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2614 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2615 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2616 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2617 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2618 	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2619 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2620 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2621 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2622 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2623 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2624 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2625 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2626 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2627 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2628 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2629 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2630 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2631 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2632 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2633 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2634 	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2635 	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2636 	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2637 	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2638 	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2639 	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2640 	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2641 	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2642 	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2643 	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2644 	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2645 	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2646 	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2647 	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2648 	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2649 	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2650 	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
2651 };
2652 static int
lzh_read_pt_bitlen(struct lzh_stream * strm,int start,int end)2653 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2654 {
2655 	struct lzh_dec *ds = strm->ds;
2656 	struct lzh_br *br = &(ds->br);
2657 	int c, i;
2658 
2659 	for (i = start; i < end; ) {
2660 		/*
2661 		 *  bit pattern     the number we need
2662 		 *     000           ->  0
2663 		 *     001           ->  1
2664 		 *     010           ->  2
2665 		 *     ...
2666 		 *     110           ->  6
2667 		 *     1110          ->  7
2668 		 *     11110         ->  8
2669 		 *     ...
2670 		 *     1111111111110 ->  16
2671 		 */
2672 		if (!lzh_br_read_ahead(strm, br, 3))
2673 			return (i);
2674 		if ((c = lzh_br_bits(br, 3)) == 7) {
2675 			if (!lzh_br_read_ahead(strm, br, 13))
2676 				return (i);
2677 			c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2678 			if (c)
2679 				lzh_br_consume(br, c - 3);
2680 			else
2681 				return (-1);/* Invalid data. */
2682 		} else
2683 			lzh_br_consume(br, 3);
2684 		ds->pt.bitlen[i++] = c;
2685 		ds->pt.freq[c]++;
2686 	}
2687 	return (i);
2688 }
2689 
2690 static int
lzh_make_fake_table(struct huffman * hf,uint16_t c)2691 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2692 {
2693 	if (c >= hf->len_size)
2694 		return (0);
2695 	hf->tbl[0] = c;
2696 	hf->max_bits = 0;
2697 	hf->shift_bits = 0;
2698 	hf->bitlen[hf->tbl[0]] = 0;
2699 	return (1);
2700 }
2701 
2702 /*
2703  * Make a huffman coding table.
2704  */
2705 static int
lzh_make_huffman_table(struct huffman * hf)2706 lzh_make_huffman_table(struct huffman *hf)
2707 {
2708 	uint16_t *tbl;
2709 	const unsigned char *bitlen;
2710 	int bitptn[17], weight[17];
2711 	int i, maxbits = 0, ptn, tbl_size, w;
2712 	int diffbits, len_avail;
2713 
2714 	/*
2715 	 * Initialize bit patterns.
2716 	 */
2717 	ptn = 0;
2718 	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2719 		bitptn[i] = ptn;
2720 		weight[i] = w;
2721 		if (hf->freq[i]) {
2722 			ptn += hf->freq[i] * w;
2723 			maxbits = i;
2724 		}
2725 	}
2726 	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2727 		return (0);/* Invalid */
2728 
2729 	hf->max_bits = maxbits;
2730 
2731 	/*
2732 	 * Cut out extra bits which we won't house in the table.
2733 	 * This preparation reduces the same calculation in the for-loop
2734 	 * making the table.
2735 	 */
2736 	if (maxbits < 16) {
2737 		int ebits = 16 - maxbits;
2738 		for (i = 1; i <= maxbits; i++) {
2739 			bitptn[i] >>= ebits;
2740 			weight[i] >>= ebits;
2741 		}
2742 	}
2743 	if (maxbits > HTBL_BITS) {
2744 		unsigned htbl_max;
2745 		uint16_t *p;
2746 
2747 		diffbits = maxbits - HTBL_BITS;
2748 		for (i = 1; i <= HTBL_BITS; i++) {
2749 			bitptn[i] >>= diffbits;
2750 			weight[i] >>= diffbits;
2751 		}
2752 		htbl_max = bitptn[HTBL_BITS] +
2753 		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2754 		p = &(hf->tbl[htbl_max]);
2755 		while (p < &hf->tbl[1U<<HTBL_BITS])
2756 			*p++ = 0;
2757 	} else
2758 		diffbits = 0;
2759 	hf->shift_bits = diffbits;
2760 
2761 	/*
2762 	 * Make the table.
2763 	 */
2764 	tbl_size = 1 << HTBL_BITS;
2765 	tbl = hf->tbl;
2766 	bitlen = hf->bitlen;
2767 	len_avail = hf->len_avail;
2768 	hf->tree_used = 0;
2769 	for (i = 0; i < len_avail; i++) {
2770 		uint16_t *p;
2771 		int len, cnt;
2772 		uint16_t bit;
2773 		int extlen;
2774 		struct htree_t *ht;
2775 
2776 		if (bitlen[i] == 0)
2777 			continue;
2778 		/* Get a bit pattern */
2779 		len = bitlen[i];
2780 		ptn = bitptn[len];
2781 		cnt = weight[len];
2782 		if (len <= HTBL_BITS) {
2783 			/* Calculate next bit pattern */
2784 			if ((bitptn[len] = ptn + cnt) > tbl_size)
2785 				return (0);/* Invalid */
2786 			/* Update the table */
2787 			p = &(tbl[ptn]);
2788 			if (cnt > 7) {
2789 				uint16_t *pc;
2790 
2791 				cnt -= 8;
2792 				pc = &p[cnt];
2793 				pc[0] = (uint16_t)i;
2794 				pc[1] = (uint16_t)i;
2795 				pc[2] = (uint16_t)i;
2796 				pc[3] = (uint16_t)i;
2797 				pc[4] = (uint16_t)i;
2798 				pc[5] = (uint16_t)i;
2799 				pc[6] = (uint16_t)i;
2800 				pc[7] = (uint16_t)i;
2801 				if (cnt > 7) {
2802 					cnt -= 8;
2803 					memcpy(&p[cnt], pc,
2804 						8 * sizeof(uint16_t));
2805 					pc = &p[cnt];
2806 					while (cnt > 15) {
2807 						cnt -= 16;
2808 						memcpy(&p[cnt], pc,
2809 							16 * sizeof(uint16_t));
2810 					}
2811 				}
2812 				if (cnt)
2813 					memcpy(p, pc, cnt * sizeof(uint16_t));
2814 			} else {
2815 				while (cnt > 1) {
2816 					p[--cnt] = (uint16_t)i;
2817 					p[--cnt] = (uint16_t)i;
2818 				}
2819 				if (cnt)
2820 					p[--cnt] = (uint16_t)i;
2821 			}
2822 			continue;
2823 		}
2824 
2825 		/*
2826 		 * A bit length is too big to be housed to a direct table,
2827 		 * so we use a tree model for its extra bits.
2828 		 */
2829 		bitptn[len] = ptn + cnt;
2830 		bit = 1U << (diffbits -1);
2831 		extlen = len - HTBL_BITS;
2832 
2833 		p = &(tbl[ptn >> diffbits]);
2834 		if (*p == 0) {
2835 			*p = len_avail + hf->tree_used;
2836 			ht = &(hf->tree[hf->tree_used++]);
2837 			if (hf->tree_used > hf->tree_avail)
2838 				return (0);/* Invalid */
2839 			ht->left = 0;
2840 			ht->right = 0;
2841 		} else {
2842 			if (*p < len_avail ||
2843 			    *p >= (len_avail + hf->tree_used))
2844 				return (0);/* Invalid */
2845 			ht = &(hf->tree[*p - len_avail]);
2846 		}
2847 		while (--extlen > 0) {
2848 			if (ptn & bit) {
2849 				if (ht->left < len_avail) {
2850 					ht->left = len_avail + hf->tree_used;
2851 					ht = &(hf->tree[hf->tree_used++]);
2852 					if (hf->tree_used > hf->tree_avail)
2853 						return (0);/* Invalid */
2854 					ht->left = 0;
2855 					ht->right = 0;
2856 				} else {
2857 					ht = &(hf->tree[ht->left - len_avail]);
2858 				}
2859 			} else {
2860 				if (ht->right < len_avail) {
2861 					ht->right = len_avail + hf->tree_used;
2862 					ht = &(hf->tree[hf->tree_used++]);
2863 					if (hf->tree_used > hf->tree_avail)
2864 						return (0);/* Invalid */
2865 					ht->left = 0;
2866 					ht->right = 0;
2867 				} else {
2868 					ht = &(hf->tree[ht->right - len_avail]);
2869 				}
2870 			}
2871 			bit >>= 1;
2872 		}
2873 		if (ptn & bit) {
2874 			if (ht->left != 0)
2875 				return (0);/* Invalid */
2876 			ht->left = (uint16_t)i;
2877 		} else {
2878 			if (ht->right != 0)
2879 				return (0);/* Invalid */
2880 			ht->right = (uint16_t)i;
2881 		}
2882 	}
2883 	return (1);
2884 }
2885 
2886 static int
lzh_decode_huffman_tree(struct huffman * hf,unsigned rbits,int c)2887 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2888 {
2889 	struct htree_t *ht;
2890 	int extlen;
2891 
2892 	ht = hf->tree;
2893 	extlen = hf->shift_bits;
2894 	while (c >= hf->len_avail) {
2895 		c -= hf->len_avail;
2896 		if (extlen-- <= 0 || c >= hf->tree_used)
2897 			return (0);
2898 		if (rbits & (1U << extlen))
2899 			c = ht[c].left;
2900 		else
2901 			c = ht[c].right;
2902 	}
2903 	return (c);
2904 }
2905 
2906 static inline int
lzh_decode_huffman(struct huffman * hf,unsigned rbits)2907 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2908 {
2909 	int c;
2910 	/*
2911 	 * At first search an index table for a bit pattern.
2912 	 * If it fails, search a huffman tree for.
2913 	 */
2914 	c = hf->tbl[rbits >> hf->shift_bits];
2915 	if (c < hf->len_avail || hf->len_avail == 0)
2916 		return (c);
2917 	/* This bit pattern needs to be found out at a huffman tree. */
2918 	return (lzh_decode_huffman_tree(hf, rbits, c));
2919 }
2920 
2921