xref: /freebsd/contrib/libarchive/libarchive/archive_read_support_format_lha.c (revision 2e113ef82465598b8c26e0ca415fbe90677fbd47)
1 /*-
2  * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_time_private.h"
47 #include "archive_endian.h"
48 
49 
/*
 * Match length limits of the LZH coder.
 */
#define MINMATCH		3	/* Shortest match length. */
#define MAXMATCH		256	/* Longest match length. */

/*
 * Layout of the literal table:
 *
 * +0              +256                      +510
 * +---------------+-------------------------+
 * | literal code  |       match length      |
 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
 * +---------------+-------------------------+
 *  <---          LT_BITLEN_SIZE         --->
 *
 * i.e. one slot per literal byte value followed by one slot per
 * possible match length.
 */
#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)

/*
 * Size of the position table.
 * Note: this size is used for both the position table and the
 * pre-literal table.
 */
#define PT_BITLEN_SIZE		(3 + 16)
66 
/*
 * State of one LZH decoder instance.
 */
struct lzh_dec {
	/* Current decoding status. */
	int			 state;

	/*
	 * Sliding window over the most recently decoded data:
	 * 8Ki (lh5), 32Ki (lh6) or 64Ki (lh7) bytes.
	 */
	int			 w_size;
	int			 w_mask;
	/* Storage of the window; it is used as a loop (ring) buffer. */
	unsigned char		*w_buff;
	/* Where the next decoded byte is inserted into the window. */
	int			 w_pos;
	/* Window position from which already decoded data can be copied. */
	int			 copy_pos;
	/* Number of bytes that can be copied starting at copy_pos. */
	int			 copy_len;

	/*
	 * Bit stream reader.
	 */
	struct lzh_br {
#define CACHE_TYPE		uint64_t
#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
		/* Bits fetched from the input but not yet consumed. */
		CACHE_TYPE	 cache_buffer;
		/* Number of valid bits currently held in cache_buffer. */
		int		 cache_avail;
	} br;

	/*
	 * Huffman coding tables; `lt' and `pt' share this layout.
	 */
	struct huffman {
		int		 len_size;
		int		 len_avail;
		int		 len_bits;
		int		 freq[17];
		unsigned char	*bitlen;

		/*
		 * Symbols are looked up through an index table, which is
		 * faster than walking a binary Huffman tree.  The table is
		 * kept small because a large index table would cause many
		 * L1 cache read misses.
		 */
#define HTBL_BITS	10
		int		 max_bits;
		int		 shift_bits;
		int		 tbl_bits;
		int		 tree_used;
		int		 tree_avail;
		/* Direct access (index) table. */
		uint16_t	*tbl;
		/* Binary tree for codes longer than the direct table. */
		struct htree_t {
			uint16_t left;
			uint16_t right;
		}		*tree;
	}			 lt, pt;

	int			 blocks_avail;
	int			 pos_pt_len_size;
	int			 pos_pt_len_bits;
	int			 literal_pt_len_size;
	int			 literal_pt_len_bits;
	int			 reading_position;
	int			 loop;
	int			 error;
};
138 
/*
 * Input/output bookkeeping handed to the LZH decoder.
 * NOTE(review): the per-member roles below are inferred from the names
 * (next_in/avail_in/total_in etc.); confirm against lzh_decode().
 */
struct lzh_stream {
	/* Input side. */
	const unsigned char	*next_in;
	int			 avail_in;
	int64_t			 total_in;
	/* Output side. */
	const unsigned char	*ref_ptr;
	int			 avail_out;
	int64_t			 total_out;
	/* Decoder state belonging to this stream. */
	struct lzh_dec		*ds;
};
148 
149 struct lha {
150 	/* entry_bytes_remaining is the number of bytes we expect.	    */
151 	int64_t                  entry_offset;
152 	int64_t                  entry_bytes_remaining;
153 	int64_t			 entry_unconsumed;
154 	uint16_t		 entry_crc_calculated;
155 
156 	size_t			 header_size;	/* header size		    */
157 	unsigned char		 level;		/* header level		    */
158 	char			 method[3];	/* compress type	    */
159 	int64_t			 compsize;	/* compressed data size	    */
160 	int64_t			 origsize;	/* original file size	    */
161 	int			 setflag;
162 #define BIRTHTIME_IS_SET	1
163 #define ATIME_IS_SET		2
164 #define UNIX_MODE_IS_SET	4
165 #define CRC_IS_SET		8
166 	int64_t			 birthtime;
167 	uint32_t		 birthtime_tv_nsec;
168 	int64_t			 mtime;
169 	uint32_t		 mtime_tv_nsec;
170 	int64_t			 atime;
171 	uint32_t		 atime_tv_nsec;
172 	mode_t			 mode;
173 	int64_t			 uid;
174 	int64_t			 gid;
175 	struct archive_string 	 uname;
176 	struct archive_string 	 gname;
177 	uint16_t		 header_crc;
178 	uint16_t		 crc;
179 	/* dirname and filename could be in different codepages */
180 	struct archive_string_conv *sconv_dir;
181 	struct archive_string_conv *sconv_fname;
182 	struct archive_string_conv *opt_sconv;
183 
184 	struct archive_string 	 dirname;
185 	struct archive_string 	 filename;
186 	struct archive_wstring	 ws;
187 
188 	unsigned char		 dos_attr;
189 
190 	/* Flag to mark progress that an archive was read their first header.*/
191 	char			 found_first_header;
192 	/* Flag to mark that indicates an empty directory. */
193 	char			 directory;
194 
195 	/* Flags to mark progress of decompression. */
196 	char			 decompress_init;
197 	char			 end_of_entry;
198 	char			 end_of_entry_cleanup;
199 	char			 entry_is_compressed;
200 
201 	char			 format_name[64];
202 
203 	struct lzh_stream	 strm;
204 };
205 
/*
 * Offsets of the members shared by every LHA header level, and the
 * smallest number of header bytes needed to inspect them.
 */
#define H_METHOD_OFFSET	2	/* Compression type ("-l??-"). */
#define H_ATTR_OFFSET	19	/* DOS attribute. */
#define H_LEVEL_OFFSET	20	/* Header level.  */
#define H_SIZE		22	/* Minimum header size. */
213 
/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_lha_bid(struct archive_read *a,
		    int best_bid);
static int	archive_read_format_lha_options(struct archive_read *a,
		    const char *key, const char *val);
static int	archive_read_format_lha_read_header(struct archive_read *a,
		    struct archive_entry *entry);
static int	archive_read_format_lha_read_data(struct archive_read *,
		    const void **, size_t *, int64_t *);
static int	archive_read_format_lha_read_data_skip(struct archive_read *);
static int	archive_read_format_lha_cleanup(struct archive_read *);

/* Header parsing. */
static void	lha_replace_path_separator(struct lha *lha,
		    struct archive_entry *entry);
static int	lha_read_file_header_0(struct archive_read *a,
		    struct lha *lha);
static int	lha_read_file_header_1(struct archive_read *a,
		    struct lha *lha);
static int	lha_read_file_header_2(struct archive_read *, struct lha *);
static int	lha_read_file_header_3(struct archive_read *, struct lha *);
static int	lha_read_file_extended_header(struct archive_read *,
		    struct lha *, uint16_t *, int, uint64_t, size_t *);
static size_t	lha_check_header_format(const void *h);
static int	lha_skip_sfx(struct archive_read *a);
static unsigned char	lha_calcsum(unsigned char, const void *,
		    int, size_t);
static int	lha_parse_linkname(struct archive_wstring *,
		    struct archive_wstring *);

/* Entry data readers and CRC. */
static int	lha_read_data_none(struct archive_read *, const void **,
		    size_t *, int64_t *);
static int	lha_read_data_lzh(struct archive_read *, const void **,
		    size_t *, int64_t *);
static void	lha_crc16_init(void);
static uint16_t lha_crc16(uint16_t, const void *, size_t);

/* LZH decoder. */
static int	lzh_decode_init(struct lzh_stream *, const char *);
static void	lzh_decode_free(struct lzh_stream *);
static int	lzh_decode(struct lzh_stream *, int);
static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
static int	lzh_huffman_init(struct huffman *, size_t, int);
static void	lzh_huffman_free(struct huffman *);
static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
static int	lzh_make_fake_table(struct huffman *, uint16_t);
static int	lzh_make_huffman_table(struct huffman *);
static inline int lzh_decode_huffman(struct huffman *, unsigned);
static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
255 
256 
257 int
archive_read_support_format_lha(struct archive * _a)258 archive_read_support_format_lha(struct archive *_a)
259 {
260 	struct archive_read *a = (struct archive_read *)_a;
261 	struct lha *lha;
262 	int r;
263 
264 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
265 	    ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
266 
267 	lha = calloc(1, sizeof(*lha));
268 	if (lha == NULL) {
269 		archive_set_error(&a->archive, ENOMEM,
270 		    "Can't allocate lha data");
271 		return (ARCHIVE_FATAL);
272 	}
273 	archive_string_init(&lha->ws);
274 
275 	r = __archive_read_register_format(a,
276 	    lha,
277 	    "lha",
278 	    archive_read_format_lha_bid,
279 	    archive_read_format_lha_options,
280 	    archive_read_format_lha_read_header,
281 	    archive_read_format_lha_read_data,
282 	    archive_read_format_lha_read_data_skip,
283 	    NULL,
284 	    archive_read_format_lha_cleanup,
285 	    NULL,
286 	    NULL);
287 
288 	if (r != ARCHIVE_OK)
289 		free(lha);
290 	return (ARCHIVE_OK);
291 }
292 
293 static size_t
lha_check_header_format(const void * h)294 lha_check_header_format(const void *h)
295 {
296 	const unsigned char *p = h;
297 	size_t next_skip_bytes;
298 
299 	switch (p[H_METHOD_OFFSET+3]) {
300 	/*
301 	 * "-lh0-" ... "-lh7-" "-lhd-"
302 	 * "-lzs-" "-lz5-"
303 	 */
304 	case '0': case '1': case '2': case '3':
305 	case '4': case '5': case '6': case '7':
306 	case 'd':
307 	case 's':
308 		next_skip_bytes = 4;
309 
310 		/* b0 == 0 means the end of an LHa archive file.	*/
311 		if (p[0] == 0)
312 			break;
313 		if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
314 		    ||  p[H_METHOD_OFFSET+4] != '-')
315 			break;
316 
317 		if (p[H_METHOD_OFFSET+2] == 'h') {
318 			/* "-lh?-" */
319 			if (p[H_METHOD_OFFSET+3] == 's')
320 				break;
321 			if (p[H_LEVEL_OFFSET] == 0)
322 				return (0);
323 			if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
324 				return (0);
325 		}
326 		if (p[H_METHOD_OFFSET+2] == 'z') {
327 			/* LArc extensions: -lzs-,-lz4- and -lz5- */
328 			if (p[H_LEVEL_OFFSET] != 0)
329 				break;
330 			if (p[H_METHOD_OFFSET+3] == 's'
331 			    || p[H_METHOD_OFFSET+3] == '4'
332 			    || p[H_METHOD_OFFSET+3] == '5')
333 				return (0);
334 		}
335 		break;
336 	case 'h': next_skip_bytes = 1; break;
337 	case 'z': next_skip_bytes = 1; break;
338 	case 'l': next_skip_bytes = 2; break;
339 	case '-': next_skip_bytes = 3; break;
340 	default : next_skip_bytes = 4; break;
341 	}
342 
343 	return (next_skip_bytes);
344 }
345 
346 static int
archive_read_format_lha_bid(struct archive_read * a,int best_bid)347 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
348 {
349 	const char *p;
350 	const void *buff;
351 	ssize_t bytes_avail, offset, window;
352 	size_t next;
353 
354 	/* If there's already a better bid than we can ever
355 	   make, don't bother testing. */
356 	if (best_bid > 30)
357 		return (-1);
358 
359 	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
360 		return (-1);
361 
362 	if (lha_check_header_format(p) == 0)
363 		return (30);
364 
365 	if (p[0] == 'M' && p[1] == 'Z') {
366 		/* PE file */
367 		offset = 0;
368 		window = 4096;
369 		while (offset < (1024 * 20)) {
370 			buff = __archive_read_ahead(a, offset + window,
371 			    &bytes_avail);
372 			if (buff == NULL) {
373 				/* Remaining bytes are less than window. */
374 				window >>= 1;
375 				if (window < (H_SIZE + 3))
376 					return (0);
377 				continue;
378 			}
379 			p = (const char *)buff + offset;
380 			while (p + H_SIZE < (const char *)buff + bytes_avail) {
381 				if ((next = lha_check_header_format(p)) == 0)
382 					return (30);
383 				p += next;
384 			}
385 			offset = p - (const char *)buff;
386 		}
387 	}
388 	return (0);
389 }
390 
391 static int
archive_read_format_lha_options(struct archive_read * a,const char * key,const char * val)392 archive_read_format_lha_options(struct archive_read *a,
393     const char *key, const char *val)
394 {
395 	struct lha *lha;
396 	int ret = ARCHIVE_FAILED;
397 
398 	lha = (struct lha *)(a->format->data);
399 	if (strcmp(key, "hdrcharset")  == 0) {
400 		if (val == NULL || val[0] == 0)
401 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
402 			    "lha: hdrcharset option needs a character-set name");
403 		else {
404 			lha->opt_sconv =
405 			    archive_string_conversion_from_charset(
406 				&a->archive, val, 0);
407 			if (lha->opt_sconv != NULL)
408 				ret = ARCHIVE_OK;
409 			else
410 				ret = ARCHIVE_FATAL;
411 		}
412 		return (ret);
413 	}
414 
415 	/* Note: The "warn" return is just to inform the options
416 	 * supervisor that we didn't handle it.  It will generate
417 	 * a suitable error if no one used this option. */
418 	return (ARCHIVE_WARN);
419 }
420 
421 static int
lha_skip_sfx(struct archive_read * a)422 lha_skip_sfx(struct archive_read *a)
423 {
424 	const void *h;
425 	const char *p, *q;
426 	size_t next, skip;
427 	ssize_t bytes, window;
428 
429 	window = 4096;
430 	for (;;) {
431 		h = __archive_read_ahead(a, window, &bytes);
432 		if (h == NULL) {
433 			/* Remaining bytes are less than window. */
434 			window >>= 1;
435 			if (window < (H_SIZE + 3))
436 				goto fatal;
437 			continue;
438 		}
439 		if (bytes < H_SIZE)
440 			goto fatal;
441 		p = h;
442 		q = p + bytes;
443 
444 		/*
445 		 * Scan ahead until we find something that looks
446 		 * like the lha header.
447 		 */
448 		while (p + H_SIZE < q) {
449 			if ((next = lha_check_header_format(p)) == 0) {
450 				skip = p - (const char *)h;
451 				__archive_read_consume(a, skip);
452 				return (ARCHIVE_OK);
453 			}
454 			p += next;
455 		}
456 		skip = p - (const char *)h;
457 		__archive_read_consume(a, skip);
458 	}
459 fatal:
460 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
461 	    "Couldn't find out LHa header");
462 	return (ARCHIVE_FATAL);
463 }
464 
465 static int
truncated_error(struct archive_read * a)466 truncated_error(struct archive_read *a)
467 {
468 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
469 	    "Truncated LHa header");
470 	return (ARCHIVE_FATAL);
471 }
472 
473 static int
archive_read_format_lha_read_header(struct archive_read * a,struct archive_entry * entry)474 archive_read_format_lha_read_header(struct archive_read *a,
475     struct archive_entry *entry)
476 {
477 	struct archive_wstring linkname;
478 	struct archive_wstring pathname;
479 	struct lha *lha;
480 	const unsigned char *p;
481 	const char *signature;
482 	int err;
483 	struct archive_mstring conv_buffer;
484 	const wchar_t *conv_buffer_p;
485 
486 	lha_crc16_init();
487 
488 	a->archive.archive_format = ARCHIVE_FORMAT_LHA;
489 	if (a->archive.archive_format_name == NULL)
490 		a->archive.archive_format_name = "lha";
491 
492 	lha = (struct lha *)(a->format->data);
493 	lha->decompress_init = 0;
494 	lha->end_of_entry = 0;
495 	lha->end_of_entry_cleanup = 0;
496 	lha->entry_unconsumed = 0;
497 
498 	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
499 		/*
500 		 * LHa archiver added 0 to the tail of its archive file as
501 		 * the mark of the end of the archive.
502 		 */
503 		signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
504 		if (signature == NULL || signature[0] == 0)
505 			return (ARCHIVE_EOF);
506 		return (truncated_error(a));
507 	}
508 
509 	signature = (const char *)p;
510 	if (lha->found_first_header == 0 &&
511 	    signature[0] == 'M' && signature[1] == 'Z') {
512                 /* This is an executable?  Must be self-extracting... 	*/
513 		err = lha_skip_sfx(a);
514 		if (err < ARCHIVE_WARN)
515 			return (err);
516 
517 		if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
518 			return (truncated_error(a));
519 		signature = (const char *)p;
520 	}
521 	/* signature[0] == 0 means the end of an LHa archive file. */
522 	if (signature[0] == 0)
523 		return (ARCHIVE_EOF);
524 
525 	/*
526 	 * Check the header format and method type.
527 	 */
528 	if (lha_check_header_format(p) != 0) {
529 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
530 		    "Bad LHa file");
531 		return (ARCHIVE_FATAL);
532 	}
533 
534 	/* We've found the first header. */
535 	lha->found_first_header = 1;
536 	/* Set a default value and common data */
537 	lha->header_size = 0;
538 	lha->level = p[H_LEVEL_OFFSET];
539 	lha->method[0] = p[H_METHOD_OFFSET+1];
540 	lha->method[1] = p[H_METHOD_OFFSET+2];
541 	lha->method[2] = p[H_METHOD_OFFSET+3];
542 	if (memcmp(lha->method, "lhd", 3) == 0)
543 		lha->directory = 1;
544 	else
545 		lha->directory = 0;
546 	if (memcmp(lha->method, "lh0", 3) == 0 ||
547 	    memcmp(lha->method, "lz4", 3) == 0)
548 		lha->entry_is_compressed = 0;
549 	else
550 		lha->entry_is_compressed = 1;
551 
552 	lha->compsize = 0;
553 	lha->origsize = 0;
554 	lha->setflag = 0;
555 	lha->birthtime = 0;
556 	lha->birthtime_tv_nsec = 0;
557 	lha->mtime = 0;
558 	lha->mtime_tv_nsec = 0;
559 	lha->atime = 0;
560 	lha->atime_tv_nsec = 0;
561 	lha->mode = (lha->directory)? 0777 : 0666;
562 	lha->uid = 0;
563 	lha->gid = 0;
564 	archive_string_empty(&lha->dirname);
565 	archive_string_empty(&lha->filename);
566 	lha->dos_attr = 0;
567 	if (lha->opt_sconv != NULL) {
568 		lha->sconv_dir = lha->opt_sconv;
569 		lha->sconv_fname = lha->opt_sconv;
570 	} else {
571 		lha->sconv_dir = NULL;
572 		lha->sconv_fname = NULL;
573 	}
574 
575 	switch (p[H_LEVEL_OFFSET]) {
576 	case 0:
577 		err = lha_read_file_header_0(a, lha);
578 		break;
579 	case 1:
580 		err = lha_read_file_header_1(a, lha);
581 		break;
582 	case 2:
583 		err = lha_read_file_header_2(a, lha);
584 		break;
585 	case 3:
586 		err = lha_read_file_header_3(a, lha);
587 		break;
588 	default:
589 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
590 		    "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
591 		err = ARCHIVE_FATAL;
592 		break;
593 	}
594 	if (err < ARCHIVE_WARN)
595 		return (err);
596 
597 
598 	if (!lha->directory && archive_strlen(&lha->filename) == 0)
599 		/* The filename has not been set */
600 		return (truncated_error(a));
601 
602 	/*
603 	 * Make a pathname from a dirname and a filename, after converting to Unicode.
604 	 * This is because codepages might differ between dirname and filename.
605 	*/
606 	archive_string_init(&pathname);
607 	archive_string_init(&linkname);
608 	archive_string_init(&conv_buffer.aes_mbs);
609 	archive_string_init(&conv_buffer.aes_mbs_in_locale);
610 	archive_string_init(&conv_buffer.aes_utf8);
611 	archive_string_init(&conv_buffer.aes_wcs);
612 	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
613 		archive_set_error(&a->archive,
614 			ARCHIVE_ERRNO_FILE_FORMAT,
615 			"Pathname cannot be converted "
616 			"from %s to Unicode.",
617 			archive_string_conversion_charset_name(lha->sconv_dir));
618 		err = ARCHIVE_FATAL;
619 	} else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
620 		err = ARCHIVE_FATAL;
621 	if (err == ARCHIVE_FATAL) {
622 		archive_mstring_clean(&conv_buffer);
623 		archive_wstring_free(&pathname);
624 		archive_wstring_free(&linkname);
625 		return (err);
626 	}
627 	archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
628 
629 	archive_string_empty(&conv_buffer.aes_mbs);
630 	archive_string_empty(&conv_buffer.aes_mbs_in_locale);
631 	archive_string_empty(&conv_buffer.aes_utf8);
632 	archive_wstring_empty(&conv_buffer.aes_wcs);
633 	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
634 		archive_set_error(&a->archive,
635 			ARCHIVE_ERRNO_FILE_FORMAT,
636 			"Pathname cannot be converted "
637 			"from %s to Unicode.",
638 			archive_string_conversion_charset_name(lha->sconv_fname));
639 		err = ARCHIVE_FATAL;
640 	}
641 	else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
642 		err = ARCHIVE_FATAL;
643 	if (err == ARCHIVE_FATAL) {
644 		archive_mstring_clean(&conv_buffer);
645 		archive_wstring_free(&pathname);
646 		archive_wstring_free(&linkname);
647 		return (err);
648 	}
649 	archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
650 	archive_mstring_clean(&conv_buffer);
651 
652 	if ((lha->mode & AE_IFMT) == AE_IFLNK) {
653 		/*
654 	 	 * Extract the symlink-name if it's included in the pathname.
655 	 	 */
656 		if (!lha_parse_linkname(&linkname, &pathname)) {
657 			/* We couldn't get the symlink-name. */
658 			archive_set_error(&a->archive,
659 		    	    ARCHIVE_ERRNO_FILE_FORMAT,
660 			    "Unknown symlink-name");
661 			archive_wstring_free(&pathname);
662 			archive_wstring_free(&linkname);
663 			return (ARCHIVE_FAILED);
664 		}
665 	} else {
666 		/*
667 		 * Make sure a file-type is set.
668 		 * The mode has been overridden if it is in the extended data.
669 		 */
670 		lha->mode = (lha->mode & ~AE_IFMT) |
671 		    ((lha->directory)? AE_IFDIR: AE_IFREG);
672 	}
673 	if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
674 	    (lha->dos_attr & 1) != 0)
675 		lha->mode &= ~(0222);/* read only. */
676 
677 	/*
678 	 * Set basic file parameters.
679 	 */
680 	archive_entry_copy_pathname_w(entry, pathname.s);
681 	archive_wstring_free(&pathname);
682 	if (archive_strlen(&linkname) > 0) {
683 		archive_entry_copy_symlink_w(entry, linkname.s);
684 	} else
685 		archive_entry_set_symlink(entry, NULL);
686 	archive_wstring_free(&linkname);
687 	/*
688 	 * When a header level is 0, there is a possibility that
689 	 * a pathname and a symlink has '\' character, a directory
690 	 * separator in DOS/Windows. So we should convert it to '/'.
691 	 */
692 	if (p[H_LEVEL_OFFSET] == 0)
693 		lha_replace_path_separator(lha, entry);
694 
695 	archive_entry_set_mode(entry, lha->mode);
696 	archive_entry_set_uid(entry, lha->uid);
697 	archive_entry_set_gid(entry, lha->gid);
698 	if (archive_strlen(&lha->uname) > 0)
699 		archive_entry_set_uname(entry, lha->uname.s);
700 	if (archive_strlen(&lha->gname) > 0)
701 		archive_entry_set_gname(entry, lha->gname.s);
702 	if (lha->setflag & BIRTHTIME_IS_SET) {
703 		archive_entry_set_birthtime(entry, lha->birthtime,
704 		    lha->birthtime_tv_nsec);
705 		archive_entry_set_ctime(entry, lha->birthtime,
706 		    lha->birthtime_tv_nsec);
707 	} else {
708 		archive_entry_unset_birthtime(entry);
709 		archive_entry_unset_ctime(entry);
710 	}
711 	archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
712 	if (lha->setflag & ATIME_IS_SET)
713 		archive_entry_set_atime(entry, lha->atime,
714 		    lha->atime_tv_nsec);
715 	else
716 		archive_entry_unset_atime(entry);
717 	if (lha->directory || archive_entry_symlink(entry) != NULL)
718 		archive_entry_unset_size(entry);
719 	else
720 		archive_entry_set_size(entry, lha->origsize);
721 
722 	/*
723 	 * Prepare variables used to read a file content.
724 	 */
725 	lha->entry_bytes_remaining = lha->compsize;
726 	if (lha->entry_bytes_remaining < 0) {
727 		archive_set_error(&a->archive,
728 		    ARCHIVE_ERRNO_FILE_FORMAT,
729 		    "Invalid LHa entry size");
730 		return (ARCHIVE_FATAL);
731 	}
732 	lha->entry_offset = 0;
733 	lha->entry_crc_calculated = 0;
734 
735 	/*
736 	 * This file does not have a content.
737 	 */
738 	if (lha->directory || lha->compsize == 0)
739 		lha->end_of_entry = 1;
740 
741 	snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-",
742 	    lha->method[0], lha->method[1], lha->method[2]);
743 	a->archive.archive_format_name = lha->format_name;
744 
745 	return (err);
746 }
747 
748 /*
749  * Replace a DOS path separator '\' by a character '/'.
750  * Some multi-byte character set have  a character '\' in its second byte.
751  */
752 static void
lha_replace_path_separator(struct lha * lha,struct archive_entry * entry)753 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
754 {
755 	const wchar_t *wp;
756 	size_t i;
757 
758 	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
759 		archive_wstrcpy(&(lha->ws), wp);
760 		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
761 			if (lha->ws.s[i] == L'\\')
762 				lha->ws.s[i] = L'/';
763 		}
764 		archive_entry_copy_pathname_w(entry, lha->ws.s);
765 	}
766 
767 	if ((wp = archive_entry_symlink_w(entry)) != NULL) {
768 		archive_wstrcpy(&(lha->ws), wp);
769 		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
770 			if (lha->ws.s[i] == L'\\')
771 				lha->ws.s[i] = L'/';
772 		}
773 		archive_entry_copy_symlink_w(entry, lha->ws.s);
774 	}
775 }
776 
777 /*
778  * Header 0 format
779  *
780  * +0              +1         +2               +7                  +11
781  * +---------------+----------+----------------+-------------------+
782  * |header size(*1)|header sum|compression type|compressed size(*2)|
783  * +---------------+----------+----------------+-------------------+
784  *                             <---------------------(*1)----------*
785  *
786  * +11               +15       +17       +19            +20              +21
787  * +-----------------+---------+---------+--------------+----------------+
788  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
789  * +-----------------+---------+---------+--------------+----------------+
790  * *--------------------------------(*1)---------------------------------*
791  *
792  * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
793  * +---------------+---------+----------+----------------+------------------+
794  * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
795  * +---------------+---------+----------+----------------+------------------+
796  *                  <--(*3)->                             <------(*2)------>
797  * *----------------------(*1)-------------------------->
798  *
799  */
800 #define H0_HEADER_SIZE_OFFSET	0
801 #define H0_HEADER_SUM_OFFSET	1
802 #define H0_COMP_SIZE_OFFSET	7
803 #define H0_ORIG_SIZE_OFFSET	11
804 #define H0_DOS_TIME_OFFSET	15
805 #define H0_NAME_LEN_OFFSET	21
806 #define H0_FILE_NAME_OFFSET	22
807 #define H0_FIXED_SIZE		24
808 static int
lha_read_file_header_0(struct archive_read * a,struct lha * lha)809 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
810 {
811 	const unsigned char *p;
812 	int extdsize, namelen;
813 	unsigned char headersum, sum_calculated;
814 
815 	if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
816 		return (truncated_error(a));
817 	lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
818 	headersum = p[H0_HEADER_SUM_OFFSET];
819 	lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
820 	lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
821 	lha->mtime = dos_to_unix(archive_le32dec(p + H0_DOS_TIME_OFFSET));
822 	namelen = p[H0_NAME_LEN_OFFSET];
823 	extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
824 	if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
825 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
826 		    "Invalid LHa header");
827 		return (ARCHIVE_FATAL);
828 	}
829 	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
830 		return (truncated_error(a));
831 
832 	archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
833 	/* When extdsize == -2, A CRC16 value is not present in the header. */
834 	if (extdsize >= 0) {
835 		lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
836 		lha->setflag |= CRC_IS_SET;
837 	}
838 	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
839 
840 	/* Read an extended header */
841 	if (extdsize > 0) {
842 		/* This extended data is set by 'LHa for UNIX' only.
843 		 * Maybe fixed size.
844 		 */
845 		p += H0_FILE_NAME_OFFSET + namelen + 2;
846 		if (p[0] == 'U' && extdsize == 12) {
847 			/* p[1] is a minor version. */
848 			lha->mtime = archive_le32dec(&p[2]);
849 			lha->mode = archive_le16dec(&p[6]);
850 			lha->uid = archive_le16dec(&p[8]);
851 			lha->gid = archive_le16dec(&p[10]);
852 			lha->setflag |= UNIX_MODE_IS_SET;
853 		}
854 	}
855 	__archive_read_consume(a, lha->header_size);
856 
857 	if (sum_calculated != headersum) {
858 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
859 		    "LHa header sum error");
860 		return (ARCHIVE_FATAL);
861 	}
862 
863 	return (ARCHIVE_OK);
864 }
865 
866 /*
867  * Header 1 format
868  *
869  * +0              +1         +2               +7            +11
870  * +---------------+----------+----------------+-------------+
871  * |header size(*1)|header sum|compression type|skip size(*2)|
872  * +---------------+----------+----------------+-------------+
873  *                             <---------------(*1)----------*
874  *
875  * +11               +15       +17       +19            +20              +21
876  * +-----------------+---------+---------+--------------+----------------+
877  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
878  * +-----------------+---------+---------+--------------+----------------+
879  * *-------------------------------(*1)----------------------------------*
880  *
881  * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
882  * +---------------+---------+----------+-----------+-----------+
883  * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
884  * +---------------+---------+----------+-----------+-----------+
885  *                  <--(*3)->
886  * *----------------------------(*1)----------------------------*
887  *
888  * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
889  * +----------------+---------------------+------------------------+
890  * |next header size| extended header(*5) |     compressed data    |
891  * +----------------+---------------------+------------------------+
892  * *------(*1)-----> <--------------------(*2)-------------------->
893  */
894 #define H1_HEADER_SIZE_OFFSET	0
895 #define H1_HEADER_SUM_OFFSET	1
896 #define H1_COMP_SIZE_OFFSET	7
897 #define H1_ORIG_SIZE_OFFSET	11
898 #define H1_DOS_TIME_OFFSET	15
899 #define H1_NAME_LEN_OFFSET	21
900 #define H1_FILE_NAME_OFFSET	22
901 #define H1_FIXED_SIZE		27
902 static int
lha_read_file_header_1(struct archive_read * a,struct lha * lha)903 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
904 {
905 	const unsigned char *p;
906 	size_t extdsize;
907 	int i, err, err2;
908 	int namelen, padding;
909 	unsigned char headersum, sum_calculated;
910 
911 	err = ARCHIVE_OK;
912 
913 	if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
914 		return (truncated_error(a));
915 
916 	lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
917 	headersum = p[H1_HEADER_SUM_OFFSET];
918 	/* Note: An extended header size is included in a compsize. */
919 	lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
920 	lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
921 	lha->mtime = dos_to_unix(archive_le32dec(p + H1_DOS_TIME_OFFSET));
922 	namelen = p[H1_NAME_LEN_OFFSET];
923 	/* Calculate a padding size. The result will be normally 0 only(?) */
924 	padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
925 
926 	if (namelen > 230 || padding < 0)
927 		goto invalid;
928 
929 	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
930 		return (truncated_error(a));
931 
932 	for (i = 0; i < namelen; i++) {
933 		if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
934 			goto invalid;/* Invalid filename. */
935 	}
936 	archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
937 	lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
938 	lha->setflag |= CRC_IS_SET;
939 
940 	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
941 	/* Consume used bytes but not include `next header size' data
942 	 * since it will be consumed in lha_read_file_extended_header(). */
943 	__archive_read_consume(a, lha->header_size - 2);
944 
945 	/* Read extended headers */
946 	err2 = lha_read_file_extended_header(a, lha, NULL, 2,
947 	    (uint64_t)(lha->compsize + 2), &extdsize);
948 	if (err2 < ARCHIVE_WARN)
949 		return (err2);
950 	if (err2 < err)
951 		err = err2;
952 	/* Get a real compressed file size. */
953 	lha->compsize -= extdsize - 2;
954 
955 	if (lha->compsize < 0)
956 		goto invalid;	/* Invalid compressed file size */
957 
958 	if (sum_calculated != headersum) {
959 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
960 		    "LHa header sum error");
961 		return (ARCHIVE_FATAL);
962 	}
963 	return (err);
964 invalid:
965 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
966 	    "Invalid LHa header");
967 	return (ARCHIVE_FATAL);
968 }
969 
970 /*
971  * Header 2 format
972  *
973  * +0              +2               +7                  +11               +15
974  * +---------------+----------------+-------------------+-----------------+
975  * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
976  * +---------------+----------------+-------------------+-----------------+
977  *  <--------------------------------(*1)---------------------------------*
978  *
979  * +15               +19          +20              +21        +23         +24
980  * +-----------------+------------+----------------+----------+-----------+
981  * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
982  * +-----------------+------------+----------------+----------+-----------+
983  * *---------------------------------(*1)---------------------------------*
984  *
985  * +24              +26                 +26+(*3)      +26+(*3)+(*4)
986  * +----------------+-------------------+-------------+-------------------+
987  * |next header size|extended header(*3)| padding(*4) |  compressed data  |
988  * +----------------+-------------------+-------------+-------------------+
989  * *--------------------------(*1)-------------------> <------(*2)------->
990  *
991  */
992 #define H2_HEADER_SIZE_OFFSET	0
993 #define H2_COMP_SIZE_OFFSET	7
994 #define H2_ORIG_SIZE_OFFSET	11
995 #define H2_TIME_OFFSET		15
996 #define H2_CRC_OFFSET		21
997 #define H2_FIXED_SIZE		24
998 static int
lha_read_file_header_2(struct archive_read * a,struct lha * lha)999 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1000 {
1001 	const unsigned char *p;
1002 	size_t extdsize;
1003 	int err, padding;
1004 	uint16_t header_crc;
1005 
1006 	if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1007 		return (truncated_error(a));
1008 
1009 	lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1010 	lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1011 	lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1012 	lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1013 	lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1014 	lha->setflag |= CRC_IS_SET;
1015 
1016 	if (lha->header_size < H2_FIXED_SIZE) {
1017 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1018 		    "Invalid LHa header size");
1019 		return (ARCHIVE_FATAL);
1020 	}
1021 
1022 	header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1023 	__archive_read_consume(a, H2_FIXED_SIZE);
1024 
1025 	/* Read extended headers */
1026 	err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1027 		  lha->header_size - H2_FIXED_SIZE, &extdsize);
1028 	if (err < ARCHIVE_WARN)
1029 		return (err);
1030 
1031 	/* Calculate a padding size. The result will be normally 0 or 1. */
1032 	padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1033 	if (padding > 0) {
1034 		if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1035 			return (truncated_error(a));
1036 		header_crc = lha_crc16(header_crc, p, padding);
1037 		__archive_read_consume(a, padding);
1038 	}
1039 
1040 	if (header_crc != lha->header_crc) {
1041 #ifndef DONT_FAIL_ON_CRC_ERROR
1042 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1043 		    "LHa header CRC error");
1044 		return (ARCHIVE_FATAL);
1045 #endif
1046 	}
1047 	return (err);
1048 }
1049 
1050 /*
1051  * Header 3 format
1052  *
1053  * +0           +2               +7                  +11               +15
1054  * +------------+----------------+-------------------+-----------------+
1055  * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1056  * +------------+----------------+-------------------+-----------------+
1057  *  <-------------------------------(*1)-------------------------------*
1058  *
1059  * +15               +19          +20              +21        +23         +24
1060  * +-----------------+------------+----------------+----------+-----------+
1061  * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1062  * +-----------------+------------+----------------+----------+-----------+
1063  * *--------------------------------(*1)----------------------------------*
1064  *
1065  * +24             +28              +32                 +32+(*3)
1066  * +---------------+----------------+-------------------+-----------------+
1067  * |header size(*1)|next header size|extended header(*3)| compressed data |
1068  * +---------------+----------------+-------------------+-----------------+
1069  * *------------------------(*1)-----------------------> <------(*2)----->
1070  *
1071  */
1072 #define H3_FIELD_LEN_OFFSET	0
1073 #define H3_COMP_SIZE_OFFSET	7
1074 #define H3_ORIG_SIZE_OFFSET	11
1075 #define H3_TIME_OFFSET		15
1076 #define H3_CRC_OFFSET		21
1077 #define H3_HEADER_SIZE_OFFSET	24
1078 #define H3_FIXED_SIZE		28
1079 static int
lha_read_file_header_3(struct archive_read * a,struct lha * lha)1080 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1081 {
1082 	const unsigned char *p;
1083 	size_t extdsize;
1084 	int err;
1085 	uint16_t header_crc;
1086 
1087 	if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1088 		return (truncated_error(a));
1089 
1090 	if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1091 		goto invalid;
1092 	lha->header_size = archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1093 	lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1094 	lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1095 	lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1096 	lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1097 	lha->setflag |= CRC_IS_SET;
1098 
1099 	if (lha->header_size < H3_FIXED_SIZE + 4)
1100 		goto invalid;
1101 	header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1102 	__archive_read_consume(a, H3_FIXED_SIZE);
1103 
1104 	/* Read extended headers */
1105 	err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1106 		  lha->header_size - H3_FIXED_SIZE, &extdsize);
1107 	if (err < ARCHIVE_WARN)
1108 		return (err);
1109 
1110 	if (header_crc != lha->header_crc) {
1111 #ifndef DONT_FAIL_ON_CRC_ERROR
1112 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1113 		    "LHa header CRC error");
1114 		return (ARCHIVE_FATAL);
1115 #endif
1116 	}
1117 	return (err);
1118 invalid:
1119 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1120 	    "Invalid LHa header");
1121 	return (ARCHIVE_FATAL);
1122 }
1123 
1124 /*
1125  * Extended header format
1126  *
1127  * +0             +2        +3  -- used in header 1 and 2
1128  * +0             +4        +5  -- used in header 3
1129  * +--------------+---------+-------------------+--------------+--
1130  * |ex-header size|header id|        data       |ex-header size| .......
1131  * +--------------+---------+-------------------+--------------+--
1132  *  <-------------( ex-header size)------------> <-- next extended header --*
1133  *
1134  * If the ex-header size is zero, it is the mark of the end of extended
1135  * headers.
1136  *
1137  */
1138 static int
lha_read_file_extended_header(struct archive_read * a,struct lha * lha,uint16_t * crc,int sizefield_length,uint64_t limitsize,size_t * total_size)1139 lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1140     uint16_t *crc, int sizefield_length, uint64_t limitsize, size_t *total_size)
1141 {
1142 	const void *h;
1143 	const unsigned char *extdheader;
1144 	size_t	extdsize;
1145 	size_t	datasize;
1146 	unsigned int i;
1147 	unsigned char extdtype;
1148 
1149 #define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
1150 #define EXT_FILENAME		0x01		/* Filename 		    */
1151 #define EXT_DIRECTORY		0x02		/* Directory name	    */
1152 #define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
1153 #define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
1154 #define EXT_FILESIZE		0x42		/* Large file size	    */
1155 #define EXT_TIMEZONE		0x43		/* Time zone		    */
1156 #define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
1157 #define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
1158 #define EXT_CODEPAGE		0x46		/* Codepage		    */
1159 #define EXT_UNIX_MODE		0x50		/* File permission	    */
1160 #define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
1161 #define EXT_UNIX_GNAME		0x52		/* Group name		    */
1162 #define EXT_UNIX_UNAME		0x53		/* User name		    */
1163 #define EXT_UNIX_MTIME		0x54		/* Modified time	    */
1164 #define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
1165 #define EXT_NEW_ATTR		0xff		/* new attribute	    */
1166 
1167 	*total_size = sizefield_length;
1168 
1169 	for (;;) {
1170 		/* Read an extended header size. */
1171 		if ((h =
1172 		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1173 			return (truncated_error(a));
1174 		/* Check if the size is the zero indicates the end of the
1175 		 * extended header. */
1176 		if (sizefield_length == sizeof(uint16_t))
1177 			extdsize = archive_le16dec(h);
1178 		else
1179 			extdsize = archive_le32dec(h);
1180 		if (extdsize == 0) {
1181 			/* End of extended header */
1182 			if (crc != NULL)
1183 				*crc = lha_crc16(*crc, h, sizefield_length);
1184 			__archive_read_consume(a, sizefield_length);
1185 			return (ARCHIVE_OK);
1186 		}
1187 
1188 		/* Sanity check to the extended header size. */
1189 		if (((uint64_t)*total_size + extdsize) > limitsize ||
1190 		    extdsize <= (size_t)sizefield_length)
1191 			goto invalid;
1192 
1193 		/* Read the extended header. */
1194 		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1195 			return (truncated_error(a));
1196 		*total_size += extdsize;
1197 
1198 		extdheader = (const unsigned char *)h;
1199 		/* Get the extended header type. */
1200 		extdtype = extdheader[sizefield_length];
1201 		/* Calculate an extended data size. */
1202 		datasize = extdsize - (1 + sizefield_length);
1203 		/* Skip an extended header size field and type field. */
1204 		extdheader += sizefield_length + 1;
1205 
1206 		if (crc != NULL && extdtype != EXT_HEADER_CRC)
1207 			*crc = lha_crc16(*crc, h, extdsize);
1208 		switch (extdtype) {
1209 		case EXT_HEADER_CRC:
1210 			/* We only use a header CRC. Following data will not
1211 			 * be used. */
1212 			if (datasize >= 2) {
1213 				lha->header_crc = archive_le16dec(extdheader);
1214 				if (crc != NULL) {
1215 					static const char zeros[2] = {0, 0};
1216 					*crc = lha_crc16(*crc, h,
1217 					    extdsize - datasize);
1218 					/* CRC value itself as zero */
1219 					*crc = lha_crc16(*crc, zeros, 2);
1220 					*crc = lha_crc16(*crc,
1221 					    extdheader+2, datasize - 2);
1222 				}
1223 			}
1224 			break;
1225 		case EXT_FILENAME:
1226 			if (datasize == 0) {
1227 				/* maybe directory header */
1228 				archive_string_empty(&lha->filename);
1229 				break;
1230 			}
1231 			if (extdheader[0] == '\0')
1232 				goto invalid;
1233 			archive_strncpy(&lha->filename,
1234 			    (const char *)extdheader, datasize);
1235 			break;
1236 		case EXT_UTF16_FILENAME:
1237 			if (datasize == 0) {
1238 				/* maybe directory header */
1239 				archive_string_empty(&lha->filename);
1240 				break;
1241 			} else if (datasize & 1) {
1242 				/* UTF-16 characters take always 2 or 4 bytes */
1243 				goto invalid;
1244 			}
1245 			if (extdheader[0] == '\0')
1246 				goto invalid;
1247 			archive_string_empty(&lha->filename);
1248 			archive_array_append(&lha->filename,
1249 				(const char *)extdheader, datasize);
1250 			/* Setup a string conversion for a filename. */
1251 			lha->sconv_fname =
1252 			    archive_string_conversion_from_charset(&a->archive,
1253 			        "UTF-16LE", 1);
1254 			if (lha->sconv_fname == NULL)
1255 				return (ARCHIVE_FATAL);
1256 			break;
1257 		case EXT_DIRECTORY:
1258 			if (datasize == 0 || extdheader[0] == '\0')
1259 				/* no directory name data. exit this case. */
1260 				goto invalid;
1261 
1262 			archive_strncpy(&lha->dirname,
1263 		  	    (const char *)extdheader, datasize);
1264 			/*
1265 			 * Convert directory delimiter from 0xFF
1266 			 * to '/' for local system.
1267 	 		 */
1268 			for (i = 0; i < lha->dirname.length; i++) {
1269 				if ((unsigned char)lha->dirname.s[i] == 0xFF)
1270 					lha->dirname.s[i] = '/';
1271 			}
1272 			/* Is last character directory separator? */
1273 			if (lha->dirname.s[lha->dirname.length-1] != '/')
1274 				/* invalid directory data */
1275 				goto invalid;
1276 			break;
1277 		case EXT_UTF16_DIRECTORY:
1278 			/* UTF-16 characters take always 2 or 4 bytes */
1279 			if (datasize == 0 || (datasize & 1) ||
1280 			    extdheader[0] == '\0') {
1281 				/* no directory name data. exit this case. */
1282 				goto invalid;
1283 			}
1284 
1285 			archive_string_empty(&lha->dirname);
1286 			archive_array_append(&lha->dirname,
1287 				(const char *)extdheader, datasize);
1288 			lha->sconv_dir =
1289 			    archive_string_conversion_from_charset(&a->archive,
1290 			        "UTF-16LE", 1);
1291 			if (lha->sconv_dir == NULL)
1292 				return (ARCHIVE_FATAL);
1293 			else {
1294 				/*
1295 				 * Convert directory delimiter from 0xFFFF
1296 				 * to '/' for local system.
1297 				 */
1298 				uint16_t dirSep;
1299 				uint16_t d = 1;
1300 				if (archive_be16dec(&d) == 1)
1301 					dirSep = 0x2F00;
1302 				else
1303 					dirSep = 0x002F;
1304 
1305 				/* UTF-16LE character */
1306 				uint16_t *utf16name =
1307 				    (uint16_t *)lha->dirname.s;
1308 				for (i = 0; i < lha->dirname.length / 2; i++) {
1309 					if (utf16name[i] == 0xFFFF) {
1310 						utf16name[i] = dirSep;
1311 					}
1312 				}
1313 				/* Is last character directory separator? */
1314 				if (utf16name[lha->dirname.length / 2 - 1] !=
1315 				    dirSep) {
1316 					/* invalid directory data */
1317 					goto invalid;
1318 				}
1319 			}
1320 			break;
1321 		case EXT_DOS_ATTR:
1322 			if (datasize == 2)
1323 				lha->dos_attr = (unsigned char)
1324 				    (archive_le16dec(extdheader) & 0xff);
1325 			break;
1326 		case EXT_TIMESTAMP:
1327 			if (datasize == (sizeof(uint64_t) * 3)) {
1328 				ntfs_to_unix(archive_le64dec(extdheader),
1329 					&lha->birthtime,
1330 				    &lha->birthtime_tv_nsec);
1331 				extdheader += sizeof(uint64_t);
1332 				ntfs_to_unix(archive_le64dec(extdheader),
1333 					&lha->mtime,
1334 				    &lha->mtime_tv_nsec);
1335 				extdheader += sizeof(uint64_t);
1336 				ntfs_to_unix(archive_le64dec(extdheader),
1337 					&lha->atime,
1338 				    &lha->atime_tv_nsec);
1339 				lha->setflag |= BIRTHTIME_IS_SET |
1340 				    ATIME_IS_SET;
1341 			}
1342 			break;
1343 		case EXT_FILESIZE:
1344 			if (datasize == sizeof(uint64_t) * 2) {
1345 				lha->compsize = archive_le64dec(extdheader);
1346 				extdheader += sizeof(uint64_t);
1347 				lha->origsize = archive_le64dec(extdheader);
1348 				if (lha->compsize < 0 || lha->origsize < 0)
1349 					goto invalid;
1350 			}
1351 			break;
1352 		case EXT_CODEPAGE:
1353 			/* Get an archived filename charset from codepage.
1354 			 * This overwrites the charset specified by
1355 			 * hdrcharset option. */
1356 			if (datasize == sizeof(uint32_t)) {
1357 				struct archive_string cp;
1358 				const char *charset;
1359 
1360 				archive_string_init(&cp);
1361 				switch (archive_le32dec(extdheader)) {
1362 				case 65001: /* UTF-8 */
1363 					charset = "UTF-8";
1364 					break;
1365 				default:
1366 					archive_string_sprintf(&cp, "CP%d",
1367 					    (int)archive_le32dec(extdheader));
1368 					charset = cp.s;
1369 					break;
1370 				}
1371 				lha->sconv_dir =
1372 				    archive_string_conversion_from_charset(
1373 					&(a->archive), charset, 1);
1374 				lha->sconv_fname =
1375 				    archive_string_conversion_from_charset(
1376 					&(a->archive), charset, 1);
1377 				archive_string_free(&cp);
1378 				if (lha->sconv_dir == NULL)
1379 					return (ARCHIVE_FATAL);
1380 				if (lha->sconv_fname == NULL)
1381 					return (ARCHIVE_FATAL);
1382 			}
1383 			break;
1384 		case EXT_UNIX_MODE:
1385 			if (datasize == sizeof(uint16_t)) {
1386 				lha->mode = archive_le16dec(extdheader);
1387 				lha->setflag |= UNIX_MODE_IS_SET;
1388 			}
1389 			break;
1390 		case EXT_UNIX_GID_UID:
1391 			if (datasize == (sizeof(uint16_t) * 2)) {
1392 				lha->gid = archive_le16dec(extdheader);
1393 				lha->uid = archive_le16dec(extdheader+2);
1394 			}
1395 			break;
1396 		case EXT_UNIX_GNAME:
1397 			if (datasize > 0)
1398 				archive_strncpy(&lha->gname,
1399 				    (const char *)extdheader, datasize);
1400 			break;
1401 		case EXT_UNIX_UNAME:
1402 			if (datasize > 0)
1403 				archive_strncpy(&lha->uname,
1404 				    (const char *)extdheader, datasize);
1405 			break;
1406 		case EXT_UNIX_MTIME:
1407 			if (datasize == sizeof(uint32_t))
1408 				lha->mtime = archive_le32dec(extdheader);
1409 			break;
1410 		case EXT_OS2_NEW_ATTR:
1411 			/* This extended header is OS/2 depend. */
1412 			if (datasize == 16) {
1413 				lha->dos_attr = (unsigned char)
1414 				    (archive_le16dec(extdheader) & 0xff);
1415 				lha->mode = archive_le16dec(extdheader+2);
1416 				lha->gid = archive_le16dec(extdheader+4);
1417 				lha->uid = archive_le16dec(extdheader+6);
1418 				lha->birthtime = archive_le32dec(extdheader+8);
1419 				lha->atime = archive_le32dec(extdheader+12);
1420 				lha->setflag |= UNIX_MODE_IS_SET
1421 				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1422 			}
1423 			break;
1424 		case EXT_NEW_ATTR:
1425 			if (datasize == 20) {
1426 				lha->mode = (mode_t)archive_le32dec(extdheader);
1427 				lha->gid = archive_le32dec(extdheader+4);
1428 				lha->uid = archive_le32dec(extdheader+8);
1429 				lha->birthtime = archive_le32dec(extdheader+12);
1430 				lha->atime = archive_le32dec(extdheader+16);
1431 				lha->setflag |= UNIX_MODE_IS_SET
1432 				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1433 			}
1434 			break;
1435 		case EXT_TIMEZONE:		/* Not supported */
1436 			break;
1437 		default:
1438 			break;
1439 		}
1440 
1441 		__archive_read_consume(a, extdsize);
1442 	}
1443 invalid:
1444 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1445 	    "Invalid extended LHa header");
1446 	return (ARCHIVE_FATAL);
1447 }
1448 
1449 static int
lha_end_of_entry(struct archive_read * a)1450 lha_end_of_entry(struct archive_read *a)
1451 {
1452 	struct lha *lha = (struct lha *)(a->format->data);
1453 	int r = ARCHIVE_EOF;
1454 
1455 	if (!lha->end_of_entry_cleanup) {
1456 		if ((lha->setflag & CRC_IS_SET) &&
1457 		    lha->crc != lha->entry_crc_calculated) {
1458 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1459 			    "LHa data CRC error");
1460 			r = ARCHIVE_WARN;
1461 		}
1462 
1463 		/* End-of-entry cleanup done. */
1464 		lha->end_of_entry_cleanup = 1;
1465 	}
1466 	return (r);
1467 }
1468 
1469 static int
archive_read_format_lha_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1470 archive_read_format_lha_read_data(struct archive_read *a,
1471     const void **buff, size_t *size, int64_t *offset)
1472 {
1473 	struct lha *lha = (struct lha *)(a->format->data);
1474 	int r;
1475 
1476 	if (lha->entry_unconsumed) {
1477 		/* Consume as much as the decompressor actually used. */
1478 		__archive_read_consume(a, lha->entry_unconsumed);
1479 		lha->entry_unconsumed = 0;
1480 	}
1481 	if (lha->end_of_entry) {
1482 		*offset = lha->entry_offset;
1483 		*size = 0;
1484 		*buff = NULL;
1485 		return (lha_end_of_entry(a));
1486 	}
1487 
1488 	if (lha->entry_is_compressed)
1489 		r =  lha_read_data_lzh(a, buff, size, offset);
1490 	else
1491 		/* No compression. */
1492 		r =  lha_read_data_none(a, buff, size, offset);
1493 	return (r);
1494 }
1495 
1496 /*
1497  * Read a file content in no compression.
1498  *
1499  * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1500  * lha->end_of_entry if it consumes all of the data.
1501  */
1502 static int
lha_read_data_none(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1503 lha_read_data_none(struct archive_read *a, const void **buff,
1504     size_t *size, int64_t *offset)
1505 {
1506 	struct lha *lha = (struct lha *)(a->format->data);
1507 	ssize_t bytes_avail;
1508 
1509 	if (lha->entry_bytes_remaining == 0) {
1510 		*buff = NULL;
1511 		*size = 0;
1512 		*offset = lha->entry_offset;
1513 		lha->end_of_entry = 1;
1514 		return (ARCHIVE_OK);
1515 	}
1516 	/*
1517 	 * Note: '1' here is a performance optimization.
1518 	 * Recall that the decompression layer returns a count of
1519 	 * available bytes; asking for more than that forces the
1520 	 * decompressor to combine reads by copying data.
1521 	 */
1522 	*buff = __archive_read_ahead(a, 1, &bytes_avail);
1523 	if (bytes_avail <= 0) {
1524 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1525 		    "Truncated LHa file data");
1526 		return (ARCHIVE_FATAL);
1527 	}
1528 	if (bytes_avail > lha->entry_bytes_remaining)
1529 		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1530 	lha->entry_crc_calculated =
1531 	    lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1532 	*size = bytes_avail;
1533 	*offset = lha->entry_offset;
1534 	lha->entry_offset += bytes_avail;
1535 	lha->entry_bytes_remaining -= bytes_avail;
1536 	if (lha->entry_bytes_remaining == 0)
1537 		lha->end_of_entry = 1;
1538 	lha->entry_unconsumed = bytes_avail;
1539 	return (ARCHIVE_OK);
1540 }
1541 
1542 /*
1543  * Read a file content in LZHUFF encoding.
1544  *
1545  * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1546  * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1547  * all of the data.
1548  */
1549 static int
lha_read_data_lzh(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1550 lha_read_data_lzh(struct archive_read *a, const void **buff,
1551     size_t *size, int64_t *offset)
1552 {
1553 	struct lha *lha = (struct lha *)(a->format->data);
1554 	ssize_t bytes_avail;
1555 	int r;
1556 
1557 	/* If we haven't yet read any data, initialize the decompressor. */
1558 	if (!lha->decompress_init) {
1559 		r = lzh_decode_init(&(lha->strm), lha->method);
1560 		switch (r) {
1561 		case ARCHIVE_OK:
1562 			break;
1563 		case ARCHIVE_FAILED:
1564         		/* Unsupported compression. */
1565 			*buff = NULL;
1566 			*size = 0;
1567 			*offset = 0;
1568 			archive_set_error(&a->archive,
1569 			    ARCHIVE_ERRNO_FILE_FORMAT,
1570 			    "Unsupported lzh compression method -%c%c%c-",
1571 			    lha->method[0], lha->method[1], lha->method[2]);
1572 			/* We know compressed size; just skip it. */
1573 			archive_read_format_lha_read_data_skip(a);
1574 			return (ARCHIVE_WARN);
1575 		default:
1576 			archive_set_error(&a->archive, ENOMEM,
1577 			    "Couldn't allocate memory "
1578 			    "for lzh decompression");
1579 			return (ARCHIVE_FATAL);
1580 		}
1581 		/* We've initialized decompression for this stream. */
1582 		lha->decompress_init = 1;
1583 		lha->strm.avail_out = 0;
1584 		lha->strm.total_out = 0;
1585 	}
1586 
1587 	/*
1588 	 * Note: '1' here is a performance optimization.
1589 	 * Recall that the decompression layer returns a count of
1590 	 * available bytes; asking for more than that forces the
1591 	 * decompressor to combine reads by copying data.
1592 	 */
1593 	lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1594 	if (bytes_avail <= 0) {
1595 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1596 		    "Truncated LHa file body");
1597 		return (ARCHIVE_FATAL);
1598 	}
1599 	if (bytes_avail > lha->entry_bytes_remaining)
1600 		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1601 
1602 	lha->strm.avail_in = (int)bytes_avail;
1603 	lha->strm.total_in = 0;
1604 	lha->strm.avail_out = 0;
1605 
1606 	r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1607 	switch (r) {
1608 	case ARCHIVE_OK:
1609 		break;
1610 	case ARCHIVE_EOF:
1611 		lha->end_of_entry = 1;
1612 		break;
1613 	default:
1614 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1615 		    "Bad lzh data");
1616 		return (ARCHIVE_FAILED);
1617 	}
1618 	lha->entry_unconsumed = lha->strm.total_in;
1619 	lha->entry_bytes_remaining -= lha->strm.total_in;
1620 
1621 	if (lha->strm.avail_out) {
1622 		*offset = lha->entry_offset;
1623 		*size = lha->strm.avail_out;
1624 		*buff = lha->strm.ref_ptr;
1625 		lha->entry_crc_calculated =
1626 		    lha_crc16(lha->entry_crc_calculated, *buff, *size);
1627 		lha->entry_offset += *size;
1628 	} else {
1629 		*offset = lha->entry_offset;
1630 		*size = 0;
1631 		*buff = NULL;
1632 		if (lha->end_of_entry)
1633 			return (lha_end_of_entry(a));
1634 	}
1635 	return (ARCHIVE_OK);
1636 }
1637 
1638 /*
1639  * Skip a file content.
1640  */
1641 static int
archive_read_format_lha_read_data_skip(struct archive_read * a)1642 archive_read_format_lha_read_data_skip(struct archive_read *a)
1643 {
1644 	struct lha *lha;
1645 	int64_t bytes_skipped;
1646 
1647 	lha = (struct lha *)(a->format->data);
1648 
1649 	if (lha->entry_unconsumed) {
1650 		/* Consume as much as the decompressor actually used. */
1651 		__archive_read_consume(a, lha->entry_unconsumed);
1652 		lha->entry_unconsumed = 0;
1653 	}
1654 
1655 	/* if we've already read to end of data, we're done. */
1656 	if (lha->end_of_entry_cleanup)
1657 		return (ARCHIVE_OK);
1658 
1659 	/*
1660 	 * If the length is at the beginning, we can skip the
1661 	 * compressed data much more quickly.
1662 	 */
1663 	bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1664 	if (bytes_skipped < 0)
1665 		return (ARCHIVE_FATAL);
1666 
1667 	/* This entry is finished and done. */
1668 	lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1669 	return (ARCHIVE_OK);
1670 }
1671 
1672 static int
archive_read_format_lha_cleanup(struct archive_read * a)1673 archive_read_format_lha_cleanup(struct archive_read *a)
1674 {
1675 	struct lha *lha = (struct lha *)(a->format->data);
1676 
1677 	lzh_decode_free(&(lha->strm));
1678 	archive_string_free(&(lha->dirname));
1679 	archive_string_free(&(lha->filename));
1680 	archive_string_free(&(lha->uname));
1681 	archive_string_free(&(lha->gname));
1682 	archive_wstring_free(&(lha->ws));
1683 	free(lha);
1684 	(a->format->data) = NULL;
1685 	return (ARCHIVE_OK);
1686 }
1687 
1688 /*
1689  * 'LHa for UNIX' utility has archived a symbolic-link name after
1690  * a pathname with '|' character.
1691  * This function extracts the symbolic-link name from the pathname.
1692  *
1693  * example.
1694  *   1. a symbolic-name is 'aaa/bb/cc'
1695  *   2. a filename is 'xxx/bbb'
1696  *  then an archived pathname is 'xxx/bbb|aaa/bb/cc'
1697  */
1698 static int
lha_parse_linkname(struct archive_wstring * linkname,struct archive_wstring * pathname)1699 lha_parse_linkname(struct archive_wstring *linkname,
1700     struct archive_wstring *pathname)
1701 {
1702 	wchar_t *	linkptr;
1703 	size_t 	symlen;
1704 
1705 	linkptr = wcschr(pathname->s, L'|');
1706 	if (linkptr != NULL) {
1707 		symlen = wcslen(linkptr + 1);
1708 		archive_wstrncpy(linkname, linkptr+1, symlen);
1709 
1710 		*linkptr = 0;
1711 		pathname->length = wcslen(pathname->s);
1712 
1713 		return (1);
1714 	}
1715 	return (0);
1716 }
1717 
/*
 * Add `size' bytes, starting `offset' bytes into `pp', onto `sum'.
 * The result wraps around at 8 bits.
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	const unsigned char *cur = (const unsigned char *)pp + offset;
	const unsigned char *const end = cur + size;

	while (cur != end)
		sum += *cur++;
	return (sum);
}
1728 
/*
 * CRC-16 lookup tables (reflected polynomial 0xA001).
 * crc16tbl[0] advances the CRC by one byte; crc16tbl[1] is derived from
 * it and is used by lha_crc16() to advance two bytes per step.
 */
static uint16_t crc16tbl[2][256];

/*
 * Fill crc16tbl. Only the first call does any work.
 */
static void
lha_crc16_init(void)
{
	static int crc16init = 0;
	unsigned int byte;
	unsigned int bit;

	if (crc16init)
		return;
	crc16init = 1;

	/* Bit-by-bit CRC of every possible byte value. */
	for (byte = 0; byte < 256; byte++) {
		uint16_t v = (uint16_t)byte;

		for (bit = 0; bit < 8; bit++) {
			if (v & 1)
				v = (v >> 1) ^ 0xA001;
			else
				v >>= 1;
		}
		crc16tbl[0][byte] = v;
	}

	/* Second table: apply the single-byte step once more. */
	for (byte = 0; byte < 256; byte++) {
		const uint16_t one = crc16tbl[0][byte];

		crc16tbl[1][byte] = (one >> 8) ^ crc16tbl[0][one & 0xff];
	}
}
1753 
1754 static uint16_t
lha_crc16(uint16_t crc,const void * pp,size_t len)1755 lha_crc16(uint16_t crc, const void *pp, size_t len)
1756 {
1757 	const unsigned char *p = (const unsigned char *)pp;
1758 	const uint16_t *buff;
1759 	const union {
1760 		uint32_t i;
1761 		char c[4];
1762 	} u = { 0x01020304 };
1763 
1764 	if (len == 0)
1765 		return crc;
1766 
1767 	/* Process unaligned address. */
1768 	if (((uintptr_t)p) & (uintptr_t)0x1) {
1769 		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1770 		len--;
1771 	}
1772 	buff = (const uint16_t *)p;
1773 	/*
1774 	 * Modern C compiler such as GCC does not unroll automatically yet
1775 	 * without unrolling pragma, and Clang is so. So we should
1776 	 * unroll this loop for its performance.
1777 	 */
1778 	for (;len >= 8; len -= 8) {
1779 		/* This if statement expects compiler optimization will
1780 		 * remove the statement which will not be executed. */
1781 #undef bswap16
1782 #ifndef __has_builtin
1783 #define __has_builtin(x) 0
1784 #endif
1785 #if defined(_MSC_VER) && _MSC_VER >= 1400  /* Visual Studio */
1786 #  define bswap16(x) _byteswap_ushort(x)
1787 #elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1788 /* GCC 4.8 and later has __builtin_bswap16() */
1789 #  define bswap16(x) __builtin_bswap16(x)
1790 #elif defined(__clang__) && __has_builtin(__builtin_bswap16)
1791 /* Newer clang versions have __builtin_bswap16() */
1792 #  define bswap16(x) __builtin_bswap16(x)
1793 #else
1794 #  define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1795 #endif
1796 #define CRC16W	do { 	\
1797 		if(u.c[0] == 1) { /* Big endian */		\
1798 			crc ^= bswap16(*buff); buff++;		\
1799 		} else						\
1800 			crc ^= *buff++;				\
1801 		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1802 } while (0)
1803 		CRC16W;
1804 		CRC16W;
1805 		CRC16W;
1806 		CRC16W;
1807 #undef CRC16W
1808 #undef bswap16
1809 	}
1810 
1811 	p = (const unsigned char *)buff;
1812 	for (;len; len--) {
1813 		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1814 	}
1815 	return crc;
1816 }
1817 
1818 /*
1819  * Initialize LZHUF decoder.
1820  *
1821  * Returns ARCHIVE_OK if initialization was successful.
1822  * Returns ARCHIVE_FAILED if method is unsupported.
1823  * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1824  * error occurred.
1825  */
1826 static int
lzh_decode_init(struct lzh_stream * strm,const char * method)1827 lzh_decode_init(struct lzh_stream *strm, const char *method)
1828 {
1829 	struct lzh_dec *ds;
1830 	int w_bits, w_size;
1831 
1832 	if (strm->ds == NULL) {
1833 		strm->ds = calloc(1, sizeof(*strm->ds));
1834 		if (strm->ds == NULL)
1835 			return (ARCHIVE_FATAL);
1836 	}
1837 	ds = strm->ds;
1838 	ds->error = ARCHIVE_FAILED;
1839 	if (method == NULL || method[0] != 'l' || method[1] != 'h')
1840 		return (ARCHIVE_FAILED);
1841 	switch (method[2]) {
1842 	case '5':
1843 		w_bits = 13;/* 8KiB for window */
1844 		break;
1845 	case '6':
1846 		w_bits = 15;/* 32KiB for window */
1847 		break;
1848 	case '7':
1849 		w_bits = 16;/* 64KiB for window */
1850 		break;
1851 	default:
1852 		return (ARCHIVE_FAILED);/* Not supported. */
1853 	}
1854 	ds->error = ARCHIVE_FATAL;
1855 	/* Expand a window size up to 128 KiB for decompressing process
1856 	 * performance whatever its original window size is. */
1857 	ds->w_size = 1U << 17;
1858 	ds->w_mask = ds->w_size -1;
1859 	if (ds->w_buff == NULL) {
1860 		ds->w_buff = malloc(ds->w_size);
1861 		if (ds->w_buff == NULL)
1862 			return (ARCHIVE_FATAL);
1863 	}
1864 	w_size = 1U << w_bits;
1865 	memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1866 	ds->w_pos = 0;
1867 	ds->state = 0;
1868 	ds->pos_pt_len_size = w_bits + 1;
1869 	ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1870 	ds->literal_pt_len_size = PT_BITLEN_SIZE;
1871 	ds->literal_pt_len_bits = 5;
1872 	ds->br.cache_buffer = 0;
1873 	ds->br.cache_avail = 0;
1874 
1875 	if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1876 	    != ARCHIVE_OK)
1877 		return (ARCHIVE_FATAL);
1878 	ds->lt.len_bits = 9;
1879 	if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1880 	    != ARCHIVE_OK)
1881 		return (ARCHIVE_FATAL);
1882 	ds->error = 0;
1883 
1884 	return (ARCHIVE_OK);
1885 }
1886 
1887 /*
1888  * Release LZHUF decoder.
1889  */
1890 static void
lzh_decode_free(struct lzh_stream * strm)1891 lzh_decode_free(struct lzh_stream *strm)
1892 {
1893 
1894 	if (strm->ds == NULL)
1895 		return;
1896 	free(strm->ds->w_buff);
1897 	lzh_huffman_free(&(strm->ds->lt));
1898 	lzh_huffman_free(&(strm->ds->pt));
1899 	free(strm->ds);
1900 	strm->ds = NULL;
1901 }
1902 
/*
 * Bit stream reader.
 *
 * Unread bits live in the low-order `cache_avail' bits of `cache_buffer';
 * the next bit to be read is the most significant of those.
 *
 * Every macro argument is parenthesized in the expansion so that compound
 * expressions can be passed safely.
 */
/* Check that the cache buffer has at least n bits. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Peek at the next n bits (n <= 16) without consuming them. */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[(n)])
/* Peek at n bits when fewer than n are available; the missing low-order
 * bits read as zero. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[(n)])
/* Read ahead to make sure the cache buffer has enough compressed data we
 * will use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : strm->next_in ran empty before the cache buffer was filled up;
 *          we have to get the following bytes. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
/*  True  : the cache buffer has at least as many bits as we need.
 *  False : there are not enough bits in the cache buffer to be used;
 *          we have to get the following bytes if we can. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Notify how many bits we consumed. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
/* Give back n bits that were consumed. */
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))

/* cache_masks[n] keeps the low n bits of a 16-bit value (n saturates at 16). */
static const uint16_t cache_masks[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000F, 0x001F, 0x003F, 0x007F,
	0x00FF, 0x01FF, 0x03FF, 0x07FF,
	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
1939 
1940 /*
1941  * Shift away used bits in the cache data and fill it up with following bits.
1942  * Call this when cache buffer does not have enough bits you need.
1943  *
1944  * Returns 1 if the cache buffer is full.
1945  * Returns 0 if the cache buffer is not full; input buffer is empty.
1946  */
1947 static int
lzh_br_fillup(struct lzh_stream * strm,struct lzh_br * br)1948 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1949 {
1950 	int n = CACHE_BITS - br->cache_avail;
1951 
1952 	for (;;) {
1953 		const int x = n >> 3;
1954 		if (strm->avail_in >= x) {
1955 			switch (x) {
1956 			case 8:
1957 				br->cache_buffer =
1958 				    ((uint64_t)strm->next_in[0]) << 56 |
1959 				    ((uint64_t)strm->next_in[1]) << 48 |
1960 				    ((uint64_t)strm->next_in[2]) << 40 |
1961 				    ((uint64_t)strm->next_in[3]) << 32 |
1962 				    ((uint32_t)strm->next_in[4]) << 24 |
1963 				    ((uint32_t)strm->next_in[5]) << 16 |
1964 				    ((uint32_t)strm->next_in[6]) << 8 |
1965 				     (uint32_t)strm->next_in[7];
1966 				strm->next_in += 8;
1967 				strm->avail_in -= 8;
1968 				br->cache_avail += 8 * 8;
1969 				return (1);
1970 			case 7:
1971 				br->cache_buffer =
1972 		 		   (br->cache_buffer << 56) |
1973 				    ((uint64_t)strm->next_in[0]) << 48 |
1974 				    ((uint64_t)strm->next_in[1]) << 40 |
1975 				    ((uint64_t)strm->next_in[2]) << 32 |
1976 				    ((uint64_t)strm->next_in[3]) << 24 |
1977 				    ((uint64_t)strm->next_in[4]) << 16 |
1978 				    ((uint64_t)strm->next_in[5]) << 8 |
1979 				     (uint64_t)strm->next_in[6];
1980 				strm->next_in += 7;
1981 				strm->avail_in -= 7;
1982 				br->cache_avail += 7 * 8;
1983 				return (1);
1984 			case 6:
1985 				br->cache_buffer =
1986 		 		   (br->cache_buffer << 48) |
1987 				    ((uint64_t)strm->next_in[0]) << 40 |
1988 				    ((uint64_t)strm->next_in[1]) << 32 |
1989 				    ((uint64_t)strm->next_in[2]) << 24 |
1990 				    ((uint64_t)strm->next_in[3]) << 16 |
1991 				    ((uint64_t)strm->next_in[4]) << 8 |
1992 				     (uint64_t)strm->next_in[5];
1993 				strm->next_in += 6;
1994 				strm->avail_in -= 6;
1995 				br->cache_avail += 6 * 8;
1996 				return (1);
1997 			case 0:
1998 				/* We have enough compressed data in
1999 				 * the cache buffer.*/
2000 				return (1);
2001 			default:
2002 				break;
2003 			}
2004 		}
2005 		if (strm->avail_in == 0) {
2006 			/* There is not enough compressed data to fill up the
2007 			 * cache buffer. */
2008 			return (0);
2009 		}
2010 		br->cache_buffer =
2011 		   (br->cache_buffer << 8) | *strm->next_in++;
2012 		strm->avail_in--;
2013 		br->cache_avail += 8;
2014 		n -= 8;
2015 	}
2016 }
2017 
/*
 * Decode LZHUF.
 *
 * 1. Returns ARCHIVE_OK if the output buffer or the input buffer is empty.
 *    Please supply an available buffer and call this function again.
 * 2. Returns ARCHIVE_EOF if decompression has been completed.
 * 3. Returns ARCHIVE_FAILED if an error occurred; the compressed data
 *    is broken or you did not set the 'last' flag properly.
 * 4. The 'last' flag is very important: you must set it to 1 when there
 *    is no more input data. The lha compressed data format does not provide
 *    a way to know that the compressed data has really finished.
 *    Note: the lha command utility checks whether the total size of the
 *    output bytes has reached the uncompressed size recorded in its header;
 *    it does not care whether the decoding process finished properly.
 *    GNU ZIP can decompress a compressed file made by SCO LZH compress.
 *    It handles EOF by filling the read buffer with zeros until the decoding
 *    process meets 2 bytes of zeros when reading the size of the next chunk,
 *    so the zeros are treated as the end-of-data mark although they are
 *    dummy data, not file data.
 */
/* The two decoder passes driven by lzh_decode(); the int is the `last' flag. */
static int	lzh_read_blocks(struct lzh_stream *, int);
static int	lzh_decode_blocks(struct lzh_stream *, int);
/*
 * Decoder states stored in the `state' member of struct lzh_dec.
 * lzh_decode() hands states below ST_GET_LITERAL to lzh_read_blocks() and
 * all other states to lzh_decode_blocks().
 */
#define ST_RD_BLOCK		0	/* Read the 16-bit block count. */
#define ST_RD_PT_1		1	/* Read the number of pt bit lengths. */
#define ST_RD_PT_2		2	/* Single-code pt table, or start reading lengths. */
#define ST_RD_PT_3		3	/* Read the first three pt lengths and the zero run. */
#define ST_RD_PT_4		4	/* Read the remaining pt lengths, build the table. */
#define ST_RD_LITERAL_1		5	/* Read the number of literal bit lengths. */
#define ST_RD_LITERAL_2		6	/* Single-code literal table, or start reading lengths. */
#define ST_RD_LITERAL_3		7	/* Read the literal lengths, build the table. */
#define ST_RD_POS_DATA_1	8	/* Switch pt to position-table parameters. */
#define ST_GET_LITERAL		9	/* Decode a literal or a match length. */
#define ST_GET_POS_1		10	/* Decode the match position code. */
#define ST_GET_POS_2		11	/* Read the extra bits of the match position. */
#define ST_COPY_DATA		12	/* Copy the match inside the window. */
2053 
2054 static int
lzh_decode(struct lzh_stream * strm,int last)2055 lzh_decode(struct lzh_stream *strm, int last)
2056 {
2057 	struct lzh_dec *ds = strm->ds;
2058 	int avail_in;
2059 	int r;
2060 
2061 	if (ds->error)
2062 		return (ds->error);
2063 
2064 	avail_in = strm->avail_in;
2065 	do {
2066 		if (ds->state < ST_GET_LITERAL)
2067 			r = lzh_read_blocks(strm, last);
2068 		else
2069 			r = lzh_decode_blocks(strm, last);
2070 	} while (r == 100);
2071 	strm->total_in += avail_in - strm->avail_in;
2072 	return (r);
2073 }
2074 
2075 static void
lzh_emit_window(struct lzh_stream * strm,size_t s)2076 lzh_emit_window(struct lzh_stream *strm, size_t s)
2077 {
2078 	strm->ref_ptr = strm->ds->w_buff;
2079 	strm->avail_out = (int)s;
2080 	strm->total_out += s;
2081 }
2082 
/*
 * Read the header of the next run of blocks: the 16-bit block count, the
 * literal table and the position table.  This is a resumable state machine
 * over ds->state; the cases fall through in the order the data is read.
 *
 * Returns 100 once the state is ST_GET_LITERAL, so that lzh_decode() calls
 * lzh_decode_blocks() next.
 * Returns ARCHIVE_OK when more input is needed (only while `last' is not
 * set), or when the bytes remaining in the window were emitted at the end
 * of the input.
 * Returns ARCHIVE_EOF when `last' is set and no data is left.
 * Returns ARCHIVE_FAILED, also recorded in ds->error, on invalid or
 * truncated data.
 */
static int
lzh_read_blocks(struct lzh_stream *strm, int last)
{
	struct lzh_dec *ds = strm->ds;
	struct lzh_br *br = &(ds->br);
	int c = 0, i;
	unsigned rbits;

	for (;;) {
		switch (ds->state) {
		case ST_RD_BLOCK:
			/*
			 * Read a block number indicates how many blocks
			 * we will handle. The block is composed of a
			 * literal and a match, sometimes a literal only
			 * in particular, there are no reference data at
			 * the beginning of the decompression.
			 */
			if (!lzh_br_read_ahead_0(strm, br, 16)) {
				if (!last)
					/* We need following data. */
					return (ARCHIVE_OK);
				if (lzh_br_has(br, 8)) {
					/*
					 * It seems there are extra bits.
					 *  1. Compressed data is broken.
					 *  2. `last' flag does not properly
					 *     set.
					 */
					goto failed;
				}
				if (ds->w_pos > 0) {
					/* Hand over what is left in the
					 * window before reporting EOF on the
					 * next call. */
					lzh_emit_window(strm, ds->w_pos);
					ds->w_pos = 0;
					return (ARCHIVE_OK);
				}
				/* End of compressed data; we have completely
				 * handled all compressed data. */
				return (ARCHIVE_EOF);
			}
			ds->blocks_avail = lzh_br_bits(br, 16);
			if (ds->blocks_avail == 0)
				goto failed;
			lzh_br_consume(br, 16);
			/*
			 * Read a literal table compressed in huffman
			 * coding.
			 */
			ds->pt.len_size = ds->literal_pt_len_size;
			ds->pt.len_bits = ds->literal_pt_len_bits;
			ds->reading_position = 0;
			/* FALL THROUGH */
		case ST_RD_PT_1:
			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
			 * used in reading both a literal table and a
			 * position table. */
			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_PT_1;
				return (ARCHIVE_OK);
			}
			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
			lzh_br_consume(br, ds->pt.len_bits);
			/* FALL THROUGH */
		case ST_RD_PT_2:
			if (ds->pt.len_avail == 0) {
				/* There is no bitlen. */
				if (!lzh_br_read_ahead(strm, br,
				    ds->pt.len_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->state = ST_RD_PT_2;
					return (ARCHIVE_OK);
				}
				/* The table holds the single code read here. */
				if (!lzh_make_fake_table(&(ds->pt),
				    lzh_br_bits(br, ds->pt.len_bits)))
					goto failed;/* Invalid data. */
				lzh_br_consume(br, ds->pt.len_bits);
				if (ds->reading_position)
					ds->state = ST_GET_LITERAL;
				else
					ds->state = ST_RD_LITERAL_1;
				break;
			} else if (ds->pt.len_avail > ds->pt.len_size)
				goto failed;/* Invalid data. */
			ds->loop = 0;
			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
			/* The zero-run field handled in ST_RD_PT_3 is skipped
			 * for short tables and for the position table. */
			if (ds->pt.len_avail < 3 ||
			    ds->pt.len_size == ds->pos_pt_len_size) {
				ds->state = ST_RD_PT_4;
				break;
			}
			/* FALL THROUGH */
		case ST_RD_PT_3:
			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
			if (ds->loop < 3) {
				if (ds->loop < 0 || last)
					goto failed;/* Invalid data. */
				/* Not completed, get following data. */
				ds->state = ST_RD_PT_3;
				return (ARCHIVE_OK);
			}
			/* There are some null in bitlen of the literal. */
			if (!lzh_br_read_ahead(strm, br, 2)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_PT_3;
				return (ARCHIVE_OK);
			}
			/* c is the number of zero bit lengths that follow
			 * the first three entries. */
			c = lzh_br_bits(br, 2);
			lzh_br_consume(br, 2);
			if (c > ds->pt.len_avail - 3)
				goto failed;/* Invalid data. */
			for (i = 3; c-- > 0 ;)
				ds->pt.bitlen[i++] = 0;
			ds->loop = i;
			/* FALL THROUGH */
		case ST_RD_PT_4:
			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
			    ds->pt.len_avail);
			if (ds->loop < ds->pt.len_avail) {
				if (ds->loop < 0 || last)
					goto failed;/* Invalid data. */
				/* Not completed, get following data. */
				ds->state = ST_RD_PT_4;
				return (ARCHIVE_OK);
			}
			if (!lzh_make_huffman_table(&(ds->pt)))
				goto failed;/* Invalid data */
			if (ds->reading_position) {
				/* The position table was the last thing to
				 * read; the blocks can be decoded now. */
				ds->state = ST_GET_LITERAL;
				break;
			}
			/* FALL THROUGH */
		case ST_RD_LITERAL_1:
			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
				if (last)
					goto failed;/* Truncated data. */
				ds->state = ST_RD_LITERAL_1;
				return (ARCHIVE_OK);
			}
			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
			lzh_br_consume(br, ds->lt.len_bits);
			/* FALL THROUGH */
		case ST_RD_LITERAL_2:
			if (ds->lt.len_avail == 0) {
				/* There is no bitlen. */
				if (!lzh_br_read_ahead(strm, br,
				    ds->lt.len_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->state = ST_RD_LITERAL_2;
					return (ARCHIVE_OK);
				}
				if (!lzh_make_fake_table(&(ds->lt),
				    lzh_br_bits(br, ds->lt.len_bits)))
					goto failed;/* Invalid data */
				lzh_br_consume(br, ds->lt.len_bits);
				ds->state = ST_RD_POS_DATA_1;
				break;
			} else if (ds->lt.len_avail > ds->lt.len_size)
				goto failed;/* Invalid data */
			ds->loop = 0;
			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
			/* FALL THROUGH */
		case ST_RD_LITERAL_3:
			/* Each literal bit length is itself coded with the
			 * pt table; ds->loop remembers how far we got. */
			i = ds->loop;
			while (i < ds->lt.len_avail) {
				if (!lzh_br_read_ahead(strm, br,
				    ds->pt.max_bits)) {
					if (last)
						goto failed;/* Truncated data.*/
					ds->loop = i;
					ds->state = ST_RD_LITERAL_3;
					return (ARCHIVE_OK);
				}
				rbits = lzh_br_bits(br, ds->pt.max_bits);
				c = lzh_decode_huffman(&(ds->pt), rbits);
				if (c > 2) {
					/* Note: 'c' will never be more than
					 * eighteen since it's limited by
					 * PT_BITLEN_SIZE, which is being set
					 * to ds->pt.len_size through
					 * ds->literal_pt_len_size. */
					lzh_br_consume(br, ds->pt.bitlen[c]);
					c -= 2;
					ds->lt.freq[c]++;
					ds->lt.bitlen[i++] = c;
				} else if (c == 0) {
					lzh_br_consume(br, ds->pt.bitlen[c]);
					ds->lt.bitlen[i++] = 0;
				} else {
					/* c == 1 or c == 2 */
					/* A run of zero lengths: the run
					 * length follows in n extra bits. */
					int n = (c == 1)?4:9;
					if (!lzh_br_read_ahead(strm, br,
					     ds->pt.bitlen[c] + n)) {
						if (last) /* Truncated data. */
							goto failed;
						ds->loop = i;
						ds->state = ST_RD_LITERAL_3;
						return (ARCHIVE_OK);
					}
					lzh_br_consume(br, ds->pt.bitlen[c]);
					c = lzh_br_bits(br, n);
					lzh_br_consume(br, n);
					c += (n == 4)?3:20;
					if (i + c > ds->lt.len_avail)
						goto failed;/* Invalid data */
					memset(&(ds->lt.bitlen[i]), 0, c);
					i += c;
				}
			}
			if (i > ds->lt.len_avail ||
			    !lzh_make_huffman_table(&(ds->lt)))
				goto failed;/* Invalid data */
			/* FALL THROUGH */
		case ST_RD_POS_DATA_1:
			/*
			 * Read a position table compressed in huffman
			 * coding.
			 */
			ds->pt.len_size = ds->pos_pt_len_size;
			ds->pt.len_bits = ds->pos_pt_len_bits;
			ds->reading_position = 1;
			ds->state = ST_RD_PT_1;
			break;
		case ST_GET_LITERAL:
			/* All tables are ready; let lzh_decode() switch to
			 * lzh_decode_blocks(). */
			return (100);
		}
	}
failed:
	return (ds->error = ARCHIVE_FAILED);
}
2317 
/*
 * Decode literals and matches of the current run of blocks into the
 * window.  The bit reader, window position and state are copied into
 * locals on entry; they are written back to `ds' at `next_data' and when
 * the run of blocks is exhausted, but not on failure.
 *
 * Returns 100 when all blocks have been decoded (the state becomes
 * ST_RD_BLOCK), so that lzh_decode() calls lzh_read_blocks() next.
 * Returns ARCHIVE_OK when more input is needed or when a full window has
 * been emitted through lzh_emit_window().
 * Returns ARCHIVE_FAILED, also recorded in ds->error, on invalid or
 * truncated data.
 */
static int
lzh_decode_blocks(struct lzh_stream *strm, int last)
{
	struct lzh_dec *ds = strm->ds;
	struct lzh_br bre = ds->br;
	struct huffman *lt = &(ds->lt);
	struct huffman *pt = &(ds->pt);
	unsigned char *w_buff = ds->w_buff;
	unsigned char *lt_bitlen = lt->bitlen;
	unsigned char *pt_bitlen = pt->bitlen;
	int blocks_avail = ds->blocks_avail, c = 0;
	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
	int state = ds->state;

	for (;;) {
		switch (state) {
		case ST_GET_LITERAL:
			for (;;) {
				if (blocks_avail == 0) {
					/* We have decoded all blocks.
					 * Let's handle next blocks. */
					ds->state = ST_RD_BLOCK;
					ds->br = bre;
					ds->blocks_avail = 0;
					ds->w_pos = w_pos;
					ds->copy_pos = 0;
					return (100);
				}

				/* lzh_br_read_ahead() always tries to fill the
				 * cache buffer up. In specific situation we
				 * are close to the end of the data, the cache
				 * buffer will not be full and thus we have to
				 * determine if the cache buffer has some bits
				 * as much as we need after lzh_br_read_ahead()
				 * failed. */
				if (!lzh_br_read_ahead(strm, &bre,
				    lt_max_bits)) {
					if (!last)
						goto next_data;
					/* Remaining bits are less than
					 * maximum bits(lt.max_bits) but maybe
					 * it still remains as much as we need,
					 * so we should try to use it with
					 * dummy bits. */
					c = lzh_decode_huffman(lt,
					      lzh_br_bits_forced(&bre,
					        lt_max_bits));
					lzh_br_consume(&bre, lt_bitlen[c]);
					if (!lzh_br_has(&bre, 0))
						goto failed;/* Over read. */
				} else {
					c = lzh_decode_huffman(lt,
					      lzh_br_bits(&bre, lt_max_bits));
					lzh_br_consume(&bre, lt_bitlen[c]);
				}
				blocks_avail--;
				if (c > UCHAR_MAX)
					/* Current block is a match data. */
					break;
				/*
				 * 'c' is exactly a literal code.
				 */
				/* Save a decoded code to reference it
				 * afterward. */
				w_buff[w_pos] = c;
				if (++w_pos >= w_size) {
					/* The window is full: hand it over
					 * and start again from its head. */
					w_pos = 0;
					lzh_emit_window(strm, w_size);
					goto next_data;
				}
			}
			/* 'c' is the length of a match pattern we have
			 * already extracted, which has be stored in
			 * window(ds->w_buff). */
			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
			/* FALL THROUGH */
		case ST_GET_POS_1:
			/*
			 * Get a reference position.
			 */
			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
				if (!last) {
					state = ST_GET_POS_1;
					ds->copy_len = copy_len;
					goto next_data;
				}
				/* Same end-of-data handling as for the
				 * literal code: decode with dummy bits and
				 * fail if more bits were used than exist. */
				copy_pos = lzh_decode_huffman(pt,
				    lzh_br_bits_forced(&bre, pt_max_bits));
				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
				if (!lzh_br_has(&bre, 0))
					goto failed;/* Over read. */
			} else {
				copy_pos = lzh_decode_huffman(pt,
				    lzh_br_bits(&bre, pt_max_bits));
				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
			}
			/* FALL THROUGH */
		case ST_GET_POS_2:
			if (copy_pos > 1) {
				/* We need an additional adjustment number to
				 * the position. */
				int p = copy_pos - 1;
				if (!lzh_br_read_ahead(strm, &bre, p)) {
					if (last)
						goto failed;/* Truncated data.*/
					state = ST_GET_POS_2;
					ds->copy_len = copy_len;
					ds->copy_pos = copy_pos;
					goto next_data;
				}
				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
				lzh_br_consume(&bre, p);
			}
			/* The position is actually a distance from the last
			 * code we had extracted and thus we have to convert
			 * it to a position of the window. */
			copy_pos = (w_pos - copy_pos - 1) & w_mask;
			/* FALL THROUGH */
		case ST_COPY_DATA:
			/*
			 * Copy `copy_len' bytes as extracted data from
			 * the window into the output buffer.
			 */
			for (;;) {
				int l;

				/* Limit this step so that neither the source
				 * nor the destination runs past the end of
				 * the window. */
				l = copy_len;
				if (copy_pos > w_pos) {
					if (l > w_size - copy_pos)
						l = w_size - copy_pos;
				} else {
					if (l > w_size - w_pos)
						l = w_size - w_pos;
				}
				if ((copy_pos + l < w_pos)
				    || (w_pos + l < copy_pos)) {
					/* No overlap. */
					memcpy(w_buff + w_pos,
					    w_buff + copy_pos, l);
				} else {
					/* The ranges may overlap: copy byte
					 * by byte in ascending order so that
					 * bytes written earlier in this copy
					 * can serve as its source. */
					const unsigned char *s;
					unsigned char *d;
					int li;

					d = w_buff + w_pos;
					s = w_buff + copy_pos;
					for (li = 0; li < l-1;) {
						d[li] = s[li];li++;
						d[li] = s[li];li++;
					}
					if (li < l)
						d[li] = s[li];
				}
				w_pos += l;
				if (w_pos == w_size) {
					/* The window is full: emit it and
					 * remember what is left of the match
					 * for the next call. */
					w_pos = 0;
					lzh_emit_window(strm, w_size);
					if (copy_len <= l)
						state = ST_GET_LITERAL;
					else {
						state = ST_COPY_DATA;
						ds->copy_len = copy_len - l;
						ds->copy_pos =
						    (copy_pos + l) & w_mask;
					}
					goto next_data;
				}
				if (copy_len <= l)
					/* A copy of current pattern ended. */
					break;
				copy_len -= l;
				copy_pos = (copy_pos + l) & w_mask;
			}
			state = ST_GET_LITERAL;
			break;
		}
	}
failed:
	return (ds->error = ARCHIVE_FAILED);
next_data:
	/* Save the working copies so that the next call can resume. */
	ds->br = bre;
	ds->blocks_avail = blocks_avail;
	ds->state = state;
	ds->w_pos = w_pos;
	return (ARCHIVE_OK);
}
2507 
2508 static int
lzh_huffman_init(struct huffman * hf,size_t len_size,int tbl_bits)2509 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2510 {
2511 	int bits;
2512 
2513 	if (hf->bitlen == NULL) {
2514 		hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2515 		if (hf->bitlen == NULL)
2516 			return (ARCHIVE_FATAL);
2517 	}
2518 	if (hf->tbl == NULL) {
2519 		if (tbl_bits < HTBL_BITS)
2520 			bits = tbl_bits;
2521 		else
2522 			bits = HTBL_BITS;
2523 		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2524 		if (hf->tbl == NULL)
2525 			return (ARCHIVE_FATAL);
2526 	}
2527 	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2528 		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2529 		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2530 		if (hf->tree == NULL)
2531 			return (ARCHIVE_FATAL);
2532 	}
2533 	hf->len_size = (int)len_size;
2534 	hf->tbl_bits = tbl_bits;
2535 	return (ARCHIVE_OK);
2536 }
2537 
2538 static void
lzh_huffman_free(struct huffman * hf)2539 lzh_huffman_free(struct huffman *hf)
2540 {
2541 	free(hf->bitlen);
2542 	free(hf->tbl);
2543 	free(hf->tree);
2544 }
2545 
/*
 * Lookup table used by lzh_read_pt_bitlen() for bit lengths of seven or
 * more.  It is indexed by the low ten bits of a 13-bit peek whose top
 * three bits are all ones; each entry is the bit length that the pattern
 * encodes (7 through 16).  The last entry, reached when all ten index bits
 * are set, is 0 and marks an invalid pattern.
 */
static const char bitlen_tbl[0x400] = {
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
};
2612 static int
lzh_read_pt_bitlen(struct lzh_stream * strm,int start,int end)2613 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2614 {
2615 	struct lzh_dec *ds = strm->ds;
2616 	struct lzh_br *br = &(ds->br);
2617 	int c, i;
2618 
2619 	for (i = start; i < end; ) {
2620 		/*
2621 		 *  bit pattern     the number we need
2622 		 *     000           ->  0
2623 		 *     001           ->  1
2624 		 *     010           ->  2
2625 		 *     ...
2626 		 *     110           ->  6
2627 		 *     1110          ->  7
2628 		 *     11110         ->  8
2629 		 *     ...
2630 		 *     1111111111110 ->  16
2631 		 */
2632 		if (!lzh_br_read_ahead(strm, br, 3))
2633 			return (i);
2634 		if ((c = lzh_br_bits(br, 3)) == 7) {
2635 			if (!lzh_br_read_ahead(strm, br, 13))
2636 				return (i);
2637 			c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2638 			if (c)
2639 				lzh_br_consume(br, c - 3);
2640 			else
2641 				return (-1);/* Invalid data. */
2642 		} else
2643 			lzh_br_consume(br, 3);
2644 		ds->pt.bitlen[i++] = c;
2645 		ds->pt.freq[c]++;
2646 	}
2647 	return (i);
2648 }
2649 
2650 static int
lzh_make_fake_table(struct huffman * hf,uint16_t c)2651 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2652 {
2653 	if (c >= hf->len_size)
2654 		return (0);
2655 	hf->tbl[0] = c;
2656 	hf->max_bits = 0;
2657 	hf->shift_bits = 0;
2658 	hf->bitlen[hf->tbl[0]] = 0;
2659 	return (1);
2660 }
2661 
2662 /*
2663  * Make a huffman coding table.
2664  */
2665 static int
lzh_make_huffman_table(struct huffman * hf)2666 lzh_make_huffman_table(struct huffman *hf)
2667 {
2668 	uint16_t *tbl;
2669 	const unsigned char *bitlen;
2670 	int bitptn[17], weight[17];
2671 	int i, maxbits = 0, ptn, tbl_size, w;
2672 	int diffbits, len_avail;
2673 
2674 	/*
2675 	 * Initialize bit patterns.
2676 	 */
2677 	ptn = 0;
2678 	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2679 		bitptn[i] = ptn;
2680 		weight[i] = w;
2681 		if (hf->freq[i]) {
2682 			ptn += hf->freq[i] * w;
2683 			maxbits = i;
2684 		}
2685 	}
2686 	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2687 		return (0);/* Invalid */
2688 
2689 	hf->max_bits = maxbits;
2690 
2691 	/*
2692 	 * Cut out extra bits which we won't house in the table.
2693 	 * This preparation reduces the same calculation in the for-loop
2694 	 * making the table.
2695 	 */
2696 	if (maxbits < 16) {
2697 		int ebits = 16 - maxbits;
2698 		for (i = 1; i <= maxbits; i++) {
2699 			bitptn[i] >>= ebits;
2700 			weight[i] >>= ebits;
2701 		}
2702 	}
2703 	if (maxbits > HTBL_BITS) {
2704 		unsigned htbl_max;
2705 		uint16_t *p;
2706 
2707 		diffbits = maxbits - HTBL_BITS;
2708 		for (i = 1; i <= HTBL_BITS; i++) {
2709 			bitptn[i] >>= diffbits;
2710 			weight[i] >>= diffbits;
2711 		}
2712 		htbl_max = bitptn[HTBL_BITS] +
2713 		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2714 		p = &(hf->tbl[htbl_max]);
2715 		while (p < &hf->tbl[1U<<HTBL_BITS])
2716 			*p++ = 0;
2717 	} else
2718 		diffbits = 0;
2719 	hf->shift_bits = diffbits;
2720 
2721 	/*
2722 	 * Make the table.
2723 	 */
2724 	tbl_size = 1 << HTBL_BITS;
2725 	tbl = hf->tbl;
2726 	bitlen = hf->bitlen;
2727 	len_avail = hf->len_avail;
2728 	hf->tree_used = 0;
2729 	for (i = 0; i < len_avail; i++) {
2730 		uint16_t *p;
2731 		int len, cnt;
2732 		uint16_t bit;
2733 		int extlen;
2734 		struct htree_t *ht;
2735 
2736 		if (bitlen[i] == 0)
2737 			continue;
2738 		/* Get a bit pattern */
2739 		len = bitlen[i];
2740 		ptn = bitptn[len];
2741 		cnt = weight[len];
2742 		if (len <= HTBL_BITS) {
2743 			/* Calculate next bit pattern */
2744 			if ((bitptn[len] = ptn + cnt) > tbl_size)
2745 				return (0);/* Invalid */
2746 			/* Update the table */
2747 			p = &(tbl[ptn]);
2748 			if (cnt > 7) {
2749 				uint16_t *pc;
2750 
2751 				cnt -= 8;
2752 				pc = &p[cnt];
2753 				pc[0] = (uint16_t)i;
2754 				pc[1] = (uint16_t)i;
2755 				pc[2] = (uint16_t)i;
2756 				pc[3] = (uint16_t)i;
2757 				pc[4] = (uint16_t)i;
2758 				pc[5] = (uint16_t)i;
2759 				pc[6] = (uint16_t)i;
2760 				pc[7] = (uint16_t)i;
2761 				if (cnt > 7) {
2762 					cnt -= 8;
2763 					memcpy(&p[cnt], pc,
2764 						8 * sizeof(uint16_t));
2765 					pc = &p[cnt];
2766 					while (cnt > 15) {
2767 						cnt -= 16;
2768 						memcpy(&p[cnt], pc,
2769 							16 * sizeof(uint16_t));
2770 					}
2771 				}
2772 				if (cnt)
2773 					memcpy(p, pc, cnt * sizeof(uint16_t));
2774 			} else {
2775 				while (cnt > 1) {
2776 					p[--cnt] = (uint16_t)i;
2777 					p[--cnt] = (uint16_t)i;
2778 				}
2779 				if (cnt)
2780 					p[--cnt] = (uint16_t)i;
2781 			}
2782 			continue;
2783 		}
2784 
2785 		/*
2786 		 * A bit length is too big to be housed to a direct table,
2787 		 * so we use a tree model for its extra bits.
2788 		 */
2789 		bitptn[len] = ptn + cnt;
2790 		bit = 1U << (diffbits -1);
2791 		extlen = len - HTBL_BITS;
2792 
2793 		p = &(tbl[ptn >> diffbits]);
2794 		if (*p == 0) {
2795 			*p = len_avail + hf->tree_used;
2796 			ht = &(hf->tree[hf->tree_used++]);
2797 			if (hf->tree_used > hf->tree_avail)
2798 				return (0);/* Invalid */
2799 			ht->left = 0;
2800 			ht->right = 0;
2801 		} else {
2802 			if (*p < len_avail ||
2803 			    *p >= (len_avail + hf->tree_used))
2804 				return (0);/* Invalid */
2805 			ht = &(hf->tree[*p - len_avail]);
2806 		}
2807 		while (--extlen > 0) {
2808 			if (ptn & bit) {
2809 				if (ht->left < len_avail) {
2810 					ht->left = len_avail + hf->tree_used;
2811 					ht = &(hf->tree[hf->tree_used++]);
2812 					if (hf->tree_used > hf->tree_avail)
2813 						return (0);/* Invalid */
2814 					ht->left = 0;
2815 					ht->right = 0;
2816 				} else {
2817 					ht = &(hf->tree[ht->left - len_avail]);
2818 				}
2819 			} else {
2820 				if (ht->right < len_avail) {
2821 					ht->right = len_avail + hf->tree_used;
2822 					ht = &(hf->tree[hf->tree_used++]);
2823 					if (hf->tree_used > hf->tree_avail)
2824 						return (0);/* Invalid */
2825 					ht->left = 0;
2826 					ht->right = 0;
2827 				} else {
2828 					ht = &(hf->tree[ht->right - len_avail]);
2829 				}
2830 			}
2831 			bit >>= 1;
2832 		}
2833 		if (ptn & bit) {
2834 			if (ht->left != 0)
2835 				return (0);/* Invalid */
2836 			ht->left = (uint16_t)i;
2837 		} else {
2838 			if (ht->right != 0)
2839 				return (0);/* Invalid */
2840 			ht->right = (uint16_t)i;
2841 		}
2842 	}
2843 	return (1);
2844 }
2845 
2846 static int
lzh_decode_huffman_tree(struct huffman * hf,unsigned rbits,int c)2847 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2848 {
2849 	struct htree_t *ht;
2850 	int extlen;
2851 
2852 	ht = hf->tree;
2853 	extlen = hf->shift_bits;
2854 	while (c >= hf->len_avail) {
2855 		c -= hf->len_avail;
2856 		if (extlen-- <= 0 || c >= hf->tree_used)
2857 			return (0);
2858 		if (rbits & (1U << extlen))
2859 			c = ht[c].left;
2860 		else
2861 			c = ht[c].right;
2862 	}
2863 	return (c);
2864 }
2865 
2866 static inline int
lzh_decode_huffman(struct huffman * hf,unsigned rbits)2867 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2868 {
2869 	int c;
2870 	/*
2871 	 * At first search an index table for a bit pattern.
2872 	 * If it fails, search a huffman tree for.
2873 	 */
2874 	c = hf->tbl[rbits >> hf->shift_bits];
2875 	if (c < hf->len_avail || hf->len_avail == 0)
2876 		return (c);
2877 	/* This bit pattern needs to be found out at a huffman tree. */
2878 	return (lzh_decode_huffman_tree(hf, rbits, c));
2879 }
2880