xref: /freebsd/contrib/libarchive/libarchive/archive_read_support_format_lha.c (revision eb5165bb491138f60d9004bc4c781490016d9288)
1 /*-
2  * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_time_private.h"
47 #include "archive_endian.h"
48 
49 
/* Bounds of the match lengths encoded in the literal table. */
#define MAXMATCH		256	/* Maximum match length. */
#define MINMATCH		3	/* Minimum match length. */

/*
 * Literal table layout:
 *
 * +0              +256                      +510
 * +---------------+-------------------------+
 * | literal code  |       match length      |
 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
 * +---------------+-------------------------+
 *  <---          LT_BITLEN_SIZE         --->
 *
 * That is, UCHAR_MAX + 1 literal codes followed by one code for each
 * match length in [MINMATCH, MAXMATCH].
 */
#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)

/*
 * Position table size.
 * Note: this size is used for both the position table and the
 * pre-literal table.
 */
#define PT_BITLEN_SIZE		(3 + 16)
66 
/*
 * Bit stream reader: a cache word and the number of valid bits in it.
 */
#define CACHE_TYPE		uint64_t
#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
struct lzh_br {
	/* Cache buffer. */
	CACHE_TYPE	 cache_buffer;
	/* How many bits are available in cache_buffer. */
	int		 cache_avail;
};

/* One node of the binary tree used for codes longer than the direct table. */
struct htree_t {
	uint16_t left;
	uint16_t right;
};

/*
 * Huffman coding state.
 *
 * An index table is used because it is faster than walking a huffman
 * coding tree (a binary tree); however, a large index table causes many
 * L1 cache read misses, hence the HTBL_BITS constant.
 */
#define HTBL_BITS	10
struct huffman {
	int		 len_size;
	int		 len_avail;
	int		 len_bits;
	int		 freq[17];
	unsigned char	*bitlen;

	int		 max_bits;
	int		 shift_bits;
	int		 tbl_bits;
	int		 tree_used;
	int		 tree_avail;
	/* Direct access table. */
	uint16_t	*tbl;
	/* Binary tree table for extra bits over the direct access. */
	struct htree_t	*tree;
};

/*
 * LZH decoder state.
 */
struct lzh_dec {
	/* Decoding status. */
	int     		 state;

	/*
	 * Sliding window over the last 8Ki (lh5), 32Ki (lh6) or
	 * 64Ki (lh7) bytes of decoded data.
	 */
	int			 w_size;
	int			 w_mask;
	/* Window buffer; it is used as a loop (ring) buffer. */
	unsigned char		*w_buff;
	/* Insert position within the window. */
	int			 w_pos;
	/* Position in the window from which decoded bytes can be copied. */
	int     		 copy_pos;
	/* Number of decoded bytes that can be copied from the window. */
	int     		 copy_len;

	/* Bit stream reader. */
	struct lzh_br		 br;

	/*
	 * Huffman tables; presumably `lt' is the literal table and `pt'
	 * the position/pre-literal table (see LT_BITLEN_SIZE and
	 * PT_BITLEN_SIZE) -- confirm against the decoder code.
	 */
	struct huffman		 lt, pt;

	int			 blocks_avail;
	int			 pos_pt_len_size;
	int			 pos_pt_len_bits;
	int			 literal_pt_len_size;
	int			 literal_pt_len_bits;
	int			 reading_position;
	int			 loop;
	int			 error;
};
138 
/*
 * Stream handle passed to the lzh_decode*() routines.
 * NOTE(review): the field names suggest a zlib-like input/output
 * accounting scheme; confirm against lzh_decode() before relying on it.
 */
struct lzh_stream {
	/* Input side. */
	const unsigned char	*next_in;
	int			 avail_in;
	int64_t			 total_in;
	/* Output side. */
	const unsigned char	*ref_ptr;
	int			 avail_out;
	int64_t			 total_out;
	/* Decoder state owned by this stream. */
	struct lzh_dec		*ds;
};
148 
149 struct lha {
150 	/* entry_bytes_remaining is the number of bytes we expect.	    */
151 	int64_t                  entry_offset;
152 	int64_t                  entry_bytes_remaining;
153 	int64_t			 entry_unconsumed;
154 	uint16_t		 entry_crc_calculated;
155 
156 	size_t			 header_size;	/* header size		    */
157 	unsigned char		 level;		/* header level		    */
158 	char			 method[3];	/* compress type	    */
159 	int64_t			 compsize;	/* compressed data size	    */
160 	int64_t			 origsize;	/* original file size	    */
161 	int			 setflag;
162 #define BIRTHTIME_IS_SET	1
163 #define ATIME_IS_SET		2
164 #define UNIX_MODE_IS_SET	4
165 #define CRC_IS_SET		8
166 	int64_t			 birthtime;
167 	uint32_t		 birthtime_tv_nsec;
168 	int64_t			 mtime;
169 	uint32_t		 mtime_tv_nsec;
170 	int64_t			 atime;
171 	uint32_t		 atime_tv_nsec;
172 	mode_t			 mode;
173 	int64_t			 uid;
174 	int64_t			 gid;
175 	struct archive_string 	 uname;
176 	struct archive_string 	 gname;
177 	uint16_t		 header_crc;
178 	uint16_t		 crc;
179 	/* dirname and filename could be in different codepages */
180 	struct archive_string_conv *sconv_dir;
181 	struct archive_string_conv *sconv_fname;
182 	struct archive_string_conv *opt_sconv;
183 
184 	struct archive_string 	 dirname;
185 	struct archive_string 	 filename;
186 	struct archive_wstring	 ws;
187 
188 	unsigned char		 dos_attr;
189 
190 	/* Flag to mark progress that an archive was read their first header.*/
191 	char			 found_first_header;
192 	/* Flag to mark that indicates an empty directory. */
193 	char			 directory;
194 
195 	/* Flags to mark progress of decompression. */
196 	char			 decompress_init;
197 	char			 end_of_entry;
198 	char			 end_of_entry_cleanup;
199 	char			 entry_is_compressed;
200 
201 	char			 format_name[64];
202 
203 	struct lzh_stream	 strm;
204 };
205 
/*
 * Offsets of the header members shared by the header levels, and the
 * smallest number of header bytes the reader asks for.
 */
#define H_METHOD_OFFSET	2	/* Compress type. */
#define H_ATTR_OFFSET	19	/* DOS attribute. */
#define H_LEVEL_OFFSET	20	/* Header Level.  */
#define H_SIZE		22	/* Minimum header size. */
213 
/* Format callbacks handed to __archive_read_register_format(). */
static int      archive_read_format_lha_bid(struct archive_read *, int);
static int      archive_read_format_lha_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_lha_read_header(struct archive_read *,
		    struct archive_entry *);
static int	archive_read_format_lha_read_data(struct archive_read *,
		    const void **, size_t *, int64_t *);
static int	archive_read_format_lha_read_data_skip(struct archive_read *);
static int	archive_read_format_lha_cleanup(struct archive_read *);

/* lha_* helpers: header handling. */
static void	lha_replace_path_separator(struct lha *,
		    struct archive_entry *);
static int	lha_read_file_header_0(struct archive_read *, struct lha *);
static int	lha_read_file_header_1(struct archive_read *, struct lha *);
static int	lha_read_file_header_2(struct archive_read *, struct lha *);
static int	lha_read_file_header_3(struct archive_read *, struct lha *);
static int	lha_read_file_extended_header(struct archive_read *,
		    struct lha *, uint16_t *, int, uint64_t, size_t *);
static size_t	lha_check_header_format(const void *);
static int	lha_skip_sfx(struct archive_read *);
static unsigned char	lha_calcsum(unsigned char, const void *,
		    int, size_t);
static int	lha_parse_linkname(struct archive_wstring *,
		    struct archive_wstring *);

/* lha_* helpers: entry data and CRC. */
static int	lha_read_data_none(struct archive_read *, const void **,
		    size_t *, int64_t *);
static int	lha_read_data_lzh(struct archive_read *, const void **,
		    size_t *, int64_t *);
static void	lha_crc16_init(void);
static uint16_t lha_crc16(uint16_t, const void *, size_t);

/* lzh_* routines operating on struct lzh_stream / struct huffman. */
static int	lzh_decode_init(struct lzh_stream *, const char *);
static void	lzh_decode_free(struct lzh_stream *);
static int	lzh_decode(struct lzh_stream *, int);
static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
static int	lzh_huffman_init(struct huffman *, size_t, int);
static void	lzh_huffman_free(struct huffman *);
static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
static int	lzh_make_fake_table(struct huffman *, uint16_t);
static int	lzh_make_huffman_table(struct huffman *);
static inline int lzh_decode_huffman(struct huffman *, unsigned);
static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
255 
256 
257 int
archive_read_support_format_lha(struct archive * _a)258 archive_read_support_format_lha(struct archive *_a)
259 {
260 	struct archive_read *a = (struct archive_read *)_a;
261 	struct lha *lha;
262 	int r;
263 
264 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
265 	    ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
266 
267 	lha = calloc(1, sizeof(*lha));
268 	if (lha == NULL) {
269 		archive_set_error(&a->archive, ENOMEM,
270 		    "Can't allocate lha data");
271 		return (ARCHIVE_FATAL);
272 	}
273 	archive_string_init(&lha->ws);
274 
275 	r = __archive_read_register_format(a,
276 	    lha,
277 	    "lha",
278 	    archive_read_format_lha_bid,
279 	    archive_read_format_lha_options,
280 	    archive_read_format_lha_read_header,
281 	    archive_read_format_lha_read_data,
282 	    archive_read_format_lha_read_data_skip,
283 	    NULL,
284 	    archive_read_format_lha_cleanup,
285 	    NULL,
286 	    NULL);
287 
288 	if (r != ARCHIVE_OK)
289 		free(lha);
290 	return (ARCHIVE_OK);
291 }
292 
293 static size_t
lha_check_header_format(const void * h)294 lha_check_header_format(const void *h)
295 {
296 	const unsigned char *p = h;
297 	size_t next_skip_bytes;
298 
299 	switch (p[H_METHOD_OFFSET+3]) {
300 	/*
301 	 * "-lh0-" ... "-lh7-" "-lhd-"
302 	 * "-lzs-" "-lz5-"
303 	 */
304 	case '0': case '1': case '2': case '3':
305 	case '4': case '5': case '6': case '7':
306 	case 'd':
307 	case 's':
308 		next_skip_bytes = 4;
309 
310 		/* b0 == 0 means the end of an LHa archive file.	*/
311 		if (p[0] == 0)
312 			break;
313 		if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
314 		    ||  p[H_METHOD_OFFSET+4] != '-')
315 			break;
316 
317 		if (p[H_METHOD_OFFSET+2] == 'h') {
318 			/* "-lh?-" */
319 			if (p[H_METHOD_OFFSET+3] == 's')
320 				break;
321 			if (p[H_LEVEL_OFFSET] == 0)
322 				return (0);
323 			if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
324 				return (0);
325 		}
326 		if (p[H_METHOD_OFFSET+2] == 'z') {
327 			/* LArc extensions: -lzs-,-lz4- and -lz5- */
328 			if (p[H_LEVEL_OFFSET] != 0)
329 				break;
330 			if (p[H_METHOD_OFFSET+3] == 's'
331 			    || p[H_METHOD_OFFSET+3] == '4'
332 			    || p[H_METHOD_OFFSET+3] == '5')
333 				return (0);
334 		}
335 		break;
336 	case 'h': next_skip_bytes = 1; break;
337 	case 'z': next_skip_bytes = 1; break;
338 	case 'l': next_skip_bytes = 2; break;
339 	case '-': next_skip_bytes = 3; break;
340 	default : next_skip_bytes = 4; break;
341 	}
342 
343 	return (next_skip_bytes);
344 }
345 
346 static int
archive_read_format_lha_bid(struct archive_read * a,int best_bid)347 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
348 {
349 	const char *p;
350 	const void *buff;
351 	ssize_t bytes_avail, offset, window;
352 	size_t next;
353 
354 	/* If there's already a better bid than we can ever
355 	   make, don't bother testing. */
356 	if (best_bid > 30)
357 		return (-1);
358 
359 	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
360 		return (-1);
361 
362 	if (lha_check_header_format(p) == 0)
363 		return (30);
364 
365 	if (p[0] == 'M' && p[1] == 'Z') {
366 		/* PE file */
367 		offset = 0;
368 		window = 4096;
369 		while (offset < (1024 * 20)) {
370 			buff = __archive_read_ahead(a, offset + window,
371 			    &bytes_avail);
372 			if (buff == NULL) {
373 				/* Remaining bytes are less than window. */
374 				window >>= 1;
375 				if (window < (H_SIZE + 3))
376 					return (0);
377 				continue;
378 			}
379 			p = (const char *)buff + offset;
380 			while (p + H_SIZE < (const char *)buff + bytes_avail) {
381 				if ((next = lha_check_header_format(p)) == 0)
382 					return (30);
383 				p += next;
384 			}
385 			offset = p - (const char *)buff;
386 		}
387 	}
388 	return (0);
389 }
390 
391 static int
archive_read_format_lha_options(struct archive_read * a,const char * key,const char * val)392 archive_read_format_lha_options(struct archive_read *a,
393     const char *key, const char *val)
394 {
395 	struct lha *lha;
396 	int ret = ARCHIVE_FAILED;
397 
398 	lha = (struct lha *)(a->format->data);
399 	if (strcmp(key, "hdrcharset")  == 0) {
400 		if (val == NULL || val[0] == 0)
401 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
402 			    "lha: hdrcharset option needs a character-set name");
403 		else {
404 			lha->opt_sconv =
405 			    archive_string_conversion_from_charset(
406 				&a->archive, val, 0);
407 			if (lha->opt_sconv != NULL)
408 				ret = ARCHIVE_OK;
409 			else
410 				ret = ARCHIVE_FATAL;
411 		}
412 		return (ret);
413 	}
414 
415 	/* Note: The "warn" return is just to inform the options
416 	 * supervisor that we didn't handle it.  It will generate
417 	 * a suitable error if no one used this option. */
418 	return (ARCHIVE_WARN);
419 }
420 
421 static int
lha_skip_sfx(struct archive_read * a)422 lha_skip_sfx(struct archive_read *a)
423 {
424 	const void *h;
425 	const char *p, *q;
426 	size_t next, skip;
427 	ssize_t bytes, window;
428 
429 	window = 4096;
430 	for (;;) {
431 		h = __archive_read_ahead(a, window, &bytes);
432 		if (h == NULL) {
433 			/* Remaining bytes are less than window. */
434 			window >>= 1;
435 			if (window < (H_SIZE + 3))
436 				goto fatal;
437 			continue;
438 		}
439 		if (bytes < H_SIZE)
440 			goto fatal;
441 		p = h;
442 		q = p + bytes;
443 
444 		/*
445 		 * Scan ahead until we find something that looks
446 		 * like the lha header.
447 		 */
448 		while (p + H_SIZE < q) {
449 			if ((next = lha_check_header_format(p)) == 0) {
450 				skip = p - (const char *)h;
451 				__archive_read_consume(a, skip);
452 				return (ARCHIVE_OK);
453 			}
454 			p += next;
455 		}
456 		skip = p - (const char *)h;
457 		__archive_read_consume(a, skip);
458 	}
459 fatal:
460 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
461 	    "Couldn't find out LHa header");
462 	return (ARCHIVE_FATAL);
463 }
464 
465 static int
truncated_error(struct archive_read * a)466 truncated_error(struct archive_read *a)
467 {
468 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
469 	    "Truncated LHa header");
470 	return (ARCHIVE_FATAL);
471 }
472 
473 static int
archive_read_format_lha_read_header(struct archive_read * a,struct archive_entry * entry)474 archive_read_format_lha_read_header(struct archive_read *a,
475     struct archive_entry *entry)
476 {
477 	struct archive_wstring linkname;
478 	struct archive_wstring pathname;
479 	struct lha *lha;
480 	const unsigned char *p;
481 	const char *signature;
482 	int err;
483 	struct archive_mstring conv_buffer;
484 	const wchar_t *conv_buffer_p;
485 
486 	lha_crc16_init();
487 
488 	a->archive.archive_format = ARCHIVE_FORMAT_LHA;
489 	if (a->archive.archive_format_name == NULL)
490 		a->archive.archive_format_name = "lha";
491 
492 	lha = (struct lha *)(a->format->data);
493 	lha->decompress_init = 0;
494 	lha->end_of_entry = 0;
495 	lha->end_of_entry_cleanup = 0;
496 	lha->entry_unconsumed = 0;
497 
498 	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
499 		/*
500 		 * LHa archiver added 0 to the tail of its archive file as
501 		 * the mark of the end of the archive.
502 		 */
503 		signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
504 		if (signature == NULL || signature[0] == 0)
505 			return (ARCHIVE_EOF);
506 		return (truncated_error(a));
507 	}
508 
509 	signature = (const char *)p;
510 	if (lha->found_first_header == 0 &&
511 	    signature[0] == 'M' && signature[1] == 'Z') {
512                 /* This is an executable?  Must be self-extracting... 	*/
513 		err = lha_skip_sfx(a);
514 		if (err < ARCHIVE_WARN)
515 			return (err);
516 
517 		if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
518 			return (truncated_error(a));
519 		signature = (const char *)p;
520 	}
521 	/* signature[0] == 0 means the end of an LHa archive file. */
522 	if (signature[0] == 0)
523 		return (ARCHIVE_EOF);
524 
525 	/*
526 	 * Check the header format and method type.
527 	 */
528 	if (lha_check_header_format(p) != 0) {
529 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
530 		    "Bad LHa file");
531 		return (ARCHIVE_FATAL);
532 	}
533 
534 	/* We've found the first header. */
535 	lha->found_first_header = 1;
536 	/* Set a default value and common data */
537 	lha->header_size = 0;
538 	lha->level = p[H_LEVEL_OFFSET];
539 	lha->method[0] = p[H_METHOD_OFFSET+1];
540 	lha->method[1] = p[H_METHOD_OFFSET+2];
541 	lha->method[2] = p[H_METHOD_OFFSET+3];
542 	if (memcmp(lha->method, "lhd", 3) == 0)
543 		lha->directory = 1;
544 	else
545 		lha->directory = 0;
546 	if (memcmp(lha->method, "lh0", 3) == 0 ||
547 	    memcmp(lha->method, "lz4", 3) == 0)
548 		lha->entry_is_compressed = 0;
549 	else
550 		lha->entry_is_compressed = 1;
551 
552 	lha->compsize = 0;
553 	lha->origsize = 0;
554 	lha->setflag = 0;
555 	lha->birthtime = 0;
556 	lha->birthtime_tv_nsec = 0;
557 	lha->mtime = 0;
558 	lha->mtime_tv_nsec = 0;
559 	lha->atime = 0;
560 	lha->atime_tv_nsec = 0;
561 	lha->mode = (lha->directory)? 0777 : 0666;
562 	lha->uid = 0;
563 	lha->gid = 0;
564 	archive_string_empty(&lha->dirname);
565 	archive_string_empty(&lha->filename);
566 	lha->dos_attr = 0;
567 	if (lha->opt_sconv != NULL) {
568 		lha->sconv_dir = lha->opt_sconv;
569 		lha->sconv_fname = lha->opt_sconv;
570 	} else {
571 		lha->sconv_dir = NULL;
572 		lha->sconv_fname = NULL;
573 	}
574 
575 	switch (p[H_LEVEL_OFFSET]) {
576 	case 0:
577 		err = lha_read_file_header_0(a, lha);
578 		break;
579 	case 1:
580 		err = lha_read_file_header_1(a, lha);
581 		break;
582 	case 2:
583 		err = lha_read_file_header_2(a, lha);
584 		break;
585 	case 3:
586 		err = lha_read_file_header_3(a, lha);
587 		break;
588 	default:
589 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
590 		    "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
591 		err = ARCHIVE_FATAL;
592 		break;
593 	}
594 	if (err < ARCHIVE_WARN)
595 		return (err);
596 
597 
598 	if (!lha->directory && archive_strlen(&lha->filename) == 0)
599 		/* The filename has not been set */
600 		return (truncated_error(a));
601 
602 	/*
603 	 * Make a pathname from a dirname and a filename, after converting to Unicode.
604 	 * This is because codepages might differ between dirname and filename.
605 	*/
606 	archive_string_init(&pathname);
607 	archive_string_init(&linkname);
608 	archive_string_init(&conv_buffer.aes_mbs);
609 	archive_string_init(&conv_buffer.aes_mbs_in_locale);
610 	archive_string_init(&conv_buffer.aes_utf8);
611 	archive_string_init(&conv_buffer.aes_wcs);
612 	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
613 		archive_set_error(&a->archive,
614 			ARCHIVE_ERRNO_FILE_FORMAT,
615 			"Pathname cannot be converted "
616 			"from %s to Unicode",
617 			archive_string_conversion_charset_name(lha->sconv_dir));
618 		err = ARCHIVE_FATAL;
619 	} else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
620 		err = ARCHIVE_FATAL;
621 	if (err == ARCHIVE_FATAL) {
622 		archive_mstring_clean(&conv_buffer);
623 		archive_wstring_free(&pathname);
624 		archive_wstring_free(&linkname);
625 		return (err);
626 	}
627 	archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
628 
629 	archive_string_empty(&conv_buffer.aes_mbs);
630 	archive_string_empty(&conv_buffer.aes_mbs_in_locale);
631 	archive_string_empty(&conv_buffer.aes_utf8);
632 	archive_wstring_empty(&conv_buffer.aes_wcs);
633 	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
634 		archive_set_error(&a->archive,
635 			ARCHIVE_ERRNO_FILE_FORMAT,
636 			"Pathname cannot be converted "
637 			"from %s to Unicode",
638 			archive_string_conversion_charset_name(lha->sconv_fname));
639 		err = ARCHIVE_FATAL;
640 	}
641 	else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
642 		err = ARCHIVE_FATAL;
643 	if (err == ARCHIVE_FATAL) {
644 		archive_mstring_clean(&conv_buffer);
645 		archive_wstring_free(&pathname);
646 		archive_wstring_free(&linkname);
647 		return (err);
648 	}
649 	archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
650 	archive_mstring_clean(&conv_buffer);
651 
652 	if ((lha->mode & AE_IFMT) == AE_IFLNK) {
653 		/*
654 	 	 * Extract the symlink-name if it's included in the pathname.
655 	 	 */
656 		if (!lha_parse_linkname(&linkname, &pathname)) {
657 			/* We couldn't get the symlink-name. */
658 			archive_set_error(&a->archive,
659 		    	    ARCHIVE_ERRNO_FILE_FORMAT,
660 			    "Unknown symlink-name");
661 			archive_wstring_free(&pathname);
662 			archive_wstring_free(&linkname);
663 			return (ARCHIVE_FAILED);
664 		}
665 	} else {
666 		/*
667 		 * Make sure a file-type is set.
668 		 * The mode has been overridden if it is in the extended data.
669 		 */
670 		lha->mode = (lha->mode & ~AE_IFMT) |
671 		    ((lha->directory)? AE_IFDIR: AE_IFREG);
672 	}
673 	if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
674 	    (lha->dos_attr & 1) != 0)
675 		lha->mode &= ~(0222);/* read only. */
676 
677 	/*
678 	 * Set basic file parameters.
679 	 */
680 	archive_entry_copy_pathname_w(entry, pathname.s);
681 	archive_wstring_free(&pathname);
682 	if (archive_strlen(&linkname) > 0) {
683 		archive_entry_copy_symlink_w(entry, linkname.s);
684 	} else
685 		archive_entry_set_symlink(entry, NULL);
686 	archive_wstring_free(&linkname);
687 	/*
688 	 * When a header level is 0, there is a possibility that
689 	 * a pathname and a symlink has '\' character, a directory
690 	 * separator in DOS/Windows. So we should convert it to '/'.
691 	 */
692 	if (lha->level == 0)
693 		lha_replace_path_separator(lha, entry);
694 
695 	archive_entry_set_mode(entry, lha->mode);
696 	archive_entry_set_uid(entry, lha->uid);
697 	archive_entry_set_gid(entry, lha->gid);
698 	if (archive_strlen(&lha->uname) > 0)
699 		archive_entry_set_uname(entry, lha->uname.s);
700 	if (archive_strlen(&lha->gname) > 0)
701 		archive_entry_set_gname(entry, lha->gname.s);
702 	if (lha->setflag & BIRTHTIME_IS_SET) {
703 		archive_entry_set_birthtime(entry, lha->birthtime,
704 		    lha->birthtime_tv_nsec);
705 		archive_entry_set_ctime(entry, lha->birthtime,
706 		    lha->birthtime_tv_nsec);
707 	} else {
708 		archive_entry_unset_birthtime(entry);
709 		archive_entry_unset_ctime(entry);
710 	}
711 	archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
712 	if (lha->setflag & ATIME_IS_SET)
713 		archive_entry_set_atime(entry, lha->atime,
714 		    lha->atime_tv_nsec);
715 	else
716 		archive_entry_unset_atime(entry);
717 	if (lha->directory || archive_entry_symlink(entry) != NULL)
718 		archive_entry_unset_size(entry);
719 	else
720 		archive_entry_set_size(entry, lha->origsize);
721 
722 	/*
723 	 * Prepare variables used to read a file content.
724 	 */
725 	lha->entry_bytes_remaining = lha->compsize;
726 	if (lha->entry_bytes_remaining < 0) {
727 		archive_set_error(&a->archive,
728 		    ARCHIVE_ERRNO_FILE_FORMAT,
729 		    "Invalid LHa entry size");
730 		return (ARCHIVE_FATAL);
731 	}
732 	lha->entry_offset = 0;
733 	lha->entry_crc_calculated = 0;
734 
735 	/*
736 	 * This file does not have a content.
737 	 */
738 	if (lha->directory || lha->compsize == 0)
739 		lha->end_of_entry = 1;
740 
741 	snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-",
742 	    lha->method[0], lha->method[1], lha->method[2]);
743 	a->archive.archive_format_name = lha->format_name;
744 
745 	return (err);
746 }
747 
748 /*
749  * Replace a DOS path separator '\' by a character '/'.
750  * Some multi-byte character set have  a character '\' in its second byte.
751  */
752 static void
lha_replace_path_separator(struct lha * lha,struct archive_entry * entry)753 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
754 {
755 	const wchar_t *wp;
756 	size_t i;
757 
758 	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
759 		archive_wstrcpy(&(lha->ws), wp);
760 		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
761 			if (lha->ws.s[i] == L'\\')
762 				lha->ws.s[i] = L'/';
763 		}
764 		archive_entry_copy_pathname_w(entry, lha->ws.s);
765 	}
766 
767 	if ((wp = archive_entry_symlink_w(entry)) != NULL) {
768 		archive_wstrcpy(&(lha->ws), wp);
769 		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
770 			if (lha->ws.s[i] == L'\\')
771 				lha->ws.s[i] = L'/';
772 		}
773 		archive_entry_copy_symlink_w(entry, lha->ws.s);
774 	}
775 }
776 
777 /*
778  * Header 0 format
779  *
780  * +0              +1         +2               +7                  +11
781  * +---------------+----------+----------------+-------------------+
782  * |header size(*1)|header sum|compression type|compressed size(*2)|
783  * +---------------+----------+----------------+-------------------+
784  *                             <---------------------(*1)----------*
785  *
786  * +11               +15       +17       +19            +20              +21
787  * +-----------------+---------+---------+--------------+----------------+
788  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
789  * +-----------------+---------+---------+--------------+----------------+
790  * *--------------------------------(*1)---------------------------------*
791  *
792  * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
793  * +---------------+---------+----------+----------------+------------------+
794  * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
795  * +---------------+---------+----------+----------------+------------------+
796  *                  <--(*3)->                             <------(*2)------>
797  * *----------------------(*1)-------------------------->
798  *
799  */
800 #define H0_HEADER_SIZE_OFFSET	0
801 #define H0_HEADER_SUM_OFFSET	1
802 #define H0_COMP_SIZE_OFFSET	7
803 #define H0_ORIG_SIZE_OFFSET	11
804 #define H0_DOS_TIME_OFFSET	15
805 #define H0_NAME_LEN_OFFSET	21
806 #define H0_FILE_NAME_OFFSET	22
807 #define H0_FIXED_SIZE		24
808 static int
lha_read_file_header_0(struct archive_read * a,struct lha * lha)809 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
810 {
811 	const unsigned char *p;
812 	int extdsize, namelen;
813 	unsigned char headersum, sum_calculated;
814 
815 	if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
816 		return (truncated_error(a));
817 	lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
818 	headersum = p[H0_HEADER_SUM_OFFSET];
819 	lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
820 	lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
821 	lha->mtime = dos_to_unix(archive_le32dec(p + H0_DOS_TIME_OFFSET));
822 	namelen = p[H0_NAME_LEN_OFFSET];
823 	extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
824 	if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
825 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
826 		    "Invalid LHa header");
827 		return (ARCHIVE_FATAL);
828 	}
829 	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
830 		return (truncated_error(a));
831 
832 	archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
833 	/* When extdsize == -2, A CRC16 value is not present in the header. */
834 	if (extdsize >= 0) {
835 		lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
836 		lha->setflag |= CRC_IS_SET;
837 	}
838 	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
839 
840 	/* Read an extended header */
841 	if (extdsize > 0) {
842 		/* This extended data is set by 'LHa for UNIX' only.
843 		 * Maybe fixed size.
844 		 */
845 		p += H0_FILE_NAME_OFFSET + namelen + 2;
846 		if (p[0] == 'U' && extdsize == 12) {
847 			/* p[1] is a minor version. */
848 			lha->mtime = archive_le32dec(&p[2]);
849 			lha->mode = archive_le16dec(&p[6]);
850 			lha->uid = archive_le16dec(&p[8]);
851 			lha->gid = archive_le16dec(&p[10]);
852 			lha->setflag |= UNIX_MODE_IS_SET;
853 		}
854 	}
855 	__archive_read_consume(a, lha->header_size);
856 
857 	if (sum_calculated != headersum) {
858 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
859 		    "LHa header sum error");
860 		return (ARCHIVE_FATAL);
861 	}
862 
863 	return (ARCHIVE_OK);
864 }
865 
866 /*
867  * Header 1 format
868  *
869  * +0              +1         +2               +7            +11
870  * +---------------+----------+----------------+-------------+
871  * |header size(*1)|header sum|compression type|skip size(*2)|
872  * +---------------+----------+----------------+-------------+
873  *                             <---------------(*1)----------*
874  *
875  * +11               +15       +17       +19            +20              +21
876  * +-----------------+---------+---------+--------------+----------------+
877  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
878  * +-----------------+---------+---------+--------------+----------------+
879  * *-------------------------------(*1)----------------------------------*
880  *
881  * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
882  * +---------------+---------+----------+-----------+-----------+
883  * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
884  * +---------------+---------+----------+-----------+-----------+
885  *                  <--(*3)->
886  * *----------------------------(*1)----------------------------*
887  *
888  * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
889  * +----------------+---------------------+------------------------+
890  * |next header size| extended header(*5) |     compressed data    |
891  * +----------------+---------------------+------------------------+
892  * *------(*1)-----> <--------------------(*2)-------------------->
893  */
894 #define H1_HEADER_SIZE_OFFSET	0
895 #define H1_HEADER_SUM_OFFSET	1
896 #define H1_COMP_SIZE_OFFSET	7
897 #define H1_ORIG_SIZE_OFFSET	11
898 #define H1_DOS_TIME_OFFSET	15
899 #define H1_NAME_LEN_OFFSET	21
900 #define H1_FILE_NAME_OFFSET	22
901 #define H1_FIXED_SIZE		27
902 static int
lha_read_file_header_1(struct archive_read * a,struct lha * lha)903 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
904 {
905 	const unsigned char *p;
906 	size_t extdsize;
907 	int i, err, err2;
908 	int namelen, padding;
909 	unsigned char headersum, sum_calculated;
910 
911 	err = ARCHIVE_OK;
912 
913 	if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
914 		return (truncated_error(a));
915 
916 	lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
917 	headersum = p[H1_HEADER_SUM_OFFSET];
918 	/* Note: An extended header size is included in a compsize. */
919 	lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
920 	lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
921 	lha->mtime = dos_to_unix(archive_le32dec(p + H1_DOS_TIME_OFFSET));
922 	namelen = p[H1_NAME_LEN_OFFSET];
923 	/* Calculate a padding size. The result will be normally 0 only(?) */
924 	padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
925 
926 	if (namelen > 230 || padding < 0)
927 		goto invalid;
928 
929 	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
930 		return (truncated_error(a));
931 
932 	for (i = 0; i < namelen; i++) {
933 		if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
934 			goto invalid;/* Invalid filename. */
935 	}
936 	archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
937 	lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
938 	lha->setflag |= CRC_IS_SET;
939 
940 	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
941 	/* Consume used bytes but not include `next header size' data
942 	 * since it will be consumed in lha_read_file_extended_header(). */
943 	__archive_read_consume(a, lha->header_size - 2);
944 
945 	/* Read extended headers */
946 	err2 = lha_read_file_extended_header(a, lha, NULL, 2,
947 	    (uint64_t)(lha->compsize + 2), &extdsize);
948 	if (err2 < ARCHIVE_WARN)
949 		return (err2);
950 	if (err2 < err)
951 		err = err2;
952 	/* Get a real compressed file size. */
953 	lha->compsize -= extdsize - 2;
954 
955 	if (lha->compsize < 0)
956 		goto invalid;	/* Invalid compressed file size */
957 
958 	if (sum_calculated != headersum) {
959 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
960 		    "LHa header sum error");
961 		return (ARCHIVE_FATAL);
962 	}
963 	return (err);
964 invalid:
965 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
966 	    "Invalid LHa header");
967 	return (ARCHIVE_FATAL);
968 }
969 
970 /*
971  * Header 2 format
972  *
973  * +0              +2               +7                  +11               +15
974  * +---------------+----------------+-------------------+-----------------+
975  * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
976  * +---------------+----------------+-------------------+-----------------+
977  *  <--------------------------------(*1)---------------------------------*
978  *
979  * +15               +19          +20              +21        +23         +24
980  * +-----------------+------------+----------------+----------+-----------+
 * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
982  * +-----------------+------------+----------------+----------+-----------+
983  * *---------------------------------(*1)---------------------------------*
984  *
985  * +24              +26                 +26+(*3)      +26+(*3)+(*4)
986  * +----------------+-------------------+-------------+-------------------+
987  * |next header size|extended header(*3)| padding(*4) |  compressed data  |
988  * +----------------+-------------------+-------------+-------------------+
989  * *--------------------------(*1)-------------------> <------(*2)------->
990  *
991  */
992 #define H2_HEADER_SIZE_OFFSET	0
993 #define H2_COMP_SIZE_OFFSET	7
994 #define H2_ORIG_SIZE_OFFSET	11
995 #define H2_TIME_OFFSET		15
996 #define H2_CRC_OFFSET		21
997 #define H2_FIXED_SIZE		24
998 static int
lha_read_file_header_2(struct archive_read * a,struct lha * lha)999 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1000 {
1001 	const unsigned char *p;
1002 	size_t extdsize;
1003 	int err, padding;
1004 	uint16_t header_crc;
1005 
1006 	if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1007 		return (truncated_error(a));
1008 
1009 	lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1010 	lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1011 	lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1012 	lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1013 	lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1014 	lha->setflag |= CRC_IS_SET;
1015 
1016 	if (lha->header_size < H2_FIXED_SIZE) {
1017 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1018 		    "Invalid LHa header size");
1019 		return (ARCHIVE_FATAL);
1020 	}
1021 
1022 	header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1023 	__archive_read_consume(a, H2_FIXED_SIZE);
1024 
1025 	/* Read extended headers */
1026 	err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1027 		  lha->header_size - H2_FIXED_SIZE, &extdsize);
1028 	if (err < ARCHIVE_WARN)
1029 		return (err);
1030 
1031 	/* Calculate a padding size. The result will be normally 0 or 1. */
1032 	padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1033 	if (padding > 0) {
1034 		if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1035 			return (truncated_error(a));
1036 		header_crc = lha_crc16(header_crc, p, padding);
1037 		__archive_read_consume(a, padding);
1038 	}
1039 
1040 	if (header_crc != lha->header_crc) {
1041 #ifndef DONT_FAIL_ON_CRC_ERROR
1042 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1043 		    "LHa header CRC error");
1044 		return (ARCHIVE_FATAL);
1045 #endif
1046 	}
1047 	return (err);
1048 }
1049 
1050 /*
1051  * Header 3 format
1052  *
1053  * +0           +2               +7                  +11               +15
1054  * +------------+----------------+-------------------+-----------------+
1055  * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1056  * +------------+----------------+-------------------+-----------------+
1057  *  <-------------------------------(*1)-------------------------------*
1058  *
1059  * +15               +19          +20              +21        +23         +24
1060  * +-----------------+------------+----------------+----------+-----------+
1061  * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1062  * +-----------------+------------+----------------+----------+-----------+
1063  * *--------------------------------(*1)----------------------------------*
1064  *
1065  * +24             +28              +32                 +32+(*3)
1066  * +---------------+----------------+-------------------+-----------------+
1067  * |header size(*1)|next header size|extended header(*3)| compressed data |
1068  * +---------------+----------------+-------------------+-----------------+
1069  * *------------------------(*1)-----------------------> <------(*2)----->
1070  *
1071  */
1072 #define H3_FIELD_LEN_OFFSET	0
1073 #define H3_COMP_SIZE_OFFSET	7
1074 #define H3_ORIG_SIZE_OFFSET	11
1075 #define H3_TIME_OFFSET		15
1076 #define H3_CRC_OFFSET		21
1077 #define H3_HEADER_SIZE_OFFSET	24
1078 #define H3_FIXED_SIZE		28
1079 static int
lha_read_file_header_3(struct archive_read * a,struct lha * lha)1080 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1081 {
1082 	const unsigned char *p;
1083 	size_t extdsize;
1084 	int err;
1085 	uint16_t header_crc;
1086 
1087 	if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1088 		return (truncated_error(a));
1089 
1090 	if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1091 		goto invalid;
1092 	lha->header_size = archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1093 	lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1094 	lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1095 	lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1096 	lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1097 	lha->setflag |= CRC_IS_SET;
1098 
1099 	if (lha->header_size < H3_FIXED_SIZE + 4)
1100 		goto invalid;
1101 	header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1102 	__archive_read_consume(a, H3_FIXED_SIZE);
1103 
1104 	/* Reject rediculously large header */
1105 	if (lha->header_size > 65536) {
1106 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1107 		    "LHa header size too large");
1108 		return (ARCHIVE_FATAL);
1109 	}
1110 
1111 	/* Read extended headers */
1112 	err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1113 		  lha->header_size - H3_FIXED_SIZE, &extdsize);
1114 	if (err < ARCHIVE_WARN)
1115 		return (err);
1116 
1117 	if (header_crc != lha->header_crc) {
1118 #ifndef DONT_FAIL_ON_CRC_ERROR
1119 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1120 		    "LHa header CRC error");
1121 		return (ARCHIVE_FATAL);
1122 #endif
1123 	}
1124 	return (err);
1125 invalid:
1126 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1127 	    "Invalid LHa header");
1128 	return (ARCHIVE_FATAL);
1129 }
1130 
1131 /*
1132  * Extended header format
1133  *
1134  * +0             +2        +3  -- used in header 1 and 2
1135  * +0             +4        +5  -- used in header 3
1136  * +--------------+---------+-------------------+--------------+--
1137  * |ex-header size|header id|        data       |ex-header size| .......
1138  * +--------------+---------+-------------------+--------------+--
1139  *  <-------------( ex-header size)------------> <-- next extended header --*
1140  *
 * If the ex-header size is zero, it marks the end of the extended
 * headers.
1143  *
1144  */
1145 static int
lha_read_file_extended_header(struct archive_read * a,struct lha * lha,uint16_t * crc,int sizefield_length,uint64_t limitsize,size_t * total_size)1146 lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1147     uint16_t *crc, int sizefield_length, uint64_t limitsize, size_t *total_size)
1148 {
1149 	const void *h;
1150 	const unsigned char *extdheader;
1151 	size_t	extdsize;
1152 	size_t	datasize;
1153 	unsigned int i;
1154 	unsigned char extdtype;
1155 
1156 #define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
1157 #define EXT_FILENAME		0x01		/* Filename 		    */
1158 #define EXT_DIRECTORY		0x02		/* Directory name	    */
1159 #define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
1160 #define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
1161 #define EXT_FILESIZE		0x42		/* Large file size	    */
1162 #define EXT_TIMEZONE		0x43		/* Time zone		    */
1163 #define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
1164 #define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
1165 #define EXT_CODEPAGE		0x46		/* Codepage		    */
1166 #define EXT_UNIX_MODE		0x50		/* File permission	    */
1167 #define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
1168 #define EXT_UNIX_GNAME		0x52		/* Group name		    */
1169 #define EXT_UNIX_UNAME		0x53		/* User name		    */
1170 #define EXT_UNIX_MTIME		0x54		/* Modified time	    */
1171 #define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
1172 #define EXT_NEW_ATTR		0xff		/* new attribute	    */
1173 
1174 	*total_size = sizefield_length;
1175 
1176 	for (;;) {
1177 		/* Read an extended header size. */
1178 		if ((h =
1179 		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1180 			return (truncated_error(a));
1181 		/* Check if the size is the zero indicates the end of the
1182 		 * extended header. */
1183 		if (sizefield_length == sizeof(uint16_t))
1184 			extdsize = archive_le16dec(h);
1185 		else
1186 			extdsize = archive_le32dec(h);
1187 		if (extdsize == 0) {
1188 			/* End of extended header */
1189 			if (crc != NULL)
1190 				*crc = lha_crc16(*crc, h, sizefield_length);
1191 			__archive_read_consume(a, sizefield_length);
1192 			return (ARCHIVE_OK);
1193 		}
1194 
1195 		/* Sanity check to the extended header size. */
1196 		if (((uint64_t)*total_size + extdsize) > limitsize ||
1197 		    extdsize <= (size_t)sizefield_length)
1198 			goto invalid;
1199 
1200 		/* Read the extended header. */
1201 		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1202 			return (truncated_error(a));
1203 		*total_size += extdsize;
1204 
1205 		extdheader = (const unsigned char *)h;
1206 		/* Get the extended header type. */
1207 		extdtype = extdheader[sizefield_length];
1208 		/* Calculate an extended data size. */
1209 		datasize = extdsize - (1 + sizefield_length);
1210 		/* Skip an extended header size field and type field. */
1211 		extdheader += sizefield_length + 1;
1212 
1213 		if (crc != NULL && extdtype != EXT_HEADER_CRC)
1214 			*crc = lha_crc16(*crc, h, extdsize);
1215 		switch (extdtype) {
1216 		case EXT_HEADER_CRC:
1217 			/* We only use a header CRC. Following data will not
1218 			 * be used. */
1219 			if (datasize >= 2) {
1220 				lha->header_crc = archive_le16dec(extdheader);
1221 				if (crc != NULL) {
1222 					static const char zeros[2] = {0, 0};
1223 					*crc = lha_crc16(*crc, h,
1224 					    extdsize - datasize);
1225 					/* CRC value itself as zero */
1226 					*crc = lha_crc16(*crc, zeros, 2);
1227 					*crc = lha_crc16(*crc,
1228 					    extdheader+2, datasize - 2);
1229 				}
1230 			}
1231 			break;
1232 		case EXT_FILENAME:
1233 			if (datasize == 0) {
1234 				/* maybe directory header */
1235 				archive_string_empty(&lha->filename);
1236 				break;
1237 			}
1238 			if (extdheader[0] == '\0')
1239 				goto invalid;
1240 			archive_strncpy(&lha->filename,
1241 			    (const char *)extdheader, datasize);
1242 			break;
1243 		case EXT_UTF16_FILENAME:
1244 			if (datasize == 0) {
1245 				/* maybe directory header */
1246 				archive_string_empty(&lha->filename);
1247 				break;
1248 			} else if (datasize & 1) {
1249 				/* UTF-16 characters take always 2 or 4 bytes */
1250 				goto invalid;
1251 			}
1252 			if (extdheader[0] == '\0')
1253 				goto invalid;
1254 			archive_string_empty(&lha->filename);
1255 			archive_array_append(&lha->filename,
1256 				(const char *)extdheader, datasize);
1257 			/* Setup a string conversion for a filename. */
1258 			lha->sconv_fname =
1259 			    archive_string_conversion_from_charset(&a->archive,
1260 			        "UTF-16LE", 1);
1261 			if (lha->sconv_fname == NULL)
1262 				return (ARCHIVE_FATAL);
1263 			break;
1264 		case EXT_DIRECTORY:
1265 			if (datasize == 0 || extdheader[0] == '\0')
1266 				/* no directory name data. exit this case. */
1267 				goto invalid;
1268 
1269 			archive_strncpy(&lha->dirname,
1270 		  	    (const char *)extdheader, datasize);
1271 			/*
1272 			 * Convert directory delimiter from 0xFF
1273 			 * to '/' for local system.
1274 	 		 */
1275 			for (i = 0; i < lha->dirname.length; i++) {
1276 				if ((unsigned char)lha->dirname.s[i] == 0xFF)
1277 					lha->dirname.s[i] = '/';
1278 			}
1279 			/* Is last character directory separator? */
1280 			if (lha->dirname.s[lha->dirname.length-1] != '/')
1281 				/* invalid directory data */
1282 				goto invalid;
1283 			break;
1284 		case EXT_UTF16_DIRECTORY:
1285 			/* UTF-16 characters take always 2 or 4 bytes */
1286 			if (datasize == 0 || (datasize & 1) ||
1287 			    extdheader[0] == '\0') {
1288 				/* no directory name data. exit this case. */
1289 				goto invalid;
1290 			}
1291 
1292 			archive_string_empty(&lha->dirname);
1293 			archive_array_append(&lha->dirname,
1294 				(const char *)extdheader, datasize);
1295 			lha->sconv_dir =
1296 			    archive_string_conversion_from_charset(&a->archive,
1297 			        "UTF-16LE", 1);
1298 			if (lha->sconv_dir == NULL)
1299 				return (ARCHIVE_FATAL);
1300 			else {
1301 				/*
1302 				 * Convert directory delimiter from 0xFFFF
1303 				 * to '/' for local system.
1304 				 */
1305 				uint16_t dirSep;
1306 				uint16_t d = 1;
1307 				if (archive_be16dec(&d) == 1)
1308 					dirSep = 0x2F00;
1309 				else
1310 					dirSep = 0x002F;
1311 
1312 				/* UTF-16LE character */
1313 				uint16_t *utf16name =
1314 				    (uint16_t *)lha->dirname.s;
1315 				for (i = 0; i < lha->dirname.length / 2; i++) {
1316 					if (utf16name[i] == 0xFFFF) {
1317 						utf16name[i] = dirSep;
1318 					}
1319 				}
1320 				/* Is last character directory separator? */
1321 				if (utf16name[lha->dirname.length / 2 - 1] !=
1322 				    dirSep) {
1323 					/* invalid directory data */
1324 					goto invalid;
1325 				}
1326 			}
1327 			break;
1328 		case EXT_DOS_ATTR:
1329 			if (datasize == 2)
1330 				lha->dos_attr = (unsigned char)
1331 				    (archive_le16dec(extdheader) & 0xff);
1332 			break;
1333 		case EXT_TIMESTAMP:
1334 			if (datasize == (sizeof(uint64_t) * 3)) {
1335 				ntfs_to_unix(archive_le64dec(extdheader),
1336 					&lha->birthtime,
1337 				    &lha->birthtime_tv_nsec);
1338 				extdheader += sizeof(uint64_t);
1339 				ntfs_to_unix(archive_le64dec(extdheader),
1340 					&lha->mtime,
1341 				    &lha->mtime_tv_nsec);
1342 				extdheader += sizeof(uint64_t);
1343 				ntfs_to_unix(archive_le64dec(extdheader),
1344 					&lha->atime,
1345 				    &lha->atime_tv_nsec);
1346 				lha->setflag |= BIRTHTIME_IS_SET |
1347 				    ATIME_IS_SET;
1348 			}
1349 			break;
1350 		case EXT_FILESIZE:
1351 			if (datasize == sizeof(uint64_t) * 2) {
1352 				lha->compsize = archive_le64dec(extdheader);
1353 				extdheader += sizeof(uint64_t);
1354 				lha->origsize = archive_le64dec(extdheader);
1355 				if (lha->compsize < 0 || lha->origsize < 0)
1356 					goto invalid;
1357 			}
1358 			break;
1359 		case EXT_CODEPAGE:
1360 			/* Get an archived filename charset from codepage.
1361 			 * This overwrites the charset specified by
1362 			 * hdrcharset option. */
1363 			if (datasize == sizeof(uint32_t)) {
1364 				struct archive_string cp;
1365 				const char *charset;
1366 
1367 				archive_string_init(&cp);
1368 				switch (archive_le32dec(extdheader)) {
1369 				case 65001: /* UTF-8 */
1370 					charset = "UTF-8";
1371 					break;
1372 				default:
1373 					archive_string_sprintf(&cp, "CP%d",
1374 					    (int)archive_le32dec(extdheader));
1375 					charset = cp.s;
1376 					break;
1377 				}
1378 				lha->sconv_dir =
1379 				    archive_string_conversion_from_charset(
1380 					&(a->archive), charset, 1);
1381 				lha->sconv_fname =
1382 				    archive_string_conversion_from_charset(
1383 					&(a->archive), charset, 1);
1384 				archive_string_free(&cp);
1385 				if (lha->sconv_dir == NULL)
1386 					return (ARCHIVE_FATAL);
1387 				if (lha->sconv_fname == NULL)
1388 					return (ARCHIVE_FATAL);
1389 			}
1390 			break;
1391 		case EXT_UNIX_MODE:
1392 			if (datasize == sizeof(uint16_t)) {
1393 				lha->mode = archive_le16dec(extdheader);
1394 				lha->setflag |= UNIX_MODE_IS_SET;
1395 			}
1396 			break;
1397 		case EXT_UNIX_GID_UID:
1398 			if (datasize == (sizeof(uint16_t) * 2)) {
1399 				lha->gid = archive_le16dec(extdheader);
1400 				lha->uid = archive_le16dec(extdheader+2);
1401 			}
1402 			break;
1403 		case EXT_UNIX_GNAME:
1404 			if (datasize > 0)
1405 				archive_strncpy(&lha->gname,
1406 				    (const char *)extdheader, datasize);
1407 			break;
1408 		case EXT_UNIX_UNAME:
1409 			if (datasize > 0)
1410 				archive_strncpy(&lha->uname,
1411 				    (const char *)extdheader, datasize);
1412 			break;
1413 		case EXT_UNIX_MTIME:
1414 			if (datasize == sizeof(uint32_t))
1415 				lha->mtime = archive_le32dec(extdheader);
1416 			break;
1417 		case EXT_OS2_NEW_ATTR:
1418 			/* This extended header is OS/2 depend. */
1419 			if (datasize == 16) {
1420 				lha->dos_attr = (unsigned char)
1421 				    (archive_le16dec(extdheader) & 0xff);
1422 				lha->mode = archive_le16dec(extdheader+2);
1423 				lha->gid = archive_le16dec(extdheader+4);
1424 				lha->uid = archive_le16dec(extdheader+6);
1425 				lha->birthtime = archive_le32dec(extdheader+8);
1426 				lha->atime = archive_le32dec(extdheader+12);
1427 				lha->setflag |= UNIX_MODE_IS_SET
1428 				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1429 			}
1430 			break;
1431 		case EXT_NEW_ATTR:
1432 			if (datasize == 20) {
1433 				lha->mode = (mode_t)archive_le32dec(extdheader);
1434 				lha->gid = archive_le32dec(extdheader+4);
1435 				lha->uid = archive_le32dec(extdheader+8);
1436 				lha->birthtime = archive_le32dec(extdheader+12);
1437 				lha->atime = archive_le32dec(extdheader+16);
1438 				lha->setflag |= UNIX_MODE_IS_SET
1439 				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1440 			}
1441 			break;
1442 		case EXT_TIMEZONE:		/* Not supported */
1443 			break;
1444 		default:
1445 			break;
1446 		}
1447 
1448 		__archive_read_consume(a, extdsize);
1449 	}
1450 invalid:
1451 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1452 	    "Invalid extended LHa header");
1453 	return (ARCHIVE_FATAL);
1454 }
1455 
1456 static int
lha_end_of_entry(struct archive_read * a)1457 lha_end_of_entry(struct archive_read *a)
1458 {
1459 	struct lha *lha = (struct lha *)(a->format->data);
1460 	int r = ARCHIVE_EOF;
1461 
1462 	if (!lha->end_of_entry_cleanup) {
1463 		if ((lha->setflag & CRC_IS_SET) &&
1464 		    lha->crc != lha->entry_crc_calculated) {
1465 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1466 			    "LHa data CRC error");
1467 			r = ARCHIVE_WARN;
1468 		}
1469 
1470 		/* End-of-entry cleanup done. */
1471 		lha->end_of_entry_cleanup = 1;
1472 	}
1473 	return (r);
1474 }
1475 
1476 static int
archive_read_format_lha_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1477 archive_read_format_lha_read_data(struct archive_read *a,
1478     const void **buff, size_t *size, int64_t *offset)
1479 {
1480 	struct lha *lha = (struct lha *)(a->format->data);
1481 	int r;
1482 
1483 	if (lha->entry_unconsumed) {
1484 		/* Consume as much as the decompressor actually used. */
1485 		__archive_read_consume(a, lha->entry_unconsumed);
1486 		lha->entry_unconsumed = 0;
1487 	}
1488 	if (lha->end_of_entry) {
1489 		*offset = lha->entry_offset;
1490 		*size = 0;
1491 		*buff = NULL;
1492 		return (lha_end_of_entry(a));
1493 	}
1494 
1495 	if (lha->entry_is_compressed)
1496 		r =  lha_read_data_lzh(a, buff, size, offset);
1497 	else
1498 		/* No compression. */
1499 		r =  lha_read_data_none(a, buff, size, offset);
1500 	return (r);
1501 }
1502 
1503 /*
1504  * Read a file content in no compression.
1505  *
1506  * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1507  * lha->end_of_entry if it consumes all of the data.
1508  */
1509 static int
lha_read_data_none(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1510 lha_read_data_none(struct archive_read *a, const void **buff,
1511     size_t *size, int64_t *offset)
1512 {
1513 	struct lha *lha = (struct lha *)(a->format->data);
1514 	ssize_t bytes_avail;
1515 
1516 	if (lha->entry_bytes_remaining == 0) {
1517 		*buff = NULL;
1518 		*size = 0;
1519 		*offset = lha->entry_offset;
1520 		lha->end_of_entry = 1;
1521 		return (ARCHIVE_OK);
1522 	}
1523 	/*
1524 	 * Note: '1' here is a performance optimization.
1525 	 * Recall that the decompression layer returns a count of
1526 	 * available bytes; asking for more than that forces the
1527 	 * decompressor to combine reads by copying data.
1528 	 */
1529 	*buff = __archive_read_ahead(a, 1, &bytes_avail);
1530 	if (bytes_avail <= 0) {
1531 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1532 		    "Truncated LHa file data");
1533 		return (ARCHIVE_FATAL);
1534 	}
1535 	if (bytes_avail > lha->entry_bytes_remaining)
1536 		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1537 	lha->entry_crc_calculated =
1538 	    lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1539 	*size = bytes_avail;
1540 	*offset = lha->entry_offset;
1541 	lha->entry_offset += bytes_avail;
1542 	lha->entry_bytes_remaining -= bytes_avail;
1543 	if (lha->entry_bytes_remaining == 0)
1544 		lha->end_of_entry = 1;
1545 	lha->entry_unconsumed = bytes_avail;
1546 	return (ARCHIVE_OK);
1547 }
1548 
1549 /*
1550  * Read a file content in LZHUFF encoding.
1551  *
1552  * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1553  * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1554  * all of the data.
1555  */
1556 static int
lha_read_data_lzh(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1557 lha_read_data_lzh(struct archive_read *a, const void **buff,
1558     size_t *size, int64_t *offset)
1559 {
1560 	struct lha *lha = (struct lha *)(a->format->data);
1561 	ssize_t bytes_avail;
1562 	int r;
1563 
1564 	/* If we haven't yet read any data, initialize the decompressor. */
1565 	if (!lha->decompress_init) {
1566 		r = lzh_decode_init(&(lha->strm), lha->method);
1567 		switch (r) {
1568 		case ARCHIVE_OK:
1569 			break;
1570 		case ARCHIVE_FAILED:
1571         		/* Unsupported compression. */
1572 			*buff = NULL;
1573 			*size = 0;
1574 			*offset = 0;
1575 			archive_set_error(&a->archive,
1576 			    ARCHIVE_ERRNO_FILE_FORMAT,
1577 			    "Unsupported lzh compression method -%c%c%c-",
1578 			    lha->method[0], lha->method[1], lha->method[2]);
1579 			/* We know compressed size; just skip it. */
1580 			archive_read_format_lha_read_data_skip(a);
1581 			return (ARCHIVE_WARN);
1582 		default:
1583 			archive_set_error(&a->archive, ENOMEM,
1584 			    "Couldn't allocate memory "
1585 			    "for lzh decompression");
1586 			return (ARCHIVE_FATAL);
1587 		}
1588 		/* We've initialized decompression for this stream. */
1589 		lha->decompress_init = 1;
1590 		lha->strm.avail_out = 0;
1591 		lha->strm.total_out = 0;
1592 	}
1593 
1594 	/*
1595 	 * Note: '1' here is a performance optimization.
1596 	 * Recall that the decompression layer returns a count of
1597 	 * available bytes; asking for more than that forces the
1598 	 * decompressor to combine reads by copying data.
1599 	 */
1600 	lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1601 	if (bytes_avail <= 0) {
1602 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1603 		    "Truncated LHa file body");
1604 		return (ARCHIVE_FATAL);
1605 	}
1606 	if (bytes_avail > lha->entry_bytes_remaining)
1607 		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1608 
1609 	lha->strm.avail_in = (int)bytes_avail;
1610 	lha->strm.total_in = 0;
1611 	lha->strm.avail_out = 0;
1612 
1613 	r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1614 	switch (r) {
1615 	case ARCHIVE_OK:
1616 		break;
1617 	case ARCHIVE_EOF:
1618 		lha->end_of_entry = 1;
1619 		break;
1620 	default:
1621 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1622 		    "Bad lzh data");
1623 		return (ARCHIVE_FAILED);
1624 	}
1625 	lha->entry_unconsumed = lha->strm.total_in;
1626 	lha->entry_bytes_remaining -= lha->strm.total_in;
1627 
1628 	if (lha->strm.avail_out) {
1629 		*offset = lha->entry_offset;
1630 		*size = lha->strm.avail_out;
1631 		*buff = lha->strm.ref_ptr;
1632 		lha->entry_crc_calculated =
1633 		    lha_crc16(lha->entry_crc_calculated, *buff, *size);
1634 		lha->entry_offset += *size;
1635 	} else {
1636 		*offset = lha->entry_offset;
1637 		*size = 0;
1638 		*buff = NULL;
1639 		if (lha->end_of_entry)
1640 			return (lha_end_of_entry(a));
1641 	}
1642 	return (ARCHIVE_OK);
1643 }
1644 
1645 /*
1646  * Skip a file content.
1647  */
1648 static int
archive_read_format_lha_read_data_skip(struct archive_read * a)1649 archive_read_format_lha_read_data_skip(struct archive_read *a)
1650 {
1651 	struct lha *lha;
1652 	int64_t bytes_skipped;
1653 
1654 	lha = (struct lha *)(a->format->data);
1655 
1656 	if (lha->entry_unconsumed) {
1657 		/* Consume as much as the decompressor actually used. */
1658 		__archive_read_consume(a, lha->entry_unconsumed);
1659 		lha->entry_unconsumed = 0;
1660 	}
1661 
1662 	/* if we've already read to end of data, we're done. */
1663 	if (lha->end_of_entry_cleanup)
1664 		return (ARCHIVE_OK);
1665 
1666 	/*
1667 	 * If the length is at the beginning, we can skip the
1668 	 * compressed data much more quickly.
1669 	 */
1670 	bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1671 	if (bytes_skipped < 0)
1672 		return (ARCHIVE_FATAL);
1673 
1674 	/* This entry is finished and done. */
1675 	lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1676 	return (ARCHIVE_OK);
1677 }
1678 
1679 static int
archive_read_format_lha_cleanup(struct archive_read * a)1680 archive_read_format_lha_cleanup(struct archive_read *a)
1681 {
1682 	struct lha *lha = (struct lha *)(a->format->data);
1683 
1684 	lzh_decode_free(&(lha->strm));
1685 	archive_string_free(&(lha->dirname));
1686 	archive_string_free(&(lha->filename));
1687 	archive_string_free(&(lha->uname));
1688 	archive_string_free(&(lha->gname));
1689 	archive_wstring_free(&(lha->ws));
1690 	free(lha);
1691 	(a->format->data) = NULL;
1692 	return (ARCHIVE_OK);
1693 }
1694 
1695 /*
1696  * 'LHa for UNIX' utility has archived a symbolic-link name after
1697  * a pathname with '|' character.
1698  * This function extracts the symbolic-link name from the pathname.
1699  *
1700  * example.
1701  *   1. a symbolic-name is 'aaa/bb/cc'
1702  *   2. a filename is 'xxx/bbb'
1703  *  then an archived pathname is 'xxx/bbb|aaa/bb/cc'
1704  */
1705 static int
lha_parse_linkname(struct archive_wstring * linkname,struct archive_wstring * pathname)1706 lha_parse_linkname(struct archive_wstring *linkname,
1707     struct archive_wstring *pathname)
1708 {
1709 	wchar_t *	linkptr;
1710 	size_t 	symlen;
1711 
1712 	linkptr = wcschr(pathname->s, L'|');
1713 	if (linkptr != NULL) {
1714 		symlen = wcslen(linkptr + 1);
1715 		archive_wstrncpy(linkname, linkptr+1, symlen);
1716 
1717 		*linkptr = 0;
1718 		pathname->length = wcslen(pathname->s);
1719 
1720 		return (1);
1721 	}
1722 	return (0);
1723 }
1724 
/*
 * Add `size' bytes, starting `offset' bytes into `pp', onto `sum'.
 * The addition wraps around at 8 bits.  Returns the new sum.
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	const unsigned char *cur = (const unsigned char *)pp + offset;
	const unsigned char *const end = cur + size;

	while (cur != end)
		sum = (unsigned char)(sum + *cur++);
	return (sum);
}
1735 
/*
 * Lookup tables for the CRC16 used by LHa (reflected polynomial 0xA001).
 * Row 0 maps a byte to its CRC contribution; row 1 is derived from row 0
 * so that two input bytes can be folded in with one lookup in each row.
 */
static uint16_t crc16tbl[2][256];
/*
 * Fill crc16tbl.  Only the first call does any work.
 */
static void
lha_crc16_init(void)
{
	static int ready = 0;
	unsigned int byte, bit;

	if (ready)
		return;
	ready = 1;

	/* Row 0: run each byte value through eight shift/xor steps. */
	for (byte = 0; byte < 256; byte++) {
		uint16_t v = (uint16_t)byte;

		for (bit = 0; bit < 8; bit++) {
			if (v & 1)
				v = (uint16_t)((v >> 1) ^ 0xA001);
			else
				v >>= 1;
		}
		crc16tbl[0][byte] = v;
	}

	/* Row 1: the row 0 entry advanced by one more (zero) byte. */
	for (byte = 0; byte < 256; byte++) {
		const uint16_t one = crc16tbl[0][byte];

		crc16tbl[1][byte] =
		    (uint16_t)((one >> 8) ^ crc16tbl[0][one & 0xff]);
	}
}
1760 
1761 static uint16_t
lha_crc16(uint16_t crc,const void * pp,size_t len)1762 lha_crc16(uint16_t crc, const void *pp, size_t len)
1763 {
1764 	const unsigned char *p = (const unsigned char *)pp;
1765 	const uint16_t *buff;
1766 	const union {
1767 		uint32_t i;
1768 		char c[4];
1769 	} u = { 0x01020304 };
1770 
1771 	if (len == 0)
1772 		return crc;
1773 
1774 	/* Process unaligned address. */
1775 	if (((uintptr_t)p) & (uintptr_t)0x1) {
1776 		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1777 		len--;
1778 	}
1779 	buff = (const uint16_t *)p;
1780 	/*
1781 	 * Modern C compiler such as GCC does not unroll automatically yet
1782 	 * without unrolling pragma, and Clang is so. So we should
1783 	 * unroll this loop for its performance.
1784 	 */
1785 	for (;len >= 8; len -= 8) {
1786 		/* This if statement expects compiler optimization will
1787 		 * remove the statement which will not be executed. */
1788 #undef bswap16
1789 #ifndef __has_builtin
1790 #define __has_builtin(x) 0
1791 #endif
1792 #if defined(_MSC_VER) && _MSC_VER >= 1400  /* Visual Studio */
1793 #  define bswap16(x) _byteswap_ushort(x)
1794 #elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1795 /* GCC 4.8 and later has __builtin_bswap16() */
1796 #  define bswap16(x) __builtin_bswap16(x)
1797 #elif defined(__clang__) && __has_builtin(__builtin_bswap16)
1798 /* Newer clang versions have __builtin_bswap16() */
1799 #  define bswap16(x) __builtin_bswap16(x)
1800 #else
1801 #  define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1802 #endif
1803 #define CRC16W	do { 	\
1804 		if(u.c[0] == 1) { /* Big endian */		\
1805 			crc ^= bswap16(*buff); buff++;		\
1806 		} else						\
1807 			crc ^= *buff++;				\
1808 		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1809 } while (0)
1810 		CRC16W;
1811 		CRC16W;
1812 		CRC16W;
1813 		CRC16W;
1814 #undef CRC16W
1815 #undef bswap16
1816 	}
1817 
1818 	p = (const unsigned char *)buff;
1819 	for (;len; len--) {
1820 		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1821 	}
1822 	return crc;
1823 }
1824 
1825 /*
1826  * Initialize LZHUF decoder.
1827  *
1828  * Returns ARCHIVE_OK if initialization was successful.
1829  * Returns ARCHIVE_FAILED if method is unsupported.
1830  * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1831  * error occurred.
1832  */
1833 static int
lzh_decode_init(struct lzh_stream * strm,const char * method)1834 lzh_decode_init(struct lzh_stream *strm, const char *method)
1835 {
1836 	struct lzh_dec *ds;
1837 	int w_bits, w_size;
1838 
1839 	if (strm->ds == NULL) {
1840 		strm->ds = calloc(1, sizeof(*strm->ds));
1841 		if (strm->ds == NULL)
1842 			return (ARCHIVE_FATAL);
1843 	}
1844 	ds = strm->ds;
1845 	ds->error = ARCHIVE_FAILED;
1846 	if (method == NULL || method[0] != 'l' || method[1] != 'h')
1847 		return (ARCHIVE_FAILED);
1848 	switch (method[2]) {
1849 	case '5':
1850 		w_bits = 13;/* 8KiB for window */
1851 		break;
1852 	case '6':
1853 		w_bits = 15;/* 32KiB for window */
1854 		break;
1855 	case '7':
1856 		w_bits = 16;/* 64KiB for window */
1857 		break;
1858 	default:
1859 		return (ARCHIVE_FAILED);/* Not supported. */
1860 	}
1861 	ds->error = ARCHIVE_FATAL;
1862 	/* Expand a window size up to 128 KiB for decompressing process
1863 	 * performance whatever its original window size is. */
1864 	ds->w_size = 1U << 17;
1865 	ds->w_mask = ds->w_size -1;
1866 	if (ds->w_buff == NULL) {
1867 		ds->w_buff = malloc(ds->w_size);
1868 		if (ds->w_buff == NULL)
1869 			return (ARCHIVE_FATAL);
1870 	}
1871 	w_size = 1U << w_bits;
1872 	memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1873 	ds->w_pos = 0;
1874 	ds->state = 0;
1875 	ds->pos_pt_len_size = w_bits + 1;
1876 	ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1877 	ds->literal_pt_len_size = PT_BITLEN_SIZE;
1878 	ds->literal_pt_len_bits = 5;
1879 	ds->br.cache_buffer = 0;
1880 	ds->br.cache_avail = 0;
1881 
1882 	if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1883 	    != ARCHIVE_OK)
1884 		return (ARCHIVE_FATAL);
1885 	ds->lt.len_bits = 9;
1886 	if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1887 	    != ARCHIVE_OK)
1888 		return (ARCHIVE_FATAL);
1889 	ds->error = 0;
1890 
1891 	return (ARCHIVE_OK);
1892 }
1893 
1894 /*
1895  * Release LZHUF decoder.
1896  */
1897 static void
lzh_decode_free(struct lzh_stream * strm)1898 lzh_decode_free(struct lzh_stream *strm)
1899 {
1900 
1901 	if (strm->ds == NULL)
1902 		return;
1903 	free(strm->ds->w_buff);
1904 	lzh_huffman_free(&(strm->ds->lt));
1905 	lzh_huffman_free(&(strm->ds->pt));
1906 	free(strm->ds);
1907 	strm->ds = NULL;
1908 }
1909 
/*
 * Bit stream reader.
 *
 * All macros take a pointer `br' to a bit reader with two members:
 * cache_buffer, which holds the buffered bits with the next unread bit
 * at bit position (cache_avail - 1), and cache_avail, the number of
 * unread bits.  Every macro argument is parenthesized in the expansion
 * so that any expression may be passed for `br' and `n'.
 */
/* Check that the cache buffer has at least `n' unread bits. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Peek at the next `n' unread bits (n <= 16) without consuming them. */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[(n)])
/* Peek at `n' bits when fewer than `n' are buffered: the unread bits are
 * shifted up and the missing low bits read as zero. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[(n)])
/* Read ahead to make sure the cache buffer has enough compressed data we
 * will use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : strm->next_in ran empty before the cache buffer could be
 *          filled up; following bytes are needed. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has((br), (n)) || lzh_br_fillup((strm), (br)))
/*  True  : the cache buffer has at least as many bits as we need.
 *  False : there are not enough bits in the cache buffer to be used,
 *          we have to get following bytes if we could. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Record that `n' bits were consumed. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
/* Give `n' previously consumed bits back. */
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))

/* cache_masks[n] keeps the low `n' bits of a 16-bit value; the entries
 * above 16 keep all sixteen bits. */
static const uint16_t cache_masks[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000F, 0x001F, 0x003F, 0x007F,
	0x00FF, 0x01FF, 0x03FF, 0x07FF,
	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
1946 
1947 /*
1948  * Shift away used bits in the cache data and fill it up with following bits.
1949  * Call this when cache buffer does not have enough bits you need.
1950  *
1951  * Returns 1 if the cache buffer is full.
1952  * Returns 0 if the cache buffer is not full; input buffer is empty.
1953  */
1954 static int
lzh_br_fillup(struct lzh_stream * strm,struct lzh_br * br)1955 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1956 {
1957 	int n = CACHE_BITS - br->cache_avail;
1958 
1959 	for (;;) {
1960 		const int x = n >> 3;
1961 		if (strm->avail_in >= x) {
1962 			switch (x) {
1963 			case 8:
1964 				br->cache_buffer =
1965 				    ((uint64_t)strm->next_in[0]) << 56 |
1966 				    ((uint64_t)strm->next_in[1]) << 48 |
1967 				    ((uint64_t)strm->next_in[2]) << 40 |
1968 				    ((uint64_t)strm->next_in[3]) << 32 |
1969 				    ((uint32_t)strm->next_in[4]) << 24 |
1970 				    ((uint32_t)strm->next_in[5]) << 16 |
1971 				    ((uint32_t)strm->next_in[6]) << 8 |
1972 				     (uint32_t)strm->next_in[7];
1973 				strm->next_in += 8;
1974 				strm->avail_in -= 8;
1975 				br->cache_avail += 8 * 8;
1976 				return (1);
1977 			case 7:
1978 				br->cache_buffer =
1979 		 		   (br->cache_buffer << 56) |
1980 				    ((uint64_t)strm->next_in[0]) << 48 |
1981 				    ((uint64_t)strm->next_in[1]) << 40 |
1982 				    ((uint64_t)strm->next_in[2]) << 32 |
1983 				    ((uint64_t)strm->next_in[3]) << 24 |
1984 				    ((uint64_t)strm->next_in[4]) << 16 |
1985 				    ((uint64_t)strm->next_in[5]) << 8 |
1986 				     (uint64_t)strm->next_in[6];
1987 				strm->next_in += 7;
1988 				strm->avail_in -= 7;
1989 				br->cache_avail += 7 * 8;
1990 				return (1);
1991 			case 6:
1992 				br->cache_buffer =
1993 		 		   (br->cache_buffer << 48) |
1994 				    ((uint64_t)strm->next_in[0]) << 40 |
1995 				    ((uint64_t)strm->next_in[1]) << 32 |
1996 				    ((uint64_t)strm->next_in[2]) << 24 |
1997 				    ((uint64_t)strm->next_in[3]) << 16 |
1998 				    ((uint64_t)strm->next_in[4]) << 8 |
1999 				     (uint64_t)strm->next_in[5];
2000 				strm->next_in += 6;
2001 				strm->avail_in -= 6;
2002 				br->cache_avail += 6 * 8;
2003 				return (1);
2004 			case 0:
2005 				/* We have enough compressed data in
2006 				 * the cache buffer.*/
2007 				return (1);
2008 			default:
2009 				break;
2010 			}
2011 		}
2012 		if (strm->avail_in == 0) {
2013 			/* There is not enough compressed data to fill up the
2014 			 * cache buffer. */
2015 			return (0);
2016 		}
2017 		br->cache_buffer =
2018 		   (br->cache_buffer << 8) | *strm->next_in++;
2019 		strm->avail_in--;
2020 		br->cache_avail += 8;
2021 		n -= 8;
2022 	}
2023 }
2024 
/*
 * Decode LZHUF.
 *
 * Return values of lzh_decode():
 * 1. ARCHIVE_OK if the output buffer or the input buffer is exhausted.
 *    Provide more buffer space and call the function again.
 * 2. ARCHIVE_EOF if decompression has been completed.
 * 3. ARCHIVE_FAILED if an error occurred; the compressed data is broken
 *    or the 'last' flag was not set properly.
 *
 * The 'last' flag is very important: it must be set to 1 once there is
 * no more input data, because the lha compressed data format provides
 * no way to know that the compressed data has really finished.
 * Note: the lha command line utility only checks whether the total
 * number of output bytes has reached the uncompressed size recorded in
 * the header; it does not care whether the decoding process finished
 * properly.
 * GNU ZIP can decompress files made by SCO LZH compress. It handles EOF
 * by filling the read buffer with zeros until the decoding process meets
 * two zero bytes where the size of the next chunk is read, so those
 * zeros are treated as the end-of-data mark although they are dummy
 * data, not file data.
 */
static int	lzh_read_blocks(struct lzh_stream *strm, int last);
static int	lzh_decode_blocks(struct lzh_stream *strm, int last);

/*
 * Decoder states.  lzh_decode() hands every state below ST_GET_LITERAL
 * to lzh_read_blocks() and the remaining ones to lzh_decode_blocks(), so
 * the numeric order of these values matters.
 */
/* Reading the block count and the code tables. */
#define ST_RD_BLOCK		0
#define ST_RD_PT_1		1
#define ST_RD_PT_2		2
#define ST_RD_PT_3		3
#define ST_RD_PT_4		4
#define ST_RD_LITERAL_1		5
#define ST_RD_LITERAL_2		6
#define ST_RD_LITERAL_3		7
#define ST_RD_POS_DATA_1	8
/* Decoding literals and matches. */
#define ST_GET_LITERAL		9
#define ST_GET_POS_1		10
#define ST_GET_POS_2		11
#define ST_COPY_DATA		12
2060 
2061 static int
lzh_decode(struct lzh_stream * strm,int last)2062 lzh_decode(struct lzh_stream *strm, int last)
2063 {
2064 	struct lzh_dec *ds = strm->ds;
2065 	int avail_in;
2066 	int r;
2067 
2068 	if (ds->error)
2069 		return (ds->error);
2070 
2071 	avail_in = strm->avail_in;
2072 	do {
2073 		if (ds->state < ST_GET_LITERAL)
2074 			r = lzh_read_blocks(strm, last);
2075 		else
2076 			r = lzh_decode_blocks(strm, last);
2077 	} while (r == 100);
2078 	strm->total_in += avail_in - strm->avail_in;
2079 	return (r);
2080 }
2081 
2082 static void
lzh_emit_window(struct lzh_stream * strm,size_t s)2083 lzh_emit_window(struct lzh_stream *strm, size_t s)
2084 {
2085 	strm->ref_ptr = strm->ds->w_buff;
2086 	strm->avail_out = (int)s;
2087 	strm->total_out += s;
2088 }
2089 
2090 static int
lzh_read_blocks(struct lzh_stream * strm,int last)2091 lzh_read_blocks(struct lzh_stream *strm, int last)
2092 {
2093 	struct lzh_dec *ds = strm->ds;
2094 	struct lzh_br *br = &(ds->br);
2095 	int c = 0, i;
2096 	unsigned rbits;
2097 
2098 	for (;;) {
2099 		switch (ds->state) {
2100 		case ST_RD_BLOCK:
2101 			/*
2102 			 * Read a block number indicates how many blocks
2103 			 * we will handle. The block is composed of a
2104 			 * literal and a match, sometimes a literal only
2105 			 * in particular, there are no reference data at
2106 			 * the beginning of the decompression.
2107 			 */
2108 			if (!lzh_br_read_ahead_0(strm, br, 16)) {
2109 				if (!last)
2110 					/* We need following data. */
2111 					return (ARCHIVE_OK);
2112 				if (lzh_br_has(br, 8)) {
2113 					/*
2114 					 * It seems there are extra bits.
2115 					 *  1. Compressed data is broken.
2116 					 *  2. `last' flag does not properly
2117 					 *     set.
2118 					 */
2119 					goto failed;
2120 				}
2121 				if (ds->w_pos > 0) {
2122 					lzh_emit_window(strm, ds->w_pos);
2123 					ds->w_pos = 0;
2124 					return (ARCHIVE_OK);
2125 				}
2126 				/* End of compressed data; we have completely
2127 				 * handled all compressed data. */
2128 				return (ARCHIVE_EOF);
2129 			}
2130 			ds->blocks_avail = lzh_br_bits(br, 16);
2131 			if (ds->blocks_avail == 0)
2132 				goto failed;
2133 			lzh_br_consume(br, 16);
2134 			/*
2135 			 * Read a literal table compressed in huffman
2136 			 * coding.
2137 			 */
2138 			ds->pt.len_size = ds->literal_pt_len_size;
2139 			ds->pt.len_bits = ds->literal_pt_len_bits;
2140 			ds->reading_position = 0;
2141 			/* FALL THROUGH */
2142 		case ST_RD_PT_1:
2143 			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2144 			 * used in reading both a literal table and a
2145 			 * position table. */
2146 			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2147 				if (last)
2148 					goto failed;/* Truncated data. */
2149 				ds->state = ST_RD_PT_1;
2150 				return (ARCHIVE_OK);
2151 			}
2152 			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2153 			lzh_br_consume(br, ds->pt.len_bits);
2154 			/* FALL THROUGH */
2155 		case ST_RD_PT_2:
2156 			if (ds->pt.len_avail == 0) {
2157 				/* There is no bitlen. */
2158 				if (!lzh_br_read_ahead(strm, br,
2159 				    ds->pt.len_bits)) {
2160 					if (last)
2161 						goto failed;/* Truncated data.*/
2162 					ds->state = ST_RD_PT_2;
2163 					return (ARCHIVE_OK);
2164 				}
2165 				if (!lzh_make_fake_table(&(ds->pt),
2166 				    lzh_br_bits(br, ds->pt.len_bits)))
2167 					goto failed;/* Invalid data. */
2168 				lzh_br_consume(br, ds->pt.len_bits);
2169 				if (ds->reading_position)
2170 					ds->state = ST_GET_LITERAL;
2171 				else
2172 					ds->state = ST_RD_LITERAL_1;
2173 				break;
2174 			} else if (ds->pt.len_avail > ds->pt.len_size)
2175 				goto failed;/* Invalid data. */
2176 			ds->loop = 0;
2177 			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2178 			if (ds->pt.len_avail < 3 ||
2179 			    ds->pt.len_size == ds->pos_pt_len_size) {
2180 				ds->state = ST_RD_PT_4;
2181 				break;
2182 			}
2183 			/* FALL THROUGH */
2184 		case ST_RD_PT_3:
2185 			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2186 			if (ds->loop < 3) {
2187 				if (ds->loop < 0 || last)
2188 					goto failed;/* Invalid data. */
2189 				/* Not completed, get following data. */
2190 				ds->state = ST_RD_PT_3;
2191 				return (ARCHIVE_OK);
2192 			}
2193 			/* There are some null in bitlen of the literal. */
2194 			if (!lzh_br_read_ahead(strm, br, 2)) {
2195 				if (last)
2196 					goto failed;/* Truncated data. */
2197 				ds->state = ST_RD_PT_3;
2198 				return (ARCHIVE_OK);
2199 			}
2200 			c = lzh_br_bits(br, 2);
2201 			lzh_br_consume(br, 2);
2202 			if (c > ds->pt.len_avail - 3)
2203 				goto failed;/* Invalid data. */
2204 			for (i = 3; c-- > 0 ;)
2205 				ds->pt.bitlen[i++] = 0;
2206 			ds->loop = i;
2207 			/* FALL THROUGH */
2208 		case ST_RD_PT_4:
2209 			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2210 			    ds->pt.len_avail);
2211 			if (ds->loop < ds->pt.len_avail) {
2212 				if (ds->loop < 0 || last)
2213 					goto failed;/* Invalid data. */
2214 				/* Not completed, get following data. */
2215 				ds->state = ST_RD_PT_4;
2216 				return (ARCHIVE_OK);
2217 			}
2218 			if (!lzh_make_huffman_table(&(ds->pt)))
2219 				goto failed;/* Invalid data */
2220 			if (ds->reading_position) {
2221 				ds->state = ST_GET_LITERAL;
2222 				break;
2223 			}
2224 			/* FALL THROUGH */
2225 		case ST_RD_LITERAL_1:
2226 			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2227 				if (last)
2228 					goto failed;/* Truncated data. */
2229 				ds->state = ST_RD_LITERAL_1;
2230 				return (ARCHIVE_OK);
2231 			}
2232 			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2233 			lzh_br_consume(br, ds->lt.len_bits);
2234 			/* FALL THROUGH */
2235 		case ST_RD_LITERAL_2:
2236 			if (ds->lt.len_avail == 0) {
2237 				/* There is no bitlen. */
2238 				if (!lzh_br_read_ahead(strm, br,
2239 				    ds->lt.len_bits)) {
2240 					if (last)
2241 						goto failed;/* Truncated data.*/
2242 					ds->state = ST_RD_LITERAL_2;
2243 					return (ARCHIVE_OK);
2244 				}
2245 				if (!lzh_make_fake_table(&(ds->lt),
2246 				    lzh_br_bits(br, ds->lt.len_bits)))
2247 					goto failed;/* Invalid data */
2248 				lzh_br_consume(br, ds->lt.len_bits);
2249 				ds->state = ST_RD_POS_DATA_1;
2250 				break;
2251 			} else if (ds->lt.len_avail > ds->lt.len_size)
2252 				goto failed;/* Invalid data */
2253 			ds->loop = 0;
2254 			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2255 			/* FALL THROUGH */
2256 		case ST_RD_LITERAL_3:
2257 			i = ds->loop;
2258 			while (i < ds->lt.len_avail) {
2259 				if (!lzh_br_read_ahead(strm, br,
2260 				    ds->pt.max_bits)) {
2261 					if (last)
2262 						goto failed;/* Truncated data.*/
2263 					ds->loop = i;
2264 					ds->state = ST_RD_LITERAL_3;
2265 					return (ARCHIVE_OK);
2266 				}
2267 				rbits = lzh_br_bits(br, ds->pt.max_bits);
2268 				c = lzh_decode_huffman(&(ds->pt), rbits);
2269 				if (c > 2) {
2270 					/* Note: 'c' will never be more than
2271 					 * eighteen since it's limited by
2272 					 * PT_BITLEN_SIZE, which is being set
2273 					 * to ds->pt.len_size through
2274 					 * ds->literal_pt_len_size. */
2275 					lzh_br_consume(br, ds->pt.bitlen[c]);
2276 					c -= 2;
2277 					ds->lt.freq[c]++;
2278 					ds->lt.bitlen[i++] = c;
2279 				} else if (c == 0) {
2280 					lzh_br_consume(br, ds->pt.bitlen[c]);
2281 					ds->lt.bitlen[i++] = 0;
2282 				} else {
2283 					/* c == 1 or c == 2 */
2284 					int n = (c == 1)?4:9;
2285 					if (!lzh_br_read_ahead(strm, br,
2286 					     ds->pt.bitlen[c] + n)) {
2287 						if (last) /* Truncated data. */
2288 							goto failed;
2289 						ds->loop = i;
2290 						ds->state = ST_RD_LITERAL_3;
2291 						return (ARCHIVE_OK);
2292 					}
2293 					lzh_br_consume(br, ds->pt.bitlen[c]);
2294 					c = lzh_br_bits(br, n);
2295 					lzh_br_consume(br, n);
2296 					c += (n == 4)?3:20;
2297 					if (i + c > ds->lt.len_avail)
2298 						goto failed;/* Invalid data */
2299 					memset(&(ds->lt.bitlen[i]), 0, c);
2300 					i += c;
2301 				}
2302 			}
2303 			if (i > ds->lt.len_avail ||
2304 			    !lzh_make_huffman_table(&(ds->lt)))
2305 				goto failed;/* Invalid data */
2306 			/* FALL THROUGH */
2307 		case ST_RD_POS_DATA_1:
2308 			/*
2309 			 * Read a position table compressed in huffman
2310 			 * coding.
2311 			 */
2312 			ds->pt.len_size = ds->pos_pt_len_size;
2313 			ds->pt.len_bits = ds->pos_pt_len_bits;
2314 			ds->reading_position = 1;
2315 			ds->state = ST_RD_PT_1;
2316 			break;
2317 		case ST_GET_LITERAL:
2318 			return (100);
2319 		}
2320 	}
2321 failed:
2322 	return (ds->error = ARCHIVE_FAILED);
2323 }
2324 
2325 static int
lzh_decode_blocks(struct lzh_stream * strm,int last)2326 lzh_decode_blocks(struct lzh_stream *strm, int last)
2327 {
2328 	struct lzh_dec *ds = strm->ds;
2329 	struct lzh_br bre = ds->br;
2330 	struct huffman *lt = &(ds->lt);
2331 	struct huffman *pt = &(ds->pt);
2332 	unsigned char *w_buff = ds->w_buff;
2333 	unsigned char *lt_bitlen = lt->bitlen;
2334 	unsigned char *pt_bitlen = pt->bitlen;
2335 	int blocks_avail = ds->blocks_avail, c = 0;
2336 	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2337 	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2338 	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2339 	int state = ds->state;
2340 
2341 	for (;;) {
2342 		switch (state) {
2343 		case ST_GET_LITERAL:
2344 			for (;;) {
2345 				if (blocks_avail == 0) {
2346 					/* We have decoded all blocks.
2347 					 * Let's handle next blocks. */
2348 					ds->state = ST_RD_BLOCK;
2349 					ds->br = bre;
2350 					ds->blocks_avail = 0;
2351 					ds->w_pos = w_pos;
2352 					ds->copy_pos = 0;
2353 					return (100);
2354 				}
2355 
2356 				/* lzh_br_read_ahead() always tries to fill the
2357 				 * cache buffer up. In specific situation we
2358 				 * are close to the end of the data, the cache
2359 				 * buffer will not be full and thus we have to
2360 				 * determine if the cache buffer has some bits
2361 				 * as much as we need after lzh_br_read_ahead()
2362 				 * failed. */
2363 				if (!lzh_br_read_ahead(strm, &bre,
2364 				    lt_max_bits)) {
2365 					if (!last)
2366 						goto next_data;
2367 					/* Remaining bits are less than
2368 					 * maximum bits(lt.max_bits) but maybe
2369 					 * it still remains as much as we need,
2370 					 * so we should try to use it with
2371 					 * dummy bits. */
2372 					c = lzh_decode_huffman(lt,
2373 					      lzh_br_bits_forced(&bre,
2374 					        lt_max_bits));
2375 					lzh_br_consume(&bre, lt_bitlen[c]);
2376 					if (!lzh_br_has(&bre, 0))
2377 						goto failed;/* Over read. */
2378 				} else {
2379 					c = lzh_decode_huffman(lt,
2380 					      lzh_br_bits(&bre, lt_max_bits));
2381 					lzh_br_consume(&bre, lt_bitlen[c]);
2382 				}
2383 				blocks_avail--;
2384 				if ((unsigned int)c > UCHAR_MAX)
2385 					/* Current block is a match data. */
2386 					break;
2387 				/*
2388 				 * 'c' is exactly a literal code.
2389 				 */
2390 				/* Save a decoded code to reference it
2391 				 * afterward. */
2392 				w_buff[w_pos] = c;
2393 				if (++w_pos >= w_size) {
2394 					w_pos = 0;
2395 					lzh_emit_window(strm, w_size);
2396 					goto next_data;
2397 				}
2398 			}
2399 			/* 'c' is the length of a match pattern we have
2400 			 * already extracted, which has be stored in
2401 			 * window(ds->w_buff). */
2402 			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2403 			/* FALL THROUGH */
2404 		case ST_GET_POS_1:
2405 			/*
2406 			 * Get a reference position.
2407 			 */
2408 			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2409 				if (!last) {
2410 					state = ST_GET_POS_1;
2411 					ds->copy_len = copy_len;
2412 					goto next_data;
2413 				}
2414 				copy_pos = lzh_decode_huffman(pt,
2415 				    lzh_br_bits_forced(&bre, pt_max_bits));
2416 				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2417 				if (!lzh_br_has(&bre, 0))
2418 					goto failed;/* Over read. */
2419 			} else {
2420 				copy_pos = lzh_decode_huffman(pt,
2421 				    lzh_br_bits(&bre, pt_max_bits));
2422 				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2423 			}
2424 			/* FALL THROUGH */
2425 		case ST_GET_POS_2:
2426 			if (copy_pos > 1) {
2427 				/* We need an additional adjustment number to
2428 				 * the position. */
2429 				int p = copy_pos - 1;
2430 				if (!lzh_br_read_ahead(strm, &bre, p)) {
2431 					if (last)
2432 						goto failed;/* Truncated data.*/
2433 					state = ST_GET_POS_2;
2434 					ds->copy_len = copy_len;
2435 					ds->copy_pos = copy_pos;
2436 					goto next_data;
2437 				}
2438 				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2439 				lzh_br_consume(&bre, p);
2440 			}
2441 			/* The position is actually a distance from the last
2442 			 * code we had extracted and thus we have to convert
2443 			 * it to a position of the window. */
2444 			copy_pos = (w_pos - copy_pos - 1) & w_mask;
2445 			/* FALL THROUGH */
2446 		case ST_COPY_DATA:
2447 			/*
2448 			 * Copy `copy_len' bytes as extracted data from
2449 			 * the window into the output buffer.
2450 			 */
2451 			for (;;) {
2452 				int l;
2453 
2454 				l = copy_len;
2455 				if (copy_pos > w_pos) {
2456 					if (l > w_size - copy_pos)
2457 						l = w_size - copy_pos;
2458 				} else {
2459 					if (l > w_size - w_pos)
2460 						l = w_size - w_pos;
2461 				}
2462 				if ((copy_pos + l < w_pos)
2463 				    || (w_pos + l < copy_pos)) {
2464 					/* No overlap. */
2465 					memcpy(w_buff + w_pos,
2466 					    w_buff + copy_pos, l);
2467 				} else {
2468 					const unsigned char *s;
2469 					unsigned char *d;
2470 					int li;
2471 
2472 					d = w_buff + w_pos;
2473 					s = w_buff + copy_pos;
2474 					for (li = 0; li < l-1;) {
2475 						d[li] = s[li];li++;
2476 						d[li] = s[li];li++;
2477 					}
2478 					if (li < l)
2479 						d[li] = s[li];
2480 				}
2481 				w_pos += l;
2482 				if (w_pos == w_size) {
2483 					w_pos = 0;
2484 					lzh_emit_window(strm, w_size);
2485 					if (copy_len <= l)
2486 						state = ST_GET_LITERAL;
2487 					else {
2488 						state = ST_COPY_DATA;
2489 						ds->copy_len = copy_len - l;
2490 						ds->copy_pos =
2491 						    (copy_pos + l) & w_mask;
2492 					}
2493 					goto next_data;
2494 				}
2495 				if (copy_len <= l)
2496 					/* A copy of current pattern ended. */
2497 					break;
2498 				copy_len -= l;
2499 				copy_pos = (copy_pos + l) & w_mask;
2500 			}
2501 			state = ST_GET_LITERAL;
2502 			break;
2503 		}
2504 	}
2505 failed:
2506 	return (ds->error = ARCHIVE_FAILED);
2507 next_data:
2508 	ds->br = bre;
2509 	ds->blocks_avail = blocks_avail;
2510 	ds->state = state;
2511 	ds->w_pos = w_pos;
2512 	return (ARCHIVE_OK);
2513 }
2514 
2515 static int
lzh_huffman_init(struct huffman * hf,size_t len_size,int tbl_bits)2516 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2517 {
2518 	int bits;
2519 
2520 	if (hf->bitlen == NULL) {
2521 		hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2522 		if (hf->bitlen == NULL)
2523 			return (ARCHIVE_FATAL);
2524 	}
2525 	if (hf->tbl == NULL) {
2526 		if (tbl_bits < HTBL_BITS)
2527 			bits = tbl_bits;
2528 		else
2529 			bits = HTBL_BITS;
2530 		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2531 		if (hf->tbl == NULL)
2532 			return (ARCHIVE_FATAL);
2533 	}
2534 	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2535 		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2536 		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2537 		if (hf->tree == NULL)
2538 			return (ARCHIVE_FATAL);
2539 	}
2540 	hf->len_size = (int)len_size;
2541 	hf->tbl_bits = tbl_bits;
2542 	return (ARCHIVE_OK);
2543 }
2544 
2545 static void
lzh_huffman_free(struct huffman * hf)2546 lzh_huffman_free(struct huffman *hf)
2547 {
2548 	free(hf->bitlen);
2549 	free(hf->tbl);
2550 	free(hf->tree);
2551 }
2552 
/*
 * Lookup table used by lzh_read_pt_bitlen() for code lengths of seven
 * and more.  Such a length is coded as a run of one bits closed by a
 * zero bit ("1110" is 7, "11110" is 8, ... "1111111111110" is 16).  The
 * table is indexed with the low ten bits of a 13-bit peek whose top
 * three bits are all ones, and yields the coded length; the last entry
 * (all ones, no closing zero) is 0 and marks invalid data.
 *
 * The repetition macros below only exist to lay out the runs of equal
 * values and are removed again right after the table.
 */
#define BITLEN_X4(v)	v, v, v, v
#define BITLEN_X16(v)	BITLEN_X4(v), BITLEN_X4(v), BITLEN_X4(v), BITLEN_X4(v)
#define BITLEN_X64(v)	\
	BITLEN_X16(v), BITLEN_X16(v), BITLEN_X16(v), BITLEN_X16(v)
#define BITLEN_X256(v)	\
	BITLEN_X64(v), BITLEN_X64(v), BITLEN_X64(v), BITLEN_X64(v)
static const char bitlen_tbl[0x400] = {
	BITLEN_X256(7), BITLEN_X256(7),		/* 0x000 - 0x1FF */
	BITLEN_X256(8),				/* 0x200 - 0x2FF */
	BITLEN_X64(9), BITLEN_X64(9),		/* 0x300 - 0x37F */
	BITLEN_X64(10),				/* 0x380 - 0x3BF */
	BITLEN_X16(11), BITLEN_X16(11),		/* 0x3C0 - 0x3DF */
	BITLEN_X16(12),				/* 0x3E0 - 0x3EF */
	BITLEN_X4(13), BITLEN_X4(13),		/* 0x3F0 - 0x3F7 */
	BITLEN_X4(14),				/* 0x3F8 - 0x3FB */
	15, 15,					/* 0x3FC - 0x3FD */
	16,					/* 0x3FE */
	0					/* 0x3FF: invalid */
};
#undef BITLEN_X256
#undef BITLEN_X64
#undef BITLEN_X16
#undef BITLEN_X4
2619 static int
lzh_read_pt_bitlen(struct lzh_stream * strm,int start,int end)2620 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2621 {
2622 	struct lzh_dec *ds = strm->ds;
2623 	struct lzh_br *br = &(ds->br);
2624 	int c, i;
2625 
2626 	for (i = start; i < end; ) {
2627 		/*
2628 		 *  bit pattern     the number we need
2629 		 *     000           ->  0
2630 		 *     001           ->  1
2631 		 *     010           ->  2
2632 		 *     ...
2633 		 *     110           ->  6
2634 		 *     1110          ->  7
2635 		 *     11110         ->  8
2636 		 *     ...
2637 		 *     1111111111110 ->  16
2638 		 */
2639 		if (!lzh_br_read_ahead(strm, br, 3))
2640 			return (i);
2641 		if ((c = lzh_br_bits(br, 3)) == 7) {
2642 			if (!lzh_br_read_ahead(strm, br, 13))
2643 				return (i);
2644 			c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2645 			if (c)
2646 				lzh_br_consume(br, c - 3);
2647 			else
2648 				return (-1);/* Invalid data. */
2649 		} else
2650 			lzh_br_consume(br, 3);
2651 		ds->pt.bitlen[i++] = c;
2652 		ds->pt.freq[c]++;
2653 	}
2654 	return (i);
2655 }
2656 
2657 static int
lzh_make_fake_table(struct huffman * hf,uint16_t c)2658 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2659 {
2660 	if (c >= hf->len_size)
2661 		return (0);
2662 	hf->tbl[0] = c;
2663 	hf->max_bits = 0;
2664 	hf->shift_bits = 0;
2665 	hf->bitlen[hf->tbl[0]] = 0;
2666 	return (1);
2667 }
2668 
2669 /*
2670  * Make a huffman coding table.
2671  */
2672 static int
lzh_make_huffman_table(struct huffman * hf)2673 lzh_make_huffman_table(struct huffman *hf)
2674 {
2675 	uint16_t *tbl;
2676 	const unsigned char *bitlen;
2677 	int bitptn[17], weight[17];
2678 	int i, maxbits = 0, ptn, tbl_size, w;
2679 	int diffbits, len_avail;
2680 
2681 	/*
2682 	 * Initialize bit patterns.
2683 	 */
2684 	ptn = 0;
2685 	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2686 		bitptn[i] = ptn;
2687 		weight[i] = w;
2688 		if (hf->freq[i]) {
2689 			ptn += hf->freq[i] * w;
2690 			maxbits = i;
2691 		}
2692 	}
2693 	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2694 		return (0);/* Invalid */
2695 
2696 	hf->max_bits = maxbits;
2697 
2698 	/*
2699 	 * Cut out extra bits which we won't house in the table.
2700 	 * This preparation reduces the same calculation in the for-loop
2701 	 * making the table.
2702 	 */
2703 	if (maxbits < 16) {
2704 		int ebits = 16 - maxbits;
2705 		for (i = 1; i <= maxbits; i++) {
2706 			bitptn[i] >>= ebits;
2707 			weight[i] >>= ebits;
2708 		}
2709 	}
2710 	if (maxbits > HTBL_BITS) {
2711 		unsigned htbl_max;
2712 		uint16_t *p;
2713 
2714 		diffbits = maxbits - HTBL_BITS;
2715 		for (i = 1; i <= HTBL_BITS; i++) {
2716 			bitptn[i] >>= diffbits;
2717 			weight[i] >>= diffbits;
2718 		}
2719 		htbl_max = bitptn[HTBL_BITS] +
2720 		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2721 		p = &(hf->tbl[htbl_max]);
2722 		while (p < &hf->tbl[1U<<HTBL_BITS])
2723 			*p++ = 0;
2724 	} else
2725 		diffbits = 0;
2726 	hf->shift_bits = diffbits;
2727 
2728 	/*
2729 	 * Make the table.
2730 	 */
2731 	tbl_size = 1 << HTBL_BITS;
2732 	tbl = hf->tbl;
2733 	bitlen = hf->bitlen;
2734 	len_avail = hf->len_avail;
2735 	hf->tree_used = 0;
2736 	for (i = 0; i < len_avail; i++) {
2737 		uint16_t *p;
2738 		int len, cnt;
2739 		uint16_t bit;
2740 		int extlen;
2741 		struct htree_t *ht;
2742 
2743 		if (bitlen[i] == 0)
2744 			continue;
2745 		/* Get a bit pattern */
2746 		len = bitlen[i];
2747 		ptn = bitptn[len];
2748 		cnt = weight[len];
2749 		if (len <= HTBL_BITS) {
2750 			/* Calculate next bit pattern */
2751 			if ((bitptn[len] = ptn + cnt) > tbl_size)
2752 				return (0);/* Invalid */
2753 			/* Update the table */
2754 			p = &(tbl[ptn]);
2755 			if (cnt > 7) {
2756 				uint16_t *pc;
2757 
2758 				cnt -= 8;
2759 				pc = &p[cnt];
2760 				pc[0] = (uint16_t)i;
2761 				pc[1] = (uint16_t)i;
2762 				pc[2] = (uint16_t)i;
2763 				pc[3] = (uint16_t)i;
2764 				pc[4] = (uint16_t)i;
2765 				pc[5] = (uint16_t)i;
2766 				pc[6] = (uint16_t)i;
2767 				pc[7] = (uint16_t)i;
2768 				if (cnt > 7) {
2769 					cnt -= 8;
2770 					memcpy(&p[cnt], pc,
2771 						8 * sizeof(uint16_t));
2772 					pc = &p[cnt];
2773 					while (cnt > 15) {
2774 						cnt -= 16;
2775 						memcpy(&p[cnt], pc,
2776 							16 * sizeof(uint16_t));
2777 					}
2778 				}
2779 				if (cnt)
2780 					memcpy(p, pc, cnt * sizeof(uint16_t));
2781 			} else {
2782 				while (cnt > 1) {
2783 					p[--cnt] = (uint16_t)i;
2784 					p[--cnt] = (uint16_t)i;
2785 				}
2786 				if (cnt)
2787 					p[--cnt] = (uint16_t)i;
2788 			}
2789 			continue;
2790 		}
2791 
2792 		/*
2793 		 * A bit length is too big to be housed to a direct table,
2794 		 * so we use a tree model for its extra bits.
2795 		 */
2796 		bitptn[len] = ptn + cnt;
2797 		bit = 1U << (diffbits -1);
2798 		extlen = len - HTBL_BITS;
2799 
2800 		p = &(tbl[ptn >> diffbits]);
2801 		if (*p == 0) {
2802 			*p = len_avail + hf->tree_used;
2803 			ht = &(hf->tree[hf->tree_used++]);
2804 			if (hf->tree_used > hf->tree_avail)
2805 				return (0);/* Invalid */
2806 			ht->left = 0;
2807 			ht->right = 0;
2808 		} else {
2809 			if (*p < len_avail ||
2810 			    *p >= (len_avail + hf->tree_used))
2811 				return (0);/* Invalid */
2812 			ht = &(hf->tree[*p - len_avail]);
2813 		}
2814 		while (--extlen > 0) {
2815 			if (ptn & bit) {
2816 				if (ht->left < len_avail) {
2817 					ht->left = len_avail + hf->tree_used;
2818 					ht = &(hf->tree[hf->tree_used++]);
2819 					if (hf->tree_used > hf->tree_avail)
2820 						return (0);/* Invalid */
2821 					ht->left = 0;
2822 					ht->right = 0;
2823 				} else {
2824 					ht = &(hf->tree[ht->left - len_avail]);
2825 				}
2826 			} else {
2827 				if (ht->right < len_avail) {
2828 					ht->right = len_avail + hf->tree_used;
2829 					ht = &(hf->tree[hf->tree_used++]);
2830 					if (hf->tree_used > hf->tree_avail)
2831 						return (0);/* Invalid */
2832 					ht->left = 0;
2833 					ht->right = 0;
2834 				} else {
2835 					ht = &(hf->tree[ht->right - len_avail]);
2836 				}
2837 			}
2838 			bit >>= 1;
2839 		}
2840 		if (ptn & bit) {
2841 			if (ht->left != 0)
2842 				return (0);/* Invalid */
2843 			ht->left = (uint16_t)i;
2844 		} else {
2845 			if (ht->right != 0)
2846 				return (0);/* Invalid */
2847 			ht->right = (uint16_t)i;
2848 		}
2849 	}
2850 	return (1);
2851 }
2852 
2853 static int
lzh_decode_huffman_tree(struct huffman * hf,unsigned rbits,int c)2854 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2855 {
2856 	struct htree_t *ht;
2857 	int extlen;
2858 
2859 	ht = hf->tree;
2860 	extlen = hf->shift_bits;
2861 	while (c >= hf->len_avail) {
2862 		c -= hf->len_avail;
2863 		if (extlen-- <= 0 || c >= hf->tree_used)
2864 			return (0);
2865 		if (rbits & (1U << extlen))
2866 			c = ht[c].left;
2867 		else
2868 			c = ht[c].right;
2869 	}
2870 	return (c);
2871 }
2872 
2873 static inline int
lzh_decode_huffman(struct huffman * hf,unsigned rbits)2874 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2875 {
2876 	int c;
2877 	/*
2878 	 * At first search an index table for a bit pattern.
2879 	 * If it fails, search a huffman tree for.
2880 	 */
2881 	c = hf->tbl[rbits >> hf->shift_bits];
2882 	if (c < hf->len_avail || hf->len_avail == 0)
2883 		return (c);
2884 	/* This bit pattern needs to be found out at a huffman tree. */
2885 	return (lzh_decode_huffman_tree(hf, rbits, c));
2886 }
2887