xref: /freebsd/contrib/xz/src/liblzma/common/alone_decoder.c (revision 3b35e7ee8de9b0260149a2b77e87a2b9c7a36244)
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       alone_decoder.c
/// \brief      Decoder for LZMA_Alone files
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
1181ad8388SMartin Matuska 
1281ad8388SMartin Matuska #include "alone_decoder.h"
1381ad8388SMartin Matuska #include "lzma_decoder.h"
1481ad8388SMartin Matuska #include "lz_decoder.h"
1581ad8388SMartin Matuska 
1681ad8388SMartin Matuska 
171456f0f9SXin LI typedef struct {
1881ad8388SMartin Matuska 	lzma_next_coder next;
1981ad8388SMartin Matuska 
2081ad8388SMartin Matuska 	enum {
2181ad8388SMartin Matuska 		SEQ_PROPERTIES,
2281ad8388SMartin Matuska 		SEQ_DICTIONARY_SIZE,
2381ad8388SMartin Matuska 		SEQ_UNCOMPRESSED_SIZE,
2481ad8388SMartin Matuska 		SEQ_CODER_INIT,
2581ad8388SMartin Matuska 		SEQ_CODE,
2681ad8388SMartin Matuska 	} sequence;
2781ad8388SMartin Matuska 
2842b10a37SXin LI 	/// If true, reject files that are unlikely to be .lzma files.
2942b10a37SXin LI 	/// If false, more non-.lzma files get accepted and will give
3042b10a37SXin LI 	/// LZMA_DATA_ERROR either immediately or after a few output bytes.
3142b10a37SXin LI 	bool picky;
3242b10a37SXin LI 
3381ad8388SMartin Matuska 	/// Position in the header fields
3481ad8388SMartin Matuska 	size_t pos;
3581ad8388SMartin Matuska 
3681ad8388SMartin Matuska 	/// Uncompressed size decoded from the header
3781ad8388SMartin Matuska 	lzma_vli uncompressed_size;
3881ad8388SMartin Matuska 
3981ad8388SMartin Matuska 	/// Memory usage limit
4081ad8388SMartin Matuska 	uint64_t memlimit;
4181ad8388SMartin Matuska 
4281ad8388SMartin Matuska 	/// Amount of memory actually needed (only an estimate)
4381ad8388SMartin Matuska 	uint64_t memusage;
4481ad8388SMartin Matuska 
4581ad8388SMartin Matuska 	/// Options decoded from the header needed to initialize
4681ad8388SMartin Matuska 	/// the LZMA decoder
4781ad8388SMartin Matuska 	lzma_options_lzma options;
481456f0f9SXin LI } lzma_alone_coder;
4981ad8388SMartin Matuska 
5081ad8388SMartin Matuska 
5181ad8388SMartin Matuska static lzma_ret
52a8675d92SXin LI alone_decode(void *coder_ptr, const lzma_allocator *allocator,
5381ad8388SMartin Matuska 		const uint8_t *restrict in, size_t *restrict in_pos,
5481ad8388SMartin Matuska 		size_t in_size, uint8_t *restrict out,
5581ad8388SMartin Matuska 		size_t *restrict out_pos, size_t out_size,
5681ad8388SMartin Matuska 		lzma_action action)
5781ad8388SMartin Matuska {
581456f0f9SXin LI 	lzma_alone_coder *coder = coder_ptr;
591456f0f9SXin LI 
6081ad8388SMartin Matuska 	while (*out_pos < out_size
6181ad8388SMartin Matuska 			&& (coder->sequence == SEQ_CODE || *in_pos < in_size))
6281ad8388SMartin Matuska 	switch (coder->sequence) {
6381ad8388SMartin Matuska 	case SEQ_PROPERTIES:
6481ad8388SMartin Matuska 		if (lzma_lzma_lclppb_decode(&coder->options, in[*in_pos]))
6581ad8388SMartin Matuska 			return LZMA_FORMAT_ERROR;
6681ad8388SMartin Matuska 
6781ad8388SMartin Matuska 		coder->sequence = SEQ_DICTIONARY_SIZE;
6881ad8388SMartin Matuska 		++*in_pos;
6981ad8388SMartin Matuska 		break;
7081ad8388SMartin Matuska 
7181ad8388SMartin Matuska 	case SEQ_DICTIONARY_SIZE:
7281ad8388SMartin Matuska 		coder->options.dict_size
7381ad8388SMartin Matuska 				|= (size_t)(in[*in_pos]) << (coder->pos * 8);
7481ad8388SMartin Matuska 
7581ad8388SMartin Matuska 		if (++coder->pos == 4) {
7642b10a37SXin LI 			if (coder->picky && coder->options.dict_size
7742b10a37SXin LI 					!= UINT32_MAX) {
7881ad8388SMartin Matuska 				// A hack to ditch tons of false positives:
7981ad8388SMartin Matuska 				// We allow only dictionary sizes that are
8081ad8388SMartin Matuska 				// 2^n or 2^n + 2^(n-1). LZMA_Alone created
8181ad8388SMartin Matuska 				// only files with 2^n, but accepts any
8242b10a37SXin LI 				// dictionary size.
8381ad8388SMartin Matuska 				uint32_t d = coder->options.dict_size - 1;
8481ad8388SMartin Matuska 				d |= d >> 2;
8581ad8388SMartin Matuska 				d |= d >> 3;
8681ad8388SMartin Matuska 				d |= d >> 4;
8781ad8388SMartin Matuska 				d |= d >> 8;
8881ad8388SMartin Matuska 				d |= d >> 16;
8981ad8388SMartin Matuska 				++d;
9081ad8388SMartin Matuska 
9181ad8388SMartin Matuska 				if (d != coder->options.dict_size)
9281ad8388SMartin Matuska 					return LZMA_FORMAT_ERROR;
9381ad8388SMartin Matuska 			}
9481ad8388SMartin Matuska 
9581ad8388SMartin Matuska 			coder->pos = 0;
9681ad8388SMartin Matuska 			coder->sequence = SEQ_UNCOMPRESSED_SIZE;
9781ad8388SMartin Matuska 		}
9881ad8388SMartin Matuska 
9981ad8388SMartin Matuska 		++*in_pos;
10081ad8388SMartin Matuska 		break;
10181ad8388SMartin Matuska 
10281ad8388SMartin Matuska 	case SEQ_UNCOMPRESSED_SIZE:
10381ad8388SMartin Matuska 		coder->uncompressed_size
10481ad8388SMartin Matuska 				|= (lzma_vli)(in[*in_pos]) << (coder->pos * 8);
10581ad8388SMartin Matuska 		++*in_pos;
10681ad8388SMartin Matuska 		if (++coder->pos < 8)
10781ad8388SMartin Matuska 			break;
10881ad8388SMartin Matuska 
10981ad8388SMartin Matuska 		// Another hack to ditch false positives: Assume that
11081ad8388SMartin Matuska 		// if the uncompressed size is known, it must be less
11142b10a37SXin LI 		// than 256 GiB.
11273ed8e77SXin LI 		//
11373ed8e77SXin LI 		// FIXME? Without picky we allow > LZMA_VLI_MAX which doesn't
11473ed8e77SXin LI 		// really matter in this specific situation (> LZMA_VLI_MAX is
11573ed8e77SXin LI 		// safe in the LZMA decoder) but it's somewhat weird still.
11642b10a37SXin LI 		if (coder->picky
11742b10a37SXin LI 				&& coder->uncompressed_size != LZMA_VLI_UNKNOWN
11881ad8388SMartin Matuska 				&& coder->uncompressed_size
11981ad8388SMartin Matuska 					>= (LZMA_VLI_C(1) << 38))
12081ad8388SMartin Matuska 			return LZMA_FORMAT_ERROR;
12181ad8388SMartin Matuska 
12273ed8e77SXin LI 		// Use LZMA_FILTER_LZMA1EXT features to specify the
12373ed8e77SXin LI 		// uncompressed size and that the end marker is allowed
12473ed8e77SXin LI 		// even when the uncompressed size is known. Both .lzma
12573ed8e77SXin LI 		// header and LZMA1EXT use UINT64_MAX indicate that size
12673ed8e77SXin LI 		// is unknown.
12773ed8e77SXin LI 		coder->options.ext_flags = LZMA_LZMA1EXT_ALLOW_EOPM;
12873ed8e77SXin LI 		lzma_set_ext_size(coder->options, coder->uncompressed_size);
12973ed8e77SXin LI 
13081ad8388SMartin Matuska 		// Calculate the memory usage so that it is ready
13181ad8388SMartin Matuska 		// for SEQ_CODER_INIT.
13281ad8388SMartin Matuska 		coder->memusage = lzma_lzma_decoder_memusage(&coder->options)
13381ad8388SMartin Matuska 				+ LZMA_MEMUSAGE_BASE;
13481ad8388SMartin Matuska 
13581ad8388SMartin Matuska 		coder->pos = 0;
13681ad8388SMartin Matuska 		coder->sequence = SEQ_CODER_INIT;
13781ad8388SMartin Matuska 
13881ad8388SMartin Matuska 	// Fall through
13981ad8388SMartin Matuska 
14081ad8388SMartin Matuska 	case SEQ_CODER_INIT: {
14181ad8388SMartin Matuska 		if (coder->memusage > coder->memlimit)
14281ad8388SMartin Matuska 			return LZMA_MEMLIMIT_ERROR;
14381ad8388SMartin Matuska 
14481ad8388SMartin Matuska 		lzma_filter_info filters[2] = {
14581ad8388SMartin Matuska 			{
14673ed8e77SXin LI 				.id = LZMA_FILTER_LZMA1EXT,
14781ad8388SMartin Matuska 				.init = &lzma_lzma_decoder_init,
14881ad8388SMartin Matuska 				.options = &coder->options,
14981ad8388SMartin Matuska 			}, {
15081ad8388SMartin Matuska 				.init = NULL,
15181ad8388SMartin Matuska 			}
15281ad8388SMartin Matuska 		};
15381ad8388SMartin Matuska 
15473ed8e77SXin LI 		return_if_error(lzma_next_filter_init(&coder->next,
15573ed8e77SXin LI 				allocator, filters));
15681ad8388SMartin Matuska 
15781ad8388SMartin Matuska 		coder->sequence = SEQ_CODE;
15881ad8388SMartin Matuska 		break;
15981ad8388SMartin Matuska 	}
16081ad8388SMartin Matuska 
16181ad8388SMartin Matuska 	case SEQ_CODE: {
16281ad8388SMartin Matuska 		return coder->next.code(coder->next.coder,
16381ad8388SMartin Matuska 				allocator, in, in_pos, in_size,
16481ad8388SMartin Matuska 				out, out_pos, out_size, action);
16581ad8388SMartin Matuska 	}
16681ad8388SMartin Matuska 
16781ad8388SMartin Matuska 	default:
16881ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
16981ad8388SMartin Matuska 	}
17081ad8388SMartin Matuska 
17181ad8388SMartin Matuska 	return LZMA_OK;
17281ad8388SMartin Matuska }
17381ad8388SMartin Matuska 
17481ad8388SMartin Matuska 
17581ad8388SMartin Matuska static void
1761456f0f9SXin LI alone_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
17781ad8388SMartin Matuska {
1781456f0f9SXin LI 	lzma_alone_coder *coder = coder_ptr;
17981ad8388SMartin Matuska 	lzma_next_end(&coder->next, allocator);
18081ad8388SMartin Matuska 	lzma_free(coder, allocator);
18181ad8388SMartin Matuska 	return;
18281ad8388SMartin Matuska }
18381ad8388SMartin Matuska 
18481ad8388SMartin Matuska 
18581ad8388SMartin Matuska static lzma_ret
1861456f0f9SXin LI alone_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
18781ad8388SMartin Matuska 		uint64_t *old_memlimit, uint64_t new_memlimit)
18881ad8388SMartin Matuska {
1891456f0f9SXin LI 	lzma_alone_coder *coder = coder_ptr;
1901456f0f9SXin LI 
19181ad8388SMartin Matuska 	*memusage = coder->memusage;
19281ad8388SMartin Matuska 	*old_memlimit = coder->memlimit;
19381ad8388SMartin Matuska 
19481ad8388SMartin Matuska 	if (new_memlimit != 0) {
19581ad8388SMartin Matuska 		if (new_memlimit < coder->memusage)
19681ad8388SMartin Matuska 			return LZMA_MEMLIMIT_ERROR;
19781ad8388SMartin Matuska 
19881ad8388SMartin Matuska 		coder->memlimit = new_memlimit;
19981ad8388SMartin Matuska 	}
20081ad8388SMartin Matuska 
20181ad8388SMartin Matuska 	return LZMA_OK;
20281ad8388SMartin Matuska }
20381ad8388SMartin Matuska 
20481ad8388SMartin Matuska 
20581ad8388SMartin Matuska extern lzma_ret
20653200025SRui Paulo lzma_alone_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
20742b10a37SXin LI 		uint64_t memlimit, bool picky)
20881ad8388SMartin Matuska {
20981ad8388SMartin Matuska 	lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator);
21081ad8388SMartin Matuska 
2111456f0f9SXin LI 	lzma_alone_coder *coder = next->coder;
2121456f0f9SXin LI 
2131456f0f9SXin LI 	if (coder == NULL) {
2141456f0f9SXin LI 		coder = lzma_alloc(sizeof(lzma_alone_coder), allocator);
2151456f0f9SXin LI 		if (coder == NULL)
21681ad8388SMartin Matuska 			return LZMA_MEM_ERROR;
21781ad8388SMartin Matuska 
2181456f0f9SXin LI 		next->coder = coder;
21981ad8388SMartin Matuska 		next->code = &alone_decode;
22081ad8388SMartin Matuska 		next->end = &alone_decoder_end;
22181ad8388SMartin Matuska 		next->memconfig = &alone_decoder_memconfig;
2221456f0f9SXin LI 		coder->next = LZMA_NEXT_CODER_INIT;
22381ad8388SMartin Matuska 	}
22481ad8388SMartin Matuska 
2251456f0f9SXin LI 	coder->sequence = SEQ_PROPERTIES;
2261456f0f9SXin LI 	coder->picky = picky;
2271456f0f9SXin LI 	coder->pos = 0;
2281456f0f9SXin LI 	coder->options.dict_size = 0;
2291456f0f9SXin LI 	coder->options.preset_dict = NULL;
2301456f0f9SXin LI 	coder->options.preset_dict_size = 0;
2311456f0f9SXin LI 	coder->uncompressed_size = 0;
232b71a5db3SXin LI 	coder->memlimit = my_max(1, memlimit);
2331456f0f9SXin LI 	coder->memusage = LZMA_MEMUSAGE_BASE;
23481ad8388SMartin Matuska 
23581ad8388SMartin Matuska 	return LZMA_OK;
23681ad8388SMartin Matuska }
23781ad8388SMartin Matuska 
23881ad8388SMartin Matuska 
23981ad8388SMartin Matuska extern LZMA_API(lzma_ret)
24081ad8388SMartin Matuska lzma_alone_decoder(lzma_stream *strm, uint64_t memlimit)
24181ad8388SMartin Matuska {
24242b10a37SXin LI 	lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit, false);
24381ad8388SMartin Matuska 
24481ad8388SMartin Matuska 	strm->internal->supported_actions[LZMA_RUN] = true;
24581ad8388SMartin Matuska 	strm->internal->supported_actions[LZMA_FINISH] = true;
24681ad8388SMartin Matuska 
24781ad8388SMartin Matuska 	return LZMA_OK;
24881ad8388SMartin Matuska }
249