// xref: /freebsd/contrib/xz/src/liblzma/common/block_decoder.c (revision 3b35e7ee8de9b0260149a2b77e87a2b9c7a36244)
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       block_decoder.c
/// \brief      Decodes .xz Blocks
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
1181ad8388SMartin Matuska 
1281ad8388SMartin Matuska #include "block_decoder.h"
1381ad8388SMartin Matuska #include "filter_decoder.h"
1481ad8388SMartin Matuska #include "check.h"
1581ad8388SMartin Matuska 
1681ad8388SMartin Matuska 
171456f0f9SXin LI typedef struct {
1881ad8388SMartin Matuska 	enum {
1981ad8388SMartin Matuska 		SEQ_CODE,
2081ad8388SMartin Matuska 		SEQ_PADDING,
2181ad8388SMartin Matuska 		SEQ_CHECK,
2281ad8388SMartin Matuska 	} sequence;
2381ad8388SMartin Matuska 
2481ad8388SMartin Matuska 	/// The filters in the chain; initialized with lzma_raw_decoder_init().
2581ad8388SMartin Matuska 	lzma_next_coder next;
2681ad8388SMartin Matuska 
2781ad8388SMartin Matuska 	/// Decoding options; we also write Compressed Size and Uncompressed
2881ad8388SMartin Matuska 	/// Size back to this structure when the decoding has been finished.
2981ad8388SMartin Matuska 	lzma_block *block;
3081ad8388SMartin Matuska 
3181ad8388SMartin Matuska 	/// Compressed Size calculated while decoding
3281ad8388SMartin Matuska 	lzma_vli compressed_size;
3381ad8388SMartin Matuska 
3481ad8388SMartin Matuska 	/// Uncompressed Size calculated while decoding
3581ad8388SMartin Matuska 	lzma_vli uncompressed_size;
3681ad8388SMartin Matuska 
3781ad8388SMartin Matuska 	/// Maximum allowed Compressed Size; this takes into account the
3881ad8388SMartin Matuska 	/// size of the Block Header and Check fields when Compressed Size
3981ad8388SMartin Matuska 	/// is unknown.
4081ad8388SMartin Matuska 	lzma_vli compressed_limit;
4181ad8388SMartin Matuska 
429e6bbe47SXin LI 	/// Maximum allowed Uncompressed Size.
439e6bbe47SXin LI 	lzma_vli uncompressed_limit;
449e6bbe47SXin LI 
4581ad8388SMartin Matuska 	/// Position when reading the Check field
4681ad8388SMartin Matuska 	size_t check_pos;
4781ad8388SMartin Matuska 
4881ad8388SMartin Matuska 	/// Check of the uncompressed data
4981ad8388SMartin Matuska 	lzma_check_state check;
5053200025SRui Paulo 
5153200025SRui Paulo 	/// True if the integrity check won't be calculated and verified.
5253200025SRui Paulo 	bool ignore_check;
531456f0f9SXin LI } lzma_block_coder;
5481ad8388SMartin Matuska 
5581ad8388SMartin Matuska 
5681ad8388SMartin Matuska static inline bool
5781ad8388SMartin Matuska is_size_valid(lzma_vli size, lzma_vli reference)
5881ad8388SMartin Matuska {
5981ad8388SMartin Matuska 	return reference == LZMA_VLI_UNKNOWN || reference == size;
6081ad8388SMartin Matuska }
6181ad8388SMartin Matuska 
6281ad8388SMartin Matuska 
6381ad8388SMartin Matuska static lzma_ret
641456f0f9SXin LI block_decode(void *coder_ptr, const lzma_allocator *allocator,
6581ad8388SMartin Matuska 		const uint8_t *restrict in, size_t *restrict in_pos,
6681ad8388SMartin Matuska 		size_t in_size, uint8_t *restrict out,
6781ad8388SMartin Matuska 		size_t *restrict out_pos, size_t out_size, lzma_action action)
6881ad8388SMartin Matuska {
691456f0f9SXin LI 	lzma_block_coder *coder = coder_ptr;
701456f0f9SXin LI 
7181ad8388SMartin Matuska 	switch (coder->sequence) {
7281ad8388SMartin Matuska 	case SEQ_CODE: {
7381ad8388SMartin Matuska 		const size_t in_start = *in_pos;
7481ad8388SMartin Matuska 		const size_t out_start = *out_pos;
7581ad8388SMartin Matuska 
769e6bbe47SXin LI 		// Limit the amount of input and output space that we give
779e6bbe47SXin LI 		// to the raw decoder based on the information we have
789e6bbe47SXin LI 		// (or don't have) from Block Header.
799e6bbe47SXin LI 		const size_t in_stop = *in_pos + (size_t)my_min(
809e6bbe47SXin LI 			in_size - *in_pos,
819e6bbe47SXin LI 			coder->compressed_limit - coder->compressed_size);
829e6bbe47SXin LI 		const size_t out_stop = *out_pos + (size_t)my_min(
839e6bbe47SXin LI 			out_size - *out_pos,
849e6bbe47SXin LI 			coder->uncompressed_limit - coder->uncompressed_size);
859e6bbe47SXin LI 
8681ad8388SMartin Matuska 		const lzma_ret ret = coder->next.code(coder->next.coder,
879e6bbe47SXin LI 				allocator, in, in_pos, in_stop,
889e6bbe47SXin LI 				out, out_pos, out_stop, action);
8981ad8388SMartin Matuska 
9081ad8388SMartin Matuska 		const size_t in_used = *in_pos - in_start;
9181ad8388SMartin Matuska 		const size_t out_used = *out_pos - out_start;
9281ad8388SMartin Matuska 
939e6bbe47SXin LI 		// Because we have limited the input and output sizes,
949e6bbe47SXin LI 		// we know that these cannot grow too big or overflow.
959e6bbe47SXin LI 		coder->compressed_size += in_used;
969e6bbe47SXin LI 		coder->uncompressed_size += out_used;
979e6bbe47SXin LI 
989e6bbe47SXin LI 		if (ret == LZMA_OK) {
999e6bbe47SXin LI 			const bool comp_done = coder->compressed_size
1009e6bbe47SXin LI 					== coder->block->compressed_size;
1019e6bbe47SXin LI 			const bool uncomp_done = coder->uncompressed_size
1029e6bbe47SXin LI 					== coder->block->uncompressed_size;
1039e6bbe47SXin LI 
1049e6bbe47SXin LI 			// If both input and output amounts match the sizes
1059e6bbe47SXin LI 			// in Block Header but we still got LZMA_OK instead
1069e6bbe47SXin LI 			// of LZMA_STREAM_END, the file is broken.
1079e6bbe47SXin LI 			if (comp_done && uncomp_done)
10881ad8388SMartin Matuska 				return LZMA_DATA_ERROR;
10981ad8388SMartin Matuska 
1109e6bbe47SXin LI 			// If the decoder has consumed all the input that it
1119e6bbe47SXin LI 			// needs but it still couldn't fill the output buffer
1129e6bbe47SXin LI 			// or return LZMA_STREAM_END, the file is broken.
1139e6bbe47SXin LI 			if (comp_done && *out_pos < out_size)
1149e6bbe47SXin LI 				return LZMA_DATA_ERROR;
1159e6bbe47SXin LI 
1169e6bbe47SXin LI 			// If the decoder has produced all the output but
1179e6bbe47SXin LI 			// it still didn't return LZMA_STREAM_END or consume
1189e6bbe47SXin LI 			// more input (for example, detecting an end of
1199e6bbe47SXin LI 			// payload marker may need more input but produce
1209e6bbe47SXin LI 			// no output) the file is broken.
1219e6bbe47SXin LI 			if (uncomp_done && *in_pos < in_size)
1229e6bbe47SXin LI 				return LZMA_DATA_ERROR;
1239e6bbe47SXin LI 		}
1249e6bbe47SXin LI 
125c917796cSXin LI 		// Don't waste time updating the integrity check if it will be
126c917796cSXin LI 		// ignored. Also skip it if no new output was produced. This
127c917796cSXin LI 		// avoids null pointer + 0 (undefined behavior) when out == 0.
128c917796cSXin LI 		if (!coder->ignore_check && out_used > 0)
12981ad8388SMartin Matuska 			lzma_check_update(&coder->check, coder->block->check,
13081ad8388SMartin Matuska 					out + out_start, out_used);
13181ad8388SMartin Matuska 
13281ad8388SMartin Matuska 		if (ret != LZMA_STREAM_END)
13381ad8388SMartin Matuska 			return ret;
13481ad8388SMartin Matuska 
13581ad8388SMartin Matuska 		// Compressed and Uncompressed Sizes are now at their final
13681ad8388SMartin Matuska 		// values. Verify that they match the values given to us.
13781ad8388SMartin Matuska 		if (!is_size_valid(coder->compressed_size,
13881ad8388SMartin Matuska 					coder->block->compressed_size)
13981ad8388SMartin Matuska 				|| !is_size_valid(coder->uncompressed_size,
14081ad8388SMartin Matuska 					coder->block->uncompressed_size))
14181ad8388SMartin Matuska 			return LZMA_DATA_ERROR;
14281ad8388SMartin Matuska 
14381ad8388SMartin Matuska 		// Copy the values into coder->block. The caller
14481ad8388SMartin Matuska 		// may use this information to construct Index.
14581ad8388SMartin Matuska 		coder->block->compressed_size = coder->compressed_size;
14681ad8388SMartin Matuska 		coder->block->uncompressed_size = coder->uncompressed_size;
14781ad8388SMartin Matuska 
14881ad8388SMartin Matuska 		coder->sequence = SEQ_PADDING;
14981ad8388SMartin Matuska 	}
15081ad8388SMartin Matuska 
15181ad8388SMartin Matuska 	// Fall through
15281ad8388SMartin Matuska 
15381ad8388SMartin Matuska 	case SEQ_PADDING:
15481ad8388SMartin Matuska 		// Compressed Data is padded to a multiple of four bytes.
15581ad8388SMartin Matuska 		while (coder->compressed_size & 3) {
15681ad8388SMartin Matuska 			if (*in_pos >= in_size)
15781ad8388SMartin Matuska 				return LZMA_OK;
15881ad8388SMartin Matuska 
15981ad8388SMartin Matuska 			// We use compressed_size here just get the Padding
16081ad8388SMartin Matuska 			// right. The actual Compressed Size was stored to
16181ad8388SMartin Matuska 			// coder->block already, and won't be modified by
16281ad8388SMartin Matuska 			// us anymore.
16381ad8388SMartin Matuska 			++coder->compressed_size;
16481ad8388SMartin Matuska 
16581ad8388SMartin Matuska 			if (in[(*in_pos)++] != 0x00)
16681ad8388SMartin Matuska 				return LZMA_DATA_ERROR;
16781ad8388SMartin Matuska 		}
16881ad8388SMartin Matuska 
16981ad8388SMartin Matuska 		if (coder->block->check == LZMA_CHECK_NONE)
17081ad8388SMartin Matuska 			return LZMA_STREAM_END;
17181ad8388SMartin Matuska 
17253200025SRui Paulo 		if (!coder->ignore_check)
17381ad8388SMartin Matuska 			lzma_check_finish(&coder->check, coder->block->check);
17453200025SRui Paulo 
17581ad8388SMartin Matuska 		coder->sequence = SEQ_CHECK;
17681ad8388SMartin Matuska 
17781ad8388SMartin Matuska 	// Fall through
17881ad8388SMartin Matuska 
17981ad8388SMartin Matuska 	case SEQ_CHECK: {
18081ad8388SMartin Matuska 		const size_t check_size = lzma_check_size(coder->block->check);
18181ad8388SMartin Matuska 		lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check,
18281ad8388SMartin Matuska 				&coder->check_pos, check_size);
18381ad8388SMartin Matuska 		if (coder->check_pos < check_size)
18481ad8388SMartin Matuska 			return LZMA_OK;
18581ad8388SMartin Matuska 
18681ad8388SMartin Matuska 		// Validate the Check only if we support it.
18781ad8388SMartin Matuska 		// coder->check.buffer may be uninitialized
18881ad8388SMartin Matuska 		// when the Check ID is not supported.
18953200025SRui Paulo 		if (!coder->ignore_check
19053200025SRui Paulo 				&& lzma_check_is_supported(coder->block->check)
19181ad8388SMartin Matuska 				&& memcmp(coder->block->raw_check,
19281ad8388SMartin Matuska 					coder->check.buffer.u8,
19381ad8388SMartin Matuska 					check_size) != 0)
19481ad8388SMartin Matuska 			return LZMA_DATA_ERROR;
19581ad8388SMartin Matuska 
19681ad8388SMartin Matuska 		return LZMA_STREAM_END;
19781ad8388SMartin Matuska 	}
19881ad8388SMartin Matuska 	}
19981ad8388SMartin Matuska 
20081ad8388SMartin Matuska 	return LZMA_PROG_ERROR;
20181ad8388SMartin Matuska }
20281ad8388SMartin Matuska 
20381ad8388SMartin Matuska 
20481ad8388SMartin Matuska static void
2051456f0f9SXin LI block_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
20681ad8388SMartin Matuska {
2071456f0f9SXin LI 	lzma_block_coder *coder = coder_ptr;
20881ad8388SMartin Matuska 	lzma_next_end(&coder->next, allocator);
20981ad8388SMartin Matuska 	lzma_free(coder, allocator);
21081ad8388SMartin Matuska 	return;
21181ad8388SMartin Matuska }
21281ad8388SMartin Matuska 
21381ad8388SMartin Matuska 
21481ad8388SMartin Matuska extern lzma_ret
21553200025SRui Paulo lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
21681ad8388SMartin Matuska 		lzma_block *block)
21781ad8388SMartin Matuska {
21881ad8388SMartin Matuska 	lzma_next_coder_init(&lzma_block_decoder_init, next, allocator);
21981ad8388SMartin Matuska 
22081ad8388SMartin Matuska 	// Validate the options. lzma_block_unpadded_size() does that for us
22181ad8388SMartin Matuska 	// except for Uncompressed Size and filters. Filters are validated
22281ad8388SMartin Matuska 	// by the raw decoder.
22381ad8388SMartin Matuska 	if (lzma_block_unpadded_size(block) == 0
22481ad8388SMartin Matuska 			|| !lzma_vli_is_valid(block->uncompressed_size))
22581ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
22681ad8388SMartin Matuska 
2271456f0f9SXin LI 	// Allocate *next->coder if needed.
2281456f0f9SXin LI 	lzma_block_coder *coder = next->coder;
2291456f0f9SXin LI 	if (coder == NULL) {
2301456f0f9SXin LI 		coder = lzma_alloc(sizeof(lzma_block_coder), allocator);
2311456f0f9SXin LI 		if (coder == NULL)
23281ad8388SMartin Matuska 			return LZMA_MEM_ERROR;
23381ad8388SMartin Matuska 
2341456f0f9SXin LI 		next->coder = coder;
23581ad8388SMartin Matuska 		next->code = &block_decode;
23681ad8388SMartin Matuska 		next->end = &block_decoder_end;
2371456f0f9SXin LI 		coder->next = LZMA_NEXT_CODER_INIT;
23881ad8388SMartin Matuska 	}
23981ad8388SMartin Matuska 
24081ad8388SMartin Matuska 	// Basic initializations
2411456f0f9SXin LI 	coder->sequence = SEQ_CODE;
2421456f0f9SXin LI 	coder->block = block;
2431456f0f9SXin LI 	coder->compressed_size = 0;
2441456f0f9SXin LI 	coder->uncompressed_size = 0;
24581ad8388SMartin Matuska 
24681ad8388SMartin Matuska 	// If Compressed Size is not known, we calculate the maximum allowed
24781ad8388SMartin Matuska 	// value so that encoded size of the Block (including Block Padding)
24881ad8388SMartin Matuska 	// is still a valid VLI and a multiple of four.
2491456f0f9SXin LI 	coder->compressed_limit
25081ad8388SMartin Matuska 			= block->compressed_size == LZMA_VLI_UNKNOWN
25181ad8388SMartin Matuska 				? (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
25281ad8388SMartin Matuska 					- block->header_size
25381ad8388SMartin Matuska 					- lzma_check_size(block->check)
25481ad8388SMartin Matuska 				: block->compressed_size;
25581ad8388SMartin Matuska 
2569e6bbe47SXin LI 	// With Uncompressed Size this is simpler. If Block Header lacks
2579e6bbe47SXin LI 	// the size info, then LZMA_VLI_MAX is the maximum possible
2589e6bbe47SXin LI 	// Uncompressed Size.
2599e6bbe47SXin LI 	coder->uncompressed_limit
2609e6bbe47SXin LI 			= block->uncompressed_size == LZMA_VLI_UNKNOWN
2619e6bbe47SXin LI 				? LZMA_VLI_MAX
2629e6bbe47SXin LI 				: block->uncompressed_size;
2639e6bbe47SXin LI 
26481ad8388SMartin Matuska 	// Initialize the check. It's caller's problem if the Check ID is not
26581ad8388SMartin Matuska 	// supported, and the Block decoder cannot verify the Check field.
26681ad8388SMartin Matuska 	// Caller can test lzma_check_is_supported(block->check).
2671456f0f9SXin LI 	coder->check_pos = 0;
2681456f0f9SXin LI 	lzma_check_init(&coder->check, block->check);
26981ad8388SMartin Matuska 
2701456f0f9SXin LI 	coder->ignore_check = block->version >= 1
27153200025SRui Paulo 			? block->ignore_check : false;
27253200025SRui Paulo 
27381ad8388SMartin Matuska 	// Initialize the filter chain.
2741456f0f9SXin LI 	return lzma_raw_decoder_init(&coder->next, allocator,
27581ad8388SMartin Matuska 			block->filters);
27681ad8388SMartin Matuska }
27781ad8388SMartin Matuska 
27881ad8388SMartin Matuska 
27981ad8388SMartin Matuska extern LZMA_API(lzma_ret)
28081ad8388SMartin Matuska lzma_block_decoder(lzma_stream *strm, lzma_block *block)
28181ad8388SMartin Matuska {
28281ad8388SMartin Matuska 	lzma_next_strm_init(lzma_block_decoder_init, strm, block);
28381ad8388SMartin Matuska 
28481ad8388SMartin Matuska 	strm->internal->supported_actions[LZMA_RUN] = true;
28581ad8388SMartin Matuska 	strm->internal->supported_actions[LZMA_FINISH] = true;
28681ad8388SMartin Matuska 
28781ad8388SMartin Matuska 	return LZMA_OK;
28881ad8388SMartin Matuska }
289