xref: /freebsd/contrib/xz/src/liblzma/common/block_decoder.c (revision 95eb4b873b6a8b527c5bd78d7191975dfca38998)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       block_decoder.c
6 /// \brief      Decodes .xz Blocks
7 //
8 //  Author:     Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11 
12 #include "block_decoder.h"
13 #include "filter_decoder.h"
14 #include "check.h"
15 
16 
17 typedef struct {
18 	enum {
19 		SEQ_CODE,
20 		SEQ_PADDING,
21 		SEQ_CHECK,
22 	} sequence;
23 
24 	/// The filters in the chain; initialized with lzma_raw_decoder_init().
25 	lzma_next_coder next;
26 
27 	/// Decoding options; we also write Compressed Size and Uncompressed
28 	/// Size back to this structure when the decoding has been finished.
29 	lzma_block *block;
30 
31 	/// Compressed Size calculated while decoding
32 	lzma_vli compressed_size;
33 
34 	/// Uncompressed Size calculated while decoding
35 	lzma_vli uncompressed_size;
36 
37 	/// Maximum allowed Compressed Size; this takes into account the
38 	/// size of the Block Header and Check fields when Compressed Size
39 	/// is unknown.
40 	lzma_vli compressed_limit;
41 
42 	/// Maximum allowed Uncompressed Size.
43 	lzma_vli uncompressed_limit;
44 
45 	/// Position when reading the Check field
46 	size_t check_pos;
47 
48 	/// Check of the uncompressed data
49 	lzma_check_state check;
50 
51 	/// True if the integrity check won't be calculated and verified.
52 	bool ignore_check;
53 } lzma_block_coder;
54 
55 
56 static inline bool
57 is_size_valid(lzma_vli size, lzma_vli reference)
58 {
59 	return reference == LZMA_VLI_UNKNOWN || reference == size;
60 }
61 
62 
63 static lzma_ret
64 block_decode(void *coder_ptr, const lzma_allocator *allocator,
65 		const uint8_t *restrict in, size_t *restrict in_pos,
66 		size_t in_size, uint8_t *restrict out,
67 		size_t *restrict out_pos, size_t out_size, lzma_action action)
68 {
69 	lzma_block_coder *coder = coder_ptr;
70 
71 	switch (coder->sequence) {
72 	case SEQ_CODE: {
73 		const size_t in_start = *in_pos;
74 		const size_t out_start = *out_pos;
75 
76 		// Limit the amount of input and output space that we give
77 		// to the raw decoder based on the information we have
78 		// (or don't have) from Block Header.
79 		const size_t in_stop = *in_pos + (size_t)my_min(
80 			in_size - *in_pos,
81 			coder->compressed_limit - coder->compressed_size);
82 		const size_t out_stop = *out_pos + (size_t)my_min(
83 			out_size - *out_pos,
84 			coder->uncompressed_limit - coder->uncompressed_size);
85 
86 		const lzma_ret ret = coder->next.code(coder->next.coder,
87 				allocator, in, in_pos, in_stop,
88 				out, out_pos, out_stop, action);
89 
90 		const size_t in_used = *in_pos - in_start;
91 		const size_t out_used = *out_pos - out_start;
92 
93 		// Because we have limited the input and output sizes,
94 		// we know that these cannot grow too big or overflow.
95 		coder->compressed_size += in_used;
96 		coder->uncompressed_size += out_used;
97 
98 		if (ret == LZMA_OK) {
99 			const bool comp_done = coder->compressed_size
100 					== coder->block->compressed_size;
101 			const bool uncomp_done = coder->uncompressed_size
102 					== coder->block->uncompressed_size;
103 
104 			// If both input and output amounts match the sizes
105 			// in Block Header but we still got LZMA_OK instead
106 			// of LZMA_STREAM_END, the file is broken.
107 			if (comp_done && uncomp_done)
108 				return LZMA_DATA_ERROR;
109 
110 			// If the decoder has consumed all the input that it
111 			// needs but it still couldn't fill the output buffer
112 			// or return LZMA_STREAM_END, the file is broken.
113 			if (comp_done && *out_pos < out_size)
114 				return LZMA_DATA_ERROR;
115 
116 			// If the decoder has produced all the output but
117 			// it still didn't return LZMA_STREAM_END or consume
118 			// more input (for example, detecting an end of
119 			// payload marker may need more input but produce
120 			// no output) the file is broken.
121 			if (uncomp_done && *in_pos < in_size)
122 				return LZMA_DATA_ERROR;
123 		}
124 
125 		// Don't waste time updating the integrity check if it will be
126 		// ignored. Also skip it if no new output was produced. This
127 		// avoids null pointer + 0 (undefined behavior) when out == 0.
128 		if (!coder->ignore_check && out_used > 0)
129 			lzma_check_update(&coder->check, coder->block->check,
130 					out + out_start, out_used);
131 
132 		if (ret != LZMA_STREAM_END)
133 			return ret;
134 
135 		// Compressed and Uncompressed Sizes are now at their final
136 		// values. Verify that they match the values given to us.
137 		if (!is_size_valid(coder->compressed_size,
138 					coder->block->compressed_size)
139 				|| !is_size_valid(coder->uncompressed_size,
140 					coder->block->uncompressed_size))
141 			return LZMA_DATA_ERROR;
142 
143 		// Copy the values into coder->block. The caller
144 		// may use this information to construct Index.
145 		coder->block->compressed_size = coder->compressed_size;
146 		coder->block->uncompressed_size = coder->uncompressed_size;
147 
148 		coder->sequence = SEQ_PADDING;
149 	}
150 
151 	// Fall through
152 
153 	case SEQ_PADDING:
154 		// Compressed Data is padded to a multiple of four bytes.
155 		while (coder->compressed_size & 3) {
156 			if (*in_pos >= in_size)
157 				return LZMA_OK;
158 
159 			// We use compressed_size here just get the Padding
160 			// right. The actual Compressed Size was stored to
161 			// coder->block already, and won't be modified by
162 			// us anymore.
163 			++coder->compressed_size;
164 
165 			if (in[(*in_pos)++] != 0x00)
166 				return LZMA_DATA_ERROR;
167 		}
168 
169 		if (coder->block->check == LZMA_CHECK_NONE)
170 			return LZMA_STREAM_END;
171 
172 		if (!coder->ignore_check)
173 			lzma_check_finish(&coder->check, coder->block->check);
174 
175 		coder->sequence = SEQ_CHECK;
176 
177 	// Fall through
178 
179 	case SEQ_CHECK: {
180 		const size_t check_size = lzma_check_size(coder->block->check);
181 		lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check,
182 				&coder->check_pos, check_size);
183 		if (coder->check_pos < check_size)
184 			return LZMA_OK;
185 
186 		// Validate the Check only if we support it.
187 		// coder->check.buffer may be uninitialized
188 		// when the Check ID is not supported.
189 		if (!coder->ignore_check
190 				&& lzma_check_is_supported(coder->block->check)
191 				&& memcmp(coder->block->raw_check,
192 					coder->check.buffer.u8,
193 					check_size) != 0)
194 			return LZMA_DATA_ERROR;
195 
196 		return LZMA_STREAM_END;
197 	}
198 	}
199 
200 	return LZMA_PROG_ERROR;
201 }
202 
203 
204 static void
205 block_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
206 {
207 	lzma_block_coder *coder = coder_ptr;
208 	lzma_next_end(&coder->next, allocator);
209 	lzma_free(coder, allocator);
210 	return;
211 }
212 
213 
214 extern lzma_ret
215 lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
216 		lzma_block *block)
217 {
218 	lzma_next_coder_init(&lzma_block_decoder_init, next, allocator);
219 
220 	// Validate the options. lzma_block_unpadded_size() does that for us
221 	// except for Uncompressed Size and filters. Filters are validated
222 	// by the raw decoder.
223 	if (lzma_block_unpadded_size(block) == 0
224 			|| !lzma_vli_is_valid(block->uncompressed_size))
225 		return LZMA_PROG_ERROR;
226 
227 	// Allocate *next->coder if needed.
228 	lzma_block_coder *coder = next->coder;
229 	if (coder == NULL) {
230 		coder = lzma_alloc(sizeof(lzma_block_coder), allocator);
231 		if (coder == NULL)
232 			return LZMA_MEM_ERROR;
233 
234 		next->coder = coder;
235 		next->code = &block_decode;
236 		next->end = &block_decoder_end;
237 		coder->next = LZMA_NEXT_CODER_INIT;
238 	}
239 
240 	// Basic initializations
241 	coder->sequence = SEQ_CODE;
242 	coder->block = block;
243 	coder->compressed_size = 0;
244 	coder->uncompressed_size = 0;
245 
246 	// If Compressed Size is not known, we calculate the maximum allowed
247 	// value so that encoded size of the Block (including Block Padding)
248 	// is still a valid VLI and a multiple of four.
249 	coder->compressed_limit
250 			= block->compressed_size == LZMA_VLI_UNKNOWN
251 				? (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
252 					- block->header_size
253 					- lzma_check_size(block->check)
254 				: block->compressed_size;
255 
256 	// With Uncompressed Size this is simpler. If Block Header lacks
257 	// the size info, then LZMA_VLI_MAX is the maximum possible
258 	// Uncompressed Size.
259 	coder->uncompressed_limit
260 			= block->uncompressed_size == LZMA_VLI_UNKNOWN
261 				? LZMA_VLI_MAX
262 				: block->uncompressed_size;
263 
264 	// Initialize the check. It's caller's problem if the Check ID is not
265 	// supported, and the Block decoder cannot verify the Check field.
266 	// Caller can test lzma_check_is_supported(block->check).
267 	coder->check_pos = 0;
268 	lzma_check_init(&coder->check, block->check);
269 
270 	coder->ignore_check = block->version >= 1
271 			? block->ignore_check : false;
272 
273 	// Initialize the filter chain.
274 	return lzma_raw_decoder_init(&coder->next, allocator,
275 			block->filters);
276 }
277 
278 
279 extern LZMA_API(lzma_ret)
280 lzma_block_decoder(lzma_stream *strm, lzma_block *block)
281 {
282 	lzma_next_strm_init(lzma_block_decoder_init, strm, block);
283 
284 	strm->internal->supported_actions[LZMA_RUN] = true;
285 	strm->internal->supported_actions[LZMA_FINISH] = true;
286 
287 	return LZMA_OK;
288 }
289