xref: /freebsd/contrib/xz/src/liblzma/common/index_decoder.c (revision 128836d304d93f2d00eb14069c27089ab46c38d4)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       index_decoder.c
6 /// \brief      Decodes the Index field
7 //
8 //  Author:     Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11 
12 #include "index_decoder.h"
13 #include "check.h"
14 
15 
16 typedef struct {
17 	enum {
18 		SEQ_INDICATOR,
19 		SEQ_COUNT,
20 		SEQ_MEMUSAGE,
21 		SEQ_UNPADDED,
22 		SEQ_UNCOMPRESSED,
23 		SEQ_PADDING_INIT,
24 		SEQ_PADDING,
25 		SEQ_CRC32,
26 	} sequence;
27 
28 	/// Memory usage limit
29 	uint64_t memlimit;
30 
31 	/// Target Index
32 	lzma_index *index;
33 
34 	/// Pointer give by the application, which is set after
35 	/// successful decoding.
36 	lzma_index **index_ptr;
37 
38 	/// Number of Records left to decode.
39 	lzma_vli count;
40 
41 	/// The most recent Unpadded Size field
42 	lzma_vli unpadded_size;
43 
44 	/// The most recent Uncompressed Size field
45 	lzma_vli uncompressed_size;
46 
47 	/// Position in integers
48 	size_t pos;
49 
50 	/// CRC32 of the List of Records field
51 	uint32_t crc32;
52 } lzma_index_coder;
53 
54 
55 static lzma_ret
index_decode(void * coder_ptr,const lzma_allocator * allocator,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,uint8_t * restrict out lzma_attribute ((__unused__)),size_t * restrict out_pos lzma_attribute ((__unused__)),size_t out_size lzma_attribute ((__unused__)),lzma_action action lzma_attribute ((__unused__)))56 index_decode(void *coder_ptr, const lzma_allocator *allocator,
57 		const uint8_t *restrict in, size_t *restrict in_pos,
58 		size_t in_size,
59 		uint8_t *restrict out lzma_attribute((__unused__)),
60 		size_t *restrict out_pos lzma_attribute((__unused__)),
61 		size_t out_size lzma_attribute((__unused__)),
62 		lzma_action action lzma_attribute((__unused__)))
63 {
64 	lzma_index_coder *coder = coder_ptr;
65 
66 	// Similar optimization as in index_encoder.c
67 	const size_t in_start = *in_pos;
68 	lzma_ret ret = LZMA_OK;
69 
70 	while (*in_pos < in_size)
71 	switch (coder->sequence) {
72 	case SEQ_INDICATOR:
73 		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
74 		// LZMA_FORMAT_ERROR, because a typical usage case for Index
75 		// decoder is when parsing the Stream backwards. If seeking
76 		// backward from the Stream Footer gives us something that
77 		// doesn't begin with Index Indicator, the file is considered
78 		// corrupt, not "programming error" or "unrecognized file
79 		// format". One could argue that the application should
80 		// verify the Index Indicator before trying to decode the
81 		// Index, but well, I suppose it is simpler this way.
82 		if (in[(*in_pos)++] != INDEX_INDICATOR)
83 			return LZMA_DATA_ERROR;
84 
85 		coder->sequence = SEQ_COUNT;
86 		break;
87 
88 	case SEQ_COUNT:
89 		ret = lzma_vli_decode(&coder->count, &coder->pos,
90 				in, in_pos, in_size);
91 		if (ret != LZMA_STREAM_END)
92 			goto out;
93 
94 		coder->pos = 0;
95 		coder->sequence = SEQ_MEMUSAGE;
96 		FALLTHROUGH;
97 
98 	case SEQ_MEMUSAGE:
99 		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
100 			ret = LZMA_MEMLIMIT_ERROR;
101 			goto out;
102 		}
103 
104 		// Tell the Index handling code how many Records this
105 		// Index has to allow it to allocate memory more efficiently.
106 		lzma_index_prealloc(coder->index, coder->count);
107 
108 		ret = LZMA_OK;
109 		coder->sequence = coder->count == 0
110 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
111 		break;
112 
113 	case SEQ_UNPADDED:
114 	case SEQ_UNCOMPRESSED: {
115 		lzma_vli *size = coder->sequence == SEQ_UNPADDED
116 				? &coder->unpadded_size
117 				: &coder->uncompressed_size;
118 
119 		ret = lzma_vli_decode(size, &coder->pos,
120 				in, in_pos, in_size);
121 		if (ret != LZMA_STREAM_END)
122 			goto out;
123 
124 		ret = LZMA_OK;
125 		coder->pos = 0;
126 
127 		if (coder->sequence == SEQ_UNPADDED) {
128 			// Validate that encoded Unpadded Size isn't too small
129 			// or too big.
130 			if (coder->unpadded_size < UNPADDED_SIZE_MIN
131 					|| coder->unpadded_size
132 						> UNPADDED_SIZE_MAX)
133 				return LZMA_DATA_ERROR;
134 
135 			coder->sequence = SEQ_UNCOMPRESSED;
136 		} else {
137 			// Add the decoded Record to the Index.
138 			return_if_error(lzma_index_append(
139 					coder->index, allocator,
140 					coder->unpadded_size,
141 					coder->uncompressed_size));
142 
143 			// Check if this was the last Record.
144 			coder->sequence = --coder->count == 0
145 					? SEQ_PADDING_INIT
146 					: SEQ_UNPADDED;
147 		}
148 
149 		break;
150 	}
151 
152 	case SEQ_PADDING_INIT:
153 		coder->pos = lzma_index_padding_size(coder->index);
154 		coder->sequence = SEQ_PADDING;
155 		FALLTHROUGH;
156 
157 	case SEQ_PADDING:
158 		if (coder->pos > 0) {
159 			--coder->pos;
160 			if (in[(*in_pos)++] != 0x00)
161 				return LZMA_DATA_ERROR;
162 
163 			break;
164 		}
165 
166 		// Finish the CRC32 calculation.
167 		coder->crc32 = lzma_crc32(in + in_start,
168 				*in_pos - in_start, coder->crc32);
169 
170 		coder->sequence = SEQ_CRC32;
171 		FALLTHROUGH;
172 
173 	case SEQ_CRC32:
174 		do {
175 			if (*in_pos == in_size)
176 				return LZMA_OK;
177 
178 			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
179 					!= in[(*in_pos)++]) {
180 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
181 				return LZMA_DATA_ERROR;
182 #endif
183 			}
184 
185 		} while (++coder->pos < 4);
186 
187 		// Decoding was successful, now we can let the application
188 		// see the decoded Index.
189 		*coder->index_ptr = coder->index;
190 
191 		// Make index NULL so we don't free it unintentionally.
192 		coder->index = NULL;
193 
194 		return LZMA_STREAM_END;
195 
196 	default:
197 		assert(0);
198 		return LZMA_PROG_ERROR;
199 	}
200 
201 out:
202 	// Update the CRC32.
203 	//
204 	// Avoid null pointer + 0 (undefined behavior) in "in + in_start".
205 	// In such a case we had no input and thus in_used == 0.
206 	{
207 		const size_t in_used = *in_pos - in_start;
208 		if (in_used > 0)
209 			coder->crc32 = lzma_crc32(in + in_start,
210 					in_used, coder->crc32);
211 	}
212 
213 	return ret;
214 }
215 
216 
217 static void
index_decoder_end(void * coder_ptr,const lzma_allocator * allocator)218 index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
219 {
220 	lzma_index_coder *coder = coder_ptr;
221 	lzma_index_end(coder->index, allocator);
222 	lzma_free(coder, allocator);
223 	return;
224 }
225 
226 
227 static lzma_ret
index_decoder_memconfig(void * coder_ptr,uint64_t * memusage,uint64_t * old_memlimit,uint64_t new_memlimit)228 index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
229 		uint64_t *old_memlimit, uint64_t new_memlimit)
230 {
231 	lzma_index_coder *coder = coder_ptr;
232 
233 	*memusage = lzma_index_memusage(1, coder->count);
234 	*old_memlimit = coder->memlimit;
235 
236 	if (new_memlimit != 0) {
237 		if (new_memlimit < *memusage)
238 			return LZMA_MEMLIMIT_ERROR;
239 
240 		coder->memlimit = new_memlimit;
241 	}
242 
243 	return LZMA_OK;
244 }
245 
246 
247 static lzma_ret
index_decoder_reset(lzma_index_coder * coder,const lzma_allocator * allocator,lzma_index ** i,uint64_t memlimit)248 index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
249 		lzma_index **i, uint64_t memlimit)
250 {
251 	// Remember the pointer given by the application. We will set it
252 	// to point to the decoded Index only if decoding is successful.
253 	// Before that, keep it NULL so that applications can always safely
254 	// pass it to lzma_index_end() no matter did decoding succeed or not.
255 	coder->index_ptr = i;
256 	*i = NULL;
257 
258 	// We always allocate a new lzma_index.
259 	coder->index = lzma_index_init(allocator);
260 	if (coder->index == NULL)
261 		return LZMA_MEM_ERROR;
262 
263 	// Initialize the rest.
264 	coder->sequence = SEQ_INDICATOR;
265 	coder->memlimit = my_max(1, memlimit);
266 	coder->count = 0; // Needs to be initialized due to _memconfig().
267 	coder->pos = 0;
268 	coder->crc32 = 0;
269 
270 	return LZMA_OK;
271 }
272 
273 
274 extern lzma_ret
lzma_index_decoder_init(lzma_next_coder * next,const lzma_allocator * allocator,lzma_index ** i,uint64_t memlimit)275 lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
276 		lzma_index **i, uint64_t memlimit)
277 {
278 	lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
279 
280 	if (i == NULL)
281 		return LZMA_PROG_ERROR;
282 
283 	lzma_index_coder *coder = next->coder;
284 	if (coder == NULL) {
285 		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
286 		if (coder == NULL)
287 			return LZMA_MEM_ERROR;
288 
289 		next->coder = coder;
290 		next->code = &index_decode;
291 		next->end = &index_decoder_end;
292 		next->memconfig = &index_decoder_memconfig;
293 		coder->index = NULL;
294 	} else {
295 		lzma_index_end(coder->index, allocator);
296 	}
297 
298 	return index_decoder_reset(coder, allocator, i, memlimit);
299 }
300 
301 
302 extern LZMA_API(lzma_ret)
lzma_index_decoder(lzma_stream * strm,lzma_index ** i,uint64_t memlimit)303 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
304 {
305 	// If i isn't NULL, *i must always be initialized due to
306 	// the wording in the API docs. This way it is initialized
307 	// if we return LZMA_PROG_ERROR due to strm == NULL.
308 	if (i != NULL)
309 		*i = NULL;
310 
311 	lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
312 
313 	strm->internal->supported_actions[LZMA_RUN] = true;
314 	strm->internal->supported_actions[LZMA_FINISH] = true;
315 
316 	return LZMA_OK;
317 }
318 
319 
320 extern LZMA_API(lzma_ret)
lzma_index_buffer_decode(lzma_index ** i,uint64_t * memlimit,const lzma_allocator * allocator,const uint8_t * in,size_t * in_pos,size_t in_size)321 lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
322 		const lzma_allocator *allocator,
323 		const uint8_t *in, size_t *in_pos, size_t in_size)
324 {
325 	// If i isn't NULL, *i must always be initialized due to
326 	// the wording in the API docs.
327 	if (i != NULL)
328 		*i = NULL;
329 
330 	// Sanity checks
331 	if (i == NULL || memlimit == NULL
332 			|| in == NULL || in_pos == NULL || *in_pos > in_size)
333 		return LZMA_PROG_ERROR;
334 
335 	// Initialize the decoder.
336 	lzma_index_coder coder;
337 	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
338 
339 	// Store the input start position so that we can restore it in case
340 	// of an error.
341 	const size_t in_start = *in_pos;
342 
343 	// Do the actual decoding.
344 	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
345 			NULL, NULL, 0, LZMA_RUN);
346 
347 	if (ret == LZMA_STREAM_END) {
348 		ret = LZMA_OK;
349 	} else {
350 		// Something went wrong, free the Index structure and restore
351 		// the input position.
352 		lzma_index_end(coder.index, allocator);
353 		*in_pos = in_start;
354 
355 		if (ret == LZMA_OK) {
356 			// The input is truncated or otherwise corrupt.
357 			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
358 			// like lzma_vli_decode() does in single-call mode.
359 			ret = LZMA_DATA_ERROR;
360 
361 		} else if (ret == LZMA_MEMLIMIT_ERROR) {
362 			// Tell the caller how much memory would have
363 			// been needed.
364 			*memlimit = lzma_index_memusage(1, coder.count);
365 		}
366 	}
367 
368 	return ret;
369 }
370