xref: /freebsd/contrib/xz/src/liblzma/common/index_decoder.c (revision 4fefe1b763573c873bf3dbf3b6f28c22de0ffada)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       index_decoder.c
6 /// \brief      Decodes the Index field
7 //
8 //  Author:     Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11 
12 #include "index_decoder.h"
13 #include "check.h"
14 
15 
/// State of the Index decoder. It is kept between calls to index_decode()
/// so that the Index can be decoded from input that arrives in pieces.
typedef struct {
	/// Current decoding stage. index_decode() switches on this value
	/// and updates it as each part of the Index has been handled.
	enum {
		SEQ_INDICATOR,
		SEQ_COUNT,
		SEQ_MEMUSAGE,
		SEQ_UNPADDED,
		SEQ_UNCOMPRESSED,
		SEQ_PADDING_INIT,
		SEQ_PADDING,
		SEQ_CRC32,
	} sequence;

	/// Memory usage limit
	uint64_t memlimit;

	/// Target Index. This is set to NULL once the decoded Index has
	/// been handed over to the application via index_ptr.
	lzma_index *index;

	/// Pointer given by the application, which is set after
	/// successful decoding.
	lzma_index **index_ptr;

	/// Number of Records left to decode.
	lzma_vli count;

	/// The most recent Unpadded Size field
	lzma_vli unpadded_size;

	/// The most recent Uncompressed Size field
	lzma_vli uncompressed_size;

	/// Multi-purpose position counter: the position passed to
	/// lzma_vli_decode() while decoding an integer, the number of
	/// Index Padding bytes still expected in SEQ_PADDING, and the
	/// index of the next CRC32 byte to verify in SEQ_CRC32.
	size_t pos;

	/// Running CRC32 of the input consumed before the CRC32 field
	uint32_t crc32;
} lzma_index_coder;
53 
54 
55 static lzma_ret
56 index_decode(void *coder_ptr, const lzma_allocator *allocator,
57 		const uint8_t *restrict in, size_t *restrict in_pos,
58 		size_t in_size,
59 		uint8_t *restrict out lzma_attribute((__unused__)),
60 		size_t *restrict out_pos lzma_attribute((__unused__)),
61 		size_t out_size lzma_attribute((__unused__)),
62 		lzma_action action lzma_attribute((__unused__)))
63 {
64 	lzma_index_coder *coder = coder_ptr;
65 
66 	// Similar optimization as in index_encoder.c
67 	const size_t in_start = *in_pos;
68 	lzma_ret ret = LZMA_OK;
69 
70 	while (*in_pos < in_size)
71 	switch (coder->sequence) {
72 	case SEQ_INDICATOR:
73 		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
74 		// LZMA_FORMAT_ERROR, because a typical usage case for Index
75 		// decoder is when parsing the Stream backwards. If seeking
76 		// backward from the Stream Footer gives us something that
77 		// doesn't begin with Index Indicator, the file is considered
78 		// corrupt, not "programming error" or "unrecognized file
79 		// format". One could argue that the application should
80 		// verify the Index Indicator before trying to decode the
81 		// Index, but well, I suppose it is simpler this way.
82 		if (in[(*in_pos)++] != INDEX_INDICATOR)
83 			return LZMA_DATA_ERROR;
84 
85 		coder->sequence = SEQ_COUNT;
86 		break;
87 
88 	case SEQ_COUNT:
89 		ret = lzma_vli_decode(&coder->count, &coder->pos,
90 				in, in_pos, in_size);
91 		if (ret != LZMA_STREAM_END)
92 			goto out;
93 
94 		coder->pos = 0;
95 		coder->sequence = SEQ_MEMUSAGE;
96 
97 	// Fall through
98 
99 	case SEQ_MEMUSAGE:
100 		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
101 			ret = LZMA_MEMLIMIT_ERROR;
102 			goto out;
103 		}
104 
105 		// Tell the Index handling code how many Records this
106 		// Index has to allow it to allocate memory more efficiently.
107 		lzma_index_prealloc(coder->index, coder->count);
108 
109 		ret = LZMA_OK;
110 		coder->sequence = coder->count == 0
111 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
112 		break;
113 
114 	case SEQ_UNPADDED:
115 	case SEQ_UNCOMPRESSED: {
116 		lzma_vli *size = coder->sequence == SEQ_UNPADDED
117 				? &coder->unpadded_size
118 				: &coder->uncompressed_size;
119 
120 		ret = lzma_vli_decode(size, &coder->pos,
121 				in, in_pos, in_size);
122 		if (ret != LZMA_STREAM_END)
123 			goto out;
124 
125 		ret = LZMA_OK;
126 		coder->pos = 0;
127 
128 		if (coder->sequence == SEQ_UNPADDED) {
129 			// Validate that encoded Unpadded Size isn't too small
130 			// or too big.
131 			if (coder->unpadded_size < UNPADDED_SIZE_MIN
132 					|| coder->unpadded_size
133 						> UNPADDED_SIZE_MAX)
134 				return LZMA_DATA_ERROR;
135 
136 			coder->sequence = SEQ_UNCOMPRESSED;
137 		} else {
138 			// Add the decoded Record to the Index.
139 			return_if_error(lzma_index_append(
140 					coder->index, allocator,
141 					coder->unpadded_size,
142 					coder->uncompressed_size));
143 
144 			// Check if this was the last Record.
145 			coder->sequence = --coder->count == 0
146 					? SEQ_PADDING_INIT
147 					: SEQ_UNPADDED;
148 		}
149 
150 		break;
151 	}
152 
153 	case SEQ_PADDING_INIT:
154 		coder->pos = lzma_index_padding_size(coder->index);
155 		coder->sequence = SEQ_PADDING;
156 
157 	// Fall through
158 
159 	case SEQ_PADDING:
160 		if (coder->pos > 0) {
161 			--coder->pos;
162 			if (in[(*in_pos)++] != 0x00)
163 				return LZMA_DATA_ERROR;
164 
165 			break;
166 		}
167 
168 		// Finish the CRC32 calculation.
169 		coder->crc32 = lzma_crc32(in + in_start,
170 				*in_pos - in_start, coder->crc32);
171 
172 		coder->sequence = SEQ_CRC32;
173 
174 	// Fall through
175 
176 	case SEQ_CRC32:
177 		do {
178 			if (*in_pos == in_size)
179 				return LZMA_OK;
180 
181 			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
182 					!= in[(*in_pos)++]) {
183 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
184 				return LZMA_DATA_ERROR;
185 #endif
186 			}
187 
188 		} while (++coder->pos < 4);
189 
190 		// Decoding was successful, now we can let the application
191 		// see the decoded Index.
192 		*coder->index_ptr = coder->index;
193 
194 		// Make index NULL so we don't free it unintentionally.
195 		coder->index = NULL;
196 
197 		return LZMA_STREAM_END;
198 
199 	default:
200 		assert(0);
201 		return LZMA_PROG_ERROR;
202 	}
203 
204 out:
205 	// Update the CRC32.
206 	//
207 	// Avoid null pointer + 0 (undefined behavior) in "in + in_start".
208 	// In such a case we had no input and thus in_used == 0.
209 	{
210 		const size_t in_used = *in_pos - in_start;
211 		if (in_used > 0)
212 			coder->crc32 = lzma_crc32(in + in_start,
213 					in_used, coder->crc32);
214 	}
215 
216 	return ret;
217 }
218 
219 
220 static void
221 index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
222 {
223 	lzma_index_coder *coder = coder_ptr;
224 	lzma_index_end(coder->index, allocator);
225 	lzma_free(coder, allocator);
226 	return;
227 }
228 
229 
230 static lzma_ret
231 index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
232 		uint64_t *old_memlimit, uint64_t new_memlimit)
233 {
234 	lzma_index_coder *coder = coder_ptr;
235 
236 	*memusage = lzma_index_memusage(1, coder->count);
237 	*old_memlimit = coder->memlimit;
238 
239 	if (new_memlimit != 0) {
240 		if (new_memlimit < *memusage)
241 			return LZMA_MEMLIMIT_ERROR;
242 
243 		coder->memlimit = new_memlimit;
244 	}
245 
246 	return LZMA_OK;
247 }
248 
249 
250 static lzma_ret
251 index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
252 		lzma_index **i, uint64_t memlimit)
253 {
254 	// Remember the pointer given by the application. We will set it
255 	// to point to the decoded Index only if decoding is successful.
256 	// Before that, keep it NULL so that applications can always safely
257 	// pass it to lzma_index_end() no matter did decoding succeed or not.
258 	coder->index_ptr = i;
259 	*i = NULL;
260 
261 	// We always allocate a new lzma_index.
262 	coder->index = lzma_index_init(allocator);
263 	if (coder->index == NULL)
264 		return LZMA_MEM_ERROR;
265 
266 	// Initialize the rest.
267 	coder->sequence = SEQ_INDICATOR;
268 	coder->memlimit = my_max(1, memlimit);
269 	coder->count = 0; // Needs to be initialized due to _memconfig().
270 	coder->pos = 0;
271 	coder->crc32 = 0;
272 
273 	return LZMA_OK;
274 }
275 
276 
277 extern lzma_ret
278 lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
279 		lzma_index **i, uint64_t memlimit)
280 {
281 	lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
282 
283 	if (i == NULL)
284 		return LZMA_PROG_ERROR;
285 
286 	lzma_index_coder *coder = next->coder;
287 	if (coder == NULL) {
288 		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
289 		if (coder == NULL)
290 			return LZMA_MEM_ERROR;
291 
292 		next->coder = coder;
293 		next->code = &index_decode;
294 		next->end = &index_decoder_end;
295 		next->memconfig = &index_decoder_memconfig;
296 		coder->index = NULL;
297 	} else {
298 		lzma_index_end(coder->index, allocator);
299 	}
300 
301 	return index_decoder_reset(coder, allocator, i, memlimit);
302 }
303 
304 
305 extern LZMA_API(lzma_ret)
306 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
307 {
308 	lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
309 
310 	strm->internal->supported_actions[LZMA_RUN] = true;
311 	strm->internal->supported_actions[LZMA_FINISH] = true;
312 
313 	return LZMA_OK;
314 }
315 
316 
317 extern LZMA_API(lzma_ret)
318 lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
319 		const lzma_allocator *allocator,
320 		const uint8_t *in, size_t *in_pos, size_t in_size)
321 {
322 	// Sanity checks
323 	if (i == NULL || memlimit == NULL
324 			|| in == NULL || in_pos == NULL || *in_pos > in_size)
325 		return LZMA_PROG_ERROR;
326 
327 	// Initialize the decoder.
328 	lzma_index_coder coder;
329 	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
330 
331 	// Store the input start position so that we can restore it in case
332 	// of an error.
333 	const size_t in_start = *in_pos;
334 
335 	// Do the actual decoding.
336 	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
337 			NULL, NULL, 0, LZMA_RUN);
338 
339 	if (ret == LZMA_STREAM_END) {
340 		ret = LZMA_OK;
341 	} else {
342 		// Something went wrong, free the Index structure and restore
343 		// the input position.
344 		lzma_index_end(coder.index, allocator);
345 		*in_pos = in_start;
346 
347 		if (ret == LZMA_OK) {
348 			// The input is truncated or otherwise corrupt.
349 			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
350 			// like lzma_vli_decode() does in single-call mode.
351 			ret = LZMA_DATA_ERROR;
352 
353 		} else if (ret == LZMA_MEMLIMIT_ERROR) {
354 			// Tell the caller how much memory would have
355 			// been needed.
356 			*memlimit = lzma_index_memusage(1, coder.count);
357 		}
358 	}
359 
360 	return ret;
361 }
362