xref: /freebsd/contrib/xz/src/liblzma/common/index_decoder.c (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index_decoder.c
4 /// \brief      Decodes the Index field
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "index_decoder.h"
14 #include "check.h"
15 
16 
/// State of the Index decoder. It is kept between calls so that
/// index_decode() can stop when the input runs out and resume later.
typedef struct {
	/// Which part of the Index field index_decode() handles next
	enum {
		SEQ_INDICATOR,
		SEQ_COUNT,
		SEQ_MEMUSAGE,
		SEQ_UNPADDED,
		SEQ_UNCOMPRESSED,
		SEQ_PADDING_INIT,
		SEQ_PADDING,
		SEQ_CRC32,
	} sequence;

	/// Memory usage limit
	uint64_t memlimit;

	/// Target Index
	lzma_index *index;

	/// Pointer given by the application, which is set after
	/// successful decoding.
	lzma_index **index_ptr;

	/// Number of Records left to decode.
	lzma_vli count;

	/// The most recent Unpadded Size field
	lzma_vli unpadded_size;

	/// The most recent Uncompressed Size field
	lzma_vli uncompressed_size;

	/// Position in integers. index_decode() reuses this as the position
	/// passed to lzma_vli_decode(), as the number of padding bytes still
	/// expected, and as the byte index inside the CRC32 field.
	size_t pos;

	/// Running CRC32. index_decode() accumulates it over the input bytes
	/// it consumes before the CRC32 field and then compares it against
	/// that field.
	uint32_t crc32;
} lzma_index_coder;
54 
55 
56 static lzma_ret
57 index_decode(void *coder_ptr, const lzma_allocator *allocator,
58 		const uint8_t *restrict in, size_t *restrict in_pos,
59 		size_t in_size,
60 		uint8_t *restrict out lzma_attribute((__unused__)),
61 		size_t *restrict out_pos lzma_attribute((__unused__)),
62 		size_t out_size lzma_attribute((__unused__)),
63 		lzma_action action lzma_attribute((__unused__)))
64 {
65 	lzma_index_coder *coder = coder_ptr;
66 
67 	// Similar optimization as in index_encoder.c
68 	const size_t in_start = *in_pos;
69 	lzma_ret ret = LZMA_OK;
70 
71 	while (*in_pos < in_size)
72 	switch (coder->sequence) {
73 	case SEQ_INDICATOR:
74 		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
75 		// LZMA_FORMAT_ERROR, because a typical usage case for Index
76 		// decoder is when parsing the Stream backwards. If seeking
77 		// backward from the Stream Footer gives us something that
78 		// doesn't begin with Index Indicator, the file is considered
79 		// corrupt, not "programming error" or "unrecognized file
80 		// format". One could argue that the application should
81 		// verify the Index Indicator before trying to decode the
82 		// Index, but well, I suppose it is simpler this way.
83 		if (in[(*in_pos)++] != INDEX_INDICATOR)
84 			return LZMA_DATA_ERROR;
85 
86 		coder->sequence = SEQ_COUNT;
87 		break;
88 
89 	case SEQ_COUNT:
90 		ret = lzma_vli_decode(&coder->count, &coder->pos,
91 				in, in_pos, in_size);
92 		if (ret != LZMA_STREAM_END)
93 			goto out;
94 
95 		coder->pos = 0;
96 		coder->sequence = SEQ_MEMUSAGE;
97 
98 	// Fall through
99 
100 	case SEQ_MEMUSAGE:
101 		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
102 			ret = LZMA_MEMLIMIT_ERROR;
103 			goto out;
104 		}
105 
106 		// Tell the Index handling code how many Records this
107 		// Index has to allow it to allocate memory more efficiently.
108 		lzma_index_prealloc(coder->index, coder->count);
109 
110 		ret = LZMA_OK;
111 		coder->sequence = coder->count == 0
112 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
113 		break;
114 
115 	case SEQ_UNPADDED:
116 	case SEQ_UNCOMPRESSED: {
117 		lzma_vli *size = coder->sequence == SEQ_UNPADDED
118 				? &coder->unpadded_size
119 				: &coder->uncompressed_size;
120 
121 		ret = lzma_vli_decode(size, &coder->pos,
122 				in, in_pos, in_size);
123 		if (ret != LZMA_STREAM_END)
124 			goto out;
125 
126 		ret = LZMA_OK;
127 		coder->pos = 0;
128 
129 		if (coder->sequence == SEQ_UNPADDED) {
130 			// Validate that encoded Unpadded Size isn't too small
131 			// or too big.
132 			if (coder->unpadded_size < UNPADDED_SIZE_MIN
133 					|| coder->unpadded_size
134 						> UNPADDED_SIZE_MAX)
135 				return LZMA_DATA_ERROR;
136 
137 			coder->sequence = SEQ_UNCOMPRESSED;
138 		} else {
139 			// Add the decoded Record to the Index.
140 			return_if_error(lzma_index_append(
141 					coder->index, allocator,
142 					coder->unpadded_size,
143 					coder->uncompressed_size));
144 
145 			// Check if this was the last Record.
146 			coder->sequence = --coder->count == 0
147 					? SEQ_PADDING_INIT
148 					: SEQ_UNPADDED;
149 		}
150 
151 		break;
152 	}
153 
154 	case SEQ_PADDING_INIT:
155 		coder->pos = lzma_index_padding_size(coder->index);
156 		coder->sequence = SEQ_PADDING;
157 
158 	// Fall through
159 
160 	case SEQ_PADDING:
161 		if (coder->pos > 0) {
162 			--coder->pos;
163 			if (in[(*in_pos)++] != 0x00)
164 				return LZMA_DATA_ERROR;
165 
166 			break;
167 		}
168 
169 		// Finish the CRC32 calculation.
170 		coder->crc32 = lzma_crc32(in + in_start,
171 				*in_pos - in_start, coder->crc32);
172 
173 		coder->sequence = SEQ_CRC32;
174 
175 	// Fall through
176 
177 	case SEQ_CRC32:
178 		do {
179 			if (*in_pos == in_size)
180 				return LZMA_OK;
181 
182 			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
183 					!= in[(*in_pos)++]) {
184 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
185 				return LZMA_DATA_ERROR;
186 #endif
187 			}
188 
189 		} while (++coder->pos < 4);
190 
191 		// Decoding was successful, now we can let the application
192 		// see the decoded Index.
193 		*coder->index_ptr = coder->index;
194 
195 		// Make index NULL so we don't free it unintentionally.
196 		coder->index = NULL;
197 
198 		return LZMA_STREAM_END;
199 
200 	default:
201 		assert(0);
202 		return LZMA_PROG_ERROR;
203 	}
204 
205 out:
206 	// Update the CRC32,
207 	coder->crc32 = lzma_crc32(in + in_start,
208 			*in_pos - in_start, coder->crc32);
209 
210 	return ret;
211 }
212 
213 
214 static void
215 index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
216 {
217 	lzma_index_coder *coder = coder_ptr;
218 	lzma_index_end(coder->index, allocator);
219 	lzma_free(coder, allocator);
220 	return;
221 }
222 
223 
224 static lzma_ret
225 index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
226 		uint64_t *old_memlimit, uint64_t new_memlimit)
227 {
228 	lzma_index_coder *coder = coder_ptr;
229 
230 	*memusage = lzma_index_memusage(1, coder->count);
231 	*old_memlimit = coder->memlimit;
232 
233 	if (new_memlimit != 0) {
234 		if (new_memlimit < *memusage)
235 			return LZMA_MEMLIMIT_ERROR;
236 
237 		coder->memlimit = new_memlimit;
238 	}
239 
240 	return LZMA_OK;
241 }
242 
243 
244 static lzma_ret
245 index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
246 		lzma_index **i, uint64_t memlimit)
247 {
248 	// Remember the pointer given by the application. We will set it
249 	// to point to the decoded Index only if decoding is successful.
250 	// Before that, keep it NULL so that applications can always safely
251 	// pass it to lzma_index_end() no matter did decoding succeed or not.
252 	coder->index_ptr = i;
253 	*i = NULL;
254 
255 	// We always allocate a new lzma_index.
256 	coder->index = lzma_index_init(allocator);
257 	if (coder->index == NULL)
258 		return LZMA_MEM_ERROR;
259 
260 	// Initialize the rest.
261 	coder->sequence = SEQ_INDICATOR;
262 	coder->memlimit = my_max(1, memlimit);
263 	coder->count = 0; // Needs to be initialized due to _memconfig().
264 	coder->pos = 0;
265 	coder->crc32 = 0;
266 
267 	return LZMA_OK;
268 }
269 
270 
271 extern lzma_ret
272 lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
273 		lzma_index **i, uint64_t memlimit)
274 {
275 	lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
276 
277 	if (i == NULL)
278 		return LZMA_PROG_ERROR;
279 
280 	lzma_index_coder *coder = next->coder;
281 	if (coder == NULL) {
282 		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
283 		if (coder == NULL)
284 			return LZMA_MEM_ERROR;
285 
286 		next->coder = coder;
287 		next->code = &index_decode;
288 		next->end = &index_decoder_end;
289 		next->memconfig = &index_decoder_memconfig;
290 		coder->index = NULL;
291 	} else {
292 		lzma_index_end(coder->index, allocator);
293 	}
294 
295 	return index_decoder_reset(coder, allocator, i, memlimit);
296 }
297 
298 
299 extern LZMA_API(lzma_ret)
300 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
301 {
302 	lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
303 
304 	strm->internal->supported_actions[LZMA_RUN] = true;
305 	strm->internal->supported_actions[LZMA_FINISH] = true;
306 
307 	return LZMA_OK;
308 }
309 
310 
311 extern LZMA_API(lzma_ret)
312 lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
313 		const lzma_allocator *allocator,
314 		const uint8_t *in, size_t *in_pos, size_t in_size)
315 {
316 	// Sanity checks
317 	if (i == NULL || memlimit == NULL
318 			|| in == NULL || in_pos == NULL || *in_pos > in_size)
319 		return LZMA_PROG_ERROR;
320 
321 	// Initialize the decoder.
322 	lzma_index_coder coder;
323 	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
324 
325 	// Store the input start position so that we can restore it in case
326 	// of an error.
327 	const size_t in_start = *in_pos;
328 
329 	// Do the actual decoding.
330 	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
331 			NULL, NULL, 0, LZMA_RUN);
332 
333 	if (ret == LZMA_STREAM_END) {
334 		ret = LZMA_OK;
335 	} else {
336 		// Something went wrong, free the Index structure and restore
337 		// the input position.
338 		lzma_index_end(coder.index, allocator);
339 		*in_pos = in_start;
340 
341 		if (ret == LZMA_OK) {
342 			// The input is truncated or otherwise corrupt.
343 			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
344 			// like lzma_vli_decode() does in single-call mode.
345 			ret = LZMA_DATA_ERROR;
346 
347 		} else if (ret == LZMA_MEMLIMIT_ERROR) {
348 			// Tell the caller how much memory would have
349 			// been needed.
350 			*memlimit = lzma_index_memusage(1, coder.count);
351 		}
352 	}
353 
354 	return ret;
355 }
356