xref: /freebsd/contrib/xz/src/liblzma/common/index_decoder.c (revision f7c32ed617858bcd22f8d1b03199099d50125721)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index_decoder.c
4 /// \brief      Decodes the Index field
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "index.h"
14 #include "check.h"
15 
16 
/// State of an Index decoder. It is kept between calls to index_decode()
/// so that the Index field can be decoded in as many pieces as needed.
typedef struct {
	/// Which part of the Index field is being decoded
	enum {
		/// Index Indicator byte, which has to be 0x00
		SEQ_INDICATOR,

		/// Number of Records, stored as a variable-length integer
		SEQ_COUNT,

		/// Check the memory usage of an Index having that many
		/// Records against the memory usage limit.
		SEQ_MEMUSAGE,

		/// Unpadded Size field of a Record
		SEQ_UNPADDED,

		/// Uncompressed Size field of a Record
		SEQ_UNCOMPRESSED,

		/// Find out how many Index Padding bytes to expect.
		SEQ_PADDING_INIT,

		/// Index Padding bytes, each of which has to be 0x00
		SEQ_PADDING,

		/// The four bytes of the CRC32 field
		SEQ_CRC32,
	} sequence;

	/// Memory usage limit
	uint64_t memlimit;

	/// Target Index. This is owned by the decoder until decoding has
	/// finished successfully; after that this is set to NULL.
	lzma_index *index;

	/// Pointer given by the application, which is set after
	/// successful decoding.
	lzma_index **index_ptr;

	/// Number of Records left to decode.
	lzma_vli count;

	/// The most recent Unpadded Size field
	lzma_vli unpadded_size;

	/// The most recent Uncompressed Size field
	lzma_vli uncompressed_size;

	/// Position in integers. Depending on the sequence, this is
	/// the position inside a variable-length integer, the number of
	/// Index Padding bytes left, or the byte position inside
	/// the CRC32 field.
	size_t pos;

	/// CRC32 calculated over the input that was consumed before
	/// the CRC32 field
	uint32_t crc32;
} lzma_index_coder;
54 
55 
56 static lzma_ret
57 index_decode(void *coder_ptr, const lzma_allocator *allocator,
58 		const uint8_t *restrict in, size_t *restrict in_pos,
59 		size_t in_size,
60 		uint8_t *restrict out lzma_attribute((__unused__)),
61 		size_t *restrict out_pos lzma_attribute((__unused__)),
62 		size_t out_size lzma_attribute((__unused__)),
63 		lzma_action action lzma_attribute((__unused__)))
64 {
65 	lzma_index_coder *coder = coder_ptr;
66 
67 	// Similar optimization as in index_encoder.c
68 	const size_t in_start = *in_pos;
69 	lzma_ret ret = LZMA_OK;
70 
71 	while (*in_pos < in_size)
72 	switch (coder->sequence) {
73 	case SEQ_INDICATOR:
74 		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
75 		// LZMA_FORMAT_ERROR, because a typical usage case for Index
76 		// decoder is when parsing the Stream backwards. If seeking
77 		// backward from the Stream Footer gives us something that
78 		// doesn't begin with Index Indicator, the file is considered
79 		// corrupt, not "programming error" or "unrecognized file
80 		// format". One could argue that the application should
81 		// verify the Index Indicator before trying to decode the
82 		// Index, but well, I suppose it is simpler this way.
83 		if (in[(*in_pos)++] != 0x00)
84 			return LZMA_DATA_ERROR;
85 
86 		coder->sequence = SEQ_COUNT;
87 		break;
88 
89 	case SEQ_COUNT:
90 		ret = lzma_vli_decode(&coder->count, &coder->pos,
91 				in, in_pos, in_size);
92 		if (ret != LZMA_STREAM_END)
93 			goto out;
94 
95 		coder->pos = 0;
96 		coder->sequence = SEQ_MEMUSAGE;
97 
98 	// Fall through
99 
100 	case SEQ_MEMUSAGE:
101 		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
102 			ret = LZMA_MEMLIMIT_ERROR;
103 			goto out;
104 		}
105 
106 		// Tell the Index handling code how many Records this
107 		// Index has to allow it to allocate memory more efficiently.
108 		lzma_index_prealloc(coder->index, coder->count);
109 
110 		ret = LZMA_OK;
111 		coder->sequence = coder->count == 0
112 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
113 		break;
114 
115 	case SEQ_UNPADDED:
116 	case SEQ_UNCOMPRESSED: {
117 		lzma_vli *size = coder->sequence == SEQ_UNPADDED
118 				? &coder->unpadded_size
119 				: &coder->uncompressed_size;
120 
121 		ret = lzma_vli_decode(size, &coder->pos,
122 				in, in_pos, in_size);
123 		if (ret != LZMA_STREAM_END)
124 			goto out;
125 
126 		ret = LZMA_OK;
127 		coder->pos = 0;
128 
129 		if (coder->sequence == SEQ_UNPADDED) {
130 			// Validate that encoded Unpadded Size isn't too small
131 			// or too big.
132 			if (coder->unpadded_size < UNPADDED_SIZE_MIN
133 					|| coder->unpadded_size
134 						> UNPADDED_SIZE_MAX)
135 				return LZMA_DATA_ERROR;
136 
137 			coder->sequence = SEQ_UNCOMPRESSED;
138 		} else {
139 			// Add the decoded Record to the Index.
140 			return_if_error(lzma_index_append(
141 					coder->index, allocator,
142 					coder->unpadded_size,
143 					coder->uncompressed_size));
144 
145 			// Check if this was the last Record.
146 			coder->sequence = --coder->count == 0
147 					? SEQ_PADDING_INIT
148 					: SEQ_UNPADDED;
149 		}
150 
151 		break;
152 	}
153 
154 	case SEQ_PADDING_INIT:
155 		coder->pos = lzma_index_padding_size(coder->index);
156 		coder->sequence = SEQ_PADDING;
157 
158 	// Fall through
159 
160 	case SEQ_PADDING:
161 		if (coder->pos > 0) {
162 			--coder->pos;
163 			if (in[(*in_pos)++] != 0x00)
164 				return LZMA_DATA_ERROR;
165 
166 			break;
167 		}
168 
169 		// Finish the CRC32 calculation.
170 		coder->crc32 = lzma_crc32(in + in_start,
171 				*in_pos - in_start, coder->crc32);
172 
173 		coder->sequence = SEQ_CRC32;
174 
175 	// Fall through
176 
177 	case SEQ_CRC32:
178 		do {
179 			if (*in_pos == in_size)
180 				return LZMA_OK;
181 
182 			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
183 					!= in[(*in_pos)++])
184 				return LZMA_DATA_ERROR;
185 
186 		} while (++coder->pos < 4);
187 
188 		// Decoding was successful, now we can let the application
189 		// see the decoded Index.
190 		*coder->index_ptr = coder->index;
191 
192 		// Make index NULL so we don't free it unintentionally.
193 		coder->index = NULL;
194 
195 		return LZMA_STREAM_END;
196 
197 	default:
198 		assert(0);
199 		return LZMA_PROG_ERROR;
200 	}
201 
202 out:
203 	// Update the CRC32,
204 	coder->crc32 = lzma_crc32(in + in_start,
205 			*in_pos - in_start, coder->crc32);
206 
207 	return ret;
208 }
209 
210 
211 static void
212 index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
213 {
214 	lzma_index_coder *coder = coder_ptr;
215 	lzma_index_end(coder->index, allocator);
216 	lzma_free(coder, allocator);
217 	return;
218 }
219 
220 
221 static lzma_ret
222 index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
223 		uint64_t *old_memlimit, uint64_t new_memlimit)
224 {
225 	lzma_index_coder *coder = coder_ptr;
226 
227 	*memusage = lzma_index_memusage(1, coder->count);
228 	*old_memlimit = coder->memlimit;
229 
230 	if (new_memlimit != 0) {
231 		if (new_memlimit < *memusage)
232 			return LZMA_MEMLIMIT_ERROR;
233 
234 		coder->memlimit = new_memlimit;
235 	}
236 
237 	return LZMA_OK;
238 }
239 
240 
241 static lzma_ret
242 index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
243 		lzma_index **i, uint64_t memlimit)
244 {
245 	// Remember the pointer given by the application. We will set it
246 	// to point to the decoded Index only if decoding is successful.
247 	// Before that, keep it NULL so that applications can always safely
248 	// pass it to lzma_index_end() no matter did decoding succeed or not.
249 	coder->index_ptr = i;
250 	*i = NULL;
251 
252 	// We always allocate a new lzma_index.
253 	coder->index = lzma_index_init(allocator);
254 	if (coder->index == NULL)
255 		return LZMA_MEM_ERROR;
256 
257 	// Initialize the rest.
258 	coder->sequence = SEQ_INDICATOR;
259 	coder->memlimit = my_max(1, memlimit);
260 	coder->count = 0; // Needs to be initialized due to _memconfig().
261 	coder->pos = 0;
262 	coder->crc32 = 0;
263 
264 	return LZMA_OK;
265 }
266 
267 
268 static lzma_ret
269 index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
270 		lzma_index **i, uint64_t memlimit)
271 {
272 	lzma_next_coder_init(&index_decoder_init, next, allocator);
273 
274 	if (i == NULL)
275 		return LZMA_PROG_ERROR;
276 
277 	lzma_index_coder *coder = next->coder;
278 	if (coder == NULL) {
279 		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
280 		if (coder == NULL)
281 			return LZMA_MEM_ERROR;
282 
283 		next->coder = coder;
284 		next->code = &index_decode;
285 		next->end = &index_decoder_end;
286 		next->memconfig = &index_decoder_memconfig;
287 		coder->index = NULL;
288 	} else {
289 		lzma_index_end(coder->index, allocator);
290 	}
291 
292 	return index_decoder_reset(coder, allocator, i, memlimit);
293 }
294 
295 
296 extern LZMA_API(lzma_ret)
297 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
298 {
299 	lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
300 
301 	strm->internal->supported_actions[LZMA_RUN] = true;
302 	strm->internal->supported_actions[LZMA_FINISH] = true;
303 
304 	return LZMA_OK;
305 }
306 
307 
308 extern LZMA_API(lzma_ret)
309 lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
310 		const lzma_allocator *allocator,
311 		const uint8_t *in, size_t *in_pos, size_t in_size)
312 {
313 	// Sanity checks
314 	if (i == NULL || memlimit == NULL
315 			|| in == NULL || in_pos == NULL || *in_pos > in_size)
316 		return LZMA_PROG_ERROR;
317 
318 	// Initialize the decoder.
319 	lzma_index_coder coder;
320 	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
321 
322 	// Store the input start position so that we can restore it in case
323 	// of an error.
324 	const size_t in_start = *in_pos;
325 
326 	// Do the actual decoding.
327 	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
328 			NULL, NULL, 0, LZMA_RUN);
329 
330 	if (ret == LZMA_STREAM_END) {
331 		ret = LZMA_OK;
332 	} else {
333 		// Something went wrong, free the Index structure and restore
334 		// the input position.
335 		lzma_index_end(coder.index, allocator);
336 		*in_pos = in_start;
337 
338 		if (ret == LZMA_OK) {
339 			// The input is truncated or otherwise corrupt.
340 			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
341 			// like lzma_vli_decode() does in single-call mode.
342 			ret = LZMA_DATA_ERROR;
343 
344 		} else if (ret == LZMA_MEMLIMIT_ERROR) {
345 			// Tell the caller how much memory would have
346 			// been needed.
347 			*memlimit = lzma_index_memusage(1, coder.count);
348 		}
349 	}
350 
351 	return ret;
352 }
353