xref: /freebsd/contrib/xz/src/liblzma/common/index_decoder.c (revision 532000256b898d5d3b0067ffa328715d18f4776d)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index_decoder.c
4 /// \brief      Decodes the Index field
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "index.h"
14 #include "check.h"
15 
16 
17 struct lzma_coder_s {
18 	enum {
19 		SEQ_INDICATOR,
20 		SEQ_COUNT,
21 		SEQ_MEMUSAGE,
22 		SEQ_UNPADDED,
23 		SEQ_UNCOMPRESSED,
24 		SEQ_PADDING_INIT,
25 		SEQ_PADDING,
26 		SEQ_CRC32,
27 	} sequence;
28 
29 	/// Memory usage limit
30 	uint64_t memlimit;
31 
32 	/// Target Index
33 	lzma_index *index;
34 
35 	/// Pointer give by the application, which is set after
36 	/// successful decoding.
37 	lzma_index **index_ptr;
38 
39 	/// Number of Records left to decode.
40 	lzma_vli count;
41 
42 	/// The most recent Unpadded Size field
43 	lzma_vli unpadded_size;
44 
45 	/// The most recent Uncompressed Size field
46 	lzma_vli uncompressed_size;
47 
48 	/// Position in integers
49 	size_t pos;
50 
51 	/// CRC32 of the List of Records field
52 	uint32_t crc32;
53 };
54 
55 
56 static lzma_ret
57 index_decode(lzma_coder *coder, const lzma_allocator *allocator,
58 		const uint8_t *restrict in, size_t *restrict in_pos,
59 		size_t in_size,
60 		uint8_t *restrict out lzma_attribute((__unused__)),
61 		size_t *restrict out_pos lzma_attribute((__unused__)),
62 		size_t out_size lzma_attribute((__unused__)),
63 		lzma_action action lzma_attribute((__unused__)))
64 {
65 	// Similar optimization as in index_encoder.c
66 	const size_t in_start = *in_pos;
67 	lzma_ret ret = LZMA_OK;
68 
69 	while (*in_pos < in_size)
70 	switch (coder->sequence) {
71 	case SEQ_INDICATOR:
72 		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
73 		// LZMA_FORMAT_ERROR, because a typical usage case for Index
74 		// decoder is when parsing the Stream backwards. If seeking
75 		// backward from the Stream Footer gives us something that
76 		// doesn't begin with Index Indicator, the file is considered
77 		// corrupt, not "programming error" or "unrecognized file
78 		// format". One could argue that the application should
79 		// verify the Index Indicator before trying to decode the
80 		// Index, but well, I suppose it is simpler this way.
81 		if (in[(*in_pos)++] != 0x00)
82 			return LZMA_DATA_ERROR;
83 
84 		coder->sequence = SEQ_COUNT;
85 		break;
86 
87 	case SEQ_COUNT:
88 		ret = lzma_vli_decode(&coder->count, &coder->pos,
89 				in, in_pos, in_size);
90 		if (ret != LZMA_STREAM_END)
91 			goto out;
92 
93 		coder->pos = 0;
94 		coder->sequence = SEQ_MEMUSAGE;
95 
96 	// Fall through
97 
98 	case SEQ_MEMUSAGE:
99 		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
100 			ret = LZMA_MEMLIMIT_ERROR;
101 			goto out;
102 		}
103 
104 		// Tell the Index handling code how many Records this
105 		// Index has to allow it to allocate memory more efficiently.
106 		lzma_index_prealloc(coder->index, coder->count);
107 
108 		ret = LZMA_OK;
109 		coder->sequence = coder->count == 0
110 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
111 		break;
112 
113 	case SEQ_UNPADDED:
114 	case SEQ_UNCOMPRESSED: {
115 		lzma_vli *size = coder->sequence == SEQ_UNPADDED
116 				? &coder->unpadded_size
117 				: &coder->uncompressed_size;
118 
119 		ret = lzma_vli_decode(size, &coder->pos,
120 				in, in_pos, in_size);
121 		if (ret != LZMA_STREAM_END)
122 			goto out;
123 
124 		ret = LZMA_OK;
125 		coder->pos = 0;
126 
127 		if (coder->sequence == SEQ_UNPADDED) {
128 			// Validate that encoded Unpadded Size isn't too small
129 			// or too big.
130 			if (coder->unpadded_size < UNPADDED_SIZE_MIN
131 					|| coder->unpadded_size
132 						> UNPADDED_SIZE_MAX)
133 				return LZMA_DATA_ERROR;
134 
135 			coder->sequence = SEQ_UNCOMPRESSED;
136 		} else {
137 			// Add the decoded Record to the Index.
138 			return_if_error(lzma_index_append(
139 					coder->index, allocator,
140 					coder->unpadded_size,
141 					coder->uncompressed_size));
142 
143 			// Check if this was the last Record.
144 			coder->sequence = --coder->count == 0
145 					? SEQ_PADDING_INIT
146 					: SEQ_UNPADDED;
147 		}
148 
149 		break;
150 	}
151 
152 	case SEQ_PADDING_INIT:
153 		coder->pos = lzma_index_padding_size(coder->index);
154 		coder->sequence = SEQ_PADDING;
155 
156 	// Fall through
157 
158 	case SEQ_PADDING:
159 		if (coder->pos > 0) {
160 			--coder->pos;
161 			if (in[(*in_pos)++] != 0x00)
162 				return LZMA_DATA_ERROR;
163 
164 			break;
165 		}
166 
167 		// Finish the CRC32 calculation.
168 		coder->crc32 = lzma_crc32(in + in_start,
169 				*in_pos - in_start, coder->crc32);
170 
171 		coder->sequence = SEQ_CRC32;
172 
173 	// Fall through
174 
175 	case SEQ_CRC32:
176 		do {
177 			if (*in_pos == in_size)
178 				return LZMA_OK;
179 
180 			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
181 					!= in[(*in_pos)++])
182 				return LZMA_DATA_ERROR;
183 
184 		} while (++coder->pos < 4);
185 
186 		// Decoding was successful, now we can let the application
187 		// see the decoded Index.
188 		*coder->index_ptr = coder->index;
189 
190 		// Make index NULL so we don't free it unintentionally.
191 		coder->index = NULL;
192 
193 		return LZMA_STREAM_END;
194 
195 	default:
196 		assert(0);
197 		return LZMA_PROG_ERROR;
198 	}
199 
200 out:
201 	// Update the CRC32,
202 	coder->crc32 = lzma_crc32(in + in_start,
203 			*in_pos - in_start, coder->crc32);
204 
205 	return ret;
206 }
207 
208 
209 static void
210 index_decoder_end(lzma_coder *coder, const lzma_allocator *allocator)
211 {
212 	lzma_index_end(coder->index, allocator);
213 	lzma_free(coder, allocator);
214 	return;
215 }
216 
217 
218 static lzma_ret
219 index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
220 		uint64_t *old_memlimit, uint64_t new_memlimit)
221 {
222 	*memusage = lzma_index_memusage(1, coder->count);
223 	*old_memlimit = coder->memlimit;
224 
225 	if (new_memlimit != 0) {
226 		if (new_memlimit < *memusage)
227 			return LZMA_MEMLIMIT_ERROR;
228 
229 		coder->memlimit = new_memlimit;
230 	}
231 
232 	return LZMA_OK;
233 }
234 
235 
236 static lzma_ret
237 index_decoder_reset(lzma_coder *coder, const lzma_allocator *allocator,
238 		lzma_index **i, uint64_t memlimit)
239 {
240 	// Remember the pointer given by the application. We will set it
241 	// to point to the decoded Index only if decoding is successful.
242 	// Before that, keep it NULL so that applications can always safely
243 	// pass it to lzma_index_end() no matter did decoding succeed or not.
244 	coder->index_ptr = i;
245 	*i = NULL;
246 
247 	// We always allocate a new lzma_index.
248 	coder->index = lzma_index_init(allocator);
249 	if (coder->index == NULL)
250 		return LZMA_MEM_ERROR;
251 
252 	// Initialize the rest.
253 	coder->sequence = SEQ_INDICATOR;
254 	coder->memlimit = memlimit;
255 	coder->count = 0; // Needs to be initialized due to _memconfig().
256 	coder->pos = 0;
257 	coder->crc32 = 0;
258 
259 	return LZMA_OK;
260 }
261 
262 
263 static lzma_ret
264 index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
265 		lzma_index **i, uint64_t memlimit)
266 {
267 	lzma_next_coder_init(&index_decoder_init, next, allocator);
268 
269 	if (i == NULL || memlimit == 0)
270 		return LZMA_PROG_ERROR;
271 
272 	if (next->coder == NULL) {
273 		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
274 		if (next->coder == NULL)
275 			return LZMA_MEM_ERROR;
276 
277 		next->code = &index_decode;
278 		next->end = &index_decoder_end;
279 		next->memconfig = &index_decoder_memconfig;
280 		next->coder->index = NULL;
281 	} else {
282 		lzma_index_end(next->coder->index, allocator);
283 	}
284 
285 	return index_decoder_reset(next->coder, allocator, i, memlimit);
286 }
287 
288 
289 extern LZMA_API(lzma_ret)
290 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
291 {
292 	lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
293 
294 	strm->internal->supported_actions[LZMA_RUN] = true;
295 	strm->internal->supported_actions[LZMA_FINISH] = true;
296 
297 	return LZMA_OK;
298 }
299 
300 
301 extern LZMA_API(lzma_ret)
302 lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
303 		const lzma_allocator *allocator,
304 		const uint8_t *in, size_t *in_pos, size_t in_size)
305 {
306 	// Sanity checks
307 	if (i == NULL || memlimit == NULL
308 			|| in == NULL || in_pos == NULL || *in_pos > in_size)
309 		return LZMA_PROG_ERROR;
310 
311 	// Initialize the decoder.
312 	lzma_coder coder;
313 	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
314 
315 	// Store the input start position so that we can restore it in case
316 	// of an error.
317 	const size_t in_start = *in_pos;
318 
319 	// Do the actual decoding.
320 	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
321 			NULL, NULL, 0, LZMA_RUN);
322 
323 	if (ret == LZMA_STREAM_END) {
324 		ret = LZMA_OK;
325 	} else {
326 		// Something went wrong, free the Index structure and restore
327 		// the input position.
328 		lzma_index_end(coder.index, allocator);
329 		*in_pos = in_start;
330 
331 		if (ret == LZMA_OK) {
332 			// The input is truncated or otherwise corrupt.
333 			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
334 			// like lzma_vli_decode() does in single-call mode.
335 			ret = LZMA_DATA_ERROR;
336 
337 		} else if (ret == LZMA_MEMLIMIT_ERROR) {
338 			// Tell the caller how much memory would have
339 			// been needed.
340 			*memlimit = lzma_index_memusage(1, coder.count);
341 		}
342 	}
343 
344 	return ret;
345 }
346