xref: /freebsd/contrib/xz/src/liblzma/common/index_decoder.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index_decoder.c
4 /// \brief      Decodes the Index field
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "index.h"
14 #include "check.h"
15 
16 
17 struct lzma_coder_s {
18 	enum {
19 		SEQ_INDICATOR,
20 		SEQ_COUNT,
21 		SEQ_MEMUSAGE,
22 		SEQ_UNPADDED,
23 		SEQ_UNCOMPRESSED,
24 		SEQ_PADDING_INIT,
25 		SEQ_PADDING,
26 		SEQ_CRC32,
27 	} sequence;
28 
29 	/// Memory usage limit
30 	uint64_t memlimit;
31 
32 	/// Target Index
33 	lzma_index *index;
34 
35 	/// Pointer give by the application, which is set after
36 	/// successful decoding.
37 	lzma_index **index_ptr;
38 
39 	/// Number of Records left to decode.
40 	lzma_vli count;
41 
42 	/// The most recent Unpadded Size field
43 	lzma_vli unpadded_size;
44 
45 	/// The most recent Uncompressed Size field
46 	lzma_vli uncompressed_size;
47 
48 	/// Position in integers
49 	size_t pos;
50 
51 	/// CRC32 of the List of Records field
52 	uint32_t crc32;
53 };
54 
55 
56 static lzma_ret
57 index_decode(lzma_coder *coder, lzma_allocator *allocator,
58 		const uint8_t *restrict in, size_t *restrict in_pos,
59 		size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
60 		size_t *restrict out_pos lzma_attribute((unused)),
61 		size_t out_size lzma_attribute((unused)),
62 		lzma_action action lzma_attribute((unused)))
63 {
64 	// Similar optimization as in index_encoder.c
65 	const size_t in_start = *in_pos;
66 	lzma_ret ret = LZMA_OK;
67 
68 	while (*in_pos < in_size)
69 	switch (coder->sequence) {
70 	case SEQ_INDICATOR:
71 		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
72 		// LZMA_FORMAT_ERROR, because a typical usage case for Index
73 		// decoder is when parsing the Stream backwards. If seeking
74 		// backward from the Stream Footer gives us something that
75 		// doesn't begin with Index Indicator, the file is considered
76 		// corrupt, not "programming error" or "unrecognized file
77 		// format". One could argue that the application should
78 		// verify the Index Indicator before trying to decode the
79 		// Index, but well, I suppose it is simpler this way.
80 		if (in[(*in_pos)++] != 0x00)
81 			return LZMA_DATA_ERROR;
82 
83 		coder->sequence = SEQ_COUNT;
84 		break;
85 
86 	case SEQ_COUNT:
87 		ret = lzma_vli_decode(&coder->count, &coder->pos,
88 				in, in_pos, in_size);
89 		if (ret != LZMA_STREAM_END)
90 			goto out;
91 
92 		coder->pos = 0;
93 		coder->sequence = SEQ_MEMUSAGE;
94 
95 	// Fall through
96 
97 	case SEQ_MEMUSAGE:
98 		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
99 			ret = LZMA_MEMLIMIT_ERROR;
100 			goto out;
101 		}
102 
103 		// Tell the Index handling code how many Records this
104 		// Index has to allow it to allocate memory more efficiently.
105 		lzma_index_prealloc(coder->index, coder->count);
106 
107 		ret = LZMA_OK;
108 		coder->sequence = coder->count == 0
109 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
110 		break;
111 
112 	case SEQ_UNPADDED:
113 	case SEQ_UNCOMPRESSED: {
114 		lzma_vli *size = coder->sequence == SEQ_UNPADDED
115 				? &coder->unpadded_size
116 				: &coder->uncompressed_size;
117 
118 		ret = lzma_vli_decode(size, &coder->pos,
119 				in, in_pos, in_size);
120 		if (ret != LZMA_STREAM_END)
121 			goto out;
122 
123 		ret = LZMA_OK;
124 		coder->pos = 0;
125 
126 		if (coder->sequence == SEQ_UNPADDED) {
127 			// Validate that encoded Unpadded Size isn't too small
128 			// or too big.
129 			if (coder->unpadded_size < UNPADDED_SIZE_MIN
130 					|| coder->unpadded_size
131 						> UNPADDED_SIZE_MAX)
132 				return LZMA_DATA_ERROR;
133 
134 			coder->sequence = SEQ_UNCOMPRESSED;
135 		} else {
136 			// Add the decoded Record to the Index.
137 			return_if_error(lzma_index_append(
138 					coder->index, allocator,
139 					coder->unpadded_size,
140 					coder->uncompressed_size));
141 
142 			// Check if this was the last Record.
143 			coder->sequence = --coder->count == 0
144 					? SEQ_PADDING_INIT
145 					: SEQ_UNPADDED;
146 		}
147 
148 		break;
149 	}
150 
151 	case SEQ_PADDING_INIT:
152 		coder->pos = lzma_index_padding_size(coder->index);
153 		coder->sequence = SEQ_PADDING;
154 
155 	// Fall through
156 
157 	case SEQ_PADDING:
158 		if (coder->pos > 0) {
159 			--coder->pos;
160 			if (in[(*in_pos)++] != 0x00)
161 				return LZMA_DATA_ERROR;
162 
163 			break;
164 		}
165 
166 		// Finish the CRC32 calculation.
167 		coder->crc32 = lzma_crc32(in + in_start,
168 				*in_pos - in_start, coder->crc32);
169 
170 		coder->sequence = SEQ_CRC32;
171 
172 	// Fall through
173 
174 	case SEQ_CRC32:
175 		do {
176 			if (*in_pos == in_size)
177 				return LZMA_OK;
178 
179 			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
180 					!= in[(*in_pos)++])
181 				return LZMA_DATA_ERROR;
182 
183 		} while (++coder->pos < 4);
184 
185 		// Decoding was successful, now we can let the application
186 		// see the decoded Index.
187 		*coder->index_ptr = coder->index;
188 
189 		// Make index NULL so we don't free it unintentionally.
190 		coder->index = NULL;
191 
192 		return LZMA_STREAM_END;
193 
194 	default:
195 		assert(0);
196 		return LZMA_PROG_ERROR;
197 	}
198 
199 out:
200 	// Update the CRC32,
201 	coder->crc32 = lzma_crc32(in + in_start,
202 			*in_pos - in_start, coder->crc32);
203 
204 	return ret;
205 }
206 
207 
208 static void
209 index_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
210 {
211 	lzma_index_end(coder->index, allocator);
212 	lzma_free(coder, allocator);
213 	return;
214 }
215 
216 
217 static lzma_ret
218 index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
219 		uint64_t *old_memlimit, uint64_t new_memlimit)
220 {
221 	*memusage = lzma_index_memusage(1, coder->count);
222 	*old_memlimit = coder->memlimit;
223 
224 	if (new_memlimit != 0) {
225 		if (new_memlimit < *memusage)
226 			return LZMA_MEMLIMIT_ERROR;
227 
228 		coder->memlimit = new_memlimit;
229 	}
230 
231 	return LZMA_OK;
232 }
233 
234 
235 static lzma_ret
236 index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator,
237 		lzma_index **i, uint64_t memlimit)
238 {
239 	// Remember the pointer given by the application. We will set it
240 	// to point to the decoded Index only if decoding is successful.
241 	// Before that, keep it NULL so that applications can always safely
242 	// pass it to lzma_index_end() no matter did decoding succeed or not.
243 	coder->index_ptr = i;
244 	*i = NULL;
245 
246 	// We always allocate a new lzma_index.
247 	coder->index = lzma_index_init(allocator);
248 	if (coder->index == NULL)
249 		return LZMA_MEM_ERROR;
250 
251 	// Initialize the rest.
252 	coder->sequence = SEQ_INDICATOR;
253 	coder->memlimit = memlimit;
254 	coder->count = 0; // Needs to be initialized due to _memconfig().
255 	coder->pos = 0;
256 	coder->crc32 = 0;
257 
258 	return LZMA_OK;
259 }
260 
261 
262 static lzma_ret
263 index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
264 		lzma_index **i, uint64_t memlimit)
265 {
266 	lzma_next_coder_init(&index_decoder_init, next, allocator);
267 
268 	if (i == NULL || memlimit == 0)
269 		return LZMA_PROG_ERROR;
270 
271 	if (next->coder == NULL) {
272 		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
273 		if (next->coder == NULL)
274 			return LZMA_MEM_ERROR;
275 
276 		next->code = &index_decode;
277 		next->end = &index_decoder_end;
278 		next->memconfig = &index_decoder_memconfig;
279 		next->coder->index = NULL;
280 	} else {
281 		lzma_index_end(next->coder->index, allocator);
282 	}
283 
284 	return index_decoder_reset(next->coder, allocator, i, memlimit);
285 }
286 
287 
288 extern LZMA_API(lzma_ret)
289 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
290 {
291 	lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
292 
293 	strm->internal->supported_actions[LZMA_RUN] = true;
294 	strm->internal->supported_actions[LZMA_FINISH] = true;
295 
296 	return LZMA_OK;
297 }
298 
299 
300 extern LZMA_API(lzma_ret)
301 lzma_index_buffer_decode(
302 		lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator,
303 		const uint8_t *in, size_t *in_pos, size_t in_size)
304 {
305 	// Sanity checks
306 	if (i == NULL || memlimit == NULL
307 			|| in == NULL || in_pos == NULL || *in_pos > in_size)
308 		return LZMA_PROG_ERROR;
309 
310 	// Initialize the decoder.
311 	lzma_coder coder;
312 	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
313 
314 	// Store the input start position so that we can restore it in case
315 	// of an error.
316 	const size_t in_start = *in_pos;
317 
318 	// Do the actual decoding.
319 	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
320 			NULL, NULL, 0, LZMA_RUN);
321 
322 	if (ret == LZMA_STREAM_END) {
323 		ret = LZMA_OK;
324 	} else {
325 		// Something went wrong, free the Index structure and restore
326 		// the input position.
327 		lzma_index_end(coder.index, allocator);
328 		*in_pos = in_start;
329 
330 		if (ret == LZMA_OK) {
331 			// The input is truncated or otherwise corrupt.
332 			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
333 			// like lzma_vli_decode() does in single-call mode.
334 			ret = LZMA_DATA_ERROR;
335 
336 		} else if (ret == LZMA_MEMLIMIT_ERROR) {
337 			// Tell the caller how much memory would have
338 			// been needed.
339 			*memlimit = lzma_index_memusage(1, coder.count);
340 		}
341 	}
342 
343 	return ret;
344 }
345