xref: /freebsd/contrib/xz/src/liblzma/common/file_info.c (revision 128836d304d93f2d00eb14069c27089ab46c38d4)
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       file_info.c
/// \brief      Decode .xz file information into a lzma_index structure
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
11 
12 #include "index_decoder.h"
13 
14 
15 typedef struct {
16 	enum {
17 		SEQ_MAGIC_BYTES,
18 		SEQ_PADDING_SEEK,
19 		SEQ_PADDING_DECODE,
20 		SEQ_FOOTER,
21 		SEQ_INDEX_INIT,
22 		SEQ_INDEX_DECODE,
23 		SEQ_HEADER_DECODE,
24 		SEQ_HEADER_COMPARE,
25 	} sequence;
26 
27 	/// Absolute position of in[*in_pos] in the file. All code that
28 	/// modifies *in_pos also updates this. seek_to_pos() needs this
29 	/// to determine if we need to request the application to seek for
30 	/// us or if we can do the seeking internally by adjusting *in_pos.
31 	uint64_t file_cur_pos;
32 
33 	/// This refers to absolute positions of interesting parts of the
34 	/// input file. Sometimes it points to the *beginning* of a specific
35 	/// field and sometimes to the *end* of a field. The current target
36 	/// position at each moment is explained in the comments.
37 	uint64_t file_target_pos;
38 
39 	/// Size of the .xz file (from the application).
40 	uint64_t file_size;
41 
42 	/// Index decoder
43 	lzma_next_coder index_decoder;
44 
45 	/// Number of bytes remaining in the Index field that is currently
46 	/// being decoded.
47 	lzma_vli index_remaining;
48 
49 	/// The Index decoder will store the decoded Index in this pointer.
50 	lzma_index *this_index;
51 
52 	/// Amount of Stream Padding in the current Stream.
53 	lzma_vli stream_padding;
54 
55 	/// The final combined index is collected here.
56 	lzma_index *combined_index;
57 
58 	/// Pointer from the application where to store the index information
59 	/// after successful decoding.
60 	lzma_index **dest_index;
61 
62 	/// Pointer to lzma_stream.seek_pos to be used when returning
63 	/// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
64 	uint64_t *external_seek_pos;
65 
66 	/// Memory usage limit
67 	uint64_t memlimit;
68 
69 	/// Stream Flags from the very beginning of the file.
70 	lzma_stream_flags first_header_flags;
71 
72 	/// Stream Flags from Stream Header of the current Stream.
73 	lzma_stream_flags header_flags;
74 
75 	/// Stream Flags from Stream Footer of the current Stream.
76 	lzma_stream_flags footer_flags;
77 
78 	size_t temp_pos;
79 	size_t temp_size;
80 	uint8_t temp[8192];
81 
82 } lzma_file_info_coder;
83 
84 
85 /// Copies data from in[*in_pos] into coder->temp until
86 /// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
87 /// in sync with *in_pos. Returns true if more input is needed.
88 static bool
fill_temp(lzma_file_info_coder * coder,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size)89 fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
90 		size_t *restrict in_pos, size_t in_size)
91 {
92 	coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
93 			coder->temp, &coder->temp_pos, coder->temp_size);
94 	return coder->temp_pos < coder->temp_size;
95 }
96 
97 
98 /// Seeks to the absolute file position specified by target_pos.
99 /// This tries to do the seeking by only modifying *in_pos, if possible.
100 /// The main benefit of this is that if one passes the whole file at once
101 /// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
102 /// as all the seeking can be done by adjusting *in_pos in this function.
103 ///
104 /// Returns true if an external seek is needed and the caller must return
105 /// LZMA_SEEK_NEEDED.
106 static bool
seek_to_pos(lzma_file_info_coder * coder,uint64_t target_pos,size_t in_start,size_t * in_pos,size_t in_size)107 seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
108 		size_t in_start, size_t *in_pos, size_t in_size)
109 {
110 	// The input buffer doesn't extend beyond the end of the file.
111 	// This has been checked by file_info_decode() already.
112 	assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
113 
114 	const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
115 	const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
116 
117 	bool external_seek_needed;
118 
119 	if (target_pos >= pos_min && target_pos <= pos_max) {
120 		// The requested position is available in the current input
121 		// buffer or right after it. That is, in a corner case we
122 		// end up setting *in_pos == in_size and thus will immediately
123 		// need new input bytes from the application.
124 		*in_pos += (size_t)(target_pos - coder->file_cur_pos);
125 		external_seek_needed = false;
126 	} else {
127 		// Ask the application to seek the input file.
128 		*coder->external_seek_pos = target_pos;
129 		external_seek_needed = true;
130 
131 		// Mark the whole input buffer as used. This way
132 		// lzma_stream.total_in will have a better estimate
133 		// of the amount of data read. It still won't be perfect
134 		// as the value will depend on the input buffer size that
135 		// the application uses, but it should be good enough for
136 		// those few who want an estimate.
137 		*in_pos = in_size;
138 	}
139 
140 	// After seeking (internal or external) the current position
141 	// will match the requested target position.
142 	coder->file_cur_pos = target_pos;
143 
144 	return external_seek_needed;
145 }
146 
147 
148 /// The caller sets coder->file_target_pos so that it points to the *end*
149 /// of the desired file position. This function then determines how far
150 /// backwards from that position we can seek. After seeking fill_temp()
151 /// can be used to read data into coder->temp. When fill_temp() has finished,
152 /// coder->temp[coder->temp_size] will match coder->file_target_pos.
153 ///
154 /// This also validates that coder->target_file_pos is sane in sense that
155 /// we aren't trying to seek too far backwards (too close or beyond the
156 /// beginning of the file).
157 static lzma_ret
reverse_seek(lzma_file_info_coder * coder,size_t in_start,size_t * in_pos,size_t in_size)158 reverse_seek(lzma_file_info_coder *coder,
159 		size_t in_start, size_t *in_pos, size_t in_size)
160 {
161 	// Check that there is enough data before the target position
162 	// to contain at least Stream Header and Stream Footer. If there
163 	// isn't, the file cannot be valid.
164 	if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
165 		return LZMA_DATA_ERROR;
166 
167 	coder->temp_pos = 0;
168 
169 	// The Stream Header at the very beginning of the file gets handled
170 	// specially in SEQ_MAGIC_BYTES and thus we will never need to seek
171 	// there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
172 	// we avoid a useless external seek after SEQ_MAGIC_BYTES if the
173 	// application uses an extremely small input buffer and the input
174 	// file is very small.
175 	if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
176 			< sizeof(coder->temp))
177 		coder->temp_size = (size_t)(coder->file_target_pos
178 				- LZMA_STREAM_HEADER_SIZE);
179 	else
180 		coder->temp_size = sizeof(coder->temp);
181 
182 	// The above if-statements guarantee this. This is important because
183 	// the Stream Header/Footer decoders assume that there's at least
184 	// LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
185 	assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
186 
187 	if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
188 			in_start, in_pos, in_size))
189 		return LZMA_SEEK_NEEDED;
190 
191 	return LZMA_OK;
192 }
193 
194 
/// Gets the number of zero-bytes at the end of the buffer.
///
/// \param      buf       Buffer to scan; it is read backwards starting
///                       from buf[buf_size - 1].
/// \param      buf_size  Number of bytes in buf. With zero nothing is read.
///
/// \return     Number of consecutive 0x00 bytes at the end of buf. This
///             equals buf_size if the whole buffer is zeros.
static size_t
get_padding_size(const uint8_t *buf, size_t buf_size)
{
	size_t padding = 0;
	while (buf_size > 0 && buf[--buf_size] == 0x00)
		++padding;

	return padding;
}
205 
206 
207 /// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
208 /// is used to tell the application that Magic Bytes didn't match. In other
209 /// Stream Header/Footer fields (in the middle/end of the file) it could be
210 /// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
211 /// is a valid Stream Header at the beginning of the file. For those cases
212 /// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
213 static lzma_ret
hide_format_error(lzma_ret ret)214 hide_format_error(lzma_ret ret)
215 {
216 	if (ret == LZMA_FORMAT_ERROR)
217 		ret = LZMA_DATA_ERROR;
218 
219 	return ret;
220 }
221 
222 
223 /// Calls the Index decoder and updates coder->index_remaining.
224 /// This is a separate function because the input can be either directly
225 /// from the application or from coder->temp.
226 static lzma_ret
decode_index(lzma_file_info_coder * coder,const lzma_allocator * allocator,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,bool update_file_cur_pos)227 decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
228 		const uint8_t *restrict in, size_t *restrict in_pos,
229 		size_t in_size, bool update_file_cur_pos)
230 {
231 	const size_t in_start = *in_pos;
232 
233 	const lzma_ret ret = coder->index_decoder.code(
234 			coder->index_decoder.coder,
235 			allocator, in, in_pos, in_size,
236 			NULL, NULL, 0, LZMA_RUN);
237 
238 	coder->index_remaining -= *in_pos - in_start;
239 
240 	if (update_file_cur_pos)
241 		coder->file_cur_pos += *in_pos - in_start;
242 
243 	return ret;
244 }
245 
246 
247 static lzma_ret
file_info_decode(void * coder_ptr,const lzma_allocator * allocator,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,uint8_t * restrict out lzma_attribute ((__unused__)),size_t * restrict out_pos lzma_attribute ((__unused__)),size_t out_size lzma_attribute ((__unused__)),lzma_action action lzma_attribute ((__unused__)))248 file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
249 		const uint8_t *restrict in, size_t *restrict in_pos,
250 		size_t in_size,
251 		uint8_t *restrict out lzma_attribute((__unused__)),
252 		size_t *restrict out_pos lzma_attribute((__unused__)),
253 		size_t out_size lzma_attribute((__unused__)),
254 		lzma_action action lzma_attribute((__unused__)))
255 {
256 	lzma_file_info_coder *coder = coder_ptr;
257 	const size_t in_start = *in_pos;
258 
259 	// If the caller provides input past the end of the file, trim
260 	// the extra bytes from the buffer so that we won't read too far.
261 	assert(coder->file_size >= coder->file_cur_pos);
262 	if (coder->file_size - coder->file_cur_pos < in_size - in_start)
263 		in_size = in_start
264 			+ (size_t)(coder->file_size - coder->file_cur_pos);
265 
266 	while (true)
267 	switch (coder->sequence) {
268 	case SEQ_MAGIC_BYTES:
269 		// Decode the Stream Header at the beginning of the file
270 		// first to check if the Magic Bytes match. The flags
271 		// are stored in coder->first_header_flags so that we
272 		// don't need to seek to it again.
273 		//
274 		// Check that the file is big enough to contain at least
275 		// Stream Header.
276 		if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
277 			return LZMA_FORMAT_ERROR;
278 
279 		// Read the Stream Header field into coder->temp.
280 		if (fill_temp(coder, in, in_pos, in_size))
281 			return LZMA_OK;
282 
283 		// This is the only Stream Header/Footer decoding where we
284 		// want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
285 		// match. Elsewhere it will be converted to LZMA_DATA_ERROR.
286 		return_if_error(lzma_stream_header_decode(
287 				&coder->first_header_flags, coder->temp));
288 
289 		// Now that we know that the Magic Bytes match, check the
290 		// file size. It's better to do this here after checking the
291 		// Magic Bytes since this way we can give LZMA_FORMAT_ERROR
292 		// instead of LZMA_DATA_ERROR when the Magic Bytes don't
293 		// match in a file that is too big or isn't a multiple of
294 		// four bytes.
295 		if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
296 			return LZMA_DATA_ERROR;
297 
298 		// Start looking for Stream Padding and Stream Footer
299 		// at the end of the file.
300 		coder->file_target_pos = coder->file_size;
301 		FALLTHROUGH;
302 
303 	case SEQ_PADDING_SEEK:
304 		coder->sequence = SEQ_PADDING_DECODE;
305 		return_if_error(reverse_seek(
306 				coder, in_start, in_pos, in_size));
307 		FALLTHROUGH;
308 
309 	case SEQ_PADDING_DECODE: {
310 		// Copy to coder->temp first. This keeps the code simpler if
311 		// the application only provides input a few bytes at a time.
312 		if (fill_temp(coder, in, in_pos, in_size))
313 			return LZMA_OK;
314 
315 		// Scan the buffer backwards to get the size of the
316 		// Stream Padding field (if any).
317 		const size_t new_padding = get_padding_size(
318 				coder->temp, coder->temp_size);
319 		coder->stream_padding += new_padding;
320 
321 		// Set the target position to the beginning of Stream Padding
322 		// that has been observed so far. If all Stream Padding has
323 		// been seen, then the target position will be at the end
324 		// of the Stream Footer field.
325 		coder->file_target_pos -= new_padding;
326 
327 		if (new_padding == coder->temp_size) {
328 			// The whole buffer was padding. Seek backwards in
329 			// the file to get more input.
330 			coder->sequence = SEQ_PADDING_SEEK;
331 			break;
332 		}
333 
334 		// Size of Stream Padding must be a multiple of 4 bytes.
335 		if (coder->stream_padding & 3)
336 			return LZMA_DATA_ERROR;
337 
338 		coder->sequence = SEQ_FOOTER;
339 
340 		// Calculate the amount of non-padding data in coder->temp.
341 		coder->temp_size -= new_padding;
342 		coder->temp_pos = coder->temp_size;
343 
344 		// We can avoid an external seek if the whole Stream Footer
345 		// is already in coder->temp. In that case SEQ_FOOTER won't
346 		// read more input and will find the Stream Footer from
347 		// coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
348 		//
349 		// Otherwise we will need to seek. The seeking is done so
350 		// that Stream Footer will be at the end of coder->temp.
351 		// This way it's likely that we also get a complete Index
352 		// field into coder->temp without needing a separate seek
353 		// for that (unless the Index field is big).
354 		if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
355 			return_if_error(reverse_seek(
356 					coder, in_start, in_pos, in_size));
357 
358 		FALLTHROUGH;
359 	}
360 
361 	case SEQ_FOOTER:
362 		// Copy the Stream Footer field into coder->temp.
363 		// If Stream Footer was already available in coder->temp
364 		// in SEQ_PADDING_DECODE, then this does nothing.
365 		if (fill_temp(coder, in, in_pos, in_size))
366 			return LZMA_OK;
367 
368 		// Make coder->file_target_pos and coder->temp_size point
369 		// to the beginning of Stream Footer and thus to the end
370 		// of the Index field. coder->temp_pos will be updated
371 		// a bit later.
372 		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
373 		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
374 
375 		// Decode Stream Footer.
376 		return_if_error(hide_format_error(lzma_stream_footer_decode(
377 				&coder->footer_flags,
378 				coder->temp + coder->temp_size)));
379 
380 		// Check that we won't seek past the beginning of the file.
381 		//
382 		// LZMA_STREAM_HEADER_SIZE is added because there must be
383 		// space for Stream Header too even though we won't seek
384 		// there before decoding the Index field.
385 		//
386 		// There's no risk of integer overflow here because
387 		// Backward Size cannot be greater than 2^34.
388 		if (coder->file_target_pos < coder->footer_flags.backward_size
389 				+ LZMA_STREAM_HEADER_SIZE)
390 			return LZMA_DATA_ERROR;
391 
392 		// Set the target position to the beginning of the Index field.
393 		coder->file_target_pos -= coder->footer_flags.backward_size;
394 		coder->sequence = SEQ_INDEX_INIT;
395 
396 		// We can avoid an external seek if the whole Index field is
397 		// already available in coder->temp.
398 		if (coder->temp_size >= coder->footer_flags.backward_size) {
399 			// Set coder->temp_pos to point to the beginning
400 			// of the Index.
401 			coder->temp_pos = coder->temp_size
402 					- coder->footer_flags.backward_size;
403 		} else {
404 			// These are set to zero to indicate that there's no
405 			// useful data (Index or anything else) in coder->temp.
406 			coder->temp_pos = 0;
407 			coder->temp_size = 0;
408 
409 			// Seek to the beginning of the Index field.
410 			if (seek_to_pos(coder, coder->file_target_pos,
411 					in_start, in_pos, in_size))
412 				return LZMA_SEEK_NEEDED;
413 		}
414 
415 		FALLTHROUGH;
416 
417 	case SEQ_INDEX_INIT: {
418 		// Calculate the amount of memory already used by the earlier
419 		// Indexes so that we know how big memory limit to pass to
420 		// the Index decoder.
421 		//
422 		// NOTE: When there are multiple Streams, the separate
423 		// lzma_index structures can use more RAM (as measured by
424 		// lzma_index_memused()) than the final combined lzma_index.
425 		// Thus memlimit may need to be slightly higher than the final
426 		// calculated memory usage will be. This is perhaps a bit
427 		// confusing to the application, but I think it shouldn't
428 		// cause problems in practice.
429 		uint64_t memused = 0;
430 		if (coder->combined_index != NULL) {
431 			memused = lzma_index_memused(coder->combined_index);
432 			assert(memused <= coder->memlimit);
433 			if (memused > coder->memlimit) // Extra sanity check
434 				return LZMA_PROG_ERROR;
435 		}
436 
437 		// Initialize the Index decoder.
438 		return_if_error(lzma_index_decoder_init(
439 				&coder->index_decoder, allocator,
440 				&coder->this_index,
441 				coder->memlimit - memused));
442 
443 		coder->index_remaining = coder->footer_flags.backward_size;
444 		coder->sequence = SEQ_INDEX_DECODE;
445 		FALLTHROUGH;
446 	}
447 
448 	case SEQ_INDEX_DECODE: {
449 		// Decode (a part of) the Index. If the whole Index is already
450 		// in coder->temp, read it from there. Otherwise read from
451 		// in[*in_pos] onwards. Note that index_decode() updates
452 		// coder->index_remaining and optionally coder->file_cur_pos.
453 		lzma_ret ret;
454 		if (coder->temp_size != 0) {
455 			assert(coder->temp_size - coder->temp_pos
456 					== coder->index_remaining);
457 			ret = decode_index(coder, allocator, coder->temp,
458 					&coder->temp_pos, coder->temp_size,
459 					false);
460 		} else {
461 			// Don't give the decoder more input than the known
462 			// remaining size of the Index field.
463 			size_t in_stop = in_size;
464 			if (in_size - *in_pos > coder->index_remaining)
465 				in_stop = *in_pos
466 					+ (size_t)(coder->index_remaining);
467 
468 			ret = decode_index(coder, allocator,
469 					in, in_pos, in_stop, true);
470 		}
471 
472 		switch (ret) {
473 		case LZMA_OK:
474 			// If the Index docoder asks for more input when we
475 			// have already given it as much input as Backward Size
476 			// indicated, the file is invalid.
477 			if (coder->index_remaining == 0)
478 				return LZMA_DATA_ERROR;
479 
480 			// We cannot get here if we were reading Index from
481 			// coder->temp because when reading from coder->temp
482 			// we give the Index decoder exactly
483 			// coder->index_remaining bytes of input.
484 			assert(coder->temp_size == 0);
485 
486 			return LZMA_OK;
487 
488 		case LZMA_STREAM_END:
489 			// If the decoding seems to be successful, check also
490 			// that the Index decoder consumed as much input as
491 			// indicated by the Backward Size field.
492 			if (coder->index_remaining != 0)
493 				return LZMA_DATA_ERROR;
494 
495 			break;
496 
497 		default:
498 			return ret;
499 		}
500 
501 		// Calculate how much the Index tells us to seek backwards
502 		// (relative to the beginning of the Index): Total size of
503 		// all Blocks plus the size of the Stream Header field.
504 		// No integer overflow here because lzma_index_total_size()
505 		// cannot return a value greater than LZMA_VLI_MAX.
506 		const uint64_t seek_amount
507 				= lzma_index_total_size(coder->this_index)
508 					+ LZMA_STREAM_HEADER_SIZE;
509 
510 		// Check that Index is sane in sense that seek_amount won't
511 		// make us seek past the beginning of the file when locating
512 		// the Stream Header.
513 		//
514 		// coder->file_target_pos still points to the beginning of
515 		// the Index field.
516 		if (coder->file_target_pos < seek_amount)
517 			return LZMA_DATA_ERROR;
518 
519 		// Set the target to the beginning of Stream Header.
520 		coder->file_target_pos -= seek_amount;
521 
522 		if (coder->file_target_pos == 0) {
523 			// We would seek to the beginning of the file, but
524 			// since we already decoded that Stream Header in
525 			// SEQ_MAGIC_BYTES, we can use the cached value from
526 			// coder->first_header_flags to avoid the seek.
527 			coder->header_flags = coder->first_header_flags;
528 			coder->sequence = SEQ_HEADER_COMPARE;
529 			break;
530 		}
531 
532 		coder->sequence = SEQ_HEADER_DECODE;
533 
534 		// Make coder->file_target_pos point to the end of
535 		// the Stream Header field.
536 		coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
537 
538 		// If coder->temp_size is non-zero, it points to the end
539 		// of the Index field. Then the beginning of the Index
540 		// field is at coder->temp[coder->temp_size
541 		// - coder->footer_flags.backward_size].
542 		assert(coder->temp_size == 0 || coder->temp_size
543 				>= coder->footer_flags.backward_size);
544 
545 		// If coder->temp contained the whole Index, see if it has
546 		// enough data to contain also the Stream Header. If so,
547 		// we avoid an external seek.
548 		//
549 		// NOTE: This can happen only with small .xz files and only
550 		// for the non-first Stream as the Stream Flags of the first
551 		// Stream are cached and already handled a few lines above.
552 		// So this isn't as useful as the other seek-avoidance cases.
553 		if (coder->temp_size != 0 && coder->temp_size
554 				- coder->footer_flags.backward_size
555 				>= seek_amount) {
556 			// Make temp_pos and temp_size point to the *end* of
557 			// Stream Header so that SEQ_HEADER_DECODE will find
558 			// the start of Stream Header from coder->temp[
559 			// coder->temp_size - LZMA_STREAM_HEADER_SIZE].
560 			coder->temp_pos = coder->temp_size
561 					- coder->footer_flags.backward_size
562 					- seek_amount
563 					+ LZMA_STREAM_HEADER_SIZE;
564 			coder->temp_size = coder->temp_pos;
565 		} else {
566 			// Seek so that Stream Header will be at the end of
567 			// coder->temp. With typical multi-Stream files we
568 			// will usually also get the Stream Footer and Index
569 			// of the *previous* Stream in coder->temp and thus
570 			// won't need a separate seek for them.
571 			return_if_error(reverse_seek(coder,
572 					in_start, in_pos, in_size));
573 		}
574 
575 		FALLTHROUGH;
576 	}
577 
578 	case SEQ_HEADER_DECODE:
579 		// Copy the Stream Header field into coder->temp.
580 		// If Stream Header was already available in coder->temp
581 		// in SEQ_INDEX_DECODE, then this does nothing.
582 		if (fill_temp(coder, in, in_pos, in_size))
583 			return LZMA_OK;
584 
585 		// Make all these point to the beginning of Stream Header.
586 		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
587 		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
588 		coder->temp_pos = coder->temp_size;
589 
590 		// Decode the Stream Header.
591 		return_if_error(hide_format_error(lzma_stream_header_decode(
592 				&coder->header_flags,
593 				coder->temp + coder->temp_size)));
594 
595 		coder->sequence = SEQ_HEADER_COMPARE;
596 		FALLTHROUGH;
597 
598 	case SEQ_HEADER_COMPARE:
599 		// Compare Stream Header against Stream Footer. They must
600 		// match.
601 		return_if_error(lzma_stream_flags_compare(
602 				&coder->header_flags, &coder->footer_flags));
603 
604 		// Store the decoded Stream Flags into the Index. Use the
605 		// Footer Flags because it contains Backward Size, although
606 		// it shouldn't matter in practice.
607 		if (lzma_index_stream_flags(coder->this_index,
608 				&coder->footer_flags) != LZMA_OK)
609 			return LZMA_PROG_ERROR;
610 
611 		// Store also the size of the Stream Padding field. It is
612 		// needed to calculate the offsets of the Streams correctly.
613 		if (lzma_index_stream_padding(coder->this_index,
614 				coder->stream_padding) != LZMA_OK)
615 			return LZMA_PROG_ERROR;
616 
617 		// Reset it so that it's ready for the next Stream.
618 		coder->stream_padding = 0;
619 
620 		// Append the earlier decoded Indexes after this_index.
621 		if (coder->combined_index != NULL)
622 			return_if_error(lzma_index_cat(coder->this_index,
623 					coder->combined_index, allocator));
624 
625 		coder->combined_index = coder->this_index;
626 		coder->this_index = NULL;
627 
628 		// If the whole file was decoded, tell the caller that we
629 		// are finished.
630 		if (coder->file_target_pos == 0) {
631 			// The combined index must indicate the same file
632 			// size as was told to us at initialization.
633 			assert(lzma_index_file_size(coder->combined_index)
634 					== coder->file_size);
635 
636 			// Make the combined index available to
637 			// the application.
638 			*coder->dest_index = coder->combined_index;
639 			coder->combined_index = NULL;
640 
641 			// Mark the input buffer as used since we may have
642 			// done internal seeking and thus don't know how
643 			// many input bytes were actually used. This way
644 			// lzma_stream.total_in gets a slightly better
645 			// estimate of the amount of input used.
646 			*in_pos = in_size;
647 			return LZMA_STREAM_END;
648 		}
649 
650 		// We didn't hit the beginning of the file yet, so continue
651 		// reading backwards in the file. If we have unprocessed
652 		// data in coder->temp, use it before requesting more data
653 		// from the application.
654 		//
655 		// coder->file_target_pos, coder->temp_size, and
656 		// coder->temp_pos all point to the beginning of Stream Header
657 		// and thus the end of the previous Stream in the file.
658 		coder->sequence = coder->temp_size > 0
659 				? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
660 		break;
661 
662 	default:
663 		assert(0);
664 		return LZMA_PROG_ERROR;
665 	}
666 }
667 
668 
669 static lzma_ret
file_info_decoder_memconfig(void * coder_ptr,uint64_t * memusage,uint64_t * old_memlimit,uint64_t new_memlimit)670 file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
671 		uint64_t *old_memlimit, uint64_t new_memlimit)
672 {
673 	lzma_file_info_coder *coder = coder_ptr;
674 
675 	// The memory usage calculation comes from three things:
676 	//
677 	// (1) The Indexes that have already been decoded and processed into
678 	//     coder->combined_index.
679 	//
680 	// (2) The latest Index in coder->this_index that has been decoded but
681 	//     not yet put into coder->combined_index.
682 	//
683 	// (3) The latest Index that we have started decoding but haven't
684 	//     finished and thus isn't available in coder->this_index yet.
685 	//     Memory usage and limit information needs to be communicated
686 	//     from/to coder->index_decoder.
687 	//
688 	// Care has to be taken to not do both (2) and (3) when calculating
689 	// the memory usage.
690 	uint64_t combined_index_memusage = 0;
691 	uint64_t this_index_memusage = 0;
692 
693 	// (1) If we have already successfully decoded one or more Indexes,
694 	// get their memory usage.
695 	if (coder->combined_index != NULL)
696 		combined_index_memusage = lzma_index_memused(
697 				coder->combined_index);
698 
699 	// Choose between (2), (3), or neither.
700 	if (coder->this_index != NULL) {
701 		// (2) The latest Index is available. Use its memory usage.
702 		this_index_memusage = lzma_index_memused(coder->this_index);
703 
704 	} else if (coder->sequence == SEQ_INDEX_DECODE) {
705 		// (3) The Index decoder is activate and hasn't yet stored
706 		// the new index in coder->this_index. Get the memory usage
707 		// information from the Index decoder.
708 		//
709 		// NOTE: If the Index decoder doesn't yet know how much memory
710 		// it will eventually need, it will return a tiny value here.
711 		uint64_t dummy;
712 		if (coder->index_decoder.memconfig(coder->index_decoder.coder,
713 					&this_index_memusage, &dummy, 0)
714 				!= LZMA_OK) {
715 			assert(0);
716 			return LZMA_PROG_ERROR;
717 		}
718 	}
719 
720 	// Now we know the total memory usage/requirement. If we had neither
721 	// old Indexes nor a new Index, this will be zero which isn't
722 	// acceptable as lzma_memusage() has to return non-zero on success
723 	// and even with an empty .xz file we will end up with a lzma_index
724 	// that takes some memory.
725 	*memusage = combined_index_memusage + this_index_memusage;
726 	if (*memusage == 0)
727 		*memusage = lzma_index_memusage(1, 0);
728 
729 	*old_memlimit = coder->memlimit;
730 
731 	// If requested, set a new memory usage limit.
732 	if (new_memlimit != 0) {
733 		if (new_memlimit < *memusage)
734 			return LZMA_MEMLIMIT_ERROR;
735 
736 		// In the condition (3) we need to tell the Index decoder
737 		// its new memory usage limit.
738 		if (coder->this_index == NULL
739 				&& coder->sequence == SEQ_INDEX_DECODE) {
740 			const uint64_t idec_new_memlimit = new_memlimit
741 					- combined_index_memusage;
742 
743 			assert(this_index_memusage > 0);
744 			assert(idec_new_memlimit > 0);
745 
746 			uint64_t dummy1;
747 			uint64_t dummy2;
748 
749 			if (coder->index_decoder.memconfig(
750 					coder->index_decoder.coder,
751 					&dummy1, &dummy2, idec_new_memlimit)
752 					!= LZMA_OK) {
753 				assert(0);
754 				return LZMA_PROG_ERROR;
755 			}
756 		}
757 
758 		coder->memlimit = new_memlimit;
759 	}
760 
761 	return LZMA_OK;
762 }
763 
764 
765 static void
file_info_decoder_end(void * coder_ptr,const lzma_allocator * allocator)766 file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
767 {
768 	lzma_file_info_coder *coder = coder_ptr;
769 
770 	lzma_next_end(&coder->index_decoder, allocator);
771 	lzma_index_end(coder->this_index, allocator);
772 	lzma_index_end(coder->combined_index, allocator);
773 
774 	lzma_free(coder, allocator);
775 	return;
776 }
777 
778 
779 static lzma_ret
lzma_file_info_decoder_init(lzma_next_coder * next,const lzma_allocator * allocator,uint64_t * seek_pos,lzma_index ** dest_index,uint64_t memlimit,uint64_t file_size)780 lzma_file_info_decoder_init(lzma_next_coder *next,
781 		const lzma_allocator *allocator, uint64_t *seek_pos,
782 		lzma_index **dest_index,
783 		uint64_t memlimit, uint64_t file_size)
784 {
785 	lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
786 
787 	if (dest_index == NULL)
788 		return LZMA_PROG_ERROR;
789 
790 	lzma_file_info_coder *coder = next->coder;
791 	if (coder == NULL) {
792 		coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
793 		if (coder == NULL)
794 			return LZMA_MEM_ERROR;
795 
796 		next->coder = coder;
797 		next->code = &file_info_decode;
798 		next->end = &file_info_decoder_end;
799 		next->memconfig = &file_info_decoder_memconfig;
800 
801 		coder->index_decoder = LZMA_NEXT_CODER_INIT;
802 		coder->this_index = NULL;
803 		coder->combined_index = NULL;
804 	}
805 
806 	coder->sequence = SEQ_MAGIC_BYTES;
807 	coder->file_cur_pos = 0;
808 	coder->file_target_pos = 0;
809 	coder->file_size = file_size;
810 
811 	lzma_index_end(coder->this_index, allocator);
812 	coder->this_index = NULL;
813 
814 	lzma_index_end(coder->combined_index, allocator);
815 	coder->combined_index = NULL;
816 
817 	coder->stream_padding = 0;
818 
819 	coder->dest_index = dest_index;
820 	coder->external_seek_pos = seek_pos;
821 
822 	// If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
823 	// won't return 0 (which would indicate an error).
824 	coder->memlimit = my_max(1, memlimit);
825 
826 	// Prepare these for reading the first Stream Header into coder->temp.
827 	coder->temp_pos = 0;
828 	coder->temp_size = LZMA_STREAM_HEADER_SIZE;
829 
830 	return LZMA_OK;
831 }
832 
833 
834 extern LZMA_API(lzma_ret)
lzma_file_info_decoder(lzma_stream * strm,lzma_index ** dest_index,uint64_t memlimit,uint64_t file_size)835 lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
836 		uint64_t memlimit, uint64_t file_size)
837 {
838 	lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
839 			dest_index, memlimit, file_size);
840 
841 	// We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
842 	// lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
843 	// combination in a sane way. Applications still need to be careful
844 	// if they use LZMA_FINISH so that they remember to reset it back
845 	// to LZMA_RUN after seeking if needed.
846 	strm->internal->supported_actions[LZMA_RUN] = true;
847 	strm->internal->supported_actions[LZMA_FINISH] = true;
848 
849 	return LZMA_OK;
850 }
851