xref: /freebsd/contrib/xz/src/liblzma/common/file_info.c (revision ba3c1f5972d7b90feb6e6da47905ff2757e0fe57)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       file_info.c
4 /// \brief      Decode .xz file information into a lzma_index structure
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "index_decoder.h"
14 
15 
/// State of the file info decoder. One instance is allocated by
/// lzma_file_info_decoder_init() and freed by file_info_decoder_end().
typedef struct {
	/// Current state of the state machine in file_info_decode().
	enum {
		/// Decode the Stream Header at the very beginning of
		/// the file and validate the file size.
		SEQ_MAGIC_BYTES,

		/// Seek backwards so that the data ending at
		/// file_target_pos can be read into temp.
		SEQ_PADDING_SEEK,

		/// Fill temp and count the zero bytes (Stream Padding)
		/// at its end.
		SEQ_PADDING_DECODE,

		/// Decode the Stream Footer found at the end of temp.
		SEQ_FOOTER,

		/// Initialize the Index decoder with the memory limit
		/// that is still available.
		SEQ_INDEX_INIT,

		/// Decode the Index field either from temp or directly
		/// from the input buffer.
		SEQ_INDEX_DECODE,

		/// Decode the Stream Header of the current Stream.
		SEQ_HEADER_DECODE,

		/// Compare Stream Header against Stream Footer and add
		/// the decoded Index to combined_index.
		SEQ_HEADER_COMPARE,
	} sequence;

	/// Absolute position of in[*in_pos] in the file. All code that
	/// modifies *in_pos also updates this. seek_to_pos() needs this
	/// to determine if we need to request the application to seek for
	/// us or if we can do the seeking internally by adjusting *in_pos.
	uint64_t file_cur_pos;

	/// This refers to absolute positions of interesting parts of the
	/// input file. Sometimes it points to the *beginning* of a specific
	/// field and sometimes to the *end* of a field. The current target
	/// position at each moment is explained in the comments.
	uint64_t file_target_pos;

	/// Size of the .xz file (from the application).
	uint64_t file_size;

	/// Index decoder
	lzma_next_coder index_decoder;

	/// Number of bytes remaining in the Index field that is currently
	/// being decoded.
	lzma_vli index_remaining;

	/// The Index decoder will store the decoded Index in this pointer.
	lzma_index *this_index;

	/// Amount of Stream Padding in the current Stream.
	lzma_vli stream_padding;

	/// The final combined index is collected here.
	lzma_index *combined_index;

	/// Pointer from the application where to store the index information
	/// after successful decoding.
	lzma_index **dest_index;

	/// Pointer to lzma_stream.seek_pos to be used when returning
	/// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
	uint64_t *external_seek_pos;

	/// Memory usage limit
	uint64_t memlimit;

	/// Stream Flags from the very beginning of the file.
	lzma_stream_flags first_header_flags;

	/// Stream Flags from Stream Header of the current Stream.
	lzma_stream_flags header_flags;

	/// Stream Flags from Stream Footer of the current Stream.
	lzma_stream_flags footer_flags;

	/// Position in temp. fill_temp() writes new input at this position;
	/// the state machine also uses it as the read position when decoding
	/// data that is already in temp.
	size_t temp_pos;

	/// Amount of data that temp is meant to hold: fill_temp() copies
	/// input until temp_pos reaches this value. The state machine sets
	/// this to zero to mark that temp holds no useful data.
	size_t temp_size;

	/// Temporary buffer for data read from the file: Stream Padding,
	/// Stream Header/Footer, and (when it fits) the Index field.
	uint8_t temp[8192];

} lzma_file_info_coder;
84 
85 
86 /// Copies data from in[*in_pos] into coder->temp until
87 /// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
88 /// in sync with *in_pos. Returns true if more input is needed.
89 static bool
90 fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
91 		size_t *restrict in_pos, size_t in_size)
92 {
93 	coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
94 			coder->temp, &coder->temp_pos, coder->temp_size);
95 	return coder->temp_pos < coder->temp_size;
96 }
97 
98 
99 /// Seeks to the absolute file position specified by target_pos.
100 /// This tries to do the seeking by only modifying *in_pos, if possible.
101 /// The main benefit of this is that if one passes the whole file at once
102 /// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
103 /// as all the seeking can be done by adjusting *in_pos in this function.
104 ///
105 /// Returns true if an external seek is needed and the caller must return
106 /// LZMA_SEEK_NEEDED.
107 static bool
108 seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
109 		size_t in_start, size_t *in_pos, size_t in_size)
110 {
111 	// The input buffer doesn't extend beyond the end of the file.
112 	// This has been checked by file_info_decode() already.
113 	assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
114 
115 	const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
116 	const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
117 
118 	bool external_seek_needed;
119 
120 	if (target_pos >= pos_min && target_pos <= pos_max) {
121 		// The requested position is available in the current input
122 		// buffer or right after it. That is, in a corner case we
123 		// end up setting *in_pos == in_size and thus will immediately
124 		// need new input bytes from the application.
125 		*in_pos += (size_t)(target_pos - coder->file_cur_pos);
126 		external_seek_needed = false;
127 	} else {
128 		// Ask the application to seek the input file.
129 		*coder->external_seek_pos = target_pos;
130 		external_seek_needed = true;
131 
132 		// Mark the whole input buffer as used. This way
133 		// lzma_stream.total_in will have a better estimate
134 		// of the amount of data read. It still won't be perfect
135 		// as the value will depend on the input buffer size that
136 		// the application uses, but it should be good enough for
137 		// those few who want an estimate.
138 		*in_pos = in_size;
139 	}
140 
141 	// After seeking (internal or external) the current position
142 	// will match the requested target position.
143 	coder->file_cur_pos = target_pos;
144 
145 	return external_seek_needed;
146 }
147 
148 
149 /// The caller sets coder->file_target_pos so that it points to the *end*
150 /// of the desired file position. This function then determines how far
151 /// backwards from that position we can seek. After seeking fill_temp()
152 /// can be used to read data into coder->temp. When fill_temp() has finished,
153 /// coder->temp[coder->temp_size] will match coder->file_target_pos.
154 ///
155 /// This also validates that coder->target_file_pos is sane in sense that
156 /// we aren't trying to seek too far backwards (too close or beyond the
157 /// beginning of the file).
158 static lzma_ret
159 reverse_seek(lzma_file_info_coder *coder,
160 		size_t in_start, size_t *in_pos, size_t in_size)
161 {
162 	// Check that there is enough data before the target position
163 	// to contain at least Stream Header and Stream Footer. If there
164 	// isn't, the file cannot be valid.
165 	if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
166 		return LZMA_DATA_ERROR;
167 
168 	coder->temp_pos = 0;
169 
170 	// The Stream Header at the very beginning of the file gets handled
171 	// specially in SEQ_MAGIC_BYTES and thus we will never need to seek
172 	// there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
173 	// we avoid a useless external seek after SEQ_MAGIC_BYTES if the
174 	// application uses an extremely small input buffer and the input
175 	// file is very small.
176 	if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
177 			< sizeof(coder->temp))
178 		coder->temp_size = (size_t)(coder->file_target_pos
179 				- LZMA_STREAM_HEADER_SIZE);
180 	else
181 		coder->temp_size = sizeof(coder->temp);
182 
183 	// The above if-statements guarantee this. This is important because
184 	// the Stream Header/Footer decoders assume that there's at least
185 	// LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
186 	assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
187 
188 	if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
189 			in_start, in_pos, in_size))
190 		return LZMA_SEEK_NEEDED;
191 
192 	return LZMA_OK;
193 }
194 
195 
/// Counts how many consecutive 0x00 bytes there are at the end of buf.
/// Returns 0 for an empty buffer and buf_size if every byte is zero.
static size_t
get_padding_size(const uint8_t *buf, size_t buf_size)
{
	// Walk backwards from the end until a non-zero byte is found or
	// the beginning of the buffer is reached.
	size_t data_end = buf_size;
	while (data_end > 0 && buf[data_end - 1] == 0x00)
		--data_end;

	return buf_size - data_end;
}
206 
207 
208 /// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
209 /// is used to tell the application that Magic Bytes didn't match. In other
210 /// Stream Header/Footer fields (in the middle/end of the file) it could be
211 /// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
212 /// is a valid Stream Header at the beginning of the file. For those cases
213 /// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
214 static lzma_ret
215 hide_format_error(lzma_ret ret)
216 {
217 	if (ret == LZMA_FORMAT_ERROR)
218 		ret = LZMA_DATA_ERROR;
219 
220 	return ret;
221 }
222 
223 
224 /// Calls the Index decoder and updates coder->index_remaining.
225 /// This is a separate function because the input can be either directly
226 /// from the application or from coder->temp.
227 static lzma_ret
228 decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
229 		const uint8_t *restrict in, size_t *restrict in_pos,
230 		size_t in_size, bool update_file_cur_pos)
231 {
232 	const size_t in_start = *in_pos;
233 
234 	const lzma_ret ret = coder->index_decoder.code(
235 			coder->index_decoder.coder,
236 			allocator, in, in_pos, in_size,
237 			NULL, NULL, 0, LZMA_RUN);
238 
239 	coder->index_remaining -= *in_pos - in_start;
240 
241 	if (update_file_cur_pos)
242 		coder->file_cur_pos += *in_pos - in_start;
243 
244 	return ret;
245 }
246 
247 
/// The decoding function of the file info decoder. It is a state machine
/// driven by coder->sequence.
///
/// The Stream Header at the beginning of the file is decoded first. Then
/// the file is processed backwards starting from its end: Stream Padding,
/// Stream Footer, Index, and Stream Header of each Stream are located and
/// decoded, and the Index of each Stream is added to coder->combined_index.
/// This repeats until the beginning of the file is reached.
///
/// The out, out_pos, out_size, and action arguments are unused.
///
/// Return values produced directly by this function:
///   - LZMA_OK: More input is needed.
///   - LZMA_SEEK_NEEDED: The application must seek the input file to
///     the position stored in *coder->external_seek_pos.
///   - LZMA_STREAM_END: The whole file was processed and the combined
///     index was stored to *coder->dest_index.
///   - LZMA_FORMAT_ERROR: The file is smaller than a Stream Header.
///   - LZMA_DATA_ERROR: The file is invalid.
///   - LZMA_PROG_ERROR: Internal inconsistency.
/// Other errors from the functions called here are passed through via
/// return_if_error().
static lzma_ret
file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size,
		uint8_t *restrict out lzma_attribute((__unused__)),
		size_t *restrict out_pos lzma_attribute((__unused__)),
		size_t out_size lzma_attribute((__unused__)),
		lzma_action action lzma_attribute((__unused__)))
{
	lzma_file_info_coder *coder = coder_ptr;

	// Remember where the input buffer started. seek_to_pos() needs
	// this to know how far backwards it can seek within the buffer.
	const size_t in_start = *in_pos;

	// If the caller provides input past the end of the file, trim
	// the extra bytes from the buffer so that we won't read too far.
	assert(coder->file_size >= coder->file_cur_pos);
	if (coder->file_size - coder->file_cur_pos < in_size - in_start)
		in_size = in_start
			+ (size_t)(coder->file_size - coder->file_cur_pos);

	// A "break" inside the switch leaves only the switch; the loop then
	// dispatches again using the new value of coder->sequence.
	while (true)
	switch (coder->sequence) {
	case SEQ_MAGIC_BYTES:
		// Decode the Stream Header at the beginning of the file
		// first to check if the Magic Bytes match. The flags
		// are stored in coder->first_header_flags so that we
		// don't need to seek to it again.
		//
		// Check that the file is big enough to contain at least
		// Stream Header.
		if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
			return LZMA_FORMAT_ERROR;

		// Read the Stream Header field into coder->temp.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// This is the only Stream Header/Footer decoding where we
		// want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
		// match. Elsewhere it will be converted to LZMA_DATA_ERROR.
		return_if_error(lzma_stream_header_decode(
				&coder->first_header_flags, coder->temp));

		// Now that we know that the Magic Bytes match, check the
		// file size. It's better to do this here after checking the
		// Magic Bytes since this way we can give LZMA_FORMAT_ERROR
		// instead of LZMA_DATA_ERROR when the Magic Bytes don't
		// match in a file that is too big or isn't a multiple of
		// four bytes.
		if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
			return LZMA_DATA_ERROR;

		// Start looking for Stream Padding and Stream Footer
		// at the end of the file.
		coder->file_target_pos = coder->file_size;

	// Fall through

	case SEQ_PADDING_SEEK:
		// The next state is set before seeking so that decoding
		// continues from SEQ_PADDING_DECODE if reverse_seek()
		// makes us return LZMA_SEEK_NEEDED.
		coder->sequence = SEQ_PADDING_DECODE;
		return_if_error(reverse_seek(
				coder, in_start, in_pos, in_size));

	// Fall through

	case SEQ_PADDING_DECODE: {
		// Copy to coder->temp first. This keeps the code simpler if
		// the application only provides input a few bytes at a time.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Scan the buffer backwards to get the size of the
		// Stream Padding field (if any).
		const size_t new_padding = get_padding_size(
				coder->temp, coder->temp_size);
		coder->stream_padding += new_padding;

		// Set the target position to the beginning of Stream Padding
		// that has been observed so far. If all Stream Padding has
		// been seen, then the target position will be at the end
		// of the Stream Footer field.
		coder->file_target_pos -= new_padding;

		if (new_padding == coder->temp_size) {
			// The whole buffer was padding. Seek backwards in
			// the file to get more input.
			coder->sequence = SEQ_PADDING_SEEK;
			break;
		}

		// Size of Stream Padding must be a multiple of 4 bytes.
		if (coder->stream_padding & 3)
			return LZMA_DATA_ERROR;

		coder->sequence = SEQ_FOOTER;

		// Calculate the amount of non-padding data in coder->temp.
		coder->temp_size -= new_padding;
		coder->temp_pos = coder->temp_size;

		// We can avoid an external seek if the whole Stream Footer
		// is already in coder->temp. In that case SEQ_FOOTER won't
		// read more input and will find the Stream Footer from
		// coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
		//
		// Otherwise we will need to seek. The seeking is done so
		// that Stream Footer will be at the end of coder->temp.
		// This way it's likely that we also get a complete Index
		// field into coder->temp without needing a separate seek
		// for that (unless the Index field is big).
		if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
			return_if_error(reverse_seek(
					coder, in_start, in_pos, in_size));
	}

	// Fall through

	case SEQ_FOOTER:
		// Copy the Stream Footer field into coder->temp.
		// If Stream Footer was already available in coder->temp
		// in SEQ_PADDING_DECODE, then this does nothing.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Make coder->file_target_pos and coder->temp_size point
		// to the beginning of Stream Footer and thus to the end
		// of the Index field. coder->temp_pos will be updated
		// a bit later.
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;

		// Decode Stream Footer.
		return_if_error(hide_format_error(lzma_stream_footer_decode(
				&coder->footer_flags,
				coder->temp + coder->temp_size)));

		// Check that we won't seek past the beginning of the file.
		//
		// LZMA_STREAM_HEADER_SIZE is added because there must be
		// space for Stream Header too even though we won't seek
		// there before decoding the Index field.
		//
		// There's no risk of integer overflow here because
		// Backward Size cannot be greater than 2^34.
		if (coder->file_target_pos < coder->footer_flags.backward_size
				+ LZMA_STREAM_HEADER_SIZE)
			return LZMA_DATA_ERROR;

		// Set the target position to the beginning of the Index field.
		coder->file_target_pos -= coder->footer_flags.backward_size;
		coder->sequence = SEQ_INDEX_INIT;

		// We can avoid an external seek if the whole Index field is
		// already available in coder->temp.
		if (coder->temp_size >= coder->footer_flags.backward_size) {
			// Set coder->temp_pos to point to the beginning
			// of the Index.
			coder->temp_pos = coder->temp_size
					- coder->footer_flags.backward_size;
		} else {
			// These are set to zero to indicate that there's no
			// useful data (Index or anything else) in coder->temp.
			coder->temp_pos = 0;
			coder->temp_size = 0;

			// Seek to the beginning of the Index field.
			if (seek_to_pos(coder, coder->file_target_pos,
					in_start, in_pos, in_size))
				return LZMA_SEEK_NEEDED;
		}

	// Fall through

	case SEQ_INDEX_INIT: {
		// Calculate the amount of memory already used by the earlier
		// Indexes so that we know how big memory limit to pass to
		// the Index decoder.
		//
		// NOTE: When there are multiple Streams, the separate
		// lzma_index structures can use more RAM (as measured by
		// lzma_index_memused()) than the final combined lzma_index.
		// Thus memlimit may need to be slightly higher than the final
		// calculated memory usage will be. This is perhaps a bit
		// confusing to the application, but I think it shouldn't
		// cause problems in practice.
		uint64_t memused = 0;
		if (coder->combined_index != NULL) {
			memused = lzma_index_memused(coder->combined_index);
			assert(memused <= coder->memlimit);
			if (memused > coder->memlimit) // Extra sanity check
				return LZMA_PROG_ERROR;
		}

		// Initialize the Index decoder.
		return_if_error(lzma_index_decoder_init(
				&coder->index_decoder, allocator,
				&coder->this_index,
				coder->memlimit - memused));

		coder->index_remaining = coder->footer_flags.backward_size;
		coder->sequence = SEQ_INDEX_DECODE;
	}

	// Fall through

	case SEQ_INDEX_DECODE: {
		// Decode (a part of) the Index. If the whole Index is already
		// in coder->temp, read it from there. Otherwise read from
		// in[*in_pos] onwards. Note that decode_index() updates
		// coder->index_remaining and optionally coder->file_cur_pos.
		lzma_ret ret;
		if (coder->temp_size != 0) {
			assert(coder->temp_size - coder->temp_pos
					== coder->index_remaining);
			ret = decode_index(coder, allocator, coder->temp,
					&coder->temp_pos, coder->temp_size,
					false);
		} else {
			// Don't give the decoder more input than the known
			// remaining size of the Index field.
			size_t in_stop = in_size;
			if (in_size - *in_pos > coder->index_remaining)
				in_stop = *in_pos
					+ (size_t)(coder->index_remaining);

			ret = decode_index(coder, allocator,
					in, in_pos, in_stop, true);
		}

		switch (ret) {
		case LZMA_OK:
			// If the Index decoder asks for more input when we
			// have already given it as much input as Backward Size
			// indicated, the file is invalid.
			if (coder->index_remaining == 0)
				return LZMA_DATA_ERROR;

			// We cannot get here if we were reading Index from
			// coder->temp because when reading from coder->temp
			// we give the Index decoder exactly
			// coder->index_remaining bytes of input.
			assert(coder->temp_size == 0);

			return LZMA_OK;

		case LZMA_STREAM_END:
			// If the decoding seems to be successful, check also
			// that the Index decoder consumed as much input as
			// indicated by the Backward Size field.
			if (coder->index_remaining != 0)
				return LZMA_DATA_ERROR;

			break;

		default:
			return ret;
		}

		// Calculate how much the Index tells us to seek backwards
		// (relative to the beginning of the Index): Total size of
		// all Blocks plus the size of the Stream Header field.
		// No integer overflow here because lzma_index_total_size()
		// cannot return a value greater than LZMA_VLI_MAX.
		const uint64_t seek_amount
				= lzma_index_total_size(coder->this_index)
					+ LZMA_STREAM_HEADER_SIZE;

		// Check that Index is sane in sense that seek_amount won't
		// make us seek past the beginning of the file when locating
		// the Stream Header.
		//
		// coder->file_target_pos still points to the beginning of
		// the Index field.
		if (coder->file_target_pos < seek_amount)
			return LZMA_DATA_ERROR;

		// Set the target to the beginning of Stream Header.
		coder->file_target_pos -= seek_amount;

		if (coder->file_target_pos == 0) {
			// We would seek to the beginning of the file, but
			// since we already decoded that Stream Header in
			// SEQ_MAGIC_BYTES, we can use the cached value from
			// coder->first_header_flags to avoid the seek.
			coder->header_flags = coder->first_header_flags;
			coder->sequence = SEQ_HEADER_COMPARE;
			break;
		}

		coder->sequence = SEQ_HEADER_DECODE;

		// Make coder->file_target_pos point to the end of
		// the Stream Header field.
		coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;

		// If coder->temp_size is non-zero, it points to the end
		// of the Index field. Then the beginning of the Index
		// field is at coder->temp[coder->temp_size
		// - coder->footer_flags.backward_size].
		assert(coder->temp_size == 0 || coder->temp_size
				>= coder->footer_flags.backward_size);

		// If coder->temp contained the whole Index, see if it has
		// enough data to contain also the Stream Header. If so,
		// we avoid an external seek.
		//
		// NOTE: This can happen only with small .xz files and only
		// for the non-first Stream as the Stream Flags of the first
		// Stream are cached and already handled a few lines above.
		// So this isn't as useful as the other seek-avoidance cases.
		if (coder->temp_size != 0 && coder->temp_size
				- coder->footer_flags.backward_size
				>= seek_amount) {
			// Make temp_pos and temp_size point to the *end* of
			// Stream Header so that SEQ_HEADER_DECODE will find
			// the start of Stream Header from coder->temp[
			// coder->temp_size - LZMA_STREAM_HEADER_SIZE].
			coder->temp_pos = coder->temp_size
					- coder->footer_flags.backward_size
					- seek_amount
					+ LZMA_STREAM_HEADER_SIZE;
			coder->temp_size = coder->temp_pos;
		} else {
			// Seek so that Stream Header will be at the end of
			// coder->temp. With typical multi-Stream files we
			// will usually also get the Stream Footer and Index
			// of the *previous* Stream in coder->temp and thus
			// won't need a separate seek for them.
			return_if_error(reverse_seek(coder,
					in_start, in_pos, in_size));
		}
	}

	// Fall through

	case SEQ_HEADER_DECODE:
		// Copy the Stream Header field into coder->temp.
		// If Stream Header was already available in coder->temp
		// in SEQ_INDEX_DECODE, then this does nothing.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Make all these point to the beginning of Stream Header.
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_pos = coder->temp_size;

		// Decode the Stream Header.
		return_if_error(hide_format_error(lzma_stream_header_decode(
				&coder->header_flags,
				coder->temp + coder->temp_size)));

		coder->sequence = SEQ_HEADER_COMPARE;

	// Fall through

	case SEQ_HEADER_COMPARE:
		// Compare Stream Header against Stream Footer. They must
		// match.
		return_if_error(lzma_stream_flags_compare(
				&coder->header_flags, &coder->footer_flags));

		// Store the decoded Stream Flags into the Index. Use the
		// Footer Flags because it contains Backward Size, although
		// it shouldn't matter in practice.
		if (lzma_index_stream_flags(coder->this_index,
				&coder->footer_flags) != LZMA_OK)
			return LZMA_PROG_ERROR;

		// Store also the size of the Stream Padding field. It is
		// needed to calculate the offsets of the Streams correctly.
		if (lzma_index_stream_padding(coder->this_index,
				coder->stream_padding) != LZMA_OK)
			return LZMA_PROG_ERROR;

		// Reset it so that it's ready for the next Stream.
		coder->stream_padding = 0;

		// Append the earlier decoded Indexes after this_index.
		// The file is read backwards, so this_index belongs to
		// an earlier Stream in the file than combined_index does.
		if (coder->combined_index != NULL)
			return_if_error(lzma_index_cat(coder->this_index,
					coder->combined_index, allocator));

		// this_index now holds everything decoded so far. Move it
		// to combined_index and clear this_index so that the same
		// lzma_index isn't referenced from two places.
		coder->combined_index = coder->this_index;
		coder->this_index = NULL;

		// If the whole file was decoded, tell the caller that we
		// are finished.
		if (coder->file_target_pos == 0) {
			// The combined index must indicate the same file
			// size as was told to us at initialization.
			assert(lzma_index_file_size(coder->combined_index)
					== coder->file_size);

			// Make the combined index available to
			// the application.
			*coder->dest_index = coder->combined_index;
			coder->combined_index = NULL;

			// Mark the input buffer as used since we may have
			// done internal seeking and thus don't know how
			// many input bytes were actually used. This way
			// lzma_stream.total_in gets a slightly better
			// estimate of the amount of input used.
			*in_pos = in_size;
			return LZMA_STREAM_END;
		}

		// We didn't hit the beginning of the file yet, so continue
		// reading backwards in the file. If we have unprocessed
		// data in coder->temp, use it before requesting more data
		// from the application.
		//
		// coder->file_target_pos, coder->temp_size, and
		// coder->temp_pos all point to the beginning of Stream Header
		// and thus the end of the previous Stream in the file.
		coder->sequence = coder->temp_size > 0
				? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
		break;

	default:
		// coder->sequence has a value that isn't any of
		// the known states.
		assert(0);
		return LZMA_PROG_ERROR;
	}
}
672 
673 
674 static lzma_ret
675 file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
676 		uint64_t *old_memlimit, uint64_t new_memlimit)
677 {
678 	lzma_file_info_coder *coder = coder_ptr;
679 
680 	// The memory usage calculation comes from three things:
681 	//
682 	// (1) The Indexes that have already been decoded and processed into
683 	//     coder->combined_index.
684 	//
685 	// (2) The latest Index in coder->this_index that has been decoded but
686 	//     not yet put into coder->combined_index.
687 	//
688 	// (3) The latest Index that we have started decoding but haven't
689 	//     finished and thus isn't available in coder->this_index yet.
690 	//     Memory usage and limit information needs to be communicated
691 	//     from/to coder->index_decoder.
692 	//
693 	// Care has to be taken to not do both (2) and (3) when calculating
694 	// the memory usage.
695 	uint64_t combined_index_memusage = 0;
696 	uint64_t this_index_memusage = 0;
697 
698 	// (1) If we have already successfully decoded one or more Indexes,
699 	// get their memory usage.
700 	if (coder->combined_index != NULL)
701 		combined_index_memusage = lzma_index_memused(
702 				coder->combined_index);
703 
704 	// Choose between (2), (3), or neither.
705 	if (coder->this_index != NULL) {
706 		// (2) The latest Index is available. Use its memory usage.
707 		this_index_memusage = lzma_index_memused(coder->this_index);
708 
709 	} else if (coder->sequence == SEQ_INDEX_DECODE) {
710 		// (3) The Index decoder is activate and hasn't yet stored
711 		// the new index in coder->this_index. Get the memory usage
712 		// information from the Index decoder.
713 		//
714 		// NOTE: If the Index decoder doesn't yet know how much memory
715 		// it will eventually need, it will return a tiny value here.
716 		uint64_t dummy;
717 		if (coder->index_decoder.memconfig(coder->index_decoder.coder,
718 					&this_index_memusage, &dummy, 0)
719 				!= LZMA_OK) {
720 			assert(0);
721 			return LZMA_PROG_ERROR;
722 		}
723 	}
724 
725 	// Now we know the total memory usage/requirement. If we had neither
726 	// old Indexes nor a new Index, this will be zero which isn't
727 	// acceptable as lzma_memusage() has to return non-zero on success
728 	// and even with an empty .xz file we will end up with a lzma_index
729 	// that takes some memory.
730 	*memusage = combined_index_memusage + this_index_memusage;
731 	if (*memusage == 0)
732 		*memusage = lzma_index_memusage(1, 0);
733 
734 	*old_memlimit = coder->memlimit;
735 
736 	// If requested, set a new memory usage limit.
737 	if (new_memlimit != 0) {
738 		if (new_memlimit < *memusage)
739 			return LZMA_MEMLIMIT_ERROR;
740 
741 		// In the condition (3) we need to tell the Index decoder
742 		// its new memory usage limit.
743 		if (coder->this_index == NULL
744 				&& coder->sequence == SEQ_INDEX_DECODE) {
745 			const uint64_t idec_new_memlimit = new_memlimit
746 					- combined_index_memusage;
747 
748 			assert(this_index_memusage > 0);
749 			assert(idec_new_memlimit > 0);
750 
751 			uint64_t dummy1;
752 			uint64_t dummy2;
753 
754 			if (coder->index_decoder.memconfig(
755 					coder->index_decoder.coder,
756 					&dummy1, &dummy2, idec_new_memlimit)
757 					!= LZMA_OK) {
758 				assert(0);
759 				return LZMA_PROG_ERROR;
760 			}
761 		}
762 
763 		coder->memlimit = new_memlimit;
764 	}
765 
766 	return LZMA_OK;
767 }
768 
769 
770 static void
771 file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
772 {
773 	lzma_file_info_coder *coder = coder_ptr;
774 
775 	lzma_next_end(&coder->index_decoder, allocator);
776 	lzma_index_end(coder->this_index, allocator);
777 	lzma_index_end(coder->combined_index, allocator);
778 
779 	lzma_free(coder, allocator);
780 	return;
781 }
782 
783 
784 static lzma_ret
785 lzma_file_info_decoder_init(lzma_next_coder *next,
786 		const lzma_allocator *allocator, uint64_t *seek_pos,
787 		lzma_index **dest_index,
788 		uint64_t memlimit, uint64_t file_size)
789 {
790 	lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
791 
792 	if (dest_index == NULL)
793 		return LZMA_PROG_ERROR;
794 
795 	lzma_file_info_coder *coder = next->coder;
796 	if (coder == NULL) {
797 		coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
798 		if (coder == NULL)
799 			return LZMA_MEM_ERROR;
800 
801 		next->coder = coder;
802 		next->code = &file_info_decode;
803 		next->end = &file_info_decoder_end;
804 		next->memconfig = &file_info_decoder_memconfig;
805 
806 		coder->index_decoder = LZMA_NEXT_CODER_INIT;
807 		coder->this_index = NULL;
808 		coder->combined_index = NULL;
809 	}
810 
811 	coder->sequence = SEQ_MAGIC_BYTES;
812 	coder->file_cur_pos = 0;
813 	coder->file_target_pos = 0;
814 	coder->file_size = file_size;
815 
816 	lzma_index_end(coder->this_index, allocator);
817 	coder->this_index = NULL;
818 
819 	lzma_index_end(coder->combined_index, allocator);
820 	coder->combined_index = NULL;
821 
822 	coder->stream_padding = 0;
823 
824 	coder->dest_index = dest_index;
825 	coder->external_seek_pos = seek_pos;
826 
827 	// If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
828 	// won't return 0 (which would indicate an error).
829 	coder->memlimit = my_max(1, memlimit);
830 
831 	// Prepare these for reading the first Stream Header into coder->temp.
832 	coder->temp_pos = 0;
833 	coder->temp_size = LZMA_STREAM_HEADER_SIZE;
834 
835 	return LZMA_OK;
836 }
837 
838 
839 extern LZMA_API(lzma_ret)
840 lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
841 		uint64_t memlimit, uint64_t file_size)
842 {
843 	lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
844 			dest_index, memlimit, file_size);
845 
846 	// We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
847 	// lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
848 	// combination in a sane way. Applications still need to be careful
849 	// if they use LZMA_FINISH so that they remember to reset it back
850 	// to LZMA_RUN after seeking if needed.
851 	strm->internal->supported_actions[LZMA_RUN] = true;
852 	strm->internal->supported_actions[LZMA_FINISH] = true;
853 
854 	return LZMA_OK;
855 }
856