// xref: /freebsd/contrib/xz/src/liblzma/common/index_decoder.c (revision 3b35e7ee8de9b0260149a2b77e87a2b9c7a36244)
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       index_decoder.c
/// \brief      Decodes the Index field
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
1181ad8388SMartin Matuska 
#include "index_decoder.h"
#include "check.h"
1481ad8388SMartin Matuska 
1581ad8388SMartin Matuska 
161456f0f9SXin LI typedef struct {
1781ad8388SMartin Matuska 	enum {
1881ad8388SMartin Matuska 		SEQ_INDICATOR,
1981ad8388SMartin Matuska 		SEQ_COUNT,
2081ad8388SMartin Matuska 		SEQ_MEMUSAGE,
2181ad8388SMartin Matuska 		SEQ_UNPADDED,
2281ad8388SMartin Matuska 		SEQ_UNCOMPRESSED,
2381ad8388SMartin Matuska 		SEQ_PADDING_INIT,
2481ad8388SMartin Matuska 		SEQ_PADDING,
2581ad8388SMartin Matuska 		SEQ_CRC32,
2681ad8388SMartin Matuska 	} sequence;
2781ad8388SMartin Matuska 
2881ad8388SMartin Matuska 	/// Memory usage limit
2981ad8388SMartin Matuska 	uint64_t memlimit;
3081ad8388SMartin Matuska 
3181ad8388SMartin Matuska 	/// Target Index
3281ad8388SMartin Matuska 	lzma_index *index;
3381ad8388SMartin Matuska 
3481ad8388SMartin Matuska 	/// Pointer give by the application, which is set after
3581ad8388SMartin Matuska 	/// successful decoding.
3681ad8388SMartin Matuska 	lzma_index **index_ptr;
3781ad8388SMartin Matuska 
3881ad8388SMartin Matuska 	/// Number of Records left to decode.
3981ad8388SMartin Matuska 	lzma_vli count;
4081ad8388SMartin Matuska 
4181ad8388SMartin Matuska 	/// The most recent Unpadded Size field
4281ad8388SMartin Matuska 	lzma_vli unpadded_size;
4381ad8388SMartin Matuska 
4481ad8388SMartin Matuska 	/// The most recent Uncompressed Size field
4581ad8388SMartin Matuska 	lzma_vli uncompressed_size;
4681ad8388SMartin Matuska 
4781ad8388SMartin Matuska 	/// Position in integers
4881ad8388SMartin Matuska 	size_t pos;
4981ad8388SMartin Matuska 
5081ad8388SMartin Matuska 	/// CRC32 of the List of Records field
5181ad8388SMartin Matuska 	uint32_t crc32;
521456f0f9SXin LI } lzma_index_coder;
5381ad8388SMartin Matuska 
5481ad8388SMartin Matuska 
5581ad8388SMartin Matuska static lzma_ret
561456f0f9SXin LI index_decode(void *coder_ptr, const lzma_allocator *allocator,
5781ad8388SMartin Matuska 		const uint8_t *restrict in, size_t *restrict in_pos,
58e24134bcSMartin Matuska 		size_t in_size,
59e24134bcSMartin Matuska 		uint8_t *restrict out lzma_attribute((__unused__)),
60e24134bcSMartin Matuska 		size_t *restrict out_pos lzma_attribute((__unused__)),
61e24134bcSMartin Matuska 		size_t out_size lzma_attribute((__unused__)),
62e24134bcSMartin Matuska 		lzma_action action lzma_attribute((__unused__)))
6381ad8388SMartin Matuska {
641456f0f9SXin LI 	lzma_index_coder *coder = coder_ptr;
651456f0f9SXin LI 
6681ad8388SMartin Matuska 	// Similar optimization as in index_encoder.c
6781ad8388SMartin Matuska 	const size_t in_start = *in_pos;
6881ad8388SMartin Matuska 	lzma_ret ret = LZMA_OK;
6981ad8388SMartin Matuska 
7081ad8388SMartin Matuska 	while (*in_pos < in_size)
7181ad8388SMartin Matuska 	switch (coder->sequence) {
7281ad8388SMartin Matuska 	case SEQ_INDICATOR:
7381ad8388SMartin Matuska 		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
7481ad8388SMartin Matuska 		// LZMA_FORMAT_ERROR, because a typical usage case for Index
7581ad8388SMartin Matuska 		// decoder is when parsing the Stream backwards. If seeking
7681ad8388SMartin Matuska 		// backward from the Stream Footer gives us something that
7781ad8388SMartin Matuska 		// doesn't begin with Index Indicator, the file is considered
7881ad8388SMartin Matuska 		// corrupt, not "programming error" or "unrecognized file
7981ad8388SMartin Matuska 		// format". One could argue that the application should
8081ad8388SMartin Matuska 		// verify the Index Indicator before trying to decode the
8181ad8388SMartin Matuska 		// Index, but well, I suppose it is simpler this way.
82047153b4SXin LI 		if (in[(*in_pos)++] != INDEX_INDICATOR)
8381ad8388SMartin Matuska 			return LZMA_DATA_ERROR;
8481ad8388SMartin Matuska 
8581ad8388SMartin Matuska 		coder->sequence = SEQ_COUNT;
8681ad8388SMartin Matuska 		break;
8781ad8388SMartin Matuska 
8881ad8388SMartin Matuska 	case SEQ_COUNT:
8981ad8388SMartin Matuska 		ret = lzma_vli_decode(&coder->count, &coder->pos,
9081ad8388SMartin Matuska 				in, in_pos, in_size);
9181ad8388SMartin Matuska 		if (ret != LZMA_STREAM_END)
9281ad8388SMartin Matuska 			goto out;
9381ad8388SMartin Matuska 
9481ad8388SMartin Matuska 		coder->pos = 0;
9581ad8388SMartin Matuska 		coder->sequence = SEQ_MEMUSAGE;
9681ad8388SMartin Matuska 
9781ad8388SMartin Matuska 	// Fall through
9881ad8388SMartin Matuska 
9981ad8388SMartin Matuska 	case SEQ_MEMUSAGE:
10081ad8388SMartin Matuska 		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
10181ad8388SMartin Matuska 			ret = LZMA_MEMLIMIT_ERROR;
10281ad8388SMartin Matuska 			goto out;
10381ad8388SMartin Matuska 		}
10481ad8388SMartin Matuska 
10581ad8388SMartin Matuska 		// Tell the Index handling code how many Records this
10681ad8388SMartin Matuska 		// Index has to allow it to allocate memory more efficiently.
10781ad8388SMartin Matuska 		lzma_index_prealloc(coder->index, coder->count);
10881ad8388SMartin Matuska 
10981ad8388SMartin Matuska 		ret = LZMA_OK;
11081ad8388SMartin Matuska 		coder->sequence = coder->count == 0
11181ad8388SMartin Matuska 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
11281ad8388SMartin Matuska 		break;
11381ad8388SMartin Matuska 
11481ad8388SMartin Matuska 	case SEQ_UNPADDED:
11581ad8388SMartin Matuska 	case SEQ_UNCOMPRESSED: {
11681ad8388SMartin Matuska 		lzma_vli *size = coder->sequence == SEQ_UNPADDED
11781ad8388SMartin Matuska 				? &coder->unpadded_size
11881ad8388SMartin Matuska 				: &coder->uncompressed_size;
11981ad8388SMartin Matuska 
12081ad8388SMartin Matuska 		ret = lzma_vli_decode(size, &coder->pos,
12181ad8388SMartin Matuska 				in, in_pos, in_size);
12281ad8388SMartin Matuska 		if (ret != LZMA_STREAM_END)
12381ad8388SMartin Matuska 			goto out;
12481ad8388SMartin Matuska 
12581ad8388SMartin Matuska 		ret = LZMA_OK;
12681ad8388SMartin Matuska 		coder->pos = 0;
12781ad8388SMartin Matuska 
12881ad8388SMartin Matuska 		if (coder->sequence == SEQ_UNPADDED) {
12981ad8388SMartin Matuska 			// Validate that encoded Unpadded Size isn't too small
13081ad8388SMartin Matuska 			// or too big.
13181ad8388SMartin Matuska 			if (coder->unpadded_size < UNPADDED_SIZE_MIN
13281ad8388SMartin Matuska 					|| coder->unpadded_size
13381ad8388SMartin Matuska 						> UNPADDED_SIZE_MAX)
13481ad8388SMartin Matuska 				return LZMA_DATA_ERROR;
13581ad8388SMartin Matuska 
13681ad8388SMartin Matuska 			coder->sequence = SEQ_UNCOMPRESSED;
13781ad8388SMartin Matuska 		} else {
13881ad8388SMartin Matuska 			// Add the decoded Record to the Index.
13981ad8388SMartin Matuska 			return_if_error(lzma_index_append(
14081ad8388SMartin Matuska 					coder->index, allocator,
14181ad8388SMartin Matuska 					coder->unpadded_size,
14281ad8388SMartin Matuska 					coder->uncompressed_size));
14381ad8388SMartin Matuska 
14481ad8388SMartin Matuska 			// Check if this was the last Record.
14581ad8388SMartin Matuska 			coder->sequence = --coder->count == 0
14681ad8388SMartin Matuska 					? SEQ_PADDING_INIT
14781ad8388SMartin Matuska 					: SEQ_UNPADDED;
14881ad8388SMartin Matuska 		}
14981ad8388SMartin Matuska 
15081ad8388SMartin Matuska 		break;
15181ad8388SMartin Matuska 	}
15281ad8388SMartin Matuska 
15381ad8388SMartin Matuska 	case SEQ_PADDING_INIT:
15481ad8388SMartin Matuska 		coder->pos = lzma_index_padding_size(coder->index);
15581ad8388SMartin Matuska 		coder->sequence = SEQ_PADDING;
15681ad8388SMartin Matuska 
15781ad8388SMartin Matuska 	// Fall through
15881ad8388SMartin Matuska 
15981ad8388SMartin Matuska 	case SEQ_PADDING:
16081ad8388SMartin Matuska 		if (coder->pos > 0) {
16181ad8388SMartin Matuska 			--coder->pos;
16281ad8388SMartin Matuska 			if (in[(*in_pos)++] != 0x00)
16381ad8388SMartin Matuska 				return LZMA_DATA_ERROR;
16481ad8388SMartin Matuska 
16581ad8388SMartin Matuska 			break;
16681ad8388SMartin Matuska 		}
16781ad8388SMartin Matuska 
16881ad8388SMartin Matuska 		// Finish the CRC32 calculation.
16981ad8388SMartin Matuska 		coder->crc32 = lzma_crc32(in + in_start,
17081ad8388SMartin Matuska 				*in_pos - in_start, coder->crc32);
17181ad8388SMartin Matuska 
17281ad8388SMartin Matuska 		coder->sequence = SEQ_CRC32;
17381ad8388SMartin Matuska 
17481ad8388SMartin Matuska 	// Fall through
17581ad8388SMartin Matuska 
17681ad8388SMartin Matuska 	case SEQ_CRC32:
17781ad8388SMartin Matuska 		do {
17881ad8388SMartin Matuska 			if (*in_pos == in_size)
17981ad8388SMartin Matuska 				return LZMA_OK;
18081ad8388SMartin Matuska 
18181ad8388SMartin Matuska 			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
18273ed8e77SXin LI 					!= in[(*in_pos)++]) {
18373ed8e77SXin LI #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
18481ad8388SMartin Matuska 				return LZMA_DATA_ERROR;
18573ed8e77SXin LI #endif
18673ed8e77SXin LI 			}
18781ad8388SMartin Matuska 
18881ad8388SMartin Matuska 		} while (++coder->pos < 4);
18981ad8388SMartin Matuska 
19081ad8388SMartin Matuska 		// Decoding was successful, now we can let the application
19181ad8388SMartin Matuska 		// see the decoded Index.
19281ad8388SMartin Matuska 		*coder->index_ptr = coder->index;
19381ad8388SMartin Matuska 
19481ad8388SMartin Matuska 		// Make index NULL so we don't free it unintentionally.
19581ad8388SMartin Matuska 		coder->index = NULL;
19681ad8388SMartin Matuska 
19781ad8388SMartin Matuska 		return LZMA_STREAM_END;
19881ad8388SMartin Matuska 
19981ad8388SMartin Matuska 	default:
20081ad8388SMartin Matuska 		assert(0);
20181ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
20281ad8388SMartin Matuska 	}
20381ad8388SMartin Matuska 
20481ad8388SMartin Matuska out:
205c917796cSXin LI 	// Update the CRC32.
206c917796cSXin LI 	//
207c917796cSXin LI 	// Avoid null pointer + 0 (undefined behavior) in "in + in_start".
208c917796cSXin LI 	// In such a case we had no input and thus in_used == 0.
209c917796cSXin LI 	{
210c917796cSXin LI 		const size_t in_used = *in_pos - in_start;
211c917796cSXin LI 		if (in_used > 0)
21281ad8388SMartin Matuska 			coder->crc32 = lzma_crc32(in + in_start,
213c917796cSXin LI 					in_used, coder->crc32);
214c917796cSXin LI 	}
21581ad8388SMartin Matuska 
21681ad8388SMartin Matuska 	return ret;
21781ad8388SMartin Matuska }
21881ad8388SMartin Matuska 
21981ad8388SMartin Matuska 
22081ad8388SMartin Matuska static void
2211456f0f9SXin LI index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
22281ad8388SMartin Matuska {
2231456f0f9SXin LI 	lzma_index_coder *coder = coder_ptr;
22481ad8388SMartin Matuska 	lzma_index_end(coder->index, allocator);
22581ad8388SMartin Matuska 	lzma_free(coder, allocator);
22681ad8388SMartin Matuska 	return;
22781ad8388SMartin Matuska }
22881ad8388SMartin Matuska 
22981ad8388SMartin Matuska 
23081ad8388SMartin Matuska static lzma_ret
2311456f0f9SXin LI index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
23281ad8388SMartin Matuska 		uint64_t *old_memlimit, uint64_t new_memlimit)
23381ad8388SMartin Matuska {
2341456f0f9SXin LI 	lzma_index_coder *coder = coder_ptr;
2351456f0f9SXin LI 
23681ad8388SMartin Matuska 	*memusage = lzma_index_memusage(1, coder->count);
23781ad8388SMartin Matuska 	*old_memlimit = coder->memlimit;
23881ad8388SMartin Matuska 
23981ad8388SMartin Matuska 	if (new_memlimit != 0) {
24081ad8388SMartin Matuska 		if (new_memlimit < *memusage)
24181ad8388SMartin Matuska 			return LZMA_MEMLIMIT_ERROR;
24281ad8388SMartin Matuska 
24381ad8388SMartin Matuska 		coder->memlimit = new_memlimit;
24481ad8388SMartin Matuska 	}
24581ad8388SMartin Matuska 
24681ad8388SMartin Matuska 	return LZMA_OK;
24781ad8388SMartin Matuska }
24881ad8388SMartin Matuska 
24981ad8388SMartin Matuska 
25081ad8388SMartin Matuska static lzma_ret
2511456f0f9SXin LI index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
25281ad8388SMartin Matuska 		lzma_index **i, uint64_t memlimit)
25381ad8388SMartin Matuska {
25481ad8388SMartin Matuska 	// Remember the pointer given by the application. We will set it
25581ad8388SMartin Matuska 	// to point to the decoded Index only if decoding is successful.
25681ad8388SMartin Matuska 	// Before that, keep it NULL so that applications can always safely
25781ad8388SMartin Matuska 	// pass it to lzma_index_end() no matter did decoding succeed or not.
25881ad8388SMartin Matuska 	coder->index_ptr = i;
25981ad8388SMartin Matuska 	*i = NULL;
26081ad8388SMartin Matuska 
26181ad8388SMartin Matuska 	// We always allocate a new lzma_index.
26281ad8388SMartin Matuska 	coder->index = lzma_index_init(allocator);
26381ad8388SMartin Matuska 	if (coder->index == NULL)
26481ad8388SMartin Matuska 		return LZMA_MEM_ERROR;
26581ad8388SMartin Matuska 
26681ad8388SMartin Matuska 	// Initialize the rest.
26781ad8388SMartin Matuska 	coder->sequence = SEQ_INDICATOR;
268b71a5db3SXin LI 	coder->memlimit = my_max(1, memlimit);
26981ad8388SMartin Matuska 	coder->count = 0; // Needs to be initialized due to _memconfig().
27081ad8388SMartin Matuska 	coder->pos = 0;
27181ad8388SMartin Matuska 	coder->crc32 = 0;
27281ad8388SMartin Matuska 
27381ad8388SMartin Matuska 	return LZMA_OK;
27481ad8388SMartin Matuska }
27581ad8388SMartin Matuska 
27681ad8388SMartin Matuska 
27773ed8e77SXin LI extern lzma_ret
27873ed8e77SXin LI lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
27981ad8388SMartin Matuska 		lzma_index **i, uint64_t memlimit)
28081ad8388SMartin Matuska {
28173ed8e77SXin LI 	lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
28281ad8388SMartin Matuska 
283b71a5db3SXin LI 	if (i == NULL)
28481ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
28581ad8388SMartin Matuska 
2861456f0f9SXin LI 	lzma_index_coder *coder = next->coder;
2871456f0f9SXin LI 	if (coder == NULL) {
2881456f0f9SXin LI 		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
2891456f0f9SXin LI 		if (coder == NULL)
29081ad8388SMartin Matuska 			return LZMA_MEM_ERROR;
29181ad8388SMartin Matuska 
2921456f0f9SXin LI 		next->coder = coder;
29381ad8388SMartin Matuska 		next->code = &index_decode;
29481ad8388SMartin Matuska 		next->end = &index_decoder_end;
29581ad8388SMartin Matuska 		next->memconfig = &index_decoder_memconfig;
2961456f0f9SXin LI 		coder->index = NULL;
29781ad8388SMartin Matuska 	} else {
2981456f0f9SXin LI 		lzma_index_end(coder->index, allocator);
29981ad8388SMartin Matuska 	}
30081ad8388SMartin Matuska 
3011456f0f9SXin LI 	return index_decoder_reset(coder, allocator, i, memlimit);
30281ad8388SMartin Matuska }
30381ad8388SMartin Matuska 
30481ad8388SMartin Matuska 
30581ad8388SMartin Matuska extern LZMA_API(lzma_ret)
30681ad8388SMartin Matuska lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
30781ad8388SMartin Matuska {
308*3b35e7eeSXin LI 	// If i isn't NULL, *i must always be initialized due to
309*3b35e7eeSXin LI 	// the wording in the API docs. This way it is initialized
310*3b35e7eeSXin LI 	// if we return LZMA_PROG_ERROR due to strm == NULL.
311*3b35e7eeSXin LI 	if (i != NULL)
312*3b35e7eeSXin LI 		*i = NULL;
313*3b35e7eeSXin LI 
31473ed8e77SXin LI 	lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
31581ad8388SMartin Matuska 
31681ad8388SMartin Matuska 	strm->internal->supported_actions[LZMA_RUN] = true;
317542aef48SMartin Matuska 	strm->internal->supported_actions[LZMA_FINISH] = true;
31881ad8388SMartin Matuska 
31981ad8388SMartin Matuska 	return LZMA_OK;
32081ad8388SMartin Matuska }
32181ad8388SMartin Matuska 
32281ad8388SMartin Matuska 
32381ad8388SMartin Matuska extern LZMA_API(lzma_ret)
32453200025SRui Paulo lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
32553200025SRui Paulo 		const lzma_allocator *allocator,
32681ad8388SMartin Matuska 		const uint8_t *in, size_t *in_pos, size_t in_size)
32781ad8388SMartin Matuska {
328*3b35e7eeSXin LI 	// If i isn't NULL, *i must always be initialized due to
329*3b35e7eeSXin LI 	// the wording in the API docs.
330*3b35e7eeSXin LI 	if (i != NULL)
331*3b35e7eeSXin LI 		*i = NULL;
332*3b35e7eeSXin LI 
33381ad8388SMartin Matuska 	// Sanity checks
33481ad8388SMartin Matuska 	if (i == NULL || memlimit == NULL
33581ad8388SMartin Matuska 			|| in == NULL || in_pos == NULL || *in_pos > in_size)
33681ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
33781ad8388SMartin Matuska 
33881ad8388SMartin Matuska 	// Initialize the decoder.
3391456f0f9SXin LI 	lzma_index_coder coder;
34081ad8388SMartin Matuska 	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
34181ad8388SMartin Matuska 
34281ad8388SMartin Matuska 	// Store the input start position so that we can restore it in case
34381ad8388SMartin Matuska 	// of an error.
34481ad8388SMartin Matuska 	const size_t in_start = *in_pos;
34581ad8388SMartin Matuska 
34681ad8388SMartin Matuska 	// Do the actual decoding.
34781ad8388SMartin Matuska 	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
34881ad8388SMartin Matuska 			NULL, NULL, 0, LZMA_RUN);
34981ad8388SMartin Matuska 
35081ad8388SMartin Matuska 	if (ret == LZMA_STREAM_END) {
35181ad8388SMartin Matuska 		ret = LZMA_OK;
35281ad8388SMartin Matuska 	} else {
35381ad8388SMartin Matuska 		// Something went wrong, free the Index structure and restore
35481ad8388SMartin Matuska 		// the input position.
35581ad8388SMartin Matuska 		lzma_index_end(coder.index, allocator);
35681ad8388SMartin Matuska 		*in_pos = in_start;
35781ad8388SMartin Matuska 
35881ad8388SMartin Matuska 		if (ret == LZMA_OK) {
35981ad8388SMartin Matuska 			// The input is truncated or otherwise corrupt.
36081ad8388SMartin Matuska 			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
36181ad8388SMartin Matuska 			// like lzma_vli_decode() does in single-call mode.
36281ad8388SMartin Matuska 			ret = LZMA_DATA_ERROR;
36381ad8388SMartin Matuska 
36481ad8388SMartin Matuska 		} else if (ret == LZMA_MEMLIMIT_ERROR) {
36581ad8388SMartin Matuska 			// Tell the caller how much memory would have
36681ad8388SMartin Matuska 			// been needed.
36781ad8388SMartin Matuska 			*memlimit = lzma_index_memusage(1, coder.count);
36881ad8388SMartin Matuska 		}
36981ad8388SMartin Matuska 	}
37081ad8388SMartin Matuska 
37181ad8388SMartin Matuska 	return ret;
37281ad8388SMartin Matuska }
373