xref: /freebsd/contrib/xz/src/liblzma/common/index_hash.c (revision 128836d304d93f2d00eb14069c27089ab46c38d4)
13b35e7eeSXin LI // SPDX-License-Identifier: 0BSD
23b35e7eeSXin LI 
381ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
481ad8388SMartin Matuska //
581ad8388SMartin Matuska /// \file       index_hash.c
681ad8388SMartin Matuska /// \brief      Validates Index by using a hash function
781ad8388SMartin Matuska //
881ad8388SMartin Matuska //  Author:     Lasse Collin
981ad8388SMartin Matuska //
1081ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
1181ad8388SMartin Matuska 
1281ad8388SMartin Matuska #include "common.h"
1381ad8388SMartin Matuska #include "index.h"
1481ad8388SMartin Matuska #include "check.h"
1581ad8388SMartin Matuska 
1681ad8388SMartin Matuska 
1781ad8388SMartin Matuska typedef struct {
1881ad8388SMartin Matuska 	/// Sum of the Block sizes (including Block Padding)
1981ad8388SMartin Matuska 	lzma_vli blocks_size;
2081ad8388SMartin Matuska 
2181ad8388SMartin Matuska 	/// Sum of the Uncompressed Size fields
2281ad8388SMartin Matuska 	lzma_vli uncompressed_size;
2381ad8388SMartin Matuska 
2481ad8388SMartin Matuska 	/// Number of Records
2581ad8388SMartin Matuska 	lzma_vli count;
2681ad8388SMartin Matuska 
2781ad8388SMartin Matuska 	/// Size of the List of Index Records as bytes
2881ad8388SMartin Matuska 	lzma_vli index_list_size;
2981ad8388SMartin Matuska 
3081ad8388SMartin Matuska 	/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
3181ad8388SMartin Matuska 	lzma_check_state check;
3281ad8388SMartin Matuska 
3381ad8388SMartin Matuska } lzma_index_hash_info;
3481ad8388SMartin Matuska 
3581ad8388SMartin Matuska 
3681ad8388SMartin Matuska struct lzma_index_hash_s {
3781ad8388SMartin Matuska 	enum {
3881ad8388SMartin Matuska 		SEQ_BLOCK,
3981ad8388SMartin Matuska 		SEQ_COUNT,
4081ad8388SMartin Matuska 		SEQ_UNPADDED,
4181ad8388SMartin Matuska 		SEQ_UNCOMPRESSED,
4281ad8388SMartin Matuska 		SEQ_PADDING_INIT,
4381ad8388SMartin Matuska 		SEQ_PADDING,
4481ad8388SMartin Matuska 		SEQ_CRC32,
4581ad8388SMartin Matuska 	} sequence;
4681ad8388SMartin Matuska 
4781ad8388SMartin Matuska 	/// Information collected while decoding the actual Blocks.
4881ad8388SMartin Matuska 	lzma_index_hash_info blocks;
4981ad8388SMartin Matuska 
5081ad8388SMartin Matuska 	/// Information collected from the Index field.
5181ad8388SMartin Matuska 	lzma_index_hash_info records;
5281ad8388SMartin Matuska 
5381ad8388SMartin Matuska 	/// Number of Records not fully decoded
5481ad8388SMartin Matuska 	lzma_vli remaining;
5581ad8388SMartin Matuska 
5681ad8388SMartin Matuska 	/// Unpadded Size currently being read from an Index Record.
5781ad8388SMartin Matuska 	lzma_vli unpadded_size;
5881ad8388SMartin Matuska 
5981ad8388SMartin Matuska 	/// Uncompressed Size currently being read from an Index Record.
6081ad8388SMartin Matuska 	lzma_vli uncompressed_size;
6181ad8388SMartin Matuska 
6281ad8388SMartin Matuska 	/// Position in variable-length integers when decoding them from
6381ad8388SMartin Matuska 	/// the List of Records.
6481ad8388SMartin Matuska 	size_t pos;
6581ad8388SMartin Matuska 
6681ad8388SMartin Matuska 	/// CRC32 of the Index
6781ad8388SMartin Matuska 	uint32_t crc32;
6881ad8388SMartin Matuska };
6981ad8388SMartin Matuska 
7081ad8388SMartin Matuska 
7181ad8388SMartin Matuska extern LZMA_API(lzma_index_hash *)
lzma_index_hash_init(lzma_index_hash * index_hash,const lzma_allocator * allocator)7253200025SRui Paulo lzma_index_hash_init(lzma_index_hash *index_hash,
7353200025SRui Paulo 		const lzma_allocator *allocator)
7481ad8388SMartin Matuska {
7581ad8388SMartin Matuska 	if (index_hash == NULL) {
7681ad8388SMartin Matuska 		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
7781ad8388SMartin Matuska 		if (index_hash == NULL)
7881ad8388SMartin Matuska 			return NULL;
7981ad8388SMartin Matuska 	}
8081ad8388SMartin Matuska 
8181ad8388SMartin Matuska 	index_hash->sequence = SEQ_BLOCK;
8281ad8388SMartin Matuska 	index_hash->blocks.blocks_size = 0;
8381ad8388SMartin Matuska 	index_hash->blocks.uncompressed_size = 0;
8481ad8388SMartin Matuska 	index_hash->blocks.count = 0;
8581ad8388SMartin Matuska 	index_hash->blocks.index_list_size = 0;
8681ad8388SMartin Matuska 	index_hash->records.blocks_size = 0;
8781ad8388SMartin Matuska 	index_hash->records.uncompressed_size = 0;
8881ad8388SMartin Matuska 	index_hash->records.count = 0;
8981ad8388SMartin Matuska 	index_hash->records.index_list_size = 0;
9081ad8388SMartin Matuska 	index_hash->unpadded_size = 0;
9181ad8388SMartin Matuska 	index_hash->uncompressed_size = 0;
9281ad8388SMartin Matuska 	index_hash->pos = 0;
9381ad8388SMartin Matuska 	index_hash->crc32 = 0;
9481ad8388SMartin Matuska 
9581ad8388SMartin Matuska 	// These cannot fail because LZMA_CHECK_BEST is known to be supported.
9681ad8388SMartin Matuska 	(void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
9781ad8388SMartin Matuska 	(void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
9881ad8388SMartin Matuska 
9981ad8388SMartin Matuska 	return index_hash;
10081ad8388SMartin Matuska }
10181ad8388SMartin Matuska 
10281ad8388SMartin Matuska 
10381ad8388SMartin Matuska extern LZMA_API(void)
lzma_index_hash_end(lzma_index_hash * index_hash,const lzma_allocator * allocator)10453200025SRui Paulo lzma_index_hash_end(lzma_index_hash *index_hash,
10553200025SRui Paulo 		const lzma_allocator *allocator)
10681ad8388SMartin Matuska {
10781ad8388SMartin Matuska 	lzma_free(index_hash, allocator);
10881ad8388SMartin Matuska 	return;
10981ad8388SMartin Matuska }
11081ad8388SMartin Matuska 
11181ad8388SMartin Matuska 
11281ad8388SMartin Matuska extern LZMA_API(lzma_vli)
lzma_index_hash_size(const lzma_index_hash * index_hash)11381ad8388SMartin Matuska lzma_index_hash_size(const lzma_index_hash *index_hash)
11481ad8388SMartin Matuska {
11581ad8388SMartin Matuska 	// Get the size of the Index from ->blocks instead of ->records for
11681ad8388SMartin Matuska 	// cases where application wants to know the Index Size before
11781ad8388SMartin Matuska 	// decoding the Index.
11881ad8388SMartin Matuska 	return index_size(index_hash->blocks.count,
11981ad8388SMartin Matuska 			index_hash->blocks.index_list_size);
12081ad8388SMartin Matuska }
12181ad8388SMartin Matuska 
12281ad8388SMartin Matuska 
12381ad8388SMartin Matuska /// Updates the sizes and the hash without any validation.
1249e6bbe47SXin LI static void
hash_append(lzma_index_hash_info * info,lzma_vli unpadded_size,lzma_vli uncompressed_size)12581ad8388SMartin Matuska hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
12681ad8388SMartin Matuska 		lzma_vli uncompressed_size)
12781ad8388SMartin Matuska {
12881ad8388SMartin Matuska 	info->blocks_size += vli_ceil4(unpadded_size);
12981ad8388SMartin Matuska 	info->uncompressed_size += uncompressed_size;
13081ad8388SMartin Matuska 	info->index_list_size += lzma_vli_size(unpadded_size)
13181ad8388SMartin Matuska 			+ lzma_vli_size(uncompressed_size);
13281ad8388SMartin Matuska 	++info->count;
13381ad8388SMartin Matuska 
13481ad8388SMartin Matuska 	const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
13581ad8388SMartin Matuska 	lzma_check_update(&info->check, LZMA_CHECK_BEST,
13681ad8388SMartin Matuska 			(const uint8_t *)(sizes), sizeof(sizes));
13781ad8388SMartin Matuska 
1389e6bbe47SXin LI 	return;
13981ad8388SMartin Matuska }
14081ad8388SMartin Matuska 
14181ad8388SMartin Matuska 
14281ad8388SMartin Matuska extern LZMA_API(lzma_ret)
lzma_index_hash_append(lzma_index_hash * index_hash,lzma_vli unpadded_size,lzma_vli uncompressed_size)14381ad8388SMartin Matuska lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
14481ad8388SMartin Matuska 		lzma_vli uncompressed_size)
14581ad8388SMartin Matuska {
14681ad8388SMartin Matuska 	// Validate the arguments.
147047153b4SXin LI 	if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK
14881ad8388SMartin Matuska 			|| unpadded_size < UNPADDED_SIZE_MIN
14981ad8388SMartin Matuska 			|| unpadded_size > UNPADDED_SIZE_MAX
15081ad8388SMartin Matuska 			|| uncompressed_size > LZMA_VLI_MAX)
15181ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
15281ad8388SMartin Matuska 
15381ad8388SMartin Matuska 	// Update the hash.
1549e6bbe47SXin LI 	hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);
15581ad8388SMartin Matuska 
15681ad8388SMartin Matuska 	// Validate the properties of *info are still in allowed limits.
15781ad8388SMartin Matuska 	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
15881ad8388SMartin Matuska 			|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
15981ad8388SMartin Matuska 			|| index_size(index_hash->blocks.count,
16081ad8388SMartin Matuska 					index_hash->blocks.index_list_size)
16181ad8388SMartin Matuska 				> LZMA_BACKWARD_SIZE_MAX
16281ad8388SMartin Matuska 			|| index_stream_size(index_hash->blocks.blocks_size,
16381ad8388SMartin Matuska 					index_hash->blocks.count,
16481ad8388SMartin Matuska 					index_hash->blocks.index_list_size)
16581ad8388SMartin Matuska 				> LZMA_VLI_MAX)
16681ad8388SMartin Matuska 		return LZMA_DATA_ERROR;
16781ad8388SMartin Matuska 
16881ad8388SMartin Matuska 	return LZMA_OK;
16981ad8388SMartin Matuska }
17081ad8388SMartin Matuska 
17181ad8388SMartin Matuska 
17281ad8388SMartin Matuska extern LZMA_API(lzma_ret)
lzma_index_hash_decode(lzma_index_hash * index_hash,const uint8_t * in,size_t * in_pos,size_t in_size)17381ad8388SMartin Matuska lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
17481ad8388SMartin Matuska 		size_t *in_pos, size_t in_size)
17581ad8388SMartin Matuska {
17681ad8388SMartin Matuska 	// Catch zero input buffer here, because in contrast to Index encoder
17781ad8388SMartin Matuska 	// and decoder functions, applications call this function directly
17881ad8388SMartin Matuska 	// instead of via lzma_code(), which does the buffer checking.
17981ad8388SMartin Matuska 	if (*in_pos >= in_size)
18081ad8388SMartin Matuska 		return LZMA_BUF_ERROR;
18181ad8388SMartin Matuska 
18281ad8388SMartin Matuska 	// NOTE: This function has many similarities to index_encode() and
18381ad8388SMartin Matuska 	// index_decode() functions found from index_encoder.c and
18481ad8388SMartin Matuska 	// index_decoder.c. See the comments especially in index_encoder.c.
18581ad8388SMartin Matuska 	const size_t in_start = *in_pos;
18681ad8388SMartin Matuska 	lzma_ret ret = LZMA_OK;
18781ad8388SMartin Matuska 
18881ad8388SMartin Matuska 	while (*in_pos < in_size)
18981ad8388SMartin Matuska 	switch (index_hash->sequence) {
19081ad8388SMartin Matuska 	case SEQ_BLOCK:
19181ad8388SMartin Matuska 		// Check the Index Indicator is present.
192047153b4SXin LI 		if (in[(*in_pos)++] != INDEX_INDICATOR)
19381ad8388SMartin Matuska 			return LZMA_DATA_ERROR;
19481ad8388SMartin Matuska 
19581ad8388SMartin Matuska 		index_hash->sequence = SEQ_COUNT;
19681ad8388SMartin Matuska 		break;
19781ad8388SMartin Matuska 
19881ad8388SMartin Matuska 	case SEQ_COUNT: {
19981ad8388SMartin Matuska 		ret = lzma_vli_decode(&index_hash->remaining,
20081ad8388SMartin Matuska 				&index_hash->pos, in, in_pos, in_size);
20181ad8388SMartin Matuska 		if (ret != LZMA_STREAM_END)
20281ad8388SMartin Matuska 			goto out;
20381ad8388SMartin Matuska 
20481ad8388SMartin Matuska 		// The count must match the count of the Blocks decoded.
20581ad8388SMartin Matuska 		if (index_hash->remaining != index_hash->blocks.count)
20681ad8388SMartin Matuska 			return LZMA_DATA_ERROR;
20781ad8388SMartin Matuska 
20881ad8388SMartin Matuska 		ret = LZMA_OK;
20981ad8388SMartin Matuska 		index_hash->pos = 0;
21081ad8388SMartin Matuska 
21181ad8388SMartin Matuska 		// Handle the special case when there are no Blocks.
21281ad8388SMartin Matuska 		index_hash->sequence = index_hash->remaining == 0
21381ad8388SMartin Matuska 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
21481ad8388SMartin Matuska 		break;
21581ad8388SMartin Matuska 	}
21681ad8388SMartin Matuska 
21781ad8388SMartin Matuska 	case SEQ_UNPADDED:
21881ad8388SMartin Matuska 	case SEQ_UNCOMPRESSED: {
21981ad8388SMartin Matuska 		lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
22081ad8388SMartin Matuska 				? &index_hash->unpadded_size
22181ad8388SMartin Matuska 				: &index_hash->uncompressed_size;
22281ad8388SMartin Matuska 
22381ad8388SMartin Matuska 		ret = lzma_vli_decode(size, &index_hash->pos,
22481ad8388SMartin Matuska 				in, in_pos, in_size);
22581ad8388SMartin Matuska 		if (ret != LZMA_STREAM_END)
22681ad8388SMartin Matuska 			goto out;
22781ad8388SMartin Matuska 
22881ad8388SMartin Matuska 		ret = LZMA_OK;
22981ad8388SMartin Matuska 		index_hash->pos = 0;
23081ad8388SMartin Matuska 
23181ad8388SMartin Matuska 		if (index_hash->sequence == SEQ_UNPADDED) {
23281ad8388SMartin Matuska 			if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
23381ad8388SMartin Matuska 					|| index_hash->unpadded_size
23481ad8388SMartin Matuska 						> UNPADDED_SIZE_MAX)
23581ad8388SMartin Matuska 				return LZMA_DATA_ERROR;
23681ad8388SMartin Matuska 
23781ad8388SMartin Matuska 			index_hash->sequence = SEQ_UNCOMPRESSED;
23881ad8388SMartin Matuska 		} else {
23981ad8388SMartin Matuska 			// Update the hash.
2409e6bbe47SXin LI 			hash_append(&index_hash->records,
24181ad8388SMartin Matuska 					index_hash->unpadded_size,
2429e6bbe47SXin LI 					index_hash->uncompressed_size);
24381ad8388SMartin Matuska 
24481ad8388SMartin Matuska 			// Verify that we don't go over the known sizes. Note
24581ad8388SMartin Matuska 			// that this validation is simpler than the one used
24681ad8388SMartin Matuska 			// in lzma_index_hash_append(), because here we know
24781ad8388SMartin Matuska 			// that values in index_hash->blocks are already
24881ad8388SMartin Matuska 			// validated and we are fine as long as we don't
24981ad8388SMartin Matuska 			// exceed them in index_hash->records.
25081ad8388SMartin Matuska 			if (index_hash->blocks.blocks_size
25181ad8388SMartin Matuska 					< index_hash->records.blocks_size
25281ad8388SMartin Matuska 					|| index_hash->blocks.uncompressed_size
25381ad8388SMartin Matuska 					< index_hash->records.uncompressed_size
25481ad8388SMartin Matuska 					|| index_hash->blocks.index_list_size
25581ad8388SMartin Matuska 					< index_hash->records.index_list_size)
25681ad8388SMartin Matuska 				return LZMA_DATA_ERROR;
25781ad8388SMartin Matuska 
25881ad8388SMartin Matuska 			// Check if this was the last Record.
25981ad8388SMartin Matuska 			index_hash->sequence = --index_hash->remaining == 0
26081ad8388SMartin Matuska 					? SEQ_PADDING_INIT : SEQ_UNPADDED;
26181ad8388SMartin Matuska 		}
26281ad8388SMartin Matuska 
26381ad8388SMartin Matuska 		break;
26481ad8388SMartin Matuska 	}
26581ad8388SMartin Matuska 
26681ad8388SMartin Matuska 	case SEQ_PADDING_INIT:
26781ad8388SMartin Matuska 		index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
26881ad8388SMartin Matuska 				index_hash->records.count,
26981ad8388SMartin Matuska 				index_hash->records.index_list_size)) & 3;
27081ad8388SMartin Matuska 
271*128836d3SXin LI 		index_hash->sequence = SEQ_PADDING;
272*128836d3SXin LI 		FALLTHROUGH;
27381ad8388SMartin Matuska 
27481ad8388SMartin Matuska 	case SEQ_PADDING:
27581ad8388SMartin Matuska 		if (index_hash->pos > 0) {
27681ad8388SMartin Matuska 			--index_hash->pos;
27781ad8388SMartin Matuska 			if (in[(*in_pos)++] != 0x00)
27881ad8388SMartin Matuska 				return LZMA_DATA_ERROR;
27981ad8388SMartin Matuska 
28081ad8388SMartin Matuska 			break;
28181ad8388SMartin Matuska 		}
28281ad8388SMartin Matuska 
28381ad8388SMartin Matuska 		// Compare the sizes.
28481ad8388SMartin Matuska 		if (index_hash->blocks.blocks_size
28581ad8388SMartin Matuska 				!= index_hash->records.blocks_size
28681ad8388SMartin Matuska 				|| index_hash->blocks.uncompressed_size
28781ad8388SMartin Matuska 				!= index_hash->records.uncompressed_size
28881ad8388SMartin Matuska 				|| index_hash->blocks.index_list_size
28981ad8388SMartin Matuska 				!= index_hash->records.index_list_size)
29081ad8388SMartin Matuska 			return LZMA_DATA_ERROR;
29181ad8388SMartin Matuska 
29281ad8388SMartin Matuska 		// Finish the hashes and compare them.
29381ad8388SMartin Matuska 		lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
29481ad8388SMartin Matuska 		lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
29581ad8388SMartin Matuska 		if (memcmp(index_hash->blocks.check.buffer.u8,
29681ad8388SMartin Matuska 				index_hash->records.check.buffer.u8,
29781ad8388SMartin Matuska 				lzma_check_size(LZMA_CHECK_BEST)) != 0)
29881ad8388SMartin Matuska 			return LZMA_DATA_ERROR;
29981ad8388SMartin Matuska 
30081ad8388SMartin Matuska 		// Finish the CRC32 calculation.
30181ad8388SMartin Matuska 		index_hash->crc32 = lzma_crc32(in + in_start,
30281ad8388SMartin Matuska 				*in_pos - in_start, index_hash->crc32);
30381ad8388SMartin Matuska 
30481ad8388SMartin Matuska 		index_hash->sequence = SEQ_CRC32;
305*128836d3SXin LI 		FALLTHROUGH;
30681ad8388SMartin Matuska 
30781ad8388SMartin Matuska 	case SEQ_CRC32:
30881ad8388SMartin Matuska 		do {
30981ad8388SMartin Matuska 			if (*in_pos == in_size)
31081ad8388SMartin Matuska 				return LZMA_OK;
31181ad8388SMartin Matuska 
31281ad8388SMartin Matuska 			if (((index_hash->crc32 >> (index_hash->pos * 8))
31373ed8e77SXin LI 					& 0xFF) != in[(*in_pos)++]) {
31473ed8e77SXin LI #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
31581ad8388SMartin Matuska 				return LZMA_DATA_ERROR;
31673ed8e77SXin LI #endif
31773ed8e77SXin LI 			}
31881ad8388SMartin Matuska 
31981ad8388SMartin Matuska 		} while (++index_hash->pos < 4);
32081ad8388SMartin Matuska 
32181ad8388SMartin Matuska 		return LZMA_STREAM_END;
32281ad8388SMartin Matuska 
32381ad8388SMartin Matuska 	default:
32481ad8388SMartin Matuska 		assert(0);
32581ad8388SMartin Matuska 		return LZMA_PROG_ERROR;
32681ad8388SMartin Matuska 	}
32781ad8388SMartin Matuska 
32881ad8388SMartin Matuska out:
329c917796cSXin LI 	// Update the CRC32.
330c917796cSXin LI 	//
331c917796cSXin LI 	// Avoid null pointer + 0 (undefined behavior) in "in + in_start".
332c917796cSXin LI 	// In such a case we had no input and thus in_used == 0.
333c917796cSXin LI 	{
334c917796cSXin LI 		const size_t in_used = *in_pos - in_start;
335c917796cSXin LI 		if (in_used > 0)
33681ad8388SMartin Matuska 			index_hash->crc32 = lzma_crc32(in + in_start,
337c917796cSXin LI 					in_used, index_hash->crc32);
338c917796cSXin LI 	}
33981ad8388SMartin Matuska 
34081ad8388SMartin Matuska 	return ret;
34181ad8388SMartin Matuska }
342