13b35e7eeSXin LI // SPDX-License-Identifier: 0BSD
23b35e7eeSXin LI
381ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
481ad8388SMartin Matuska //
581ad8388SMartin Matuska /// \file index_hash.c
681ad8388SMartin Matuska /// \brief Validates Index by using a hash function
781ad8388SMartin Matuska //
881ad8388SMartin Matuska // Author: Lasse Collin
981ad8388SMartin Matuska //
1081ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
1181ad8388SMartin Matuska
1281ad8388SMartin Matuska #include "common.h"
1381ad8388SMartin Matuska #include "index.h"
1481ad8388SMartin Matuska #include "check.h"
1581ad8388SMartin Matuska
1681ad8388SMartin Matuska
1781ad8388SMartin Matuska typedef struct {
1881ad8388SMartin Matuska /// Sum of the Block sizes (including Block Padding)
1981ad8388SMartin Matuska lzma_vli blocks_size;
2081ad8388SMartin Matuska
2181ad8388SMartin Matuska /// Sum of the Uncompressed Size fields
2281ad8388SMartin Matuska lzma_vli uncompressed_size;
2381ad8388SMartin Matuska
2481ad8388SMartin Matuska /// Number of Records
2581ad8388SMartin Matuska lzma_vli count;
2681ad8388SMartin Matuska
2781ad8388SMartin Matuska /// Size of the List of Index Records as bytes
2881ad8388SMartin Matuska lzma_vli index_list_size;
2981ad8388SMartin Matuska
3081ad8388SMartin Matuska /// Check calculated from Unpadded Sizes and Uncompressed Sizes.
3181ad8388SMartin Matuska lzma_check_state check;
3281ad8388SMartin Matuska
3381ad8388SMartin Matuska } lzma_index_hash_info;
3481ad8388SMartin Matuska
3581ad8388SMartin Matuska
3681ad8388SMartin Matuska struct lzma_index_hash_s {
3781ad8388SMartin Matuska enum {
3881ad8388SMartin Matuska SEQ_BLOCK,
3981ad8388SMartin Matuska SEQ_COUNT,
4081ad8388SMartin Matuska SEQ_UNPADDED,
4181ad8388SMartin Matuska SEQ_UNCOMPRESSED,
4281ad8388SMartin Matuska SEQ_PADDING_INIT,
4381ad8388SMartin Matuska SEQ_PADDING,
4481ad8388SMartin Matuska SEQ_CRC32,
4581ad8388SMartin Matuska } sequence;
4681ad8388SMartin Matuska
4781ad8388SMartin Matuska /// Information collected while decoding the actual Blocks.
4881ad8388SMartin Matuska lzma_index_hash_info blocks;
4981ad8388SMartin Matuska
5081ad8388SMartin Matuska /// Information collected from the Index field.
5181ad8388SMartin Matuska lzma_index_hash_info records;
5281ad8388SMartin Matuska
5381ad8388SMartin Matuska /// Number of Records not fully decoded
5481ad8388SMartin Matuska lzma_vli remaining;
5581ad8388SMartin Matuska
5681ad8388SMartin Matuska /// Unpadded Size currently being read from an Index Record.
5781ad8388SMartin Matuska lzma_vli unpadded_size;
5881ad8388SMartin Matuska
5981ad8388SMartin Matuska /// Uncompressed Size currently being read from an Index Record.
6081ad8388SMartin Matuska lzma_vli uncompressed_size;
6181ad8388SMartin Matuska
6281ad8388SMartin Matuska /// Position in variable-length integers when decoding them from
6381ad8388SMartin Matuska /// the List of Records.
6481ad8388SMartin Matuska size_t pos;
6581ad8388SMartin Matuska
6681ad8388SMartin Matuska /// CRC32 of the Index
6781ad8388SMartin Matuska uint32_t crc32;
6881ad8388SMartin Matuska };
6981ad8388SMartin Matuska
7081ad8388SMartin Matuska
7181ad8388SMartin Matuska extern LZMA_API(lzma_index_hash *)
lzma_index_hash_init(lzma_index_hash * index_hash,const lzma_allocator * allocator)7253200025SRui Paulo lzma_index_hash_init(lzma_index_hash *index_hash,
7353200025SRui Paulo const lzma_allocator *allocator)
7481ad8388SMartin Matuska {
7581ad8388SMartin Matuska if (index_hash == NULL) {
7681ad8388SMartin Matuska index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
7781ad8388SMartin Matuska if (index_hash == NULL)
7881ad8388SMartin Matuska return NULL;
7981ad8388SMartin Matuska }
8081ad8388SMartin Matuska
8181ad8388SMartin Matuska index_hash->sequence = SEQ_BLOCK;
8281ad8388SMartin Matuska index_hash->blocks.blocks_size = 0;
8381ad8388SMartin Matuska index_hash->blocks.uncompressed_size = 0;
8481ad8388SMartin Matuska index_hash->blocks.count = 0;
8581ad8388SMartin Matuska index_hash->blocks.index_list_size = 0;
8681ad8388SMartin Matuska index_hash->records.blocks_size = 0;
8781ad8388SMartin Matuska index_hash->records.uncompressed_size = 0;
8881ad8388SMartin Matuska index_hash->records.count = 0;
8981ad8388SMartin Matuska index_hash->records.index_list_size = 0;
9081ad8388SMartin Matuska index_hash->unpadded_size = 0;
9181ad8388SMartin Matuska index_hash->uncompressed_size = 0;
9281ad8388SMartin Matuska index_hash->pos = 0;
9381ad8388SMartin Matuska index_hash->crc32 = 0;
9481ad8388SMartin Matuska
9581ad8388SMartin Matuska // These cannot fail because LZMA_CHECK_BEST is known to be supported.
9681ad8388SMartin Matuska (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
9781ad8388SMartin Matuska (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
9881ad8388SMartin Matuska
9981ad8388SMartin Matuska return index_hash;
10081ad8388SMartin Matuska }
10181ad8388SMartin Matuska
10281ad8388SMartin Matuska
10381ad8388SMartin Matuska extern LZMA_API(void)
lzma_index_hash_end(lzma_index_hash * index_hash,const lzma_allocator * allocator)10453200025SRui Paulo lzma_index_hash_end(lzma_index_hash *index_hash,
10553200025SRui Paulo const lzma_allocator *allocator)
10681ad8388SMartin Matuska {
10781ad8388SMartin Matuska lzma_free(index_hash, allocator);
10881ad8388SMartin Matuska return;
10981ad8388SMartin Matuska }
11081ad8388SMartin Matuska
11181ad8388SMartin Matuska
11281ad8388SMartin Matuska extern LZMA_API(lzma_vli)
lzma_index_hash_size(const lzma_index_hash * index_hash)11381ad8388SMartin Matuska lzma_index_hash_size(const lzma_index_hash *index_hash)
11481ad8388SMartin Matuska {
11581ad8388SMartin Matuska // Get the size of the Index from ->blocks instead of ->records for
11681ad8388SMartin Matuska // cases where application wants to know the Index Size before
11781ad8388SMartin Matuska // decoding the Index.
11881ad8388SMartin Matuska return index_size(index_hash->blocks.count,
11981ad8388SMartin Matuska index_hash->blocks.index_list_size);
12081ad8388SMartin Matuska }
12181ad8388SMartin Matuska
12281ad8388SMartin Matuska
12381ad8388SMartin Matuska /// Updates the sizes and the hash without any validation.
1249e6bbe47SXin LI static void
hash_append(lzma_index_hash_info * info,lzma_vli unpadded_size,lzma_vli uncompressed_size)12581ad8388SMartin Matuska hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
12681ad8388SMartin Matuska lzma_vli uncompressed_size)
12781ad8388SMartin Matuska {
12881ad8388SMartin Matuska info->blocks_size += vli_ceil4(unpadded_size);
12981ad8388SMartin Matuska info->uncompressed_size += uncompressed_size;
13081ad8388SMartin Matuska info->index_list_size += lzma_vli_size(unpadded_size)
13181ad8388SMartin Matuska + lzma_vli_size(uncompressed_size);
13281ad8388SMartin Matuska ++info->count;
13381ad8388SMartin Matuska
13481ad8388SMartin Matuska const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
13581ad8388SMartin Matuska lzma_check_update(&info->check, LZMA_CHECK_BEST,
13681ad8388SMartin Matuska (const uint8_t *)(sizes), sizeof(sizes));
13781ad8388SMartin Matuska
1389e6bbe47SXin LI return;
13981ad8388SMartin Matuska }
14081ad8388SMartin Matuska
14181ad8388SMartin Matuska
14281ad8388SMartin Matuska extern LZMA_API(lzma_ret)
lzma_index_hash_append(lzma_index_hash * index_hash,lzma_vli unpadded_size,lzma_vli uncompressed_size)14381ad8388SMartin Matuska lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
14481ad8388SMartin Matuska lzma_vli uncompressed_size)
14581ad8388SMartin Matuska {
14681ad8388SMartin Matuska // Validate the arguments.
147047153b4SXin LI if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK
14881ad8388SMartin Matuska || unpadded_size < UNPADDED_SIZE_MIN
14981ad8388SMartin Matuska || unpadded_size > UNPADDED_SIZE_MAX
15081ad8388SMartin Matuska || uncompressed_size > LZMA_VLI_MAX)
15181ad8388SMartin Matuska return LZMA_PROG_ERROR;
15281ad8388SMartin Matuska
15381ad8388SMartin Matuska // Update the hash.
1549e6bbe47SXin LI hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);
15581ad8388SMartin Matuska
15681ad8388SMartin Matuska // Validate the properties of *info are still in allowed limits.
15781ad8388SMartin Matuska if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
15881ad8388SMartin Matuska || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
15981ad8388SMartin Matuska || index_size(index_hash->blocks.count,
16081ad8388SMartin Matuska index_hash->blocks.index_list_size)
16181ad8388SMartin Matuska > LZMA_BACKWARD_SIZE_MAX
16281ad8388SMartin Matuska || index_stream_size(index_hash->blocks.blocks_size,
16381ad8388SMartin Matuska index_hash->blocks.count,
16481ad8388SMartin Matuska index_hash->blocks.index_list_size)
16581ad8388SMartin Matuska > LZMA_VLI_MAX)
16681ad8388SMartin Matuska return LZMA_DATA_ERROR;
16781ad8388SMartin Matuska
16881ad8388SMartin Matuska return LZMA_OK;
16981ad8388SMartin Matuska }
17081ad8388SMartin Matuska
17181ad8388SMartin Matuska
17281ad8388SMartin Matuska extern LZMA_API(lzma_ret)
lzma_index_hash_decode(lzma_index_hash * index_hash,const uint8_t * in,size_t * in_pos,size_t in_size)17381ad8388SMartin Matuska lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
17481ad8388SMartin Matuska size_t *in_pos, size_t in_size)
17581ad8388SMartin Matuska {
17681ad8388SMartin Matuska // Catch zero input buffer here, because in contrast to Index encoder
17781ad8388SMartin Matuska // and decoder functions, applications call this function directly
17881ad8388SMartin Matuska // instead of via lzma_code(), which does the buffer checking.
17981ad8388SMartin Matuska if (*in_pos >= in_size)
18081ad8388SMartin Matuska return LZMA_BUF_ERROR;
18181ad8388SMartin Matuska
18281ad8388SMartin Matuska // NOTE: This function has many similarities to index_encode() and
18381ad8388SMartin Matuska // index_decode() functions found from index_encoder.c and
18481ad8388SMartin Matuska // index_decoder.c. See the comments especially in index_encoder.c.
18581ad8388SMartin Matuska const size_t in_start = *in_pos;
18681ad8388SMartin Matuska lzma_ret ret = LZMA_OK;
18781ad8388SMartin Matuska
18881ad8388SMartin Matuska while (*in_pos < in_size)
18981ad8388SMartin Matuska switch (index_hash->sequence) {
19081ad8388SMartin Matuska case SEQ_BLOCK:
19181ad8388SMartin Matuska // Check the Index Indicator is present.
192047153b4SXin LI if (in[(*in_pos)++] != INDEX_INDICATOR)
19381ad8388SMartin Matuska return LZMA_DATA_ERROR;
19481ad8388SMartin Matuska
19581ad8388SMartin Matuska index_hash->sequence = SEQ_COUNT;
19681ad8388SMartin Matuska break;
19781ad8388SMartin Matuska
19881ad8388SMartin Matuska case SEQ_COUNT: {
19981ad8388SMartin Matuska ret = lzma_vli_decode(&index_hash->remaining,
20081ad8388SMartin Matuska &index_hash->pos, in, in_pos, in_size);
20181ad8388SMartin Matuska if (ret != LZMA_STREAM_END)
20281ad8388SMartin Matuska goto out;
20381ad8388SMartin Matuska
20481ad8388SMartin Matuska // The count must match the count of the Blocks decoded.
20581ad8388SMartin Matuska if (index_hash->remaining != index_hash->blocks.count)
20681ad8388SMartin Matuska return LZMA_DATA_ERROR;
20781ad8388SMartin Matuska
20881ad8388SMartin Matuska ret = LZMA_OK;
20981ad8388SMartin Matuska index_hash->pos = 0;
21081ad8388SMartin Matuska
21181ad8388SMartin Matuska // Handle the special case when there are no Blocks.
21281ad8388SMartin Matuska index_hash->sequence = index_hash->remaining == 0
21381ad8388SMartin Matuska ? SEQ_PADDING_INIT : SEQ_UNPADDED;
21481ad8388SMartin Matuska break;
21581ad8388SMartin Matuska }
21681ad8388SMartin Matuska
21781ad8388SMartin Matuska case SEQ_UNPADDED:
21881ad8388SMartin Matuska case SEQ_UNCOMPRESSED: {
21981ad8388SMartin Matuska lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
22081ad8388SMartin Matuska ? &index_hash->unpadded_size
22181ad8388SMartin Matuska : &index_hash->uncompressed_size;
22281ad8388SMartin Matuska
22381ad8388SMartin Matuska ret = lzma_vli_decode(size, &index_hash->pos,
22481ad8388SMartin Matuska in, in_pos, in_size);
22581ad8388SMartin Matuska if (ret != LZMA_STREAM_END)
22681ad8388SMartin Matuska goto out;
22781ad8388SMartin Matuska
22881ad8388SMartin Matuska ret = LZMA_OK;
22981ad8388SMartin Matuska index_hash->pos = 0;
23081ad8388SMartin Matuska
23181ad8388SMartin Matuska if (index_hash->sequence == SEQ_UNPADDED) {
23281ad8388SMartin Matuska if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
23381ad8388SMartin Matuska || index_hash->unpadded_size
23481ad8388SMartin Matuska > UNPADDED_SIZE_MAX)
23581ad8388SMartin Matuska return LZMA_DATA_ERROR;
23681ad8388SMartin Matuska
23781ad8388SMartin Matuska index_hash->sequence = SEQ_UNCOMPRESSED;
23881ad8388SMartin Matuska } else {
23981ad8388SMartin Matuska // Update the hash.
2409e6bbe47SXin LI hash_append(&index_hash->records,
24181ad8388SMartin Matuska index_hash->unpadded_size,
2429e6bbe47SXin LI index_hash->uncompressed_size);
24381ad8388SMartin Matuska
24481ad8388SMartin Matuska // Verify that we don't go over the known sizes. Note
24581ad8388SMartin Matuska // that this validation is simpler than the one used
24681ad8388SMartin Matuska // in lzma_index_hash_append(), because here we know
24781ad8388SMartin Matuska // that values in index_hash->blocks are already
24881ad8388SMartin Matuska // validated and we are fine as long as we don't
24981ad8388SMartin Matuska // exceed them in index_hash->records.
25081ad8388SMartin Matuska if (index_hash->blocks.blocks_size
25181ad8388SMartin Matuska < index_hash->records.blocks_size
25281ad8388SMartin Matuska || index_hash->blocks.uncompressed_size
25381ad8388SMartin Matuska < index_hash->records.uncompressed_size
25481ad8388SMartin Matuska || index_hash->blocks.index_list_size
25581ad8388SMartin Matuska < index_hash->records.index_list_size)
25681ad8388SMartin Matuska return LZMA_DATA_ERROR;
25781ad8388SMartin Matuska
25881ad8388SMartin Matuska // Check if this was the last Record.
25981ad8388SMartin Matuska index_hash->sequence = --index_hash->remaining == 0
26081ad8388SMartin Matuska ? SEQ_PADDING_INIT : SEQ_UNPADDED;
26181ad8388SMartin Matuska }
26281ad8388SMartin Matuska
26381ad8388SMartin Matuska break;
26481ad8388SMartin Matuska }
26581ad8388SMartin Matuska
26681ad8388SMartin Matuska case SEQ_PADDING_INIT:
26781ad8388SMartin Matuska index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
26881ad8388SMartin Matuska index_hash->records.count,
26981ad8388SMartin Matuska index_hash->records.index_list_size)) & 3;
27081ad8388SMartin Matuska
271*128836d3SXin LI index_hash->sequence = SEQ_PADDING;
272*128836d3SXin LI FALLTHROUGH;
27381ad8388SMartin Matuska
27481ad8388SMartin Matuska case SEQ_PADDING:
27581ad8388SMartin Matuska if (index_hash->pos > 0) {
27681ad8388SMartin Matuska --index_hash->pos;
27781ad8388SMartin Matuska if (in[(*in_pos)++] != 0x00)
27881ad8388SMartin Matuska return LZMA_DATA_ERROR;
27981ad8388SMartin Matuska
28081ad8388SMartin Matuska break;
28181ad8388SMartin Matuska }
28281ad8388SMartin Matuska
28381ad8388SMartin Matuska // Compare the sizes.
28481ad8388SMartin Matuska if (index_hash->blocks.blocks_size
28581ad8388SMartin Matuska != index_hash->records.blocks_size
28681ad8388SMartin Matuska || index_hash->blocks.uncompressed_size
28781ad8388SMartin Matuska != index_hash->records.uncompressed_size
28881ad8388SMartin Matuska || index_hash->blocks.index_list_size
28981ad8388SMartin Matuska != index_hash->records.index_list_size)
29081ad8388SMartin Matuska return LZMA_DATA_ERROR;
29181ad8388SMartin Matuska
29281ad8388SMartin Matuska // Finish the hashes and compare them.
29381ad8388SMartin Matuska lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
29481ad8388SMartin Matuska lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
29581ad8388SMartin Matuska if (memcmp(index_hash->blocks.check.buffer.u8,
29681ad8388SMartin Matuska index_hash->records.check.buffer.u8,
29781ad8388SMartin Matuska lzma_check_size(LZMA_CHECK_BEST)) != 0)
29881ad8388SMartin Matuska return LZMA_DATA_ERROR;
29981ad8388SMartin Matuska
30081ad8388SMartin Matuska // Finish the CRC32 calculation.
30181ad8388SMartin Matuska index_hash->crc32 = lzma_crc32(in + in_start,
30281ad8388SMartin Matuska *in_pos - in_start, index_hash->crc32);
30381ad8388SMartin Matuska
30481ad8388SMartin Matuska index_hash->sequence = SEQ_CRC32;
305*128836d3SXin LI FALLTHROUGH;
30681ad8388SMartin Matuska
30781ad8388SMartin Matuska case SEQ_CRC32:
30881ad8388SMartin Matuska do {
30981ad8388SMartin Matuska if (*in_pos == in_size)
31081ad8388SMartin Matuska return LZMA_OK;
31181ad8388SMartin Matuska
31281ad8388SMartin Matuska if (((index_hash->crc32 >> (index_hash->pos * 8))
31373ed8e77SXin LI & 0xFF) != in[(*in_pos)++]) {
31473ed8e77SXin LI #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
31581ad8388SMartin Matuska return LZMA_DATA_ERROR;
31673ed8e77SXin LI #endif
31773ed8e77SXin LI }
31881ad8388SMartin Matuska
31981ad8388SMartin Matuska } while (++index_hash->pos < 4);
32081ad8388SMartin Matuska
32181ad8388SMartin Matuska return LZMA_STREAM_END;
32281ad8388SMartin Matuska
32381ad8388SMartin Matuska default:
32481ad8388SMartin Matuska assert(0);
32581ad8388SMartin Matuska return LZMA_PROG_ERROR;
32681ad8388SMartin Matuska }
32781ad8388SMartin Matuska
32881ad8388SMartin Matuska out:
329c917796cSXin LI // Update the CRC32.
330c917796cSXin LI //
331c917796cSXin LI // Avoid null pointer + 0 (undefined behavior) in "in + in_start".
332c917796cSXin LI // In such a case we had no input and thus in_used == 0.
333c917796cSXin LI {
334c917796cSXin LI const size_t in_used = *in_pos - in_start;
335c917796cSXin LI if (in_used > 0)
33681ad8388SMartin Matuska index_hash->crc32 = lzma_crc32(in + in_start,
337c917796cSXin LI in_used, index_hash->crc32);
338c917796cSXin LI }
33981ad8388SMartin Matuska
34081ad8388SMartin Matuska return ret;
34181ad8388SMartin Matuska }
342