1*3b35e7eeSXin LI // SPDX-License-Identifier: 0BSD 2*3b35e7eeSXin LI 381ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 481ad8388SMartin Matuska // 581ad8388SMartin Matuska /// \file index_decoder.c 681ad8388SMartin Matuska /// \brief Decodes the Index field 781ad8388SMartin Matuska // 881ad8388SMartin Matuska // Author: Lasse Collin 981ad8388SMartin Matuska // 1081ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 1181ad8388SMartin Matuska 1273ed8e77SXin LI #include "index_decoder.h" 1381ad8388SMartin Matuska #include "check.h" 1481ad8388SMartin Matuska 1581ad8388SMartin Matuska 161456f0f9SXin LI typedef struct { 1781ad8388SMartin Matuska enum { 1881ad8388SMartin Matuska SEQ_INDICATOR, 1981ad8388SMartin Matuska SEQ_COUNT, 2081ad8388SMartin Matuska SEQ_MEMUSAGE, 2181ad8388SMartin Matuska SEQ_UNPADDED, 2281ad8388SMartin Matuska SEQ_UNCOMPRESSED, 2381ad8388SMartin Matuska SEQ_PADDING_INIT, 2481ad8388SMartin Matuska SEQ_PADDING, 2581ad8388SMartin Matuska SEQ_CRC32, 2681ad8388SMartin Matuska } sequence; 2781ad8388SMartin Matuska 2881ad8388SMartin Matuska /// Memory usage limit 2981ad8388SMartin Matuska uint64_t memlimit; 3081ad8388SMartin Matuska 3181ad8388SMartin Matuska /// Target Index 3281ad8388SMartin Matuska lzma_index *index; 3381ad8388SMartin Matuska 3481ad8388SMartin Matuska /// Pointer give by the application, which is set after 3581ad8388SMartin Matuska /// successful decoding. 3681ad8388SMartin Matuska lzma_index **index_ptr; 3781ad8388SMartin Matuska 3881ad8388SMartin Matuska /// Number of Records left to decode. 3981ad8388SMartin Matuska lzma_vli count; 4081ad8388SMartin Matuska 4181ad8388SMartin Matuska /// The most recent Unpadded Size field 4281ad8388SMartin Matuska lzma_vli unpadded_size; 4381ad8388SMartin Matuska 4481ad8388SMartin Matuska /// The most recent Uncompressed Size field 4581ad8388SMartin Matuska lzma_vli uncompressed_size; 4681ad8388SMartin Matuska 4781ad8388SMartin Matuska /// Position in integers 4881ad8388SMartin Matuska size_t pos; 4981ad8388SMartin Matuska 5081ad8388SMartin Matuska /// CRC32 of the List of Records field 5181ad8388SMartin Matuska uint32_t crc32; 521456f0f9SXin LI } lzma_index_coder; 5381ad8388SMartin Matuska 5481ad8388SMartin Matuska 5581ad8388SMartin Matuska static lzma_ret 561456f0f9SXin LI index_decode(void *coder_ptr, const lzma_allocator *allocator, 5781ad8388SMartin Matuska const uint8_t *restrict in, size_t *restrict in_pos, 58e24134bcSMartin Matuska size_t in_size, 59e24134bcSMartin Matuska uint8_t *restrict out lzma_attribute((__unused__)), 60e24134bcSMartin Matuska size_t *restrict out_pos lzma_attribute((__unused__)), 61e24134bcSMartin Matuska size_t out_size lzma_attribute((__unused__)), 62e24134bcSMartin Matuska lzma_action action lzma_attribute((__unused__))) 6381ad8388SMartin Matuska { 641456f0f9SXin LI lzma_index_coder *coder = coder_ptr; 651456f0f9SXin LI 6681ad8388SMartin Matuska // Similar optimization as in index_encoder.c 6781ad8388SMartin Matuska const size_t in_start = *in_pos; 6881ad8388SMartin Matuska lzma_ret ret = LZMA_OK; 6981ad8388SMartin Matuska 7081ad8388SMartin Matuska while (*in_pos < in_size) 7181ad8388SMartin Matuska switch (coder->sequence) { 7281ad8388SMartin Matuska case SEQ_INDICATOR: 7381ad8388SMartin Matuska // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or 7481ad8388SMartin Matuska // LZMA_FORMAT_ERROR, because a typical usage case for Index 7581ad8388SMartin Matuska // decoder is when parsing the Stream backwards. If seeking 7681ad8388SMartin Matuska // backward from the Stream Footer gives us something that 7781ad8388SMartin Matuska // doesn't begin with Index Indicator, the file is considered 7881ad8388SMartin Matuska // corrupt, not "programming error" or "unrecognized file 7981ad8388SMartin Matuska // format". One could argue that the application should 8081ad8388SMartin Matuska // verify the Index Indicator before trying to decode the 8181ad8388SMartin Matuska // Index, but well, I suppose it is simpler this way. 82047153b4SXin LI if (in[(*in_pos)++] != INDEX_INDICATOR) 8381ad8388SMartin Matuska return LZMA_DATA_ERROR; 8481ad8388SMartin Matuska 8581ad8388SMartin Matuska coder->sequence = SEQ_COUNT; 8681ad8388SMartin Matuska break; 8781ad8388SMartin Matuska 8881ad8388SMartin Matuska case SEQ_COUNT: 8981ad8388SMartin Matuska ret = lzma_vli_decode(&coder->count, &coder->pos, 9081ad8388SMartin Matuska in, in_pos, in_size); 9181ad8388SMartin Matuska if (ret != LZMA_STREAM_END) 9281ad8388SMartin Matuska goto out; 9381ad8388SMartin Matuska 9481ad8388SMartin Matuska coder->pos = 0; 9581ad8388SMartin Matuska coder->sequence = SEQ_MEMUSAGE; 9681ad8388SMartin Matuska 9781ad8388SMartin Matuska // Fall through 9881ad8388SMartin Matuska 9981ad8388SMartin Matuska case SEQ_MEMUSAGE: 10081ad8388SMartin Matuska if (lzma_index_memusage(1, coder->count) > coder->memlimit) { 10181ad8388SMartin Matuska ret = LZMA_MEMLIMIT_ERROR; 10281ad8388SMartin Matuska goto out; 10381ad8388SMartin Matuska } 10481ad8388SMartin Matuska 10581ad8388SMartin Matuska // Tell the Index handling code how many Records this 10681ad8388SMartin Matuska // Index has to allow it to allocate memory more efficiently. 10781ad8388SMartin Matuska lzma_index_prealloc(coder->index, coder->count); 10881ad8388SMartin Matuska 10981ad8388SMartin Matuska ret = LZMA_OK; 11081ad8388SMartin Matuska coder->sequence = coder->count == 0 11181ad8388SMartin Matuska ? SEQ_PADDING_INIT : SEQ_UNPADDED; 11281ad8388SMartin Matuska break; 11381ad8388SMartin Matuska 11481ad8388SMartin Matuska case SEQ_UNPADDED: 11581ad8388SMartin Matuska case SEQ_UNCOMPRESSED: { 11681ad8388SMartin Matuska lzma_vli *size = coder->sequence == SEQ_UNPADDED 11781ad8388SMartin Matuska ? &coder->unpadded_size 11881ad8388SMartin Matuska : &coder->uncompressed_size; 11981ad8388SMartin Matuska 12081ad8388SMartin Matuska ret = lzma_vli_decode(size, &coder->pos, 12181ad8388SMartin Matuska in, in_pos, in_size); 12281ad8388SMartin Matuska if (ret != LZMA_STREAM_END) 12381ad8388SMartin Matuska goto out; 12481ad8388SMartin Matuska 12581ad8388SMartin Matuska ret = LZMA_OK; 12681ad8388SMartin Matuska coder->pos = 0; 12781ad8388SMartin Matuska 12881ad8388SMartin Matuska if (coder->sequence == SEQ_UNPADDED) { 12981ad8388SMartin Matuska // Validate that encoded Unpadded Size isn't too small 13081ad8388SMartin Matuska // or too big. 13181ad8388SMartin Matuska if (coder->unpadded_size < UNPADDED_SIZE_MIN 13281ad8388SMartin Matuska || coder->unpadded_size 13381ad8388SMartin Matuska > UNPADDED_SIZE_MAX) 13481ad8388SMartin Matuska return LZMA_DATA_ERROR; 13581ad8388SMartin Matuska 13681ad8388SMartin Matuska coder->sequence = SEQ_UNCOMPRESSED; 13781ad8388SMartin Matuska } else { 13881ad8388SMartin Matuska // Add the decoded Record to the Index. 13981ad8388SMartin Matuska return_if_error(lzma_index_append( 14081ad8388SMartin Matuska coder->index, allocator, 14181ad8388SMartin Matuska coder->unpadded_size, 14281ad8388SMartin Matuska coder->uncompressed_size)); 14381ad8388SMartin Matuska 14481ad8388SMartin Matuska // Check if this was the last Record. 14581ad8388SMartin Matuska coder->sequence = --coder->count == 0 14681ad8388SMartin Matuska ? SEQ_PADDING_INIT 14781ad8388SMartin Matuska : SEQ_UNPADDED; 14881ad8388SMartin Matuska } 14981ad8388SMartin Matuska 15081ad8388SMartin Matuska break; 15181ad8388SMartin Matuska } 15281ad8388SMartin Matuska 15381ad8388SMartin Matuska case SEQ_PADDING_INIT: 15481ad8388SMartin Matuska coder->pos = lzma_index_padding_size(coder->index); 15581ad8388SMartin Matuska coder->sequence = SEQ_PADDING; 15681ad8388SMartin Matuska 15781ad8388SMartin Matuska // Fall through 15881ad8388SMartin Matuska 15981ad8388SMartin Matuska case SEQ_PADDING: 16081ad8388SMartin Matuska if (coder->pos > 0) { 16181ad8388SMartin Matuska --coder->pos; 16281ad8388SMartin Matuska if (in[(*in_pos)++] != 0x00) 16381ad8388SMartin Matuska return LZMA_DATA_ERROR; 16481ad8388SMartin Matuska 16581ad8388SMartin Matuska break; 16681ad8388SMartin Matuska } 16781ad8388SMartin Matuska 16881ad8388SMartin Matuska // Finish the CRC32 calculation. 16981ad8388SMartin Matuska coder->crc32 = lzma_crc32(in + in_start, 17081ad8388SMartin Matuska *in_pos - in_start, coder->crc32); 17181ad8388SMartin Matuska 17281ad8388SMartin Matuska coder->sequence = SEQ_CRC32; 17381ad8388SMartin Matuska 17481ad8388SMartin Matuska // Fall through 17581ad8388SMartin Matuska 17681ad8388SMartin Matuska case SEQ_CRC32: 17781ad8388SMartin Matuska do { 17881ad8388SMartin Matuska if (*in_pos == in_size) 17981ad8388SMartin Matuska return LZMA_OK; 18081ad8388SMartin Matuska 18181ad8388SMartin Matuska if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) 18273ed8e77SXin LI != in[(*in_pos)++]) { 18373ed8e77SXin LI #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 18481ad8388SMartin Matuska return LZMA_DATA_ERROR; 18573ed8e77SXin LI #endif 18673ed8e77SXin LI } 18781ad8388SMartin Matuska 18881ad8388SMartin Matuska } while (++coder->pos < 4); 18981ad8388SMartin Matuska 19081ad8388SMartin Matuska // Decoding was successful, now we can let the application 19181ad8388SMartin Matuska // see the decoded Index. 19281ad8388SMartin Matuska *coder->index_ptr = coder->index; 19381ad8388SMartin Matuska 19481ad8388SMartin Matuska // Make index NULL so we don't free it unintentionally. 19581ad8388SMartin Matuska coder->index = NULL; 19681ad8388SMartin Matuska 19781ad8388SMartin Matuska return LZMA_STREAM_END; 19881ad8388SMartin Matuska 19981ad8388SMartin Matuska default: 20081ad8388SMartin Matuska assert(0); 20181ad8388SMartin Matuska return LZMA_PROG_ERROR; 20281ad8388SMartin Matuska } 20381ad8388SMartin Matuska 20481ad8388SMartin Matuska out: 205c917796cSXin LI // Update the CRC32. 206c917796cSXin LI // 207c917796cSXin LI // Avoid null pointer + 0 (undefined behavior) in "in + in_start". 208c917796cSXin LI // In such a case we had no input and thus in_used == 0. 209c917796cSXin LI { 210c917796cSXin LI const size_t in_used = *in_pos - in_start; 211c917796cSXin LI if (in_used > 0) 21281ad8388SMartin Matuska coder->crc32 = lzma_crc32(in + in_start, 213c917796cSXin LI in_used, coder->crc32); 214c917796cSXin LI } 21581ad8388SMartin Matuska 21681ad8388SMartin Matuska return ret; 21781ad8388SMartin Matuska } 21881ad8388SMartin Matuska 21981ad8388SMartin Matuska 22081ad8388SMartin Matuska static void 2211456f0f9SXin LI index_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 22281ad8388SMartin Matuska { 2231456f0f9SXin LI lzma_index_coder *coder = coder_ptr; 22481ad8388SMartin Matuska lzma_index_end(coder->index, allocator); 22581ad8388SMartin Matuska lzma_free(coder, allocator); 22681ad8388SMartin Matuska return; 22781ad8388SMartin Matuska } 22881ad8388SMartin Matuska 22981ad8388SMartin Matuska 23081ad8388SMartin Matuska static lzma_ret 2311456f0f9SXin LI index_decoder_memconfig(void *coder_ptr, uint64_t *memusage, 23281ad8388SMartin Matuska uint64_t *old_memlimit, uint64_t new_memlimit) 23381ad8388SMartin Matuska { 2341456f0f9SXin LI lzma_index_coder *coder = coder_ptr; 2351456f0f9SXin LI 23681ad8388SMartin Matuska *memusage = lzma_index_memusage(1, coder->count); 23781ad8388SMartin Matuska *old_memlimit = coder->memlimit; 23881ad8388SMartin Matuska 23981ad8388SMartin Matuska if (new_memlimit != 0) { 24081ad8388SMartin Matuska if (new_memlimit < *memusage) 24181ad8388SMartin Matuska return LZMA_MEMLIMIT_ERROR; 24281ad8388SMartin Matuska 24381ad8388SMartin Matuska coder->memlimit = new_memlimit; 24481ad8388SMartin Matuska } 24581ad8388SMartin Matuska 24681ad8388SMartin Matuska return LZMA_OK; 24781ad8388SMartin Matuska } 24881ad8388SMartin Matuska 24981ad8388SMartin Matuska 25081ad8388SMartin Matuska static lzma_ret 2511456f0f9SXin LI index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator, 25281ad8388SMartin Matuska lzma_index **i, uint64_t memlimit) 25381ad8388SMartin Matuska { 25481ad8388SMartin Matuska // Remember the pointer given by the application. We will set it 25581ad8388SMartin Matuska // to point to the decoded Index only if decoding is successful. 25681ad8388SMartin Matuska // Before that, keep it NULL so that applications can always safely 25781ad8388SMartin Matuska // pass it to lzma_index_end() no matter did decoding succeed or not. 25881ad8388SMartin Matuska coder->index_ptr = i; 25981ad8388SMartin Matuska *i = NULL; 26081ad8388SMartin Matuska 26181ad8388SMartin Matuska // We always allocate a new lzma_index. 26281ad8388SMartin Matuska coder->index = lzma_index_init(allocator); 26381ad8388SMartin Matuska if (coder->index == NULL) 26481ad8388SMartin Matuska return LZMA_MEM_ERROR; 26581ad8388SMartin Matuska 26681ad8388SMartin Matuska // Initialize the rest. 26781ad8388SMartin Matuska coder->sequence = SEQ_INDICATOR; 268b71a5db3SXin LI coder->memlimit = my_max(1, memlimit); 26981ad8388SMartin Matuska coder->count = 0; // Needs to be initialized due to _memconfig(). 27081ad8388SMartin Matuska coder->pos = 0; 27181ad8388SMartin Matuska coder->crc32 = 0; 27281ad8388SMartin Matuska 27381ad8388SMartin Matuska return LZMA_OK; 27481ad8388SMartin Matuska } 27581ad8388SMartin Matuska 27681ad8388SMartin Matuska 27773ed8e77SXin LI extern lzma_ret 27873ed8e77SXin LI lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 27981ad8388SMartin Matuska lzma_index **i, uint64_t memlimit) 28081ad8388SMartin Matuska { 28173ed8e77SXin LI lzma_next_coder_init(&lzma_index_decoder_init, next, allocator); 28281ad8388SMartin Matuska 283b71a5db3SXin LI if (i == NULL) 28481ad8388SMartin Matuska return LZMA_PROG_ERROR; 28581ad8388SMartin Matuska 2861456f0f9SXin LI lzma_index_coder *coder = next->coder; 2871456f0f9SXin LI if (coder == NULL) { 2881456f0f9SXin LI coder = lzma_alloc(sizeof(lzma_index_coder), allocator); 2891456f0f9SXin LI if (coder == NULL) 29081ad8388SMartin Matuska return LZMA_MEM_ERROR; 29181ad8388SMartin Matuska 2921456f0f9SXin LI next->coder = coder; 29381ad8388SMartin Matuska next->code = &index_decode; 29481ad8388SMartin Matuska next->end = &index_decoder_end; 29581ad8388SMartin Matuska next->memconfig = &index_decoder_memconfig; 2961456f0f9SXin LI coder->index = NULL; 29781ad8388SMartin Matuska } else { 2981456f0f9SXin LI lzma_index_end(coder->index, allocator); 29981ad8388SMartin Matuska } 30081ad8388SMartin Matuska 3011456f0f9SXin LI return index_decoder_reset(coder, allocator, i, memlimit); 30281ad8388SMartin Matuska } 30381ad8388SMartin Matuska 30481ad8388SMartin Matuska 30581ad8388SMartin Matuska extern LZMA_API(lzma_ret) 30681ad8388SMartin Matuska lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) 30781ad8388SMartin Matuska { 308*3b35e7eeSXin LI // If i isn't NULL, *i must always be initialized due to 309*3b35e7eeSXin LI // the wording in the API docs. This way it is initialized 310*3b35e7eeSXin LI // if we return LZMA_PROG_ERROR due to strm == NULL. 311*3b35e7eeSXin LI if (i != NULL) 312*3b35e7eeSXin LI *i = NULL; 313*3b35e7eeSXin LI 31473ed8e77SXin LI lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit); 31581ad8388SMartin Matuska 31681ad8388SMartin Matuska strm->internal->supported_actions[LZMA_RUN] = true; 317542aef48SMartin Matuska strm->internal->supported_actions[LZMA_FINISH] = true; 31881ad8388SMartin Matuska 31981ad8388SMartin Matuska return LZMA_OK; 32081ad8388SMartin Matuska } 32181ad8388SMartin Matuska 32281ad8388SMartin Matuska 32381ad8388SMartin Matuska extern LZMA_API(lzma_ret) 32453200025SRui Paulo lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit, 32553200025SRui Paulo const lzma_allocator *allocator, 32681ad8388SMartin Matuska const uint8_t *in, size_t *in_pos, size_t in_size) 32781ad8388SMartin Matuska { 328*3b35e7eeSXin LI // If i isn't NULL, *i must always be initialized due to 329*3b35e7eeSXin LI // the wording in the API docs. 330*3b35e7eeSXin LI if (i != NULL) 331*3b35e7eeSXin LI *i = NULL; 332*3b35e7eeSXin LI 33381ad8388SMartin Matuska // Sanity checks 33481ad8388SMartin Matuska if (i == NULL || memlimit == NULL 33581ad8388SMartin Matuska || in == NULL || in_pos == NULL || *in_pos > in_size) 33681ad8388SMartin Matuska return LZMA_PROG_ERROR; 33781ad8388SMartin Matuska 33881ad8388SMartin Matuska // Initialize the decoder. 3391456f0f9SXin LI lzma_index_coder coder; 34081ad8388SMartin Matuska return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); 34181ad8388SMartin Matuska 34281ad8388SMartin Matuska // Store the input start position so that we can restore it in case 34381ad8388SMartin Matuska // of an error. 34481ad8388SMartin Matuska const size_t in_start = *in_pos; 34581ad8388SMartin Matuska 34681ad8388SMartin Matuska // Do the actual decoding. 34781ad8388SMartin Matuska lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, 34881ad8388SMartin Matuska NULL, NULL, 0, LZMA_RUN); 34981ad8388SMartin Matuska 35081ad8388SMartin Matuska if (ret == LZMA_STREAM_END) { 35181ad8388SMartin Matuska ret = LZMA_OK; 35281ad8388SMartin Matuska } else { 35381ad8388SMartin Matuska // Something went wrong, free the Index structure and restore 35481ad8388SMartin Matuska // the input position. 35581ad8388SMartin Matuska lzma_index_end(coder.index, allocator); 35681ad8388SMartin Matuska *in_pos = in_start; 35781ad8388SMartin Matuska 35881ad8388SMartin Matuska if (ret == LZMA_OK) { 35981ad8388SMartin Matuska // The input is truncated or otherwise corrupt. 36081ad8388SMartin Matuska // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR 36181ad8388SMartin Matuska // like lzma_vli_decode() does in single-call mode. 36281ad8388SMartin Matuska ret = LZMA_DATA_ERROR; 36381ad8388SMartin Matuska 36481ad8388SMartin Matuska } else if (ret == LZMA_MEMLIMIT_ERROR) { 36581ad8388SMartin Matuska // Tell the caller how much memory would have 36681ad8388SMartin Matuska // been needed. 36781ad8388SMartin Matuska *memlimit = lzma_index_memusage(1, coder.count); 36881ad8388SMartin Matuska } 36981ad8388SMartin Matuska } 37081ad8388SMartin Matuska 37181ad8388SMartin Matuska return ret; 37281ad8388SMartin Matuska } 373