1*3b35e7eeSXin LI // SPDX-License-Identifier: 0BSD 2*3b35e7eeSXin LI 381ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 481ad8388SMartin Matuska // 581ad8388SMartin Matuska /// \file block_decoder.c 681ad8388SMartin Matuska /// \brief Decodes .xz Blocks 781ad8388SMartin Matuska // 881ad8388SMartin Matuska // Author: Lasse Collin 981ad8388SMartin Matuska // 1081ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 1181ad8388SMartin Matuska 1281ad8388SMartin Matuska #include "block_decoder.h" 1381ad8388SMartin Matuska #include "filter_decoder.h" 1481ad8388SMartin Matuska #include "check.h" 1581ad8388SMartin Matuska 1681ad8388SMartin Matuska 171456f0f9SXin LI typedef struct { 1881ad8388SMartin Matuska enum { 1981ad8388SMartin Matuska SEQ_CODE, 2081ad8388SMartin Matuska SEQ_PADDING, 2181ad8388SMartin Matuska SEQ_CHECK, 2281ad8388SMartin Matuska } sequence; 2381ad8388SMartin Matuska 2481ad8388SMartin Matuska /// The filters in the chain; initialized with lzma_raw_decoder_init(). 2581ad8388SMartin Matuska lzma_next_coder next; 2681ad8388SMartin Matuska 2781ad8388SMartin Matuska /// Decoding options; we also write Compressed Size and Uncompressed 2881ad8388SMartin Matuska /// Size back to this structure when the decoding has been finished. 2981ad8388SMartin Matuska lzma_block *block; 3081ad8388SMartin Matuska 3181ad8388SMartin Matuska /// Compressed Size calculated while decoding 3281ad8388SMartin Matuska lzma_vli compressed_size; 3381ad8388SMartin Matuska 3481ad8388SMartin Matuska /// Uncompressed Size calculated while decoding 3581ad8388SMartin Matuska lzma_vli uncompressed_size; 3681ad8388SMartin Matuska 3781ad8388SMartin Matuska /// Maximum allowed Compressed Size; this takes into account the 3881ad8388SMartin Matuska /// size of the Block Header and Check fields when Compressed Size 3981ad8388SMartin Matuska /// is unknown. 4081ad8388SMartin Matuska lzma_vli compressed_limit; 4181ad8388SMartin Matuska 429e6bbe47SXin LI /// Maximum allowed Uncompressed Size. 439e6bbe47SXin LI lzma_vli uncompressed_limit; 449e6bbe47SXin LI 4581ad8388SMartin Matuska /// Position when reading the Check field 4681ad8388SMartin Matuska size_t check_pos; 4781ad8388SMartin Matuska 4881ad8388SMartin Matuska /// Check of the uncompressed data 4981ad8388SMartin Matuska lzma_check_state check; 5053200025SRui Paulo 5153200025SRui Paulo /// True if the integrity check won't be calculated and verified. 5253200025SRui Paulo bool ignore_check; 531456f0f9SXin LI } lzma_block_coder; 5481ad8388SMartin Matuska 5581ad8388SMartin Matuska 5681ad8388SMartin Matuska static inline bool 5781ad8388SMartin Matuska is_size_valid(lzma_vli size, lzma_vli reference) 5881ad8388SMartin Matuska { 5981ad8388SMartin Matuska return reference == LZMA_VLI_UNKNOWN || reference == size; 6081ad8388SMartin Matuska } 6181ad8388SMartin Matuska 6281ad8388SMartin Matuska 6381ad8388SMartin Matuska static lzma_ret 641456f0f9SXin LI block_decode(void *coder_ptr, const lzma_allocator *allocator, 6581ad8388SMartin Matuska const uint8_t *restrict in, size_t *restrict in_pos, 6681ad8388SMartin Matuska size_t in_size, uint8_t *restrict out, 6781ad8388SMartin Matuska size_t *restrict out_pos, size_t out_size, lzma_action action) 6881ad8388SMartin Matuska { 691456f0f9SXin LI lzma_block_coder *coder = coder_ptr; 701456f0f9SXin LI 7181ad8388SMartin Matuska switch (coder->sequence) { 7281ad8388SMartin Matuska case SEQ_CODE: { 7381ad8388SMartin Matuska const size_t in_start = *in_pos; 7481ad8388SMartin Matuska const size_t out_start = *out_pos; 7581ad8388SMartin Matuska 769e6bbe47SXin LI // Limit the amount of input and output space that we give 779e6bbe47SXin LI // to the raw decoder based on the information we have 789e6bbe47SXin LI // (or don't have) from Block Header. 799e6bbe47SXin LI const size_t in_stop = *in_pos + (size_t)my_min( 809e6bbe47SXin LI in_size - *in_pos, 819e6bbe47SXin LI coder->compressed_limit - coder->compressed_size); 829e6bbe47SXin LI const size_t out_stop = *out_pos + (size_t)my_min( 839e6bbe47SXin LI out_size - *out_pos, 849e6bbe47SXin LI coder->uncompressed_limit - coder->uncompressed_size); 859e6bbe47SXin LI 8681ad8388SMartin Matuska const lzma_ret ret = coder->next.code(coder->next.coder, 879e6bbe47SXin LI allocator, in, in_pos, in_stop, 889e6bbe47SXin LI out, out_pos, out_stop, action); 8981ad8388SMartin Matuska 9081ad8388SMartin Matuska const size_t in_used = *in_pos - in_start; 9181ad8388SMartin Matuska const size_t out_used = *out_pos - out_start; 9281ad8388SMartin Matuska 939e6bbe47SXin LI // Because we have limited the input and output sizes, 949e6bbe47SXin LI // we know that these cannot grow too big or overflow. 959e6bbe47SXin LI coder->compressed_size += in_used; 969e6bbe47SXin LI coder->uncompressed_size += out_used; 979e6bbe47SXin LI 989e6bbe47SXin LI if (ret == LZMA_OK) { 999e6bbe47SXin LI const bool comp_done = coder->compressed_size 1009e6bbe47SXin LI == coder->block->compressed_size; 1019e6bbe47SXin LI const bool uncomp_done = coder->uncompressed_size 1029e6bbe47SXin LI == coder->block->uncompressed_size; 1039e6bbe47SXin LI 1049e6bbe47SXin LI // If both input and output amounts match the sizes 1059e6bbe47SXin LI // in Block Header but we still got LZMA_OK instead 1069e6bbe47SXin LI // of LZMA_STREAM_END, the file is broken. 1079e6bbe47SXin LI if (comp_done && uncomp_done) 10881ad8388SMartin Matuska return LZMA_DATA_ERROR; 10981ad8388SMartin Matuska 1109e6bbe47SXin LI // If the decoder has consumed all the input that it 1119e6bbe47SXin LI // needs but it still couldn't fill the output buffer 1129e6bbe47SXin LI // or return LZMA_STREAM_END, the file is broken. 1139e6bbe47SXin LI if (comp_done && *out_pos < out_size) 1149e6bbe47SXin LI return LZMA_DATA_ERROR; 1159e6bbe47SXin LI 1169e6bbe47SXin LI // If the decoder has produced all the output but 1179e6bbe47SXin LI // it still didn't return LZMA_STREAM_END or consume 1189e6bbe47SXin LI // more input (for example, detecting an end of 1199e6bbe47SXin LI // payload marker may need more input but produce 1209e6bbe47SXin LI // no output) the file is broken. 1219e6bbe47SXin LI if (uncomp_done && *in_pos < in_size) 1229e6bbe47SXin LI return LZMA_DATA_ERROR; 1239e6bbe47SXin LI } 1249e6bbe47SXin LI 125c917796cSXin LI // Don't waste time updating the integrity check if it will be 126c917796cSXin LI // ignored. Also skip it if no new output was produced. This 127c917796cSXin LI // avoids null pointer + 0 (undefined behavior) when out == 0. 128c917796cSXin LI if (!coder->ignore_check && out_used > 0) 12981ad8388SMartin Matuska lzma_check_update(&coder->check, coder->block->check, 13081ad8388SMartin Matuska out + out_start, out_used); 13181ad8388SMartin Matuska 13281ad8388SMartin Matuska if (ret != LZMA_STREAM_END) 13381ad8388SMartin Matuska return ret; 13481ad8388SMartin Matuska 13581ad8388SMartin Matuska // Compressed and Uncompressed Sizes are now at their final 13681ad8388SMartin Matuska // values. Verify that they match the values given to us. 13781ad8388SMartin Matuska if (!is_size_valid(coder->compressed_size, 13881ad8388SMartin Matuska coder->block->compressed_size) 13981ad8388SMartin Matuska || !is_size_valid(coder->uncompressed_size, 14081ad8388SMartin Matuska coder->block->uncompressed_size)) 14181ad8388SMartin Matuska return LZMA_DATA_ERROR; 14281ad8388SMartin Matuska 14381ad8388SMartin Matuska // Copy the values into coder->block. The caller 14481ad8388SMartin Matuska // may use this information to construct Index. 14581ad8388SMartin Matuska coder->block->compressed_size = coder->compressed_size; 14681ad8388SMartin Matuska coder->block->uncompressed_size = coder->uncompressed_size; 14781ad8388SMartin Matuska 14881ad8388SMartin Matuska coder->sequence = SEQ_PADDING; 14981ad8388SMartin Matuska } 15081ad8388SMartin Matuska 15181ad8388SMartin Matuska // Fall through 15281ad8388SMartin Matuska 15381ad8388SMartin Matuska case SEQ_PADDING: 15481ad8388SMartin Matuska // Compressed Data is padded to a multiple of four bytes. 15581ad8388SMartin Matuska while (coder->compressed_size & 3) { 15681ad8388SMartin Matuska if (*in_pos >= in_size) 15781ad8388SMartin Matuska return LZMA_OK; 15881ad8388SMartin Matuska 15981ad8388SMartin Matuska // We use compressed_size here just get the Padding 16081ad8388SMartin Matuska // right. The actual Compressed Size was stored to 16181ad8388SMartin Matuska // coder->block already, and won't be modified by 16281ad8388SMartin Matuska // us anymore. 16381ad8388SMartin Matuska ++coder->compressed_size; 16481ad8388SMartin Matuska 16581ad8388SMartin Matuska if (in[(*in_pos)++] != 0x00) 16681ad8388SMartin Matuska return LZMA_DATA_ERROR; 16781ad8388SMartin Matuska } 16881ad8388SMartin Matuska 16981ad8388SMartin Matuska if (coder->block->check == LZMA_CHECK_NONE) 17081ad8388SMartin Matuska return LZMA_STREAM_END; 17181ad8388SMartin Matuska 17253200025SRui Paulo if (!coder->ignore_check) 17381ad8388SMartin Matuska lzma_check_finish(&coder->check, coder->block->check); 17453200025SRui Paulo 17581ad8388SMartin Matuska coder->sequence = SEQ_CHECK; 17681ad8388SMartin Matuska 17781ad8388SMartin Matuska // Fall through 17881ad8388SMartin Matuska 17981ad8388SMartin Matuska case SEQ_CHECK: { 18081ad8388SMartin Matuska const size_t check_size = lzma_check_size(coder->block->check); 18181ad8388SMartin Matuska lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check, 18281ad8388SMartin Matuska &coder->check_pos, check_size); 18381ad8388SMartin Matuska if (coder->check_pos < check_size) 18481ad8388SMartin Matuska return LZMA_OK; 18581ad8388SMartin Matuska 18681ad8388SMartin Matuska // Validate the Check only if we support it. 18781ad8388SMartin Matuska // coder->check.buffer may be uninitialized 18881ad8388SMartin Matuska // when the Check ID is not supported. 18953200025SRui Paulo if (!coder->ignore_check 19053200025SRui Paulo && lzma_check_is_supported(coder->block->check) 19181ad8388SMartin Matuska && memcmp(coder->block->raw_check, 19281ad8388SMartin Matuska coder->check.buffer.u8, 19381ad8388SMartin Matuska check_size) != 0) 19481ad8388SMartin Matuska return LZMA_DATA_ERROR; 19581ad8388SMartin Matuska 19681ad8388SMartin Matuska return LZMA_STREAM_END; 19781ad8388SMartin Matuska } 19881ad8388SMartin Matuska } 19981ad8388SMartin Matuska 20081ad8388SMartin Matuska return LZMA_PROG_ERROR; 20181ad8388SMartin Matuska } 20281ad8388SMartin Matuska 20381ad8388SMartin Matuska 20481ad8388SMartin Matuska static void 2051456f0f9SXin LI block_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 20681ad8388SMartin Matuska { 2071456f0f9SXin LI lzma_block_coder *coder = coder_ptr; 20881ad8388SMartin Matuska lzma_next_end(&coder->next, allocator); 20981ad8388SMartin Matuska lzma_free(coder, allocator); 21081ad8388SMartin Matuska return; 21181ad8388SMartin Matuska } 21281ad8388SMartin Matuska 21381ad8388SMartin Matuska 21481ad8388SMartin Matuska extern lzma_ret 21553200025SRui Paulo lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 21681ad8388SMartin Matuska lzma_block *block) 21781ad8388SMartin Matuska { 21881ad8388SMartin Matuska lzma_next_coder_init(&lzma_block_decoder_init, next, allocator); 21981ad8388SMartin Matuska 22081ad8388SMartin Matuska // Validate the options. lzma_block_unpadded_size() does that for us 22181ad8388SMartin Matuska // except for Uncompressed Size and filters. Filters are validated 22281ad8388SMartin Matuska // by the raw decoder. 22381ad8388SMartin Matuska if (lzma_block_unpadded_size(block) == 0 22481ad8388SMartin Matuska || !lzma_vli_is_valid(block->uncompressed_size)) 22581ad8388SMartin Matuska return LZMA_PROG_ERROR; 22681ad8388SMartin Matuska 2271456f0f9SXin LI // Allocate *next->coder if needed. 2281456f0f9SXin LI lzma_block_coder *coder = next->coder; 2291456f0f9SXin LI if (coder == NULL) { 2301456f0f9SXin LI coder = lzma_alloc(sizeof(lzma_block_coder), allocator); 2311456f0f9SXin LI if (coder == NULL) 23281ad8388SMartin Matuska return LZMA_MEM_ERROR; 23381ad8388SMartin Matuska 2341456f0f9SXin LI next->coder = coder; 23581ad8388SMartin Matuska next->code = &block_decode; 23681ad8388SMartin Matuska next->end = &block_decoder_end; 2371456f0f9SXin LI coder->next = LZMA_NEXT_CODER_INIT; 23881ad8388SMartin Matuska } 23981ad8388SMartin Matuska 24081ad8388SMartin Matuska // Basic initializations 2411456f0f9SXin LI coder->sequence = SEQ_CODE; 2421456f0f9SXin LI coder->block = block; 2431456f0f9SXin LI coder->compressed_size = 0; 2441456f0f9SXin LI coder->uncompressed_size = 0; 24581ad8388SMartin Matuska 24681ad8388SMartin Matuska // If Compressed Size is not known, we calculate the maximum allowed 24781ad8388SMartin Matuska // value so that encoded size of the Block (including Block Padding) 24881ad8388SMartin Matuska // is still a valid VLI and a multiple of four. 2491456f0f9SXin LI coder->compressed_limit 25081ad8388SMartin Matuska = block->compressed_size == LZMA_VLI_UNKNOWN 25181ad8388SMartin Matuska ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) 25281ad8388SMartin Matuska - block->header_size 25381ad8388SMartin Matuska - lzma_check_size(block->check) 25481ad8388SMartin Matuska : block->compressed_size; 25581ad8388SMartin Matuska 2569e6bbe47SXin LI // With Uncompressed Size this is simpler. If Block Header lacks 2579e6bbe47SXin LI // the size info, then LZMA_VLI_MAX is the maximum possible 2589e6bbe47SXin LI // Uncompressed Size. 2599e6bbe47SXin LI coder->uncompressed_limit 2609e6bbe47SXin LI = block->uncompressed_size == LZMA_VLI_UNKNOWN 2619e6bbe47SXin LI ? LZMA_VLI_MAX 2629e6bbe47SXin LI : block->uncompressed_size; 2639e6bbe47SXin LI 26481ad8388SMartin Matuska // Initialize the check. It's caller's problem if the Check ID is not 26581ad8388SMartin Matuska // supported, and the Block decoder cannot verify the Check field. 26681ad8388SMartin Matuska // Caller can test lzma_check_is_supported(block->check). 2671456f0f9SXin LI coder->check_pos = 0; 2681456f0f9SXin LI lzma_check_init(&coder->check, block->check); 26981ad8388SMartin Matuska 2701456f0f9SXin LI coder->ignore_check = block->version >= 1 27153200025SRui Paulo ? block->ignore_check : false; 27253200025SRui Paulo 27381ad8388SMartin Matuska // Initialize the filter chain. 2741456f0f9SXin LI return lzma_raw_decoder_init(&coder->next, allocator, 27581ad8388SMartin Matuska block->filters); 27681ad8388SMartin Matuska } 27781ad8388SMartin Matuska 27881ad8388SMartin Matuska 27981ad8388SMartin Matuska extern LZMA_API(lzma_ret) 28081ad8388SMartin Matuska lzma_block_decoder(lzma_stream *strm, lzma_block *block) 28181ad8388SMartin Matuska { 28281ad8388SMartin Matuska lzma_next_strm_init(lzma_block_decoder_init, strm, block); 28381ad8388SMartin Matuska 28481ad8388SMartin Matuska strm->internal->supported_actions[LZMA_RUN] = true; 28581ad8388SMartin Matuska strm->internal->supported_actions[LZMA_FINISH] = true; 28681ad8388SMartin Matuska 28781ad8388SMartin Matuska return LZMA_OK; 28881ad8388SMartin Matuska } 289