1*3b35e7eeSXin LI // SPDX-License-Identifier: 0BSD 2*3b35e7eeSXin LI 381ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 481ad8388SMartin Matuska // 581ad8388SMartin Matuska /// \file lz_decoder.c 681ad8388SMartin Matuska /// \brief LZ out window 781ad8388SMartin Matuska /// 881ad8388SMartin Matuska // Authors: Igor Pavlov 981ad8388SMartin Matuska // Lasse Collin 1081ad8388SMartin Matuska // 1181ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 1281ad8388SMartin Matuska 1381ad8388SMartin Matuska // liblzma supports multiple LZ77-based filters. The LZ part is shared 1481ad8388SMartin Matuska // between these filters. The LZ code takes care of dictionary handling 1581ad8388SMartin Matuska // and passing the data between filters in the chain. The filter-specific 1681ad8388SMartin Matuska // part decodes from the input buffer to the dictionary. 1781ad8388SMartin Matuska 1881ad8388SMartin Matuska 1981ad8388SMartin Matuska #include "lz_decoder.h" 2081ad8388SMartin Matuska 2181ad8388SMartin Matuska 221456f0f9SXin LI typedef struct { 2381ad8388SMartin Matuska /// Dictionary (history buffer) 2481ad8388SMartin Matuska lzma_dict dict; 2581ad8388SMartin Matuska 2681ad8388SMartin Matuska /// The actual LZ-based decoder e.g. LZMA 2781ad8388SMartin Matuska lzma_lz_decoder lz; 2881ad8388SMartin Matuska 2981ad8388SMartin Matuska /// Next filter in the chain, if any. Note that LZMA and LZMA2 are 3081ad8388SMartin Matuska /// only allowed as the last filter, but the long-range filter in 3181ad8388SMartin Matuska /// future can be in the middle of the chain. 3281ad8388SMartin Matuska lzma_next_coder next; 3381ad8388SMartin Matuska 3481ad8388SMartin Matuska /// True if the next filter in the chain has returned LZMA_STREAM_END. 3581ad8388SMartin Matuska bool next_finished; 3681ad8388SMartin Matuska 3781ad8388SMartin Matuska /// True if the LZ decoder (e.g. LZMA) has detected end of payload 3881ad8388SMartin Matuska /// marker. This may become true before next_finished becomes true. 3981ad8388SMartin Matuska bool this_finished; 4081ad8388SMartin Matuska 4181ad8388SMartin Matuska /// Temporary buffer needed when the LZ-based filter is not the last 4281ad8388SMartin Matuska /// filter in the chain. The output of the next filter is first 4381ad8388SMartin Matuska /// decoded into buffer[], which is then used as input for the actual 4481ad8388SMartin Matuska /// LZ-based decoder. 4581ad8388SMartin Matuska struct { 4681ad8388SMartin Matuska size_t pos; 4781ad8388SMartin Matuska size_t size; 4881ad8388SMartin Matuska uint8_t buffer[LZMA_BUFFER_SIZE]; 4981ad8388SMartin Matuska } temp; 501456f0f9SXin LI } lzma_coder; 5181ad8388SMartin Matuska 5281ad8388SMartin Matuska 5381ad8388SMartin Matuska static void 5481ad8388SMartin Matuska lz_decoder_reset(lzma_coder *coder) 5581ad8388SMartin Matuska { 56*3b35e7eeSXin LI coder->dict.pos = 2 * LZ_DICT_REPEAT_MAX; 5781ad8388SMartin Matuska coder->dict.full = 0; 58*3b35e7eeSXin LI coder->dict.buf[2 * LZ_DICT_REPEAT_MAX - 1] = '\0'; 59*3b35e7eeSXin LI coder->dict.has_wrapped = false; 6081ad8388SMartin Matuska coder->dict.need_reset = false; 6181ad8388SMartin Matuska return; 6281ad8388SMartin Matuska } 6381ad8388SMartin Matuska 6481ad8388SMartin Matuska 6581ad8388SMartin Matuska static lzma_ret 6681ad8388SMartin Matuska decode_buffer(lzma_coder *coder, 6781ad8388SMartin Matuska const uint8_t *restrict in, size_t *restrict in_pos, 6881ad8388SMartin Matuska size_t in_size, uint8_t *restrict out, 6981ad8388SMartin Matuska size_t *restrict out_pos, size_t out_size) 7081ad8388SMartin Matuska { 7181ad8388SMartin Matuska while (true) { 7281ad8388SMartin Matuska // Wrap the dictionary if needed. 73*3b35e7eeSXin LI if (coder->dict.pos == coder->dict.size) { 74*3b35e7eeSXin LI // See the comment of #define LZ_DICT_REPEAT_MAX. 75*3b35e7eeSXin LI coder->dict.pos = LZ_DICT_REPEAT_MAX; 76*3b35e7eeSXin LI coder->dict.has_wrapped = true; 77*3b35e7eeSXin LI memcpy(coder->dict.buf, coder->dict.buf 78*3b35e7eeSXin LI + coder->dict.size 79*3b35e7eeSXin LI - LZ_DICT_REPEAT_MAX, 80*3b35e7eeSXin LI LZ_DICT_REPEAT_MAX); 81*3b35e7eeSXin LI } 8281ad8388SMartin Matuska 8381ad8388SMartin Matuska // Store the current dictionary position. It is needed to know 8481ad8388SMartin Matuska // where to start copying to the out[] buffer. 8581ad8388SMartin Matuska const size_t dict_start = coder->dict.pos; 8681ad8388SMartin Matuska 8781ad8388SMartin Matuska // Calculate how much we allow coder->lz.code() to decode. 8881ad8388SMartin Matuska // It must not decode past the end of the dictionary 8981ad8388SMartin Matuska // buffer, and we don't want it to decode more than is 9081ad8388SMartin Matuska // actually needed to fill the out[] buffer. 91e0f0e66dSMartin Matuska coder->dict.limit = coder->dict.pos 92e0f0e66dSMartin Matuska + my_min(out_size - *out_pos, 9381ad8388SMartin Matuska coder->dict.size - coder->dict.pos); 9481ad8388SMartin Matuska 9581ad8388SMartin Matuska // Call the coder->lz.code() to do the actual decoding. 9681ad8388SMartin Matuska const lzma_ret ret = coder->lz.code( 9781ad8388SMartin Matuska coder->lz.coder, &coder->dict, 9881ad8388SMartin Matuska in, in_pos, in_size); 9981ad8388SMartin Matuska 10081ad8388SMartin Matuska // Copy the decoded data from the dictionary to the out[] 101a8675d92SXin LI // buffer. Do it conditionally because out can be NULL 102a8675d92SXin LI // (in which case copy_size is always 0). Calling memcpy() 103a8675d92SXin LI // with a null-pointer is undefined even if the third 104a8675d92SXin LI // argument is 0. 10581ad8388SMartin Matuska const size_t copy_size = coder->dict.pos - dict_start; 10681ad8388SMartin Matuska assert(copy_size <= out_size - *out_pos); 107a8675d92SXin LI 108a8675d92SXin LI if (copy_size > 0) 10981ad8388SMartin Matuska memcpy(out + *out_pos, coder->dict.buf + dict_start, 11081ad8388SMartin Matuska copy_size); 111a8675d92SXin LI 11281ad8388SMartin Matuska *out_pos += copy_size; 11381ad8388SMartin Matuska 11481ad8388SMartin Matuska // Reset the dictionary if so requested by coder->lz.code(). 11581ad8388SMartin Matuska if (coder->dict.need_reset) { 11681ad8388SMartin Matuska lz_decoder_reset(coder); 11781ad8388SMartin Matuska 11881ad8388SMartin Matuska // Since we reset dictionary, we don't check if 11981ad8388SMartin Matuska // dictionary became full. 12081ad8388SMartin Matuska if (ret != LZMA_OK || *out_pos == out_size) 12181ad8388SMartin Matuska return ret; 12281ad8388SMartin Matuska } else { 12381ad8388SMartin Matuska // Return if everything got decoded or an error 12481ad8388SMartin Matuska // occurred, or if there's no more data to decode. 12581ad8388SMartin Matuska // 12681ad8388SMartin Matuska // Note that detecting if there's something to decode 12781ad8388SMartin Matuska // is done by looking if dictionary become full 12881ad8388SMartin Matuska // instead of looking if *in_pos == in_size. This 12981ad8388SMartin Matuska // is because it is possible that all the input was 13081ad8388SMartin Matuska // consumed already but some data is pending to be 13181ad8388SMartin Matuska // written to the dictionary. 13281ad8388SMartin Matuska if (ret != LZMA_OK || *out_pos == out_size 13381ad8388SMartin Matuska || coder->dict.pos < coder->dict.size) 13481ad8388SMartin Matuska return ret; 13581ad8388SMartin Matuska } 13681ad8388SMartin Matuska } 13781ad8388SMartin Matuska } 13881ad8388SMartin Matuska 13981ad8388SMartin Matuska 14081ad8388SMartin Matuska static lzma_ret 141a8675d92SXin LI lz_decode(void *coder_ptr, const lzma_allocator *allocator, 14281ad8388SMartin Matuska const uint8_t *restrict in, size_t *restrict in_pos, 14381ad8388SMartin Matuska size_t in_size, uint8_t *restrict out, 14481ad8388SMartin Matuska size_t *restrict out_pos, size_t out_size, 14581ad8388SMartin Matuska lzma_action action) 14681ad8388SMartin Matuska { 1471456f0f9SXin LI lzma_coder *coder = coder_ptr; 1481456f0f9SXin LI 14981ad8388SMartin Matuska if (coder->next.code == NULL) 15081ad8388SMartin Matuska return decode_buffer(coder, in, in_pos, in_size, 15181ad8388SMartin Matuska out, out_pos, out_size); 15281ad8388SMartin Matuska 15381ad8388SMartin Matuska // We aren't the last coder in the chain, we need to decode 15481ad8388SMartin Matuska // our input to a temporary buffer. 15581ad8388SMartin Matuska while (*out_pos < out_size) { 15681ad8388SMartin Matuska // Fill the temporary buffer if it is empty. 15781ad8388SMartin Matuska if (!coder->next_finished 15881ad8388SMartin Matuska && coder->temp.pos == coder->temp.size) { 15981ad8388SMartin Matuska coder->temp.pos = 0; 16081ad8388SMartin Matuska coder->temp.size = 0; 16181ad8388SMartin Matuska 16281ad8388SMartin Matuska const lzma_ret ret = coder->next.code( 16381ad8388SMartin Matuska coder->next.coder, 16481ad8388SMartin Matuska allocator, in, in_pos, in_size, 16581ad8388SMartin Matuska coder->temp.buffer, &coder->temp.size, 16681ad8388SMartin Matuska LZMA_BUFFER_SIZE, action); 16781ad8388SMartin Matuska 16881ad8388SMartin Matuska if (ret == LZMA_STREAM_END) 16981ad8388SMartin Matuska coder->next_finished = true; 17081ad8388SMartin Matuska else if (ret != LZMA_OK || coder->temp.size == 0) 17181ad8388SMartin Matuska return ret; 17281ad8388SMartin Matuska } 17381ad8388SMartin Matuska 17481ad8388SMartin Matuska if (coder->this_finished) { 17581ad8388SMartin Matuska if (coder->temp.size != 0) 17681ad8388SMartin Matuska return LZMA_DATA_ERROR; 17781ad8388SMartin Matuska 17881ad8388SMartin Matuska if (coder->next_finished) 17981ad8388SMartin Matuska return LZMA_STREAM_END; 18081ad8388SMartin Matuska 18181ad8388SMartin Matuska return LZMA_OK; 18281ad8388SMartin Matuska } 18381ad8388SMartin Matuska 18481ad8388SMartin Matuska const lzma_ret ret = decode_buffer(coder, coder->temp.buffer, 18581ad8388SMartin Matuska &coder->temp.pos, coder->temp.size, 18681ad8388SMartin Matuska out, out_pos, out_size); 18781ad8388SMartin Matuska 18881ad8388SMartin Matuska if (ret == LZMA_STREAM_END) 18981ad8388SMartin Matuska coder->this_finished = true; 19081ad8388SMartin Matuska else if (ret != LZMA_OK) 19181ad8388SMartin Matuska return ret; 19281ad8388SMartin Matuska else if (coder->next_finished && *out_pos < out_size) 19381ad8388SMartin Matuska return LZMA_DATA_ERROR; 19481ad8388SMartin Matuska } 19581ad8388SMartin Matuska 19681ad8388SMartin Matuska return LZMA_OK; 19781ad8388SMartin Matuska } 19881ad8388SMartin Matuska 19981ad8388SMartin Matuska 20081ad8388SMartin Matuska static void 2011456f0f9SXin LI lz_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 20281ad8388SMartin Matuska { 2031456f0f9SXin LI lzma_coder *coder = coder_ptr; 2041456f0f9SXin LI 20581ad8388SMartin Matuska lzma_next_end(&coder->next, allocator); 20681ad8388SMartin Matuska lzma_free(coder->dict.buf, allocator); 20781ad8388SMartin Matuska 20881ad8388SMartin Matuska if (coder->lz.end != NULL) 20981ad8388SMartin Matuska coder->lz.end(coder->lz.coder, allocator); 21081ad8388SMartin Matuska else 21181ad8388SMartin Matuska lzma_free(coder->lz.coder, allocator); 21281ad8388SMartin Matuska 21381ad8388SMartin Matuska lzma_free(coder, allocator); 21481ad8388SMartin Matuska return; 21581ad8388SMartin Matuska } 21681ad8388SMartin Matuska 21781ad8388SMartin Matuska 21881ad8388SMartin Matuska extern lzma_ret 21953200025SRui Paulo lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 22081ad8388SMartin Matuska const lzma_filter_info *filters, 22181ad8388SMartin Matuska lzma_ret (*lz_init)(lzma_lz_decoder *lz, 22273ed8e77SXin LI const lzma_allocator *allocator, 22373ed8e77SXin LI lzma_vli id, const void *options, 22481ad8388SMartin Matuska lzma_lz_options *lz_options)) 22581ad8388SMartin Matuska { 22681ad8388SMartin Matuska // Allocate the base structure if it isn't already allocated. 2271456f0f9SXin LI lzma_coder *coder = next->coder; 2281456f0f9SXin LI if (coder == NULL) { 2291456f0f9SXin LI coder = lzma_alloc(sizeof(lzma_coder), allocator); 2301456f0f9SXin LI if (coder == NULL) 23181ad8388SMartin Matuska return LZMA_MEM_ERROR; 23281ad8388SMartin Matuska 2331456f0f9SXin LI next->coder = coder; 23481ad8388SMartin Matuska next->code = &lz_decode; 23581ad8388SMartin Matuska next->end = &lz_decoder_end; 23681ad8388SMartin Matuska 2371456f0f9SXin LI coder->dict.buf = NULL; 2381456f0f9SXin LI coder->dict.size = 0; 2391456f0f9SXin LI coder->lz = LZMA_LZ_DECODER_INIT; 2401456f0f9SXin LI coder->next = LZMA_NEXT_CODER_INIT; 24181ad8388SMartin Matuska } 24281ad8388SMartin Matuska 24381ad8388SMartin Matuska // Allocate and initialize the LZ-based decoder. It will also give 24481ad8388SMartin Matuska // us the dictionary size. 24581ad8388SMartin Matuska lzma_lz_options lz_options; 2461456f0f9SXin LI return_if_error(lz_init(&coder->lz, allocator, 24773ed8e77SXin LI filters[0].id, filters[0].options, &lz_options)); 24881ad8388SMartin Matuska 24981ad8388SMartin Matuska // If the dictionary size is very small, increase it to 4096 bytes. 25081ad8388SMartin Matuska // This is to prevent constant wrapping of the dictionary, which 25181ad8388SMartin Matuska // would slow things down. The downside is that since we don't check 25281ad8388SMartin Matuska // separately for the real dictionary size, we may happily accept 25381ad8388SMartin Matuska // corrupt files. 25481ad8388SMartin Matuska if (lz_options.dict_size < 4096) 25581ad8388SMartin Matuska lz_options.dict_size = 4096; 25681ad8388SMartin Matuska 257a8675d92SXin LI // Make dictionary size a multiple of 16. Some LZ-based decoders like 25881ad8388SMartin Matuska // LZMA use the lowest bits lzma_dict.pos to know the alignment of the 25981ad8388SMartin Matuska // data. Aligned buffer is also good when memcpying from the 26081ad8388SMartin Matuska // dictionary to the output buffer, since applications are 26181ad8388SMartin Matuska // recommended to give aligned buffers to liblzma. 26281ad8388SMartin Matuska // 263*3b35e7eeSXin LI // Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is 264*3b35e7eeSXin LI // needed for alloc_size. 265*3b35e7eeSXin LI // 26681ad8388SMartin Matuska // Avoid integer overflow. 267*3b35e7eeSXin LI if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX) 26881ad8388SMartin Matuska return LZMA_MEM_ERROR; 26981ad8388SMartin Matuska 27081ad8388SMartin Matuska lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); 27181ad8388SMartin Matuska 272*3b35e7eeSXin LI // Reserve extra space as explained in the comment 273*3b35e7eeSXin LI // of #define LZ_DICT_REPEAT_MAX. 274*3b35e7eeSXin LI const size_t alloc_size 275*3b35e7eeSXin LI = lz_options.dict_size + 2 * LZ_DICT_REPEAT_MAX; 276*3b35e7eeSXin LI 27781ad8388SMartin Matuska // Allocate and initialize the dictionary. 278*3b35e7eeSXin LI if (coder->dict.size != alloc_size) { 2791456f0f9SXin LI lzma_free(coder->dict.buf, allocator); 280*3b35e7eeSXin LI coder->dict.buf = lzma_alloc(alloc_size, allocator); 2811456f0f9SXin LI if (coder->dict.buf == NULL) 28281ad8388SMartin Matuska return LZMA_MEM_ERROR; 28381ad8388SMartin Matuska 284*3b35e7eeSXin LI // NOTE: Yes, alloc_size, not lz_options.dict_size. The way 285*3b35e7eeSXin LI // coder->dict.full is updated will take care that we will 286*3b35e7eeSXin LI // still reject distances larger than lz_options.dict_size. 287*3b35e7eeSXin LI coder->dict.size = alloc_size; 28881ad8388SMartin Matuska } 28981ad8388SMartin Matuska 29081ad8388SMartin Matuska lz_decoder_reset(next->coder); 29181ad8388SMartin Matuska 29281ad8388SMartin Matuska // Use the preset dictionary if it was given to us. 29381ad8388SMartin Matuska if (lz_options.preset_dict != NULL 29481ad8388SMartin Matuska && lz_options.preset_dict_size > 0) { 29581ad8388SMartin Matuska // If the preset dictionary is bigger than the actual 29681ad8388SMartin Matuska // dictionary, copy only the tail. 297e0f0e66dSMartin Matuska const size_t copy_size = my_min(lz_options.preset_dict_size, 29881ad8388SMartin Matuska lz_options.dict_size); 29981ad8388SMartin Matuska const size_t offset = lz_options.preset_dict_size - copy_size; 300*3b35e7eeSXin LI memcpy(coder->dict.buf + coder->dict.pos, 301*3b35e7eeSXin LI lz_options.preset_dict + offset, 30281ad8388SMartin Matuska copy_size); 303*3b35e7eeSXin LI 304*3b35e7eeSXin LI // dict.pos isn't zero after lz_decoder_reset(). 305*3b35e7eeSXin LI coder->dict.pos += copy_size; 3061456f0f9SXin LI coder->dict.full = copy_size; 30781ad8388SMartin Matuska } 30881ad8388SMartin Matuska 30981ad8388SMartin Matuska // Miscellaneous initializations 3101456f0f9SXin LI coder->next_finished = false; 3111456f0f9SXin LI coder->this_finished = false; 3121456f0f9SXin LI coder->temp.pos = 0; 3131456f0f9SXin LI coder->temp.size = 0; 31481ad8388SMartin Matuska 31581ad8388SMartin Matuska // Initialize the next filter in the chain, if any. 3161456f0f9SXin LI return lzma_next_filter_init(&coder->next, allocator, filters + 1); 31781ad8388SMartin Matuska } 31881ad8388SMartin Matuska 31981ad8388SMartin Matuska 32081ad8388SMartin Matuska extern uint64_t 32181ad8388SMartin Matuska lzma_lz_decoder_memusage(size_t dictionary_size) 32281ad8388SMartin Matuska { 32381ad8388SMartin Matuska return sizeof(lzma_coder) + (uint64_t)(dictionary_size); 32481ad8388SMartin Matuska } 325