181ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 281ad8388SMartin Matuska // 381ad8388SMartin Matuska /// \file lz_decoder.c 481ad8388SMartin Matuska /// \brief LZ out window 581ad8388SMartin Matuska /// 681ad8388SMartin Matuska // Authors: Igor Pavlov 781ad8388SMartin Matuska // Lasse Collin 881ad8388SMartin Matuska // 981ad8388SMartin Matuska // This file has been put into the public domain. 1081ad8388SMartin Matuska // You can do whatever you want with this file. 1181ad8388SMartin Matuska // 1281ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 1381ad8388SMartin Matuska 1481ad8388SMartin Matuska // liblzma supports multiple LZ77-based filters. The LZ part is shared 1581ad8388SMartin Matuska // between these filters. The LZ code takes care of dictionary handling 1681ad8388SMartin Matuska // and passing the data between filters in the chain. The filter-specific 1781ad8388SMartin Matuska // part decodes from the input buffer to the dictionary. 1881ad8388SMartin Matuska 1981ad8388SMartin Matuska 2081ad8388SMartin Matuska #include "lz_decoder.h" 2181ad8388SMartin Matuska 2281ad8388SMartin Matuska 231456f0f9SXin LI typedef struct { 2481ad8388SMartin Matuska /// Dictionary (history buffer) 2581ad8388SMartin Matuska lzma_dict dict; 2681ad8388SMartin Matuska 2781ad8388SMartin Matuska /// The actual LZ-based decoder e.g. LZMA 2881ad8388SMartin Matuska lzma_lz_decoder lz; 2981ad8388SMartin Matuska 3081ad8388SMartin Matuska /// Next filter in the chain, if any. Note that LZMA and LZMA2 are 3181ad8388SMartin Matuska /// only allowed as the last filter, but the long-range filter in 3281ad8388SMartin Matuska /// future can be in the middle of the chain. 3381ad8388SMartin Matuska lzma_next_coder next; 3481ad8388SMartin Matuska 3581ad8388SMartin Matuska /// True if the next filter in the chain has returned LZMA_STREAM_END. 3681ad8388SMartin Matuska bool next_finished; 3781ad8388SMartin Matuska 3881ad8388SMartin Matuska /// True if the LZ decoder (e.g. LZMA) has detected end of payload 3981ad8388SMartin Matuska /// marker. This may become true before next_finished becomes true. 4081ad8388SMartin Matuska bool this_finished; 4181ad8388SMartin Matuska 4281ad8388SMartin Matuska /// Temporary buffer needed when the LZ-based filter is not the last 4381ad8388SMartin Matuska /// filter in the chain. The output of the next filter is first 4481ad8388SMartin Matuska /// decoded into buffer[], which is then used as input for the actual 4581ad8388SMartin Matuska /// LZ-based decoder. 4681ad8388SMartin Matuska struct { 4781ad8388SMartin Matuska size_t pos; 4881ad8388SMartin Matuska size_t size; 4981ad8388SMartin Matuska uint8_t buffer[LZMA_BUFFER_SIZE]; 5081ad8388SMartin Matuska } temp; 511456f0f9SXin LI } lzma_coder; 5281ad8388SMartin Matuska 5381ad8388SMartin Matuska 5481ad8388SMartin Matuska static void 5581ad8388SMartin Matuska lz_decoder_reset(lzma_coder *coder) 5681ad8388SMartin Matuska { 5781ad8388SMartin Matuska coder->dict.pos = 0; 5881ad8388SMartin Matuska coder->dict.full = 0; 5981ad8388SMartin Matuska coder->dict.buf[coder->dict.size - 1] = '\0'; 6081ad8388SMartin Matuska coder->dict.need_reset = false; 6181ad8388SMartin Matuska return; 6281ad8388SMartin Matuska } 6381ad8388SMartin Matuska 6481ad8388SMartin Matuska 6581ad8388SMartin Matuska static lzma_ret 6681ad8388SMartin Matuska decode_buffer(lzma_coder *coder, 6781ad8388SMartin Matuska const uint8_t *restrict in, size_t *restrict in_pos, 6881ad8388SMartin Matuska size_t in_size, uint8_t *restrict out, 6981ad8388SMartin Matuska size_t *restrict out_pos, size_t out_size) 7081ad8388SMartin Matuska { 7181ad8388SMartin Matuska while (true) { 7281ad8388SMartin Matuska // Wrap the dictionary if needed. 7381ad8388SMartin Matuska if (coder->dict.pos == coder->dict.size) 7481ad8388SMartin Matuska coder->dict.pos = 0; 7581ad8388SMartin Matuska 7681ad8388SMartin Matuska // Store the current dictionary position. It is needed to know 7781ad8388SMartin Matuska // where to start copying to the out[] buffer. 7881ad8388SMartin Matuska const size_t dict_start = coder->dict.pos; 7981ad8388SMartin Matuska 8081ad8388SMartin Matuska // Calculate how much we allow coder->lz.code() to decode. 8181ad8388SMartin Matuska // It must not decode past the end of the dictionary 8281ad8388SMartin Matuska // buffer, and we don't want it to decode more than is 8381ad8388SMartin Matuska // actually needed to fill the out[] buffer. 84e0f0e66dSMartin Matuska coder->dict.limit = coder->dict.pos 85e0f0e66dSMartin Matuska + my_min(out_size - *out_pos, 8681ad8388SMartin Matuska coder->dict.size - coder->dict.pos); 8781ad8388SMartin Matuska 8881ad8388SMartin Matuska // Call the coder->lz.code() to do the actual decoding. 8981ad8388SMartin Matuska const lzma_ret ret = coder->lz.code( 9081ad8388SMartin Matuska coder->lz.coder, &coder->dict, 9181ad8388SMartin Matuska in, in_pos, in_size); 9281ad8388SMartin Matuska 9381ad8388SMartin Matuska // Copy the decoded data from the dictionary to the out[] 94a8675d92SXin LI // buffer. Do it conditionally because out can be NULL 95a8675d92SXin LI // (in which case copy_size is always 0). Calling memcpy() 96a8675d92SXin LI // with a null-pointer is undefined even if the third 97a8675d92SXin LI // argument is 0. 9881ad8388SMartin Matuska const size_t copy_size = coder->dict.pos - dict_start; 9981ad8388SMartin Matuska assert(copy_size <= out_size - *out_pos); 100a8675d92SXin LI 101a8675d92SXin LI if (copy_size > 0) 10281ad8388SMartin Matuska memcpy(out + *out_pos, coder->dict.buf + dict_start, 10381ad8388SMartin Matuska copy_size); 104a8675d92SXin LI 10581ad8388SMartin Matuska *out_pos += copy_size; 10681ad8388SMartin Matuska 10781ad8388SMartin Matuska // Reset the dictionary if so requested by coder->lz.code(). 10881ad8388SMartin Matuska if (coder->dict.need_reset) { 10981ad8388SMartin Matuska lz_decoder_reset(coder); 11081ad8388SMartin Matuska 11181ad8388SMartin Matuska // Since we reset dictionary, we don't check if 11281ad8388SMartin Matuska // dictionary became full. 11381ad8388SMartin Matuska if (ret != LZMA_OK || *out_pos == out_size) 11481ad8388SMartin Matuska return ret; 11581ad8388SMartin Matuska } else { 11681ad8388SMartin Matuska // Return if everything got decoded or an error 11781ad8388SMartin Matuska // occurred, or if there's no more data to decode. 11881ad8388SMartin Matuska // 11981ad8388SMartin Matuska // Note that detecting if there's something to decode 12081ad8388SMartin Matuska // is done by looking if dictionary become full 12181ad8388SMartin Matuska // instead of looking if *in_pos == in_size. This 12281ad8388SMartin Matuska // is because it is possible that all the input was 12381ad8388SMartin Matuska // consumed already but some data is pending to be 12481ad8388SMartin Matuska // written to the dictionary. 12581ad8388SMartin Matuska if (ret != LZMA_OK || *out_pos == out_size 12681ad8388SMartin Matuska || coder->dict.pos < coder->dict.size) 12781ad8388SMartin Matuska return ret; 12881ad8388SMartin Matuska } 12981ad8388SMartin Matuska } 13081ad8388SMartin Matuska } 13181ad8388SMartin Matuska 13281ad8388SMartin Matuska 13381ad8388SMartin Matuska static lzma_ret 134a8675d92SXin LI lz_decode(void *coder_ptr, const lzma_allocator *allocator, 13581ad8388SMartin Matuska const uint8_t *restrict in, size_t *restrict in_pos, 13681ad8388SMartin Matuska size_t in_size, uint8_t *restrict out, 13781ad8388SMartin Matuska size_t *restrict out_pos, size_t out_size, 13881ad8388SMartin Matuska lzma_action action) 13981ad8388SMartin Matuska { 1401456f0f9SXin LI lzma_coder *coder = coder_ptr; 1411456f0f9SXin LI 14281ad8388SMartin Matuska if (coder->next.code == NULL) 14381ad8388SMartin Matuska return decode_buffer(coder, in, in_pos, in_size, 14481ad8388SMartin Matuska out, out_pos, out_size); 14581ad8388SMartin Matuska 14681ad8388SMartin Matuska // We aren't the last coder in the chain, we need to decode 14781ad8388SMartin Matuska // our input to a temporary buffer. 14881ad8388SMartin Matuska while (*out_pos < out_size) { 14981ad8388SMartin Matuska // Fill the temporary buffer if it is empty. 15081ad8388SMartin Matuska if (!coder->next_finished 15181ad8388SMartin Matuska && coder->temp.pos == coder->temp.size) { 15281ad8388SMartin Matuska coder->temp.pos = 0; 15381ad8388SMartin Matuska coder->temp.size = 0; 15481ad8388SMartin Matuska 15581ad8388SMartin Matuska const lzma_ret ret = coder->next.code( 15681ad8388SMartin Matuska coder->next.coder, 15781ad8388SMartin Matuska allocator, in, in_pos, in_size, 15881ad8388SMartin Matuska coder->temp.buffer, &coder->temp.size, 15981ad8388SMartin Matuska LZMA_BUFFER_SIZE, action); 16081ad8388SMartin Matuska 16181ad8388SMartin Matuska if (ret == LZMA_STREAM_END) 16281ad8388SMartin Matuska coder->next_finished = true; 16381ad8388SMartin Matuska else if (ret != LZMA_OK || coder->temp.size == 0) 16481ad8388SMartin Matuska return ret; 16581ad8388SMartin Matuska } 16681ad8388SMartin Matuska 16781ad8388SMartin Matuska if (coder->this_finished) { 16881ad8388SMartin Matuska if (coder->temp.size != 0) 16981ad8388SMartin Matuska return LZMA_DATA_ERROR; 17081ad8388SMartin Matuska 17181ad8388SMartin Matuska if (coder->next_finished) 17281ad8388SMartin Matuska return LZMA_STREAM_END; 17381ad8388SMartin Matuska 17481ad8388SMartin Matuska return LZMA_OK; 17581ad8388SMartin Matuska } 17681ad8388SMartin Matuska 17781ad8388SMartin Matuska const lzma_ret ret = decode_buffer(coder, coder->temp.buffer, 17881ad8388SMartin Matuska &coder->temp.pos, coder->temp.size, 17981ad8388SMartin Matuska out, out_pos, out_size); 18081ad8388SMartin Matuska 18181ad8388SMartin Matuska if (ret == LZMA_STREAM_END) 18281ad8388SMartin Matuska coder->this_finished = true; 18381ad8388SMartin Matuska else if (ret != LZMA_OK) 18481ad8388SMartin Matuska return ret; 18581ad8388SMartin Matuska else if (coder->next_finished && *out_pos < out_size) 18681ad8388SMartin Matuska return LZMA_DATA_ERROR; 18781ad8388SMartin Matuska } 18881ad8388SMartin Matuska 18981ad8388SMartin Matuska return LZMA_OK; 19081ad8388SMartin Matuska } 19181ad8388SMartin Matuska 19281ad8388SMartin Matuska 19381ad8388SMartin Matuska static void 1941456f0f9SXin LI lz_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 19581ad8388SMartin Matuska { 1961456f0f9SXin LI lzma_coder *coder = coder_ptr; 1971456f0f9SXin LI 19881ad8388SMartin Matuska lzma_next_end(&coder->next, allocator); 19981ad8388SMartin Matuska lzma_free(coder->dict.buf, allocator); 20081ad8388SMartin Matuska 20181ad8388SMartin Matuska if (coder->lz.end != NULL) 20281ad8388SMartin Matuska coder->lz.end(coder->lz.coder, allocator); 20381ad8388SMartin Matuska else 20481ad8388SMartin Matuska lzma_free(coder->lz.coder, allocator); 20581ad8388SMartin Matuska 20681ad8388SMartin Matuska lzma_free(coder, allocator); 20781ad8388SMartin Matuska return; 20881ad8388SMartin Matuska } 20981ad8388SMartin Matuska 21081ad8388SMartin Matuska 21181ad8388SMartin Matuska extern lzma_ret 21253200025SRui Paulo lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 21381ad8388SMartin Matuska const lzma_filter_info *filters, 21481ad8388SMartin Matuska lzma_ret (*lz_init)(lzma_lz_decoder *lz, 215*73ed8e77SXin LI const lzma_allocator *allocator, 216*73ed8e77SXin LI lzma_vli id, const void *options, 21781ad8388SMartin Matuska lzma_lz_options *lz_options)) 21881ad8388SMartin Matuska { 21981ad8388SMartin Matuska // Allocate the base structure if it isn't already allocated. 2201456f0f9SXin LI lzma_coder *coder = next->coder; 2211456f0f9SXin LI if (coder == NULL) { 2221456f0f9SXin LI coder = lzma_alloc(sizeof(lzma_coder), allocator); 2231456f0f9SXin LI if (coder == NULL) 22481ad8388SMartin Matuska return LZMA_MEM_ERROR; 22581ad8388SMartin Matuska 2261456f0f9SXin LI next->coder = coder; 22781ad8388SMartin Matuska next->code = &lz_decode; 22881ad8388SMartin Matuska next->end = &lz_decoder_end; 22981ad8388SMartin Matuska 2301456f0f9SXin LI coder->dict.buf = NULL; 2311456f0f9SXin LI coder->dict.size = 0; 2321456f0f9SXin LI coder->lz = LZMA_LZ_DECODER_INIT; 2331456f0f9SXin LI coder->next = LZMA_NEXT_CODER_INIT; 23481ad8388SMartin Matuska } 23581ad8388SMartin Matuska 23681ad8388SMartin Matuska // Allocate and initialize the LZ-based decoder. It will also give 23781ad8388SMartin Matuska // us the dictionary size. 23881ad8388SMartin Matuska lzma_lz_options lz_options; 2391456f0f9SXin LI return_if_error(lz_init(&coder->lz, allocator, 240*73ed8e77SXin LI filters[0].id, filters[0].options, &lz_options)); 24181ad8388SMartin Matuska 24281ad8388SMartin Matuska // If the dictionary size is very small, increase it to 4096 bytes. 24381ad8388SMartin Matuska // This is to prevent constant wrapping of the dictionary, which 24481ad8388SMartin Matuska // would slow things down. The downside is that since we don't check 24581ad8388SMartin Matuska // separately for the real dictionary size, we may happily accept 24681ad8388SMartin Matuska // corrupt files. 24781ad8388SMartin Matuska if (lz_options.dict_size < 4096) 24881ad8388SMartin Matuska lz_options.dict_size = 4096; 24981ad8388SMartin Matuska 250a8675d92SXin LI // Make dictionary size a multiple of 16. Some LZ-based decoders like 25181ad8388SMartin Matuska // LZMA use the lowest bits lzma_dict.pos to know the alignment of the 25281ad8388SMartin Matuska // data. Aligned buffer is also good when memcpying from the 25381ad8388SMartin Matuska // dictionary to the output buffer, since applications are 25481ad8388SMartin Matuska // recommended to give aligned buffers to liblzma. 25581ad8388SMartin Matuska // 25681ad8388SMartin Matuska // Avoid integer overflow. 25781ad8388SMartin Matuska if (lz_options.dict_size > SIZE_MAX - 15) 25881ad8388SMartin Matuska return LZMA_MEM_ERROR; 25981ad8388SMartin Matuska 26081ad8388SMartin Matuska lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); 26181ad8388SMartin Matuska 26281ad8388SMartin Matuska // Allocate and initialize the dictionary. 2631456f0f9SXin LI if (coder->dict.size != lz_options.dict_size) { 2641456f0f9SXin LI lzma_free(coder->dict.buf, allocator); 2651456f0f9SXin LI coder->dict.buf 26681ad8388SMartin Matuska = lzma_alloc(lz_options.dict_size, allocator); 2671456f0f9SXin LI if (coder->dict.buf == NULL) 26881ad8388SMartin Matuska return LZMA_MEM_ERROR; 26981ad8388SMartin Matuska 2701456f0f9SXin LI coder->dict.size = lz_options.dict_size; 27181ad8388SMartin Matuska } 27281ad8388SMartin Matuska 27381ad8388SMartin Matuska lz_decoder_reset(next->coder); 27481ad8388SMartin Matuska 27581ad8388SMartin Matuska // Use the preset dictionary if it was given to us. 27681ad8388SMartin Matuska if (lz_options.preset_dict != NULL 27781ad8388SMartin Matuska && lz_options.preset_dict_size > 0) { 27881ad8388SMartin Matuska // If the preset dictionary is bigger than the actual 27981ad8388SMartin Matuska // dictionary, copy only the tail. 280e0f0e66dSMartin Matuska const size_t copy_size = my_min(lz_options.preset_dict_size, 28181ad8388SMartin Matuska lz_options.dict_size); 28281ad8388SMartin Matuska const size_t offset = lz_options.preset_dict_size - copy_size; 2831456f0f9SXin LI memcpy(coder->dict.buf, lz_options.preset_dict + offset, 28481ad8388SMartin Matuska copy_size); 2851456f0f9SXin LI coder->dict.pos = copy_size; 2861456f0f9SXin LI coder->dict.full = copy_size; 28781ad8388SMartin Matuska } 28881ad8388SMartin Matuska 28981ad8388SMartin Matuska // Miscellaneous initializations 2901456f0f9SXin LI coder->next_finished = false; 2911456f0f9SXin LI coder->this_finished = false; 2921456f0f9SXin LI coder->temp.pos = 0; 2931456f0f9SXin LI coder->temp.size = 0; 29481ad8388SMartin Matuska 29581ad8388SMartin Matuska // Initialize the next filter in the chain, if any. 2961456f0f9SXin LI return lzma_next_filter_init(&coder->next, allocator, filters + 1); 29781ad8388SMartin Matuska } 29881ad8388SMartin Matuska 29981ad8388SMartin Matuska 30081ad8388SMartin Matuska extern uint64_t 30181ad8388SMartin Matuska lzma_lz_decoder_memusage(size_t dictionary_size) 30281ad8388SMartin Matuska { 30381ad8388SMartin Matuska return sizeof(lzma_coder) + (uint64_t)(dictionary_size); 30481ad8388SMartin Matuska } 305