1*3b35e7eeSXin LI // SPDX-License-Identifier: 0BSD 2*3b35e7eeSXin LI 381ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 481ad8388SMartin Matuska // 581ad8388SMartin Matuska /// \file lz_decoder.h 681ad8388SMartin Matuska /// \brief LZ out window 781ad8388SMartin Matuska /// 881ad8388SMartin Matuska // Authors: Igor Pavlov 981ad8388SMartin Matuska // Lasse Collin 1081ad8388SMartin Matuska // 1181ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 1281ad8388SMartin Matuska 1381ad8388SMartin Matuska #ifndef LZMA_LZ_DECODER_H 1481ad8388SMartin Matuska #define LZMA_LZ_DECODER_H 1581ad8388SMartin Matuska 1681ad8388SMartin Matuska #include "common.h" 1781ad8388SMartin Matuska 1881ad8388SMartin Matuska 19*3b35e7eeSXin LI /// Maximum length of a match rounded up to a nice power of 2 which is 20*3b35e7eeSXin LI /// a good size for aligned memcpy(). The allocated dictionary buffer will 21*3b35e7eeSXin LI /// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size: 22*3b35e7eeSXin LI /// 23*3b35e7eeSXin LI /// (1) Every time the decoder reaches the end of the dictionary buffer, 24*3b35e7eeSXin LI /// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning. 25*3b35e7eeSXin LI /// This way dict_repeat() will only need to copy from one place, 26*3b35e7eeSXin LI /// never from both the end and beginning of the buffer. 27*3b35e7eeSXin LI /// 28*3b35e7eeSXin LI /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between 29*3b35e7eeSXin LI /// the oldest byte still in the dictionary and the current write 30*3b35e7eeSXin LI /// position. This way dict_repeat(dict, dict->size - 1, &len) 31*3b35e7eeSXin LI /// won't need memmove() as the copying cannot overlap. 32*3b35e7eeSXin LI /// 33*3b35e7eeSXin LI /// Note that memcpy() still cannot be used if distance < len. 34*3b35e7eeSXin LI /// 35*3b35e7eeSXin LI /// LZMA's longest match length is 273 so pick a multiple of 16 above that. 36*3b35e7eeSXin LI #define LZ_DICT_REPEAT_MAX 288 37*3b35e7eeSXin LI 38*3b35e7eeSXin LI 3981ad8388SMartin Matuska typedef struct { 40*3b35e7eeSXin LI /// Pointer to the dictionary buffer. 4181ad8388SMartin Matuska uint8_t *buf; 4281ad8388SMartin Matuska 4381ad8388SMartin Matuska /// Write position in dictionary. The next byte will be written to 4481ad8388SMartin Matuska /// buf[pos]. 4581ad8388SMartin Matuska size_t pos; 4681ad8388SMartin Matuska 4781ad8388SMartin Matuska /// Indicates how full the dictionary is. This is used by 4881ad8388SMartin Matuska /// dict_is_distance_valid() to detect corrupt files that would 4981ad8388SMartin Matuska /// read beyond the beginning of the dictionary. 5081ad8388SMartin Matuska size_t full; 5181ad8388SMartin Matuska 5281ad8388SMartin Matuska /// Write limit 5381ad8388SMartin Matuska size_t limit; 5481ad8388SMartin Matuska 55*3b35e7eeSXin LI /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes 56*3b35e7eeSXin LI /// larger than the actual dictionary size. This is enforced by 57*3b35e7eeSXin LI /// how the value for "full" is set; it can be at most 58*3b35e7eeSXin LI /// "size - 2 * LZ_DICT_REPEAT_MAX". 5981ad8388SMartin Matuska size_t size; 6081ad8388SMartin Matuska 61*3b35e7eeSXin LI /// True once the dictionary has become full and the writing position 62*3b35e7eeSXin LI /// has been wrapped in decode_buffer() in lz_decoder.c. 63*3b35e7eeSXin LI bool has_wrapped; 64*3b35e7eeSXin LI 6581ad8388SMartin Matuska /// True when dictionary should be reset before decoding more data. 6681ad8388SMartin Matuska bool need_reset; 6781ad8388SMartin Matuska 6881ad8388SMartin Matuska } lzma_dict; 6981ad8388SMartin Matuska 7081ad8388SMartin Matuska 7181ad8388SMartin Matuska typedef struct { 7281ad8388SMartin Matuska size_t dict_size; 7381ad8388SMartin Matuska const uint8_t *preset_dict; 7481ad8388SMartin Matuska size_t preset_dict_size; 7581ad8388SMartin Matuska } lzma_lz_options; 7681ad8388SMartin Matuska 7781ad8388SMartin Matuska 7881ad8388SMartin Matuska typedef struct { 7981ad8388SMartin Matuska /// Data specific to the LZ-based decoder 801456f0f9SXin LI void *coder; 8181ad8388SMartin Matuska 8281ad8388SMartin Matuska /// Function to decode from in[] to *dict 831456f0f9SXin LI lzma_ret (*code)(void *coder, 8481ad8388SMartin Matuska lzma_dict *restrict dict, const uint8_t *restrict in, 8581ad8388SMartin Matuska size_t *restrict in_pos, size_t in_size); 8681ad8388SMartin Matuska 871456f0f9SXin LI void (*reset)(void *coder, const void *options); 8881ad8388SMartin Matuska 899e6bbe47SXin LI /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN 909e6bbe47SXin LI /// then allow_eopm will always be true. 919e6bbe47SXin LI void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size, 929e6bbe47SXin LI bool allow_eopm); 9381ad8388SMartin Matuska 9481ad8388SMartin Matuska /// Free allocated resources 951456f0f9SXin LI void (*end)(void *coder, const lzma_allocator *allocator); 9681ad8388SMartin Matuska 9781ad8388SMartin Matuska } lzma_lz_decoder; 9881ad8388SMartin Matuska 9981ad8388SMartin Matuska 10081ad8388SMartin Matuska #define LZMA_LZ_DECODER_INIT \ 10181ad8388SMartin Matuska (lzma_lz_decoder){ \ 10281ad8388SMartin Matuska .coder = NULL, \ 10381ad8388SMartin Matuska .code = NULL, \ 10481ad8388SMartin Matuska .reset = NULL, \ 10581ad8388SMartin Matuska .set_uncompressed = NULL, \ 10681ad8388SMartin Matuska .end = NULL, \ 10781ad8388SMartin Matuska } 10881ad8388SMartin Matuska 10981ad8388SMartin Matuska 11081ad8388SMartin Matuska extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, 11153200025SRui Paulo const lzma_allocator *allocator, 11253200025SRui Paulo const lzma_filter_info *filters, 11381ad8388SMartin Matuska lzma_ret (*lz_init)(lzma_lz_decoder *lz, 11473ed8e77SXin LI const lzma_allocator *allocator, 11573ed8e77SXin LI lzma_vli id, const void *options, 11681ad8388SMartin Matuska lzma_lz_options *lz_options)); 11781ad8388SMartin Matuska 11881ad8388SMartin Matuska extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); 11981ad8388SMartin Matuska 12081ad8388SMartin Matuska 12181ad8388SMartin Matuska ////////////////////// 12281ad8388SMartin Matuska // Inline functions // 12381ad8388SMartin Matuska ////////////////////// 12481ad8388SMartin Matuska 12581ad8388SMartin Matuska /// Get a byte from the history buffer. 12681ad8388SMartin Matuska static inline uint8_t 12781ad8388SMartin Matuska dict_get(const lzma_dict *const dict, const uint32_t distance) 12881ad8388SMartin Matuska { 12981ad8388SMartin Matuska return dict->buf[dict->pos - distance - 1 130*3b35e7eeSXin LI + (distance < dict->pos 131*3b35e7eeSXin LI ? 0 : dict->size - LZ_DICT_REPEAT_MAX)]; 132*3b35e7eeSXin LI } 133*3b35e7eeSXin LI 134*3b35e7eeSXin LI 135*3b35e7eeSXin LI /// Optimized version of dict_get(dict, 0) 136*3b35e7eeSXin LI static inline uint8_t 137*3b35e7eeSXin LI dict_get0(const lzma_dict *const dict) 138*3b35e7eeSXin LI { 139*3b35e7eeSXin LI return dict->buf[dict->pos - 1]; 14081ad8388SMartin Matuska } 14181ad8388SMartin Matuska 14281ad8388SMartin Matuska 14381ad8388SMartin Matuska /// Test if dictionary is empty. 14481ad8388SMartin Matuska static inline bool 14581ad8388SMartin Matuska dict_is_empty(const lzma_dict *const dict) 14681ad8388SMartin Matuska { 14781ad8388SMartin Matuska return dict->full == 0; 14881ad8388SMartin Matuska } 14981ad8388SMartin Matuska 15081ad8388SMartin Matuska 15181ad8388SMartin Matuska /// Validate the match distance 15281ad8388SMartin Matuska static inline bool 15381ad8388SMartin Matuska dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) 15481ad8388SMartin Matuska { 15581ad8388SMartin Matuska return dict->full > distance; 15681ad8388SMartin Matuska } 15781ad8388SMartin Matuska 15881ad8388SMartin Matuska 15981ad8388SMartin Matuska /// Repeat *len bytes at distance. 16081ad8388SMartin Matuska static inline bool 16181ad8388SMartin Matuska dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) 16281ad8388SMartin Matuska { 16381ad8388SMartin Matuska // Don't write past the end of the dictionary. 16481ad8388SMartin Matuska const size_t dict_avail = dict->limit - dict->pos; 165e0f0e66dSMartin Matuska uint32_t left = my_min(dict_avail, *len); 16681ad8388SMartin Matuska *len -= left; 16781ad8388SMartin Matuska 168*3b35e7eeSXin LI size_t back = dict->pos - distance - 1; 169*3b35e7eeSXin LI if (distance >= dict->pos) 170*3b35e7eeSXin LI back += dict->size - LZ_DICT_REPEAT_MAX; 171*3b35e7eeSXin LI 17281ad8388SMartin Matuska // Repeat a block of data from the history. Because memcpy() is faster 17381ad8388SMartin Matuska // than copying byte by byte in a loop, the copying process gets split 174*3b35e7eeSXin LI // into two cases. 17581ad8388SMartin Matuska if (distance < left) { 17681ad8388SMartin Matuska // Source and target areas overlap, thus we can't use 17781ad8388SMartin Matuska // memcpy() nor even memmove() safely. 17881ad8388SMartin Matuska do { 179*3b35e7eeSXin LI dict->buf[dict->pos++] = dict->buf[back++]; 18081ad8388SMartin Matuska } while (--left > 0); 1812f9cd13dSXin LI } else { 182*3b35e7eeSXin LI memcpy(dict->buf + dict->pos, dict->buf + back, left); 1832f9cd13dSXin LI dict->pos += left; 1842f9cd13dSXin LI } 18581ad8388SMartin Matuska 18681ad8388SMartin Matuska // Update how full the dictionary is. 187*3b35e7eeSXin LI if (!dict->has_wrapped) 188*3b35e7eeSXin LI dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; 18981ad8388SMartin Matuska 190*3b35e7eeSXin LI return *len != 0; 191*3b35e7eeSXin LI } 192*3b35e7eeSXin LI 193*3b35e7eeSXin LI 194*3b35e7eeSXin LI static inline void 195*3b35e7eeSXin LI dict_put(lzma_dict *dict, uint8_t byte) 196*3b35e7eeSXin LI { 197*3b35e7eeSXin LI dict->buf[dict->pos++] = byte; 198*3b35e7eeSXin LI 199*3b35e7eeSXin LI if (!dict->has_wrapped) 200*3b35e7eeSXin LI dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; 20181ad8388SMartin Matuska } 20281ad8388SMartin Matuska 20381ad8388SMartin Matuska 20481ad8388SMartin Matuska /// Puts one byte into the dictionary. Returns true if the dictionary was 20581ad8388SMartin Matuska /// already full and the byte couldn't be added. 20681ad8388SMartin Matuska static inline bool 207*3b35e7eeSXin LI dict_put_safe(lzma_dict *dict, uint8_t byte) 20881ad8388SMartin Matuska { 20981ad8388SMartin Matuska if (unlikely(dict->pos == dict->limit)) 21081ad8388SMartin Matuska return true; 21181ad8388SMartin Matuska 212*3b35e7eeSXin LI dict_put(dict, byte); 21381ad8388SMartin Matuska return false; 21481ad8388SMartin Matuska } 21581ad8388SMartin Matuska 21681ad8388SMartin Matuska 21781ad8388SMartin Matuska /// Copies arbitrary amount of data into the dictionary. 21881ad8388SMartin Matuska static inline void 21981ad8388SMartin Matuska dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, 22081ad8388SMartin Matuska size_t *restrict in_pos, size_t in_size, 22181ad8388SMartin Matuska size_t *restrict left) 22281ad8388SMartin Matuska { 22381ad8388SMartin Matuska // NOTE: If we are being given more data than the size of the 22481ad8388SMartin Matuska // dictionary, it could be possible to optimize the LZ decoder 22581ad8388SMartin Matuska // so that not everything needs to go through the dictionary. 22681ad8388SMartin Matuska // This shouldn't be very common thing in practice though, and 22781ad8388SMartin Matuska // the slowdown of one extra memcpy() isn't bad compared to how 22881ad8388SMartin Matuska // much time it would have taken if the data were compressed. 22981ad8388SMartin Matuska 23081ad8388SMartin Matuska if (in_size - *in_pos > *left) 23181ad8388SMartin Matuska in_size = *in_pos + *left; 23281ad8388SMartin Matuska 23381ad8388SMartin Matuska *left -= lzma_bufcpy(in, in_pos, in_size, 23481ad8388SMartin Matuska dict->buf, &dict->pos, dict->limit); 23581ad8388SMartin Matuska 236*3b35e7eeSXin LI if (!dict->has_wrapped) 237*3b35e7eeSXin LI dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; 23881ad8388SMartin Matuska 23981ad8388SMartin Matuska return; 24081ad8388SMartin Matuska } 24181ad8388SMartin Matuska 24281ad8388SMartin Matuska 24381ad8388SMartin Matuska static inline void 24481ad8388SMartin Matuska dict_reset(lzma_dict *dict) 24581ad8388SMartin Matuska { 24681ad8388SMartin Matuska dict->need_reset = true; 24781ad8388SMartin Matuska return; 24881ad8388SMartin Matuska } 24981ad8388SMartin Matuska 25081ad8388SMartin Matuska #endif 251