1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file lz_decoder.h 6 /// \brief LZ out window 7 /// 8 // Authors: Igor Pavlov 9 // Lasse Collin 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #ifndef LZMA_LZ_DECODER_H 14 #define LZMA_LZ_DECODER_H 15 16 #include "common.h" 17 18 19 /// Maximum length of a match rounded up to a nice power of 2 which is 20 /// a good size for aligned memcpy(). The allocated dictionary buffer will 21 /// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size: 22 /// 23 /// (1) Every time the decoder reaches the end of the dictionary buffer, 24 /// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning. 25 /// This way dict_repeat() will only need to copy from one place, 26 /// never from both the end and beginning of the buffer. 27 /// 28 /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between 29 /// the oldest byte still in the dictionary and the current write 30 /// position. This way dict_repeat(dict, dict->size - 1, &len) 31 /// won't need memmove() as the copying cannot overlap. 32 /// 33 /// Note that memcpy() still cannot be used if distance < len. 34 /// 35 /// LZMA's longest match length is 273 so pick a multiple of 16 above that. 36 #define LZ_DICT_REPEAT_MAX 288 37 38 39 typedef struct { 40 /// Pointer to the dictionary buffer. 41 uint8_t *buf; 42 43 /// Write position in dictionary. The next byte will be written to 44 /// buf[pos]. 45 size_t pos; 46 47 /// Indicates how full the dictionary is. This is used by 48 /// dict_is_distance_valid() to detect corrupt files that would 49 /// read beyond the beginning of the dictionary. 50 size_t full; 51 52 /// Write limit 53 size_t limit; 54 55 /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes 56 /// larger than the actual dictionary size. This is enforced by 57 /// how the value for "full" is set; it can be at most 58 /// "size - 2 * LZ_DICT_REPEAT_MAX". 59 size_t size; 60 61 /// True once the dictionary has become full and the writing position 62 /// has been wrapped in decode_buffer() in lz_decoder.c. 63 bool has_wrapped; 64 65 /// True when dictionary should be reset before decoding more data. 66 bool need_reset; 67 68 } lzma_dict; 69 70 71 typedef struct { 72 size_t dict_size; 73 const uint8_t *preset_dict; 74 size_t preset_dict_size; 75 } lzma_lz_options; 76 77 78 typedef struct { 79 /// Data specific to the LZ-based decoder 80 void *coder; 81 82 /// Function to decode from in[] to *dict 83 lzma_ret (*code)(void *coder, 84 lzma_dict *restrict dict, const uint8_t *restrict in, 85 size_t *restrict in_pos, size_t in_size); 86 87 void (*reset)(void *coder, const void *options); 88 89 /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN 90 /// then allow_eopm will always be true. 91 void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size, 92 bool allow_eopm); 93 94 /// Free allocated resources 95 void (*end)(void *coder, const lzma_allocator *allocator); 96 97 } lzma_lz_decoder; 98 99 100 #define LZMA_LZ_DECODER_INIT \ 101 (lzma_lz_decoder){ \ 102 .coder = NULL, \ 103 .code = NULL, \ 104 .reset = NULL, \ 105 .set_uncompressed = NULL, \ 106 .end = NULL, \ 107 } 108 109 110 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, 111 const lzma_allocator *allocator, 112 const lzma_filter_info *filters, 113 lzma_ret (*lz_init)(lzma_lz_decoder *lz, 114 const lzma_allocator *allocator, 115 lzma_vli id, const void *options, 116 lzma_lz_options *lz_options)); 117 118 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); 119 120 121 ////////////////////// 122 // Inline functions // 123 ////////////////////// 124 125 /// Get a byte from the history buffer. 126 static inline uint8_t 127 dict_get(const lzma_dict *const dict, const uint32_t distance) 128 { 129 return dict->buf[dict->pos - distance - 1 130 + (distance < dict->pos 131 ? 0 : dict->size - LZ_DICT_REPEAT_MAX)]; 132 } 133 134 135 /// Optimized version of dict_get(dict, 0) 136 static inline uint8_t 137 dict_get0(const lzma_dict *const dict) 138 { 139 return dict->buf[dict->pos - 1]; 140 } 141 142 143 /// Test if dictionary is empty. 144 static inline bool 145 dict_is_empty(const lzma_dict *const dict) 146 { 147 return dict->full == 0; 148 } 149 150 151 /// Validate the match distance 152 static inline bool 153 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) 154 { 155 return dict->full > distance; 156 } 157 158 159 /// Repeat *len bytes at distance. 160 static inline bool 161 dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) 162 { 163 // Don't write past the end of the dictionary. 164 const size_t dict_avail = dict->limit - dict->pos; 165 uint32_t left = my_min(dict_avail, *len); 166 *len -= left; 167 168 size_t back = dict->pos - distance - 1; 169 if (distance >= dict->pos) 170 back += dict->size - LZ_DICT_REPEAT_MAX; 171 172 // Repeat a block of data from the history. Because memcpy() is faster 173 // than copying byte by byte in a loop, the copying process gets split 174 // into two cases. 175 if (distance < left) { 176 // Source and target areas overlap, thus we can't use 177 // memcpy() nor even memmove() safely. 178 do { 179 dict->buf[dict->pos++] = dict->buf[back++]; 180 } while (--left > 0); 181 } else { 182 memcpy(dict->buf + dict->pos, dict->buf + back, left); 183 dict->pos += left; 184 } 185 186 // Update how full the dictionary is. 187 if (!dict->has_wrapped) 188 dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; 189 190 return *len != 0; 191 } 192 193 194 static inline void 195 dict_put(lzma_dict *dict, uint8_t byte) 196 { 197 dict->buf[dict->pos++] = byte; 198 199 if (!dict->has_wrapped) 200 dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; 201 } 202 203 204 /// Puts one byte into the dictionary. Returns true if the dictionary was 205 /// already full and the byte couldn't be added. 206 static inline bool 207 dict_put_safe(lzma_dict *dict, uint8_t byte) 208 { 209 if (unlikely(dict->pos == dict->limit)) 210 return true; 211 212 dict_put(dict, byte); 213 return false; 214 } 215 216 217 /// Copies arbitrary amount of data into the dictionary. 218 static inline void 219 dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, 220 size_t *restrict in_pos, size_t in_size, 221 size_t *restrict left) 222 { 223 // NOTE: If we are being given more data than the size of the 224 // dictionary, it could be possible to optimize the LZ decoder 225 // so that not everything needs to go through the dictionary. 226 // This shouldn't be very common thing in practice though, and 227 // the slowdown of one extra memcpy() isn't bad compared to how 228 // much time it would have taken if the data were compressed. 229 230 if (in_size - *in_pos > *left) 231 in_size = *in_pos + *left; 232 233 *left -= lzma_bufcpy(in, in_pos, in_size, 234 dict->buf, &dict->pos, dict->limit); 235 236 if (!dict->has_wrapped) 237 dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; 238 239 return; 240 } 241 242 243 static inline void 244 dict_reset(lzma_dict *dict) 245 { 246 dict->need_reset = true; 247 return; 248 } 249 250 #endif 251