xref: /freebsd/contrib/xz/src/liblzma/lz/lz_decoder.h (revision 128836d304d93f2d00eb14069c27089ab46c38d4)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       lz_decoder.h
6 /// \brief      LZ out window
7 ///
8 //  Authors:    Igor Pavlov
9 //              Lasse Collin
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #ifndef LZMA_LZ_DECODER_H
14 #define LZMA_LZ_DECODER_H
15 
16 #include "common.h"
17 
18 #ifdef HAVE_IMMINTRIN_H
19 #	include <immintrin.h>
20 #endif
21 
22 
23 // dict_repeat() implementation variant:
24 // 0 = Byte-by-byte copying only.
25 // 1 = Use memcpy() for non-overlapping copies.
26 // 2 = Use x86 SSE2 for non-overlapping copies.
27 #ifndef LZMA_LZ_DECODER_CONFIG
28 #	if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
29 		&& defined(HAVE_IMMINTRIN_H) \
30 		&& (defined(__SSE2__) || defined(_M_X64) \
31 			|| (defined(_M_IX86_FP) && _M_IX86_FP >= 2))
32 #		define LZMA_LZ_DECODER_CONFIG 2
33 #	else
34 #		define LZMA_LZ_DECODER_CONFIG 1
35 #	endif
36 #endif
37 
38 /// Byte-by-byte and memcpy() copy exactly the amount needed. Other methods
39 /// can copy up to LZ_DICT_EXTRA bytes more than requested, and this amount
40 /// of extra space is needed at the end of the allocated dictionary buffer.
41 ///
42 /// NOTE: If this is increased, update LZMA_DICT_REPEAT_MAX too.
43 #if LZMA_LZ_DECODER_CONFIG >= 2
44 #	define LZ_DICT_EXTRA 32
45 #else
46 #	define LZ_DICT_EXTRA 0
47 #endif
48 
49 /// Maximum number of bytes that dict_repeat() may copy. The allocated
50 /// dictionary buffer will be 2 * LZ_DICT_REPEAT_MAX + LZMA_DICT_EXTRA bytes
51 /// larger than the actual dictionary size:
52 ///
53 /// (1) Every time the decoder reaches the end of the dictionary buffer,
54 ///     the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
55 ///     This way dict_repeat() will only need to copy from one place,
56 ///     never from both the end and beginning of the buffer.
57 ///
58 /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
59 ///     the oldest byte still in the dictionary and the current write
60 ///     position. This way dict_repeat() with the maximum valid distance
61 ///     won't need memmove() as the copying cannot overlap.
62 ///
63 /// (3) LZ_DICT_EXTRA bytes are required at the end of the dictionary buffer
64 ///     so that extra copying done by dict_repeat() won't write or read past
65 ///     the end of the allocated buffer. This amount is *not* counted as part
66 ///     of lzma_dict.size.
67 ///
68 /// Note that memcpy() still cannot be used if distance < len.
69 ///
70 /// LZMA's longest match length is 273 bytes. The LZMA decoder looks at
71 /// the lowest four bits of the dictionary position, thus 273 must be
72 /// rounded up to the next multiple of 16 (288). In addition, optimized
73 /// dict_repeat() copies 32 bytes at a time, thus this must also be
74 /// a multiple of 32.
75 #define LZ_DICT_REPEAT_MAX 288
76 
77 /// Initial position in lzma_dict.buf when the dictionary is empty.
78 #define LZ_DICT_INIT_POS (2 * LZ_DICT_REPEAT_MAX)
79 
80 
81 typedef struct {
82 	/// Pointer to the dictionary buffer.
83 	uint8_t *buf;
84 
85 	/// Write position in dictionary. The next byte will be written to
86 	/// buf[pos].
87 	size_t pos;
88 
89 	/// Indicates how full the dictionary is. This is used by
90 	/// dict_is_distance_valid() to detect corrupt files that would
91 	/// read beyond the beginning of the dictionary.
92 	size_t full;
93 
94 	/// Write limit
95 	size_t limit;
96 
97 	/// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
98 	/// larger than the actual dictionary size. This is enforced by
99 	/// how the value for "full" is set; it can be at most
100 	/// "size - 2 * LZ_DICT_REPEAT_MAX".
101 	size_t size;
102 
103 	/// True once the dictionary has become full and the writing position
104 	/// has been wrapped in decode_buffer() in lz_decoder.c.
105 	bool has_wrapped;
106 
107 	/// True when dictionary should be reset before decoding more data.
108 	bool need_reset;
109 
110 } lzma_dict;
111 
112 
113 typedef struct {
114 	size_t dict_size;
115 	const uint8_t *preset_dict;
116 	size_t preset_dict_size;
117 } lzma_lz_options;
118 
119 
120 typedef struct {
121 	/// Data specific to the LZ-based decoder
122 	void *coder;
123 
124 	/// Function to decode from in[] to *dict
125 	lzma_ret (*code)(void *coder,
126 			lzma_dict *restrict dict, const uint8_t *restrict in,
127 			size_t *restrict in_pos, size_t in_size);
128 
129 	void (*reset)(void *coder, const void *options);
130 
131 	/// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
132 	/// then allow_eopm will always be true.
133 	void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
134 			bool allow_eopm);
135 
136 	/// Free allocated resources
137 	void (*end)(void *coder, const lzma_allocator *allocator);
138 
139 } lzma_lz_decoder;
140 
141 
142 #define LZMA_LZ_DECODER_INIT \
143 	(lzma_lz_decoder){ \
144 		.coder = NULL, \
145 		.code = NULL, \
146 		.reset = NULL, \
147 		.set_uncompressed = NULL, \
148 		.end = NULL, \
149 	}
150 
151 
152 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
153 		const lzma_allocator *allocator,
154 		const lzma_filter_info *filters,
155 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
156 			const lzma_allocator *allocator,
157 			lzma_vli id, const void *options,
158 			lzma_lz_options *lz_options));
159 
160 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
161 
162 
163 //////////////////////
164 // Inline functions //
165 //////////////////////
166 
167 /// Get a byte from the history buffer.
168 static inline uint8_t
dict_get(const lzma_dict * const dict,const uint32_t distance)169 dict_get(const lzma_dict *const dict, const uint32_t distance)
170 {
171 	return dict->buf[dict->pos - distance - 1
172 			+ (distance < dict->pos
173 				? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
174 }
175 
176 
177 /// Optimized version of dict_get(dict, 0)
178 static inline uint8_t
dict_get0(const lzma_dict * const dict)179 dict_get0(const lzma_dict *const dict)
180 {
181 	return dict->buf[dict->pos - 1];
182 }
183 
184 
185 /// Test if dictionary is empty.
186 static inline bool
dict_is_empty(const lzma_dict * const dict)187 dict_is_empty(const lzma_dict *const dict)
188 {
189 	return dict->full == 0;
190 }
191 
192 
193 /// Validate the match distance
194 static inline bool
dict_is_distance_valid(const lzma_dict * const dict,const size_t distance)195 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
196 {
197 	return dict->full > distance;
198 }
199 
200 
201 /// Repeat *len bytes at distance.
202 static inline bool
dict_repeat(lzma_dict * restrict dict,uint32_t distance,uint32_t * restrict len)203 dict_repeat(lzma_dict *restrict dict,
204 		uint32_t distance, uint32_t *restrict len)
205 {
206 	// Don't write past the end of the dictionary.
207 	const size_t dict_avail = dict->limit - dict->pos;
208 	uint32_t left = my_min(dict_avail, *len);
209 	*len -= left;
210 
211 	size_t back = dict->pos - distance - 1;
212 	if (distance >= dict->pos)
213 		back += dict->size - LZ_DICT_REPEAT_MAX;
214 
215 #if LZMA_LZ_DECODER_CONFIG == 0
216 	// Minimal byte-by-byte method. This might be the least bad choice
217 	// if memcpy() isn't fast and there's no replacement for it below.
218 	while (left-- > 0) {
219 		dict->buf[dict->pos++] = dict->buf[back++];
220 	}
221 
222 #else
223 	// Because memcpy() or a similar method can be faster than copying
224 	// byte by byte in a loop, the copying process is split into
225 	// two cases.
226 	if (distance < left) {
227 		// Source and target areas overlap, thus we can't use
228 		// memcpy() nor even memmove() safely.
229 		do {
230 			dict->buf[dict->pos++] = dict->buf[back++];
231 		} while (--left > 0);
232 	} else {
233 #	if LZMA_LZ_DECODER_CONFIG == 1
234 		memcpy(dict->buf + dict->pos, dict->buf + back, left);
235 		dict->pos += left;
236 
237 #	elif LZMA_LZ_DECODER_CONFIG == 2
238 		// This can copy up to 32 bytes more than required.
239 		// (If left == 0, we still copy 32 bytes.)
240 		size_t pos = dict->pos;
241 		dict->pos += left;
242 		do {
243 			const __m128i x0 = _mm_loadu_si128(
244 					(__m128i *)(dict->buf + back));
245 			const __m128i x1 = _mm_loadu_si128(
246 					(__m128i *)(dict->buf + back + 16));
247 			back += 32;
248 			_mm_storeu_si128(
249 					(__m128i *)(dict->buf + pos), x0);
250 			_mm_storeu_si128(
251 					(__m128i *)(dict->buf + pos + 16), x1);
252 			pos += 32;
253 		} while (pos < dict->pos);
254 
255 #	else
256 #		error "Invalid LZMA_LZ_DECODER_CONFIG value"
257 #	endif
258 	}
259 #endif
260 
261 	// Update how full the dictionary is.
262 	if (!dict->has_wrapped)
263 		dict->full = dict->pos - LZ_DICT_INIT_POS;
264 
265 	return *len != 0;
266 }
267 
268 
269 static inline void
dict_put(lzma_dict * restrict dict,uint8_t byte)270 dict_put(lzma_dict *restrict dict, uint8_t byte)
271 {
272 	dict->buf[dict->pos++] = byte;
273 
274 	if (!dict->has_wrapped)
275 		dict->full = dict->pos - LZ_DICT_INIT_POS;
276 }
277 
278 
279 /// Puts one byte into the dictionary. Returns true if the dictionary was
280 /// already full and the byte couldn't be added.
281 static inline bool
dict_put_safe(lzma_dict * restrict dict,uint8_t byte)282 dict_put_safe(lzma_dict *restrict dict, uint8_t byte)
283 {
284 	if (unlikely(dict->pos == dict->limit))
285 		return true;
286 
287 	dict_put(dict, byte);
288 	return false;
289 }
290 
291 
292 /// Copies arbitrary amount of data into the dictionary.
293 static inline void
dict_write(lzma_dict * restrict dict,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,size_t * restrict left)294 dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
295 		size_t *restrict in_pos, size_t in_size,
296 		size_t *restrict left)
297 {
298 	// NOTE: If we are being given more data than the size of the
299 	// dictionary, it could be possible to optimize the LZ decoder
300 	// so that not everything needs to go through the dictionary.
301 	// This shouldn't be very common thing in practice though, and
302 	// the slowdown of one extra memcpy() isn't bad compared to how
303 	// much time it would have taken if the data were compressed.
304 
305 	if (in_size - *in_pos > *left)
306 		in_size = *in_pos + *left;
307 
308 	*left -= lzma_bufcpy(in, in_pos, in_size,
309 			dict->buf, &dict->pos, dict->limit);
310 
311 	if (!dict->has_wrapped)
312 		dict->full = dict->pos - LZ_DICT_INIT_POS;
313 
314 	return;
315 }
316 
317 
318 static inline void
dict_reset(lzma_dict * dict)319 dict_reset(lzma_dict *dict)
320 {
321 	dict->need_reset = true;
322 	return;
323 }
324 
325 #endif
326