xref: /freebsd/contrib/xz/src/liblzma/lz/lz_decoder.h (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       lz_decoder.h
6 /// \brief      LZ out window
7 ///
8 //  Authors:    Igor Pavlov
9 //              Lasse Collin
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #ifndef LZMA_LZ_DECODER_H
14 #define LZMA_LZ_DECODER_H
15 
16 #include "common.h"
17 
18 
19 /// Maximum length of a match rounded up to a nice power of 2 which is
20 /// a good size for aligned memcpy(). The allocated dictionary buffer will
21 /// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size:
22 ///
23 /// (1) Every time the decoder reaches the end of the dictionary buffer,
24 ///     the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
25 ///     This way dict_repeat() will only need to copy from one place,
26 ///     never from both the end and beginning of the buffer.
27 ///
28 /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
29 ///     the oldest byte still in the dictionary and the current write
30 ///     position. This way dict_repeat(dict, dict->size - 1, &len)
31 ///     won't need memmove() as the copying cannot overlap.
32 ///
33 /// Note that memcpy() still cannot be used if distance < len.
34 ///
35 /// LZMA's longest match length is 273 so pick a multiple of 16 above that.
36 #define LZ_DICT_REPEAT_MAX 288
37 
38 
39 typedef struct {
40 	/// Pointer to the dictionary buffer.
41 	uint8_t *buf;
42 
43 	/// Write position in dictionary. The next byte will be written to
44 	/// buf[pos].
45 	size_t pos;
46 
47 	/// Indicates how full the dictionary is. This is used by
48 	/// dict_is_distance_valid() to detect corrupt files that would
49 	/// read beyond the beginning of the dictionary.
50 	size_t full;
51 
52 	/// Write limit
53 	size_t limit;
54 
55 	/// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
56 	/// larger than the actual dictionary size. This is enforced by
57 	/// how the value for "full" is set; it can be at most
58 	/// "size - 2 * LZ_DICT_REPEAT_MAX".
59 	size_t size;
60 
61 	/// True once the dictionary has become full and the writing position
62 	/// has been wrapped in decode_buffer() in lz_decoder.c.
63 	bool has_wrapped;
64 
65 	/// True when dictionary should be reset before decoding more data.
66 	bool need_reset;
67 
68 } lzma_dict;
69 
70 
71 typedef struct {
72 	size_t dict_size;
73 	const uint8_t *preset_dict;
74 	size_t preset_dict_size;
75 } lzma_lz_options;
76 
77 
78 typedef struct {
79 	/// Data specific to the LZ-based decoder
80 	void *coder;
81 
82 	/// Function to decode from in[] to *dict
83 	lzma_ret (*code)(void *coder,
84 			lzma_dict *restrict dict, const uint8_t *restrict in,
85 			size_t *restrict in_pos, size_t in_size);
86 
87 	void (*reset)(void *coder, const void *options);
88 
89 	/// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
90 	/// then allow_eopm will always be true.
91 	void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
92 			bool allow_eopm);
93 
94 	/// Free allocated resources
95 	void (*end)(void *coder, const lzma_allocator *allocator);
96 
97 } lzma_lz_decoder;
98 
99 
100 #define LZMA_LZ_DECODER_INIT \
101 	(lzma_lz_decoder){ \
102 		.coder = NULL, \
103 		.code = NULL, \
104 		.reset = NULL, \
105 		.set_uncompressed = NULL, \
106 		.end = NULL, \
107 	}
108 
109 
110 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
111 		const lzma_allocator *allocator,
112 		const lzma_filter_info *filters,
113 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
114 			const lzma_allocator *allocator,
115 			lzma_vli id, const void *options,
116 			lzma_lz_options *lz_options));
117 
118 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
119 
120 
121 //////////////////////
122 // Inline functions //
123 //////////////////////
124 
125 /// Get a byte from the history buffer.
126 static inline uint8_t
127 dict_get(const lzma_dict *const dict, const uint32_t distance)
128 {
129 	return dict->buf[dict->pos - distance - 1
130 			+ (distance < dict->pos
131 				? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
132 }
133 
134 
135 /// Optimized version of dict_get(dict, 0)
136 static inline uint8_t
137 dict_get0(const lzma_dict *const dict)
138 {
139 	return dict->buf[dict->pos - 1];
140 }
141 
142 
143 /// Test if dictionary is empty.
144 static inline bool
145 dict_is_empty(const lzma_dict *const dict)
146 {
147 	return dict->full == 0;
148 }
149 
150 
151 /// Validate the match distance
152 static inline bool
153 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
154 {
155 	return dict->full > distance;
156 }
157 
158 
159 /// Repeat *len bytes at distance.
160 static inline bool
161 dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
162 {
163 	// Don't write past the end of the dictionary.
164 	const size_t dict_avail = dict->limit - dict->pos;
165 	uint32_t left = my_min(dict_avail, *len);
166 	*len -= left;
167 
168 	size_t back = dict->pos - distance - 1;
169 	if (distance >= dict->pos)
170 		back += dict->size - LZ_DICT_REPEAT_MAX;
171 
172 	// Repeat a block of data from the history. Because memcpy() is faster
173 	// than copying byte by byte in a loop, the copying process gets split
174 	// into two cases.
175 	if (distance < left) {
176 		// Source and target areas overlap, thus we can't use
177 		// memcpy() nor even memmove() safely.
178 		do {
179 			dict->buf[dict->pos++] = dict->buf[back++];
180 		} while (--left > 0);
181 	} else {
182 		memcpy(dict->buf + dict->pos, dict->buf + back, left);
183 		dict->pos += left;
184 	}
185 
186 	// Update how full the dictionary is.
187 	if (!dict->has_wrapped)
188 		dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
189 
190 	return *len != 0;
191 }
192 
193 
194 static inline void
195 dict_put(lzma_dict *dict, uint8_t byte)
196 {
197 	dict->buf[dict->pos++] = byte;
198 
199 	if (!dict->has_wrapped)
200 		dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
201 }
202 
203 
204 /// Puts one byte into the dictionary. Returns true if the dictionary was
205 /// already full and the byte couldn't be added.
206 static inline bool
207 dict_put_safe(lzma_dict *dict, uint8_t byte)
208 {
209 	if (unlikely(dict->pos == dict->limit))
210 		return true;
211 
212 	dict_put(dict, byte);
213 	return false;
214 }
215 
216 
217 /// Copies arbitrary amount of data into the dictionary.
218 static inline void
219 dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
220 		size_t *restrict in_pos, size_t in_size,
221 		size_t *restrict left)
222 {
223 	// NOTE: If we are being given more data than the size of the
224 	// dictionary, it could be possible to optimize the LZ decoder
225 	// so that not everything needs to go through the dictionary.
226 	// This shouldn't be very common thing in practice though, and
227 	// the slowdown of one extra memcpy() isn't bad compared to how
228 	// much time it would have taken if the data were compressed.
229 
230 	if (in_size - *in_pos > *left)
231 		in_size = *in_pos + *left;
232 
233 	*left -= lzma_bufcpy(in, in_pos, in_size,
234 			dict->buf, &dict->pos, dict->limit);
235 
236 	if (!dict->has_wrapped)
237 		dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
238 
239 	return;
240 }
241 
242 
243 static inline void
244 dict_reset(lzma_dict *dict)
245 {
246 	dict->need_reset = true;
247 	return;
248 }
249 
250 #endif
251