xref: /freebsd/contrib/xz/src/liblzma/lz/lz_decoder.h (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       lz_decoder.h
4 /// \brief      LZ out window
5 ///
6 //  Authors:    Igor Pavlov
7 //              Lasse Collin
8 //
9 //  This file has been put into the public domain.
10 //  You can do whatever you want with this file.
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13 
14 #ifndef LZMA_LZ_DECODER_H
15 #define LZMA_LZ_DECODER_H
16 
17 #include "common.h"
18 
19 
20 typedef struct {
21 	/// Pointer to the dictionary buffer. It can be an allocated buffer
22 	/// internal to liblzma, or it can a be a buffer given by the
23 	/// application when in single-call mode (not implemented yet).
24 	uint8_t *buf;
25 
26 	/// Write position in dictionary. The next byte will be written to
27 	/// buf[pos].
28 	size_t pos;
29 
30 	/// Indicates how full the dictionary is. This is used by
31 	/// dict_is_distance_valid() to detect corrupt files that would
32 	/// read beyond the beginning of the dictionary.
33 	size_t full;
34 
35 	/// Write limit
36 	size_t limit;
37 
38 	/// Size of the dictionary
39 	size_t size;
40 
41 	/// True when dictionary should be reset before decoding more data.
42 	bool need_reset;
43 
44 } lzma_dict;
45 
46 
47 typedef struct {
48 	size_t dict_size;
49 	const uint8_t *preset_dict;
50 	size_t preset_dict_size;
51 } lzma_lz_options;
52 
53 
54 typedef struct {
55 	/// Data specific to the LZ-based decoder
56 	void *coder;
57 
58 	/// Function to decode from in[] to *dict
59 	lzma_ret (*code)(void *coder,
60 			lzma_dict *restrict dict, const uint8_t *restrict in,
61 			size_t *restrict in_pos, size_t in_size);
62 
63 	void (*reset)(void *coder, const void *options);
64 
65 	/// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
66 	/// then allow_eopm will always be true.
67 	void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
68 			bool allow_eopm);
69 
70 	/// Free allocated resources
71 	void (*end)(void *coder, const lzma_allocator *allocator);
72 
73 } lzma_lz_decoder;
74 
75 
76 #define LZMA_LZ_DECODER_INIT \
77 	(lzma_lz_decoder){ \
78 		.coder = NULL, \
79 		.code = NULL, \
80 		.reset = NULL, \
81 		.set_uncompressed = NULL, \
82 		.end = NULL, \
83 	}
84 
85 
86 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
87 		const lzma_allocator *allocator,
88 		const lzma_filter_info *filters,
89 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
90 			const lzma_allocator *allocator,
91 			lzma_vli id, const void *options,
92 			lzma_lz_options *lz_options));
93 
94 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
95 
96 
97 //////////////////////
98 // Inline functions //
99 //////////////////////
100 
101 /// Get a byte from the history buffer.
102 static inline uint8_t
103 dict_get(const lzma_dict *const dict, const uint32_t distance)
104 {
105 	return dict->buf[dict->pos - distance - 1
106 			+ (distance < dict->pos ? 0 : dict->size)];
107 }
108 
109 
110 /// Test if dictionary is empty.
111 static inline bool
112 dict_is_empty(const lzma_dict *const dict)
113 {
114 	return dict->full == 0;
115 }
116 
117 
118 /// Validate the match distance
119 static inline bool
120 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
121 {
122 	return dict->full > distance;
123 }
124 
125 
126 /// Repeat *len bytes at distance.
127 static inline bool
128 dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
129 {
130 	// Don't write past the end of the dictionary.
131 	const size_t dict_avail = dict->limit - dict->pos;
132 	uint32_t left = my_min(dict_avail, *len);
133 	*len -= left;
134 
135 	// Repeat a block of data from the history. Because memcpy() is faster
136 	// than copying byte by byte in a loop, the copying process gets split
137 	// into three cases.
138 	if (distance < left) {
139 		// Source and target areas overlap, thus we can't use
140 		// memcpy() nor even memmove() safely.
141 		do {
142 			dict->buf[dict->pos] = dict_get(dict, distance);
143 			++dict->pos;
144 		} while (--left > 0);
145 
146 	} else if (distance < dict->pos) {
147 		// The easiest and fastest case
148 		memcpy(dict->buf + dict->pos,
149 				dict->buf + dict->pos - distance - 1,
150 				left);
151 		dict->pos += left;
152 
153 	} else {
154 		// The bigger the dictionary, the more rare this
155 		// case occurs. We need to "wrap" the dict, thus
156 		// we might need two memcpy() to copy all the data.
157 		assert(dict->full == dict->size);
158 		const uint32_t copy_pos
159 				= dict->pos - distance - 1 + dict->size;
160 		uint32_t copy_size = dict->size - copy_pos;
161 
162 		if (copy_size < left) {
163 			memmove(dict->buf + dict->pos, dict->buf + copy_pos,
164 					copy_size);
165 			dict->pos += copy_size;
166 			copy_size = left - copy_size;
167 			memcpy(dict->buf + dict->pos, dict->buf, copy_size);
168 			dict->pos += copy_size;
169 		} else {
170 			memmove(dict->buf + dict->pos, dict->buf + copy_pos,
171 					left);
172 			dict->pos += left;
173 		}
174 	}
175 
176 	// Update how full the dictionary is.
177 	if (dict->full < dict->pos)
178 		dict->full = dict->pos;
179 
180 	return unlikely(*len != 0);
181 }
182 
183 
184 /// Puts one byte into the dictionary. Returns true if the dictionary was
185 /// already full and the byte couldn't be added.
186 static inline bool
187 dict_put(lzma_dict *dict, uint8_t byte)
188 {
189 	if (unlikely(dict->pos == dict->limit))
190 		return true;
191 
192 	dict->buf[dict->pos++] = byte;
193 
194 	if (dict->pos > dict->full)
195 		dict->full = dict->pos;
196 
197 	return false;
198 }
199 
200 
201 /// Copies arbitrary amount of data into the dictionary.
202 static inline void
203 dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
204 		size_t *restrict in_pos, size_t in_size,
205 		size_t *restrict left)
206 {
207 	// NOTE: If we are being given more data than the size of the
208 	// dictionary, it could be possible to optimize the LZ decoder
209 	// so that not everything needs to go through the dictionary.
210 	// This shouldn't be very common thing in practice though, and
211 	// the slowdown of one extra memcpy() isn't bad compared to how
212 	// much time it would have taken if the data were compressed.
213 
214 	if (in_size - *in_pos > *left)
215 		in_size = *in_pos + *left;
216 
217 	*left -= lzma_bufcpy(in, in_pos, in_size,
218 			dict->buf, &dict->pos, dict->limit);
219 
220 	if (dict->pos > dict->full)
221 		dict->full = dict->pos;
222 
223 	return;
224 }
225 
226 
227 static inline void
228 dict_reset(lzma_dict *dict)
229 {
230 	dict->need_reset = true;
231 	return;
232 }
233 
234 #endif
235