xref: /freebsd/contrib/xz/src/liblzma/lzma/lzma_decoder.c (revision 4b15965daa99044daf184221b7c283bf7f2d7e66)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       lzma_decoder.c
6 /// \brief      LZMA decoder
7 ///
8 //  Authors:    Igor Pavlov
9 //              Lasse Collin
10 //              Jia Tan
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13 
14 #include "lz_decoder.h"
15 #include "lzma_common.h"
16 #include "lzma_decoder.h"
17 #include "range_decoder.h"
18 
19 // The macros unroll loops with switch statements.
20 // Silence warnings about missing fall-through comments.
21 #if TUKLIB_GNUC_REQ(7, 0) || defined(__clang__)
22 #	pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
23 #endif
24 
25 // Minimum number of input bytes to safely decode one LZMA symbol.
26 // The worst case is that we decode 22 bits using probabilities and 26
27 // direct bits. This may decode at maximum 20 bytes of input.
28 #define LZMA_IN_REQUIRED 20
29 
30 
31 // Macros for (somewhat) size-optimized code.
32 // This is used to decode the match length (how many bytes must be repeated
33 // from the dictionary). This version is used in the Resumable mode and
34 // does not unroll any loops.
35 #define len_decode(target, ld, pos_state, seq) \
36 do { \
37 case seq ## _CHOICE: \
38 	rc_if_0_safe(ld.choice, seq ## _CHOICE) { \
39 		rc_update_0(ld.choice); \
40 		probs = ld.low[pos_state];\
41 		limit = LEN_LOW_SYMBOLS; \
42 		target = MATCH_LEN_MIN; \
43 	} else { \
44 		rc_update_1(ld.choice); \
45 case seq ## _CHOICE2: \
46 		rc_if_0_safe(ld.choice2, seq ## _CHOICE2) { \
47 			rc_update_0(ld.choice2); \
48 			probs = ld.mid[pos_state]; \
49 			limit = LEN_MID_SYMBOLS; \
50 			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
51 		} else { \
52 			rc_update_1(ld.choice2); \
53 			probs = ld.high; \
54 			limit = LEN_HIGH_SYMBOLS; \
55 			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
56 					+ LEN_MID_SYMBOLS; \
57 		} \
58 	} \
59 	symbol = 1; \
60 case seq ## _BITTREE: \
61 	do { \
62 		rc_bit_safe(probs[symbol], , , seq ## _BITTREE); \
63 	} while (symbol < limit); \
64 	target += symbol - limit; \
65 } while (0)
66 
67 
68 // This is the faster version of the match length decoder that does not
69 // worry about being resumable. It unrolls the bittree decoding loop.
70 #define len_decode_fast(target, ld, pos_state) \
71 do { \
72 	symbol = 1; \
73 	rc_if_0(ld.choice) { \
74 		rc_update_0(ld.choice); \
75 		rc_bittree3(ld.low[pos_state], \
76 				-LEN_LOW_SYMBOLS + MATCH_LEN_MIN); \
77 		target = symbol; \
78 	} else { \
79 		rc_update_1(ld.choice); \
80 		rc_if_0(ld.choice2) { \
81 			rc_update_0(ld.choice2); \
82 			rc_bittree3(ld.mid[pos_state], -LEN_MID_SYMBOLS \
83 					+ MATCH_LEN_MIN + LEN_LOW_SYMBOLS); \
84 			target = symbol; \
85 		} else { \
86 			rc_update_1(ld.choice2); \
87 			rc_bittree8(ld.high, -LEN_HIGH_SYMBOLS \
88 					+ MATCH_LEN_MIN \
89 					+ LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS); \
90 			target = symbol; \
91 		} \
92 	} \
93 } while (0)
94 
95 
96 /// Length decoder probabilities; see comments in lzma_common.h.
97 typedef struct {
98 	probability choice;
99 	probability choice2;
100 	probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
101 	probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
102 	probability high[LEN_HIGH_SYMBOLS];
103 } lzma_length_decoder;
104 
105 
106 typedef struct {
107 	///////////////////
108 	// Probabilities //
109 	///////////////////
110 
111 	/// Literals; see comments in lzma_common.h.
112 	probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE];
113 
114 	/// If 1, it's a match. Otherwise it's a single 8-bit literal.
115 	probability is_match[STATES][POS_STATES_MAX];
116 
117 	/// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
118 	probability is_rep[STATES];
119 
120 	/// If 0, distance of a repeated match is rep0.
121 	/// Otherwise check is_rep1.
122 	probability is_rep0[STATES];
123 
124 	/// If 0, distance of a repeated match is rep1.
125 	/// Otherwise check is_rep2.
126 	probability is_rep1[STATES];
127 
128 	/// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
129 	probability is_rep2[STATES];
130 
131 	/// If 1, the repeated match has length of one byte. Otherwise
132 	/// the length is decoded from rep_len_decoder.
133 	probability is_rep0_long[STATES][POS_STATES_MAX];
134 
135 	/// Probability tree for the highest two bits of the match distance.
136 	/// There is a separate probability tree for match lengths of
137 	/// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
138 	probability dist_slot[DIST_STATES][DIST_SLOTS];
139 
140 	/// Probability trees for additional bits for match distance when the
141 	/// distance is in the range [4, 127].
142 	probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
143 
144 	/// Probability tree for the lowest four bits of a match distance
145 	/// that is equal to or greater than 128.
146 	probability pos_align[ALIGN_SIZE];
147 
148 	/// Length of a normal match
149 	lzma_length_decoder match_len_decoder;
150 
151 	/// Length of a repeated match
152 	lzma_length_decoder rep_len_decoder;
153 
154 	///////////////////
155 	// Decoder state //
156 	///////////////////
157 
158 	// Range coder
159 	lzma_range_decoder rc;
160 
161 	// Types of the most recently seen LZMA symbols
162 	lzma_lzma_state state;
163 
164 	uint32_t rep0;      ///< Distance of the latest match
165 	uint32_t rep1;      ///< Distance of second latest match
166 	uint32_t rep2;      ///< Distance of third latest match
167 	uint32_t rep3;      ///< Distance of fourth latest match
168 
169 	uint32_t pos_mask; // (1U << pb) - 1
170 	uint32_t literal_context_bits;
171 	uint32_t literal_mask;
172 
173 	/// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
174 	/// payload marker is expected.
175 	lzma_vli uncompressed_size;
176 
177 	/// True if end of payload marker (EOPM) is allowed even when
178 	/// uncompressed_size is known; false if EOPM must not be present.
179 	/// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN.
180 	bool allow_eopm;
181 
182 	////////////////////////////////
183 	// State of incomplete symbol //
184 	////////////////////////////////
185 
186 	/// Position where to continue the decoder loop
187 	enum {
188 		SEQ_NORMALIZE,
189 		SEQ_IS_MATCH,
190 		SEQ_LITERAL,
191 		SEQ_LITERAL_MATCHED,
192 		SEQ_LITERAL_WRITE,
193 		SEQ_IS_REP,
194 		SEQ_MATCH_LEN_CHOICE,
195 		SEQ_MATCH_LEN_CHOICE2,
196 		SEQ_MATCH_LEN_BITTREE,
197 		SEQ_DIST_SLOT,
198 		SEQ_DIST_MODEL,
199 		SEQ_DIRECT,
200 		SEQ_ALIGN,
201 		SEQ_EOPM,
202 		SEQ_IS_REP0,
203 		SEQ_SHORTREP,
204 		SEQ_IS_REP0_LONG,
205 		SEQ_IS_REP1,
206 		SEQ_IS_REP2,
207 		SEQ_REP_LEN_CHOICE,
208 		SEQ_REP_LEN_CHOICE2,
209 		SEQ_REP_LEN_BITTREE,
210 		SEQ_COPY,
211 	} sequence;
212 
213 	/// Base of the current probability tree
214 	probability *probs;
215 
216 	/// Symbol being decoded. This is also used as an index variable in
217 	/// bittree decoders: probs[symbol]
218 	uint32_t symbol;
219 
220 	/// Used as a loop termination condition on bittree decoders and
221 	/// direct bits decoder.
222 	uint32_t limit;
223 
224 	/// Matched literal decoder: 0x100 or 0 to help avoiding branches.
225 	/// Bittree reverse decoders: Offset of the next bit: 1 << offset
226 	uint32_t offset;
227 
228 	/// If decoding a literal: match byte.
229 	/// If decoding a match: length of the match.
230 	uint32_t len;
231 } lzma_lzma1_decoder;
232 
233 
234 static lzma_ret
235 lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
236 		const uint8_t *restrict in,
237 		size_t *restrict in_pos, size_t in_size)
238 {
239 	lzma_lzma1_decoder *restrict coder = coder_ptr;
240 
241 	////////////////////
242 	// Initialization //
243 	////////////////////
244 
245 	{
246 		const lzma_ret ret = rc_read_init(
247 				&coder->rc, in, in_pos, in_size);
248 		if (ret != LZMA_STREAM_END)
249 			return ret;
250 	}
251 
252 	///////////////
253 	// Variables //
254 	///////////////
255 
256 	// Making local copies of often-used variables improves both
257 	// speed and readability.
258 
259 	lzma_dict dict = *dictptr;
260 
261 	const size_t dict_start = dict.pos;
262 
263 	// Range decoder
264 	rc_to_local(coder->rc, *in_pos, LZMA_IN_REQUIRED);
265 
266 	// State
267 	uint32_t state = coder->state;
268 	uint32_t rep0 = coder->rep0;
269 	uint32_t rep1 = coder->rep1;
270 	uint32_t rep2 = coder->rep2;
271 	uint32_t rep3 = coder->rep3;
272 
273 	const uint32_t pos_mask = coder->pos_mask;
274 
275 	// These variables are actually needed only if we last time ran
276 	// out of input in the middle of the decoder loop.
277 	probability *probs = coder->probs;
278 	uint32_t symbol = coder->symbol;
279 	uint32_t limit = coder->limit;
280 	uint32_t offset = coder->offset;
281 	uint32_t len = coder->len;
282 
283 	const uint32_t literal_mask = coder->literal_mask;
284 	const uint32_t literal_context_bits = coder->literal_context_bits;
285 
286 	// Temporary variables
287 	uint32_t pos_state = dict.pos & pos_mask;
288 
289 	lzma_ret ret = LZMA_OK;
290 
291 	// This is true when the next LZMA symbol is allowed to be EOPM.
292 	// That is, if this is false, then EOPM is considered
293 	// an invalid symbol and we will return LZMA_DATA_ERROR.
294 	//
295 	// EOPM is always required (not just allowed) when
296 	// the uncompressed size isn't known. When uncompressed size
297 	// is known, eopm_is_valid may be set to true later.
298 	bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN;
299 
300 	// If uncompressed size is known and there is enough output space
301 	// to decode all the data, limit the available buffer space so that
302 	// the main loop won't try to decode past the end of the stream.
303 	bool might_finish_without_eopm = false;
304 	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN
305 			&& coder->uncompressed_size <= dict.limit - dict.pos) {
306 		dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
307 		might_finish_without_eopm = true;
308 	}
309 
310 	// The main decoder loop. The "switch" is used to resume the decoder at
311 	// correct location. Once resumed, the "switch" is no longer used.
312 	// The decoder loops is split into two modes:
313 	//
314 	// 1 - Non-resumable mode (fast). This is used when it is guaranteed
315 	//     there is enough input to decode the next symbol. If the output
316 	//     limit is reached, then the decoder loop will save the place
317 	//     for the resumable mode to continue. This mode is not used if
318 	//     HAVE_SMALL is defined. This is faster than Resumable mode
319 	//     because it reduces the number of branches needed and allows
320 	//     for more compiler optimizations.
321 	//
322 	// 2 - Resumable mode (slow). This is used when a previous decoder
323 	//     loop did not have enough space in the input or output buffers
324 	//     to complete. It uses sequence enum values to set remind
325 	//     coder->sequence where to resume in the decoder loop. This
326 	//     is the only mode used when HAVE_SMALL is defined.
327 
328 	switch (coder->sequence)
329 	while (true) {
330 		// Calculate new pos_state. This is skipped on the first loop
331 		// since we already calculated it when setting up the local
332 		// variables.
333 		pos_state = dict.pos & pos_mask;
334 
335 #ifndef HAVE_SMALL
336 
337 		///////////////////////////////
338 		// Non-resumable Mode (fast) //
339 		///////////////////////////////
340 
341 		// Go to Resumable mode (1) if there is not enough input to
342 		// safely decode any possible LZMA symbol or (2) if the
343 		// dictionary is full, which may need special checks that
344 		// are only done in the Resumable mode.
345 		if (unlikely(!rc_is_fast_allowed()
346 				|| dict.pos == dict.limit))
347 			goto slow;
348 
349 		// Decode the first bit from the next LZMA symbol.
350 		// If the bit is a 0, then we handle it as a literal.
351 		// If the bit is a 1, then it is a match of previously
352 		// decoded data.
353 		rc_if_0(coder->is_match[state][pos_state]) {
354 			/////////////////////
355 			// Decode literal. //
356 			/////////////////////
357 
358 			// Update the RC that we have decoded a 0.
359 			rc_update_0(coder->is_match[state][pos_state]);
360 
361 			// Get the correct probability array from lp and
362 			// lc params.
363 			probs = literal_subcoder(coder->literal,
364 					literal_context_bits, literal_mask,
365 					dict.pos, dict_get0(&dict));
366 
367 			if (is_literal_state(state)) {
368 				update_literal_normal(state);
369 
370 				// Decode literal without match byte.
371 				rc_bittree8(probs, 0);
372 			} else {
373 				update_literal_matched(state);
374 
375 				// Decode literal with match byte.
376 				rc_matched_literal(probs,
377 						dict_get(&dict, rep0));
378 			}
379 
380 			// Write decoded literal to dictionary
381 			dict_put(&dict, symbol);
382 			continue;
383 		}
384 
385 		///////////////////
386 		// Decode match. //
387 		///////////////////
388 
389 		// Instead of a new byte we are going to decode a
390 		// distance-length pair. The distance represents how far
391 		// back in the dictionary to begin copying. The length
392 		// represents how many bytes to copy.
393 
394 		rc_update_1(coder->is_match[state][pos_state]);
395 
396 		rc_if_0(coder->is_rep[state]) {
397 			///////////////////
398 			// Simple match. //
399 			///////////////////
400 
401 			// Not a repeated match. In this case,
402 			// the length (how many bytes to copy) must be
403 			// decoded first. Then, the distance (where to
404 			// start copying) is decoded.
405 			//
406 			// This is also how we know when we are done
407 			// decoding. If the distance decodes to UINT32_MAX,
408 			// then we know to stop decoding (end of payload
409 			// marker).
410 
411 			rc_update_0(coder->is_rep[state]);
412 			update_match(state);
413 
414 			// The latest three match distances are kept in
415 			// memory in case there are repeated matches.
416 			rep3 = rep2;
417 			rep2 = rep1;
418 			rep1 = rep0;
419 
420 			// Decode the length of the match.
421 			len_decode_fast(len, coder->match_len_decoder,
422 					pos_state);
423 
424 			// Next, decode the distance into rep0.
425 
426 			// The next 6 bits determine how to decode the
427 			// rest of the distance.
428 			probs = coder->dist_slot[get_dist_state(len)];
429 
430 			rc_bittree6(probs, -DIST_SLOTS);
431 			assert(symbol <= 63);
432 
433 			if (symbol < DIST_MODEL_START) {
434 				// If the decoded symbol is < DIST_MODEL_START
435 				// then we use its value directly as the
436 				// match distance. No other bits are needed.
437 				// The only possible distance values
438 				// are [0, 3].
439 				rep0 = symbol;
440 			} else {
441 				// Use the first two bits of symbol as the
442 				// highest bits of the match distance.
443 
444 				// "limit" represents the number of low bits
445 				// to decode.
446 				limit = (symbol >> 1) - 1;
447 				assert(limit >= 1 && limit <= 30);
448 				rep0 = 2 + (symbol & 1);
449 
450 				if (symbol < DIST_MODEL_END) {
451 					// When symbol is > DIST_MODEL_START,
452 					// but symbol < DIST_MODEL_END, then
453 					// it can decode distances between
454 					// [4, 127].
455 					assert(limit <= 5);
456 					rep0 <<= limit;
457 					assert(rep0 <= 96);
458 
459 					// -1 is fine, because we start
460 					// decoding at probs[1], not probs[0].
461 					// NOTE: This violates the C standard,
462 					// since we are doing pointer
463 					// arithmetic past the beginning of
464 					// the array.
465 					assert((int32_t)(rep0 - symbol - 1)
466 							>= -1);
467 					assert((int32_t)(rep0 - symbol - 1)
468 							<= 82);
469 					probs = coder->pos_special + rep0
470 							- symbol - 1;
471 					symbol = 1;
472 					offset = 1;
473 
474 					// Variable number (1-5) of bits
475 					// from a reverse bittree. This
476 					// isn't worth manual unrolling.
477 					//
478 					// NOTE: Making one or many of the
479 					// variables (probs, symbol, offset,
480 					// or limit) local here (instead of
481 					// using those declared outside the
482 					// main loop) can affect code size
483 					// and performance which isn't a
484 					// surprise but it's not so clear
485 					// what is the best.
486 					do {
487 						rc_bit_add_if_1(probs,
488 								rep0, offset);
489 						offset <<= 1;
490 					} while (--limit > 0);
491 				} else {
492 					// The distance is >= 128. Decode the
493 					// lower bits without probabilities
494 					// except the lowest four bits.
495 					assert(symbol >= 14);
496 					assert(limit >= 6);
497 
498 					limit -= ALIGN_BITS;
499 					assert(limit >= 2);
500 
501 					rc_direct(rep0, limit);
502 
503 					// Decode the lowest four bits using
504 					// probabilities.
505 					rep0 <<= ALIGN_BITS;
506 					rc_bittree_rev4(coder->pos_align);
507 					rep0 += symbol;
508 
509 					// If the end of payload marker (EOPM)
510 					// is detected, jump to the safe code.
511 					// The EOPM handling isn't speed
512 					// critical at all.
513 					//
514 					// A final normalization is needed
515 					// after the EOPM (there can be a
516 					// dummy byte to read in some cases).
517 					// If the normalization was done here
518 					// in the fast code, it would need to
519 					// be taken into account in the value
520 					// of LZMA_IN_REQUIRED. Using the
521 					// safe code allows keeping
522 					// LZMA_IN_REQUIRED as 20 instead of
523 					// 21.
524 					if (rep0 == UINT32_MAX)
525 						goto eopm;
526 				}
527 			}
528 
529 			// Validate the distance we just decoded.
530 			if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
531 				ret = LZMA_DATA_ERROR;
532 				goto out;
533 			}
534 
535 		} else {
536 			rc_update_1(coder->is_rep[state]);
537 
538 			/////////////////////
539 			// Repeated match. //
540 			/////////////////////
541 
542 			// The match distance is a value that we have decoded
543 			// recently. The latest four match distances are
544 			// available as rep0, rep1, rep2 and rep3. We will
545 			// now decode which of them is the new distance.
546 			//
547 			// There cannot be a match if we haven't produced
548 			// any output, so check that first.
549 			if (unlikely(!dict_is_distance_valid(&dict, 0))) {
550 				ret = LZMA_DATA_ERROR;
551 				goto out;
552 			}
553 
554 			rc_if_0(coder->is_rep0[state]) {
555 				rc_update_0(coder->is_rep0[state]);
556 				// The distance is rep0.
557 
558 				// Decode the next bit to determine if 1 byte
559 				// should be copied from rep0 distance or
560 				// if the number of bytes needs to be decoded.
561 
562 				// If the next bit is 0, then it is a
563 				// "Short Rep Match" and only 1 bit is copied.
564 				// Otherwise, the length of the match is
565 				// decoded after the "else" statement.
566 				rc_if_0(coder->is_rep0_long[state][pos_state]) {
567 					rc_update_0(coder->is_rep0_long[
568 							state][pos_state]);
569 
570 					update_short_rep(state);
571 					dict_put(&dict, dict_get(&dict, rep0));
572 					continue;
573 				}
574 
575 				// Repeating more than one byte at
576 				// distance of rep0.
577 				rc_update_1(coder->is_rep0_long[
578 						state][pos_state]);
579 
580 			} else {
581 				rc_update_1(coder->is_rep0[state]);
582 
583 				// The distance is rep1, rep2 or rep3. Once
584 				// we find out which one of these three, it
585 				// is stored to rep0 and rep1, rep2 and rep3
586 				// are updated accordingly. There is no
587 				// "Short Rep Match" option, so the length
588 				// of the match must always be decoded next.
589 				rc_if_0(coder->is_rep1[state]) {
590 					// The distance is rep1.
591 					rc_update_0(coder->is_rep1[state]);
592 
593 					const uint32_t distance = rep1;
594 					rep1 = rep0;
595 					rep0 = distance;
596 
597 				} else {
598 					rc_update_1(coder->is_rep1[state]);
599 
600 					rc_if_0(coder->is_rep2[state]) {
601 						// The distance is rep2.
602 						rc_update_0(coder->is_rep2[
603 								state]);
604 
605 						const uint32_t distance = rep2;
606 						rep2 = rep1;
607 						rep1 = rep0;
608 						rep0 = distance;
609 
610 					} else {
611 						// The distance is rep3.
612 						rc_update_1(coder->is_rep2[
613 								state]);
614 
615 						const uint32_t distance = rep3;
616 						rep3 = rep2;
617 						rep2 = rep1;
618 						rep1 = rep0;
619 						rep0 = distance;
620 					}
621 				}
622 			}
623 
624 			update_long_rep(state);
625 
626 			// Decode the length of the repeated match.
627 			len_decode_fast(len, coder->rep_len_decoder,
628 					pos_state);
629 		}
630 
631 		/////////////////////////////////
632 		// Repeat from history buffer. //
633 		/////////////////////////////////
634 
635 		// The length is always between these limits. There is no way
636 		// to trigger the algorithm to set len outside this range.
637 		assert(len >= MATCH_LEN_MIN);
638 		assert(len <= MATCH_LEN_MAX);
639 
640 		// Repeat len bytes from distance of rep0.
641 		if (unlikely(dict_repeat(&dict, rep0, &len))) {
642 			coder->sequence = SEQ_COPY;
643 			goto out;
644 		}
645 
646 		continue;
647 
648 slow:
649 #endif
650 	///////////////////////////
651 	// Resumable Mode (slow) //
652 	///////////////////////////
653 
654 	// This is very similar to Non-resumable Mode, so most of the
655 	// comments are not repeated. The main differences are:
656 	// - case labels are used to resume at the correct location.
657 	// - Loops are not unrolled.
658 	// - Range coder macros take an extra sequence argument
659 	//   so they can save to coder->sequence the location to
660 	//   resume in case there is not enough input.
661 	case SEQ_NORMALIZE:
662 	case SEQ_IS_MATCH:
663 		if (unlikely(might_finish_without_eopm
664 				&& dict.pos == dict.limit)) {
665 			// In rare cases there is a useless byte that needs
666 			// to be read anyway.
667 			rc_normalize_safe(SEQ_NORMALIZE);
668 
669 			// If the range decoder state is such that we can
670 			// be at the end of the LZMA stream, then the
671 			// decoding is finished.
672 			if (rc_is_finished(rc)) {
673 				ret = LZMA_STREAM_END;
674 				goto out;
675 			}
676 
677 			// If the caller hasn't allowed EOPM to be present
678 			// together with known uncompressed size, then the
679 			// LZMA stream is corrupt.
680 			if (!coder->allow_eopm) {
681 				ret = LZMA_DATA_ERROR;
682 				goto out;
683 			}
684 
685 			// Otherwise continue decoding with the expectation
686 			// that the next LZMA symbol is EOPM.
687 			eopm_is_valid = true;
688 		}
689 
690 		rc_if_0_safe(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
691 			/////////////////////
692 			// Decode literal. //
693 			/////////////////////
694 
695 			rc_update_0(coder->is_match[state][pos_state]);
696 
697 			probs = literal_subcoder(coder->literal,
698 					literal_context_bits, literal_mask,
699 					dict.pos, dict_get0(&dict));
700 			symbol = 1;
701 
702 			if (is_literal_state(state)) {
703 				update_literal_normal(state);
704 
705 				// Decode literal without match byte.
706 				// The "slow" version does not unroll
707 				// the loop.
708 	case SEQ_LITERAL:
709 				do {
710 					rc_bit_safe(probs[symbol], , ,
711 							SEQ_LITERAL);
712 				} while (symbol < (1 << 8));
713 			} else {
714 				update_literal_matched(state);
715 
716 				// Decode literal with match byte.
717 				len = (uint32_t)(dict_get(&dict, rep0)) << 1;
718 
719 				offset = 0x100;
720 
721 	case SEQ_LITERAL_MATCHED:
722 				do {
723 					const uint32_t match_bit
724 							= len & offset;
725 					const uint32_t subcoder_index
726 							= offset + match_bit
727 							+ symbol;
728 
729 					rc_bit_safe(probs[subcoder_index],
730 							offset &= ~match_bit,
731 							offset &= match_bit,
732 							SEQ_LITERAL_MATCHED);
733 
734 					// It seems to be faster to do this
735 					// here instead of putting it to the
736 					// beginning of the loop and then
737 					// putting the "case" in the middle
738 					// of the loop.
739 					len <<= 1;
740 
741 				} while (symbol < (1 << 8));
742 			}
743 
744 	case SEQ_LITERAL_WRITE:
745 			if (dict_put_safe(&dict, symbol)) {
746 				coder->sequence = SEQ_LITERAL_WRITE;
747 				goto out;
748 			}
749 
750 			continue;
751 		}
752 
753 		///////////////////
754 		// Decode match. //
755 		///////////////////
756 
757 		rc_update_1(coder->is_match[state][pos_state]);
758 
759 	case SEQ_IS_REP:
760 		rc_if_0_safe(coder->is_rep[state], SEQ_IS_REP) {
761 			///////////////////
762 			// Simple match. //
763 			///////////////////
764 
765 			rc_update_0(coder->is_rep[state]);
766 			update_match(state);
767 
768 			rep3 = rep2;
769 			rep2 = rep1;
770 			rep1 = rep0;
771 
772 			len_decode(len, coder->match_len_decoder,
773 					pos_state, SEQ_MATCH_LEN);
774 
775 			probs = coder->dist_slot[get_dist_state(len)];
776 			symbol = 1;
777 
778 	case SEQ_DIST_SLOT:
779 			do {
780 				rc_bit_safe(probs[symbol], , , SEQ_DIST_SLOT);
781 			} while (symbol < DIST_SLOTS);
782 
783 			symbol -= DIST_SLOTS;
784 			assert(symbol <= 63);
785 
786 			if (symbol < DIST_MODEL_START) {
787 				rep0 = symbol;
788 			} else {
789 				limit = (symbol >> 1) - 1;
790 				assert(limit >= 1 && limit <= 30);
791 				rep0 = 2 + (symbol & 1);
792 
793 				if (symbol < DIST_MODEL_END) {
794 					assert(limit <= 5);
795 					rep0 <<= limit;
796 					assert(rep0 <= 96);
797 					// -1 is fine, because we start
798 					// decoding at probs[1], not probs[0].
799 					// NOTE: This violates the C standard,
800 					// since we are doing pointer
801 					// arithmetic past the beginning of
802 					// the array.
803 					assert((int32_t)(rep0 - symbol - 1)
804 							>= -1);
805 					assert((int32_t)(rep0 - symbol - 1)
806 							<= 82);
807 					probs = coder->pos_special + rep0
808 							- symbol - 1;
809 					symbol = 1;
810 					offset = 0;
811 	case SEQ_DIST_MODEL:
812 					do {
813 						rc_bit_safe(probs[symbol], ,
814 							rep0 += 1U << offset,
815 							SEQ_DIST_MODEL);
816 					} while (++offset < limit);
817 				} else {
818 					assert(symbol >= 14);
819 					assert(limit >= 6);
820 					limit -= ALIGN_BITS;
821 					assert(limit >= 2);
822 	case SEQ_DIRECT:
823 					rc_direct_safe(rep0, limit,
824 							SEQ_DIRECT);
825 
826 					rep0 <<= ALIGN_BITS;
827 					symbol = 0;
828 					offset = 1;
829 	case SEQ_ALIGN:
830 					do {
831 						rc_bit_last_safe(
832 							coder->pos_align[
833 								offset
834 								+ symbol],
835 							,
836 							symbol += offset,
837 							SEQ_ALIGN);
838 						offset <<= 1;
839 					} while (offset < ALIGN_SIZE);
840 
841 					rep0 += symbol;
842 
843 					if (rep0 == UINT32_MAX) {
844 						// End of payload marker was
845 						// found. It may only be
846 						// present if
847 						//   - uncompressed size is
848 						//     unknown or
849 						//   - after known uncompressed
850 						//     size amount of bytes has
851 						//     been decompressed and
852 						//     caller has indicated
853 						//     that EOPM might be used
854 						//     (it's not allowed in
855 						//     LZMA2).
856 #ifndef HAVE_SMALL
857 eopm:
858 #endif
859 						if (!eopm_is_valid) {
860 							ret = LZMA_DATA_ERROR;
861 							goto out;
862 						}
863 
864 	case SEQ_EOPM:
865 						// LZMA1 stream with
866 						// end-of-payload marker.
867 						rc_normalize_safe(SEQ_EOPM);
868 						ret = rc_is_finished(rc)
869 							? LZMA_STREAM_END
870 							: LZMA_DATA_ERROR;
871 						goto out;
872 					}
873 				}
874 			}
875 
876 			if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
877 				ret = LZMA_DATA_ERROR;
878 				goto out;
879 			}
880 
881 		} else {
882 			/////////////////////
883 			// Repeated match. //
884 			/////////////////////
885 
886 			rc_update_1(coder->is_rep[state]);
887 
888 			if (unlikely(!dict_is_distance_valid(&dict, 0))) {
889 				ret = LZMA_DATA_ERROR;
890 				goto out;
891 			}
892 
893 	case SEQ_IS_REP0:
894 			rc_if_0_safe(coder->is_rep0[state], SEQ_IS_REP0) {
895 				rc_update_0(coder->is_rep0[state]);
896 
897 	case SEQ_IS_REP0_LONG:
898 				rc_if_0_safe(coder->is_rep0_long
899 						[state][pos_state],
900 						SEQ_IS_REP0_LONG) {
901 					rc_update_0(coder->is_rep0_long[
902 							state][pos_state]);
903 
904 					update_short_rep(state);
905 
906 	case SEQ_SHORTREP:
907 					if (dict_put_safe(&dict,
908 							dict_get(&dict,
909 							rep0))) {
910 						coder->sequence = SEQ_SHORTREP;
911 						goto out;
912 					}
913 
914 					continue;
915 				}
916 
917 				rc_update_1(coder->is_rep0_long[
918 						state][pos_state]);
919 
920 			} else {
921 				rc_update_1(coder->is_rep0[state]);
922 
923 	case SEQ_IS_REP1:
924 				rc_if_0_safe(coder->is_rep1[state], SEQ_IS_REP1) {
925 					rc_update_0(coder->is_rep1[state]);
926 
927 					const uint32_t distance = rep1;
928 					rep1 = rep0;
929 					rep0 = distance;
930 
931 				} else {
932 					rc_update_1(coder->is_rep1[state]);
933 	case SEQ_IS_REP2:
934 					rc_if_0_safe(coder->is_rep2[state],
935 							SEQ_IS_REP2) {
936 						rc_update_0(coder->is_rep2[
937 								state]);
938 
939 						const uint32_t distance = rep2;
940 						rep2 = rep1;
941 						rep1 = rep0;
942 						rep0 = distance;
943 
944 					} else {
945 						rc_update_1(coder->is_rep2[
946 								state]);
947 
948 						const uint32_t distance = rep3;
949 						rep3 = rep2;
950 						rep2 = rep1;
951 						rep1 = rep0;
952 						rep0 = distance;
953 					}
954 				}
955 			}
956 
957 			update_long_rep(state);
958 
959 			len_decode(len, coder->rep_len_decoder,
960 					pos_state, SEQ_REP_LEN);
961 		}
962 
963 		/////////////////////////////////
964 		// Repeat from history buffer. //
965 		/////////////////////////////////
966 
967 		assert(len >= MATCH_LEN_MIN);
968 		assert(len <= MATCH_LEN_MAX);
969 
970 	case SEQ_COPY:
971 		if (unlikely(dict_repeat(&dict, rep0, &len))) {
972 			coder->sequence = SEQ_COPY;
973 			goto out;
974 		}
975 	}
976 
977 out:
978 	// Save state
979 
980 	// NOTE: Must not copy dict.limit.
981 	dictptr->pos = dict.pos;
982 	dictptr->full = dict.full;
983 
984 	rc_from_local(coder->rc, *in_pos);
985 
986 	coder->state = state;
987 	coder->rep0 = rep0;
988 	coder->rep1 = rep1;
989 	coder->rep2 = rep2;
990 	coder->rep3 = rep3;
991 
992 	coder->probs = probs;
993 	coder->symbol = symbol;
994 	coder->limit = limit;
995 	coder->offset = offset;
996 	coder->len = len;
997 
998 	// Update the remaining amount of uncompressed data if uncompressed
999 	// size was known.
1000 	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
1001 		coder->uncompressed_size -= dict.pos - dict_start;
1002 
1003 		// If we have gotten all the output but the decoder wants
1004 		// to write more output, the file is corrupt. There are
1005 		// three SEQ values where output is produced.
1006 		if (coder->uncompressed_size == 0 && ret == LZMA_OK
1007 				&& (coder->sequence == SEQ_LITERAL_WRITE
1008 					|| coder->sequence == SEQ_SHORTREP
1009 					|| coder->sequence == SEQ_COPY))
1010 			ret = LZMA_DATA_ERROR;
1011 	}
1012 
1013 	if (ret == LZMA_STREAM_END) {
1014 		// Reset the range decoder so that it is ready to reinitialize
1015 		// for a new LZMA2 chunk.
1016 		rc_reset(coder->rc);
1017 		coder->sequence = SEQ_IS_MATCH;
1018 	}
1019 
1020 	return ret;
1021 }
1022 
1023 
1024 static void
1025 lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
1026 		bool allow_eopm)
1027 {
1028 	lzma_lzma1_decoder *coder = coder_ptr;
1029 	coder->uncompressed_size = uncompressed_size;
1030 	coder->allow_eopm = allow_eopm;
1031 }
1032 
1033 
1034 static void
1035 lzma_decoder_reset(void *coder_ptr, const void *opt)
1036 {
1037 	lzma_lzma1_decoder *coder = coder_ptr;
1038 	const lzma_options_lzma *options = opt;
1039 
1040 	// NOTE: We assume that lc/lp/pb are valid since they were
1041 	// successfully decoded with lzma_lzma_decode_properties().
1042 
1043 	// Calculate pos_mask. We don't need pos_bits as is for anything.
1044 	coder->pos_mask = (1U << options->pb) - 1;
1045 
1046 	// Initialize the literal decoder.
1047 	literal_init(coder->literal, options->lc, options->lp);
1048 
1049 	coder->literal_context_bits = options->lc;
1050 	coder->literal_mask = literal_mask_calc(options->lc, options->lp);
1051 
1052 	// State
1053 	coder->state = STATE_LIT_LIT;
1054 	coder->rep0 = 0;
1055 	coder->rep1 = 0;
1056 	coder->rep2 = 0;
1057 	coder->rep3 = 0;
1058 	coder->pos_mask = (1U << options->pb) - 1;
1059 
1060 	// Range decoder
1061 	rc_reset(coder->rc);
1062 
1063 	// Bit and bittree decoders
1064 	for (uint32_t i = 0; i < STATES; ++i) {
1065 		for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
1066 			bit_reset(coder->is_match[i][j]);
1067 			bit_reset(coder->is_rep0_long[i][j]);
1068 		}
1069 
1070 		bit_reset(coder->is_rep[i]);
1071 		bit_reset(coder->is_rep0[i]);
1072 		bit_reset(coder->is_rep1[i]);
1073 		bit_reset(coder->is_rep2[i]);
1074 	}
1075 
1076 	for (uint32_t i = 0; i < DIST_STATES; ++i)
1077 		bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
1078 
1079 	for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
1080 		bit_reset(coder->pos_special[i]);
1081 
1082 	bittree_reset(coder->pos_align, ALIGN_BITS);
1083 
1084 	// Len decoders (also bit/bittree)
1085 	const uint32_t num_pos_states = 1U << options->pb;
1086 	bit_reset(coder->match_len_decoder.choice);
1087 	bit_reset(coder->match_len_decoder.choice2);
1088 	bit_reset(coder->rep_len_decoder.choice);
1089 	bit_reset(coder->rep_len_decoder.choice2);
1090 
1091 	for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
1092 		bittree_reset(coder->match_len_decoder.low[pos_state],
1093 				LEN_LOW_BITS);
1094 		bittree_reset(coder->match_len_decoder.mid[pos_state],
1095 				LEN_MID_BITS);
1096 
1097 		bittree_reset(coder->rep_len_decoder.low[pos_state],
1098 				LEN_LOW_BITS);
1099 		bittree_reset(coder->rep_len_decoder.mid[pos_state],
1100 				LEN_MID_BITS);
1101 	}
1102 
1103 	bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
1104 	bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
1105 
1106 	coder->sequence = SEQ_IS_MATCH;
1107 	coder->probs = NULL;
1108 	coder->symbol = 0;
1109 	coder->limit = 0;
1110 	coder->offset = 0;
1111 	coder->len = 0;
1112 
1113 	return;
1114 }
1115 
1116 
1117 extern lzma_ret
1118 lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
1119 		const lzma_options_lzma *options, lzma_lz_options *lz_options)
1120 {
1121 	if (lz->coder == NULL) {
1122 		lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
1123 		if (lz->coder == NULL)
1124 			return LZMA_MEM_ERROR;
1125 
1126 		lz->code = &lzma_decode;
1127 		lz->reset = &lzma_decoder_reset;
1128 		lz->set_uncompressed = &lzma_decoder_uncompressed;
1129 	}
1130 
1131 	// All dictionary sizes are OK here. LZ decoder will take care of
1132 	// the special cases.
1133 	lz_options->dict_size = options->dict_size;
1134 	lz_options->preset_dict = options->preset_dict;
1135 	lz_options->preset_dict_size = options->preset_dict_size;
1136 
1137 	return LZMA_OK;
1138 }
1139 
1140 
1141 /// Allocate and initialize LZMA decoder. This is used only via LZ
1142 /// initialization (lzma_lzma_decoder_init() passes function pointer to
1143 /// the LZ initialization).
1144 static lzma_ret
1145 lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
1146 		lzma_vli id, const void *options, lzma_lz_options *lz_options)
1147 {
1148 	if (!is_lclppb_valid(options))
1149 		return LZMA_PROG_ERROR;
1150 
1151 	lzma_vli uncomp_size = LZMA_VLI_UNKNOWN;
1152 	bool allow_eopm = true;
1153 
1154 	if (id == LZMA_FILTER_LZMA1EXT) {
1155 		const lzma_options_lzma *opt = options;
1156 
1157 		// Only one flag is supported.
1158 		if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
1159 			return LZMA_OPTIONS_ERROR;
1160 
1161 		// FIXME? Using lzma_vli instead of uint64_t is weird because
1162 		// this has nothing to do with .xz headers and variable-length
1163 		// integer encoding. On the other hand, using LZMA_VLI_UNKNOWN
1164 		// instead of UINT64_MAX is clearer when unknown size is
1165 		// meant. A problem with using lzma_vli is that now we
1166 		// allow > LZMA_VLI_MAX which is fine in this file but
1167 		// it's still confusing. Note that alone_decoder.c also
1168 		// allows > LZMA_VLI_MAX when setting uncompressed size.
1169 		uncomp_size = opt->ext_size_low
1170 				+ ((uint64_t)(opt->ext_size_high) << 32);
1171 		allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0
1172 				|| uncomp_size == LZMA_VLI_UNKNOWN;
1173 	}
1174 
1175 	return_if_error(lzma_lzma_decoder_create(
1176 			lz, allocator, options, lz_options));
1177 
1178 	lzma_decoder_reset(lz->coder, options);
1179 	lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm);
1180 
1181 	return LZMA_OK;
1182 }
1183 
1184 
1185 extern lzma_ret
1186 lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
1187 		const lzma_filter_info *filters)
1188 {
1189 	// LZMA can only be the last filter in the chain. This is enforced
1190 	// by the raw_decoder initialization.
1191 	assert(filters[1].init == NULL);
1192 
1193 	return lzma_lz_decoder_init(next, allocator, filters,
1194 			&lzma_decoder_init);
1195 }
1196 
1197 
1198 extern bool
1199 lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
1200 {
1201 	if (byte > (4 * 5 + 4) * 9 + 8)
1202 		return true;
1203 
1204 	// See the file format specification to understand this.
1205 	options->pb = byte / (9 * 5);
1206 	byte -= options->pb * 9 * 5;
1207 	options->lp = byte / 9;
1208 	options->lc = byte - options->lp * 9;
1209 
1210 	return options->lc + options->lp > LZMA_LCLP_MAX;
1211 }
1212 
1213 
1214 extern uint64_t
1215 lzma_lzma_decoder_memusage_nocheck(const void *options)
1216 {
1217 	const lzma_options_lzma *const opt = options;
1218 	return sizeof(lzma_lzma1_decoder)
1219 			+ lzma_lz_decoder_memusage(opt->dict_size);
1220 }
1221 
1222 
1223 extern uint64_t
1224 lzma_lzma_decoder_memusage(const void *options)
1225 {
1226 	if (!is_lclppb_valid(options))
1227 		return UINT64_MAX;
1228 
1229 	return lzma_lzma_decoder_memusage_nocheck(options);
1230 }
1231 
1232 
1233 extern lzma_ret
1234 lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
1235 		const uint8_t *props, size_t props_size)
1236 {
1237 	if (props_size != 5)
1238 		return LZMA_OPTIONS_ERROR;
1239 
1240 	lzma_options_lzma *opt
1241 			= lzma_alloc(sizeof(lzma_options_lzma), allocator);
1242 	if (opt == NULL)
1243 		return LZMA_MEM_ERROR;
1244 
1245 	if (lzma_lzma_lclppb_decode(opt, props[0]))
1246 		goto error;
1247 
1248 	// All dictionary sizes are accepted, including zero. LZ decoder
1249 	// will automatically use a dictionary at least a few KiB even if
1250 	// a smaller dictionary is requested.
1251 	opt->dict_size = read32le(props + 1);
1252 
1253 	opt->preset_dict = NULL;
1254 	opt->preset_dict_size = 0;
1255 
1256 	*options = opt;
1257 
1258 	return LZMA_OK;
1259 
1260 error:
1261 	lzma_free(opt, allocator);
1262 	return LZMA_OPTIONS_ERROR;
1263 }
1264