xref: /freebsd/contrib/xz/src/liblzma/lzma/lzma_decoder.c (revision 61898cde69374d5a9994e2074605bc4101aff72d)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       lzma_decoder.c
4 /// \brief      LZMA decoder
5 ///
6 //  Authors:    Igor Pavlov
7 //              Lasse Collin
8 //
9 //  This file has been put into the public domain.
10 //  You can do whatever you want with this file.
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13 
14 #include "lz_decoder.h"
15 #include "lzma_common.h"
16 #include "lzma_decoder.h"
17 #include "range_decoder.h"
18 
19 // The macros unroll loops with switch statements.
20 // Silence warnings about missing fall-through comments.
21 #if TUKLIB_GNUC_REQ(7, 0)
22 #	pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
23 #endif
24 
25 
26 #ifdef HAVE_SMALL
27 
28 // Macros for (somewhat) size-optimized code.
29 #define seq_4(seq) seq
30 
31 #define seq_6(seq) seq
32 
33 #define seq_8(seq) seq
34 
35 #define seq_len(seq) \
36 	seq ## _CHOICE, \
37 	seq ## _CHOICE2, \
38 	seq ## _BITTREE
39 
40 #define len_decode(target, ld, pos_state, seq) \
41 do { \
42 case seq ## _CHOICE: \
43 	rc_if_0(ld.choice, seq ## _CHOICE) { \
44 		rc_update_0(ld.choice); \
45 		probs = ld.low[pos_state];\
46 		limit = LEN_LOW_SYMBOLS; \
47 		target = MATCH_LEN_MIN; \
48 	} else { \
49 		rc_update_1(ld.choice); \
50 case seq ## _CHOICE2: \
51 		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
52 			rc_update_0(ld.choice2); \
53 			probs = ld.mid[pos_state]; \
54 			limit = LEN_MID_SYMBOLS; \
55 			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
56 		} else { \
57 			rc_update_1(ld.choice2); \
58 			probs = ld.high; \
59 			limit = LEN_HIGH_SYMBOLS; \
60 			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
61 					+ LEN_MID_SYMBOLS; \
62 		} \
63 	} \
64 	symbol = 1; \
65 case seq ## _BITTREE: \
66 	do { \
67 		rc_bit(probs[symbol], , , seq ## _BITTREE); \
68 	} while (symbol < limit); \
69 	target += symbol - limit; \
70 } while (0)
71 
72 #else // HAVE_SMALL
73 
74 // Unrolled versions
75 #define seq_4(seq) \
76 	seq ## 0, \
77 	seq ## 1, \
78 	seq ## 2, \
79 	seq ## 3
80 
81 #define seq_6(seq) \
82 	seq ## 0, \
83 	seq ## 1, \
84 	seq ## 2, \
85 	seq ## 3, \
86 	seq ## 4, \
87 	seq ## 5
88 
89 #define seq_8(seq) \
90 	seq ## 0, \
91 	seq ## 1, \
92 	seq ## 2, \
93 	seq ## 3, \
94 	seq ## 4, \
95 	seq ## 5, \
96 	seq ## 6, \
97 	seq ## 7
98 
99 #define seq_len(seq) \
100 	seq ## _CHOICE, \
101 	seq ## _LOW0, \
102 	seq ## _LOW1, \
103 	seq ## _LOW2, \
104 	seq ## _CHOICE2, \
105 	seq ## _MID0, \
106 	seq ## _MID1, \
107 	seq ## _MID2, \
108 	seq ## _HIGH0, \
109 	seq ## _HIGH1, \
110 	seq ## _HIGH2, \
111 	seq ## _HIGH3, \
112 	seq ## _HIGH4, \
113 	seq ## _HIGH5, \
114 	seq ## _HIGH6, \
115 	seq ## _HIGH7
116 
117 #define len_decode(target, ld, pos_state, seq) \
118 do { \
119 	symbol = 1; \
120 case seq ## _CHOICE: \
121 	rc_if_0(ld.choice, seq ## _CHOICE) { \
122 		rc_update_0(ld.choice); \
123 		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
124 		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
125 		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
126 		target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
127 	} else { \
128 		rc_update_1(ld.choice); \
129 case seq ## _CHOICE2: \
130 		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
131 			rc_update_0(ld.choice2); \
132 			rc_bit_case(ld.mid[pos_state][symbol], , , \
133 					seq ## _MID0); \
134 			rc_bit_case(ld.mid[pos_state][symbol], , , \
135 					seq ## _MID1); \
136 			rc_bit_case(ld.mid[pos_state][symbol], , , \
137 					seq ## _MID2); \
138 			target = symbol - LEN_MID_SYMBOLS \
139 					+ MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
140 		} else { \
141 			rc_update_1(ld.choice2); \
142 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
143 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
144 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
145 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
146 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
147 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
148 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
149 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
150 			target = symbol - LEN_HIGH_SYMBOLS \
151 					+ MATCH_LEN_MIN \
152 					+ LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
153 		} \
154 	} \
155 } while (0)
156 
157 #endif // HAVE_SMALL
158 
159 
160 /// Length decoder probabilities; see comments in lzma_common.h.
161 typedef struct {
162 	probability choice;
163 	probability choice2;
164 	probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
165 	probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
166 	probability high[LEN_HIGH_SYMBOLS];
167 } lzma_length_decoder;
168 
169 
170 typedef struct {
171 	///////////////////
172 	// Probabilities //
173 	///////////////////
174 
175 	/// Literals; see comments in lzma_common.h.
176 	probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
177 
178 	/// If 1, it's a match. Otherwise it's a single 8-bit literal.
179 	probability is_match[STATES][POS_STATES_MAX];
180 
181 	/// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
182 	probability is_rep[STATES];
183 
184 	/// If 0, distance of a repeated match is rep0.
185 	/// Otherwise check is_rep1.
186 	probability is_rep0[STATES];
187 
188 	/// If 0, distance of a repeated match is rep1.
189 	/// Otherwise check is_rep2.
190 	probability is_rep1[STATES];
191 
192 	/// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
193 	probability is_rep2[STATES];
194 
195 	/// If 1, the repeated match has length of one byte. Otherwise
196 	/// the length is decoded from rep_len_decoder.
197 	probability is_rep0_long[STATES][POS_STATES_MAX];
198 
199 	/// Probability tree for the highest two bits of the match distance.
200 	/// There is a separate probability tree for match lengths of
201 	/// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
202 	probability dist_slot[DIST_STATES][DIST_SLOTS];
203 
204 	/// Probability trees for additional bits for match distance when the
205 	/// distance is in the range [4, 127].
206 	probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
207 
208 	/// Probability tree for the lowest four bits of a match distance
209 	/// that is equal to or greater than 128.
210 	probability pos_align[ALIGN_SIZE];
211 
212 	/// Length of a normal match
213 	lzma_length_decoder match_len_decoder;
214 
215 	/// Length of a repeated match
216 	lzma_length_decoder rep_len_decoder;
217 
218 	///////////////////
219 	// Decoder state //
220 	///////////////////
221 
222 	// Range coder
223 	lzma_range_decoder rc;
224 
225 	// Types of the most recently seen LZMA symbols
226 	lzma_lzma_state state;
227 
228 	uint32_t rep0;      ///< Distance of the latest match
229 	uint32_t rep1;      ///< Distance of second latest match
230 	uint32_t rep2;      ///< Distance of third latest match
231 	uint32_t rep3;      ///< Distance of fourth latest match
232 
233 	uint32_t pos_mask; // (1U << pb) - 1
234 	uint32_t literal_context_bits;
235 	uint32_t literal_pos_mask;
236 
237 	/// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
238 	/// payload marker is expected.
239 	lzma_vli uncompressed_size;
240 
241 	////////////////////////////////
242 	// State of incomplete symbol //
243 	////////////////////////////////
244 
245 	/// Position where to continue the decoder loop
246 	enum {
247 		SEQ_NORMALIZE,
248 		SEQ_IS_MATCH,
249 		seq_8(SEQ_LITERAL),
250 		seq_8(SEQ_LITERAL_MATCHED),
251 		SEQ_LITERAL_WRITE,
252 		SEQ_IS_REP,
253 		seq_len(SEQ_MATCH_LEN),
254 		seq_6(SEQ_DIST_SLOT),
255 		SEQ_DIST_MODEL,
256 		SEQ_DIRECT,
257 		seq_4(SEQ_ALIGN),
258 		SEQ_EOPM,
259 		SEQ_IS_REP0,
260 		SEQ_SHORTREP,
261 		SEQ_IS_REP0_LONG,
262 		SEQ_IS_REP1,
263 		SEQ_IS_REP2,
264 		seq_len(SEQ_REP_LEN),
265 		SEQ_COPY,
266 	} sequence;
267 
268 	/// Base of the current probability tree
269 	probability *probs;
270 
271 	/// Symbol being decoded. This is also used as an index variable in
272 	/// bittree decoders: probs[symbol]
273 	uint32_t symbol;
274 
275 	/// Used as a loop termination condition on bittree decoders and
276 	/// direct bits decoder.
277 	uint32_t limit;
278 
279 	/// Matched literal decoder: 0x100 or 0 to help avoiding branches.
280 	/// Bittree reverse decoders: Offset of the next bit: 1 << offset
281 	uint32_t offset;
282 
283 	/// If decoding a literal: match byte.
284 	/// If decoding a match: length of the match.
285 	uint32_t len;
286 } lzma_lzma1_decoder;
287 
288 
289 static lzma_ret
290 lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
291 		const uint8_t *restrict in,
292 		size_t *restrict in_pos, size_t in_size)
293 {
294 	lzma_lzma1_decoder *restrict coder = coder_ptr;
295 
296 	////////////////////
297 	// Initialization //
298 	////////////////////
299 
300 	{
301 		const lzma_ret ret = rc_read_init(
302 				&coder->rc, in, in_pos, in_size);
303 		if (ret != LZMA_STREAM_END)
304 			return ret;
305 	}
306 
307 	///////////////
308 	// Variables //
309 	///////////////
310 
311 	// Making local copies of often-used variables improves both
312 	// speed and readability.
313 
314 	lzma_dict dict = *dictptr;
315 
316 	const size_t dict_start = dict.pos;
317 
318 	// Range decoder
319 	rc_to_local(coder->rc, *in_pos);
320 
321 	// State
322 	uint32_t state = coder->state;
323 	uint32_t rep0 = coder->rep0;
324 	uint32_t rep1 = coder->rep1;
325 	uint32_t rep2 = coder->rep2;
326 	uint32_t rep3 = coder->rep3;
327 
328 	const uint32_t pos_mask = coder->pos_mask;
329 
330 	// These variables are actually needed only if we last time ran
331 	// out of input in the middle of the decoder loop.
332 	probability *probs = coder->probs;
333 	uint32_t symbol = coder->symbol;
334 	uint32_t limit = coder->limit;
335 	uint32_t offset = coder->offset;
336 	uint32_t len = coder->len;
337 
338 	const uint32_t literal_pos_mask = coder->literal_pos_mask;
339 	const uint32_t literal_context_bits = coder->literal_context_bits;
340 
341 	// Temporary variables
342 	uint32_t pos_state = dict.pos & pos_mask;
343 
344 	lzma_ret ret = LZMA_OK;
345 
346 	// If uncompressed size is known, there must be no end of payload
347 	// marker.
348 	const bool no_eopm = coder->uncompressed_size
349 			!= LZMA_VLI_UNKNOWN;
350 	if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
351 		dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
352 
353 	// The main decoder loop. The "switch" is used to restart the decoder at
354 	// correct location. Once restarted, the "switch" is no longer used.
355 	switch (coder->sequence)
356 	while (true) {
357 		// Calculate new pos_state. This is skipped on the first loop
358 		// since we already calculated it when setting up the local
359 		// variables.
360 		pos_state = dict.pos & pos_mask;
361 
362 	case SEQ_NORMALIZE:
363 	case SEQ_IS_MATCH:
364 		if (unlikely(no_eopm && dict.pos == dict.limit))
365 			break;
366 
367 		rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
368 			rc_update_0(coder->is_match[state][pos_state]);
369 
370 			// It's a literal i.e. a single 8-bit byte.
371 
372 			probs = literal_subcoder(coder->literal,
373 					literal_context_bits, literal_pos_mask,
374 					dict.pos, dict_get(&dict, 0));
375 			symbol = 1;
376 
377 			if (is_literal_state(state)) {
378 				// Decode literal without match byte.
379 #ifdef HAVE_SMALL
380 	case SEQ_LITERAL:
381 				do {
382 					rc_bit(probs[symbol], , , SEQ_LITERAL);
383 				} while (symbol < (1 << 8));
384 #else
385 				rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
386 				rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
387 				rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
388 				rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
389 				rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
390 				rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
391 				rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
392 				rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
393 #endif
394 			} else {
395 				// Decode literal with match byte.
396 				//
397 				// We store the byte we compare against
398 				// ("match byte") to "len" to minimize the
399 				// number of variables we need to store
400 				// between decoder calls.
401 				len = (uint32_t)(dict_get(&dict, rep0)) << 1;
402 
403 				// The usage of "offset" allows omitting some
404 				// branches, which should give tiny speed
405 				// improvement on some CPUs. "offset" gets
406 				// set to zero if match_bit didn't match.
407 				offset = 0x100;
408 
409 #ifdef HAVE_SMALL
410 	case SEQ_LITERAL_MATCHED:
411 				do {
412 					const uint32_t match_bit
413 							= len & offset;
414 					const uint32_t subcoder_index
415 							= offset + match_bit
416 							+ symbol;
417 
418 					rc_bit(probs[subcoder_index],
419 							offset &= ~match_bit,
420 							offset &= match_bit,
421 							SEQ_LITERAL_MATCHED);
422 
423 					// It seems to be faster to do this
424 					// here instead of putting it to the
425 					// beginning of the loop and then
426 					// putting the "case" in the middle
427 					// of the loop.
428 					len <<= 1;
429 
430 				} while (symbol < (1 << 8));
431 #else
432 				// Unroll the loop.
433 				uint32_t match_bit;
434 				uint32_t subcoder_index;
435 
436 #	define d(seq) \
437 		case seq: \
438 			match_bit = len & offset; \
439 			subcoder_index = offset + match_bit + symbol; \
440 			rc_bit(probs[subcoder_index], \
441 					offset &= ~match_bit, \
442 					offset &= match_bit, \
443 					seq)
444 
445 				d(SEQ_LITERAL_MATCHED0);
446 				len <<= 1;
447 				d(SEQ_LITERAL_MATCHED1);
448 				len <<= 1;
449 				d(SEQ_LITERAL_MATCHED2);
450 				len <<= 1;
451 				d(SEQ_LITERAL_MATCHED3);
452 				len <<= 1;
453 				d(SEQ_LITERAL_MATCHED4);
454 				len <<= 1;
455 				d(SEQ_LITERAL_MATCHED5);
456 				len <<= 1;
457 				d(SEQ_LITERAL_MATCHED6);
458 				len <<= 1;
459 				d(SEQ_LITERAL_MATCHED7);
460 #	undef d
461 #endif
462 			}
463 
464 			//update_literal(state);
465 			// Use a lookup table to update to literal state,
466 			// since compared to other state updates, this would
467 			// need two branches.
468 			static const lzma_lzma_state next_state[] = {
469 				STATE_LIT_LIT,
470 				STATE_LIT_LIT,
471 				STATE_LIT_LIT,
472 				STATE_LIT_LIT,
473 				STATE_MATCH_LIT_LIT,
474 				STATE_REP_LIT_LIT,
475 				STATE_SHORTREP_LIT_LIT,
476 				STATE_MATCH_LIT,
477 				STATE_REP_LIT,
478 				STATE_SHORTREP_LIT,
479 				STATE_MATCH_LIT,
480 				STATE_REP_LIT
481 			};
482 			state = next_state[state];
483 
484 	case SEQ_LITERAL_WRITE:
485 			if (unlikely(dict_put(&dict, symbol))) {
486 				coder->sequence = SEQ_LITERAL_WRITE;
487 				goto out;
488 			}
489 
490 			continue;
491 		}
492 
493 		// Instead of a new byte we are going to get a byte range
494 		// (distance and length) which will be repeated from our
495 		// output history.
496 
497 		rc_update_1(coder->is_match[state][pos_state]);
498 
499 	case SEQ_IS_REP:
500 		rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
501 			// Not a repeated match
502 			rc_update_0(coder->is_rep[state]);
503 			update_match(state);
504 
505 			// The latest three match distances are kept in
506 			// memory in case there are repeated matches.
507 			rep3 = rep2;
508 			rep2 = rep1;
509 			rep1 = rep0;
510 
511 			// Decode the length of the match.
512 			len_decode(len, coder->match_len_decoder,
513 					pos_state, SEQ_MATCH_LEN);
514 
515 			// Prepare to decode the highest two bits of the
516 			// match distance.
517 			probs = coder->dist_slot[get_dist_state(len)];
518 			symbol = 1;
519 
520 #ifdef HAVE_SMALL
521 	case SEQ_DIST_SLOT:
522 			do {
523 				rc_bit(probs[symbol], , , SEQ_DIST_SLOT);
524 			} while (symbol < DIST_SLOTS);
525 #else
526 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0);
527 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1);
528 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2);
529 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3);
530 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4);
531 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5);
532 #endif
533 			// Get rid of the highest bit that was needed for
534 			// indexing of the probability array.
535 			symbol -= DIST_SLOTS;
536 			assert(symbol <= 63);
537 
538 			if (symbol < DIST_MODEL_START) {
539 				// Match distances [0, 3] have only two bits.
540 				rep0 = symbol;
541 			} else {
542 				// Decode the lowest [1, 29] bits of
543 				// the match distance.
544 				limit = (symbol >> 1) - 1;
545 				assert(limit >= 1 && limit <= 30);
546 				rep0 = 2 + (symbol & 1);
547 
548 				if (symbol < DIST_MODEL_END) {
549 					// Prepare to decode the low bits for
550 					// a distance of [4, 127].
551 					assert(limit <= 5);
552 					rep0 <<= limit;
553 					assert(rep0 <= 96);
554 					// -1 is fine, because we start
555 					// decoding at probs[1], not probs[0].
556 					// NOTE: This violates the C standard,
557 					// since we are doing pointer
558 					// arithmetic past the beginning of
559 					// the array.
560 					assert((int32_t)(rep0 - symbol - 1)
561 							>= -1);
562 					assert((int32_t)(rep0 - symbol - 1)
563 							<= 82);
564 					probs = coder->pos_special + rep0
565 							- symbol - 1;
566 					symbol = 1;
567 					offset = 0;
568 	case SEQ_DIST_MODEL:
569 #ifdef HAVE_SMALL
570 					do {
571 						rc_bit(probs[symbol], ,
572 							rep0 += 1U << offset,
573 							SEQ_DIST_MODEL);
574 					} while (++offset < limit);
575 #else
576 					switch (limit) {
577 					case 5:
578 						assert(offset == 0);
579 						rc_bit(probs[symbol], ,
580 							rep0 += 1U,
581 							SEQ_DIST_MODEL);
582 						++offset;
583 						--limit;
584 					case 4:
585 						rc_bit(probs[symbol], ,
586 							rep0 += 1U << offset,
587 							SEQ_DIST_MODEL);
588 						++offset;
589 						--limit;
590 					case 3:
591 						rc_bit(probs[symbol], ,
592 							rep0 += 1U << offset,
593 							SEQ_DIST_MODEL);
594 						++offset;
595 						--limit;
596 					case 2:
597 						rc_bit(probs[symbol], ,
598 							rep0 += 1U << offset,
599 							SEQ_DIST_MODEL);
600 						++offset;
601 						--limit;
602 					case 1:
603 						// We need "symbol" only for
604 						// indexing the probability
605 						// array, thus we can use
606 						// rc_bit_last() here to omit
607 						// the unneeded updating of
608 						// "symbol".
609 						rc_bit_last(probs[symbol], ,
610 							rep0 += 1U << offset,
611 							SEQ_DIST_MODEL);
612 					}
613 #endif
614 				} else {
615 					// The distance is >= 128. Decode the
616 					// lower bits without probabilities
617 					// except the lowest four bits.
618 					assert(symbol >= 14);
619 					assert(limit >= 6);
620 					limit -= ALIGN_BITS;
621 					assert(limit >= 2);
622 	case SEQ_DIRECT:
623 					// Not worth manual unrolling
624 					do {
625 						rc_direct(rep0, SEQ_DIRECT);
626 					} while (--limit > 0);
627 
628 					// Decode the lowest four bits using
629 					// probabilities.
630 					rep0 <<= ALIGN_BITS;
631 					symbol = 1;
632 #ifdef HAVE_SMALL
633 					offset = 0;
634 	case SEQ_ALIGN:
635 					do {
636 						rc_bit(coder->pos_align[
637 								symbol], ,
638 							rep0 += 1U << offset,
639 							SEQ_ALIGN);
640 					} while (++offset < ALIGN_BITS);
641 #else
642 	case SEQ_ALIGN0:
643 					rc_bit(coder->pos_align[symbol], ,
644 							rep0 += 1, SEQ_ALIGN0);
645 	case SEQ_ALIGN1:
646 					rc_bit(coder->pos_align[symbol], ,
647 							rep0 += 2, SEQ_ALIGN1);
648 	case SEQ_ALIGN2:
649 					rc_bit(coder->pos_align[symbol], ,
650 							rep0 += 4, SEQ_ALIGN2);
651 	case SEQ_ALIGN3:
652 					// Like in SEQ_DIST_MODEL, we don't
653 					// need "symbol" for anything else
654 					// than indexing the probability array.
655 					rc_bit_last(coder->pos_align[symbol], ,
656 							rep0 += 8, SEQ_ALIGN3);
657 #endif
658 
659 					if (rep0 == UINT32_MAX) {
660 						// End of payload marker was
661 						// found. It must not be
662 						// present if uncompressed
663 						// size is known.
664 						if (coder->uncompressed_size
665 						!= LZMA_VLI_UNKNOWN) {
666 							ret = LZMA_DATA_ERROR;
667 							goto out;
668 						}
669 
670 	case SEQ_EOPM:
671 						// LZMA1 stream with
672 						// end-of-payload marker.
673 						rc_normalize(SEQ_EOPM);
674 						ret = LZMA_STREAM_END;
675 						goto out;
676 					}
677 				}
678 			}
679 
680 			// Validate the distance we just decoded.
681 			if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
682 				ret = LZMA_DATA_ERROR;
683 				goto out;
684 			}
685 
686 		} else {
687 			rc_update_1(coder->is_rep[state]);
688 
689 			// Repeated match
690 			//
691 			// The match distance is a value that we have had
692 			// earlier. The latest four match distances are
693 			// available as rep0, rep1, rep2 and rep3. We will
694 			// now decode which of them is the new distance.
695 			//
696 			// There cannot be a match if we haven't produced
697 			// any output, so check that first.
698 			if (unlikely(!dict_is_distance_valid(&dict, 0))) {
699 				ret = LZMA_DATA_ERROR;
700 				goto out;
701 			}
702 
703 	case SEQ_IS_REP0:
704 			rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
705 				rc_update_0(coder->is_rep0[state]);
706 				// The distance is rep0.
707 
708 	case SEQ_IS_REP0_LONG:
709 				rc_if_0(coder->is_rep0_long[state][pos_state],
710 						SEQ_IS_REP0_LONG) {
711 					rc_update_0(coder->is_rep0_long[
712 							state][pos_state]);
713 
714 					update_short_rep(state);
715 
716 	case SEQ_SHORTREP:
717 					if (unlikely(dict_put(&dict, dict_get(
718 							&dict, rep0)))) {
719 						coder->sequence = SEQ_SHORTREP;
720 						goto out;
721 					}
722 
723 					continue;
724 				}
725 
726 				// Repeating more than one byte at
727 				// distance of rep0.
728 				rc_update_1(coder->is_rep0_long[
729 						state][pos_state]);
730 
731 			} else {
732 				rc_update_1(coder->is_rep0[state]);
733 
734 	case SEQ_IS_REP1:
735 				// The distance is rep1, rep2 or rep3. Once
736 				// we find out which one of these three, it
737 				// is stored to rep0 and rep1, rep2 and rep3
738 				// are updated accordingly.
739 				rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
740 					rc_update_0(coder->is_rep1[state]);
741 
742 					const uint32_t distance = rep1;
743 					rep1 = rep0;
744 					rep0 = distance;
745 
746 				} else {
747 					rc_update_1(coder->is_rep1[state]);
748 	case SEQ_IS_REP2:
749 					rc_if_0(coder->is_rep2[state],
750 							SEQ_IS_REP2) {
751 						rc_update_0(coder->is_rep2[
752 								state]);
753 
754 						const uint32_t distance = rep2;
755 						rep2 = rep1;
756 						rep1 = rep0;
757 						rep0 = distance;
758 
759 					} else {
760 						rc_update_1(coder->is_rep2[
761 								state]);
762 
763 						const uint32_t distance = rep3;
764 						rep3 = rep2;
765 						rep2 = rep1;
766 						rep1 = rep0;
767 						rep0 = distance;
768 					}
769 				}
770 			}
771 
772 			update_long_rep(state);
773 
774 			// Decode the length of the repeated match.
775 			len_decode(len, coder->rep_len_decoder,
776 					pos_state, SEQ_REP_LEN);
777 		}
778 
779 		/////////////////////////////////
780 		// Repeat from history buffer. //
781 		/////////////////////////////////
782 
783 		// The length is always between these limits. There is no way
784 		// to trigger the algorithm to set len outside this range.
785 		assert(len >= MATCH_LEN_MIN);
786 		assert(len <= MATCH_LEN_MAX);
787 
788 	case SEQ_COPY:
789 		// Repeat len bytes from distance of rep0.
790 		if (unlikely(dict_repeat(&dict, rep0, &len))) {
791 			coder->sequence = SEQ_COPY;
792 			goto out;
793 		}
794 	}
795 
796 	rc_normalize(SEQ_NORMALIZE);
797 	coder->sequence = SEQ_IS_MATCH;
798 
799 out:
800 	// Save state
801 
802 	// NOTE: Must not copy dict.limit.
803 	dictptr->pos = dict.pos;
804 	dictptr->full = dict.full;
805 
806 	rc_from_local(coder->rc, *in_pos);
807 
808 	coder->state = state;
809 	coder->rep0 = rep0;
810 	coder->rep1 = rep1;
811 	coder->rep2 = rep2;
812 	coder->rep3 = rep3;
813 
814 	coder->probs = probs;
815 	coder->symbol = symbol;
816 	coder->limit = limit;
817 	coder->offset = offset;
818 	coder->len = len;
819 
820 	// Update the remaining amount of uncompressed data if uncompressed
821 	// size was known.
822 	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
823 		coder->uncompressed_size -= dict.pos - dict_start;
824 
825 		// Since there cannot be end of payload marker if the
826 		// uncompressed size was known, we check here if we
827 		// finished decoding.
828 		if (coder->uncompressed_size == 0 && ret == LZMA_OK
829 				&& coder->sequence != SEQ_NORMALIZE)
830 			ret = coder->sequence == SEQ_IS_MATCH
831 					? LZMA_STREAM_END : LZMA_DATA_ERROR;
832 	}
833 
834 	// We can do an additional check in the range decoder to catch some
835 	// corrupted files.
836 	if (ret == LZMA_STREAM_END) {
837 		if (!rc_is_finished(coder->rc))
838 			ret = LZMA_DATA_ERROR;
839 
840 		// Reset the range decoder so that it is ready to reinitialize
841 		// for a new LZMA2 chunk.
842 		rc_reset(coder->rc);
843 	}
844 
845 	return ret;
846 }
847 
848 
849 
850 static void
851 lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
852 {
853 	lzma_lzma1_decoder *coder = coder_ptr;
854 	coder->uncompressed_size = uncompressed_size;
855 }
856 
857 
858 static void
859 lzma_decoder_reset(void *coder_ptr, const void *opt)
860 {
861 	lzma_lzma1_decoder *coder = coder_ptr;
862 	const lzma_options_lzma *options = opt;
863 
864 	// NOTE: We assume that lc/lp/pb are valid since they were
865 	// successfully decoded with lzma_lzma_decode_properties().
866 
867 	// Calculate pos_mask. We don't need pos_bits as is for anything.
868 	coder->pos_mask = (1U << options->pb) - 1;
869 
870 	// Initialize the literal decoder.
871 	literal_init(coder->literal, options->lc, options->lp);
872 
873 	coder->literal_context_bits = options->lc;
874 	coder->literal_pos_mask = (1U << options->lp) - 1;
875 
876 	// State
877 	coder->state = STATE_LIT_LIT;
878 	coder->rep0 = 0;
879 	coder->rep1 = 0;
880 	coder->rep2 = 0;
881 	coder->rep3 = 0;
882 	coder->pos_mask = (1U << options->pb) - 1;
883 
884 	// Range decoder
885 	rc_reset(coder->rc);
886 
887 	// Bit and bittree decoders
888 	for (uint32_t i = 0; i < STATES; ++i) {
889 		for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
890 			bit_reset(coder->is_match[i][j]);
891 			bit_reset(coder->is_rep0_long[i][j]);
892 		}
893 
894 		bit_reset(coder->is_rep[i]);
895 		bit_reset(coder->is_rep0[i]);
896 		bit_reset(coder->is_rep1[i]);
897 		bit_reset(coder->is_rep2[i]);
898 	}
899 
900 	for (uint32_t i = 0; i < DIST_STATES; ++i)
901 		bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
902 
903 	for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
904 		bit_reset(coder->pos_special[i]);
905 
906 	bittree_reset(coder->pos_align, ALIGN_BITS);
907 
908 	// Len decoders (also bit/bittree)
909 	const uint32_t num_pos_states = 1U << options->pb;
910 	bit_reset(coder->match_len_decoder.choice);
911 	bit_reset(coder->match_len_decoder.choice2);
912 	bit_reset(coder->rep_len_decoder.choice);
913 	bit_reset(coder->rep_len_decoder.choice2);
914 
915 	for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
916 		bittree_reset(coder->match_len_decoder.low[pos_state],
917 				LEN_LOW_BITS);
918 		bittree_reset(coder->match_len_decoder.mid[pos_state],
919 				LEN_MID_BITS);
920 
921 		bittree_reset(coder->rep_len_decoder.low[pos_state],
922 				LEN_LOW_BITS);
923 		bittree_reset(coder->rep_len_decoder.mid[pos_state],
924 				LEN_MID_BITS);
925 	}
926 
927 	bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
928 	bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
929 
930 	coder->sequence = SEQ_IS_MATCH;
931 	coder->probs = NULL;
932 	coder->symbol = 0;
933 	coder->limit = 0;
934 	coder->offset = 0;
935 	coder->len = 0;
936 
937 	return;
938 }
939 
940 
941 extern lzma_ret
942 lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
943 		const void *opt, lzma_lz_options *lz_options)
944 {
945 	if (lz->coder == NULL) {
946 		lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
947 		if (lz->coder == NULL)
948 			return LZMA_MEM_ERROR;
949 
950 		lz->code = &lzma_decode;
951 		lz->reset = &lzma_decoder_reset;
952 		lz->set_uncompressed = &lzma_decoder_uncompressed;
953 	}
954 
955 	// All dictionary sizes are OK here. LZ decoder will take care of
956 	// the special cases.
957 	const lzma_options_lzma *options = opt;
958 	lz_options->dict_size = options->dict_size;
959 	lz_options->preset_dict = options->preset_dict;
960 	lz_options->preset_dict_size = options->preset_dict_size;
961 
962 	return LZMA_OK;
963 }
964 
965 
966 /// Allocate and initialize LZMA decoder. This is used only via LZ
967 /// initialization (lzma_lzma_decoder_init() passes function pointer to
968 /// the LZ initialization).
969 static lzma_ret
970 lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
971 		const void *options, lzma_lz_options *lz_options)
972 {
973 	if (!is_lclppb_valid(options))
974 		return LZMA_PROG_ERROR;
975 
976 	return_if_error(lzma_lzma_decoder_create(
977 			lz, allocator, options, lz_options));
978 
979 	lzma_decoder_reset(lz->coder, options);
980 	lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
981 
982 	return LZMA_OK;
983 }
984 
985 
986 extern lzma_ret
987 lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
988 		const lzma_filter_info *filters)
989 {
990 	// LZMA can only be the last filter in the chain. This is enforced
991 	// by the raw_decoder initialization.
992 	assert(filters[1].init == NULL);
993 
994 	return lzma_lz_decoder_init(next, allocator, filters,
995 			&lzma_decoder_init);
996 }
997 
998 
999 extern bool
1000 lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
1001 {
1002 	if (byte > (4 * 5 + 4) * 9 + 8)
1003 		return true;
1004 
1005 	// See the file format specification to understand this.
1006 	options->pb = byte / (9 * 5);
1007 	byte -= options->pb * 9 * 5;
1008 	options->lp = byte / 9;
1009 	options->lc = byte - options->lp * 9;
1010 
1011 	return options->lc + options->lp > LZMA_LCLP_MAX;
1012 }
1013 
1014 
1015 extern uint64_t
1016 lzma_lzma_decoder_memusage_nocheck(const void *options)
1017 {
1018 	const lzma_options_lzma *const opt = options;
1019 	return sizeof(lzma_lzma1_decoder)
1020 			+ lzma_lz_decoder_memusage(opt->dict_size);
1021 }
1022 
1023 
1024 extern uint64_t
1025 lzma_lzma_decoder_memusage(const void *options)
1026 {
1027 	if (!is_lclppb_valid(options))
1028 		return UINT64_MAX;
1029 
1030 	return lzma_lzma_decoder_memusage_nocheck(options);
1031 }
1032 
1033 
1034 extern lzma_ret
1035 lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
1036 		const uint8_t *props, size_t props_size)
1037 {
1038 	if (props_size != 5)
1039 		return LZMA_OPTIONS_ERROR;
1040 
1041 	lzma_options_lzma *opt
1042 			= lzma_alloc(sizeof(lzma_options_lzma), allocator);
1043 	if (opt == NULL)
1044 		return LZMA_MEM_ERROR;
1045 
1046 	if (lzma_lzma_lclppb_decode(opt, props[0]))
1047 		goto error;
1048 
1049 	// All dictionary sizes are accepted, including zero. LZ decoder
1050 	// will automatically use a dictionary at least a few KiB even if
1051 	// a smaller dictionary is requested.
1052 	opt->dict_size = read32le(props + 1);
1053 
1054 	opt->preset_dict = NULL;
1055 	opt->preset_dict_size = 0;
1056 
1057 	*options = opt;
1058 
1059 	return LZMA_OK;
1060 
1061 error:
1062 	lzma_free(opt, allocator);
1063 	return LZMA_OPTIONS_ERROR;
1064 }
1065