xref: /freebsd/contrib/xz/src/liblzma/lzma/lzma_decoder.c (revision b37d1deb045d7bc7877fb1d9afdb39d43130dcc4)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       lzma_decoder.c
4 /// \brief      LZMA decoder
5 ///
6 //  Authors:    Igor Pavlov
7 //              Lasse Collin
8 //
9 //  This file has been put into the public domain.
10 //  You can do whatever you want with this file.
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13 
14 #include "lz_decoder.h"
15 #include "lzma_common.h"
16 #include "lzma_decoder.h"
17 #include "range_decoder.h"
18 
19 // The macros unroll loops with switch statements.
20 // Silence warnings about missing fall-through comments.
21 #if TUKLIB_GNUC_REQ(7, 0)
22 #	pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
23 #endif
24 
25 
26 #ifdef HAVE_SMALL
27 
28 // Macros for (somewhat) size-optimized code.
29 #define seq_4(seq) seq
30 
31 #define seq_6(seq) seq
32 
33 #define seq_8(seq) seq
34 
35 #define seq_len(seq) \
36 	seq ## _CHOICE, \
37 	seq ## _CHOICE2, \
38 	seq ## _BITTREE
39 
40 #define len_decode(target, ld, pos_state, seq) \
41 do { \
42 case seq ## _CHOICE: \
43 	rc_if_0(ld.choice, seq ## _CHOICE) { \
44 		rc_update_0(ld.choice); \
45 		probs = ld.low[pos_state];\
46 		limit = LEN_LOW_SYMBOLS; \
47 		target = MATCH_LEN_MIN; \
48 	} else { \
49 		rc_update_1(ld.choice); \
50 case seq ## _CHOICE2: \
51 		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
52 			rc_update_0(ld.choice2); \
53 			probs = ld.mid[pos_state]; \
54 			limit = LEN_MID_SYMBOLS; \
55 			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
56 		} else { \
57 			rc_update_1(ld.choice2); \
58 			probs = ld.high; \
59 			limit = LEN_HIGH_SYMBOLS; \
60 			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
61 					+ LEN_MID_SYMBOLS; \
62 		} \
63 	} \
64 	symbol = 1; \
65 case seq ## _BITTREE: \
66 	do { \
67 		rc_bit(probs[symbol], , , seq ## _BITTREE); \
68 	} while (symbol < limit); \
69 	target += symbol - limit; \
70 } while (0)
71 
72 #else // HAVE_SMALL
73 
74 // Unrolled versions
75 #define seq_4(seq) \
76 	seq ## 0, \
77 	seq ## 1, \
78 	seq ## 2, \
79 	seq ## 3
80 
81 #define seq_6(seq) \
82 	seq ## 0, \
83 	seq ## 1, \
84 	seq ## 2, \
85 	seq ## 3, \
86 	seq ## 4, \
87 	seq ## 5
88 
89 #define seq_8(seq) \
90 	seq ## 0, \
91 	seq ## 1, \
92 	seq ## 2, \
93 	seq ## 3, \
94 	seq ## 4, \
95 	seq ## 5, \
96 	seq ## 6, \
97 	seq ## 7
98 
99 #define seq_len(seq) \
100 	seq ## _CHOICE, \
101 	seq ## _LOW0, \
102 	seq ## _LOW1, \
103 	seq ## _LOW2, \
104 	seq ## _CHOICE2, \
105 	seq ## _MID0, \
106 	seq ## _MID1, \
107 	seq ## _MID2, \
108 	seq ## _HIGH0, \
109 	seq ## _HIGH1, \
110 	seq ## _HIGH2, \
111 	seq ## _HIGH3, \
112 	seq ## _HIGH4, \
113 	seq ## _HIGH5, \
114 	seq ## _HIGH6, \
115 	seq ## _HIGH7
116 
117 #define len_decode(target, ld, pos_state, seq) \
118 do { \
119 	symbol = 1; \
120 case seq ## _CHOICE: \
121 	rc_if_0(ld.choice, seq ## _CHOICE) { \
122 		rc_update_0(ld.choice); \
123 		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
124 		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
125 		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
126 		target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
127 	} else { \
128 		rc_update_1(ld.choice); \
129 case seq ## _CHOICE2: \
130 		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
131 			rc_update_0(ld.choice2); \
132 			rc_bit_case(ld.mid[pos_state][symbol], , , \
133 					seq ## _MID0); \
134 			rc_bit_case(ld.mid[pos_state][symbol], , , \
135 					seq ## _MID1); \
136 			rc_bit_case(ld.mid[pos_state][symbol], , , \
137 					seq ## _MID2); \
138 			target = symbol - LEN_MID_SYMBOLS \
139 					+ MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
140 		} else { \
141 			rc_update_1(ld.choice2); \
142 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
143 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
144 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
145 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
146 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
147 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
148 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
149 			rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
150 			target = symbol - LEN_HIGH_SYMBOLS \
151 					+ MATCH_LEN_MIN \
152 					+ LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
153 		} \
154 	} \
155 } while (0)
156 
157 #endif // HAVE_SMALL
158 
159 
160 /// Length decoder probabilities; see comments in lzma_common.h.
161 typedef struct {
162 	probability choice;
163 	probability choice2;
164 	probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
165 	probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
166 	probability high[LEN_HIGH_SYMBOLS];
167 } lzma_length_decoder;
168 
169 
170 typedef struct {
171 	///////////////////
172 	// Probabilities //
173 	///////////////////
174 
175 	/// Literals; see comments in lzma_common.h.
176 	probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
177 
178 	/// If 1, it's a match. Otherwise it's a single 8-bit literal.
179 	probability is_match[STATES][POS_STATES_MAX];
180 
181 	/// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
182 	probability is_rep[STATES];
183 
184 	/// If 0, distance of a repeated match is rep0.
185 	/// Otherwise check is_rep1.
186 	probability is_rep0[STATES];
187 
188 	/// If 0, distance of a repeated match is rep1.
189 	/// Otherwise check is_rep2.
190 	probability is_rep1[STATES];
191 
192 	/// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
193 	probability is_rep2[STATES];
194 
195 	/// If 1, the repeated match has length of one byte. Otherwise
196 	/// the length is decoded from rep_len_decoder.
197 	probability is_rep0_long[STATES][POS_STATES_MAX];
198 
199 	/// Probability tree for the highest two bits of the match distance.
200 	/// There is a separate probability tree for match lengths of
201 	/// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
202 	probability dist_slot[DIST_STATES][DIST_SLOTS];
203 
204 	/// Probability trees for additional bits for match distance when the
205 	/// distance is in the range [4, 127].
206 	probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
207 
208 	/// Probability tree for the lowest four bits of a match distance
209 	/// that is equal to or greater than 128.
210 	probability pos_align[ALIGN_SIZE];
211 
212 	/// Length of a normal match
213 	lzma_length_decoder match_len_decoder;
214 
215 	/// Length of a repeated match
216 	lzma_length_decoder rep_len_decoder;
217 
218 	///////////////////
219 	// Decoder state //
220 	///////////////////
221 
222 	// Range coder
223 	lzma_range_decoder rc;
224 
225 	// Types of the most recently seen LZMA symbols
226 	lzma_lzma_state state;
227 
228 	uint32_t rep0;      ///< Distance of the latest match
229 	uint32_t rep1;      ///< Distance of second latest match
230 	uint32_t rep2;      ///< Distance of third latest match
231 	uint32_t rep3;      ///< Distance of fourth latest match
232 
233 	uint32_t pos_mask; // (1U << pb) - 1
234 	uint32_t literal_context_bits;
235 	uint32_t literal_pos_mask;
236 
237 	/// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
238 	/// payload marker is expected.
239 	lzma_vli uncompressed_size;
240 
241 	/// True if end of payload marker (EOPM) is allowed even when
242 	/// uncompressed_size is known; false if EOPM must not be present.
243 	/// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN.
244 	bool allow_eopm;
245 
246 	////////////////////////////////
247 	// State of incomplete symbol //
248 	////////////////////////////////
249 
250 	/// Position where to continue the decoder loop
251 	enum {
252 		SEQ_NORMALIZE,
253 		SEQ_IS_MATCH,
254 		seq_8(SEQ_LITERAL),
255 		seq_8(SEQ_LITERAL_MATCHED),
256 		SEQ_LITERAL_WRITE,
257 		SEQ_IS_REP,
258 		seq_len(SEQ_MATCH_LEN),
259 		seq_6(SEQ_DIST_SLOT),
260 		SEQ_DIST_MODEL,
261 		SEQ_DIRECT,
262 		seq_4(SEQ_ALIGN),
263 		SEQ_EOPM,
264 		SEQ_IS_REP0,
265 		SEQ_SHORTREP,
266 		SEQ_IS_REP0_LONG,
267 		SEQ_IS_REP1,
268 		SEQ_IS_REP2,
269 		seq_len(SEQ_REP_LEN),
270 		SEQ_COPY,
271 	} sequence;
272 
273 	/// Base of the current probability tree
274 	probability *probs;
275 
276 	/// Symbol being decoded. This is also used as an index variable in
277 	/// bittree decoders: probs[symbol]
278 	uint32_t symbol;
279 
280 	/// Used as a loop termination condition on bittree decoders and
281 	/// direct bits decoder.
282 	uint32_t limit;
283 
284 	/// Matched literal decoder: 0x100 or 0 to help avoiding branches.
285 	/// Bittree reverse decoders: Offset of the next bit: 1 << offset
286 	uint32_t offset;
287 
288 	/// If decoding a literal: match byte.
289 	/// If decoding a match: length of the match.
290 	uint32_t len;
291 } lzma_lzma1_decoder;
292 
293 
294 static lzma_ret
295 lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
296 		const uint8_t *restrict in,
297 		size_t *restrict in_pos, size_t in_size)
298 {
299 	lzma_lzma1_decoder *restrict coder = coder_ptr;
300 
301 	////////////////////
302 	// Initialization //
303 	////////////////////
304 
305 	{
306 		const lzma_ret ret = rc_read_init(
307 				&coder->rc, in, in_pos, in_size);
308 		if (ret != LZMA_STREAM_END)
309 			return ret;
310 	}
311 
312 	///////////////
313 	// Variables //
314 	///////////////
315 
316 	// Making local copies of often-used variables improves both
317 	// speed and readability.
318 
319 	lzma_dict dict = *dictptr;
320 
321 	const size_t dict_start = dict.pos;
322 
323 	// Range decoder
324 	rc_to_local(coder->rc, *in_pos);
325 
326 	// State
327 	uint32_t state = coder->state;
328 	uint32_t rep0 = coder->rep0;
329 	uint32_t rep1 = coder->rep1;
330 	uint32_t rep2 = coder->rep2;
331 	uint32_t rep3 = coder->rep3;
332 
333 	const uint32_t pos_mask = coder->pos_mask;
334 
335 	// These variables are actually needed only if we last time ran
336 	// out of input in the middle of the decoder loop.
337 	probability *probs = coder->probs;
338 	uint32_t symbol = coder->symbol;
339 	uint32_t limit = coder->limit;
340 	uint32_t offset = coder->offset;
341 	uint32_t len = coder->len;
342 
343 	const uint32_t literal_pos_mask = coder->literal_pos_mask;
344 	const uint32_t literal_context_bits = coder->literal_context_bits;
345 
346 	// Temporary variables
347 	uint32_t pos_state = dict.pos & pos_mask;
348 
349 	lzma_ret ret = LZMA_OK;
350 
351 	// This is true when the next LZMA symbol is allowed to be EOPM.
352 	// That is, if this is false, then EOPM is considered
353 	// an invalid symbol and we will return LZMA_DATA_ERROR.
354 	//
355 	// EOPM is always required (not just allowed) when
356 	// the uncompressed size isn't known. When uncompressed size
357 	// is known, eopm_is_valid may be set to true later.
358 	bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN;
359 
360 	// If uncompressed size is known and there is enough output space
361 	// to decode all the data, limit the available buffer space so that
362 	// the main loop won't try to decode past the end of the stream.
363 	bool might_finish_without_eopm = false;
364 	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN
365 			&& coder->uncompressed_size <= dict.limit - dict.pos) {
366 		dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
367 		might_finish_without_eopm = true;
368 	}
369 
370 	// The main decoder loop. The "switch" is used to restart the decoder at
371 	// correct location. Once restarted, the "switch" is no longer used.
372 	switch (coder->sequence)
373 	while (true) {
374 		// Calculate new pos_state. This is skipped on the first loop
375 		// since we already calculated it when setting up the local
376 		// variables.
377 		pos_state = dict.pos & pos_mask;
378 
379 	case SEQ_NORMALIZE:
380 	case SEQ_IS_MATCH:
381 		if (unlikely(might_finish_without_eopm
382 				&& dict.pos == dict.limit)) {
383 			// In rare cases there is a useless byte that needs
384 			// to be read anyway.
385 			rc_normalize(SEQ_NORMALIZE);
386 
387 			// If the range decoder state is such that we can
388 			// be at the end of the LZMA stream, then the
389 			// decoding is finished.
390 			if (rc_is_finished(rc)) {
391 				ret = LZMA_STREAM_END;
392 				goto out;
393 			}
394 
395 			// If the caller hasn't allowed EOPM to be present
396 			// together with known uncompressed size, then the
397 			// LZMA stream is corrupt.
398 			if (!coder->allow_eopm) {
399 				ret = LZMA_DATA_ERROR;
400 				goto out;
401 			}
402 
403 			// Otherwise continue decoding with the expectation
404 			// that the next LZMA symbol is EOPM.
405 			eopm_is_valid = true;
406 		}
407 
408 		rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
409 			rc_update_0(coder->is_match[state][pos_state]);
410 
411 			// It's a literal i.e. a single 8-bit byte.
412 
413 			probs = literal_subcoder(coder->literal,
414 					literal_context_bits, literal_pos_mask,
415 					dict.pos, dict_get(&dict, 0));
416 			symbol = 1;
417 
418 			if (is_literal_state(state)) {
419 				// Decode literal without match byte.
420 #ifdef HAVE_SMALL
421 	case SEQ_LITERAL:
422 				do {
423 					rc_bit(probs[symbol], , , SEQ_LITERAL);
424 				} while (symbol < (1 << 8));
425 #else
426 				rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
427 				rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
428 				rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
429 				rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
430 				rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
431 				rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
432 				rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
433 				rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
434 #endif
435 			} else {
436 				// Decode literal with match byte.
437 				//
438 				// We store the byte we compare against
439 				// ("match byte") to "len" to minimize the
440 				// number of variables we need to store
441 				// between decoder calls.
442 				len = (uint32_t)(dict_get(&dict, rep0)) << 1;
443 
444 				// The usage of "offset" allows omitting some
445 				// branches, which should give tiny speed
446 				// improvement on some CPUs. "offset" gets
447 				// set to zero if match_bit didn't match.
448 				offset = 0x100;
449 
450 #ifdef HAVE_SMALL
451 	case SEQ_LITERAL_MATCHED:
452 				do {
453 					const uint32_t match_bit
454 							= len & offset;
455 					const uint32_t subcoder_index
456 							= offset + match_bit
457 							+ symbol;
458 
459 					rc_bit(probs[subcoder_index],
460 							offset &= ~match_bit,
461 							offset &= match_bit,
462 							SEQ_LITERAL_MATCHED);
463 
464 					// It seems to be faster to do this
465 					// here instead of putting it to the
466 					// beginning of the loop and then
467 					// putting the "case" in the middle
468 					// of the loop.
469 					len <<= 1;
470 
471 				} while (symbol < (1 << 8));
472 #else
473 				// Unroll the loop.
474 				uint32_t match_bit;
475 				uint32_t subcoder_index;
476 
477 #	define d(seq) \
478 		case seq: \
479 			match_bit = len & offset; \
480 			subcoder_index = offset + match_bit + symbol; \
481 			rc_bit(probs[subcoder_index], \
482 					offset &= ~match_bit, \
483 					offset &= match_bit, \
484 					seq)
485 
486 				d(SEQ_LITERAL_MATCHED0);
487 				len <<= 1;
488 				d(SEQ_LITERAL_MATCHED1);
489 				len <<= 1;
490 				d(SEQ_LITERAL_MATCHED2);
491 				len <<= 1;
492 				d(SEQ_LITERAL_MATCHED3);
493 				len <<= 1;
494 				d(SEQ_LITERAL_MATCHED4);
495 				len <<= 1;
496 				d(SEQ_LITERAL_MATCHED5);
497 				len <<= 1;
498 				d(SEQ_LITERAL_MATCHED6);
499 				len <<= 1;
500 				d(SEQ_LITERAL_MATCHED7);
501 #	undef d
502 #endif
503 			}
504 
505 			//update_literal(state);
506 			// Use a lookup table to update to literal state,
507 			// since compared to other state updates, this would
508 			// need two branches.
509 			static const lzma_lzma_state next_state[] = {
510 				STATE_LIT_LIT,
511 				STATE_LIT_LIT,
512 				STATE_LIT_LIT,
513 				STATE_LIT_LIT,
514 				STATE_MATCH_LIT_LIT,
515 				STATE_REP_LIT_LIT,
516 				STATE_SHORTREP_LIT_LIT,
517 				STATE_MATCH_LIT,
518 				STATE_REP_LIT,
519 				STATE_SHORTREP_LIT,
520 				STATE_MATCH_LIT,
521 				STATE_REP_LIT
522 			};
523 			state = next_state[state];
524 
525 	case SEQ_LITERAL_WRITE:
526 			if (unlikely(dict_put(&dict, symbol))) {
527 				coder->sequence = SEQ_LITERAL_WRITE;
528 				goto out;
529 			}
530 
531 			continue;
532 		}
533 
534 		// Instead of a new byte we are going to get a byte range
535 		// (distance and length) which will be repeated from our
536 		// output history.
537 
538 		rc_update_1(coder->is_match[state][pos_state]);
539 
540 	case SEQ_IS_REP:
541 		rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
542 			// Not a repeated match
543 			rc_update_0(coder->is_rep[state]);
544 			update_match(state);
545 
546 			// The latest three match distances are kept in
547 			// memory in case there are repeated matches.
548 			rep3 = rep2;
549 			rep2 = rep1;
550 			rep1 = rep0;
551 
552 			// Decode the length of the match.
553 			len_decode(len, coder->match_len_decoder,
554 					pos_state, SEQ_MATCH_LEN);
555 
556 			// Prepare to decode the highest two bits of the
557 			// match distance.
558 			probs = coder->dist_slot[get_dist_state(len)];
559 			symbol = 1;
560 
561 #ifdef HAVE_SMALL
562 	case SEQ_DIST_SLOT:
563 			do {
564 				rc_bit(probs[symbol], , , SEQ_DIST_SLOT);
565 			} while (symbol < DIST_SLOTS);
566 #else
567 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0);
568 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1);
569 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2);
570 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3);
571 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4);
572 			rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5);
573 #endif
574 			// Get rid of the highest bit that was needed for
575 			// indexing of the probability array.
576 			symbol -= DIST_SLOTS;
577 			assert(symbol <= 63);
578 
579 			if (symbol < DIST_MODEL_START) {
580 				// Match distances [0, 3] have only two bits.
581 				rep0 = symbol;
582 			} else {
583 				// Decode the lowest [1, 29] bits of
584 				// the match distance.
585 				limit = (symbol >> 1) - 1;
586 				assert(limit >= 1 && limit <= 30);
587 				rep0 = 2 + (symbol & 1);
588 
589 				if (symbol < DIST_MODEL_END) {
590 					// Prepare to decode the low bits for
591 					// a distance of [4, 127].
592 					assert(limit <= 5);
593 					rep0 <<= limit;
594 					assert(rep0 <= 96);
595 					// -1 is fine, because we start
596 					// decoding at probs[1], not probs[0].
597 					// NOTE: This violates the C standard,
598 					// since we are doing pointer
599 					// arithmetic past the beginning of
600 					// the array.
601 					assert((int32_t)(rep0 - symbol - 1)
602 							>= -1);
603 					assert((int32_t)(rep0 - symbol - 1)
604 							<= 82);
605 					probs = coder->pos_special + rep0
606 							- symbol - 1;
607 					symbol = 1;
608 					offset = 0;
609 	case SEQ_DIST_MODEL:
610 #ifdef HAVE_SMALL
611 					do {
612 						rc_bit(probs[symbol], ,
613 							rep0 += 1U << offset,
614 							SEQ_DIST_MODEL);
615 					} while (++offset < limit);
616 #else
617 					switch (limit) {
618 					case 5:
619 						assert(offset == 0);
620 						rc_bit(probs[symbol], ,
621 							rep0 += 1U,
622 							SEQ_DIST_MODEL);
623 						++offset;
624 						--limit;
625 					case 4:
626 						rc_bit(probs[symbol], ,
627 							rep0 += 1U << offset,
628 							SEQ_DIST_MODEL);
629 						++offset;
630 						--limit;
631 					case 3:
632 						rc_bit(probs[symbol], ,
633 							rep0 += 1U << offset,
634 							SEQ_DIST_MODEL);
635 						++offset;
636 						--limit;
637 					case 2:
638 						rc_bit(probs[symbol], ,
639 							rep0 += 1U << offset,
640 							SEQ_DIST_MODEL);
641 						++offset;
642 						--limit;
643 					case 1:
644 						// We need "symbol" only for
645 						// indexing the probability
646 						// array, thus we can use
647 						// rc_bit_last() here to omit
648 						// the unneeded updating of
649 						// "symbol".
650 						rc_bit_last(probs[symbol], ,
651 							rep0 += 1U << offset,
652 							SEQ_DIST_MODEL);
653 					}
654 #endif
655 				} else {
656 					// The distance is >= 128. Decode the
657 					// lower bits without probabilities
658 					// except the lowest four bits.
659 					assert(symbol >= 14);
660 					assert(limit >= 6);
661 					limit -= ALIGN_BITS;
662 					assert(limit >= 2);
663 	case SEQ_DIRECT:
664 					// Not worth manual unrolling
665 					do {
666 						rc_direct(rep0, SEQ_DIRECT);
667 					} while (--limit > 0);
668 
669 					// Decode the lowest four bits using
670 					// probabilities.
671 					rep0 <<= ALIGN_BITS;
672 					symbol = 1;
673 #ifdef HAVE_SMALL
674 					offset = 0;
675 	case SEQ_ALIGN:
676 					do {
677 						rc_bit(coder->pos_align[
678 								symbol], ,
679 							rep0 += 1U << offset,
680 							SEQ_ALIGN);
681 					} while (++offset < ALIGN_BITS);
682 #else
683 	case SEQ_ALIGN0:
684 					rc_bit(coder->pos_align[symbol], ,
685 							rep0 += 1, SEQ_ALIGN0);
686 	case SEQ_ALIGN1:
687 					rc_bit(coder->pos_align[symbol], ,
688 							rep0 += 2, SEQ_ALIGN1);
689 	case SEQ_ALIGN2:
690 					rc_bit(coder->pos_align[symbol], ,
691 							rep0 += 4, SEQ_ALIGN2);
692 	case SEQ_ALIGN3:
693 					// Like in SEQ_DIST_MODEL, we don't
694 					// need "symbol" for anything else
695 					// than indexing the probability array.
696 					rc_bit_last(coder->pos_align[symbol], ,
697 							rep0 += 8, SEQ_ALIGN3);
698 #endif
699 
700 					if (rep0 == UINT32_MAX) {
701 						// End of payload marker was
702 						// found. It may only be
703 						// present if
704 						//   - uncompressed size is
705 						//     unknown or
706 						//   - after known uncompressed
707 						//     size amount of bytes has
708 						//     been decompressed and
709 						//     caller has indicated
710 						//     that EOPM might be used
711 						//     (it's not allowed in
712 						//     LZMA2).
713 						if (!eopm_is_valid) {
714 							ret = LZMA_DATA_ERROR;
715 							goto out;
716 						}
717 
718 	case SEQ_EOPM:
719 						// LZMA1 stream with
720 						// end-of-payload marker.
721 						rc_normalize(SEQ_EOPM);
722 						ret = rc_is_finished(rc)
723 							? LZMA_STREAM_END
724 							: LZMA_DATA_ERROR;
725 						goto out;
726 					}
727 				}
728 			}
729 
730 			// Validate the distance we just decoded.
731 			if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
732 				ret = LZMA_DATA_ERROR;
733 				goto out;
734 			}
735 
736 		} else {
737 			rc_update_1(coder->is_rep[state]);
738 
739 			// Repeated match
740 			//
741 			// The match distance is a value that we have had
742 			// earlier. The latest four match distances are
743 			// available as rep0, rep1, rep2 and rep3. We will
744 			// now decode which of them is the new distance.
745 			//
746 			// There cannot be a match if we haven't produced
747 			// any output, so check that first.
748 			if (unlikely(!dict_is_distance_valid(&dict, 0))) {
749 				ret = LZMA_DATA_ERROR;
750 				goto out;
751 			}
752 
753 	case SEQ_IS_REP0:
754 			rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
755 				rc_update_0(coder->is_rep0[state]);
756 				// The distance is rep0.
757 
758 	case SEQ_IS_REP0_LONG:
759 				rc_if_0(coder->is_rep0_long[state][pos_state],
760 						SEQ_IS_REP0_LONG) {
761 					rc_update_0(coder->is_rep0_long[
762 							state][pos_state]);
763 
764 					update_short_rep(state);
765 
766 	case SEQ_SHORTREP:
767 					if (unlikely(dict_put(&dict, dict_get(
768 							&dict, rep0)))) {
769 						coder->sequence = SEQ_SHORTREP;
770 						goto out;
771 					}
772 
773 					continue;
774 				}
775 
776 				// Repeating more than one byte at
777 				// distance of rep0.
778 				rc_update_1(coder->is_rep0_long[
779 						state][pos_state]);
780 
781 			} else {
782 				rc_update_1(coder->is_rep0[state]);
783 
784 	case SEQ_IS_REP1:
785 				// The distance is rep1, rep2 or rep3. Once
786 				// we find out which one of these three, it
787 				// is stored to rep0 and rep1, rep2 and rep3
788 				// are updated accordingly.
789 				rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
790 					rc_update_0(coder->is_rep1[state]);
791 
792 					const uint32_t distance = rep1;
793 					rep1 = rep0;
794 					rep0 = distance;
795 
796 				} else {
797 					rc_update_1(coder->is_rep1[state]);
798 	case SEQ_IS_REP2:
799 					rc_if_0(coder->is_rep2[state],
800 							SEQ_IS_REP2) {
801 						rc_update_0(coder->is_rep2[
802 								state]);
803 
804 						const uint32_t distance = rep2;
805 						rep2 = rep1;
806 						rep1 = rep0;
807 						rep0 = distance;
808 
809 					} else {
810 						rc_update_1(coder->is_rep2[
811 								state]);
812 
813 						const uint32_t distance = rep3;
814 						rep3 = rep2;
815 						rep2 = rep1;
816 						rep1 = rep0;
817 						rep0 = distance;
818 					}
819 				}
820 			}
821 
822 			update_long_rep(state);
823 
824 			// Decode the length of the repeated match.
825 			len_decode(len, coder->rep_len_decoder,
826 					pos_state, SEQ_REP_LEN);
827 		}
828 
829 		/////////////////////////////////
830 		// Repeat from history buffer. //
831 		/////////////////////////////////
832 
833 		// The length is always between these limits. There is no way
834 		// to trigger the algorithm to set len outside this range.
835 		assert(len >= MATCH_LEN_MIN);
836 		assert(len <= MATCH_LEN_MAX);
837 
838 	case SEQ_COPY:
839 		// Repeat len bytes from distance of rep0.
840 		if (unlikely(dict_repeat(&dict, rep0, &len))) {
841 			coder->sequence = SEQ_COPY;
842 			goto out;
843 		}
844 	}
845 
846 out:
847 	// Save state
848 
849 	// NOTE: Must not copy dict.limit.
850 	dictptr->pos = dict.pos;
851 	dictptr->full = dict.full;
852 
853 	rc_from_local(coder->rc, *in_pos);
854 
855 	coder->state = state;
856 	coder->rep0 = rep0;
857 	coder->rep1 = rep1;
858 	coder->rep2 = rep2;
859 	coder->rep3 = rep3;
860 
861 	coder->probs = probs;
862 	coder->symbol = symbol;
863 	coder->limit = limit;
864 	coder->offset = offset;
865 	coder->len = len;
866 
867 	// Update the remaining amount of uncompressed data if uncompressed
868 	// size was known.
869 	if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
870 		coder->uncompressed_size -= dict.pos - dict_start;
871 
872 		// If we have gotten all the output but the decoder wants
873 		// to write more output, the file is corrupt. There are
874 		// three SEQ values where output is produced.
875 		if (coder->uncompressed_size == 0 && ret == LZMA_OK
876 				&& (coder->sequence == SEQ_LITERAL_WRITE
877 					|| coder->sequence == SEQ_SHORTREP
878 					|| coder->sequence == SEQ_COPY))
879 			ret = LZMA_DATA_ERROR;
880 	}
881 
882 	if (ret == LZMA_STREAM_END) {
883 		// Reset the range decoder so that it is ready to reinitialize
884 		// for a new LZMA2 chunk.
885 		rc_reset(coder->rc);
886 		coder->sequence = SEQ_IS_MATCH;
887 	}
888 
889 	return ret;
890 }
891 
892 
893 
894 static void
895 lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
896 		bool allow_eopm)
897 {
898 	lzma_lzma1_decoder *coder = coder_ptr;
899 	coder->uncompressed_size = uncompressed_size;
900 	coder->allow_eopm = allow_eopm;
901 }
902 
903 
904 static void
905 lzma_decoder_reset(void *coder_ptr, const void *opt)
906 {
907 	lzma_lzma1_decoder *coder = coder_ptr;
908 	const lzma_options_lzma *options = opt;
909 
910 	// NOTE: We assume that lc/lp/pb are valid since they were
911 	// successfully decoded with lzma_lzma_decode_properties().
912 
913 	// Calculate pos_mask. We don't need pos_bits as is for anything.
914 	coder->pos_mask = (1U << options->pb) - 1;
915 
916 	// Initialize the literal decoder.
917 	literal_init(coder->literal, options->lc, options->lp);
918 
919 	coder->literal_context_bits = options->lc;
920 	coder->literal_pos_mask = (1U << options->lp) - 1;
921 
922 	// State
923 	coder->state = STATE_LIT_LIT;
924 	coder->rep0 = 0;
925 	coder->rep1 = 0;
926 	coder->rep2 = 0;
927 	coder->rep3 = 0;
928 	coder->pos_mask = (1U << options->pb) - 1;
929 
930 	// Range decoder
931 	rc_reset(coder->rc);
932 
933 	// Bit and bittree decoders
934 	for (uint32_t i = 0; i < STATES; ++i) {
935 		for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
936 			bit_reset(coder->is_match[i][j]);
937 			bit_reset(coder->is_rep0_long[i][j]);
938 		}
939 
940 		bit_reset(coder->is_rep[i]);
941 		bit_reset(coder->is_rep0[i]);
942 		bit_reset(coder->is_rep1[i]);
943 		bit_reset(coder->is_rep2[i]);
944 	}
945 
946 	for (uint32_t i = 0; i < DIST_STATES; ++i)
947 		bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
948 
949 	for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
950 		bit_reset(coder->pos_special[i]);
951 
952 	bittree_reset(coder->pos_align, ALIGN_BITS);
953 
954 	// Len decoders (also bit/bittree)
955 	const uint32_t num_pos_states = 1U << options->pb;
956 	bit_reset(coder->match_len_decoder.choice);
957 	bit_reset(coder->match_len_decoder.choice2);
958 	bit_reset(coder->rep_len_decoder.choice);
959 	bit_reset(coder->rep_len_decoder.choice2);
960 
961 	for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
962 		bittree_reset(coder->match_len_decoder.low[pos_state],
963 				LEN_LOW_BITS);
964 		bittree_reset(coder->match_len_decoder.mid[pos_state],
965 				LEN_MID_BITS);
966 
967 		bittree_reset(coder->rep_len_decoder.low[pos_state],
968 				LEN_LOW_BITS);
969 		bittree_reset(coder->rep_len_decoder.mid[pos_state],
970 				LEN_MID_BITS);
971 	}
972 
973 	bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
974 	bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
975 
976 	coder->sequence = SEQ_IS_MATCH;
977 	coder->probs = NULL;
978 	coder->symbol = 0;
979 	coder->limit = 0;
980 	coder->offset = 0;
981 	coder->len = 0;
982 
983 	return;
984 }
985 
986 
987 extern lzma_ret
988 lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
989 		const lzma_options_lzma *options, lzma_lz_options *lz_options)
990 {
991 	if (lz->coder == NULL) {
992 		lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
993 		if (lz->coder == NULL)
994 			return LZMA_MEM_ERROR;
995 
996 		lz->code = &lzma_decode;
997 		lz->reset = &lzma_decoder_reset;
998 		lz->set_uncompressed = &lzma_decoder_uncompressed;
999 	}
1000 
1001 	// All dictionary sizes are OK here. LZ decoder will take care of
1002 	// the special cases.
1003 	lz_options->dict_size = options->dict_size;
1004 	lz_options->preset_dict = options->preset_dict;
1005 	lz_options->preset_dict_size = options->preset_dict_size;
1006 
1007 	return LZMA_OK;
1008 }
1009 
1010 
1011 /// Allocate and initialize LZMA decoder. This is used only via LZ
1012 /// initialization (lzma_lzma_decoder_init() passes function pointer to
1013 /// the LZ initialization).
1014 static lzma_ret
1015 lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
1016 		lzma_vli id, const void *options, lzma_lz_options *lz_options)
1017 {
1018 	if (!is_lclppb_valid(options))
1019 		return LZMA_PROG_ERROR;
1020 
1021 	lzma_vli uncomp_size = LZMA_VLI_UNKNOWN;
1022 	bool allow_eopm = true;
1023 
1024 	if (id == LZMA_FILTER_LZMA1EXT) {
1025 		const lzma_options_lzma *opt = options;
1026 
1027 		// Only one flag is supported.
1028 		if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
1029 			return LZMA_OPTIONS_ERROR;
1030 
1031 		// FIXME? Using lzma_vli instead of uint64_t is weird because
1032 		// this has nothing to do with .xz headers and variable-length
1033 		// integer encoding. On the other hand, using LZMA_VLI_UNKNOWN
1034 		// instead of UINT64_MAX is clearer when unknown size is
1035 		// meant. A problem with using lzma_vli is that now we
1036 		// allow > LZMA_VLI_MAX which is fine in this file but
1037 		// it's still confusing. Note that alone_decoder.c also
1038 		// allows > LZMA_VLI_MAX when setting uncompressed size.
1039 		uncomp_size = opt->ext_size_low
1040 				+ ((uint64_t)(opt->ext_size_high) << 32);
1041 		allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0
1042 				|| uncomp_size == LZMA_VLI_UNKNOWN;
1043 	}
1044 
1045 	return_if_error(lzma_lzma_decoder_create(
1046 			lz, allocator, options, lz_options));
1047 
1048 	lzma_decoder_reset(lz->coder, options);
1049 	lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm);
1050 
1051 	return LZMA_OK;
1052 }
1053 
1054 
1055 extern lzma_ret
1056 lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
1057 		const lzma_filter_info *filters)
1058 {
1059 	// LZMA can only be the last filter in the chain. This is enforced
1060 	// by the raw_decoder initialization.
1061 	assert(filters[1].init == NULL);
1062 
1063 	return lzma_lz_decoder_init(next, allocator, filters,
1064 			&lzma_decoder_init);
1065 }
1066 
1067 
1068 extern bool
1069 lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
1070 {
1071 	if (byte > (4 * 5 + 4) * 9 + 8)
1072 		return true;
1073 
1074 	// See the file format specification to understand this.
1075 	options->pb = byte / (9 * 5);
1076 	byte -= options->pb * 9 * 5;
1077 	options->lp = byte / 9;
1078 	options->lc = byte - options->lp * 9;
1079 
1080 	return options->lc + options->lp > LZMA_LCLP_MAX;
1081 }
1082 
1083 
1084 extern uint64_t
1085 lzma_lzma_decoder_memusage_nocheck(const void *options)
1086 {
1087 	const lzma_options_lzma *const opt = options;
1088 	return sizeof(lzma_lzma1_decoder)
1089 			+ lzma_lz_decoder_memusage(opt->dict_size);
1090 }
1091 
1092 
1093 extern uint64_t
1094 lzma_lzma_decoder_memusage(const void *options)
1095 {
1096 	if (!is_lclppb_valid(options))
1097 		return UINT64_MAX;
1098 
1099 	return lzma_lzma_decoder_memusage_nocheck(options);
1100 }
1101 
1102 
1103 extern lzma_ret
1104 lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
1105 		const uint8_t *props, size_t props_size)
1106 {
1107 	if (props_size != 5)
1108 		return LZMA_OPTIONS_ERROR;
1109 
1110 	lzma_options_lzma *opt
1111 			= lzma_alloc(sizeof(lzma_options_lzma), allocator);
1112 	if (opt == NULL)
1113 		return LZMA_MEM_ERROR;
1114 
1115 	if (lzma_lzma_lclppb_decode(opt, props[0]))
1116 		goto error;
1117 
1118 	// All dictionary sizes are accepted, including zero. LZ decoder
1119 	// will automatically use a dictionary at least a few KiB even if
1120 	// a smaller dictionary is requested.
1121 	opt->dict_size = read32le(props + 1);
1122 
1123 	opt->preset_dict = NULL;
1124 	opt->preset_dict_size = 0;
1125 
1126 	*options = opt;
1127 
1128 	return LZMA_OK;
1129 
1130 error:
1131 	lzma_free(opt, allocator);
1132 	return LZMA_OPTIONS_ERROR;
1133 }
1134