1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file lzma_decoder.c 6 /// \brief LZMA decoder 7 /// 8 // Authors: Igor Pavlov 9 // Lasse Collin 10 // Jia Tan 11 // 12 /////////////////////////////////////////////////////////////////////////////// 13 14 #include "lz_decoder.h" 15 #include "lzma_common.h" 16 #include "lzma_decoder.h" 17 #include "range_decoder.h" 18 19 // The macros unroll loops with switch statements. 20 // Silence warnings about missing fall-through comments. 21 #if TUKLIB_GNUC_REQ(7, 0) 22 # pragma GCC diagnostic ignored "-Wimplicit-fallthrough" 23 #endif 24 25 // Minimum number of input bytes to safely decode one LZMA symbol. 26 // The worst case is that we decode 22 bits using probabilities and 26 27 // direct bits. This may decode at maximum 20 bytes of input. 28 #define LZMA_IN_REQUIRED 20 29 30 31 // Macros for (somewhat) size-optimized code. 32 // This is used to decode the match length (how many bytes must be repeated 33 // from the dictionary). This version is used in the Resumable mode and 34 // does not unroll any loops. 35 #define len_decode(target, ld, pos_state, seq) \ 36 do { \ 37 case seq ## _CHOICE: \ 38 rc_if_0_safe(ld.choice, seq ## _CHOICE) { \ 39 rc_update_0(ld.choice); \ 40 probs = ld.low[pos_state];\ 41 limit = LEN_LOW_SYMBOLS; \ 42 target = MATCH_LEN_MIN; \ 43 } else { \ 44 rc_update_1(ld.choice); \ 45 case seq ## _CHOICE2: \ 46 rc_if_0_safe(ld.choice2, seq ## _CHOICE2) { \ 47 rc_update_0(ld.choice2); \ 48 probs = ld.mid[pos_state]; \ 49 limit = LEN_MID_SYMBOLS; \ 50 target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ 51 } else { \ 52 rc_update_1(ld.choice2); \ 53 probs = ld.high; \ 54 limit = LEN_HIGH_SYMBOLS; \ 55 target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ 56 + LEN_MID_SYMBOLS; \ 57 } \ 58 } \ 59 symbol = 1; \ 60 case seq ## _BITTREE: \ 61 do { \ 62 rc_bit_safe(probs[symbol], , , seq ## _BITTREE); \ 63 } while (symbol < limit); \ 64 target += symbol - limit; \ 65 } while (0) 66 67 68 // This is the faster version of the match length decoder that does not 69 // worry about being resumable. It unrolls the bittree decoding loop. 70 #define len_decode_fast(target, ld, pos_state) \ 71 do { \ 72 symbol = 1; \ 73 rc_if_0(ld.choice) { \ 74 rc_update_0(ld.choice); \ 75 rc_bittree3(ld.low[pos_state], \ 76 -LEN_LOW_SYMBOLS + MATCH_LEN_MIN); \ 77 target = symbol; \ 78 } else { \ 79 rc_update_1(ld.choice); \ 80 rc_if_0(ld.choice2) { \ 81 rc_update_0(ld.choice2); \ 82 rc_bittree3(ld.mid[pos_state], -LEN_MID_SYMBOLS \ 83 + MATCH_LEN_MIN + LEN_LOW_SYMBOLS); \ 84 target = symbol; \ 85 } else { \ 86 rc_update_1(ld.choice2); \ 87 rc_bittree8(ld.high, -LEN_HIGH_SYMBOLS \ 88 + MATCH_LEN_MIN \ 89 + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS); \ 90 target = symbol; \ 91 } \ 92 } \ 93 } while (0) 94 95 96 /// Length decoder probabilities; see comments in lzma_common.h. 97 typedef struct { 98 probability choice; 99 probability choice2; 100 probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; 101 probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; 102 probability high[LEN_HIGH_SYMBOLS]; 103 } lzma_length_decoder; 104 105 106 typedef struct { 107 /////////////////// 108 // Probabilities // 109 /////////////////// 110 111 /// Literals; see comments in lzma_common.h. 112 probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE]; 113 114 /// If 1, it's a match. Otherwise it's a single 8-bit literal. 115 probability is_match[STATES][POS_STATES_MAX]; 116 117 /// If 1, it's a repeated match. The distance is one of rep0 .. rep3. 118 probability is_rep[STATES]; 119 120 /// If 0, distance of a repeated match is rep0. 121 /// Otherwise check is_rep1. 122 probability is_rep0[STATES]; 123 124 /// If 0, distance of a repeated match is rep1. 125 /// Otherwise check is_rep2. 126 probability is_rep1[STATES]; 127 128 /// If 0, distance of a repeated match is rep2. Otherwise it is rep3. 129 probability is_rep2[STATES]; 130 131 /// If 1, the repeated match has length of one byte. Otherwise 132 /// the length is decoded from rep_len_decoder. 133 probability is_rep0_long[STATES][POS_STATES_MAX]; 134 135 /// Probability tree for the highest two bits of the match distance. 136 /// There is a separate probability tree for match lengths of 137 /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. 138 probability dist_slot[DIST_STATES][DIST_SLOTS]; 139 140 /// Probability trees for additional bits for match distance when the 141 /// distance is in the range [4, 127]. 142 probability pos_special[FULL_DISTANCES - DIST_MODEL_END]; 143 144 /// Probability tree for the lowest four bits of a match distance 145 /// that is equal to or greater than 128. 146 probability pos_align[ALIGN_SIZE]; 147 148 /// Length of a normal match 149 lzma_length_decoder match_len_decoder; 150 151 /// Length of a repeated match 152 lzma_length_decoder rep_len_decoder; 153 154 /////////////////// 155 // Decoder state // 156 /////////////////// 157 158 // Range coder 159 lzma_range_decoder rc; 160 161 // Types of the most recently seen LZMA symbols 162 lzma_lzma_state state; 163 164 uint32_t rep0; ///< Distance of the latest match 165 uint32_t rep1; ///< Distance of second latest match 166 uint32_t rep2; ///< Distance of third latest match 167 uint32_t rep3; ///< Distance of fourth latest match 168 169 uint32_t pos_mask; // (1U << pb) - 1 170 uint32_t literal_context_bits; 171 uint32_t literal_mask; 172 173 /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of 174 /// payload marker is expected. 175 lzma_vli uncompressed_size; 176 177 /// True if end of payload marker (EOPM) is allowed even when 178 /// uncompressed_size is known; false if EOPM must not be present. 179 /// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN. 180 bool allow_eopm; 181 182 //////////////////////////////// 183 // State of incomplete symbol // 184 //////////////////////////////// 185 186 /// Position where to continue the decoder loop 187 enum { 188 SEQ_NORMALIZE, 189 SEQ_IS_MATCH, 190 SEQ_LITERAL, 191 SEQ_LITERAL_MATCHED, 192 SEQ_LITERAL_WRITE, 193 SEQ_IS_REP, 194 SEQ_MATCH_LEN_CHOICE, 195 SEQ_MATCH_LEN_CHOICE2, 196 SEQ_MATCH_LEN_BITTREE, 197 SEQ_DIST_SLOT, 198 SEQ_DIST_MODEL, 199 SEQ_DIRECT, 200 SEQ_ALIGN, 201 SEQ_EOPM, 202 SEQ_IS_REP0, 203 SEQ_SHORTREP, 204 SEQ_IS_REP0_LONG, 205 SEQ_IS_REP1, 206 SEQ_IS_REP2, 207 SEQ_REP_LEN_CHOICE, 208 SEQ_REP_LEN_CHOICE2, 209 SEQ_REP_LEN_BITTREE, 210 SEQ_COPY, 211 } sequence; 212 213 /// Base of the current probability tree 214 probability *probs; 215 216 /// Symbol being decoded. This is also used as an index variable in 217 /// bittree decoders: probs[symbol] 218 uint32_t symbol; 219 220 /// Used as a loop termination condition on bittree decoders and 221 /// direct bits decoder. 222 uint32_t limit; 223 224 /// Matched literal decoder: 0x100 or 0 to help avoiding branches. 225 /// Bittree reverse decoders: Offset of the next bit: 1 << offset 226 uint32_t offset; 227 228 /// If decoding a literal: match byte. 229 /// If decoding a match: length of the match. 230 uint32_t len; 231 } lzma_lzma1_decoder; 232 233 234 static lzma_ret 235 lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr, 236 const uint8_t *restrict in, 237 size_t *restrict in_pos, size_t in_size) 238 { 239 lzma_lzma1_decoder *restrict coder = coder_ptr; 240 241 //////////////////// 242 // Initialization // 243 //////////////////// 244 245 { 246 const lzma_ret ret = rc_read_init( 247 &coder->rc, in, in_pos, in_size); 248 if (ret != LZMA_STREAM_END) 249 return ret; 250 } 251 252 /////////////// 253 // Variables // 254 /////////////// 255 256 // Making local copies of often-used variables improves both 257 // speed and readability. 258 259 lzma_dict dict = *dictptr; 260 261 const size_t dict_start = dict.pos; 262 263 // Range decoder 264 rc_to_local(coder->rc, *in_pos, LZMA_IN_REQUIRED); 265 266 // State 267 uint32_t state = coder->state; 268 uint32_t rep0 = coder->rep0; 269 uint32_t rep1 = coder->rep1; 270 uint32_t rep2 = coder->rep2; 271 uint32_t rep3 = coder->rep3; 272 273 const uint32_t pos_mask = coder->pos_mask; 274 275 // These variables are actually needed only if we last time ran 276 // out of input in the middle of the decoder loop. 277 probability *probs = coder->probs; 278 uint32_t symbol = coder->symbol; 279 uint32_t limit = coder->limit; 280 uint32_t offset = coder->offset; 281 uint32_t len = coder->len; 282 283 const uint32_t literal_mask = coder->literal_mask; 284 const uint32_t literal_context_bits = coder->literal_context_bits; 285 286 // Temporary variables 287 uint32_t pos_state = dict.pos & pos_mask; 288 289 lzma_ret ret = LZMA_OK; 290 291 // This is true when the next LZMA symbol is allowed to be EOPM. 292 // That is, if this is false, then EOPM is considered 293 // an invalid symbol and we will return LZMA_DATA_ERROR. 294 // 295 // EOPM is always required (not just allowed) when 296 // the uncompressed size isn't known. When uncompressed size 297 // is known, eopm_is_valid may be set to true later. 298 bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN; 299 300 // If uncompressed size is known and there is enough output space 301 // to decode all the data, limit the available buffer space so that 302 // the main loop won't try to decode past the end of the stream. 303 bool might_finish_without_eopm = false; 304 if (coder->uncompressed_size != LZMA_VLI_UNKNOWN 305 && coder->uncompressed_size <= dict.limit - dict.pos) { 306 dict.limit = dict.pos + (size_t)(coder->uncompressed_size); 307 might_finish_without_eopm = true; 308 } 309 310 // The main decoder loop. The "switch" is used to resume the decoder at 311 // correct location. Once resumed, the "switch" is no longer used. 312 // The decoder loops is split into two modes: 313 // 314 // 1 - Non-resumable mode (fast). This is used when it is guaranteed 315 // there is enough input to decode the next symbol. If the output 316 // limit is reached, then the decoder loop will save the place 317 // for the resumable mode to continue. This mode is not used if 318 // HAVE_SMALL is defined. This is faster than Resumable mode 319 // because it reduces the number of branches needed and allows 320 // for more compiler optimizations. 321 // 322 // 2 - Resumable mode (slow). This is used when a previous decoder 323 // loop did not have enough space in the input or output buffers 324 // to complete. It uses sequence enum values to set remind 325 // coder->sequence where to resume in the decoder loop. This 326 // is the only mode used when HAVE_SMALL is defined. 327 328 switch (coder->sequence) 329 while (true) { 330 // Calculate new pos_state. This is skipped on the first loop 331 // since we already calculated it when setting up the local 332 // variables. 333 pos_state = dict.pos & pos_mask; 334 335 #ifndef HAVE_SMALL 336 337 /////////////////////////////// 338 // Non-resumable Mode (fast) // 339 /////////////////////////////// 340 341 // Go to Resumable mode (1) if there is not enough input to 342 // safely decode any possible LZMA symbol or (2) if the 343 // dictionary is full, which may need special checks that 344 // are only done in the Resumable mode. 345 if (unlikely(!rc_is_fast_allowed() 346 || dict.pos == dict.limit)) 347 goto slow; 348 349 // Decode the first bit from the next LZMA symbol. 350 // If the bit is a 0, then we handle it as a literal. 351 // If the bit is a 1, then it is a match of previously 352 // decoded data. 353 rc_if_0(coder->is_match[state][pos_state]) { 354 ///////////////////// 355 // Decode literal. // 356 ///////////////////// 357 358 // Update the RC that we have decoded a 0. 359 rc_update_0(coder->is_match[state][pos_state]); 360 361 // Get the correct probability array from lp and 362 // lc params. 363 probs = literal_subcoder(coder->literal, 364 literal_context_bits, literal_mask, 365 dict.pos, dict_get0(&dict)); 366 367 if (is_literal_state(state)) { 368 update_literal_normal(state); 369 370 // Decode literal without match byte. 371 rc_bittree8(probs, 0); 372 } else { 373 update_literal_matched(state); 374 375 // Decode literal with match byte. 376 rc_matched_literal(probs, 377 dict_get(&dict, rep0)); 378 } 379 380 // Write decoded literal to dictionary 381 dict_put(&dict, symbol); 382 continue; 383 } 384 385 /////////////////// 386 // Decode match. // 387 /////////////////// 388 389 // Instead of a new byte we are going to decode a 390 // distance-length pair. The distance represents how far 391 // back in the dictionary to begin copying. The length 392 // represents how many bytes to copy. 393 394 rc_update_1(coder->is_match[state][pos_state]); 395 396 rc_if_0(coder->is_rep[state]) { 397 /////////////////// 398 // Simple match. // 399 /////////////////// 400 401 // Not a repeated match. In this case, 402 // the length (how many bytes to copy) must be 403 // decoded first. Then, the distance (where to 404 // start copying) is decoded. 405 // 406 // This is also how we know when we are done 407 // decoding. If the distance decodes to UINT32_MAX, 408 // then we know to stop decoding (end of payload 409 // marker). 410 411 rc_update_0(coder->is_rep[state]); 412 update_match(state); 413 414 // The latest three match distances are kept in 415 // memory in case there are repeated matches. 416 rep3 = rep2; 417 rep2 = rep1; 418 rep1 = rep0; 419 420 // Decode the length of the match. 421 len_decode_fast(len, coder->match_len_decoder, 422 pos_state); 423 424 // Next, decode the distance into rep0. 425 426 // The next 6 bits determine how to decode the 427 // rest of the distance. 428 probs = coder->dist_slot[get_dist_state(len)]; 429 430 rc_bittree6(probs, -DIST_SLOTS); 431 assert(symbol <= 63); 432 433 if (symbol < DIST_MODEL_START) { 434 // If the decoded symbol is < DIST_MODEL_START 435 // then we use its value directly as the 436 // match distance. No other bits are needed. 437 // The only possible distance values 438 // are [0, 3]. 439 rep0 = symbol; 440 } else { 441 // Use the first two bits of symbol as the 442 // highest bits of the match distance. 443 444 // "limit" represents the number of low bits 445 // to decode. 446 limit = (symbol >> 1) - 1; 447 assert(limit >= 1 && limit <= 30); 448 rep0 = 2 + (symbol & 1); 449 450 if (symbol < DIST_MODEL_END) { 451 // When symbol is > DIST_MODEL_START, 452 // but symbol < DIST_MODEL_END, then 453 // it can decode distances between 454 // [4, 127]. 455 assert(limit <= 5); 456 rep0 <<= limit; 457 assert(rep0 <= 96); 458 459 // -1 is fine, because we start 460 // decoding at probs[1], not probs[0]. 461 // NOTE: This violates the C standard, 462 // since we are doing pointer 463 // arithmetic past the beginning of 464 // the array. 465 assert((int32_t)(rep0 - symbol - 1) 466 >= -1); 467 assert((int32_t)(rep0 - symbol - 1) 468 <= 82); 469 probs = coder->pos_special + rep0 470 - symbol - 1; 471 symbol = 1; 472 offset = 1; 473 474 // Variable number (1-5) of bits 475 // from a reverse bittree. This 476 // isn't worth manual unrolling. 477 // 478 // NOTE: Making one or many of the 479 // variables (probs, symbol, offset, 480 // or limit) local here (instead of 481 // using those declared outside the 482 // main loop) can affect code size 483 // and performance which isn't a 484 // surprise but it's not so clear 485 // what is the best. 486 do { 487 rc_bit_add_if_1(probs, 488 rep0, offset); 489 offset <<= 1; 490 } while (--limit > 0); 491 } else { 492 // The distance is >= 128. Decode the 493 // lower bits without probabilities 494 // except the lowest four bits. 495 assert(symbol >= 14); 496 assert(limit >= 6); 497 498 limit -= ALIGN_BITS; 499 assert(limit >= 2); 500 501 rc_direct(rep0, limit); 502 503 // Decode the lowest four bits using 504 // probabilities. 505 rep0 <<= ALIGN_BITS; 506 rc_bittree_rev4(coder->pos_align); 507 rep0 += symbol; 508 509 // If the end of payload marker (EOPM) 510 // is detected, jump to the safe code. 511 // The EOPM handling isn't speed 512 // critical at all. 513 // 514 // A final normalization is needed 515 // after the EOPM (there can be a 516 // dummy byte to read in some cases). 517 // If the normalization was done here 518 // in the fast code, it would need to 519 // be taken into account in the value 520 // of LZMA_IN_REQUIRED. Using the 521 // safe code allows keeping 522 // LZMA_IN_REQUIRED as 20 instead of 523 // 21. 524 if (rep0 == UINT32_MAX) 525 goto eopm; 526 } 527 } 528 529 // Validate the distance we just decoded. 530 if (unlikely(!dict_is_distance_valid(&dict, rep0))) { 531 ret = LZMA_DATA_ERROR; 532 goto out; 533 } 534 535 } else { 536 rc_update_1(coder->is_rep[state]); 537 538 ///////////////////// 539 // Repeated match. // 540 ///////////////////// 541 542 // The match distance is a value that we have decoded 543 // recently. The latest four match distances are 544 // available as rep0, rep1, rep2 and rep3. We will 545 // now decode which of them is the new distance. 546 // 547 // There cannot be a match if we haven't produced 548 // any output, so check that first. 549 if (unlikely(!dict_is_distance_valid(&dict, 0))) { 550 ret = LZMA_DATA_ERROR; 551 goto out; 552 } 553 554 rc_if_0(coder->is_rep0[state]) { 555 rc_update_0(coder->is_rep0[state]); 556 // The distance is rep0. 557 558 // Decode the next bit to determine if 1 byte 559 // should be copied from rep0 distance or 560 // if the number of bytes needs to be decoded. 561 562 // If the next bit is 0, then it is a 563 // "Short Rep Match" and only 1 bit is copied. 564 // Otherwise, the length of the match is 565 // decoded after the "else" statement. 566 rc_if_0(coder->is_rep0_long[state][pos_state]) { 567 rc_update_0(coder->is_rep0_long[ 568 state][pos_state]); 569 570 update_short_rep(state); 571 dict_put(&dict, dict_get(&dict, rep0)); 572 continue; 573 } 574 575 // Repeating more than one byte at 576 // distance of rep0. 577 rc_update_1(coder->is_rep0_long[ 578 state][pos_state]); 579 580 } else { 581 rc_update_1(coder->is_rep0[state]); 582 583 // The distance is rep1, rep2 or rep3. Once 584 // we find out which one of these three, it 585 // is stored to rep0 and rep1, rep2 and rep3 586 // are updated accordingly. There is no 587 // "Short Rep Match" option, so the length 588 // of the match must always be decoded next. 589 rc_if_0(coder->is_rep1[state]) { 590 // The distance is rep1. 591 rc_update_0(coder->is_rep1[state]); 592 593 const uint32_t distance = rep1; 594 rep1 = rep0; 595 rep0 = distance; 596 597 } else { 598 rc_update_1(coder->is_rep1[state]); 599 600 rc_if_0(coder->is_rep2[state]) { 601 // The distance is rep2. 602 rc_update_0(coder->is_rep2[ 603 state]); 604 605 const uint32_t distance = rep2; 606 rep2 = rep1; 607 rep1 = rep0; 608 rep0 = distance; 609 610 } else { 611 // The distance is rep3. 612 rc_update_1(coder->is_rep2[ 613 state]); 614 615 const uint32_t distance = rep3; 616 rep3 = rep2; 617 rep2 = rep1; 618 rep1 = rep0; 619 rep0 = distance; 620 } 621 } 622 } 623 624 update_long_rep(state); 625 626 // Decode the length of the repeated match. 627 len_decode_fast(len, coder->rep_len_decoder, 628 pos_state); 629 } 630 631 ///////////////////////////////// 632 // Repeat from history buffer. // 633 ///////////////////////////////// 634 635 // The length is always between these limits. There is no way 636 // to trigger the algorithm to set len outside this range. 637 assert(len >= MATCH_LEN_MIN); 638 assert(len <= MATCH_LEN_MAX); 639 640 // Repeat len bytes from distance of rep0. 641 if (unlikely(dict_repeat(&dict, rep0, &len))) { 642 coder->sequence = SEQ_COPY; 643 goto out; 644 } 645 646 continue; 647 648 slow: 649 #endif 650 /////////////////////////// 651 // Resumable Mode (slow) // 652 /////////////////////////// 653 654 // This is very similar to Non-resumable Mode, so most of the 655 // comments are not repeated. The main differences are: 656 // - case labels are used to resume at the correct location. 657 // - Loops are not unrolled. 658 // - Range coder macros take an extra sequence argument 659 // so they can save to coder->sequence the location to 660 // resume in case there is not enough input. 661 case SEQ_NORMALIZE: 662 case SEQ_IS_MATCH: 663 if (unlikely(might_finish_without_eopm 664 && dict.pos == dict.limit)) { 665 // In rare cases there is a useless byte that needs 666 // to be read anyway. 667 rc_normalize_safe(SEQ_NORMALIZE); 668 669 // If the range decoder state is such that we can 670 // be at the end of the LZMA stream, then the 671 // decoding is finished. 672 if (rc_is_finished(rc)) { 673 ret = LZMA_STREAM_END; 674 goto out; 675 } 676 677 // If the caller hasn't allowed EOPM to be present 678 // together with known uncompressed size, then the 679 // LZMA stream is corrupt. 680 if (!coder->allow_eopm) { 681 ret = LZMA_DATA_ERROR; 682 goto out; 683 } 684 685 // Otherwise continue decoding with the expectation 686 // that the next LZMA symbol is EOPM. 687 eopm_is_valid = true; 688 } 689 690 rc_if_0_safe(coder->is_match[state][pos_state], SEQ_IS_MATCH) { 691 ///////////////////// 692 // Decode literal. // 693 ///////////////////// 694 695 rc_update_0(coder->is_match[state][pos_state]); 696 697 probs = literal_subcoder(coder->literal, 698 literal_context_bits, literal_mask, 699 dict.pos, dict_get0(&dict)); 700 symbol = 1; 701 702 if (is_literal_state(state)) { 703 update_literal_normal(state); 704 705 // Decode literal without match byte. 706 // The "slow" version does not unroll 707 // the loop. 708 case SEQ_LITERAL: 709 do { 710 rc_bit_safe(probs[symbol], , , 711 SEQ_LITERAL); 712 } while (symbol < (1 << 8)); 713 } else { 714 update_literal_matched(state); 715 716 // Decode literal with match byte. 717 len = (uint32_t)(dict_get(&dict, rep0)) << 1; 718 719 offset = 0x100; 720 721 case SEQ_LITERAL_MATCHED: 722 do { 723 const uint32_t match_bit 724 = len & offset; 725 const uint32_t subcoder_index 726 = offset + match_bit 727 + symbol; 728 729 rc_bit_safe(probs[subcoder_index], 730 offset &= ~match_bit, 731 offset &= match_bit, 732 SEQ_LITERAL_MATCHED); 733 734 // It seems to be faster to do this 735 // here instead of putting it to the 736 // beginning of the loop and then 737 // putting the "case" in the middle 738 // of the loop. 739 len <<= 1; 740 741 } while (symbol < (1 << 8)); 742 } 743 744 case SEQ_LITERAL_WRITE: 745 if (dict_put_safe(&dict, symbol)) { 746 coder->sequence = SEQ_LITERAL_WRITE; 747 goto out; 748 } 749 750 continue; 751 } 752 753 /////////////////// 754 // Decode match. // 755 /////////////////// 756 757 rc_update_1(coder->is_match[state][pos_state]); 758 759 case SEQ_IS_REP: 760 rc_if_0_safe(coder->is_rep[state], SEQ_IS_REP) { 761 /////////////////// 762 // Simple match. // 763 /////////////////// 764 765 rc_update_0(coder->is_rep[state]); 766 update_match(state); 767 768 rep3 = rep2; 769 rep2 = rep1; 770 rep1 = rep0; 771 772 len_decode(len, coder->match_len_decoder, 773 pos_state, SEQ_MATCH_LEN); 774 775 probs = coder->dist_slot[get_dist_state(len)]; 776 symbol = 1; 777 778 case SEQ_DIST_SLOT: 779 do { 780 rc_bit_safe(probs[symbol], , , SEQ_DIST_SLOT); 781 } while (symbol < DIST_SLOTS); 782 783 symbol -= DIST_SLOTS; 784 assert(symbol <= 63); 785 786 if (symbol < DIST_MODEL_START) { 787 rep0 = symbol; 788 } else { 789 limit = (symbol >> 1) - 1; 790 assert(limit >= 1 && limit <= 30); 791 rep0 = 2 + (symbol & 1); 792 793 if (symbol < DIST_MODEL_END) { 794 assert(limit <= 5); 795 rep0 <<= limit; 796 assert(rep0 <= 96); 797 // -1 is fine, because we start 798 // decoding at probs[1], not probs[0]. 799 // NOTE: This violates the C standard, 800 // since we are doing pointer 801 // arithmetic past the beginning of 802 // the array. 803 assert((int32_t)(rep0 - symbol - 1) 804 >= -1); 805 assert((int32_t)(rep0 - symbol - 1) 806 <= 82); 807 probs = coder->pos_special + rep0 808 - symbol - 1; 809 symbol = 1; 810 offset = 0; 811 case SEQ_DIST_MODEL: 812 do { 813 rc_bit_safe(probs[symbol], , 814 rep0 += 1U << offset, 815 SEQ_DIST_MODEL); 816 } while (++offset < limit); 817 } else { 818 assert(symbol >= 14); 819 assert(limit >= 6); 820 limit -= ALIGN_BITS; 821 assert(limit >= 2); 822 case SEQ_DIRECT: 823 rc_direct_safe(rep0, limit, 824 SEQ_DIRECT); 825 826 rep0 <<= ALIGN_BITS; 827 symbol = 0; 828 offset = 1; 829 case SEQ_ALIGN: 830 do { 831 rc_bit_last_safe( 832 coder->pos_align[ 833 offset 834 + symbol], 835 , 836 symbol += offset, 837 SEQ_ALIGN); 838 offset <<= 1; 839 } while (offset < ALIGN_SIZE); 840 841 rep0 += symbol; 842 843 if (rep0 == UINT32_MAX) { 844 // End of payload marker was 845 // found. It may only be 846 // present if 847 // - uncompressed size is 848 // unknown or 849 // - after known uncompressed 850 // size amount of bytes has 851 // been decompressed and 852 // caller has indicated 853 // that EOPM might be used 854 // (it's not allowed in 855 // LZMA2). 856 #ifndef HAVE_SMALL 857 eopm: 858 #endif 859 if (!eopm_is_valid) { 860 ret = LZMA_DATA_ERROR; 861 goto out; 862 } 863 864 case SEQ_EOPM: 865 // LZMA1 stream with 866 // end-of-payload marker. 867 rc_normalize_safe(SEQ_EOPM); 868 ret = rc_is_finished(rc) 869 ? LZMA_STREAM_END 870 : LZMA_DATA_ERROR; 871 goto out; 872 } 873 } 874 } 875 876 if (unlikely(!dict_is_distance_valid(&dict, rep0))) { 877 ret = LZMA_DATA_ERROR; 878 goto out; 879 } 880 881 } else { 882 ///////////////////// 883 // Repeated match. // 884 ///////////////////// 885 886 rc_update_1(coder->is_rep[state]); 887 888 if (unlikely(!dict_is_distance_valid(&dict, 0))) { 889 ret = LZMA_DATA_ERROR; 890 goto out; 891 } 892 893 case SEQ_IS_REP0: 894 rc_if_0_safe(coder->is_rep0[state], SEQ_IS_REP0) { 895 rc_update_0(coder->is_rep0[state]); 896 897 case SEQ_IS_REP0_LONG: 898 rc_if_0_safe(coder->is_rep0_long 899 [state][pos_state], 900 SEQ_IS_REP0_LONG) { 901 rc_update_0(coder->is_rep0_long[ 902 state][pos_state]); 903 904 update_short_rep(state); 905 906 case SEQ_SHORTREP: 907 if (dict_put_safe(&dict, 908 dict_get(&dict, 909 rep0))) { 910 coder->sequence = SEQ_SHORTREP; 911 goto out; 912 } 913 914 continue; 915 } 916 917 rc_update_1(coder->is_rep0_long[ 918 state][pos_state]); 919 920 } else { 921 rc_update_1(coder->is_rep0[state]); 922 923 case SEQ_IS_REP1: 924 rc_if_0_safe(coder->is_rep1[state], SEQ_IS_REP1) { 925 rc_update_0(coder->is_rep1[state]); 926 927 const uint32_t distance = rep1; 928 rep1 = rep0; 929 rep0 = distance; 930 931 } else { 932 rc_update_1(coder->is_rep1[state]); 933 case SEQ_IS_REP2: 934 rc_if_0_safe(coder->is_rep2[state], 935 SEQ_IS_REP2) { 936 rc_update_0(coder->is_rep2[ 937 state]); 938 939 const uint32_t distance = rep2; 940 rep2 = rep1; 941 rep1 = rep0; 942 rep0 = distance; 943 944 } else { 945 rc_update_1(coder->is_rep2[ 946 state]); 947 948 const uint32_t distance = rep3; 949 rep3 = rep2; 950 rep2 = rep1; 951 rep1 = rep0; 952 rep0 = distance; 953 } 954 } 955 } 956 957 update_long_rep(state); 958 959 len_decode(len, coder->rep_len_decoder, 960 pos_state, SEQ_REP_LEN); 961 } 962 963 ///////////////////////////////// 964 // Repeat from history buffer. // 965 ///////////////////////////////// 966 967 assert(len >= MATCH_LEN_MIN); 968 assert(len <= MATCH_LEN_MAX); 969 970 case SEQ_COPY: 971 if (unlikely(dict_repeat(&dict, rep0, &len))) { 972 coder->sequence = SEQ_COPY; 973 goto out; 974 } 975 } 976 977 out: 978 // Save state 979 980 // NOTE: Must not copy dict.limit. 981 dictptr->pos = dict.pos; 982 dictptr->full = dict.full; 983 984 rc_from_local(coder->rc, *in_pos); 985 986 coder->state = state; 987 coder->rep0 = rep0; 988 coder->rep1 = rep1; 989 coder->rep2 = rep2; 990 coder->rep3 = rep3; 991 992 coder->probs = probs; 993 coder->symbol = symbol; 994 coder->limit = limit; 995 coder->offset = offset; 996 coder->len = len; 997 998 // Update the remaining amount of uncompressed data if uncompressed 999 // size was known. 1000 if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) { 1001 coder->uncompressed_size -= dict.pos - dict_start; 1002 1003 // If we have gotten all the output but the decoder wants 1004 // to write more output, the file is corrupt. There are 1005 // three SEQ values where output is produced. 1006 if (coder->uncompressed_size == 0 && ret == LZMA_OK 1007 && (coder->sequence == SEQ_LITERAL_WRITE 1008 || coder->sequence == SEQ_SHORTREP 1009 || coder->sequence == SEQ_COPY)) 1010 ret = LZMA_DATA_ERROR; 1011 } 1012 1013 if (ret == LZMA_STREAM_END) { 1014 // Reset the range decoder so that it is ready to reinitialize 1015 // for a new LZMA2 chunk. 1016 rc_reset(coder->rc); 1017 coder->sequence = SEQ_IS_MATCH; 1018 } 1019 1020 return ret; 1021 } 1022 1023 1024 static void 1025 lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size, 1026 bool allow_eopm) 1027 { 1028 lzma_lzma1_decoder *coder = coder_ptr; 1029 coder->uncompressed_size = uncompressed_size; 1030 coder->allow_eopm = allow_eopm; 1031 } 1032 1033 1034 static void 1035 lzma_decoder_reset(void *coder_ptr, const void *opt) 1036 { 1037 lzma_lzma1_decoder *coder = coder_ptr; 1038 const lzma_options_lzma *options = opt; 1039 1040 // NOTE: We assume that lc/lp/pb are valid since they were 1041 // successfully decoded with lzma_lzma_decode_properties(). 1042 1043 // Calculate pos_mask. We don't need pos_bits as is for anything. 1044 coder->pos_mask = (1U << options->pb) - 1; 1045 1046 // Initialize the literal decoder. 1047 literal_init(coder->literal, options->lc, options->lp); 1048 1049 coder->literal_context_bits = options->lc; 1050 coder->literal_mask = literal_mask_calc(options->lc, options->lp); 1051 1052 // State 1053 coder->state = STATE_LIT_LIT; 1054 coder->rep0 = 0; 1055 coder->rep1 = 0; 1056 coder->rep2 = 0; 1057 coder->rep3 = 0; 1058 coder->pos_mask = (1U << options->pb) - 1; 1059 1060 // Range decoder 1061 rc_reset(coder->rc); 1062 1063 // Bit and bittree decoders 1064 for (uint32_t i = 0; i < STATES; ++i) { 1065 for (uint32_t j = 0; j <= coder->pos_mask; ++j) { 1066 bit_reset(coder->is_match[i][j]); 1067 bit_reset(coder->is_rep0_long[i][j]); 1068 } 1069 1070 bit_reset(coder->is_rep[i]); 1071 bit_reset(coder->is_rep0[i]); 1072 bit_reset(coder->is_rep1[i]); 1073 bit_reset(coder->is_rep2[i]); 1074 } 1075 1076 for (uint32_t i = 0; i < DIST_STATES; ++i) 1077 bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS); 1078 1079 for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i) 1080 bit_reset(coder->pos_special[i]); 1081 1082 bittree_reset(coder->pos_align, ALIGN_BITS); 1083 1084 // Len decoders (also bit/bittree) 1085 const uint32_t num_pos_states = 1U << options->pb; 1086 bit_reset(coder->match_len_decoder.choice); 1087 bit_reset(coder->match_len_decoder.choice2); 1088 bit_reset(coder->rep_len_decoder.choice); 1089 bit_reset(coder->rep_len_decoder.choice2); 1090 1091 for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { 1092 bittree_reset(coder->match_len_decoder.low[pos_state], 1093 LEN_LOW_BITS); 1094 bittree_reset(coder->match_len_decoder.mid[pos_state], 1095 LEN_MID_BITS); 1096 1097 bittree_reset(coder->rep_len_decoder.low[pos_state], 1098 LEN_LOW_BITS); 1099 bittree_reset(coder->rep_len_decoder.mid[pos_state], 1100 LEN_MID_BITS); 1101 } 1102 1103 bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); 1104 bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); 1105 1106 coder->sequence = SEQ_IS_MATCH; 1107 coder->probs = NULL; 1108 coder->symbol = 0; 1109 coder->limit = 0; 1110 coder->offset = 0; 1111 coder->len = 0; 1112 1113 return; 1114 } 1115 1116 1117 extern lzma_ret 1118 lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator, 1119 const lzma_options_lzma *options, lzma_lz_options *lz_options) 1120 { 1121 if (lz->coder == NULL) { 1122 lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator); 1123 if (lz->coder == NULL) 1124 return LZMA_MEM_ERROR; 1125 1126 lz->code = &lzma_decode; 1127 lz->reset = &lzma_decoder_reset; 1128 lz->set_uncompressed = &lzma_decoder_uncompressed; 1129 } 1130 1131 // All dictionary sizes are OK here. LZ decoder will take care of 1132 // the special cases. 1133 lz_options->dict_size = options->dict_size; 1134 lz_options->preset_dict = options->preset_dict; 1135 lz_options->preset_dict_size = options->preset_dict_size; 1136 1137 return LZMA_OK; 1138 } 1139 1140 1141 /// Allocate and initialize LZMA decoder. This is used only via LZ 1142 /// initialization (lzma_lzma_decoder_init() passes function pointer to 1143 /// the LZ initialization). 1144 static lzma_ret 1145 lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, 1146 lzma_vli id, const void *options, lzma_lz_options *lz_options) 1147 { 1148 if (!is_lclppb_valid(options)) 1149 return LZMA_PROG_ERROR; 1150 1151 lzma_vli uncomp_size = LZMA_VLI_UNKNOWN; 1152 bool allow_eopm = true; 1153 1154 if (id == LZMA_FILTER_LZMA1EXT) { 1155 const lzma_options_lzma *opt = options; 1156 1157 // Only one flag is supported. 1158 if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM) 1159 return LZMA_OPTIONS_ERROR; 1160 1161 // FIXME? Using lzma_vli instead of uint64_t is weird because 1162 // this has nothing to do with .xz headers and variable-length 1163 // integer encoding. On the other hand, using LZMA_VLI_UNKNOWN 1164 // instead of UINT64_MAX is clearer when unknown size is 1165 // meant. A problem with using lzma_vli is that now we 1166 // allow > LZMA_VLI_MAX which is fine in this file but 1167 // it's still confusing. Note that alone_decoder.c also 1168 // allows > LZMA_VLI_MAX when setting uncompressed size. 1169 uncomp_size = opt->ext_size_low 1170 + ((uint64_t)(opt->ext_size_high) << 32); 1171 allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0 1172 || uncomp_size == LZMA_VLI_UNKNOWN; 1173 } 1174 1175 return_if_error(lzma_lzma_decoder_create( 1176 lz, allocator, options, lz_options)); 1177 1178 lzma_decoder_reset(lz->coder, options); 1179 lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm); 1180 1181 return LZMA_OK; 1182 } 1183 1184 1185 extern lzma_ret 1186 lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 1187 const lzma_filter_info *filters) 1188 { 1189 // LZMA can only be the last filter in the chain. This is enforced 1190 // by the raw_decoder initialization. 1191 assert(filters[1].init == NULL); 1192 1193 return lzma_lz_decoder_init(next, allocator, filters, 1194 &lzma_decoder_init); 1195 } 1196 1197 1198 extern bool 1199 lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) 1200 { 1201 if (byte > (4 * 5 + 4) * 9 + 8) 1202 return true; 1203 1204 // See the file format specification to understand this. 1205 options->pb = byte / (9 * 5); 1206 byte -= options->pb * 9 * 5; 1207 options->lp = byte / 9; 1208 options->lc = byte - options->lp * 9; 1209 1210 return options->lc + options->lp > LZMA_LCLP_MAX; 1211 } 1212 1213 1214 extern uint64_t 1215 lzma_lzma_decoder_memusage_nocheck(const void *options) 1216 { 1217 const lzma_options_lzma *const opt = options; 1218 return sizeof(lzma_lzma1_decoder) 1219 + lzma_lz_decoder_memusage(opt->dict_size); 1220 } 1221 1222 1223 extern uint64_t 1224 lzma_lzma_decoder_memusage(const void *options) 1225 { 1226 if (!is_lclppb_valid(options)) 1227 return UINT64_MAX; 1228 1229 return lzma_lzma_decoder_memusage_nocheck(options); 1230 } 1231 1232 1233 extern lzma_ret 1234 lzma_lzma_props_decode(void **options, const lzma_allocator *allocator, 1235 const uint8_t *props, size_t props_size) 1236 { 1237 if (props_size != 5) 1238 return LZMA_OPTIONS_ERROR; 1239 1240 lzma_options_lzma *opt 1241 = lzma_alloc(sizeof(lzma_options_lzma), allocator); 1242 if (opt == NULL) 1243 return LZMA_MEM_ERROR; 1244 1245 if (lzma_lzma_lclppb_decode(opt, props[0])) 1246 goto error; 1247 1248 // All dictionary sizes are accepted, including zero. LZ decoder 1249 // will automatically use a dictionary at least a few KiB even if 1250 // a smaller dictionary is requested. 1251 opt->dict_size = read32le(props + 1); 1252 1253 opt->preset_dict = NULL; 1254 opt->preset_dict_size = 0; 1255 1256 *options = opt; 1257 1258 return LZMA_OK; 1259 1260 error: 1261 lzma_free(opt, allocator); 1262 return LZMA_OPTIONS_ERROR; 1263 } 1264