/*
 * .xz Stream decoder
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#include "xz_private.h"
#include "xz_stream.h"

#ifdef XZ_USE_CRC64
#	define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64)
#else
#	define IS_CRC64(check_type) false
#endif

/* Hash used to validate the Index field */
struct xz_dec_hash {
	vli_type unpadded;
	vli_type uncompressed;
	uint32_t crc32;
};

struct xz_dec {
	/* Position in dec_main() */
	enum {
		SEQ_STREAM_HEADER,
		SEQ_BLOCK_START,
		SEQ_BLOCK_HEADER,
		SEQ_BLOCK_UNCOMPRESS,
		SEQ_BLOCK_PADDING,
		SEQ_BLOCK_CHECK,
		SEQ_INDEX,
		SEQ_INDEX_PADDING,
		SEQ_INDEX_CRC32,
		SEQ_STREAM_FOOTER,
		SEQ_STREAM_PADDING
	} sequence;

	/* Position in variable-length integers and Check fields */
	uint32_t pos;

	/* Variable-length integer decoded by dec_vli() */
	vli_type vli;

	/* Saved in_pos and out_pos */
	size_t in_start;
	size_t out_start;

#ifdef XZ_USE_CRC64
	/* CRC32 or CRC64 value in Block or CRC32 value in Index */
	uint64_t crc;
#else
	/* CRC32 value in Block or Index */
	uint32_t crc;
#endif

	/* Type of the integrity check calculated from uncompressed data */
	enum xz_check check_type;

	/* Operation mode */
	enum xz_mode mode;

	/*
	 * True if the next call to xz_dec_run() is allowed to return
	 * XZ_BUF_ERROR.
	 */
	bool allow_buf_error;

	/* Information stored in Block Header */
	struct {
		/*
		 * Value stored in the Compressed Size field, or
		 * VLI_UNKNOWN if Compressed Size is not present.
		 */
		vli_type compressed;

		/*
		 * Value stored in the Uncompressed Size field, or
		 * VLI_UNKNOWN if Uncompressed Size is not present.
		 */
		vli_type uncompressed;

		/* Size of the Block Header field */
		uint32_t size;
	} block_header;

	/* Information collected when decoding Blocks */
	struct {
		/* Observed compressed size of the current Block */
		vli_type compressed;

		/* Observed uncompressed size of the current Block */
		vli_type uncompressed;

		/* Number of Blocks decoded so far */
		vli_type count;

		/*
		 * Hash calculated from the Block sizes. This is used to
		 * validate the Index field.
		 */
		struct xz_dec_hash hash;
	} block;

	/* Variables needed when verifying the Index field */
	struct {
		/* Position in dec_index() */
		enum {
			SEQ_INDEX_COUNT,
			SEQ_INDEX_UNPADDED,
			SEQ_INDEX_UNCOMPRESSED
		} sequence;

		/* Size of the Index in bytes */
		vli_type size;

		/* Number of Records (matches block.count in valid files) */
		vli_type count;

		/*
		 * Hash calculated from the Records (matches block.hash in
		 * valid files).
		 */
		struct xz_dec_hash hash;
	} index;

	/*
	 * Temporary buffer needed to hold Stream Header, Block Header,
	 * and Stream Footer. The Block Header is the biggest (1 KiB)
	 * so we reserve space according to that. buf[] has to be aligned
	 * to a multiple of four bytes; the size_t variables before it
	 * should guarantee this.
	 */
	struct {
		size_t pos;
		size_t size;
		uint8_t buf[1024];
	} temp;

	struct xz_dec_lzma2 *lzma2;

#ifdef XZ_DEC_BCJ
	struct xz_dec_bcj *bcj;
	bool bcj_active;
#endif
};

#ifdef XZ_DEC_ANY_CHECK
/* Sizes of the Check field with different Check IDs */
static const uint8_t check_sizes[16] = {
	0,
	4, 4, 4,
	8, 8, 8,
	16, 16, 16,
	32, 32, 32,
	64, 64, 64
};
#endif

/*
 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
 * must have set s->temp.size to indicate how much data we are supposed
 * to copy into s->temp.buf; s->temp.pos tracks how much has been copied
 * so far. Return true once s->temp.pos has reached s->temp.size.
 */
static bool fill_temp(struct xz_dec *s, struct xz_buf *b)
{
	size_t copy_size = min_t(size_t,
			b->in_size - b->in_pos, s->temp.size - s->temp.pos);

	memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
	b->in_pos += copy_size;
	s->temp.pos += copy_size;

	if (s->temp.pos == s->temp.size) {
		s->temp.pos = 0;
		return true;
	}

	return false;
}

/* Decode a variable-length integer (little-endian base-128 encoding) */
static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
			   size_t *in_pos, size_t in_size)
{
	uint8_t byte;

	if (s->pos == 0)
		s->vli = 0;

	while (*in_pos < in_size) {
		byte = in[*in_pos];
		++*in_pos;

		s->vli |= (vli_type)(byte & 0x7F) << s->pos;

		if ((byte & 0x80) == 0) {
			/* Don't allow non-minimal encodings. */
			if (byte == 0 && s->pos != 0)
				return XZ_DATA_ERROR;

			s->pos = 0;
			return XZ_STREAM_END;
		}

		s->pos += 7;
		if (s->pos == 7 * VLI_BYTES_MAX)
			return XZ_DATA_ERROR;
	}

	return XZ_OK;
}
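
/*
 * Worked example of the encoding handled by dec_vli() above: the value
 * 300 (binary 10 0101100) is stored as the two bytes 0xAC 0x02. The
 * first byte carries the low seven bits (0x2C) with the continuation
 * bit 0x80 set; the second byte carries the remaining bits (0x02) with
 * the continuation bit clear, which terminates the integer. An encoded
 * VLI takes at most VLI_BYTES_MAX bytes, which dec_vli() enforces.
 */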

/*
 * Decode the Compressed Data field from a Block. Update and validate
 * the observed compressed and uncompressed sizes of the Block so that
 * they don't exceed the values possibly stored in the Block Header
 * (validation assumes that no integer overflow occurs, since vli_type
 * is normally uint64_t). Update the CRC32 or CRC64 value if presence of
 * the CRC32 or CRC64 field was indicated in the Stream Header.
 *
 * Once the decoding is finished, validate that the observed sizes match
 * the sizes possibly stored in the Block Header. Update the hash and
 * Block count, which are later used to validate the Index field.
 */
static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	s->in_start = b->in_pos;
	s->out_start = b->out_pos;

#ifdef XZ_DEC_BCJ
	if (s->bcj_active)
		ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
	else
#endif
		ret = xz_dec_lzma2_run(s->lzma2, b);

	s->block.compressed += b->in_pos - s->in_start;
	s->block.uncompressed += b->out_pos - s->out_start;

	/*
	 * There is no need to separately check for VLI_UNKNOWN, since
	 * the observed sizes are always smaller than VLI_UNKNOWN.
	 */
	if (s->block.compressed > s->block_header.compressed
			|| s->block.uncompressed
				> s->block_header.uncompressed)
		return XZ_DATA_ERROR;

	if (s->check_type == XZ_CHECK_CRC32)
		s->crc = xz_crc32(b->out + s->out_start,
				b->out_pos - s->out_start, s->crc);
#ifdef XZ_USE_CRC64
	else if (s->check_type == XZ_CHECK_CRC64)
		s->crc = xz_crc64(b->out + s->out_start,
				b->out_pos - s->out_start, s->crc);
#endif

	if (ret == XZ_STREAM_END) {
		if (s->block_header.compressed != VLI_UNKNOWN
				&& s->block_header.compressed
					!= s->block.compressed)
			return XZ_DATA_ERROR;

		if (s->block_header.uncompressed != VLI_UNKNOWN
				&& s->block_header.uncompressed
					!= s->block.uncompressed)
			return XZ_DATA_ERROR;

		s->block.hash.unpadded += s->block_header.size
				+ s->block.compressed;

#ifdef XZ_DEC_ANY_CHECK
		s->block.hash.unpadded += check_sizes[s->check_type];
#else
		if (s->check_type == XZ_CHECK_CRC32)
			s->block.hash.unpadded += 4;
		else if (IS_CRC64(s->check_type))
			s->block.hash.unpadded += 8;
#endif

		s->block.hash.uncompressed += s->block.uncompressed;
		s->block.hash.crc32 = xz_crc32(
				(const uint8_t *)&s->block.hash,
				sizeof(s->block.hash), s->block.hash.crc32);

		++s->block.count;
	}

	return ret;
}

/* Update the Index size and the CRC32 value. */
static void index_update(struct xz_dec *s, const struct xz_buf *b)
{
	size_t in_used = b->in_pos - s->in_start;
	s->index.size += in_used;
	s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc);
}

/*
 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
 * fields from the Index field. That is, Index Padding and CRC32 are not
 * decoded by this function.
 *
 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
 */
static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	do {
		ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
		if (ret != XZ_STREAM_END) {
			index_update(s, b);
			return ret;
		}

		switch (s->index.sequence) {
		case SEQ_INDEX_COUNT:
			s->index.count = s->vli;

			/*
			 * Validate that the Number of Records field
			 * indicates the same number of Records as
			 * there were Blocks in the Stream.
			 */
			if (s->index.count != s->block.count)
				return XZ_DATA_ERROR;

			s->index.sequence = SEQ_INDEX_UNPADDED;
			break;

		case SEQ_INDEX_UNPADDED:
			s->index.hash.unpadded += s->vli;
			s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
			break;

		case SEQ_INDEX_UNCOMPRESSED:
			s->index.hash.uncompressed += s->vli;
			s->index.hash.crc32 = xz_crc32(
					(const uint8_t *)&s->index.hash,
					sizeof(s->index.hash),
					s->index.hash.crc32);
			--s->index.count;
			s->index.sequence = SEQ_INDEX_UNPADDED;
			break;
		}
	} while (s->index.count > 0);

	return XZ_STREAM_END;
}
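
/*
 * For reference, the layout of the Index field in the .xz format:
 *
 *	Index Indicator (one 0x00 byte; consumed in SEQ_BLOCK_START)
 *	Number of Records (VLI)
 *	List of Records (one Unpadded Size VLI and one Uncompressed
 *	    Size VLI per Block)
 *	Index Padding (0-3 zero bytes; handled in SEQ_INDEX_PADDING)
 *	CRC32 (handled in SEQ_INDEX_CRC32)
 *
 * dec_index() itself covers only the Number of Records and the List
 * of Records, as noted above.
 */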

/*
 * Validate that the next four or eight input bytes match the value
 * of s->crc. s->pos must be zero when starting to validate the first byte.
 * The "bits" argument allows using the same code for both CRC32 and CRC64.
 */
static enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b,
				uint32_t bits)
{
	do {
		if (b->in_pos == b->in_size)
			return XZ_OK;

		if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++])
			return XZ_DATA_ERROR;

		s->pos += 8;

	} while (s->pos < bits);

	s->crc = 0;
	s->pos = 0;

	return XZ_STREAM_END;
}

#ifdef XZ_DEC_ANY_CHECK
/*
 * Skip over the Check field when the Check ID is not supported.
 * Returns true once the whole Check field has been skipped over.
 */
static bool check_skip(struct xz_dec *s, struct xz_buf *b)
{
	while (s->pos < check_sizes[s->check_type]) {
		if (b->in_pos == b->in_size)
			return false;

		++b->in_pos;
		++s->pos;
	}

	s->pos = 0;

	return true;
}
#endif

/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
static enum xz_ret dec_stream_header(struct xz_dec *s)
{
	if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
		return XZ_FORMAT_ERROR;

	if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
			!= get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
		return XZ_DATA_ERROR;

	if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
		return XZ_OPTIONS_ERROR;

	/*
	 * Of the integrity checks, we support none (Check ID = 0),
	 * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4).
	 * However, if XZ_DEC_ANY_CHECK is defined, we will accept other
	 * check types too, but then the check won't be verified and
	 * a warning (XZ_UNSUPPORTED_CHECK) will be given.
	 */
	if (s->temp.buf[HEADER_MAGIC_SIZE + 1] > XZ_CHECK_MAX)
		return XZ_OPTIONS_ERROR;

	s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];

#ifdef XZ_DEC_ANY_CHECK
	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
		return XZ_UNSUPPORTED_CHECK;
#else
	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
		return XZ_OPTIONS_ERROR;
#endif

	return XZ_OK;
}

/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream). */
static enum xz_ret dec_stream_footer(struct xz_dec *s)
{
	if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
		return XZ_DATA_ERROR;

	if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
		return XZ_DATA_ERROR;

	/*
	 * Validate Backward Size. Note that we never added the size of the
	 * Index CRC32 field to s->index.size, thus we use s->index.size / 4
	 * instead of s->index.size / 4 - 1.
	 */
	if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
		return XZ_DATA_ERROR;

	if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
		return XZ_DATA_ERROR;

	/*
	 * Use XZ_STREAM_END instead of XZ_OK to be more convenient
	 * for the caller.
	 */
	return XZ_STREAM_END;
}
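
/*
 * For reference, the layout of the two 12-byte fields decoded above:
 *
 *	Stream Header: Header Magic (FD 37 7A 58 5A 00), Stream Flags
 *	    (two bytes, the second being the Check ID), CRC32 of the
 *	    Stream Flags
 *	Stream Footer: CRC32 of the next two fields, Backward Size
 *	    (four bytes), Stream Flags, Footer Magic (59 5A, "YZ")
 *
 * The Stream Flags must be identical in the Header and the Footer;
 * dec_stream_footer() verifies this against s->check_type.
 */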

/* Decode the Block Header and initialize the filter chain. */
static enum xz_ret dec_block_header(struct xz_dec *s)
{
	enum xz_ret ret;

	/*
	 * Validate the CRC32. We know that the temp buffer is at least
	 * eight bytes so this is safe.
	 */
	s->temp.size -= 4;
	if (xz_crc32(s->temp.buf, s->temp.size, 0)
			!= get_le32(s->temp.buf + s->temp.size))
		return XZ_DATA_ERROR;

	s->temp.pos = 2;

	/*
	 * Catch unsupported Block Flags. We support only one or two filters
	 * in the chain, so we catch that with the same test.
	 */
#ifdef XZ_DEC_BCJ
	if (s->temp.buf[1] & 0x3E)
#else
	if (s->temp.buf[1] & 0x3F)
#endif
		return XZ_OPTIONS_ERROR;

	/* Compressed Size */
	if (s->temp.buf[1] & 0x40) {
		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
					!= XZ_STREAM_END)
			return XZ_DATA_ERROR;

		s->block_header.compressed = s->vli;
	} else {
		s->block_header.compressed = VLI_UNKNOWN;
	}

	/* Uncompressed Size */
	if (s->temp.buf[1] & 0x80) {
		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
					!= XZ_STREAM_END)
			return XZ_DATA_ERROR;

		s->block_header.uncompressed = s->vli;
	} else {
		s->block_header.uncompressed = VLI_UNKNOWN;
	}

#ifdef XZ_DEC_BCJ
	/* If there are two filters, the first one must be a BCJ filter. */
	s->bcj_active = s->temp.buf[1] & 0x01;
	if (s->bcj_active) {
		if (s->temp.size - s->temp.pos < 2)
			return XZ_OPTIONS_ERROR;

		ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
		if (ret != XZ_OK)
			return ret;

		/*
		 * We don't support a custom start offset,
		 * so Size of Properties must be zero.
		 */
		if (s->temp.buf[s->temp.pos++] != 0x00)
			return XZ_OPTIONS_ERROR;
	}
#endif

	/* Valid Filter Flags always take at least two bytes. */
	if (s->temp.size - s->temp.pos < 2)
		return XZ_DATA_ERROR;

	/* Filter ID = LZMA2 */
	if (s->temp.buf[s->temp.pos++] != 0x21)
		return XZ_OPTIONS_ERROR;

	/* Size of Properties = 1-byte Filter Properties */
	if (s->temp.buf[s->temp.pos++] != 0x01)
		return XZ_OPTIONS_ERROR;

	/* Filter Properties contains LZMA2 dictionary size. */
	if (s->temp.size - s->temp.pos < 1)
		return XZ_DATA_ERROR;

	ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
	if (ret != XZ_OK)
		return ret;

	/* The rest must be Header Padding. */
	while (s->temp.pos < s->temp.size)
		if (s->temp.buf[s->temp.pos++] != 0x00)
			return XZ_OPTIONS_ERROR;

	s->temp.pos = 0;
	s->block.compressed = 0;
	s->block.uncompressed = 0;

	return XZ_OK;
}
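
/*
 * A minimal Block Header accepted by dec_block_header(): a 12-byte
 * header describing a plain LZMA2 Block with neither size field
 * present (byte values in hex, xx depending on the content):
 *
 *	02		Block Header Size ((0x02 + 1) * 4 = 12 bytes)
 *	00		Block Flags (one filter, no optional sizes)
 *	21		Filter ID (LZMA2)
 *	01		Size of Filter Properties
 *	xx		Filter Properties (encoded dictionary size)
 *	00 00 00	Header Padding
 *	xx xx xx xx	CRC32 of the preceding eight bytes
 */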

static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	/*
	 * Store the start position for the case when we are in the middle
	 * of the Index field.
	 */
	s->in_start = b->in_pos;

	while (true) {
		switch (s->sequence) {
		case SEQ_STREAM_HEADER:
			/*
			 * Stream Header is copied to s->temp, and then
			 * decoded from there. This way if the caller
			 * gives us only a little input at a time, we can
			 * still keep the Stream Header decoding code
			 * simple. A similar approach is used in many
			 * places in this file.
			 */
			if (!fill_temp(s, b))
				return XZ_OK;

			/*
			 * If dec_stream_header() returns
			 * XZ_UNSUPPORTED_CHECK, it is still possible
			 * to continue decoding if working in multi-call
			 * mode. Thus, update s->sequence before calling
			 * dec_stream_header().
			 */
			s->sequence = SEQ_BLOCK_START;

			ret = dec_stream_header(s);
			if (ret != XZ_OK)
				return ret;

			/* Fall through */

		case SEQ_BLOCK_START:
			/* We need one byte of input to continue. */
			if (b->in_pos == b->in_size)
				return XZ_OK;

			/* See if this is the beginning of the Index field. */
			if (b->in[b->in_pos] == 0) {
				s->in_start = b->in_pos++;
				s->sequence = SEQ_INDEX;
				break;
			}

			/*
			 * Calculate the size of the Block Header and
			 * prepare to decode it.
			 */
			s->block_header.size
				= ((uint32_t)b->in[b->in_pos] + 1) * 4;

			s->temp.size = s->block_header.size;
			s->temp.pos = 0;
			s->sequence = SEQ_BLOCK_HEADER;

			/* Fall through */

		case SEQ_BLOCK_HEADER:
			if (!fill_temp(s, b))
				return XZ_OK;

			ret = dec_block_header(s);
			if (ret != XZ_OK)
				return ret;

			s->sequence = SEQ_BLOCK_UNCOMPRESS;

			/* Fall through */

		case SEQ_BLOCK_UNCOMPRESS:
			ret = dec_block(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_BLOCK_PADDING;

			/* Fall through */

		case SEQ_BLOCK_PADDING:
			/*
			 * Size of Compressed Data + Block Padding
			 * must be a multiple of four. We don't need
			 * s->block.compressed for anything else
			 * anymore, so we use it here to test the size
			 * of the Block Padding field.
			 */
			while (s->block.compressed & 3) {
				if (b->in_pos == b->in_size)
					return XZ_OK;

				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;

				++s->block.compressed;
			}

			s->sequence = SEQ_BLOCK_CHECK;

			/* Fall through */

		case SEQ_BLOCK_CHECK:
			if (s->check_type == XZ_CHECK_CRC32) {
				ret = crc_validate(s, b, 32);
				if (ret != XZ_STREAM_END)
					return ret;
			} else if (IS_CRC64(s->check_type)) {
				ret = crc_validate(s, b, 64);
				if (ret != XZ_STREAM_END)
					return ret;
			}
#ifdef XZ_DEC_ANY_CHECK
			else if (!check_skip(s, b))
				return XZ_OK;
#endif

			s->sequence = SEQ_BLOCK_START;
			break;

		case SEQ_INDEX:
			ret = dec_index(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_INDEX_PADDING;

			/* Fall through */

		case SEQ_INDEX_PADDING:
			while ((s->index.size + (b->in_pos - s->in_start))
					& 3) {
				if (b->in_pos == b->in_size) {
					index_update(s, b);
					return XZ_OK;
				}

				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;
			}

			/* Finish the CRC32 value and Index size. */
			index_update(s, b);

			/* Compare the hashes to validate the Index field. */
			if (!memeq(&s->block.hash, &s->index.hash,
					sizeof(s->block.hash)))
				return XZ_DATA_ERROR;

			s->sequence = SEQ_INDEX_CRC32;

			/* Fall through */

		case SEQ_INDEX_CRC32:
			ret = crc_validate(s, b, 32);
			if (ret != XZ_STREAM_END)
				return ret;

			s->temp.size = STREAM_HEADER_SIZE;
			s->sequence = SEQ_STREAM_FOOTER;

			/* Fall through */

		case SEQ_STREAM_FOOTER:
			if (!fill_temp(s, b))
				return XZ_OK;

			return dec_stream_footer(s);

		case SEQ_STREAM_PADDING:
			/* Never reached, only silencing a warning */
			break;
		}
	}

	/* Never reached */
}
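
/*
 * For reference, the states of dec_main() for a valid single-Stream
 * file form a linear sequence: SEQ_STREAM_HEADER, then for each Block
 * SEQ_BLOCK_START, SEQ_BLOCK_HEADER, SEQ_BLOCK_UNCOMPRESS,
 * SEQ_BLOCK_PADDING, and SEQ_BLOCK_CHECK, and finally SEQ_INDEX,
 * SEQ_INDEX_PADDING, SEQ_INDEX_CRC32, and SEQ_STREAM_FOOTER.
 * SEQ_STREAM_PADDING is entered only from xz_dec_catrun() below
 * (when XZ_DEC_CONCATENATED is defined).
 */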

/*
 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
 * multi-call and single-call decoding.
 *
 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
 * are not going to make any progress anymore. This is to prevent the caller
 * from calling us infinitely when the input file is truncated or otherwise
 * corrupt. Since the zlib-style API allows the caller to fill the input
 * buffer only when the decoder doesn't produce any new output, we have to be
 * careful to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is
 * returned only after the second consecutive call to xz_dec_run() that makes
 * no progress.
 *
 * In single-call mode, if we couldn't decode everything and no error
 * occurred, either the input is truncated or the output buffer is too small.
 * Since we know that the last input byte never produces any output, we know
 * that if all the input was consumed and decoding wasn't finished, the file
 * must be corrupt. Otherwise the output buffer has to be too small or the
 * file is corrupt in a way that decoding it produces too big output.
 *
 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
 * their original values. This is because with some filter chains there won't
 * be any valid uncompressed data in the output buffer unless the decoding
 * actually succeeds (that's the price to pay for using the output buffer as
 * the workspace).
 */
XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
{
	size_t in_start;
	size_t out_start;
	enum xz_ret ret;

	if (DEC_IS_SINGLE(s->mode))
		xz_dec_reset(s);

	in_start = b->in_pos;
	out_start = b->out_pos;
	ret = dec_main(s, b);

	if (DEC_IS_SINGLE(s->mode)) {
		if (ret == XZ_OK)
			ret = b->in_pos == b->in_size
					? XZ_DATA_ERROR : XZ_BUF_ERROR;

		if (ret != XZ_STREAM_END) {
			b->in_pos = in_start;
			b->out_pos = out_start;
		}

	} else if (ret == XZ_OK && in_start == b->in_pos
			&& out_start == b->out_pos) {
		if (s->allow_buf_error)
			ret = XZ_BUF_ERROR;

		s->allow_buf_error = true;
	} else {
		s->allow_buf_error = false;
	}

	return ret;
}
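
/*
 * A sketch of the intended multi-call usage of xz_dec_run(); refill()
 * and flush() are hypothetical caller-side helpers, not part of this
 * API, and a real caller would also flush any remaining output after
 * the loop:
 *
 *	struct xz_buf b = {
 *		.in = in_buf, .in_pos = 0, .in_size = 0,
 *		.out = out_buf, .out_pos = 0, .out_size = sizeof(out_buf)
 *	};
 *	enum xz_ret ret;
 *
 *	do {
 *		if (b.in_pos == b.in_size) {
 *			b.in_size = refill(in_buf, sizeof(in_buf));
 *			b.in_pos = 0;
 *		}
 *
 *		ret = xz_dec_run(s, &b);
 *
 *		if (b.out_pos == b.out_size) {
 *			flush(out_buf, b.out_pos);
 *			b.out_pos = 0;
 *		}
 *	} while (ret == XZ_OK);
 */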

#ifdef XZ_DEC_CONCATENATED
XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b,
				    int finish)
{
	enum xz_ret ret;

	if (DEC_IS_SINGLE(s->mode)) {
		xz_dec_reset(s);
		finish = true;
	}

	while (true) {
		if (s->sequence == SEQ_STREAM_PADDING) {
			/*
			 * Skip Stream Padding. Its size must be a multiple
			 * of four bytes which is tracked with s->pos.
			 */
			while (true) {
				if (b->in_pos == b->in_size) {
					/*
					 * Note that if we are repeatedly
					 * given no input and finish is false,
					 * we will keep returning XZ_OK even
					 * though no progress is being made.
					 * The lack of XZ_BUF_ERROR support
					 * isn't a problem here because a
					 * reasonable caller will eventually
					 * provide more input or set finish
					 * to true.
					 */
					if (!finish)
						return XZ_OK;

					if (s->pos != 0)
						return XZ_DATA_ERROR;

					return XZ_STREAM_END;
				}

				if (b->in[b->in_pos] != 0x00) {
					if (s->pos != 0)
						return XZ_DATA_ERROR;

					break;
				}

				++b->in_pos;
				s->pos = (s->pos + 1) & 3;
			}

			/*
			 * More input remains. It should be a new Stream.
			 *
			 * In single-call mode xz_dec_run() will always call
			 * xz_dec_reset(). Thus, we need to do it here only
			 * in multi-call mode.
			 */
			if (DEC_IS_MULTI(s->mode))
				xz_dec_reset(s);
		}

		ret = xz_dec_run(s, b);

		if (ret != XZ_STREAM_END)
			break;

		s->sequence = SEQ_STREAM_PADDING;
	}

	return ret;
}
#endif

XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max)
{
	struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL)
		return NULL;

	s->mode = mode;

#ifdef XZ_DEC_BCJ
	s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
	if (s->bcj == NULL)
		goto error_bcj;
#endif

	s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
	if (s->lzma2 == NULL)
		goto error_lzma2;

	xz_dec_reset(s);
	return s;

error_lzma2:
#ifdef XZ_DEC_BCJ
	xz_dec_bcj_end(s->bcj);
error_bcj:
#endif
	kfree(s);
	return NULL;
}

XZ_EXTERN void xz_dec_reset(struct xz_dec *s)
{
	s->sequence = SEQ_STREAM_HEADER;
	s->allow_buf_error = false;
	s->pos = 0;
	s->crc = 0;
	memzero(&s->block, sizeof(s->block));
	memzero(&s->index, sizeof(s->index));
	s->temp.pos = 0;
	s->temp.size = STREAM_HEADER_SIZE;
}

XZ_EXTERN void xz_dec_end(struct xz_dec *s)
{
	if (s != NULL) {
		xz_dec_lzma2_end(s->lzma2);
#ifdef XZ_DEC_BCJ
		xz_dec_bcj_end(s->bcj);
#endif
		kfree(s);
	}
}
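
/*
 * Putting the pieces together, a minimal single-call decoding sketch
 * with b set up as in the sketch above xz_dec_catrun(), except that
 * the buffers must hold the whole input and output up front. The
 * dict_max argument limits the dictionary allocation only in the
 * multi-call modes, so zero is fine here:
 *
 *	struct xz_dec *s = xz_dec_init(XZ_SINGLE, 0);
 *
 *	if (s != NULL) {
 *		ret = xz_dec_run(s, &b);
 *		xz_dec_end(s);
 *	}
 *
 * On success ret is XZ_STREAM_END and b.out holds the uncompressed
 * data; on failure b.in_pos and b.out_pos are reset as described
 * above xz_dec_run().
 */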