1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file file_info.c 4 /// \brief Decode .xz file information into a lzma_index structure 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "index_decoder.h" 14 15 16 typedef struct { 17 enum { 18 SEQ_MAGIC_BYTES, 19 SEQ_PADDING_SEEK, 20 SEQ_PADDING_DECODE, 21 SEQ_FOOTER, 22 SEQ_INDEX_INIT, 23 SEQ_INDEX_DECODE, 24 SEQ_HEADER_DECODE, 25 SEQ_HEADER_COMPARE, 26 } sequence; 27 28 /// Absolute position of in[*in_pos] in the file. All code that 29 /// modifies *in_pos also updates this. seek_to_pos() needs this 30 /// to determine if we need to request the application to seek for 31 /// us or if we can do the seeking internally by adjusting *in_pos. 32 uint64_t file_cur_pos; 33 34 /// This refers to absolute positions of interesting parts of the 35 /// input file. Sometimes it points to the *beginning* of a specific 36 /// field and sometimes to the *end* of a field. The current target 37 /// position at each moment is explained in the comments. 38 uint64_t file_target_pos; 39 40 /// Size of the .xz file (from the application). 41 uint64_t file_size; 42 43 /// Index decoder 44 lzma_next_coder index_decoder; 45 46 /// Number of bytes remaining in the Index field that is currently 47 /// being decoded. 48 lzma_vli index_remaining; 49 50 /// The Index decoder will store the decoded Index in this pointer. 51 lzma_index *this_index; 52 53 /// Amount of Stream Padding in the current Stream. 54 lzma_vli stream_padding; 55 56 /// The final combined index is collected here. 57 lzma_index *combined_index; 58 59 /// Pointer from the application where to store the index information 60 /// after successful decoding. 
61 lzma_index **dest_index; 62 63 /// Pointer to lzma_stream.seek_pos to be used when returning 64 /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed. 65 uint64_t *external_seek_pos; 66 67 /// Memory usage limit 68 uint64_t memlimit; 69 70 /// Stream Flags from the very beginning of the file. 71 lzma_stream_flags first_header_flags; 72 73 /// Stream Flags from Stream Header of the current Stream. 74 lzma_stream_flags header_flags; 75 76 /// Stream Flags from Stream Footer of the current Stream. 77 lzma_stream_flags footer_flags; 78 79 size_t temp_pos; 80 size_t temp_size; 81 uint8_t temp[8192]; 82 83 } lzma_file_info_coder; 84 85 86 /// Copies data from in[*in_pos] into coder->temp until 87 /// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos 88 /// in sync with *in_pos. Returns true if more input is needed. 89 static bool 90 fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in, 91 size_t *restrict in_pos, size_t in_size) 92 { 93 coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size, 94 coder->temp, &coder->temp_pos, coder->temp_size); 95 return coder->temp_pos < coder->temp_size; 96 } 97 98 99 /// Seeks to the absolute file position specified by target_pos. 100 /// This tries to do the seeking by only modifying *in_pos, if possible. 101 /// The main benefit of this is that if one passes the whole file at once 102 /// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED 103 /// as all the seeking can be done by adjusting *in_pos in this function. 104 /// 105 /// Returns true if an external seek is needed and the caller must return 106 /// LZMA_SEEK_NEEDED. 107 static bool 108 seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos, 109 size_t in_start, size_t *in_pos, size_t in_size) 110 { 111 // The input buffer doesn't extend beyond the end of the file. 112 // This has been checked by file_info_decode() already. 
113 assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos); 114 115 const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start); 116 const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos); 117 118 bool external_seek_needed; 119 120 if (target_pos >= pos_min && target_pos <= pos_max) { 121 // The requested position is available in the current input 122 // buffer or right after it. That is, in a corner case we 123 // end up setting *in_pos == in_size and thus will immediately 124 // need new input bytes from the application. 125 *in_pos += (size_t)(target_pos - coder->file_cur_pos); 126 external_seek_needed = false; 127 } else { 128 // Ask the application to seek the input file. 129 *coder->external_seek_pos = target_pos; 130 external_seek_needed = true; 131 132 // Mark the whole input buffer as used. This way 133 // lzma_stream.total_in will have a better estimate 134 // of the amount of data read. It still won't be perfect 135 // as the value will depend on the input buffer size that 136 // the application uses, but it should be good enough for 137 // those few who want an estimate. 138 *in_pos = in_size; 139 } 140 141 // After seeking (internal or external) the current position 142 // will match the requested target position. 143 coder->file_cur_pos = target_pos; 144 145 return external_seek_needed; 146 } 147 148 149 /// The caller sets coder->file_target_pos so that it points to the *end* 150 /// of the desired file position. This function then determines how far 151 /// backwards from that position we can seek. After seeking fill_temp() 152 /// can be used to read data into coder->temp. When fill_temp() has finished, 153 /// coder->temp[coder->temp_size] will match coder->file_target_pos. 154 /// 155 /// This also validates that coder->target_file_pos is sane in sense that 156 /// we aren't trying to seek too far backwards (too close or beyond the 157 /// beginning of the file). 
158 static lzma_ret 159 reverse_seek(lzma_file_info_coder *coder, 160 size_t in_start, size_t *in_pos, size_t in_size) 161 { 162 // Check that there is enough data before the target position 163 // to contain at least Stream Header and Stream Footer. If there 164 // isn't, the file cannot be valid. 165 if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE) 166 return LZMA_DATA_ERROR; 167 168 coder->temp_pos = 0; 169 170 // The Stream Header at the very beginning of the file gets handled 171 // specially in SEQ_MAGIC_BYTES and thus we will never need to seek 172 // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes 173 // we avoid a useless external seek after SEQ_MAGIC_BYTES if the 174 // application uses an extremely small input buffer and the input 175 // file is very small. 176 if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE 177 < sizeof(coder->temp)) 178 coder->temp_size = (size_t)(coder->file_target_pos 179 - LZMA_STREAM_HEADER_SIZE); 180 else 181 coder->temp_size = sizeof(coder->temp); 182 183 // The above if-statements guarantee this. This is important because 184 // the Stream Header/Footer decoders assume that there's at least 185 // LZMA_STREAM_HEADER_SIZE bytes in coder->temp. 186 assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE); 187 188 if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size, 189 in_start, in_pos, in_size)) 190 return LZMA_SEEK_NEEDED; 191 192 return LZMA_OK; 193 } 194 195 196 /// Gets the number of zero-bytes at the end of the buffer. 197 static size_t 198 get_padding_size(const uint8_t *buf, size_t buf_size) 199 { 200 size_t padding = 0; 201 while (buf_size > 0 && buf[--buf_size] == 0x00) 202 ++padding; 203 204 return padding; 205 } 206 207 208 /// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR 209 /// is used to tell the application that Magic Bytes didn't match. 
/// In other Stream Header/Footer fields (in the middle/end of the file) it
/// could be a bit confusing to return LZMA_FORMAT_ERROR as we already know
/// that there is a valid Stream Header at the beginning of the file. For
/// those cases this function is used to convert LZMA_FORMAT_ERROR to
/// LZMA_DATA_ERROR. All other return values are passed through unchanged.
static lzma_ret
hide_format_error(lzma_ret ret)
{
	if (ret == LZMA_FORMAT_ERROR)
		ret = LZMA_DATA_ERROR;

	return ret;
}


/// Calls the Index decoder and updates coder->index_remaining.
/// This is a separate function because the input can be either directly
/// from the application or from coder->temp.
///
/// If update_file_cur_pos is true, coder->file_cur_pos is advanced by the
/// number of bytes the Index decoder consumed. The caller passes true when
/// the input comes from the application's buffer and false when it comes
/// from coder->temp.
///
/// Returns whatever the Index decoder returned.
static lzma_ret
decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, bool update_file_cur_pos)
{
	const size_t in_start = *in_pos;

	// The Index decoder produces no output, thus the output buffer
	// arguments are NULL/0.
	const lzma_ret ret = coder->index_decoder.code(
			coder->index_decoder.coder,
			allocator, in, in_pos, in_size,
			NULL, NULL, 0, LZMA_RUN);

	coder->index_remaining -= *in_pos - in_start;

	if (update_file_cur_pos)
		coder->file_cur_pos += *in_pos - in_start;

	return ret;
}


/// The main decoding function. It reads the file backwards one Stream at
/// a time: Stream Padding, Stream Footer, Index, and Stream Header. The
/// decoded Indexes are collected into coder->combined_index.
///
/// The output buffer arguments and action are not used.
///
/// Return values produced directly by this function:
///   - LZMA_OK: More input is needed.
///   - LZMA_SEEK_NEEDED: The application must seek the input to the
///     position stored via coder->external_seek_pos.
///   - LZMA_STREAM_END: The whole file was processed and the combined
///     index was stored to *coder->dest_index.
///   - LZMA_FORMAT_ERROR: The file is too small to contain a Stream
///     Header, or decoding the first Stream Header returned this.
///   - LZMA_DATA_ERROR, LZMA_PROG_ERROR: See the code below.
/// Other error codes from the called decoders are passed through.
static lzma_ret
file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size,
		uint8_t *restrict out lzma_attribute((__unused__)),
		size_t *restrict out_pos lzma_attribute((__unused__)),
		size_t out_size lzma_attribute((__unused__)),
		lzma_action action lzma_attribute((__unused__)))
{
	lzma_file_info_coder *coder = coder_ptr;
	const size_t in_start = *in_pos;

	// If the caller provides input past the end of the file, trim
	// the extra bytes from the buffer so that we won't read too far.
	assert(coder->file_size >= coder->file_cur_pos);
	if (coder->file_size - coder->file_cur_pos < in_size - in_start)
		in_size = in_start
			+ (size_t)(coder->file_size - coder->file_cur_pos);

	// A "break" inside the switch restarts the switch with the new
	// coder->sequence. The loop is left only via return statements.
	while (true)
	switch (coder->sequence) {
	case SEQ_MAGIC_BYTES:
		// Decode the Stream Header at the beginning of the file
		// first to check if the Magic Bytes match. The flags
		// are stored in coder->first_header_flags so that we
		// don't need to seek to it again.
		//
		// Check that the file is big enough to contain at least
		// Stream Header.
		if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
			return LZMA_FORMAT_ERROR;

		// Read the Stream Header field into coder->temp.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// This is the only Stream Header/Footer decoding where we
		// want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
		// match. Elsewhere it will be converted to LZMA_DATA_ERROR.
		return_if_error(lzma_stream_header_decode(
				&coder->first_header_flags, coder->temp));

		// Now that we know that the Magic Bytes match, check the
		// file size. It's better to do this here after checking the
		// Magic Bytes since this way we can give LZMA_FORMAT_ERROR
		// instead of LZMA_DATA_ERROR when the Magic Bytes don't
		// match in a file that is too big or isn't a multiple of
		// four bytes.
		if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
			return LZMA_DATA_ERROR;

		// Start looking for Stream Padding and Stream Footer
		// at the end of the file.
		coder->file_target_pos = coder->file_size;

	// Fall through

	case SEQ_PADDING_SEEK:
		// The sequence is updated before seeking so that decoding
		// continues from SEQ_PADDING_DECODE if we need to return
		// LZMA_SEEK_NEEDED here.
		coder->sequence = SEQ_PADDING_DECODE;
		return_if_error(reverse_seek(
				coder, in_start, in_pos, in_size));

	// Fall through

	case SEQ_PADDING_DECODE: {
		// Copy to coder->temp first. This keeps the code simpler if
		// the application only provides input a few bytes at a time.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Scan the buffer backwards to get the size of the
		// Stream Padding field (if any).
		const size_t new_padding = get_padding_size(
				coder->temp, coder->temp_size);
		coder->stream_padding += new_padding;

		// Set the target position to the beginning of Stream Padding
		// that has been observed so far. If all Stream Padding has
		// been seen, then the target position will be at the end
		// of the Stream Footer field.
		coder->file_target_pos -= new_padding;

		if (new_padding == coder->temp_size) {
			// The whole buffer was padding. Seek backwards in
			// the file to get more input.
			coder->sequence = SEQ_PADDING_SEEK;
			break;
		}

		// Size of Stream Padding must be a multiple of 4 bytes.
		if (coder->stream_padding & 3)
			return LZMA_DATA_ERROR;

		coder->sequence = SEQ_FOOTER;

		// Calculate the amount of non-padding data in coder->temp.
		coder->temp_size -= new_padding;
		coder->temp_pos = coder->temp_size;

		// We can avoid an external seek if the whole Stream Footer
		// is already in coder->temp. In that case SEQ_FOOTER won't
		// read more input and will find the Stream Footer from
		// coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
		//
		// Otherwise we will need to seek. The seeking is done so
		// that Stream Footer will be at the end of coder->temp.
		// This way it's likely that we also get a complete Index
		// field into coder->temp without needing a separate seek
		// for that (unless the Index field is big).
		if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
			return_if_error(reverse_seek(
					coder, in_start, in_pos, in_size));
	}

	// Fall through

	case SEQ_FOOTER:
		// Copy the Stream Footer field into coder->temp.
		// If Stream Footer was already available in coder->temp
		// in SEQ_PADDING_DECODE, then this does nothing.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Make coder->file_target_pos and coder->temp_size point
		// to the beginning of Stream Footer and thus to the end
		// of the Index field. coder->temp_pos will be updated
		// a bit later.
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;

		// Decode Stream Footer.
		return_if_error(hide_format_error(lzma_stream_footer_decode(
				&coder->footer_flags,
				coder->temp + coder->temp_size)));

		// Check that we won't seek past the beginning of the file.
		//
		// LZMA_STREAM_HEADER_SIZE is added because there must be
		// space for Stream Header too even though we won't seek
		// there before decoding the Index field.
		//
		// There's no risk of integer overflow here because
		// Backward Size cannot be greater than 2^34.
		if (coder->file_target_pos < coder->footer_flags.backward_size
				+ LZMA_STREAM_HEADER_SIZE)
			return LZMA_DATA_ERROR;

		// Set the target position to the beginning of the Index field.
		coder->file_target_pos -= coder->footer_flags.backward_size;
		coder->sequence = SEQ_INDEX_INIT;

		// We can avoid an external seek if the whole Index field is
		// already available in coder->temp.
		if (coder->temp_size >= coder->footer_flags.backward_size) {
			// Set coder->temp_pos to point to the beginning
			// of the Index.
			coder->temp_pos = coder->temp_size
					- coder->footer_flags.backward_size;
		} else {
			// These are set to zero to indicate that there's no
			// useful data (Index or anything else) in coder->temp.
			coder->temp_pos = 0;
			coder->temp_size = 0;

			// Seek to the beginning of the Index field.
			if (seek_to_pos(coder, coder->file_target_pos,
					in_start, in_pos, in_size))
				return LZMA_SEEK_NEEDED;
		}

	// Fall through

	case SEQ_INDEX_INIT: {
		// Calculate the amount of memory already used by the earlier
		// Indexes so that we know how big memory limit to pass to
		// the Index decoder.
		//
		// NOTE: When there are multiple Streams, the separate
		// lzma_index structures can use more RAM (as measured by
		// lzma_index_memused()) than the final combined lzma_index.
		// Thus memlimit may need to be slightly higher than the final
		// calculated memory usage will be. This is perhaps a bit
		// confusing to the application, but I think it shouldn't
		// cause problems in practice.
		uint64_t memused = 0;
		if (coder->combined_index != NULL) {
			memused = lzma_index_memused(coder->combined_index);
			assert(memused <= coder->memlimit);
			if (memused > coder->memlimit) // Extra sanity check
				return LZMA_PROG_ERROR;
		}

		// Initialize the Index decoder.
		return_if_error(lzma_index_decoder_init(
				&coder->index_decoder, allocator,
				&coder->this_index,
				coder->memlimit - memused));

		coder->index_remaining = coder->footer_flags.backward_size;
		coder->sequence = SEQ_INDEX_DECODE;
	}

	// Fall through

	case SEQ_INDEX_DECODE: {
		// Decode (a part of) the Index. If the whole Index is already
		// in coder->temp, read it from there. Otherwise read from
		// in[*in_pos] onwards. Note that decode_index() updates
		// coder->index_remaining and optionally coder->file_cur_pos.
		lzma_ret ret;
		if (coder->temp_size != 0) {
			assert(coder->temp_size - coder->temp_pos
					== coder->index_remaining);
			ret = decode_index(coder, allocator, coder->temp,
					&coder->temp_pos, coder->temp_size,
					false);
		} else {
			// Don't give the decoder more input than the known
			// remaining size of the Index field.
			size_t in_stop = in_size;
			if (in_size - *in_pos > coder->index_remaining)
				in_stop = *in_pos
					+ (size_t)(coder->index_remaining);

			ret = decode_index(coder, allocator,
					in, in_pos, in_stop, true);
		}

		switch (ret) {
		case LZMA_OK:
			// If the Index decoder asks for more input when we
			// have already given it as much input as Backward Size
			// indicated, the file is invalid.
			if (coder->index_remaining == 0)
				return LZMA_DATA_ERROR;

			// We cannot get here if we were reading Index from
			// coder->temp because when reading from coder->temp
			// we give the Index decoder exactly
			// coder->index_remaining bytes of input.
			assert(coder->temp_size == 0);

			return LZMA_OK;

		case LZMA_STREAM_END:
			// If the decoding seems to be successful, check also
			// that the Index decoder consumed as much input as
			// indicated by the Backward Size field.
			if (coder->index_remaining != 0)
				return LZMA_DATA_ERROR;

			break;

		default:
			return ret;
		}

		// Calculate how much the Index tells us to seek backwards
		// (relative to the beginning of the Index): Total size of
		// all Blocks plus the size of the Stream Header field.
		// No integer overflow here because lzma_index_total_size()
		// cannot return a value greater than LZMA_VLI_MAX.
		const uint64_t seek_amount
				= lzma_index_total_size(coder->this_index)
					+ LZMA_STREAM_HEADER_SIZE;

		// Check that Index is sane in sense that seek_amount won't
		// make us seek past the beginning of the file when locating
		// the Stream Header.
		//
		// coder->file_target_pos still points to the beginning of
		// the Index field.
		if (coder->file_target_pos < seek_amount)
			return LZMA_DATA_ERROR;

		// Set the target to the beginning of Stream Header.
		coder->file_target_pos -= seek_amount;

		if (coder->file_target_pos == 0) {
			// We would seek to the beginning of the file, but
			// since we already decoded that Stream Header in
			// SEQ_MAGIC_BYTES, we can use the cached value from
			// coder->first_header_flags to avoid the seek.
			coder->header_flags = coder->first_header_flags;
			coder->sequence = SEQ_HEADER_COMPARE;
			break;
		}

		coder->sequence = SEQ_HEADER_DECODE;

		// Make coder->file_target_pos point to the end of
		// the Stream Header field.
		coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;

		// If coder->temp_size is non-zero, it points to the end
		// of the Index field. Then the beginning of the Index
		// field is at coder->temp[coder->temp_size
		// - coder->footer_flags.backward_size].
		assert(coder->temp_size == 0 || coder->temp_size
				>= coder->footer_flags.backward_size);

		// If coder->temp contained the whole Index, see if it has
		// enough data to contain also the Stream Header. If so,
		// we avoid an external seek.
		//
		// NOTE: This can happen only with small .xz files and only
		// for the non-first Stream as the Stream Flags of the first
		// Stream are cached and already handled a few lines above.
		// So this isn't as useful as the other seek-avoidance cases.
		if (coder->temp_size != 0 && coder->temp_size
				- coder->footer_flags.backward_size
				>= seek_amount) {
			// Make temp_pos and temp_size point to the *end* of
			// Stream Header so that SEQ_HEADER_DECODE will find
			// the start of Stream Header from coder->temp[
			// coder->temp_size - LZMA_STREAM_HEADER_SIZE].
			coder->temp_pos = coder->temp_size
					- coder->footer_flags.backward_size
					- seek_amount
					+ LZMA_STREAM_HEADER_SIZE;
			coder->temp_size = coder->temp_pos;
		} else {
			// Seek so that Stream Header will be at the end of
			// coder->temp. With typical multi-Stream files we
			// will usually also get the Stream Footer and Index
			// of the *previous* Stream in coder->temp and thus
			// won't need a separate seek for them.
			return_if_error(reverse_seek(coder,
					in_start, in_pos, in_size));
		}
	}

	// Fall through

	case SEQ_HEADER_DECODE:
		// Copy the Stream Header field into coder->temp.
		// If Stream Header was already available in coder->temp
		// in SEQ_INDEX_DECODE, then this does nothing.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Make all these point to the beginning of Stream Header.
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_pos = coder->temp_size;

		// Decode the Stream Header.
		return_if_error(hide_format_error(lzma_stream_header_decode(
				&coder->header_flags,
				coder->temp + coder->temp_size)));

		coder->sequence = SEQ_HEADER_COMPARE;

	// Fall through

	case SEQ_HEADER_COMPARE:
		// Compare Stream Header against Stream Footer. They must
		// match.
		return_if_error(lzma_stream_flags_compare(
				&coder->header_flags, &coder->footer_flags));

		// Store the decoded Stream Flags into the Index. Use the
		// Footer Flags because it contains Backward Size, although
		// it shouldn't matter in practice.
		if (lzma_index_stream_flags(coder->this_index,
				&coder->footer_flags) != LZMA_OK)
			return LZMA_PROG_ERROR;

		// Store also the size of the Stream Padding field. It is
		// needed to calculate the offsets of the Streams correctly.
		if (lzma_index_stream_padding(coder->this_index,
				coder->stream_padding) != LZMA_OK)
			return LZMA_PROG_ERROR;

		// Reset it so that it's ready for the next Stream.
		coder->stream_padding = 0;

		// Append the earlier decoded Indexes after this_index.
		// The file is read backwards, so the Streams in
		// combined_index come after this Stream in the file.
		if (coder->combined_index != NULL)
			return_if_error(lzma_index_cat(coder->this_index,
					coder->combined_index, allocator));

		coder->combined_index = coder->this_index;
		coder->this_index = NULL;

		// If the whole file was decoded, tell the caller that we
		// are finished.
		if (coder->file_target_pos == 0) {
			// The combined index must indicate the same file
			// size as was told to us at initialization.
			assert(lzma_index_file_size(coder->combined_index)
					== coder->file_size);

			// Make the combined index available to
			// the application.
			*coder->dest_index = coder->combined_index;
			coder->combined_index = NULL;

			// Mark the input buffer as used since we may have
			// done internal seeking and thus don't know how
			// many input bytes were actually used. This way
			// lzma_stream.total_in gets a slightly better
			// estimate of the amount of input used.
			*in_pos = in_size;
			return LZMA_STREAM_END;
		}

		// We didn't hit the beginning of the file yet, so continue
		// reading backwards in the file. If we have unprocessed
		// data in coder->temp, use it before requesting more data
		// from the application.
		//
		// coder->file_target_pos, coder->temp_size, and
		// coder->temp_pos all point to the beginning of Stream Header
		// and thus the end of the previous Stream in the file.
		coder->sequence = coder->temp_size > 0
				? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
		break;

	default:
		assert(0);
		return LZMA_PROG_ERROR;
	}
}


/// Reports the memory usage and the current memory usage limit, and
/// optionally sets a new limit.
///
/// *memusage is set to the combined memory usage of the Indexes (see the
/// comments in the function) and *old_memlimit to coder->memlimit.
/// If new_memlimit is non-zero, it becomes the new limit.
///
/// Returns LZMA_OK on success, LZMA_MEMLIMIT_ERROR if new_memlimit is
/// non-zero but smaller than *memusage, or LZMA_PROG_ERROR if the memconfig
/// function of the Index decoder fails.
static lzma_ret
file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
		uint64_t *old_memlimit, uint64_t new_memlimit)
{
	lzma_file_info_coder *coder = coder_ptr;

	// The memory usage calculation comes from three things:
	//
	// (1) The Indexes that have already been decoded and processed into
	//     coder->combined_index.
	//
	// (2) The latest Index in coder->this_index that has been decoded but
	//     not yet put into coder->combined_index.
	//
	// (3) The latest Index that we have started decoding but haven't
	//     finished and thus isn't available in coder->this_index yet.
	//     Memory usage and limit information needs to be communicated
	//     from/to coder->index_decoder.
	//
	// Care has to be taken to not do both (2) and (3) when calculating
	// the memory usage.
	uint64_t combined_index_memusage = 0;
	uint64_t this_index_memusage = 0;

	// (1) If we have already successfully decoded one or more Indexes,
	// get their memory usage.
	if (coder->combined_index != NULL)
		combined_index_memusage = lzma_index_memused(
				coder->combined_index);

	// Choose between (2), (3), or neither.
	if (coder->this_index != NULL) {
		// (2) The latest Index is available. Use its memory usage.
		this_index_memusage = lzma_index_memused(coder->this_index);

	} else if (coder->sequence == SEQ_INDEX_DECODE) {
		// (3) The Index decoder is active and hasn't yet stored
		// the new index in coder->this_index. Get the memory usage
		// information from the Index decoder.
		//
		// NOTE: If the Index decoder doesn't yet know how much memory
		// it will eventually need, it will return a tiny value here.
		uint64_t dummy;
		if (coder->index_decoder.memconfig(coder->index_decoder.coder,
				&this_index_memusage, &dummy, 0)
				!= LZMA_OK) {
			assert(0);
			return LZMA_PROG_ERROR;
		}
	}

	// Now we know the total memory usage/requirement. If we had neither
	// old Indexes nor a new Index, this will be zero which isn't
	// acceptable as lzma_memusage() has to return non-zero on success
	// and even with an empty .xz file we will end up with a lzma_index
	// that takes some memory.
	*memusage = combined_index_memusage + this_index_memusage;
	if (*memusage == 0)
		*memusage = lzma_index_memusage(1, 0);

	*old_memlimit = coder->memlimit;

	// If requested, set a new memory usage limit.
	if (new_memlimit != 0) {
		if (new_memlimit < *memusage)
			return LZMA_MEMLIMIT_ERROR;

		// In the condition (3) we need to tell the Index decoder
		// its new memory usage limit.
		if (coder->this_index == NULL
				&& coder->sequence == SEQ_INDEX_DECODE) {
			// The Index decoder gets what is left of the new
			// limit after the already-combined Indexes.
			const uint64_t idec_new_memlimit = new_memlimit
					- combined_index_memusage;

			assert(this_index_memusage > 0);
			assert(idec_new_memlimit > 0);

			uint64_t dummy1;
			uint64_t dummy2;

			if (coder->index_decoder.memconfig(
					coder->index_decoder.coder,
					&dummy1, &dummy2, idec_new_memlimit)
					!= LZMA_OK) {
				assert(0);
				return LZMA_PROG_ERROR;
			}
		}

		coder->memlimit = new_memlimit;
	}

	return LZMA_OK;
}


/// Frees the Index decoder, both Indexes that the coder may still hold,
/// and finally the coder structure itself.
static void
file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
{
	lzma_file_info_coder *coder = coder_ptr;

	lzma_next_end(&coder->index_decoder, allocator);
	lzma_index_end(coder->this_index, allocator);
	lzma_index_end(coder->combined_index, allocator);

	lzma_free(coder, allocator);
	return;
}


/// Initializes the file info decoder, or resets it if next->coder already
/// exists.
///
/// seek_pos is where the wanted file position is stored when
/// LZMA_SEEK_NEEDED is returned, dest_index is where the final index is
/// stored on success, and file_size is the size of the input file.
///
/// Returns LZMA_OK on success, LZMA_PROG_ERROR if dest_index is NULL, or
/// LZMA_MEM_ERROR if allocating the coder fails.
/// NOTE(review): lzma_next_coder_init() is a macro defined elsewhere;
/// confirm if it can add other return paths.
static lzma_ret
lzma_file_info_decoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator, uint64_t *seek_pos,
		lzma_index **dest_index,
		uint64_t memlimit, uint64_t file_size)
{
	lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);

	if (dest_index == NULL)
		return LZMA_PROG_ERROR;

	lzma_file_info_coder *coder = next->coder;
	if (coder == NULL) {
		coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
		if (coder == NULL)
			return LZMA_MEM_ERROR;

		next->coder = coder;
		next->code = &file_info_decode;
		next->end = &file_info_decoder_end;
		next->memconfig = &file_info_decoder_memconfig;

		// These must have valid values before the
		// lzma_index_end() calls below.
		coder->index_decoder = LZMA_NEXT_CODER_INIT;
		coder->this_index = NULL;
		coder->combined_index = NULL;
	}

	coder->sequence = SEQ_MAGIC_BYTES;
	coder->file_cur_pos = 0;
	coder->file_target_pos = 0;
	coder->file_size = file_size;

	// Free the Indexes possibly left from an earlier use of this coder.
	// With a newly allocated coder these pointers are NULL; presumably
	// lzma_index_end() accepts NULL (verify against its documentation).
	lzma_index_end(coder->this_index, allocator);
	coder->this_index = NULL;

	lzma_index_end(coder->combined_index, allocator);
	coder->combined_index = NULL;

	coder->stream_padding = 0;

	coder->dest_index = dest_index;
	coder->external_seek_pos = seek_pos;

	// If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
	// won't return 0 (which would indicate an error).
	coder->memlimit = my_max(1, memlimit);

	// Prepare these for reading the first Stream Header into coder->temp.
	coder->temp_pos = 0;
	coder->temp_size = LZMA_STREAM_HEADER_SIZE;

	return LZMA_OK;
}


/// Public API function: initializes strm as a file info decoder.
/// &strm->seek_pos is passed to the decoder as the location for the seek
/// requests; the other arguments are passed through as is.
///
/// NOTE(review): lzma_next_strm_init() is a macro defined elsewhere; it
/// presumably returns early if the initialization fails. Confirm against
/// its definition.
extern LZMA_API(lzma_ret)
lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
		uint64_t memlimit, uint64_t file_size)
{
	lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
			dest_index, memlimit, file_size);

	// We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
	// lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
	// combination in a sane way. Applications still need to be careful
	// if they use LZMA_FINISH so that they remember to reset it back
	// to LZMA_RUN after seeking if needed.
	strm->internal->supported_actions[LZMA_RUN] = true;
	strm->internal->supported_actions[LZMA_FINISH] = true;

	return LZMA_OK;
}