1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file file_info.c 6 /// \brief Decode .xz file information into a lzma_index structure 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "index_decoder.h" 13 14 15 typedef struct { 16 enum { 17 SEQ_MAGIC_BYTES, 18 SEQ_PADDING_SEEK, 19 SEQ_PADDING_DECODE, 20 SEQ_FOOTER, 21 SEQ_INDEX_INIT, 22 SEQ_INDEX_DECODE, 23 SEQ_HEADER_DECODE, 24 SEQ_HEADER_COMPARE, 25 } sequence; 26 27 /// Absolute position of in[*in_pos] in the file. All code that 28 /// modifies *in_pos also updates this. seek_to_pos() needs this 29 /// to determine if we need to request the application to seek for 30 /// us or if we can do the seeking internally by adjusting *in_pos. 31 uint64_t file_cur_pos; 32 33 /// This refers to absolute positions of interesting parts of the 34 /// input file. Sometimes it points to the *beginning* of a specific 35 /// field and sometimes to the *end* of a field. The current target 36 /// position at each moment is explained in the comments. 37 uint64_t file_target_pos; 38 39 /// Size of the .xz file (from the application). 40 uint64_t file_size; 41 42 /// Index decoder 43 lzma_next_coder index_decoder; 44 45 /// Number of bytes remaining in the Index field that is currently 46 /// being decoded. 47 lzma_vli index_remaining; 48 49 /// The Index decoder will store the decoded Index in this pointer. 50 lzma_index *this_index; 51 52 /// Amount of Stream Padding in the current Stream. 53 lzma_vli stream_padding; 54 55 /// The final combined index is collected here. 56 lzma_index *combined_index; 57 58 /// Pointer from the application where to store the index information 59 /// after successful decoding. 60 lzma_index **dest_index; 61 62 /// Pointer to lzma_stream.seek_pos to be used when returning 63 /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed. 
64 uint64_t *external_seek_pos; 65 66 /// Memory usage limit 67 uint64_t memlimit; 68 69 /// Stream Flags from the very beginning of the file. 70 lzma_stream_flags first_header_flags; 71 72 /// Stream Flags from Stream Header of the current Stream. 73 lzma_stream_flags header_flags; 74 75 /// Stream Flags from Stream Footer of the current Stream. 76 lzma_stream_flags footer_flags; 77 78 size_t temp_pos; 79 size_t temp_size; 80 uint8_t temp[8192]; 81 82 } lzma_file_info_coder; 83 84 85 /// Copies data from in[*in_pos] into coder->temp until 86 /// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos 87 /// in sync with *in_pos. Returns true if more input is needed. 88 static bool 89 fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in, 90 size_t *restrict in_pos, size_t in_size) 91 { 92 coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size, 93 coder->temp, &coder->temp_pos, coder->temp_size); 94 return coder->temp_pos < coder->temp_size; 95 } 96 97 98 /// Seeks to the absolute file position specified by target_pos. 99 /// This tries to do the seeking by only modifying *in_pos, if possible. 100 /// The main benefit of this is that if one passes the whole file at once 101 /// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED 102 /// as all the seeking can be done by adjusting *in_pos in this function. 103 /// 104 /// Returns true if an external seek is needed and the caller must return 105 /// LZMA_SEEK_NEEDED. 106 static bool 107 seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos, 108 size_t in_start, size_t *in_pos, size_t in_size) 109 { 110 // The input buffer doesn't extend beyond the end of the file. 111 // This has been checked by file_info_decode() already. 
112 assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos); 113 114 const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start); 115 const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos); 116 117 bool external_seek_needed; 118 119 if (target_pos >= pos_min && target_pos <= pos_max) { 120 // The requested position is available in the current input 121 // buffer or right after it. That is, in a corner case we 122 // end up setting *in_pos == in_size and thus will immediately 123 // need new input bytes from the application. 124 *in_pos += (size_t)(target_pos - coder->file_cur_pos); 125 external_seek_needed = false; 126 } else { 127 // Ask the application to seek the input file. 128 *coder->external_seek_pos = target_pos; 129 external_seek_needed = true; 130 131 // Mark the whole input buffer as used. This way 132 // lzma_stream.total_in will have a better estimate 133 // of the amount of data read. It still won't be perfect 134 // as the value will depend on the input buffer size that 135 // the application uses, but it should be good enough for 136 // those few who want an estimate. 137 *in_pos = in_size; 138 } 139 140 // After seeking (internal or external) the current position 141 // will match the requested target position. 142 coder->file_cur_pos = target_pos; 143 144 return external_seek_needed; 145 } 146 147 148 /// The caller sets coder->file_target_pos so that it points to the *end* 149 /// of the desired file position. This function then determines how far 150 /// backwards from that position we can seek. After seeking fill_temp() 151 /// can be used to read data into coder->temp. When fill_temp() has finished, 152 /// coder->temp[coder->temp_size] will match coder->file_target_pos. 153 /// 154 /// This also validates that coder->target_file_pos is sane in sense that 155 /// we aren't trying to seek too far backwards (too close or beyond the 156 /// beginning of the file). 
157 static lzma_ret 158 reverse_seek(lzma_file_info_coder *coder, 159 size_t in_start, size_t *in_pos, size_t in_size) 160 { 161 // Check that there is enough data before the target position 162 // to contain at least Stream Header and Stream Footer. If there 163 // isn't, the file cannot be valid. 164 if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE) 165 return LZMA_DATA_ERROR; 166 167 coder->temp_pos = 0; 168 169 // The Stream Header at the very beginning of the file gets handled 170 // specially in SEQ_MAGIC_BYTES and thus we will never need to seek 171 // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes 172 // we avoid a useless external seek after SEQ_MAGIC_BYTES if the 173 // application uses an extremely small input buffer and the input 174 // file is very small. 175 if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE 176 < sizeof(coder->temp)) 177 coder->temp_size = (size_t)(coder->file_target_pos 178 - LZMA_STREAM_HEADER_SIZE); 179 else 180 coder->temp_size = sizeof(coder->temp); 181 182 // The above if-statements guarantee this. This is important because 183 // the Stream Header/Footer decoders assume that there's at least 184 // LZMA_STREAM_HEADER_SIZE bytes in coder->temp. 185 assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE); 186 187 if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size, 188 in_start, in_pos, in_size)) 189 return LZMA_SEEK_NEEDED; 190 191 return LZMA_OK; 192 } 193 194 195 /// Gets the number of zero-bytes at the end of the buffer. 196 static size_t 197 get_padding_size(const uint8_t *buf, size_t buf_size) 198 { 199 size_t padding = 0; 200 while (buf_size > 0 && buf[--buf_size] == 0x00) 201 ++padding; 202 203 return padding; 204 } 205 206 207 /// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR 208 /// is used to tell the application that Magic Bytes didn't match. 
/// In other Stream Header/Footer fields (in the middle/end of the file) it
/// could be a bit confusing to return LZMA_FORMAT_ERROR as we already know
/// that there is a valid Stream Header at the beginning of the file. For
/// those cases this function is used to convert LZMA_FORMAT_ERROR to
/// LZMA_DATA_ERROR. Any other return value is passed through unchanged.
static lzma_ret
hide_format_error(lzma_ret ret)
{
	if (ret == LZMA_FORMAT_ERROR)
		ret = LZMA_DATA_ERROR;

	return ret;
}


/// Calls the Index decoder and updates coder->index_remaining.
/// This is a separate function because the input can be either directly
/// from the application or from coder->temp.
///
/// If update_file_cur_pos is true, coder->file_cur_pos is advanced by the
/// number of bytes the Index decoder consumed. The callers pass true when
/// the input comes from the application and false when it comes from
/// coder->temp.
///
/// Returns the return value of the Index decoder as is.
static lzma_ret
decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, bool update_file_cur_pos)
{
	const size_t in_start = *in_pos;

	// The Index decoder produces no output, thus the output buffer
	// arguments are NULL/0.
	const lzma_ret ret = coder->index_decoder.code(
			coder->index_decoder.coder,
			allocator, in, in_pos, in_size,
			NULL, NULL, 0, LZMA_RUN);

	coder->index_remaining -= *in_pos - in_start;

	if (update_file_cur_pos)
		coder->file_cur_pos += *in_pos - in_start;

	return ret;
}


/// The main decoding function. It first decodes the Stream Header at the
/// beginning of the file and then processes the file backwards starting
/// from the end: for each Stream it skips Stream Padding, decodes
/// Stream Footer, Index, and Stream Header, and combines the Index with
/// the Indexes decoded earlier.
///
/// Return values set directly in this function:
///   - LZMA_OK: More input is needed.
///   - LZMA_SEEK_NEEDED: The application must seek to the position that
///     was stored via coder->external_seek_pos before providing more input.
///   - LZMA_STREAM_END: The beginning of the file was reached. The combined
///     index has been stored to *coder->dest_index.
///   - LZMA_FORMAT_ERROR: The file is smaller than a Stream Header or
///     decoding the first Stream Header failed with this code.
///   - LZMA_DATA_ERROR: The file is inconsistent.
///   - LZMA_PROG_ERROR: Internal sanity check failed.
///
/// Errors from the called decoders and lzma_index functions are propagated
/// with return_if_error().
///
/// The output buffer arguments and action are unused.
static lzma_ret
file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size,
		uint8_t *restrict out lzma_attribute((__unused__)),
		size_t *restrict out_pos lzma_attribute((__unused__)),
		size_t out_size lzma_attribute((__unused__)),
		lzma_action action lzma_attribute((__unused__)))
{
	lzma_file_info_coder *coder = coder_ptr;
	const size_t in_start = *in_pos;

	// If the caller provides input past the end of the file, trim
	// the extra bytes from the buffer so that we won't read too far.
	assert(coder->file_size >= coder->file_cur_pos);
	if (coder->file_size - coder->file_cur_pos < in_size - in_start)
		in_size = in_start
			+ (size_t)(coder->file_size - coder->file_cur_pos);

	// A "break" inside the switch restarts the loop so that the switch
	// is evaluated again with the updated coder->sequence.
	while (true)
	switch (coder->sequence) {
	case SEQ_MAGIC_BYTES:
		// Decode the Stream Header at the beginning of the file
		// first to check if the Magic Bytes match. The flags
		// are stored in coder->first_header_flags so that we
		// don't need to seek to it again.
		//
		// Check that the file is big enough to contain at least
		// Stream Header.
		if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
			return LZMA_FORMAT_ERROR;

		// Read the Stream Header field into coder->temp.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// This is the only Stream Header/Footer decoding where we
		// want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
		// match. Elsewhere it will be converted to LZMA_DATA_ERROR.
		return_if_error(lzma_stream_header_decode(
				&coder->first_header_flags, coder->temp));

		// Now that we know that the Magic Bytes match, check the
		// file size. It's better to do this here after checking the
		// Magic Bytes since this way we can give LZMA_FORMAT_ERROR
		// instead of LZMA_DATA_ERROR when the Magic Bytes don't
		// match in a file that is too big or isn't a multiple of
		// four bytes.
		if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
			return LZMA_DATA_ERROR;

		// Start looking for Stream Padding and Stream Footer
		// at the end of the file.
		coder->file_target_pos = coder->file_size;

	// Fall through

	case SEQ_PADDING_SEEK:
		// The sequence is updated before seeking so that decoding
		// continues from SEQ_PADDING_DECODE also when reverse_seek()
		// makes us return LZMA_SEEK_NEEDED.
		coder->sequence = SEQ_PADDING_DECODE;
		return_if_error(reverse_seek(
				coder, in_start, in_pos, in_size));

	// Fall through

	case SEQ_PADDING_DECODE: {
		// Copy to coder->temp first. This keeps the code simpler if
		// the application only provides input a few bytes at a time.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Scan the buffer backwards to get the size of the
		// Stream Padding field (if any).
		const size_t new_padding = get_padding_size(
				coder->temp, coder->temp_size);
		coder->stream_padding += new_padding;

		// Set the target position to the beginning of Stream Padding
		// that has been observed so far. If all Stream Padding has
		// been seen, then the target position will be at the end
		// of the Stream Footer field.
		coder->file_target_pos -= new_padding;

		if (new_padding == coder->temp_size) {
			// The whole buffer was padding. Seek backwards in
			// the file to get more input.
			coder->sequence = SEQ_PADDING_SEEK;
			break;
		}

		// Size of Stream Padding must be a multiple of 4 bytes.
		if (coder->stream_padding & 3)
			return LZMA_DATA_ERROR;

		coder->sequence = SEQ_FOOTER;

		// Calculate the amount of non-padding data in coder->temp.
		coder->temp_size -= new_padding;
		coder->temp_pos = coder->temp_size;

		// We can avoid an external seek if the whole Stream Footer
		// is already in coder->temp. In that case SEQ_FOOTER won't
		// read more input and will find the Stream Footer from
		// coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
		//
		// Otherwise we will need to seek. The seeking is done so
		// that Stream Footer will be at the end of coder->temp.
		// This way it's likely that we also get a complete Index
		// field into coder->temp without needing a separate seek
		// for that (unless the Index field is big).
		if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
			return_if_error(reverse_seek(
					coder, in_start, in_pos, in_size));
	}

	// Fall through

	case SEQ_FOOTER:
		// Copy the Stream Footer field into coder->temp.
		// If Stream Footer was already available in coder->temp
		// in SEQ_PADDING_DECODE, then this does nothing.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Make coder->file_target_pos and coder->temp_size point
		// to the beginning of Stream Footer and thus to the end
		// of the Index field. coder->temp_pos will be updated
		// a bit later.
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;

		// Decode Stream Footer.
		return_if_error(hide_format_error(lzma_stream_footer_decode(
				&coder->footer_flags,
				coder->temp + coder->temp_size)));

		// Check that we won't seek past the beginning of the file.
		//
		// LZMA_STREAM_HEADER_SIZE is added because there must be
		// space for Stream Header too even though we won't seek
		// there before decoding the Index field.
		//
		// There's no risk of integer overflow here because
		// Backward Size cannot be greater than 2^34.
		if (coder->file_target_pos < coder->footer_flags.backward_size
				+ LZMA_STREAM_HEADER_SIZE)
			return LZMA_DATA_ERROR;

		// Set the target position to the beginning of the Index field.
		coder->file_target_pos -= coder->footer_flags.backward_size;
		coder->sequence = SEQ_INDEX_INIT;

		// We can avoid an external seek if the whole Index field is
		// already available in coder->temp.
		if (coder->temp_size >= coder->footer_flags.backward_size) {
			// Set coder->temp_pos to point to the beginning
			// of the Index.
			coder->temp_pos = coder->temp_size
					- coder->footer_flags.backward_size;
		} else {
			// These are set to zero to indicate that there's no
			// useful data (Index or anything else) in coder->temp.
			coder->temp_pos = 0;
			coder->temp_size = 0;

			// Seek to the beginning of the Index field.
			if (seek_to_pos(coder, coder->file_target_pos,
					in_start, in_pos, in_size))
				return LZMA_SEEK_NEEDED;
		}

	// Fall through

	case SEQ_INDEX_INIT: {
		// Calculate the amount of memory already used by the earlier
		// Indexes so that we know how big memory limit to pass to
		// the Index decoder.
		//
		// NOTE: When there are multiple Streams, the separate
		// lzma_index structures can use more RAM (as measured by
		// lzma_index_memused()) than the final combined lzma_index.
		// Thus memlimit may need to be slightly higher than the final
		// calculated memory usage will be. This is perhaps a bit
		// confusing to the application, but I think it shouldn't
		// cause problems in practice.
		uint64_t memused = 0;
		if (coder->combined_index != NULL) {
			memused = lzma_index_memused(coder->combined_index);
			assert(memused <= coder->memlimit);
			if (memused > coder->memlimit) // Extra sanity check
				return LZMA_PROG_ERROR;
		}

		// Initialize the Index decoder.
		return_if_error(lzma_index_decoder_init(
				&coder->index_decoder, allocator,
				&coder->this_index,
				coder->memlimit - memused));

		coder->index_remaining = coder->footer_flags.backward_size;
		coder->sequence = SEQ_INDEX_DECODE;
	}

	// Fall through

	case SEQ_INDEX_DECODE: {
		// Decode (a part of) the Index. If the whole Index is already
		// in coder->temp, read it from there. Otherwise read from
		// in[*in_pos] onwards. Note that decode_index() updates
		// coder->index_remaining and optionally coder->file_cur_pos.
		lzma_ret ret;
		if (coder->temp_size != 0) {
			assert(coder->temp_size - coder->temp_pos
					== coder->index_remaining);
			ret = decode_index(coder, allocator, coder->temp,
					&coder->temp_pos, coder->temp_size,
					false);
		} else {
			// Don't give the decoder more input than the known
			// remaining size of the Index field.
			size_t in_stop = in_size;
			if (in_size - *in_pos > coder->index_remaining)
				in_stop = *in_pos
					+ (size_t)(coder->index_remaining);

			ret = decode_index(coder, allocator,
					in, in_pos, in_stop, true);
		}

		switch (ret) {
		case LZMA_OK:
			// If the Index decoder asks for more input when we
			// have already given it as much input as Backward Size
			// indicated, the file is invalid.
			if (coder->index_remaining == 0)
				return LZMA_DATA_ERROR;

			// We cannot get here if we were reading Index from
			// coder->temp because when reading from coder->temp
			// we give the Index decoder exactly
			// coder->index_remaining bytes of input.
			assert(coder->temp_size == 0);

			return LZMA_OK;

		case LZMA_STREAM_END:
			// If the decoding seems to be successful, check also
			// that the Index decoder consumed as much input as
			// indicated by the Backward Size field.
			if (coder->index_remaining != 0)
				return LZMA_DATA_ERROR;

			break;

		default:
			return ret;
		}

		// Calculate how much the Index tells us to seek backwards
		// (relative to the beginning of the Index): Total size of
		// all Blocks plus the size of the Stream Header field.
		// No integer overflow here because lzma_index_total_size()
		// cannot return a value greater than LZMA_VLI_MAX.
		const uint64_t seek_amount
				= lzma_index_total_size(coder->this_index)
					+ LZMA_STREAM_HEADER_SIZE;

		// Check that Index is sane in sense that seek_amount won't
		// make us seek past the beginning of the file when locating
		// the Stream Header.
		//
		// coder->file_target_pos still points to the beginning of
		// the Index field.
		if (coder->file_target_pos < seek_amount)
			return LZMA_DATA_ERROR;

		// Set the target to the beginning of Stream Header.
		coder->file_target_pos -= seek_amount;

		if (coder->file_target_pos == 0) {
			// We would seek to the beginning of the file, but
			// since we already decoded that Stream Header in
			// SEQ_MAGIC_BYTES, we can use the cached value from
			// coder->first_header_flags to avoid the seek.
			coder->header_flags = coder->first_header_flags;
			coder->sequence = SEQ_HEADER_COMPARE;
			break;
		}

		coder->sequence = SEQ_HEADER_DECODE;

		// Make coder->file_target_pos point to the end of
		// the Stream Header field.
		coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;

		// If coder->temp_size is non-zero, it points to the end
		// of the Index field. Then the beginning of the Index
		// field is at coder->temp[coder->temp_size
		// - coder->footer_flags.backward_size].
		assert(coder->temp_size == 0 || coder->temp_size
				>= coder->footer_flags.backward_size);

		// If coder->temp contained the whole Index, see if it has
		// enough data to contain also the Stream Header. If so,
		// we avoid an external seek.
		//
		// NOTE: This can happen only with small .xz files and only
		// for the non-first Stream as the Stream Flags of the first
		// Stream are cached and already handled a few lines above.
		// So this isn't as useful as the other seek-avoidance cases.
		if (coder->temp_size != 0 && coder->temp_size
				- coder->footer_flags.backward_size
				>= seek_amount) {
			// Make temp_pos and temp_size point to the *end* of
			// Stream Header so that SEQ_HEADER_DECODE will find
			// the start of Stream Header from coder->temp[
			// coder->temp_size - LZMA_STREAM_HEADER_SIZE].
			coder->temp_pos = coder->temp_size
					- coder->footer_flags.backward_size
					- seek_amount
					+ LZMA_STREAM_HEADER_SIZE;
			coder->temp_size = coder->temp_pos;
		} else {
			// Seek so that Stream Header will be at the end of
			// coder->temp. With typical multi-Stream files we
			// will usually also get the Stream Footer and Index
			// of the *previous* Stream in coder->temp and thus
			// won't need a separate seek for them.
			return_if_error(reverse_seek(coder,
					in_start, in_pos, in_size));
		}
	}

	// Fall through

	case SEQ_HEADER_DECODE:
		// Copy the Stream Header field into coder->temp.
		// If Stream Header was already available in coder->temp
		// in SEQ_INDEX_DECODE, then this does nothing.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Make all these point to the beginning of Stream Header.
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_pos = coder->temp_size;

		// Decode the Stream Header.
		return_if_error(hide_format_error(lzma_stream_header_decode(
				&coder->header_flags,
				coder->temp + coder->temp_size)));

		coder->sequence = SEQ_HEADER_COMPARE;

	// Fall through

	case SEQ_HEADER_COMPARE:
		// Compare Stream Header against Stream Footer. They must
		// match.
		return_if_error(lzma_stream_flags_compare(
				&coder->header_flags, &coder->footer_flags));

		// Store the decoded Stream Flags into the Index. Use the
		// Footer Flags because it contains Backward Size, although
		// it shouldn't matter in practice.
		if (lzma_index_stream_flags(coder->this_index,
				&coder->footer_flags) != LZMA_OK)
			return LZMA_PROG_ERROR;

		// Store also the size of the Stream Padding field. It is
		// needed to calculate the offsets of the Streams correctly.
		if (lzma_index_stream_padding(coder->this_index,
				coder->stream_padding) != LZMA_OK)
			return LZMA_PROG_ERROR;

		// Reset it so that it's ready for the next Stream.
		coder->stream_padding = 0;

		// Append the earlier decoded Indexes after this_index.
		// The file is processed backwards so this_index belongs
		// to a Stream that is earlier in the file.
		if (coder->combined_index != NULL)
			return_if_error(lzma_index_cat(coder->this_index,
					coder->combined_index, allocator));

		coder->combined_index = coder->this_index;
		coder->this_index = NULL;

		// If the whole file was decoded, tell the caller that we
		// are finished.
		if (coder->file_target_pos == 0) {
			// The combined index must indicate the same file
			// size as was told to us at initialization.
			assert(lzma_index_file_size(coder->combined_index)
					== coder->file_size);

			// Make the combined index available to
			// the application.
			*coder->dest_index = coder->combined_index;
			coder->combined_index = NULL;

			// Mark the input buffer as used since we may have
			// done internal seeking and thus don't know how
			// many input bytes were actually used. This way
			// lzma_stream.total_in gets a slightly better
			// estimate of the amount of input used.
			*in_pos = in_size;
			return LZMA_STREAM_END;
		}

		// We didn't hit the beginning of the file yet, so continue
		// reading backwards in the file. If we have unprocessed
		// data in coder->temp, use it before requesting more data
		// from the application.
		//
		// coder->file_target_pos, coder->temp_size, and
		// coder->temp_pos all point to the beginning of Stream Header
		// and thus the end of the previous Stream in the file.
		coder->sequence = coder->temp_size > 0
				? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
		break;

	default:
		assert(0);
		return LZMA_PROG_ERROR;
	}
}


/// Reports the current memory usage and memory usage limit and optionally
/// sets a new limit.
///
/// *memusage is set to the memory usage calculated below (never zero) and
/// *old_memlimit to coder->memlimit as it was before this call.
/// If new_memlimit is non-zero, it becomes the new limit unless it is
/// smaller than *memusage, in which case LZMA_MEMLIMIT_ERROR is returned
/// and the limit isn't changed.
///
/// Returns LZMA_PROG_ERROR if the memconfig function of the Index decoder
/// fails, and LZMA_OK otherwise.
static lzma_ret
file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
		uint64_t *old_memlimit, uint64_t new_memlimit)
{
	lzma_file_info_coder *coder = coder_ptr;

	// The memory usage calculation comes from three things:
	//
	// (1) The Indexes that have already been decoded and processed into
	//     coder->combined_index.
	//
	// (2) The latest Index in coder->this_index that has been decoded but
	//     not yet put into coder->combined_index.
	//
	// (3) The latest Index that we have started decoding but haven't
	//     finished and thus isn't available in coder->this_index yet.
	//     Memory usage and limit information needs to be communicated
	//     from/to coder->index_decoder.
	//
	// Care has to be taken to not do both (2) and (3) when calculating
	// the memory usage.
	uint64_t combined_index_memusage = 0;
	uint64_t this_index_memusage = 0;

	// (1) If we have already successfully decoded one or more Indexes,
	// get their memory usage.
	if (coder->combined_index != NULL)
		combined_index_memusage = lzma_index_memused(
				coder->combined_index);

	// Choose between (2), (3), or neither.
	if (coder->this_index != NULL) {
		// (2) The latest Index is available. Use its memory usage.
		this_index_memusage = lzma_index_memused(coder->this_index);

	} else if (coder->sequence == SEQ_INDEX_DECODE) {
		// (3) The Index decoder is active and hasn't yet stored
		// the new index in coder->this_index. Get the memory usage
		// information from the Index decoder.
		//
		// NOTE: If the Index decoder doesn't yet know how much memory
		// it will eventually need, it will return a tiny value here.
		uint64_t dummy;
		if (coder->index_decoder.memconfig(coder->index_decoder.coder,
				&this_index_memusage, &dummy, 0)
				!= LZMA_OK) {
			assert(0);
			return LZMA_PROG_ERROR;
		}
	}

	// Now we know the total memory usage/requirement. If we had neither
	// old Indexes nor a new Index, this will be zero which isn't
	// acceptable as lzma_memusage() has to return non-zero on success
	// and even with an empty .xz file we will end up with a lzma_index
	// that takes some memory.
	*memusage = combined_index_memusage + this_index_memusage;
	if (*memusage == 0)
		*memusage = lzma_index_memusage(1, 0);

	*old_memlimit = coder->memlimit;

	// If requested, set a new memory usage limit.
	if (new_memlimit != 0) {
		if (new_memlimit < *memusage)
			return LZMA_MEMLIMIT_ERROR;

		// In the condition (3) we need to tell the Index decoder
		// its new memory usage limit.
		if (coder->this_index == NULL
				&& coder->sequence == SEQ_INDEX_DECODE) {
			// The Index decoder gets what is left of the new
			// limit after the already-combined Indexes.
			const uint64_t idec_new_memlimit = new_memlimit
					- combined_index_memusage;

			assert(this_index_memusage > 0);
			assert(idec_new_memlimit > 0);

			uint64_t dummy1;
			uint64_t dummy2;

			if (coder->index_decoder.memconfig(
					coder->index_decoder.coder,
					&dummy1, &dummy2, idec_new_memlimit)
					!= LZMA_OK) {
				assert(0);
				return LZMA_PROG_ERROR;
			}
		}

		coder->memlimit = new_memlimit;
	}

	return LZMA_OK;
}


/// Frees the Index decoder, the Indexes that haven't been handed over to
/// the application (coder->this_index and coder->combined_index), and
/// the coder structure itself.
static void
file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
{
	lzma_file_info_coder *coder = coder_ptr;

	lzma_next_end(&coder->index_decoder, allocator);
	lzma_index_end(coder->this_index, allocator);
	lzma_index_end(coder->combined_index, allocator);

	lzma_free(coder, allocator);
	return;
}


/// Initializes the file info decoder in *next. The coder structure is
/// allocated if next->coder is NULL; otherwise the existing one is reused
/// and its state is reset.
///
/// seek_pos is where the wanted file position is stored when
/// LZMA_SEEK_NEEDED is returned, dest_index is where the final index is
/// stored on success, memlimit is the memory usage limit (0 is
/// converted to 1), and file_size is the size of the input file.
///
/// Returns LZMA_PROG_ERROR if dest_index is NULL, LZMA_MEM_ERROR if
/// allocating the coder structure fails, and LZMA_OK on success.
static lzma_ret
lzma_file_info_decoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator, uint64_t *seek_pos,
		lzma_index **dest_index,
		uint64_t memlimit, uint64_t file_size)
{
	lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);

	if (dest_index == NULL)
		return LZMA_PROG_ERROR;

	lzma_file_info_coder *coder = next->coder;
	if (coder == NULL) {
		coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
		if (coder == NULL)
			return LZMA_MEM_ERROR;

		next->coder = coder;
		next->code = &file_info_decode;
		next->end = &file_info_decoder_end;
		next->memconfig = &file_info_decoder_memconfig;

		// These must be valid before the lzma_index_end() calls
		// below and before file_info_decoder_end() can be called.
		coder->index_decoder = LZMA_NEXT_CODER_INIT;
		coder->this_index = NULL;
		coder->combined_index = NULL;
	}

	coder->sequence = SEQ_MAGIC_BYTES;
	coder->file_cur_pos = 0;
	coder->file_target_pos = 0;
	coder->file_size = file_size;

	// Free the Indexes that may be left from a previous use of
	// this coder.
	lzma_index_end(coder->this_index, allocator);
	coder->this_index = NULL;

	lzma_index_end(coder->combined_index, allocator);
	coder->combined_index = NULL;

	coder->stream_padding = 0;

	coder->dest_index = dest_index;
	coder->external_seek_pos = seek_pos;

	// If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
	// won't return 0 (which would indicate an error).
	coder->memlimit = my_max(1, memlimit);

	// Prepare these for reading the first Stream Header into coder->temp.
	coder->temp_pos = 0;
	coder->temp_size = LZMA_STREAM_HEADER_SIZE;

	return LZMA_OK;
}


/// Public API: initializes strm as a file info decoder.
///
/// The arguments are passed to lzma_file_info_decoder_init() with
/// &strm->seek_pos as the location for the seek requests.
/// NOTE(review): lzma_next_strm_init() is a macro defined elsewhere;
/// presumably it returns from this function on failure - confirm.
extern LZMA_API(lzma_ret)
lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
		uint64_t memlimit, uint64_t file_size)
{
	lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
			dest_index, memlimit, file_size);

	// We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
	// lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
	// combination in a sane way. Applications still need to be careful
	// if they use LZMA_FINISH so that they remember to reset it back
	// to LZMA_RUN after seeking if needed.
	strm->internal->supported_actions[LZMA_RUN] = true;
	strm->internal->supported_actions[LZMA_FINISH] = true;

	return LZMA_OK;
}