1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file file_info.c 6 /// \brief Decode .xz file information into a lzma_index structure 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "index_decoder.h" 13 14 15 typedef struct { 16 enum { 17 SEQ_MAGIC_BYTES, 18 SEQ_PADDING_SEEK, 19 SEQ_PADDING_DECODE, 20 SEQ_FOOTER, 21 SEQ_INDEX_INIT, 22 SEQ_INDEX_DECODE, 23 SEQ_HEADER_DECODE, 24 SEQ_HEADER_COMPARE, 25 } sequence; 26 27 /// Absolute position of in[*in_pos] in the file. All code that 28 /// modifies *in_pos also updates this. seek_to_pos() needs this 29 /// to determine if we need to request the application to seek for 30 /// us or if we can do the seeking internally by adjusting *in_pos. 31 uint64_t file_cur_pos; 32 33 /// This refers to absolute positions of interesting parts of the 34 /// input file. Sometimes it points to the *beginning* of a specific 35 /// field and sometimes to the *end* of a field. The current target 36 /// position at each moment is explained in the comments. 37 uint64_t file_target_pos; 38 39 /// Size of the .xz file (from the application). 40 uint64_t file_size; 41 42 /// Index decoder 43 lzma_next_coder index_decoder; 44 45 /// Number of bytes remaining in the Index field that is currently 46 /// being decoded. 47 lzma_vli index_remaining; 48 49 /// The Index decoder will store the decoded Index in this pointer. 50 lzma_index *this_index; 51 52 /// Amount of Stream Padding in the current Stream. 53 lzma_vli stream_padding; 54 55 /// The final combined index is collected here. 56 lzma_index *combined_index; 57 58 /// Pointer from the application where to store the index information 59 /// after successful decoding. 60 lzma_index **dest_index; 61 62 /// Pointer to lzma_stream.seek_pos to be used when returning 63 /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed. 
64 uint64_t *external_seek_pos; 65 66 /// Memory usage limit 67 uint64_t memlimit; 68 69 /// Stream Flags from the very beginning of the file. 70 lzma_stream_flags first_header_flags; 71 72 /// Stream Flags from Stream Header of the current Stream. 73 lzma_stream_flags header_flags; 74 75 /// Stream Flags from Stream Footer of the current Stream. 76 lzma_stream_flags footer_flags; 77 78 size_t temp_pos; 79 size_t temp_size; 80 uint8_t temp[8192]; 81 82 } lzma_file_info_coder; 83 84 85 /// Copies data from in[*in_pos] into coder->temp until 86 /// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos 87 /// in sync with *in_pos. Returns true if more input is needed. 88 static bool 89 fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in, 90 size_t *restrict in_pos, size_t in_size) 91 { 92 coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size, 93 coder->temp, &coder->temp_pos, coder->temp_size); 94 return coder->temp_pos < coder->temp_size; 95 } 96 97 98 /// Seeks to the absolute file position specified by target_pos. 99 /// This tries to do the seeking by only modifying *in_pos, if possible. 100 /// The main benefit of this is that if one passes the whole file at once 101 /// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED 102 /// as all the seeking can be done by adjusting *in_pos in this function. 103 /// 104 /// Returns true if an external seek is needed and the caller must return 105 /// LZMA_SEEK_NEEDED. 106 static bool 107 seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos, 108 size_t in_start, size_t *in_pos, size_t in_size) 109 { 110 // The input buffer doesn't extend beyond the end of the file. 111 // This has been checked by file_info_decode() already. 
112 assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos); 113 114 const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start); 115 const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos); 116 117 bool external_seek_needed; 118 119 if (target_pos >= pos_min && target_pos <= pos_max) { 120 // The requested position is available in the current input 121 // buffer or right after it. That is, in a corner case we 122 // end up setting *in_pos == in_size and thus will immediately 123 // need new input bytes from the application. 124 *in_pos += (size_t)(target_pos - coder->file_cur_pos); 125 external_seek_needed = false; 126 } else { 127 // Ask the application to seek the input file. 128 *coder->external_seek_pos = target_pos; 129 external_seek_needed = true; 130 131 // Mark the whole input buffer as used. This way 132 // lzma_stream.total_in will have a better estimate 133 // of the amount of data read. It still won't be perfect 134 // as the value will depend on the input buffer size that 135 // the application uses, but it should be good enough for 136 // those few who want an estimate. 137 *in_pos = in_size; 138 } 139 140 // After seeking (internal or external) the current position 141 // will match the requested target position. 142 coder->file_cur_pos = target_pos; 143 144 return external_seek_needed; 145 } 146 147 148 /// The caller sets coder->file_target_pos so that it points to the *end* 149 /// of the desired file position. This function then determines how far 150 /// backwards from that position we can seek. After seeking fill_temp() 151 /// can be used to read data into coder->temp. When fill_temp() has finished, 152 /// coder->temp[coder->temp_size] will match coder->file_target_pos. 153 /// 154 /// This also validates that coder->target_file_pos is sane in sense that 155 /// we aren't trying to seek too far backwards (too close or beyond the 156 /// beginning of the file). 
157 static lzma_ret 158 reverse_seek(lzma_file_info_coder *coder, 159 size_t in_start, size_t *in_pos, size_t in_size) 160 { 161 // Check that there is enough data before the target position 162 // to contain at least Stream Header and Stream Footer. If there 163 // isn't, the file cannot be valid. 164 if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE) 165 return LZMA_DATA_ERROR; 166 167 coder->temp_pos = 0; 168 169 // The Stream Header at the very beginning of the file gets handled 170 // specially in SEQ_MAGIC_BYTES and thus we will never need to seek 171 // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes 172 // we avoid a useless external seek after SEQ_MAGIC_BYTES if the 173 // application uses an extremely small input buffer and the input 174 // file is very small. 175 if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE 176 < sizeof(coder->temp)) 177 coder->temp_size = (size_t)(coder->file_target_pos 178 - LZMA_STREAM_HEADER_SIZE); 179 else 180 coder->temp_size = sizeof(coder->temp); 181 182 // The above if-statements guarantee this. This is important because 183 // the Stream Header/Footer decoders assume that there's at least 184 // LZMA_STREAM_HEADER_SIZE bytes in coder->temp. 185 assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE); 186 187 if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size, 188 in_start, in_pos, in_size)) 189 return LZMA_SEEK_NEEDED; 190 191 return LZMA_OK; 192 } 193 194 195 /// Gets the number of zero-bytes at the end of the buffer. 196 static size_t 197 get_padding_size(const uint8_t *buf, size_t buf_size) 198 { 199 size_t padding = 0; 200 while (buf_size > 0 && buf[--buf_size] == 0x00) 201 ++padding; 202 203 return padding; 204 } 205 206 207 /// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR 208 /// is used to tell the application that Magic Bytes didn't match. 
/// In other Stream Header/Footer fields (in the middle/end of the file) it
/// could be a bit confusing to return LZMA_FORMAT_ERROR as we already know
/// that there is a valid Stream Header at the beginning of the file. For
/// those cases this function is used to convert LZMA_FORMAT_ERROR to
/// LZMA_DATA_ERROR.
///
/// \return     LZMA_DATA_ERROR if ret is LZMA_FORMAT_ERROR; otherwise ret
///             unchanged.
static lzma_ret
hide_format_error(lzma_ret ret)
{
	if (ret == LZMA_FORMAT_ERROR)
		ret = LZMA_DATA_ERROR;

	return ret;
}


/// Calls the Index decoder and updates coder->index_remaining.
/// This is a separate function because the input can be either directly
/// from the application or from coder->temp.
///
/// \param      coder       File info decoder state
/// \param      allocator   Allocator passed on to the Index decoder
/// \param      in          Input buffer (application's buffer or
///                         coder->temp)
/// \param      in_pos      Read position in the input buffer; advanced by
///                         the number of bytes the Index decoder consumed
/// \param      in_size     End of the usable input in the input buffer
/// \param      update_file_cur_pos
///                         If true, coder->file_cur_pos is advanced by
///                         the number of bytes consumed. The callers pass
///                         true when reading from the application's
///                         buffer and false when reading from coder->temp.
///
/// \return     The return value of the Index decoder as is.
static lzma_ret
decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, bool update_file_cur_pos)
{
	const size_t in_start = *in_pos;

	// The Index decoder produces no output, thus the output buffer
	// arguments are NULL/0.
	const lzma_ret ret = coder->index_decoder.code(
			coder->index_decoder.coder,
			allocator, in, in_pos, in_size,
			NULL, NULL, 0, LZMA_RUN);

	coder->index_remaining -= *in_pos - in_start;

	if (update_file_cur_pos)
		coder->file_cur_pos += *in_pos - in_start;

	return ret;
}


/// The main decoding function, installed as next->code by
/// lzma_file_info_decoder_init().
///
/// After verifying the Stream Header at the beginning of the file, the
/// file is processed backwards starting from its end. For each Stream:
/// Stream Padding is measured, Stream Footer and Index are decoded, and
/// Stream Header is decoded and compared against Stream Footer. The Index
/// of each Stream is concatenated into coder->combined_index.
///
/// The output buffer arguments and the action are unused.
///
/// \return     - LZMA_OK: More input is needed.
///             - LZMA_SEEK_NEEDED: The application must seek the input
///               to the offset stored in *coder->external_seek_pos.
///             - LZMA_STREAM_END: The whole file was processed and
///               the combined index was stored to *coder->dest_index.
///             - LZMA_FORMAT_ERROR: The file is too small to contain
///               a Stream Header, or lzma_stream_header_decode()
///               reported it for the first Stream Header.
///             - LZMA_DATA_ERROR, LZMA_PROG_ERROR, or another error
///               code from the functions called here.
static lzma_ret
file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size,
		uint8_t *restrict out lzma_attribute((__unused__)),
		size_t *restrict out_pos lzma_attribute((__unused__)),
		size_t out_size lzma_attribute((__unused__)),
		lzma_action action lzma_attribute((__unused__)))
{
	lzma_file_info_coder *coder = coder_ptr;

	// Start of the caller's input. seek_to_pos() needs it to know how
	// far backwards it can seek within the current input buffer.
	const size_t in_start = *in_pos;

	// If the caller provides input past the end of the file, trim
	// the extra bytes from the buffer so that we won't read too far.
	assert(coder->file_size >= coder->file_cur_pos);
	if (coder->file_size - coder->file_cur_pos < in_size - in_start)
		in_size = in_start
			+ (size_t)(coder->file_size - coder->file_cur_pos);

	// A "break" inside the switch continues this loop and thus
	// re-dispatches on the updated coder->sequence.
	while (true)
	switch (coder->sequence) {
	case SEQ_MAGIC_BYTES:
		// Decode the Stream Header at the beginning of the file
		// first to check if the Magic Bytes match. The flags
		// are stored in coder->first_header_flags so that we
		// don't need to seek to it again.
		//
		// Check that the file is big enough to contain at least
		// Stream Header.
		if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
			return LZMA_FORMAT_ERROR;

		// Read the Stream Header field into coder->temp.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// This is the only Stream Header/Footer decoding where we
		// want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
		// match. Elsewhere it will be converted to LZMA_DATA_ERROR.
		return_if_error(lzma_stream_header_decode(
				&coder->first_header_flags, coder->temp));

		// Now that we know that the Magic Bytes match, check the
		// file size. It's better to do this here after checking the
		// Magic Bytes since this way we can give LZMA_FORMAT_ERROR
		// instead of LZMA_DATA_ERROR when the Magic Bytes don't
		// match in a file that is too big or isn't a multiple of
		// four bytes.
		if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
			return LZMA_DATA_ERROR;

		// Start looking for Stream Padding and Stream Footer
		// at the end of the file.
		coder->file_target_pos = coder->file_size;
		FALLTHROUGH;

	case SEQ_PADDING_SEEK:
		// The sequence is updated before seeking so that if
		// reverse_seek() returns LZMA_SEEK_NEEDED, the next call
		// continues from SEQ_PADDING_DECODE.
		coder->sequence = SEQ_PADDING_DECODE;
		return_if_error(reverse_seek(
				coder, in_start, in_pos, in_size));
		FALLTHROUGH;

	case SEQ_PADDING_DECODE: {
		// Copy to coder->temp first. This keeps the code simpler if
		// the application only provides input a few bytes at a time.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Scan the buffer backwards to get the size of the
		// Stream Padding field (if any).
		const size_t new_padding = get_padding_size(
				coder->temp, coder->temp_size);
		coder->stream_padding += new_padding;

		// Set the target position to the beginning of Stream Padding
		// that has been observed so far. If all Stream Padding has
		// been seen, then the target position will be at the end
		// of the Stream Footer field.
		coder->file_target_pos -= new_padding;

		if (new_padding == coder->temp_size) {
			// The whole buffer was padding. Seek backwards in
			// the file to get more input.
			coder->sequence = SEQ_PADDING_SEEK;
			break;
		}

		// Size of Stream Padding must be a multiple of 4 bytes.
		if (coder->stream_padding & 3)
			return LZMA_DATA_ERROR;

		coder->sequence = SEQ_FOOTER;

		// Calculate the amount of non-padding data in coder->temp.
		coder->temp_size -= new_padding;
		coder->temp_pos = coder->temp_size;

		// We can avoid an external seek if the whole Stream Footer
		// is already in coder->temp. In that case SEQ_FOOTER won't
		// read more input and will find the Stream Footer from
		// coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
		//
		// Otherwise we will need to seek. The seeking is done so
		// that Stream Footer will be at the end of coder->temp.
		// This way it's likely that we also get a complete Index
		// field into coder->temp without needing a separate seek
		// for that (unless the Index field is big).
		if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
			return_if_error(reverse_seek(
					coder, in_start, in_pos, in_size));

		FALLTHROUGH;
	}

	case SEQ_FOOTER:
		// Copy the Stream Footer field into coder->temp.
		// If Stream Footer was already available in coder->temp
		// in SEQ_PADDING_DECODE, then this does nothing.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Make coder->file_target_pos and coder->temp_size point
		// to the beginning of Stream Footer and thus to the end
		// of the Index field. coder->temp_pos will be updated
		// a bit later.
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;

		// Decode Stream Footer.
		return_if_error(hide_format_error(lzma_stream_footer_decode(
				&coder->footer_flags,
				coder->temp + coder->temp_size)));

		// Check that we won't seek past the beginning of the file.
		//
		// LZMA_STREAM_HEADER_SIZE is added because there must be
		// space for Stream Header too even though we won't seek
		// there before decoding the Index field.
		//
		// There's no risk of integer overflow here because
		// Backward Size cannot be greater than 2^34.
		if (coder->file_target_pos < coder->footer_flags.backward_size
				+ LZMA_STREAM_HEADER_SIZE)
			return LZMA_DATA_ERROR;

		// Set the target position to the beginning of the Index field.
		coder->file_target_pos -= coder->footer_flags.backward_size;
		coder->sequence = SEQ_INDEX_INIT;

		// We can avoid an external seek if the whole Index field is
		// already available in coder->temp.
		if (coder->temp_size >= coder->footer_flags.backward_size) {
			// Set coder->temp_pos to point to the beginning
			// of the Index.
			coder->temp_pos = coder->temp_size
					- coder->footer_flags.backward_size;
		} else {
			// These are set to zero to indicate that there's no
			// useful data (Index or anything else) in coder->temp.
			coder->temp_pos = 0;
			coder->temp_size = 0;

			// Seek to the beginning of the Index field.
			if (seek_to_pos(coder, coder->file_target_pos,
					in_start, in_pos, in_size))
				return LZMA_SEEK_NEEDED;
		}

		FALLTHROUGH;

	case SEQ_INDEX_INIT: {
		// Calculate the amount of memory already used by the earlier
		// Indexes so that we know how big memory limit to pass to
		// the Index decoder.
		//
		// NOTE: When there are multiple Streams, the separate
		// lzma_index structures can use more RAM (as measured by
		// lzma_index_memused()) than the final combined lzma_index.
		// Thus memlimit may need to be slightly higher than the final
		// calculated memory usage will be. This is perhaps a bit
		// confusing to the application, but I think it shouldn't
		// cause problems in practice.
		uint64_t memused = 0;
		if (coder->combined_index != NULL) {
			memused = lzma_index_memused(coder->combined_index);
			assert(memused <= coder->memlimit);
			if (memused > coder->memlimit) // Extra sanity check
				return LZMA_PROG_ERROR;
		}

		// Initialize the Index decoder.
		return_if_error(lzma_index_decoder_init(
				&coder->index_decoder, allocator,
				&coder->this_index,
				coder->memlimit - memused));

		coder->index_remaining = coder->footer_flags.backward_size;
		coder->sequence = SEQ_INDEX_DECODE;
		FALLTHROUGH;
	}

	case SEQ_INDEX_DECODE: {
		// Decode (a part of) the Index. If the whole Index is already
		// in coder->temp, read it from there. Otherwise read from
		// in[*in_pos] onwards. Note that decode_index() updates
		// coder->index_remaining and optionally coder->file_cur_pos.
		lzma_ret ret;
		if (coder->temp_size != 0) {
			assert(coder->temp_size - coder->temp_pos
					== coder->index_remaining);
			ret = decode_index(coder, allocator, coder->temp,
					&coder->temp_pos, coder->temp_size,
					false);
		} else {
			// Don't give the decoder more input than the known
			// remaining size of the Index field.
			size_t in_stop = in_size;
			if (in_size - *in_pos > coder->index_remaining)
				in_stop = *in_pos
					+ (size_t)(coder->index_remaining);

			ret = decode_index(coder, allocator,
					in, in_pos, in_stop, true);
		}

		switch (ret) {
		case LZMA_OK:
			// If the Index decoder asks for more input when we
			// have already given it as much input as Backward Size
			// indicated, the file is invalid.
			if (coder->index_remaining == 0)
				return LZMA_DATA_ERROR;

			// We cannot get here if we were reading Index from
			// coder->temp because when reading from coder->temp
			// we give the Index decoder exactly
			// coder->index_remaining bytes of input.
			assert(coder->temp_size == 0);

			return LZMA_OK;

		case LZMA_STREAM_END:
			// If the decoding seems to be successful, check also
			// that the Index decoder consumed as much input as
			// indicated by the Backward Size field.
			if (coder->index_remaining != 0)
				return LZMA_DATA_ERROR;

			break;

		default:
			return ret;
		}

		// Calculate how much the Index tells us to seek backwards
		// (relative to the beginning of the Index): Total size of
		// all Blocks plus the size of the Stream Header field.
		// No integer overflow here because lzma_index_total_size()
		// cannot return a value greater than LZMA_VLI_MAX.
		const uint64_t seek_amount
				= lzma_index_total_size(coder->this_index)
					+ LZMA_STREAM_HEADER_SIZE;

		// Check that Index is sane in sense that seek_amount won't
		// make us seek past the beginning of the file when locating
		// the Stream Header.
		//
		// coder->file_target_pos still points to the beginning of
		// the Index field.
		if (coder->file_target_pos < seek_amount)
			return LZMA_DATA_ERROR;

		// Set the target to the beginning of Stream Header.
		coder->file_target_pos -= seek_amount;

		if (coder->file_target_pos == 0) {
			// We would seek to the beginning of the file, but
			// since we already decoded that Stream Header in
			// SEQ_MAGIC_BYTES, we can use the cached value from
			// coder->first_header_flags to avoid the seek.
			coder->header_flags = coder->first_header_flags;
			coder->sequence = SEQ_HEADER_COMPARE;
			break;
		}

		coder->sequence = SEQ_HEADER_DECODE;

		// Make coder->file_target_pos point to the end of
		// the Stream Header field.
		coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;

		// If coder->temp_size is non-zero, it points to the end
		// of the Index field. Then the beginning of the Index
		// field is at coder->temp[coder->temp_size
		// - coder->footer_flags.backward_size].
		assert(coder->temp_size == 0 || coder->temp_size
				>= coder->footer_flags.backward_size);

		// If coder->temp contained the whole Index, see if it has
		// enough data to contain also the Stream Header. If so,
		// we avoid an external seek.
		//
		// NOTE: This can happen only with small .xz files and only
		// for the non-first Stream as the Stream Flags of the first
		// Stream are cached and already handled a few lines above.
		// So this isn't as useful as the other seek-avoidance cases.
		if (coder->temp_size != 0 && coder->temp_size
				- coder->footer_flags.backward_size
				>= seek_amount) {
			// Make temp_pos and temp_size point to the *end* of
			// Stream Header so that SEQ_HEADER_DECODE will find
			// the start of Stream Header from coder->temp[
			// coder->temp_size - LZMA_STREAM_HEADER_SIZE].
			coder->temp_pos = coder->temp_size
					- coder->footer_flags.backward_size
					- seek_amount
					+ LZMA_STREAM_HEADER_SIZE;
			coder->temp_size = coder->temp_pos;
		} else {
			// Seek so that Stream Header will be at the end of
			// coder->temp. With typical multi-Stream files we
			// will usually also get the Stream Footer and Index
			// of the *previous* Stream in coder->temp and thus
			// won't need a separate seek for them.
			return_if_error(reverse_seek(coder,
					in_start, in_pos, in_size));
		}

		FALLTHROUGH;
	}

	case SEQ_HEADER_DECODE:
		// Copy the Stream Header field into coder->temp.
		// If Stream Header was already available in coder->temp
		// in SEQ_INDEX_DECODE, then this does nothing.
		if (fill_temp(coder, in, in_pos, in_size))
			return LZMA_OK;

		// Make all these point to the beginning of Stream Header.
		coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
		coder->temp_pos = coder->temp_size;

		// Decode the Stream Header.
		return_if_error(hide_format_error(lzma_stream_header_decode(
				&coder->header_flags,
				coder->temp + coder->temp_size)));

		coder->sequence = SEQ_HEADER_COMPARE;
		FALLTHROUGH;

	case SEQ_HEADER_COMPARE:
		// Compare Stream Header against Stream Footer. They must
		// match.
		return_if_error(lzma_stream_flags_compare(
				&coder->header_flags, &coder->footer_flags));

		// Store the decoded Stream Flags into the Index. Use the
		// Footer Flags because it contains Backward Size, although
		// it shouldn't matter in practice.
		if (lzma_index_stream_flags(coder->this_index,
				&coder->footer_flags) != LZMA_OK)
			return LZMA_PROG_ERROR;

		// Store also the size of the Stream Padding field. It is
		// needed to calculate the offsets of the Streams correctly.
		if (lzma_index_stream_padding(coder->this_index,
				coder->stream_padding) != LZMA_OK)
			return LZMA_PROG_ERROR;

		// Reset it so that it's ready for the next Stream.
		coder->stream_padding = 0;

		// Append the earlier decoded Indexes after this_index.
		// Since the file is read backwards, the earlier decoded
		// Indexes belong to Streams that are later in the file.
		if (coder->combined_index != NULL)
			return_if_error(lzma_index_cat(coder->this_index,
					coder->combined_index, allocator));

		// this_index is set to NULL so that the index is referenced
		// only via combined_index; file_info_decoder_end() calls
		// lzma_index_end() on both pointers.
		coder->combined_index = coder->this_index;
		coder->this_index = NULL;

		// If the whole file was decoded, tell the caller that we
		// are finished.
		if (coder->file_target_pos == 0) {
			// The combined index must indicate the same file
			// size as was told to us at initialization.
			assert(lzma_index_file_size(coder->combined_index)
					== coder->file_size);

			// Make the combined index available to
			// the application.
			*coder->dest_index = coder->combined_index;
			coder->combined_index = NULL;

			// Mark the input buffer as used since we may have
			// done internal seeking and thus don't know how
			// many input bytes were actually used. This way
			// lzma_stream.total_in gets a slightly better
			// estimate of the amount of input used.
			*in_pos = in_size;
			return LZMA_STREAM_END;
		}

		// We didn't hit the beginning of the file yet, so continue
		// reading backwards in the file. If we have unprocessed
		// data in coder->temp, use it before requesting more data
		// from the application.
		//
		// coder->file_target_pos, coder->temp_size, and
		// coder->temp_pos all point to the beginning of Stream Header
		// and thus the end of the previous Stream in the file.
		coder->sequence = coder->temp_size > 0
				? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
		break;

	default:
		assert(0);
		return LZMA_PROG_ERROR;
	}
}


/// Gets the current memory usage and memory usage limit, and optionally
/// sets a new limit. Installed as next->memconfig by
/// lzma_file_info_decoder_init().
///
/// \param      coder_ptr       Pointer to lzma_file_info_coder
/// \param      memusage        Set to the current memory usage; never
///                             set to zero by this function
/// \param      old_memlimit    Set to the limit that was in effect when
///                             this function was called
/// \param      new_memlimit    New limit, or 0 to keep the current limit
///
/// \return     - LZMA_OK
///             - LZMA_MEMLIMIT_ERROR: new_memlimit is non-zero but
///               smaller than the current memory usage.
///             - LZMA_PROG_ERROR: The memconfig function of the Index
///               decoder failed unexpectedly.
static lzma_ret
file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
		uint64_t *old_memlimit, uint64_t new_memlimit)
{
	lzma_file_info_coder *coder = coder_ptr;

	// The memory usage calculation comes from three things:
	//
	// (1) The Indexes that have already been decoded and processed into
	//     coder->combined_index.
	//
	// (2) The latest Index in coder->this_index that has been decoded but
	//     not yet put into coder->combined_index.
	//
	// (3) The latest Index that we have started decoding but haven't
	//     finished and thus isn't available in coder->this_index yet.
	//     Memory usage and limit information needs to be communicated
	//     from/to coder->index_decoder.
	//
	// Care has to be taken to not do both (2) and (3) when calculating
	// the memory usage.
	uint64_t combined_index_memusage = 0;
	uint64_t this_index_memusage = 0;

	// (1) If we have already successfully decoded one or more Indexes,
	// get their memory usage.
	if (coder->combined_index != NULL)
		combined_index_memusage = lzma_index_memused(
				coder->combined_index);

	// Choose between (2), (3), or neither.
	if (coder->this_index != NULL) {
		// (2) The latest Index is available. Use its memory usage.
		this_index_memusage = lzma_index_memused(coder->this_index);

	} else if (coder->sequence == SEQ_INDEX_DECODE) {
		// (3) The Index decoder is active and hasn't yet stored
		// the new index in coder->this_index. Get the memory usage
		// information from the Index decoder.
		//
		// NOTE: If the Index decoder doesn't yet know how much memory
		// it will eventually need, it will return a tiny value here.
		uint64_t dummy;
		if (coder->index_decoder.memconfig(coder->index_decoder.coder,
				&this_index_memusage, &dummy, 0)
				!= LZMA_OK) {
			assert(0);
			return LZMA_PROG_ERROR;
		}
	}

	// Now we know the total memory usage/requirement. If we had neither
	// old Indexes nor a new Index, this will be zero which isn't
	// acceptable as lzma_memusage() has to return non-zero on success
	// and even with an empty .xz file we will end up with a lzma_index
	// that takes some memory.
	*memusage = combined_index_memusage + this_index_memusage;
	if (*memusage == 0)
		*memusage = lzma_index_memusage(1, 0);

	*old_memlimit = coder->memlimit;

	// If requested, set a new memory usage limit.
	if (new_memlimit != 0) {
		if (new_memlimit < *memusage)
			return LZMA_MEMLIMIT_ERROR;

		// In the condition (3) we need to tell the Index decoder
		// its new memory usage limit.
		if (coder->this_index == NULL
				&& coder->sequence == SEQ_INDEX_DECODE) {
			// The Index decoder gets what is left of the new
			// limit after the already-decoded Indexes. This
			// mirrors how the limit is calculated for
			// lzma_index_decoder_init() in SEQ_INDEX_INIT.
			const uint64_t idec_new_memlimit = new_memlimit
					- combined_index_memusage;

			assert(this_index_memusage > 0);
			assert(idec_new_memlimit > 0);

			uint64_t dummy1;
			uint64_t dummy2;

			if (coder->index_decoder.memconfig(
					coder->index_decoder.coder,
					&dummy1, &dummy2, idec_new_memlimit)
					!= LZMA_OK) {
				assert(0);
				return LZMA_PROG_ERROR;
			}
		}

		coder->memlimit = new_memlimit;
	}

	return LZMA_OK;
}


/// Frees the Index decoder, the Indexes that haven't been handed over to
/// the application, and the coder structure itself. Installed as next->end
/// by lzma_file_info_decoder_init().
static void
file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
{
	lzma_file_info_coder *coder = coder_ptr;

	lzma_next_end(&coder->index_decoder, allocator);
	lzma_index_end(coder->this_index, allocator);
	lzma_index_end(coder->combined_index, allocator);

	lzma_free(coder, allocator);
	return;
}


/// Initializes the file info decoder, or resets it if next->coder has
/// already been allocated.
///
/// \param      next        Coder structure to initialize
/// \param      allocator   Allocator for the coder and the Indexes
/// \param      seek_pos    Where to store the wanted file offset when
///                         LZMA_SEEK_NEEDED is returned by the decoder
/// \param      dest_index  Where to store the combined index after
///                         successful decoding; must not be NULL
/// \param      memlimit    Memory usage limit; 0 is treated as 1
/// \param      file_size   Size of the input file
///
/// \return     - LZMA_OK
///             - LZMA_PROG_ERROR: dest_index is NULL.
///             - LZMA_MEM_ERROR: Allocating the coder structure failed.
static lzma_ret
lzma_file_info_decoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator, uint64_t *seek_pos,
		lzma_index **dest_index,
		uint64_t memlimit, uint64_t file_size)
{
	lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);

	if (dest_index == NULL)
		return LZMA_PROG_ERROR;

	lzma_file_info_coder *coder = next->coder;
	if (coder == NULL) {
		coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
		if (coder == NULL)
			return LZMA_MEM_ERROR;

		next->coder = coder;
		next->code = &file_info_decode;
		next->end = &file_info_decoder_end;
		next->memconfig = &file_info_decoder_memconfig;

		// lzma_alloc() result isn't cleared here, so the pointers
		// that are passed to lzma_index_end() below are set to NULL
		// explicitly for a newly allocated coder.
		coder->index_decoder = LZMA_NEXT_CODER_INIT;
		coder->this_index = NULL;
		coder->combined_index = NULL;
	}

	coder->sequence = SEQ_MAGIC_BYTES;
	coder->file_cur_pos = 0;
	coder->file_target_pos = 0;
	coder->file_size = file_size;

	// Free the Indexes that may remain from an earlier use of
	// this coder.
	lzma_index_end(coder->this_index, allocator);
	coder->this_index = NULL;

	lzma_index_end(coder->combined_index, allocator);
	coder->combined_index = NULL;

	coder->stream_padding = 0;

	coder->dest_index = dest_index;
	coder->external_seek_pos = seek_pos;

	// If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
	// won't return 0 (which would indicate an error).
	coder->memlimit = my_max(1, memlimit);

	// Prepare these for reading the first Stream Header into coder->temp.
	coder->temp_pos = 0;
	coder->temp_size = LZMA_STREAM_HEADER_SIZE;

	return LZMA_OK;
}


/// Public API function: initializes strm as a .xz file info decoder.
///
/// \param      strm        Stream to initialize; &strm->seek_pos is passed
///                         to the decoder as the place to store the
///                         wanted offset for LZMA_SEEK_NEEDED
/// \param      dest_index  Where to store the combined index after
///                         successful decoding
/// \param      memlimit    Memory usage limit
/// \param      file_size   Size of the input file
///
/// \return     LZMA_OK on success.
///             NOTE(review): lzma_next_strm_init() is a macro defined
///             elsewhere; it presumably returns the error code if
///             the initialization fails -- confirm.
extern LZMA_API(lzma_ret)
lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
		uint64_t memlimit, uint64_t file_size)
{
	lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
			dest_index, memlimit, file_size);

	// We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
	// lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
	// combination in a sane way. Applications still need to be careful
	// if they use LZMA_FINISH so that they remember to reset it back
	// to LZMA_RUN after seeking if needed.
	strm->internal->supported_actions[LZMA_RUN] = true;
	strm->internal->supported_actions[LZMA_FINISH] = true;

	return LZMA_OK;
}