1 /** 2 * \file lzma/index.h 3 * \brief Handling of .xz Index and related information 4 */ 5 6 /* 7 * Author: Lasse Collin 8 * 9 * This file has been put into the public domain. 10 * You can do whatever you want with this file. 11 * 12 * See ../lzma.h for information about liblzma as a whole. 13 */ 14 15 #ifndef LZMA_H_INTERNAL 16 # error Never include this file directly. Use <lzma.h> instead. 17 #endif 18 19 20 /** 21 * \brief Opaque data type to hold the Index(es) and other information 22 * 23 * lzma_index often holds just one .xz Index and possibly the Stream Flags 24 * of the same Stream and size of the Stream Padding field. However, 25 * multiple lzma_indexes can be concatenated with lzma_index_cat() and then 26 * there may be information about multiple Streams in the same lzma_index. 27 * 28 * Notes about thread safety: Only one thread may modify lzma_index at 29 * a time. All functions that take non-const pointer to lzma_index 30 * modify it. As long as no thread is modifying the lzma_index, getting 31 * information from the same lzma_index can be done from multiple threads 32 * at the same time with functions that take a const pointer to 33 * lzma_index or use lzma_index_iter. The same iterator must be used 34 * only by one thread at a time, of course, but there can be as many 35 * iterators for the same lzma_index as needed. 36 */ 37 typedef struct lzma_index_s lzma_index; 38 39 40 /** 41 * \brief Iterator to get information about Blocks and Streams 42 */ 43 typedef struct { 44 struct { 45 /** 46 * \brief Pointer to Stream Flags 47 * 48 * This is NULL if Stream Flags have not been set for 49 * this Stream with lzma_index_stream_flags(). 50 */ 51 const lzma_stream_flags *flags; 52 53 const void *reserved_ptr1; 54 const void *reserved_ptr2; 55 const void *reserved_ptr3; 56 57 /** 58 * \brief Stream number in the lzma_index 59 * 60 * The first Stream is 1. 61 */ 62 lzma_vli number; 63 64 /** 65 * \brief Number of Blocks in the Stream 66 * 67 * If this is zero, the block structure below has 68 * undefined values. 69 */ 70 lzma_vli block_count; 71 72 /** 73 * \brief Compressed start offset of this Stream 74 * 75 * The offset is relative to the beginning of the lzma_index 76 * (i.e. usually the beginning of the .xz file). 77 */ 78 lzma_vli compressed_offset; 79 80 /** 81 * \brief Uncompressed start offset of this Stream 82 * 83 * The offset is relative to the beginning of the lzma_index 84 * (i.e. usually the beginning of the .xz file). 85 */ 86 lzma_vli uncompressed_offset; 87 88 /** 89 * \brief Compressed size of this Stream 90 * 91 * This includes all headers except the possible 92 * Stream Padding after this Stream. 93 */ 94 lzma_vli compressed_size; 95 96 /** 97 * \brief Uncompressed size of this Stream 98 */ 99 lzma_vli uncompressed_size; 100 101 /** 102 * \brief Size of Stream Padding after this Stream 103 * 104 * If it hasn't been set with lzma_index_stream_padding(), 105 * this defaults to zero. Stream Padding is always 106 * a multiple of four bytes. 107 */ 108 lzma_vli padding; 109 110 lzma_vli reserved_vli1; 111 lzma_vli reserved_vli2; 112 lzma_vli reserved_vli3; 113 lzma_vli reserved_vli4; 114 } stream; 115 116 struct { 117 /** 118 * \brief Block number in the file 119 * 120 * The first Block is 1. 121 */ 122 lzma_vli number_in_file; 123 124 /** 125 * \brief Compressed start offset of this Block 126 * 127 * This offset is relative to the beginning of the 128 * lzma_index (i.e. usually the beginning of the .xz file). 129 * Normally this is where you should seek in the .xz file 130 * to start decompressing this Block. 131 */ 132 lzma_vli compressed_file_offset; 133 134 /** 135 * \brief Uncompressed start offset of this Block 136 * 137 * This offset is relative to the beginning of the lzma_index 138 * (i.e. usually the beginning of the .xz file). 139 * 140 * When doing random-access reading, it is possible that 141 * the target offset is not exactly at Block boundary. One 142 * will need to compare the target offset against 143 * uncompressed_file_offset or uncompressed_stream_offset, 144 * and possibly decode and throw away some amount of data 145 * before reaching the target offset. 146 */ 147 lzma_vli uncompressed_file_offset; 148 149 /** 150 * \brief Block number in this Stream 151 * 152 * The first Block is 1. 153 */ 154 lzma_vli number_in_stream; 155 156 /** 157 * \brief Compressed start offset of this Block 158 * 159 * This offset is relative to the beginning of the Stream 160 * containing this Block. 161 */ 162 lzma_vli compressed_stream_offset; 163 164 /** 165 * \brief Uncompressed start offset of this Block 166 * 167 * This offset is relative to the beginning of the Stream 168 * containing this Block. 169 */ 170 lzma_vli uncompressed_stream_offset; 171 172 /** 173 * \brief Uncompressed size of this Block 174 * 175 * You should pass this to the Block decoder if you will 176 * decode this Block. It will allow the Block decoder to 177 * validate the uncompressed size. 178 */ 179 lzma_vli uncompressed_size; 180 181 /** 182 * \brief Unpadded size of this Block 183 * 184 * You should pass this to the Block decoder if you will 185 * decode this Block. It will allow the Block decoder to 186 * validate the unpadded size. 187 */ 188 lzma_vli unpadded_size; 189 190 /** 191 * \brief Total compressed size 192 * 193 * This includes all headers and padding in this Block. 194 * This is useful if you need to know how many bytes 195 * the Block decoder will actually read. 196 */ 197 lzma_vli total_size; 198 199 lzma_vli reserved_vli1; 200 lzma_vli reserved_vli2; 201 lzma_vli reserved_vli3; 202 lzma_vli reserved_vli4; 203 204 const void *reserved_ptr1; 205 const void *reserved_ptr2; 206 const void *reserved_ptr3; 207 const void *reserved_ptr4; 208 } block; 209 210 /* 211 * Internal data which is used to store the state of the iterator. 212 * The exact format may vary between liblzma versions, so don't 213 * touch these in any way. 214 */ 215 union { 216 const void *p; 217 size_t s; 218 lzma_vli v; 219 } internal[6]; 220 } lzma_index_iter; 221 222 223 /** 224 * \brief Operation mode for lzma_index_iter_next() 225 */ 226 typedef enum { 227 LZMA_INDEX_ITER_ANY = 0, 228 /**< 229 * \brief Get the next Block or Stream 230 * 231 * Go to the next Block if the current Stream has at least 232 * one Block left. Otherwise go to the next Stream even if 233 * it has no Blocks. If the Stream has no Blocks 234 * (lzma_index_iter.stream.block_count == 0), 235 * lzma_index_iter.block will have undefined values. 236 */ 237 238 LZMA_INDEX_ITER_STREAM = 1, 239 /**< 240 * \brief Get the next Stream 241 * 242 * Go to the next Stream even if the current Stream has 243 * unread Blocks left. If the next Stream has at least one 244 * Block, the iterator will point to the first Block. 245 * If there are no Blocks, lzma_index_iter.block will have 246 * undefined values. 247 */ 248 249 LZMA_INDEX_ITER_BLOCK = 2, 250 /**< 251 * \brief Get the next Block 252 * 253 * Go to the next Block if the current Stream has at least 254 * one Block left. If the current Stream has no Blocks left, 255 * the next Stream with at least one Block is located and 256 * the iterator will be made to point to the first Block of 257 * that Stream. 258 */ 259 260 LZMA_INDEX_ITER_NONEMPTY_BLOCK = 3 261 /**< 262 * \brief Get the next non-empty Block 263 * 264 * This is like LZMA_INDEX_ITER_BLOCK except that it will 265 * skip Blocks whose Uncompressed Size is zero. 266 */ 267 268 } lzma_index_iter_mode; 269 270 271 /** 272 * \brief Calculate memory usage of lzma_index 273 * 274 * On disk, the size of the Index field depends on both the number of Records 275 * stored and how big values the Records store (due to variable-length integer 276 * encoding). When the Index is kept in lzma_index structure, the memory usage 277 * depends only on the number of Records/Blocks stored in the Index(es), and 278 * in case of concatenated lzma_indexes, the number of Streams. The size in 279 * RAM is almost always significantly bigger than in the encoded form on disk. 280 * 281 * This function calculates an approximate amount of memory needed hold 282 * the given number of Streams and Blocks in lzma_index structure. This 283 * value may vary between CPU architectures and also between liblzma versions 284 * if the internal implementation is modified. 285 */ 286 extern LZMA_API(uint64_t) lzma_index_memusage( 287 lzma_vli streams, lzma_vli blocks) lzma_nothrow; 288 289 290 /** 291 * \brief Calculate the memory usage of an existing lzma_index 292 * 293 * This is a shorthand for lzma_index_memusage(lzma_index_stream_count(i), 294 * lzma_index_block_count(i)). 295 */ 296 extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i) 297 lzma_nothrow; 298 299 300 /** 301 * \brief Allocate and initialize a new lzma_index structure 302 * 303 * \return On success, a pointer to an empty initialized lzma_index is 304 * returned. If allocation fails, NULL is returned. 305 */ 306 extern LZMA_API(lzma_index *) lzma_index_init(const lzma_allocator *allocator) 307 lzma_nothrow; 308 309 310 /** 311 * \brief Deallocate lzma_index 312 * 313 * If i is NULL, this does nothing. 314 */ 315 extern LZMA_API(void) lzma_index_end( 316 lzma_index *i, const lzma_allocator *allocator) lzma_nothrow; 317 318 319 /** 320 * \brief Add a new Block to lzma_index 321 * 322 * \param i Pointer to a lzma_index structure 323 * \param allocator Pointer to lzma_allocator, or NULL to 324 * use malloc() 325 * \param unpadded_size Unpadded Size of a Block. This can be 326 * calculated with lzma_block_unpadded_size() 327 * after encoding or decoding the Block. 328 * \param uncompressed_size Uncompressed Size of a Block. This can be 329 * taken directly from lzma_block structure 330 * after encoding or decoding the Block. 331 * 332 * Appending a new Block does not invalidate iterators. For example, 333 * if an iterator was pointing to the end of the lzma_index, after 334 * lzma_index_append() it is possible to read the next Block with 335 * an existing iterator. 336 * 337 * \return - LZMA_OK 338 * - LZMA_MEM_ERROR 339 * - LZMA_DATA_ERROR: Compressed or uncompressed size of the 340 * Stream or size of the Index field would grow too big. 341 * - LZMA_PROG_ERROR 342 */ 343 extern LZMA_API(lzma_ret) lzma_index_append( 344 lzma_index *i, const lzma_allocator *allocator, 345 lzma_vli unpadded_size, lzma_vli uncompressed_size) 346 lzma_nothrow lzma_attr_warn_unused_result; 347 348 349 /** 350 * \brief Set the Stream Flags 351 * 352 * Set the Stream Flags of the last (and typically the only) Stream 353 * in lzma_index. This can be useful when reading information from the 354 * lzma_index, because to decode Blocks, knowing the integrity check type 355 * is needed. 356 * 357 * The given Stream Flags are copied into internal preallocated structure 358 * in the lzma_index, thus the caller doesn't need to keep the *stream_flags 359 * available after calling this function. 360 * 361 * \return - LZMA_OK 362 * - LZMA_OPTIONS_ERROR: Unsupported stream_flags->version. 363 * - LZMA_PROG_ERROR 364 */ 365 extern LZMA_API(lzma_ret) lzma_index_stream_flags( 366 lzma_index *i, const lzma_stream_flags *stream_flags) 367 lzma_nothrow lzma_attr_warn_unused_result; 368 369 370 /** 371 * \brief Get the types of integrity Checks 372 * 373 * If lzma_index_stream_flags() is used to set the Stream Flags for 374 * every Stream, lzma_index_checks() can be used to get a bitmask to 375 * indicate which Check types have been used. It can be useful e.g. if 376 * showing the Check types to the user. 377 * 378 * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10. 379 */ 380 extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i) 381 lzma_nothrow lzma_attr_pure; 382 383 384 /** 385 * \brief Set the amount of Stream Padding 386 * 387 * Set the amount of Stream Padding of the last (and typically the only) 388 * Stream in the lzma_index. This is needed when planning to do random-access 389 * reading within multiple concatenated Streams. 390 * 391 * By default, the amount of Stream Padding is assumed to be zero bytes. 392 * 393 * \return - LZMA_OK 394 * - LZMA_DATA_ERROR: The file size would grow too big. 395 * - LZMA_PROG_ERROR 396 */ 397 extern LZMA_API(lzma_ret) lzma_index_stream_padding( 398 lzma_index *i, lzma_vli stream_padding) 399 lzma_nothrow lzma_attr_warn_unused_result; 400 401 402 /** 403 * \brief Get the number of Streams 404 */ 405 extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i) 406 lzma_nothrow lzma_attr_pure; 407 408 409 /** 410 * \brief Get the number of Blocks 411 * 412 * This returns the total number of Blocks in lzma_index. To get number 413 * of Blocks in individual Streams, use lzma_index_iter. 414 */ 415 extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i) 416 lzma_nothrow lzma_attr_pure; 417 418 419 /** 420 * \brief Get the size of the Index field as bytes 421 * 422 * This is needed to verify the Backward Size field in the Stream Footer. 423 */ 424 extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i) 425 lzma_nothrow lzma_attr_pure; 426 427 428 /** 429 * \brief Get the total size of the Stream 430 * 431 * If multiple lzma_indexes have been combined, this works as if the Blocks 432 * were in a single Stream. This is useful if you are going to combine 433 * Blocks from multiple Streams into a single new Stream. 434 */ 435 extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i) 436 lzma_nothrow lzma_attr_pure; 437 438 439 /** 440 * \brief Get the total size of the Blocks 441 * 442 * This doesn't include the Stream Header, Stream Footer, Stream Padding, 443 * or Index fields. 444 */ 445 extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i) 446 lzma_nothrow lzma_attr_pure; 447 448 449 /** 450 * \brief Get the total size of the file 451 * 452 * When no lzma_indexes have been combined with lzma_index_cat() and there is 453 * no Stream Padding, this function is identical to lzma_index_stream_size(). 454 * If multiple lzma_indexes have been combined, this includes also the headers 455 * of each separate Stream and the possible Stream Padding fields. 456 */ 457 extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i) 458 lzma_nothrow lzma_attr_pure; 459 460 461 /** 462 * \brief Get the uncompressed size of the file 463 */ 464 extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i) 465 lzma_nothrow lzma_attr_pure; 466 467 468 /** 469 * \brief Initialize an iterator 470 * 471 * \param iter Pointer to a lzma_index_iter structure 472 * \param i lzma_index to which the iterator will be associated 473 * 474 * This function associates the iterator with the given lzma_index, and calls 475 * lzma_index_iter_rewind() on the iterator. 476 * 477 * This function doesn't allocate any memory, thus there is no 478 * lzma_index_iter_end(). The iterator is valid as long as the 479 * associated lzma_index is valid, that is, until lzma_index_end() or 480 * using it as source in lzma_index_cat(). Specifically, lzma_index doesn't 481 * become invalid if new Blocks are added to it with lzma_index_append() or 482 * if it is used as the destination in lzma_index_cat(). 483 * 484 * It is safe to make copies of an initialized lzma_index_iter, for example, 485 * to easily restart reading at some particular position. 486 */ 487 extern LZMA_API(void) lzma_index_iter_init( 488 lzma_index_iter *iter, const lzma_index *i) lzma_nothrow; 489 490 491 /** 492 * \brief Rewind the iterator 493 * 494 * Rewind the iterator so that next call to lzma_index_iter_next() will 495 * return the first Block or Stream. 496 */ 497 extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter) 498 lzma_nothrow; 499 500 501 /** 502 * \brief Get the next Block or Stream 503 * 504 * \param iter Iterator initialized with lzma_index_iter_init() 505 * \param mode Specify what kind of information the caller wants 506 * to get. See lzma_index_iter_mode for details. 507 * 508 * \return If next Block or Stream matching the mode was found, *iter 509 * is updated and this function returns false. If no Block or 510 * Stream matching the mode is found, *iter is not modified 511 * and this function returns true. If mode is set to an unknown 512 * value, *iter is not modified and this function returns true. 513 */ 514 extern LZMA_API(lzma_bool) lzma_index_iter_next( 515 lzma_index_iter *iter, lzma_index_iter_mode mode) 516 lzma_nothrow lzma_attr_warn_unused_result; 517 518 519 /** 520 * \brief Locate a Block 521 * 522 * If it is possible to seek in the .xz file, it is possible to parse 523 * the Index field(s) and use lzma_index_iter_locate() to do random-access 524 * reading with granularity of Block size. 525 * 526 * \param iter Iterator that was earlier initialized with 527 * lzma_index_iter_init(). 528 * \param target Uncompressed target offset which the caller would 529 * like to locate from the Stream 530 * 531 * If the target is smaller than the uncompressed size of the Stream (can be 532 * checked with lzma_index_uncompressed_size()): 533 * - Information about the Stream and Block containing the requested 534 * uncompressed offset is stored into *iter. 535 * - Internal state of the iterator is adjusted so that 536 * lzma_index_iter_next() can be used to read subsequent Blocks or Streams. 537 * - This function returns false. 538 * 539 * If target is greater than the uncompressed size of the Stream, *iter 540 * is not modified, and this function returns true. 541 */ 542 extern LZMA_API(lzma_bool) lzma_index_iter_locate( 543 lzma_index_iter *iter, lzma_vli target) lzma_nothrow; 544 545 546 /** 547 * \brief Concatenate lzma_indexes 548 * 549 * Concatenating lzma_indexes is useful when doing random-access reading in 550 * multi-Stream .xz file, or when combining multiple Streams into single 551 * Stream. 552 * 553 * \param dest lzma_index after which src is appended 554 * \param src lzma_index to be appended after dest. If this 555 * function succeeds, the memory allocated for src 556 * is freed or moved to be part of dest, and all 557 * iterators pointing to src will become invalid. 558 * \param allocator Custom memory allocator; can be NULL to use 559 * malloc() and free(). 560 * 561 * \return - LZMA_OK: lzma_indexes were concatenated successfully. 562 * src is now a dangling pointer. 563 * - LZMA_DATA_ERROR: *dest would grow too big. 564 * - LZMA_MEM_ERROR 565 * - LZMA_PROG_ERROR 566 */ 567 extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *dest, lzma_index *src, 568 const lzma_allocator *allocator) 569 lzma_nothrow lzma_attr_warn_unused_result; 570 571 572 /** 573 * \brief Duplicate lzma_index 574 * 575 * \return A copy of the lzma_index, or NULL if memory allocation failed. 576 */ 577 extern LZMA_API(lzma_index *) lzma_index_dup( 578 const lzma_index *i, const lzma_allocator *allocator) 579 lzma_nothrow lzma_attr_warn_unused_result; 580 581 582 /** 583 * \brief Initialize .xz Index encoder 584 * 585 * \param strm Pointer to properly prepared lzma_stream 586 * \param i Pointer to lzma_index which should be encoded. 587 * 588 * The valid `action' values for lzma_code() are LZMA_RUN and LZMA_FINISH. 589 * It is enough to use only one of them (you can choose freely). 590 * 591 * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). 592 * - LZMA_MEM_ERROR 593 * - LZMA_PROG_ERROR 594 */ 595 extern LZMA_API(lzma_ret) lzma_index_encoder( 596 lzma_stream *strm, const lzma_index *i) 597 lzma_nothrow lzma_attr_warn_unused_result; 598 599 600 /** 601 * \brief Initialize .xz Index decoder 602 * 603 * \param strm Pointer to properly prepared lzma_stream 604 * \param i The decoded Index will be made available via 605 * this pointer. Initially this function will 606 * set *i to NULL (the old value is ignored). If 607 * decoding succeeds (lzma_code() returns 608 * LZMA_STREAM_END), *i will be set to point 609 * to a new lzma_index, which the application 610 * has to later free with lzma_index_end(). 611 * \param memlimit How much memory the resulting lzma_index is 612 * allowed to require. liblzma 5.2.3 and earlier 613 * don't allow 0 here and return LZMA_PROG_ERROR; 614 * later versions treat 0 as if 1 had been specified. 615 * 616 * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. 617 * There is no need to use LZMA_FINISH, but it's allowed because it may 618 * simplify certain types of applications. 619 * 620 * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). 621 * - LZMA_MEM_ERROR 622 * - LZMA_PROG_ERROR 623 * 624 * liblzma 5.2.3 and older list also LZMA_MEMLIMIT_ERROR here 625 * but that error code has never been possible from this 626 * initialization function. 627 */ 628 extern LZMA_API(lzma_ret) lzma_index_decoder( 629 lzma_stream *strm, lzma_index **i, uint64_t memlimit) 630 lzma_nothrow lzma_attr_warn_unused_result; 631 632 633 /** 634 * \brief Single-call .xz Index encoder 635 * 636 * \param i lzma_index to be encoded 637 * \param out Beginning of the output buffer 638 * \param out_pos The next byte will be written to out[*out_pos]. 639 * *out_pos is updated only if encoding succeeds. 640 * \param out_size Size of the out buffer; the first byte into 641 * which no data is written to is out[out_size]. 642 * 643 * \return - LZMA_OK: Encoding was successful. 644 * - LZMA_BUF_ERROR: Output buffer is too small. Use 645 * lzma_index_size() to find out how much output 646 * space is needed. 647 * - LZMA_PROG_ERROR 648 * 649 * \note This function doesn't take allocator argument since all 650 * the internal data is allocated on stack. 651 */ 652 extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i, 653 uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; 654 655 656 /** 657 * \brief Single-call .xz Index decoder 658 * 659 * \param i If decoding succeeds, *i will point to a new 660 * lzma_index, which the application has to 661 * later free with lzma_index_end(). If an error 662 * occurs, *i will be NULL. The old value of *i 663 * is always ignored and thus doesn't need to be 664 * initialized by the caller. 665 * \param memlimit Pointer to how much memory the resulting 666 * lzma_index is allowed to require. The value 667 * pointed by this pointer is modified if and only 668 * if LZMA_MEMLIMIT_ERROR is returned. 669 * \param allocator Pointer to lzma_allocator, or NULL to use malloc() 670 * \param in Beginning of the input buffer 671 * \param in_pos The next byte will be read from in[*in_pos]. 672 * *in_pos is updated only if decoding succeeds. 673 * \param in_size Size of the input buffer; the first byte that 674 * won't be read is in[in_size]. 675 * 676 * \return - LZMA_OK: Decoding was successful. 677 * - LZMA_MEM_ERROR 678 * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. 679 * The minimum required memlimit value was stored to *memlimit. 680 * - LZMA_DATA_ERROR 681 * - LZMA_PROG_ERROR 682 */ 683 extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i, 684 uint64_t *memlimit, const lzma_allocator *allocator, 685 const uint8_t *in, size_t *in_pos, size_t in_size) 686 lzma_nothrow; 687