/**
 * \file        lzma/index.h
 * \brief       Handling of .xz Index and related information
 */

/*
 * Author: Lasse Collin
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 *
 * See ../lzma.h for information about liblzma as a whole.
 */

#ifndef LZMA_H_INTERNAL
#	error Never include this file directly. Use <lzma.h> instead.
#endif


/**
 * \brief       Opaque data type to hold the Index(es) and other information
 *
 * lzma_index often holds just one .xz Index and possibly the Stream Flags
 * of the same Stream and size of the Stream Padding field. However,
 * multiple lzma_indexes can be concatenated with lzma_index_cat() and then
 * there may be information about multiple Streams in the same lzma_index.
 *
 * Notes about thread safety: Only one thread may modify lzma_index at
 * a time. All functions that take non-const pointer to lzma_index
 * modify it. As long as no thread is modifying the lzma_index, getting
 * information from the same lzma_index can be done from multiple threads
 * at the same time with functions that take a const pointer to
 * lzma_index or use lzma_index_iter. The same iterator must be used
 * only by one thread at a time, of course, but there can be as many
 * iterators for the same lzma_index as needed.
 */
typedef struct lzma_index_s lzma_index;


/**
 * \brief       Iterator to get information about Blocks and Streams
 *
 * The structure has three parts:
 *   - stream: information about the Stream the iterator points to
 *   - block: information about the Block the iterator points to; these
 *     values are undefined when stream.block_count is zero
 *   - internal: iterator state owned by liblzma
 *
 * An iterator is initialized with lzma_index_iter_init() and advanced with
 * lzma_index_iter_next() or positioned with lzma_index_iter_locate().
 */
typedef struct {
	struct {
		/**
		 * \brief       Pointer to Stream Flags
		 *
		 * This is NULL if Stream Flags have not been set for
		 * this Stream with lzma_index_stream_flags().
		 */
		const lzma_stream_flags *flags;

		const void *reserved_ptr1;
		const void *reserved_ptr2;
		const void *reserved_ptr3;

		/**
		 * \brief       Stream number in the lzma_index
		 *
		 * The first Stream is 1.
		 */
		lzma_vli number;

		/**
		 * \brief       Number of Blocks in the Stream
		 *
		 * If this is zero, the block structure below has
		 * undefined values.
		 */
		lzma_vli block_count;

		/**
		 * \brief       Compressed start offset of this Stream
		 *
		 * The offset is relative to the beginning of the lzma_index
		 * (i.e. usually the beginning of the .xz file).
		 */
		lzma_vli compressed_offset;

		/**
		 * \brief       Uncompressed start offset of this Stream
		 *
		 * The offset is relative to the beginning of the lzma_index
		 * (i.e. usually the beginning of the .xz file).
		 */
		lzma_vli uncompressed_offset;

		/**
		 * \brief       Compressed size of this Stream
		 *
		 * This includes all headers except the possible
		 * Stream Padding after this Stream.
		 */
		lzma_vli compressed_size;

		/**
		 * \brief       Uncompressed size of this Stream
		 */
		lzma_vli uncompressed_size;

		/**
		 * \brief       Size of Stream Padding after this Stream
		 *
		 * If it hasn't been set with lzma_index_stream_padding(),
		 * this defaults to zero. Stream Padding is always
		 * a multiple of four bytes.
		 */
		lzma_vli padding;

		lzma_vli reserved_vli1;
		lzma_vli reserved_vli2;
		lzma_vli reserved_vli3;
		lzma_vli reserved_vli4;
	} stream;

	struct {
		/**
		 * \brief       Block number in the file
		 *
		 * The first Block is 1.
		 */
		lzma_vli number_in_file;

		/**
		 * \brief       Compressed start offset of this Block
		 *
		 * This offset is relative to the beginning of the
		 * lzma_index (i.e. usually the beginning of the .xz file).
		 * Normally this is where you should seek in the .xz file
		 * to start decompressing this Block.
		 */
		lzma_vli compressed_file_offset;

		/**
		 * \brief       Uncompressed start offset of this Block
		 *
		 * This offset is relative to the beginning of the lzma_index
		 * (i.e. usually the beginning of the .xz file).
		 */
		lzma_vli uncompressed_file_offset;

		/**
		 * \brief       Block number in this Stream
		 *
		 * The first Block is 1.
		 */
		lzma_vli number_in_stream;

		/**
		 * \brief       Compressed start offset of this Block
		 *
		 * This offset is relative to the beginning of the Stream
		 * containing this Block.
		 */
		lzma_vli compressed_stream_offset;

		/**
		 * \brief       Uncompressed start offset of this Block
		 *
		 * This offset is relative to the beginning of the Stream
		 * containing this Block.
		 */
		lzma_vli uncompressed_stream_offset;

		/**
		 * \brief       Uncompressed size of this Block
		 *
		 * You should pass this to the Block decoder if you will
		 * decode this Block.
		 *
		 * When doing random-access reading, it is possible that
		 * the target offset is not exactly at Block boundary. One
		 * will need to compare the target offset against
		 * uncompressed_file_offset or uncompressed_stream_offset,
		 * and possibly decode and throw away some amount of data
		 * before reaching the target offset.
		 */
		lzma_vli uncompressed_size;

		/**
		 * \brief       Unpadded size of this Block
		 *
		 * You should pass this to the Block decoder if you will
		 * decode this Block.
		 */
		lzma_vli unpadded_size;

		/**
		 * \brief       Total compressed size
		 *
		 * This includes all headers and padding in this Block.
		 * This is useful if you need to know how many bytes
		 * the Block decoder will actually read.
		 */
		lzma_vli total_size;

		lzma_vli reserved_vli1;
		lzma_vli reserved_vli2;
		lzma_vli reserved_vli3;
		lzma_vli reserved_vli4;

		const void *reserved_ptr1;
		const void *reserved_ptr2;
		const void *reserved_ptr3;
		const void *reserved_ptr4;
	} block;

	/*
	 * Internal data which is used to store the state of the iterator.
	 * The exact format may vary between liblzma versions, so don't
	 * touch these in any way.
	 */
	union {
		const void *p;
		size_t s;
		lzma_vli v;
	} internal[6];
} lzma_index_iter;


/**
 * \brief       Operation mode for lzma_index_iter_next()
 */
typedef enum {
	LZMA_INDEX_ITER_ANY             = 0,
		/**<
		 * \brief       Get the next Block or Stream
		 *
		 * Go to the next Block if the current Stream has at least
		 * one Block left. Otherwise go to the next Stream even if
		 * it has no Blocks. If the Stream has no Blocks
		 * (lzma_index_iter.stream.block_count == 0),
		 * lzma_index_iter.block will have undefined values.
		 */

	LZMA_INDEX_ITER_STREAM          = 1,
		/**<
		 * \brief       Get the next Stream
		 *
		 * Go to the next Stream even if the current Stream has
		 * unread Blocks left. If the next Stream has at least one
		 * Block, the iterator will point to the first Block.
		 * If there are no Blocks, lzma_index_iter.block will have
		 * undefined values.
		 */

	LZMA_INDEX_ITER_BLOCK           = 2,
		/**<
		 * \brief       Get the next Block
		 *
		 * Go to the next Block if the current Stream has at least
		 * one Block left. If the current Stream has no Blocks left,
		 * the next Stream with at least one Block is located and
		 * the iterator will be made to point to the first Block of
		 * that Stream.
		 */

	LZMA_INDEX_ITER_NONEMPTY_BLOCK  = 3
		/**<
		 * \brief       Get the next non-empty Block
		 *
		 * This is like LZMA_INDEX_ITER_BLOCK except that it will
		 * skip Blocks whose Uncompressed Size is zero.
		 */

} lzma_index_iter_mode;


/**
 * \brief       Calculate memory usage of lzma_index
 *
 * On disk, the size of the Index field depends on both the number of Records
 * stored and how big values the Records store (due to variable-length integer
 * encoding). When the Index is kept in lzma_index structure, the memory usage
 * depends only on the number of Records/Blocks stored in the Index(es), and
 * in case of concatenated lzma_indexes, the number of Streams. The size in
 * RAM is almost always significantly bigger than in the encoded form on disk.
 *
 * This function calculates an approximate amount of memory needed to hold
 * the given number of Streams and Blocks in lzma_index structure. This
 * value may vary between CPU architectures and also between liblzma versions
 * if the internal implementation is modified.
 *
 * \param       streams     Number of Streams
 * \param       blocks      Number of Blocks
 *
 * \return      Approximate amount of memory needed to hold the given number
 *              of Streams and Blocks in a lzma_index structure.
 */
extern LZMA_API(uint64_t) lzma_index_memusage(
		lzma_vli streams, lzma_vli blocks) lzma_nothrow;


/**
 * \brief       Calculate the memory usage of an existing lzma_index
 *
 * This is a shorthand for lzma_index_memusage(lzma_index_stream_count(i),
 * lzma_index_block_count(i)).
 *
 * \param       i           lzma_index whose memory usage is calculated
 */
extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i)
		lzma_nothrow;


/**
 * \brief       Allocate and initialize a new lzma_index structure
 *
 * \param       allocator   Pointer to lzma_allocator
 *
 * \return      On success, a pointer to an empty initialized lzma_index is
 *              returned. If allocation fails, NULL is returned.
 */
extern LZMA_API(lzma_index *) lzma_index_init(lzma_allocator *allocator)
		lzma_nothrow;


/**
 * \brief       Deallocate lzma_index
 *
 * If i is NULL, this does nothing.
 *
 * \param       i           lzma_index to deallocate, or NULL
 * \param       allocator   Pointer to lzma_allocator
 */
extern LZMA_API(void) lzma_index_end(lzma_index *i, lzma_allocator *allocator)
		lzma_nothrow;


/**
 * \brief       Add a new Block to lzma_index
 *
 * \param       i                 Pointer to a lzma_index structure
 * \param       allocator         Pointer to lzma_allocator, or NULL to
 *                                use malloc()
 * \param       unpadded_size     Unpadded Size of a Block. This can be
 *                                calculated with lzma_block_unpadded_size()
 *                                after encoding or decoding the Block.
 * \param       uncompressed_size Uncompressed Size of a Block. This can be
 *                                taken directly from lzma_block structure
 *                                after encoding or decoding the Block.
 *
 * Appending a new Block does not invalidate iterators. For example,
 * if an iterator was pointing to the end of the lzma_index, after
 * lzma_index_append() it is possible to read the next Block with
 * an existing iterator.
 *
 * \return      - LZMA_OK
 *              - LZMA_MEM_ERROR
 *              - LZMA_DATA_ERROR: Compressed or uncompressed size of the
 *                Stream or size of the Index field would grow too big.
 *              - LZMA_PROG_ERROR
 */
extern LZMA_API(lzma_ret) lzma_index_append(
		lzma_index *i, lzma_allocator *allocator,
		lzma_vli unpadded_size, lzma_vli uncompressed_size)
		lzma_nothrow lzma_attr_warn_unused_result;


/**
 * \brief       Set the Stream Flags
 *
 * Set the Stream Flags of the last (and typically the only) Stream
 * in lzma_index. This can be useful when reading information from the
 * lzma_index, because to decode Blocks, knowing the integrity check type
 * is needed.
 *
 * The given Stream Flags are copied into internal preallocated structure
 * in the lzma_index, thus the caller doesn't need to keep the *stream_flags
 * available after calling this function.
 *
 * \param       i             lzma_index whose last Stream is modified
 * \param       stream_flags  Stream Flags to copy into the lzma_index
 *
 * \return      - LZMA_OK
 *              - LZMA_OPTIONS_ERROR: Unsupported stream_flags->version.
 *              - LZMA_PROG_ERROR
 */
extern LZMA_API(lzma_ret) lzma_index_stream_flags(
		lzma_index *i, const lzma_stream_flags *stream_flags)
		lzma_nothrow lzma_attr_warn_unused_result;


/**
 * \brief       Get the types of integrity Checks
 *
 * If lzma_index_stream_flags() is used to set the Stream Flags for
 * every Stream, lzma_index_checks() can be used to get a bitmask to
 * indicate which Check types have been used. It can be useful e.g. if
 * showing the Check types to the user.
 *
 * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10.
 *
 * \param       i           lzma_index to query
 *
 * \return      Bitmask of the Check types that have been used, with
 *              bit (1 << check_id) set for each used Check type.
 */
extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i)
		lzma_nothrow lzma_attr_pure;


/**
 * \brief       Set the amount of Stream Padding
 *
 * Set the amount of Stream Padding of the last (and typically the only)
 * Stream in the lzma_index. This is needed when planning to do random-access
 * reading within multiple concatenated Streams.
 *
 * By default, the amount of Stream Padding is assumed to be zero bytes.
 *
 * \param       i               lzma_index whose last Stream is modified
 * \param       stream_padding  Amount of Stream Padding after the Stream
 *
 * \return      - LZMA_OK
 *              - LZMA_DATA_ERROR: The file size would grow too big.
 *              - LZMA_PROG_ERROR
 */
extern LZMA_API(lzma_ret) lzma_index_stream_padding(
		lzma_index *i, lzma_vli stream_padding)
		lzma_nothrow lzma_attr_warn_unused_result;


/**
 * \brief       Get the number of Streams
 */
extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i)
		lzma_nothrow lzma_attr_pure;


/**
 * \brief       Get the number of Blocks
 *
 * This returns the total number of Blocks in lzma_index. To get number
 * of Blocks in individual Streams, use lzma_index_iter.
 */
extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i)
		lzma_nothrow lzma_attr_pure;


/**
 * \brief       Get the size of the Index field as bytes
 *
 * This is needed to verify the Backward Size field in the Stream Footer.
 */
extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i)
		lzma_nothrow lzma_attr_pure;


/**
 * \brief       Get the total size of the Stream
 *
 * If multiple lzma_indexes have been combined, this works as if the Blocks
 * were in a single Stream. This is useful if you are going to combine
 * Blocks from multiple Streams into a single new Stream.
 */
extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i)
		lzma_nothrow lzma_attr_pure;


/**
 * \brief       Get the total size of the Blocks
 *
 * This doesn't include the Stream Header, Stream Footer, Stream Padding,
 * or Index fields.
 */
extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i)
		lzma_nothrow lzma_attr_pure;


/**
 * \brief       Get the total size of the file
 *
 * When no lzma_indexes have been combined with lzma_index_cat() and there is
 * no Stream Padding, this function is identical to lzma_index_stream_size().
 * If multiple lzma_indexes have been combined, this includes also the headers
 * of each separate Stream and the possible Stream Padding fields.
 */
extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i)
		lzma_nothrow lzma_attr_pure;


/**
 * \brief       Get the uncompressed size of the file
 */
extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i)
		lzma_nothrow lzma_attr_pure;


/**
 * \brief       Initialize an iterator
 *
 * \param       iter    Pointer to a lzma_index_iter structure
 * \param       i       lzma_index to which the iterator will be associated
 *
 * This function associates the iterator with the given lzma_index, and calls
 * lzma_index_iter_rewind() on the iterator.
 *
 * This function doesn't allocate any memory, thus there is no
 * lzma_index_iter_end(). The iterator is valid as long as the
 * associated lzma_index is valid, that is, until lzma_index_end() or
 * using it as source in lzma_index_cat(). Specifically, the iterator doesn't
 * become invalid if new Blocks are added to the lzma_index with
 * lzma_index_append() or if the lzma_index is used as the destination in
 * lzma_index_cat().
 *
 * It is safe to make copies of an initialized lzma_index_iter, for example,
 * to easily restart reading at some particular position.
 */
extern LZMA_API(void) lzma_index_iter_init(
		lzma_index_iter *iter, const lzma_index *i) lzma_nothrow;


/**
 * \brief       Rewind the iterator
 *
 * Rewind the iterator so that next call to lzma_index_iter_next() will
 * return the first Block or Stream.
 *
 * \param       iter    Iterator to rewind
 */
extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter)
		lzma_nothrow;


/**
 * \brief       Get the next Block or Stream
 *
 * \param       iter    Iterator initialized with lzma_index_iter_init()
 * \param       mode    Specify what kind of information the caller wants
 *                      to get. See lzma_index_iter_mode for details.
 *
 * \return      If next Block or Stream matching the mode was found, *iter
 *              is updated and this function returns false. If no Block or
 *              Stream matching the mode is found, *iter is not modified
 *              and this function returns true. If mode is set to an unknown
 *              value, *iter is not modified and this function returns true.
 *
 * \note        The return value is inverted compared to a typical "found"
 *              flag: false means success and true means that nothing was
 *              found.
 */
extern LZMA_API(lzma_bool) lzma_index_iter_next(
		lzma_index_iter *iter, lzma_index_iter_mode mode)
		lzma_nothrow lzma_attr_warn_unused_result;


/**
 * \brief       Locate a Block
 *
 * If it is possible to seek in the .xz file, it is possible to parse
 * the Index field(s) and use lzma_index_iter_locate() to do random-access
 * reading with granularity of Block size.
 *
 * \param       iter    Iterator that was earlier initialized with
 *                      lzma_index_iter_init().
 * \param       target  Uncompressed target offset which the caller would
 *                      like to locate from the Stream
 *
 * If the target is smaller than the uncompressed size of the Stream (can be
 * checked with lzma_index_uncompressed_size()):
 *  - Information about the Stream and Block containing the requested
 *    uncompressed offset is stored into *iter.
 *  - Internal state of the iterator is adjusted so that
 *    lzma_index_iter_next() can be used to read subsequent Blocks or Streams.
 *  - This function returns false.
 *
 * If the target is greater than or equal to the uncompressed size of the
 * Stream, *iter is not modified, and this function returns true.
 *
 * \return      false if the Block containing the target offset was located,
 *              true otherwise (same inverted convention as
 *              lzma_index_iter_next()).
 */
extern LZMA_API(lzma_bool) lzma_index_iter_locate(
		lzma_index_iter *iter, lzma_vli target) lzma_nothrow;


/**
 * \brief       Concatenate lzma_indexes
 *
 * Concatenating lzma_indexes is useful when doing random-access reading in
 * multi-Stream .xz file, or when combining multiple Streams into single
 * Stream.
 *
 * \param       dest      lzma_index after which src is appended
 * \param       src       lzma_index to be appended after dest. If this
 *                        function succeeds, the memory allocated for src
 *                        is freed or moved to be part of dest, and all
 *                        iterators pointing to src will become invalid.
 * \param       allocator Custom memory allocator; can be NULL to use
 *                        malloc() and free().
 *
 * \return      - LZMA_OK: lzma_indexes were concatenated successfully.
 *                src is now a dangling pointer.
 *              - LZMA_DATA_ERROR: *dest would grow too big.
 *              - LZMA_MEM_ERROR
 *              - LZMA_PROG_ERROR
 */
extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *lzma_restrict dest,
		lzma_index *lzma_restrict src,
		lzma_allocator *allocator)
		lzma_nothrow lzma_attr_warn_unused_result;


/**
 * \brief       Duplicate lzma_index
 *
 * \param       i           lzma_index to be duplicated
 * \param       allocator   Pointer to lzma_allocator
 *
 * \return      A copy of the lzma_index, or NULL if memory allocation failed.
 */
extern LZMA_API(lzma_index *) lzma_index_dup(
		const lzma_index *i, lzma_allocator *allocator)
		lzma_nothrow lzma_attr_warn_unused_result;


/**
 * \brief       Initialize .xz Index encoder
 *
 * \param       strm        Pointer to properly prepared lzma_stream
 * \param       i           Pointer to lzma_index which should be encoded.
 *
 * The only valid action value for lzma_code() is LZMA_RUN.
 *
 * \return      - LZMA_OK: Initialization succeeded, continue with lzma_code().
 *              - LZMA_MEM_ERROR
 *              - LZMA_PROG_ERROR
 */
extern LZMA_API(lzma_ret) lzma_index_encoder(
		lzma_stream *strm, const lzma_index *i)
		lzma_nothrow lzma_attr_warn_unused_result;


/**
 * \brief       Initialize .xz Index decoder
 *
 * \param       strm        Pointer to properly prepared lzma_stream
 * \param       i           The decoded Index will be made available via
 *                          this pointer. Initially this function will
 *                          set *i to NULL (the old value is ignored). If
 *                          decoding succeeds (lzma_code() returns
 *                          LZMA_STREAM_END), *i will be set to point
 *                          to a new lzma_index, which the application
 *                          has to later free with lzma_index_end().
 * \param       memlimit    How much memory the resulting lzma_index is
 *                          allowed to require.
 *
 * The only valid action value for lzma_code() is LZMA_RUN.
 *
 * \return      - LZMA_OK: Initialization succeeded, continue with lzma_code().
 *              - LZMA_MEM_ERROR
 *              - LZMA_MEMLIMIT_ERROR
 *              - LZMA_PROG_ERROR
 */
extern LZMA_API(lzma_ret) lzma_index_decoder(
		lzma_stream *strm, lzma_index **i, uint64_t memlimit)
		lzma_nothrow lzma_attr_warn_unused_result;


/**
 * \brief       Single-call .xz Index encoder
 *
 * \param       i         lzma_index to be encoded
 * \param       out       Beginning of the output buffer
 * \param       out_pos   The next byte will be written to out[*out_pos].
 *                        *out_pos is updated only if encoding succeeds.
 * \param       out_size  Size of the out buffer; the first byte into
 *                        which no data is written to is out[out_size].
 *
 * \return      - LZMA_OK: Encoding was successful.
 *              - LZMA_BUF_ERROR: Output buffer is too small. Use
 *                lzma_index_size() to find out how much output
 *                space is needed.
 *              - LZMA_PROG_ERROR
 *
 * \note        This function doesn't take allocator argument since all
 *              the internal data is allocated on stack.
 */
extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i,
		uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow;


/**
 * \brief       Single-call .xz Index decoder
 *
 * \param       i           If decoding succeeds, *i will point to a new
 *                          lzma_index, which the application has to
 *                          later free with lzma_index_end(). If an error
 *                          occurs, *i will be NULL. The old value of *i
 *                          is always ignored and thus doesn't need to be
 *                          initialized by the caller.
 * \param       memlimit    Pointer to how much memory the resulting
 *                          lzma_index is allowed to require. The value
 *                          pointed by this pointer is modified if and only
 *                          if LZMA_MEMLIMIT_ERROR is returned.
 * \param       allocator   Pointer to lzma_allocator, or NULL to use malloc()
 * \param       in          Beginning of the input buffer
 * \param       in_pos      The next byte will be read from in[*in_pos].
 *                          *in_pos is updated only if decoding succeeds.
 * \param       in_size     Size of the input buffer; the first byte that
 *                          won't be read is in[in_size].
 *
 * \return      - LZMA_OK: Decoding was successful.
 *              - LZMA_MEM_ERROR
 *              - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached.
 *                The minimum required memlimit value was stored to *memlimit.
 *              - LZMA_DATA_ERROR
 *              - LZMA_PROG_ERROR
 */
extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i,
		uint64_t *memlimit, lzma_allocator *allocator,
		const uint8_t *in, size_t *in_pos, size_t in_size)
		lzma_nothrow;