1*3b35e7eeSXin LI // SPDX-License-Identifier: 0BSD 2*3b35e7eeSXin LI 381ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 481ad8388SMartin Matuska // 581ad8388SMartin Matuska /// \file block_buffer_encoder.c 681ad8388SMartin Matuska /// \brief Single-call .xz Block encoder 781ad8388SMartin Matuska // 881ad8388SMartin Matuska // Author: Lasse Collin 981ad8388SMartin Matuska // 1081ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 1181ad8388SMartin Matuska 1253200025SRui Paulo #include "block_buffer_encoder.h" 1381ad8388SMartin Matuska #include "block_encoder.h" 1481ad8388SMartin Matuska #include "filter_encoder.h" 1581ad8388SMartin Matuska #include "lzma2_encoder.h" 1681ad8388SMartin Matuska #include "check.h" 1781ad8388SMartin Matuska 1881ad8388SMartin Matuska 1981ad8388SMartin Matuska /// Estimate the maximum size of the Block Header and Check fields for 2081ad8388SMartin Matuska /// a Block that uses LZMA2 uncompressed chunks. We could use 2181ad8388SMartin Matuska /// lzma_block_header_size() but this is simpler. 2281ad8388SMartin Matuska /// 2381ad8388SMartin Matuska /// Block Header Size + Block Flags + Compressed Size 2481ad8388SMartin Matuska /// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check 2581ad8388SMartin Matuska /// and round up to the next multiple of four to take Header Padding 2681ad8388SMartin Matuska /// into account. 2781ad8388SMartin Matuska #define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \ 2881ad8388SMartin Matuska + LZMA_CHECK_SIZE_MAX + 3) & ~3) 2981ad8388SMartin Matuska 3081ad8388SMartin Matuska 3153200025SRui Paulo static uint64_t 3253200025SRui Paulo lzma2_bound(uint64_t uncompressed_size) 3381ad8388SMartin Matuska { 3481ad8388SMartin Matuska // Prevent integer overflow in overhead calculation. 3581ad8388SMartin Matuska if (uncompressed_size > COMPRESSED_SIZE_MAX) 3681ad8388SMartin Matuska return 0; 3781ad8388SMartin Matuska 3881ad8388SMartin Matuska // Calculate the exact overhead of the LZMA2 headers: Round 3981ad8388SMartin Matuska // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX, 4081ad8388SMartin Matuska // multiply by the size of per-chunk header, and add one byte for 4181ad8388SMartin Matuska // the end marker. 4253200025SRui Paulo const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1) 4381ad8388SMartin Matuska / LZMA2_CHUNK_MAX) 4481ad8388SMartin Matuska * LZMA2_HEADER_UNCOMPRESSED + 1; 4581ad8388SMartin Matuska 4681ad8388SMartin Matuska // Catch the possible integer overflow. 4781ad8388SMartin Matuska if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size) 4881ad8388SMartin Matuska return 0; 4981ad8388SMartin Matuska 5081ad8388SMartin Matuska return uncompressed_size + overhead; 5181ad8388SMartin Matuska } 5281ad8388SMartin Matuska 5381ad8388SMartin Matuska 5453200025SRui Paulo extern uint64_t 5553200025SRui Paulo lzma_block_buffer_bound64(uint64_t uncompressed_size) 5681ad8388SMartin Matuska { 5753200025SRui Paulo // If the data doesn't compress, we always use uncompressed 5853200025SRui Paulo // LZMA2 chunks. 5953200025SRui Paulo uint64_t lzma2_size = lzma2_bound(uncompressed_size); 6081ad8388SMartin Matuska if (lzma2_size == 0) 6181ad8388SMartin Matuska return 0; 6281ad8388SMartin Matuska 6381ad8388SMartin Matuska // Take Block Padding into account. 6453200025SRui Paulo lzma2_size = (lzma2_size + 3) & ~UINT64_C(3); 6581ad8388SMartin Matuska 6653200025SRui Paulo // No risk of integer overflow because lzma2_bound() already takes 6781ad8388SMartin Matuska // into account the size of the headers in the Block. 6853200025SRui Paulo return HEADERS_BOUND + lzma2_size; 6953200025SRui Paulo } 7053200025SRui Paulo 7153200025SRui Paulo 7253200025SRui Paulo extern LZMA_API(size_t) 7353200025SRui Paulo lzma_block_buffer_bound(size_t uncompressed_size) 7453200025SRui Paulo { 7553200025SRui Paulo uint64_t ret = lzma_block_buffer_bound64(uncompressed_size); 7653200025SRui Paulo 7753200025SRui Paulo #if SIZE_MAX < UINT64_MAX 7853200025SRui Paulo // Catch the possible integer overflow on 32-bit systems. 7953200025SRui Paulo if (ret > SIZE_MAX) 8081ad8388SMartin Matuska return 0; 8181ad8388SMartin Matuska #endif 8281ad8388SMartin Matuska 8353200025SRui Paulo return ret; 8481ad8388SMartin Matuska } 8581ad8388SMartin Matuska 8681ad8388SMartin Matuska 8781ad8388SMartin Matuska static lzma_ret 8881ad8388SMartin Matuska block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size, 8981ad8388SMartin Matuska uint8_t *out, size_t *out_pos, size_t out_size) 9081ad8388SMartin Matuska { 9181ad8388SMartin Matuska // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at 9281ad8388SMartin Matuska // all, but LZMA2 always requires a dictionary, so use the minimum 9381ad8388SMartin Matuska // value to minimize memory usage of the decoder. 9481ad8388SMartin Matuska lzma_options_lzma lzma2 = { 9581ad8388SMartin Matuska .dict_size = LZMA_DICT_SIZE_MIN, 9681ad8388SMartin Matuska }; 9781ad8388SMartin Matuska 9881ad8388SMartin Matuska lzma_filter filters[2]; 9981ad8388SMartin Matuska filters[0].id = LZMA_FILTER_LZMA2; 10081ad8388SMartin Matuska filters[0].options = &lzma2; 10181ad8388SMartin Matuska filters[1].id = LZMA_VLI_UNKNOWN; 10281ad8388SMartin Matuska 10381ad8388SMartin Matuska // Set the above filter options to *block temporarily so that we can 10481ad8388SMartin Matuska // encode the Block Header. 10581ad8388SMartin Matuska lzma_filter *filters_orig = block->filters; 10681ad8388SMartin Matuska block->filters = filters; 10781ad8388SMartin Matuska 10881ad8388SMartin Matuska if (lzma_block_header_size(block) != LZMA_OK) { 10981ad8388SMartin Matuska block->filters = filters_orig; 11081ad8388SMartin Matuska return LZMA_PROG_ERROR; 11181ad8388SMartin Matuska } 11281ad8388SMartin Matuska 11381ad8388SMartin Matuska // Check that there's enough output space. The caller has already 11481ad8388SMartin Matuska // set block->compressed_size to what lzma2_bound() has returned, 11581ad8388SMartin Matuska // so we can reuse that value. We know that compressed_size is a 11681ad8388SMartin Matuska // known valid VLI and header_size is a small value so their sum 11781ad8388SMartin Matuska // will never overflow. 11881ad8388SMartin Matuska assert(block->compressed_size == lzma2_bound(in_size)); 11981ad8388SMartin Matuska if (out_size - *out_pos 12081ad8388SMartin Matuska < block->header_size + block->compressed_size) { 12181ad8388SMartin Matuska block->filters = filters_orig; 12281ad8388SMartin Matuska return LZMA_BUF_ERROR; 12381ad8388SMartin Matuska } 12481ad8388SMartin Matuska 12581ad8388SMartin Matuska if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) { 12681ad8388SMartin Matuska block->filters = filters_orig; 12781ad8388SMartin Matuska return LZMA_PROG_ERROR; 12881ad8388SMartin Matuska } 12981ad8388SMartin Matuska 13081ad8388SMartin Matuska block->filters = filters_orig; 13181ad8388SMartin Matuska *out_pos += block->header_size; 13281ad8388SMartin Matuska 13381ad8388SMartin Matuska // Encode the data using LZMA2 uncompressed chunks. 13481ad8388SMartin Matuska size_t in_pos = 0; 13581ad8388SMartin Matuska uint8_t control = 0x01; // Dictionary reset 13681ad8388SMartin Matuska 13781ad8388SMartin Matuska while (in_pos < in_size) { 13881ad8388SMartin Matuska // Control byte: Indicate uncompressed chunk, of which 13981ad8388SMartin Matuska // the first resets the dictionary. 14081ad8388SMartin Matuska out[(*out_pos)++] = control; 14181ad8388SMartin Matuska control = 0x02; // No dictionary reset 14281ad8388SMartin Matuska 14381ad8388SMartin Matuska // Size of the uncompressed chunk 14481ad8388SMartin Matuska const size_t copy_size 145e0f0e66dSMartin Matuska = my_min(in_size - in_pos, LZMA2_CHUNK_MAX); 14681ad8388SMartin Matuska out[(*out_pos)++] = (copy_size - 1) >> 8; 14781ad8388SMartin Matuska out[(*out_pos)++] = (copy_size - 1) & 0xFF; 14881ad8388SMartin Matuska 14981ad8388SMartin Matuska // The actual data 15081ad8388SMartin Matuska assert(*out_pos + copy_size <= out_size); 15181ad8388SMartin Matuska memcpy(out + *out_pos, in + in_pos, copy_size); 15281ad8388SMartin Matuska 15381ad8388SMartin Matuska in_pos += copy_size; 15481ad8388SMartin Matuska *out_pos += copy_size; 15581ad8388SMartin Matuska } 15681ad8388SMartin Matuska 15781ad8388SMartin Matuska // End marker 15881ad8388SMartin Matuska out[(*out_pos)++] = 0x00; 15981ad8388SMartin Matuska assert(*out_pos <= out_size); 16081ad8388SMartin Matuska 16181ad8388SMartin Matuska return LZMA_OK; 16281ad8388SMartin Matuska } 16381ad8388SMartin Matuska 16481ad8388SMartin Matuska 16581ad8388SMartin Matuska static lzma_ret 16653200025SRui Paulo block_encode_normal(lzma_block *block, const lzma_allocator *allocator, 16781ad8388SMartin Matuska const uint8_t *in, size_t in_size, 16881ad8388SMartin Matuska uint8_t *out, size_t *out_pos, size_t out_size) 16981ad8388SMartin Matuska { 17081ad8388SMartin Matuska // Find out the size of the Block Header. 17181ad8388SMartin Matuska return_if_error(lzma_block_header_size(block)); 17281ad8388SMartin Matuska 17381ad8388SMartin Matuska // Reserve space for the Block Header and skip it for now. 17481ad8388SMartin Matuska if (out_size - *out_pos <= block->header_size) 17581ad8388SMartin Matuska return LZMA_BUF_ERROR; 17681ad8388SMartin Matuska 17781ad8388SMartin Matuska const size_t out_start = *out_pos; 17881ad8388SMartin Matuska *out_pos += block->header_size; 17981ad8388SMartin Matuska 18081ad8388SMartin Matuska // Limit out_size so that we stop encoding if the output would grow 18181ad8388SMartin Matuska // bigger than what uncompressed Block would be. 18281ad8388SMartin Matuska if (out_size - *out_pos > block->compressed_size) 18381ad8388SMartin Matuska out_size = *out_pos + block->compressed_size; 18481ad8388SMartin Matuska 18581ad8388SMartin Matuska // TODO: In many common cases this could be optimized to use 18681ad8388SMartin Matuska // significantly less memory. 18781ad8388SMartin Matuska lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT; 18881ad8388SMartin Matuska lzma_ret ret = lzma_raw_encoder_init( 18981ad8388SMartin Matuska &raw_encoder, allocator, block->filters); 19081ad8388SMartin Matuska 19181ad8388SMartin Matuska if (ret == LZMA_OK) { 19281ad8388SMartin Matuska size_t in_pos = 0; 19381ad8388SMartin Matuska ret = raw_encoder.code(raw_encoder.coder, allocator, 19481ad8388SMartin Matuska in, &in_pos, in_size, out, out_pos, out_size, 19581ad8388SMartin Matuska LZMA_FINISH); 19681ad8388SMartin Matuska } 19781ad8388SMartin Matuska 19881ad8388SMartin Matuska // NOTE: This needs to be run even if lzma_raw_encoder_init() failed. 19981ad8388SMartin Matuska lzma_next_end(&raw_encoder, allocator); 20081ad8388SMartin Matuska 20181ad8388SMartin Matuska if (ret == LZMA_STREAM_END) { 20281ad8388SMartin Matuska // Compression was successful. Write the Block Header. 20381ad8388SMartin Matuska block->compressed_size 20481ad8388SMartin Matuska = *out_pos - (out_start + block->header_size); 20581ad8388SMartin Matuska ret = lzma_block_header_encode(block, out + out_start); 20681ad8388SMartin Matuska if (ret != LZMA_OK) 20781ad8388SMartin Matuska ret = LZMA_PROG_ERROR; 20881ad8388SMartin Matuska 20981ad8388SMartin Matuska } else if (ret == LZMA_OK) { 21081ad8388SMartin Matuska // Output buffer became full. 21181ad8388SMartin Matuska ret = LZMA_BUF_ERROR; 21281ad8388SMartin Matuska } 21381ad8388SMartin Matuska 21481ad8388SMartin Matuska // Reset *out_pos if something went wrong. 21581ad8388SMartin Matuska if (ret != LZMA_OK) 21681ad8388SMartin Matuska *out_pos = out_start; 21781ad8388SMartin Matuska 21881ad8388SMartin Matuska return ret; 21981ad8388SMartin Matuska } 22081ad8388SMartin Matuska 22181ad8388SMartin Matuska 22253200025SRui Paulo static lzma_ret 22353200025SRui Paulo block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, 22481ad8388SMartin Matuska const uint8_t *in, size_t in_size, 22553200025SRui Paulo uint8_t *out, size_t *out_pos, size_t out_size, 22653200025SRui Paulo bool try_to_compress) 22781ad8388SMartin Matuska { 228e24134bcSMartin Matuska // Validate the arguments. 229e24134bcSMartin Matuska if (block == NULL || (in == NULL && in_size != 0) || out == NULL 23081ad8388SMartin Matuska || out_pos == NULL || *out_pos > out_size) 23181ad8388SMartin Matuska return LZMA_PROG_ERROR; 23281ad8388SMartin Matuska 233e24134bcSMartin Matuska // The contents of the structure may depend on the version so 234e24134bcSMartin Matuska // check the version before validating the contents of *block. 23553200025SRui Paulo if (block->version > 1) 23681ad8388SMartin Matuska return LZMA_OPTIONS_ERROR; 23781ad8388SMartin Matuska 238e24134bcSMartin Matuska if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX 23953200025SRui Paulo || (try_to_compress && block->filters == NULL)) 240e24134bcSMartin Matuska return LZMA_PROG_ERROR; 241e24134bcSMartin Matuska 242e24134bcSMartin Matuska if (!lzma_check_is_supported(block->check)) 243e24134bcSMartin Matuska return LZMA_UNSUPPORTED_CHECK; 244e24134bcSMartin Matuska 24581ad8388SMartin Matuska // Size of a Block has to be a multiple of four, so limit the size 24681ad8388SMartin Matuska // here already. This way we don't need to check it again when adding 24781ad8388SMartin Matuska // Block Padding. 24881ad8388SMartin Matuska out_size -= (out_size - *out_pos) & 3; 24981ad8388SMartin Matuska 25081ad8388SMartin Matuska // Get the size of the Check field. 25181ad8388SMartin Matuska const size_t check_size = lzma_check_size(block->check); 252e24134bcSMartin Matuska assert(check_size != UINT32_MAX); 25381ad8388SMartin Matuska 25481ad8388SMartin Matuska // Reserve space for the Check field. 25581ad8388SMartin Matuska if (out_size - *out_pos <= check_size) 25681ad8388SMartin Matuska return LZMA_BUF_ERROR; 25781ad8388SMartin Matuska 25881ad8388SMartin Matuska out_size -= check_size; 25981ad8388SMartin Matuska 26053200025SRui Paulo // Initialize block->uncompressed_size and calculate the worst-case 26153200025SRui Paulo // value for block->compressed_size. 26253200025SRui Paulo block->uncompressed_size = in_size; 26353200025SRui Paulo block->compressed_size = lzma2_bound(in_size); 26453200025SRui Paulo if (block->compressed_size == 0) 26553200025SRui Paulo return LZMA_DATA_ERROR; 26653200025SRui Paulo 26781ad8388SMartin Matuska // Do the actual compression. 26853200025SRui Paulo lzma_ret ret = LZMA_BUF_ERROR; 26953200025SRui Paulo if (try_to_compress) 27053200025SRui Paulo ret = block_encode_normal(block, allocator, 27181ad8388SMartin Matuska in, in_size, out, out_pos, out_size); 27253200025SRui Paulo 27381ad8388SMartin Matuska if (ret != LZMA_OK) { 27481ad8388SMartin Matuska // If the error was something else than output buffer 27581ad8388SMartin Matuska // becoming full, return the error now. 27681ad8388SMartin Matuska if (ret != LZMA_BUF_ERROR) 27781ad8388SMartin Matuska return ret; 27881ad8388SMartin Matuska 2791f3ced26SXin LI // The data was incompressible (at least with the options 28081ad8388SMartin Matuska // given to us) or the output buffer was too small. Use the 28181ad8388SMartin Matuska // uncompressed chunks of LZMA2 to wrap the data into a valid 28281ad8388SMartin Matuska // Block. If we haven't been given enough output space, even 28381ad8388SMartin Matuska // this may fail. 28481ad8388SMartin Matuska return_if_error(block_encode_uncompressed(block, in, in_size, 28581ad8388SMartin Matuska out, out_pos, out_size)); 28681ad8388SMartin Matuska } 28781ad8388SMartin Matuska 28881ad8388SMartin Matuska assert(*out_pos <= out_size); 28981ad8388SMartin Matuska 29081ad8388SMartin Matuska // Block Padding. No buffer overflow here, because we already adjusted 29181ad8388SMartin Matuska // out_size so that (out_size - out_start) is a multiple of four. 29281ad8388SMartin Matuska // Thus, if the buffer is full, the loop body can never run. 29381ad8388SMartin Matuska for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) { 29481ad8388SMartin Matuska assert(*out_pos < out_size); 29581ad8388SMartin Matuska out[(*out_pos)++] = 0x00; 29681ad8388SMartin Matuska } 29781ad8388SMartin Matuska 29881ad8388SMartin Matuska // If there's no Check field, we are done now. 29981ad8388SMartin Matuska if (check_size > 0) { 30081ad8388SMartin Matuska // Calculate the integrity check. We reserved space for 30181ad8388SMartin Matuska // the Check field earlier so we don't need to check for 30281ad8388SMartin Matuska // available output space here. 30381ad8388SMartin Matuska lzma_check_state check; 30481ad8388SMartin Matuska lzma_check_init(&check, block->check); 30581ad8388SMartin Matuska lzma_check_update(&check, block->check, in, in_size); 30681ad8388SMartin Matuska lzma_check_finish(&check, block->check); 30781ad8388SMartin Matuska 30881ad8388SMartin Matuska memcpy(block->raw_check, check.buffer.u8, check_size); 30981ad8388SMartin Matuska memcpy(out + *out_pos, check.buffer.u8, check_size); 31081ad8388SMartin Matuska *out_pos += check_size; 31181ad8388SMartin Matuska } 31281ad8388SMartin Matuska 31381ad8388SMartin Matuska return LZMA_OK; 31481ad8388SMartin Matuska } 31553200025SRui Paulo 31653200025SRui Paulo 31753200025SRui Paulo extern LZMA_API(lzma_ret) 31853200025SRui Paulo lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator, 31953200025SRui Paulo const uint8_t *in, size_t in_size, 32053200025SRui Paulo uint8_t *out, size_t *out_pos, size_t out_size) 32153200025SRui Paulo { 32253200025SRui Paulo return block_buffer_encode(block, allocator, 32353200025SRui Paulo in, in_size, out, out_pos, out_size, true); 32453200025SRui Paulo } 32553200025SRui Paulo 32653200025SRui Paulo 3279e6bbe47SXin LI #ifdef HAVE_SYMBOL_VERSIONS_LINUX 3289e6bbe47SXin LI // This is for compatibility with binaries linked against liblzma that 3299e6bbe47SXin LI // has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. 3309e6bbe47SXin LI LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2", 3319e6bbe47SXin LI lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block, 3329e6bbe47SXin LI const uint8_t *in, size_t in_size, 3339e6bbe47SXin LI uint8_t *out, size_t *out_pos, size_t out_size) 3349e6bbe47SXin LI lzma_nothrow lzma_attr_warn_unused_result 3359e6bbe47SXin LI __attribute__((__alias__("lzma_block_uncomp_encode_52"))); 3369e6bbe47SXin LI 3379e6bbe47SXin LI LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2", 3389e6bbe47SXin LI lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block, 3399e6bbe47SXin LI const uint8_t *in, size_t in_size, 3409e6bbe47SXin LI uint8_t *out, size_t *out_pos, size_t out_size) 3419e6bbe47SXin LI lzma_nothrow lzma_attr_warn_unused_result; 3429e6bbe47SXin LI 3439e6bbe47SXin LI #define lzma_block_uncomp_encode lzma_block_uncomp_encode_52 3449e6bbe47SXin LI #endif 34553200025SRui Paulo extern LZMA_API(lzma_ret) 34653200025SRui Paulo lzma_block_uncomp_encode(lzma_block *block, 34753200025SRui Paulo const uint8_t *in, size_t in_size, 34853200025SRui Paulo uint8_t *out, size_t *out_pos, size_t out_size) 34953200025SRui Paulo { 35053200025SRui Paulo // It won't allocate any memory from heap so no need 35153200025SRui Paulo // for lzma_allocator. 35253200025SRui Paulo return block_buffer_encode(block, NULL, 35353200025SRui Paulo in, in_size, out, out_pos, out_size, false); 35453200025SRui Paulo } 355