1*3b35e7eeSXin LI // SPDX-License-Identifier: 0BSD 2*3b35e7eeSXin LI 381ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 481ad8388SMartin Matuska // 581ad8388SMartin Matuska /// \file alone_decoder.c 681ad8388SMartin Matuska /// \brief Decoder for LZMA_Alone files 781ad8388SMartin Matuska // 881ad8388SMartin Matuska // Author: Lasse Collin 981ad8388SMartin Matuska // 1081ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 1181ad8388SMartin Matuska 1281ad8388SMartin Matuska #include "alone_decoder.h" 1381ad8388SMartin Matuska #include "lzma_decoder.h" 1481ad8388SMartin Matuska #include "lz_decoder.h" 1581ad8388SMartin Matuska 1681ad8388SMartin Matuska 171456f0f9SXin LI typedef struct { 1881ad8388SMartin Matuska lzma_next_coder next; 1981ad8388SMartin Matuska 2081ad8388SMartin Matuska enum { 2181ad8388SMartin Matuska SEQ_PROPERTIES, 2281ad8388SMartin Matuska SEQ_DICTIONARY_SIZE, 2381ad8388SMartin Matuska SEQ_UNCOMPRESSED_SIZE, 2481ad8388SMartin Matuska SEQ_CODER_INIT, 2581ad8388SMartin Matuska SEQ_CODE, 2681ad8388SMartin Matuska } sequence; 2781ad8388SMartin Matuska 2842b10a37SXin LI /// If true, reject files that are unlikely to be .lzma files. 2942b10a37SXin LI /// If false, more non-.lzma files get accepted and will give 3042b10a37SXin LI /// LZMA_DATA_ERROR either immediately or after a few output bytes. 3142b10a37SXin LI bool picky; 3242b10a37SXin LI 3381ad8388SMartin Matuska /// Position in the header fields 3481ad8388SMartin Matuska size_t pos; 3581ad8388SMartin Matuska 3681ad8388SMartin Matuska /// Uncompressed size decoded from the header 3781ad8388SMartin Matuska lzma_vli uncompressed_size; 3881ad8388SMartin Matuska 3981ad8388SMartin Matuska /// Memory usage limit 4081ad8388SMartin Matuska uint64_t memlimit; 4181ad8388SMartin Matuska 4281ad8388SMartin Matuska /// Amount of memory actually needed (only an estimate) 4381ad8388SMartin Matuska uint64_t memusage; 4481ad8388SMartin Matuska 4581ad8388SMartin Matuska /// Options decoded from the header needed to initialize 4681ad8388SMartin Matuska /// the LZMA decoder 4781ad8388SMartin Matuska lzma_options_lzma options; 481456f0f9SXin LI } lzma_alone_coder; 4981ad8388SMartin Matuska 5081ad8388SMartin Matuska 5181ad8388SMartin Matuska static lzma_ret 52a8675d92SXin LI alone_decode(void *coder_ptr, const lzma_allocator *allocator, 5381ad8388SMartin Matuska const uint8_t *restrict in, size_t *restrict in_pos, 5481ad8388SMartin Matuska size_t in_size, uint8_t *restrict out, 5581ad8388SMartin Matuska size_t *restrict out_pos, size_t out_size, 5681ad8388SMartin Matuska lzma_action action) 5781ad8388SMartin Matuska { 581456f0f9SXin LI lzma_alone_coder *coder = coder_ptr; 591456f0f9SXin LI 6081ad8388SMartin Matuska while (*out_pos < out_size 6181ad8388SMartin Matuska && (coder->sequence == SEQ_CODE || *in_pos < in_size)) 6281ad8388SMartin Matuska switch (coder->sequence) { 6381ad8388SMartin Matuska case SEQ_PROPERTIES: 6481ad8388SMartin Matuska if (lzma_lzma_lclppb_decode(&coder->options, in[*in_pos])) 6581ad8388SMartin Matuska return LZMA_FORMAT_ERROR; 6681ad8388SMartin Matuska 6781ad8388SMartin Matuska coder->sequence = SEQ_DICTIONARY_SIZE; 6881ad8388SMartin Matuska ++*in_pos; 6981ad8388SMartin Matuska break; 7081ad8388SMartin Matuska 7181ad8388SMartin Matuska case SEQ_DICTIONARY_SIZE: 7281ad8388SMartin Matuska coder->options.dict_size 7381ad8388SMartin Matuska |= (size_t)(in[*in_pos]) << (coder->pos * 8); 7481ad8388SMartin Matuska 7581ad8388SMartin Matuska if (++coder->pos == 4) { 7642b10a37SXin LI if (coder->picky && coder->options.dict_size 7742b10a37SXin LI != UINT32_MAX) { 7881ad8388SMartin Matuska // A hack to ditch tons of false positives: 7981ad8388SMartin Matuska // We allow only dictionary sizes that are 8081ad8388SMartin Matuska // 2^n or 2^n + 2^(n-1). LZMA_Alone created 8181ad8388SMartin Matuska // only files with 2^n, but accepts any 8242b10a37SXin LI // dictionary size. 8381ad8388SMartin Matuska uint32_t d = coder->options.dict_size - 1; 8481ad8388SMartin Matuska d |= d >> 2; 8581ad8388SMartin Matuska d |= d >> 3; 8681ad8388SMartin Matuska d |= d >> 4; 8781ad8388SMartin Matuska d |= d >> 8; 8881ad8388SMartin Matuska d |= d >> 16; 8981ad8388SMartin Matuska ++d; 9081ad8388SMartin Matuska 9181ad8388SMartin Matuska if (d != coder->options.dict_size) 9281ad8388SMartin Matuska return LZMA_FORMAT_ERROR; 9381ad8388SMartin Matuska } 9481ad8388SMartin Matuska 9581ad8388SMartin Matuska coder->pos = 0; 9681ad8388SMartin Matuska coder->sequence = SEQ_UNCOMPRESSED_SIZE; 9781ad8388SMartin Matuska } 9881ad8388SMartin Matuska 9981ad8388SMartin Matuska ++*in_pos; 10081ad8388SMartin Matuska break; 10181ad8388SMartin Matuska 10281ad8388SMartin Matuska case SEQ_UNCOMPRESSED_SIZE: 10381ad8388SMartin Matuska coder->uncompressed_size 10481ad8388SMartin Matuska |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); 10581ad8388SMartin Matuska ++*in_pos; 10681ad8388SMartin Matuska if (++coder->pos < 8) 10781ad8388SMartin Matuska break; 10881ad8388SMartin Matuska 10981ad8388SMartin Matuska // Another hack to ditch false positives: Assume that 11081ad8388SMartin Matuska // if the uncompressed size is known, it must be less 11142b10a37SXin LI // than 256 GiB. 11273ed8e77SXin LI // 11373ed8e77SXin LI // FIXME? Without picky we allow > LZMA_VLI_MAX which doesn't 11473ed8e77SXin LI // really matter in this specific situation (> LZMA_VLI_MAX is 11573ed8e77SXin LI // safe in the LZMA decoder) but it's somewhat weird still. 11642b10a37SXin LI if (coder->picky 11742b10a37SXin LI && coder->uncompressed_size != LZMA_VLI_UNKNOWN 11881ad8388SMartin Matuska && coder->uncompressed_size 11981ad8388SMartin Matuska >= (LZMA_VLI_C(1) << 38)) 12081ad8388SMartin Matuska return LZMA_FORMAT_ERROR; 12181ad8388SMartin Matuska 12273ed8e77SXin LI // Use LZMA_FILTER_LZMA1EXT features to specify the 12373ed8e77SXin LI // uncompressed size and that the end marker is allowed 12473ed8e77SXin LI // even when the uncompressed size is known. Both .lzma 12573ed8e77SXin LI // header and LZMA1EXT use UINT64_MAX indicate that size 12673ed8e77SXin LI // is unknown. 12773ed8e77SXin LI coder->options.ext_flags = LZMA_LZMA1EXT_ALLOW_EOPM; 12873ed8e77SXin LI lzma_set_ext_size(coder->options, coder->uncompressed_size); 12973ed8e77SXin LI 13081ad8388SMartin Matuska // Calculate the memory usage so that it is ready 13181ad8388SMartin Matuska // for SEQ_CODER_INIT. 13281ad8388SMartin Matuska coder->memusage = lzma_lzma_decoder_memusage(&coder->options) 13381ad8388SMartin Matuska + LZMA_MEMUSAGE_BASE; 13481ad8388SMartin Matuska 13581ad8388SMartin Matuska coder->pos = 0; 13681ad8388SMartin Matuska coder->sequence = SEQ_CODER_INIT; 13781ad8388SMartin Matuska 13881ad8388SMartin Matuska // Fall through 13981ad8388SMartin Matuska 14081ad8388SMartin Matuska case SEQ_CODER_INIT: { 14181ad8388SMartin Matuska if (coder->memusage > coder->memlimit) 14281ad8388SMartin Matuska return LZMA_MEMLIMIT_ERROR; 14381ad8388SMartin Matuska 14481ad8388SMartin Matuska lzma_filter_info filters[2] = { 14581ad8388SMartin Matuska { 14673ed8e77SXin LI .id = LZMA_FILTER_LZMA1EXT, 14781ad8388SMartin Matuska .init = &lzma_lzma_decoder_init, 14881ad8388SMartin Matuska .options = &coder->options, 14981ad8388SMartin Matuska }, { 15081ad8388SMartin Matuska .init = NULL, 15181ad8388SMartin Matuska } 15281ad8388SMartin Matuska }; 15381ad8388SMartin Matuska 15473ed8e77SXin LI return_if_error(lzma_next_filter_init(&coder->next, 15573ed8e77SXin LI allocator, filters)); 15681ad8388SMartin Matuska 15781ad8388SMartin Matuska coder->sequence = SEQ_CODE; 15881ad8388SMartin Matuska break; 15981ad8388SMartin Matuska } 16081ad8388SMartin Matuska 16181ad8388SMartin Matuska case SEQ_CODE: { 16281ad8388SMartin Matuska return coder->next.code(coder->next.coder, 16381ad8388SMartin Matuska allocator, in, in_pos, in_size, 16481ad8388SMartin Matuska out, out_pos, out_size, action); 16581ad8388SMartin Matuska } 16681ad8388SMartin Matuska 16781ad8388SMartin Matuska default: 16881ad8388SMartin Matuska return LZMA_PROG_ERROR; 16981ad8388SMartin Matuska } 17081ad8388SMartin Matuska 17181ad8388SMartin Matuska return LZMA_OK; 17281ad8388SMartin Matuska } 17381ad8388SMartin Matuska 17481ad8388SMartin Matuska 17581ad8388SMartin Matuska static void 1761456f0f9SXin LI alone_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 17781ad8388SMartin Matuska { 1781456f0f9SXin LI lzma_alone_coder *coder = coder_ptr; 17981ad8388SMartin Matuska lzma_next_end(&coder->next, allocator); 18081ad8388SMartin Matuska lzma_free(coder, allocator); 18181ad8388SMartin Matuska return; 18281ad8388SMartin Matuska } 18381ad8388SMartin Matuska 18481ad8388SMartin Matuska 18581ad8388SMartin Matuska static lzma_ret 1861456f0f9SXin LI alone_decoder_memconfig(void *coder_ptr, uint64_t *memusage, 18781ad8388SMartin Matuska uint64_t *old_memlimit, uint64_t new_memlimit) 18881ad8388SMartin Matuska { 1891456f0f9SXin LI lzma_alone_coder *coder = coder_ptr; 1901456f0f9SXin LI 19181ad8388SMartin Matuska *memusage = coder->memusage; 19281ad8388SMartin Matuska *old_memlimit = coder->memlimit; 19381ad8388SMartin Matuska 19481ad8388SMartin Matuska if (new_memlimit != 0) { 19581ad8388SMartin Matuska if (new_memlimit < coder->memusage) 19681ad8388SMartin Matuska return LZMA_MEMLIMIT_ERROR; 19781ad8388SMartin Matuska 19881ad8388SMartin Matuska coder->memlimit = new_memlimit; 19981ad8388SMartin Matuska } 20081ad8388SMartin Matuska 20181ad8388SMartin Matuska return LZMA_OK; 20281ad8388SMartin Matuska } 20381ad8388SMartin Matuska 20481ad8388SMartin Matuska 20581ad8388SMartin Matuska extern lzma_ret 20653200025SRui Paulo lzma_alone_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 20742b10a37SXin LI uint64_t memlimit, bool picky) 20881ad8388SMartin Matuska { 20981ad8388SMartin Matuska lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator); 21081ad8388SMartin Matuska 2111456f0f9SXin LI lzma_alone_coder *coder = next->coder; 2121456f0f9SXin LI 2131456f0f9SXin LI if (coder == NULL) { 2141456f0f9SXin LI coder = lzma_alloc(sizeof(lzma_alone_coder), allocator); 2151456f0f9SXin LI if (coder == NULL) 21681ad8388SMartin Matuska return LZMA_MEM_ERROR; 21781ad8388SMartin Matuska 2181456f0f9SXin LI next->coder = coder; 21981ad8388SMartin Matuska next->code = &alone_decode; 22081ad8388SMartin Matuska next->end = &alone_decoder_end; 22181ad8388SMartin Matuska next->memconfig = &alone_decoder_memconfig; 2221456f0f9SXin LI coder->next = LZMA_NEXT_CODER_INIT; 22381ad8388SMartin Matuska } 22481ad8388SMartin Matuska 2251456f0f9SXin LI coder->sequence = SEQ_PROPERTIES; 2261456f0f9SXin LI coder->picky = picky; 2271456f0f9SXin LI coder->pos = 0; 2281456f0f9SXin LI coder->options.dict_size = 0; 2291456f0f9SXin LI coder->options.preset_dict = NULL; 2301456f0f9SXin LI coder->options.preset_dict_size = 0; 2311456f0f9SXin LI coder->uncompressed_size = 0; 232b71a5db3SXin LI coder->memlimit = my_max(1, memlimit); 2331456f0f9SXin LI coder->memusage = LZMA_MEMUSAGE_BASE; 23481ad8388SMartin Matuska 23581ad8388SMartin Matuska return LZMA_OK; 23681ad8388SMartin Matuska } 23781ad8388SMartin Matuska 23881ad8388SMartin Matuska 23981ad8388SMartin Matuska extern LZMA_API(lzma_ret) 24081ad8388SMartin Matuska lzma_alone_decoder(lzma_stream *strm, uint64_t memlimit) 24181ad8388SMartin Matuska { 24242b10a37SXin LI lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit, false); 24381ad8388SMartin Matuska 24481ad8388SMartin Matuska strm->internal->supported_actions[LZMA_RUN] = true; 24581ad8388SMartin Matuska strm->internal->supported_actions[LZMA_FINISH] = true; 24681ad8388SMartin Matuska 24781ad8388SMartin Matuska return LZMA_OK; 24881ad8388SMartin Matuska } 249