1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file lzma2_decoder.c 6 /// \brief LZMA2 decoder 7 /// 8 // Authors: Igor Pavlov 9 // Lasse Collin 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "lzma2_decoder.h" 14 #include "lz_decoder.h" 15 #include "lzma_decoder.h" 16 17 18 typedef struct { 19 enum sequence { 20 SEQ_CONTROL, 21 SEQ_UNCOMPRESSED_1, 22 SEQ_UNCOMPRESSED_2, 23 SEQ_COMPRESSED_0, 24 SEQ_COMPRESSED_1, 25 SEQ_PROPERTIES, 26 SEQ_LZMA, 27 SEQ_COPY, 28 } sequence; 29 30 /// Sequence after the size fields have been decoded. 31 enum sequence next_sequence; 32 33 /// LZMA decoder 34 lzma_lz_decoder lzma; 35 36 /// Uncompressed size of LZMA chunk 37 size_t uncompressed_size; 38 39 /// Compressed size of the chunk (naturally equals to uncompressed 40 /// size of uncompressed chunk) 41 size_t compressed_size; 42 43 /// True if properties are needed. This is false before the 44 /// first LZMA chunk. 45 bool need_properties; 46 47 /// True if dictionary reset is needed. This is false before the 48 /// first chunk (LZMA or uncompressed). 49 bool need_dictionary_reset; 50 51 lzma_options_lzma options; 52 } lzma_lzma2_coder; 53 54 55 static lzma_ret 56 lzma2_decode(void *coder_ptr, lzma_dict *restrict dict, 57 const uint8_t *restrict in, size_t *restrict in_pos, 58 size_t in_size) 59 { 60 lzma_lzma2_coder *restrict coder = coder_ptr; 61 62 // With SEQ_LZMA it is possible that no new input is needed to do 63 // some progress. The rest of the sequences assume that there is 64 // at least one byte of input. 65 while (*in_pos < in_size || coder->sequence == SEQ_LZMA) 66 switch (coder->sequence) { 67 case SEQ_CONTROL: { 68 const uint32_t control = in[*in_pos]; 69 ++*in_pos; 70 71 // End marker 72 if (control == 0x00) 73 return LZMA_STREAM_END; 74 75 if (control >= 0xE0 || control == 1) { 76 // Dictionary reset implies that next LZMA chunk has 77 // to set new properties. 78 coder->need_properties = true; 79 coder->need_dictionary_reset = true; 80 } else if (coder->need_dictionary_reset) { 81 return LZMA_DATA_ERROR; 82 } 83 84 if (control >= 0x80) { 85 // LZMA chunk. The highest five bits of the 86 // uncompressed size are taken from the control byte. 87 coder->uncompressed_size = (control & 0x1F) << 16; 88 coder->sequence = SEQ_UNCOMPRESSED_1; 89 90 // See if there are new properties or if we need to 91 // reset the state. 92 if (control >= 0xC0) { 93 // When there are new properties, state reset 94 // is done at SEQ_PROPERTIES. 95 coder->need_properties = false; 96 coder->next_sequence = SEQ_PROPERTIES; 97 98 } else if (coder->need_properties) { 99 return LZMA_DATA_ERROR; 100 101 } else { 102 coder->next_sequence = SEQ_LZMA; 103 104 // If only state reset is wanted with old 105 // properties, do the resetting here for 106 // simplicity. 107 if (control >= 0xA0) 108 coder->lzma.reset(coder->lzma.coder, 109 &coder->options); 110 } 111 } else { 112 // Invalid control values 113 if (control > 2) 114 return LZMA_DATA_ERROR; 115 116 // It's uncompressed chunk 117 coder->sequence = SEQ_COMPRESSED_0; 118 coder->next_sequence = SEQ_COPY; 119 } 120 121 if (coder->need_dictionary_reset) { 122 // Finish the dictionary reset and let the caller 123 // flush the dictionary to the actual output buffer. 124 coder->need_dictionary_reset = false; 125 dict_reset(dict); 126 return LZMA_OK; 127 } 128 129 break; 130 } 131 132 case SEQ_UNCOMPRESSED_1: 133 coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; 134 coder->sequence = SEQ_UNCOMPRESSED_2; 135 break; 136 137 case SEQ_UNCOMPRESSED_2: 138 coder->uncompressed_size += in[(*in_pos)++] + 1U; 139 coder->sequence = SEQ_COMPRESSED_0; 140 coder->lzma.set_uncompressed(coder->lzma.coder, 141 coder->uncompressed_size, false); 142 break; 143 144 case SEQ_COMPRESSED_0: 145 coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; 146 coder->sequence = SEQ_COMPRESSED_1; 147 break; 148 149 case SEQ_COMPRESSED_1: 150 coder->compressed_size += in[(*in_pos)++] + 1U; 151 coder->sequence = coder->next_sequence; 152 break; 153 154 case SEQ_PROPERTIES: 155 if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) 156 return LZMA_DATA_ERROR; 157 158 coder->lzma.reset(coder->lzma.coder, &coder->options); 159 160 coder->sequence = SEQ_LZMA; 161 break; 162 163 case SEQ_LZMA: { 164 // Store the start offset so that we can update 165 // coder->compressed_size later. 166 const size_t in_start = *in_pos; 167 168 // Decode from in[] to *dict. 169 const lzma_ret ret = coder->lzma.code(coder->lzma.coder, 170 dict, in, in_pos, in_size); 171 172 // Validate and update coder->compressed_size. 173 const size_t in_used = *in_pos - in_start; 174 if (in_used > coder->compressed_size) 175 return LZMA_DATA_ERROR; 176 177 coder->compressed_size -= in_used; 178 179 // Return if we didn't finish the chunk, or an error occurred. 180 if (ret != LZMA_STREAM_END) 181 return ret; 182 183 // The LZMA decoder must have consumed the whole chunk now. 184 // We don't need to worry about uncompressed size since it 185 // is checked by the LZMA decoder. 186 if (coder->compressed_size != 0) 187 return LZMA_DATA_ERROR; 188 189 coder->sequence = SEQ_CONTROL; 190 break; 191 } 192 193 case SEQ_COPY: { 194 // Copy from input to the dictionary as is. 195 dict_write(dict, in, in_pos, in_size, &coder->compressed_size); 196 if (coder->compressed_size != 0) 197 return LZMA_OK; 198 199 coder->sequence = SEQ_CONTROL; 200 break; 201 } 202 203 default: 204 assert(0); 205 return LZMA_PROG_ERROR; 206 } 207 208 return LZMA_OK; 209 } 210 211 212 static void 213 lzma2_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 214 { 215 lzma_lzma2_coder *coder = coder_ptr; 216 217 assert(coder->lzma.end == NULL); 218 lzma_free(coder->lzma.coder, allocator); 219 220 lzma_free(coder, allocator); 221 222 return; 223 } 224 225 226 static lzma_ret 227 lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, 228 lzma_vli id lzma_attribute((__unused__)), const void *opt, 229 lzma_lz_options *lz_options) 230 { 231 lzma_lzma2_coder *coder = lz->coder; 232 if (coder == NULL) { 233 coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); 234 if (coder == NULL) 235 return LZMA_MEM_ERROR; 236 237 lz->coder = coder; 238 lz->code = &lzma2_decode; 239 lz->end = &lzma2_decoder_end; 240 241 coder->lzma = LZMA_LZ_DECODER_INIT; 242 } 243 244 const lzma_options_lzma *options = opt; 245 246 coder->sequence = SEQ_CONTROL; 247 coder->need_properties = true; 248 coder->need_dictionary_reset = options->preset_dict == NULL 249 || options->preset_dict_size == 0; 250 251 return lzma_lzma_decoder_create(&coder->lzma, 252 allocator, options, lz_options); 253 } 254 255 256 extern lzma_ret 257 lzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 258 const lzma_filter_info *filters) 259 { 260 // LZMA2 can only be the last filter in the chain. This is enforced 261 // by the raw_decoder initialization. 262 assert(filters[1].init == NULL); 263 264 return lzma_lz_decoder_init(next, allocator, filters, 265 &lzma2_decoder_init); 266 } 267 268 269 extern uint64_t 270 lzma_lzma2_decoder_memusage(const void *options) 271 { 272 return sizeof(lzma_lzma2_coder) 273 + lzma_lzma_decoder_memusage_nocheck(options); 274 } 275 276 277 extern lzma_ret 278 lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator, 279 const uint8_t *props, size_t props_size) 280 { 281 if (props_size != 1) 282 return LZMA_OPTIONS_ERROR; 283 284 // Check that reserved bits are unset. 285 if (props[0] & 0xC0) 286 return LZMA_OPTIONS_ERROR; 287 288 // Decode the dictionary size. 289 if (props[0] > 40) 290 return LZMA_OPTIONS_ERROR; 291 292 lzma_options_lzma *opt = lzma_alloc( 293 sizeof(lzma_options_lzma), allocator); 294 if (opt == NULL) 295 return LZMA_MEM_ERROR; 296 297 if (props[0] == 40) { 298 opt->dict_size = UINT32_MAX; 299 } else { 300 opt->dict_size = 2 | (props[0] & 1U); 301 opt->dict_size <<= props[0] / 2U + 11; 302 } 303 304 opt->preset_dict = NULL; 305 opt->preset_dict_size = 0; 306 307 *options = opt; 308 309 return LZMA_OK; 310 } 311