1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file lzma2_decoder.c 4 /// \brief LZMA2 decoder 5 /// 6 // Authors: Igor Pavlov 7 // Lasse Collin 8 // 9 // This file has been put into the public domain. 10 // You can do whatever you want with this file. 11 // 12 /////////////////////////////////////////////////////////////////////////////// 13 14 #include "lzma2_decoder.h" 15 #include "lz_decoder.h" 16 #include "lzma_decoder.h" 17 18 19 typedef struct { 20 enum sequence { 21 SEQ_CONTROL, 22 SEQ_UNCOMPRESSED_1, 23 SEQ_UNCOMPRESSED_2, 24 SEQ_COMPRESSED_0, 25 SEQ_COMPRESSED_1, 26 SEQ_PROPERTIES, 27 SEQ_LZMA, 28 SEQ_COPY, 29 } sequence; 30 31 /// Sequence after the size fields have been decoded. 32 enum sequence next_sequence; 33 34 /// LZMA decoder 35 lzma_lz_decoder lzma; 36 37 /// Uncompressed size of LZMA chunk 38 size_t uncompressed_size; 39 40 /// Compressed size of the chunk (naturally equals to uncompressed 41 /// size of uncompressed chunk) 42 size_t compressed_size; 43 44 /// True if properties are needed. This is false before the 45 /// first LZMA chunk. 46 bool need_properties; 47 48 /// True if dictionary reset is needed. This is false before the 49 /// first chunk (LZMA or uncompressed). 50 bool need_dictionary_reset; 51 52 lzma_options_lzma options; 53 } lzma_lzma2_coder; 54 55 56 static lzma_ret 57 lzma2_decode(void *coder_ptr, lzma_dict *restrict dict, 58 const uint8_t *restrict in, size_t *restrict in_pos, 59 size_t in_size) 60 { 61 lzma_lzma2_coder *restrict coder = coder_ptr; 62 63 // With SEQ_LZMA it is possible that no new input is needed to do 64 // some progress. The rest of the sequences assume that there is 65 // at least one byte of input. 66 while (*in_pos < in_size || coder->sequence == SEQ_LZMA) 67 switch (coder->sequence) { 68 case SEQ_CONTROL: { 69 const uint32_t control = in[*in_pos]; 70 ++*in_pos; 71 72 // End marker 73 if (control == 0x00) 74 return LZMA_STREAM_END; 75 76 if (control >= 0xE0 || control == 1) { 77 // Dictionary reset implies that next LZMA chunk has 78 // to set new properties. 79 coder->need_properties = true; 80 coder->need_dictionary_reset = true; 81 } else if (coder->need_dictionary_reset) { 82 return LZMA_DATA_ERROR; 83 } 84 85 if (control >= 0x80) { 86 // LZMA chunk. The highest five bits of the 87 // uncompressed size are taken from the control byte. 88 coder->uncompressed_size = (control & 0x1F) << 16; 89 coder->sequence = SEQ_UNCOMPRESSED_1; 90 91 // See if there are new properties or if we need to 92 // reset the state. 93 if (control >= 0xC0) { 94 // When there are new properties, state reset 95 // is done at SEQ_PROPERTIES. 96 coder->need_properties = false; 97 coder->next_sequence = SEQ_PROPERTIES; 98 99 } else if (coder->need_properties) { 100 return LZMA_DATA_ERROR; 101 102 } else { 103 coder->next_sequence = SEQ_LZMA; 104 105 // If only state reset is wanted with old 106 // properties, do the resetting here for 107 // simplicity. 108 if (control >= 0xA0) 109 coder->lzma.reset(coder->lzma.coder, 110 &coder->options); 111 } 112 } else { 113 // Invalid control values 114 if (control > 2) 115 return LZMA_DATA_ERROR; 116 117 // It's uncompressed chunk 118 coder->sequence = SEQ_COMPRESSED_0; 119 coder->next_sequence = SEQ_COPY; 120 } 121 122 if (coder->need_dictionary_reset) { 123 // Finish the dictionary reset and let the caller 124 // flush the dictionary to the actual output buffer. 125 coder->need_dictionary_reset = false; 126 dict_reset(dict); 127 return LZMA_OK; 128 } 129 130 break; 131 } 132 133 case SEQ_UNCOMPRESSED_1: 134 coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; 135 coder->sequence = SEQ_UNCOMPRESSED_2; 136 break; 137 138 case SEQ_UNCOMPRESSED_2: 139 coder->uncompressed_size += in[(*in_pos)++] + 1; 140 coder->sequence = SEQ_COMPRESSED_0; 141 coder->lzma.set_uncompressed(coder->lzma.coder, 142 coder->uncompressed_size); 143 break; 144 145 case SEQ_COMPRESSED_0: 146 coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; 147 coder->sequence = SEQ_COMPRESSED_1; 148 break; 149 150 case SEQ_COMPRESSED_1: 151 coder->compressed_size += in[(*in_pos)++] + 1; 152 coder->sequence = coder->next_sequence; 153 break; 154 155 case SEQ_PROPERTIES: 156 if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) 157 return LZMA_DATA_ERROR; 158 159 coder->lzma.reset(coder->lzma.coder, &coder->options); 160 161 coder->sequence = SEQ_LZMA; 162 break; 163 164 case SEQ_LZMA: { 165 // Store the start offset so that we can update 166 // coder->compressed_size later. 167 const size_t in_start = *in_pos; 168 169 // Decode from in[] to *dict. 170 const lzma_ret ret = coder->lzma.code(coder->lzma.coder, 171 dict, in, in_pos, in_size); 172 173 // Validate and update coder->compressed_size. 174 const size_t in_used = *in_pos - in_start; 175 if (in_used > coder->compressed_size) 176 return LZMA_DATA_ERROR; 177 178 coder->compressed_size -= in_used; 179 180 // Return if we didn't finish the chunk, or an error occurred. 181 if (ret != LZMA_STREAM_END) 182 return ret; 183 184 // The LZMA decoder must have consumed the whole chunk now. 185 // We don't need to worry about uncompressed size since it 186 // is checked by the LZMA decoder. 187 if (coder->compressed_size != 0) 188 return LZMA_DATA_ERROR; 189 190 coder->sequence = SEQ_CONTROL; 191 break; 192 } 193 194 case SEQ_COPY: { 195 // Copy from input to the dictionary as is. 196 dict_write(dict, in, in_pos, in_size, &coder->compressed_size); 197 if (coder->compressed_size != 0) 198 return LZMA_OK; 199 200 coder->sequence = SEQ_CONTROL; 201 break; 202 } 203 204 default: 205 assert(0); 206 return LZMA_PROG_ERROR; 207 } 208 209 return LZMA_OK; 210 } 211 212 213 static void 214 lzma2_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 215 { 216 lzma_lzma2_coder *coder = coder_ptr; 217 218 assert(coder->lzma.end == NULL); 219 lzma_free(coder->lzma.coder, allocator); 220 221 lzma_free(coder, allocator); 222 223 return; 224 } 225 226 227 static lzma_ret 228 lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator, 229 const void *opt, lzma_lz_options *lz_options) 230 { 231 lzma_lzma2_coder *coder = lz->coder; 232 if (coder == NULL) { 233 coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); 234 if (coder == NULL) 235 return LZMA_MEM_ERROR; 236 237 lz->coder = coder; 238 lz->code = &lzma2_decode; 239 lz->end = &lzma2_decoder_end; 240 241 coder->lzma = LZMA_LZ_DECODER_INIT; 242 } 243 244 const lzma_options_lzma *options = opt; 245 246 coder->sequence = SEQ_CONTROL; 247 coder->need_properties = true; 248 coder->need_dictionary_reset = options->preset_dict == NULL 249 || options->preset_dict_size == 0; 250 251 return lzma_lzma_decoder_create(&coder->lzma, 252 allocator, options, lz_options); 253 } 254 255 256 extern lzma_ret 257 lzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 258 const lzma_filter_info *filters) 259 { 260 // LZMA2 can only be the last filter in the chain. This is enforced 261 // by the raw_decoder initialization. 262 assert(filters[1].init == NULL); 263 264 return lzma_lz_decoder_init(next, allocator, filters, 265 &lzma2_decoder_init); 266 } 267 268 269 extern uint64_t 270 lzma_lzma2_decoder_memusage(const void *options) 271 { 272 return sizeof(lzma_lzma2_coder) 273 + lzma_lzma_decoder_memusage_nocheck(options); 274 } 275 276 277 extern lzma_ret 278 lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator, 279 const uint8_t *props, size_t props_size) 280 { 281 if (props_size != 1) 282 return LZMA_OPTIONS_ERROR; 283 284 // Check that reserved bits are unset. 285 if (props[0] & 0xC0) 286 return LZMA_OPTIONS_ERROR; 287 288 // Decode the dictionary size. 289 if (props[0] > 40) 290 return LZMA_OPTIONS_ERROR; 291 292 lzma_options_lzma *opt = lzma_alloc( 293 sizeof(lzma_options_lzma), allocator); 294 if (opt == NULL) 295 return LZMA_MEM_ERROR; 296 297 if (props[0] == 40) { 298 opt->dict_size = UINT32_MAX; 299 } else { 300 opt->dict_size = 2 | (props[0] & 1); 301 opt->dict_size <<= props[0] / 2 + 11; 302 } 303 304 opt->preset_dict = NULL; 305 opt->preset_dict_size = 0; 306 307 *options = opt; 308 309 return LZMA_OK; 310 } 311