1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file lzma2_decoder.c 4 /// \brief LZMA2 decoder 5 /// 6 // Authors: Igor Pavlov 7 // Lasse Collin 8 // 9 // This file has been put into the public domain. 10 // You can do whatever you want with this file. 11 // 12 /////////////////////////////////////////////////////////////////////////////// 13 14 #include "lzma2_decoder.h" 15 #include "lz_decoder.h" 16 #include "lzma_decoder.h" 17 18 19 struct lzma_coder_s { 20 enum sequence { 21 SEQ_CONTROL, 22 SEQ_UNCOMPRESSED_1, 23 SEQ_UNCOMPRESSED_2, 24 SEQ_COMPRESSED_0, 25 SEQ_COMPRESSED_1, 26 SEQ_PROPERTIES, 27 SEQ_LZMA, 28 SEQ_COPY, 29 } sequence; 30 31 /// Sequence after the size fields have been decoded. 32 enum sequence next_sequence; 33 34 /// LZMA decoder 35 lzma_lz_decoder lzma; 36 37 /// Uncompressed size of LZMA chunk 38 size_t uncompressed_size; 39 40 /// Compressed size of the chunk (naturally equals to uncompressed 41 /// size of uncompressed chunk) 42 size_t compressed_size; 43 44 /// True if properties are needed. This is false before the 45 /// first LZMA chunk. 46 bool need_properties; 47 48 /// True if dictionary reset is needed. This is false before the 49 /// first chunk (LZMA or uncompressed). 50 bool need_dictionary_reset; 51 52 lzma_options_lzma options; 53 }; 54 55 56 static lzma_ret 57 lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict, 58 const uint8_t *restrict in, size_t *restrict in_pos, 59 size_t in_size) 60 { 61 // With SEQ_LZMA it is possible that no new input is needed to do 62 // some progress. The rest of the sequences assume that there is 63 // at least one byte of input. 64 while (*in_pos < in_size || coder->sequence == SEQ_LZMA) 65 switch (coder->sequence) { 66 case SEQ_CONTROL: { 67 const uint32_t control = in[*in_pos]; 68 ++*in_pos; 69 70 if (control >= 0xE0 || control == 1) { 71 // Dictionary reset implies that next LZMA chunk has 72 // to set new properties. 73 coder->need_properties = true; 74 coder->need_dictionary_reset = true; 75 } else if (coder->need_dictionary_reset) { 76 return LZMA_DATA_ERROR; 77 } 78 79 if (control >= 0x80) { 80 // LZMA chunk. The highest five bits of the 81 // uncompressed size are taken from the control byte. 82 coder->uncompressed_size = (control & 0x1F) << 16; 83 coder->sequence = SEQ_UNCOMPRESSED_1; 84 85 // See if there are new properties or if we need to 86 // reset the state. 87 if (control >= 0xC0) { 88 // When there are new properties, state reset 89 // is done at SEQ_PROPERTIES. 90 coder->need_properties = false; 91 coder->next_sequence = SEQ_PROPERTIES; 92 93 } else if (coder->need_properties) { 94 return LZMA_DATA_ERROR; 95 96 } else { 97 coder->next_sequence = SEQ_LZMA; 98 99 // If only state reset is wanted with old 100 // properties, do the resetting here for 101 // simplicity. 102 if (control >= 0xA0) 103 coder->lzma.reset(coder->lzma.coder, 104 &coder->options); 105 } 106 } else { 107 // End marker 108 if (control == 0x00) 109 return LZMA_STREAM_END; 110 111 // Invalid control values 112 if (control > 2) 113 return LZMA_DATA_ERROR; 114 115 // It's uncompressed chunk 116 coder->sequence = SEQ_COMPRESSED_0; 117 coder->next_sequence = SEQ_COPY; 118 } 119 120 if (coder->need_dictionary_reset) { 121 // Finish the dictionary reset and let the caller 122 // flush the dictionary to the actual output buffer. 123 coder->need_dictionary_reset = false; 124 dict_reset(dict); 125 return LZMA_OK; 126 } 127 128 break; 129 } 130 131 case SEQ_UNCOMPRESSED_1: 132 coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; 133 coder->sequence = SEQ_UNCOMPRESSED_2; 134 break; 135 136 case SEQ_UNCOMPRESSED_2: 137 coder->uncompressed_size += in[(*in_pos)++] + 1; 138 coder->sequence = SEQ_COMPRESSED_0; 139 coder->lzma.set_uncompressed(coder->lzma.coder, 140 coder->uncompressed_size); 141 break; 142 143 case SEQ_COMPRESSED_0: 144 coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; 145 coder->sequence = SEQ_COMPRESSED_1; 146 break; 147 148 case SEQ_COMPRESSED_1: 149 coder->compressed_size += in[(*in_pos)++] + 1; 150 coder->sequence = coder->next_sequence; 151 break; 152 153 case SEQ_PROPERTIES: 154 if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) 155 return LZMA_DATA_ERROR; 156 157 coder->lzma.reset(coder->lzma.coder, &coder->options); 158 159 coder->sequence = SEQ_LZMA; 160 break; 161 162 case SEQ_LZMA: { 163 // Store the start offset so that we can update 164 // coder->compressed_size later. 165 const size_t in_start = *in_pos; 166 167 // Decode from in[] to *dict. 168 const lzma_ret ret = coder->lzma.code(coder->lzma.coder, 169 dict, in, in_pos, in_size); 170 171 // Validate and update coder->compressed_size. 172 const size_t in_used = *in_pos - in_start; 173 if (in_used > coder->compressed_size) 174 return LZMA_DATA_ERROR; 175 176 coder->compressed_size -= in_used; 177 178 // Return if we didn't finish the chunk, or an error occurred. 179 if (ret != LZMA_STREAM_END) 180 return ret; 181 182 // The LZMA decoder must have consumed the whole chunk now. 183 // We don't need to worry about uncompressed size since it 184 // is checked by the LZMA decoder. 185 if (coder->compressed_size != 0) 186 return LZMA_DATA_ERROR; 187 188 coder->sequence = SEQ_CONTROL; 189 break; 190 } 191 192 case SEQ_COPY: { 193 // Copy from input to the dictionary as is. 194 dict_write(dict, in, in_pos, in_size, &coder->compressed_size); 195 if (coder->compressed_size != 0) 196 return LZMA_OK; 197 198 coder->sequence = SEQ_CONTROL; 199 break; 200 } 201 202 default: 203 assert(0); 204 return LZMA_PROG_ERROR; 205 } 206 207 return LZMA_OK; 208 } 209 210 211 static void 212 lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator) 213 { 214 assert(coder->lzma.end == NULL); 215 lzma_free(coder->lzma.coder, allocator); 216 217 lzma_free(coder, allocator); 218 219 return; 220 } 221 222 223 static lzma_ret 224 lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, 225 const void *opt, lzma_lz_options *lz_options) 226 { 227 if (lz->coder == NULL) { 228 lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); 229 if (lz->coder == NULL) 230 return LZMA_MEM_ERROR; 231 232 lz->code = &lzma2_decode; 233 lz->end = &lzma2_decoder_end; 234 235 lz->coder->lzma = LZMA_LZ_DECODER_INIT; 236 } 237 238 const lzma_options_lzma *options = opt; 239 240 lz->coder->sequence = SEQ_CONTROL; 241 lz->coder->need_properties = true; 242 lz->coder->need_dictionary_reset = options->preset_dict == NULL 243 || options->preset_dict_size == 0; 244 245 return lzma_lzma_decoder_create(&lz->coder->lzma, 246 allocator, options, lz_options); 247 } 248 249 250 extern lzma_ret 251 lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, 252 const lzma_filter_info *filters) 253 { 254 // LZMA2 can only be the last filter in the chain. This is enforced 255 // by the raw_decoder initialization. 256 assert(filters[1].init == NULL); 257 258 return lzma_lz_decoder_init(next, allocator, filters, 259 &lzma2_decoder_init); 260 } 261 262 263 extern uint64_t 264 lzma_lzma2_decoder_memusage(const void *options) 265 { 266 return sizeof(lzma_coder) 267 + lzma_lzma_decoder_memusage_nocheck(options); 268 } 269 270 271 extern lzma_ret 272 lzma_lzma2_props_decode(void **options, lzma_allocator *allocator, 273 const uint8_t *props, size_t props_size) 274 { 275 if (props_size != 1) 276 return LZMA_OPTIONS_ERROR; 277 278 // Check that reserved bits are unset. 279 if (props[0] & 0xC0) 280 return LZMA_OPTIONS_ERROR; 281 282 // Decode the dictionary size. 283 if (props[0] > 40) 284 return LZMA_OPTIONS_ERROR; 285 286 lzma_options_lzma *opt = lzma_alloc( 287 sizeof(lzma_options_lzma), allocator); 288 if (opt == NULL) 289 return LZMA_MEM_ERROR; 290 291 if (props[0] == 40) { 292 opt->dict_size = UINT32_MAX; 293 } else { 294 opt->dict_size = 2 | (props[0] & 1); 295 opt->dict_size <<= props[0] / 2 + 11; 296 } 297 298 opt->preset_dict = NULL; 299 opt->preset_dict_size = 0; 300 301 *options = opt; 302 303 return LZMA_OK; 304 } 305