1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file index_decoder.c 4 /// \brief Decodes the Index field 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "index.h" 14 #include "check.h" 15 16 17 struct lzma_coder_s { 18 enum { 19 SEQ_INDICATOR, 20 SEQ_COUNT, 21 SEQ_MEMUSAGE, 22 SEQ_UNPADDED, 23 SEQ_UNCOMPRESSED, 24 SEQ_PADDING_INIT, 25 SEQ_PADDING, 26 SEQ_CRC32, 27 } sequence; 28 29 /// Memory usage limit 30 uint64_t memlimit; 31 32 /// Target Index 33 lzma_index *index; 34 35 /// Pointer give by the application, which is set after 36 /// successful decoding. 37 lzma_index **index_ptr; 38 39 /// Number of Records left to decode. 40 lzma_vli count; 41 42 /// The most recent Unpadded Size field 43 lzma_vli unpadded_size; 44 45 /// The most recent Uncompressed Size field 46 lzma_vli uncompressed_size; 47 48 /// Position in integers 49 size_t pos; 50 51 /// CRC32 of the List of Records field 52 uint32_t crc32; 53 }; 54 55 56 static lzma_ret 57 index_decode(lzma_coder *coder, lzma_allocator *allocator, 58 const uint8_t *restrict in, size_t *restrict in_pos, 59 size_t in_size, 60 uint8_t *restrict out lzma_attribute((__unused__)), 61 size_t *restrict out_pos lzma_attribute((__unused__)), 62 size_t out_size lzma_attribute((__unused__)), 63 lzma_action action lzma_attribute((__unused__))) 64 { 65 // Similar optimization as in index_encoder.c 66 const size_t in_start = *in_pos; 67 lzma_ret ret = LZMA_OK; 68 69 while (*in_pos < in_size) 70 switch (coder->sequence) { 71 case SEQ_INDICATOR: 72 // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or 73 // LZMA_FORMAT_ERROR, because a typical usage case for Index 74 // decoder is when parsing the Stream backwards. If seeking 75 // backward from the Stream Footer gives us something that 76 // doesn't begin with Index Indicator, the file is considered 77 // corrupt, not "programming error" or "unrecognized file 78 // format". One could argue that the application should 79 // verify the Index Indicator before trying to decode the 80 // Index, but well, I suppose it is simpler this way. 81 if (in[(*in_pos)++] != 0x00) 82 return LZMA_DATA_ERROR; 83 84 coder->sequence = SEQ_COUNT; 85 break; 86 87 case SEQ_COUNT: 88 ret = lzma_vli_decode(&coder->count, &coder->pos, 89 in, in_pos, in_size); 90 if (ret != LZMA_STREAM_END) 91 goto out; 92 93 coder->pos = 0; 94 coder->sequence = SEQ_MEMUSAGE; 95 96 // Fall through 97 98 case SEQ_MEMUSAGE: 99 if (lzma_index_memusage(1, coder->count) > coder->memlimit) { 100 ret = LZMA_MEMLIMIT_ERROR; 101 goto out; 102 } 103 104 // Tell the Index handling code how many Records this 105 // Index has to allow it to allocate memory more efficiently. 106 lzma_index_prealloc(coder->index, coder->count); 107 108 ret = LZMA_OK; 109 coder->sequence = coder->count == 0 110 ? SEQ_PADDING_INIT : SEQ_UNPADDED; 111 break; 112 113 case SEQ_UNPADDED: 114 case SEQ_UNCOMPRESSED: { 115 lzma_vli *size = coder->sequence == SEQ_UNPADDED 116 ? &coder->unpadded_size 117 : &coder->uncompressed_size; 118 119 ret = lzma_vli_decode(size, &coder->pos, 120 in, in_pos, in_size); 121 if (ret != LZMA_STREAM_END) 122 goto out; 123 124 ret = LZMA_OK; 125 coder->pos = 0; 126 127 if (coder->sequence == SEQ_UNPADDED) { 128 // Validate that encoded Unpadded Size isn't too small 129 // or too big. 130 if (coder->unpadded_size < UNPADDED_SIZE_MIN 131 || coder->unpadded_size 132 > UNPADDED_SIZE_MAX) 133 return LZMA_DATA_ERROR; 134 135 coder->sequence = SEQ_UNCOMPRESSED; 136 } else { 137 // Add the decoded Record to the Index. 138 return_if_error(lzma_index_append( 139 coder->index, allocator, 140 coder->unpadded_size, 141 coder->uncompressed_size)); 142 143 // Check if this was the last Record. 144 coder->sequence = --coder->count == 0 145 ? SEQ_PADDING_INIT 146 : SEQ_UNPADDED; 147 } 148 149 break; 150 } 151 152 case SEQ_PADDING_INIT: 153 coder->pos = lzma_index_padding_size(coder->index); 154 coder->sequence = SEQ_PADDING; 155 156 // Fall through 157 158 case SEQ_PADDING: 159 if (coder->pos > 0) { 160 --coder->pos; 161 if (in[(*in_pos)++] != 0x00) 162 return LZMA_DATA_ERROR; 163 164 break; 165 } 166 167 // Finish the CRC32 calculation. 168 coder->crc32 = lzma_crc32(in + in_start, 169 *in_pos - in_start, coder->crc32); 170 171 coder->sequence = SEQ_CRC32; 172 173 // Fall through 174 175 case SEQ_CRC32: 176 do { 177 if (*in_pos == in_size) 178 return LZMA_OK; 179 180 if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) 181 != in[(*in_pos)++]) 182 return LZMA_DATA_ERROR; 183 184 } while (++coder->pos < 4); 185 186 // Decoding was successful, now we can let the application 187 // see the decoded Index. 188 *coder->index_ptr = coder->index; 189 190 // Make index NULL so we don't free it unintentionally. 191 coder->index = NULL; 192 193 return LZMA_STREAM_END; 194 195 default: 196 assert(0); 197 return LZMA_PROG_ERROR; 198 } 199 200 out: 201 // Update the CRC32, 202 coder->crc32 = lzma_crc32(in + in_start, 203 *in_pos - in_start, coder->crc32); 204 205 return ret; 206 } 207 208 209 static void 210 index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) 211 { 212 lzma_index_end(coder->index, allocator); 213 lzma_free(coder, allocator); 214 return; 215 } 216 217 218 static lzma_ret 219 index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, 220 uint64_t *old_memlimit, uint64_t new_memlimit) 221 { 222 *memusage = lzma_index_memusage(1, coder->count); 223 *old_memlimit = coder->memlimit; 224 225 if (new_memlimit != 0) { 226 if (new_memlimit < *memusage) 227 return LZMA_MEMLIMIT_ERROR; 228 229 coder->memlimit = new_memlimit; 230 } 231 232 return LZMA_OK; 233 } 234 235 236 static lzma_ret 237 index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, 238 lzma_index **i, uint64_t memlimit) 239 { 240 // Remember the pointer given by the application. We will set it 241 // to point to the decoded Index only if decoding is successful. 242 // Before that, keep it NULL so that applications can always safely 243 // pass it to lzma_index_end() no matter did decoding succeed or not. 244 coder->index_ptr = i; 245 *i = NULL; 246 247 // We always allocate a new lzma_index. 248 coder->index = lzma_index_init(allocator); 249 if (coder->index == NULL) 250 return LZMA_MEM_ERROR; 251 252 // Initialize the rest. 253 coder->sequence = SEQ_INDICATOR; 254 coder->memlimit = memlimit; 255 coder->count = 0; // Needs to be initialized due to _memconfig(). 256 coder->pos = 0; 257 coder->crc32 = 0; 258 259 return LZMA_OK; 260 } 261 262 263 static lzma_ret 264 index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, 265 lzma_index **i, uint64_t memlimit) 266 { 267 lzma_next_coder_init(&index_decoder_init, next, allocator); 268 269 if (i == NULL || memlimit == 0) 270 return LZMA_PROG_ERROR; 271 272 if (next->coder == NULL) { 273 next->coder = lzma_alloc(sizeof(lzma_coder), allocator); 274 if (next->coder == NULL) 275 return LZMA_MEM_ERROR; 276 277 next->code = &index_decode; 278 next->end = &index_decoder_end; 279 next->memconfig = &index_decoder_memconfig; 280 next->coder->index = NULL; 281 } else { 282 lzma_index_end(next->coder->index, allocator); 283 } 284 285 return index_decoder_reset(next->coder, allocator, i, memlimit); 286 } 287 288 289 extern LZMA_API(lzma_ret) 290 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) 291 { 292 lzma_next_strm_init(index_decoder_init, strm, i, memlimit); 293 294 strm->internal->supported_actions[LZMA_RUN] = true; 295 strm->internal->supported_actions[LZMA_FINISH] = true; 296 297 return LZMA_OK; 298 } 299 300 301 extern LZMA_API(lzma_ret) 302 lzma_index_buffer_decode( 303 lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator, 304 const uint8_t *in, size_t *in_pos, size_t in_size) 305 { 306 // Sanity checks 307 if (i == NULL || memlimit == NULL 308 || in == NULL || in_pos == NULL || *in_pos > in_size) 309 return LZMA_PROG_ERROR; 310 311 // Initialize the decoder. 312 lzma_coder coder; 313 return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); 314 315 // Store the input start position so that we can restore it in case 316 // of an error. 317 const size_t in_start = *in_pos; 318 319 // Do the actual decoding. 320 lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, 321 NULL, NULL, 0, LZMA_RUN); 322 323 if (ret == LZMA_STREAM_END) { 324 ret = LZMA_OK; 325 } else { 326 // Something went wrong, free the Index structure and restore 327 // the input position. 328 lzma_index_end(coder.index, allocator); 329 *in_pos = in_start; 330 331 if (ret == LZMA_OK) { 332 // The input is truncated or otherwise corrupt. 333 // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR 334 // like lzma_vli_decode() does in single-call mode. 335 ret = LZMA_DATA_ERROR; 336 337 } else if (ret == LZMA_MEMLIMIT_ERROR) { 338 // Tell the caller how much memory would have 339 // been needed. 340 *memlimit = lzma_index_memusage(1, coder.count); 341 } 342 } 343 344 return ret; 345 } 346