1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file index_decoder.c 4 /// \brief Decodes the Index field 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "index.h" 14 #include "check.h" 15 16 17 struct lzma_coder_s { 18 enum { 19 SEQ_INDICATOR, 20 SEQ_COUNT, 21 SEQ_MEMUSAGE, 22 SEQ_UNPADDED, 23 SEQ_UNCOMPRESSED, 24 SEQ_PADDING_INIT, 25 SEQ_PADDING, 26 SEQ_CRC32, 27 } sequence; 28 29 /// Memory usage limit 30 uint64_t memlimit; 31 32 /// Target Index 33 lzma_index *index; 34 35 /// Pointer give by the application, which is set after 36 /// successful decoding. 37 lzma_index **index_ptr; 38 39 /// Number of Records left to decode. 40 lzma_vli count; 41 42 /// The most recent Unpadded Size field 43 lzma_vli unpadded_size; 44 45 /// The most recent Uncompressed Size field 46 lzma_vli uncompressed_size; 47 48 /// Position in integers 49 size_t pos; 50 51 /// CRC32 of the List of Records field 52 uint32_t crc32; 53 }; 54 55 56 static lzma_ret 57 index_decode(lzma_coder *coder, lzma_allocator *allocator, 58 const uint8_t *restrict in, size_t *restrict in_pos, 59 size_t in_size, uint8_t *restrict out lzma_attribute((unused)), 60 size_t *restrict out_pos lzma_attribute((unused)), 61 size_t out_size lzma_attribute((unused)), 62 lzma_action action lzma_attribute((unused))) 63 { 64 // Similar optimization as in index_encoder.c 65 const size_t in_start = *in_pos; 66 lzma_ret ret = LZMA_OK; 67 68 while (*in_pos < in_size) 69 switch (coder->sequence) { 70 case SEQ_INDICATOR: 71 // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or 72 // LZMA_FORMAT_ERROR, because a typical usage case for Index 73 // decoder is when parsing the Stream backwards. If seeking 74 // backward from the Stream Footer gives us something that 75 // doesn't begin with Index Indicator, the file is considered 76 // corrupt, not "programming error" or "unrecognized file 77 // format". One could argue that the application should 78 // verify the Index Indicator before trying to decode the 79 // Index, but well, I suppose it is simpler this way. 80 if (in[(*in_pos)++] != 0x00) 81 return LZMA_DATA_ERROR; 82 83 coder->sequence = SEQ_COUNT; 84 break; 85 86 case SEQ_COUNT: 87 ret = lzma_vli_decode(&coder->count, &coder->pos, 88 in, in_pos, in_size); 89 if (ret != LZMA_STREAM_END) 90 goto out; 91 92 coder->pos = 0; 93 coder->sequence = SEQ_MEMUSAGE; 94 95 // Fall through 96 97 case SEQ_MEMUSAGE: 98 if (lzma_index_memusage(1, coder->count) > coder->memlimit) { 99 ret = LZMA_MEMLIMIT_ERROR; 100 goto out; 101 } 102 103 // Tell the Index handling code how many Records this 104 // Index has to allow it to allocate memory more efficiently. 105 lzma_index_prealloc(coder->index, coder->count); 106 107 ret = LZMA_OK; 108 coder->sequence = coder->count == 0 109 ? SEQ_PADDING_INIT : SEQ_UNPADDED; 110 break; 111 112 case SEQ_UNPADDED: 113 case SEQ_UNCOMPRESSED: { 114 lzma_vli *size = coder->sequence == SEQ_UNPADDED 115 ? &coder->unpadded_size 116 : &coder->uncompressed_size; 117 118 ret = lzma_vli_decode(size, &coder->pos, 119 in, in_pos, in_size); 120 if (ret != LZMA_STREAM_END) 121 goto out; 122 123 ret = LZMA_OK; 124 coder->pos = 0; 125 126 if (coder->sequence == SEQ_UNPADDED) { 127 // Validate that encoded Unpadded Size isn't too small 128 // or too big. 129 if (coder->unpadded_size < UNPADDED_SIZE_MIN 130 || coder->unpadded_size 131 > UNPADDED_SIZE_MAX) 132 return LZMA_DATA_ERROR; 133 134 coder->sequence = SEQ_UNCOMPRESSED; 135 } else { 136 // Add the decoded Record to the Index. 137 return_if_error(lzma_index_append( 138 coder->index, allocator, 139 coder->unpadded_size, 140 coder->uncompressed_size)); 141 142 // Check if this was the last Record. 143 coder->sequence = --coder->count == 0 144 ? SEQ_PADDING_INIT 145 : SEQ_UNPADDED; 146 } 147 148 break; 149 } 150 151 case SEQ_PADDING_INIT: 152 coder->pos = lzma_index_padding_size(coder->index); 153 coder->sequence = SEQ_PADDING; 154 155 // Fall through 156 157 case SEQ_PADDING: 158 if (coder->pos > 0) { 159 --coder->pos; 160 if (in[(*in_pos)++] != 0x00) 161 return LZMA_DATA_ERROR; 162 163 break; 164 } 165 166 // Finish the CRC32 calculation. 167 coder->crc32 = lzma_crc32(in + in_start, 168 *in_pos - in_start, coder->crc32); 169 170 coder->sequence = SEQ_CRC32; 171 172 // Fall through 173 174 case SEQ_CRC32: 175 do { 176 if (*in_pos == in_size) 177 return LZMA_OK; 178 179 if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) 180 != in[(*in_pos)++]) 181 return LZMA_DATA_ERROR; 182 183 } while (++coder->pos < 4); 184 185 // Decoding was successful, now we can let the application 186 // see the decoded Index. 187 *coder->index_ptr = coder->index; 188 189 // Make index NULL so we don't free it unintentionally. 190 coder->index = NULL; 191 192 return LZMA_STREAM_END; 193 194 default: 195 assert(0); 196 return LZMA_PROG_ERROR; 197 } 198 199 out: 200 // Update the CRC32, 201 coder->crc32 = lzma_crc32(in + in_start, 202 *in_pos - in_start, coder->crc32); 203 204 return ret; 205 } 206 207 208 static void 209 index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) 210 { 211 lzma_index_end(coder->index, allocator); 212 lzma_free(coder, allocator); 213 return; 214 } 215 216 217 static lzma_ret 218 index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage, 219 uint64_t *old_memlimit, uint64_t new_memlimit) 220 { 221 *memusage = lzma_index_memusage(1, coder->count); 222 *old_memlimit = coder->memlimit; 223 224 if (new_memlimit != 0) { 225 if (new_memlimit < *memusage) 226 return LZMA_MEMLIMIT_ERROR; 227 228 coder->memlimit = new_memlimit; 229 } 230 231 return LZMA_OK; 232 } 233 234 235 static lzma_ret 236 index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator, 237 lzma_index **i, uint64_t memlimit) 238 { 239 // Remember the pointer given by the application. We will set it 240 // to point to the decoded Index only if decoding is successful. 241 // Before that, keep it NULL so that applications can always safely 242 // pass it to lzma_index_end() no matter did decoding succeed or not. 243 coder->index_ptr = i; 244 *i = NULL; 245 246 // We always allocate a new lzma_index. 247 coder->index = lzma_index_init(allocator); 248 if (coder->index == NULL) 249 return LZMA_MEM_ERROR; 250 251 // Initialize the rest. 252 coder->sequence = SEQ_INDICATOR; 253 coder->memlimit = memlimit; 254 coder->count = 0; // Needs to be initialized due to _memconfig(). 255 coder->pos = 0; 256 coder->crc32 = 0; 257 258 return LZMA_OK; 259 } 260 261 262 static lzma_ret 263 index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, 264 lzma_index **i, uint64_t memlimit) 265 { 266 lzma_next_coder_init(&index_decoder_init, next, allocator); 267 268 if (i == NULL || memlimit == 0) 269 return LZMA_PROG_ERROR; 270 271 if (next->coder == NULL) { 272 next->coder = lzma_alloc(sizeof(lzma_coder), allocator); 273 if (next->coder == NULL) 274 return LZMA_MEM_ERROR; 275 276 next->code = &index_decode; 277 next->end = &index_decoder_end; 278 next->memconfig = &index_decoder_memconfig; 279 next->coder->index = NULL; 280 } else { 281 lzma_index_end(next->coder->index, allocator); 282 } 283 284 return index_decoder_reset(next->coder, allocator, i, memlimit); 285 } 286 287 288 extern LZMA_API(lzma_ret) 289 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) 290 { 291 lzma_next_strm_init(index_decoder_init, strm, i, memlimit); 292 293 strm->internal->supported_actions[LZMA_RUN] = true; 294 strm->internal->supported_actions[LZMA_FINISH] = true; 295 296 return LZMA_OK; 297 } 298 299 300 extern LZMA_API(lzma_ret) 301 lzma_index_buffer_decode( 302 lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator, 303 const uint8_t *in, size_t *in_pos, size_t in_size) 304 { 305 // Sanity checks 306 if (i == NULL || memlimit == NULL 307 || in == NULL || in_pos == NULL || *in_pos > in_size) 308 return LZMA_PROG_ERROR; 309 310 // Initialize the decoder. 311 lzma_coder coder; 312 return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); 313 314 // Store the input start position so that we can restore it in case 315 // of an error. 316 const size_t in_start = *in_pos; 317 318 // Do the actual decoding. 319 lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, 320 NULL, NULL, 0, LZMA_RUN); 321 322 if (ret == LZMA_STREAM_END) { 323 ret = LZMA_OK; 324 } else { 325 // Something went wrong, free the Index structure and restore 326 // the input position. 327 lzma_index_end(coder.index, allocator); 328 *in_pos = in_start; 329 330 if (ret == LZMA_OK) { 331 // The input is truncated or otherwise corrupt. 332 // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR 333 // like lzma_vli_decode() does in single-call mode. 334 ret = LZMA_DATA_ERROR; 335 336 } else if (ret == LZMA_MEMLIMIT_ERROR) { 337 // Tell the caller how much memory would have 338 // been needed. 339 *memlimit = lzma_index_memusage(1, coder.count); 340 } 341 } 342 343 return ret; 344 } 345