1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file index_decoder.c 4 /// \brief Decodes the Index field 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "index_decoder.h" 14 #include "check.h" 15 16 17 typedef struct { 18 enum { 19 SEQ_INDICATOR, 20 SEQ_COUNT, 21 SEQ_MEMUSAGE, 22 SEQ_UNPADDED, 23 SEQ_UNCOMPRESSED, 24 SEQ_PADDING_INIT, 25 SEQ_PADDING, 26 SEQ_CRC32, 27 } sequence; 28 29 /// Memory usage limit 30 uint64_t memlimit; 31 32 /// Target Index 33 lzma_index *index; 34 35 /// Pointer give by the application, which is set after 36 /// successful decoding. 37 lzma_index **index_ptr; 38 39 /// Number of Records left to decode. 40 lzma_vli count; 41 42 /// The most recent Unpadded Size field 43 lzma_vli unpadded_size; 44 45 /// The most recent Uncompressed Size field 46 lzma_vli uncompressed_size; 47 48 /// Position in integers 49 size_t pos; 50 51 /// CRC32 of the List of Records field 52 uint32_t crc32; 53 } lzma_index_coder; 54 55 56 static lzma_ret 57 index_decode(void *coder_ptr, const lzma_allocator *allocator, 58 const uint8_t *restrict in, size_t *restrict in_pos, 59 size_t in_size, 60 uint8_t *restrict out lzma_attribute((__unused__)), 61 size_t *restrict out_pos lzma_attribute((__unused__)), 62 size_t out_size lzma_attribute((__unused__)), 63 lzma_action action lzma_attribute((__unused__))) 64 { 65 lzma_index_coder *coder = coder_ptr; 66 67 // Similar optimization as in index_encoder.c 68 const size_t in_start = *in_pos; 69 lzma_ret ret = LZMA_OK; 70 71 while (*in_pos < in_size) 72 switch (coder->sequence) { 73 case SEQ_INDICATOR: 74 // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or 75 // LZMA_FORMAT_ERROR, because a typical usage case for Index 76 // decoder is when parsing the Stream backwards. If seeking 77 // backward from the Stream Footer gives us something that 78 // doesn't begin with Index Indicator, the file is considered 79 // corrupt, not "programming error" or "unrecognized file 80 // format". One could argue that the application should 81 // verify the Index Indicator before trying to decode the 82 // Index, but well, I suppose it is simpler this way. 83 if (in[(*in_pos)++] != INDEX_INDICATOR) 84 return LZMA_DATA_ERROR; 85 86 coder->sequence = SEQ_COUNT; 87 break; 88 89 case SEQ_COUNT: 90 ret = lzma_vli_decode(&coder->count, &coder->pos, 91 in, in_pos, in_size); 92 if (ret != LZMA_STREAM_END) 93 goto out; 94 95 coder->pos = 0; 96 coder->sequence = SEQ_MEMUSAGE; 97 98 // Fall through 99 100 case SEQ_MEMUSAGE: 101 if (lzma_index_memusage(1, coder->count) > coder->memlimit) { 102 ret = LZMA_MEMLIMIT_ERROR; 103 goto out; 104 } 105 106 // Tell the Index handling code how many Records this 107 // Index has to allow it to allocate memory more efficiently. 108 lzma_index_prealloc(coder->index, coder->count); 109 110 ret = LZMA_OK; 111 coder->sequence = coder->count == 0 112 ? SEQ_PADDING_INIT : SEQ_UNPADDED; 113 break; 114 115 case SEQ_UNPADDED: 116 case SEQ_UNCOMPRESSED: { 117 lzma_vli *size = coder->sequence == SEQ_UNPADDED 118 ? &coder->unpadded_size 119 : &coder->uncompressed_size; 120 121 ret = lzma_vli_decode(size, &coder->pos, 122 in, in_pos, in_size); 123 if (ret != LZMA_STREAM_END) 124 goto out; 125 126 ret = LZMA_OK; 127 coder->pos = 0; 128 129 if (coder->sequence == SEQ_UNPADDED) { 130 // Validate that encoded Unpadded Size isn't too small 131 // or too big. 132 if (coder->unpadded_size < UNPADDED_SIZE_MIN 133 || coder->unpadded_size 134 > UNPADDED_SIZE_MAX) 135 return LZMA_DATA_ERROR; 136 137 coder->sequence = SEQ_UNCOMPRESSED; 138 } else { 139 // Add the decoded Record to the Index. 140 return_if_error(lzma_index_append( 141 coder->index, allocator, 142 coder->unpadded_size, 143 coder->uncompressed_size)); 144 145 // Check if this was the last Record. 146 coder->sequence = --coder->count == 0 147 ? SEQ_PADDING_INIT 148 : SEQ_UNPADDED; 149 } 150 151 break; 152 } 153 154 case SEQ_PADDING_INIT: 155 coder->pos = lzma_index_padding_size(coder->index); 156 coder->sequence = SEQ_PADDING; 157 158 // Fall through 159 160 case SEQ_PADDING: 161 if (coder->pos > 0) { 162 --coder->pos; 163 if (in[(*in_pos)++] != 0x00) 164 return LZMA_DATA_ERROR; 165 166 break; 167 } 168 169 // Finish the CRC32 calculation. 170 coder->crc32 = lzma_crc32(in + in_start, 171 *in_pos - in_start, coder->crc32); 172 173 coder->sequence = SEQ_CRC32; 174 175 // Fall through 176 177 case SEQ_CRC32: 178 do { 179 if (*in_pos == in_size) 180 return LZMA_OK; 181 182 if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) 183 != in[(*in_pos)++]) { 184 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 185 return LZMA_DATA_ERROR; 186 #endif 187 } 188 189 } while (++coder->pos < 4); 190 191 // Decoding was successful, now we can let the application 192 // see the decoded Index. 193 *coder->index_ptr = coder->index; 194 195 // Make index NULL so we don't free it unintentionally. 196 coder->index = NULL; 197 198 return LZMA_STREAM_END; 199 200 default: 201 assert(0); 202 return LZMA_PROG_ERROR; 203 } 204 205 out: 206 // Update the CRC32. 207 // 208 // Avoid null pointer + 0 (undefined behavior) in "in + in_start". 209 // In such a case we had no input and thus in_used == 0. 210 { 211 const size_t in_used = *in_pos - in_start; 212 if (in_used > 0) 213 coder->crc32 = lzma_crc32(in + in_start, 214 in_used, coder->crc32); 215 } 216 217 return ret; 218 } 219 220 221 static void 222 index_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 223 { 224 lzma_index_coder *coder = coder_ptr; 225 lzma_index_end(coder->index, allocator); 226 lzma_free(coder, allocator); 227 return; 228 } 229 230 231 static lzma_ret 232 index_decoder_memconfig(void *coder_ptr, uint64_t *memusage, 233 uint64_t *old_memlimit, uint64_t new_memlimit) 234 { 235 lzma_index_coder *coder = coder_ptr; 236 237 *memusage = lzma_index_memusage(1, coder->count); 238 *old_memlimit = coder->memlimit; 239 240 if (new_memlimit != 0) { 241 if (new_memlimit < *memusage) 242 return LZMA_MEMLIMIT_ERROR; 243 244 coder->memlimit = new_memlimit; 245 } 246 247 return LZMA_OK; 248 } 249 250 251 static lzma_ret 252 index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator, 253 lzma_index **i, uint64_t memlimit) 254 { 255 // Remember the pointer given by the application. We will set it 256 // to point to the decoded Index only if decoding is successful. 257 // Before that, keep it NULL so that applications can always safely 258 // pass it to lzma_index_end() no matter did decoding succeed or not. 259 coder->index_ptr = i; 260 *i = NULL; 261 262 // We always allocate a new lzma_index. 263 coder->index = lzma_index_init(allocator); 264 if (coder->index == NULL) 265 return LZMA_MEM_ERROR; 266 267 // Initialize the rest. 268 coder->sequence = SEQ_INDICATOR; 269 coder->memlimit = my_max(1, memlimit); 270 coder->count = 0; // Needs to be initialized due to _memconfig(). 271 coder->pos = 0; 272 coder->crc32 = 0; 273 274 return LZMA_OK; 275 } 276 277 278 extern lzma_ret 279 lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 280 lzma_index **i, uint64_t memlimit) 281 { 282 lzma_next_coder_init(&lzma_index_decoder_init, next, allocator); 283 284 if (i == NULL) 285 return LZMA_PROG_ERROR; 286 287 lzma_index_coder *coder = next->coder; 288 if (coder == NULL) { 289 coder = lzma_alloc(sizeof(lzma_index_coder), allocator); 290 if (coder == NULL) 291 return LZMA_MEM_ERROR; 292 293 next->coder = coder; 294 next->code = &index_decode; 295 next->end = &index_decoder_end; 296 next->memconfig = &index_decoder_memconfig; 297 coder->index = NULL; 298 } else { 299 lzma_index_end(coder->index, allocator); 300 } 301 302 return index_decoder_reset(coder, allocator, i, memlimit); 303 } 304 305 306 extern LZMA_API(lzma_ret) 307 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) 308 { 309 lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit); 310 311 strm->internal->supported_actions[LZMA_RUN] = true; 312 strm->internal->supported_actions[LZMA_FINISH] = true; 313 314 return LZMA_OK; 315 } 316 317 318 extern LZMA_API(lzma_ret) 319 lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit, 320 const lzma_allocator *allocator, 321 const uint8_t *in, size_t *in_pos, size_t in_size) 322 { 323 // Sanity checks 324 if (i == NULL || memlimit == NULL 325 || in == NULL || in_pos == NULL || *in_pos > in_size) 326 return LZMA_PROG_ERROR; 327 328 // Initialize the decoder. 329 lzma_index_coder coder; 330 return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); 331 332 // Store the input start position so that we can restore it in case 333 // of an error. 334 const size_t in_start = *in_pos; 335 336 // Do the actual decoding. 337 lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, 338 NULL, NULL, 0, LZMA_RUN); 339 340 if (ret == LZMA_STREAM_END) { 341 ret = LZMA_OK; 342 } else { 343 // Something went wrong, free the Index structure and restore 344 // the input position. 345 lzma_index_end(coder.index, allocator); 346 *in_pos = in_start; 347 348 if (ret == LZMA_OK) { 349 // The input is truncated or otherwise corrupt. 350 // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR 351 // like lzma_vli_decode() does in single-call mode. 352 ret = LZMA_DATA_ERROR; 353 354 } else if (ret == LZMA_MEMLIMIT_ERROR) { 355 // Tell the caller how much memory would have 356 // been needed. 357 *memlimit = lzma_index_memusage(1, coder.count); 358 } 359 } 360 361 return ret; 362 } 363