1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file index_decoder.c 6 /// \brief Decodes the Index field 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "index_decoder.h" 13 #include "check.h" 14 15 16 typedef struct { 17 enum { 18 SEQ_INDICATOR, 19 SEQ_COUNT, 20 SEQ_MEMUSAGE, 21 SEQ_UNPADDED, 22 SEQ_UNCOMPRESSED, 23 SEQ_PADDING_INIT, 24 SEQ_PADDING, 25 SEQ_CRC32, 26 } sequence; 27 28 /// Memory usage limit 29 uint64_t memlimit; 30 31 /// Target Index 32 lzma_index *index; 33 34 /// Pointer give by the application, which is set after 35 /// successful decoding. 36 lzma_index **index_ptr; 37 38 /// Number of Records left to decode. 39 lzma_vli count; 40 41 /// The most recent Unpadded Size field 42 lzma_vli unpadded_size; 43 44 /// The most recent Uncompressed Size field 45 lzma_vli uncompressed_size; 46 47 /// Position in integers 48 size_t pos; 49 50 /// CRC32 of the List of Records field 51 uint32_t crc32; 52 } lzma_index_coder; 53 54 55 static lzma_ret 56 index_decode(void *coder_ptr, const lzma_allocator *allocator, 57 const uint8_t *restrict in, size_t *restrict in_pos, 58 size_t in_size, 59 uint8_t *restrict out lzma_attribute((__unused__)), 60 size_t *restrict out_pos lzma_attribute((__unused__)), 61 size_t out_size lzma_attribute((__unused__)), 62 lzma_action action lzma_attribute((__unused__))) 63 { 64 lzma_index_coder *coder = coder_ptr; 65 66 // Similar optimization as in index_encoder.c 67 const size_t in_start = *in_pos; 68 lzma_ret ret = LZMA_OK; 69 70 while (*in_pos < in_size) 71 switch (coder->sequence) { 72 case SEQ_INDICATOR: 73 // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or 74 // LZMA_FORMAT_ERROR, because a typical usage case for Index 75 // decoder is when parsing the Stream backwards. If seeking 76 // backward from the Stream Footer gives us something that 77 // doesn't begin with Index Indicator, the file is considered 78 // corrupt, not "programming error" or "unrecognized file 79 // format". One could argue that the application should 80 // verify the Index Indicator before trying to decode the 81 // Index, but well, I suppose it is simpler this way. 82 if (in[(*in_pos)++] != INDEX_INDICATOR) 83 return LZMA_DATA_ERROR; 84 85 coder->sequence = SEQ_COUNT; 86 break; 87 88 case SEQ_COUNT: 89 ret = lzma_vli_decode(&coder->count, &coder->pos, 90 in, in_pos, in_size); 91 if (ret != LZMA_STREAM_END) 92 goto out; 93 94 coder->pos = 0; 95 coder->sequence = SEQ_MEMUSAGE; 96 97 // Fall through 98 99 case SEQ_MEMUSAGE: 100 if (lzma_index_memusage(1, coder->count) > coder->memlimit) { 101 ret = LZMA_MEMLIMIT_ERROR; 102 goto out; 103 } 104 105 // Tell the Index handling code how many Records this 106 // Index has to allow it to allocate memory more efficiently. 107 lzma_index_prealloc(coder->index, coder->count); 108 109 ret = LZMA_OK; 110 coder->sequence = coder->count == 0 111 ? SEQ_PADDING_INIT : SEQ_UNPADDED; 112 break; 113 114 case SEQ_UNPADDED: 115 case SEQ_UNCOMPRESSED: { 116 lzma_vli *size = coder->sequence == SEQ_UNPADDED 117 ? &coder->unpadded_size 118 : &coder->uncompressed_size; 119 120 ret = lzma_vli_decode(size, &coder->pos, 121 in, in_pos, in_size); 122 if (ret != LZMA_STREAM_END) 123 goto out; 124 125 ret = LZMA_OK; 126 coder->pos = 0; 127 128 if (coder->sequence == SEQ_UNPADDED) { 129 // Validate that encoded Unpadded Size isn't too small 130 // or too big. 131 if (coder->unpadded_size < UNPADDED_SIZE_MIN 132 || coder->unpadded_size 133 > UNPADDED_SIZE_MAX) 134 return LZMA_DATA_ERROR; 135 136 coder->sequence = SEQ_UNCOMPRESSED; 137 } else { 138 // Add the decoded Record to the Index. 139 return_if_error(lzma_index_append( 140 coder->index, allocator, 141 coder->unpadded_size, 142 coder->uncompressed_size)); 143 144 // Check if this was the last Record. 145 coder->sequence = --coder->count == 0 146 ? SEQ_PADDING_INIT 147 : SEQ_UNPADDED; 148 } 149 150 break; 151 } 152 153 case SEQ_PADDING_INIT: 154 coder->pos = lzma_index_padding_size(coder->index); 155 coder->sequence = SEQ_PADDING; 156 157 // Fall through 158 159 case SEQ_PADDING: 160 if (coder->pos > 0) { 161 --coder->pos; 162 if (in[(*in_pos)++] != 0x00) 163 return LZMA_DATA_ERROR; 164 165 break; 166 } 167 168 // Finish the CRC32 calculation. 169 coder->crc32 = lzma_crc32(in + in_start, 170 *in_pos - in_start, coder->crc32); 171 172 coder->sequence = SEQ_CRC32; 173 174 // Fall through 175 176 case SEQ_CRC32: 177 do { 178 if (*in_pos == in_size) 179 return LZMA_OK; 180 181 if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) 182 != in[(*in_pos)++]) { 183 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 184 return LZMA_DATA_ERROR; 185 #endif 186 } 187 188 } while (++coder->pos < 4); 189 190 // Decoding was successful, now we can let the application 191 // see the decoded Index. 192 *coder->index_ptr = coder->index; 193 194 // Make index NULL so we don't free it unintentionally. 195 coder->index = NULL; 196 197 return LZMA_STREAM_END; 198 199 default: 200 assert(0); 201 return LZMA_PROG_ERROR; 202 } 203 204 out: 205 // Update the CRC32. 206 // 207 // Avoid null pointer + 0 (undefined behavior) in "in + in_start". 208 // In such a case we had no input and thus in_used == 0. 209 { 210 const size_t in_used = *in_pos - in_start; 211 if (in_used > 0) 212 coder->crc32 = lzma_crc32(in + in_start, 213 in_used, coder->crc32); 214 } 215 216 return ret; 217 } 218 219 220 static void 221 index_decoder_end(void *coder_ptr, const lzma_allocator *allocator) 222 { 223 lzma_index_coder *coder = coder_ptr; 224 lzma_index_end(coder->index, allocator); 225 lzma_free(coder, allocator); 226 return; 227 } 228 229 230 static lzma_ret 231 index_decoder_memconfig(void *coder_ptr, uint64_t *memusage, 232 uint64_t *old_memlimit, uint64_t new_memlimit) 233 { 234 lzma_index_coder *coder = coder_ptr; 235 236 *memusage = lzma_index_memusage(1, coder->count); 237 *old_memlimit = coder->memlimit; 238 239 if (new_memlimit != 0) { 240 if (new_memlimit < *memusage) 241 return LZMA_MEMLIMIT_ERROR; 242 243 coder->memlimit = new_memlimit; 244 } 245 246 return LZMA_OK; 247 } 248 249 250 static lzma_ret 251 index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator, 252 lzma_index **i, uint64_t memlimit) 253 { 254 // Remember the pointer given by the application. We will set it 255 // to point to the decoded Index only if decoding is successful. 256 // Before that, keep it NULL so that applications can always safely 257 // pass it to lzma_index_end() no matter did decoding succeed or not. 258 coder->index_ptr = i; 259 *i = NULL; 260 261 // We always allocate a new lzma_index. 262 coder->index = lzma_index_init(allocator); 263 if (coder->index == NULL) 264 return LZMA_MEM_ERROR; 265 266 // Initialize the rest. 267 coder->sequence = SEQ_INDICATOR; 268 coder->memlimit = my_max(1, memlimit); 269 coder->count = 0; // Needs to be initialized due to _memconfig(). 270 coder->pos = 0; 271 coder->crc32 = 0; 272 273 return LZMA_OK; 274 } 275 276 277 extern lzma_ret 278 lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, 279 lzma_index **i, uint64_t memlimit) 280 { 281 lzma_next_coder_init(&lzma_index_decoder_init, next, allocator); 282 283 if (i == NULL) 284 return LZMA_PROG_ERROR; 285 286 lzma_index_coder *coder = next->coder; 287 if (coder == NULL) { 288 coder = lzma_alloc(sizeof(lzma_index_coder), allocator); 289 if (coder == NULL) 290 return LZMA_MEM_ERROR; 291 292 next->coder = coder; 293 next->code = &index_decode; 294 next->end = &index_decoder_end; 295 next->memconfig = &index_decoder_memconfig; 296 coder->index = NULL; 297 } else { 298 lzma_index_end(coder->index, allocator); 299 } 300 301 return index_decoder_reset(coder, allocator, i, memlimit); 302 } 303 304 305 extern LZMA_API(lzma_ret) 306 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit) 307 { 308 // If i isn't NULL, *i must always be initialized due to 309 // the wording in the API docs. This way it is initialized 310 // if we return LZMA_PROG_ERROR due to strm == NULL. 311 if (i != NULL) 312 *i = NULL; 313 314 lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit); 315 316 strm->internal->supported_actions[LZMA_RUN] = true; 317 strm->internal->supported_actions[LZMA_FINISH] = true; 318 319 return LZMA_OK; 320 } 321 322 323 extern LZMA_API(lzma_ret) 324 lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit, 325 const lzma_allocator *allocator, 326 const uint8_t *in, size_t *in_pos, size_t in_size) 327 { 328 // If i isn't NULL, *i must always be initialized due to 329 // the wording in the API docs. 330 if (i != NULL) 331 *i = NULL; 332 333 // Sanity checks 334 if (i == NULL || memlimit == NULL 335 || in == NULL || in_pos == NULL || *in_pos > in_size) 336 return LZMA_PROG_ERROR; 337 338 // Initialize the decoder. 339 lzma_index_coder coder; 340 return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit)); 341 342 // Store the input start position so that we can restore it in case 343 // of an error. 344 const size_t in_start = *in_pos; 345 346 // Do the actual decoding. 347 lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size, 348 NULL, NULL, 0, LZMA_RUN); 349 350 if (ret == LZMA_STREAM_END) { 351 ret = LZMA_OK; 352 } else { 353 // Something went wrong, free the Index structure and restore 354 // the input position. 355 lzma_index_end(coder.index, allocator); 356 *in_pos = in_start; 357 358 if (ret == LZMA_OK) { 359 // The input is truncated or otherwise corrupt. 360 // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR 361 // like lzma_vli_decode() does in single-call mode. 362 ret = LZMA_DATA_ERROR; 363 364 } else if (ret == LZMA_MEMLIMIT_ERROR) { 365 // Tell the caller how much memory would have 366 // been needed. 367 *memlimit = lzma_index_memusage(1, coder.count); 368 } 369 } 370 371 return ret; 372 } 373