///////////////////////////////////////////////////////////////////////////////
//
/// \file       index_hash.c
/// \brief      Validates Index by using a hash function
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "common.h"
#include "index.h"
#include "check.h"


typedef struct {
	/// Sum of the Block sizes (including Block Padding)
	lzma_vli blocks_size;

	/// Sum of the Uncompressed Size fields
	lzma_vli uncompressed_size;

	/// Number of Records
	lzma_vli count;

	/// Size of the List of Index Records as bytes
	lzma_vli index_list_size;

	/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
	lzma_check_state check;

} lzma_index_hash_info;


struct lzma_index_hash_s {
	enum {
		SEQ_BLOCK,
		SEQ_COUNT,
		SEQ_UNPADDED,
		SEQ_UNCOMPRESSED,
		SEQ_PADDING_INIT,
		SEQ_PADDING,
		SEQ_CRC32,
	} sequence;

	/// Information collected while decoding the actual Blocks.
	lzma_index_hash_info blocks;

	/// Information collected from the Index field.
	lzma_index_hash_info records;

	/// Number of Records not fully decoded
	lzma_vli remaining;

	/// Unpadded Size currently being read from an Index Record.
	lzma_vli unpadded_size;

	/// Uncompressed Size currently being read from an Index Record.
	lzma_vli uncompressed_size;

	/// Position in variable-length integers when decoding them from
	/// the List of Records.
	size_t pos;

	/// CRC32 of the Index
	uint32_t crc32;
};


extern LZMA_API(lzma_index_hash *)
lzma_index_hash_init(lzma_index_hash *index_hash,
		const lzma_allocator *allocator)
{
	if (index_hash == NULL) {
		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
		if (index_hash == NULL)
			return NULL;
	}

	index_hash->sequence = SEQ_BLOCK;
	index_hash->blocks.blocks_size = 0;
	index_hash->blocks.uncompressed_size = 0;
	index_hash->blocks.count = 0;
	index_hash->blocks.index_list_size = 0;
	index_hash->records.blocks_size = 0;
	index_hash->records.uncompressed_size = 0;
	index_hash->records.count = 0;
	index_hash->records.index_list_size = 0;
	index_hash->unpadded_size = 0;
	index_hash->uncompressed_size = 0;
	index_hash->pos = 0;
	index_hash->crc32 = 0;

	// These cannot fail because LZMA_CHECK_BEST is known to be supported.
	(void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
	(void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);

	return index_hash;
}


extern LZMA_API(void)
lzma_index_hash_end(lzma_index_hash *index_hash,
		const lzma_allocator *allocator)
{
	lzma_free(index_hash, allocator);
	return;
}


extern LZMA_API(lzma_vli)
lzma_index_hash_size(const lzma_index_hash *index_hash)
{
	// Get the size of the Index from ->blocks instead of ->records for
	// cases where the application wants to know the Index Size before
	// decoding the Index.
	return index_size(index_hash->blocks.count,
			index_hash->blocks.index_list_size);
}
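

// Illustrative sketch, not part of the original file: one way an
// application could use lzma_index_hash_size() to cross-check the Backward
// Size field it has already decoded from the Stream Footer (for example
// with lzma_stream_footer_decode()). The function and variable names are
// made up for the example, and the snippet is disabled with #if 0.
#if 0
static lzma_ret
check_backward_size(const lzma_index_hash *index_hash,
		const lzma_stream_flags *footer_flags)
{
	// Backward Size stores the size of the Index field, so it must
	// equal the Index size implied by the Blocks hashed so far.
	if (lzma_index_hash_size(index_hash) != footer_flags->backward_size)
		return LZMA_DATA_ERROR;

	return LZMA_OK;
}
#endif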


/// Updates the sizes and the hash without any validation.
static void
hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
		lzma_vli uncompressed_size)
{
	info->blocks_size += vli_ceil4(unpadded_size);
	info->uncompressed_size += uncompressed_size;
	info->index_list_size += lzma_vli_size(unpadded_size)
			+ lzma_vli_size(uncompressed_size);
	++info->count;

	const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
	lzma_check_update(&info->check, LZMA_CHECK_BEST,
			(const uint8_t *)(sizes), sizeof(sizes));

	return;
}


extern LZMA_API(lzma_ret)
lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
		lzma_vli uncompressed_size)
{
	// Validate the arguments.
	if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK
			|| unpadded_size < UNPADDED_SIZE_MIN
			|| unpadded_size > UNPADDED_SIZE_MAX
			|| uncompressed_size > LZMA_VLI_MAX)
		return LZMA_PROG_ERROR;

	// Update the hash.
	hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);

	// Validate that the totals accumulated in index_hash->blocks are
	// still within the allowed limits.
	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
			|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
			|| index_size(index_hash->blocks.count,
					index_hash->blocks.index_list_size)
				> LZMA_BACKWARD_SIZE_MAX
			|| index_stream_size(index_hash->blocks.blocks_size,
					index_hash->blocks.count,
					index_hash->blocks.index_list_size)
				> LZMA_VLI_MAX)
		return LZMA_DATA_ERROR;

	return LZMA_OK;
}
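

// Illustrative sketch, not part of the original file: after the Block
// decoder has finished a Block, a Stream decoder can feed that Block's
// sizes to the hash with lzma_index_hash_append(). "block" is assumed to
// be the lzma_block structure that was used for decoding the Block. The
// snippet is disabled with #if 0.
#if 0
static lzma_ret
hash_decoded_block(lzma_index_hash *index_hash, const lzma_block *block)
{
	// lzma_block_unpadded_size() derives Unpadded Size from the Block's
	// header, compressed data, and Check; uncompressed_size was filled
	// in by the Block decoder.
	return lzma_index_hash_append(index_hash,
			lzma_block_unpadded_size(block),
			block->uncompressed_size);
}
#endif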


extern LZMA_API(lzma_ret)
lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
		size_t *in_pos, size_t in_size)
{
	// Catch a zero-length input buffer here, because in contrast to the
	// Index encoder and decoder functions, applications call this
	// function directly instead of via lzma_code(), which does the
	// buffer checking.
	if (*in_pos >= in_size)
		return LZMA_BUF_ERROR;

	// NOTE: This function has many similarities to the index_encode()
	// and index_decode() functions found in index_encoder.c and
	// index_decoder.c. See the comments especially in index_encoder.c.
	const size_t in_start = *in_pos;
	lzma_ret ret = LZMA_OK;

	while (*in_pos < in_size)
	switch (index_hash->sequence) {
	case SEQ_BLOCK:
		// Check that the Index Indicator is present.
		if (in[(*in_pos)++] != INDEX_INDICATOR)
			return LZMA_DATA_ERROR;

		index_hash->sequence = SEQ_COUNT;
		break;

	case SEQ_COUNT: {
		ret = lzma_vli_decode(&index_hash->remaining,
				&index_hash->pos, in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			goto out;

		// The count must match the count of the Blocks decoded.
		if (index_hash->remaining != index_hash->blocks.count)
			return LZMA_DATA_ERROR;

		ret = LZMA_OK;
		index_hash->pos = 0;

		// Handle the special case when there are no Blocks.
		index_hash->sequence = index_hash->remaining == 0
				? SEQ_PADDING_INIT : SEQ_UNPADDED;
		break;
	}

	case SEQ_UNPADDED:
	case SEQ_UNCOMPRESSED: {
		lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
				? &index_hash->unpadded_size
				: &index_hash->uncompressed_size;

		ret = lzma_vli_decode(size, &index_hash->pos,
				in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			goto out;

		ret = LZMA_OK;
		index_hash->pos = 0;

		if (index_hash->sequence == SEQ_UNPADDED) {
			if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
					|| index_hash->unpadded_size
						> UNPADDED_SIZE_MAX)
				return LZMA_DATA_ERROR;

			index_hash->sequence = SEQ_UNCOMPRESSED;
		} else {
			// Update the hash.
			hash_append(&index_hash->records,
					index_hash->unpadded_size,
					index_hash->uncompressed_size);

			// Verify that we don't go over the known sizes. Note
			// that this validation is simpler than the one used
			// in lzma_index_hash_append(), because here we know
			// that values in index_hash->blocks are already
			// validated and we are fine as long as we don't
			// exceed them in index_hash->records.
			if (index_hash->blocks.blocks_size
					< index_hash->records.blocks_size
				|| index_hash->blocks.uncompressed_size
					< index_hash->records.uncompressed_size
				|| index_hash->blocks.index_list_size
					< index_hash->records.index_list_size)
				return LZMA_DATA_ERROR;

			// Check if this was the last Record.
			index_hash->sequence = --index_hash->remaining == 0
					? SEQ_PADDING_INIT : SEQ_UNPADDED;
		}

		break;
	}

	case SEQ_PADDING_INIT:
		index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
				index_hash->records.count,
				index_hash->records.index_list_size)) & 3;
		index_hash->sequence = SEQ_PADDING;

	// Fall through

	case SEQ_PADDING:
		if (index_hash->pos > 0) {
			--index_hash->pos;
			if (in[(*in_pos)++] != 0x00)
				return LZMA_DATA_ERROR;

			break;
		}

		// Compare the sizes.
		if (index_hash->blocks.blocks_size
					!= index_hash->records.blocks_size
				|| index_hash->blocks.uncompressed_size
					!= index_hash->records.uncompressed_size
				|| index_hash->blocks.index_list_size
					!= index_hash->records.index_list_size)
			return LZMA_DATA_ERROR;

		// Finish the hashes and compare them.
		lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
		lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
		if (memcmp(index_hash->blocks.check.buffer.u8,
				index_hash->records.check.buffer.u8,
				lzma_check_size(LZMA_CHECK_BEST)) != 0)
			return LZMA_DATA_ERROR;

		// Finish the CRC32 calculation.
		index_hash->crc32 = lzma_crc32(in + in_start,
				*in_pos - in_start, index_hash->crc32);

		index_hash->sequence = SEQ_CRC32;

	// Fall through

	case SEQ_CRC32:
		do {
			if (*in_pos == in_size)
				return LZMA_OK;

			if (((index_hash->crc32 >> (index_hash->pos * 8))
					& 0xFF) != in[(*in_pos)++]) {
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				return LZMA_DATA_ERROR;
#endif
			}

		} while (++index_hash->pos < 4);

		return LZMA_STREAM_END;

	default:
		assert(0);
		return LZMA_PROG_ERROR;
	}

out:
	// Update the CRC32.
	//
	// Avoid null pointer + 0 (undefined behavior) in "in + in_start".
	// In such a case we had no input and thus in_used == 0.
	{
		const size_t in_used = *in_pos - in_start;
		if (in_used > 0)
			index_hash->crc32 = lzma_crc32(in + in_start,
					in_used, index_hash->crc32);
	}

	return ret;
}
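

// Illustrative sketch, not part of the original file: validating an Index
// field that is assumed to be completely in memory. lzma_index_hash_decode()
// returns LZMA_OK when it needs more input and LZMA_STREAM_END when the
// whole Index (including its CRC32) matched the hashed Blocks. The error
// mapping for a truncated buffer is a choice made for this example; the
// snippet is disabled with #if 0.
#if 0
static lzma_ret
validate_index_field(lzma_index_hash *index_hash,
		const uint8_t *in, size_t in_size)
{
	size_t in_pos = 0;
	const lzma_ret ret = lzma_index_hash_decode(
			index_hash, in, &in_pos, in_size);

	if (ret == LZMA_STREAM_END)
		return LZMA_OK;

	// LZMA_OK means the buffer ended before the whole Index field was
	// seen; treat a supposedly complete Index that ends early as corrupt.
	return ret == LZMA_OK ? LZMA_DATA_ERROR : ret;
}
#endif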