///////////////////////////////////////////////////////////////////////////////
//
/// \file       index_hash.c
/// \brief      Validates Index by using a hash function
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "common.h"
#include "index.h"
#include "check.h"


typedef struct {
	/// Sum of the Block sizes (including Block Padding)
	lzma_vli blocks_size;

	/// Sum of the Uncompressed Size fields
	lzma_vli uncompressed_size;

	/// Number of Records
	lzma_vli count;

	/// Size of the List of Index Records as bytes
	lzma_vli index_list_size;

	/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
	lzma_check_state check;

} lzma_index_hash_info;


struct lzma_index_hash_s {
	enum {
		SEQ_BLOCK,
		SEQ_COUNT,
		SEQ_UNPADDED,
		SEQ_UNCOMPRESSED,
		SEQ_PADDING_INIT,
		SEQ_PADDING,
		SEQ_CRC32,
	} sequence;

	/// Information collected while decoding the actual Blocks.
	lzma_index_hash_info blocks;

	/// Information collected from the Index field.
	lzma_index_hash_info records;

	/// Number of Records not fully decoded
	lzma_vli remaining;

	/// Unpadded Size currently being read from an Index Record.
	lzma_vli unpadded_size;

	/// Uncompressed Size currently being read from an Index Record.
	lzma_vli uncompressed_size;

	/// Position in variable-length integers when decoding them from
	/// the List of Records.
	size_t pos;

	/// CRC32 of the Index
	uint32_t crc32;
};


extern LZMA_API(lzma_index_hash *)
lzma_index_hash_init(lzma_index_hash *index_hash,
		const lzma_allocator *allocator)
{
	if (index_hash == NULL) {
		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
		if (index_hash == NULL)
			return NULL;
	}

	index_hash->sequence = SEQ_BLOCK;
	index_hash->blocks.blocks_size = 0;
	index_hash->blocks.uncompressed_size = 0;
	index_hash->blocks.count = 0;
	index_hash->blocks.index_list_size = 0;
	index_hash->records.blocks_size = 0;
	index_hash->records.uncompressed_size = 0;
	index_hash->records.count = 0;
	index_hash->records.index_list_size = 0;
	index_hash->unpadded_size = 0;
	index_hash->uncompressed_size = 0;
	index_hash->pos = 0;
	index_hash->crc32 = 0;

	// These cannot fail because LZMA_CHECK_BEST is known to be supported.
	(void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
	(void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);

	return index_hash;
}


extern LZMA_API(void)
lzma_index_hash_end(lzma_index_hash *index_hash,
		const lzma_allocator *allocator)
{
	lzma_free(index_hash, allocator);
	return;
}


extern LZMA_API(lzma_vli)
lzma_index_hash_size(const lzma_index_hash *index_hash)
{
	// Get the size of the Index from ->blocks instead of ->records for
	// cases where the application wants to know the Index Size before
	// decoding the Index.
	return index_size(index_hash->blocks.count,
			index_hash->blocks.index_list_size);
}


/// Updates the sizes and the hash without any validation.
static lzma_ret
hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
		lzma_vli uncompressed_size)
{
	info->blocks_size += vli_ceil4(unpadded_size);
	info->uncompressed_size += uncompressed_size;
	info->index_list_size += lzma_vli_size(unpadded_size)
			+ lzma_vli_size(uncompressed_size);
	++info->count;

	const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
	lzma_check_update(&info->check, LZMA_CHECK_BEST,
			(const uint8_t *)(sizes), sizeof(sizes));

	return LZMA_OK;
}


extern LZMA_API(lzma_ret)
lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
		lzma_vli uncompressed_size)
{
	// Validate the arguments.
	if (index_hash->sequence != SEQ_BLOCK
			|| unpadded_size < UNPADDED_SIZE_MIN
			|| unpadded_size > UNPADDED_SIZE_MAX
			|| uncompressed_size > LZMA_VLI_MAX)
		return LZMA_PROG_ERROR;

	// Update the hash.
	return_if_error(hash_append(&index_hash->blocks,
			unpadded_size, uncompressed_size));

	// Validate that the properties of *info are still within the
	// allowed limits.
	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
			|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
			|| index_size(index_hash->blocks.count,
					index_hash->blocks.index_list_size)
				> LZMA_BACKWARD_SIZE_MAX
			|| index_stream_size(index_hash->blocks.blocks_size,
					index_hash->blocks.count,
					index_hash->blocks.index_list_size)
				> LZMA_VLI_MAX)
		return LZMA_DATA_ERROR;

	return LZMA_OK;
}


extern LZMA_API(lzma_ret)
lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
		size_t *in_pos, size_t in_size)
{
	// Catch a zero input buffer here, because in contrast to the Index
	// encoder and decoder functions, applications call this function
	// directly instead of via lzma_code(), which does the buffer
	// checking.
	if (*in_pos >= in_size)
		return LZMA_BUF_ERROR;

	// NOTE: This function has many similarities to the index_encode()
	// and index_decode() functions found in index_encoder.c and
	// index_decoder.c. See the comments especially in index_encoder.c.
	const size_t in_start = *in_pos;
	lzma_ret ret = LZMA_OK;

	while (*in_pos < in_size)
	switch (index_hash->sequence) {
	case SEQ_BLOCK:
		// Check that the Index Indicator is present.
		if (in[(*in_pos)++] != 0x00)
			return LZMA_DATA_ERROR;

		index_hash->sequence = SEQ_COUNT;
		break;

	case SEQ_COUNT: {
		ret = lzma_vli_decode(&index_hash->remaining,
				&index_hash->pos, in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			goto out;

		// The count must match the count of the Blocks decoded.
		if (index_hash->remaining != index_hash->blocks.count)
			return LZMA_DATA_ERROR;

		ret = LZMA_OK;
		index_hash->pos = 0;

		// Handle the special case when there are no Blocks.
		index_hash->sequence = index_hash->remaining == 0
				? SEQ_PADDING_INIT : SEQ_UNPADDED;
		break;
	}

	case SEQ_UNPADDED:
	case SEQ_UNCOMPRESSED: {
		lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
				? &index_hash->unpadded_size
				: &index_hash->uncompressed_size;

		ret = lzma_vli_decode(size, &index_hash->pos,
				in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			goto out;

		ret = LZMA_OK;
		index_hash->pos = 0;

		if (index_hash->sequence == SEQ_UNPADDED) {
			if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
					|| index_hash->unpadded_size
						> UNPADDED_SIZE_MAX)
				return LZMA_DATA_ERROR;

			index_hash->sequence = SEQ_UNCOMPRESSED;
		} else {
			// Update the hash.
			return_if_error(hash_append(&index_hash->records,
					index_hash->unpadded_size,
					index_hash->uncompressed_size));

			// Verify that we don't go over the known sizes. Note
			// that this validation is simpler than the one used
			// in lzma_index_hash_append(), because here we know
			// that the values in index_hash->blocks are already
			// validated and we are fine as long as we don't
			// exceed them in index_hash->records.
			if (index_hash->blocks.blocks_size
					< index_hash->records.blocks_size
					|| index_hash->blocks.uncompressed_size
					< index_hash->records.uncompressed_size
					|| index_hash->blocks.index_list_size
					< index_hash->records.index_list_size)
				return LZMA_DATA_ERROR;

			// Check if this was the last Record.
			index_hash->sequence = --index_hash->remaining == 0
					? SEQ_PADDING_INIT : SEQ_UNPADDED;
		}

		break;
	}

	case SEQ_PADDING_INIT:
		index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
				index_hash->records.count,
				index_hash->records.index_list_size)) & 3;
		index_hash->sequence = SEQ_PADDING;

	// Fall through

	case SEQ_PADDING:
		if (index_hash->pos > 0) {
			--index_hash->pos;
			if (in[(*in_pos)++] != 0x00)
				return LZMA_DATA_ERROR;

			break;
		}

		// Compare the sizes.
		if (index_hash->blocks.blocks_size
				!= index_hash->records.blocks_size
				|| index_hash->blocks.uncompressed_size
				!= index_hash->records.uncompressed_size
				|| index_hash->blocks.index_list_size
				!= index_hash->records.index_list_size)
			return LZMA_DATA_ERROR;

		// Finish the hashes and compare them.
		lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
		lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
		if (memcmp(index_hash->blocks.check.buffer.u8,
				index_hash->records.check.buffer.u8,
				lzma_check_size(LZMA_CHECK_BEST)) != 0)
			return LZMA_DATA_ERROR;

		// Finish the CRC32 calculation.
		index_hash->crc32 = lzma_crc32(in + in_start,
				*in_pos - in_start, index_hash->crc32);

		index_hash->sequence = SEQ_CRC32;

	// Fall through

	case SEQ_CRC32:
		do {
			if (*in_pos == in_size)
				return LZMA_OK;

			if (((index_hash->crc32 >> (index_hash->pos * 8))
					& 0xFF) != in[(*in_pos)++])
				return LZMA_DATA_ERROR;

		} while (++index_hash->pos < 4);

		return LZMA_STREAM_END;

	default:
		assert(0);
		return LZMA_PROG_ERROR;
	}

out:
	// Update the CRC32.
	index_hash->crc32 = lzma_crc32(in + in_start,
			*in_pos - in_start, index_hash->crc32);

	return ret;
}
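

/*
 * A minimal usage sketch, kept under "#if 0" so it is not compiled: it
 * shows one way an application might drive the functions above. The helper
 * validate_index(), its parameter names, and the single-buffer input are
 * illustrative assumptions only; the lzma_index_hash_*() calls and the
 * lzma_ret values are the API implemented in this file. A real application
 * would include <lzma.h> and could feed the Index field in several chunks,
 * calling lzma_index_hash_decode() repeatedly while it returns LZMA_OK.
 */
#if 0
static lzma_ret
validate_index(const uint8_t *index_buf, size_t index_field_size,
		const lzma_vli *unpadded, const lzma_vli *uncompressed,
		size_t block_count)
{
	lzma_index_hash *hash = lzma_index_hash_init(NULL, NULL);
	if (hash == NULL)
		return LZMA_MEM_ERROR;

	// Record the sizes of the Blocks that were actually decoded.
	for (size_t i = 0; i < block_count; ++i) {
		const lzma_ret ret = lzma_index_hash_append(hash,
				unpadded[i], uncompressed[i]);
		if (ret != LZMA_OK) {
			lzma_index_hash_end(hash, NULL);
			return ret;
		}
	}

	// Decode the Index field and compare it against the appended
	// Records. LZMA_STREAM_END means that the whole Index field was
	// decoded and it matched the Records appended above.
	size_t in_pos = 0;
	const lzma_ret ret = lzma_index_hash_decode(hash, index_buf,
			&in_pos, index_field_size);

	lzma_index_hash_end(hash, NULL);

	if (ret == LZMA_STREAM_END)
		return LZMA_OK;

	// LZMA_OK means that the buffer ended before the whole Index
	// field was seen; with a single complete buffer that indicates
	// truncated input.
	return ret == LZMA_OK ? LZMA_DATA_ERROR : ret;
}
#endif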