///////////////////////////////////////////////////////////////////////////////
//
/// \file       index_hash.c
/// \brief      Validates Index by using a hash function
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "common.h"
#include "index.h"
#include "check.h"


typedef struct {
	/// Sum of the Block sizes (including Block Padding)
	lzma_vli blocks_size;

	/// Sum of the Uncompressed Size fields
	lzma_vli uncompressed_size;

	/// Number of Records
	lzma_vli count;

	/// Size of the List of Index Records as bytes
	lzma_vli index_list_size;

	/// Check calculated from Unpadded Sizes and Uncompressed Sizes.
	lzma_check_state check;

} lzma_index_hash_info;


struct lzma_index_hash_s {
	enum {
		SEQ_BLOCK,
		SEQ_COUNT,
		SEQ_UNPADDED,
		SEQ_UNCOMPRESSED,
		SEQ_PADDING_INIT,
		SEQ_PADDING,
		SEQ_CRC32,
	} sequence;

	/// Information collected while decoding the actual Blocks.
	lzma_index_hash_info blocks;

	/// Information collected from the Index field.
	lzma_index_hash_info records;

	/// Number of Records not fully decoded
	lzma_vli remaining;

	/// Unpadded Size currently being read from an Index Record.
	lzma_vli unpadded_size;

	/// Uncompressed Size currently being read from an Index Record.
	lzma_vli uncompressed_size;

	/// Position in the variable-length integers when decoding them from
	/// the List of Records. Also reused as a byte counter in the
	/// SEQ_PADDING and SEQ_CRC32 states.
	size_t pos;

	/// CRC32 of the Index
	uint32_t crc32;
};


extern LZMA_API(lzma_index_hash *)
lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator)
{
	if (index_hash == NULL) {
		index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
		if (index_hash == NULL)
			return NULL;
	}

	index_hash->sequence = SEQ_BLOCK;
	index_hash->blocks.blocks_size = 0;
	index_hash->blocks.uncompressed_size = 0;
	index_hash->blocks.count = 0;
	index_hash->blocks.index_list_size = 0;
	index_hash->records.blocks_size = 0;
	index_hash->records.uncompressed_size = 0;
	index_hash->records.count = 0;
	index_hash->records.index_list_size = 0;
	index_hash->unpadded_size = 0;
	index_hash->uncompressed_size = 0;
	index_hash->pos = 0;
	index_hash->crc32 = 0;

	// These cannot fail because LZMA_CHECK_BEST is known to be supported.
	(void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
	(void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);

	return index_hash;
}


extern LZMA_API(void)
lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator)
{
	lzma_free(index_hash, allocator);
	return;
}


extern LZMA_API(lzma_vli)
lzma_index_hash_size(const lzma_index_hash *index_hash)
{
	// Get the size of the Index from ->blocks instead of ->records for
	// cases where the application wants to know the Index Size before
	// decoding the Index.
	return index_size(index_hash->blocks.count,
			index_hash->blocks.index_list_size);
}


/// Updates the sizes and the hash without any validation.
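///
/// Note: The check is fed the raw in-memory bytes of the two lzma_vli
/// values, so its value depends on the byte order of the host. That is
/// fine here, because the two hashes being compared are both calculated
/// this way within the same process.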
static lzma_ret
hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
		lzma_vli uncompressed_size)
{
	info->blocks_size += vli_ceil4(unpadded_size);
	info->uncompressed_size += uncompressed_size;
	info->index_list_size += lzma_vli_size(unpadded_size)
			+ lzma_vli_size(uncompressed_size);
	++info->count;

	const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
	lzma_check_update(&info->check, LZMA_CHECK_BEST,
			(const uint8_t *)(sizes), sizeof(sizes));

	return LZMA_OK;
}


extern LZMA_API(lzma_ret)
lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
		lzma_vli uncompressed_size)
{
	// Validate the arguments.
	if (index_hash->sequence != SEQ_BLOCK
			|| unpadded_size < UNPADDED_SIZE_MIN
			|| unpadded_size > UNPADDED_SIZE_MAX
			|| uncompressed_size > LZMA_VLI_MAX)
		return LZMA_PROG_ERROR;

	// Update the hash.
	return_if_error(hash_append(&index_hash->blocks,
			unpadded_size, uncompressed_size));

	// Validate that the values in index_hash->blocks are still within
	// the allowed limits.
	if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
			|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
			|| index_size(index_hash->blocks.count,
					index_hash->blocks.index_list_size)
				> LZMA_BACKWARD_SIZE_MAX
			|| index_stream_size(index_hash->blocks.blocks_size,
					index_hash->blocks.count,
					index_hash->blocks.index_list_size)
				> LZMA_VLI_MAX)
		return LZMA_DATA_ERROR;

	return LZMA_OK;
}


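// The Index field that the function below validates has the following
// layout (see the .xz file format specification):
//
//     Index Indicator      1 byte (0x00)
//     Number of Records    variable-length integer
//     List of Records      one Unpadded Size VLI and one Uncompressed
//                          Size VLI per Record
//     Index Padding        0-3 null bytes so that the size of the Index
//                          is a multiple of four bytes
//     CRC32                4 bytes, little endian, calculated over the
//                          preceding fields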
extern LZMA_API(lzma_ret)
lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
		size_t *in_pos, size_t in_size)
{
	// Catch a zero input buffer here, because in contrast to the Index
	// encoder and decoder functions, applications call this function
	// directly instead of via lzma_code(), which does the buffer
	// checking.
	if (*in_pos >= in_size)
		return LZMA_BUF_ERROR;

	// NOTE: This function has many similarities to the index_encode()
	// and index_decode() functions found in index_encoder.c and
	// index_decoder.c. See the comments especially in index_encoder.c.
	const size_t in_start = *in_pos;
	lzma_ret ret = LZMA_OK;

	while (*in_pos < in_size)
	switch (index_hash->sequence) {
	case SEQ_BLOCK:
		// Check that the Index Indicator is present.
		if (in[(*in_pos)++] != 0x00)
			return LZMA_DATA_ERROR;

		index_hash->sequence = SEQ_COUNT;
		break;

	case SEQ_COUNT: {
		ret = lzma_vli_decode(&index_hash->remaining,
				&index_hash->pos, in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			goto out;

		// The count must match the count of the Blocks decoded.
		if (index_hash->remaining != index_hash->blocks.count)
			return LZMA_DATA_ERROR;

		ret = LZMA_OK;
		index_hash->pos = 0;

		// Handle the special case when there are no Blocks.
		index_hash->sequence = index_hash->remaining == 0
				? SEQ_PADDING_INIT : SEQ_UNPADDED;
		break;
	}

	case SEQ_UNPADDED:
	case SEQ_UNCOMPRESSED: {
		lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
				? &index_hash->unpadded_size
				: &index_hash->uncompressed_size;

		ret = lzma_vli_decode(size, &index_hash->pos,
				in, in_pos, in_size);
		if (ret != LZMA_STREAM_END)
			goto out;

		ret = LZMA_OK;
		index_hash->pos = 0;

		if (index_hash->sequence == SEQ_UNPADDED) {
			if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
					|| index_hash->unpadded_size
						> UNPADDED_SIZE_MAX)
				return LZMA_DATA_ERROR;

			index_hash->sequence = SEQ_UNCOMPRESSED;
		} else {
			// Update the hash.
			return_if_error(hash_append(&index_hash->records,
					index_hash->unpadded_size,
					index_hash->uncompressed_size));

			// Verify that we don't go over the known sizes. Note
			// that this validation is simpler than the one used
			// in lzma_index_hash_append(), because here we know
			// that the values in index_hash->blocks are already
			// validated and we are fine as long as we don't
			// exceed them in index_hash->records.
			if (index_hash->blocks.blocks_size
					< index_hash->records.blocks_size
					|| index_hash->blocks.uncompressed_size
					< index_hash->records.uncompressed_size
					|| index_hash->blocks.index_list_size
					< index_hash->records.index_list_size)
				return LZMA_DATA_ERROR;

			// Check if this was the last Record.
			index_hash->sequence = --index_hash->remaining == 0
					? SEQ_PADDING_INIT : SEQ_UNPADDED;
		}

		break;
	}

	case SEQ_PADDING_INIT:
		index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
				index_hash->records.count,
				index_hash->records.index_list_size)) & 3;
		index_hash->sequence = SEQ_PADDING;

	// Fall through

	case SEQ_PADDING:
		if (index_hash->pos > 0) {
			--index_hash->pos;
			if (in[(*in_pos)++] != 0x00)
				return LZMA_DATA_ERROR;

			break;
		}

		// Compare the sizes.
		if (index_hash->blocks.blocks_size
				!= index_hash->records.blocks_size
				|| index_hash->blocks.uncompressed_size
				!= index_hash->records.uncompressed_size
				|| index_hash->blocks.index_list_size
				!= index_hash->records.index_list_size)
			return LZMA_DATA_ERROR;

		// Finish the hashes and compare them.
		lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
		lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
		if (memcmp(index_hash->blocks.check.buffer.u8,
				index_hash->records.check.buffer.u8,
				lzma_check_size(LZMA_CHECK_BEST)) != 0)
			return LZMA_DATA_ERROR;

		// Finish the CRC32 calculation.
		index_hash->crc32 = lzma_crc32(in + in_start,
				*in_pos - in_start, index_hash->crc32);

		index_hash->sequence = SEQ_CRC32;

	// Fall through

	case SEQ_CRC32:
		do {
			if (*in_pos == in_size)
				return LZMA_OK;

			if (((index_hash->crc32 >> (index_hash->pos * 8))
					& 0xFF) != in[(*in_pos)++])
				return LZMA_DATA_ERROR;

		} while (++index_hash->pos < 4);

		return LZMA_STREAM_END;

	default:
		assert(0);
		return LZMA_PROG_ERROR;
	}

out:
	// Update the CRC32.
	index_hash->crc32 = lzma_crc32(in + in_start,
			*in_pos - in_start, index_hash->crc32);

	return ret;
}
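

// The sketch below (kept out of the build with #if 0) illustrates one way
// an application could drive this API: append the sizes of the Blocks as
// they are decoded, then feed the encoded Index field and check for
// LZMA_STREAM_END. It is only an illustrative example: validate_index()
// and its parameters are hypothetical and not part of liblzma, and error
// handling is kept to a minimum.
#if 0
#include <lzma.h>

static lzma_ret
validate_index(const uint8_t *index_buf, size_t index_field_size,
		const lzma_vli *unpadded_sizes,
		const lzma_vli *uncompressed_sizes, size_t block_count)
{
	// Allocate a new validation state using the default allocator.
	lzma_index_hash *ih = lzma_index_hash_init(NULL, NULL);
	if (ih == NULL)
		return LZMA_MEM_ERROR;

	// Record every Block as it was actually decoded.
	for (size_t i = 0; i < block_count; ++i) {
		const lzma_ret ret = lzma_index_hash_append(ih,
				unpadded_sizes[i], uncompressed_sizes[i]);
		if (ret != LZMA_OK) {
			lzma_index_hash_end(ih, NULL);
			return ret;
		}
	}

	// Feed the encoded Index field. LZMA_STREAM_END indicates that
	// the Index matched the Blocks that were recorded above.
	size_t in_pos = 0;
	const lzma_ret ret = lzma_index_hash_decode(ih, index_buf,
			&in_pos, index_field_size);
	lzma_index_hash_end(ih, NULL);

	if (ret == LZMA_STREAM_END)
		return LZMA_OK;

	// LZMA_OK here means that the buffer ended before the whole
	// Index field was seen.
	return ret == LZMA_OK ? LZMA_DATA_ERROR : ret;
}
#endif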