1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file index_hash.c 6 /// \brief Validates Index by using a hash function 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "common.h" 13 #include "index.h" 14 #include "check.h" 15 16 17 typedef struct { 18 /// Sum of the Block sizes (including Block Padding) 19 lzma_vli blocks_size; 20 21 /// Sum of the Uncompressed Size fields 22 lzma_vli uncompressed_size; 23 24 /// Number of Records 25 lzma_vli count; 26 27 /// Size of the List of Index Records as bytes 28 lzma_vli index_list_size; 29 30 /// Check calculated from Unpadded Sizes and Uncompressed Sizes. 31 lzma_check_state check; 32 33 } lzma_index_hash_info; 34 35 36 struct lzma_index_hash_s { 37 enum { 38 SEQ_BLOCK, 39 SEQ_COUNT, 40 SEQ_UNPADDED, 41 SEQ_UNCOMPRESSED, 42 SEQ_PADDING_INIT, 43 SEQ_PADDING, 44 SEQ_CRC32, 45 } sequence; 46 47 /// Information collected while decoding the actual Blocks. 48 lzma_index_hash_info blocks; 49 50 /// Information collected from the Index field. 51 lzma_index_hash_info records; 52 53 /// Number of Records not fully decoded 54 lzma_vli remaining; 55 56 /// Unpadded Size currently being read from an Index Record. 57 lzma_vli unpadded_size; 58 59 /// Uncompressed Size currently being read from an Index Record. 60 lzma_vli uncompressed_size; 61 62 /// Position in variable-length integers when decoding them from 63 /// the List of Records. 64 size_t pos; 65 66 /// CRC32 of the Index 67 uint32_t crc32; 68 }; 69 70 71 extern LZMA_API(lzma_index_hash *) 72 lzma_index_hash_init(lzma_index_hash *index_hash, 73 const lzma_allocator *allocator) 74 { 75 if (index_hash == NULL) { 76 index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); 77 if (index_hash == NULL) 78 return NULL; 79 } 80 81 index_hash->sequence = SEQ_BLOCK; 82 index_hash->blocks.blocks_size = 0; 83 index_hash->blocks.uncompressed_size = 0; 84 index_hash->blocks.count = 0; 85 index_hash->blocks.index_list_size = 0; 86 index_hash->records.blocks_size = 0; 87 index_hash->records.uncompressed_size = 0; 88 index_hash->records.count = 0; 89 index_hash->records.index_list_size = 0; 90 index_hash->unpadded_size = 0; 91 index_hash->uncompressed_size = 0; 92 index_hash->pos = 0; 93 index_hash->crc32 = 0; 94 95 // These cannot fail because LZMA_CHECK_BEST is known to be supported. 96 (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); 97 (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); 98 99 return index_hash; 100 } 101 102 103 extern LZMA_API(void) 104 lzma_index_hash_end(lzma_index_hash *index_hash, 105 const lzma_allocator *allocator) 106 { 107 lzma_free(index_hash, allocator); 108 return; 109 } 110 111 112 extern LZMA_API(lzma_vli) 113 lzma_index_hash_size(const lzma_index_hash *index_hash) 114 { 115 // Get the size of the Index from ->blocks instead of ->records for 116 // cases where application wants to know the Index Size before 117 // decoding the Index. 118 return index_size(index_hash->blocks.count, 119 index_hash->blocks.index_list_size); 120 } 121 122 123 /// Updates the sizes and the hash without any validation. 124 static void 125 hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, 126 lzma_vli uncompressed_size) 127 { 128 info->blocks_size += vli_ceil4(unpadded_size); 129 info->uncompressed_size += uncompressed_size; 130 info->index_list_size += lzma_vli_size(unpadded_size) 131 + lzma_vli_size(uncompressed_size); 132 ++info->count; 133 134 const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; 135 lzma_check_update(&info->check, LZMA_CHECK_BEST, 136 (const uint8_t *)(sizes), sizeof(sizes)); 137 138 return; 139 } 140 141 142 extern LZMA_API(lzma_ret) 143 lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, 144 lzma_vli uncompressed_size) 145 { 146 // Validate the arguments. 147 if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK 148 || unpadded_size < UNPADDED_SIZE_MIN 149 || unpadded_size > UNPADDED_SIZE_MAX 150 || uncompressed_size > LZMA_VLI_MAX) 151 return LZMA_PROG_ERROR; 152 153 // Update the hash. 154 hash_append(&index_hash->blocks, unpadded_size, uncompressed_size); 155 156 // Validate the properties of *info are still in allowed limits. 157 if (index_hash->blocks.blocks_size > LZMA_VLI_MAX 158 || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX 159 || index_size(index_hash->blocks.count, 160 index_hash->blocks.index_list_size) 161 > LZMA_BACKWARD_SIZE_MAX 162 || index_stream_size(index_hash->blocks.blocks_size, 163 index_hash->blocks.count, 164 index_hash->blocks.index_list_size) 165 > LZMA_VLI_MAX) 166 return LZMA_DATA_ERROR; 167 168 return LZMA_OK; 169 } 170 171 172 extern LZMA_API(lzma_ret) 173 lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, 174 size_t *in_pos, size_t in_size) 175 { 176 // Catch zero input buffer here, because in contrast to Index encoder 177 // and decoder functions, applications call this function directly 178 // instead of via lzma_code(), which does the buffer checking. 179 if (*in_pos >= in_size) 180 return LZMA_BUF_ERROR; 181 182 // NOTE: This function has many similarities to index_encode() and 183 // index_decode() functions found from index_encoder.c and 184 // index_decoder.c. See the comments especially in index_encoder.c. 185 const size_t in_start = *in_pos; 186 lzma_ret ret = LZMA_OK; 187 188 while (*in_pos < in_size) 189 switch (index_hash->sequence) { 190 case SEQ_BLOCK: 191 // Check the Index Indicator is present. 192 if (in[(*in_pos)++] != INDEX_INDICATOR) 193 return LZMA_DATA_ERROR; 194 195 index_hash->sequence = SEQ_COUNT; 196 break; 197 198 case SEQ_COUNT: { 199 ret = lzma_vli_decode(&index_hash->remaining, 200 &index_hash->pos, in, in_pos, in_size); 201 if (ret != LZMA_STREAM_END) 202 goto out; 203 204 // The count must match the count of the Blocks decoded. 205 if (index_hash->remaining != index_hash->blocks.count) 206 return LZMA_DATA_ERROR; 207 208 ret = LZMA_OK; 209 index_hash->pos = 0; 210 211 // Handle the special case when there are no Blocks. 212 index_hash->sequence = index_hash->remaining == 0 213 ? SEQ_PADDING_INIT : SEQ_UNPADDED; 214 break; 215 } 216 217 case SEQ_UNPADDED: 218 case SEQ_UNCOMPRESSED: { 219 lzma_vli *size = index_hash->sequence == SEQ_UNPADDED 220 ? &index_hash->unpadded_size 221 : &index_hash->uncompressed_size; 222 223 ret = lzma_vli_decode(size, &index_hash->pos, 224 in, in_pos, in_size); 225 if (ret != LZMA_STREAM_END) 226 goto out; 227 228 ret = LZMA_OK; 229 index_hash->pos = 0; 230 231 if (index_hash->sequence == SEQ_UNPADDED) { 232 if (index_hash->unpadded_size < UNPADDED_SIZE_MIN 233 || index_hash->unpadded_size 234 > UNPADDED_SIZE_MAX) 235 return LZMA_DATA_ERROR; 236 237 index_hash->sequence = SEQ_UNCOMPRESSED; 238 } else { 239 // Update the hash. 240 hash_append(&index_hash->records, 241 index_hash->unpadded_size, 242 index_hash->uncompressed_size); 243 244 // Verify that we don't go over the known sizes. Note 245 // that this validation is simpler than the one used 246 // in lzma_index_hash_append(), because here we know 247 // that values in index_hash->blocks are already 248 // validated and we are fine as long as we don't 249 // exceed them in index_hash->records. 250 if (index_hash->blocks.blocks_size 251 < index_hash->records.blocks_size 252 || index_hash->blocks.uncompressed_size 253 < index_hash->records.uncompressed_size 254 || index_hash->blocks.index_list_size 255 < index_hash->records.index_list_size) 256 return LZMA_DATA_ERROR; 257 258 // Check if this was the last Record. 259 index_hash->sequence = --index_hash->remaining == 0 260 ? SEQ_PADDING_INIT : SEQ_UNPADDED; 261 } 262 263 break; 264 } 265 266 case SEQ_PADDING_INIT: 267 index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( 268 index_hash->records.count, 269 index_hash->records.index_list_size)) & 3; 270 index_hash->sequence = SEQ_PADDING; 271 272 // Fall through 273 274 case SEQ_PADDING: 275 if (index_hash->pos > 0) { 276 --index_hash->pos; 277 if (in[(*in_pos)++] != 0x00) 278 return LZMA_DATA_ERROR; 279 280 break; 281 } 282 283 // Compare the sizes. 284 if (index_hash->blocks.blocks_size 285 != index_hash->records.blocks_size 286 || index_hash->blocks.uncompressed_size 287 != index_hash->records.uncompressed_size 288 || index_hash->blocks.index_list_size 289 != index_hash->records.index_list_size) 290 return LZMA_DATA_ERROR; 291 292 // Finish the hashes and compare them. 293 lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); 294 lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); 295 if (memcmp(index_hash->blocks.check.buffer.u8, 296 index_hash->records.check.buffer.u8, 297 lzma_check_size(LZMA_CHECK_BEST)) != 0) 298 return LZMA_DATA_ERROR; 299 300 // Finish the CRC32 calculation. 301 index_hash->crc32 = lzma_crc32(in + in_start, 302 *in_pos - in_start, index_hash->crc32); 303 304 index_hash->sequence = SEQ_CRC32; 305 306 // Fall through 307 308 case SEQ_CRC32: 309 do { 310 if (*in_pos == in_size) 311 return LZMA_OK; 312 313 if (((index_hash->crc32 >> (index_hash->pos * 8)) 314 & 0xFF) != in[(*in_pos)++]) { 315 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 316 return LZMA_DATA_ERROR; 317 #endif 318 } 319 320 } while (++index_hash->pos < 4); 321 322 return LZMA_STREAM_END; 323 324 default: 325 assert(0); 326 return LZMA_PROG_ERROR; 327 } 328 329 out: 330 // Update the CRC32. 331 // 332 // Avoid null pointer + 0 (undefined behavior) in "in + in_start". 333 // In such a case we had no input and thus in_used == 0. 334 { 335 const size_t in_used = *in_pos - in_start; 336 if (in_used > 0) 337 index_hash->crc32 = lzma_crc32(in + in_start, 338 in_used, index_hash->crc32); 339 } 340 341 return ret; 342 } 343