1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright 2023 Red Hat 4 */ 5 #include "delta-index.h" 6 7 #include <linux/bitops.h> 8 #include <linux/bits.h> 9 #include <linux/compiler.h> 10 #include <linux/limits.h> 11 #include <linux/log2.h> 12 13 #include "cpu.h" 14 #include "errors.h" 15 #include "logger.h" 16 #include "memory-alloc.h" 17 #include "numeric.h" 18 #include "permassert.h" 19 #include "string-utils.h" 20 #include "time-utils.h" 21 22 #include "config.h" 23 #include "indexer.h" 24 25 /* 26 * The entries in a delta index could be stored in a single delta list, but to reduce search times 27 * and update costs it uses multiple delta lists. These lists are stored in a single chunk of 28 * memory managed by the delta_zone structure. The delta_zone can move the data around within its 29 * memory, so the location of each delta list is recorded as a bit offset into the memory. Because 30 * the volume index can contain over a million delta lists, we want to be efficient with the size 31 * of the delta list header information. This information is encoded into 16 bytes per list. The 32 * volume index delta list memory can easily exceed 4 gigabits, so a 64 bit value is needed to 33 * address the memory. The volume index delta lists average around 6 kilobits, so 16 bits are 34 * sufficient to store the size of a delta list. 35 * 36 * Each delta list is stored as a bit stream. Within the delta list encoding, bits and bytes are 37 * numbered in little endian order. Within a byte, bit 0 is the least significant bit (0x1), and 38 * bit 7 is the most significant bit (0x80). Within a bit stream, bit 7 is the most significant bit 39 * of byte 0, and bit 8 is the least significant bit of byte 1. Within a byte array, a byte's 40 * number corresponds to its index in the array. 41 * 42 * A standard delta list entry is stored as a fixed length payload (the value) followed by a 43 * variable length key (the delta). A collision entry is used when two block names have the same 44 * delta list address. A collision entry always follows a standard entry for the hash with which it 45 * collides, and is encoded with DELTA == 0 with an additional 256 bits field at the end, 46 * containing the full block name. An entry with a delta of 0 at the beginning of a delta list 47 * indicates a normal entry. 48 * 49 * The delta in each entry is encoded with a variable-length Huffman code to minimize the memory 50 * used by small deltas. The Huffman code is specified by three parameters, which can be computed 51 * from the desired mean delta when the index is full. (See compute_coding_constants() for 52 * details.) 53 * 54 * The bit field utilities used to read and write delta entries assume that it is possible to read 55 * some bytes beyond the end of the bit field, so a delta_zone memory allocation is guarded by two 56 * invalid delta lists to prevent reading outside the delta_zone memory. The valid delta lists are 57 * numbered 1 to N, and the guard lists are numbered 0 and N+1. The function to decode the bit 58 * stream include a step that skips over bits set to 0 until the first 1 bit is found. A corrupted 59 * delta list could cause this step to run off the end of the delta_zone memory, so as extra 60 * protection against this happening, the tail guard list is set to all ones. 61 * 62 * The delta_index supports two different forms. The mutable form is created by 63 * uds_initialize_delta_index(), and is used for the volume index and for open chapter indexes. The 64 * immutable form is created by uds_initialize_delta_index_page(), and is used for closed (and 65 * cached) chapter index pages. The immutable form does not allocate delta list headers or 66 * temporary offsets, and thus is somewhat more memory efficient. 67 */ 68 69 /* 70 * This is the largest field size supported by get_field() and set_field(). Any field that is 71 * larger is not guaranteed to fit in a single byte-aligned u32. 72 */ 73 #define MAX_FIELD_BITS ((sizeof(u32) - 1) * BITS_PER_BYTE + 1) 74 75 /* 76 * This is the largest field size supported by get_big_field() and set_big_field(). Any field that 77 * is larger is not guaranteed to fit in a single byte-aligned u64. 78 */ 79 #define MAX_BIG_FIELD_BITS ((sizeof(u64) - 1) * BITS_PER_BYTE + 1) 80 81 /* 82 * This is the number of guard bytes needed at the end of the memory byte array when using the bit 83 * utilities. These utilities call get_big_field() and set_big_field(), which can access up to 7 84 * bytes beyond the end of the desired field. The definition is written to make it clear how this 85 * value is derived. 86 */ 87 #define POST_FIELD_GUARD_BYTES (sizeof(u64) - 1) 88 89 /* The number of guard bits that are needed in the tail guard list */ 90 #define GUARD_BITS (POST_FIELD_GUARD_BYTES * BITS_PER_BYTE) 91 92 /* 93 * The maximum size of a single delta list in bytes. We count guard bytes in this value because a 94 * buffer of this size can be used with move_bits(). 95 */ 96 #define DELTA_LIST_MAX_BYTE_COUNT \ 97 ((U16_MAX + BITS_PER_BYTE) / BITS_PER_BYTE + POST_FIELD_GUARD_BYTES) 98 99 /* The number of extra bytes and bits needed to store a collision entry */ 100 #define COLLISION_BYTES UDS_RECORD_NAME_SIZE 101 #define COLLISION_BITS (COLLISION_BYTES * BITS_PER_BYTE) 102 103 /* 104 * Immutable delta lists are packed into pages containing a header that encodes the delta list 105 * information into 19 bits per list (64KB bit offset). 106 */ 107 #define IMMUTABLE_HEADER_SIZE 19 108 109 /* 110 * Constants and structures for the saved delta index. "DI" is for delta_index, and -##### is a 111 * number to increment when the format of the data changes. 112 */ 113 #define MAGIC_SIZE 8 114 115 static const char DELTA_INDEX_MAGIC[] = "DI-00002"; 116 117 struct delta_index_header { 118 char magic[MAGIC_SIZE]; 119 u32 zone_number; 120 u32 zone_count; 121 u32 first_list; 122 u32 list_count; 123 u64 record_count; 124 u64 collision_count; 125 }; 126 127 /* 128 * Header data used for immutable delta index pages. This data is followed by the delta list offset 129 * table. 130 */ 131 struct delta_page_header { 132 /* Externally-defined nonce */ 133 u64 nonce; 134 /* The virtual chapter number */ 135 u64 virtual_chapter_number; 136 /* Index of the first delta list on the page */ 137 u16 first_list; 138 /* Number of delta lists on the page */ 139 u16 list_count; 140 } __packed; 141 142 static inline u64 get_delta_list_byte_start(const struct delta_list *delta_list) 143 { 144 return delta_list->start / BITS_PER_BYTE; 145 } 146 147 static inline u16 get_delta_list_byte_size(const struct delta_list *delta_list) 148 { 149 unsigned int bit_offset = delta_list->start % BITS_PER_BYTE; 150 151 return BITS_TO_BYTES(bit_offset + delta_list->size); 152 } 153 154 static void rebalance_delta_zone(const struct delta_zone *delta_zone, u32 first, 155 u32 last) 156 { 157 struct delta_list *delta_list; 158 u64 new_start; 159 160 if (first == last) { 161 /* Only one list is moving, and we know there is space. */ 162 delta_list = &delta_zone->delta_lists[first]; 163 new_start = delta_zone->new_offsets[first]; 164 if (delta_list->start != new_start) { 165 u64 source; 166 u64 destination; 167 168 source = get_delta_list_byte_start(delta_list); 169 delta_list->start = new_start; 170 destination = get_delta_list_byte_start(delta_list); 171 memmove(delta_zone->memory + destination, 172 delta_zone->memory + source, 173 get_delta_list_byte_size(delta_list)); 174 } 175 } else { 176 /* 177 * There is more than one list. Divide the problem in half, and use recursive calls 178 * to process each half. Note that after this computation, first <= middle, and 179 * middle < last. 180 */ 181 u32 middle = (first + last) / 2; 182 183 delta_list = &delta_zone->delta_lists[middle]; 184 new_start = delta_zone->new_offsets[middle]; 185 186 /* 187 * The direction that our middle list is moving determines which half of the 188 * problem must be processed first. 189 */ 190 if (new_start > delta_list->start) { 191 rebalance_delta_zone(delta_zone, middle + 1, last); 192 rebalance_delta_zone(delta_zone, first, middle); 193 } else { 194 rebalance_delta_zone(delta_zone, first, middle); 195 rebalance_delta_zone(delta_zone, middle + 1, last); 196 } 197 } 198 } 199 200 static inline size_t get_zone_memory_size(unsigned int zone_count, size_t memory_size) 201 { 202 /* Round up so that each zone is a multiple of 64K in size. */ 203 size_t ALLOC_BOUNDARY = 64 * 1024; 204 205 return (memory_size / zone_count + ALLOC_BOUNDARY - 1) & -ALLOC_BOUNDARY; 206 } 207 208 void uds_reset_delta_index(const struct delta_index *delta_index) 209 { 210 unsigned int z; 211 212 /* 213 * Initialize all delta lists to be empty. We keep 2 extra delta list descriptors, one 214 * before the first real entry and one after so that we don't need to bounds check the 215 * array access when calculating preceding and following gap sizes. 216 */ 217 for (z = 0; z < delta_index->zone_count; z++) { 218 u64 list_bits; 219 u64 spacing; 220 u64 offset; 221 unsigned int i; 222 struct delta_zone *zone = &delta_index->delta_zones[z]; 223 struct delta_list *delta_lists = zone->delta_lists; 224 225 /* Zeroing the delta list headers initializes the head guard list correctly. */ 226 memset(delta_lists, 0, 227 (zone->list_count + 2) * sizeof(struct delta_list)); 228 229 /* Set all the bits in the end guard list. */ 230 list_bits = (u64) zone->size * BITS_PER_BYTE - GUARD_BITS; 231 delta_lists[zone->list_count + 1].start = list_bits; 232 delta_lists[zone->list_count + 1].size = GUARD_BITS; 233 memset(zone->memory + (list_bits / BITS_PER_BYTE), ~0, 234 POST_FIELD_GUARD_BYTES); 235 236 /* Evenly space out the real delta lists by setting regular offsets. */ 237 spacing = list_bits / zone->list_count; 238 offset = spacing / 2; 239 for (i = 1; i <= zone->list_count; i++) { 240 delta_lists[i].start = offset; 241 offset += spacing; 242 } 243 244 /* Update the statistics. */ 245 zone->discard_count += zone->record_count; 246 zone->record_count = 0; 247 zone->collision_count = 0; 248 } 249 } 250 251 /* Compute the Huffman coding parameters for the given mean delta. The Huffman code is specified by 252 * three parameters: 253 * 254 * MINBITS The number of bits in the smallest code 255 * BASE The number of values coded using a code of length MINBITS 256 * INCR The number of values coded by using one additional bit 257 * 258 * These parameters are related by this equation: 259 * 260 * BASE + INCR == 1 << MINBITS 261 * 262 * The math for the Huffman code of an exponential distribution says that 263 * 264 * INCR = log(2) * MEAN_DELTA 265 * 266 * Then use the smallest MINBITS value so that 267 * 268 * (1 << MINBITS) > INCR 269 * 270 * And then 271 * 272 * BASE = (1 << MINBITS) - INCR 273 * 274 * Now the index can generate a code such that 275 * - The first BASE values code using MINBITS bits. 276 * - The next INCR values code using MINBITS+1 bits. 277 * - The next INCR values code using MINBITS+2 bits. 278 * - (and so on). 279 */ 280 static void compute_coding_constants(u32 mean_delta, u16 *min_bits, u32 *min_keys, u32 *incr_keys) 281 { 282 /* 283 * We want to compute the rounded value of log(2) * mean_delta. Since we cannot always use 284 * floating point, use a really good integer approximation. 285 */ 286 *incr_keys = (836158UL * mean_delta + 603160UL) / 1206321UL; 287 *min_bits = bits_per(*incr_keys + 1); 288 *min_keys = (1 << *min_bits) - *incr_keys; 289 } 290 291 void uds_uninitialize_delta_index(struct delta_index *delta_index) 292 { 293 unsigned int z; 294 295 if (delta_index->delta_zones == NULL) 296 return; 297 298 for (z = 0; z < delta_index->zone_count; z++) { 299 vdo_free(vdo_forget(delta_index->delta_zones[z].new_offsets)); 300 vdo_free(vdo_forget(delta_index->delta_zones[z].delta_lists)); 301 vdo_free(vdo_forget(delta_index->delta_zones[z].memory)); 302 } 303 304 vdo_free(delta_index->delta_zones); 305 memset(delta_index, 0, sizeof(struct delta_index)); 306 } 307 308 static int initialize_delta_zone(struct delta_zone *delta_zone, size_t size, 309 u32 first_list, u32 list_count, u32 mean_delta, 310 u32 payload_bits, u8 tag) 311 { 312 int result; 313 314 result = vdo_allocate(size, "delta list", &delta_zone->memory); 315 if (result != VDO_SUCCESS) 316 return result; 317 318 result = vdo_allocate(list_count + 2, "delta list temp", &delta_zone->new_offsets); 319 if (result != VDO_SUCCESS) 320 return result; 321 322 /* Allocate the delta lists. */ 323 result = vdo_allocate(list_count + 2, "delta lists", &delta_zone->delta_lists); 324 if (result != VDO_SUCCESS) 325 return result; 326 327 compute_coding_constants(mean_delta, &delta_zone->min_bits, 328 &delta_zone->min_keys, &delta_zone->incr_keys); 329 delta_zone->value_bits = payload_bits; 330 delta_zone->buffered_writer = NULL; 331 delta_zone->size = size; 332 delta_zone->rebalance_time = 0; 333 delta_zone->rebalance_count = 0; 334 delta_zone->record_count = 0; 335 delta_zone->collision_count = 0; 336 delta_zone->discard_count = 0; 337 delta_zone->overflow_count = 0; 338 delta_zone->first_list = first_list; 339 delta_zone->list_count = list_count; 340 delta_zone->tag = tag; 341 342 return UDS_SUCCESS; 343 } 344 345 int uds_initialize_delta_index(struct delta_index *delta_index, unsigned int zone_count, 346 u32 list_count, u32 mean_delta, u32 payload_bits, 347 size_t memory_size, u8 tag) 348 { 349 int result; 350 unsigned int z; 351 size_t zone_memory; 352 353 result = vdo_allocate(zone_count, "Delta Index Zones", &delta_index->delta_zones); 354 if (result != VDO_SUCCESS) 355 return result; 356 357 delta_index->zone_count = zone_count; 358 delta_index->list_count = list_count; 359 delta_index->lists_per_zone = DIV_ROUND_UP(list_count, zone_count); 360 delta_index->memory_size = 0; 361 delta_index->mutable = true; 362 delta_index->tag = tag; 363 364 for (z = 0; z < zone_count; z++) { 365 u32 lists_in_zone = delta_index->lists_per_zone; 366 u32 first_list_in_zone = z * lists_in_zone; 367 368 if (z == zone_count - 1) { 369 /* 370 * The last zone gets fewer lists if zone_count doesn't evenly divide 371 * list_count. We'll have an underflow if the assertion below doesn't hold. 372 */ 373 if (delta_index->list_count <= first_list_in_zone) { 374 uds_uninitialize_delta_index(delta_index); 375 return vdo_log_error_strerror(UDS_INVALID_ARGUMENT, 376 "%u delta lists not enough for %u zones", 377 list_count, zone_count); 378 } 379 lists_in_zone = delta_index->list_count - first_list_in_zone; 380 } 381 382 zone_memory = get_zone_memory_size(zone_count, memory_size); 383 result = initialize_delta_zone(&delta_index->delta_zones[z], zone_memory, 384 first_list_in_zone, lists_in_zone, 385 mean_delta, payload_bits, tag); 386 if (result != UDS_SUCCESS) { 387 uds_uninitialize_delta_index(delta_index); 388 return result; 389 } 390 391 delta_index->memory_size += 392 (sizeof(struct delta_zone) + zone_memory + 393 (lists_in_zone + 2) * (sizeof(struct delta_list) + sizeof(u64))); 394 } 395 396 uds_reset_delta_index(delta_index); 397 return UDS_SUCCESS; 398 } 399 400 /* Read a bit field from an arbitrary bit boundary. */ 401 static inline u32 get_field(const u8 *memory, u64 offset, u8 size) 402 { 403 const void *addr = memory + offset / BITS_PER_BYTE; 404 405 return (get_unaligned_le32(addr) >> (offset % BITS_PER_BYTE)) & ((1 << size) - 1); 406 } 407 408 /* Write a bit field to an arbitrary bit boundary. */ 409 static inline void set_field(u32 value, u8 *memory, u64 offset, u8 size) 410 { 411 void *addr = memory + offset / BITS_PER_BYTE; 412 int shift = offset % BITS_PER_BYTE; 413 u32 data = get_unaligned_le32(addr); 414 415 data &= ~(((1 << size) - 1) << shift); 416 data |= value << shift; 417 put_unaligned_le32(data, addr); 418 } 419 420 /* Get the bit offset to the immutable delta list header. */ 421 static inline u32 get_immutable_header_offset(u32 list_number) 422 { 423 return sizeof(struct delta_page_header) * BITS_PER_BYTE + 424 list_number * IMMUTABLE_HEADER_SIZE; 425 } 426 427 /* Get the bit offset to the start of the immutable delta list bit stream. */ 428 static inline u32 get_immutable_start(const u8 *memory, u32 list_number) 429 { 430 return get_field(memory, get_immutable_header_offset(list_number), 431 IMMUTABLE_HEADER_SIZE); 432 } 433 434 /* Set the bit offset to the start of the immutable delta list bit stream. */ 435 static inline void set_immutable_start(u8 *memory, u32 list_number, u32 start) 436 { 437 set_field(start, memory, get_immutable_header_offset(list_number), 438 IMMUTABLE_HEADER_SIZE); 439 } 440 441 static bool verify_delta_index_page(u64 nonce, u16 list_count, u64 expected_nonce, 442 u8 *memory, size_t memory_size) 443 { 444 unsigned int i; 445 446 /* 447 * Verify the nonce. A mismatch can happen here during rebuild if we haven't written the 448 * entire volume at least once. 449 */ 450 if (nonce != expected_nonce) 451 return false; 452 453 /* Verify that the number of delta lists can fit in the page. */ 454 if (list_count > ((memory_size - sizeof(struct delta_page_header)) * 455 BITS_PER_BYTE / IMMUTABLE_HEADER_SIZE)) 456 return false; 457 458 /* 459 * Verify that the first delta list is immediately after the last delta 460 * list header. 461 */ 462 if (get_immutable_start(memory, 0) != get_immutable_header_offset(list_count + 1)) 463 return false; 464 465 /* Verify that the lists are in the correct order. */ 466 for (i = 0; i < list_count; i++) { 467 if (get_immutable_start(memory, i) > get_immutable_start(memory, i + 1)) 468 return false; 469 } 470 471 /* 472 * Verify that the last list ends on the page, and that there is room 473 * for the post-field guard bits. 474 */ 475 if (get_immutable_start(memory, list_count) > 476 (memory_size - POST_FIELD_GUARD_BYTES) * BITS_PER_BYTE) 477 return false; 478 479 /* Verify that the guard bytes are correctly set to all ones. */ 480 for (i = 0; i < POST_FIELD_GUARD_BYTES; i++) { 481 if (memory[memory_size - POST_FIELD_GUARD_BYTES + i] != (u8) ~0) 482 return false; 483 } 484 485 /* All verifications passed. */ 486 return true; 487 } 488 489 /* Initialize a delta index page to refer to a supplied page. */ 490 int uds_initialize_delta_index_page(struct delta_index_page *delta_index_page, 491 u64 expected_nonce, u32 mean_delta, u32 payload_bits, 492 u8 *memory, size_t memory_size) 493 { 494 u64 nonce; 495 u64 vcn; 496 u64 first_list; 497 u64 list_count; 498 struct delta_page_header *header = (struct delta_page_header *) memory; 499 struct delta_zone *delta_zone = &delta_index_page->delta_zone; 500 const u8 *nonce_addr = (const u8 *) &header->nonce; 501 const u8 *vcn_addr = (const u8 *) &header->virtual_chapter_number; 502 const u8 *first_list_addr = (const u8 *) &header->first_list; 503 const u8 *list_count_addr = (const u8 *) &header->list_count; 504 505 /* First assume that the header is little endian. */ 506 nonce = get_unaligned_le64(nonce_addr); 507 vcn = get_unaligned_le64(vcn_addr); 508 first_list = get_unaligned_le16(first_list_addr); 509 list_count = get_unaligned_le16(list_count_addr); 510 if (!verify_delta_index_page(nonce, list_count, expected_nonce, memory, 511 memory_size)) { 512 /* If that fails, try big endian. */ 513 nonce = get_unaligned_be64(nonce_addr); 514 vcn = get_unaligned_be64(vcn_addr); 515 first_list = get_unaligned_be16(first_list_addr); 516 list_count = get_unaligned_be16(list_count_addr); 517 if (!verify_delta_index_page(nonce, list_count, expected_nonce, memory, 518 memory_size)) { 519 /* 520 * Both attempts failed. Do not log this as an error, because it can happen 521 * during a rebuild if we haven't written the entire volume at least once. 522 */ 523 return UDS_CORRUPT_DATA; 524 } 525 } 526 527 delta_index_page->delta_index.delta_zones = delta_zone; 528 delta_index_page->delta_index.zone_count = 1; 529 delta_index_page->delta_index.list_count = list_count; 530 delta_index_page->delta_index.lists_per_zone = list_count; 531 delta_index_page->delta_index.mutable = false; 532 delta_index_page->delta_index.tag = 'p'; 533 delta_index_page->virtual_chapter_number = vcn; 534 delta_index_page->lowest_list_number = first_list; 535 delta_index_page->highest_list_number = first_list + list_count - 1; 536 537 compute_coding_constants(mean_delta, &delta_zone->min_bits, 538 &delta_zone->min_keys, &delta_zone->incr_keys); 539 delta_zone->value_bits = payload_bits; 540 delta_zone->memory = memory; 541 delta_zone->delta_lists = NULL; 542 delta_zone->new_offsets = NULL; 543 delta_zone->buffered_writer = NULL; 544 delta_zone->size = memory_size; 545 delta_zone->rebalance_time = 0; 546 delta_zone->rebalance_count = 0; 547 delta_zone->record_count = 0; 548 delta_zone->collision_count = 0; 549 delta_zone->discard_count = 0; 550 delta_zone->overflow_count = 0; 551 delta_zone->first_list = 0; 552 delta_zone->list_count = list_count; 553 delta_zone->tag = 'p'; 554 555 return UDS_SUCCESS; 556 } 557 558 /* Read a large bit field from an arbitrary bit boundary. */ 559 static inline u64 get_big_field(const u8 *memory, u64 offset, u8 size) 560 { 561 const void *addr = memory + offset / BITS_PER_BYTE; 562 563 return (get_unaligned_le64(addr) >> (offset % BITS_PER_BYTE)) & ((1UL << size) - 1); 564 } 565 566 /* Write a large bit field to an arbitrary bit boundary. */ 567 static inline void set_big_field(u64 value, u8 *memory, u64 offset, u8 size) 568 { 569 void *addr = memory + offset / BITS_PER_BYTE; 570 u8 shift = offset % BITS_PER_BYTE; 571 u64 data = get_unaligned_le64(addr); 572 573 data &= ~(((1UL << size) - 1) << shift); 574 data |= value << shift; 575 put_unaligned_le64(data, addr); 576 } 577 578 /* Set a sequence of bits to all zeros. */ 579 static inline void set_zero(u8 *memory, u64 offset, u32 size) 580 { 581 if (size > 0) { 582 u8 *addr = memory + offset / BITS_PER_BYTE; 583 u8 shift = offset % BITS_PER_BYTE; 584 u32 count = size + shift > BITS_PER_BYTE ? (u32) BITS_PER_BYTE - shift : size; 585 586 *addr++ &= ~(((1 << count) - 1) << shift); 587 for (size -= count; size > BITS_PER_BYTE; size -= BITS_PER_BYTE) 588 *addr++ = 0; 589 590 if (size > 0) 591 *addr &= 0xFF << size; 592 } 593 } 594 595 /* 596 * Move several bits from a higher to a lower address, moving the lower addressed bits first. The 597 * size and memory offsets are measured in bits. 598 */ 599 static void move_bits_down(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size) 600 { 601 const u8 *source; 602 u8 *destination; 603 u8 offset; 604 u8 count; 605 u64 field; 606 607 /* Start by moving one field that ends on a to int boundary. */ 608 count = (MAX_BIG_FIELD_BITS - ((to_offset + MAX_BIG_FIELD_BITS) % BITS_PER_TYPE(u32))); 609 field = get_big_field(from, from_offset, count); 610 set_big_field(field, to, to_offset, count); 611 from_offset += count; 612 to_offset += count; 613 size -= count; 614 615 /* Now do the main loop to copy 32 bit chunks that are int-aligned at the destination. */ 616 offset = from_offset % BITS_PER_TYPE(u32); 617 source = from + (from_offset - offset) / BITS_PER_BYTE; 618 destination = to + to_offset / BITS_PER_BYTE; 619 while (size > MAX_BIG_FIELD_BITS) { 620 put_unaligned_le32(get_unaligned_le64(source) >> offset, destination); 621 source += sizeof(u32); 622 destination += sizeof(u32); 623 from_offset += BITS_PER_TYPE(u32); 624 to_offset += BITS_PER_TYPE(u32); 625 size -= BITS_PER_TYPE(u32); 626 } 627 628 /* Finish up by moving any remaining bits. */ 629 if (size > 0) { 630 field = get_big_field(from, from_offset, size); 631 set_big_field(field, to, to_offset, size); 632 } 633 } 634 635 /* 636 * Move several bits from a lower to a higher address, moving the higher addressed bits first. The 637 * size and memory offsets are measured in bits. 638 */ 639 static void move_bits_up(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size) 640 { 641 const u8 *source; 642 u8 *destination; 643 u8 offset; 644 u8 count; 645 u64 field; 646 647 /* Start by moving one field that begins on a destination int boundary. */ 648 count = (to_offset + size) % BITS_PER_TYPE(u32); 649 if (count > 0) { 650 size -= count; 651 field = get_big_field(from, from_offset + size, count); 652 set_big_field(field, to, to_offset + size, count); 653 } 654 655 /* Now do the main loop to copy 32 bit chunks that are int-aligned at the destination. */ 656 offset = (from_offset + size) % BITS_PER_TYPE(u32); 657 source = from + (from_offset + size - offset) / BITS_PER_BYTE; 658 destination = to + (to_offset + size) / BITS_PER_BYTE; 659 while (size > MAX_BIG_FIELD_BITS) { 660 source -= sizeof(u32); 661 destination -= sizeof(u32); 662 size -= BITS_PER_TYPE(u32); 663 put_unaligned_le32(get_unaligned_le64(source) >> offset, destination); 664 } 665 666 /* Finish up by moving any remaining bits. */ 667 if (size > 0) { 668 field = get_big_field(from, from_offset, size); 669 set_big_field(field, to, to_offset, size); 670 } 671 } 672 673 /* 674 * Move bits from one field to another. When the fields overlap, behave as if we first move all the 675 * bits from the source to a temporary value, and then move all the bits from the temporary value 676 * to the destination. The size and memory offsets are measured in bits. 677 */ 678 static void move_bits(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size) 679 { 680 u64 field; 681 682 /* A small move doesn't require special handling. */ 683 if (size <= MAX_BIG_FIELD_BITS) { 684 if (size > 0) { 685 field = get_big_field(from, from_offset, size); 686 set_big_field(field, to, to_offset, size); 687 } 688 689 return; 690 } 691 692 if (from_offset > to_offset) 693 move_bits_down(from, from_offset, to, to_offset, size); 694 else 695 move_bits_up(from, from_offset, to, to_offset, size); 696 } 697 698 /* 699 * Pack delta lists from a mutable delta index into an immutable delta index page. A range of delta 700 * lists (starting with a specified list index) is copied from the mutable delta index into a 701 * memory page used in the immutable index. The number of lists copied onto the page is returned in 702 * list_count. 703 */ 704 int uds_pack_delta_index_page(const struct delta_index *delta_index, u64 header_nonce, 705 u8 *memory, size_t memory_size, u64 virtual_chapter_number, 706 u32 first_list, u32 *list_count) 707 { 708 const struct delta_zone *delta_zone; 709 struct delta_list *delta_lists; 710 u32 max_lists; 711 u32 n_lists = 0; 712 u32 offset; 713 u32 i; 714 int free_bits; 715 int bits; 716 struct delta_page_header *header; 717 718 delta_zone = &delta_index->delta_zones[0]; 719 delta_lists = &delta_zone->delta_lists[first_list + 1]; 720 max_lists = delta_index->list_count - first_list; 721 722 /* 723 * Compute how many lists will fit on the page. Subtract the size of the fixed header, one 724 * delta list offset, and the guard bytes from the page size to determine how much space is 725 * available for delta lists. 726 */ 727 free_bits = memory_size * BITS_PER_BYTE; 728 free_bits -= get_immutable_header_offset(1); 729 free_bits -= GUARD_BITS; 730 if (free_bits < IMMUTABLE_HEADER_SIZE) { 731 /* This page is too small to store any delta lists. */ 732 return vdo_log_error_strerror(UDS_OVERFLOW, 733 "Chapter Index Page of %zu bytes is too small", 734 memory_size); 735 } 736 737 while (n_lists < max_lists) { 738 /* Each list requires a delta list offset and the list data. */ 739 bits = IMMUTABLE_HEADER_SIZE + delta_lists[n_lists].size; 740 if (bits > free_bits) 741 break; 742 743 n_lists++; 744 free_bits -= bits; 745 } 746 747 *list_count = n_lists; 748 749 header = (struct delta_page_header *) memory; 750 put_unaligned_le64(header_nonce, (u8 *) &header->nonce); 751 put_unaligned_le64(virtual_chapter_number, 752 (u8 *) &header->virtual_chapter_number); 753 put_unaligned_le16(first_list, (u8 *) &header->first_list); 754 put_unaligned_le16(n_lists, (u8 *) &header->list_count); 755 756 /* Construct the delta list offset table. */ 757 offset = get_immutable_header_offset(n_lists + 1); 758 set_immutable_start(memory, 0, offset); 759 for (i = 0; i < n_lists; i++) { 760 offset += delta_lists[i].size; 761 set_immutable_start(memory, i + 1, offset); 762 } 763 764 /* Copy the delta list data onto the memory page. */ 765 for (i = 0; i < n_lists; i++) { 766 move_bits(delta_zone->memory, delta_lists[i].start, memory, 767 get_immutable_start(memory, i), delta_lists[i].size); 768 } 769 770 /* Set all the bits in the guard bytes. */ 771 memset(memory + memory_size - POST_FIELD_GUARD_BYTES, ~0, 772 POST_FIELD_GUARD_BYTES); 773 return UDS_SUCCESS; 774 } 775 776 /* Compute the new offsets of the delta lists. */ 777 static void compute_new_list_offsets(struct delta_zone *delta_zone, u32 growing_index, 778 size_t growing_size, size_t used_space) 779 { 780 size_t spacing; 781 u32 i; 782 struct delta_list *delta_lists = delta_zone->delta_lists; 783 u32 tail_guard_index = delta_zone->list_count + 1; 784 785 spacing = (delta_zone->size - used_space) / delta_zone->list_count; 786 delta_zone->new_offsets[0] = 0; 787 for (i = 0; i <= delta_zone->list_count; i++) { 788 delta_zone->new_offsets[i + 1] = 789 (delta_zone->new_offsets[i] + 790 get_delta_list_byte_size(&delta_lists[i]) + spacing); 791 delta_zone->new_offsets[i] *= BITS_PER_BYTE; 792 delta_zone->new_offsets[i] += delta_lists[i].start % BITS_PER_BYTE; 793 if (i == 0) 794 delta_zone->new_offsets[i + 1] -= spacing / 2; 795 if (i + 1 == growing_index) 796 delta_zone->new_offsets[i + 1] += growing_size; 797 } 798 799 delta_zone->new_offsets[tail_guard_index] = 800 (delta_zone->size * BITS_PER_BYTE - delta_lists[tail_guard_index].size); 801 } 802 803 static void rebalance_lists(struct delta_zone *delta_zone) 804 { 805 struct delta_list *delta_lists; 806 u32 i; 807 size_t used_space = 0; 808 809 /* Extend and balance memory to receive the delta lists */ 810 delta_lists = delta_zone->delta_lists; 811 for (i = 0; i <= delta_zone->list_count + 1; i++) 812 used_space += get_delta_list_byte_size(&delta_lists[i]); 813 814 compute_new_list_offsets(delta_zone, 0, 0, used_space); 815 for (i = 1; i <= delta_zone->list_count + 1; i++) 816 delta_lists[i].start = delta_zone->new_offsets[i]; 817 } 818 819 /* Start restoring a delta index from multiple input streams. */ 820 int uds_start_restoring_delta_index(struct delta_index *delta_index, 821 struct buffered_reader **buffered_readers, 822 unsigned int reader_count) 823 { 824 int result; 825 unsigned int zone_count = reader_count; 826 u64 record_count = 0; 827 u64 collision_count = 0; 828 u32 first_list[MAX_ZONES]; 829 u32 list_count[MAX_ZONES]; 830 unsigned int z; 831 u32 list_next = 0; 832 const struct delta_zone *delta_zone; 833 834 /* Read and validate each header. */ 835 for (z = 0; z < zone_count; z++) { 836 struct delta_index_header header; 837 u8 buffer[sizeof(struct delta_index_header)]; 838 size_t offset = 0; 839 840 result = uds_read_from_buffered_reader(buffered_readers[z], buffer, 841 sizeof(buffer)); 842 if (result != UDS_SUCCESS) { 843 return vdo_log_warning_strerror(result, 844 "failed to read delta index header"); 845 } 846 847 memcpy(&header.magic, buffer, MAGIC_SIZE); 848 offset += MAGIC_SIZE; 849 decode_u32_le(buffer, &offset, &header.zone_number); 850 decode_u32_le(buffer, &offset, &header.zone_count); 851 decode_u32_le(buffer, &offset, &header.first_list); 852 decode_u32_le(buffer, &offset, &header.list_count); 853 decode_u64_le(buffer, &offset, &header.record_count); 854 decode_u64_le(buffer, &offset, &header.collision_count); 855 856 result = VDO_ASSERT(offset == sizeof(struct delta_index_header), 857 "%zu bytes decoded of %zu expected", offset, 858 sizeof(struct delta_index_header)); 859 if (result != VDO_SUCCESS) { 860 return vdo_log_warning_strerror(result, 861 "failed to read delta index header"); 862 } 863 864 if (memcmp(header.magic, DELTA_INDEX_MAGIC, MAGIC_SIZE) != 0) { 865 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 866 "delta index file has bad magic number"); 867 } 868 869 if (zone_count != header.zone_count) { 870 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 871 "delta index files contain mismatched zone counts (%u,%u)", 872 zone_count, header.zone_count); 873 } 874 875 if (header.zone_number != z) { 876 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 877 "delta index zone %u found in slot %u", 878 header.zone_number, z); 879 } 880 881 first_list[z] = header.first_list; 882 list_count[z] = header.list_count; 883 record_count += header.record_count; 884 collision_count += header.collision_count; 885 886 if (first_list[z] != list_next) { 887 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 888 "delta index file for zone %u starts with list %u instead of list %u", 889 z, first_list[z], list_next); 890 } 891 892 list_next += list_count[z]; 893 } 894 895 if (list_next != delta_index->list_count) { 896 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 897 "delta index files contain %u delta lists instead of %u delta lists", 898 list_next, delta_index->list_count); 899 } 900 901 if (collision_count > record_count) { 902 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 903 "delta index files contain %llu collisions and %llu records", 904 (unsigned long long) collision_count, 905 (unsigned long long) record_count); 906 } 907 908 uds_reset_delta_index(delta_index); 909 delta_index->delta_zones[0].record_count = record_count; 910 delta_index->delta_zones[0].collision_count = collision_count; 911 912 /* Read the delta lists and distribute them to the proper zones. */ 913 for (z = 0; z < zone_count; z++) { 914 u32 i; 915 916 delta_index->load_lists[z] = 0; 917 for (i = 0; i < list_count[z]; i++) { 918 u16 delta_list_size; 919 u32 list_number; 920 unsigned int zone_number; 921 u8 size_data[sizeof(u16)]; 922 923 result = uds_read_from_buffered_reader(buffered_readers[z], 924 size_data, 925 sizeof(size_data)); 926 if (result != UDS_SUCCESS) { 927 return vdo_log_warning_strerror(result, 928 "failed to read delta index size"); 929 } 930 931 delta_list_size = get_unaligned_le16(size_data); 932 if (delta_list_size > 0) 933 delta_index->load_lists[z] += 1; 934 935 list_number = first_list[z] + i; 936 zone_number = list_number / delta_index->lists_per_zone; 937 delta_zone = &delta_index->delta_zones[zone_number]; 938 list_number -= delta_zone->first_list; 939 delta_zone->delta_lists[list_number + 1].size = delta_list_size; 940 } 941 } 942 943 /* Prepare each zone to start receiving the delta list data. */ 944 for (z = 0; z < delta_index->zone_count; z++) 945 rebalance_lists(&delta_index->delta_zones[z]); 946 947 return UDS_SUCCESS; 948 } 949 950 static int restore_delta_list_to_zone(struct delta_zone *delta_zone, 951 const struct delta_list_save_info *save_info, 952 const u8 *data) 953 { 954 struct delta_list *delta_list; 955 u16 bit_count; 956 u16 byte_count; 957 u32 list_number = save_info->index - delta_zone->first_list; 958 959 if (list_number >= delta_zone->list_count) { 960 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 961 "invalid delta list number %u not in range [%u,%u)", 962 save_info->index, delta_zone->first_list, 963 delta_zone->first_list + delta_zone->list_count); 964 } 965 966 delta_list = &delta_zone->delta_lists[list_number + 1]; 967 if (delta_list->size == 0) { 968 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 969 "unexpected delta list number %u", 970 save_info->index); 971 } 972 973 bit_count = delta_list->size + save_info->bit_offset; 974 byte_count = BITS_TO_BYTES(bit_count); 975 if (save_info->byte_count != byte_count) { 976 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 977 "unexpected delta list size %u != %u", 978 save_info->byte_count, byte_count); 979 } 980 981 move_bits(data, save_info->bit_offset, delta_zone->memory, delta_list->start, 982 delta_list->size); 983 return UDS_SUCCESS; 984 } 985 986 static int restore_delta_list_data(struct delta_index *delta_index, unsigned int load_zone, 987 struct buffered_reader *buffered_reader, u8 *data) 988 { 989 int result; 990 struct delta_list_save_info save_info; 991 u8 buffer[sizeof(struct delta_list_save_info)]; 992 unsigned int new_zone; 993 994 result = uds_read_from_buffered_reader(buffered_reader, buffer, sizeof(buffer)); 995 if (result != UDS_SUCCESS) { 996 return vdo_log_warning_strerror(result, 997 "failed to read delta list data"); 998 } 999 1000 save_info = (struct delta_list_save_info) { 1001 .tag = buffer[0], 1002 .bit_offset = buffer[1], 1003 .byte_count = get_unaligned_le16(&buffer[2]), 1004 .index = get_unaligned_le32(&buffer[4]), 1005 }; 1006 1007 if ((save_info.bit_offset >= BITS_PER_BYTE) || 1008 (save_info.byte_count > DELTA_LIST_MAX_BYTE_COUNT)) { 1009 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 1010 "corrupt delta list data"); 1011 } 1012 1013 /* Make sure the data is intended for this delta index. */ 1014 if (save_info.tag != delta_index->tag) 1015 return UDS_CORRUPT_DATA; 1016 1017 if (save_info.index >= delta_index->list_count) { 1018 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 1019 "invalid delta list number %u of %u", 1020 save_info.index, 1021 delta_index->list_count); 1022 } 1023 1024 result = uds_read_from_buffered_reader(buffered_reader, data, 1025 save_info.byte_count); 1026 if (result != UDS_SUCCESS) { 1027 return vdo_log_warning_strerror(result, 1028 "failed to read delta list data"); 1029 } 1030 1031 delta_index->load_lists[load_zone] -= 1; 1032 new_zone = save_info.index / delta_index->lists_per_zone; 1033 return restore_delta_list_to_zone(&delta_index->delta_zones[new_zone], 1034 &save_info, data); 1035 } 1036 1037 /* Restore delta lists from saved data. */ 1038 int uds_finish_restoring_delta_index(struct delta_index *delta_index, 1039 struct buffered_reader **buffered_readers, 1040 unsigned int reader_count) 1041 { 1042 int result; 1043 int saved_result = UDS_SUCCESS; 1044 unsigned int z; 1045 u8 *data; 1046 1047 result = vdo_allocate(DELTA_LIST_MAX_BYTE_COUNT, __func__, &data); 1048 if (result != VDO_SUCCESS) 1049 return result; 1050 1051 for (z = 0; z < reader_count; z++) { 1052 while (delta_index->load_lists[z] > 0) { 1053 result = restore_delta_list_data(delta_index, z, 1054 buffered_readers[z], data); 1055 if (result != UDS_SUCCESS) { 1056 saved_result = result; 1057 break; 1058 } 1059 } 1060 } 1061 1062 vdo_free(data); 1063 return saved_result; 1064 } 1065 1066 int uds_check_guard_delta_lists(struct buffered_reader **buffered_readers, 1067 unsigned int reader_count) 1068 { 1069 int result; 1070 unsigned int z; 1071 u8 buffer[sizeof(struct delta_list_save_info)]; 1072 1073 for (z = 0; z < reader_count; z++) { 1074 result = uds_read_from_buffered_reader(buffered_readers[z], buffer, 1075 sizeof(buffer)); 1076 if (result != UDS_SUCCESS) 1077 return result; 1078 1079 if (buffer[0] != 'z') 1080 return UDS_CORRUPT_DATA; 1081 } 1082 1083 return UDS_SUCCESS; 1084 } 1085 1086 static int flush_delta_list(struct delta_zone *zone, u32 flush_index) 1087 { 1088 struct delta_list *delta_list; 1089 u8 buffer[sizeof(struct delta_list_save_info)]; 1090 int result; 1091 1092 delta_list = &zone->delta_lists[flush_index + 1]; 1093 1094 buffer[0] = zone->tag; 1095 buffer[1] = delta_list->start % BITS_PER_BYTE; 1096 put_unaligned_le16(get_delta_list_byte_size(delta_list), &buffer[2]); 1097 put_unaligned_le32(zone->first_list + flush_index, &buffer[4]); 1098 1099 result = uds_write_to_buffered_writer(zone->buffered_writer, buffer, 1100 sizeof(buffer)); 1101 if (result != UDS_SUCCESS) { 1102 vdo_log_warning_strerror(result, "failed to write delta list memory"); 1103 return result; 1104 } 1105 1106 result = uds_write_to_buffered_writer(zone->buffered_writer, 1107 zone->memory + get_delta_list_byte_start(delta_list), 1108 get_delta_list_byte_size(delta_list)); 1109 if (result != UDS_SUCCESS) 1110 vdo_log_warning_strerror(result, "failed to write delta list memory"); 1111 1112 return result; 1113 } 1114 1115 /* Start saving a delta index zone to a buffered output stream. */ 1116 int uds_start_saving_delta_index(const struct delta_index *delta_index, 1117 unsigned int zone_number, 1118 struct buffered_writer *buffered_writer) 1119 { 1120 int result; 1121 u32 i; 1122 struct delta_zone *delta_zone; 1123 u8 buffer[sizeof(struct delta_index_header)]; 1124 size_t offset = 0; 1125 1126 delta_zone = &delta_index->delta_zones[zone_number]; 1127 memcpy(buffer, DELTA_INDEX_MAGIC, MAGIC_SIZE); 1128 offset += MAGIC_SIZE; 1129 encode_u32_le(buffer, &offset, zone_number); 1130 encode_u32_le(buffer, &offset, delta_index->zone_count); 1131 encode_u32_le(buffer, &offset, delta_zone->first_list); 1132 encode_u32_le(buffer, &offset, delta_zone->list_count); 1133 encode_u64_le(buffer, &offset, delta_zone->record_count); 1134 encode_u64_le(buffer, &offset, delta_zone->collision_count); 1135 1136 result = VDO_ASSERT(offset == sizeof(struct delta_index_header), 1137 "%zu bytes encoded of %zu expected", offset, 1138 sizeof(struct delta_index_header)); 1139 if (result != VDO_SUCCESS) 1140 return result; 1141 1142 result = uds_write_to_buffered_writer(buffered_writer, buffer, offset); 1143 if (result != UDS_SUCCESS) 1144 return vdo_log_warning_strerror(result, 1145 "failed to write delta index header"); 1146 1147 for (i = 0; i < delta_zone->list_count; i++) { 1148 u8 data[sizeof(u16)]; 1149 struct delta_list *delta_list; 1150 1151 delta_list = &delta_zone->delta_lists[i + 1]; 1152 put_unaligned_le16(delta_list->size, data); 1153 result = uds_write_to_buffered_writer(buffered_writer, data, 1154 sizeof(data)); 1155 if (result != UDS_SUCCESS) 1156 return vdo_log_warning_strerror(result, 1157 "failed to write delta list size"); 1158 } 1159 1160 delta_zone->buffered_writer = buffered_writer; 1161 return UDS_SUCCESS; 1162 } 1163 1164 int uds_finish_saving_delta_index(const struct delta_index *delta_index, 1165 unsigned int zone_number) 1166 { 1167 int result; 1168 int first_error = UDS_SUCCESS; 1169 u32 i; 1170 struct delta_zone *delta_zone; 1171 struct delta_list *delta_list; 1172 1173 delta_zone = &delta_index->delta_zones[zone_number]; 1174 for (i = 0; i < delta_zone->list_count; i++) { 1175 delta_list = &delta_zone->delta_lists[i + 1]; 1176 if (delta_list->size > 0) { 1177 result = flush_delta_list(delta_zone, i); 1178 if ((result != UDS_SUCCESS) && (first_error == UDS_SUCCESS)) 1179 first_error = result; 1180 } 1181 } 1182 1183 delta_zone->buffered_writer = NULL; 1184 return first_error; 1185 } 1186 1187 int uds_write_guard_delta_list(struct buffered_writer *buffered_writer) 1188 { 1189 int result; 1190 u8 buffer[sizeof(struct delta_list_save_info)]; 1191 1192 memset(buffer, 0, sizeof(struct delta_list_save_info)); 1193 buffer[0] = 'z'; 1194 1195 result = uds_write_to_buffered_writer(buffered_writer, buffer, sizeof(buffer)); 1196 if (result != UDS_SUCCESS) 1197 vdo_log_warning_strerror(result, "failed to write guard delta list"); 1198 1199 return UDS_SUCCESS; 1200 } 1201 1202 size_t uds_compute_delta_index_save_bytes(u32 list_count, size_t memory_size) 1203 { 1204 /* One zone will use at least as much memory as other zone counts. */ 1205 return (sizeof(struct delta_index_header) + 1206 list_count * (sizeof(struct delta_list_save_info) + 1) + 1207 get_zone_memory_size(1, memory_size)); 1208 } 1209 1210 static int assert_not_at_end(const struct delta_index_entry *delta_entry) 1211 { 1212 int result = VDO_ASSERT(!delta_entry->at_end, 1213 "operation is invalid because the list entry is at the end of the delta list"); 1214 if (result != VDO_SUCCESS) 1215 result = UDS_BAD_STATE; 1216 1217 return result; 1218 } 1219 1220 /* 1221 * Prepare to search for an entry in the specified delta list. 1222 * 1223 * This is always the first function to be called when dealing with delta index entries. It is 1224 * always followed by calls to uds_next_delta_index_entry() to iterate through a delta list. The 1225 * fields of the delta_index_entry argument will be set up for iteration, but will not contain an 1226 * entry from the list. 1227 */ 1228 int uds_start_delta_index_search(const struct delta_index *delta_index, u32 list_number, 1229 u32 key, struct delta_index_entry *delta_entry) 1230 { 1231 int result; 1232 unsigned int zone_number; 1233 struct delta_zone *delta_zone; 1234 struct delta_list *delta_list; 1235 1236 result = VDO_ASSERT((list_number < delta_index->list_count), 1237 "Delta list number (%u) is out of range (%u)", list_number, 1238 delta_index->list_count); 1239 if (result != VDO_SUCCESS) 1240 return UDS_CORRUPT_DATA; 1241 1242 zone_number = list_number / delta_index->lists_per_zone; 1243 delta_zone = &delta_index->delta_zones[zone_number]; 1244 list_number -= delta_zone->first_list; 1245 result = VDO_ASSERT((list_number < delta_zone->list_count), 1246 "Delta list number (%u) is out of range (%u) for zone (%u)", 1247 list_number, delta_zone->list_count, zone_number); 1248 if (result != VDO_SUCCESS) 1249 return UDS_CORRUPT_DATA; 1250 1251 if (delta_index->mutable) { 1252 delta_list = &delta_zone->delta_lists[list_number + 1]; 1253 } else { 1254 u32 end_offset; 1255 1256 /* 1257 * Translate the immutable delta list header into a temporary 1258 * full delta list header. 1259 */ 1260 delta_list = &delta_entry->temp_delta_list; 1261 delta_list->start = get_immutable_start(delta_zone->memory, list_number); 1262 end_offset = get_immutable_start(delta_zone->memory, list_number + 1); 1263 delta_list->size = end_offset - delta_list->start; 1264 delta_list->save_key = 0; 1265 delta_list->save_offset = 0; 1266 } 1267 1268 if (key > delta_list->save_key) { 1269 delta_entry->key = delta_list->save_key; 1270 delta_entry->offset = delta_list->save_offset; 1271 } else { 1272 delta_entry->key = 0; 1273 delta_entry->offset = 0; 1274 if (key == 0) { 1275 /* 1276 * This usually means we're about to walk the entire delta list, so get all 1277 * of it into the CPU cache. 1278 */ 1279 uds_prefetch_range(&delta_zone->memory[delta_list->start / BITS_PER_BYTE], 1280 delta_list->size / BITS_PER_BYTE, false); 1281 } 1282 } 1283 1284 delta_entry->at_end = false; 1285 delta_entry->delta_zone = delta_zone; 1286 delta_entry->delta_list = delta_list; 1287 delta_entry->entry_bits = 0; 1288 delta_entry->is_collision = false; 1289 delta_entry->list_number = list_number; 1290 delta_entry->list_overflow = false; 1291 delta_entry->value_bits = delta_zone->value_bits; 1292 return UDS_SUCCESS; 1293 } 1294 1295 static inline u64 get_delta_entry_offset(const struct delta_index_entry *delta_entry) 1296 { 1297 return delta_entry->delta_list->start + delta_entry->offset; 1298 } 1299 1300 /* 1301 * Decode a delta index entry delta value. The delta_index_entry basically describes the previous 1302 * list entry, and has had its offset field changed to point to the subsequent entry. We decode the 1303 * bit stream and update the delta_list_entry to describe the entry. 1304 */ 1305 static inline void decode_delta(struct delta_index_entry *delta_entry) 1306 { 1307 int key_bits; 1308 u32 delta; 1309 const struct delta_zone *delta_zone = delta_entry->delta_zone; 1310 const u8 *memory = delta_zone->memory; 1311 u64 delta_offset = get_delta_entry_offset(delta_entry) + delta_entry->value_bits; 1312 const u8 *addr = memory + delta_offset / BITS_PER_BYTE; 1313 int offset = delta_offset % BITS_PER_BYTE; 1314 u32 data = get_unaligned_le32(addr) >> offset; 1315 1316 addr += sizeof(u32); 1317 key_bits = delta_zone->min_bits; 1318 delta = data & ((1 << key_bits) - 1); 1319 if (delta >= delta_zone->min_keys) { 1320 data >>= key_bits; 1321 if (data == 0) { 1322 key_bits = sizeof(u32) * BITS_PER_BYTE - offset; 1323 while ((data = get_unaligned_le32(addr)) == 0) { 1324 addr += sizeof(u32); 1325 key_bits += sizeof(u32) * BITS_PER_BYTE; 1326 } 1327 } 1328 key_bits += ffs(data); 1329 delta += ((key_bits - delta_zone->min_bits - 1) * delta_zone->incr_keys); 1330 } 1331 delta_entry->delta = delta; 1332 delta_entry->key += delta; 1333 1334 /* Check for a collision, a delta of zero after the start. */ 1335 if (unlikely((delta == 0) && (delta_entry->offset > 0))) { 1336 delta_entry->is_collision = true; 1337 delta_entry->entry_bits = delta_entry->value_bits + key_bits + COLLISION_BITS; 1338 } else { 1339 delta_entry->is_collision = false; 1340 delta_entry->entry_bits = delta_entry->value_bits + key_bits; 1341 } 1342 } 1343 1344 noinline int uds_next_delta_index_entry(struct delta_index_entry *delta_entry) 1345 { 1346 int result; 1347 const struct delta_list *delta_list; 1348 u32 next_offset; 1349 u16 size; 1350 1351 result = assert_not_at_end(delta_entry); 1352 if (result != UDS_SUCCESS) 1353 return result; 1354 1355 delta_list = delta_entry->delta_list; 1356 delta_entry->offset += delta_entry->entry_bits; 1357 size = delta_list->size; 1358 if (unlikely(delta_entry->offset >= size)) { 1359 delta_entry->at_end = true; 1360 delta_entry->delta = 0; 1361 delta_entry->is_collision = false; 1362 result = VDO_ASSERT((delta_entry->offset == size), 1363 "next offset past end of delta list"); 1364 if (result != VDO_SUCCESS) 1365 result = UDS_CORRUPT_DATA; 1366 1367 return result; 1368 } 1369 1370 decode_delta(delta_entry); 1371 1372 next_offset = delta_entry->offset + delta_entry->entry_bits; 1373 if (next_offset > size) { 1374 /* 1375 * This is not an assertion because uds_validate_chapter_index_page() wants to 1376 * handle this error. 1377 */ 1378 vdo_log_warning("Decoded past the end of the delta list"); 1379 return UDS_CORRUPT_DATA; 1380 } 1381 1382 return UDS_SUCCESS; 1383 } 1384 1385 int uds_remember_delta_index_offset(const struct delta_index_entry *delta_entry) 1386 { 1387 int result; 1388 struct delta_list *delta_list = delta_entry->delta_list; 1389 1390 result = VDO_ASSERT(!delta_entry->is_collision, "entry is not a collision"); 1391 if (result != VDO_SUCCESS) 1392 return result; 1393 1394 delta_list->save_key = delta_entry->key - delta_entry->delta; 1395 delta_list->save_offset = delta_entry->offset; 1396 return UDS_SUCCESS; 1397 } 1398 1399 static void set_delta(struct delta_index_entry *delta_entry, u32 delta) 1400 { 1401 const struct delta_zone *delta_zone = delta_entry->delta_zone; 1402 u32 key_bits = (delta_zone->min_bits + 1403 ((delta_zone->incr_keys - delta_zone->min_keys + delta) / 1404 delta_zone->incr_keys)); 1405 1406 delta_entry->delta = delta; 1407 delta_entry->entry_bits = delta_entry->value_bits + key_bits; 1408 } 1409 1410 static void get_collision_name(const struct delta_index_entry *entry, u8 *name) 1411 { 1412 u64 offset = get_delta_entry_offset(entry) + entry->entry_bits - COLLISION_BITS; 1413 const u8 *addr = entry->delta_zone->memory + offset / BITS_PER_BYTE; 1414 int size = COLLISION_BYTES; 1415 int shift = offset % BITS_PER_BYTE; 1416 1417 while (--size >= 0) 1418 *name++ = get_unaligned_le16(addr++) >> shift; 1419 } 1420 1421 static void set_collision_name(const struct delta_index_entry *entry, const u8 *name) 1422 { 1423 u64 offset = get_delta_entry_offset(entry) + entry->entry_bits - COLLISION_BITS; 1424 u8 *addr = entry->delta_zone->memory + offset / BITS_PER_BYTE; 1425 int size = COLLISION_BYTES; 1426 int shift = offset % BITS_PER_BYTE; 1427 u16 mask = ~((u16) 0xFF << shift); 1428 u16 data; 1429 1430 while (--size >= 0) { 1431 data = (get_unaligned_le16(addr) & mask) | (*name++ << shift); 1432 put_unaligned_le16(data, addr++); 1433 } 1434 } 1435 1436 int uds_get_delta_index_entry(const struct delta_index *delta_index, u32 list_number, 1437 u32 key, const u8 *name, 1438 struct delta_index_entry *delta_entry) 1439 { 1440 int result; 1441 1442 result = uds_start_delta_index_search(delta_index, list_number, key, 1443 delta_entry); 1444 if (result != UDS_SUCCESS) 1445 return result; 1446 1447 do { 1448 result = uds_next_delta_index_entry(delta_entry); 1449 if (result != UDS_SUCCESS) 1450 return result; 1451 } while (!delta_entry->at_end && (key > delta_entry->key)); 1452 1453 result = uds_remember_delta_index_offset(delta_entry); 1454 if (result != UDS_SUCCESS) 1455 return result; 1456 1457 if (!delta_entry->at_end && (key == delta_entry->key)) { 1458 struct delta_index_entry collision_entry = *delta_entry; 1459 1460 for (;;) { 1461 u8 full_name[COLLISION_BYTES]; 1462 1463 result = uds_next_delta_index_entry(&collision_entry); 1464 if (result != UDS_SUCCESS) 1465 return result; 1466 1467 if (collision_entry.at_end || !collision_entry.is_collision) 1468 break; 1469 1470 get_collision_name(&collision_entry, full_name); 1471 if (memcmp(full_name, name, COLLISION_BYTES) == 0) { 1472 *delta_entry = collision_entry; 1473 break; 1474 } 1475 } 1476 } 1477 1478 return UDS_SUCCESS; 1479 } 1480 1481 int uds_get_delta_entry_collision(const struct delta_index_entry *delta_entry, u8 *name) 1482 { 1483 int result; 1484 1485 result = assert_not_at_end(delta_entry); 1486 if (result != UDS_SUCCESS) 1487 return result; 1488 1489 result = VDO_ASSERT(delta_entry->is_collision, 1490 "Cannot get full block name from a non-collision delta index entry"); 1491 if (result != VDO_SUCCESS) 1492 return UDS_BAD_STATE; 1493 1494 get_collision_name(delta_entry, name); 1495 return UDS_SUCCESS; 1496 } 1497 1498 u32 uds_get_delta_entry_value(const struct delta_index_entry *delta_entry) 1499 { 1500 return get_field(delta_entry->delta_zone->memory, 1501 get_delta_entry_offset(delta_entry), delta_entry->value_bits); 1502 } 1503 1504 static int assert_mutable_entry(const struct delta_index_entry *delta_entry) 1505 { 1506 int result = VDO_ASSERT((delta_entry->delta_list != &delta_entry->temp_delta_list), 1507 "delta index is mutable"); 1508 if (result != VDO_SUCCESS) 1509 result = UDS_BAD_STATE; 1510 1511 return result; 1512 } 1513 1514 int uds_set_delta_entry_value(const struct delta_index_entry *delta_entry, u32 value) 1515 { 1516 int result; 1517 u32 value_mask = (1 << delta_entry->value_bits) - 1; 1518 1519 result = assert_mutable_entry(delta_entry); 1520 if (result != UDS_SUCCESS) 1521 return result; 1522 1523 result = assert_not_at_end(delta_entry); 1524 if (result != UDS_SUCCESS) 1525 return result; 1526 1527 result = VDO_ASSERT((value & value_mask) == value, 1528 "Value (%u) being set in a delta index is too large (must fit in %u bits)", 1529 value, delta_entry->value_bits); 1530 if (result != VDO_SUCCESS) 1531 return UDS_INVALID_ARGUMENT; 1532 1533 set_field(value, delta_entry->delta_zone->memory, 1534 get_delta_entry_offset(delta_entry), delta_entry->value_bits); 1535 return UDS_SUCCESS; 1536 } 1537 1538 /* 1539 * Extend the memory used by the delta lists by adding growing_size bytes before the list indicated 1540 * by growing_index, then rebalancing the lists in the new chunk. 1541 */ 1542 static int extend_delta_zone(struct delta_zone *delta_zone, u32 growing_index, 1543 size_t growing_size) 1544 { 1545 ktime_t start_time; 1546 ktime_t end_time; 1547 struct delta_list *delta_lists; 1548 u32 i; 1549 size_t used_space; 1550 1551 1552 /* Calculate the amount of space that is or will be in use. */ 1553 start_time = current_time_ns(CLOCK_MONOTONIC); 1554 delta_lists = delta_zone->delta_lists; 1555 used_space = growing_size; 1556 for (i = 0; i <= delta_zone->list_count + 1; i++) 1557 used_space += get_delta_list_byte_size(&delta_lists[i]); 1558 1559 if (delta_zone->size < used_space) 1560 return UDS_OVERFLOW; 1561 1562 /* Compute the new offsets of the delta lists. */ 1563 compute_new_list_offsets(delta_zone, growing_index, growing_size, used_space); 1564 1565 /* 1566 * When we rebalance the delta list, we will include the end guard list in the rebalancing. 1567 * It contains the end guard data, which must be copied. 1568 */ 1569 rebalance_delta_zone(delta_zone, 1, delta_zone->list_count + 1); 1570 end_time = current_time_ns(CLOCK_MONOTONIC); 1571 delta_zone->rebalance_count++; 1572 delta_zone->rebalance_time += ktime_sub(end_time, start_time); 1573 return UDS_SUCCESS; 1574 } 1575 1576 static int insert_bits(struct delta_index_entry *delta_entry, u16 size) 1577 { 1578 u64 free_before; 1579 u64 free_after; 1580 u64 source; 1581 u64 destination; 1582 u32 count; 1583 bool before_flag; 1584 u8 *memory; 1585 struct delta_zone *delta_zone = delta_entry->delta_zone; 1586 struct delta_list *delta_list = delta_entry->delta_list; 1587 /* Compute bits in use before and after the inserted bits. */ 1588 u32 total_size = delta_list->size; 1589 u32 before_size = delta_entry->offset; 1590 u32 after_size = total_size - delta_entry->offset; 1591 1592 if (total_size + size > U16_MAX) { 1593 delta_entry->list_overflow = true; 1594 delta_zone->overflow_count++; 1595 return UDS_OVERFLOW; 1596 } 1597 1598 /* Compute bits available before and after the delta list. */ 1599 free_before = (delta_list[0].start - (delta_list[-1].start + delta_list[-1].size)); 1600 free_after = (delta_list[1].start - (delta_list[0].start + delta_list[0].size)); 1601 1602 if ((size <= free_before) && (size <= free_after)) { 1603 /* 1604 * We have enough space to use either before or after the list. Select the smaller 1605 * amount of data. If it is exactly the same, try to take from the larger amount of 1606 * free space. 1607 */ 1608 if (before_size < after_size) 1609 before_flag = true; 1610 else if (after_size < before_size) 1611 before_flag = false; 1612 else 1613 before_flag = free_before > free_after; 1614 } else if (size <= free_before) { 1615 /* There is space before but not after. */ 1616 before_flag = true; 1617 } else if (size <= free_after) { 1618 /* There is space after but not before. */ 1619 before_flag = false; 1620 } else { 1621 /* 1622 * Neither of the surrounding spaces is large enough for this request. Extend 1623 * and/or rebalance the delta list memory choosing to move the least amount of 1624 * data. 1625 */ 1626 int result; 1627 u32 growing_index = delta_entry->list_number + 1; 1628 1629 before_flag = before_size < after_size; 1630 if (!before_flag) 1631 growing_index++; 1632 result = extend_delta_zone(delta_zone, growing_index, 1633 BITS_TO_BYTES(size)); 1634 if (result != UDS_SUCCESS) 1635 return result; 1636 } 1637 1638 delta_list->size += size; 1639 if (before_flag) { 1640 source = delta_list->start; 1641 destination = source - size; 1642 delta_list->start -= size; 1643 count = before_size; 1644 } else { 1645 source = delta_list->start + delta_entry->offset; 1646 destination = source + size; 1647 count = after_size; 1648 } 1649 1650 memory = delta_zone->memory; 1651 move_bits(memory, source, memory, destination, count); 1652 return UDS_SUCCESS; 1653 } 1654 1655 static void encode_delta(const struct delta_index_entry *delta_entry) 1656 { 1657 u32 temp; 1658 u32 t1; 1659 u32 t2; 1660 u64 offset; 1661 const struct delta_zone *delta_zone = delta_entry->delta_zone; 1662 u8 *memory = delta_zone->memory; 1663 1664 offset = get_delta_entry_offset(delta_entry) + delta_entry->value_bits; 1665 if (delta_entry->delta < delta_zone->min_keys) { 1666 set_field(delta_entry->delta, memory, offset, delta_zone->min_bits); 1667 return; 1668 } 1669 1670 temp = delta_entry->delta - delta_zone->min_keys; 1671 t1 = (temp % delta_zone->incr_keys) + delta_zone->min_keys; 1672 t2 = temp / delta_zone->incr_keys; 1673 set_field(t1, memory, offset, delta_zone->min_bits); 1674 set_zero(memory, offset + delta_zone->min_bits, t2); 1675 set_field(1, memory, offset + delta_zone->min_bits + t2, 1); 1676 } 1677 1678 static void encode_entry(const struct delta_index_entry *delta_entry, u32 value, 1679 const u8 *name) 1680 { 1681 u8 *memory = delta_entry->delta_zone->memory; 1682 u64 offset = get_delta_entry_offset(delta_entry); 1683 1684 set_field(value, memory, offset, delta_entry->value_bits); 1685 encode_delta(delta_entry); 1686 if (name != NULL) 1687 set_collision_name(delta_entry, name); 1688 } 1689 1690 /* 1691 * Create a new entry in the delta index. If the entry is a collision, the full 256 bit name must 1692 * be provided. 1693 */ 1694 int uds_put_delta_index_entry(struct delta_index_entry *delta_entry, u32 key, u32 value, 1695 const u8 *name) 1696 { 1697 int result; 1698 struct delta_zone *delta_zone; 1699 1700 result = assert_mutable_entry(delta_entry); 1701 if (result != UDS_SUCCESS) 1702 return result; 1703 1704 if (delta_entry->is_collision) { 1705 /* 1706 * The caller wants us to insert a collision entry onto a collision entry. This 1707 * happens when we find a collision and attempt to add the name again to the index. 1708 * This is normally a fatal error unless we are replaying a closed chapter while we 1709 * are rebuilding a volume index. 1710 */ 1711 return UDS_DUPLICATE_NAME; 1712 } 1713 1714 if (delta_entry->offset < delta_entry->delta_list->save_offset) { 1715 /* 1716 * The saved entry offset is after the new entry and will no longer be valid, so 1717 * replace it with the insertion point. 1718 */ 1719 result = uds_remember_delta_index_offset(delta_entry); 1720 if (result != UDS_SUCCESS) 1721 return result; 1722 } 1723 1724 if (name != NULL) { 1725 /* Insert a collision entry which is placed after this entry. */ 1726 result = assert_not_at_end(delta_entry); 1727 if (result != UDS_SUCCESS) 1728 return result; 1729 1730 result = VDO_ASSERT((key == delta_entry->key), 1731 "incorrect key for collision entry"); 1732 if (result != VDO_SUCCESS) 1733 return result; 1734 1735 delta_entry->offset += delta_entry->entry_bits; 1736 set_delta(delta_entry, 0); 1737 delta_entry->is_collision = true; 1738 delta_entry->entry_bits += COLLISION_BITS; 1739 result = insert_bits(delta_entry, delta_entry->entry_bits); 1740 } else if (delta_entry->at_end) { 1741 /* Insert a new entry at the end of the delta list. */ 1742 result = VDO_ASSERT((key >= delta_entry->key), "key past end of list"); 1743 if (result != VDO_SUCCESS) 1744 return result; 1745 1746 set_delta(delta_entry, key - delta_entry->key); 1747 delta_entry->key = key; 1748 delta_entry->at_end = false; 1749 result = insert_bits(delta_entry, delta_entry->entry_bits); 1750 } else { 1751 u16 old_entry_size; 1752 u16 additional_size; 1753 struct delta_index_entry next_entry; 1754 u32 next_value; 1755 1756 /* 1757 * Insert a new entry which requires the delta in the following entry to be 1758 * updated. 1759 */ 1760 result = VDO_ASSERT((key < delta_entry->key), 1761 "key precedes following entry"); 1762 if (result != VDO_SUCCESS) 1763 return result; 1764 1765 result = VDO_ASSERT((key >= delta_entry->key - delta_entry->delta), 1766 "key effects following entry's delta"); 1767 if (result != VDO_SUCCESS) 1768 return result; 1769 1770 old_entry_size = delta_entry->entry_bits; 1771 next_entry = *delta_entry; 1772 next_value = uds_get_delta_entry_value(&next_entry); 1773 set_delta(delta_entry, key - (delta_entry->key - delta_entry->delta)); 1774 delta_entry->key = key; 1775 set_delta(&next_entry, next_entry.key - key); 1776 next_entry.offset += delta_entry->entry_bits; 1777 /* The two new entries are always bigger than the single entry being replaced. */ 1778 additional_size = (delta_entry->entry_bits + 1779 next_entry.entry_bits - old_entry_size); 1780 result = insert_bits(delta_entry, additional_size); 1781 if (result != UDS_SUCCESS) 1782 return result; 1783 1784 encode_entry(&next_entry, next_value, NULL); 1785 } 1786 1787 if (result != UDS_SUCCESS) 1788 return result; 1789 1790 encode_entry(delta_entry, value, name); 1791 delta_zone = delta_entry->delta_zone; 1792 delta_zone->record_count++; 1793 delta_zone->collision_count += delta_entry->is_collision ? 1 : 0; 1794 return UDS_SUCCESS; 1795 } 1796 1797 static void delete_bits(const struct delta_index_entry *delta_entry, int size) 1798 { 1799 u64 source; 1800 u64 destination; 1801 u32 count; 1802 bool before_flag; 1803 struct delta_list *delta_list = delta_entry->delta_list; 1804 u8 *memory = delta_entry->delta_zone->memory; 1805 /* Compute bits retained before and after the deleted bits. */ 1806 u32 total_size = delta_list->size; 1807 u32 before_size = delta_entry->offset; 1808 u32 after_size = total_size - delta_entry->offset - size; 1809 1810 /* 1811 * Determine whether to add to the available space either before or after the delta list. 1812 * We prefer to move the least amount of data. If it is exactly the same, try to add to the 1813 * smaller amount of free space. 1814 */ 1815 if (before_size < after_size) { 1816 before_flag = true; 1817 } else if (after_size < before_size) { 1818 before_flag = false; 1819 } else { 1820 u64 free_before = 1821 (delta_list[0].start - (delta_list[-1].start + delta_list[-1].size)); 1822 u64 free_after = 1823 (delta_list[1].start - (delta_list[0].start + delta_list[0].size)); 1824 1825 before_flag = (free_before < free_after); 1826 } 1827 1828 delta_list->size -= size; 1829 if (before_flag) { 1830 source = delta_list->start; 1831 destination = source + size; 1832 delta_list->start += size; 1833 count = before_size; 1834 } else { 1835 destination = delta_list->start + delta_entry->offset; 1836 source = destination + size; 1837 count = after_size; 1838 } 1839 1840 move_bits(memory, source, memory, destination, count); 1841 } 1842 1843 int uds_remove_delta_index_entry(struct delta_index_entry *delta_entry) 1844 { 1845 int result; 1846 struct delta_index_entry next_entry; 1847 struct delta_zone *delta_zone; 1848 struct delta_list *delta_list; 1849 1850 result = assert_mutable_entry(delta_entry); 1851 if (result != UDS_SUCCESS) 1852 return result; 1853 1854 next_entry = *delta_entry; 1855 result = uds_next_delta_index_entry(&next_entry); 1856 if (result != UDS_SUCCESS) 1857 return result; 1858 1859 delta_zone = delta_entry->delta_zone; 1860 1861 if (delta_entry->is_collision) { 1862 /* This is a collision entry, so just remove it. */ 1863 delete_bits(delta_entry, delta_entry->entry_bits); 1864 next_entry.offset = delta_entry->offset; 1865 delta_zone->collision_count -= 1; 1866 } else if (next_entry.at_end) { 1867 /* This entry is at the end of the list, so just remove it. */ 1868 delete_bits(delta_entry, delta_entry->entry_bits); 1869 next_entry.key -= delta_entry->delta; 1870 next_entry.offset = delta_entry->offset; 1871 } else { 1872 /* The delta in the next entry needs to be updated. */ 1873 u32 next_value = uds_get_delta_entry_value(&next_entry); 1874 u16 old_size = delta_entry->entry_bits + next_entry.entry_bits; 1875 1876 if (next_entry.is_collision) { 1877 next_entry.is_collision = false; 1878 delta_zone->collision_count -= 1; 1879 } 1880 1881 set_delta(&next_entry, delta_entry->delta + next_entry.delta); 1882 next_entry.offset = delta_entry->offset; 1883 /* The one new entry is always smaller than the two entries being replaced. */ 1884 delete_bits(delta_entry, old_size - next_entry.entry_bits); 1885 encode_entry(&next_entry, next_value, NULL); 1886 } 1887 1888 delta_zone->record_count--; 1889 delta_zone->discard_count++; 1890 *delta_entry = next_entry; 1891 1892 delta_list = delta_entry->delta_list; 1893 if (delta_entry->offset < delta_list->save_offset) { 1894 /* The saved entry offset is no longer valid. */ 1895 delta_list->save_key = 0; 1896 delta_list->save_offset = 0; 1897 } 1898 1899 return UDS_SUCCESS; 1900 } 1901 1902 void uds_get_delta_index_stats(const struct delta_index *delta_index, 1903 struct delta_index_stats *stats) 1904 { 1905 unsigned int z; 1906 const struct delta_zone *delta_zone; 1907 1908 memset(stats, 0, sizeof(struct delta_index_stats)); 1909 for (z = 0; z < delta_index->zone_count; z++) { 1910 delta_zone = &delta_index->delta_zones[z]; 1911 stats->rebalance_time += delta_zone->rebalance_time; 1912 stats->rebalance_count += delta_zone->rebalance_count; 1913 stats->record_count += delta_zone->record_count; 1914 stats->collision_count += delta_zone->collision_count; 1915 stats->discard_count += delta_zone->discard_count; 1916 stats->overflow_count += delta_zone->overflow_count; 1917 stats->list_count += delta_zone->list_count; 1918 } 1919 } 1920 1921 size_t uds_compute_delta_index_size(u32 entry_count, u32 mean_delta, u32 payload_bits) 1922 { 1923 u16 min_bits; 1924 u32 incr_keys; 1925 u32 min_keys; 1926 1927 compute_coding_constants(mean_delta, &min_bits, &min_keys, &incr_keys); 1928 /* On average, each delta is encoded into about min_bits + 1.5 bits. */ 1929 return entry_count * (payload_bits + min_bits + 1) + entry_count / 2; 1930 } 1931 1932 u32 uds_get_delta_index_page_count(u32 entry_count, u32 list_count, u32 mean_delta, 1933 u32 payload_bits, size_t bytes_per_page) 1934 { 1935 unsigned int bits_per_delta_list; 1936 unsigned int bits_per_page; 1937 size_t bits_per_index; 1938 1939 /* Compute the expected number of bits needed for all the entries. */ 1940 bits_per_index = uds_compute_delta_index_size(entry_count, mean_delta, 1941 payload_bits); 1942 bits_per_delta_list = bits_per_index / list_count; 1943 1944 /* Add in the immutable delta list headers. */ 1945 bits_per_index += list_count * IMMUTABLE_HEADER_SIZE; 1946 /* Compute the number of usable bits on an immutable index page. */ 1947 bits_per_page = ((bytes_per_page - sizeof(struct delta_page_header)) * BITS_PER_BYTE); 1948 /* 1949 * Reduce the bits per page by one immutable delta list header and one delta list to 1950 * account for internal fragmentation. 1951 */ 1952 bits_per_page -= IMMUTABLE_HEADER_SIZE + bits_per_delta_list; 1953 /* Now compute the number of pages needed. */ 1954 return DIV_ROUND_UP(bits_per_index, bits_per_page); 1955 } 1956 1957 void uds_log_delta_index_entry(struct delta_index_entry *delta_entry) 1958 { 1959 vdo_log_ratelimit(vdo_log_info, 1960 "List 0x%X Key 0x%X Offset 0x%X%s%s List_size 0x%X%s", 1961 delta_entry->list_number, delta_entry->key, 1962 delta_entry->offset, delta_entry->at_end ? " end" : "", 1963 delta_entry->is_collision ? " collision" : "", 1964 delta_entry->delta_list->size, 1965 delta_entry->list_overflow ? " overflow" : ""); 1966 delta_entry->list_overflow = false; 1967 } 1968