1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright 2023 Red Hat 4 */ 5 #include "delta-index.h" 6 7 #include <linux/bitops.h> 8 #include <linux/bits.h> 9 #include <linux/compiler.h> 10 #include <linux/limits.h> 11 #include <linux/log2.h> 12 13 #include "cpu.h" 14 #include "errors.h" 15 #include "logger.h" 16 #include "memory-alloc.h" 17 #include "numeric.h" 18 #include "permassert.h" 19 #include "string-utils.h" 20 #include "time-utils.h" 21 22 #include "config.h" 23 #include "indexer.h" 24 25 /* 26 * The entries in a delta index could be stored in a single delta list, but to reduce search times 27 * and update costs it uses multiple delta lists. These lists are stored in a single chunk of 28 * memory managed by the delta_zone structure. The delta_zone can move the data around within its 29 * memory, so the location of each delta list is recorded as a bit offset into the memory. Because 30 * the volume index can contain over a million delta lists, we want to be efficient with the size 31 * of the delta list header information. This information is encoded into 16 bytes per list. The 32 * volume index delta list memory can easily exceed 4 gigabits, so a 64 bit value is needed to 33 * address the memory. The volume index delta lists average around 6 kilobits, so 16 bits are 34 * sufficient to store the size of a delta list. 35 * 36 * Each delta list is stored as a bit stream. Within the delta list encoding, bits and bytes are 37 * numbered in little endian order. Within a byte, bit 0 is the least significant bit (0x1), and 38 * bit 7 is the most significant bit (0x80). Within a bit stream, bit 7 is the most significant bit 39 * of byte 0, and bit 8 is the least significant bit of byte 1. Within a byte array, a byte's 40 * number corresponds to its index in the array. 41 * 42 * A standard delta list entry is stored as a fixed length payload (the value) followed by a 43 * variable length key (the delta). A collision entry is used when two block names have the same 44 * delta list address. A collision entry always follows a standard entry for the hash with which it 45 * collides, and is encoded with DELTA == 0 with an additional 256 bits field at the end, 46 * containing the full block name. An entry with a delta of 0 at the beginning of a delta list 47 * indicates a normal entry. 48 * 49 * The delta in each entry is encoded with a variable-length Huffman code to minimize the memory 50 * used by small deltas. The Huffman code is specified by three parameters, which can be computed 51 * from the desired mean delta when the index is full. (See compute_coding_constants() for 52 * details.) 53 * 54 * The bit field utilities used to read and write delta entries assume that it is possible to read 55 * some bytes beyond the end of the bit field, so a delta_zone memory allocation is guarded by two 56 * invalid delta lists to prevent reading outside the delta_zone memory. The valid delta lists are 57 * numbered 1 to N, and the guard lists are numbered 0 and N+1. The function to decode the bit 58 * stream include a step that skips over bits set to 0 until the first 1 bit is found. A corrupted 59 * delta list could cause this step to run off the end of the delta_zone memory, so as extra 60 * protection against this happening, the tail guard list is set to all ones. 61 * 62 * The delta_index supports two different forms. The mutable form is created by 63 * uds_initialize_delta_index(), and is used for the volume index and for open chapter indexes. 
The 64 * immutable form is created by uds_initialize_delta_index_page(), and is used for closed (and 65 * cached) chapter index pages. The immutable form does not allocate delta list headers or 66 * temporary offsets, and thus is somewhat more memory efficient. 67 */ 68 69 /* 70 * This is the largest field size supported by get_field() and set_field(). Any field that is 71 * larger is not guaranteed to fit in a single byte-aligned u32. 72 */ 73 #define MAX_FIELD_BITS ((sizeof(u32) - 1) * BITS_PER_BYTE + 1) 74 75 /* 76 * This is the largest field size supported by get_big_field() and set_big_field(). Any field that 77 * is larger is not guaranteed to fit in a single byte-aligned u64. 78 */ 79 #define MAX_BIG_FIELD_BITS ((sizeof(u64) - 1) * BITS_PER_BYTE + 1) 80 81 /* 82 * This is the number of guard bytes needed at the end of the memory byte array when using the bit 83 * utilities. These utilities call get_big_field() and set_big_field(), which can access up to 7 84 * bytes beyond the end of the desired field. The definition is written to make it clear how this 85 * value is derived. 86 */ 87 #define POST_FIELD_GUARD_BYTES (sizeof(u64) - 1) 88 89 /* The number of guard bits that are needed in the tail guard list */ 90 #define GUARD_BITS (POST_FIELD_GUARD_BYTES * BITS_PER_BYTE) 91 92 /* 93 * The maximum size of a single delta list in bytes. We count guard bytes in this value because a 94 * buffer of this size can be used with move_bits(). 95 */ 96 #define DELTA_LIST_MAX_BYTE_COUNT \ 97 ((U16_MAX + BITS_PER_BYTE) / BITS_PER_BYTE + POST_FIELD_GUARD_BYTES) 98 99 /* The number of extra bytes and bits needed to store a collision entry */ 100 #define COLLISION_BYTES UDS_RECORD_NAME_SIZE 101 #define COLLISION_BITS (COLLISION_BYTES * BITS_PER_BYTE) 102 103 /* 104 * Immutable delta lists are packed into pages containing a header that encodes the delta list 105 * information into 19 bits per list (64KB bit offset). 106 */ 107 #define IMMUTABLE_HEADER_SIZE 19 108 109 /* 110 * Constants and structures for the saved delta index. "DI" is for delta_index, and -##### is a 111 * number to increment when the format of the data changes. 112 */ 113 #define MAGIC_SIZE 8 114 115 static const char DELTA_INDEX_MAGIC[] = "DI-00002"; 116 117 struct delta_index_header { 118 char magic[MAGIC_SIZE]; 119 u32 zone_number; 120 u32 zone_count; 121 u32 first_list; 122 u32 list_count; 123 u64 record_count; 124 u64 collision_count; 125 }; 126 127 /* 128 * Header data used for immutable delta index pages. This data is followed by the delta list offset 129 * table. 130 */ 131 struct delta_page_header { 132 /* Externally-defined nonce */ 133 u64 nonce; 134 /* The virtual chapter number */ 135 u64 virtual_chapter_number; 136 /* Index of the first delta list on the page */ 137 u16 first_list; 138 /* Number of delta lists on the page */ 139 u16 list_count; 140 } __packed; 141 142 static inline u64 get_delta_list_byte_start(const struct delta_list *delta_list) 143 { 144 return delta_list->start / BITS_PER_BYTE; 145 } 146 147 static inline u16 get_delta_list_byte_size(const struct delta_list *delta_list) 148 { 149 unsigned int bit_offset = delta_list->start % BITS_PER_BYTE; 150 151 return BITS_TO_BYTES(bit_offset + delta_list->size); 152 } 153 154 static void rebalance_delta_zone(const struct delta_zone *delta_zone, u32 first, 155 u32 last) 156 { 157 struct delta_list *delta_list; 158 u64 new_start; 159 160 if (first == last) { 161 /* Only one list is moving, and we know there is space. 
*/ 162 delta_list = &delta_zone->delta_lists[first]; 163 new_start = delta_zone->new_offsets[first]; 164 if (delta_list->start != new_start) { 165 u64 source; 166 u64 destination; 167 168 source = get_delta_list_byte_start(delta_list); 169 delta_list->start = new_start; 170 destination = get_delta_list_byte_start(delta_list); 171 memmove(delta_zone->memory + destination, 172 delta_zone->memory + source, 173 get_delta_list_byte_size(delta_list)); 174 } 175 } else { 176 /* 177 * There is more than one list. Divide the problem in half, and use recursive calls 178 * to process each half. Note that after this computation, first <= middle, and 179 * middle < last. 180 */ 181 u32 middle = (first + last) / 2; 182 183 delta_list = &delta_zone->delta_lists[middle]; 184 new_start = delta_zone->new_offsets[middle]; 185 186 /* 187 * The direction that our middle list is moving determines which half of the 188 * problem must be processed first. 189 */ 190 if (new_start > delta_list->start) { 191 rebalance_delta_zone(delta_zone, middle + 1, last); 192 rebalance_delta_zone(delta_zone, first, middle); 193 } else { 194 rebalance_delta_zone(delta_zone, first, middle); 195 rebalance_delta_zone(delta_zone, middle + 1, last); 196 } 197 } 198 } 199 200 static inline size_t get_zone_memory_size(unsigned int zone_count, size_t memory_size) 201 { 202 /* Round up so that each zone is a multiple of 64K in size. */ 203 size_t ALLOC_BOUNDARY = 64 * 1024; 204 205 return (memory_size / zone_count + ALLOC_BOUNDARY - 1) & -ALLOC_BOUNDARY; 206 } 207 208 void uds_reset_delta_index(const struct delta_index *delta_index) 209 { 210 unsigned int z; 211 212 /* 213 * Initialize all delta lists to be empty. We keep 2 extra delta list descriptors, one 214 * before the first real entry and one after so that we don't need to bounds check the 215 * array access when calculating preceding and following gap sizes. 216 */ 217 for (z = 0; z < delta_index->zone_count; z++) { 218 u64 list_bits; 219 u64 spacing; 220 u64 offset; 221 unsigned int i; 222 struct delta_zone *zone = &delta_index->delta_zones[z]; 223 struct delta_list *delta_lists = zone->delta_lists; 224 225 /* Zeroing the delta list headers initializes the head guard list correctly. */ 226 memset(delta_lists, 0, 227 (zone->list_count + 2) * sizeof(struct delta_list)); 228 229 /* Set all the bits in the end guard list. */ 230 list_bits = (u64) zone->size * BITS_PER_BYTE - GUARD_BITS; 231 delta_lists[zone->list_count + 1].start = list_bits; 232 delta_lists[zone->list_count + 1].size = GUARD_BITS; 233 memset(zone->memory + (list_bits / BITS_PER_BYTE), ~0, 234 POST_FIELD_GUARD_BYTES); 235 236 /* Evenly space out the real delta lists by setting regular offsets. */ 237 spacing = list_bits / zone->list_count; 238 offset = spacing / 2; 239 for (i = 1; i <= zone->list_count; i++) { 240 delta_lists[i].start = offset; 241 offset += spacing; 242 } 243 244 /* Update the statistics. */ 245 zone->discard_count += zone->record_count; 246 zone->record_count = 0; 247 zone->collision_count = 0; 248 } 249 } 250 251 /* Compute the Huffman coding parameters for the given mean delta. 
The Huffman code is specified by 252 * three parameters: 253 * 254 * MINBITS The number of bits in the smallest code 255 * BASE The number of values coded using a code of length MINBITS 256 * INCR The number of values coded by using one additional bit 257 * 258 * These parameters are related by this equation: 259 * 260 * BASE + INCR == 1 << MINBITS 261 * 262 * The math for the Huffman code of an exponential distribution says that 263 * 264 * INCR = log(2) * MEAN_DELTA 265 * 266 * Then use the smallest MINBITS value so that 267 * 268 * (1 << MINBITS) > INCR 269 * 270 * And then 271 * 272 * BASE = (1 << MINBITS) - INCR 273 * 274 * Now the index can generate a code such that 275 * - The first BASE values code using MINBITS bits. 276 * - The next INCR values code using MINBITS+1 bits. 277 * - The next INCR values code using MINBITS+2 bits. 278 * - (and so on). 279 */ 280 static void compute_coding_constants(u32 mean_delta, u16 *min_bits, u32 *min_keys, u32 *incr_keys) 281 { 282 /* 283 * We want to compute the rounded value of log(2) * mean_delta. Since we cannot always use 284 * floating point, use a really good integer approximation. 285 */ 286 *incr_keys = (836158UL * mean_delta + 603160UL) / 1206321UL; 287 *min_bits = bits_per(*incr_keys + 1); 288 *min_keys = (1 << *min_bits) - *incr_keys; 289 } 290 291 void uds_uninitialize_delta_index(struct delta_index *delta_index) 292 { 293 unsigned int z; 294 295 if (delta_index->delta_zones == NULL) 296 return; 297 298 for (z = 0; z < delta_index->zone_count; z++) { 299 vdo_free(vdo_forget(delta_index->delta_zones[z].new_offsets)); 300 vdo_free(vdo_forget(delta_index->delta_zones[z].delta_lists)); 301 vdo_free(vdo_forget(delta_index->delta_zones[z].memory)); 302 } 303 304 vdo_free(delta_index->delta_zones); 305 memset(delta_index, 0, sizeof(struct delta_index)); 306 } 307 308 static int initialize_delta_zone(struct delta_zone *delta_zone, size_t size, 309 u32 first_list, u32 list_count, u32 mean_delta, 310 u32 payload_bits, u8 tag) 311 { 312 int result; 313 314 result = vdo_allocate(size, u8, "delta list", &delta_zone->memory); 315 if (result != VDO_SUCCESS) 316 return result; 317 318 result = vdo_allocate(list_count + 2, u64, "delta list temp", 319 &delta_zone->new_offsets); 320 if (result != VDO_SUCCESS) 321 return result; 322 323 /* Allocate the delta lists. 
*/ 324 result = vdo_allocate(list_count + 2, struct delta_list, "delta lists", 325 &delta_zone->delta_lists); 326 if (result != VDO_SUCCESS) 327 return result; 328 329 compute_coding_constants(mean_delta, &delta_zone->min_bits, 330 &delta_zone->min_keys, &delta_zone->incr_keys); 331 delta_zone->value_bits = payload_bits; 332 delta_zone->buffered_writer = NULL; 333 delta_zone->size = size; 334 delta_zone->rebalance_time = 0; 335 delta_zone->rebalance_count = 0; 336 delta_zone->record_count = 0; 337 delta_zone->collision_count = 0; 338 delta_zone->discard_count = 0; 339 delta_zone->overflow_count = 0; 340 delta_zone->first_list = first_list; 341 delta_zone->list_count = list_count; 342 delta_zone->tag = tag; 343 344 return UDS_SUCCESS; 345 } 346 347 int uds_initialize_delta_index(struct delta_index *delta_index, unsigned int zone_count, 348 u32 list_count, u32 mean_delta, u32 payload_bits, 349 size_t memory_size, u8 tag) 350 { 351 int result; 352 unsigned int z; 353 size_t zone_memory; 354 355 result = vdo_allocate(zone_count, struct delta_zone, "Delta Index Zones", 356 &delta_index->delta_zones); 357 if (result != VDO_SUCCESS) 358 return result; 359 360 delta_index->zone_count = zone_count; 361 delta_index->list_count = list_count; 362 delta_index->lists_per_zone = DIV_ROUND_UP(list_count, zone_count); 363 delta_index->memory_size = 0; 364 delta_index->mutable = true; 365 delta_index->tag = tag; 366 367 for (z = 0; z < zone_count; z++) { 368 u32 lists_in_zone = delta_index->lists_per_zone; 369 u32 first_list_in_zone = z * lists_in_zone; 370 371 if (z == zone_count - 1) { 372 /* 373 * The last zone gets fewer lists if zone_count doesn't evenly divide 374 * list_count. We'll have an underflow if the assertion below doesn't hold. 375 */ 376 if (delta_index->list_count <= first_list_in_zone) { 377 uds_uninitialize_delta_index(delta_index); 378 return vdo_log_error_strerror(UDS_INVALID_ARGUMENT, 379 "%u delta lists not enough for %u zones", 380 list_count, zone_count); 381 } 382 lists_in_zone = delta_index->list_count - first_list_in_zone; 383 } 384 385 zone_memory = get_zone_memory_size(zone_count, memory_size); 386 result = initialize_delta_zone(&delta_index->delta_zones[z], zone_memory, 387 first_list_in_zone, lists_in_zone, 388 mean_delta, payload_bits, tag); 389 if (result != UDS_SUCCESS) { 390 uds_uninitialize_delta_index(delta_index); 391 return result; 392 } 393 394 delta_index->memory_size += 395 (sizeof(struct delta_zone) + zone_memory + 396 (lists_in_zone + 2) * (sizeof(struct delta_list) + sizeof(u64))); 397 } 398 399 uds_reset_delta_index(delta_index); 400 return UDS_SUCCESS; 401 } 402 403 /* Read a bit field from an arbitrary bit boundary. */ 404 static inline u32 get_field(const u8 *memory, u64 offset, u8 size) 405 { 406 const void *addr = memory + offset / BITS_PER_BYTE; 407 408 return (get_unaligned_le32(addr) >> (offset % BITS_PER_BYTE)) & ((1 << size) - 1); 409 } 410 411 /* Write a bit field to an arbitrary bit boundary. */ 412 static inline void set_field(u32 value, u8 *memory, u64 offset, u8 size) 413 { 414 void *addr = memory + offset / BITS_PER_BYTE; 415 int shift = offset % BITS_PER_BYTE; 416 u32 data = get_unaligned_le32(addr); 417 418 data &= ~(((1 << size) - 1) << shift); 419 data |= value << shift; 420 put_unaligned_le32(data, addr); 421 } 422 423 /* Get the bit offset to the immutable delta list header. 
*/ 424 static inline u32 get_immutable_header_offset(u32 list_number) 425 { 426 return sizeof(struct delta_page_header) * BITS_PER_BYTE + 427 list_number * IMMUTABLE_HEADER_SIZE; 428 } 429 430 /* Get the bit offset to the start of the immutable delta list bit stream. */ 431 static inline u32 get_immutable_start(const u8 *memory, u32 list_number) 432 { 433 return get_field(memory, get_immutable_header_offset(list_number), 434 IMMUTABLE_HEADER_SIZE); 435 } 436 437 /* Set the bit offset to the start of the immutable delta list bit stream. */ 438 static inline void set_immutable_start(u8 *memory, u32 list_number, u32 start) 439 { 440 set_field(start, memory, get_immutable_header_offset(list_number), 441 IMMUTABLE_HEADER_SIZE); 442 } 443 444 static bool verify_delta_index_page(u64 nonce, u16 list_count, u64 expected_nonce, 445 u8 *memory, size_t memory_size) 446 { 447 unsigned int i; 448 449 /* 450 * Verify the nonce. A mismatch can happen here during rebuild if we haven't written the 451 * entire volume at least once. 452 */ 453 if (nonce != expected_nonce) 454 return false; 455 456 /* Verify that the number of delta lists can fit in the page. */ 457 if (list_count > ((memory_size - sizeof(struct delta_page_header)) * 458 BITS_PER_BYTE / IMMUTABLE_HEADER_SIZE)) 459 return false; 460 461 /* 462 * Verify that the first delta list is immediately after the last delta 463 * list header. 464 */ 465 if (get_immutable_start(memory, 0) != get_immutable_header_offset(list_count + 1)) 466 return false; 467 468 /* Verify that the lists are in the correct order. */ 469 for (i = 0; i < list_count; i++) { 470 if (get_immutable_start(memory, i) > get_immutable_start(memory, i + 1)) 471 return false; 472 } 473 474 /* 475 * Verify that the last list ends on the page, and that there is room 476 * for the post-field guard bits. 477 */ 478 if (get_immutable_start(memory, list_count) > 479 (memory_size - POST_FIELD_GUARD_BYTES) * BITS_PER_BYTE) 480 return false; 481 482 /* Verify that the guard bytes are correctly set to all ones. */ 483 for (i = 0; i < POST_FIELD_GUARD_BYTES; i++) { 484 if (memory[memory_size - POST_FIELD_GUARD_BYTES + i] != (u8) ~0) 485 return false; 486 } 487 488 /* All verifications passed. */ 489 return true; 490 } 491 492 /* Initialize a delta index page to refer to a supplied page. */ 493 int uds_initialize_delta_index_page(struct delta_index_page *delta_index_page, 494 u64 expected_nonce, u32 mean_delta, u32 payload_bits, 495 u8 *memory, size_t memory_size) 496 { 497 u64 nonce; 498 u64 vcn; 499 u64 first_list; 500 u64 list_count; 501 struct delta_page_header *header = (struct delta_page_header *) memory; 502 struct delta_zone *delta_zone = &delta_index_page->delta_zone; 503 const u8 *nonce_addr = (const u8 *) &header->nonce; 504 const u8 *vcn_addr = (const u8 *) &header->virtual_chapter_number; 505 const u8 *first_list_addr = (const u8 *) &header->first_list; 506 const u8 *list_count_addr = (const u8 *) &header->list_count; 507 508 /* First assume that the header is little endian. */ 509 nonce = get_unaligned_le64(nonce_addr); 510 vcn = get_unaligned_le64(vcn_addr); 511 first_list = get_unaligned_le16(first_list_addr); 512 list_count = get_unaligned_le16(list_count_addr); 513 if (!verify_delta_index_page(nonce, list_count, expected_nonce, memory, 514 memory_size)) { 515 /* If that fails, try big endian. 
*/ 516 nonce = get_unaligned_be64(nonce_addr); 517 vcn = get_unaligned_be64(vcn_addr); 518 first_list = get_unaligned_be16(first_list_addr); 519 list_count = get_unaligned_be16(list_count_addr); 520 if (!verify_delta_index_page(nonce, list_count, expected_nonce, memory, 521 memory_size)) { 522 /* 523 * Both attempts failed. Do not log this as an error, because it can happen 524 * during a rebuild if we haven't written the entire volume at least once. 525 */ 526 return UDS_CORRUPT_DATA; 527 } 528 } 529 530 delta_index_page->delta_index.delta_zones = delta_zone; 531 delta_index_page->delta_index.zone_count = 1; 532 delta_index_page->delta_index.list_count = list_count; 533 delta_index_page->delta_index.lists_per_zone = list_count; 534 delta_index_page->delta_index.mutable = false; 535 delta_index_page->delta_index.tag = 'p'; 536 delta_index_page->virtual_chapter_number = vcn; 537 delta_index_page->lowest_list_number = first_list; 538 delta_index_page->highest_list_number = first_list + list_count - 1; 539 540 compute_coding_constants(mean_delta, &delta_zone->min_bits, 541 &delta_zone->min_keys, &delta_zone->incr_keys); 542 delta_zone->value_bits = payload_bits; 543 delta_zone->memory = memory; 544 delta_zone->delta_lists = NULL; 545 delta_zone->new_offsets = NULL; 546 delta_zone->buffered_writer = NULL; 547 delta_zone->size = memory_size; 548 delta_zone->rebalance_time = 0; 549 delta_zone->rebalance_count = 0; 550 delta_zone->record_count = 0; 551 delta_zone->collision_count = 0; 552 delta_zone->discard_count = 0; 553 delta_zone->overflow_count = 0; 554 delta_zone->first_list = 0; 555 delta_zone->list_count = list_count; 556 delta_zone->tag = 'p'; 557 558 return UDS_SUCCESS; 559 } 560 561 /* Read a large bit field from an arbitrary bit boundary. */ 562 static inline u64 get_big_field(const u8 *memory, u64 offset, u8 size) 563 { 564 const void *addr = memory + offset / BITS_PER_BYTE; 565 566 return (get_unaligned_le64(addr) >> (offset % BITS_PER_BYTE)) & ((1UL << size) - 1); 567 } 568 569 /* Write a large bit field to an arbitrary bit boundary. */ 570 static inline void set_big_field(u64 value, u8 *memory, u64 offset, u8 size) 571 { 572 void *addr = memory + offset / BITS_PER_BYTE; 573 u8 shift = offset % BITS_PER_BYTE; 574 u64 data = get_unaligned_le64(addr); 575 576 data &= ~(((1UL << size) - 1) << shift); 577 data |= value << shift; 578 put_unaligned_le64(data, addr); 579 } 580 581 /* Set a sequence of bits to all zeros. */ 582 static inline void set_zero(u8 *memory, u64 offset, u32 size) 583 { 584 if (size > 0) { 585 u8 *addr = memory + offset / BITS_PER_BYTE; 586 u8 shift = offset % BITS_PER_BYTE; 587 u32 count = size + shift > BITS_PER_BYTE ? (u32) BITS_PER_BYTE - shift : size; 588 589 *addr++ &= ~(((1 << count) - 1) << shift); 590 for (size -= count; size > BITS_PER_BYTE; size -= BITS_PER_BYTE) 591 *addr++ = 0; 592 593 if (size > 0) 594 *addr &= 0xFF << size; 595 } 596 } 597 598 /* 599 * Move several bits from a higher to a lower address, moving the lower addressed bits first. The 600 * size and memory offsets are measured in bits. 601 */ 602 static void move_bits_down(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size) 603 { 604 const u8 *source; 605 u8 *destination; 606 u8 offset; 607 u8 count; 608 u64 field; 609 610 /* Start by moving one field that ends on a to int boundary. 
*/ 611 count = (MAX_BIG_FIELD_BITS - ((to_offset + MAX_BIG_FIELD_BITS) % BITS_PER_TYPE(u32))); 612 field = get_big_field(from, from_offset, count); 613 set_big_field(field, to, to_offset, count); 614 from_offset += count; 615 to_offset += count; 616 size -= count; 617 618 /* Now do the main loop to copy 32 bit chunks that are int-aligned at the destination. */ 619 offset = from_offset % BITS_PER_TYPE(u32); 620 source = from + (from_offset - offset) / BITS_PER_BYTE; 621 destination = to + to_offset / BITS_PER_BYTE; 622 while (size > MAX_BIG_FIELD_BITS) { 623 put_unaligned_le32(get_unaligned_le64(source) >> offset, destination); 624 source += sizeof(u32); 625 destination += sizeof(u32); 626 from_offset += BITS_PER_TYPE(u32); 627 to_offset += BITS_PER_TYPE(u32); 628 size -= BITS_PER_TYPE(u32); 629 } 630 631 /* Finish up by moving any remaining bits. */ 632 if (size > 0) { 633 field = get_big_field(from, from_offset, size); 634 set_big_field(field, to, to_offset, size); 635 } 636 } 637 638 /* 639 * Move several bits from a lower to a higher address, moving the higher addressed bits first. The 640 * size and memory offsets are measured in bits. 641 */ 642 static void move_bits_up(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size) 643 { 644 const u8 *source; 645 u8 *destination; 646 u8 offset; 647 u8 count; 648 u64 field; 649 650 /* Start by moving one field that begins on a destination int boundary. */ 651 count = (to_offset + size) % BITS_PER_TYPE(u32); 652 if (count > 0) { 653 size -= count; 654 field = get_big_field(from, from_offset + size, count); 655 set_big_field(field, to, to_offset + size, count); 656 } 657 658 /* Now do the main loop to copy 32 bit chunks that are int-aligned at the destination. */ 659 offset = (from_offset + size) % BITS_PER_TYPE(u32); 660 source = from + (from_offset + size - offset) / BITS_PER_BYTE; 661 destination = to + (to_offset + size) / BITS_PER_BYTE; 662 while (size > MAX_BIG_FIELD_BITS) { 663 source -= sizeof(u32); 664 destination -= sizeof(u32); 665 size -= BITS_PER_TYPE(u32); 666 put_unaligned_le32(get_unaligned_le64(source) >> offset, destination); 667 } 668 669 /* Finish up by moving any remaining bits. */ 670 if (size > 0) { 671 field = get_big_field(from, from_offset, size); 672 set_big_field(field, to, to_offset, size); 673 } 674 } 675 676 /* 677 * Move bits from one field to another. When the fields overlap, behave as if we first move all the 678 * bits from the source to a temporary value, and then move all the bits from the temporary value 679 * to the destination. The size and memory offsets are measured in bits. 680 */ 681 static void move_bits(const u8 *from, u64 from_offset, u8 *to, u64 to_offset, u32 size) 682 { 683 u64 field; 684 685 /* A small move doesn't require special handling. */ 686 if (size <= MAX_BIG_FIELD_BITS) { 687 if (size > 0) { 688 field = get_big_field(from, from_offset, size); 689 set_big_field(field, to, to_offset, size); 690 } 691 692 return; 693 } 694 695 if (from_offset > to_offset) 696 move_bits_down(from, from_offset, to, to_offset, size); 697 else 698 move_bits_up(from, from_offset, to, to_offset, size); 699 } 700 701 /* 702 * Pack delta lists from a mutable delta index into an immutable delta index page. A range of delta 703 * lists (starting with a specified list index) is copied from the mutable delta index into a 704 * memory page used in the immutable index. The number of lists copied onto the page is returned in 705 * list_count. 
706 */ 707 int uds_pack_delta_index_page(const struct delta_index *delta_index, u64 header_nonce, 708 u8 *memory, size_t memory_size, u64 virtual_chapter_number, 709 u32 first_list, u32 *list_count) 710 { 711 const struct delta_zone *delta_zone; 712 struct delta_list *delta_lists; 713 u32 max_lists; 714 u32 n_lists = 0; 715 u32 offset; 716 u32 i; 717 int free_bits; 718 int bits; 719 struct delta_page_header *header; 720 721 delta_zone = &delta_index->delta_zones[0]; 722 delta_lists = &delta_zone->delta_lists[first_list + 1]; 723 max_lists = delta_index->list_count - first_list; 724 725 /* 726 * Compute how many lists will fit on the page. Subtract the size of the fixed header, one 727 * delta list offset, and the guard bytes from the page size to determine how much space is 728 * available for delta lists. 729 */ 730 free_bits = memory_size * BITS_PER_BYTE; 731 free_bits -= get_immutable_header_offset(1); 732 free_bits -= GUARD_BITS; 733 if (free_bits < IMMUTABLE_HEADER_SIZE) { 734 /* This page is too small to store any delta lists. */ 735 return vdo_log_error_strerror(UDS_OVERFLOW, 736 "Chapter Index Page of %zu bytes is too small", 737 memory_size); 738 } 739 740 while (n_lists < max_lists) { 741 /* Each list requires a delta list offset and the list data. */ 742 bits = IMMUTABLE_HEADER_SIZE + delta_lists[n_lists].size; 743 if (bits > free_bits) 744 break; 745 746 n_lists++; 747 free_bits -= bits; 748 } 749 750 *list_count = n_lists; 751 752 header = (struct delta_page_header *) memory; 753 put_unaligned_le64(header_nonce, (u8 *) &header->nonce); 754 put_unaligned_le64(virtual_chapter_number, 755 (u8 *) &header->virtual_chapter_number); 756 put_unaligned_le16(first_list, (u8 *) &header->first_list); 757 put_unaligned_le16(n_lists, (u8 *) &header->list_count); 758 759 /* Construct the delta list offset table. */ 760 offset = get_immutable_header_offset(n_lists + 1); 761 set_immutable_start(memory, 0, offset); 762 for (i = 0; i < n_lists; i++) { 763 offset += delta_lists[i].size; 764 set_immutable_start(memory, i + 1, offset); 765 } 766 767 /* Copy the delta list data onto the memory page. */ 768 for (i = 0; i < n_lists; i++) { 769 move_bits(delta_zone->memory, delta_lists[i].start, memory, 770 get_immutable_start(memory, i), delta_lists[i].size); 771 } 772 773 /* Set all the bits in the guard bytes. */ 774 memset(memory + memory_size - POST_FIELD_GUARD_BYTES, ~0, 775 POST_FIELD_GUARD_BYTES); 776 return UDS_SUCCESS; 777 } 778 779 /* Compute the new offsets of the delta lists. 
*/ 780 static void compute_new_list_offsets(struct delta_zone *delta_zone, u32 growing_index, 781 size_t growing_size, size_t used_space) 782 { 783 size_t spacing; 784 u32 i; 785 struct delta_list *delta_lists = delta_zone->delta_lists; 786 u32 tail_guard_index = delta_zone->list_count + 1; 787 788 spacing = (delta_zone->size - used_space) / delta_zone->list_count; 789 delta_zone->new_offsets[0] = 0; 790 for (i = 0; i <= delta_zone->list_count; i++) { 791 delta_zone->new_offsets[i + 1] = 792 (delta_zone->new_offsets[i] + 793 get_delta_list_byte_size(&delta_lists[i]) + spacing); 794 delta_zone->new_offsets[i] *= BITS_PER_BYTE; 795 delta_zone->new_offsets[i] += delta_lists[i].start % BITS_PER_BYTE; 796 if (i == 0) 797 delta_zone->new_offsets[i + 1] -= spacing / 2; 798 if (i + 1 == growing_index) 799 delta_zone->new_offsets[i + 1] += growing_size; 800 } 801 802 delta_zone->new_offsets[tail_guard_index] = 803 (delta_zone->size * BITS_PER_BYTE - delta_lists[tail_guard_index].size); 804 } 805 806 static void rebalance_lists(struct delta_zone *delta_zone) 807 { 808 struct delta_list *delta_lists; 809 u32 i; 810 size_t used_space = 0; 811 812 /* Extend and balance memory to receive the delta lists */ 813 delta_lists = delta_zone->delta_lists; 814 for (i = 0; i <= delta_zone->list_count + 1; i++) 815 used_space += get_delta_list_byte_size(&delta_lists[i]); 816 817 compute_new_list_offsets(delta_zone, 0, 0, used_space); 818 for (i = 1; i <= delta_zone->list_count + 1; i++) 819 delta_lists[i].start = delta_zone->new_offsets[i]; 820 } 821 822 /* Start restoring a delta index from multiple input streams. */ 823 int uds_start_restoring_delta_index(struct delta_index *delta_index, 824 struct buffered_reader **buffered_readers, 825 unsigned int reader_count) 826 { 827 int result; 828 unsigned int zone_count = reader_count; 829 u64 record_count = 0; 830 u64 collision_count = 0; 831 u32 first_list[MAX_ZONES]; 832 u32 list_count[MAX_ZONES]; 833 unsigned int z; 834 u32 list_next = 0; 835 const struct delta_zone *delta_zone; 836 837 /* Read and validate each header. 
*/ 838 for (z = 0; z < zone_count; z++) { 839 struct delta_index_header header; 840 u8 buffer[sizeof(struct delta_index_header)]; 841 size_t offset = 0; 842 843 result = uds_read_from_buffered_reader(buffered_readers[z], buffer, 844 sizeof(buffer)); 845 if (result != UDS_SUCCESS) { 846 return vdo_log_warning_strerror(result, 847 "failed to read delta index header"); 848 } 849 850 memcpy(&header.magic, buffer, MAGIC_SIZE); 851 offset += MAGIC_SIZE; 852 decode_u32_le(buffer, &offset, &header.zone_number); 853 decode_u32_le(buffer, &offset, &header.zone_count); 854 decode_u32_le(buffer, &offset, &header.first_list); 855 decode_u32_le(buffer, &offset, &header.list_count); 856 decode_u64_le(buffer, &offset, &header.record_count); 857 decode_u64_le(buffer, &offset, &header.collision_count); 858 859 result = VDO_ASSERT(offset == sizeof(struct delta_index_header), 860 "%zu bytes decoded of %zu expected", offset, 861 sizeof(struct delta_index_header)); 862 if (result != VDO_SUCCESS) { 863 return vdo_log_warning_strerror(result, 864 "failed to read delta index header"); 865 } 866 867 if (memcmp(header.magic, DELTA_INDEX_MAGIC, MAGIC_SIZE) != 0) { 868 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 869 "delta index file has bad magic number"); 870 } 871 872 if (zone_count != header.zone_count) { 873 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 874 "delta index files contain mismatched zone counts (%u,%u)", 875 zone_count, header.zone_count); 876 } 877 878 if (header.zone_number != z) { 879 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 880 "delta index zone %u found in slot %u", 881 header.zone_number, z); 882 } 883 884 first_list[z] = header.first_list; 885 list_count[z] = header.list_count; 886 record_count += header.record_count; 887 collision_count += header.collision_count; 888 889 if (first_list[z] != list_next) { 890 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 891 "delta index file for zone %u starts with list %u instead of list %u", 892 z, first_list[z], list_next); 893 } 894 895 list_next += list_count[z]; 896 } 897 898 if (list_next != delta_index->list_count) { 899 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 900 "delta index files contain %u delta lists instead of %u delta lists", 901 list_next, delta_index->list_count); 902 } 903 904 if (collision_count > record_count) { 905 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 906 "delta index files contain %llu collisions and %llu records", 907 (unsigned long long) collision_count, 908 (unsigned long long) record_count); 909 } 910 911 uds_reset_delta_index(delta_index); 912 delta_index->delta_zones[0].record_count = record_count; 913 delta_index->delta_zones[0].collision_count = collision_count; 914 915 /* Read the delta lists and distribute them to the proper zones. 
*/ 916 for (z = 0; z < zone_count; z++) { 917 u32 i; 918 919 delta_index->load_lists[z] = 0; 920 for (i = 0; i < list_count[z]; i++) { 921 u16 delta_list_size; 922 u32 list_number; 923 unsigned int zone_number; 924 u8 size_data[sizeof(u16)]; 925 926 result = uds_read_from_buffered_reader(buffered_readers[z], 927 size_data, 928 sizeof(size_data)); 929 if (result != UDS_SUCCESS) { 930 return vdo_log_warning_strerror(result, 931 "failed to read delta index size"); 932 } 933 934 delta_list_size = get_unaligned_le16(size_data); 935 if (delta_list_size > 0) 936 delta_index->load_lists[z] += 1; 937 938 list_number = first_list[z] + i; 939 zone_number = list_number / delta_index->lists_per_zone; 940 delta_zone = &delta_index->delta_zones[zone_number]; 941 list_number -= delta_zone->first_list; 942 delta_zone->delta_lists[list_number + 1].size = delta_list_size; 943 } 944 } 945 946 /* Prepare each zone to start receiving the delta list data. */ 947 for (z = 0; z < delta_index->zone_count; z++) 948 rebalance_lists(&delta_index->delta_zones[z]); 949 950 return UDS_SUCCESS; 951 } 952 953 static int restore_delta_list_to_zone(struct delta_zone *delta_zone, 954 const struct delta_list_save_info *save_info, 955 const u8 *data) 956 { 957 struct delta_list *delta_list; 958 u16 bit_count; 959 u16 byte_count; 960 u32 list_number = save_info->index - delta_zone->first_list; 961 962 if (list_number >= delta_zone->list_count) { 963 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 964 "invalid delta list number %u not in range [%u,%u)", 965 save_info->index, delta_zone->first_list, 966 delta_zone->first_list + delta_zone->list_count); 967 } 968 969 delta_list = &delta_zone->delta_lists[list_number + 1]; 970 if (delta_list->size == 0) { 971 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 972 "unexpected delta list number %u", 973 save_info->index); 974 } 975 976 bit_count = delta_list->size + save_info->bit_offset; 977 byte_count = BITS_TO_BYTES(bit_count); 978 if (save_info->byte_count != byte_count) { 979 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 980 "unexpected delta list size %u != %u", 981 save_info->byte_count, byte_count); 982 } 983 984 move_bits(data, save_info->bit_offset, delta_zone->memory, delta_list->start, 985 delta_list->size); 986 return UDS_SUCCESS; 987 } 988 989 static int restore_delta_list_data(struct delta_index *delta_index, unsigned int load_zone, 990 struct buffered_reader *buffered_reader, u8 *data) 991 { 992 int result; 993 struct delta_list_save_info save_info; 994 u8 buffer[sizeof(struct delta_list_save_info)]; 995 unsigned int new_zone; 996 997 result = uds_read_from_buffered_reader(buffered_reader, buffer, sizeof(buffer)); 998 if (result != UDS_SUCCESS) { 999 return vdo_log_warning_strerror(result, 1000 "failed to read delta list data"); 1001 } 1002 1003 save_info = (struct delta_list_save_info) { 1004 .tag = buffer[0], 1005 .bit_offset = buffer[1], 1006 .byte_count = get_unaligned_le16(&buffer[2]), 1007 .index = get_unaligned_le32(&buffer[4]), 1008 }; 1009 1010 if ((save_info.bit_offset >= BITS_PER_BYTE) || 1011 (save_info.byte_count > DELTA_LIST_MAX_BYTE_COUNT)) { 1012 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 1013 "corrupt delta list data"); 1014 } 1015 1016 /* Make sure the data is intended for this delta index. 
*/ 1017 if (save_info.tag != delta_index->tag) 1018 return UDS_CORRUPT_DATA; 1019 1020 if (save_info.index >= delta_index->list_count) { 1021 return vdo_log_warning_strerror(UDS_CORRUPT_DATA, 1022 "invalid delta list number %u of %u", 1023 save_info.index, 1024 delta_index->list_count); 1025 } 1026 1027 result = uds_read_from_buffered_reader(buffered_reader, data, 1028 save_info.byte_count); 1029 if (result != UDS_SUCCESS) { 1030 return vdo_log_warning_strerror(result, 1031 "failed to read delta list data"); 1032 } 1033 1034 delta_index->load_lists[load_zone] -= 1; 1035 new_zone = save_info.index / delta_index->lists_per_zone; 1036 return restore_delta_list_to_zone(&delta_index->delta_zones[new_zone], 1037 &save_info, data); 1038 } 1039 1040 /* Restore delta lists from saved data. */ 1041 int uds_finish_restoring_delta_index(struct delta_index *delta_index, 1042 struct buffered_reader **buffered_readers, 1043 unsigned int reader_count) 1044 { 1045 int result; 1046 int saved_result = UDS_SUCCESS; 1047 unsigned int z; 1048 u8 *data; 1049 1050 result = vdo_allocate(DELTA_LIST_MAX_BYTE_COUNT, u8, __func__, &data); 1051 if (result != VDO_SUCCESS) 1052 return result; 1053 1054 for (z = 0; z < reader_count; z++) { 1055 while (delta_index->load_lists[z] > 0) { 1056 result = restore_delta_list_data(delta_index, z, 1057 buffered_readers[z], data); 1058 if (result != UDS_SUCCESS) { 1059 saved_result = result; 1060 break; 1061 } 1062 } 1063 } 1064 1065 vdo_free(data); 1066 return saved_result; 1067 } 1068 1069 int uds_check_guard_delta_lists(struct buffered_reader **buffered_readers, 1070 unsigned int reader_count) 1071 { 1072 int result; 1073 unsigned int z; 1074 u8 buffer[sizeof(struct delta_list_save_info)]; 1075 1076 for (z = 0; z < reader_count; z++) { 1077 result = uds_read_from_buffered_reader(buffered_readers[z], buffer, 1078 sizeof(buffer)); 1079 if (result != UDS_SUCCESS) 1080 return result; 1081 1082 if (buffer[0] != 'z') 1083 return UDS_CORRUPT_DATA; 1084 } 1085 1086 return UDS_SUCCESS; 1087 } 1088 1089 static int flush_delta_list(struct delta_zone *zone, u32 flush_index) 1090 { 1091 struct delta_list *delta_list; 1092 u8 buffer[sizeof(struct delta_list_save_info)]; 1093 int result; 1094 1095 delta_list = &zone->delta_lists[flush_index + 1]; 1096 1097 buffer[0] = zone->tag; 1098 buffer[1] = delta_list->start % BITS_PER_BYTE; 1099 put_unaligned_le16(get_delta_list_byte_size(delta_list), &buffer[2]); 1100 put_unaligned_le32(zone->first_list + flush_index, &buffer[4]); 1101 1102 result = uds_write_to_buffered_writer(zone->buffered_writer, buffer, 1103 sizeof(buffer)); 1104 if (result != UDS_SUCCESS) { 1105 vdo_log_warning_strerror(result, "failed to write delta list memory"); 1106 return result; 1107 } 1108 1109 result = uds_write_to_buffered_writer(zone->buffered_writer, 1110 zone->memory + get_delta_list_byte_start(delta_list), 1111 get_delta_list_byte_size(delta_list)); 1112 if (result != UDS_SUCCESS) 1113 vdo_log_warning_strerror(result, "failed to write delta list memory"); 1114 1115 return result; 1116 } 1117 1118 /* Start saving a delta index zone to a buffered output stream. 
*/ 1119 int uds_start_saving_delta_index(const struct delta_index *delta_index, 1120 unsigned int zone_number, 1121 struct buffered_writer *buffered_writer) 1122 { 1123 int result; 1124 u32 i; 1125 struct delta_zone *delta_zone; 1126 u8 buffer[sizeof(struct delta_index_header)]; 1127 size_t offset = 0; 1128 1129 delta_zone = &delta_index->delta_zones[zone_number]; 1130 memcpy(buffer, DELTA_INDEX_MAGIC, MAGIC_SIZE); 1131 offset += MAGIC_SIZE; 1132 encode_u32_le(buffer, &offset, zone_number); 1133 encode_u32_le(buffer, &offset, delta_index->zone_count); 1134 encode_u32_le(buffer, &offset, delta_zone->first_list); 1135 encode_u32_le(buffer, &offset, delta_zone->list_count); 1136 encode_u64_le(buffer, &offset, delta_zone->record_count); 1137 encode_u64_le(buffer, &offset, delta_zone->collision_count); 1138 1139 result = VDO_ASSERT(offset == sizeof(struct delta_index_header), 1140 "%zu bytes encoded of %zu expected", offset, 1141 sizeof(struct delta_index_header)); 1142 if (result != VDO_SUCCESS) 1143 return result; 1144 1145 result = uds_write_to_buffered_writer(buffered_writer, buffer, offset); 1146 if (result != UDS_SUCCESS) 1147 return vdo_log_warning_strerror(result, 1148 "failed to write delta index header"); 1149 1150 for (i = 0; i < delta_zone->list_count; i++) { 1151 u8 data[sizeof(u16)]; 1152 struct delta_list *delta_list; 1153 1154 delta_list = &delta_zone->delta_lists[i + 1]; 1155 put_unaligned_le16(delta_list->size, data); 1156 result = uds_write_to_buffered_writer(buffered_writer, data, 1157 sizeof(data)); 1158 if (result != UDS_SUCCESS) 1159 return vdo_log_warning_strerror(result, 1160 "failed to write delta list size"); 1161 } 1162 1163 delta_zone->buffered_writer = buffered_writer; 1164 return UDS_SUCCESS; 1165 } 1166 1167 int uds_finish_saving_delta_index(const struct delta_index *delta_index, 1168 unsigned int zone_number) 1169 { 1170 int result; 1171 int first_error = UDS_SUCCESS; 1172 u32 i; 1173 struct delta_zone *delta_zone; 1174 struct delta_list *delta_list; 1175 1176 delta_zone = &delta_index->delta_zones[zone_number]; 1177 for (i = 0; i < delta_zone->list_count; i++) { 1178 delta_list = &delta_zone->delta_lists[i + 1]; 1179 if (delta_list->size > 0) { 1180 result = flush_delta_list(delta_zone, i); 1181 if ((result != UDS_SUCCESS) && (first_error == UDS_SUCCESS)) 1182 first_error = result; 1183 } 1184 } 1185 1186 delta_zone->buffered_writer = NULL; 1187 return first_error; 1188 } 1189 1190 int uds_write_guard_delta_list(struct buffered_writer *buffered_writer) 1191 { 1192 int result; 1193 u8 buffer[sizeof(struct delta_list_save_info)]; 1194 1195 memset(buffer, 0, sizeof(struct delta_list_save_info)); 1196 buffer[0] = 'z'; 1197 1198 result = uds_write_to_buffered_writer(buffered_writer, buffer, sizeof(buffer)); 1199 if (result != UDS_SUCCESS) 1200 vdo_log_warning_strerror(result, "failed to write guard delta list"); 1201 1202 return UDS_SUCCESS; 1203 } 1204 1205 size_t uds_compute_delta_index_save_bytes(u32 list_count, size_t memory_size) 1206 { 1207 /* One zone will use at least as much memory as other zone counts. 
*/ 1208 return (sizeof(struct delta_index_header) + 1209 list_count * (sizeof(struct delta_list_save_info) + 1) + 1210 get_zone_memory_size(1, memory_size)); 1211 } 1212 1213 static int assert_not_at_end(const struct delta_index_entry *delta_entry) 1214 { 1215 int result = VDO_ASSERT(!delta_entry->at_end, 1216 "operation is invalid because the list entry is at the end of the delta list"); 1217 if (result != VDO_SUCCESS) 1218 result = UDS_BAD_STATE; 1219 1220 return result; 1221 } 1222 1223 /* 1224 * Prepare to search for an entry in the specified delta list. 1225 * 1226 * This is always the first function to be called when dealing with delta index entries. It is 1227 * always followed by calls to uds_next_delta_index_entry() to iterate through a delta list. The 1228 * fields of the delta_index_entry argument will be set up for iteration, but will not contain an 1229 * entry from the list. 1230 */ 1231 int uds_start_delta_index_search(const struct delta_index *delta_index, u32 list_number, 1232 u32 key, struct delta_index_entry *delta_entry) 1233 { 1234 int result; 1235 unsigned int zone_number; 1236 struct delta_zone *delta_zone; 1237 struct delta_list *delta_list; 1238 1239 result = VDO_ASSERT((list_number < delta_index->list_count), 1240 "Delta list number (%u) is out of range (%u)", list_number, 1241 delta_index->list_count); 1242 if (result != VDO_SUCCESS) 1243 return UDS_CORRUPT_DATA; 1244 1245 zone_number = list_number / delta_index->lists_per_zone; 1246 delta_zone = &delta_index->delta_zones[zone_number]; 1247 list_number -= delta_zone->first_list; 1248 result = VDO_ASSERT((list_number < delta_zone->list_count), 1249 "Delta list number (%u) is out of range (%u) for zone (%u)", 1250 list_number, delta_zone->list_count, zone_number); 1251 if (result != VDO_SUCCESS) 1252 return UDS_CORRUPT_DATA; 1253 1254 if (delta_index->mutable) { 1255 delta_list = &delta_zone->delta_lists[list_number + 1]; 1256 } else { 1257 u32 end_offset; 1258 1259 /* 1260 * Translate the immutable delta list header into a temporary 1261 * full delta list header. 1262 */ 1263 delta_list = &delta_entry->temp_delta_list; 1264 delta_list->start = get_immutable_start(delta_zone->memory, list_number); 1265 end_offset = get_immutable_start(delta_zone->memory, list_number + 1); 1266 delta_list->size = end_offset - delta_list->start; 1267 delta_list->save_key = 0; 1268 delta_list->save_offset = 0; 1269 } 1270 1271 if (key > delta_list->save_key) { 1272 delta_entry->key = delta_list->save_key; 1273 delta_entry->offset = delta_list->save_offset; 1274 } else { 1275 delta_entry->key = 0; 1276 delta_entry->offset = 0; 1277 if (key == 0) { 1278 /* 1279 * This usually means we're about to walk the entire delta list, so get all 1280 * of it into the CPU cache. 1281 */ 1282 uds_prefetch_range(&delta_zone->memory[delta_list->start / BITS_PER_BYTE], 1283 delta_list->size / BITS_PER_BYTE, false); 1284 } 1285 } 1286 1287 delta_entry->at_end = false; 1288 delta_entry->delta_zone = delta_zone; 1289 delta_entry->delta_list = delta_list; 1290 delta_entry->entry_bits = 0; 1291 delta_entry->is_collision = false; 1292 delta_entry->list_number = list_number; 1293 delta_entry->list_overflow = false; 1294 delta_entry->value_bits = delta_zone->value_bits; 1295 return UDS_SUCCESS; 1296 } 1297 1298 static inline u64 get_delta_entry_offset(const struct delta_index_entry *delta_entry) 1299 { 1300 return delta_entry->delta_list->start + delta_entry->offset; 1301 } 1302 1303 /* 1304 * Decode a delta index entry delta value. 
The delta_index_entry basically describes the previous 1305 * list entry, and has had its offset field changed to point to the subsequent entry. We decode the 1306 * bit stream and update the delta_list_entry to describe the entry. 1307 */ 1308 static inline void decode_delta(struct delta_index_entry *delta_entry) 1309 { 1310 int key_bits; 1311 u32 delta; 1312 const struct delta_zone *delta_zone = delta_entry->delta_zone; 1313 const u8 *memory = delta_zone->memory; 1314 u64 delta_offset = get_delta_entry_offset(delta_entry) + delta_entry->value_bits; 1315 const u8 *addr = memory + delta_offset / BITS_PER_BYTE; 1316 int offset = delta_offset % BITS_PER_BYTE; 1317 u32 data = get_unaligned_le32(addr) >> offset; 1318 1319 addr += sizeof(u32); 1320 key_bits = delta_zone->min_bits; 1321 delta = data & ((1 << key_bits) - 1); 1322 if (delta >= delta_zone->min_keys) { 1323 data >>= key_bits; 1324 if (data == 0) { 1325 key_bits = sizeof(u32) * BITS_PER_BYTE - offset; 1326 while ((data = get_unaligned_le32(addr)) == 0) { 1327 addr += sizeof(u32); 1328 key_bits += sizeof(u32) * BITS_PER_BYTE; 1329 } 1330 } 1331 key_bits += ffs(data); 1332 delta += ((key_bits - delta_zone->min_bits - 1) * delta_zone->incr_keys); 1333 } 1334 delta_entry->delta = delta; 1335 delta_entry->key += delta; 1336 1337 /* Check for a collision, a delta of zero after the start. */ 1338 if (unlikely((delta == 0) && (delta_entry->offset > 0))) { 1339 delta_entry->is_collision = true; 1340 delta_entry->entry_bits = delta_entry->value_bits + key_bits + COLLISION_BITS; 1341 } else { 1342 delta_entry->is_collision = false; 1343 delta_entry->entry_bits = delta_entry->value_bits + key_bits; 1344 } 1345 } 1346 1347 noinline int uds_next_delta_index_entry(struct delta_index_entry *delta_entry) 1348 { 1349 int result; 1350 const struct delta_list *delta_list; 1351 u32 next_offset; 1352 u16 size; 1353 1354 result = assert_not_at_end(delta_entry); 1355 if (result != UDS_SUCCESS) 1356 return result; 1357 1358 delta_list = delta_entry->delta_list; 1359 delta_entry->offset += delta_entry->entry_bits; 1360 size = delta_list->size; 1361 if (unlikely(delta_entry->offset >= size)) { 1362 delta_entry->at_end = true; 1363 delta_entry->delta = 0; 1364 delta_entry->is_collision = false; 1365 result = VDO_ASSERT((delta_entry->offset == size), 1366 "next offset past end of delta list"); 1367 if (result != VDO_SUCCESS) 1368 result = UDS_CORRUPT_DATA; 1369 1370 return result; 1371 } 1372 1373 decode_delta(delta_entry); 1374 1375 next_offset = delta_entry->offset + delta_entry->entry_bits; 1376 if (next_offset > size) { 1377 /* 1378 * This is not an assertion because uds_validate_chapter_index_page() wants to 1379 * handle this error. 
1380 */ 1381 vdo_log_warning("Decoded past the end of the delta list"); 1382 return UDS_CORRUPT_DATA; 1383 } 1384 1385 return UDS_SUCCESS; 1386 } 1387 1388 int uds_remember_delta_index_offset(const struct delta_index_entry *delta_entry) 1389 { 1390 int result; 1391 struct delta_list *delta_list = delta_entry->delta_list; 1392 1393 result = VDO_ASSERT(!delta_entry->is_collision, "entry is not a collision"); 1394 if (result != VDO_SUCCESS) 1395 return result; 1396 1397 delta_list->save_key = delta_entry->key - delta_entry->delta; 1398 delta_list->save_offset = delta_entry->offset; 1399 return UDS_SUCCESS; 1400 } 1401 1402 static void set_delta(struct delta_index_entry *delta_entry, u32 delta) 1403 { 1404 const struct delta_zone *delta_zone = delta_entry->delta_zone; 1405 u32 key_bits = (delta_zone->min_bits + 1406 ((delta_zone->incr_keys - delta_zone->min_keys + delta) / 1407 delta_zone->incr_keys)); 1408 1409 delta_entry->delta = delta; 1410 delta_entry->entry_bits = delta_entry->value_bits + key_bits; 1411 } 1412 1413 static void get_collision_name(const struct delta_index_entry *entry, u8 *name) 1414 { 1415 u64 offset = get_delta_entry_offset(entry) + entry->entry_bits - COLLISION_BITS; 1416 const u8 *addr = entry->delta_zone->memory + offset / BITS_PER_BYTE; 1417 int size = COLLISION_BYTES; 1418 int shift = offset % BITS_PER_BYTE; 1419 1420 while (--size >= 0) 1421 *name++ = get_unaligned_le16(addr++) >> shift; 1422 } 1423 1424 static void set_collision_name(const struct delta_index_entry *entry, const u8 *name) 1425 { 1426 u64 offset = get_delta_entry_offset(entry) + entry->entry_bits - COLLISION_BITS; 1427 u8 *addr = entry->delta_zone->memory + offset / BITS_PER_BYTE; 1428 int size = COLLISION_BYTES; 1429 int shift = offset % BITS_PER_BYTE; 1430 u16 mask = ~((u16) 0xFF << shift); 1431 u16 data; 1432 1433 while (--size >= 0) { 1434 data = (get_unaligned_le16(addr) & mask) | (*name++ << shift); 1435 put_unaligned_le16(data, addr++); 1436 } 1437 } 1438 1439 int uds_get_delta_index_entry(const struct delta_index *delta_index, u32 list_number, 1440 u32 key, const u8 *name, 1441 struct delta_index_entry *delta_entry) 1442 { 1443 int result; 1444 1445 result = uds_start_delta_index_search(delta_index, list_number, key, 1446 delta_entry); 1447 if (result != UDS_SUCCESS) 1448 return result; 1449 1450 do { 1451 result = uds_next_delta_index_entry(delta_entry); 1452 if (result != UDS_SUCCESS) 1453 return result; 1454 } while (!delta_entry->at_end && (key > delta_entry->key)); 1455 1456 result = uds_remember_delta_index_offset(delta_entry); 1457 if (result != UDS_SUCCESS) 1458 return result; 1459 1460 if (!delta_entry->at_end && (key == delta_entry->key)) { 1461 struct delta_index_entry collision_entry = *delta_entry; 1462 1463 for (;;) { 1464 u8 full_name[COLLISION_BYTES]; 1465 1466 result = uds_next_delta_index_entry(&collision_entry); 1467 if (result != UDS_SUCCESS) 1468 return result; 1469 1470 if (collision_entry.at_end || !collision_entry.is_collision) 1471 break; 1472 1473 get_collision_name(&collision_entry, full_name); 1474 if (memcmp(full_name, name, COLLISION_BYTES) == 0) { 1475 *delta_entry = collision_entry; 1476 break; 1477 } 1478 } 1479 } 1480 1481 return UDS_SUCCESS; 1482 } 1483 1484 int uds_get_delta_entry_collision(const struct delta_index_entry *delta_entry, u8 *name) 1485 { 1486 int result; 1487 1488 result = assert_not_at_end(delta_entry); 1489 if (result != UDS_SUCCESS) 1490 return result; 1491 1492 result = VDO_ASSERT(delta_entry->is_collision, 1493 "Cannot get full block 
name from a non-collision delta index entry"); 1494 if (result != VDO_SUCCESS) 1495 return UDS_BAD_STATE; 1496 1497 get_collision_name(delta_entry, name); 1498 return UDS_SUCCESS; 1499 } 1500 1501 u32 uds_get_delta_entry_value(const struct delta_index_entry *delta_entry) 1502 { 1503 return get_field(delta_entry->delta_zone->memory, 1504 get_delta_entry_offset(delta_entry), delta_entry->value_bits); 1505 } 1506 1507 static int assert_mutable_entry(const struct delta_index_entry *delta_entry) 1508 { 1509 int result = VDO_ASSERT((delta_entry->delta_list != &delta_entry->temp_delta_list), 1510 "delta index is mutable"); 1511 if (result != VDO_SUCCESS) 1512 result = UDS_BAD_STATE; 1513 1514 return result; 1515 } 1516 1517 int uds_set_delta_entry_value(const struct delta_index_entry *delta_entry, u32 value) 1518 { 1519 int result; 1520 u32 value_mask = (1 << delta_entry->value_bits) - 1; 1521 1522 result = assert_mutable_entry(delta_entry); 1523 if (result != UDS_SUCCESS) 1524 return result; 1525 1526 result = assert_not_at_end(delta_entry); 1527 if (result != UDS_SUCCESS) 1528 return result; 1529 1530 result = VDO_ASSERT((value & value_mask) == value, 1531 "Value (%u) being set in a delta index is too large (must fit in %u bits)", 1532 value, delta_entry->value_bits); 1533 if (result != VDO_SUCCESS) 1534 return UDS_INVALID_ARGUMENT; 1535 1536 set_field(value, delta_entry->delta_zone->memory, 1537 get_delta_entry_offset(delta_entry), delta_entry->value_bits); 1538 return UDS_SUCCESS; 1539 } 1540 1541 /* 1542 * Extend the memory used by the delta lists by adding growing_size bytes before the list indicated 1543 * by growing_index, then rebalancing the lists in the new chunk. 1544 */ 1545 static int extend_delta_zone(struct delta_zone *delta_zone, u32 growing_index, 1546 size_t growing_size) 1547 { 1548 ktime_t start_time; 1549 ktime_t end_time; 1550 struct delta_list *delta_lists; 1551 u32 i; 1552 size_t used_space; 1553 1554 1555 /* Calculate the amount of space that is or will be in use. */ 1556 start_time = current_time_ns(CLOCK_MONOTONIC); 1557 delta_lists = delta_zone->delta_lists; 1558 used_space = growing_size; 1559 for (i = 0; i <= delta_zone->list_count + 1; i++) 1560 used_space += get_delta_list_byte_size(&delta_lists[i]); 1561 1562 if (delta_zone->size < used_space) 1563 return UDS_OVERFLOW; 1564 1565 /* Compute the new offsets of the delta lists. */ 1566 compute_new_list_offsets(delta_zone, growing_index, growing_size, used_space); 1567 1568 /* 1569 * When we rebalance the delta list, we will include the end guard list in the rebalancing. 1570 * It contains the end guard data, which must be copied. 1571 */ 1572 rebalance_delta_zone(delta_zone, 1, delta_zone->list_count + 1); 1573 end_time = current_time_ns(CLOCK_MONOTONIC); 1574 delta_zone->rebalance_count++; 1575 delta_zone->rebalance_time += ktime_sub(end_time, start_time); 1576 return UDS_SUCCESS; 1577 } 1578 1579 static int insert_bits(struct delta_index_entry *delta_entry, u16 size) 1580 { 1581 u64 free_before; 1582 u64 free_after; 1583 u64 source; 1584 u64 destination; 1585 u32 count; 1586 bool before_flag; 1587 u8 *memory; 1588 struct delta_zone *delta_zone = delta_entry->delta_zone; 1589 struct delta_list *delta_list = delta_entry->delta_list; 1590 /* Compute bits in use before and after the inserted bits. 
*/ 1591 u32 total_size = delta_list->size; 1592 u32 before_size = delta_entry->offset; 1593 u32 after_size = total_size - delta_entry->offset; 1594 1595 if (total_size + size > U16_MAX) { 1596 delta_entry->list_overflow = true; 1597 delta_zone->overflow_count++; 1598 return UDS_OVERFLOW; 1599 } 1600 1601 /* Compute bits available before and after the delta list. */ 1602 free_before = (delta_list[0].start - (delta_list[-1].start + delta_list[-1].size)); 1603 free_after = (delta_list[1].start - (delta_list[0].start + delta_list[0].size)); 1604 1605 if ((size <= free_before) && (size <= free_after)) { 1606 /* 1607 * We have enough space to use either before or after the list. Select the smaller 1608 * amount of data. If it is exactly the same, try to take from the larger amount of 1609 * free space. 1610 */ 1611 if (before_size < after_size) 1612 before_flag = true; 1613 else if (after_size < before_size) 1614 before_flag = false; 1615 else 1616 before_flag = free_before > free_after; 1617 } else if (size <= free_before) { 1618 /* There is space before but not after. */ 1619 before_flag = true; 1620 } else if (size <= free_after) { 1621 /* There is space after but not before. */ 1622 before_flag = false; 1623 } else { 1624 /* 1625 * Neither of the surrounding spaces is large enough for this request. Extend 1626 * and/or rebalance the delta list memory choosing to move the least amount of 1627 * data. 1628 */ 1629 int result; 1630 u32 growing_index = delta_entry->list_number + 1; 1631 1632 before_flag = before_size < after_size; 1633 if (!before_flag) 1634 growing_index++; 1635 result = extend_delta_zone(delta_zone, growing_index, 1636 BITS_TO_BYTES(size)); 1637 if (result != UDS_SUCCESS) 1638 return result; 1639 } 1640 1641 delta_list->size += size; 1642 if (before_flag) { 1643 source = delta_list->start; 1644 destination = source - size; 1645 delta_list->start -= size; 1646 count = before_size; 1647 } else { 1648 source = delta_list->start + delta_entry->offset; 1649 destination = source + size; 1650 count = after_size; 1651 } 1652 1653 memory = delta_zone->memory; 1654 move_bits(memory, source, memory, destination, count); 1655 return UDS_SUCCESS; 1656 } 1657 1658 static void encode_delta(const struct delta_index_entry *delta_entry) 1659 { 1660 u32 temp; 1661 u32 t1; 1662 u32 t2; 1663 u64 offset; 1664 const struct delta_zone *delta_zone = delta_entry->delta_zone; 1665 u8 *memory = delta_zone->memory; 1666 1667 offset = get_delta_entry_offset(delta_entry) + delta_entry->value_bits; 1668 if (delta_entry->delta < delta_zone->min_keys) { 1669 set_field(delta_entry->delta, memory, offset, delta_zone->min_bits); 1670 return; 1671 } 1672 1673 temp = delta_entry->delta - delta_zone->min_keys; 1674 t1 = (temp % delta_zone->incr_keys) + delta_zone->min_keys; 1675 t2 = temp / delta_zone->incr_keys; 1676 set_field(t1, memory, offset, delta_zone->min_bits); 1677 set_zero(memory, offset + delta_zone->min_bits, t2); 1678 set_field(1, memory, offset + delta_zone->min_bits + t2, 1); 1679 } 1680 1681 static void encode_entry(const struct delta_index_entry *delta_entry, u32 value, 1682 const u8 *name) 1683 { 1684 u8 *memory = delta_entry->delta_zone->memory; 1685 u64 offset = get_delta_entry_offset(delta_entry); 1686 1687 set_field(value, memory, offset, delta_entry->value_bits); 1688 encode_delta(delta_entry); 1689 if (name != NULL) 1690 set_collision_name(delta_entry, name); 1691 } 1692 1693 /* 1694 * Create a new entry in the delta index. 
/*
 * Create a new entry in the delta index. If the entry is a collision, the full 256 bit name must
 * be provided.
 */
int uds_put_delta_index_entry(struct delta_index_entry *delta_entry, u32 key, u32 value,
			      const u8 *name)
{
	int result;
	struct delta_zone *delta_zone;

	result = assert_mutable_entry(delta_entry);
	if (result != UDS_SUCCESS)
		return result;

	if (delta_entry->is_collision) {
		/*
		 * The caller wants us to insert a collision entry onto a collision entry. This
		 * happens when we find a collision and attempt to add the name again to the index.
		 * This is normally a fatal error unless we are replaying a closed chapter while we
		 * are rebuilding a volume index.
		 */
		return UDS_DUPLICATE_NAME;
	}

	if (delta_entry->offset < delta_entry->delta_list->save_offset) {
		/*
		 * The saved entry offset is after the new entry and will no longer be valid, so
		 * replace it with the insertion point.
		 */
		result = uds_remember_delta_index_offset(delta_entry);
		if (result != UDS_SUCCESS)
			return result;
	}

	if (name != NULL) {
		/* Insert a collision entry which is placed after this entry. */
		result = assert_not_at_end(delta_entry);
		if (result != UDS_SUCCESS)
			return result;

		result = VDO_ASSERT((key == delta_entry->key),
				    "incorrect key for collision entry");
		if (result != VDO_SUCCESS)
			return result;

		delta_entry->offset += delta_entry->entry_bits;
		set_delta(delta_entry, 0);
		delta_entry->is_collision = true;
		delta_entry->entry_bits += COLLISION_BITS;
		result = insert_bits(delta_entry, delta_entry->entry_bits);
	} else if (delta_entry->at_end) {
		/* Insert a new entry at the end of the delta list. */
		result = VDO_ASSERT((key >= delta_entry->key), "key past end of list");
		if (result != VDO_SUCCESS)
			return result;

		set_delta(delta_entry, key - delta_entry->key);
		delta_entry->key = key;
		delta_entry->at_end = false;
		result = insert_bits(delta_entry, delta_entry->entry_bits);
	} else {
		u16 old_entry_size;
		u16 additional_size;
		struct delta_index_entry next_entry;
		u32 next_value;

		/*
		 * Insert a new entry which requires the delta in the following entry to be
		 * updated.
		 */
		result = VDO_ASSERT((key < delta_entry->key),
				    "key precedes following entry");
		if (result != VDO_SUCCESS)
			return result;

		result = VDO_ASSERT((key >= delta_entry->key - delta_entry->delta),
				    "key affects following entry's delta");
		if (result != VDO_SUCCESS)
			return result;

		old_entry_size = delta_entry->entry_bits;
		next_entry = *delta_entry;
		next_value = uds_get_delta_entry_value(&next_entry);
		set_delta(delta_entry, key - (delta_entry->key - delta_entry->delta));
		delta_entry->key = key;
		set_delta(&next_entry, next_entry.key - key);
		next_entry.offset += delta_entry->entry_bits;
		/* The two new entries are always bigger than the single entry being replaced. */
		additional_size = (delta_entry->entry_bits +
				   next_entry.entry_bits - old_entry_size);
		result = insert_bits(delta_entry, additional_size);
		if (result != UDS_SUCCESS)
			return result;

		encode_entry(&next_entry, next_value, NULL);
	}

	if (result != UDS_SUCCESS)
		return result;

	encode_entry(delta_entry, value, name);
	delta_zone = delta_entry->delta_zone;
	delta_zone->record_count++;
	delta_zone->collision_count += delta_entry->is_collision ? 1 : 0;
	return UDS_SUCCESS;
}
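/*
 * Illustrative sketch of the delta arithmetic in the middle-of-list case above; the helper and
 * its parameter names are hypothetical. Splitting an entry whose key is next_key and whose
 * delta is old_delta at a new key yields two deltas that always sum to old_delta, so no other
 * entry in the list needs to change.
 */
static inline void example_split_delta(u32 next_key, u32 old_delta, u32 new_key,
				       u32 *new_entry_delta, u32 *next_entry_delta)
{
	u32 previous_key = next_key - old_delta;

	/* The new entry measures from the previous entry's key to the new key... */
	*new_entry_delta = new_key - previous_key;
	/* ...and the following entry now measures from the new key to its own key. */
	*next_entry_delta = next_key - new_key;
	/* (*new_entry_delta + *next_entry_delta) == old_delta. */
}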
static void delete_bits(const struct delta_index_entry *delta_entry, int size)
{
	u64 source;
	u64 destination;
	u32 count;
	bool before_flag;
	struct delta_list *delta_list = delta_entry->delta_list;
	u8 *memory = delta_entry->delta_zone->memory;
	/* Compute bits retained before and after the deleted bits. */
	u32 total_size = delta_list->size;
	u32 before_size = delta_entry->offset;
	u32 after_size = total_size - delta_entry->offset - size;

	/*
	 * Determine whether to add to the available space either before or after the delta list.
	 * We prefer to move the least amount of data. If it is exactly the same, try to add to the
	 * smaller amount of free space.
	 */
	if (before_size < after_size) {
		before_flag = true;
	} else if (after_size < before_size) {
		before_flag = false;
	} else {
		u64 free_before =
			(delta_list[0].start - (delta_list[-1].start + delta_list[-1].size));
		u64 free_after =
			(delta_list[1].start - (delta_list[0].start + delta_list[0].size));

		before_flag = (free_before < free_after);
	}

	delta_list->size -= size;
	if (before_flag) {
		source = delta_list->start;
		destination = source + size;
		delta_list->start += size;
		count = before_size;
	} else {
		destination = delta_list->start + delta_entry->offset;
		source = destination + size;
		count = after_size;
	}

	move_bits(memory, source, memory, destination, count);
}

int uds_remove_delta_index_entry(struct delta_index_entry *delta_entry)
{
	int result;
	struct delta_index_entry next_entry;
	struct delta_zone *delta_zone;
	struct delta_list *delta_list;

	result = assert_mutable_entry(delta_entry);
	if (result != UDS_SUCCESS)
		return result;

	next_entry = *delta_entry;
	result = uds_next_delta_index_entry(&next_entry);
	if (result != UDS_SUCCESS)
		return result;

	delta_zone = delta_entry->delta_zone;

	if (delta_entry->is_collision) {
		/* This is a collision entry, so just remove it. */
		delete_bits(delta_entry, delta_entry->entry_bits);
		next_entry.offset = delta_entry->offset;
		delta_zone->collision_count -= 1;
	} else if (next_entry.at_end) {
		/* This entry is at the end of the list, so just remove it. */
		delete_bits(delta_entry, delta_entry->entry_bits);
		next_entry.key -= delta_entry->delta;
		next_entry.offset = delta_entry->offset;
	} else {
		/* The delta in the next entry needs to be updated. */
		u32 next_value = uds_get_delta_entry_value(&next_entry);
		u16 old_size = delta_entry->entry_bits + next_entry.entry_bits;

		if (next_entry.is_collision) {
			next_entry.is_collision = false;
			delta_zone->collision_count -= 1;
		}

		set_delta(&next_entry, delta_entry->delta + next_entry.delta);
		next_entry.offset = delta_entry->offset;
		/* The one new entry is always smaller than the two entries being replaced. */
		delete_bits(delta_entry, old_size - next_entry.entry_bits);
		encode_entry(&next_entry, next_value, NULL);
	}

	delta_zone->record_count--;
	delta_zone->discard_count++;
	*delta_entry = next_entry;

	delta_list = delta_entry->delta_list;
	if (delta_entry->offset < delta_list->save_offset) {
		/* The saved entry offset is no longer valid. */
		delta_list->save_key = 0;
		delta_list->save_offset = 0;
	}

	return UDS_SUCCESS;
}
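/*
 * Illustrative counterpart to the removal case above; this helper is hypothetical. Removing an
 * entry folds its delta into the following entry, so the following entry once again measures
 * the full distance from the preceding entry's key, undoing the split performed on insertion.
 */
static inline u32 example_merge_delta(u32 removed_delta, u32 next_delta)
{
	/* Matches set_delta(&next_entry, delta_entry->delta + next_entry.delta) above. */
	return removed_delta + next_delta;
}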
void uds_get_delta_index_stats(const struct delta_index *delta_index,
			       struct delta_index_stats *stats)
{
	unsigned int z;
	const struct delta_zone *delta_zone;

	memset(stats, 0, sizeof(struct delta_index_stats));
	for (z = 0; z < delta_index->zone_count; z++) {
		delta_zone = &delta_index->delta_zones[z];
		stats->rebalance_time += delta_zone->rebalance_time;
		stats->rebalance_count += delta_zone->rebalance_count;
		stats->record_count += delta_zone->record_count;
		stats->collision_count += delta_zone->collision_count;
		stats->discard_count += delta_zone->discard_count;
		stats->overflow_count += delta_zone->overflow_count;
		stats->list_count += delta_zone->list_count;
	}
}

size_t uds_compute_delta_index_size(u32 entry_count, u32 mean_delta, u32 payload_bits)
{
	u16 min_bits;
	u32 incr_keys;
	u32 min_keys;

	compute_coding_constants(mean_delta, &min_bits, &min_keys, &incr_keys);
	/* On average, each delta is encoded into about min_bits + 1.5 bits. */
	return entry_count * (payload_bits + min_bits + 1) + entry_count / 2;
}

u32 uds_get_delta_index_page_count(u32 entry_count, u32 list_count, u32 mean_delta,
				   u32 payload_bits, size_t bytes_per_page)
{
	unsigned int bits_per_delta_list;
	unsigned int bits_per_page;
	size_t bits_per_index;

	/* Compute the expected number of bits needed for all the entries. */
	bits_per_index = uds_compute_delta_index_size(entry_count, mean_delta,
						      payload_bits);
	bits_per_delta_list = bits_per_index / list_count;

	/* Add in the immutable delta list headers. */
	bits_per_index += list_count * IMMUTABLE_HEADER_SIZE;
	/* Compute the number of usable bits on an immutable index page. */
	bits_per_page = ((bytes_per_page - sizeof(struct delta_page_header)) * BITS_PER_BYTE);
	/*
	 * Reduce the bits per page by one immutable delta list header and one delta list to
	 * account for internal fragmentation.
	 */
	bits_per_page -= IMMUTABLE_HEADER_SIZE + bits_per_delta_list;
	/* Now compute the number of pages needed. */
	return DIV_ROUND_UP(bits_per_index, bits_per_page);
}
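/*
 * Hypothetical usage sketch of the two sizing helpers above; the parameter values are made up
 * for illustration and do not correspond to any particular index configuration. Note that
 * uds_compute_delta_index_size() reports its estimate in bits, which
 * uds_get_delta_index_page_count() then divides into immutable pages.
 */
static inline u32 example_chapter_index_pages(void)
{
	u32 entry_count = 65536;	/* expected entries in the chapter index */
	u32 list_count = 1024;		/* delta lists the entries are spread across */
	u32 mean_delta = 4096;		/* expected mean delta when the index is full */
	u32 payload_bits = 10;		/* bits of value stored with each entry */
	size_t bytes_per_page = 4096;	/* size of one immutable index page */

	return uds_get_delta_index_page_count(entry_count, list_count, mean_delta,
					      payload_bits, bytes_per_page);
}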
" collision" : "", 1967 delta_entry->delta_list->size, 1968 delta_entry->list_overflow ? " overflow" : ""); 1969 delta_entry->list_overflow = false; 1970 } 1971