// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include "physical-zone.h"

#include <linux/list.h>

#include "logger.h"
#include "memory-alloc.h"
#include "permassert.h"

#include "block-map.h"
#include "completion.h"
#include "constants.h"
#include "data-vio.h"
#include "dedupe.h"
#include "encodings.h"
#include "flush.h"
#include "int-map.h"
#include "slab-depot.h"
#include "status-codes.h"
#include "vdo.h"

/* Each user data_vio needs a PBN read lock and write lock. */
#define LOCK_POOL_CAPACITY (2 * MAXIMUM_VDO_USER_VIOS)

struct pbn_lock_implementation {
	enum pbn_lock_type type;
	const char *name;
	const char *release_reason;
};

/* This array must have an entry for every pbn_lock_type value. */
static const struct pbn_lock_implementation LOCK_IMPLEMENTATIONS[] = {
	[VIO_READ_LOCK] = {
		.type = VIO_READ_LOCK,
		.name = "read",
		.release_reason = "candidate duplicate",
	},
	[VIO_WRITE_LOCK] = {
		.type = VIO_WRITE_LOCK,
		.name = "write",
		.release_reason = "newly allocated",
	},
	[VIO_BLOCK_MAP_WRITE_LOCK] = {
		.type = VIO_BLOCK_MAP_WRITE_LOCK,
		.name = "block map write",
		.release_reason = "block map write",
	},
};

static inline bool has_lock_type(const struct pbn_lock *lock, enum pbn_lock_type type)
{
	return (lock->implementation == &LOCK_IMPLEMENTATIONS[type]);
}

/**
 * vdo_is_pbn_read_lock() - Check whether a pbn_lock is a read lock.
 * @lock: The lock to check.
 *
 * Return: True if the lock is a read lock.
 */
bool vdo_is_pbn_read_lock(const struct pbn_lock *lock)
{
	return has_lock_type(lock, VIO_READ_LOCK);
}

static inline void set_pbn_lock_type(struct pbn_lock *lock, enum pbn_lock_type type)
{
	lock->implementation = &LOCK_IMPLEMENTATIONS[type];
}

/**
 * vdo_downgrade_pbn_write_lock() - Downgrade a PBN write lock to a PBN read lock.
 * @lock: The PBN write lock to downgrade.
 * @compressed_write: True if the written block was a compressed block.
 *
 * The lock holder count is cleared and the caller is responsible for setting the new count.
 */
void vdo_downgrade_pbn_write_lock(struct pbn_lock *lock, bool compressed_write)
{
	VDO_ASSERT_LOG_ONLY(!vdo_is_pbn_read_lock(lock),
			    "PBN lock must not already have been downgraded");
	VDO_ASSERT_LOG_ONLY(!has_lock_type(lock, VIO_BLOCK_MAP_WRITE_LOCK),
			    "must not downgrade block map write locks");
	VDO_ASSERT_LOG_ONLY(lock->holder_count == 1,
			    "PBN write lock should have one holder but has %u",
			    lock->holder_count);
	/*
	 * data_vio write locks are downgraded in place--the writer retains the hold on the lock.
	 * If this was a compressed write, the holder has not yet journaled its own increment;
	 * otherwise, it has.
	 */
	lock->increment_limit =
		(compressed_write ? MAXIMUM_REFERENCE_COUNT : MAXIMUM_REFERENCE_COUNT - 1);
	set_pbn_lock_type(lock, VIO_READ_LOCK);
}
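
/*
 * A minimal illustrative sketch of the downgrade-then-claim interplay, assuming for
 * illustration only that MAXIMUM_REFERENCE_COUNT is 254. An uncompressed writer has
 * already journaled its own increment, so the downgraded read lock can grant at most
 * 253 further claims:
 *
 *	vdo_downgrade_pbn_write_lock(lock, false);
 *	lock->holder_count = 1;
 *	if (vdo_claim_pbn_lock_increment(lock)) {
 *		... guaranteed room to journal exactly one more increment ...
 *	}
 */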

/**
 * vdo_claim_pbn_lock_increment() - Try to claim one of the available reference count increments
 *                                  on a read lock.
 * @lock: The PBN read lock from which to claim an increment.
 *
 * Claims may be attempted from any thread. A claim is only valid until the PBN lock is released.
 *
 * Return: true if the claim succeeded, guaranteeing one increment can be made without
 *         overflowing the PBN's reference count.
 */
bool vdo_claim_pbn_lock_increment(struct pbn_lock *lock)
{
	/*
	 * Claim the next free reference atomically since hash locks from multiple hash zone
	 * threads might be concurrently deduplicating against a single PBN lock on a compressed
	 * block. As long as hitting the increment limit will lead to the PBN lock being released
	 * in a sane time-frame, we won't overflow a 32-bit claim counter, allowing a simple add
	 * instead of a compare-and-swap.
	 */
	u32 claim_number = (u32) atomic_add_return(1, &lock->increments_claimed);

	return (claim_number <= lock->increment_limit);
}

/**
 * vdo_assign_pbn_lock_provisional_reference() - Inform a PBN lock that it is responsible for a
 *                                               provisional reference.
 * @lock: The PBN lock.
 */
void vdo_assign_pbn_lock_provisional_reference(struct pbn_lock *lock)
{
	VDO_ASSERT_LOG_ONLY(!lock->has_provisional_reference,
			    "lock must not already have a provisional reference");
	lock->has_provisional_reference = true;
}

/**
 * vdo_unassign_pbn_lock_provisional_reference() - Inform a PBN lock that it is no longer
 *                                                 responsible for a provisional reference.
 * @lock: The PBN lock.
 */
void vdo_unassign_pbn_lock_provisional_reference(struct pbn_lock *lock)
{
	lock->has_provisional_reference = false;
}

/**
 * release_pbn_lock_provisional_reference() - If the lock is responsible for a provisional
 *                                            reference, release that reference.
 * @lock: The lock.
 * @locked_pbn: The PBN covered by the lock.
 * @allocator: The block allocator from which to release the reference.
 *
 * This method is called when the lock is released.
 */
static void release_pbn_lock_provisional_reference(struct pbn_lock *lock,
						   physical_block_number_t locked_pbn,
						   struct block_allocator *allocator)
{
	int result;

	if (!vdo_pbn_lock_has_provisional_reference(lock))
		return;

	result = vdo_release_block_reference(allocator, locked_pbn);
	if (result != VDO_SUCCESS) {
		vdo_log_error_strerror(result,
				       "Failed to release reference to %s physical block %llu",
				       lock->implementation->release_reason,
				       (unsigned long long) locked_pbn);
	}

	vdo_unassign_pbn_lock_provisional_reference(lock);
}

/**
 * union idle_pbn_lock - PBN lock list entries.
 *
 * Unused (idle) PBN locks are kept in a list. Just like in a malloc implementation, the lock
 * structure is unused memory, so we can save a bit of space (and not pollute the lock structure
 * proper) by using a union to overlay the lock structure with the free list.
 */
typedef union {
	/** @entry: Only used while locks are in the pool. */
	struct list_head entry;
	/** @lock: Only used while locks are not in the pool. */
	struct pbn_lock lock;
} idle_pbn_lock;

/**
 * struct pbn_lock_pool - A list of PBN locks.
 *
 * The lock pool is little more than the memory allocated for the locks.
 */
struct pbn_lock_pool {
	/** @capacity: The number of locks allocated for the pool. */
	size_t capacity;
	/** @borrowed: The number of locks currently borrowed from the pool. */
	size_t borrowed;
	/** @idle_list: A list containing all idle PBN lock instances. */
	struct list_head idle_list;
	/** @locks: The memory for all the locks allocated by this pool. */
	idle_pbn_lock locks[];
};
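
/*
 * A sketch of the memory layout, not additional driver code: the union makes each pool
 * slot max(sizeof(struct list_head), sizeof(struct pbn_lock)) bytes, and the flexible
 * array member lets a single allocation cover the header and every slot:
 *
 *	total_bytes = sizeof(struct pbn_lock_pool) + capacity * sizeof(idle_pbn_lock);
 *
 * A slot's bytes are interpreted as a list_head while idle and as a pbn_lock while on
 * loan, never both at once.
 */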

/**
 * return_pbn_lock_to_pool() - Return a pbn lock to its pool.
 * @pool: The pool from which the lock was borrowed.
 * @lock: The last reference to the lock being returned.
 *
 * It must be the last live reference, as if the memory were being freed (the lock memory will
 * be re-initialized or zeroed).
 */
static void return_pbn_lock_to_pool(struct pbn_lock_pool *pool, struct pbn_lock *lock)
{
	idle_pbn_lock *idle;

	/* A bit expensive, but will promptly catch some use-after-free errors. */
	memset(lock, 0, sizeof(*lock));

	idle = container_of(lock, idle_pbn_lock, lock);
	INIT_LIST_HEAD(&idle->entry);
	list_add_tail(&idle->entry, &pool->idle_list);

	VDO_ASSERT_LOG_ONLY(pool->borrowed > 0, "shouldn't return more than borrowed");
	pool->borrowed -= 1;
}

/**
 * make_pbn_lock_pool() - Create a new PBN lock pool and all the lock instances it can loan out.
 * @capacity: The number of PBN locks to allocate for the pool.
 * @pool_ptr: A pointer to receive the new pool.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int make_pbn_lock_pool(size_t capacity, struct pbn_lock_pool **pool_ptr)
{
	size_t i;
	struct pbn_lock_pool *pool;
	int result;

	result = vdo_allocate_extended(struct pbn_lock_pool, capacity, idle_pbn_lock,
				       __func__, &pool);
	if (result != VDO_SUCCESS)
		return result;

	pool->capacity = capacity;
	pool->borrowed = capacity;
	INIT_LIST_HEAD(&pool->idle_list);

	/* Start with every lock "borrowed", then return each one to populate the idle list. */
	for (i = 0; i < capacity; i++)
		return_pbn_lock_to_pool(pool, &pool->locks[i].lock);

	*pool_ptr = pool;
	return VDO_SUCCESS;
}

/**
 * free_pbn_lock_pool() - Free a PBN lock pool.
 * @pool: The lock pool to free.
 *
 * This also frees all the PBN locks it allocated, so the caller must ensure that all locks have
 * been returned to the pool.
 */
static void free_pbn_lock_pool(struct pbn_lock_pool *pool)
{
	if (pool == NULL)
		return;

	VDO_ASSERT_LOG_ONLY(pool->borrowed == 0,
			    "All PBN locks must be returned to the pool before it is freed, but %zu locks are still on loan",
			    pool->borrowed);
	vdo_free(pool);
}
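
/*
 * A minimal usage sketch, not additional driver code: every successful borrow from the
 * helper below must eventually be paired with a return, or free_pbn_lock_pool() will
 * assert that locks are still on loan:
 *
 *	struct pbn_lock *lock;
 *
 *	if (borrow_pbn_lock_from_pool(pool, VIO_WRITE_LOCK, &lock) == VDO_SUCCESS) {
 *		... use the lock ...
 *		return_pbn_lock_to_pool(pool, lock);
 *	}
 */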

/**
 * borrow_pbn_lock_from_pool() - Borrow a PBN lock from the pool and initialize it with the
 *                               provided type.
 * @pool: The pool from which to borrow.
 * @type: The type with which to initialize the lock.
 * @lock_ptr: A pointer to receive the borrowed lock.
 *
 * Pools do not grow on demand or allocate memory, so this will fail if the pool is empty.
 * Borrowed locks are still associated with this pool and must be returned only to this pool.
 *
 * Return: VDO_SUCCESS, or VDO_LOCK_ERROR if the pool is empty.
 */
static int __must_check borrow_pbn_lock_from_pool(struct pbn_lock_pool *pool,
						  enum pbn_lock_type type,
						  struct pbn_lock **lock_ptr)
{
	int result;
	struct list_head *idle_entry;
	idle_pbn_lock *idle;

	if (pool->borrowed >= pool->capacity)
		return vdo_log_error_strerror(VDO_LOCK_ERROR,
					      "no free PBN locks left to borrow");
	pool->borrowed += 1;

	result = VDO_ASSERT(!list_empty(&pool->idle_list),
			    "idle list should not be empty if pool not at capacity");
	if (result != VDO_SUCCESS)
		return result;

	idle_entry = pool->idle_list.prev;
	list_del(idle_entry);
	memset(idle_entry, 0, sizeof(*idle_entry));

	idle = list_entry(idle_entry, idle_pbn_lock, entry);
	idle->lock.holder_count = 0;
	set_pbn_lock_type(&idle->lock, type);

	*lock_ptr = &idle->lock;
	return VDO_SUCCESS;
}

/**
 * initialize_zone() - Initialize a physical zone.
 * @vdo: The vdo to which the zone will belong.
 * @zones: The physical_zones to which the zone being initialized belongs.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int initialize_zone(struct vdo *vdo, struct physical_zones *zones)
{
	int result;
	zone_count_t zone_number = zones->zone_count;
	struct physical_zone *zone = &zones->zones[zone_number];

	result = vdo_int_map_create(VDO_LOCK_MAP_CAPACITY, &zone->pbn_operations);
	if (result != VDO_SUCCESS)
		return result;

	result = make_pbn_lock_pool(LOCK_POOL_CAPACITY, &zone->lock_pool);
	if (result != VDO_SUCCESS) {
		vdo_int_map_free(zone->pbn_operations);
		return result;
	}

	zone->zone_number = zone_number;
	zone->thread_id = vdo->thread_config.physical_threads[zone_number];
	zone->allocator = &vdo->depot->allocators[zone_number];
	/* The zones form a ring; allocation retries rotate through it. */
	zone->next = &zones->zones[(zone_number + 1) % vdo->thread_config.physical_zone_count];
	result = vdo_make_default_thread(vdo, zone->thread_id);
	if (result != VDO_SUCCESS) {
		free_pbn_lock_pool(vdo_forget(zone->lock_pool));
		vdo_int_map_free(zone->pbn_operations);
		return result;
	}

	return result;
}

/**
 * vdo_make_physical_zones() - Make the physical zones for a vdo.
 * @vdo: The vdo being constructed.
 * @zones_ptr: A pointer to hold the zones.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_make_physical_zones(struct vdo *vdo, struct physical_zones **zones_ptr)
{
	struct physical_zones *zones;
	int result;
	zone_count_t zone_count = vdo->thread_config.physical_zone_count;

	if (zone_count == 0)
		return VDO_SUCCESS;

	result = vdo_allocate_extended(struct physical_zones, zone_count,
				       struct physical_zone, __func__, &zones);
	if (result != VDO_SUCCESS)
		return result;

	for (zones->zone_count = 0; zones->zone_count < zone_count; zones->zone_count++) {
		result = initialize_zone(vdo, zones);
		if (result != VDO_SUCCESS) {
			vdo_free_physical_zones(zones);
			return result;
		}
	}

	*zones_ptr = zones;
	return VDO_SUCCESS;
}
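
/*
 * A worked example of the zone ring, not additional driver code: with three physical
 * zones, initialize_zone() links zone->next as 0 -> 1 -> 2 -> 0. A data_vio that fails
 * to allocate hops along this ring until allocation->zone->next->zone_number equals
 * allocation->first_allocation_zone, at which point every zone has been tried once.
 */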

/**
 * vdo_free_physical_zones() - Destroy the physical zones.
 * @zones: The zones to free.
 */
void vdo_free_physical_zones(struct physical_zones *zones)
{
	zone_count_t index;

	if (zones == NULL)
		return;

	for (index = 0; index < zones->zone_count; index++) {
		struct physical_zone *zone = &zones->zones[index];

		free_pbn_lock_pool(vdo_forget(zone->lock_pool));
		vdo_int_map_free(vdo_forget(zone->pbn_operations));
	}

	vdo_free(zones);
}

/**
 * vdo_get_physical_zone_pbn_lock() - Get the lock on a PBN if one exists.
 * @zone: The physical zone responsible for the PBN.
 * @pbn: The physical block number whose lock is desired.
 *
 * Return: The lock or NULL if the PBN is not locked.
 */
struct pbn_lock *vdo_get_physical_zone_pbn_lock(struct physical_zone *zone,
						physical_block_number_t pbn)
{
	return ((zone == NULL) ? NULL : vdo_int_map_get(zone->pbn_operations, pbn));
}

/**
 * vdo_attempt_physical_zone_pbn_lock() - Attempt to lock a physical block in the zone
 *                                        responsible for it.
 * @zone: The physical zone responsible for the PBN.
 * @pbn: The physical block number to lock.
 * @type: The type with which to initialize a new lock.
 * @lock_ptr: A pointer to receive the lock, existing or new.
 *
 * If the PBN is already locked, the existing lock will be returned. Otherwise, a new lock
 * instance will be borrowed from the pool, initialized, and returned. The lock owner will be
 * NULL for a new lock acquired by the caller, who is responsible for setting that field
 * promptly. The lock owner will be non-NULL when there is already an existing lock on the PBN.
 *
 * Return: VDO_SUCCESS or an error.
 */
int vdo_attempt_physical_zone_pbn_lock(struct physical_zone *zone,
				       physical_block_number_t pbn,
				       enum pbn_lock_type type,
				       struct pbn_lock **lock_ptr)
{
	/*
	 * Borrow and prepare a lock from the pool so we don't have to do two int_map accesses
	 * in the common case of no lock contention.
	 */
	struct pbn_lock *lock, *new_lock = NULL;
	int result;

	result = borrow_pbn_lock_from_pool(zone->lock_pool, type, &new_lock);
	if (result != VDO_SUCCESS) {
		VDO_ASSERT_LOG_ONLY(false, "must always be able to borrow a PBN lock");
		return result;
	}

	result = vdo_int_map_put(zone->pbn_operations, pbn, new_lock, false,
				 (void **) &lock);
	if (result != VDO_SUCCESS) {
		return_pbn_lock_to_pool(zone->lock_pool, new_lock);
		return result;
	}

	if (lock != NULL) {
		/* The lock is already held, so we don't need the borrowed one. */
		return_pbn_lock_to_pool(zone->lock_pool, vdo_forget(new_lock));
		result = VDO_ASSERT(lock->holder_count > 0, "physical block %llu lock held",
				    (unsigned long long) pbn);
		if (result != VDO_SUCCESS)
			return result;
		*lock_ptr = lock;
	} else {
		*lock_ptr = new_lock;
	}

	return VDO_SUCCESS;
}
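
/*
 * A minimal usage sketch, not additional driver code: a caller can tell a freshly
 * borrowed lock from an existing one by holder_count, which is zero only for a new lock
 * (allocate_and_lock_block() below relies on exactly this):
 *
 *	struct pbn_lock *lock;
 *
 *	if (vdo_attempt_physical_zone_pbn_lock(zone, pbn, VIO_WRITE_LOCK,
 *					       &lock) == VDO_SUCCESS) {
 *		if (lock->holder_count == 0)
 *			lock->holder_count += 1;	(the caller now holds a new lock)
 *		else
 *			(pbn was already locked by another holder)
 *	}
 */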

/**
 * allocate_and_lock_block() - Attempt to allocate a block from this zone.
 * @allocation: The struct allocation of the data_vio attempting to allocate.
 *
 * If a block is allocated, the recipient will also hold a lock on it.
 *
 * Return: VDO_SUCCESS if a block was allocated, or an error code.
 */
static int allocate_and_lock_block(struct allocation *allocation)
{
	int result;
	struct pbn_lock *lock;

	VDO_ASSERT_LOG_ONLY(allocation->lock == NULL,
			    "must not allocate a block while already holding a lock on one");

	result = vdo_allocate_block(allocation->zone->allocator, &allocation->pbn);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_attempt_physical_zone_pbn_lock(allocation->zone, allocation->pbn,
						    allocation->write_lock_type, &lock);
	if (result != VDO_SUCCESS)
		return result;

	if (lock->holder_count > 0) {
		/* This block is already locked, which should be impossible. */
		return vdo_log_error_strerror(VDO_LOCK_ERROR,
					      "Newly allocated block %llu was spuriously locked (holder_count=%u)",
					      (unsigned long long) allocation->pbn,
					      lock->holder_count);
	}

	/* We've successfully acquired a new lock, so mark it as ours. */
	lock->holder_count += 1;
	allocation->lock = lock;
	vdo_assign_pbn_lock_provisional_reference(lock);
	return VDO_SUCCESS;
}

/**
 * retry_allocation() - Retry allocating a block now that we're done waiting for scrubbing.
 * @waiter: The allocating_vio that was waiting to allocate.
 * @context: The context (unused).
 */
static void retry_allocation(struct vdo_waiter *waiter, void __always_unused *context)
{
	struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);

	/* Now that some slab has scrubbed, restart the allocation process. */
	data_vio->allocation.wait_for_clean_slab = false;
	data_vio->allocation.first_allocation_zone = data_vio->allocation.zone->zone_number;
	continue_data_vio(data_vio);
}
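
/*
 * A worked walkthrough of the retry strategy, not additional driver code. Allocation
 * makes up to two passes around the zone ring:
 *
 *	pass 1: zone 0 -> zone 1 -> zone 2	(no waiting; each returns VDO_NO_SPACE)
 *	pass 2: zone 0 -> zone 1 -> zone 2	(wait for each zone's scrubbing first)
 *
 * continue_allocating() below starts pass 2 by setting wait_for_clean_slab once pass 1
 * has tried every zone; retry_allocation() above restarts the search when a slab has
 * been scrubbed. Only when both passes fail does the data_vio complete with VDO_NO_SPACE.
 */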

/**
 * continue_allocating() - Continue searching for an allocation by enqueuing to wait for
 *                         scrubbing or switching to the next zone.
 * @data_vio: The data_vio attempting to get an allocation.
 *
 * This method should only be called from the error handler set in data_vio_allocate_data_block.
 *
 * Return: true if the allocation process has continued in another zone.
 */
static bool continue_allocating(struct data_vio *data_vio)
{
	struct allocation *allocation = &data_vio->allocation;
	struct physical_zone *zone = allocation->zone;
	struct vdo_completion *completion = &data_vio->vio.completion;
	int result = VDO_SUCCESS;
	bool was_waiting = allocation->wait_for_clean_slab;
	bool tried_all = (allocation->first_allocation_zone == zone->next->zone_number);

	vdo_reset_completion(completion);

	if (tried_all && !was_waiting) {
		/*
		 * We've already looked in all the zones, and found nothing. So go through the
		 * zones again, and wait for each to scrub before trying to allocate.
		 */
		allocation->wait_for_clean_slab = true;
		allocation->first_allocation_zone = zone->zone_number;
	}

	if (allocation->wait_for_clean_slab) {
		data_vio->waiter.callback = retry_allocation;
		result = vdo_enqueue_clean_slab_waiter(zone->allocator,
						       &data_vio->waiter);
		if (result == VDO_SUCCESS) {
			/* We've enqueued to wait for a slab to be scrubbed. */
			return true;
		}

		if ((result != VDO_NO_SPACE) || (was_waiting && tried_all)) {
			vdo_set_completion_result(completion, result);
			return false;
		}
	}

	allocation->zone = zone->next;
	completion->callback_thread_id = allocation->zone->thread_id;
	vdo_launch_completion(completion);
	return true;
}

/**
 * vdo_allocate_block_in_zone() - Attempt to allocate a block in the current physical zone, and
 *                                if that fails try the next if possible.
 * @data_vio: The data_vio needing an allocation.
 *
 * Return: True if a block was allocated. If not, the data_vio will have been dispatched, so the
 *         caller must not touch it.
 */
bool vdo_allocate_block_in_zone(struct data_vio *data_vio)
{
	int result = allocate_and_lock_block(&data_vio->allocation);

	if (result == VDO_SUCCESS)
		return true;

	if ((result != VDO_NO_SPACE) || !continue_allocating(data_vio))
		continue_data_vio_with_error(data_vio, result);

	return false;
}

/**
 * vdo_release_physical_zone_pbn_lock() - Release a physical block lock if it is held and return
 *                                        it to the lock pool.
 * @zone: The physical zone in which the lock was obtained.
 * @locked_pbn: The physical block number to unlock.
 * @lock: The lock being released.
 *
 * It must be the last live reference, as if the memory were being freed (the lock memory will
 * be re-initialized or zeroed).
 */
void vdo_release_physical_zone_pbn_lock(struct physical_zone *zone,
					physical_block_number_t locked_pbn,
					struct pbn_lock *lock)
{
	struct pbn_lock *holder;

	if (lock == NULL)
		return;

	VDO_ASSERT_LOG_ONLY(lock->holder_count > 0,
			    "should not be releasing a lock that is not held");

	lock->holder_count -= 1;
	if (lock->holder_count > 0) {
		/* The lock was shared and is still referenced, so don't release it yet. */
		return;
	}

	holder = vdo_int_map_remove(zone->pbn_operations, locked_pbn);
	VDO_ASSERT_LOG_ONLY((lock == holder), "physical block lock mismatch for block %llu",
			    (unsigned long long) locked_pbn);

	release_pbn_lock_provisional_reference(lock, locked_pbn, zone->allocator);
	return_pbn_lock_to_pool(zone->lock_pool, lock);
}

/**
 * vdo_dump_physical_zone() - Dump information about a physical zone to the log for debugging.
 * @zone: The zone to dump.
 */
void vdo_dump_physical_zone(const struct physical_zone *zone)
{
	vdo_dump_block_allocator(zone->allocator);
}