// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
 */

#include <linux/mm.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/bitops.h>
#include <linux/bitmap.h>
#include <linux/device-mapper.h>

#include "persistent-data/dm-bitset.h"
#include "persistent-data/dm-space-map.h"
#include "persistent-data/dm-block-manager.h"
#include "persistent-data/dm-transaction-manager.h"

#include "dm-clone-metadata.h"

#define DM_MSG_PREFIX "clone metadata"

#define SUPERBLOCK_LOCATION 0
#define SUPERBLOCK_MAGIC 0x8af27f64
#define SUPERBLOCK_CSUM_XOR 257649492

#define DM_CLONE_MAX_CONCURRENT_LOCKS 5

#define UUID_LEN 16

/* Min and max dm-clone metadata versions supported */
#define DM_CLONE_MIN_METADATA_VERSION 1
#define DM_CLONE_MAX_METADATA_VERSION 1

/*
 * On-disk metadata layout
 *
 * The structure is packed and every multi-byte field is little-endian, so
 * neither the order nor the width of the fields may change.
 */
struct superblock_disk {
	/*
	 * Checksum produced by dm_bm_checksum() over the rest of the block,
	 * starting at 'flags', with SUPERBLOCK_CSUM_XOR.
	 */
	__le32 csum;
	/* Written as 0 whenever the superblock is prepared */
	__le32 flags;
	/* Location of this block; filled in on write, verified on read */
	__le64 blocknr;

	/* Currently unused; written as all zeroes */
	__u8 uuid[UUID_LEN];
	/* Must equal SUPERBLOCK_MAGIC */
	__le64 magic;
	/*
	 * Metadata format version; accepted when it lies between
	 * DM_CLONE_MIN_METADATA_VERSION and DM_CLONE_MAX_METADATA_VERSION.
	 */
	__le32 version;

	/* Copy of the metadata space map root */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/*
	 * Region and target size stored at commit time; opening existing
	 * metadata fails when they differ from the requested values.
	 */
	__le64 region_size;
	__le64 target_size;

	/* Root of the on-disk bitset that tracks hydrated regions */
	__le64 bitset_root;
} __packed;

/*
 * Region and Dirty bitmaps.
 *
 * dm-clone logically splits the source and destination devices in regions of
 * fixed size. The destination device's regions are gradually hydrated, i.e.,
 * we copy (clone) the source's regions to the destination device. Eventually,
 * all regions will get hydrated and all I/O will be served from the
 * destination device.
 *
 * We maintain an on-disk bitmap which tracks the state of each of the
 * destination device's regions, i.e., whether they are hydrated or not.
 *
 * To save constantly doing look ups on disk we keep an in core copy of the
 * on-disk bitmap, the region_map.
 *
 * In order to track which regions are hydrated during a metadata transaction,
 * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
 * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
 * tracks the regions that got hydrated during the current metadata
 * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
 * the dirty_regions bitmap.
 *
 * This allows us to precisely track the regions that were hydrated during the
 * current metadata transaction and update the metadata accordingly, when we
 * commit the current transaction. This is important because dm-clone should
 * only commit the metadata of regions that were properly flushed to the
 * destination device beforehand. Otherwise, in case of a crash, we could end
 * up with a corrupted dm-clone device.
 *
 * When a region finishes hydrating, dm-clone calls
 * dm_clone_set_region_hydrated(), or for discard requests
 * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
 * and dmap.
 *
 * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
 * and update the on-disk metadata accordingly. Thus, we don't have to flush to
 * disk the whole region_map. We can just flush the dirty region_map bits.
 *
 * We use the helper dmap->dirty_words bitmap, which is smaller than the
 * original region_map, to reduce the amount of memory accesses during a
 * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
 * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
 * accesses.
 *
 * We could directly update the on-disk bitmap, when dm-clone calls either
 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
 * inserts significant metadata I/O overhead in dm-clone's I/O path.
 * Also, as these two functions don't block, we can call them in interrupt
 * context, e.g., in a hooked overwrite bio's completion routine, and further
 * reduce the I/O completion latency.
 *
 * We maintain two dirty bitmap sets. During a metadata commit we atomically
 * swap the currently used dmap with the unused one. This allows the metadata
 * update functions to run concurrently with an ongoing commit.
 */
struct dirty_map {
	/* One bit per long of dirty_regions; set when that long has dirty bits */
	unsigned long *dirty_words;
	/* One bit per region; set when the region got hydrated */
	unsigned long *dirty_regions;
	/* Set to 1 when a bit is recorded, reset to 0 after a successful flush */
	unsigned int changed;
};

struct dm_clone_metadata {
	/* The metadata block device */
	struct block_device *bdev;

	/* Sizes handed to dm_clone_metadata_open() */
	sector_t target_size;
	sector_t region_size;
	/* target_size divided by region_size, rounded up */
	unsigned long nr_regions;
	/* Number of longs needed to hold nr_regions bits */
	unsigned long nr_words;

	/* Spinlock protecting the region and dirty bitmaps. */
	spinlock_t bitmap_lock;
	struct dirty_map dmap[2];
	/* The dirty map that the update functions currently record into */
	struct dirty_map *current_dmap;

	/* Protected by lock */
	struct dirty_map *committing_dmap;

	/*
	 * In core copy of the on-disk bitmap to save constantly doing look ups
	 * on disk.
	 */
	unsigned long *region_map;

	/* Protected by bitmap_lock */
	unsigned int read_only;

	/* Persistent-data handles: block, space map and transaction managers */
	struct dm_block_manager *bm;
	struct dm_space_map *sm;
	struct dm_transaction_manager *tm;

	/* Taken by the commit, abort, reload, mode and query entry points */
	struct rw_semaphore lock;

	struct dm_disk_bitset bitset_info;
	/* Current root of the on-disk bitset */
	dm_block_t bitset_root;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* Set once region_map is found to be completely set */
	bool hydration_done:1;
	/*
	 * Set when dm_clone_metadata_abort() fails to re-create the persistent
	 * data structures; the metadata can then be neither read nor written.
	 */
	bool fail_io:1;
};

/*---------------------------------------------------------------------------*/

/*
 * Superblock validation.
165 */ 166 static void sb_prepare_for_write(const struct dm_block_validator *v, 167 struct dm_block *b, size_t sb_block_size) 168 { 169 struct superblock_disk *sb; 170 u32 csum; 171 172 sb = dm_block_data(b); 173 sb->blocknr = cpu_to_le64(dm_block_location(b)); 174 175 csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), 176 SUPERBLOCK_CSUM_XOR); 177 sb->csum = cpu_to_le32(csum); 178 } 179 180 static int sb_check(const struct dm_block_validator *v, struct dm_block *b, 181 size_t sb_block_size) 182 { 183 struct superblock_disk *sb; 184 u32 csum, metadata_version; 185 186 sb = dm_block_data(b); 187 188 if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) { 189 DMERR("Superblock check failed: blocknr %llu, expected %llu", 190 le64_to_cpu(sb->blocknr), 191 (unsigned long long)dm_block_location(b)); 192 return -ENOTBLK; 193 } 194 195 if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) { 196 DMERR("Superblock check failed: magic %llu, expected %llu", 197 le64_to_cpu(sb->magic), 198 (unsigned long long)SUPERBLOCK_MAGIC); 199 return -EILSEQ; 200 } 201 202 csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), 203 SUPERBLOCK_CSUM_XOR); 204 if (sb->csum != cpu_to_le32(csum)) { 205 DMERR("Superblock check failed: checksum %u, expected %u", 206 csum, le32_to_cpu(sb->csum)); 207 return -EILSEQ; 208 } 209 210 /* Check metadata version */ 211 metadata_version = le32_to_cpu(sb->version); 212 if (metadata_version < DM_CLONE_MIN_METADATA_VERSION || 213 metadata_version > DM_CLONE_MAX_METADATA_VERSION) { 214 DMERR("Clone metadata version %u found, but only versions between %u and %u supported.", 215 metadata_version, DM_CLONE_MIN_METADATA_VERSION, 216 DM_CLONE_MAX_METADATA_VERSION); 217 return -EINVAL; 218 } 219 220 return 0; 221 } 222 223 static const struct dm_block_validator sb_validator = { 224 .name = "superblock", 225 .prepare_for_write = sb_prepare_for_write, 226 .check = sb_check 227 }; 228 229 /* 230 * Check if the superblock is formatted or not. 
We consider the superblock to 231 * be formatted in case we find non-zero bytes in it. 232 */ 233 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted) 234 { 235 int r; 236 unsigned int i, nr_words; 237 struct dm_block *sblock; 238 __le64 *data_le, zero = cpu_to_le64(0); 239 240 /* 241 * We don't use a validator here because the superblock could be all 242 * zeroes. 243 */ 244 r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock); 245 if (r) { 246 DMERR("Failed to read_lock superblock"); 247 return r; 248 } 249 250 data_le = dm_block_data(sblock); 251 *formatted = false; 252 253 /* This assumes that the block size is a multiple of 8 bytes */ 254 BUG_ON(dm_bm_block_size(bm) % sizeof(__le64)); 255 nr_words = dm_bm_block_size(bm) / sizeof(__le64); 256 for (i = 0; i < nr_words; i++) { 257 if (data_le[i] != zero) { 258 *formatted = true; 259 break; 260 } 261 } 262 263 dm_bm_unlock(sblock); 264 265 return 0; 266 } 267 268 /*---------------------------------------------------------------------------*/ 269 270 /* 271 * Low-level metadata handling. 
272 */ 273 static inline int superblock_read_lock(struct dm_clone_metadata *cmd, 274 struct dm_block **sblock) 275 { 276 return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock); 277 } 278 279 static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd, 280 struct dm_block **sblock) 281 { 282 return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock); 283 } 284 285 static int __copy_sm_root(struct dm_clone_metadata *cmd) 286 { 287 int r; 288 size_t root_size; 289 290 r = dm_sm_root_size(cmd->sm, &root_size); 291 if (r) 292 return r; 293 294 return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size); 295 } 296 297 /* Save dm-clone metadata in superblock */ 298 static void __prepare_superblock(struct dm_clone_metadata *cmd, 299 struct superblock_disk *sb) 300 { 301 sb->flags = cpu_to_le32(0UL); 302 303 /* FIXME: UUID is currently unused */ 304 memset(sb->uuid, 0, sizeof(sb->uuid)); 305 306 sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC); 307 sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION); 308 309 /* Save the metadata space_map root */ 310 memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root, 311 sizeof(cmd->metadata_space_map_root)); 312 313 sb->region_size = cpu_to_le64(cmd->region_size); 314 sb->target_size = cpu_to_le64(cmd->target_size); 315 sb->bitset_root = cpu_to_le64(cmd->bitset_root); 316 } 317 318 static int __open_metadata(struct dm_clone_metadata *cmd) 319 { 320 int r; 321 struct dm_block *sblock; 322 struct superblock_disk *sb; 323 324 r = superblock_read_lock(cmd, &sblock); 325 326 if (r) { 327 DMERR("Failed to read_lock superblock"); 328 return r; 329 } 330 331 sb = dm_block_data(sblock); 332 333 /* Verify that target_size and region_size haven't changed. 
*/ 334 if (cmd->region_size != le64_to_cpu(sb->region_size) || 335 cmd->target_size != le64_to_cpu(sb->target_size)) { 336 DMERR("Region and/or target size don't match the ones in metadata"); 337 r = -EINVAL; 338 goto out_with_lock; 339 } 340 341 r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION, 342 sb->metadata_space_map_root, 343 sizeof(sb->metadata_space_map_root), 344 &cmd->tm, &cmd->sm); 345 346 if (r) { 347 DMERR("dm_tm_open_with_sm failed"); 348 goto out_with_lock; 349 } 350 351 dm_disk_bitset_init(cmd->tm, &cmd->bitset_info); 352 cmd->bitset_root = le64_to_cpu(sb->bitset_root); 353 354 out_with_lock: 355 dm_bm_unlock(sblock); 356 357 return r; 358 } 359 360 static int __format_metadata(struct dm_clone_metadata *cmd) 361 { 362 int r; 363 struct dm_block *sblock; 364 struct superblock_disk *sb; 365 366 r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm); 367 if (r) { 368 DMERR("Failed to create transaction manager"); 369 return r; 370 } 371 372 dm_disk_bitset_init(cmd->tm, &cmd->bitset_info); 373 374 r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root); 375 if (r) { 376 DMERR("Failed to create empty on-disk bitset"); 377 goto err_with_tm; 378 } 379 380 r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0, 381 cmd->nr_regions, false, &cmd->bitset_root); 382 if (r) { 383 DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions); 384 goto err_with_tm; 385 } 386 387 /* Flush to disk all blocks, except the superblock */ 388 r = dm_tm_pre_commit(cmd->tm); 389 if (r) { 390 DMERR("dm_tm_pre_commit failed"); 391 goto err_with_tm; 392 } 393 394 r = __copy_sm_root(cmd); 395 if (r) { 396 DMERR("__copy_sm_root failed"); 397 goto err_with_tm; 398 } 399 400 r = superblock_write_lock_zero(cmd, &sblock); 401 if (r) { 402 DMERR("Failed to write_lock superblock"); 403 goto err_with_tm; 404 } 405 406 sb = dm_block_data(sblock); 407 __prepare_superblock(cmd, sb); 408 r = dm_tm_commit(cmd->tm, sblock); 409 if (r) { 410 
DMERR("Failed to commit superblock"); 411 goto err_with_tm; 412 } 413 414 return 0; 415 416 err_with_tm: 417 dm_sm_destroy(cmd->sm); 418 dm_tm_destroy(cmd->tm); 419 420 return r; 421 } 422 423 static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device) 424 { 425 int r; 426 bool formatted = false; 427 428 r = __superblock_all_zeroes(cmd->bm, &formatted); 429 if (r) 430 return r; 431 432 if (!formatted) 433 return may_format_device ? __format_metadata(cmd) : -EPERM; 434 435 return __open_metadata(cmd); 436 } 437 438 static int __create_persistent_data_structures(struct dm_clone_metadata *cmd, 439 bool may_format_device) 440 { 441 int r; 442 443 /* Create block manager */ 444 cmd->bm = dm_block_manager_create(cmd->bdev, 445 DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, 446 DM_CLONE_MAX_CONCURRENT_LOCKS); 447 if (IS_ERR(cmd->bm)) { 448 DMERR("Failed to create block manager"); 449 return PTR_ERR(cmd->bm); 450 } 451 452 r = __open_or_format_metadata(cmd, may_format_device); 453 if (r) 454 dm_block_manager_destroy(cmd->bm); 455 456 return r; 457 } 458 459 static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd) 460 { 461 dm_sm_destroy(cmd->sm); 462 dm_tm_destroy(cmd->tm); 463 dm_block_manager_destroy(cmd->bm); 464 } 465 466 /*---------------------------------------------------------------------------*/ 467 468 static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words, 469 unsigned long nr_regions) 470 { 471 dmap->changed = 0; 472 473 dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL); 474 if (!dmap->dirty_words) 475 return -ENOMEM; 476 477 dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL); 478 if (!dmap->dirty_regions) { 479 kvfree(dmap->dirty_words); 480 return -ENOMEM; 481 } 482 483 return 0; 484 } 485 486 static void __dirty_map_exit(struct dirty_map *dmap) 487 { 488 kvfree(dmap->dirty_words); 489 kvfree(dmap->dirty_regions); 490 } 491 492 static int 
dirty_map_init(struct dm_clone_metadata *cmd) 493 { 494 if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) { 495 DMERR("Failed to allocate dirty bitmap"); 496 return -ENOMEM; 497 } 498 499 if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) { 500 DMERR("Failed to allocate dirty bitmap"); 501 __dirty_map_exit(&cmd->dmap[0]); 502 return -ENOMEM; 503 } 504 505 cmd->current_dmap = &cmd->dmap[0]; 506 cmd->committing_dmap = NULL; 507 508 return 0; 509 } 510 511 static void dirty_map_exit(struct dm_clone_metadata *cmd) 512 { 513 __dirty_map_exit(&cmd->dmap[0]); 514 __dirty_map_exit(&cmd->dmap[1]); 515 } 516 517 static int __load_bitset_in_core(struct dm_clone_metadata *cmd) 518 { 519 int r; 520 unsigned long i; 521 struct dm_bitset_cursor c; 522 523 /* Flush bitset cache */ 524 r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root); 525 if (r) 526 return r; 527 528 r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c); 529 if (r) 530 return r; 531 532 for (i = 0; ; i++) { 533 if (dm_bitset_cursor_get_value(&c)) 534 __set_bit(i, cmd->region_map); 535 else 536 __clear_bit(i, cmd->region_map); 537 538 if (i >= (cmd->nr_regions - 1)) 539 break; 540 541 r = dm_bitset_cursor_next(&c); 542 543 if (r) 544 break; 545 } 546 547 dm_bitset_cursor_end(&c); 548 549 return r; 550 } 551 552 struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev, 553 sector_t target_size, 554 sector_t region_size) 555 { 556 int r; 557 struct dm_clone_metadata *cmd; 558 559 cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); 560 if (!cmd) { 561 DMERR("Failed to allocate memory for dm-clone metadata"); 562 return ERR_PTR(-ENOMEM); 563 } 564 565 cmd->bdev = bdev; 566 cmd->target_size = target_size; 567 cmd->region_size = region_size; 568 cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size); 569 cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions); 570 571 init_rwsem(&cmd->lock); 572 
spin_lock_init(&cmd->bitmap_lock); 573 cmd->read_only = 0; 574 cmd->fail_io = false; 575 cmd->hydration_done = false; 576 577 cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL); 578 if (!cmd->region_map) { 579 DMERR("Failed to allocate memory for region bitmap"); 580 r = -ENOMEM; 581 goto out_with_md; 582 } 583 584 r = __create_persistent_data_structures(cmd, true); 585 if (r) 586 goto out_with_region_map; 587 588 r = __load_bitset_in_core(cmd); 589 if (r) { 590 DMERR("Failed to load on-disk region map"); 591 goto out_with_pds; 592 } 593 594 r = dirty_map_init(cmd); 595 if (r) 596 goto out_with_pds; 597 598 if (bitmap_full(cmd->region_map, cmd->nr_regions)) 599 cmd->hydration_done = true; 600 601 return cmd; 602 603 out_with_pds: 604 __destroy_persistent_data_structures(cmd); 605 606 out_with_region_map: 607 kvfree(cmd->region_map); 608 609 out_with_md: 610 kfree(cmd); 611 612 return ERR_PTR(r); 613 } 614 615 void dm_clone_metadata_close(struct dm_clone_metadata *cmd) 616 { 617 if (!cmd->fail_io) 618 __destroy_persistent_data_structures(cmd); 619 620 dirty_map_exit(cmd); 621 kvfree(cmd->region_map); 622 kfree(cmd); 623 } 624 625 bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd) 626 { 627 return cmd->hydration_done; 628 } 629 630 bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr) 631 { 632 return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map); 633 } 634 635 bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd, 636 unsigned long start, unsigned long nr_regions) 637 { 638 unsigned long bit; 639 640 if (dm_clone_is_hydration_done(cmd)) 641 return true; 642 643 bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start); 644 645 return (bit >= (start + nr_regions)); 646 } 647 648 unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd) 649 { 650 return bitmap_weight(cmd->region_map, cmd->nr_regions); 651 } 652 653 unsigned long 
dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd, 654 unsigned long start) 655 { 656 return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start); 657 } 658 659 static int __update_metadata_word(struct dm_clone_metadata *cmd, 660 unsigned long *dirty_regions, 661 unsigned long word) 662 { 663 int r; 664 unsigned long index = word * BITS_PER_LONG; 665 unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG); 666 667 while (index < max_index) { 668 if (test_bit(index, dirty_regions)) { 669 r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root, 670 index, &cmd->bitset_root); 671 if (r) { 672 DMERR("dm_bitset_set_bit failed"); 673 return r; 674 } 675 __clear_bit(index, dirty_regions); 676 } 677 index++; 678 } 679 680 return 0; 681 } 682 683 static int __metadata_commit(struct dm_clone_metadata *cmd) 684 { 685 int r; 686 struct dm_block *sblock; 687 struct superblock_disk *sb; 688 689 /* Flush bitset cache */ 690 r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root); 691 if (r) { 692 DMERR("dm_bitset_flush failed"); 693 return r; 694 } 695 696 /* Flush to disk all blocks, except the superblock */ 697 r = dm_tm_pre_commit(cmd->tm); 698 if (r) { 699 DMERR("dm_tm_pre_commit failed"); 700 return r; 701 } 702 703 /* Save the space map root in cmd->metadata_space_map_root */ 704 r = __copy_sm_root(cmd); 705 if (r) { 706 DMERR("__copy_sm_root failed"); 707 return r; 708 } 709 710 /* Lock the superblock */ 711 r = superblock_write_lock_zero(cmd, &sblock); 712 if (r) { 713 DMERR("Failed to write_lock superblock"); 714 return r; 715 } 716 717 /* Save the metadata in superblock */ 718 sb = dm_block_data(sblock); 719 __prepare_superblock(cmd, sb); 720 721 /* Unlock superblock and commit it to disk */ 722 r = dm_tm_commit(cmd->tm, sblock); 723 if (r) { 724 DMERR("Failed to commit superblock"); 725 return r; 726 } 727 728 /* 729 * FIXME: Find a more efficient way to check if the hydration is done. 
730 */ 731 if (bitmap_full(cmd->region_map, cmd->nr_regions)) 732 cmd->hydration_done = true; 733 734 return 0; 735 } 736 737 static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap) 738 { 739 int r; 740 unsigned long word; 741 742 word = 0; 743 do { 744 word = find_next_bit(dmap->dirty_words, cmd->nr_words, word); 745 746 if (word == cmd->nr_words) 747 break; 748 749 r = __update_metadata_word(cmd, dmap->dirty_regions, word); 750 751 if (r) 752 return r; 753 754 __clear_bit(word, dmap->dirty_words); 755 word++; 756 } while (word < cmd->nr_words); 757 758 r = __metadata_commit(cmd); 759 760 if (r) 761 return r; 762 763 /* Update the changed flag */ 764 spin_lock_irq(&cmd->bitmap_lock); 765 dmap->changed = 0; 766 spin_unlock_irq(&cmd->bitmap_lock); 767 768 return 0; 769 } 770 771 int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd) 772 { 773 int r = 0; 774 struct dirty_map *dmap, *next_dmap; 775 776 down_write(&cmd->lock); 777 778 if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { 779 r = -EPERM; 780 goto out; 781 } 782 783 /* Get current dirty bitmap */ 784 dmap = cmd->current_dmap; 785 786 /* Get next dirty bitmap */ 787 next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0]; 788 789 /* 790 * The last commit failed, so we don't have a clean dirty-bitmap to 791 * use. 
792 */ 793 if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) { 794 r = -EINVAL; 795 goto out; 796 } 797 798 /* Swap dirty bitmaps */ 799 spin_lock_irq(&cmd->bitmap_lock); 800 cmd->current_dmap = next_dmap; 801 spin_unlock_irq(&cmd->bitmap_lock); 802 803 /* Set old dirty bitmap as currently committing */ 804 cmd->committing_dmap = dmap; 805 out: 806 up_write(&cmd->lock); 807 808 return r; 809 } 810 811 int dm_clone_metadata_commit(struct dm_clone_metadata *cmd) 812 { 813 int r = -EPERM; 814 815 down_write(&cmd->lock); 816 817 if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) 818 goto out; 819 820 if (WARN_ON(!cmd->committing_dmap)) { 821 r = -EINVAL; 822 goto out; 823 } 824 825 r = __flush_dmap(cmd, cmd->committing_dmap); 826 if (!r) { 827 /* Clear committing dmap */ 828 cmd->committing_dmap = NULL; 829 } 830 out: 831 up_write(&cmd->lock); 832 833 return r; 834 } 835 836 int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr) 837 { 838 int r = 0; 839 struct dirty_map *dmap; 840 unsigned long word, flags; 841 842 if (unlikely(region_nr >= cmd->nr_regions)) { 843 DMERR("Region %lu out of range (total number of regions %lu)", 844 region_nr, cmd->nr_regions); 845 return -ERANGE; 846 } 847 848 word = region_nr / BITS_PER_LONG; 849 850 spin_lock_irqsave(&cmd->bitmap_lock, flags); 851 852 if (cmd->read_only) { 853 r = -EPERM; 854 goto out; 855 } 856 857 dmap = cmd->current_dmap; 858 859 __set_bit(word, dmap->dirty_words); 860 __set_bit(region_nr, dmap->dirty_regions); 861 __set_bit(region_nr, cmd->region_map); 862 dmap->changed = 1; 863 864 out: 865 spin_unlock_irqrestore(&cmd->bitmap_lock, flags); 866 867 return r; 868 } 869 870 int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start, 871 unsigned long nr_regions) 872 { 873 int r = 0; 874 struct dirty_map *dmap; 875 unsigned long word, region_nr; 876 877 if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start || 878 (start + nr_regions) > 
cmd->nr_regions)) { 879 DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)", 880 start, nr_regions, cmd->nr_regions); 881 return -ERANGE; 882 } 883 884 spin_lock_irq(&cmd->bitmap_lock); 885 886 if (cmd->read_only) { 887 r = -EPERM; 888 goto out; 889 } 890 891 dmap = cmd->current_dmap; 892 for (region_nr = start; region_nr < (start + nr_regions); region_nr++) { 893 if (!test_bit(region_nr, cmd->region_map)) { 894 word = region_nr / BITS_PER_LONG; 895 __set_bit(word, dmap->dirty_words); 896 __set_bit(region_nr, dmap->dirty_regions); 897 __set_bit(region_nr, cmd->region_map); 898 dmap->changed = 1; 899 } 900 } 901 out: 902 spin_unlock_irq(&cmd->bitmap_lock); 903 904 return r; 905 } 906 907 /* 908 * WARNING: This must not be called concurrently with either 909 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes 910 * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only 911 * exception is after setting the metadata to read-only mode, using 912 * dm_clone_metadata_set_read_only(). 913 * 914 * We don't take the spinlock because __load_bitset_in_core() does I/O, so it 915 * may block. 
916 */ 917 int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd) 918 { 919 int r = -EINVAL; 920 921 down_write(&cmd->lock); 922 923 if (cmd->fail_io) 924 goto out; 925 926 r = __load_bitset_in_core(cmd); 927 out: 928 up_write(&cmd->lock); 929 930 return r; 931 } 932 933 bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd) 934 { 935 bool r; 936 unsigned long flags; 937 938 spin_lock_irqsave(&cmd->bitmap_lock, flags); 939 r = cmd->dmap[0].changed || cmd->dmap[1].changed; 940 spin_unlock_irqrestore(&cmd->bitmap_lock, flags); 941 942 return r; 943 } 944 945 int dm_clone_metadata_abort(struct dm_clone_metadata *cmd) 946 { 947 int r = -EPERM; 948 949 down_write(&cmd->lock); 950 951 if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) 952 goto out; 953 954 __destroy_persistent_data_structures(cmd); 955 956 r = __create_persistent_data_structures(cmd, false); 957 if (r) { 958 /* If something went wrong we can neither write nor read the metadata */ 959 cmd->fail_io = true; 960 } 961 out: 962 up_write(&cmd->lock); 963 964 return r; 965 } 966 967 void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd) 968 { 969 down_write(&cmd->lock); 970 971 spin_lock_irq(&cmd->bitmap_lock); 972 cmd->read_only = 1; 973 spin_unlock_irq(&cmd->bitmap_lock); 974 975 if (!cmd->fail_io) 976 dm_bm_set_read_only(cmd->bm); 977 978 up_write(&cmd->lock); 979 } 980 981 void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd) 982 { 983 down_write(&cmd->lock); 984 985 spin_lock_irq(&cmd->bitmap_lock); 986 cmd->read_only = 0; 987 spin_unlock_irq(&cmd->bitmap_lock); 988 989 if (!cmd->fail_io) 990 dm_bm_set_read_write(cmd->bm); 991 992 up_write(&cmd->lock); 993 } 994 995 int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd, 996 dm_block_t *result) 997 { 998 int r = -EINVAL; 999 1000 down_read(&cmd->lock); 1001 1002 if (!cmd->fail_io) 1003 r = dm_sm_get_nr_free(cmd->sm, result); 1004 1005 up_read(&cmd->lock); 1006 1007 return r; 1008 } 1009 
1010 int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd, 1011 dm_block_t *result) 1012 { 1013 int r = -EINVAL; 1014 1015 down_read(&cmd->lock); 1016 1017 if (!cmd->fail_io) 1018 r = dm_sm_get_nr_blocks(cmd->sm, result); 1019 1020 up_read(&cmd->lock); 1021 1022 return r; 1023 } 1024