// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
 */

#include <linux/mm.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/bitops.h>
#include <linux/bitmap.h>
#include <linux/device-mapper.h>

#include "persistent-data/dm-bitset.h"
#include "persistent-data/dm-space-map.h"
#include "persistent-data/dm-block-manager.h"
#include "persistent-data/dm-transaction-manager.h"

#include "dm-clone-metadata.h"

#define DM_MSG_PREFIX "clone metadata"

#define SUPERBLOCK_LOCATION 0
#define SUPERBLOCK_MAGIC 0x8af27f64
#define SUPERBLOCK_CSUM_XOR 257649492

#define DM_CLONE_MAX_CONCURRENT_LOCKS 5

#define UUID_LEN 16

/* Min and max dm-clone metadata versions supported */
#define DM_CLONE_MIN_METADATA_VERSION 1
#define DM_CLONE_MAX_METADATA_VERSION 1

/*
 * On-disk metadata layout
 *
 * This structure is stored in the block at SUPERBLOCK_LOCATION. All
 * multi-byte fields are little-endian and the structure is packed, so the
 * order and width of the fields define the on-disk format.
 */
struct superblock_disk {
	/*
	 * Checksum written by sb_prepare_for_write() and verified by
	 * sb_check(). It is computed with dm_bm_checksum() over the block
	 * contents that follow this field, i.e., starting at 'flags'.
	 */
	__le32 csum;
	/* Always written as 0 by __prepare_superblock() */
	__le32 flags;
	/* Location of the block holding the superblock */
	__le64 blocknr;

	/* Currently unused; __prepare_superblock() zeroes it */
	__u8 uuid[UUID_LEN];
	/* Must be equal to SUPERBLOCK_MAGIC */
	__le64 magic;
	/*
	 * Metadata format version. sb_check() accepts versions between
	 * DM_CLONE_MIN_METADATA_VERSION and DM_CLONE_MAX_METADATA_VERSION.
	 */
	__le32 version;

	/* Root of the metadata space map, as copied by __copy_sm_root() */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/*
	 * Region and target size the metadata was created with.
	 * __open_metadata() refuses to open the metadata if they differ from
	 * the sizes passed to dm_clone_metadata_open().
	 */
	__le64 region_size;
	__le64 target_size;

	/* Root block of the on-disk bitset tracking the hydrated regions */
	__le64 bitset_root;
} __packed;

/*
 * Region and Dirty bitmaps.
 *
 * dm-clone logically splits the source and destination devices in regions of
 * fixed size. The destination device's regions are gradually hydrated, i.e.,
 * we copy (clone) the source's regions to the destination device. Eventually,
 * all regions will get hydrated and all I/O will be served from the
 * destination device.
 *
 * We maintain an on-disk bitmap which tracks the state of each of the
 * destination device's regions, i.e., whether they are hydrated or not.
 *
 * To save constantly doing look ups on disk we keep an in core copy of the
 * on-disk bitmap, the region_map.
*
 * In order to track which regions are hydrated during a metadata transaction,
 * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
 * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
 * tracks the regions that got hydrated during the current metadata
 * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
 * the dirty_regions bitmap.
 *
 * This allows us to precisely track the regions that were hydrated during the
 * current metadata transaction and update the metadata accordingly, when we
 * commit the current transaction. This is important because dm-clone should
 * only commit the metadata of regions that were properly flushed to the
 * destination device beforehand. Otherwise, in case of a crash, we could end
 * up with a corrupted dm-clone device.
 *
 * When a region finishes hydrating dm-clone calls
 * dm_clone_set_region_hydrated(), or for discard requests
 * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
 * and dmap.
 *
 * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
 * and update the on-disk metadata accordingly. Thus, we don't have to flush to
 * disk the whole region_map. We can just flush the dirty region_map bits.
 *
 * We use the helper dmap->dirty_words bitmap, which is smaller than the
 * original region_map, to reduce the amount of memory accesses during a
 * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
 * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
 * accesses.
 *
 * We could update directly the on-disk bitmap, when dm-clone calls either
 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
 * inserts significant metadata I/O overhead in dm-clone's I/O path.
* Also, as
 * these two functions don't block, we can call them in interrupt context,
 * e.g., in a hooked overwrite bio's completion routine, and further reduce the
 * I/O completion latency.
 *
 * We maintain two dirty bitmap sets. During a metadata commit we atomically
 * swap the currently used dmap with the unused one. This allows the metadata
 * update functions to run concurrently with an ongoing commit.
 */
struct dirty_map {
	/* One bit per long of dirty_regions; set when that long has dirty bits */
	unsigned long *dirty_words;
	/* One bit per region; set when the region got hydrated in this transaction */
	unsigned long *dirty_regions;
	/*
	 * Set to 1 whenever a bit is recorded in this map and reset to 0 by
	 * __flush_dmap() after a successful commit. Accessed under
	 * bitmap_lock by the update and flush paths.
	 */
	unsigned int changed;
};

/*
 * In-core state of a dm-clone metadata device. Allocated and initialized by
 * dm_clone_metadata_open() and released by dm_clone_metadata_close().
 */
struct dm_clone_metadata {
	/* The metadata block device */
	struct block_device *bdev;

	/* Sizes are in sectors; nr_regions is target_size / region_size, rounded up */
	sector_t target_size;
	sector_t region_size;
	unsigned long nr_regions;
	/* Number of longs needed to hold nr_regions bits */
	unsigned long nr_words;

	/* Spinlock protecting the region and dirty bitmaps. */
	spinlock_t bitmap_lock;
	struct dirty_map dmap[2];
	/* The dmap that new updates are recorded in; swapped under bitmap_lock */
	struct dirty_map *current_dmap;

	/* Protected by lock */
	struct dirty_map *committing_dmap;

	/*
	 * In core copy of the on-disk bitmap to save constantly doing look ups
	 * on disk.
	 */
	unsigned long *region_map;

	/* Protected by bitmap_lock */
	unsigned int read_only;

	struct dm_block_manager *bm;
	struct dm_space_map *sm;
	struct dm_transaction_manager *tm;

	/* Serializes commits, aborts, reloads and mode changes */
	struct rw_semaphore lock;

	struct dm_disk_bitset bitset_info;
	dm_block_t bitset_root;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* Set once region_map is found to be full, i.e., all regions are hydrated */
	bool hydration_done:1;
	/*
	 * Set by dm_clone_metadata_abort() when the persistent data
	 * structures could not be re-created; metadata I/O is refused
	 * afterwards.
	 */
	bool fail_io:1;
};

/*---------------------------------------------------------------------------*/

/*
 * Superblock validation.
165 */ 166 static void sb_prepare_for_write(const struct dm_block_validator *v, 167 struct dm_block *b, size_t sb_block_size) 168 { 169 struct superblock_disk *sb; 170 u32 csum; 171 172 sb = dm_block_data(b); 173 sb->blocknr = cpu_to_le64(dm_block_location(b)); 174 175 csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), 176 SUPERBLOCK_CSUM_XOR); 177 sb->csum = cpu_to_le32(csum); 178 } 179 180 static int sb_check(const struct dm_block_validator *v, struct dm_block *b, 181 size_t sb_block_size) 182 { 183 struct superblock_disk *sb; 184 u32 csum, metadata_version; 185 186 sb = dm_block_data(b); 187 188 if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) { 189 DMERR("Superblock check failed: blocknr %llu, expected %llu", 190 le64_to_cpu(sb->blocknr), 191 (unsigned long long)dm_block_location(b)); 192 return -ENOTBLK; 193 } 194 195 if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) { 196 DMERR("Superblock check failed: magic %llu, expected %llu", 197 le64_to_cpu(sb->magic), 198 (unsigned long long)SUPERBLOCK_MAGIC); 199 return -EILSEQ; 200 } 201 202 csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), 203 SUPERBLOCK_CSUM_XOR); 204 if (sb->csum != cpu_to_le32(csum)) { 205 DMERR("Superblock check failed: checksum %u, expected %u", 206 csum, le32_to_cpu(sb->csum)); 207 return -EILSEQ; 208 } 209 210 /* Check metadata version */ 211 metadata_version = le32_to_cpu(sb->version); 212 if (metadata_version < DM_CLONE_MIN_METADATA_VERSION || 213 metadata_version > DM_CLONE_MAX_METADATA_VERSION) { 214 DMERR("Clone metadata version %u found, but only versions between %u and %u supported.", 215 metadata_version, DM_CLONE_MIN_METADATA_VERSION, 216 DM_CLONE_MAX_METADATA_VERSION); 217 return -EINVAL; 218 } 219 220 return 0; 221 } 222 223 static const struct dm_block_validator sb_validator = { 224 .name = "superblock", 225 .prepare_for_write = sb_prepare_for_write, 226 .check = sb_check 227 }; 228 229 /* 230 * Check if the superblock is formatted or not. 
We consider the superblock to 231 * be formatted in case we find non-zero bytes in it. 232 */ 233 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted) 234 { 235 int r; 236 unsigned int i, nr_words; 237 struct dm_block *sblock; 238 __le64 *data_le, zero = cpu_to_le64(0); 239 240 /* 241 * We don't use a validator here because the superblock could be all 242 * zeroes. 243 */ 244 r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock); 245 if (r) { 246 DMERR("Failed to read_lock superblock"); 247 return r; 248 } 249 250 data_le = dm_block_data(sblock); 251 *formatted = false; 252 253 /* This assumes that the block size is a multiple of 8 bytes */ 254 BUG_ON(dm_bm_block_size(bm) % sizeof(__le64)); 255 nr_words = dm_bm_block_size(bm) / sizeof(__le64); 256 for (i = 0; i < nr_words; i++) { 257 if (data_le[i] != zero) { 258 *formatted = true; 259 break; 260 } 261 } 262 263 dm_bm_unlock(sblock); 264 265 return 0; 266 } 267 268 /*---------------------------------------------------------------------------*/ 269 270 /* 271 * Low-level metadata handling. 
*/

/* Read-lock the superblock, validating it with sb_validator. */
static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
				       struct dm_block **sblock)
{
	return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
}

/*
 * Write-lock the superblock through dm_bm_write_lock_zero(), using
 * sb_validator. Callers fill in the whole superblock afterwards with
 * __prepare_superblock().
 */
static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
					     struct dm_block **sblock)
{
	return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
}

/*
 * Copy the root of the metadata space map into cmd->metadata_space_map_root,
 * using the root size reported by the space map.
 *
 * Returns 0 on success, or the error returned by dm_sm_root_size() or
 * dm_sm_copy_root().
 */
static int __copy_sm_root(struct dm_clone_metadata *cmd)
{
	int r;
	size_t root_size;

	r = dm_sm_root_size(cmd->sm, &root_size);
	if (r)
		return r;

	return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
}

/*
 * Save dm-clone metadata in superblock
 *
 * Fills in every field of @sb except csum and blocknr, which are set by
 * sb_prepare_for_write() when the block is written out. The space map root is
 * taken from cmd->metadata_space_map_root, so __copy_sm_root() has to be
 * called beforehand.
 */
static void __prepare_superblock(struct dm_clone_metadata *cmd,
				 struct superblock_disk *sb)
{
	sb->flags = cpu_to_le32(0UL);

	/* FIXME: UUID is currently unused */
	memset(sb->uuid, 0, sizeof(sb->uuid));

	sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
	sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);

	/* Save the metadata space_map root */
	memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
	       sizeof(cmd->metadata_space_map_root));

	sb->region_size = cpu_to_le64(cmd->region_size);
	sb->target_size = cpu_to_le64(cmd->target_size);
	sb->bitset_root = cpu_to_le64(cmd->bitset_root);
}

/*
 * Open already formatted metadata.
 *
 * Reads and validates the superblock, checks that the region and target size
 * stored in it match the ones in @cmd, opens the transaction manager and the
 * space map from the stored space map root and initializes the on-disk bitset
 * info and root.
 *
 * Returns 0 on success, -EINVAL if the sizes don't match, or the error
 * returned by the failing persistent-data call. The superblock is unlocked on
 * every path that managed to lock it.
 */
static int __open_metadata(struct dm_clone_metadata *cmd)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *sb;

	r = superblock_read_lock(cmd, &sblock);

	if (r) {
		DMERR("Failed to read_lock superblock");
		return r;
	}

	sb = dm_block_data(sblock);

	/* Verify that target_size and region_size haven't changed. */
	if (cmd->region_size != le64_to_cpu(sb->region_size) ||
	    cmd->target_size != le64_to_cpu(sb->target_size)) {
		DMERR("Region and/or target size don't match the ones in metadata");
		r = -EINVAL;
		goto out_with_lock;
	}

	r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
			       sb->metadata_space_map_root,
			       sizeof(sb->metadata_space_map_root),
			       &cmd->tm, &cmd->sm);

	if (r) {
		DMERR("dm_tm_open_with_sm failed");
		goto out_with_lock;
	}

	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
	cmd->bitset_root = le64_to_cpu(sb->bitset_root);

out_with_lock:
	dm_bm_unlock(sblock);

	return r;
}

/*
 * Format the metadata device.
 *
 * Creates the transaction manager and the space map, creates an empty on-disk
 * bitset and resizes it to cmd->nr_regions entries, and then commits a freshly
 * prepared superblock.
 *
 * Returns 0 on success. On failure the space map and the transaction manager
 * are destroyed and the error of the failing step is returned.
 */
static int __format_metadata(struct dm_clone_metadata *cmd)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *sb;

	r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
	if (r) {
		DMERR("Failed to create transaction manager");
		return r;
	}

	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);

	r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
	if (r) {
		DMERR("Failed to create empty on-disk bitset");
		goto err_with_tm;
	}

	/* NOTE(review): 'false' is presumably the value of the new bits - confirm */
	r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
			     cmd->nr_regions, false, &cmd->bitset_root);
	if (r) {
		DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
		goto err_with_tm;
	}

	/* Flush to disk all blocks, except the superblock */
	r = dm_tm_pre_commit(cmd->tm);
	if (r) {
		DMERR("dm_tm_pre_commit failed");
		goto err_with_tm;
	}

	r = __copy_sm_root(cmd);
	if (r) {
		DMERR("__copy_sm_root failed");
		goto err_with_tm;
	}

	r = superblock_write_lock_zero(cmd, &sblock);
	if (r) {
		DMERR("Failed to write_lock superblock");
		goto err_with_tm;
	}

	sb = dm_block_data(sblock);
	__prepare_superblock(cmd, sb);
	r = dm_tm_commit(cmd->tm, sblock);
	if (r) {
		DMERR("Failed to commit superblock");
		goto err_with_tm;
	}

	return 0;

err_with_tm:
	dm_sm_destroy(cmd->sm);
	dm_tm_destroy(cmd->tm);

	return r;
}

/*
 * Open the metadata if the superblock contains non-zero data, otherwise
 * format the device.
 *
 * @may_format_device: if false, an unformatted device is not formatted and
 *                     -EPERM is returned instead
 */
static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
{
	int r;
	bool formatted = false;

	r = __superblock_all_zeroes(cmd->bm, &formatted);
	if (r)
		return r;

	if (!formatted)
		return may_format_device ? __format_metadata(cmd) : -EPERM;

	return __open_metadata(cmd);
}

/*
 * Create the block manager and open, or format, the metadata on top of it.
 *
 * Returns 0 on success. On failure the block manager is destroyed again (if
 * it was created) and the error is returned.
 */
static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
					       bool may_format_device)
{
	int r;

	/* Create block manager */
	cmd->bm = dm_block_manager_create(cmd->bdev,
					  DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
					  DM_CLONE_MAX_CONCURRENT_LOCKS);
	if (IS_ERR(cmd->bm)) {
		DMERR("Failed to create block manager");
		return PTR_ERR(cmd->bm);
	}

	r = __open_or_format_metadata(cmd, may_format_device);
	if (r)
		dm_block_manager_destroy(cmd->bm);

	return r;
}

/* Destroy the space map, the transaction manager and the block manager. */
static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
{
	dm_sm_destroy(cmd->sm);
	dm_tm_destroy(cmd->tm);
	dm_block_manager_destroy(cmd->bm);
}

/*---------------------------------------------------------------------------*/

/*
 * Allocate the two zeroed bitmaps of a dirty map and clear its changed flag.
 *
 * @nr_words: number of bits in the dirty_words bitmap
 * @nr_regions: number of bits in the dirty_regions bitmap
 *
 * Returns 0 on success, or -ENOMEM, in which case nothing is left allocated.
 */
static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
			    unsigned long nr_regions)
{
	dmap->changed = 0;

	dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
	if (!dmap->dirty_words)
		return -ENOMEM;

	dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
	if (!dmap->dirty_regions) {
		kvfree(dmap->dirty_words);
		return -ENOMEM;
	}

	return 0;
}

/* Free the bitmaps of a dirty map. */
static void __dirty_map_exit(struct dirty_map *dmap)
{
	kvfree(dmap->dirty_words);
	kvfree(dmap->dirty_regions);
}

/*
 * Initialize both dirty maps of @cmd. dmap[0] becomes the current dirty map
 * and no dirty map is marked as committing.
 *
 * Returns 0 on success, or -ENOMEM, in which case neither map is left
 * allocated.
 */
static int dirty_map_init(struct dm_clone_metadata *cmd)
{
	if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
		DMERR("Failed to allocate dirty bitmap");
		return -ENOMEM;
	}

	if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
		DMERR("Failed to allocate dirty bitmap");
		__dirty_map_exit(&cmd->dmap[0]);
		return -ENOMEM;
	}

	cmd->current_dmap = &cmd->dmap[0];
	cmd->committing_dmap = NULL;

	return 0;
}

/* Free both dirty maps of @cmd. */
static void dirty_map_exit(struct dm_clone_metadata *cmd)
{
	__dirty_map_exit(&cmd->dmap[0]);
	__dirty_map_exit(&cmd->dmap[1]);
}

/*
 * Load the on-disk bitset into cmd->region_map.
 *
 * Flushes the bitset cache and then walks the on-disk bitset with a cursor,
 * assigning bits 0 to nr_regions - 1 of region_map. This function doesn't
 * take cmd->bitmap_lock.
 *
 * Returns 0 on success, or the error returned by the failing dm-bitset call.
 */
static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
{
	int r;
	unsigned long i;
	struct dm_bitset_cursor c;

	/* Flush bitset cache */
	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
	if (r)
		return r;

	r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
	if (r)
		return r;

	for (i = 0; ; i++) {
		__assign_bit(i, cmd->region_map, dm_bitset_cursor_get_value(&c));

		/* Stop after the last region, without advancing the cursor past it */
		if (i >= (cmd->nr_regions - 1))
			break;

		r = dm_bitset_cursor_next(&c);

		if (r)
			break;
	}

	dm_bitset_cursor_end(&c);

	return r;
}

/*
 * Open, or create if the device is not formatted, the dm-clone metadata
 * stored on @bdev.
 *
 * @bdev: the metadata block device
 * @target_size: size of the target, in sectors
 * @region_size: size of a region, in sectors
 *
 * Allocates the in-core state, creates the persistent data structures
 * (formatting the device is allowed here), loads the on-disk bitset in
 * region_map and allocates the dirty maps. hydration_done is set if every
 * region is already marked as hydrated.
 *
 * Returns the new metadata object on success, or an ERR_PTR() on failure, in
 * which case everything that was set up has been torn down again.
 */
struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
						 sector_t target_size,
						 sector_t region_size)
{
	int r;
	struct dm_clone_metadata *cmd;

	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
	if (!cmd) {
		DMERR("Failed to allocate memory for dm-clone metadata");
		return ERR_PTR(-ENOMEM);
	}

	cmd->bdev = bdev;
	cmd->target_size = target_size;
	cmd->region_size = region_size;
	cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
	cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);

	init_rwsem(&cmd->lock);
	spin_lock_init(&cmd->bitmap_lock);
	cmd->read_only = 0;
	cmd->fail_io = false;
	cmd->hydration_done = false;

	/* Not zeroed: __load_bitset_in_core() assigns every bit below nr_regions */
	cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
	if (!cmd->region_map) {
		DMERR("Failed to allocate memory for region bitmap");
		r = -ENOMEM;
		goto out_with_md;
	}

	r = __create_persistent_data_structures(cmd, true);
	if (r)
		goto out_with_region_map;

	r = __load_bitset_in_core(cmd);
	if (r) {
		DMERR("Failed to load on-disk region map");
		goto out_with_pds;
	}

	r = dirty_map_init(cmd);
	if (r)
		goto out_with_pds;

	if (bitmap_full(cmd->region_map, cmd->nr_regions))
		cmd->hydration_done = true;

	return cmd;

out_with_pds:
	__destroy_persistent_data_structures(cmd);

out_with_region_map:
	kvfree(cmd->region_map);

out_with_md:
	kfree(cmd);

	return ERR_PTR(r);
}

/*
 * Release everything that dm_clone_metadata_open() set up.
 *
 * The persistent data structures are not destroyed when fail_io is set. In
 * this file fail_io is only set by dm_clone_metadata_abort(), after it has
 * destroyed them and failed to create them again.
 */
void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
{
	if (!cmd->fail_io)
		__destroy_persistent_data_structures(cmd);

	dirty_map_exit(cmd);
	kvfree(cmd->region_map);
	kfree(cmd);
}

/* Returns true if all regions were found to be hydrated. */
bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
{
	return cmd->hydration_done;
}

/* Returns true if hydration is done or region @region_nr is hydrated. */
bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
{
	return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
}

/*
 * Returns true if hydration is done, or if there is no unhydrated region in
 * the @nr_regions regions starting at @start, i.e., the first zero bit of
 * region_map at or after @start lies at or beyond @start + @nr_regions.
 */
bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
				unsigned long start, unsigned long nr_regions)
{
	unsigned long bit;

	if (dm_clone_is_hydration_done(cmd))
		return true;

	bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);

	return (bit >= (start + nr_regions));
}

/* Returns the number of bits set in region_map, i.e., the hydrated regions. */
unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
{
	return bitmap_weight(cmd->region_map, cmd->nr_regions);
}

/*
 * Returns the result of find_next_zero_bit() on region_map, searching from
 * @start up to nr_regions, i.e., the next region that is not hydrated.
 */
unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
						   unsigned long start)
{
	return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
}

/*
 * Set in the on-disk bitset every region of word @word that is marked in
 * @dirty_regions, clearing each bit of @dirty_regions once it has been
 * written.
 *
 * Returns 0 on success, or the error returned by dm_bitset_set_bit(). On
 * error, the bit that failed and the ones after it stay set in
 * @dirty_regions.
 */
static int __update_metadata_word(struct dm_clone_metadata *cmd,
				  unsigned long *dirty_regions,
				  unsigned long word)
{
	int r;
	unsigned long index = word * BITS_PER_LONG;
	/* The last word may be only partially used */
	unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);

	while (index < max_index) {
		if (test_bit(index, dirty_regions)) {
			r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
					      index, &cmd->bitset_root);
			if (r) {
				DMERR("dm_bitset_set_bit failed");
				return r;
			}
			__clear_bit(index, dirty_regions);
		}
		index++;
	}

	return 0;
}

/*
 * Commit the current metadata transaction.
 *
 * Flushes the bitset cache, flushes all blocks except the superblock, saves
 * the space map root and finally writes and commits the superblock. After a
 * successful commit hydration_done is set if region_map is full.
 *
 * Returns 0 on success, or the error of the failing step.
 */
static int __metadata_commit(struct dm_clone_metadata *cmd)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *sb;

	/* Flush bitset cache */
	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
	if (r) {
		DMERR("dm_bitset_flush failed");
		return r;
	}

	/* Flush to disk all blocks, except the superblock */
	r = dm_tm_pre_commit(cmd->tm);
	if (r) {
		DMERR("dm_tm_pre_commit failed");
		return r;
	}

	/* Save the space map root in cmd->metadata_space_map_root */
	r = __copy_sm_root(cmd);
	if (r) {
		DMERR("__copy_sm_root failed");
		return r;
	}

	/* Lock the superblock */
	r = superblock_write_lock_zero(cmd, &sblock);
	if (r) {
		DMERR("Failed to write_lock superblock");
		return r;
	}

	/* Save the metadata in superblock */
	sb = dm_block_data(sblock);
	__prepare_superblock(cmd, sb);

	/* Unlock superblock and commit it to disk */
	r = dm_tm_commit(cmd->tm, sblock);
	if (r) {
		DMERR("Failed to commit superblock");
		return r;
	}

	/*
	 * FIXME: Find a more efficient way to check if the hydration is done.
	 */
	if (bitmap_full(cmd->region_map, cmd->nr_regions))
		cmd->hydration_done = true;

	return 0;
}

/*
 * Write the regions recorded in @dmap to the on-disk bitset and commit the
 * metadata.
 *
 * Only the words marked in dmap->dirty_words are scanned. Each word is
 * cleared from dirty_words after its regions have been written. The changed
 * flag of @dmap is reset, under bitmap_lock, only after a successful commit.
 *
 * Returns 0 on success, or the error returned by __update_metadata_word() or
 * __metadata_commit().
 */
static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
{
	int r;
	unsigned long word;

	word = 0;
	do {
		word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);

		/* No more dirty words */
		if (word == cmd->nr_words)
			break;

		r = __update_metadata_word(cmd, dmap->dirty_regions, word);

		if (r)
			return r;

		__clear_bit(word, dmap->dirty_words);
		word++;
	} while (word < cmd->nr_words);

	r = __metadata_commit(cmd);

	if (r)
		return r;

	/* Update the changed flag */
	spin_lock_irq(&cmd->bitmap_lock);
	dmap->changed = 0;
	spin_unlock_irq(&cmd->bitmap_lock);

	return 0;
}

/*
 * Prepare a metadata commit by swapping the dirty maps.
 *
 * The current dirty map becomes the committing one, which
 * dm_clone_metadata_commit() flushes later, and the other dirty map starts
 * recording new updates.
 *
 * Returns 0 on success, -EPERM if metadata I/O has failed or the block
 * manager is read-only, and -EINVAL if the other dirty map is not clean or a
 * dirty map is still marked as committing.
 */
int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
{
	int r = 0;
	struct dirty_map *dmap, *next_dmap;

	down_write(&cmd->lock);

	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
		r = -EPERM;
		goto out;
	}

	/* Get current dirty bitmap */
	dmap = cmd->current_dmap;

	/* Get next dirty bitmap */
	next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];

	/*
	 * The last commit failed, so we don't have a clean dirty-bitmap to
	 * use.
	 */
	if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
		r = -EINVAL;
		goto out;
	}

	/* Swap dirty bitmaps */
	spin_lock_irq(&cmd->bitmap_lock);
	cmd->current_dmap = next_dmap;
	spin_unlock_irq(&cmd->bitmap_lock);

	/* Set old dirty bitmap as currently committing */
	cmd->committing_dmap = dmap;
out:
	up_write(&cmd->lock);

	return r;
}

/*
 * Flush the committing dirty map, set up by dm_clone_metadata_pre_commit(),
 * and commit the metadata.
 *
 * Returns 0 on success, -EPERM if metadata I/O has failed or the block
 * manager is read-only, -EINVAL if there is no committing dirty map, or the
 * error returned by __flush_dmap(). On failure the committing dirty map is
 * left in place.
 */
int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
{
	int r = -EPERM;

	down_write(&cmd->lock);

	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
		goto out;

	if (WARN_ON(!cmd->committing_dmap)) {
		r = -EINVAL;
		goto out;
	}

	r = __flush_dmap(cmd, cmd->committing_dmap);
	if (!r) {
		/* Clear committing dmap */
		cmd->committing_dmap = NULL;
	}
out:
	up_write(&cmd->lock);

	return r;
}

/*
 * Mark region @region_nr as hydrated in region_map and record it in the
 * current dirty map.
 *
 * The bitmaps are updated under bitmap_lock, taken with irqsave, and the
 * function only updates in-core state.
 *
 * Returns 0 on success, -ERANGE if @region_nr is out of range, and -EPERM if
 * the metadata is in read-only mode.
 */
int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
{
	int r = 0;
	struct dirty_map *dmap;
	unsigned long word, flags;

	if (unlikely(region_nr >= cmd->nr_regions)) {
		DMERR("Region %lu out of range (total number of regions %lu)",
		      region_nr, cmd->nr_regions);
		return -ERANGE;
	}

	word = region_nr / BITS_PER_LONG;

	spin_lock_irqsave(&cmd->bitmap_lock, flags);

	if (cmd->read_only) {
		r = -EPERM;
		goto out;
	}

	dmap = cmd->current_dmap;

	__set_bit(word, dmap->dirty_words);
	__set_bit(region_nr, dmap->dirty_regions);
	__set_bit(region_nr, cmd->region_map);
	dmap->changed = 1;

out:
	spin_unlock_irqrestore(&cmd->bitmap_lock, flags);

	return r;
}

/*
 * Mark as hydrated every region in [@start, @start + @nr_regions) that is not
 * hydrated already, recording only those regions in the current dirty map.
 *
 * Uses spin_lock_irq() / spin_unlock_irq() on bitmap_lock.
 *
 * Returns 0 on success, -ERANGE if the range is invalid, i.e., it starts or
 * ends beyond the last region or its end overflows, and -EPERM if the
 * metadata is in read-only mode.
 */
int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
			    unsigned long nr_regions)
{
	int r = 0;
	struct dirty_map *dmap;
	unsigned long word, region_nr;

	if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
		     (start + nr_regions) > cmd->nr_regions)) {
		DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
		      start, nr_regions, cmd->nr_regions);
		return -ERANGE;
	}

	spin_lock_irq(&cmd->bitmap_lock);

	if (cmd->read_only) {
		r = -EPERM;
		goto out;
	}

	dmap = cmd->current_dmap;
	for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
		if (!test_bit(region_nr, cmd->region_map)) {
			word = region_nr / BITS_PER_LONG;
			__set_bit(word, dmap->dirty_words);
			__set_bit(region_nr, dmap->dirty_regions);
			__set_bit(region_nr, cmd->region_map);
			dmap->changed = 1;
		}
	}
out:
	spin_unlock_irq(&cmd->bitmap_lock);

	return r;
}

/*
 * WARNING: This must not be called concurrently with either
 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
 * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
 * exception is after setting the metadata to read-only mode, using
 * dm_clone_metadata_set_read_only().
 *
 * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
 * may block.
913 */ 914 int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd) 915 { 916 int r = -EINVAL; 917 918 down_write(&cmd->lock); 919 920 if (cmd->fail_io) 921 goto out; 922 923 r = __load_bitset_in_core(cmd); 924 out: 925 up_write(&cmd->lock); 926 927 return r; 928 } 929 930 bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd) 931 { 932 bool r; 933 unsigned long flags; 934 935 spin_lock_irqsave(&cmd->bitmap_lock, flags); 936 r = cmd->dmap[0].changed || cmd->dmap[1].changed; 937 spin_unlock_irqrestore(&cmd->bitmap_lock, flags); 938 939 return r; 940 } 941 942 int dm_clone_metadata_abort(struct dm_clone_metadata *cmd) 943 { 944 int r = -EPERM; 945 946 down_write(&cmd->lock); 947 948 if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) 949 goto out; 950 951 __destroy_persistent_data_structures(cmd); 952 953 r = __create_persistent_data_structures(cmd, false); 954 if (r) { 955 /* If something went wrong we can neither write nor read the metadata */ 956 cmd->fail_io = true; 957 } 958 out: 959 up_write(&cmd->lock); 960 961 return r; 962 } 963 964 void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd) 965 { 966 down_write(&cmd->lock); 967 968 spin_lock_irq(&cmd->bitmap_lock); 969 cmd->read_only = 1; 970 spin_unlock_irq(&cmd->bitmap_lock); 971 972 if (!cmd->fail_io) 973 dm_bm_set_read_only(cmd->bm); 974 975 up_write(&cmd->lock); 976 } 977 978 void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd) 979 { 980 down_write(&cmd->lock); 981 982 spin_lock_irq(&cmd->bitmap_lock); 983 cmd->read_only = 0; 984 spin_unlock_irq(&cmd->bitmap_lock); 985 986 if (!cmd->fail_io) 987 dm_bm_set_read_write(cmd->bm); 988 989 up_write(&cmd->lock); 990 } 991 992 int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd, 993 dm_block_t *result) 994 { 995 int r = -EINVAL; 996 997 down_read(&cmd->lock); 998 999 if (!cmd->fail_io) 1000 r = dm_sm_get_nr_free(cmd->sm, result); 1001 1002 up_read(&cmd->lock); 1003 1004 return r; 1005 } 1006 
1007 int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd, 1008 dm_block_t *result) 1009 { 1010 int r = -EINVAL; 1011 1012 down_read(&cmd->lock); 1013 1014 if (!cmd->fail_io) 1015 r = dm_sm_get_nr_blocks(cmd->sm, result); 1016 1017 up_read(&cmd->lock); 1018 1019 return r; 1020 } 1021