// SPDX-License-Identifier: GPL-2.0-only
#include "dm.h"
#include "persistent-data/dm-transaction-manager.h"
#include "persistent-data/dm-bitset.h"
#include "persistent-data/dm-space-map.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "era"

#define SUPERBLOCK_LOCATION 0
#define SUPERBLOCK_MAGIC 2126579579
#define SUPERBLOCK_CSUM_XOR 146538381
#define MIN_ERA_VERSION 1
#define MAX_ERA_VERSION 1
#define INVALID_WRITESET_ROOT SUPERBLOCK_LOCATION
#define MIN_BLOCK_SIZE 8

/*----------------------------------------------------------------
 * Writeset
 *--------------------------------------------------------------*/
struct writeset_metadata {
	uint32_t nr_bits;
	dm_block_t root;
};

struct writeset {
	struct writeset_metadata md;

	/*
	 * An in core copy of the bits to save constantly doing look ups on
	 * disk.
	 */
	unsigned long *bits;
};

/*
 * This does not free off the on disk bitset as this will normally be done
 * after digesting into the era array.
 */
static void writeset_free(struct writeset *ws)
{
	vfree(ws->bits);
}

static int setup_on_disk_bitset(struct dm_disk_bitset *info,
				unsigned nr_bits, dm_block_t *root)
{
	int r;

	r = dm_bitset_empty(info, root);
	if (r)
		return r;

	return dm_bitset_resize(info, *root, 0, nr_bits, false, root);
}

static size_t bitset_size(unsigned nr_bits)
{
	return sizeof(unsigned long) * dm_div_up(nr_bits, BITS_PER_LONG);
}

/*
 * Allocates memory for the in core bitset.
 */
static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
{
	ws->md.nr_bits = nr_blocks;
	ws->md.root = INVALID_WRITESET_ROOT;
	ws->bits = vzalloc(bitset_size(nr_blocks));
	if (!ws->bits) {
		DMERR("%s: couldn't allocate in memory bitset", __func__);
		return -ENOMEM;
	}

	return 0;
}

/*
 * Wipes the in-core bitset, and creates a new on disk bitset.
 */
static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws)
{
	int r;

	memset(ws->bits, 0, bitset_size(ws->md.nr_bits));

	r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root);
	if (r) {
		DMERR("%s: setup_on_disk_bitset failed", __func__);
		return r;
	}

	return 0;
}

static bool writeset_marked(struct writeset *ws, dm_block_t block)
{
	return test_bit(block, ws->bits);
}

static int writeset_marked_on_disk(struct dm_disk_bitset *info,
				   struct writeset_metadata *m, dm_block_t block,
				   bool *result)
{
	dm_block_t old = m->root;

	/*
	 * The bitset was flushed when it was archived, so we know there'll
	 * be no change to the root.
	 */
	int r = dm_bitset_test_bit(info, m->root, block, &m->root, result);
	if (r) {
		DMERR("%s: dm_bitset_test_bit failed", __func__);
		return r;
	}

	BUG_ON(m->root != old);

	return r;
}
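/*
 * Editorial note on the mark path: writeset_test_and_set() below flips
 * the bit in the in-core bitset first; only on a 0 -> 1 transition does
 * it touch the on-disk bitset.  So the common case (a block already
 * marked in this era) costs a single test_and_set_bit() and no
 * metadata I/O.
 */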
/*
 * Returns < 0 on error, 0 if the bit wasn't previously set, 1 if it was.
 */
static int writeset_test_and_set(struct dm_disk_bitset *info,
				 struct writeset *ws, uint32_t block)
{
	int r;

	if (!test_and_set_bit(block, ws->bits)) {
		r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root);
		if (r) {
			/* FIXME: fail mode */
			return r;
		}

		return 0;
	}

	return 1;
}

/*----------------------------------------------------------------
 * On disk metadata layout
 *--------------------------------------------------------------*/
#define SPACE_MAP_ROOT_SIZE 128
#define UUID_LEN 16

struct writeset_disk {
	__le32 nr_bits;
	__le64 root;
} __packed;

struct superblock_disk {
	__le32 csum;
	__le32 flags;
	__le64 blocknr;

	__u8 uuid[UUID_LEN];
	__le64 magic;
	__le32 version;

	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	__le32 data_block_size;
	__le32 metadata_block_size;
	__le32 nr_blocks;

	__le32 current_era;
	struct writeset_disk current_writeset;

	/*
	 * Only these two fields are valid within the metadata snapshot.
	 */
	__le64 writeset_tree_root;
	__le64 era_array_root;

	__le64 metadata_snap;
} __packed;

/*----------------------------------------------------------------
 * Superblock validation
 *--------------------------------------------------------------*/
static void sb_prepare_for_write(struct dm_block_validator *v,
				 struct dm_block *b,
				 size_t sb_block_size)
{
	struct superblock_disk *disk = dm_block_data(b);

	disk->blocknr = cpu_to_le64(dm_block_location(b));
	disk->csum = cpu_to_le32(dm_bm_checksum(&disk->flags,
						sb_block_size - sizeof(__le32),
						SUPERBLOCK_CSUM_XOR));
}

static int check_metadata_version(struct superblock_disk *disk)
{
	uint32_t metadata_version = le32_to_cpu(disk->version);
	if (metadata_version < MIN_ERA_VERSION || metadata_version > MAX_ERA_VERSION) {
		DMERR("Era metadata version %u found, but only versions between %u and %u supported.",
		      metadata_version, MIN_ERA_VERSION, MAX_ERA_VERSION);
		return -EINVAL;
	}

	return 0;
}

static int sb_check(struct dm_block_validator *v,
		    struct dm_block *b,
		    size_t sb_block_size)
{
	struct superblock_disk *disk = dm_block_data(b);
	__le32 csum_le;

	if (dm_block_location(b) != le64_to_cpu(disk->blocknr)) {
		DMERR("sb_check failed: blocknr %llu: wanted %llu",
		      le64_to_cpu(disk->blocknr),
		      (unsigned long long)dm_block_location(b));
		return -ENOTBLK;
	}

	if (le64_to_cpu(disk->magic) != SUPERBLOCK_MAGIC) {
		DMERR("sb_check failed: magic %llu: wanted %llu",
		      le64_to_cpu(disk->magic),
		      (unsigned long long) SUPERBLOCK_MAGIC);
		return -EILSEQ;
	}

	csum_le = cpu_to_le32(dm_bm_checksum(&disk->flags,
					     sb_block_size - sizeof(__le32),
					     SUPERBLOCK_CSUM_XOR));
	if (csum_le != disk->csum) {
		DMERR("sb_check failed: csum %u: wanted %u",
		      le32_to_cpu(csum_le), le32_to_cpu(disk->csum));
		return -EILSEQ;
	}

	return check_metadata_version(disk);
}

static struct dm_block_validator sb_validator = {
	.name = "superblock",
	.prepare_for_write = sb_prepare_for_write,
	.check = sb_check
};
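/*
 * Editorial sketch of how the validator is exercised (assuming the
 * standard dm-block-manager behaviour): any lock taken with
 * &sb_validator runs sb_check() when the block comes off the disk,
 * and sb_prepare_for_write() just before it is written back, e.g.
 *
 *	struct dm_block *b;
 *	int r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, &sb_validator, &b);
 *	if (!r)
 *		dm_bm_unlock(b);	// sb_check() has already vetted *b
 */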
/*----------------------------------------------------------------
 * Low level metadata handling
 *--------------------------------------------------------------*/
#define DM_ERA_METADATA_BLOCK_SIZE 4096
#define ERA_MAX_CONCURRENT_LOCKS 5

struct era_metadata {
	struct block_device *bdev;
	struct dm_block_manager *bm;
	struct dm_space_map *sm;
	struct dm_transaction_manager *tm;

	dm_block_t block_size;
	uint32_t nr_blocks;

	uint32_t current_era;

	/*
	 * We preallocate 2 writesets.  When an era rolls over we
	 * switch between them.  This means the allocation is done at
	 * preresume time, rather than on the io path.
	 */
	struct writeset writesets[2];
	struct writeset *current_writeset;

	dm_block_t writeset_tree_root;
	dm_block_t era_array_root;

	struct dm_disk_bitset bitset_info;
	struct dm_btree_info writeset_tree_info;
	struct dm_array_info era_array_info;

	dm_block_t metadata_snap;

	/*
	 * A flag that is set whenever a writeset has been archived.
	 */
	bool archived_writesets;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
};

static int superblock_read_lock(struct era_metadata *md,
				struct dm_block **sblock)
{
	return dm_bm_read_lock(md->bm, SUPERBLOCK_LOCATION,
			       &sb_validator, sblock);
}

static int superblock_lock_zero(struct era_metadata *md,
				struct dm_block **sblock)
{
	return dm_bm_write_lock_zero(md->bm, SUPERBLOCK_LOCATION,
				     &sb_validator, sblock);
}

static int superblock_lock(struct era_metadata *md,
			   struct dm_block **sblock)
{
	return dm_bm_write_lock(md->bm, SUPERBLOCK_LOCATION,
				&sb_validator, sblock);
}
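/*
 * Editorial note on the three helpers above: superblock_read_lock() is
 * used for queries (e.g. open_metadata()); superblock_lock_zero() is
 * for the very first write when formatting, where the block starts
 * zeroed so the old contents must not be validated; superblock_lock()
 * is the normal write lock taken by every commit.
 */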
/* FIXME: duplication with cache and thin */
static int superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
{
	int r;
	unsigned i;
	struct dm_block *b;
	__le64 *data_le, zero = cpu_to_le64(0);
	unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);

	/*
	 * We can't use a validator here - it may be all zeroes.
	 */
	r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &b);
	if (r)
		return r;

	data_le = dm_block_data(b);
	*result = true;
	for (i = 0; i < sb_block_size; i++) {
		if (data_le[i] != zero) {
			*result = false;
			break;
		}
	}

	dm_bm_unlock(b);

	return 0;
}

/*----------------------------------------------------------------*/

static void ws_pack(const struct writeset_metadata *core, struct writeset_disk *disk)
{
	disk->nr_bits = cpu_to_le32(core->nr_bits);
	disk->root = cpu_to_le64(core->root);
}

static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata *core)
{
	core->nr_bits = le32_to_cpu(disk->nr_bits);
	core->root = le64_to_cpu(disk->root);
}

static void ws_inc(void *context, const void *value)
{
	struct era_metadata *md = context;
	struct writeset_disk ws_d;
	dm_block_t b;

	memcpy(&ws_d, value, sizeof(ws_d));
	b = le64_to_cpu(ws_d.root);

	dm_tm_inc(md->tm, b);
}

static void ws_dec(void *context, const void *value)
{
	struct era_metadata *md = context;
	struct writeset_disk ws_d;
	dm_block_t b;

	memcpy(&ws_d, value, sizeof(ws_d));
	b = le64_to_cpu(ws_d.root);

	dm_bitset_del(&md->bitset_info, b);
}

static int ws_eq(void *context, const void *value1, const void *value2)
{
	/*
	 * The btree holds packed struct writeset_disk values, so compare
	 * that size (sizeof(struct writeset_metadata) would overread).
	 */
	return !memcmp(value1, value2, sizeof(struct writeset_disk));
}

/*----------------------------------------------------------------*/

static void setup_writeset_tree_info(struct era_metadata *md)
{
	struct dm_btree_value_type *vt = &md->writeset_tree_info.value_type;
	md->writeset_tree_info.tm = md->tm;
	md->writeset_tree_info.levels = 1;
	vt->context = md;
	vt->size = sizeof(struct writeset_disk);
	vt->inc = ws_inc;
	vt->dec = ws_dec;
	vt->equal = ws_eq;
}

static void setup_era_array_info(struct era_metadata *md)
{
	struct dm_btree_value_type vt;
	vt.context = NULL;
	vt.size = sizeof(__le32);
	vt.inc = NULL;
	vt.dec = NULL;
	vt.equal = NULL;

	dm_array_info_init(&md->era_array_info, md->tm, &vt);
}

static void setup_infos(struct era_metadata *md)
{
	dm_disk_bitset_init(md->tm, &md->bitset_info);
	setup_writeset_tree_info(md);
	setup_era_array_info(md);
}

/*----------------------------------------------------------------*/
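/*
 * Editorial overview of the bring-up path below: on a zeroed device,
 * format_metadata() builds everything from scratch - transaction
 * manager + space map, an empty writeset tree and an empty era array -
 * then commits a superblock pointing at them.  Otherwise
 * open_metadata() reads those roots back out of the existing
 * superblock.
 */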
static int create_fresh_metadata(struct era_metadata *md)
{
	int r;

	r = dm_tm_create_with_sm(md->bm, SUPERBLOCK_LOCATION,
				 &md->tm, &md->sm);
	if (r < 0) {
		DMERR("dm_tm_create_with_sm failed");
		return r;
	}

	setup_infos(md);

	r = dm_btree_empty(&md->writeset_tree_info, &md->writeset_tree_root);
	if (r) {
		DMERR("couldn't create new writeset tree");
		goto bad;
	}

	r = dm_array_empty(&md->era_array_info, &md->era_array_root);
	if (r) {
		DMERR("couldn't create era array");
		goto bad;
	}

	return 0;

bad:
	dm_sm_destroy(md->sm);
	dm_tm_destroy(md->tm);

	return r;
}

static int save_sm_root(struct era_metadata *md)
{
	int r;
	size_t metadata_len;

	r = dm_sm_root_size(md->sm, &metadata_len);
	if (r < 0)
		return r;

	return dm_sm_copy_root(md->sm, &md->metadata_space_map_root,
			       metadata_len);
}

static void copy_sm_root(struct era_metadata *md, struct superblock_disk *disk)
{
	memcpy(&disk->metadata_space_map_root,
	       &md->metadata_space_map_root,
	       sizeof(md->metadata_space_map_root));
}

/*
 * Writes a superblock, including the static fields that don't get updated
 * with every commit (possible optimisation here).  'md' should be fully
 * constructed when this is called.
 */
static void prepare_superblock(struct era_metadata *md, struct superblock_disk *disk)
{
	disk->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
	disk->flags = cpu_to_le32(0ul);

	/* FIXME: can't keep blanking the uuid (uuid is currently unused though) */
	memset(disk->uuid, 0, sizeof(disk->uuid));
	disk->version = cpu_to_le32(MAX_ERA_VERSION);

	copy_sm_root(md, disk);

	disk->data_block_size = cpu_to_le32(md->block_size);
	disk->metadata_block_size = cpu_to_le32(DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
	disk->nr_blocks = cpu_to_le32(md->nr_blocks);
	disk->current_era = cpu_to_le32(md->current_era);

	ws_pack(&md->current_writeset->md, &disk->current_writeset);
	disk->writeset_tree_root = cpu_to_le64(md->writeset_tree_root);
	disk->era_array_root = cpu_to_le64(md->era_array_root);
	disk->metadata_snap = cpu_to_le64(md->metadata_snap);
}

static int write_superblock(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *disk;

	r = save_sm_root(md);
	if (r) {
		DMERR("%s: save_sm_root failed", __func__);
		return r;
	}

	r = superblock_lock_zero(md, &sblock);
	if (r)
		return r;

	disk = dm_block_data(sblock);
	prepare_superblock(md, disk);

	return dm_tm_commit(md->tm, sblock);
}

/*
 * Assumes block_size and the infos are set.
 */
static int format_metadata(struct era_metadata *md)
{
	int r;

	r = create_fresh_metadata(md);
	if (r)
		return r;

	r = write_superblock(md);
	if (r) {
		dm_sm_destroy(md->sm);
		dm_tm_destroy(md->tm);
		return r;
	}

	return 0;
}

static int open_metadata(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *disk;

	r = superblock_read_lock(md, &sblock);
	if (r) {
		DMERR("couldn't read_lock superblock");
		return r;
	}

	disk = dm_block_data(sblock);
	r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION,
			       disk->metadata_space_map_root,
			       sizeof(disk->metadata_space_map_root),
			       &md->tm, &md->sm);
	if (r) {
		DMERR("dm_tm_open_with_sm failed");
		goto bad;
	}

	setup_infos(md);

	md->block_size = le32_to_cpu(disk->data_block_size);
	md->nr_blocks = le32_to_cpu(disk->nr_blocks);
	md->current_era = le32_to_cpu(disk->current_era);

	md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root);
	md->era_array_root = le64_to_cpu(disk->era_array_root);
	md->metadata_snap = le64_to_cpu(disk->metadata_snap);
	md->archived_writesets = true;

	dm_bm_unlock(sblock);

	return 0;

bad:
	dm_bm_unlock(sblock);
	return r;
}

static int open_or_format_metadata(struct era_metadata *md,
				   bool may_format)
{
	int r;
	bool unformatted = false;

	r = superblock_all_zeroes(md->bm, &unformatted);
	if (r)
		return r;

	if (unformatted)
		return may_format ? format_metadata(md) : -EPERM;

	return open_metadata(md);
}
static int create_persistent_data_objects(struct era_metadata *md,
					  bool may_format)
{
	int r;

	md->bm = dm_block_manager_create(md->bdev, DM_ERA_METADATA_BLOCK_SIZE,
					 ERA_MAX_CONCURRENT_LOCKS);
	if (IS_ERR(md->bm)) {
		DMERR("could not create block manager");
		return PTR_ERR(md->bm);
	}

	r = open_or_format_metadata(md, may_format);
	if (r)
		dm_block_manager_destroy(md->bm);

	return r;
}

static void destroy_persistent_data_objects(struct era_metadata *md)
{
	dm_sm_destroy(md->sm);
	dm_tm_destroy(md->tm);
	dm_block_manager_destroy(md->bm);
}

/*
 * This waits until all era_map threads have picked up the new filter.
 */
static void swap_writeset(struct era_metadata *md, struct writeset *new_writeset)
{
	rcu_assign_pointer(md->current_writeset, new_writeset);
	synchronize_rcu();
}

/*----------------------------------------------------------------
 * Writesets get 'digested' into the main era array.
 *
 * We're using a coroutine here so the worker thread can do the digestion,
 * thus avoiding synchronisation of the metadata.  Digesting a whole
 * writeset in one go would cause too much latency.
 *--------------------------------------------------------------*/
struct digest {
	uint32_t era;
	unsigned nr_bits, current_bit;
	struct writeset_metadata writeset;
	__le32 value;
	struct dm_disk_bitset info;

	int (*step)(struct era_metadata *, struct digest *);
};

static int metadata_digest_lookup_writeset(struct era_metadata *md,
					   struct digest *d);

static int metadata_digest_remove_writeset(struct era_metadata *md,
					   struct digest *d)
{
	int r;
	uint64_t key = d->era;

	r = dm_btree_remove(&md->writeset_tree_info, md->writeset_tree_root,
			    &key, &md->writeset_tree_root);
	if (r) {
		DMERR("%s: dm_btree_remove failed", __func__);
		return r;
	}

	d->step = metadata_digest_lookup_writeset;
	return 0;
}

#define INSERTS_PER_STEP 100

static int metadata_digest_transcribe_writeset(struct era_metadata *md,
					       struct digest *d)
{
	int r;
	bool marked;
	unsigned b, e = min(d->current_bit + INSERTS_PER_STEP, d->nr_bits);

	for (b = d->current_bit; b < e; b++) {
		r = writeset_marked_on_disk(&d->info, &d->writeset, b, &marked);
		if (r) {
			DMERR("%s: writeset_marked_on_disk failed", __func__);
			return r;
		}

		if (!marked)
			continue;

		__dm_bless_for_disk(&d->value);
		r = dm_array_set_value(&md->era_array_info, md->era_array_root,
				       b, &d->value, &md->era_array_root);
		if (r) {
			DMERR("%s: dm_array_set_value failed", __func__);
			return r;
		}
	}

	if (b == d->nr_bits)
		d->step = metadata_digest_remove_writeset;
	else
		d->current_bit = b;

	return 0;
}

static int metadata_digest_lookup_writeset(struct era_metadata *md,
					   struct digest *d)
{
	int r;
	uint64_t key;
	struct writeset_disk disk;

	r = dm_btree_find_lowest_key(&md->writeset_tree_info,
				     md->writeset_tree_root, &key);
	if (r < 0)
		return r;

	d->era = key;

	r = dm_btree_lookup(&md->writeset_tree_info,
			    md->writeset_tree_root, &key, &disk);
	if (r) {
		if (r == -ENODATA) {
			d->step = NULL;
			return 0;
		}

		DMERR("%s: dm_btree_lookup failed", __func__);
		return r;
	}

	ws_unpack(&disk, &d->writeset);
	d->value = cpu_to_le32(key);

	d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks);
	d->current_bit = 0;
	d->step = metadata_digest_transcribe_writeset;

	return 0;
}
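/*
 * Editorial sketch of the digest state machine driven from the worker;
 * each d->step() call is one bounded unit of work:
 *
 *   lookup_writeset: find the oldest archived writeset, or stop on
 *	-ENODATA when the tree is empty;
 *   transcribe_writeset: copy up to INSERTS_PER_STEP marked bits into
 *	the era array, then either continue or move on;
 *   remove_writeset: delete the digested writeset from the tree and
 *	loop back to lookup_writeset.
 */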
static int metadata_digest_start(struct era_metadata *md, struct digest *d)
{
	if (d->step)
		return 0;

	memset(d, 0, sizeof(*d));

	/*
	 * We initialise another bitset info to avoid any caching side
	 * effects with the previous one.
	 */
	dm_disk_bitset_init(md->tm, &d->info);
	d->step = metadata_digest_lookup_writeset;

	return 0;
}

/*----------------------------------------------------------------
 * High level metadata interface.  Target methods should use these, and not
 * the lower level ones.
 *--------------------------------------------------------------*/
static struct era_metadata *metadata_open(struct block_device *bdev,
					  sector_t block_size,
					  bool may_format)
{
	int r;
	struct era_metadata *md = kzalloc(sizeof(*md), GFP_KERNEL);

	if (!md)
		return NULL;

	md->bdev = bdev;
	md->block_size = block_size;

	md->writesets[0].md.root = INVALID_WRITESET_ROOT;
	md->writesets[1].md.root = INVALID_WRITESET_ROOT;
	md->current_writeset = &md->writesets[0];

	r = create_persistent_data_objects(md, may_format);
	if (r) {
		kfree(md);
		return ERR_PTR(r);
	}

	return md;
}

static void metadata_close(struct era_metadata *md)
{
	destroy_persistent_data_objects(md);
	kfree(md);
}

static bool valid_nr_blocks(dm_block_t n)
{
	/*
	 * dm_bitset restricts us to 2^32.  test_bit & co. restrict us
	 * further to 2^31 - 1.
	 */
	return n < (1ull << 31);
}

static int metadata_resize(struct era_metadata *md, void *arg)
{
	int r;
	dm_block_t *new_size = arg;
	__le32 value;

	if (!valid_nr_blocks(*new_size)) {
		DMERR("Invalid number of origin blocks %llu",
		      (unsigned long long) *new_size);
		return -EINVAL;
	}

	writeset_free(&md->writesets[0]);
	writeset_free(&md->writesets[1]);

	r = writeset_alloc(&md->writesets[0], *new_size);
	if (r) {
		DMERR("%s: writeset_alloc failed for writeset 0", __func__);
		return r;
	}

	r = writeset_alloc(&md->writesets[1], *new_size);
	if (r) {
		DMERR("%s: writeset_alloc failed for writeset 1", __func__);
		return r;
	}

	value = cpu_to_le32(0u);
	__dm_bless_for_disk(&value);
	r = dm_array_resize(&md->era_array_info, md->era_array_root,
			    md->nr_blocks, *new_size,
			    &value, &md->era_array_root);
	if (r) {
		DMERR("%s: dm_array_resize failed", __func__);
		return r;
	}

	md->nr_blocks = *new_size;
	return 0;
}

static int metadata_era_archive(struct era_metadata *md)
{
	int r;
	uint64_t keys[1];
	struct writeset_disk value;

	r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
			    &md->current_writeset->md.root);
	if (r) {
		DMERR("%s: dm_bitset_flush failed", __func__);
		return r;
	}

	ws_pack(&md->current_writeset->md, &value);
	md->current_writeset->md.root = INVALID_WRITESET_ROOT;

	keys[0] = md->current_era;
	__dm_bless_for_disk(&value);
	r = dm_btree_insert(&md->writeset_tree_info, md->writeset_tree_root,
			    keys, &value, &md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't insert writeset into btree", __func__);
		/* FIXME: fail mode */
		return r;
	}

	md->archived_writesets = true;

	return 0;
}
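/*
 * Editorial sketch of an era rollover (metadata_era_rollover() below):
 *
 *   1. metadata_era_archive(): flush the current on-disk bitset and
 *	insert it into the writeset tree, keyed by the era number;
 *   2. writeset_init() on the spare in-core writeset;
 *   3. swap_writeset(): publish it with rcu_assign_pointer() and wait
 *	for in-flight era_map() readers with synchronize_rcu();
 *   4. bump md->current_era.
 */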
static struct writeset *next_writeset(struct era_metadata *md)
{
	return (md->current_writeset == &md->writesets[0]) ?
		&md->writesets[1] : &md->writesets[0];
}

static int metadata_new_era(struct era_metadata *md)
{
	int r;
	struct writeset *new_writeset = next_writeset(md);

	r = writeset_init(&md->bitset_info, new_writeset);
	if (r) {
		DMERR("%s: writeset_init failed", __func__);
		return r;
	}

	swap_writeset(md, new_writeset);
	md->current_era++;

	return 0;
}

static int metadata_era_rollover(struct era_metadata *md)
{
	int r;

	if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) {
		r = metadata_era_archive(md);
		if (r) {
			DMERR("%s: metadata_era_archive failed", __func__);
			/* FIXME: fail mode? */
			return r;
		}
	}

	r = metadata_new_era(md);
	if (r) {
		DMERR("%s: new era failed", __func__);
		/* FIXME: fail mode */
		return r;
	}

	return 0;
}

static bool metadata_current_marked(struct era_metadata *md, dm_block_t block)
{
	bool r;
	struct writeset *ws;

	rcu_read_lock();
	ws = rcu_dereference(md->current_writeset);
	r = writeset_marked(ws, block);
	rcu_read_unlock();

	return r;
}

static int metadata_commit(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;

	if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) {
		r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
				    &md->current_writeset->md.root);
		if (r) {
			DMERR("%s: bitset flush failed", __func__);
			return r;
		}
	}

	r = dm_tm_pre_commit(md->tm);
	if (r) {
		DMERR("%s: pre commit failed", __func__);
		return r;
	}

	r = save_sm_root(md);
	if (r) {
		DMERR("%s: save_sm_root failed", __func__);
		return r;
	}

	r = superblock_lock(md, &sblock);
	if (r) {
		DMERR("%s: superblock lock failed", __func__);
		return r;
	}

	prepare_superblock(md, dm_block_data(sblock));

	return dm_tm_commit(md->tm, sblock);
}

static int metadata_checkpoint(struct era_metadata *md)
{
	/*
	 * For now we just rollover, but later I want to put a check in to
	 * avoid this if the filter is still pretty fresh.
	 */
	return metadata_era_rollover(md);
}
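/*
 * Editorial usage note (assuming the standard dm message interface):
 * a snapshot is taken and dropped from userland, e.g.
 *
 *	dmsetup message <era-dev> 0 take_metadata_snap
 *	... read the snapshot with a userspace tool such as era_dump
 *	    (from thin-provisioning-tools) ...
 *	dmsetup message <era-dev> 0 drop_metadata_snap
 *
 * The snapshot's block number is reported in the status line so tools
 * know where to start reading.
 */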
/*
 * Metadata snapshots allow userland to access era data.
 */
static int metadata_take_snap(struct era_metadata *md)
{
	int r, inc;
	struct dm_block *clone;

	if (md->metadata_snap != SUPERBLOCK_LOCATION) {
		DMERR("%s: metadata snapshot already exists", __func__);
		return -EINVAL;
	}

	r = metadata_era_rollover(md);
	if (r) {
		DMERR("%s: era rollover failed", __func__);
		return r;
	}

	r = metadata_commit(md);
	if (r) {
		DMERR("%s: commit failed", __func__);
		return r;
	}

	r = dm_sm_inc_block(md->sm, SUPERBLOCK_LOCATION);
	if (r) {
		DMERR("%s: couldn't increment superblock", __func__);
		return r;
	}

	r = dm_tm_shadow_block(md->tm, SUPERBLOCK_LOCATION,
			       &sb_validator, &clone, &inc);
	if (r) {
		DMERR("%s: couldn't shadow superblock", __func__);
		dm_sm_dec_block(md->sm, SUPERBLOCK_LOCATION);
		return r;
	}
	BUG_ON(!inc);

	r = dm_sm_inc_block(md->sm, md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't inc writeset tree root", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_sm_inc_block(md->sm, md->era_array_root);
	if (r) {
		DMERR("%s: couldn't inc era tree root", __func__);
		dm_sm_dec_block(md->sm, md->writeset_tree_root);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	md->metadata_snap = dm_block_location(clone);

	dm_tm_unlock(md->tm, clone);

	return 0;
}

static int metadata_drop_snap(struct era_metadata *md)
{
	int r;
	dm_block_t location;
	struct dm_block *clone;
	struct superblock_disk *disk;

	if (md->metadata_snap == SUPERBLOCK_LOCATION) {
		DMERR("%s: no snap to drop", __func__);
		return -EINVAL;
	}

	r = dm_tm_read_lock(md->tm, md->metadata_snap, &sb_validator, &clone);
	if (r) {
		DMERR("%s: couldn't read lock superblock clone", __func__);
		return r;
	}

	/*
	 * Whatever happens now we'll commit with no record of the metadata
	 * snap.
	 */
	md->metadata_snap = SUPERBLOCK_LOCATION;

	disk = dm_block_data(clone);
	r = dm_btree_del(&md->writeset_tree_info,
			 le64_to_cpu(disk->writeset_tree_root));
	if (r) {
		DMERR("%s: error deleting writeset tree clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_array_del(&md->era_array_info, le64_to_cpu(disk->era_array_root));
	if (r) {
		DMERR("%s: error deleting era array clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	location = dm_block_location(clone);
	dm_tm_unlock(md->tm, clone);

	return dm_sm_dec_block(md->sm, location);
}
struct metadata_stats {
	dm_block_t used;
	dm_block_t total;
	dm_block_t snap;
	uint32_t era;
};

static int metadata_get_stats(struct era_metadata *md, void *ptr)
{
	int r;
	struct metadata_stats *s = ptr;
	dm_block_t nr_free, nr_total;

	r = dm_sm_get_nr_free(md->sm, &nr_free);
	if (r) {
		DMERR("dm_sm_get_nr_free returned %d", r);
		return r;
	}

	r = dm_sm_get_nr_blocks(md->sm, &nr_total);
	if (r) {
		DMERR("dm_sm_get_nr_blocks returned %d", r);
		return r;
	}

	s->used = nr_total - nr_free;
	s->total = nr_total;
	s->snap = md->metadata_snap;
	s->era = md->current_era;

	return 0;
}

/*----------------------------------------------------------------*/

struct era {
	struct dm_target *ti;

	struct dm_dev *metadata_dev;
	struct dm_dev *origin_dev;

	dm_block_t nr_blocks;
	uint32_t sectors_per_block;
	int sectors_per_block_shift;
	struct era_metadata *md;

	struct workqueue_struct *wq;
	struct work_struct worker;

	spinlock_t deferred_lock;
	struct bio_list deferred_bios;

	spinlock_t rpc_lock;
	struct list_head rpc_calls;

	struct digest digest;
	atomic_t suspended;
};

struct rpc {
	struct list_head list;

	int (*fn0)(struct era_metadata *);
	int (*fn1)(struct era_metadata *, void *);
	void *arg;
	int result;

	struct completion complete;
};
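/*
 * Editorial note: both bios (via era->deferred_bios) and management
 * operations (via era->rpc_calls) funnel through the single ordered
 * workqueue, so the metadata is only ever touched from one thread and
 * needs no further locking.  An rpc carries either fn0 (no argument)
 * or fn1 (one argument); perform_rpc() blocks the caller on the
 * completion until the worker has run it.
 */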
/*----------------------------------------------------------------
 * Remapping.
 *---------------------------------------------------------------*/
static bool block_size_is_power_of_two(struct era *era)
{
	return era->sectors_per_block_shift >= 0;
}

static dm_block_t get_block(struct era *era, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(era))
		(void) sector_div(block_nr, era->sectors_per_block);
	else
		block_nr >>= era->sectors_per_block_shift;

	return block_nr;
}

static void remap_to_origin(struct era *era, struct bio *bio)
{
	bio_set_dev(bio, era->origin_dev->bdev);
}

/*----------------------------------------------------------------
 * Worker thread
 *--------------------------------------------------------------*/
static void wake_worker(struct era *era)
{
	if (!atomic_read(&era->suspended))
		queue_work(era->wq, &era->worker);
}

static void process_old_eras(struct era *era)
{
	int r;

	if (!era->digest.step)
		return;

	r = era->digest.step(era->md, &era->digest);
	if (r < 0) {
		DMERR("%s: digest step failed, stopping digestion", __func__);
		era->digest.step = NULL;

	} else if (era->digest.step)
		wake_worker(era);
}

static void process_deferred_bios(struct era *era)
{
	int r;
	struct bio_list deferred_bios, marked_bios;
	struct bio *bio;
	bool commit_needed = false;
	bool failed = false;

	bio_list_init(&deferred_bios);
	bio_list_init(&marked_bios);

	spin_lock(&era->deferred_lock);
	bio_list_merge(&deferred_bios, &era->deferred_bios);
	bio_list_init(&era->deferred_bios);
	spin_unlock(&era->deferred_lock);

	while ((bio = bio_list_pop(&deferred_bios))) {
		r = writeset_test_and_set(&era->md->bitset_info,
					  era->md->current_writeset,
					  get_block(era, bio));
		if (r < 0) {
			/*
			 * This is bad news, we need to rollback.
			 * FIXME: finish.
			 */
			failed = true;

		} else if (r == 0)
			commit_needed = true;

		bio_list_add(&marked_bios, bio);
	}

	if (commit_needed) {
		r = metadata_commit(era->md);
		if (r)
			failed = true;
	}

	if (failed)
		while ((bio = bio_list_pop(&marked_bios)))
			bio_io_error(bio);
	else
		while ((bio = bio_list_pop(&marked_bios)))
			submit_bio_noacct(bio);
}

static void process_rpc_calls(struct era *era)
{
	int r;
	bool need_commit = false;
	struct list_head calls;
	struct rpc *rpc, *tmp;

	INIT_LIST_HEAD(&calls);
	spin_lock(&era->rpc_lock);
	list_splice_init(&era->rpc_calls, &calls);
	spin_unlock(&era->rpc_lock);

	list_for_each_entry_safe(rpc, tmp, &calls, list) {
		rpc->result = rpc->fn0 ? rpc->fn0(era->md) : rpc->fn1(era->md, rpc->arg);
		need_commit = true;
	}

	if (need_commit) {
		r = metadata_commit(era->md);
		if (r)
			list_for_each_entry_safe(rpc, tmp, &calls, list)
				rpc->result = r;
	}

	list_for_each_entry_safe(rpc, tmp, &calls, list)
		complete(&rpc->complete);
}
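/*
 * Editorial note on do_work() below: the ordering matters.  A newly
 * archived writeset kicks off a digest, old eras are digested a step
 * at a time, deferred writes are marked and (if needed) committed
 * before being released, and finally any queued rpc calls run and are
 * completed.
 */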
static void kick_off_digest(struct era *era)
{
	if (era->md->archived_writesets) {
		era->md->archived_writesets = false;
		metadata_digest_start(era->md, &era->digest);
	}
}

static void do_work(struct work_struct *ws)
{
	struct era *era = container_of(ws, struct era, worker);

	kick_off_digest(era);
	process_old_eras(era);
	process_deferred_bios(era);
	process_rpc_calls(era);
}

static void defer_bio(struct era *era, struct bio *bio)
{
	spin_lock(&era->deferred_lock);
	bio_list_add(&era->deferred_bios, bio);
	spin_unlock(&era->deferred_lock);

	wake_worker(era);
}

/*
 * Make an rpc call to the worker to change the metadata.
 */
static int perform_rpc(struct era *era, struct rpc *rpc)
{
	rpc->result = 0;
	init_completion(&rpc->complete);

	spin_lock(&era->rpc_lock);
	list_add(&rpc->list, &era->rpc_calls);
	spin_unlock(&era->rpc_lock);

	wake_worker(era);
	wait_for_completion(&rpc->complete);

	return rpc->result;
}

static int in_worker0(struct era *era, int (*fn)(struct era_metadata *))
{
	struct rpc rpc;
	rpc.fn0 = fn;
	rpc.fn1 = NULL;

	return perform_rpc(era, &rpc);
}

static int in_worker1(struct era *era,
		      int (*fn)(struct era_metadata *, void *), void *arg)
{
	struct rpc rpc;
	rpc.fn0 = NULL;
	rpc.fn1 = fn;
	rpc.arg = arg;

	return perform_rpc(era, &rpc);
}

static void start_worker(struct era *era)
{
	atomic_set(&era->suspended, 0);
}

static void stop_worker(struct era *era)
{
	atomic_set(&era->suspended, 1);
	flush_workqueue(era->wq);
}

/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/
static void era_destroy(struct era *era)
{
	if (era->md)
		metadata_close(era->md);

	if (era->wq)
		destroy_workqueue(era->wq);

	if (era->origin_dev)
		dm_put_device(era->ti, era->origin_dev);

	if (era->metadata_dev)
		dm_put_device(era->ti, era->metadata_dev);

	kfree(era);
}

static dm_block_t calc_nr_blocks(struct era *era)
{
	return dm_sector_div_up(era->ti->len, era->sectors_per_block);
}

static bool valid_block_size(dm_block_t block_size)
{
	bool greater_than_zero = block_size > 0;
	bool multiple_of_min_block_size = (block_size & (MIN_BLOCK_SIZE - 1)) == 0;

	return greater_than_zero && multiple_of_min_block_size;
}
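/*
 * Editorial example of a table line for era_ctr() below (device names
 * are illustrative):
 *
 *	echo "0 $(blockdev --getsz /dev/vg/origin) era \
 *	      /dev/vg/era-meta /dev/vg/origin 8" | dmsetup create my-era
 *
 * The block size is given in 512-byte sectors and must be a multiple
 * of MIN_BLOCK_SIZE (8 sectors = 4KiB); it need not be a power of two.
 */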
/*
 * <metadata dev> <data dev> <data block size (sectors)>
 */
static int era_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	int r;
	char dummy;
	struct era *era;
	struct era_metadata *md;

	if (argc != 3) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	era = kzalloc(sizeof(*era), GFP_KERNEL);
	if (!era) {
		ti->error = "Error allocating era structure";
		return -ENOMEM;
	}

	era->ti = ti;

	r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &era->metadata_dev);
	if (r) {
		ti->error = "Error opening metadata device";
		era_destroy(era);
		return -EINVAL;
	}

	r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &era->origin_dev);
	if (r) {
		ti->error = "Error opening data device";
		era_destroy(era);
		return -EINVAL;
	}

	r = sscanf(argv[2], "%u%c", &era->sectors_per_block, &dummy);
	if (r != 1) {
		ti->error = "Error parsing block size";
		era_destroy(era);
		return -EINVAL;
	}

	r = dm_set_target_max_io_len(ti, era->sectors_per_block);
	if (r) {
		ti->error = "could not set max io len";
		era_destroy(era);
		return -EINVAL;
	}

	if (!valid_block_size(era->sectors_per_block)) {
		ti->error = "Invalid block size";
		era_destroy(era);
		return -EINVAL;
	}
	if (era->sectors_per_block & (era->sectors_per_block - 1))
		era->sectors_per_block_shift = -1;
	else
		era->sectors_per_block_shift = __ffs(era->sectors_per_block);

	md = metadata_open(era->metadata_dev->bdev, era->sectors_per_block, true);
	if (IS_ERR(md)) {
		ti->error = "Error reading metadata";
		era_destroy(era);
		return PTR_ERR(md);
	}
	era->md = md;

	era->nr_blocks = calc_nr_blocks(era);

	r = metadata_resize(era->md, &era->nr_blocks);
	if (r) {
		ti->error = "couldn't resize metadata";
		era_destroy(era);
		return -ENOMEM;
	}

	era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
	if (!era->wq) {
		ti->error = "could not create workqueue for metadata object";
		era_destroy(era);
		return -ENOMEM;
	}
	INIT_WORK(&era->worker, do_work);

	spin_lock_init(&era->deferred_lock);
	bio_list_init(&era->deferred_bios);

	spin_lock_init(&era->rpc_lock);
	INIT_LIST_HEAD(&era->rpc_calls);

	ti->private = era;
	ti->num_flush_bios = 1;
	ti->flush_supported = true;

	ti->num_discard_bios = 1;

	return 0;
}

static void era_dtr(struct dm_target *ti)
{
	era_destroy(ti->private);
}
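/*
 * Editorial note on the map path below: only writes to blocks not yet
 * marked in the current era take the slow path through the worker
 * (defer_bio()), which marks the block and commits before releasing
 * the bio.  Reads, flushes and already-marked writes are remapped
 * inline and cost nothing extra.
 */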
static int era_map(struct dm_target *ti, struct bio *bio)
{
	struct era *era = ti->private;
	dm_block_t block = get_block(era, bio);

	/*
	 * All bios get remapped to the origin device.  We do this now, but
	 * it may not get issued until later.  Depending on whether the
	 * block is marked in this era.
	 */
	remap_to_origin(era, bio);

	/*
	 * REQ_PREFLUSH bios carry no data, so we're not interested in them.
	 */
	if (!(bio->bi_opf & REQ_PREFLUSH) &&
	    (bio_data_dir(bio) == WRITE) &&
	    !metadata_current_marked(era->md, block)) {
		defer_bio(era, bio);
		return DM_MAPIO_SUBMITTED;
	}

	return DM_MAPIO_REMAPPED;
}

static void era_postsuspend(struct dm_target *ti)
{
	int r;
	struct era *era = ti->private;

	r = in_worker0(era, metadata_era_archive);
	if (r) {
		DMERR("%s: couldn't archive current era", __func__);
		/* FIXME: fail mode */
	}

	stop_worker(era);
}

static int era_preresume(struct dm_target *ti)
{
	int r;
	struct era *era = ti->private;
	dm_block_t new_size = calc_nr_blocks(era);

	if (era->nr_blocks != new_size) {
		r = in_worker1(era, metadata_resize, &new_size);
		if (r)
			return r;

		era->nr_blocks = new_size;
	}

	start_worker(era);

	r = in_worker0(era, metadata_new_era);
	if (r) {
		DMERR("%s: metadata_new_era failed", __func__);
		return r;
	}

	return 0;
}

/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <current era> <held metadata root | '-'>
 */
static void era_status(struct dm_target *ti, status_type_t type,
		       unsigned status_flags, char *result, unsigned maxlen)
{
	int r;
	struct era *era = ti->private;
	ssize_t sz = 0;
	struct metadata_stats stats;
	char buf[BDEVNAME_SIZE];

	switch (type) {
	case STATUSTYPE_INFO:
		r = in_worker1(era, metadata_get_stats, &stats);
		if (r)
			goto err;

		DMEMIT("%u %llu/%llu %u",
		       (unsigned) (DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
		       (unsigned long long) stats.used,
		       (unsigned long long) stats.total,
		       (unsigned) stats.era);

		if (stats.snap != SUPERBLOCK_LOCATION)
			DMEMIT(" %llu", stats.snap);
		else
			DMEMIT(" -");
		break;

	case STATUSTYPE_TABLE:
		format_dev_t(buf, era->metadata_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, era->origin_dev->bdev->bd_dev);
		DMEMIT("%s %u", buf, era->sectors_per_block);
		break;
	}

	return;

err:
	DMEMIT("Error");
}

static int era_message(struct dm_target *ti, unsigned argc, char **argv,
		       char *result, unsigned maxlen)
{
	struct era *era = ti->private;

	if (argc != 1) {
		DMERR("incorrect number of message arguments");
		return -EINVAL;
	}

	if (!strcasecmp(argv[0], "checkpoint"))
		return in_worker0(era, metadata_checkpoint);

	if (!strcasecmp(argv[0], "take_metadata_snap"))
		return in_worker0(era, metadata_take_snap);

	if (!strcasecmp(argv[0], "drop_metadata_snap"))
		return in_worker0(era, metadata_drop_snap);

	DMERR("unsupported message '%s'", argv[0]);
	return -EINVAL;
}
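/*
 * Editorial example (values illustrative): the status line might read
 *
 *	# dmsetup status my-era
 *	0 409600 era 8 124/4096 7 -
 *
 * i.e. 8-sector (4KiB) metadata blocks, 124 of 4096 used, current era
 * 7, no held metadata snapshot.  "dmsetup message my-era 0 checkpoint"
 * rolls the era over via metadata_checkpoint() above.
 */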
static sector_t get_dev_size(struct dm_dev *dev)
{
	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
}

static int era_iterate_devices(struct dm_target *ti,
			       iterate_devices_callout_fn fn, void *data)
{
	struct era *era = ti->private;
	return fn(ti, era->origin_dev, 0, get_dev_size(era->origin_dev), data);
}

static void era_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct era *era = ti->private;
	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;

	/*
	 * If the system-determined stacked limits are compatible with the
	 * era device's blocksize (io_opt is a factor) do not override them.
	 */
	if (io_opt_sectors < era->sectors_per_block ||
	    do_div(io_opt_sectors, era->sectors_per_block)) {
		blk_limits_io_min(limits, 0);
		blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT);
	}
}

/*----------------------------------------------------------------*/

static struct target_type era_target = {
	.name = "era",
	.version = {1, 0, 0},
	.module = THIS_MODULE,
	.ctr = era_ctr,
	.dtr = era_dtr,
	.map = era_map,
	.postsuspend = era_postsuspend,
	.preresume = era_preresume,
	.status = era_status,
	.message = era_message,
	.iterate_devices = era_iterate_devices,
	.io_hints = era_io_hints
};

static int __init dm_era_init(void)
{
	int r;

	r = dm_register_target(&era_target);
	if (r) {
		DMERR("era target registration failed: %d", r);
		return r;
	}

	return 0;
}

static void __exit dm_era_exit(void)
{
	dm_unregister_target(&era_target);
}

module_init(dm_era_init);
module_exit(dm_era_exit);

MODULE_DESCRIPTION(DM_NAME " era target");
MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
MODULE_LICENSE("GPL");