1 #include "dm.h" 2 #include "persistent-data/dm-transaction-manager.h" 3 #include "persistent-data/dm-bitset.h" 4 #include "persistent-data/dm-space-map.h" 5 6 #include <linux/dm-io.h> 7 #include <linux/dm-kcopyd.h> 8 #include <linux/init.h> 9 #include <linux/mempool.h> 10 #include <linux/module.h> 11 #include <linux/slab.h> 12 #include <linux/vmalloc.h> 13 14 #define DM_MSG_PREFIX "era" 15 16 #define SUPERBLOCK_LOCATION 0 17 #define SUPERBLOCK_MAGIC 2126579579 18 #define SUPERBLOCK_CSUM_XOR 146538381 19 #define MIN_ERA_VERSION 1 20 #define MAX_ERA_VERSION 1 21 #define INVALID_WRITESET_ROOT SUPERBLOCK_LOCATION 22 #define MIN_BLOCK_SIZE 8 23 24 /*---------------------------------------------------------------- 25 * Writeset 26 *--------------------------------------------------------------*/ 27 struct writeset_metadata { 28 uint32_t nr_bits; 29 dm_block_t root; 30 }; 31 32 struct writeset { 33 struct writeset_metadata md; 34 35 /* 36 * An in core copy of the bits to save constantly doing look ups on 37 * disk. 38 */ 39 unsigned long *bits; 40 }; 41 42 /* 43 * This does not free off the on disk bitset as this will normally be done 44 * after digesting into the era array. 45 */ 46 static void writeset_free(struct writeset *ws) 47 { 48 vfree(ws->bits); 49 } 50 51 static int setup_on_disk_bitset(struct dm_disk_bitset *info, 52 unsigned nr_bits, dm_block_t *root) 53 { 54 int r; 55 56 r = dm_bitset_empty(info, root); 57 if (r) 58 return r; 59 60 return dm_bitset_resize(info, *root, 0, nr_bits, false, root); 61 } 62 63 static size_t bitset_size(unsigned nr_bits) 64 { 65 return sizeof(unsigned long) * dm_div_up(nr_bits, BITS_PER_LONG); 66 } 67 68 /* 69 * Allocates memory for the in core bitset. 70 */ 71 static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) 72 { 73 ws->md.nr_bits = nr_blocks; 74 ws->md.root = INVALID_WRITESET_ROOT; 75 ws->bits = vzalloc(bitset_size(nr_blocks)); 76 if (!ws->bits) { 77 DMERR("%s: couldn't allocate in memory bitset", __func__); 78 return -ENOMEM; 79 } 80 81 return 0; 82 } 83 84 /* 85 * Wipes the in-core bitset, and creates a new on disk bitset. 86 */ 87 static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws) 88 { 89 int r; 90 91 memset(ws->bits, 0, bitset_size(ws->md.nr_bits)); 92 93 r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root); 94 if (r) { 95 DMERR("%s: setup_on_disk_bitset failed", __func__); 96 return r; 97 } 98 99 return 0; 100 } 101 102 static bool writeset_marked(struct writeset *ws, dm_block_t block) 103 { 104 return test_bit(block, ws->bits); 105 } 106 107 static int writeset_marked_on_disk(struct dm_disk_bitset *info, 108 struct writeset_metadata *m, dm_block_t block, 109 bool *result) 110 { 111 dm_block_t old = m->root; 112 113 /* 114 * The bitset was flushed when it was archived, so we know there'll 115 * be no change to the root. 116 */ 117 int r = dm_bitset_test_bit(info, m->root, block, &m->root, result); 118 if (r) { 119 DMERR("%s: dm_bitset_test_bit failed", __func__); 120 return r; 121 } 122 123 BUG_ON(m->root != old); 124 125 return r; 126 } 127 128 /* 129 * Returns < 0 on error, 0 if the bit wasn't previously set, 1 if it was. 130 */ 131 static int writeset_test_and_set(struct dm_disk_bitset *info, 132 struct writeset *ws, uint32_t block) 133 { 134 int r; 135 136 if (!test_and_set_bit(block, ws->bits)) { 137 r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root); 138 if (r) { 139 /* FIXME: fail mode */ 140 return r; 141 } 142 143 return 0; 144 } 145 146 return 1; 147 } 148 149 /*---------------------------------------------------------------- 150 * On disk metadata layout 151 *--------------------------------------------------------------*/ 152 #define SPACE_MAP_ROOT_SIZE 128 153 #define UUID_LEN 16 154 155 struct writeset_disk { 156 __le32 nr_bits; 157 __le64 root; 158 } __packed; 159 160 struct superblock_disk { 161 __le32 csum; 162 __le32 flags; 163 __le64 blocknr; 164 165 __u8 uuid[UUID_LEN]; 166 __le64 magic; 167 __le32 version; 168 169 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 170 171 __le32 data_block_size; 172 __le32 metadata_block_size; 173 __le32 nr_blocks; 174 175 __le32 current_era; 176 struct writeset_disk current_writeset; 177 178 /* 179 * Only these two fields are valid within the metadata snapshot. 180 */ 181 __le64 writeset_tree_root; 182 __le64 era_array_root; 183 184 __le64 metadata_snap; 185 } __packed; 186 187 /*---------------------------------------------------------------- 188 * Superblock validation 189 *--------------------------------------------------------------*/ 190 static void sb_prepare_for_write(struct dm_block_validator *v, 191 struct dm_block *b, 192 size_t sb_block_size) 193 { 194 struct superblock_disk *disk = dm_block_data(b); 195 196 disk->blocknr = cpu_to_le64(dm_block_location(b)); 197 disk->csum = cpu_to_le32(dm_bm_checksum(&disk->flags, 198 sb_block_size - sizeof(__le32), 199 SUPERBLOCK_CSUM_XOR)); 200 } 201 202 static int check_metadata_version(struct superblock_disk *disk) 203 { 204 uint32_t metadata_version = le32_to_cpu(disk->version); 205 if (metadata_version < MIN_ERA_VERSION || metadata_version > MAX_ERA_VERSION) { 206 DMERR("Era metadata version %u found, but only versions between %u and %u supported.", 207 metadata_version, MIN_ERA_VERSION, MAX_ERA_VERSION); 208 return -EINVAL; 209 } 210 211 return 0; 212 } 213 214 static int sb_check(struct dm_block_validator *v, 215 struct dm_block *b, 216 size_t sb_block_size) 217 { 218 struct superblock_disk *disk = dm_block_data(b); 219 __le32 csum_le; 220 221 if (dm_block_location(b) != le64_to_cpu(disk->blocknr)) { 222 DMERR("sb_check failed: blocknr %llu: wanted %llu", 223 le64_to_cpu(disk->blocknr), 224 (unsigned long long)dm_block_location(b)); 225 return -ENOTBLK; 226 } 227 228 if (le64_to_cpu(disk->magic) != SUPERBLOCK_MAGIC) { 229 DMERR("sb_check failed: magic %llu: wanted %llu", 230 le64_to_cpu(disk->magic), 231 (unsigned long long) SUPERBLOCK_MAGIC); 232 return -EILSEQ; 233 } 234 235 csum_le = cpu_to_le32(dm_bm_checksum(&disk->flags, 236 sb_block_size - sizeof(__le32), 237 SUPERBLOCK_CSUM_XOR)); 238 if (csum_le != disk->csum) { 239 DMERR("sb_check failed: csum %u: wanted %u", 240 le32_to_cpu(csum_le), le32_to_cpu(disk->csum)); 241 return -EILSEQ; 242 } 243 244 return check_metadata_version(disk); 245 } 246 247 static struct dm_block_validator sb_validator = { 248 .name = "superblock", 249 .prepare_for_write = sb_prepare_for_write, 250 .check = sb_check 251 }; 252 253 /*---------------------------------------------------------------- 254 * Low level metadata handling 255 *--------------------------------------------------------------*/ 256 #define DM_ERA_METADATA_BLOCK_SIZE 4096 257 #define DM_ERA_METADATA_CACHE_SIZE 64 258 #define ERA_MAX_CONCURRENT_LOCKS 5 259 260 struct era_metadata { 261 struct block_device *bdev; 262 struct dm_block_manager *bm; 263 struct dm_space_map *sm; 264 struct dm_transaction_manager *tm; 265 266 dm_block_t block_size; 267 uint32_t nr_blocks; 268 269 uint32_t current_era; 270 271 /* 272 * We preallocate 2 writesets. When an era rolls over we 273 * switch between them. This means the allocation is done at 274 * preresume time, rather than on the io path. 275 */ 276 struct writeset writesets[2]; 277 struct writeset *current_writeset; 278 279 dm_block_t writeset_tree_root; 280 dm_block_t era_array_root; 281 282 struct dm_disk_bitset bitset_info; 283 struct dm_btree_info writeset_tree_info; 284 struct dm_array_info era_array_info; 285 286 dm_block_t metadata_snap; 287 288 /* 289 * A flag that is set whenever a writeset has been archived. 290 */ 291 bool archived_writesets; 292 293 /* 294 * Reading the space map root can fail, so we read it into this 295 * buffer before the superblock is locked and updated. 296 */ 297 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 298 }; 299 300 static int superblock_read_lock(struct era_metadata *md, 301 struct dm_block **sblock) 302 { 303 return dm_bm_read_lock(md->bm, SUPERBLOCK_LOCATION, 304 &sb_validator, sblock); 305 } 306 307 static int superblock_lock_zero(struct era_metadata *md, 308 struct dm_block **sblock) 309 { 310 return dm_bm_write_lock_zero(md->bm, SUPERBLOCK_LOCATION, 311 &sb_validator, sblock); 312 } 313 314 static int superblock_lock(struct era_metadata *md, 315 struct dm_block **sblock) 316 { 317 return dm_bm_write_lock(md->bm, SUPERBLOCK_LOCATION, 318 &sb_validator, sblock); 319 } 320 321 /* FIXME: duplication with cache and thin */ 322 static int superblock_all_zeroes(struct dm_block_manager *bm, bool *result) 323 { 324 int r; 325 unsigned i; 326 struct dm_block *b; 327 __le64 *data_le, zero = cpu_to_le64(0); 328 unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64); 329 330 /* 331 * We can't use a validator here - it may be all zeroes. 332 */ 333 r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &b); 334 if (r) 335 return r; 336 337 data_le = dm_block_data(b); 338 *result = true; 339 for (i = 0; i < sb_block_size; i++) { 340 if (data_le[i] != zero) { 341 *result = false; 342 break; 343 } 344 } 345 346 return dm_bm_unlock(b); 347 } 348 349 /*----------------------------------------------------------------*/ 350 351 static void ws_pack(const struct writeset_metadata *core, struct writeset_disk *disk) 352 { 353 disk->nr_bits = cpu_to_le32(core->nr_bits); 354 disk->root = cpu_to_le64(core->root); 355 } 356 357 static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata *core) 358 { 359 core->nr_bits = le32_to_cpu(disk->nr_bits); 360 core->root = le64_to_cpu(disk->root); 361 } 362 363 static void ws_inc(void *context, const void *value) 364 { 365 struct era_metadata *md = context; 366 struct writeset_disk ws_d; 367 dm_block_t b; 368 369 memcpy(&ws_d, value, sizeof(ws_d)); 370 b = le64_to_cpu(ws_d.root); 371 372 dm_tm_inc(md->tm, b); 373 } 374 375 static void ws_dec(void *context, const void *value) 376 { 377 struct era_metadata *md = context; 378 struct writeset_disk ws_d; 379 dm_block_t b; 380 381 memcpy(&ws_d, value, sizeof(ws_d)); 382 b = le64_to_cpu(ws_d.root); 383 384 dm_bitset_del(&md->bitset_info, b); 385 } 386 387 static int ws_eq(void *context, const void *value1, const void *value2) 388 { 389 return !memcmp(value1, value2, sizeof(struct writeset_metadata)); 390 } 391 392 /*----------------------------------------------------------------*/ 393 394 static void setup_writeset_tree_info(struct era_metadata *md) 395 { 396 struct dm_btree_value_type *vt = &md->writeset_tree_info.value_type; 397 md->writeset_tree_info.tm = md->tm; 398 md->writeset_tree_info.levels = 1; 399 vt->context = md; 400 vt->size = sizeof(struct writeset_disk); 401 vt->inc = ws_inc; 402 vt->dec = ws_dec; 403 vt->equal = ws_eq; 404 } 405 406 static void setup_era_array_info(struct era_metadata *md) 407 408 { 409 struct dm_btree_value_type vt; 410 vt.context = NULL; 411 vt.size = sizeof(__le32); 412 vt.inc = NULL; 413 vt.dec = NULL; 414 vt.equal = NULL; 415 416 dm_array_info_init(&md->era_array_info, md->tm, &vt); 417 } 418 419 static void setup_infos(struct era_metadata *md) 420 { 421 dm_disk_bitset_init(md->tm, &md->bitset_info); 422 setup_writeset_tree_info(md); 423 setup_era_array_info(md); 424 } 425 426 /*----------------------------------------------------------------*/ 427 428 static int create_fresh_metadata(struct era_metadata *md) 429 { 430 int r; 431 432 r = dm_tm_create_with_sm(md->bm, SUPERBLOCK_LOCATION, 433 &md->tm, &md->sm); 434 if (r < 0) { 435 DMERR("dm_tm_create_with_sm failed"); 436 return r; 437 } 438 439 setup_infos(md); 440 441 r = dm_btree_empty(&md->writeset_tree_info, &md->writeset_tree_root); 442 if (r) { 443 DMERR("couldn't create new writeset tree"); 444 goto bad; 445 } 446 447 r = dm_array_empty(&md->era_array_info, &md->era_array_root); 448 if (r) { 449 DMERR("couldn't create era array"); 450 goto bad; 451 } 452 453 return 0; 454 455 bad: 456 dm_sm_destroy(md->sm); 457 dm_tm_destroy(md->tm); 458 459 return r; 460 } 461 462 static int save_sm_root(struct era_metadata *md) 463 { 464 int r; 465 size_t metadata_len; 466 467 r = dm_sm_root_size(md->sm, &metadata_len); 468 if (r < 0) 469 return r; 470 471 return dm_sm_copy_root(md->sm, &md->metadata_space_map_root, 472 metadata_len); 473 } 474 475 static void copy_sm_root(struct era_metadata *md, struct superblock_disk *disk) 476 { 477 memcpy(&disk->metadata_space_map_root, 478 &md->metadata_space_map_root, 479 sizeof(md->metadata_space_map_root)); 480 } 481 482 /* 483 * Writes a superblock, including the static fields that don't get updated 484 * with every commit (possible optimisation here). 'md' should be fully 485 * constructed when this is called. 486 */ 487 static void prepare_superblock(struct era_metadata *md, struct superblock_disk *disk) 488 { 489 disk->magic = cpu_to_le64(SUPERBLOCK_MAGIC); 490 disk->flags = cpu_to_le32(0ul); 491 492 /* FIXME: can't keep blanking the uuid (uuid is currently unused though) */ 493 memset(disk->uuid, 0, sizeof(disk->uuid)); 494 disk->version = cpu_to_le32(MAX_ERA_VERSION); 495 496 copy_sm_root(md, disk); 497 498 disk->data_block_size = cpu_to_le32(md->block_size); 499 disk->metadata_block_size = cpu_to_le32(DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); 500 disk->nr_blocks = cpu_to_le32(md->nr_blocks); 501 disk->current_era = cpu_to_le32(md->current_era); 502 503 ws_pack(&md->current_writeset->md, &disk->current_writeset); 504 disk->writeset_tree_root = cpu_to_le64(md->writeset_tree_root); 505 disk->era_array_root = cpu_to_le64(md->era_array_root); 506 disk->metadata_snap = cpu_to_le64(md->metadata_snap); 507 } 508 509 static int write_superblock(struct era_metadata *md) 510 { 511 int r; 512 struct dm_block *sblock; 513 struct superblock_disk *disk; 514 515 r = save_sm_root(md); 516 if (r) { 517 DMERR("%s: save_sm_root failed", __func__); 518 return r; 519 } 520 521 r = superblock_lock_zero(md, &sblock); 522 if (r) 523 return r; 524 525 disk = dm_block_data(sblock); 526 prepare_superblock(md, disk); 527 528 return dm_tm_commit(md->tm, sblock); 529 } 530 531 /* 532 * Assumes block_size and the infos are set. 533 */ 534 static int format_metadata(struct era_metadata *md) 535 { 536 int r; 537 538 r = create_fresh_metadata(md); 539 if (r) 540 return r; 541 542 r = write_superblock(md); 543 if (r) { 544 dm_sm_destroy(md->sm); 545 dm_tm_destroy(md->tm); 546 return r; 547 } 548 549 return 0; 550 } 551 552 static int open_metadata(struct era_metadata *md) 553 { 554 int r; 555 struct dm_block *sblock; 556 struct superblock_disk *disk; 557 558 r = superblock_read_lock(md, &sblock); 559 if (r) { 560 DMERR("couldn't read_lock superblock"); 561 return r; 562 } 563 564 disk = dm_block_data(sblock); 565 r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION, 566 disk->metadata_space_map_root, 567 sizeof(disk->metadata_space_map_root), 568 &md->tm, &md->sm); 569 if (r) { 570 DMERR("dm_tm_open_with_sm failed"); 571 goto bad; 572 } 573 574 setup_infos(md); 575 576 md->block_size = le32_to_cpu(disk->data_block_size); 577 md->nr_blocks = le32_to_cpu(disk->nr_blocks); 578 md->current_era = le32_to_cpu(disk->current_era); 579 580 md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root); 581 md->era_array_root = le64_to_cpu(disk->era_array_root); 582 md->metadata_snap = le64_to_cpu(disk->metadata_snap); 583 md->archived_writesets = true; 584 585 return dm_bm_unlock(sblock); 586 587 bad: 588 dm_bm_unlock(sblock); 589 return r; 590 } 591 592 static int open_or_format_metadata(struct era_metadata *md, 593 bool may_format) 594 { 595 int r; 596 bool unformatted = false; 597 598 r = superblock_all_zeroes(md->bm, &unformatted); 599 if (r) 600 return r; 601 602 if (unformatted) 603 return may_format ? format_metadata(md) : -EPERM; 604 605 return open_metadata(md); 606 } 607 608 static int create_persistent_data_objects(struct era_metadata *md, 609 bool may_format) 610 { 611 int r; 612 613 md->bm = dm_block_manager_create(md->bdev, DM_ERA_METADATA_BLOCK_SIZE, 614 DM_ERA_METADATA_CACHE_SIZE, 615 ERA_MAX_CONCURRENT_LOCKS); 616 if (IS_ERR(md->bm)) { 617 DMERR("could not create block manager"); 618 return PTR_ERR(md->bm); 619 } 620 621 r = open_or_format_metadata(md, may_format); 622 if (r) 623 dm_block_manager_destroy(md->bm); 624 625 return r; 626 } 627 628 static void destroy_persistent_data_objects(struct era_metadata *md) 629 { 630 dm_sm_destroy(md->sm); 631 dm_tm_destroy(md->tm); 632 dm_block_manager_destroy(md->bm); 633 } 634 635 /* 636 * This waits until all era_map threads have picked up the new filter. 637 */ 638 static void swap_writeset(struct era_metadata *md, struct writeset *new_writeset) 639 { 640 rcu_assign_pointer(md->current_writeset, new_writeset); 641 synchronize_rcu(); 642 } 643 644 /*---------------------------------------------------------------- 645 * Writesets get 'digested' into the main era array. 646 * 647 * We're using a coroutine here so the worker thread can do the digestion, 648 * thus avoiding synchronisation of the metadata. Digesting a whole 649 * writeset in one go would cause too much latency. 650 *--------------------------------------------------------------*/ 651 struct digest { 652 uint32_t era; 653 unsigned nr_bits, current_bit; 654 struct writeset_metadata writeset; 655 __le32 value; 656 struct dm_disk_bitset info; 657 658 int (*step)(struct era_metadata *, struct digest *); 659 }; 660 661 static int metadata_digest_lookup_writeset(struct era_metadata *md, 662 struct digest *d); 663 664 static int metadata_digest_remove_writeset(struct era_metadata *md, 665 struct digest *d) 666 { 667 int r; 668 uint64_t key = d->era; 669 670 r = dm_btree_remove(&md->writeset_tree_info, md->writeset_tree_root, 671 &key, &md->writeset_tree_root); 672 if (r) { 673 DMERR("%s: dm_btree_remove failed", __func__); 674 return r; 675 } 676 677 d->step = metadata_digest_lookup_writeset; 678 return 0; 679 } 680 681 #define INSERTS_PER_STEP 100 682 683 static int metadata_digest_transcribe_writeset(struct era_metadata *md, 684 struct digest *d) 685 { 686 int r; 687 bool marked; 688 unsigned b, e = min(d->current_bit + INSERTS_PER_STEP, d->nr_bits); 689 690 for (b = d->current_bit; b < e; b++) { 691 r = writeset_marked_on_disk(&d->info, &d->writeset, b, &marked); 692 if (r) { 693 DMERR("%s: writeset_marked_on_disk failed", __func__); 694 return r; 695 } 696 697 if (!marked) 698 continue; 699 700 __dm_bless_for_disk(&d->value); 701 r = dm_array_set_value(&md->era_array_info, md->era_array_root, 702 b, &d->value, &md->era_array_root); 703 if (r) { 704 DMERR("%s: dm_array_set_value failed", __func__); 705 return r; 706 } 707 } 708 709 if (b == d->nr_bits) 710 d->step = metadata_digest_remove_writeset; 711 else 712 d->current_bit = b; 713 714 return 0; 715 } 716 717 static int metadata_digest_lookup_writeset(struct era_metadata *md, 718 struct digest *d) 719 { 720 int r; 721 uint64_t key; 722 struct writeset_disk disk; 723 724 r = dm_btree_find_lowest_key(&md->writeset_tree_info, 725 md->writeset_tree_root, &key); 726 if (r < 0) 727 return r; 728 729 d->era = key; 730 731 r = dm_btree_lookup(&md->writeset_tree_info, 732 md->writeset_tree_root, &key, &disk); 733 if (r) { 734 if (r == -ENODATA) { 735 d->step = NULL; 736 return 0; 737 } 738 739 DMERR("%s: dm_btree_lookup failed", __func__); 740 return r; 741 } 742 743 ws_unpack(&disk, &d->writeset); 744 d->value = cpu_to_le32(key); 745 746 d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks); 747 d->current_bit = 0; 748 d->step = metadata_digest_transcribe_writeset; 749 750 return 0; 751 } 752 753 static int metadata_digest_start(struct era_metadata *md, struct digest *d) 754 { 755 if (d->step) 756 return 0; 757 758 memset(d, 0, sizeof(*d)); 759 760 /* 761 * We initialise another bitset info to avoid any caching side 762 * effects with the previous one. 763 */ 764 dm_disk_bitset_init(md->tm, &d->info); 765 d->step = metadata_digest_lookup_writeset; 766 767 return 0; 768 } 769 770 /*---------------------------------------------------------------- 771 * High level metadata interface. Target methods should use these, and not 772 * the lower level ones. 773 *--------------------------------------------------------------*/ 774 static struct era_metadata *metadata_open(struct block_device *bdev, 775 sector_t block_size, 776 bool may_format) 777 { 778 int r; 779 struct era_metadata *md = kzalloc(sizeof(*md), GFP_KERNEL); 780 781 if (!md) 782 return NULL; 783 784 md->bdev = bdev; 785 md->block_size = block_size; 786 787 md->writesets[0].md.root = INVALID_WRITESET_ROOT; 788 md->writesets[1].md.root = INVALID_WRITESET_ROOT; 789 md->current_writeset = &md->writesets[0]; 790 791 r = create_persistent_data_objects(md, may_format); 792 if (r) { 793 kfree(md); 794 return ERR_PTR(r); 795 } 796 797 return md; 798 } 799 800 static void metadata_close(struct era_metadata *md) 801 { 802 destroy_persistent_data_objects(md); 803 kfree(md); 804 } 805 806 static bool valid_nr_blocks(dm_block_t n) 807 { 808 /* 809 * dm_bitset restricts us to 2^32. test_bit & co. restrict us 810 * further to 2^31 - 1 811 */ 812 return n < (1ull << 31); 813 } 814 815 static int metadata_resize(struct era_metadata *md, void *arg) 816 { 817 int r; 818 dm_block_t *new_size = arg; 819 __le32 value; 820 821 if (!valid_nr_blocks(*new_size)) { 822 DMERR("Invalid number of origin blocks %llu", 823 (unsigned long long) *new_size); 824 return -EINVAL; 825 } 826 827 writeset_free(&md->writesets[0]); 828 writeset_free(&md->writesets[1]); 829 830 r = writeset_alloc(&md->writesets[0], *new_size); 831 if (r) { 832 DMERR("%s: writeset_alloc failed for writeset 0", __func__); 833 return r; 834 } 835 836 r = writeset_alloc(&md->writesets[1], *new_size); 837 if (r) { 838 DMERR("%s: writeset_alloc failed for writeset 1", __func__); 839 return r; 840 } 841 842 value = cpu_to_le32(0u); 843 __dm_bless_for_disk(&value); 844 r = dm_array_resize(&md->era_array_info, md->era_array_root, 845 md->nr_blocks, *new_size, 846 &value, &md->era_array_root); 847 if (r) { 848 DMERR("%s: dm_array_resize failed", __func__); 849 return r; 850 } 851 852 md->nr_blocks = *new_size; 853 return 0; 854 } 855 856 static int metadata_era_archive(struct era_metadata *md) 857 { 858 int r; 859 uint64_t keys[1]; 860 struct writeset_disk value; 861 862 r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, 863 &md->current_writeset->md.root); 864 if (r) { 865 DMERR("%s: dm_bitset_flush failed", __func__); 866 return r; 867 } 868 869 ws_pack(&md->current_writeset->md, &value); 870 md->current_writeset->md.root = INVALID_WRITESET_ROOT; 871 872 keys[0] = md->current_era; 873 __dm_bless_for_disk(&value); 874 r = dm_btree_insert(&md->writeset_tree_info, md->writeset_tree_root, 875 keys, &value, &md->writeset_tree_root); 876 if (r) { 877 DMERR("%s: couldn't insert writeset into btree", __func__); 878 /* FIXME: fail mode */ 879 return r; 880 } 881 882 md->archived_writesets = true; 883 884 return 0; 885 } 886 887 static struct writeset *next_writeset(struct era_metadata *md) 888 { 889 return (md->current_writeset == &md->writesets[0]) ? 890 &md->writesets[1] : &md->writesets[0]; 891 } 892 893 static int metadata_new_era(struct era_metadata *md) 894 { 895 int r; 896 struct writeset *new_writeset = next_writeset(md); 897 898 r = writeset_init(&md->bitset_info, new_writeset); 899 if (r) { 900 DMERR("%s: writeset_init failed", __func__); 901 return r; 902 } 903 904 swap_writeset(md, new_writeset); 905 md->current_era++; 906 907 return 0; 908 } 909 910 static int metadata_era_rollover(struct era_metadata *md) 911 { 912 int r; 913 914 if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) { 915 r = metadata_era_archive(md); 916 if (r) { 917 DMERR("%s: metadata_archive_era failed", __func__); 918 /* FIXME: fail mode? */ 919 return r; 920 } 921 } 922 923 r = metadata_new_era(md); 924 if (r) { 925 DMERR("%s: new era failed", __func__); 926 /* FIXME: fail mode */ 927 return r; 928 } 929 930 return 0; 931 } 932 933 static bool metadata_current_marked(struct era_metadata *md, dm_block_t block) 934 { 935 bool r; 936 struct writeset *ws; 937 938 rcu_read_lock(); 939 ws = rcu_dereference(md->current_writeset); 940 r = writeset_marked(ws, block); 941 rcu_read_unlock(); 942 943 return r; 944 } 945 946 static int metadata_commit(struct era_metadata *md) 947 { 948 int r; 949 struct dm_block *sblock; 950 951 if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) { 952 r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, 953 &md->current_writeset->md.root); 954 if (r) { 955 DMERR("%s: bitset flush failed", __func__); 956 return r; 957 } 958 } 959 960 r = save_sm_root(md); 961 if (r) { 962 DMERR("%s: save_sm_root failed", __func__); 963 return r; 964 } 965 966 r = dm_tm_pre_commit(md->tm); 967 if (r) { 968 DMERR("%s: pre commit failed", __func__); 969 return r; 970 } 971 972 r = superblock_lock(md, &sblock); 973 if (r) { 974 DMERR("%s: superblock lock failed", __func__); 975 return r; 976 } 977 978 prepare_superblock(md, dm_block_data(sblock)); 979 980 return dm_tm_commit(md->tm, sblock); 981 } 982 983 static int metadata_checkpoint(struct era_metadata *md) 984 { 985 /* 986 * For now we just rollover, but later I want to put a check in to 987 * avoid this if the filter is still pretty fresh. 988 */ 989 return metadata_era_rollover(md); 990 } 991 992 /* 993 * Metadata snapshots allow userland to access era data. 994 */ 995 static int metadata_take_snap(struct era_metadata *md) 996 { 997 int r, inc; 998 struct dm_block *clone; 999 1000 if (md->metadata_snap != SUPERBLOCK_LOCATION) { 1001 DMERR("%s: metadata snapshot already exists", __func__); 1002 return -EINVAL; 1003 } 1004 1005 r = metadata_era_rollover(md); 1006 if (r) { 1007 DMERR("%s: era rollover failed", __func__); 1008 return r; 1009 } 1010 1011 r = metadata_commit(md); 1012 if (r) { 1013 DMERR("%s: pre commit failed", __func__); 1014 return r; 1015 } 1016 1017 r = dm_sm_inc_block(md->sm, SUPERBLOCK_LOCATION); 1018 if (r) { 1019 DMERR("%s: couldn't increment superblock", __func__); 1020 return r; 1021 } 1022 1023 r = dm_tm_shadow_block(md->tm, SUPERBLOCK_LOCATION, 1024 &sb_validator, &clone, &inc); 1025 if (r) { 1026 DMERR("%s: couldn't shadow superblock", __func__); 1027 dm_sm_dec_block(md->sm, SUPERBLOCK_LOCATION); 1028 return r; 1029 } 1030 BUG_ON(!inc); 1031 1032 r = dm_sm_inc_block(md->sm, md->writeset_tree_root); 1033 if (r) { 1034 DMERR("%s: couldn't inc writeset tree root", __func__); 1035 dm_tm_unlock(md->tm, clone); 1036 return r; 1037 } 1038 1039 r = dm_sm_inc_block(md->sm, md->era_array_root); 1040 if (r) { 1041 DMERR("%s: couldn't inc era tree root", __func__); 1042 dm_sm_dec_block(md->sm, md->writeset_tree_root); 1043 dm_tm_unlock(md->tm, clone); 1044 return r; 1045 } 1046 1047 md->metadata_snap = dm_block_location(clone); 1048 1049 r = dm_tm_unlock(md->tm, clone); 1050 if (r) { 1051 DMERR("%s: couldn't unlock clone", __func__); 1052 md->metadata_snap = SUPERBLOCK_LOCATION; 1053 return r; 1054 } 1055 1056 return 0; 1057 } 1058 1059 static int metadata_drop_snap(struct era_metadata *md) 1060 { 1061 int r; 1062 dm_block_t location; 1063 struct dm_block *clone; 1064 struct superblock_disk *disk; 1065 1066 if (md->metadata_snap == SUPERBLOCK_LOCATION) { 1067 DMERR("%s: no snap to drop", __func__); 1068 return -EINVAL; 1069 } 1070 1071 r = dm_tm_read_lock(md->tm, md->metadata_snap, &sb_validator, &clone); 1072 if (r) { 1073 DMERR("%s: couldn't read lock superblock clone", __func__); 1074 return r; 1075 } 1076 1077 /* 1078 * Whatever happens now we'll commit with no record of the metadata 1079 * snap. 1080 */ 1081 md->metadata_snap = SUPERBLOCK_LOCATION; 1082 1083 disk = dm_block_data(clone); 1084 r = dm_btree_del(&md->writeset_tree_info, 1085 le64_to_cpu(disk->writeset_tree_root)); 1086 if (r) { 1087 DMERR("%s: error deleting writeset tree clone", __func__); 1088 dm_tm_unlock(md->tm, clone); 1089 return r; 1090 } 1091 1092 r = dm_array_del(&md->era_array_info, le64_to_cpu(disk->era_array_root)); 1093 if (r) { 1094 DMERR("%s: error deleting era array clone", __func__); 1095 dm_tm_unlock(md->tm, clone); 1096 return r; 1097 } 1098 1099 location = dm_block_location(clone); 1100 dm_tm_unlock(md->tm, clone); 1101 1102 return dm_sm_dec_block(md->sm, location); 1103 } 1104 1105 struct metadata_stats { 1106 dm_block_t used; 1107 dm_block_t total; 1108 dm_block_t snap; 1109 uint32_t era; 1110 }; 1111 1112 static int metadata_get_stats(struct era_metadata *md, void *ptr) 1113 { 1114 int r; 1115 struct metadata_stats *s = ptr; 1116 dm_block_t nr_free, nr_total; 1117 1118 r = dm_sm_get_nr_free(md->sm, &nr_free); 1119 if (r) { 1120 DMERR("dm_sm_get_nr_free returned %d", r); 1121 return r; 1122 } 1123 1124 r = dm_sm_get_nr_blocks(md->sm, &nr_total); 1125 if (r) { 1126 DMERR("dm_pool_get_metadata_dev_size returned %d", r); 1127 return r; 1128 } 1129 1130 s->used = nr_total - nr_free; 1131 s->total = nr_total; 1132 s->snap = md->metadata_snap; 1133 s->era = md->current_era; 1134 1135 return 0; 1136 } 1137 1138 /*----------------------------------------------------------------*/ 1139 1140 struct era { 1141 struct dm_target *ti; 1142 struct dm_target_callbacks callbacks; 1143 1144 struct dm_dev *metadata_dev; 1145 struct dm_dev *origin_dev; 1146 1147 dm_block_t nr_blocks; 1148 uint32_t sectors_per_block; 1149 int sectors_per_block_shift; 1150 struct era_metadata *md; 1151 1152 struct workqueue_struct *wq; 1153 struct work_struct worker; 1154 1155 spinlock_t deferred_lock; 1156 struct bio_list deferred_bios; 1157 1158 spinlock_t rpc_lock; 1159 struct list_head rpc_calls; 1160 1161 struct digest digest; 1162 atomic_t suspended; 1163 }; 1164 1165 struct rpc { 1166 struct list_head list; 1167 1168 int (*fn0)(struct era_metadata *); 1169 int (*fn1)(struct era_metadata *, void *); 1170 void *arg; 1171 int result; 1172 1173 struct completion complete; 1174 }; 1175 1176 /*---------------------------------------------------------------- 1177 * Remapping. 1178 *---------------------------------------------------------------*/ 1179 static bool block_size_is_power_of_two(struct era *era) 1180 { 1181 return era->sectors_per_block_shift >= 0; 1182 } 1183 1184 static dm_block_t get_block(struct era *era, struct bio *bio) 1185 { 1186 sector_t block_nr = bio->bi_iter.bi_sector; 1187 1188 if (!block_size_is_power_of_two(era)) 1189 (void) sector_div(block_nr, era->sectors_per_block); 1190 else 1191 block_nr >>= era->sectors_per_block_shift; 1192 1193 return block_nr; 1194 } 1195 1196 static void remap_to_origin(struct era *era, struct bio *bio) 1197 { 1198 bio->bi_bdev = era->origin_dev->bdev; 1199 } 1200 1201 /*---------------------------------------------------------------- 1202 * Worker thread 1203 *--------------------------------------------------------------*/ 1204 static void wake_worker(struct era *era) 1205 { 1206 if (!atomic_read(&era->suspended)) 1207 queue_work(era->wq, &era->worker); 1208 } 1209 1210 static void process_old_eras(struct era *era) 1211 { 1212 int r; 1213 1214 if (!era->digest.step) 1215 return; 1216 1217 r = era->digest.step(era->md, &era->digest); 1218 if (r < 0) { 1219 DMERR("%s: digest step failed, stopping digestion", __func__); 1220 era->digest.step = NULL; 1221 1222 } else if (era->digest.step) 1223 wake_worker(era); 1224 } 1225 1226 static void process_deferred_bios(struct era *era) 1227 { 1228 int r; 1229 struct bio_list deferred_bios, marked_bios; 1230 struct bio *bio; 1231 bool commit_needed = false; 1232 bool failed = false; 1233 1234 bio_list_init(&deferred_bios); 1235 bio_list_init(&marked_bios); 1236 1237 spin_lock(&era->deferred_lock); 1238 bio_list_merge(&deferred_bios, &era->deferred_bios); 1239 bio_list_init(&era->deferred_bios); 1240 spin_unlock(&era->deferred_lock); 1241 1242 while ((bio = bio_list_pop(&deferred_bios))) { 1243 r = writeset_test_and_set(&era->md->bitset_info, 1244 era->md->current_writeset, 1245 get_block(era, bio)); 1246 if (r < 0) { 1247 /* 1248 * This is bad news, we need to rollback. 1249 * FIXME: finish. 1250 */ 1251 failed = true; 1252 1253 } else if (r == 0) 1254 commit_needed = true; 1255 1256 bio_list_add(&marked_bios, bio); 1257 } 1258 1259 if (commit_needed) { 1260 r = metadata_commit(era->md); 1261 if (r) 1262 failed = true; 1263 } 1264 1265 if (failed) 1266 while ((bio = bio_list_pop(&marked_bios))) 1267 bio_io_error(bio); 1268 else 1269 while ((bio = bio_list_pop(&marked_bios))) 1270 generic_make_request(bio); 1271 } 1272 1273 static void process_rpc_calls(struct era *era) 1274 { 1275 int r; 1276 bool need_commit = false; 1277 struct list_head calls; 1278 struct rpc *rpc, *tmp; 1279 1280 INIT_LIST_HEAD(&calls); 1281 spin_lock(&era->rpc_lock); 1282 list_splice_init(&era->rpc_calls, &calls); 1283 spin_unlock(&era->rpc_lock); 1284 1285 list_for_each_entry_safe(rpc, tmp, &calls, list) { 1286 rpc->result = rpc->fn0 ? rpc->fn0(era->md) : rpc->fn1(era->md, rpc->arg); 1287 need_commit = true; 1288 } 1289 1290 if (need_commit) { 1291 r = metadata_commit(era->md); 1292 if (r) 1293 list_for_each_entry_safe(rpc, tmp, &calls, list) 1294 rpc->result = r; 1295 } 1296 1297 list_for_each_entry_safe(rpc, tmp, &calls, list) 1298 complete(&rpc->complete); 1299 } 1300 1301 static void kick_off_digest(struct era *era) 1302 { 1303 if (era->md->archived_writesets) { 1304 era->md->archived_writesets = false; 1305 metadata_digest_start(era->md, &era->digest); 1306 } 1307 } 1308 1309 static void do_work(struct work_struct *ws) 1310 { 1311 struct era *era = container_of(ws, struct era, worker); 1312 1313 kick_off_digest(era); 1314 process_old_eras(era); 1315 process_deferred_bios(era); 1316 process_rpc_calls(era); 1317 } 1318 1319 static void defer_bio(struct era *era, struct bio *bio) 1320 { 1321 spin_lock(&era->deferred_lock); 1322 bio_list_add(&era->deferred_bios, bio); 1323 spin_unlock(&era->deferred_lock); 1324 1325 wake_worker(era); 1326 } 1327 1328 /* 1329 * Make an rpc call to the worker to change the metadata. 1330 */ 1331 static int perform_rpc(struct era *era, struct rpc *rpc) 1332 { 1333 rpc->result = 0; 1334 init_completion(&rpc->complete); 1335 1336 spin_lock(&era->rpc_lock); 1337 list_add(&rpc->list, &era->rpc_calls); 1338 spin_unlock(&era->rpc_lock); 1339 1340 wake_worker(era); 1341 wait_for_completion(&rpc->complete); 1342 1343 return rpc->result; 1344 } 1345 1346 static int in_worker0(struct era *era, int (*fn)(struct era_metadata *)) 1347 { 1348 struct rpc rpc; 1349 rpc.fn0 = fn; 1350 rpc.fn1 = NULL; 1351 1352 return perform_rpc(era, &rpc); 1353 } 1354 1355 static int in_worker1(struct era *era, 1356 int (*fn)(struct era_metadata *, void *), void *arg) 1357 { 1358 struct rpc rpc; 1359 rpc.fn0 = NULL; 1360 rpc.fn1 = fn; 1361 rpc.arg = arg; 1362 1363 return perform_rpc(era, &rpc); 1364 } 1365 1366 static void start_worker(struct era *era) 1367 { 1368 atomic_set(&era->suspended, 0); 1369 } 1370 1371 static void stop_worker(struct era *era) 1372 { 1373 atomic_set(&era->suspended, 1); 1374 flush_workqueue(era->wq); 1375 } 1376 1377 /*---------------------------------------------------------------- 1378 * Target methods 1379 *--------------------------------------------------------------*/ 1380 static int dev_is_congested(struct dm_dev *dev, int bdi_bits) 1381 { 1382 struct request_queue *q = bdev_get_queue(dev->bdev); 1383 return bdi_congested(&q->backing_dev_info, bdi_bits); 1384 } 1385 1386 static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 1387 { 1388 struct era *era = container_of(cb, struct era, callbacks); 1389 return dev_is_congested(era->origin_dev, bdi_bits); 1390 } 1391 1392 static void era_destroy(struct era *era) 1393 { 1394 if (era->md) 1395 metadata_close(era->md); 1396 1397 if (era->wq) 1398 destroy_workqueue(era->wq); 1399 1400 if (era->origin_dev) 1401 dm_put_device(era->ti, era->origin_dev); 1402 1403 if (era->metadata_dev) 1404 dm_put_device(era->ti, era->metadata_dev); 1405 1406 kfree(era); 1407 } 1408 1409 static dm_block_t calc_nr_blocks(struct era *era) 1410 { 1411 return dm_sector_div_up(era->ti->len, era->sectors_per_block); 1412 } 1413 1414 static bool valid_block_size(dm_block_t block_size) 1415 { 1416 bool greater_than_zero = block_size > 0; 1417 bool multiple_of_min_block_size = (block_size & (MIN_BLOCK_SIZE - 1)) == 0; 1418 1419 return greater_than_zero && multiple_of_min_block_size; 1420 } 1421 1422 /* 1423 * <metadata dev> <data dev> <data block size (sectors)> 1424 */ 1425 static int era_ctr(struct dm_target *ti, unsigned argc, char **argv) 1426 { 1427 int r; 1428 char dummy; 1429 struct era *era; 1430 struct era_metadata *md; 1431 1432 if (argc != 3) { 1433 ti->error = "Invalid argument count"; 1434 return -EINVAL; 1435 } 1436 1437 era = kzalloc(sizeof(*era), GFP_KERNEL); 1438 if (!era) { 1439 ti->error = "Error allocating era structure"; 1440 return -ENOMEM; 1441 } 1442 1443 era->ti = ti; 1444 1445 r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &era->metadata_dev); 1446 if (r) { 1447 ti->error = "Error opening metadata device"; 1448 era_destroy(era); 1449 return -EINVAL; 1450 } 1451 1452 r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &era->origin_dev); 1453 if (r) { 1454 ti->error = "Error opening data device"; 1455 era_destroy(era); 1456 return -EINVAL; 1457 } 1458 1459 r = sscanf(argv[2], "%u%c", &era->sectors_per_block, &dummy); 1460 if (r != 1) { 1461 ti->error = "Error parsing block size"; 1462 era_destroy(era); 1463 return -EINVAL; 1464 } 1465 1466 r = dm_set_target_max_io_len(ti, era->sectors_per_block); 1467 if (r) { 1468 ti->error = "could not set max io len"; 1469 era_destroy(era); 1470 return -EINVAL; 1471 } 1472 1473 if (!valid_block_size(era->sectors_per_block)) { 1474 ti->error = "Invalid block size"; 1475 era_destroy(era); 1476 return -EINVAL; 1477 } 1478 if (era->sectors_per_block & (era->sectors_per_block - 1)) 1479 era->sectors_per_block_shift = -1; 1480 else 1481 era->sectors_per_block_shift = __ffs(era->sectors_per_block); 1482 1483 md = metadata_open(era->metadata_dev->bdev, era->sectors_per_block, true); 1484 if (IS_ERR(md)) { 1485 ti->error = "Error reading metadata"; 1486 era_destroy(era); 1487 return PTR_ERR(md); 1488 } 1489 era->md = md; 1490 1491 era->nr_blocks = calc_nr_blocks(era); 1492 1493 r = metadata_resize(era->md, &era->nr_blocks); 1494 if (r) { 1495 ti->error = "couldn't resize metadata"; 1496 era_destroy(era); 1497 return -ENOMEM; 1498 } 1499 1500 era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); 1501 if (!era->wq) { 1502 ti->error = "could not create workqueue for metadata object"; 1503 era_destroy(era); 1504 return -ENOMEM; 1505 } 1506 INIT_WORK(&era->worker, do_work); 1507 1508 spin_lock_init(&era->deferred_lock); 1509 bio_list_init(&era->deferred_bios); 1510 1511 spin_lock_init(&era->rpc_lock); 1512 INIT_LIST_HEAD(&era->rpc_calls); 1513 1514 ti->private = era; 1515 ti->num_flush_bios = 1; 1516 ti->flush_supported = true; 1517 1518 ti->num_discard_bios = 1; 1519 ti->discards_supported = true; 1520 era->callbacks.congested_fn = era_is_congested; 1521 dm_table_add_target_callbacks(ti->table, &era->callbacks); 1522 1523 return 0; 1524 } 1525 1526 static void era_dtr(struct dm_target *ti) 1527 { 1528 era_destroy(ti->private); 1529 } 1530 1531 static int era_map(struct dm_target *ti, struct bio *bio) 1532 { 1533 struct era *era = ti->private; 1534 dm_block_t block = get_block(era, bio); 1535 1536 /* 1537 * All bios get remapped to the origin device. We do this now, but 1538 * it may not get issued until later. Depending on whether the 1539 * block is marked in this era. 1540 */ 1541 remap_to_origin(era, bio); 1542 1543 /* 1544 * REQ_FLUSH bios carry no data, so we're not interested in them. 1545 */ 1546 if (!(bio->bi_rw & REQ_FLUSH) && 1547 (bio_data_dir(bio) == WRITE) && 1548 !metadata_current_marked(era->md, block)) { 1549 defer_bio(era, bio); 1550 return DM_MAPIO_SUBMITTED; 1551 } 1552 1553 return DM_MAPIO_REMAPPED; 1554 } 1555 1556 static void era_postsuspend(struct dm_target *ti) 1557 { 1558 int r; 1559 struct era *era = ti->private; 1560 1561 r = in_worker0(era, metadata_era_archive); 1562 if (r) { 1563 DMERR("%s: couldn't archive current era", __func__); 1564 /* FIXME: fail mode */ 1565 } 1566 1567 stop_worker(era); 1568 } 1569 1570 static int era_preresume(struct dm_target *ti) 1571 { 1572 int r; 1573 struct era *era = ti->private; 1574 dm_block_t new_size = calc_nr_blocks(era); 1575 1576 if (era->nr_blocks != new_size) { 1577 r = in_worker1(era, metadata_resize, &new_size); 1578 if (r) 1579 return r; 1580 1581 era->nr_blocks = new_size; 1582 } 1583 1584 start_worker(era); 1585 1586 r = in_worker0(era, metadata_new_era); 1587 if (r) { 1588 DMERR("%s: metadata_era_rollover failed", __func__); 1589 return r; 1590 } 1591 1592 return 0; 1593 } 1594 1595 /* 1596 * Status format: 1597 * 1598 * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 1599 * <current era> <held metadata root | '-'> 1600 */ 1601 static void era_status(struct dm_target *ti, status_type_t type, 1602 unsigned status_flags, char *result, unsigned maxlen) 1603 { 1604 int r; 1605 struct era *era = ti->private; 1606 ssize_t sz = 0; 1607 struct metadata_stats stats; 1608 char buf[BDEVNAME_SIZE]; 1609 1610 switch (type) { 1611 case STATUSTYPE_INFO: 1612 r = in_worker1(era, metadata_get_stats, &stats); 1613 if (r) 1614 goto err; 1615 1616 DMEMIT("%u %llu/%llu %u", 1617 (unsigned) (DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), 1618 (unsigned long long) stats.used, 1619 (unsigned long long) stats.total, 1620 (unsigned) stats.era); 1621 1622 if (stats.snap != SUPERBLOCK_LOCATION) 1623 DMEMIT(" %llu", stats.snap); 1624 else 1625 DMEMIT(" -"); 1626 break; 1627 1628 case STATUSTYPE_TABLE: 1629 format_dev_t(buf, era->metadata_dev->bdev->bd_dev); 1630 DMEMIT("%s ", buf); 1631 format_dev_t(buf, era->origin_dev->bdev->bd_dev); 1632 DMEMIT("%s %u", buf, era->sectors_per_block); 1633 break; 1634 } 1635 1636 return; 1637 1638 err: 1639 DMEMIT("Error"); 1640 } 1641 1642 static int era_message(struct dm_target *ti, unsigned argc, char **argv) 1643 { 1644 struct era *era = ti->private; 1645 1646 if (argc != 1) { 1647 DMERR("incorrect number of message arguments"); 1648 return -EINVAL; 1649 } 1650 1651 if (!strcasecmp(argv[0], "checkpoint")) 1652 return in_worker0(era, metadata_checkpoint); 1653 1654 if (!strcasecmp(argv[0], "take_metadata_snap")) 1655 return in_worker0(era, metadata_take_snap); 1656 1657 if (!strcasecmp(argv[0], "drop_metadata_snap")) 1658 return in_worker0(era, metadata_drop_snap); 1659 1660 DMERR("unsupported message '%s'", argv[0]); 1661 return -EINVAL; 1662 } 1663 1664 static sector_t get_dev_size(struct dm_dev *dev) 1665 { 1666 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; 1667 } 1668 1669 static int era_iterate_devices(struct dm_target *ti, 1670 iterate_devices_callout_fn fn, void *data) 1671 { 1672 struct era *era = ti->private; 1673 return fn(ti, era->origin_dev, 0, get_dev_size(era->origin_dev), data); 1674 } 1675 1676 static int era_merge(struct dm_target *ti, struct bvec_merge_data *bvm, 1677 struct bio_vec *biovec, int max_size) 1678 { 1679 struct era *era = ti->private; 1680 struct request_queue *q = bdev_get_queue(era->origin_dev->bdev); 1681 1682 if (!q->merge_bvec_fn) 1683 return max_size; 1684 1685 bvm->bi_bdev = era->origin_dev->bdev; 1686 1687 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 1688 } 1689 1690 static void era_io_hints(struct dm_target *ti, struct queue_limits *limits) 1691 { 1692 struct era *era = ti->private; 1693 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 1694 1695 /* 1696 * If the system-determined stacked limits are compatible with the 1697 * era device's blocksize (io_opt is a factor) do not override them. 1698 */ 1699 if (io_opt_sectors < era->sectors_per_block || 1700 do_div(io_opt_sectors, era->sectors_per_block)) { 1701 blk_limits_io_min(limits, 0); 1702 blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT); 1703 } 1704 } 1705 1706 /*----------------------------------------------------------------*/ 1707 1708 static struct target_type era_target = { 1709 .name = "era", 1710 .version = {1, 0, 0}, 1711 .module = THIS_MODULE, 1712 .ctr = era_ctr, 1713 .dtr = era_dtr, 1714 .map = era_map, 1715 .postsuspend = era_postsuspend, 1716 .preresume = era_preresume, 1717 .status = era_status, 1718 .message = era_message, 1719 .iterate_devices = era_iterate_devices, 1720 .merge = era_merge, 1721 .io_hints = era_io_hints 1722 }; 1723 1724 static int __init dm_era_init(void) 1725 { 1726 int r; 1727 1728 r = dm_register_target(&era_target); 1729 if (r) { 1730 DMERR("era target registration failed: %d", r); 1731 return r; 1732 } 1733 1734 return 0; 1735 } 1736 1737 static void __exit dm_era_exit(void) 1738 { 1739 dm_unregister_target(&era_target); 1740 } 1741 1742 module_init(dm_era_init); 1743 module_exit(dm_era_exit); 1744 1745 MODULE_DESCRIPTION(DM_NAME " era target"); 1746 MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 1747 MODULE_LICENSE("GPL"); 1748