1 /* 2 * Copyright (C) 2012 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6 7 #include "dm-cache-metadata.h" 8 9 #include "persistent-data/dm-array.h" 10 #include "persistent-data/dm-bitset.h" 11 #include "persistent-data/dm-space-map.h" 12 #include "persistent-data/dm-space-map-disk.h" 13 #include "persistent-data/dm-transaction-manager.h" 14 15 #include <linux/device-mapper.h> 16 17 /*----------------------------------------------------------------*/ 18 19 #define DM_MSG_PREFIX "cache metadata" 20 21 #define CACHE_SUPERBLOCK_MAGIC 06142003 22 #define CACHE_SUPERBLOCK_LOCATION 0 23 24 /* 25 * defines a range of metadata versions that this module can handle. 26 */ 27 #define MIN_CACHE_VERSION 1 28 #define MAX_CACHE_VERSION 1 29 30 #define CACHE_METADATA_CACHE_SIZE 64 31 32 /* 33 * 3 for btree insert + 34 * 2 for btree lookup used within space map 35 */ 36 #define CACHE_MAX_CONCURRENT_LOCKS 5 37 #define SPACE_MAP_ROOT_SIZE 128 38 39 enum superblock_flag_bits { 40 /* for spotting crashes that would invalidate the dirty bitset */ 41 CLEAN_SHUTDOWN, 42 }; 43 44 /* 45 * Each mapping from cache block -> origin block carries a set of flags. 46 */ 47 enum mapping_bits { 48 /* 49 * A valid mapping. Because we're using an array we clear this 50 * flag for an non existant mapping. 51 */ 52 M_VALID = 1, 53 54 /* 55 * The data on the cache is different from that on the origin. 56 */ 57 M_DIRTY = 2 58 }; 59 60 struct cache_disk_superblock { 61 __le32 csum; 62 __le32 flags; 63 __le64 blocknr; 64 65 __u8 uuid[16]; 66 __le64 magic; 67 __le32 version; 68 69 __u8 policy_name[CACHE_POLICY_NAME_SIZE]; 70 __le32 policy_hint_size; 71 72 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 73 __le64 mapping_root; 74 __le64 hint_root; 75 76 __le64 discard_root; 77 __le64 discard_block_size; 78 __le64 discard_nr_blocks; 79 80 __le32 data_block_size; 81 __le32 metadata_block_size; 82 __le32 cache_blocks; 83 84 __le32 compat_flags; 85 __le32 compat_ro_flags; 86 __le32 incompat_flags; 87 88 __le32 read_hits; 89 __le32 read_misses; 90 __le32 write_hits; 91 __le32 write_misses; 92 93 __le32 policy_version[CACHE_POLICY_VERSION_SIZE]; 94 } __packed; 95 96 struct dm_cache_metadata { 97 atomic_t ref_count; 98 struct list_head list; 99 100 struct block_device *bdev; 101 struct dm_block_manager *bm; 102 struct dm_space_map *metadata_sm; 103 struct dm_transaction_manager *tm; 104 105 struct dm_array_info info; 106 struct dm_array_info hint_info; 107 struct dm_disk_bitset discard_info; 108 109 struct rw_semaphore root_lock; 110 dm_block_t root; 111 dm_block_t hint_root; 112 dm_block_t discard_root; 113 114 sector_t discard_block_size; 115 dm_dblock_t discard_nr_blocks; 116 117 sector_t data_block_size; 118 dm_cblock_t cache_blocks; 119 bool changed:1; 120 bool clean_when_opened:1; 121 122 char policy_name[CACHE_POLICY_NAME_SIZE]; 123 unsigned policy_version[CACHE_POLICY_VERSION_SIZE]; 124 size_t policy_hint_size; 125 struct dm_cache_statistics stats; 126 127 /* 128 * Reading the space map root can fail, so we read it into this 129 * buffer before the superblock is locked and updated. 130 */ 131 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 132 }; 133 134 /*------------------------------------------------------------------- 135 * superblock validator 136 *-----------------------------------------------------------------*/ 137 138 #define SUPERBLOCK_CSUM_XOR 9031977 139 140 static void sb_prepare_for_write(struct dm_block_validator *v, 141 struct dm_block *b, 142 size_t sb_block_size) 143 { 144 struct cache_disk_superblock *disk_super = dm_block_data(b); 145 146 disk_super->blocknr = cpu_to_le64(dm_block_location(b)); 147 disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags, 148 sb_block_size - sizeof(__le32), 149 SUPERBLOCK_CSUM_XOR)); 150 } 151 152 static int check_metadata_version(struct cache_disk_superblock *disk_super) 153 { 154 uint32_t metadata_version = le32_to_cpu(disk_super->version); 155 if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) { 156 DMERR("Cache metadata version %u found, but only versions between %u and %u supported.", 157 metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION); 158 return -EINVAL; 159 } 160 161 return 0; 162 } 163 164 static int sb_check(struct dm_block_validator *v, 165 struct dm_block *b, 166 size_t sb_block_size) 167 { 168 struct cache_disk_superblock *disk_super = dm_block_data(b); 169 __le32 csum_le; 170 171 if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) { 172 DMERR("sb_check failed: blocknr %llu: wanted %llu", 173 le64_to_cpu(disk_super->blocknr), 174 (unsigned long long)dm_block_location(b)); 175 return -ENOTBLK; 176 } 177 178 if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) { 179 DMERR("sb_check failed: magic %llu: wanted %llu", 180 le64_to_cpu(disk_super->magic), 181 (unsigned long long)CACHE_SUPERBLOCK_MAGIC); 182 return -EILSEQ; 183 } 184 185 csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags, 186 sb_block_size - sizeof(__le32), 187 SUPERBLOCK_CSUM_XOR)); 188 if (csum_le != disk_super->csum) { 189 DMERR("sb_check failed: csum %u: wanted %u", 190 le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum)); 191 return -EILSEQ; 192 } 193 194 return check_metadata_version(disk_super); 195 } 196 197 static struct dm_block_validator sb_validator = { 198 .name = "superblock", 199 .prepare_for_write = sb_prepare_for_write, 200 .check = sb_check 201 }; 202 203 /*----------------------------------------------------------------*/ 204 205 static int superblock_read_lock(struct dm_cache_metadata *cmd, 206 struct dm_block **sblock) 207 { 208 return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 209 &sb_validator, sblock); 210 } 211 212 static int superblock_lock_zero(struct dm_cache_metadata *cmd, 213 struct dm_block **sblock) 214 { 215 return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 216 &sb_validator, sblock); 217 } 218 219 static int superblock_lock(struct dm_cache_metadata *cmd, 220 struct dm_block **sblock) 221 { 222 return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 223 &sb_validator, sblock); 224 } 225 226 /*----------------------------------------------------------------*/ 227 228 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result) 229 { 230 int r; 231 unsigned i; 232 struct dm_block *b; 233 __le64 *data_le, zero = cpu_to_le64(0); 234 unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64); 235 236 /* 237 * We can't use a validator here - it may be all zeroes. 238 */ 239 r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b); 240 if (r) 241 return r; 242 243 data_le = dm_block_data(b); 244 *result = true; 245 for (i = 0; i < sb_block_size; i++) { 246 if (data_le[i] != zero) { 247 *result = false; 248 break; 249 } 250 } 251 252 return dm_bm_unlock(b); 253 } 254 255 static void __setup_mapping_info(struct dm_cache_metadata *cmd) 256 { 257 struct dm_btree_value_type vt; 258 259 vt.context = NULL; 260 vt.size = sizeof(__le64); 261 vt.inc = NULL; 262 vt.dec = NULL; 263 vt.equal = NULL; 264 dm_array_info_init(&cmd->info, cmd->tm, &vt); 265 266 if (cmd->policy_hint_size) { 267 vt.size = sizeof(__le32); 268 dm_array_info_init(&cmd->hint_info, cmd->tm, &vt); 269 } 270 } 271 272 static int __save_sm_root(struct dm_cache_metadata *cmd) 273 { 274 int r; 275 size_t metadata_len; 276 277 r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); 278 if (r < 0) 279 return r; 280 281 return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root, 282 metadata_len); 283 } 284 285 static void __copy_sm_root(struct dm_cache_metadata *cmd, 286 struct cache_disk_superblock *disk_super) 287 { 288 memcpy(&disk_super->metadata_space_map_root, 289 &cmd->metadata_space_map_root, 290 sizeof(cmd->metadata_space_map_root)); 291 } 292 293 static int __write_initial_superblock(struct dm_cache_metadata *cmd) 294 { 295 int r; 296 struct dm_block *sblock; 297 struct cache_disk_superblock *disk_super; 298 sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT; 299 300 /* FIXME: see if we can lose the max sectors limit */ 301 if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS) 302 bdev_size = DM_CACHE_METADATA_MAX_SECTORS; 303 304 r = dm_tm_pre_commit(cmd->tm); 305 if (r < 0) 306 return r; 307 308 /* 309 * dm_sm_copy_root() can fail. So we need to do it before we start 310 * updating the superblock. 311 */ 312 r = __save_sm_root(cmd); 313 if (r) 314 return r; 315 316 r = superblock_lock_zero(cmd, &sblock); 317 if (r) 318 return r; 319 320 disk_super = dm_block_data(sblock); 321 disk_super->flags = 0; 322 memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); 323 disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); 324 disk_super->version = cpu_to_le32(MAX_CACHE_VERSION); 325 memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); 326 memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); 327 disk_super->policy_hint_size = 0; 328 329 __copy_sm_root(cmd, disk_super); 330 331 disk_super->mapping_root = cpu_to_le64(cmd->root); 332 disk_super->hint_root = cpu_to_le64(cmd->hint_root); 333 disk_super->discard_root = cpu_to_le64(cmd->discard_root); 334 disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); 335 disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); 336 disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE); 337 disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); 338 disk_super->cache_blocks = cpu_to_le32(0); 339 340 disk_super->read_hits = cpu_to_le32(0); 341 disk_super->read_misses = cpu_to_le32(0); 342 disk_super->write_hits = cpu_to_le32(0); 343 disk_super->write_misses = cpu_to_le32(0); 344 345 return dm_tm_commit(cmd->tm, sblock); 346 } 347 348 static int __format_metadata(struct dm_cache_metadata *cmd) 349 { 350 int r; 351 352 r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 353 &cmd->tm, &cmd->metadata_sm); 354 if (r < 0) { 355 DMERR("tm_create_with_sm failed"); 356 return r; 357 } 358 359 __setup_mapping_info(cmd); 360 361 r = dm_array_empty(&cmd->info, &cmd->root); 362 if (r < 0) 363 goto bad; 364 365 dm_disk_bitset_init(cmd->tm, &cmd->discard_info); 366 367 r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root); 368 if (r < 0) 369 goto bad; 370 371 cmd->discard_block_size = 0; 372 cmd->discard_nr_blocks = 0; 373 374 r = __write_initial_superblock(cmd); 375 if (r) 376 goto bad; 377 378 cmd->clean_when_opened = true; 379 return 0; 380 381 bad: 382 dm_tm_destroy(cmd->tm); 383 dm_sm_destroy(cmd->metadata_sm); 384 385 return r; 386 } 387 388 static int __check_incompat_features(struct cache_disk_superblock *disk_super, 389 struct dm_cache_metadata *cmd) 390 { 391 uint32_t features; 392 393 features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP; 394 if (features) { 395 DMERR("could not access metadata due to unsupported optional features (%lx).", 396 (unsigned long)features); 397 return -EINVAL; 398 } 399 400 /* 401 * Check for read-only metadata to skip the following RDWR checks. 402 */ 403 if (get_disk_ro(cmd->bdev->bd_disk)) 404 return 0; 405 406 features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP; 407 if (features) { 408 DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", 409 (unsigned long)features); 410 return -EINVAL; 411 } 412 413 return 0; 414 } 415 416 static int __open_metadata(struct dm_cache_metadata *cmd) 417 { 418 int r; 419 struct dm_block *sblock; 420 struct cache_disk_superblock *disk_super; 421 unsigned long sb_flags; 422 423 r = superblock_read_lock(cmd, &sblock); 424 if (r < 0) { 425 DMERR("couldn't read lock superblock"); 426 return r; 427 } 428 429 disk_super = dm_block_data(sblock); 430 431 /* Verify the data block size hasn't changed */ 432 if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) { 433 DMERR("changing the data block size (from %u to %llu) is not supported", 434 le32_to_cpu(disk_super->data_block_size), 435 (unsigned long long)cmd->data_block_size); 436 r = -EINVAL; 437 goto bad; 438 } 439 440 r = __check_incompat_features(disk_super, cmd); 441 if (r < 0) 442 goto bad; 443 444 r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 445 disk_super->metadata_space_map_root, 446 sizeof(disk_super->metadata_space_map_root), 447 &cmd->tm, &cmd->metadata_sm); 448 if (r < 0) { 449 DMERR("tm_open_with_sm failed"); 450 goto bad; 451 } 452 453 __setup_mapping_info(cmd); 454 dm_disk_bitset_init(cmd->tm, &cmd->discard_info); 455 sb_flags = le32_to_cpu(disk_super->flags); 456 cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags); 457 return dm_bm_unlock(sblock); 458 459 bad: 460 dm_bm_unlock(sblock); 461 return r; 462 } 463 464 static int __open_or_format_metadata(struct dm_cache_metadata *cmd, 465 bool format_device) 466 { 467 int r; 468 bool unformatted = false; 469 470 r = __superblock_all_zeroes(cmd->bm, &unformatted); 471 if (r) 472 return r; 473 474 if (unformatted) 475 return format_device ? __format_metadata(cmd) : -EPERM; 476 477 return __open_metadata(cmd); 478 } 479 480 static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, 481 bool may_format_device) 482 { 483 int r; 484 cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, 485 CACHE_METADATA_CACHE_SIZE, 486 CACHE_MAX_CONCURRENT_LOCKS); 487 if (IS_ERR(cmd->bm)) { 488 DMERR("could not create block manager"); 489 return PTR_ERR(cmd->bm); 490 } 491 492 r = __open_or_format_metadata(cmd, may_format_device); 493 if (r) 494 dm_block_manager_destroy(cmd->bm); 495 496 return r; 497 } 498 499 static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd) 500 { 501 dm_sm_destroy(cmd->metadata_sm); 502 dm_tm_destroy(cmd->tm); 503 dm_block_manager_destroy(cmd->bm); 504 } 505 506 typedef unsigned long (*flags_mutator)(unsigned long); 507 508 static void update_flags(struct cache_disk_superblock *disk_super, 509 flags_mutator mutator) 510 { 511 uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags)); 512 disk_super->flags = cpu_to_le32(sb_flags); 513 } 514 515 static unsigned long set_clean_shutdown(unsigned long flags) 516 { 517 set_bit(CLEAN_SHUTDOWN, &flags); 518 return flags; 519 } 520 521 static unsigned long clear_clean_shutdown(unsigned long flags) 522 { 523 clear_bit(CLEAN_SHUTDOWN, &flags); 524 return flags; 525 } 526 527 static void read_superblock_fields(struct dm_cache_metadata *cmd, 528 struct cache_disk_superblock *disk_super) 529 { 530 cmd->root = le64_to_cpu(disk_super->mapping_root); 531 cmd->hint_root = le64_to_cpu(disk_super->hint_root); 532 cmd->discard_root = le64_to_cpu(disk_super->discard_root); 533 cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size); 534 cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks)); 535 cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); 536 cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); 537 strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); 538 cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]); 539 cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]); 540 cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]); 541 cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size); 542 543 cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits); 544 cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses); 545 cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits); 546 cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses); 547 548 cmd->changed = false; 549 } 550 551 /* 552 * The mutator updates the superblock flags. 553 */ 554 static int __begin_transaction_flags(struct dm_cache_metadata *cmd, 555 flags_mutator mutator) 556 { 557 int r; 558 struct cache_disk_superblock *disk_super; 559 struct dm_block *sblock; 560 561 r = superblock_lock(cmd, &sblock); 562 if (r) 563 return r; 564 565 disk_super = dm_block_data(sblock); 566 update_flags(disk_super, mutator); 567 read_superblock_fields(cmd, disk_super); 568 dm_bm_unlock(sblock); 569 570 return dm_bm_flush(cmd->bm); 571 } 572 573 static int __begin_transaction(struct dm_cache_metadata *cmd) 574 { 575 int r; 576 struct cache_disk_superblock *disk_super; 577 struct dm_block *sblock; 578 579 /* 580 * We re-read the superblock every time. Shouldn't need to do this 581 * really. 582 */ 583 r = superblock_read_lock(cmd, &sblock); 584 if (r) 585 return r; 586 587 disk_super = dm_block_data(sblock); 588 read_superblock_fields(cmd, disk_super); 589 dm_bm_unlock(sblock); 590 591 return 0; 592 } 593 594 static int __commit_transaction(struct dm_cache_metadata *cmd, 595 flags_mutator mutator) 596 { 597 int r; 598 struct cache_disk_superblock *disk_super; 599 struct dm_block *sblock; 600 601 /* 602 * We need to know if the cache_disk_superblock exceeds a 512-byte sector. 603 */ 604 BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512); 605 606 r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, 607 &cmd->discard_root); 608 if (r) 609 return r; 610 611 r = dm_tm_pre_commit(cmd->tm); 612 if (r < 0) 613 return r; 614 615 r = __save_sm_root(cmd); 616 if (r) 617 return r; 618 619 r = superblock_lock(cmd, &sblock); 620 if (r) 621 return r; 622 623 disk_super = dm_block_data(sblock); 624 625 if (mutator) 626 update_flags(disk_super, mutator); 627 628 disk_super->mapping_root = cpu_to_le64(cmd->root); 629 disk_super->hint_root = cpu_to_le64(cmd->hint_root); 630 disk_super->discard_root = cpu_to_le64(cmd->discard_root); 631 disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); 632 disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); 633 disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); 634 strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); 635 disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); 636 disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); 637 disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); 638 639 disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); 640 disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); 641 disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits); 642 disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses); 643 __copy_sm_root(cmd, disk_super); 644 645 return dm_tm_commit(cmd->tm, sblock); 646 } 647 648 /*----------------------------------------------------------------*/ 649 650 /* 651 * The mappings are held in a dm-array that has 64-bit values stored in 652 * little-endian format. The index is the cblock, the high 48bits of the 653 * value are the oblock and the low 16 bit the flags. 654 */ 655 #define FLAGS_MASK ((1 << 16) - 1) 656 657 static __le64 pack_value(dm_oblock_t block, unsigned flags) 658 { 659 uint64_t value = from_oblock(block); 660 value <<= 16; 661 value = value | (flags & FLAGS_MASK); 662 return cpu_to_le64(value); 663 } 664 665 static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags) 666 { 667 uint64_t value = le64_to_cpu(value_le); 668 uint64_t b = value >> 16; 669 *block = to_oblock(b); 670 *flags = value & FLAGS_MASK; 671 } 672 673 /*----------------------------------------------------------------*/ 674 675 static struct dm_cache_metadata *metadata_open(struct block_device *bdev, 676 sector_t data_block_size, 677 bool may_format_device, 678 size_t policy_hint_size) 679 { 680 int r; 681 struct dm_cache_metadata *cmd; 682 683 cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); 684 if (!cmd) { 685 DMERR("could not allocate metadata struct"); 686 return ERR_PTR(-ENOMEM); 687 } 688 689 atomic_set(&cmd->ref_count, 1); 690 init_rwsem(&cmd->root_lock); 691 cmd->bdev = bdev; 692 cmd->data_block_size = data_block_size; 693 cmd->cache_blocks = 0; 694 cmd->policy_hint_size = policy_hint_size; 695 cmd->changed = true; 696 697 r = __create_persistent_data_objects(cmd, may_format_device); 698 if (r) { 699 kfree(cmd); 700 return ERR_PTR(r); 701 } 702 703 r = __begin_transaction_flags(cmd, clear_clean_shutdown); 704 if (r < 0) { 705 dm_cache_metadata_close(cmd); 706 return ERR_PTR(r); 707 } 708 709 return cmd; 710 } 711 712 /* 713 * We keep a little list of ref counted metadata objects to prevent two 714 * different target instances creating separate bufio instances. This is 715 * an issue if a table is reloaded before the suspend. 716 */ 717 static DEFINE_MUTEX(table_lock); 718 static LIST_HEAD(table); 719 720 static struct dm_cache_metadata *lookup(struct block_device *bdev) 721 { 722 struct dm_cache_metadata *cmd; 723 724 list_for_each_entry(cmd, &table, list) 725 if (cmd->bdev == bdev) { 726 atomic_inc(&cmd->ref_count); 727 return cmd; 728 } 729 730 return NULL; 731 } 732 733 static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, 734 sector_t data_block_size, 735 bool may_format_device, 736 size_t policy_hint_size) 737 { 738 struct dm_cache_metadata *cmd, *cmd2; 739 740 mutex_lock(&table_lock); 741 cmd = lookup(bdev); 742 mutex_unlock(&table_lock); 743 744 if (cmd) 745 return cmd; 746 747 cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size); 748 if (!IS_ERR(cmd)) { 749 mutex_lock(&table_lock); 750 cmd2 = lookup(bdev); 751 if (cmd2) { 752 mutex_unlock(&table_lock); 753 __destroy_persistent_data_objects(cmd); 754 kfree(cmd); 755 return cmd2; 756 } 757 list_add(&cmd->list, &table); 758 mutex_unlock(&table_lock); 759 } 760 761 return cmd; 762 } 763 764 static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) 765 { 766 if (cmd->data_block_size != data_block_size) { 767 DMERR("data_block_size (%llu) different from that in metadata (%llu)\n", 768 (unsigned long long) data_block_size, 769 (unsigned long long) cmd->data_block_size); 770 return false; 771 } 772 773 return true; 774 } 775 776 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, 777 sector_t data_block_size, 778 bool may_format_device, 779 size_t policy_hint_size) 780 { 781 struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, 782 may_format_device, policy_hint_size); 783 784 if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) { 785 dm_cache_metadata_close(cmd); 786 return ERR_PTR(-EINVAL); 787 } 788 789 return cmd; 790 } 791 792 void dm_cache_metadata_close(struct dm_cache_metadata *cmd) 793 { 794 if (atomic_dec_and_test(&cmd->ref_count)) { 795 mutex_lock(&table_lock); 796 list_del(&cmd->list); 797 mutex_unlock(&table_lock); 798 799 __destroy_persistent_data_objects(cmd); 800 kfree(cmd); 801 } 802 } 803 804 /* 805 * Checks that the given cache block is either unmapped or clean. 806 */ 807 static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, 808 bool *result) 809 { 810 int r; 811 __le64 value; 812 dm_oblock_t ob; 813 unsigned flags; 814 815 r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value); 816 if (r) { 817 DMERR("block_unmapped_or_clean failed"); 818 return r; 819 } 820 821 unpack_value(value, &ob, &flags); 822 *result = !((flags & M_VALID) && (flags & M_DIRTY)); 823 824 return 0; 825 } 826 827 static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, 828 dm_cblock_t begin, dm_cblock_t end, 829 bool *result) 830 { 831 int r; 832 *result = true; 833 834 while (begin != end) { 835 r = block_unmapped_or_clean(cmd, begin, result); 836 if (r) 837 return r; 838 839 if (!*result) { 840 DMERR("cache block %llu is dirty", 841 (unsigned long long) from_cblock(begin)); 842 return 0; 843 } 844 845 begin = to_cblock(from_cblock(begin) + 1); 846 } 847 848 return 0; 849 } 850 851 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) 852 { 853 int r; 854 bool clean; 855 __le64 null_mapping = pack_value(0, 0); 856 857 down_write(&cmd->root_lock); 858 __dm_bless_for_disk(&null_mapping); 859 860 if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) { 861 r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean); 862 if (r) { 863 __dm_unbless_for_disk(&null_mapping); 864 goto out; 865 } 866 867 if (!clean) { 868 DMERR("unable to shrink cache due to dirty blocks"); 869 r = -EINVAL; 870 __dm_unbless_for_disk(&null_mapping); 871 goto out; 872 } 873 } 874 875 r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks), 876 from_cblock(new_cache_size), 877 &null_mapping, &cmd->root); 878 if (!r) 879 cmd->cache_blocks = new_cache_size; 880 cmd->changed = true; 881 882 out: 883 up_write(&cmd->root_lock); 884 885 return r; 886 } 887 888 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, 889 sector_t discard_block_size, 890 dm_dblock_t new_nr_entries) 891 { 892 int r; 893 894 down_write(&cmd->root_lock); 895 r = dm_bitset_resize(&cmd->discard_info, 896 cmd->discard_root, 897 from_dblock(cmd->discard_nr_blocks), 898 from_dblock(new_nr_entries), 899 false, &cmd->discard_root); 900 if (!r) { 901 cmd->discard_block_size = discard_block_size; 902 cmd->discard_nr_blocks = new_nr_entries; 903 } 904 905 cmd->changed = true; 906 up_write(&cmd->root_lock); 907 908 return r; 909 } 910 911 static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) 912 { 913 return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root, 914 from_dblock(b), &cmd->discard_root); 915 } 916 917 static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) 918 { 919 return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root, 920 from_dblock(b), &cmd->discard_root); 921 } 922 923 static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b, 924 bool *is_discarded) 925 { 926 return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root, 927 from_dblock(b), &cmd->discard_root, 928 is_discarded); 929 } 930 931 static int __discard(struct dm_cache_metadata *cmd, 932 dm_dblock_t dblock, bool discard) 933 { 934 int r; 935 936 r = (discard ? __set_discard : __clear_discard)(cmd, dblock); 937 if (r) 938 return r; 939 940 cmd->changed = true; 941 return 0; 942 } 943 944 int dm_cache_set_discard(struct dm_cache_metadata *cmd, 945 dm_dblock_t dblock, bool discard) 946 { 947 int r; 948 949 down_write(&cmd->root_lock); 950 r = __discard(cmd, dblock, discard); 951 up_write(&cmd->root_lock); 952 953 return r; 954 } 955 956 static int __load_discards(struct dm_cache_metadata *cmd, 957 load_discard_fn fn, void *context) 958 { 959 int r = 0; 960 dm_block_t b; 961 bool discard; 962 963 for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) { 964 dm_dblock_t dblock = to_dblock(b); 965 966 if (cmd->clean_when_opened) { 967 r = __is_discarded(cmd, dblock, &discard); 968 if (r) 969 return r; 970 } else 971 discard = false; 972 973 r = fn(context, cmd->discard_block_size, dblock, discard); 974 if (r) 975 break; 976 } 977 978 return r; 979 } 980 981 int dm_cache_load_discards(struct dm_cache_metadata *cmd, 982 load_discard_fn fn, void *context) 983 { 984 int r; 985 986 down_read(&cmd->root_lock); 987 r = __load_discards(cmd, fn, context); 988 up_read(&cmd->root_lock); 989 990 return r; 991 } 992 993 dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd) 994 { 995 dm_cblock_t r; 996 997 down_read(&cmd->root_lock); 998 r = cmd->cache_blocks; 999 up_read(&cmd->root_lock); 1000 1001 return r; 1002 } 1003 1004 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock) 1005 { 1006 int r; 1007 __le64 value = pack_value(0, 0); 1008 1009 __dm_bless_for_disk(&value); 1010 r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), 1011 &value, &cmd->root); 1012 if (r) 1013 return r; 1014 1015 cmd->changed = true; 1016 return 0; 1017 } 1018 1019 int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock) 1020 { 1021 int r; 1022 1023 down_write(&cmd->root_lock); 1024 r = __remove(cmd, cblock); 1025 up_write(&cmd->root_lock); 1026 1027 return r; 1028 } 1029 1030 static int __insert(struct dm_cache_metadata *cmd, 1031 dm_cblock_t cblock, dm_oblock_t oblock) 1032 { 1033 int r; 1034 __le64 value = pack_value(oblock, M_VALID); 1035 __dm_bless_for_disk(&value); 1036 1037 r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), 1038 &value, &cmd->root); 1039 if (r) 1040 return r; 1041 1042 cmd->changed = true; 1043 return 0; 1044 } 1045 1046 int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, 1047 dm_cblock_t cblock, dm_oblock_t oblock) 1048 { 1049 int r; 1050 1051 down_write(&cmd->root_lock); 1052 r = __insert(cmd, cblock, oblock); 1053 up_write(&cmd->root_lock); 1054 1055 return r; 1056 } 1057 1058 struct thunk { 1059 load_mapping_fn fn; 1060 void *context; 1061 1062 struct dm_cache_metadata *cmd; 1063 bool respect_dirty_flags; 1064 bool hints_valid; 1065 }; 1066 1067 static bool policy_unchanged(struct dm_cache_metadata *cmd, 1068 struct dm_cache_policy *policy) 1069 { 1070 const char *policy_name = dm_cache_policy_get_name(policy); 1071 const unsigned *policy_version = dm_cache_policy_get_version(policy); 1072 size_t policy_hint_size = dm_cache_policy_get_hint_size(policy); 1073 1074 /* 1075 * Ensure policy names match. 1076 */ 1077 if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name))) 1078 return false; 1079 1080 /* 1081 * Ensure policy major versions match. 1082 */ 1083 if (cmd->policy_version[0] != policy_version[0]) 1084 return false; 1085 1086 /* 1087 * Ensure policy hint sizes match. 1088 */ 1089 if (cmd->policy_hint_size != policy_hint_size) 1090 return false; 1091 1092 return true; 1093 } 1094 1095 static bool hints_array_initialized(struct dm_cache_metadata *cmd) 1096 { 1097 return cmd->hint_root && cmd->policy_hint_size; 1098 } 1099 1100 static bool hints_array_available(struct dm_cache_metadata *cmd, 1101 struct dm_cache_policy *policy) 1102 { 1103 return cmd->clean_when_opened && policy_unchanged(cmd, policy) && 1104 hints_array_initialized(cmd); 1105 } 1106 1107 static int __load_mapping(void *context, uint64_t cblock, void *leaf) 1108 { 1109 int r = 0; 1110 bool dirty; 1111 __le64 value; 1112 __le32 hint_value = 0; 1113 dm_oblock_t oblock; 1114 unsigned flags; 1115 struct thunk *thunk = context; 1116 struct dm_cache_metadata *cmd = thunk->cmd; 1117 1118 memcpy(&value, leaf, sizeof(value)); 1119 unpack_value(value, &oblock, &flags); 1120 1121 if (flags & M_VALID) { 1122 if (thunk->hints_valid) { 1123 r = dm_array_get_value(&cmd->hint_info, cmd->hint_root, 1124 cblock, &hint_value); 1125 if (r && r != -ENODATA) 1126 return r; 1127 } 1128 1129 dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true; 1130 r = thunk->fn(thunk->context, oblock, to_cblock(cblock), 1131 dirty, le32_to_cpu(hint_value), thunk->hints_valid); 1132 } 1133 1134 return r; 1135 } 1136 1137 static int __load_mappings(struct dm_cache_metadata *cmd, 1138 struct dm_cache_policy *policy, 1139 load_mapping_fn fn, void *context) 1140 { 1141 struct thunk thunk; 1142 1143 thunk.fn = fn; 1144 thunk.context = context; 1145 1146 thunk.cmd = cmd; 1147 thunk.respect_dirty_flags = cmd->clean_when_opened; 1148 thunk.hints_valid = hints_array_available(cmd, policy); 1149 1150 return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk); 1151 } 1152 1153 int dm_cache_load_mappings(struct dm_cache_metadata *cmd, 1154 struct dm_cache_policy *policy, 1155 load_mapping_fn fn, void *context) 1156 { 1157 int r; 1158 1159 down_read(&cmd->root_lock); 1160 r = __load_mappings(cmd, policy, fn, context); 1161 up_read(&cmd->root_lock); 1162 1163 return r; 1164 } 1165 1166 static int __dump_mapping(void *context, uint64_t cblock, void *leaf) 1167 { 1168 int r = 0; 1169 __le64 value; 1170 dm_oblock_t oblock; 1171 unsigned flags; 1172 1173 memcpy(&value, leaf, sizeof(value)); 1174 unpack_value(value, &oblock, &flags); 1175 1176 return r; 1177 } 1178 1179 static int __dump_mappings(struct dm_cache_metadata *cmd) 1180 { 1181 return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL); 1182 } 1183 1184 void dm_cache_dump(struct dm_cache_metadata *cmd) 1185 { 1186 down_read(&cmd->root_lock); 1187 __dump_mappings(cmd); 1188 up_read(&cmd->root_lock); 1189 } 1190 1191 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd) 1192 { 1193 int r; 1194 1195 down_read(&cmd->root_lock); 1196 r = cmd->changed; 1197 up_read(&cmd->root_lock); 1198 1199 return r; 1200 } 1201 1202 static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty) 1203 { 1204 int r; 1205 unsigned flags; 1206 dm_oblock_t oblock; 1207 __le64 value; 1208 1209 r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value); 1210 if (r) 1211 return r; 1212 1213 unpack_value(value, &oblock, &flags); 1214 1215 if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty)) 1216 /* nothing to be done */ 1217 return 0; 1218 1219 value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0)); 1220 __dm_bless_for_disk(&value); 1221 1222 r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), 1223 &value, &cmd->root); 1224 if (r) 1225 return r; 1226 1227 cmd->changed = true; 1228 return 0; 1229 1230 } 1231 1232 int dm_cache_set_dirty(struct dm_cache_metadata *cmd, 1233 dm_cblock_t cblock, bool dirty) 1234 { 1235 int r; 1236 1237 down_write(&cmd->root_lock); 1238 r = __dirty(cmd, cblock, dirty); 1239 up_write(&cmd->root_lock); 1240 1241 return r; 1242 } 1243 1244 void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd, 1245 struct dm_cache_statistics *stats) 1246 { 1247 down_read(&cmd->root_lock); 1248 *stats = cmd->stats; 1249 up_read(&cmd->root_lock); 1250 } 1251 1252 void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, 1253 struct dm_cache_statistics *stats) 1254 { 1255 down_write(&cmd->root_lock); 1256 cmd->stats = *stats; 1257 up_write(&cmd->root_lock); 1258 } 1259 1260 int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown) 1261 { 1262 int r; 1263 flags_mutator mutator = (clean_shutdown ? set_clean_shutdown : 1264 clear_clean_shutdown); 1265 1266 down_write(&cmd->root_lock); 1267 r = __commit_transaction(cmd, mutator); 1268 if (r) 1269 goto out; 1270 1271 r = __begin_transaction(cmd); 1272 1273 out: 1274 up_write(&cmd->root_lock); 1275 return r; 1276 } 1277 1278 int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd, 1279 dm_block_t *result) 1280 { 1281 int r = -EINVAL; 1282 1283 down_read(&cmd->root_lock); 1284 r = dm_sm_get_nr_free(cmd->metadata_sm, result); 1285 up_read(&cmd->root_lock); 1286 1287 return r; 1288 } 1289 1290 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, 1291 dm_block_t *result) 1292 { 1293 int r = -EINVAL; 1294 1295 down_read(&cmd->root_lock); 1296 r = dm_sm_get_nr_blocks(cmd->metadata_sm, result); 1297 up_read(&cmd->root_lock); 1298 1299 return r; 1300 } 1301 1302 /*----------------------------------------------------------------*/ 1303 1304 static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) 1305 { 1306 int r; 1307 __le32 value; 1308 size_t hint_size; 1309 const char *policy_name = dm_cache_policy_get_name(policy); 1310 const unsigned *policy_version = dm_cache_policy_get_version(policy); 1311 1312 if (!policy_name[0] || 1313 (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) 1314 return -EINVAL; 1315 1316 if (!policy_unchanged(cmd, policy)) { 1317 strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); 1318 memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version)); 1319 1320 hint_size = dm_cache_policy_get_hint_size(policy); 1321 if (!hint_size) 1322 return 0; /* short-circuit hints initialization */ 1323 cmd->policy_hint_size = hint_size; 1324 1325 if (cmd->hint_root) { 1326 r = dm_array_del(&cmd->hint_info, cmd->hint_root); 1327 if (r) 1328 return r; 1329 } 1330 1331 r = dm_array_empty(&cmd->hint_info, &cmd->hint_root); 1332 if (r) 1333 return r; 1334 1335 value = cpu_to_le32(0); 1336 __dm_bless_for_disk(&value); 1337 r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0, 1338 from_cblock(cmd->cache_blocks), 1339 &value, &cmd->hint_root); 1340 if (r) 1341 return r; 1342 } 1343 1344 return 0; 1345 } 1346 1347 static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint) 1348 { 1349 struct dm_cache_metadata *cmd = context; 1350 __le32 value = cpu_to_le32(hint); 1351 int r; 1352 1353 __dm_bless_for_disk(&value); 1354 1355 r = dm_array_set_value(&cmd->hint_info, cmd->hint_root, 1356 from_cblock(cblock), &value, &cmd->hint_root); 1357 cmd->changed = true; 1358 1359 return r; 1360 } 1361 1362 static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) 1363 { 1364 int r; 1365 1366 r = begin_hints(cmd, policy); 1367 if (r) { 1368 DMERR("begin_hints failed"); 1369 return r; 1370 } 1371 1372 return policy_walk_mappings(policy, save_hint, cmd); 1373 } 1374 1375 int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) 1376 { 1377 int r; 1378 1379 down_write(&cmd->root_lock); 1380 r = write_hints(cmd, policy); 1381 up_write(&cmd->root_lock); 1382 1383 return r; 1384 } 1385 1386 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result) 1387 { 1388 return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result); 1389 } 1390