1 /* 2 * Copyright (C) 2012 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6 7 #include "dm-cache-metadata.h" 8 9 #include "persistent-data/dm-array.h" 10 #include "persistent-data/dm-bitset.h" 11 #include "persistent-data/dm-space-map.h" 12 #include "persistent-data/dm-space-map-disk.h" 13 #include "persistent-data/dm-transaction-manager.h" 14 15 #include <linux/device-mapper.h> 16 17 /*----------------------------------------------------------------*/ 18 19 #define DM_MSG_PREFIX "cache metadata" 20 21 #define CACHE_SUPERBLOCK_MAGIC 06142003 22 #define CACHE_SUPERBLOCK_LOCATION 0 23 24 /* 25 * defines a range of metadata versions that this module can handle. 26 */ 27 #define MIN_CACHE_VERSION 1 28 #define MAX_CACHE_VERSION 1 29 30 #define CACHE_METADATA_CACHE_SIZE 64 31 32 /* 33 * 3 for btree insert + 34 * 2 for btree lookup used within space map 35 */ 36 #define CACHE_MAX_CONCURRENT_LOCKS 5 37 #define SPACE_MAP_ROOT_SIZE 128 38 39 enum superblock_flag_bits { 40 /* for spotting crashes that would invalidate the dirty bitset */ 41 CLEAN_SHUTDOWN, 42 }; 43 44 /* 45 * Each mapping from cache block -> origin block carries a set of flags. 46 */ 47 enum mapping_bits { 48 /* 49 * A valid mapping. Because we're using an array we clear this 50 * flag for an non existant mapping. 51 */ 52 M_VALID = 1, 53 54 /* 55 * The data on the cache is different from that on the origin. 56 */ 57 M_DIRTY = 2 58 }; 59 60 struct cache_disk_superblock { 61 __le32 csum; 62 __le32 flags; 63 __le64 blocknr; 64 65 __u8 uuid[16]; 66 __le64 magic; 67 __le32 version; 68 69 __u8 policy_name[CACHE_POLICY_NAME_SIZE]; 70 __le32 policy_hint_size; 71 72 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 73 __le64 mapping_root; 74 __le64 hint_root; 75 76 __le64 discard_root; 77 __le64 discard_block_size; 78 __le64 discard_nr_blocks; 79 80 __le32 data_block_size; 81 __le32 metadata_block_size; 82 __le32 cache_blocks; 83 84 __le32 compat_flags; 85 __le32 compat_ro_flags; 86 __le32 incompat_flags; 87 88 __le32 read_hits; 89 __le32 read_misses; 90 __le32 write_hits; 91 __le32 write_misses; 92 93 __le32 policy_version[CACHE_POLICY_VERSION_SIZE]; 94 } __packed; 95 96 struct dm_cache_metadata { 97 struct block_device *bdev; 98 struct dm_block_manager *bm; 99 struct dm_space_map *metadata_sm; 100 struct dm_transaction_manager *tm; 101 102 struct dm_array_info info; 103 struct dm_array_info hint_info; 104 struct dm_disk_bitset discard_info; 105 106 struct rw_semaphore root_lock; 107 dm_block_t root; 108 dm_block_t hint_root; 109 dm_block_t discard_root; 110 111 sector_t discard_block_size; 112 dm_oblock_t discard_nr_blocks; 113 114 sector_t data_block_size; 115 dm_cblock_t cache_blocks; 116 bool changed:1; 117 bool clean_when_opened:1; 118 119 char policy_name[CACHE_POLICY_NAME_SIZE]; 120 unsigned policy_version[CACHE_POLICY_VERSION_SIZE]; 121 size_t policy_hint_size; 122 struct dm_cache_statistics stats; 123 124 /* 125 * Reading the space map root can fail, so we read it into this 126 * buffer before the superblock is locked and updated. 127 */ 128 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 129 }; 130 131 /*------------------------------------------------------------------- 132 * superblock validator 133 *-----------------------------------------------------------------*/ 134 135 #define SUPERBLOCK_CSUM_XOR 9031977 136 137 static void sb_prepare_for_write(struct dm_block_validator *v, 138 struct dm_block *b, 139 size_t sb_block_size) 140 { 141 struct cache_disk_superblock *disk_super = dm_block_data(b); 142 143 disk_super->blocknr = cpu_to_le64(dm_block_location(b)); 144 disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags, 145 sb_block_size - sizeof(__le32), 146 SUPERBLOCK_CSUM_XOR)); 147 } 148 149 static int check_metadata_version(struct cache_disk_superblock *disk_super) 150 { 151 uint32_t metadata_version = le32_to_cpu(disk_super->version); 152 if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) { 153 DMERR("Cache metadata version %u found, but only versions between %u and %u supported.", 154 metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION); 155 return -EINVAL; 156 } 157 158 return 0; 159 } 160 161 static int sb_check(struct dm_block_validator *v, 162 struct dm_block *b, 163 size_t sb_block_size) 164 { 165 struct cache_disk_superblock *disk_super = dm_block_data(b); 166 __le32 csum_le; 167 168 if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) { 169 DMERR("sb_check failed: blocknr %llu: wanted %llu", 170 le64_to_cpu(disk_super->blocknr), 171 (unsigned long long)dm_block_location(b)); 172 return -ENOTBLK; 173 } 174 175 if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) { 176 DMERR("sb_check failed: magic %llu: wanted %llu", 177 le64_to_cpu(disk_super->magic), 178 (unsigned long long)CACHE_SUPERBLOCK_MAGIC); 179 return -EILSEQ; 180 } 181 182 csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags, 183 sb_block_size - sizeof(__le32), 184 SUPERBLOCK_CSUM_XOR)); 185 if (csum_le != disk_super->csum) { 186 DMERR("sb_check failed: csum %u: wanted %u", 187 le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum)); 188 return -EILSEQ; 189 } 190 191 return check_metadata_version(disk_super); 192 } 193 194 static struct dm_block_validator sb_validator = { 195 .name = "superblock", 196 .prepare_for_write = sb_prepare_for_write, 197 .check = sb_check 198 }; 199 200 /*----------------------------------------------------------------*/ 201 202 static int superblock_read_lock(struct dm_cache_metadata *cmd, 203 struct dm_block **sblock) 204 { 205 return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 206 &sb_validator, sblock); 207 } 208 209 static int superblock_lock_zero(struct dm_cache_metadata *cmd, 210 struct dm_block **sblock) 211 { 212 return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 213 &sb_validator, sblock); 214 } 215 216 static int superblock_lock(struct dm_cache_metadata *cmd, 217 struct dm_block **sblock) 218 { 219 return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 220 &sb_validator, sblock); 221 } 222 223 /*----------------------------------------------------------------*/ 224 225 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result) 226 { 227 int r; 228 unsigned i; 229 struct dm_block *b; 230 __le64 *data_le, zero = cpu_to_le64(0); 231 unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64); 232 233 /* 234 * We can't use a validator here - it may be all zeroes. 235 */ 236 r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b); 237 if (r) 238 return r; 239 240 data_le = dm_block_data(b); 241 *result = true; 242 for (i = 0; i < sb_block_size; i++) { 243 if (data_le[i] != zero) { 244 *result = false; 245 break; 246 } 247 } 248 249 return dm_bm_unlock(b); 250 } 251 252 static void __setup_mapping_info(struct dm_cache_metadata *cmd) 253 { 254 struct dm_btree_value_type vt; 255 256 vt.context = NULL; 257 vt.size = sizeof(__le64); 258 vt.inc = NULL; 259 vt.dec = NULL; 260 vt.equal = NULL; 261 dm_array_info_init(&cmd->info, cmd->tm, &vt); 262 263 if (cmd->policy_hint_size) { 264 vt.size = sizeof(__le32); 265 dm_array_info_init(&cmd->hint_info, cmd->tm, &vt); 266 } 267 } 268 269 static int __save_sm_root(struct dm_cache_metadata *cmd) 270 { 271 int r; 272 size_t metadata_len; 273 274 r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); 275 if (r < 0) 276 return r; 277 278 return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root, 279 metadata_len); 280 } 281 282 static void __copy_sm_root(struct dm_cache_metadata *cmd, 283 struct cache_disk_superblock *disk_super) 284 { 285 memcpy(&disk_super->metadata_space_map_root, 286 &cmd->metadata_space_map_root, 287 sizeof(cmd->metadata_space_map_root)); 288 } 289 290 static int __write_initial_superblock(struct dm_cache_metadata *cmd) 291 { 292 int r; 293 struct dm_block *sblock; 294 struct cache_disk_superblock *disk_super; 295 sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT; 296 297 /* FIXME: see if we can lose the max sectors limit */ 298 if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS) 299 bdev_size = DM_CACHE_METADATA_MAX_SECTORS; 300 301 r = dm_tm_pre_commit(cmd->tm); 302 if (r < 0) 303 return r; 304 305 /* 306 * dm_sm_copy_root() can fail. So we need to do it before we start 307 * updating the superblock. 308 */ 309 r = __save_sm_root(cmd); 310 if (r) 311 return r; 312 313 r = superblock_lock_zero(cmd, &sblock); 314 if (r) 315 return r; 316 317 disk_super = dm_block_data(sblock); 318 disk_super->flags = 0; 319 memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); 320 disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); 321 disk_super->version = cpu_to_le32(MAX_CACHE_VERSION); 322 memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); 323 memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); 324 disk_super->policy_hint_size = 0; 325 326 __copy_sm_root(cmd, disk_super); 327 328 disk_super->mapping_root = cpu_to_le64(cmd->root); 329 disk_super->hint_root = cpu_to_le64(cmd->hint_root); 330 disk_super->discard_root = cpu_to_le64(cmd->discard_root); 331 disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); 332 disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); 333 disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE); 334 disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); 335 disk_super->cache_blocks = cpu_to_le32(0); 336 337 disk_super->read_hits = cpu_to_le32(0); 338 disk_super->read_misses = cpu_to_le32(0); 339 disk_super->write_hits = cpu_to_le32(0); 340 disk_super->write_misses = cpu_to_le32(0); 341 342 return dm_tm_commit(cmd->tm, sblock); 343 } 344 345 static int __format_metadata(struct dm_cache_metadata *cmd) 346 { 347 int r; 348 349 r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 350 &cmd->tm, &cmd->metadata_sm); 351 if (r < 0) { 352 DMERR("tm_create_with_sm failed"); 353 return r; 354 } 355 356 __setup_mapping_info(cmd); 357 358 r = dm_array_empty(&cmd->info, &cmd->root); 359 if (r < 0) 360 goto bad; 361 362 dm_disk_bitset_init(cmd->tm, &cmd->discard_info); 363 364 r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root); 365 if (r < 0) 366 goto bad; 367 368 cmd->discard_block_size = 0; 369 cmd->discard_nr_blocks = 0; 370 371 r = __write_initial_superblock(cmd); 372 if (r) 373 goto bad; 374 375 cmd->clean_when_opened = true; 376 return 0; 377 378 bad: 379 dm_tm_destroy(cmd->tm); 380 dm_sm_destroy(cmd->metadata_sm); 381 382 return r; 383 } 384 385 static int __check_incompat_features(struct cache_disk_superblock *disk_super, 386 struct dm_cache_metadata *cmd) 387 { 388 uint32_t features; 389 390 features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP; 391 if (features) { 392 DMERR("could not access metadata due to unsupported optional features (%lx).", 393 (unsigned long)features); 394 return -EINVAL; 395 } 396 397 /* 398 * Check for read-only metadata to skip the following RDWR checks. 399 */ 400 if (get_disk_ro(cmd->bdev->bd_disk)) 401 return 0; 402 403 features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP; 404 if (features) { 405 DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", 406 (unsigned long)features); 407 return -EINVAL; 408 } 409 410 return 0; 411 } 412 413 static int __open_metadata(struct dm_cache_metadata *cmd) 414 { 415 int r; 416 struct dm_block *sblock; 417 struct cache_disk_superblock *disk_super; 418 unsigned long sb_flags; 419 420 r = superblock_read_lock(cmd, &sblock); 421 if (r < 0) { 422 DMERR("couldn't read lock superblock"); 423 return r; 424 } 425 426 disk_super = dm_block_data(sblock); 427 428 /* Verify the data block size hasn't changed */ 429 if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) { 430 DMERR("changing the data block size (from %u to %llu) is not supported", 431 le32_to_cpu(disk_super->data_block_size), 432 (unsigned long long)cmd->data_block_size); 433 r = -EINVAL; 434 goto bad; 435 } 436 437 r = __check_incompat_features(disk_super, cmd); 438 if (r < 0) 439 goto bad; 440 441 r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION, 442 disk_super->metadata_space_map_root, 443 sizeof(disk_super->metadata_space_map_root), 444 &cmd->tm, &cmd->metadata_sm); 445 if (r < 0) { 446 DMERR("tm_open_with_sm failed"); 447 goto bad; 448 } 449 450 __setup_mapping_info(cmd); 451 dm_disk_bitset_init(cmd->tm, &cmd->discard_info); 452 sb_flags = le32_to_cpu(disk_super->flags); 453 cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags); 454 return dm_bm_unlock(sblock); 455 456 bad: 457 dm_bm_unlock(sblock); 458 return r; 459 } 460 461 static int __open_or_format_metadata(struct dm_cache_metadata *cmd, 462 bool format_device) 463 { 464 int r; 465 bool unformatted = false; 466 467 r = __superblock_all_zeroes(cmd->bm, &unformatted); 468 if (r) 469 return r; 470 471 if (unformatted) 472 return format_device ? __format_metadata(cmd) : -EPERM; 473 474 return __open_metadata(cmd); 475 } 476 477 static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, 478 bool may_format_device) 479 { 480 int r; 481 cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, 482 CACHE_METADATA_CACHE_SIZE, 483 CACHE_MAX_CONCURRENT_LOCKS); 484 if (IS_ERR(cmd->bm)) { 485 DMERR("could not create block manager"); 486 return PTR_ERR(cmd->bm); 487 } 488 489 r = __open_or_format_metadata(cmd, may_format_device); 490 if (r) 491 dm_block_manager_destroy(cmd->bm); 492 493 return r; 494 } 495 496 static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd) 497 { 498 dm_sm_destroy(cmd->metadata_sm); 499 dm_tm_destroy(cmd->tm); 500 dm_block_manager_destroy(cmd->bm); 501 } 502 503 typedef unsigned long (*flags_mutator)(unsigned long); 504 505 static void update_flags(struct cache_disk_superblock *disk_super, 506 flags_mutator mutator) 507 { 508 uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags)); 509 disk_super->flags = cpu_to_le32(sb_flags); 510 } 511 512 static unsigned long set_clean_shutdown(unsigned long flags) 513 { 514 set_bit(CLEAN_SHUTDOWN, &flags); 515 return flags; 516 } 517 518 static unsigned long clear_clean_shutdown(unsigned long flags) 519 { 520 clear_bit(CLEAN_SHUTDOWN, &flags); 521 return flags; 522 } 523 524 static void read_superblock_fields(struct dm_cache_metadata *cmd, 525 struct cache_disk_superblock *disk_super) 526 { 527 cmd->root = le64_to_cpu(disk_super->mapping_root); 528 cmd->hint_root = le64_to_cpu(disk_super->hint_root); 529 cmd->discard_root = le64_to_cpu(disk_super->discard_root); 530 cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size); 531 cmd->discard_nr_blocks = to_oblock(le64_to_cpu(disk_super->discard_nr_blocks)); 532 cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); 533 cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); 534 strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); 535 cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]); 536 cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]); 537 cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]); 538 cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size); 539 540 cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits); 541 cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses); 542 cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits); 543 cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses); 544 545 cmd->changed = false; 546 } 547 548 /* 549 * The mutator updates the superblock flags. 550 */ 551 static int __begin_transaction_flags(struct dm_cache_metadata *cmd, 552 flags_mutator mutator) 553 { 554 int r; 555 struct cache_disk_superblock *disk_super; 556 struct dm_block *sblock; 557 558 r = superblock_lock(cmd, &sblock); 559 if (r) 560 return r; 561 562 disk_super = dm_block_data(sblock); 563 update_flags(disk_super, mutator); 564 read_superblock_fields(cmd, disk_super); 565 dm_bm_unlock(sblock); 566 567 return dm_bm_flush(cmd->bm); 568 } 569 570 static int __begin_transaction(struct dm_cache_metadata *cmd) 571 { 572 int r; 573 struct cache_disk_superblock *disk_super; 574 struct dm_block *sblock; 575 576 /* 577 * We re-read the superblock every time. Shouldn't need to do this 578 * really. 579 */ 580 r = superblock_read_lock(cmd, &sblock); 581 if (r) 582 return r; 583 584 disk_super = dm_block_data(sblock); 585 read_superblock_fields(cmd, disk_super); 586 dm_bm_unlock(sblock); 587 588 return 0; 589 } 590 591 static int __commit_transaction(struct dm_cache_metadata *cmd, 592 flags_mutator mutator) 593 { 594 int r; 595 struct cache_disk_superblock *disk_super; 596 struct dm_block *sblock; 597 598 /* 599 * We need to know if the cache_disk_superblock exceeds a 512-byte sector. 600 */ 601 BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512); 602 603 r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, 604 &cmd->discard_root); 605 if (r) 606 return r; 607 608 r = dm_tm_pre_commit(cmd->tm); 609 if (r < 0) 610 return r; 611 612 r = __save_sm_root(cmd); 613 if (r) 614 return r; 615 616 r = superblock_lock(cmd, &sblock); 617 if (r) 618 return r; 619 620 disk_super = dm_block_data(sblock); 621 622 if (mutator) 623 update_flags(disk_super, mutator); 624 625 disk_super->mapping_root = cpu_to_le64(cmd->root); 626 disk_super->hint_root = cpu_to_le64(cmd->hint_root); 627 disk_super->discard_root = cpu_to_le64(cmd->discard_root); 628 disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); 629 disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); 630 disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); 631 strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); 632 disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); 633 disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); 634 disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); 635 636 disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); 637 disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); 638 disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits); 639 disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses); 640 __copy_sm_root(cmd, disk_super); 641 642 return dm_tm_commit(cmd->tm, sblock); 643 } 644 645 /*----------------------------------------------------------------*/ 646 647 /* 648 * The mappings are held in a dm-array that has 64-bit values stored in 649 * little-endian format. The index is the cblock, the high 48bits of the 650 * value are the oblock and the low 16 bit the flags. 651 */ 652 #define FLAGS_MASK ((1 << 16) - 1) 653 654 static __le64 pack_value(dm_oblock_t block, unsigned flags) 655 { 656 uint64_t value = from_oblock(block); 657 value <<= 16; 658 value = value | (flags & FLAGS_MASK); 659 return cpu_to_le64(value); 660 } 661 662 static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags) 663 { 664 uint64_t value = le64_to_cpu(value_le); 665 uint64_t b = value >> 16; 666 *block = to_oblock(b); 667 *flags = value & FLAGS_MASK; 668 } 669 670 /*----------------------------------------------------------------*/ 671 672 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, 673 sector_t data_block_size, 674 bool may_format_device, 675 size_t policy_hint_size) 676 { 677 int r; 678 struct dm_cache_metadata *cmd; 679 680 cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); 681 if (!cmd) { 682 DMERR("could not allocate metadata struct"); 683 return NULL; 684 } 685 686 init_rwsem(&cmd->root_lock); 687 cmd->bdev = bdev; 688 cmd->data_block_size = data_block_size; 689 cmd->cache_blocks = 0; 690 cmd->policy_hint_size = policy_hint_size; 691 cmd->changed = true; 692 693 r = __create_persistent_data_objects(cmd, may_format_device); 694 if (r) { 695 kfree(cmd); 696 return ERR_PTR(r); 697 } 698 699 r = __begin_transaction_flags(cmd, clear_clean_shutdown); 700 if (r < 0) { 701 dm_cache_metadata_close(cmd); 702 return ERR_PTR(r); 703 } 704 705 return cmd; 706 } 707 708 void dm_cache_metadata_close(struct dm_cache_metadata *cmd) 709 { 710 __destroy_persistent_data_objects(cmd); 711 kfree(cmd); 712 } 713 714 /* 715 * Checks that the given cache block is either unmapped or clean. 716 */ 717 static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, 718 bool *result) 719 { 720 int r; 721 __le64 value; 722 dm_oblock_t ob; 723 unsigned flags; 724 725 r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value); 726 if (r) { 727 DMERR("block_unmapped_or_clean failed"); 728 return r; 729 } 730 731 unpack_value(value, &ob, &flags); 732 *result = !((flags & M_VALID) && (flags & M_DIRTY)); 733 734 return 0; 735 } 736 737 static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, 738 dm_cblock_t begin, dm_cblock_t end, 739 bool *result) 740 { 741 int r; 742 *result = true; 743 744 while (begin != end) { 745 r = block_unmapped_or_clean(cmd, begin, result); 746 if (r) 747 return r; 748 749 if (!*result) { 750 DMERR("cache block %llu is dirty", 751 (unsigned long long) from_cblock(begin)); 752 return 0; 753 } 754 755 begin = to_cblock(from_cblock(begin) + 1); 756 } 757 758 return 0; 759 } 760 761 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) 762 { 763 int r; 764 bool clean; 765 __le64 null_mapping = pack_value(0, 0); 766 767 down_write(&cmd->root_lock); 768 __dm_bless_for_disk(&null_mapping); 769 770 if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) { 771 r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean); 772 if (r) { 773 __dm_unbless_for_disk(&null_mapping); 774 goto out; 775 } 776 777 if (!clean) { 778 DMERR("unable to shrink cache due to dirty blocks"); 779 r = -EINVAL; 780 __dm_unbless_for_disk(&null_mapping); 781 goto out; 782 } 783 } 784 785 r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks), 786 from_cblock(new_cache_size), 787 &null_mapping, &cmd->root); 788 if (!r) 789 cmd->cache_blocks = new_cache_size; 790 cmd->changed = true; 791 792 out: 793 up_write(&cmd->root_lock); 794 795 return r; 796 } 797 798 int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, 799 sector_t discard_block_size, 800 dm_oblock_t new_nr_entries) 801 { 802 int r; 803 804 down_write(&cmd->root_lock); 805 r = dm_bitset_resize(&cmd->discard_info, 806 cmd->discard_root, 807 from_oblock(cmd->discard_nr_blocks), 808 from_oblock(new_nr_entries), 809 false, &cmd->discard_root); 810 if (!r) { 811 cmd->discard_block_size = discard_block_size; 812 cmd->discard_nr_blocks = new_nr_entries; 813 } 814 815 cmd->changed = true; 816 up_write(&cmd->root_lock); 817 818 return r; 819 } 820 821 static int __set_discard(struct dm_cache_metadata *cmd, dm_oblock_t b) 822 { 823 return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root, 824 from_oblock(b), &cmd->discard_root); 825 } 826 827 static int __clear_discard(struct dm_cache_metadata *cmd, dm_oblock_t b) 828 { 829 return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root, 830 from_oblock(b), &cmd->discard_root); 831 } 832 833 static int __is_discarded(struct dm_cache_metadata *cmd, dm_oblock_t b, 834 bool *is_discarded) 835 { 836 return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root, 837 from_oblock(b), &cmd->discard_root, 838 is_discarded); 839 } 840 841 static int __discard(struct dm_cache_metadata *cmd, 842 dm_oblock_t dblock, bool discard) 843 { 844 int r; 845 846 r = (discard ? __set_discard : __clear_discard)(cmd, dblock); 847 if (r) 848 return r; 849 850 cmd->changed = true; 851 return 0; 852 } 853 854 int dm_cache_set_discard(struct dm_cache_metadata *cmd, 855 dm_oblock_t dblock, bool discard) 856 { 857 int r; 858 859 down_write(&cmd->root_lock); 860 r = __discard(cmd, dblock, discard); 861 up_write(&cmd->root_lock); 862 863 return r; 864 } 865 866 static int __load_discards(struct dm_cache_metadata *cmd, 867 load_discard_fn fn, void *context) 868 { 869 int r = 0; 870 dm_block_t b; 871 bool discard; 872 873 for (b = 0; b < from_oblock(cmd->discard_nr_blocks); b++) { 874 dm_oblock_t dblock = to_oblock(b); 875 876 if (cmd->clean_when_opened) { 877 r = __is_discarded(cmd, dblock, &discard); 878 if (r) 879 return r; 880 } else 881 discard = false; 882 883 r = fn(context, cmd->discard_block_size, dblock, discard); 884 if (r) 885 break; 886 } 887 888 return r; 889 } 890 891 int dm_cache_load_discards(struct dm_cache_metadata *cmd, 892 load_discard_fn fn, void *context) 893 { 894 int r; 895 896 down_read(&cmd->root_lock); 897 r = __load_discards(cmd, fn, context); 898 up_read(&cmd->root_lock); 899 900 return r; 901 } 902 903 dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd) 904 { 905 dm_cblock_t r; 906 907 down_read(&cmd->root_lock); 908 r = cmd->cache_blocks; 909 up_read(&cmd->root_lock); 910 911 return r; 912 } 913 914 static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock) 915 { 916 int r; 917 __le64 value = pack_value(0, 0); 918 919 __dm_bless_for_disk(&value); 920 r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), 921 &value, &cmd->root); 922 if (r) 923 return r; 924 925 cmd->changed = true; 926 return 0; 927 } 928 929 int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock) 930 { 931 int r; 932 933 down_write(&cmd->root_lock); 934 r = __remove(cmd, cblock); 935 up_write(&cmd->root_lock); 936 937 return r; 938 } 939 940 static int __insert(struct dm_cache_metadata *cmd, 941 dm_cblock_t cblock, dm_oblock_t oblock) 942 { 943 int r; 944 __le64 value = pack_value(oblock, M_VALID); 945 __dm_bless_for_disk(&value); 946 947 r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), 948 &value, &cmd->root); 949 if (r) 950 return r; 951 952 cmd->changed = true; 953 return 0; 954 } 955 956 int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, 957 dm_cblock_t cblock, dm_oblock_t oblock) 958 { 959 int r; 960 961 down_write(&cmd->root_lock); 962 r = __insert(cmd, cblock, oblock); 963 up_write(&cmd->root_lock); 964 965 return r; 966 } 967 968 struct thunk { 969 load_mapping_fn fn; 970 void *context; 971 972 struct dm_cache_metadata *cmd; 973 bool respect_dirty_flags; 974 bool hints_valid; 975 }; 976 977 static bool policy_unchanged(struct dm_cache_metadata *cmd, 978 struct dm_cache_policy *policy) 979 { 980 const char *policy_name = dm_cache_policy_get_name(policy); 981 const unsigned *policy_version = dm_cache_policy_get_version(policy); 982 size_t policy_hint_size = dm_cache_policy_get_hint_size(policy); 983 984 /* 985 * Ensure policy names match. 986 */ 987 if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name))) 988 return false; 989 990 /* 991 * Ensure policy major versions match. 992 */ 993 if (cmd->policy_version[0] != policy_version[0]) 994 return false; 995 996 /* 997 * Ensure policy hint sizes match. 998 */ 999 if (cmd->policy_hint_size != policy_hint_size) 1000 return false; 1001 1002 return true; 1003 } 1004 1005 static bool hints_array_initialized(struct dm_cache_metadata *cmd) 1006 { 1007 return cmd->hint_root && cmd->policy_hint_size; 1008 } 1009 1010 static bool hints_array_available(struct dm_cache_metadata *cmd, 1011 struct dm_cache_policy *policy) 1012 { 1013 return cmd->clean_when_opened && policy_unchanged(cmd, policy) && 1014 hints_array_initialized(cmd); 1015 } 1016 1017 static int __load_mapping(void *context, uint64_t cblock, void *leaf) 1018 { 1019 int r = 0; 1020 bool dirty; 1021 __le64 value; 1022 __le32 hint_value = 0; 1023 dm_oblock_t oblock; 1024 unsigned flags; 1025 struct thunk *thunk = context; 1026 struct dm_cache_metadata *cmd = thunk->cmd; 1027 1028 memcpy(&value, leaf, sizeof(value)); 1029 unpack_value(value, &oblock, &flags); 1030 1031 if (flags & M_VALID) { 1032 if (thunk->hints_valid) { 1033 r = dm_array_get_value(&cmd->hint_info, cmd->hint_root, 1034 cblock, &hint_value); 1035 if (r && r != -ENODATA) 1036 return r; 1037 } 1038 1039 dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true; 1040 r = thunk->fn(thunk->context, oblock, to_cblock(cblock), 1041 dirty, le32_to_cpu(hint_value), thunk->hints_valid); 1042 } 1043 1044 return r; 1045 } 1046 1047 static int __load_mappings(struct dm_cache_metadata *cmd, 1048 struct dm_cache_policy *policy, 1049 load_mapping_fn fn, void *context) 1050 { 1051 struct thunk thunk; 1052 1053 thunk.fn = fn; 1054 thunk.context = context; 1055 1056 thunk.cmd = cmd; 1057 thunk.respect_dirty_flags = cmd->clean_when_opened; 1058 thunk.hints_valid = hints_array_available(cmd, policy); 1059 1060 return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk); 1061 } 1062 1063 int dm_cache_load_mappings(struct dm_cache_metadata *cmd, 1064 struct dm_cache_policy *policy, 1065 load_mapping_fn fn, void *context) 1066 { 1067 int r; 1068 1069 down_read(&cmd->root_lock); 1070 r = __load_mappings(cmd, policy, fn, context); 1071 up_read(&cmd->root_lock); 1072 1073 return r; 1074 } 1075 1076 static int __dump_mapping(void *context, uint64_t cblock, void *leaf) 1077 { 1078 int r = 0; 1079 __le64 value; 1080 dm_oblock_t oblock; 1081 unsigned flags; 1082 1083 memcpy(&value, leaf, sizeof(value)); 1084 unpack_value(value, &oblock, &flags); 1085 1086 return r; 1087 } 1088 1089 static int __dump_mappings(struct dm_cache_metadata *cmd) 1090 { 1091 return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL); 1092 } 1093 1094 void dm_cache_dump(struct dm_cache_metadata *cmd) 1095 { 1096 down_read(&cmd->root_lock); 1097 __dump_mappings(cmd); 1098 up_read(&cmd->root_lock); 1099 } 1100 1101 int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd) 1102 { 1103 int r; 1104 1105 down_read(&cmd->root_lock); 1106 r = cmd->changed; 1107 up_read(&cmd->root_lock); 1108 1109 return r; 1110 } 1111 1112 static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty) 1113 { 1114 int r; 1115 unsigned flags; 1116 dm_oblock_t oblock; 1117 __le64 value; 1118 1119 r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value); 1120 if (r) 1121 return r; 1122 1123 unpack_value(value, &oblock, &flags); 1124 1125 if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty)) 1126 /* nothing to be done */ 1127 return 0; 1128 1129 value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0)); 1130 __dm_bless_for_disk(&value); 1131 1132 r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), 1133 &value, &cmd->root); 1134 if (r) 1135 return r; 1136 1137 cmd->changed = true; 1138 return 0; 1139 1140 } 1141 1142 int dm_cache_set_dirty(struct dm_cache_metadata *cmd, 1143 dm_cblock_t cblock, bool dirty) 1144 { 1145 int r; 1146 1147 down_write(&cmd->root_lock); 1148 r = __dirty(cmd, cblock, dirty); 1149 up_write(&cmd->root_lock); 1150 1151 return r; 1152 } 1153 1154 void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd, 1155 struct dm_cache_statistics *stats) 1156 { 1157 down_read(&cmd->root_lock); 1158 *stats = cmd->stats; 1159 up_read(&cmd->root_lock); 1160 } 1161 1162 void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, 1163 struct dm_cache_statistics *stats) 1164 { 1165 down_write(&cmd->root_lock); 1166 cmd->stats = *stats; 1167 up_write(&cmd->root_lock); 1168 } 1169 1170 int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown) 1171 { 1172 int r; 1173 flags_mutator mutator = (clean_shutdown ? set_clean_shutdown : 1174 clear_clean_shutdown); 1175 1176 down_write(&cmd->root_lock); 1177 r = __commit_transaction(cmd, mutator); 1178 if (r) 1179 goto out; 1180 1181 r = __begin_transaction(cmd); 1182 1183 out: 1184 up_write(&cmd->root_lock); 1185 return r; 1186 } 1187 1188 int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd, 1189 dm_block_t *result) 1190 { 1191 int r = -EINVAL; 1192 1193 down_read(&cmd->root_lock); 1194 r = dm_sm_get_nr_free(cmd->metadata_sm, result); 1195 up_read(&cmd->root_lock); 1196 1197 return r; 1198 } 1199 1200 int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, 1201 dm_block_t *result) 1202 { 1203 int r = -EINVAL; 1204 1205 down_read(&cmd->root_lock); 1206 r = dm_sm_get_nr_blocks(cmd->metadata_sm, result); 1207 up_read(&cmd->root_lock); 1208 1209 return r; 1210 } 1211 1212 /*----------------------------------------------------------------*/ 1213 1214 static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) 1215 { 1216 int r; 1217 __le32 value; 1218 size_t hint_size; 1219 const char *policy_name = dm_cache_policy_get_name(policy); 1220 const unsigned *policy_version = dm_cache_policy_get_version(policy); 1221 1222 if (!policy_name[0] || 1223 (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) 1224 return -EINVAL; 1225 1226 if (!policy_unchanged(cmd, policy)) { 1227 strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); 1228 memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version)); 1229 1230 hint_size = dm_cache_policy_get_hint_size(policy); 1231 if (!hint_size) 1232 return 0; /* short-circuit hints initialization */ 1233 cmd->policy_hint_size = hint_size; 1234 1235 if (cmd->hint_root) { 1236 r = dm_array_del(&cmd->hint_info, cmd->hint_root); 1237 if (r) 1238 return r; 1239 } 1240 1241 r = dm_array_empty(&cmd->hint_info, &cmd->hint_root); 1242 if (r) 1243 return r; 1244 1245 value = cpu_to_le32(0); 1246 __dm_bless_for_disk(&value); 1247 r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0, 1248 from_cblock(cmd->cache_blocks), 1249 &value, &cmd->hint_root); 1250 if (r) 1251 return r; 1252 } 1253 1254 return 0; 1255 } 1256 1257 static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint) 1258 { 1259 struct dm_cache_metadata *cmd = context; 1260 __le32 value = cpu_to_le32(hint); 1261 int r; 1262 1263 __dm_bless_for_disk(&value); 1264 1265 r = dm_array_set_value(&cmd->hint_info, cmd->hint_root, 1266 from_cblock(cblock), &value, &cmd->hint_root); 1267 cmd->changed = true; 1268 1269 return r; 1270 } 1271 1272 static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) 1273 { 1274 int r; 1275 1276 r = begin_hints(cmd, policy); 1277 if (r) { 1278 DMERR("begin_hints failed"); 1279 return r; 1280 } 1281 1282 return policy_walk_mappings(policy, save_hint, cmd); 1283 } 1284 1285 int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) 1286 { 1287 int r; 1288 1289 down_write(&cmd->root_lock); 1290 r = write_hints(cmd, policy); 1291 up_write(&cmd->root_lock); 1292 1293 return r; 1294 } 1295 1296 int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result) 1297 { 1298 return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result); 1299 } 1300