/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
}

static unsigned long *alloc_bitset(unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	return vzalloc(s);
}

static void clear_bitset(void *bitset, unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	memset(bitset, 0, s);
}

static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}

/*----------------------------------------------------------------*/

#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be >= 32KB
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)

/*
 * FIXME: the cache is read/write for the time being.
 */
enum cache_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
};

struct cache_features {
	enum cache_mode mode;
	bool write_through:1;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

struct cache {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	struct dm_cache_metadata *cmd;

	spinlock_t lock;
	struct bio_list deferred_bios;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_writethrough_bios;
	struct list_head quiesced_migrations;
	struct list_head completed_migrations;
	struct list_head need_commit_migrations;
	sector_t migration_threshold;
	atomic_t nr_migrations;
	wait_queue_head_t migration_wait;

	/*
	 * cache_size entries, dirty if set
	 */
	dm_cblock_t nr_dirty;
	unsigned long *dirty_bitset;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	uint32_t discard_block_size; /* a power of 2 times sectors per block */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;

	struct dm_kcopyd_client *copier;
	struct workqueue_struct *wq;
	struct work_struct worker;

	struct delayed_work waker;
	unsigned long last_commit_jiffies;

	struct dm_bio_prison *prison;
	struct dm_deferred_set *all_io_ds;

	mempool_t *migration_pool;
	struct dm_cache_migration *next_migration;

	struct dm_cache_policy *policy;
	unsigned policy_nr_args;

	bool need_tick_bio:1;
	bool sized:1;
	bool quiescing:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	struct cache_stats stats;

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;
};

struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_deferred_entry *all_io_entry;

	/* writethrough fields */
	struct cache *cache;
	dm_cblock_t cblock;
	bio_end_io_t *saved_bi_end_io;
};

struct dm_cache_migration {
	struct list_head list;
	struct cache *cache;

	unsigned long start_jiffies;
	dm_oblock_t old_oblock;
	dm_oblock_t new_oblock;
	dm_cblock_t cblock;

	bool err:1;
	bool writeback:1;
	bool demote:1;
	bool promote:1;

	struct dm_bio_prison_cell *old_ocell;
	struct dm_bio_prison_cell *new_ocell;
};

/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc to avoid deadlocks (the same worker thread
 * frees them back to the mempool).
 */
struct prealloc {
	struct dm_cache_migration *mg;
	struct dm_bio_prison_cell *cell1;
	struct dm_bio_prison_cell *cell2;
};

static void wake_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->worker);
}

/*----------------------------------------------------------------*/

static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
{
	/* FIXME: change to use a local slab. */
	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	dm_bio_prison_free_cell(cache->prison, cell);
}

static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg) {
		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
		if (!p->mg)
			return -ENOMEM;
	}

	if (!p->cell1) {
		p->cell1 = alloc_prison_cell(cache);
		if (!p->cell1)
			return -ENOMEM;
	}

	if (!p->cell2) {
		p->cell2 = alloc_prison_cell(cache);
		if (!p->cell2)
			return -ENOMEM;
	}

	return 0;
}

static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
{
	if (p->cell2)
		free_prison_cell(cache, p->cell2);

	if (p->cell1)
		free_prison_cell(cache, p->cell1);

	if (p->mg)
		mempool_free(p->mg, cache->migration_pool);
}

static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
{
	struct dm_cache_migration *mg = p->mg;

	BUG_ON(!mg);
	p->mg = NULL;

	return mg;
}

/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
{
	struct dm_bio_prison_cell *r = NULL;

	if (p->cell1) {
		r = p->cell1;
		p->cell1 = NULL;

	} else if (p->cell2) {
		r = p->cell2;
		p->cell2 = NULL;
	} else
		BUG();

	return r;
}

/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
{
	if (!p->cell2)
		p->cell2 = cell;

	else if (!p->cell1)
		p->cell1 = cell;

	else
		BUG();
}

/*----------------------------------------------------------------*/

static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block = from_oblock(oblock);
}

/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);

static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;

	build_key(oblock, &key);
	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (r)
		free_fn(free_context, cell_prealloc);

	return r;
}

static int get_cell(struct cache *cache,
		    dm_oblock_t oblock,
		    struct prealloc *structs,
		    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;
	struct dm_bio_prison_cell *cell_prealloc;

	cell_prealloc = prealloc_get_cell(structs);

	build_key(oblock, &key);
	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
	if (r)
		prealloc_put_cell(structs, cell_prealloc);

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1);
		policy_set_dirty(cache->policy, oblock);
	}
}

static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		policy_clear_dirty(cache->policy, oblock);
		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1);
		if (!from_cblock(cache->nr_dirty))
			dm_table_event(cache->ti->table);
	}
}

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	uint32_t discard_blocks = cache->discard_block_size;
	dm_block_t b = from_oblock(oblock);

	if (!block_size_is_power_of_two(cache))
		discard_blocks = discard_blocks / cache->sectors_per_block;
	else
		discard_blocks >>= cache->sectors_per_block_shift;

	b = block_div(b, discard_blocks);

	return to_dblock(b);
}

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, flags);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

/*----------------------------------------------------------------*/

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/
static struct per_bio_data *get_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->all_io_entry = NULL;

	return pb;
}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio->bi_bdev = cache->origin_dev->bdev;
}

static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_sector;

	bio->bi_bdev = cache->cache_dev->bdev;
	if (!block_size_is_power_of_two(cache))
		bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) +
				sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) |
				(bi_sector & (cache->sectors_per_block - 1));
}

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	struct per_bio_data *pb = get_per_bio_data(bio);

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	if (!bio_triggers_commit(cache, bio)) {
		generic_make_request(bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
	spin_lock_irqsave(&cache->lock, flags);
	cache->commit_requested = true;
	bio_list_add(&cache->deferred_flush_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_writethrough_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void writethrough_endio(struct bio *bio, int err)
{
	struct per_bio_data *pb = get_per_bio_data(bio);
	bio->bi_end_io = pb->saved_bi_end_io;

	if (err) {
		bio_endio(bio, err);
		return;
	}

	remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a bio list for processing by the
	 * worker thread.
	 */
	defer_writethrough_bio(pb->cache, bio);
}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	pb->cache = cache;
	pb->cblock = cblock;
	pb->saved_bi_end_io = bio->bi_end_io;
	bio->bi_end_io = writethrough_endio;

	remap_to_origin_clear_discard(pb->cache, bio, oblock);
}

/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
static void free_migration(struct dm_cache_migration *mg)
{
	mempool_free(mg, mg->cache->migration_pool);
}

static void inc_nr_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_migrations);
}

static void dec_nr_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_migrations);

	/*
	 * Wake the worker in case we're suspending the target.
	 */
	wake_up(&cache->migration_wait);
}

static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
			 bool holder)
{
	(holder ? dm_cell_release : dm_cell_release_no_holder)
		(cache->prison, cell, &cache->deferred_bios);
	free_prison_cell(cache, cell);
}

static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
		       bool holder)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	__cell_defer(cache, cell, holder);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void cleanup_migration(struct dm_cache_migration *mg)
{
	dec_nr_migrations(mg->cache);
	free_migration(mg);
}

static void migration_failure(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN_LIMIT("writeback failed; couldn't copy block");
		set_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);

	} else if (mg->demote) {
		DMWARN_LIMIT("demotion failed; couldn't copy block");
		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);

		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
		if (mg->promote)
			cell_defer(cache, mg->new_ocell, 1);
	} else {
		DMWARN_LIMIT("promotion failed; couldn't copy block");
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cell_defer(cache, mg->new_ocell, 1);
	}

	cleanup_migration(mg);
}

static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		cell_defer(cache, mg->old_ocell, false);
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cleanup_migration(mg);
		return;

	} else if (mg->demote) {
		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
			policy_force_mapping(cache->policy, mg->new_oblock,
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
			cleanup_migration(mg);
			return;
		}
	} else {
		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
			policy_remove_mapping(cache->policy, mg->new_oblock);
			cleanup_migration(mg);
			return;
		}
	}

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->need_commit_migrations);
	cache->commit_requested = true;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void migration_success_post_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN("writeback unexpectedly triggered commit");
		return;

	} else if (mg->demote) {
		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);

		if (mg->promote) {
			mg->demote = false;

			spin_lock_irqsave(&cache->lock, flags);
			list_add_tail(&mg->list, &cache->quiesced_migrations);
			spin_unlock_irqrestore(&cache->lock, flags);

		} else
			cleanup_migration(mg);

	} else {
		cell_defer(cache, mg->new_ocell, true);
		clear_dirty(cache, mg->new_oblock, mg->cblock);
		cleanup_migration(mg);
	}
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	unsigned long flags;
	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
	struct cache *cache = mg->cache;

	if (read_err || write_err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_copy_real(struct dm_cache_migration *mg)
{
	int r;
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;

	o_region.bdev = cache->origin_dev->bdev;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (mg->writeback || mg->demote) {
		/* demote */
		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
	} else {
		/* promote */
		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
	}

	if (r < 0)
		migration_failure(mg);
}

static void avoid_copy(struct dm_cache_migration *mg)
{
	atomic_inc(&mg->cache->stats.copies_avoided);
	migration_success_pre_commit(mg);
}

static void issue_copy(struct dm_cache_migration *mg)
{
	bool avoid;
	struct cache *cache = mg->cache;

	if (mg->writeback || mg->demote)
		avoid = !is_dirty(cache, mg->cblock) ||
			is_discarded_oblock(cache, mg->old_oblock);
	else
		avoid = is_discarded_oblock(cache, mg->new_oblock);

	avoid ? avoid_copy(mg) : issue_copy_real(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
{
	if (mg->err)
		migration_failure(mg);
	else
		migration_success_pre_commit(mg);
}

static void process_migrations(struct cache *cache, struct list_head *head,
			       void (*fn)(struct dm_cache_migration *))
{
	unsigned long flags;
	struct list_head list;
	struct dm_cache_migration *mg, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(head, &list);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(mg, tmp, &list, list)
		fn(mg);
}

static void __queue_quiesced_migration(struct dm_cache_migration *mg)
{
	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
}

static void queue_quiesced_migration(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	spin_lock_irqsave(&cache->lock, flags);
	__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
{
	unsigned long flags;
	struct dm_cache_migration *mg, *tmp;

	spin_lock_irqsave(&cache->lock, flags);
	list_for_each_entry_safe(mg, tmp, work, list)
		__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void check_for_quiesced_migrations(struct cache *cache,
					  struct per_bio_data *pb)
{
	struct list_head work;

	if (!pb->all_io_entry)
		return;

	INIT_LIST_HEAD(&work);
	if (pb->all_io_entry)
		dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
		queue_quiesced_migrations(cache, &work);
}

static void quiesce_migration(struct dm_cache_migration *mg)
{
	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
		queue_quiesced_migration(mg);
}

static void promote(struct cache *cache, struct prealloc *structs,
		    dm_oblock_t oblock, dm_cblock_t cblock,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = true;
	mg->cache = cache;
	mg->new_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void writeback(struct cache *cache, struct prealloc *structs,
		      dm_oblock_t oblock, dm_cblock_t cblock,
		      struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	BUG_ON(bio->bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue(cache, bio);
}

/*
 * People generally discard large parts of a device, e.g. the whole device
 * when formatting.  Splitting these large discards up into cache block
 * sized ios and then quiescing (always necessary for discard) takes too
 * long.
 *
 * We keep it simple, and allow any size of discard to come in, and just
 * mark off blocks on the discard bitset.  No passdown occurs!
 *
 * To implement passdown we need to change the bio_prison such that a cell
 * can have a key that spans many blocks.
 */
static void process_discard_bio(struct cache *cache, struct bio *bio)
{
	dm_block_t start_block = dm_sector_div_up(bio->bi_sector,
						  cache->discard_block_size);
	dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
	dm_block_t b;

	end_block = block_div(end_block, cache->discard_block_size);

	for (b = start_block; b < end_block; b++)
		set_discard(cache, to_dblock(b));

	bio_endio(bio, 0);
}

static bool spare_migration_bandwidth(struct cache *cache)
{
	sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}

static bool is_writethrough_io(struct cache *cache, struct bio *bio,
			       dm_cblock_t cblock)
{
	return bio_data_dir(bio) == WRITE &&
		cache->features.write_through && !is_dirty(cache, cblock);
}

static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

static void process_bio(struct cache *cache, struct prealloc *structs,
			struct bio *bio)
{
	int r;
	bool release_cell = true;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
	struct policy_result lookup_result;
	struct per_bio_data *pb = get_per_bio_data(bio);
	bool discarded_block = is_discarded_oblock(cache, block);
	bool can_migrate = discarded_block || spare_migration_bandwidth(cache);

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain(cache, block, bio, cell_prealloc,
		       (cell_free_fn) prealloc_put_cell,
		       structs, &new_ocell);
	if (r > 0)
		return;

	r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
		       bio, &lookup_result);

	if (r == -EWOULDBLOCK)
		/* migration has been denied */
		lookup_result.op = POLICY_MISS;

	switch (lookup_result.op) {
	case POLICY_HIT:
		inc_hit_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

		if (is_writethrough_io(cache, bio, lookup_result.cblock))
			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
		else
			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);

		issue(cache, bio);
		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
		remap_to_origin_clear_discard(cache, bio, block);
		issue(cache, bio);
		break;

	case POLICY_NEW:
		atomic_inc(&cache->stats.promotion);
		promote(cache, structs, block, lookup_result.cblock, new_ocell);
		release_cell = false;
		break;

	case POLICY_REPLACE:
		cell_prealloc = prealloc_get_cell(structs);
		r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc,
			       (cell_free_fn) prealloc_put_cell,
			       structs, &old_ocell);
		if (r > 0) {
			/*
			 * We have to be careful to avoid lock inversion of
			 * the cells.  So we back off, and wait for the
			 * old_ocell to become free.
			 */
			policy_force_mapping(cache->policy, block,
					     lookup_result.old_oblock);
			atomic_inc(&cache->stats.cache_cell_clash);
			break;
		}
		atomic_inc(&cache->stats.demotion);
		atomic_inc(&cache->stats.promotion);

		demote_then_promote(cache, structs, lookup_result.old_oblock,
				    block, lookup_result.cblock,
				    old_ocell, new_ocell);
		release_cell = false;
		break;

	default:
		DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__,
			    (unsigned) lookup_result.op);
		bio_io_error(bio);
	}

	if (release_cell)
		cell_defer(cache, new_ocell, false);
}

static int need_commit_due_to_time(struct cache *cache)
{
	return jiffies < cache->last_commit_jiffies ||
	       jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
}

static int commit_if_needed(struct cache *cache)
{
	if (dm_cache_changed_this_transaction(cache->cmd) &&
	    (cache->commit_requested || need_commit_due_to_time(cache))) {
		atomic_inc(&cache->stats.commit_count);
		cache->last_commit_jiffies = jiffies;
		cache->commit_requested = false;
		return dm_cache_commit(cache->cmd, false);
	}

	return 0;
}

static void process_deferred_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;
	struct prealloc structs;

	memset(&structs, 0, sizeof(structs));
	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while (!bio_list_empty(&bios)) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
		if (prealloc_data_structs(cache, &structs)) {
			spin_lock_irqsave(&cache->lock, flags);
			bio_list_merge(&cache->deferred_bios, &bios);
			spin_unlock_irqrestore(&cache->lock, flags);
			break;
		}

		bio = bio_list_pop(&bios);

		if (bio->bi_rw & REQ_FLUSH)
			process_flush_bio(cache, bio);
		else if (bio->bi_rw & REQ_DISCARD)
			process_discard_bio(cache, bio);
		else
			process_bio(cache, &structs, bio);
	}

	prealloc_free_structs(cache, &structs);
}

static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_flush_bios);
	bio_list_init(&cache->deferred_flush_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}

static void process_deferred_writethrough_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_writethrough_bios);
	bio_list_init(&cache->deferred_writethrough_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		generic_make_request(bio);
}

static void writeback_some_dirty_blocks(struct cache *cache)
{
	int r = 0;
	dm_oblock_t oblock;
	dm_cblock_t cblock;
	struct prealloc structs;
	struct dm_bio_prison_cell *old_ocell;

	memset(&structs, 0, sizeof(structs));

	while (spare_migration_bandwidth(cache)) {
		if (prealloc_data_structs(cache, &structs))
			break;

		r = policy_writeback_work(cache->policy, &oblock, &cblock);
		if (r)
			break;

		r = get_cell(cache, oblock, &structs, &old_ocell);
		if (r) {
			policy_set_dirty(cache->policy, oblock);
			break;
		}

		writeback(cache, &structs, oblock, cblock, old_ocell);
	}

	prealloc_free_structs(cache, &structs);
}

/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/
static void start_quiescing(struct cache *cache)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	cache->quiescing = 1;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void stop_quiescing(struct cache *cache)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	cache->quiescing = 0;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_quiescing(struct cache *cache)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = cache->quiescing;
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static void wait_for_migrations(struct cache *cache)
{
	wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
}

static void stop_worker(struct cache *cache)
{
	cancel_delayed_work(&cache->waker);
	flush_workqueue(cache->wq);
}

static void requeue_deferred_io(struct cache *cache)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);

	while ((bio = bio_list_pop(&bios)))
		bio_endio(bio, DM_ENDIO_REQUEUE);
}

static int more_work(struct cache *cache)
{
	if (is_quiescing(cache))
		return !list_empty(&cache->quiesced_migrations) ||
			!list_empty(&cache->completed_migrations) ||
			!list_empty(&cache->need_commit_migrations);
	else
		return !bio_list_empty(&cache->deferred_bios) ||
			!bio_list_empty(&cache->deferred_flush_bios) ||
			!bio_list_empty(&cache->deferred_writethrough_bios) ||
			!list_empty(&cache->quiesced_migrations) ||
			!list_empty(&cache->completed_migrations) ||
			!list_empty(&cache->need_commit_migrations);
}

static void do_worker(struct work_struct *ws)
{
	struct cache *cache = container_of(ws, struct cache, worker);

	do {
		if (!is_quiescing(cache))
			process_deferred_bios(cache);

		process_migrations(cache, &cache->quiesced_migrations, issue_copy);
		process_migrations(cache, &cache->completed_migrations, complete_migration);

		writeback_some_dirty_blocks(cache);

		process_deferred_writethrough_bios(cache);

		if (commit_if_needed(cache)) {
			process_deferred_flush_bios(cache, false);

			/*
			 * FIXME: rollback metadata or just go into a
			 * failure mode and error everything
			 */
		} else {
			process_deferred_flush_bios(cache, true);
			process_migrations(cache, &cache->need_commit_migrations,
					   migration_success_post_commit);
		}
	} while (more_work(cache));
}

/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
static void do_waker(struct work_struct *ws)
{
	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
	wake_worker(cache);
	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
}

/*----------------------------------------------------------------*/

static int is_congested(struct dm_dev *dev, int bdi_bits)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	return bdi_congested(&q->backing_dev_info, bdi_bits);
}

static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
	struct cache *cache = container_of(cb, struct cache, callbacks);

	return is_congested(cache->origin_dev, bdi_bits) ||
		is_congested(cache->cache_dev, bdi_bits);
}

/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
static void destroy(struct cache *cache)
{
	unsigned i;

	if (cache->next_migration)
		mempool_free(cache->next_migration, cache->migration_pool);

	if (cache->migration_pool)
		mempool_destroy(cache->migration_pool);

	if (cache->all_io_ds)
		dm_deferred_set_destroy(cache->all_io_ds);

	if (cache->prison)
		dm_bio_prison_destroy(cache->prison);

	if (cache->wq)
		destroy_workqueue(cache->wq);

	if (cache->dirty_bitset)
		free_bitset(cache->dirty_bitset);

	if (cache->discard_bitset)
		free_bitset(cache->discard_bitset);

	if (cache->copier)
		dm_kcopyd_client_destroy(cache->copier);

	if (cache->cmd)
		dm_cache_metadata_close(cache->cmd);

	if (cache->metadata_dev)
		dm_put_device(cache->ti, cache->metadata_dev);

	if (cache->origin_dev)
		dm_put_device(cache->ti, cache->origin_dev);

	if (cache->cache_dev)
		dm_put_device(cache->ti, cache->cache_dev);

	if (cache->policy)
		dm_cache_policy_destroy(cache->policy);

	for (i = 0; i < cache->nr_ctr_args; i++)
		kfree(cache->ctr_args[i]);
	kfree(cache->ctr_args);

	kfree(cache);
}

static void cache_dtr(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	destroy(cache);
}

static sector_t get_dev_size(struct dm_dev *dev)
{
	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
}

/*----------------------------------------------------------------*/

/*
 * Construct a cache device mapping.
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding original data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writethrough.  (The default is writeback.)
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy
 *                   E.g. 'sequential_threshold 1024'
 *                   See cache-policies.txt for details.
 *
 * Optional feature arguments are:
 *   writethrough  : write through caching that prohibits cache block
 *                   content from being different from origin block content.
 *                   Without this argument, the default behaviour is to write
 *                   back cache block contents later for performance reasons,
 *                   so they may differ from the corresponding origin blocks.
 */
struct cache_args {
	struct dm_target *ti;

	struct dm_dev *metadata_dev;

	struct dm_dev *cache_dev;
	sector_t cache_sectors;

	struct dm_dev *origin_dev;
	sector_t origin_sectors;

	uint32_t block_size;

	const char *policy_name;
	int policy_argc;
	const char **policy_argv;

	struct cache_features features;
};

static void destroy_cache_args(struct cache_args *ca)
{
	if (ca->metadata_dev)
		dm_put_device(ca->ti, ca->metadata_dev);

	if (ca->cache_dev)
		dm_put_device(ca->ti, ca->cache_dev);

	if (ca->origin_dev)
		dm_put_device(ca->ti, ca->origin_dev);

	kfree(ca);
}

static bool at_least_one_arg(struct dm_arg_set *as, char **error)
{
	if (!as->argc) {
		*error = "Insufficient args";
		return false;
	}

	return true;
}

static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
			      char **error)
{
	int r;
	sector_t metadata_dev_size;
	char b[BDEVNAME_SIZE];

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->metadata_dev);
	if (r) {
		*error = "Error opening metadata device";
		return r;
	}

	metadata_dev_size = get_dev_size(ca->metadata_dev);
	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);

	return 0;
}

static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
			   char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->cache_dev);
	if (r) {
		*error = "Error opening cache device";
		return r;
	}
	ca->cache_sectors = get_dev_size(ca->cache_dev);

	return 0;
}

static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->origin_dev);
	if (r) {
		*error = "Error opening origin device";
		return r;
	}

	ca->origin_sectors = get_dev_size(ca->origin_dev);
	if (ca->ti->len > ca->origin_sectors) {
		*error = "Device size larger than cached device";
		return -EINVAL;
	}

	return 0;
}

static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	unsigned long tmp;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	if (kstrtoul(dm_shift_arg(as), 10, &tmp) || !tmp ||
	    tmp < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
	    tmp & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
		*error = "Invalid data block size";
		return -EINVAL;
	}

	if (tmp > ca->cache_sectors) {
		*error = "Data block size is larger than the cache device";
		return -EINVAL;
	}

	ca->block_size = tmp;

	return 0;
}

static void init_features(struct cache_features *cf)
{
	cf->mode = CM_WRITE;
	cf->write_through = false;
}

static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
			  char **error)
{
	static struct dm_arg _args[] = {
		{0, 1, "Invalid number of cache feature arguments"},
	};

	int r;
	unsigned argc;
	const char *arg;
	struct cache_features *cf = &ca->features;

	init_features(cf);

	r = dm_read_arg_group(_args, as, &argc, error);
	if (r)
		return -EINVAL;

	while (argc--) {
		arg = dm_shift_arg(as);

		if (!strcasecmp(arg, "writeback"))
			cf->write_through = false;

		else if (!strcasecmp(arg, "writethrough"))
			cf->write_through = true;

		else {
			*error = "Unrecognised cache feature requested";
			return -EINVAL;
		}
	}

	return 0;
}

static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
			char **error)
{
	static struct dm_arg _args[] = {
		{0, 1024, "Invalid number of policy arguments"},
	};

	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	ca->policy_name = dm_shift_arg(as);

	r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
	if (r)
		return -EINVAL;

	ca->policy_argv = (const char **)as->argv;
	dm_consume_args(as, ca->policy_argc);

	return 0;
}

static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
			    char **error)
{
	int r;
	struct dm_arg_set as;

	as.argc = argc;
	as.argv = argv;

	r = parse_metadata_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_cache_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_origin_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_block_size(ca, &as, error);
	if (r)
		return r;

	r = parse_features(ca, &as, error);
	if (r)
		return r;

	r = parse_policy(ca, &as, error);
	if (r)
		return r;

	return 0;
}

/*----------------------------------------------------------------*/

static struct kmem_cache *migration_cache;

static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv)
{
	int r = 0;

	if (argc & 1) {
		DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
		return -EINVAL;
	}

	while (argc) {
		r = policy_set_config_value(p, argv[0], argv[1]);
		if (r) {
			DMWARN("policy_set_config_value failed: key = '%s', value = '%s'",
			       argv[0], argv[1]);
			return r;
		}

		argc -= 2;
		argv += 2;
	}

	return r;
}

static int create_cache_policy(struct cache *cache, struct cache_args *ca,
			       char **error)
{
	int r;

	cache->policy = dm_cache_policy_create(ca->policy_name,
					       cache->cache_size,
					       cache->origin_sectors,
					       cache->sectors_per_block);
	if (!cache->policy) {
		*error = "Error creating cache's policy";
		return -ENOMEM;
	}

	r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv);
	if (r) {
		*error = "Error setting cache policy's config values";
		dm_cache_policy_destroy(cache->policy);
		cache->policy = NULL;
	}

	return r;
}

/*
 * We want the discard block size to be a power of two, at least the size
 * of the cache block size, and have no more than 2^14 discard blocks
 * across the origin.
 */
#define MAX_DISCARD_BLOCKS (1 << 14)

static bool too_many_discard_blocks(sector_t discard_block_size,
				    sector_t origin_size)
{
	(void) sector_div(origin_size, discard_block_size);

	return origin_size > MAX_DISCARD_BLOCKS;
}

static sector_t calculate_discard_block_size(sector_t cache_block_size,
					     sector_t origin_size)
{
	sector_t discard_block_size;

	discard_block_size = roundup_pow_of_two(cache_block_size);

	if (origin_size)
		while (too_many_discard_blocks(discard_block_size, origin_size))
			discard_block_size *= 2;

	return discard_block_size;
}

#define DEFAULT_MIGRATION_THRESHOLD (2048 * 100)

static int cache_create(struct cache_args *ca, struct cache **result)
{
	int r = 0;
	char **error = &ca->ti->error;
	struct cache *cache;
	struct dm_target *ti = ca->ti;
	dm_block_t origin_blocks;
	struct dm_cache_metadata *cmd;
	bool may_format = ca->features.mode == CM_WRITE;

	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
	if (!cache)
		return -ENOMEM;

	cache->ti = ca->ti;
	ti->private = cache;
	ti->per_bio_data_size = sizeof(struct per_bio_data);
	ti->num_flush_bios = 2;
	ti->flush_supported = true;

	ti->num_discard_bios = 1;
	ti->discards_supported = true;
	ti->discard_zeroes_data_unsupported = true;

	memcpy(&cache->features, &ca->features, sizeof(cache->features));

	cache->callbacks.congested_fn = cache_is_congested;
	dm_table_add_target_callbacks(ti->table, &cache->callbacks);

	cache->metadata_dev = ca->metadata_dev;
	cache->origin_dev = ca->origin_dev;
	cache->cache_dev = ca->cache_dev;

	ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;

	/* FIXME: factor out this whole section */
	origin_blocks = cache->origin_sectors = ca->origin_sectors;
	origin_blocks = block_div(origin_blocks, ca->block_size);
	cache->origin_blocks = to_oblock(origin_blocks);

	cache->sectors_per_block = ca->block_size;
	if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
		r = -EINVAL;
		goto bad;
	}

	if (ca->block_size & (ca->block_size - 1)) {
		dm_block_t cache_size = ca->cache_sectors;

		cache->sectors_per_block_shift = -1;
		cache_size = block_div(cache_size, ca->block_size);
		cache->cache_size = to_cblock(cache_size);
	} else {
		cache->sectors_per_block_shift = __ffs(ca->block_size);
		cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift);
	}

	r = create_cache_policy(cache, ca, error);
	if (r)
		goto bad;
	cache->policy_nr_args = ca->policy_argc;

	cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
				     ca->block_size, may_format,
				     dm_cache_policy_get_hint_size(cache->policy));
	if (IS_ERR(cmd)) {
		*error = "Error creating metadata object";
		r = PTR_ERR(cmd);
		goto bad;
	}
	cache->cmd = cmd;

	spin_lock_init(&cache->lock);
	bio_list_init(&cache->deferred_bios);
	bio_list_init(&cache->deferred_flush_bios);
	bio_list_init(&cache->deferred_writethrough_bios);
	INIT_LIST_HEAD(&cache->quiesced_migrations);
	INIT_LIST_HEAD(&cache->completed_migrations);
	INIT_LIST_HEAD(&cache->need_commit_migrations);
	cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
	atomic_set(&cache->nr_migrations, 0);
	init_waitqueue_head(&cache->migration_wait);

	cache->nr_dirty = 0;
	cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
	if (!cache->dirty_bitset) {
		*error = "could not allocate dirty bitset";
		goto bad;
	}
	clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));

	cache->discard_block_size =
		calculate_discard_block_size(cache->sectors_per_block,
					     cache->origin_sectors);
	cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks);
	cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
	if (!cache->discard_bitset) {
		*error = "could not allocate discard bitset";
		goto bad;
	}
	clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));

	cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
	if (IS_ERR(cache->copier)) {
		*error = "could not create kcopyd client";
		r = PTR_ERR(cache->copier);
		goto bad;
	}

	cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
	if (!cache->wq) {
		*error = "could not create workqueue for metadata object";
		goto bad;
	}
	INIT_WORK(&cache->worker, do_worker);
	INIT_DELAYED_WORK(&cache->waker, do_waker);
	cache->last_commit_jiffies = jiffies;

	cache->prison = dm_bio_prison_create(PRISON_CELLS);
	if (!cache->prison) {
		*error = "could not create bio prison";
		goto bad;
	}

	cache->all_io_ds = dm_deferred_set_create();
	if (!cache->all_io_ds) {
		*error = "could not create all_io deferred set";
		goto bad;
	}

	cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
							 migration_cache);
	if (!cache->migration_pool) {
		*error = "Error creating cache's migration mempool";
		goto bad;
	}

	cache->next_migration = NULL;

	cache->need_tick_bio = true;
	cache->sized = false;
	cache->quiescing = false;
	cache->commit_requested = false;
	cache->loaded_mappings = false;
	cache->loaded_discards = false;

	load_stats(cache);

	atomic_set(&cache->stats.demotion, 0);
	atomic_set(&cache->stats.promotion, 0);
	atomic_set(&cache->stats.copies_avoided, 0);
	atomic_set(&cache->stats.cache_cell_clash, 0);
	atomic_set(&cache->stats.commit_count, 0);
	atomic_set(&cache->stats.discard_count, 0);

	*result = cache;
	return 0;

bad:
	destroy(cache);
	return r;
}

static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
{
	unsigned i;
	const char **copy;

	copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
	if (!copy)
		return -ENOMEM;
	for (i = 0; i < argc; i++) {
		copy[i] = kstrdup(argv[i], GFP_KERNEL);
		if (!copy[i]) {
			while (i--)
				kfree(copy[i]);
			kfree(copy);
			return -ENOMEM;
		}
	}

	cache->nr_ctr_args = argc;
	cache->ctr_args = copy;

	return 0;
}

static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct cache_args *ca;
	struct cache *cache = NULL;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca) {
		ti->error = "Error allocating memory for cache";
		return -ENOMEM;
	}
	ca->ti = ti;

	r = parse_cache_args(ca, argc, argv, &ti->error);
	if (r)
		goto out;

	r = cache_create(ca, &cache);
	if (r)
		goto out;

	r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
	if (r) {
		destroy(cache);
		goto out;
	}

	ti->private = cache;

out:
	destroy_cache_args(ca);
	return r;
}

static int cache_map(struct dm_target *ti, struct bio *bio)
{
	struct cache *cache = ti->private;

	int r;
	dm_oblock_t block = get_bio_block(cache, bio);
	bool can_migrate = false;
	bool discarded_block;
	struct dm_bio_prison_cell *cell;
	struct policy_result lookup_result;
	struct per_bio_data *pb;

	if (from_oblock(block) >= from_oblock(cache->origin_blocks)) {
		/*
		 * This can only occur if the io goes to a partial block at
		 * the end of the origin device.  We don't cache these.
		 * Just remap to the origin and carry on.
		 */
		remap_to_origin_clear_discard(cache, bio, block);
		return DM_MAPIO_REMAPPED;
	}

	pb = init_per_bio_data(bio);

	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
		defer_bio(cache, bio);
		return DM_MAPIO_SUBMITTED;
	}

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell = alloc_prison_cell(cache);
	if (!cell) {
		defer_bio(cache, bio);
		return DM_MAPIO_SUBMITTED;
	}

	r = bio_detain(cache, block, bio, cell,
		       (cell_free_fn) free_prison_cell,
		       cache, &cell);
	if (r) {
		if (r < 0)
			defer_bio(cache, bio);

		return DM_MAPIO_SUBMITTED;
	}

	discarded_block = is_discarded_oblock(cache, block);

	r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
		       bio, &lookup_result);
	if (r == -EWOULDBLOCK) {
		cell_defer(cache, cell, true);
		return DM_MAPIO_SUBMITTED;

	} else if (r) {
		DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
		bio_io_error(bio);
		return DM_MAPIO_SUBMITTED;
	}

	switch (lookup_result.op) {
	case POLICY_HIT:
		inc_hit_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

		if (is_writethrough_io(cache, bio, lookup_result.cblock))
			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
		else
			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);

		cell_defer(cache, cell, false);
		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

		if (pb->req_nr != 0) {
			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */
			bio_endio(bio, 0);
			cell_defer(cache, cell, false);
			return DM_MAPIO_SUBMITTED;
		} else {
			remap_to_origin_clear_discard(cache, bio, block);
			cell_defer(cache, cell, false);
		}
		break;

	default:
		DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
			    (unsigned) lookup_result.op);
		bio_io_error(bio);
		return DM_MAPIO_SUBMITTED;
	}

	return DM_MAPIO_REMAPPED;
}

static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
{
	struct cache *cache = ti->private;
	unsigned long flags;
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (pb->tick) {
		policy_tick(cache->policy);

		spin_lock_irqsave(&cache->lock, flags);
		cache->need_tick_bio = true;
		spin_unlock_irqrestore(&cache->lock, flags);
	}

	check_for_quiesced_migrations(cache, pb);

	return 0;
}

static int write_dirty_bitset(struct cache *cache)
{
	unsigned i;
	int r;

	for (i = 0; i < from_cblock(cache->cache_size); i++) {
		r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
				       is_dirty(cache, to_cblock(i)));
		if (r)
			return r;
	}

	return 0;
}

static int write_discard_bitset(struct cache *cache)
{
	unsigned i;
	int r;

	r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
					   cache->discard_nr_blocks);
	if (r) {
		DMERR("could not resize on-disk discard bitset");
		return r;
	}

	for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
		r = dm_cache_set_discard(cache->cmd, to_dblock(i),
					 is_discarded(cache, to_dblock(i)));
		if (r)
			return r;
	}

	return 0;
}

static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock,
		     uint32_t hint)
{
	struct cache *cache = context;
	return dm_cache_save_hint(cache->cmd, cblock, hint);
}

static int write_hints(struct cache *cache)
{
	int r;

	r = dm_cache_begin_hints(cache->cmd, cache->policy);
	if (r) {
		DMERR("dm_cache_begin_hints failed");
		return r;
	}

	r = policy_walk_mappings(cache->policy, save_hint, cache);
	if (r)
		DMERR("policy_walk_mappings failed");

	return r;
}

/*
 * returns true on success
 */
static bool sync_metadata(struct cache *cache)
{
	int r1, r2, r3, r4;

	r1 = write_dirty_bitset(cache);
	if (r1)
		DMERR("could not write dirty bitset");

	r2 = write_discard_bitset(cache);
	if (r2)
		DMERR("could not write discard bitset");

	save_stats(cache);

	r3 = write_hints(cache);
	if (r3)
		DMERR("could not write hints");

	/*
	 * If writing the above metadata failed, we still commit, but don't
	 * set the clean shutdown flag.  This will effectively force every
	 * dirty bit to be set on reload.
	 */
	r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3);
	if (r4)
		DMERR("could not write cache metadata.  Data loss may occur.");
Data loss may occur."); 2299 2300 return !r1 && !r2 && !r3 && !r4; 2301 } 2302 2303 static void cache_postsuspend(struct dm_target *ti) 2304 { 2305 struct cache *cache = ti->private; 2306 2307 start_quiescing(cache); 2308 wait_for_migrations(cache); 2309 stop_worker(cache); 2310 requeue_deferred_io(cache); 2311 stop_quiescing(cache); 2312 2313 (void) sync_metadata(cache); 2314 } 2315 2316 static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, 2317 bool dirty, uint32_t hint, bool hint_valid) 2318 { 2319 int r; 2320 struct cache *cache = context; 2321 2322 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid); 2323 if (r) 2324 return r; 2325 2326 if (dirty) 2327 set_dirty(cache, oblock, cblock); 2328 else 2329 clear_dirty(cache, oblock, cblock); 2330 2331 return 0; 2332 } 2333 2334 static int load_discard(void *context, sector_t discard_block_size, 2335 dm_dblock_t dblock, bool discard) 2336 { 2337 struct cache *cache = context; 2338 2339 /* FIXME: handle mis-matched block size */ 2340 2341 if (discard) 2342 set_discard(cache, dblock); 2343 else 2344 clear_discard(cache, dblock); 2345 2346 return 0; 2347 } 2348 2349 static int cache_preresume(struct dm_target *ti) 2350 { 2351 int r = 0; 2352 struct cache *cache = ti->private; 2353 sector_t actual_cache_size = get_dev_size(cache->cache_dev); 2354 (void) sector_div(actual_cache_size, cache->sectors_per_block); 2355 2356 /* 2357 * Check to see if the cache has resized. 2358 */ 2359 if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) { 2360 cache->cache_size = to_cblock(actual_cache_size); 2361 2362 r = dm_cache_resize(cache->cmd, cache->cache_size); 2363 if (r) { 2364 DMERR("could not resize cache metadata"); 2365 return r; 2366 } 2367 2368 cache->sized = true; 2369 } 2370 2371 if (!cache->loaded_mappings) { 2372 r = dm_cache_load_mappings(cache->cmd, cache->policy, 2373 load_mapping, cache); 2374 if (r) { 2375 DMERR("could not load cache mappings"); 2376 return r; 2377 } 2378 2379 cache->loaded_mappings = true; 2380 } 2381 2382 if (!cache->loaded_discards) { 2383 r = dm_cache_load_discards(cache->cmd, load_discard, cache); 2384 if (r) { 2385 DMERR("could not load origin discards"); 2386 return r; 2387 } 2388 2389 cache->loaded_discards = true; 2390 } 2391 2392 return r; 2393 } 2394 2395 static void cache_resume(struct dm_target *ti) 2396 { 2397 struct cache *cache = ti->private; 2398 2399 cache->need_tick_bio = true; 2400 do_waker(&cache->waker.work); 2401 } 2402 2403 /* 2404 * Status format: 2405 * 2406 * <#used metadata blocks>/<#total metadata blocks> 2407 * <#read hits> <#read misses> <#write hits> <#write misses> 2408 * <#demotions> <#promotions> <#blocks in cache> <#dirty> 2409 * <#features> <features>* 2410 * <#core args> <core args> 2411 * <#policy args> <policy args>* 2412 */ 2413 static void cache_status(struct dm_target *ti, status_type_t type, 2414 unsigned status_flags, char *result, unsigned maxlen) 2415 { 2416 int r = 0; 2417 unsigned i; 2418 ssize_t sz = 0; 2419 dm_block_t nr_free_blocks_metadata = 0; 2420 dm_block_t nr_blocks_metadata = 0; 2421 char buf[BDEVNAME_SIZE]; 2422 struct cache *cache = ti->private; 2423 dm_cblock_t residency; 2424 2425 switch (type) { 2426 case STATUSTYPE_INFO: 2427 /* Commit to ensure statistics aren't out-of-date */ 2428 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) { 2429 r = dm_cache_commit(cache->cmd, false); 2430 if (r) 2431 DMERR("could not commit metadata for accurate status"); 2432 } 2433 2434 r = 

		r = dm_cache_get_free_metadata_block_count(cache->cmd,
							   &nr_free_blocks_metadata);
		if (r) {
			DMERR("could not get metadata free block count");
			goto err;
		}

		r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
		if (r) {
			DMERR("could not get metadata device size");
			goto err;
		}

		residency = policy_residency(cache->policy);

		DMEMIT("%llu/%llu %u %u %u %u %u %u %llu %u ",
		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
		       (unsigned long long)nr_blocks_metadata,
		       (unsigned) atomic_read(&cache->stats.read_hit),
		       (unsigned) atomic_read(&cache->stats.read_miss),
		       (unsigned) atomic_read(&cache->stats.write_hit),
		       (unsigned) atomic_read(&cache->stats.write_miss),
		       (unsigned) atomic_read(&cache->stats.demotion),
		       (unsigned) atomic_read(&cache->stats.promotion),
		       (unsigned long long) from_cblock(residency),
		       cache->nr_dirty);

		if (cache->features.write_through)
			DMEMIT("1 writethrough ");
		else
			DMEMIT("0 ");

		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
		if (sz < maxlen) {
			r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
			if (r)
				DMERR("policy_emit_config_values returned %d", r);
		}

		break;

	case STATUSTYPE_TABLE:
		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
		DMEMIT("%s", buf);

		for (i = 0; i < cache->nr_ctr_args - 1; i++)
			DMEMIT(" %s", cache->ctr_args[i]);
		if (cache->nr_ctr_args)
			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
	}

	return;

err:
	DMEMIT("Error");
}

#define NOT_CORE_OPTION 1

static int process_config_option(struct cache *cache, char **argv)
{
	unsigned long tmp;

	if (!strcasecmp(argv[0], "migration_threshold")) {
		if (kstrtoul(argv[1], 10, &tmp))
			return -EINVAL;

		cache->migration_threshold = tmp;
		return 0;
	}

	return NOT_CORE_OPTION;
}

/*
 * Supports <key> <value>.
 *
 * The key migration_threshold is supported by the cache target core.
 */
static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r;
	struct cache *cache = ti->private;

	if (argc != 2)
		return -EINVAL;

	r = process_config_option(cache, argv);
	if (r == NOT_CORE_OPTION)
		return policy_set_config_value(cache->policy, argv[0], argv[1]);

	return r;
}

static int cache_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	int r = 0;
	struct cache *cache = ti->private;

	r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
	if (!r)
		r = fn(ti, cache->origin_dev, 0, ti->len, data);

	return r;
}

/*
 * We assume I/O is going to the origin (which is the volume
 * more likely to have restrictions e.g. by being striped).
 * (Looking up the exact location of the data would be expensive
 * and could always be out of date by the time the bio is submitted.)
 */
static int cache_bvec_merge(struct dm_target *ti,
			    struct bvec_merge_data *bvm,
			    struct bio_vec *biovec, int max_size)
{
	struct cache *cache = ti->private;
	struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);

	if (!q->merge_bvec_fn)
		return max_size;

	bvm->bi_bdev = cache->origin_dev->bdev;
	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}

static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
{
	/*
	 * FIXME: these limits may be incompatible with the cache device
	 */
	limits->max_discard_sectors = cache->discard_block_size * 1024;
	limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
}

static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct cache *cache = ti->private;

	blk_limits_io_min(limits, 0);
	blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
	set_discard_limits(cache, limits);
}

/*----------------------------------------------------------------*/

static struct target_type cache_target = {
	.name = "cache",
	.version = {1, 1, 0},
	.module = THIS_MODULE,
	.ctr = cache_ctr,
	.dtr = cache_dtr,
	.map = cache_map,
	.end_io = cache_end_io,
	.postsuspend = cache_postsuspend,
	.preresume = cache_preresume,
	.resume = cache_resume,
	.status = cache_status,
	.message = cache_message,
	.iterate_devices = cache_iterate_devices,
	.merge = cache_bvec_merge,
	.io_hints = cache_io_hints,
};

static int __init dm_cache_init(void)
{
	int r;

	r = dm_register_target(&cache_target);
	if (r) {
		DMERR("cache target registration failed: %d", r);
		return r;
	}

	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
	if (!migration_cache) {
		dm_unregister_target(&cache_target);
		return -ENOMEM;
	}

	return 0;
}

static void __exit dm_cache_exit(void)
{
	dm_unregister_target(&cache_target);
	kmem_cache_destroy(migration_cache);
}

module_init(dm_cache_init);
module_exit(dm_cache_exit);

MODULE_DESCRIPTION(DM_NAME " cache target");
MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
MODULE_LICENSE("GPL");
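
/*
 * Example userspace usage (illustrative only; device names and sizes are
 * made up, and the table layout assumes the format accepted by
 * parse_cache_args):
 *
 *   dmsetup create cached --table \
 *     '0 41943040 cache /dev/mapper/meta /dev/mapper/fast /dev/slow 512 \
 *      1 writethrough default 0'
 *
 *   # Adjust the core migration_threshold key handled by cache_message():
 *   dmsetup message cached 0 migration_threshold 204800
 *
 *   # Report the fields documented above cache_status():
 *   dmsetup status cached
 */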