/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
}

static unsigned long *alloc_bitset(unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	return vzalloc(s);
}

static void clear_bitset(void *bitset, unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	memset(bitset, 0, s);
}

static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
	void *bi_private;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;
	h->bi_private = bio->bi_private;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
	bio->bi_private = h->bi_private;

	/*
	 * Must bump bi_remaining to allow bio to complete with
	 * restored bi_end_io.
	 */
	atomic_inc(&bio->bi_remaining);
}

/*----------------------------------------------------------------*/

#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

/*
 * FIXME: the cache is read/write for the time being.
 */
enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
129 */ 130 CM_IO_WRITETHROUGH, 131 132 /* 133 * A degraded mode useful for various cache coherency situations 134 * (eg, rolling back snapshots). Reads and writes always go to the 135 * origin. If a write goes to a cached oblock, then the cache 136 * block is invalidated. 137 */ 138 CM_IO_PASSTHROUGH 139 }; 140 141 struct cache_features { 142 enum cache_metadata_mode mode; 143 enum cache_io_mode io_mode; 144 }; 145 146 struct cache_stats { 147 atomic_t read_hit; 148 atomic_t read_miss; 149 atomic_t write_hit; 150 atomic_t write_miss; 151 atomic_t demotion; 152 atomic_t promotion; 153 atomic_t copies_avoided; 154 atomic_t cache_cell_clash; 155 atomic_t commit_count; 156 atomic_t discard_count; 157 }; 158 159 /* 160 * Defines a range of cblocks, begin to (end - 1) are in the range. end is 161 * the one-past-the-end value. 162 */ 163 struct cblock_range { 164 dm_cblock_t begin; 165 dm_cblock_t end; 166 }; 167 168 struct invalidation_request { 169 struct list_head list; 170 struct cblock_range *cblocks; 171 172 atomic_t complete; 173 int err; 174 175 wait_queue_head_t result_wait; 176 }; 177 178 struct cache { 179 struct dm_target *ti; 180 struct dm_target_callbacks callbacks; 181 182 struct dm_cache_metadata *cmd; 183 184 /* 185 * Metadata is written to this device. 186 */ 187 struct dm_dev *metadata_dev; 188 189 /* 190 * The slower of the two data devices. Typically a spindle. 191 */ 192 struct dm_dev *origin_dev; 193 194 /* 195 * The faster of the two data devices. Typically an SSD. 196 */ 197 struct dm_dev *cache_dev; 198 199 /* 200 * Size of the origin device in _complete_ blocks and native sectors. 201 */ 202 dm_oblock_t origin_blocks; 203 sector_t origin_sectors; 204 205 /* 206 * Size of the cache device in blocks. 207 */ 208 dm_cblock_t cache_size; 209 210 /* 211 * Fields for converting from sectors to blocks. 212 */ 213 uint32_t sectors_per_block; 214 int sectors_per_block_shift; 215 216 spinlock_t lock; 217 struct bio_list deferred_bios; 218 struct bio_list deferred_flush_bios; 219 struct bio_list deferred_writethrough_bios; 220 struct list_head quiesced_migrations; 221 struct list_head completed_migrations; 222 struct list_head need_commit_migrations; 223 sector_t migration_threshold; 224 wait_queue_head_t migration_wait; 225 atomic_t nr_migrations; 226 227 wait_queue_head_t quiescing_wait; 228 atomic_t quiescing; 229 atomic_t quiescing_ack; 230 231 /* 232 * cache_size entries, dirty if set 233 */ 234 dm_cblock_t nr_dirty; 235 unsigned long *dirty_bitset; 236 237 /* 238 * origin_blocks entries, discarded if set. 239 */ 240 dm_dblock_t discard_nr_blocks; 241 unsigned long *discard_bitset; 242 uint32_t discard_block_size; /* a power of 2 times sectors per block */ 243 244 /* 245 * Rather than reconstructing the table line for the status we just 246 * save it and regurgitate. 247 */ 248 unsigned nr_ctr_args; 249 const char **ctr_args; 250 251 struct dm_kcopyd_client *copier; 252 struct workqueue_struct *wq; 253 struct work_struct worker; 254 255 struct delayed_work waker; 256 unsigned long last_commit_jiffies; 257 258 struct dm_bio_prison *prison; 259 struct dm_deferred_set *all_io_ds; 260 261 mempool_t *migration_pool; 262 struct dm_cache_migration *next_migration; 263 264 struct dm_cache_policy *policy; 265 unsigned policy_nr_args; 266 267 bool need_tick_bio:1; 268 bool sized:1; 269 bool invalidate:1; 270 bool commit_requested:1; 271 bool loaded_mappings:1; 272 bool loaded_discards:1; 273 274 /* 275 * Cache features such as write-through. 
276 */ 277 struct cache_features features; 278 279 struct cache_stats stats; 280 281 /* 282 * Invalidation fields. 283 */ 284 spinlock_t invalidation_lock; 285 struct list_head invalidation_requests; 286 }; 287 288 struct per_bio_data { 289 bool tick:1; 290 unsigned req_nr:2; 291 struct dm_deferred_entry *all_io_entry; 292 struct dm_hook_info hook_info; 293 294 /* 295 * writethrough fields. These MUST remain at the end of this 296 * structure and the 'cache' member must be the first as it 297 * is used to determine the offset of the writethrough fields. 298 */ 299 struct cache *cache; 300 dm_cblock_t cblock; 301 struct dm_bio_details bio_details; 302 }; 303 304 struct dm_cache_migration { 305 struct list_head list; 306 struct cache *cache; 307 308 unsigned long start_jiffies; 309 dm_oblock_t old_oblock; 310 dm_oblock_t new_oblock; 311 dm_cblock_t cblock; 312 313 bool err:1; 314 bool writeback:1; 315 bool demote:1; 316 bool promote:1; 317 bool requeue_holder:1; 318 bool invalidate:1; 319 320 struct dm_bio_prison_cell *old_ocell; 321 struct dm_bio_prison_cell *new_ocell; 322 }; 323 324 /* 325 * Processing a bio in the worker thread may require these memory 326 * allocations. We prealloc to avoid deadlocks (the same worker thread 327 * frees them back to the mempool). 328 */ 329 struct prealloc { 330 struct dm_cache_migration *mg; 331 struct dm_bio_prison_cell *cell1; 332 struct dm_bio_prison_cell *cell2; 333 }; 334 335 static void wake_worker(struct cache *cache) 336 { 337 queue_work(cache->wq, &cache->worker); 338 } 339 340 /*----------------------------------------------------------------*/ 341 342 static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache) 343 { 344 /* FIXME: change to use a local slab. */ 345 return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT); 346 } 347 348 static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell) 349 { 350 dm_bio_prison_free_cell(cache->prison, cell); 351 } 352 353 static int prealloc_data_structs(struct cache *cache, struct prealloc *p) 354 { 355 if (!p->mg) { 356 p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); 357 if (!p->mg) 358 return -ENOMEM; 359 } 360 361 if (!p->cell1) { 362 p->cell1 = alloc_prison_cell(cache); 363 if (!p->cell1) 364 return -ENOMEM; 365 } 366 367 if (!p->cell2) { 368 p->cell2 = alloc_prison_cell(cache); 369 if (!p->cell2) 370 return -ENOMEM; 371 } 372 373 return 0; 374 } 375 376 static void prealloc_free_structs(struct cache *cache, struct prealloc *p) 377 { 378 if (p->cell2) 379 free_prison_cell(cache, p->cell2); 380 381 if (p->cell1) 382 free_prison_cell(cache, p->cell1); 383 384 if (p->mg) 385 mempool_free(p->mg, cache->migration_pool); 386 } 387 388 static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) 389 { 390 struct dm_cache_migration *mg = p->mg; 391 392 BUG_ON(!mg); 393 p->mg = NULL; 394 395 return mg; 396 } 397 398 /* 399 * You must have a cell within the prealloc struct to return. If not this 400 * function will BUG() rather than returning NULL. 401 */ 402 static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p) 403 { 404 struct dm_bio_prison_cell *r = NULL; 405 406 if (p->cell1) { 407 r = p->cell1; 408 p->cell1 = NULL; 409 410 } else if (p->cell2) { 411 r = p->cell2; 412 p->cell2 = NULL; 413 } else 414 BUG(); 415 416 return r; 417 } 418 419 /* 420 * You can't have more than two cells in a prealloc struct. BUG() will be 421 * called if you try and overfill. 
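 *
 * A rough sketch of how the prealloc helpers are meant to be used by the
 * worker thread, following the pattern in process_deferred_bios() and
 * writeback_some_dirty_blocks() below (pseudocode, not a verbatim quote):
 *
 *	struct prealloc structs;
 *
 *	memset(&structs, 0, sizeof(structs));
 *	while (<there is work>) {
 *		if (prealloc_data_structs(cache, &structs))
 *			break;	<- nothing is lost; retry on the next wake-up
 *		<consume with prealloc_get_cell()/prealloc_get_migration(),
 *		 hand back unused cells with prealloc_put_cell()>
 *	}
 *	prealloc_free_structs(cache, &structs);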
422 */ 423 static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell) 424 { 425 if (!p->cell2) 426 p->cell2 = cell; 427 428 else if (!p->cell1) 429 p->cell1 = cell; 430 431 else 432 BUG(); 433 } 434 435 /*----------------------------------------------------------------*/ 436 437 static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) 438 { 439 key->virtual = 0; 440 key->dev = 0; 441 key->block = from_oblock(oblock); 442 } 443 444 /* 445 * The caller hands in a preallocated cell, and a free function for it. 446 * The cell will be freed if there's an error, or if it wasn't used because 447 * a cell with that key already exists. 448 */ 449 typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); 450 451 static int bio_detain(struct cache *cache, dm_oblock_t oblock, 452 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, 453 cell_free_fn free_fn, void *free_context, 454 struct dm_bio_prison_cell **cell_result) 455 { 456 int r; 457 struct dm_cell_key key; 458 459 build_key(oblock, &key); 460 r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); 461 if (r) 462 free_fn(free_context, cell_prealloc); 463 464 return r; 465 } 466 467 static int get_cell(struct cache *cache, 468 dm_oblock_t oblock, 469 struct prealloc *structs, 470 struct dm_bio_prison_cell **cell_result) 471 { 472 int r; 473 struct dm_cell_key key; 474 struct dm_bio_prison_cell *cell_prealloc; 475 476 cell_prealloc = prealloc_get_cell(structs); 477 478 build_key(oblock, &key); 479 r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); 480 if (r) 481 prealloc_put_cell(structs, cell_prealloc); 482 483 return r; 484 } 485 486 /*----------------------------------------------------------------*/ 487 488 static bool is_dirty(struct cache *cache, dm_cblock_t b) 489 { 490 return test_bit(from_cblock(b), cache->dirty_bitset); 491 } 492 493 static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) 494 { 495 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { 496 cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1); 497 policy_set_dirty(cache->policy, oblock); 498 } 499 } 500 501 static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) 502 { 503 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { 504 policy_clear_dirty(cache->policy, oblock); 505 cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1); 506 if (!from_cblock(cache->nr_dirty)) 507 dm_table_event(cache->ti->table); 508 } 509 } 510 511 /*----------------------------------------------------------------*/ 512 513 static bool block_size_is_power_of_two(struct cache *cache) 514 { 515 return cache->sectors_per_block_shift >= 0; 516 } 517 518 /* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */ 519 #if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6 520 __always_inline 521 #endif 522 static dm_block_t block_div(dm_block_t b, uint32_t n) 523 { 524 do_div(b, n); 525 526 return b; 527 } 528 529 static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) 530 { 531 uint32_t discard_blocks = cache->discard_block_size; 532 dm_block_t b = from_oblock(oblock); 533 534 if (!block_size_is_power_of_two(cache)) 535 discard_blocks = discard_blocks / cache->sectors_per_block; 536 else 537 discard_blocks >>= cache->sectors_per_block_shift; 538 539 b = block_div(b, discard_blocks); 540 541 return to_dblock(b); 542 } 543 544 static void set_discard(struct cache *cache, 
dm_dblock_t b) 545 { 546 unsigned long flags; 547 548 atomic_inc(&cache->stats.discard_count); 549 550 spin_lock_irqsave(&cache->lock, flags); 551 set_bit(from_dblock(b), cache->discard_bitset); 552 spin_unlock_irqrestore(&cache->lock, flags); 553 } 554 555 static void clear_discard(struct cache *cache, dm_dblock_t b) 556 { 557 unsigned long flags; 558 559 spin_lock_irqsave(&cache->lock, flags); 560 clear_bit(from_dblock(b), cache->discard_bitset); 561 spin_unlock_irqrestore(&cache->lock, flags); 562 } 563 564 static bool is_discarded(struct cache *cache, dm_dblock_t b) 565 { 566 int r; 567 unsigned long flags; 568 569 spin_lock_irqsave(&cache->lock, flags); 570 r = test_bit(from_dblock(b), cache->discard_bitset); 571 spin_unlock_irqrestore(&cache->lock, flags); 572 573 return r; 574 } 575 576 static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) 577 { 578 int r; 579 unsigned long flags; 580 581 spin_lock_irqsave(&cache->lock, flags); 582 r = test_bit(from_dblock(oblock_to_dblock(cache, b)), 583 cache->discard_bitset); 584 spin_unlock_irqrestore(&cache->lock, flags); 585 586 return r; 587 } 588 589 /*----------------------------------------------------------------*/ 590 591 static void load_stats(struct cache *cache) 592 { 593 struct dm_cache_statistics stats; 594 595 dm_cache_metadata_get_stats(cache->cmd, &stats); 596 atomic_set(&cache->stats.read_hit, stats.read_hits); 597 atomic_set(&cache->stats.read_miss, stats.read_misses); 598 atomic_set(&cache->stats.write_hit, stats.write_hits); 599 atomic_set(&cache->stats.write_miss, stats.write_misses); 600 } 601 602 static void save_stats(struct cache *cache) 603 { 604 struct dm_cache_statistics stats; 605 606 stats.read_hits = atomic_read(&cache->stats.read_hit); 607 stats.read_misses = atomic_read(&cache->stats.read_miss); 608 stats.write_hits = atomic_read(&cache->stats.write_hit); 609 stats.write_misses = atomic_read(&cache->stats.write_miss); 610 611 dm_cache_metadata_set_stats(cache->cmd, &stats); 612 } 613 614 /*---------------------------------------------------------------- 615 * Per bio data 616 *--------------------------------------------------------------*/ 617 618 /* 619 * If using writeback, leave out struct per_bio_data's writethrough fields. 620 */ 621 #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) 622 #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) 623 624 static bool writethrough_mode(struct cache_features *f) 625 { 626 return f->io_mode == CM_IO_WRITETHROUGH; 627 } 628 629 static bool writeback_mode(struct cache_features *f) 630 { 631 return f->io_mode == CM_IO_WRITEBACK; 632 } 633 634 static bool passthrough_mode(struct cache_features *f) 635 { 636 return f->io_mode == CM_IO_PASSTHROUGH; 637 } 638 639 static size_t get_per_bio_data_size(struct cache *cache) 640 { 641 return writethrough_mode(&cache->features) ? 
PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; 642 } 643 644 static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) 645 { 646 struct per_bio_data *pb = dm_per_bio_data(bio, data_size); 647 BUG_ON(!pb); 648 return pb; 649 } 650 651 static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) 652 { 653 struct per_bio_data *pb = get_per_bio_data(bio, data_size); 654 655 pb->tick = false; 656 pb->req_nr = dm_bio_get_target_bio_nr(bio); 657 pb->all_io_entry = NULL; 658 659 return pb; 660 } 661 662 /*---------------------------------------------------------------- 663 * Remapping 664 *--------------------------------------------------------------*/ 665 static void remap_to_origin(struct cache *cache, struct bio *bio) 666 { 667 bio->bi_bdev = cache->origin_dev->bdev; 668 } 669 670 static void remap_to_cache(struct cache *cache, struct bio *bio, 671 dm_cblock_t cblock) 672 { 673 sector_t bi_sector = bio->bi_iter.bi_sector; 674 sector_t block = from_cblock(cblock); 675 676 bio->bi_bdev = cache->cache_dev->bdev; 677 if (!block_size_is_power_of_two(cache)) 678 bio->bi_iter.bi_sector = 679 (block * cache->sectors_per_block) + 680 sector_div(bi_sector, cache->sectors_per_block); 681 else 682 bio->bi_iter.bi_sector = 683 (block << cache->sectors_per_block_shift) | 684 (bi_sector & (cache->sectors_per_block - 1)); 685 } 686 687 static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) 688 { 689 unsigned long flags; 690 size_t pb_data_size = get_per_bio_data_size(cache); 691 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 692 693 spin_lock_irqsave(&cache->lock, flags); 694 if (cache->need_tick_bio && 695 !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) { 696 pb->tick = true; 697 cache->need_tick_bio = false; 698 } 699 spin_unlock_irqrestore(&cache->lock, flags); 700 } 701 702 static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, 703 dm_oblock_t oblock) 704 { 705 check_if_tick_bio_needed(cache, bio); 706 remap_to_origin(cache, bio); 707 if (bio_data_dir(bio) == WRITE) 708 clear_discard(cache, oblock_to_dblock(cache, oblock)); 709 } 710 711 static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, 712 dm_oblock_t oblock, dm_cblock_t cblock) 713 { 714 check_if_tick_bio_needed(cache, bio); 715 remap_to_cache(cache, bio, cblock); 716 if (bio_data_dir(bio) == WRITE) { 717 set_dirty(cache, oblock, cblock); 718 clear_discard(cache, oblock_to_dblock(cache, oblock)); 719 } 720 } 721 722 static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) 723 { 724 sector_t block_nr = bio->bi_iter.bi_sector; 725 726 if (!block_size_is_power_of_two(cache)) 727 (void) sector_div(block_nr, cache->sectors_per_block); 728 else 729 block_nr >>= cache->sectors_per_block_shift; 730 731 return to_oblock(block_nr); 732 } 733 734 static int bio_triggers_commit(struct cache *cache, struct bio *bio) 735 { 736 return bio->bi_rw & (REQ_FLUSH | REQ_FUA); 737 } 738 739 static void issue(struct cache *cache, struct bio *bio) 740 { 741 unsigned long flags; 742 743 if (!bio_triggers_commit(cache, bio)) { 744 generic_make_request(bio); 745 return; 746 } 747 748 /* 749 * Batch together any bios that trigger commits and then issue a 750 * single commit for them in do_worker(). 
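	 * (commit_if_needed() performs that commit, and
	 * process_deferred_flush_bios() then resubmits the batched bios, or
	 * errors them if the commit failed.)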
751 */ 752 spin_lock_irqsave(&cache->lock, flags); 753 cache->commit_requested = true; 754 bio_list_add(&cache->deferred_flush_bios, bio); 755 spin_unlock_irqrestore(&cache->lock, flags); 756 } 757 758 static void defer_writethrough_bio(struct cache *cache, struct bio *bio) 759 { 760 unsigned long flags; 761 762 spin_lock_irqsave(&cache->lock, flags); 763 bio_list_add(&cache->deferred_writethrough_bios, bio); 764 spin_unlock_irqrestore(&cache->lock, flags); 765 766 wake_worker(cache); 767 } 768 769 static void writethrough_endio(struct bio *bio, int err) 770 { 771 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); 772 773 dm_unhook_bio(&pb->hook_info, bio); 774 775 if (err) { 776 bio_endio(bio, err); 777 return; 778 } 779 780 dm_bio_restore(&pb->bio_details, bio); 781 remap_to_cache(pb->cache, bio, pb->cblock); 782 783 /* 784 * We can't issue this bio directly, since we're in interrupt 785 * context. So it gets put on a bio list for processing by the 786 * worker thread. 787 */ 788 defer_writethrough_bio(pb->cache, bio); 789 } 790 791 /* 792 * When running in writethrough mode we need to send writes to clean blocks 793 * to both the cache and origin devices. In future we'd like to clone the 794 * bio and send them in parallel, but for now we're doing them in 795 * series as this is easier. 796 */ 797 static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, 798 dm_oblock_t oblock, dm_cblock_t cblock) 799 { 800 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); 801 802 pb->cache = cache; 803 pb->cblock = cblock; 804 dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL); 805 dm_bio_record(&pb->bio_details, bio); 806 807 remap_to_origin_clear_discard(pb->cache, bio, oblock); 808 } 809 810 /*---------------------------------------------------------------- 811 * Migration processing 812 * 813 * Migration covers moving data from the origin device to the cache, or 814 * vice versa. 815 *--------------------------------------------------------------*/ 816 static void free_migration(struct dm_cache_migration *mg) 817 { 818 mempool_free(mg, mg->cache->migration_pool); 819 } 820 821 static void inc_nr_migrations(struct cache *cache) 822 { 823 atomic_inc(&cache->nr_migrations); 824 } 825 826 static void dec_nr_migrations(struct cache *cache) 827 { 828 atomic_dec(&cache->nr_migrations); 829 830 /* 831 * Wake the worker in case we're suspending the target. 832 */ 833 wake_up(&cache->migration_wait); 834 } 835 836 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, 837 bool holder) 838 { 839 (holder ? 
dm_cell_release : dm_cell_release_no_holder) 840 (cache->prison, cell, &cache->deferred_bios); 841 free_prison_cell(cache, cell); 842 } 843 844 static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, 845 bool holder) 846 { 847 unsigned long flags; 848 849 spin_lock_irqsave(&cache->lock, flags); 850 __cell_defer(cache, cell, holder); 851 spin_unlock_irqrestore(&cache->lock, flags); 852 853 wake_worker(cache); 854 } 855 856 static void cleanup_migration(struct dm_cache_migration *mg) 857 { 858 struct cache *cache = mg->cache; 859 free_migration(mg); 860 dec_nr_migrations(cache); 861 } 862 863 static void migration_failure(struct dm_cache_migration *mg) 864 { 865 struct cache *cache = mg->cache; 866 867 if (mg->writeback) { 868 DMWARN_LIMIT("writeback failed; couldn't copy block"); 869 set_dirty(cache, mg->old_oblock, mg->cblock); 870 cell_defer(cache, mg->old_ocell, false); 871 872 } else if (mg->demote) { 873 DMWARN_LIMIT("demotion failed; couldn't copy block"); 874 policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); 875 876 cell_defer(cache, mg->old_ocell, mg->promote ? false : true); 877 if (mg->promote) 878 cell_defer(cache, mg->new_ocell, true); 879 } else { 880 DMWARN_LIMIT("promotion failed; couldn't copy block"); 881 policy_remove_mapping(cache->policy, mg->new_oblock); 882 cell_defer(cache, mg->new_ocell, true); 883 } 884 885 cleanup_migration(mg); 886 } 887 888 static void migration_success_pre_commit(struct dm_cache_migration *mg) 889 { 890 unsigned long flags; 891 struct cache *cache = mg->cache; 892 893 if (mg->writeback) { 894 cell_defer(cache, mg->old_ocell, false); 895 clear_dirty(cache, mg->old_oblock, mg->cblock); 896 cleanup_migration(mg); 897 return; 898 899 } else if (mg->demote) { 900 if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) { 901 DMWARN_LIMIT("demotion failed; couldn't update on disk metadata"); 902 policy_force_mapping(cache->policy, mg->new_oblock, 903 mg->old_oblock); 904 if (mg->promote) 905 cell_defer(cache, mg->new_ocell, true); 906 cleanup_migration(mg); 907 return; 908 } 909 } else { 910 if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { 911 DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); 912 policy_remove_mapping(cache->policy, mg->new_oblock); 913 cleanup_migration(mg); 914 return; 915 } 916 } 917 918 spin_lock_irqsave(&cache->lock, flags); 919 list_add_tail(&mg->list, &cache->need_commit_migrations); 920 cache->commit_requested = true; 921 spin_unlock_irqrestore(&cache->lock, flags); 922 } 923 924 static void migration_success_post_commit(struct dm_cache_migration *mg) 925 { 926 unsigned long flags; 927 struct cache *cache = mg->cache; 928 929 if (mg->writeback) { 930 DMWARN("writeback unexpectedly triggered commit"); 931 return; 932 933 } else if (mg->demote) { 934 cell_defer(cache, mg->old_ocell, mg->promote ? 
false : true); 935 936 if (mg->promote) { 937 mg->demote = false; 938 939 spin_lock_irqsave(&cache->lock, flags); 940 list_add_tail(&mg->list, &cache->quiesced_migrations); 941 spin_unlock_irqrestore(&cache->lock, flags); 942 943 } else { 944 if (mg->invalidate) 945 policy_remove_mapping(cache->policy, mg->old_oblock); 946 cleanup_migration(mg); 947 } 948 949 } else { 950 if (mg->requeue_holder) 951 cell_defer(cache, mg->new_ocell, true); 952 else { 953 bio_endio(mg->new_ocell->holder, 0); 954 cell_defer(cache, mg->new_ocell, false); 955 } 956 clear_dirty(cache, mg->new_oblock, mg->cblock); 957 cleanup_migration(mg); 958 } 959 } 960 961 static void copy_complete(int read_err, unsigned long write_err, void *context) 962 { 963 unsigned long flags; 964 struct dm_cache_migration *mg = (struct dm_cache_migration *) context; 965 struct cache *cache = mg->cache; 966 967 if (read_err || write_err) 968 mg->err = true; 969 970 spin_lock_irqsave(&cache->lock, flags); 971 list_add_tail(&mg->list, &cache->completed_migrations); 972 spin_unlock_irqrestore(&cache->lock, flags); 973 974 wake_worker(cache); 975 } 976 977 static void issue_copy_real(struct dm_cache_migration *mg) 978 { 979 int r; 980 struct dm_io_region o_region, c_region; 981 struct cache *cache = mg->cache; 982 983 o_region.bdev = cache->origin_dev->bdev; 984 o_region.count = cache->sectors_per_block; 985 986 c_region.bdev = cache->cache_dev->bdev; 987 c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block; 988 c_region.count = cache->sectors_per_block; 989 990 if (mg->writeback || mg->demote) { 991 /* demote */ 992 o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block; 993 r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg); 994 } else { 995 /* promote */ 996 o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block; 997 r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg); 998 } 999 1000 if (r < 0) { 1001 DMERR_LIMIT("issuing migration failed"); 1002 migration_failure(mg); 1003 } 1004 } 1005 1006 static void overwrite_endio(struct bio *bio, int err) 1007 { 1008 struct dm_cache_migration *mg = bio->bi_private; 1009 struct cache *cache = mg->cache; 1010 size_t pb_data_size = get_per_bio_data_size(cache); 1011 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 1012 unsigned long flags; 1013 1014 dm_unhook_bio(&pb->hook_info, bio); 1015 1016 if (err) 1017 mg->err = true; 1018 1019 mg->requeue_holder = false; 1020 1021 spin_lock_irqsave(&cache->lock, flags); 1022 list_add_tail(&mg->list, &cache->completed_migrations); 1023 spin_unlock_irqrestore(&cache->lock, flags); 1024 1025 wake_worker(cache); 1026 } 1027 1028 static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio) 1029 { 1030 size_t pb_data_size = get_per_bio_data_size(mg->cache); 1031 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 1032 1033 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); 1034 remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock); 1035 generic_make_request(bio); 1036 } 1037 1038 static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) 1039 { 1040 return (bio_data_dir(bio) == WRITE) && 1041 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); 1042 } 1043 1044 static void avoid_copy(struct dm_cache_migration *mg) 1045 { 1046 atomic_inc(&mg->cache->stats.copies_avoided); 1047 migration_success_pre_commit(mg); 1048 } 1049 1050 static void issue_copy(struct 
dm_cache_migration *mg) 1051 { 1052 bool avoid; 1053 struct cache *cache = mg->cache; 1054 1055 if (mg->writeback || mg->demote) 1056 avoid = !is_dirty(cache, mg->cblock) || 1057 is_discarded_oblock(cache, mg->old_oblock); 1058 else { 1059 struct bio *bio = mg->new_ocell->holder; 1060 1061 avoid = is_discarded_oblock(cache, mg->new_oblock); 1062 1063 if (!avoid && bio_writes_complete_block(cache, bio)) { 1064 issue_overwrite(mg, bio); 1065 return; 1066 } 1067 } 1068 1069 avoid ? avoid_copy(mg) : issue_copy_real(mg); 1070 } 1071 1072 static void complete_migration(struct dm_cache_migration *mg) 1073 { 1074 if (mg->err) 1075 migration_failure(mg); 1076 else 1077 migration_success_pre_commit(mg); 1078 } 1079 1080 static void process_migrations(struct cache *cache, struct list_head *head, 1081 void (*fn)(struct dm_cache_migration *)) 1082 { 1083 unsigned long flags; 1084 struct list_head list; 1085 struct dm_cache_migration *mg, *tmp; 1086 1087 INIT_LIST_HEAD(&list); 1088 spin_lock_irqsave(&cache->lock, flags); 1089 list_splice_init(head, &list); 1090 spin_unlock_irqrestore(&cache->lock, flags); 1091 1092 list_for_each_entry_safe(mg, tmp, &list, list) 1093 fn(mg); 1094 } 1095 1096 static void __queue_quiesced_migration(struct dm_cache_migration *mg) 1097 { 1098 list_add_tail(&mg->list, &mg->cache->quiesced_migrations); 1099 } 1100 1101 static void queue_quiesced_migration(struct dm_cache_migration *mg) 1102 { 1103 unsigned long flags; 1104 struct cache *cache = mg->cache; 1105 1106 spin_lock_irqsave(&cache->lock, flags); 1107 __queue_quiesced_migration(mg); 1108 spin_unlock_irqrestore(&cache->lock, flags); 1109 1110 wake_worker(cache); 1111 } 1112 1113 static void queue_quiesced_migrations(struct cache *cache, struct list_head *work) 1114 { 1115 unsigned long flags; 1116 struct dm_cache_migration *mg, *tmp; 1117 1118 spin_lock_irqsave(&cache->lock, flags); 1119 list_for_each_entry_safe(mg, tmp, work, list) 1120 __queue_quiesced_migration(mg); 1121 spin_unlock_irqrestore(&cache->lock, flags); 1122 1123 wake_worker(cache); 1124 } 1125 1126 static void check_for_quiesced_migrations(struct cache *cache, 1127 struct per_bio_data *pb) 1128 { 1129 struct list_head work; 1130 1131 if (!pb->all_io_entry) 1132 return; 1133 1134 INIT_LIST_HEAD(&work); 1135 if (pb->all_io_entry) 1136 dm_deferred_entry_dec(pb->all_io_entry, &work); 1137 1138 if (!list_empty(&work)) 1139 queue_quiesced_migrations(cache, &work); 1140 } 1141 1142 static void quiesce_migration(struct dm_cache_migration *mg) 1143 { 1144 if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list)) 1145 queue_quiesced_migration(mg); 1146 } 1147 1148 static void promote(struct cache *cache, struct prealloc *structs, 1149 dm_oblock_t oblock, dm_cblock_t cblock, 1150 struct dm_bio_prison_cell *cell) 1151 { 1152 struct dm_cache_migration *mg = prealloc_get_migration(structs); 1153 1154 mg->err = false; 1155 mg->writeback = false; 1156 mg->demote = false; 1157 mg->promote = true; 1158 mg->requeue_holder = true; 1159 mg->invalidate = false; 1160 mg->cache = cache; 1161 mg->new_oblock = oblock; 1162 mg->cblock = cblock; 1163 mg->old_ocell = NULL; 1164 mg->new_ocell = cell; 1165 mg->start_jiffies = jiffies; 1166 1167 inc_nr_migrations(cache); 1168 quiesce_migration(mg); 1169 } 1170 1171 static void writeback(struct cache *cache, struct prealloc *structs, 1172 dm_oblock_t oblock, dm_cblock_t cblock, 1173 struct dm_bio_prison_cell *cell) 1174 { 1175 struct dm_cache_migration *mg = prealloc_get_migration(structs); 1176 1177 mg->err = false; 1178 
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any changes in the cache
 * block are thrown away.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(bio->bi_iter.bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue(cache, bio);
}

/*
 * People generally discard large parts of a device, e.g. the whole device
 * when formatting.  Splitting these large discards up into cache block
 * sized ios and then quiescing (always necessary for discard) takes too
 * long.
 *
 * We keep it simple, and allow any size of discard to come in, and just
 * mark off blocks on the discard bitset.  No passdown occurs!
 *
 * To implement passdown we need to change the bio_prison such that a cell
 * can have a key that spans many blocks.
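 *
 * Note that only discard blocks completely covered by the bio get marked:
 * process_discard_bio() rounds the start sector up and the end sector down
 * to discard_block_size boundaries.  For example (sizes illustrative only),
 * with a discard_block_size of 2048 sectors, a discard of sectors 3000-9199
 * marks only blocks 2 and 3, i.e. sectors 4096-8191.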
1286 */ 1287 static void process_discard_bio(struct cache *cache, struct bio *bio) 1288 { 1289 dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, 1290 cache->discard_block_size); 1291 dm_block_t end_block = bio_end_sector(bio); 1292 dm_block_t b; 1293 1294 end_block = block_div(end_block, cache->discard_block_size); 1295 1296 for (b = start_block; b < end_block; b++) 1297 set_discard(cache, to_dblock(b)); 1298 1299 bio_endio(bio, 0); 1300 } 1301 1302 static bool spare_migration_bandwidth(struct cache *cache) 1303 { 1304 sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) * 1305 cache->sectors_per_block; 1306 return current_volume < cache->migration_threshold; 1307 } 1308 1309 static void inc_hit_counter(struct cache *cache, struct bio *bio) 1310 { 1311 atomic_inc(bio_data_dir(bio) == READ ? 1312 &cache->stats.read_hit : &cache->stats.write_hit); 1313 } 1314 1315 static void inc_miss_counter(struct cache *cache, struct bio *bio) 1316 { 1317 atomic_inc(bio_data_dir(bio) == READ ? 1318 &cache->stats.read_miss : &cache->stats.write_miss); 1319 } 1320 1321 static void issue_cache_bio(struct cache *cache, struct bio *bio, 1322 struct per_bio_data *pb, 1323 dm_oblock_t oblock, dm_cblock_t cblock) 1324 { 1325 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1326 remap_to_cache_dirty(cache, bio, oblock, cblock); 1327 issue(cache, bio); 1328 } 1329 1330 static void process_bio(struct cache *cache, struct prealloc *structs, 1331 struct bio *bio) 1332 { 1333 int r; 1334 bool release_cell = true; 1335 dm_oblock_t block = get_bio_block(cache, bio); 1336 struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; 1337 struct policy_result lookup_result; 1338 size_t pb_data_size = get_per_bio_data_size(cache); 1339 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 1340 bool discarded_block = is_discarded_oblock(cache, block); 1341 bool passthrough = passthrough_mode(&cache->features); 1342 bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); 1343 1344 /* 1345 * Check to see if that block is currently migrating. 1346 */ 1347 cell_prealloc = prealloc_get_cell(structs); 1348 r = bio_detain(cache, block, bio, cell_prealloc, 1349 (cell_free_fn) prealloc_put_cell, 1350 structs, &new_ocell); 1351 if (r > 0) 1352 return; 1353 1354 r = policy_map(cache->policy, block, true, can_migrate, discarded_block, 1355 bio, &lookup_result); 1356 1357 if (r == -EWOULDBLOCK) 1358 /* migration has been denied */ 1359 lookup_result.op = POLICY_MISS; 1360 1361 switch (lookup_result.op) { 1362 case POLICY_HIT: 1363 if (passthrough) { 1364 inc_miss_counter(cache, bio); 1365 1366 /* 1367 * Passthrough always maps to the origin, 1368 * invalidating any cache blocks that are written 1369 * to. 
1370 */ 1371 1372 if (bio_data_dir(bio) == WRITE) { 1373 atomic_inc(&cache->stats.demotion); 1374 invalidate(cache, structs, block, lookup_result.cblock, new_ocell); 1375 release_cell = false; 1376 1377 } else { 1378 /* FIXME: factor out issue_origin() */ 1379 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1380 remap_to_origin_clear_discard(cache, bio, block); 1381 issue(cache, bio); 1382 } 1383 } else { 1384 inc_hit_counter(cache, bio); 1385 1386 if (bio_data_dir(bio) == WRITE && 1387 writethrough_mode(&cache->features) && 1388 !is_dirty(cache, lookup_result.cblock)) { 1389 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1390 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 1391 issue(cache, bio); 1392 } else 1393 issue_cache_bio(cache, bio, pb, block, lookup_result.cblock); 1394 } 1395 1396 break; 1397 1398 case POLICY_MISS: 1399 inc_miss_counter(cache, bio); 1400 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1401 remap_to_origin_clear_discard(cache, bio, block); 1402 issue(cache, bio); 1403 break; 1404 1405 case POLICY_NEW: 1406 atomic_inc(&cache->stats.promotion); 1407 promote(cache, structs, block, lookup_result.cblock, new_ocell); 1408 release_cell = false; 1409 break; 1410 1411 case POLICY_REPLACE: 1412 cell_prealloc = prealloc_get_cell(structs); 1413 r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc, 1414 (cell_free_fn) prealloc_put_cell, 1415 structs, &old_ocell); 1416 if (r > 0) { 1417 /* 1418 * We have to be careful to avoid lock inversion of 1419 * the cells. So we back off, and wait for the 1420 * old_ocell to become free. 1421 */ 1422 policy_force_mapping(cache->policy, block, 1423 lookup_result.old_oblock); 1424 atomic_inc(&cache->stats.cache_cell_clash); 1425 break; 1426 } 1427 atomic_inc(&cache->stats.demotion); 1428 atomic_inc(&cache->stats.promotion); 1429 1430 demote_then_promote(cache, structs, lookup_result.old_oblock, 1431 block, lookup_result.cblock, 1432 old_ocell, new_ocell); 1433 release_cell = false; 1434 break; 1435 1436 default: 1437 DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__, 1438 (unsigned) lookup_result.op); 1439 bio_io_error(bio); 1440 } 1441 1442 if (release_cell) 1443 cell_defer(cache, new_ocell, false); 1444 } 1445 1446 static int need_commit_due_to_time(struct cache *cache) 1447 { 1448 return jiffies < cache->last_commit_jiffies || 1449 jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; 1450 } 1451 1452 static int commit_if_needed(struct cache *cache) 1453 { 1454 int r = 0; 1455 1456 if ((cache->commit_requested || need_commit_due_to_time(cache)) && 1457 dm_cache_changed_this_transaction(cache->cmd)) { 1458 atomic_inc(&cache->stats.commit_count); 1459 cache->commit_requested = false; 1460 r = dm_cache_commit(cache->cmd, false); 1461 cache->last_commit_jiffies = jiffies; 1462 } 1463 1464 return r; 1465 } 1466 1467 static void process_deferred_bios(struct cache *cache) 1468 { 1469 unsigned long flags; 1470 struct bio_list bios; 1471 struct bio *bio; 1472 struct prealloc structs; 1473 1474 memset(&structs, 0, sizeof(structs)); 1475 bio_list_init(&bios); 1476 1477 spin_lock_irqsave(&cache->lock, flags); 1478 bio_list_merge(&bios, &cache->deferred_bios); 1479 bio_list_init(&cache->deferred_bios); 1480 spin_unlock_irqrestore(&cache->lock, flags); 1481 1482 while (!bio_list_empty(&bios)) { 1483 /* 1484 * If we've got no free migration structs, and processing 1485 * this bio might require one, we pause until there are some 1486 * prepared mappings to process. 
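		 * (The remaining bios are simply merged back onto
		 * deferred_bios; the worker is woken again once a migration
		 * completes, see copy_complete() and overwrite_endio().)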
1487 */ 1488 if (prealloc_data_structs(cache, &structs)) { 1489 spin_lock_irqsave(&cache->lock, flags); 1490 bio_list_merge(&cache->deferred_bios, &bios); 1491 spin_unlock_irqrestore(&cache->lock, flags); 1492 break; 1493 } 1494 1495 bio = bio_list_pop(&bios); 1496 1497 if (bio->bi_rw & REQ_FLUSH) 1498 process_flush_bio(cache, bio); 1499 else if (bio->bi_rw & REQ_DISCARD) 1500 process_discard_bio(cache, bio); 1501 else 1502 process_bio(cache, &structs, bio); 1503 } 1504 1505 prealloc_free_structs(cache, &structs); 1506 } 1507 1508 static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) 1509 { 1510 unsigned long flags; 1511 struct bio_list bios; 1512 struct bio *bio; 1513 1514 bio_list_init(&bios); 1515 1516 spin_lock_irqsave(&cache->lock, flags); 1517 bio_list_merge(&bios, &cache->deferred_flush_bios); 1518 bio_list_init(&cache->deferred_flush_bios); 1519 spin_unlock_irqrestore(&cache->lock, flags); 1520 1521 while ((bio = bio_list_pop(&bios))) 1522 submit_bios ? generic_make_request(bio) : bio_io_error(bio); 1523 } 1524 1525 static void process_deferred_writethrough_bios(struct cache *cache) 1526 { 1527 unsigned long flags; 1528 struct bio_list bios; 1529 struct bio *bio; 1530 1531 bio_list_init(&bios); 1532 1533 spin_lock_irqsave(&cache->lock, flags); 1534 bio_list_merge(&bios, &cache->deferred_writethrough_bios); 1535 bio_list_init(&cache->deferred_writethrough_bios); 1536 spin_unlock_irqrestore(&cache->lock, flags); 1537 1538 while ((bio = bio_list_pop(&bios))) 1539 generic_make_request(bio); 1540 } 1541 1542 static void writeback_some_dirty_blocks(struct cache *cache) 1543 { 1544 int r = 0; 1545 dm_oblock_t oblock; 1546 dm_cblock_t cblock; 1547 struct prealloc structs; 1548 struct dm_bio_prison_cell *old_ocell; 1549 1550 memset(&structs, 0, sizeof(structs)); 1551 1552 while (spare_migration_bandwidth(cache)) { 1553 if (prealloc_data_structs(cache, &structs)) 1554 break; 1555 1556 r = policy_writeback_work(cache->policy, &oblock, &cblock); 1557 if (r) 1558 break; 1559 1560 r = get_cell(cache, oblock, &structs, &old_ocell); 1561 if (r) { 1562 policy_set_dirty(cache->policy, oblock); 1563 break; 1564 } 1565 1566 writeback(cache, &structs, oblock, cblock, old_ocell); 1567 } 1568 1569 prealloc_free_structs(cache, &structs); 1570 } 1571 1572 /*---------------------------------------------------------------- 1573 * Invalidations. 1574 * Dropping something from the cache *without* writing back. 
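 * Each request carries a half-open cblock range; for example a request
 * with begin=0 and end=128 drops the mappings for cblocks 0-127, one at a
 * time, via policy_remove_cblock() and dm_cache_remove_mapping().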
1575 *--------------------------------------------------------------*/ 1576 1577 static void process_invalidation_request(struct cache *cache, struct invalidation_request *req) 1578 { 1579 int r = 0; 1580 uint64_t begin = from_cblock(req->cblocks->begin); 1581 uint64_t end = from_cblock(req->cblocks->end); 1582 1583 while (begin != end) { 1584 r = policy_remove_cblock(cache->policy, to_cblock(begin)); 1585 if (!r) { 1586 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin)); 1587 if (r) 1588 break; 1589 1590 } else if (r == -ENODATA) { 1591 /* harmless, already unmapped */ 1592 r = 0; 1593 1594 } else { 1595 DMERR("policy_remove_cblock failed"); 1596 break; 1597 } 1598 1599 begin++; 1600 } 1601 1602 cache->commit_requested = true; 1603 1604 req->err = r; 1605 atomic_set(&req->complete, 1); 1606 1607 wake_up(&req->result_wait); 1608 } 1609 1610 static void process_invalidation_requests(struct cache *cache) 1611 { 1612 struct list_head list; 1613 struct invalidation_request *req, *tmp; 1614 1615 INIT_LIST_HEAD(&list); 1616 spin_lock(&cache->invalidation_lock); 1617 list_splice_init(&cache->invalidation_requests, &list); 1618 spin_unlock(&cache->invalidation_lock); 1619 1620 list_for_each_entry_safe (req, tmp, &list, list) 1621 process_invalidation_request(cache, req); 1622 } 1623 1624 /*---------------------------------------------------------------- 1625 * Main worker loop 1626 *--------------------------------------------------------------*/ 1627 static bool is_quiescing(struct cache *cache) 1628 { 1629 return atomic_read(&cache->quiescing); 1630 } 1631 1632 static void ack_quiescing(struct cache *cache) 1633 { 1634 if (is_quiescing(cache)) { 1635 atomic_inc(&cache->quiescing_ack); 1636 wake_up(&cache->quiescing_wait); 1637 } 1638 } 1639 1640 static void wait_for_quiescing_ack(struct cache *cache) 1641 { 1642 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack)); 1643 } 1644 1645 static void start_quiescing(struct cache *cache) 1646 { 1647 atomic_inc(&cache->quiescing); 1648 wait_for_quiescing_ack(cache); 1649 } 1650 1651 static void stop_quiescing(struct cache *cache) 1652 { 1653 atomic_set(&cache->quiescing, 0); 1654 atomic_set(&cache->quiescing_ack, 0); 1655 } 1656 1657 static void wait_for_migrations(struct cache *cache) 1658 { 1659 wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); 1660 } 1661 1662 static void stop_worker(struct cache *cache) 1663 { 1664 cancel_delayed_work(&cache->waker); 1665 flush_workqueue(cache->wq); 1666 } 1667 1668 static void requeue_deferred_io(struct cache *cache) 1669 { 1670 struct bio *bio; 1671 struct bio_list bios; 1672 1673 bio_list_init(&bios); 1674 bio_list_merge(&bios, &cache->deferred_bios); 1675 bio_list_init(&cache->deferred_bios); 1676 1677 while ((bio = bio_list_pop(&bios))) 1678 bio_endio(bio, DM_ENDIO_REQUEUE); 1679 } 1680 1681 static int more_work(struct cache *cache) 1682 { 1683 if (is_quiescing(cache)) 1684 return !list_empty(&cache->quiesced_migrations) || 1685 !list_empty(&cache->completed_migrations) || 1686 !list_empty(&cache->need_commit_migrations); 1687 else 1688 return !bio_list_empty(&cache->deferred_bios) || 1689 !bio_list_empty(&cache->deferred_flush_bios) || 1690 !bio_list_empty(&cache->deferred_writethrough_bios) || 1691 !list_empty(&cache->quiesced_migrations) || 1692 !list_empty(&cache->completed_migrations) || 1693 !list_empty(&cache->need_commit_migrations) || 1694 cache->invalidate; 1695 } 1696 1697 static void do_worker(struct work_struct *ws) 1698 { 1699 struct cache *cache = 
container_of(ws, struct cache, worker); 1700 1701 do { 1702 if (!is_quiescing(cache)) { 1703 writeback_some_dirty_blocks(cache); 1704 process_deferred_writethrough_bios(cache); 1705 process_deferred_bios(cache); 1706 process_invalidation_requests(cache); 1707 } 1708 1709 process_migrations(cache, &cache->quiesced_migrations, issue_copy); 1710 process_migrations(cache, &cache->completed_migrations, complete_migration); 1711 1712 if (commit_if_needed(cache)) { 1713 process_deferred_flush_bios(cache, false); 1714 1715 /* 1716 * FIXME: rollback metadata or just go into a 1717 * failure mode and error everything 1718 */ 1719 } else { 1720 process_deferred_flush_bios(cache, true); 1721 process_migrations(cache, &cache->need_commit_migrations, 1722 migration_success_post_commit); 1723 } 1724 1725 ack_quiescing(cache); 1726 1727 } while (more_work(cache)); 1728 } 1729 1730 /* 1731 * We want to commit periodically so that not too much 1732 * unwritten metadata builds up. 1733 */ 1734 static void do_waker(struct work_struct *ws) 1735 { 1736 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); 1737 policy_tick(cache->policy); 1738 wake_worker(cache); 1739 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); 1740 } 1741 1742 /*----------------------------------------------------------------*/ 1743 1744 static int is_congested(struct dm_dev *dev, int bdi_bits) 1745 { 1746 struct request_queue *q = bdev_get_queue(dev->bdev); 1747 return bdi_congested(&q->backing_dev_info, bdi_bits); 1748 } 1749 1750 static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 1751 { 1752 struct cache *cache = container_of(cb, struct cache, callbacks); 1753 1754 return is_congested(cache->origin_dev, bdi_bits) || 1755 is_congested(cache->cache_dev, bdi_bits); 1756 } 1757 1758 /*---------------------------------------------------------------- 1759 * Target methods 1760 *--------------------------------------------------------------*/ 1761 1762 /* 1763 * This function gets called on the error paths of the constructor, so we 1764 * have to cope with a partially initialised struct. 
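 * Every member is checked before being torn down, so calling this with
 * only some of the fields set up is safe.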
1765 */ 1766 static void destroy(struct cache *cache) 1767 { 1768 unsigned i; 1769 1770 if (cache->next_migration) 1771 mempool_free(cache->next_migration, cache->migration_pool); 1772 1773 if (cache->migration_pool) 1774 mempool_destroy(cache->migration_pool); 1775 1776 if (cache->all_io_ds) 1777 dm_deferred_set_destroy(cache->all_io_ds); 1778 1779 if (cache->prison) 1780 dm_bio_prison_destroy(cache->prison); 1781 1782 if (cache->wq) 1783 destroy_workqueue(cache->wq); 1784 1785 if (cache->dirty_bitset) 1786 free_bitset(cache->dirty_bitset); 1787 1788 if (cache->discard_bitset) 1789 free_bitset(cache->discard_bitset); 1790 1791 if (cache->copier) 1792 dm_kcopyd_client_destroy(cache->copier); 1793 1794 if (cache->cmd) 1795 dm_cache_metadata_close(cache->cmd); 1796 1797 if (cache->metadata_dev) 1798 dm_put_device(cache->ti, cache->metadata_dev); 1799 1800 if (cache->origin_dev) 1801 dm_put_device(cache->ti, cache->origin_dev); 1802 1803 if (cache->cache_dev) 1804 dm_put_device(cache->ti, cache->cache_dev); 1805 1806 if (cache->policy) 1807 dm_cache_policy_destroy(cache->policy); 1808 1809 for (i = 0; i < cache->nr_ctr_args ; i++) 1810 kfree(cache->ctr_args[i]); 1811 kfree(cache->ctr_args); 1812 1813 kfree(cache); 1814 } 1815 1816 static void cache_dtr(struct dm_target *ti) 1817 { 1818 struct cache *cache = ti->private; 1819 1820 destroy(cache); 1821 } 1822 1823 static sector_t get_dev_size(struct dm_dev *dev) 1824 { 1825 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; 1826 } 1827 1828 /*----------------------------------------------------------------*/ 1829 1830 /* 1831 * Construct a cache device mapping. 1832 * 1833 * cache <metadata dev> <cache dev> <origin dev> <block size> 1834 * <#feature args> [<feature arg>]* 1835 * <policy> <#policy args> [<policy arg>]* 1836 * 1837 * metadata dev : fast device holding the persistent metadata 1838 * cache dev : fast device holding cached data blocks 1839 * origin dev : slow device holding original data blocks 1840 * block size : cache unit size in sectors 1841 * 1842 * #feature args : number of feature arguments passed 1843 * feature args : writethrough. (The default is writeback.) 1844 * 1845 * policy : the replacement policy to use 1846 * #policy args : an even number of policy arguments corresponding 1847 * to key/value pairs passed to the policy 1848 * policy args : key/value pairs passed to the policy 1849 * E.g. 'sequential_threshold 1024' 1850 * See cache-policies.txt for details. 1851 * 1852 * Optional feature arguments are: 1853 * writethrough : write through caching that prohibits cache block 1854 * content from being different from origin block content. 1855 * Without this argument, the default behaviour is to write 1856 * back cache block contents later for performance reasons, 1857 * so they may differ from the corresponding origin blocks. 
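 *
 * For example (device names and sizes below are illustrative only), a
 * table line selecting a 256KB block size, writethrough mode and the
 * default policy could look like:
 *
 *	0 41943040 cache /dev/mapper/fast-meta /dev/mapper/fast-blocks \
 *		/dev/mapper/slow 512 1 writethrough default 0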
 */
struct cache_args {
	struct dm_target *ti;

	struct dm_dev *metadata_dev;

	struct dm_dev *cache_dev;
	sector_t cache_sectors;

	struct dm_dev *origin_dev;
	sector_t origin_sectors;

	uint32_t block_size;

	const char *policy_name;
	int policy_argc;
	const char **policy_argv;

	struct cache_features features;
};

static void destroy_cache_args(struct cache_args *ca)
{
	if (ca->metadata_dev)
		dm_put_device(ca->ti, ca->metadata_dev);

	if (ca->cache_dev)
		dm_put_device(ca->ti, ca->cache_dev);

	if (ca->origin_dev)
		dm_put_device(ca->ti, ca->origin_dev);

	kfree(ca);
}

static bool at_least_one_arg(struct dm_arg_set *as, char **error)
{
	if (!as->argc) {
		*error = "Insufficient args";
		return false;
	}

	return true;
}

static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
			      char **error)
{
	int r;
	sector_t metadata_dev_size;
	char b[BDEVNAME_SIZE];

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->metadata_dev);
	if (r) {
		*error = "Error opening metadata device";
		return r;
	}

	metadata_dev_size = get_dev_size(ca->metadata_dev);
	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b),
		       DM_CACHE_METADATA_MAX_SECTORS_WARNING);

	return 0;
}

static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
			   char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->cache_dev);
	if (r) {
		*error = "Error opening cache device";
		return r;
	}
	ca->cache_sectors = get_dev_size(ca->cache_dev);

	return 0;
}

static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->origin_dev);
	if (r) {
		*error = "Error opening origin device";
		return r;
	}

	ca->origin_sectors = get_dev_size(ca->origin_dev);
	if (ca->ti->len > ca->origin_sectors) {
		*error = "Device size larger than cached device";
		return -EINVAL;
	}

	return 0;
}

static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	unsigned long block_size;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
		*error = "Invalid data block size";
		return -EINVAL;
	}

	if (block_size > ca->cache_sectors) {
		*error = "Data block size is larger than the cache device";
		return -EINVAL;
	}

	ca->block_size = block_size;

	return 0;
}

static void init_features(struct cache_features *cf)
{
	cf->mode = CM_WRITE;
	cf->io_mode =
CM_IO_WRITEBACK; 2001 } 2002 2003 static int parse_features(struct cache_args *ca, struct dm_arg_set *as, 2004 char **error) 2005 { 2006 static struct dm_arg _args[] = { 2007 {0, 1, "Invalid number of cache feature arguments"}, 2008 }; 2009 2010 int r; 2011 unsigned argc; 2012 const char *arg; 2013 struct cache_features *cf = &ca->features; 2014 2015 init_features(cf); 2016 2017 r = dm_read_arg_group(_args, as, &argc, error); 2018 if (r) 2019 return -EINVAL; 2020 2021 while (argc--) { 2022 arg = dm_shift_arg(as); 2023 2024 if (!strcasecmp(arg, "writeback")) 2025 cf->io_mode = CM_IO_WRITEBACK; 2026 2027 else if (!strcasecmp(arg, "writethrough")) 2028 cf->io_mode = CM_IO_WRITETHROUGH; 2029 2030 else if (!strcasecmp(arg, "passthrough")) 2031 cf->io_mode = CM_IO_PASSTHROUGH; 2032 2033 else { 2034 *error = "Unrecognised cache feature requested"; 2035 return -EINVAL; 2036 } 2037 } 2038 2039 return 0; 2040 } 2041 2042 static int parse_policy(struct cache_args *ca, struct dm_arg_set *as, 2043 char **error) 2044 { 2045 static struct dm_arg _args[] = { 2046 {0, 1024, "Invalid number of policy arguments"}, 2047 }; 2048 2049 int r; 2050 2051 if (!at_least_one_arg(as, error)) 2052 return -EINVAL; 2053 2054 ca->policy_name = dm_shift_arg(as); 2055 2056 r = dm_read_arg_group(_args, as, &ca->policy_argc, error); 2057 if (r) 2058 return -EINVAL; 2059 2060 ca->policy_argv = (const char **)as->argv; 2061 dm_consume_args(as, ca->policy_argc); 2062 2063 return 0; 2064 } 2065 2066 static int parse_cache_args(struct cache_args *ca, int argc, char **argv, 2067 char **error) 2068 { 2069 int r; 2070 struct dm_arg_set as; 2071 2072 as.argc = argc; 2073 as.argv = argv; 2074 2075 r = parse_metadata_dev(ca, &as, error); 2076 if (r) 2077 return r; 2078 2079 r = parse_cache_dev(ca, &as, error); 2080 if (r) 2081 return r; 2082 2083 r = parse_origin_dev(ca, &as, error); 2084 if (r) 2085 return r; 2086 2087 r = parse_block_size(ca, &as, error); 2088 if (r) 2089 return r; 2090 2091 r = parse_features(ca, &as, error); 2092 if (r) 2093 return r; 2094 2095 r = parse_policy(ca, &as, error); 2096 if (r) 2097 return r; 2098 2099 return 0; 2100 } 2101 2102 /*----------------------------------------------------------------*/ 2103 2104 static struct kmem_cache *migration_cache; 2105 2106 #define NOT_CORE_OPTION 1 2107 2108 static int process_config_option(struct cache *cache, const char *key, const char *value) 2109 { 2110 unsigned long tmp; 2111 2112 if (!strcasecmp(key, "migration_threshold")) { 2113 if (kstrtoul(value, 10, &tmp)) 2114 return -EINVAL; 2115 2116 cache->migration_threshold = tmp; 2117 return 0; 2118 } 2119 2120 return NOT_CORE_OPTION; 2121 } 2122 2123 static int set_config_value(struct cache *cache, const char *key, const char *value) 2124 { 2125 int r = process_config_option(cache, key, value); 2126 2127 if (r == NOT_CORE_OPTION) 2128 r = policy_set_config_value(cache->policy, key, value); 2129 2130 if (r) 2131 DMWARN("bad config value for %s: %s", key, value); 2132 2133 return r; 2134 } 2135 2136 static int set_config_values(struct cache *cache, int argc, const char **argv) 2137 { 2138 int r = 0; 2139 2140 if (argc & 1) { 2141 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs."); 2142 return -EINVAL; 2143 } 2144 2145 while (argc) { 2146 r = set_config_value(cache, argv[0], argv[1]); 2147 if (r) 2148 break; 2149 2150 argc -= 2; 2151 argv += 2; 2152 } 2153 2154 return r; 2155 } 2156 2157 static int create_cache_policy(struct cache *cache, struct cache_args *ca, 2158 char **error) 
2159 { 2160 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name, 2161 cache->cache_size, 2162 cache->origin_sectors, 2163 cache->sectors_per_block); 2164 if (IS_ERR(p)) { 2165 *error = "Error creating cache's policy"; 2166 return PTR_ERR(p); 2167 } 2168 cache->policy = p; 2169 2170 return 0; 2171 } 2172 2173 /* 2174 * We want the discard block size to be a power of two, at least the size 2175 * of the cache block size, and have no more than 2^14 discard blocks 2176 * across the origin. 2177 */ 2178 #define MAX_DISCARD_BLOCKS (1 << 14) 2179 2180 static bool too_many_discard_blocks(sector_t discard_block_size, 2181 sector_t origin_size) 2182 { 2183 (void) sector_div(origin_size, discard_block_size); 2184 2185 return origin_size > MAX_DISCARD_BLOCKS; 2186 } 2187 2188 static sector_t calculate_discard_block_size(sector_t cache_block_size, 2189 sector_t origin_size) 2190 { 2191 sector_t discard_block_size; 2192 2193 discard_block_size = roundup_pow_of_two(cache_block_size); 2194 2195 if (origin_size) 2196 while (too_many_discard_blocks(discard_block_size, origin_size)) 2197 discard_block_size *= 2; 2198 2199 return discard_block_size; 2200 } 2201 2202 #define DEFAULT_MIGRATION_THRESHOLD 2048 2203 2204 static int cache_create(struct cache_args *ca, struct cache **result) 2205 { 2206 int r = 0; 2207 char **error = &ca->ti->error; 2208 struct cache *cache; 2209 struct dm_target *ti = ca->ti; 2210 dm_block_t origin_blocks; 2211 struct dm_cache_metadata *cmd; 2212 bool may_format = ca->features.mode == CM_WRITE; 2213 2214 cache = kzalloc(sizeof(*cache), GFP_KERNEL); 2215 if (!cache) 2216 return -ENOMEM; 2217 2218 cache->ti = ca->ti; 2219 ti->private = cache; 2220 ti->num_flush_bios = 2; 2221 ti->flush_supported = true; 2222 2223 ti->num_discard_bios = 1; 2224 ti->discards_supported = true; 2225 ti->discard_zeroes_data_unsupported = true; 2226 2227 cache->features = ca->features; 2228 ti->per_bio_data_size = get_per_bio_data_size(cache); 2229 2230 cache->callbacks.congested_fn = cache_is_congested; 2231 dm_table_add_target_callbacks(ti->table, &cache->callbacks); 2232 2233 cache->metadata_dev = ca->metadata_dev; 2234 cache->origin_dev = ca->origin_dev; 2235 cache->cache_dev = ca->cache_dev; 2236 2237 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; 2238 2239 /* FIXME: factor out this whole section */ 2240 origin_blocks = cache->origin_sectors = ca->origin_sectors; 2241 origin_blocks = block_div(origin_blocks, ca->block_size); 2242 cache->origin_blocks = to_oblock(origin_blocks); 2243 2244 cache->sectors_per_block = ca->block_size; 2245 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) { 2246 r = -EINVAL; 2247 goto bad; 2248 } 2249 2250 if (ca->block_size & (ca->block_size - 1)) { 2251 dm_block_t cache_size = ca->cache_sectors; 2252 2253 cache->sectors_per_block_shift = -1; 2254 cache_size = block_div(cache_size, ca->block_size); 2255 cache->cache_size = to_cblock(cache_size); 2256 } else { 2257 cache->sectors_per_block_shift = __ffs(ca->block_size); 2258 cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift); 2259 } 2260 2261 r = create_cache_policy(cache, ca, error); 2262 if (r) 2263 goto bad; 2264 2265 cache->policy_nr_args = ca->policy_argc; 2266 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; 2267 2268 r = set_config_values(cache, ca->policy_argc, ca->policy_argv); 2269 if (r) { 2270 *error = "Error setting cache policy's config values"; 2271 goto bad; 2272 } 2273 2274 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, 
2275 ca->block_size, may_format, 2276 dm_cache_policy_get_hint_size(cache->policy)); 2277 if (IS_ERR(cmd)) { 2278 *error = "Error creating metadata object"; 2279 r = PTR_ERR(cmd); 2280 goto bad; 2281 } 2282 cache->cmd = cmd; 2283 2284 if (passthrough_mode(&cache->features)) { 2285 bool all_clean; 2286 2287 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); 2288 if (r) { 2289 *error = "dm_cache_metadata_all_clean() failed"; 2290 goto bad; 2291 } 2292 2293 if (!all_clean) { 2294 *error = "Cannot enter passthrough mode unless all blocks are clean"; 2295 r = -EINVAL; 2296 goto bad; 2297 } 2298 } 2299 2300 spin_lock_init(&cache->lock); 2301 bio_list_init(&cache->deferred_bios); 2302 bio_list_init(&cache->deferred_flush_bios); 2303 bio_list_init(&cache->deferred_writethrough_bios); 2304 INIT_LIST_HEAD(&cache->quiesced_migrations); 2305 INIT_LIST_HEAD(&cache->completed_migrations); 2306 INIT_LIST_HEAD(&cache->need_commit_migrations); 2307 atomic_set(&cache->nr_migrations, 0); 2308 init_waitqueue_head(&cache->migration_wait); 2309 2310 init_waitqueue_head(&cache->quiescing_wait); 2311 atomic_set(&cache->quiescing, 0); 2312 atomic_set(&cache->quiescing_ack, 0); 2313 2314 r = -ENOMEM; 2315 cache->nr_dirty = 0; 2316 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); 2317 if (!cache->dirty_bitset) { 2318 *error = "could not allocate dirty bitset"; 2319 goto bad; 2320 } 2321 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); 2322 2323 cache->discard_block_size = 2324 calculate_discard_block_size(cache->sectors_per_block, 2325 cache->origin_sectors); 2326 cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks); 2327 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); 2328 if (!cache->discard_bitset) { 2329 *error = "could not allocate discard bitset"; 2330 goto bad; 2331 } 2332 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); 2333 2334 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); 2335 if (IS_ERR(cache->copier)) { 2336 *error = "could not create kcopyd client"; 2337 r = PTR_ERR(cache->copier); 2338 goto bad; 2339 } 2340 2341 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); 2342 if (!cache->wq) { 2343 *error = "could not create workqueue for metadata object"; 2344 goto bad; 2345 } 2346 INIT_WORK(&cache->worker, do_worker); 2347 INIT_DELAYED_WORK(&cache->waker, do_waker); 2348 cache->last_commit_jiffies = jiffies; 2349 2350 cache->prison = dm_bio_prison_create(PRISON_CELLS); 2351 if (!cache->prison) { 2352 *error = "could not create bio prison"; 2353 goto bad; 2354 } 2355 2356 cache->all_io_ds = dm_deferred_set_create(); 2357 if (!cache->all_io_ds) { 2358 *error = "could not create all_io deferred set"; 2359 goto bad; 2360 } 2361 2362 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, 2363 migration_cache); 2364 if (!cache->migration_pool) { 2365 *error = "Error creating cache's migration mempool"; 2366 goto bad; 2367 } 2368 2369 cache->next_migration = NULL; 2370 2371 cache->need_tick_bio = true; 2372 cache->sized = false; 2373 cache->invalidate = false; 2374 cache->commit_requested = false; 2375 cache->loaded_mappings = false; 2376 cache->loaded_discards = false; 2377 2378 load_stats(cache); 2379 2380 atomic_set(&cache->stats.demotion, 0); 2381 atomic_set(&cache->stats.promotion, 0); 2382 atomic_set(&cache->stats.copies_avoided, 0); 2383 atomic_set(&cache->stats.cache_cell_clash, 0); 2384 atomic_set(&cache->stats.commit_count, 0); 2385 
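	/*
	 * The read/write hit and miss counters are not zeroed here: they
	 * were restored by load_stats() above (and are persisted again by
	 * save_stats() at suspend), so they carry over across reloads of
	 * the target.
	 */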
atomic_set(&cache->stats.discard_count, 0); 2386 2387 spin_lock_init(&cache->invalidation_lock); 2388 INIT_LIST_HEAD(&cache->invalidation_requests); 2389 2390 *result = cache; 2391 return 0; 2392 2393 bad: 2394 destroy(cache); 2395 return r; 2396 } 2397 2398 static int copy_ctr_args(struct cache *cache, int argc, const char **argv) 2399 { 2400 unsigned i; 2401 const char **copy; 2402 2403 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL); 2404 if (!copy) 2405 return -ENOMEM; 2406 for (i = 0; i < argc; i++) { 2407 copy[i] = kstrdup(argv[i], GFP_KERNEL); 2408 if (!copy[i]) { 2409 while (i--) 2410 kfree(copy[i]); 2411 kfree(copy); 2412 return -ENOMEM; 2413 } 2414 } 2415 2416 cache->nr_ctr_args = argc; 2417 cache->ctr_args = copy; 2418 2419 return 0; 2420 } 2421 2422 static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) 2423 { 2424 int r = -EINVAL; 2425 struct cache_args *ca; 2426 struct cache *cache = NULL; 2427 2428 ca = kzalloc(sizeof(*ca), GFP_KERNEL); 2429 if (!ca) { 2430 ti->error = "Error allocating memory for cache"; 2431 return -ENOMEM; 2432 } 2433 ca->ti = ti; 2434 2435 r = parse_cache_args(ca, argc, argv, &ti->error); 2436 if (r) 2437 goto out; 2438 2439 r = cache_create(ca, &cache); 2440 if (r) 2441 goto out; 2442 2443 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); 2444 if (r) { 2445 destroy(cache); 2446 goto out; 2447 } 2448 2449 ti->private = cache; 2450 2451 out: 2452 destroy_cache_args(ca); 2453 return r; 2454 } 2455 2456 static int cache_map(struct dm_target *ti, struct bio *bio) 2457 { 2458 struct cache *cache = ti->private; 2459 2460 int r; 2461 dm_oblock_t block = get_bio_block(cache, bio); 2462 size_t pb_data_size = get_per_bio_data_size(cache); 2463 bool can_migrate = false; 2464 bool discarded_block; 2465 struct dm_bio_prison_cell *cell; 2466 struct policy_result lookup_result; 2467 struct per_bio_data *pb; 2468 2469 if (from_oblock(block) >= from_oblock(cache->origin_blocks)) { 2470 /* 2471 * This can only occur if the io goes to a partial block at 2472 * the end of the origin device. We don't cache these. 2473 * Just remap to the origin and carry on. 2474 */ 2475 remap_to_origin_clear_discard(cache, bio, block); 2476 return DM_MAPIO_REMAPPED; 2477 } 2478 2479 pb = init_per_bio_data(bio, pb_data_size); 2480 2481 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { 2482 defer_bio(cache, bio); 2483 return DM_MAPIO_SUBMITTED; 2484 } 2485 2486 /* 2487 * Check to see if that block is currently migrating. 2488 */ 2489 cell = alloc_prison_cell(cache); 2490 if (!cell) { 2491 defer_bio(cache, bio); 2492 return DM_MAPIO_SUBMITTED; 2493 } 2494 2495 r = bio_detain(cache, block, bio, cell, 2496 (cell_free_fn) free_prison_cell, 2497 cache, &cell); 2498 if (r) { 2499 if (r < 0) 2500 defer_bio(cache, bio); 2501 2502 return DM_MAPIO_SUBMITTED; 2503 } 2504 2505 discarded_block = is_discarded_oblock(cache, block); 2506 2507 r = policy_map(cache->policy, block, false, can_migrate, discarded_block, 2508 bio, &lookup_result); 2509 if (r == -EWOULDBLOCK) { 2510 cell_defer(cache, cell, true); 2511 return DM_MAPIO_SUBMITTED; 2512 2513 } else if (r) { 2514 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); 2515 bio_io_error(bio); 2516 return DM_MAPIO_SUBMITTED; 2517 } 2518 2519 r = DM_MAPIO_REMAPPED; 2520 switch (lookup_result.op) { 2521 case POLICY_HIT: 2522 if (passthrough_mode(&cache->features)) { 2523 if (bio_data_dir(bio) == WRITE) { 2524 /* 2525 * We need to invalidate this block, so 2526 * defer for the worker thread. 
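				 * A write in passthrough mode is serviced
				 * by the origin device, so a cached copy
				 * of this block would go stale if it were
				 * left valid.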
2527 */ 2528 cell_defer(cache, cell, true); 2529 r = DM_MAPIO_SUBMITTED; 2530 2531 } else { 2532 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 2533 inc_miss_counter(cache, bio); 2534 remap_to_origin_clear_discard(cache, bio, block); 2535 2536 cell_defer(cache, cell, false); 2537 } 2538 2539 } else { 2540 inc_hit_counter(cache, bio); 2541 2542 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && 2543 !is_dirty(cache, lookup_result.cblock)) 2544 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 2545 else 2546 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); 2547 2548 cell_defer(cache, cell, false); 2549 } 2550 break; 2551 2552 case POLICY_MISS: 2553 inc_miss_counter(cache, bio); 2554 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 2555 2556 if (pb->req_nr != 0) { 2557 /* 2558 * This is a duplicate writethrough io that is no 2559 * longer needed because the block has been demoted. 2560 */ 2561 bio_endio(bio, 0); 2562 cell_defer(cache, cell, false); 2563 return DM_MAPIO_SUBMITTED; 2564 } else { 2565 remap_to_origin_clear_discard(cache, bio, block); 2566 cell_defer(cache, cell, false); 2567 } 2568 break; 2569 2570 default: 2571 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, 2572 (unsigned) lookup_result.op); 2573 bio_io_error(bio); 2574 r = DM_MAPIO_SUBMITTED; 2575 } 2576 2577 return r; 2578 } 2579 2580 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) 2581 { 2582 struct cache *cache = ti->private; 2583 unsigned long flags; 2584 size_t pb_data_size = get_per_bio_data_size(cache); 2585 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 2586 2587 if (pb->tick) { 2588 policy_tick(cache->policy); 2589 2590 spin_lock_irqsave(&cache->lock, flags); 2591 cache->need_tick_bio = true; 2592 spin_unlock_irqrestore(&cache->lock, flags); 2593 } 2594 2595 check_for_quiesced_migrations(cache, pb); 2596 2597 return 0; 2598 } 2599 2600 static int write_dirty_bitset(struct cache *cache) 2601 { 2602 unsigned i, r; 2603 2604 for (i = 0; i < from_cblock(cache->cache_size); i++) { 2605 r = dm_cache_set_dirty(cache->cmd, to_cblock(i), 2606 is_dirty(cache, to_cblock(i))); 2607 if (r) 2608 return r; 2609 } 2610 2611 return 0; 2612 } 2613 2614 static int write_discard_bitset(struct cache *cache) 2615 { 2616 unsigned i, r; 2617 2618 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, 2619 cache->discard_nr_blocks); 2620 if (r) { 2621 DMERR("could not resize on-disk discard bitset"); 2622 return r; 2623 } 2624 2625 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { 2626 r = dm_cache_set_discard(cache->cmd, to_dblock(i), 2627 is_discarded(cache, to_dblock(i))); 2628 if (r) 2629 return r; 2630 } 2631 2632 return 0; 2633 } 2634 2635 static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, 2636 uint32_t hint) 2637 { 2638 struct cache *cache = context; 2639 return dm_cache_save_hint(cache->cmd, cblock, hint); 2640 } 2641 2642 static int write_hints(struct cache *cache) 2643 { 2644 int r; 2645 2646 r = dm_cache_begin_hints(cache->cmd, cache->policy); 2647 if (r) { 2648 DMERR("dm_cache_begin_hints failed"); 2649 return r; 2650 } 2651 2652 r = policy_walk_mappings(cache->policy, save_hint, cache); 2653 if (r) 2654 DMERR("policy_walk_mappings failed"); 2655 2656 return r; 2657 } 2658 2659 /* 2660 * returns true on success 2661 */ 2662 static bool sync_metadata(struct cache *cache) 2663 { 2664 int r1, r2, r3, r4; 2665 2666 r1 = 
write_dirty_bitset(cache); 2667 if (r1) 2668 DMERR("could not write dirty bitset"); 2669 2670 r2 = write_discard_bitset(cache); 2671 if (r2) 2672 DMERR("could not write discard bitset"); 2673 2674 save_stats(cache); 2675 2676 r3 = write_hints(cache); 2677 if (r3) 2678 DMERR("could not write hints"); 2679 2680 /* 2681 * If writing the above metadata failed, we still commit, but don't 2682 * set the clean shutdown flag. This will effectively force every 2683 * dirty bit to be set on reload. 2684 */ 2685 r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3); 2686 if (r4) 2687 DMERR("could not write cache metadata. Data loss may occur."); 2688 2689 return !r1 && !r2 && !r3 && !r4; 2690 } 2691 2692 static void cache_postsuspend(struct dm_target *ti) 2693 { 2694 struct cache *cache = ti->private; 2695 2696 start_quiescing(cache); 2697 wait_for_migrations(cache); 2698 stop_worker(cache); 2699 requeue_deferred_io(cache); 2700 stop_quiescing(cache); 2701 2702 (void) sync_metadata(cache); 2703 } 2704 2705 static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, 2706 bool dirty, uint32_t hint, bool hint_valid) 2707 { 2708 int r; 2709 struct cache *cache = context; 2710 2711 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid); 2712 if (r) 2713 return r; 2714 2715 if (dirty) 2716 set_dirty(cache, oblock, cblock); 2717 else 2718 clear_dirty(cache, oblock, cblock); 2719 2720 return 0; 2721 } 2722 2723 static int load_discard(void *context, sector_t discard_block_size, 2724 dm_dblock_t dblock, bool discard) 2725 { 2726 struct cache *cache = context; 2727 2728 /* FIXME: handle mis-matched block size */ 2729 2730 if (discard) 2731 set_discard(cache, dblock); 2732 else 2733 clear_discard(cache, dblock); 2734 2735 return 0; 2736 } 2737 2738 static dm_cblock_t get_cache_dev_size(struct cache *cache) 2739 { 2740 sector_t size = get_dev_size(cache->cache_dev); 2741 (void) sector_div(size, cache->sectors_per_block); 2742 return to_cblock(size); 2743 } 2744 2745 static bool can_resize(struct cache *cache, dm_cblock_t new_size) 2746 { 2747 if (from_cblock(new_size) > from_cblock(cache->cache_size)) 2748 return true; 2749 2750 /* 2751 * We can't drop a dirty block when shrinking the cache. 2752 */ 2753 while (from_cblock(new_size) < from_cblock(cache->cache_size)) { 2754 new_size = to_cblock(from_cblock(new_size) + 1); 2755 if (is_dirty(cache, new_size)) { 2756 DMERR("unable to shrink cache; cache block %llu is dirty", 2757 (unsigned long long) from_cblock(new_size)); 2758 return false; 2759 } 2760 } 2761 2762 return true; 2763 } 2764 2765 static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) 2766 { 2767 int r; 2768 2769 r = dm_cache_resize(cache->cmd, new_size); 2770 if (r) { 2771 DMERR("could not resize cache metadata"); 2772 return r; 2773 } 2774 2775 cache->cache_size = new_size; 2776 2777 return 0; 2778 } 2779 2780 static int cache_preresume(struct dm_target *ti) 2781 { 2782 int r = 0; 2783 struct cache *cache = ti->private; 2784 dm_cblock_t csize = get_cache_dev_size(cache); 2785 2786 /* 2787 * Check to see if the cache has resized. 
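	 * Growing the cache device is always allowed; shrinking is only
	 * permitted when none of the cache blocks that would be dropped
	 * are dirty (see can_resize() above).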
2788 */ 2789 if (!cache->sized) { 2790 r = resize_cache_dev(cache, csize); 2791 if (r) 2792 return r; 2793 2794 cache->sized = true; 2795 2796 } else if (csize != cache->cache_size) { 2797 if (!can_resize(cache, csize)) 2798 return -EINVAL; 2799 2800 r = resize_cache_dev(cache, csize); 2801 if (r) 2802 return r; 2803 } 2804 2805 if (!cache->loaded_mappings) { 2806 r = dm_cache_load_mappings(cache->cmd, cache->policy, 2807 load_mapping, cache); 2808 if (r) { 2809 DMERR("could not load cache mappings"); 2810 return r; 2811 } 2812 2813 cache->loaded_mappings = true; 2814 } 2815 2816 if (!cache->loaded_discards) { 2817 r = dm_cache_load_discards(cache->cmd, load_discard, cache); 2818 if (r) { 2819 DMERR("could not load origin discards"); 2820 return r; 2821 } 2822 2823 cache->loaded_discards = true; 2824 } 2825 2826 return r; 2827 } 2828 2829 static void cache_resume(struct dm_target *ti) 2830 { 2831 struct cache *cache = ti->private; 2832 2833 cache->need_tick_bio = true; 2834 do_waker(&cache->waker.work); 2835 } 2836 2837 /* 2838 * Status format: 2839 * 2840 * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 2841 * <cache block size> <#used cache blocks>/<#total cache blocks> 2842 * <#read hits> <#read misses> <#write hits> <#write misses> 2843 * <#demotions> <#promotions> <#dirty> 2844 * <#features> <features>* 2845 * <#core args> <core args> 2846 * <policy name> <#policy args> <policy args>* 2847 */ 2848 static void cache_status(struct dm_target *ti, status_type_t type, 2849 unsigned status_flags, char *result, unsigned maxlen) 2850 { 2851 int r = 0; 2852 unsigned i; 2853 ssize_t sz = 0; 2854 dm_block_t nr_free_blocks_metadata = 0; 2855 dm_block_t nr_blocks_metadata = 0; 2856 char buf[BDEVNAME_SIZE]; 2857 struct cache *cache = ti->private; 2858 dm_cblock_t residency; 2859 2860 switch (type) { 2861 case STATUSTYPE_INFO: 2862 /* Commit to ensure statistics aren't out-of-date */ 2863 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) { 2864 r = dm_cache_commit(cache->cmd, false); 2865 if (r) 2866 DMERR("could not commit metadata for accurate status"); 2867 } 2868 2869 r = dm_cache_get_free_metadata_block_count(cache->cmd, 2870 &nr_free_blocks_metadata); 2871 if (r) { 2872 DMERR("could not get metadata free block count"); 2873 goto err; 2874 } 2875 2876 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata); 2877 if (r) { 2878 DMERR("could not get metadata device size"); 2879 goto err; 2880 } 2881 2882 residency = policy_residency(cache->policy); 2883 2884 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %llu ", 2885 (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), 2886 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), 2887 (unsigned long long)nr_blocks_metadata, 2888 cache->sectors_per_block, 2889 (unsigned long long) from_cblock(residency), 2890 (unsigned long long) from_cblock(cache->cache_size), 2891 (unsigned) atomic_read(&cache->stats.read_hit), 2892 (unsigned) atomic_read(&cache->stats.read_miss), 2893 (unsigned) atomic_read(&cache->stats.write_hit), 2894 (unsigned) atomic_read(&cache->stats.write_miss), 2895 (unsigned) atomic_read(&cache->stats.demotion), 2896 (unsigned) atomic_read(&cache->stats.promotion), 2897 (unsigned long long) from_cblock(cache->nr_dirty)); 2898 2899 if (writethrough_mode(&cache->features)) 2900 DMEMIT("1 writethrough "); 2901 2902 else if (passthrough_mode(&cache->features)) 2903 DMEMIT("1 passthrough "); 2904 2905 else if (writeback_mode(&cache->features)) 2906 DMEMIT("1 writeback 
"); 2907 2908 else { 2909 DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode); 2910 goto err; 2911 } 2912 2913 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); 2914 2915 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy)); 2916 if (sz < maxlen) { 2917 r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz); 2918 if (r) 2919 DMERR("policy_emit_config_values returned %d", r); 2920 } 2921 2922 break; 2923 2924 case STATUSTYPE_TABLE: 2925 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); 2926 DMEMIT("%s ", buf); 2927 format_dev_t(buf, cache->cache_dev->bdev->bd_dev); 2928 DMEMIT("%s ", buf); 2929 format_dev_t(buf, cache->origin_dev->bdev->bd_dev); 2930 DMEMIT("%s", buf); 2931 2932 for (i = 0; i < cache->nr_ctr_args - 1; i++) 2933 DMEMIT(" %s", cache->ctr_args[i]); 2934 if (cache->nr_ctr_args) 2935 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); 2936 } 2937 2938 return; 2939 2940 err: 2941 DMEMIT("Error"); 2942 } 2943 2944 /* 2945 * A cache block range can take two forms: 2946 * 2947 * i) A single cblock, eg. '3456' 2948 * ii) A begin and end cblock with dots between, eg. 123-234 2949 */ 2950 static int parse_cblock_range(struct cache *cache, const char *str, 2951 struct cblock_range *result) 2952 { 2953 char dummy; 2954 uint64_t b, e; 2955 int r; 2956 2957 /* 2958 * Try and parse form (ii) first. 2959 */ 2960 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy); 2961 if (r < 0) 2962 return r; 2963 2964 if (r == 2) { 2965 result->begin = to_cblock(b); 2966 result->end = to_cblock(e); 2967 return 0; 2968 } 2969 2970 /* 2971 * That didn't work, try form (i). 2972 */ 2973 r = sscanf(str, "%llu%c", &b, &dummy); 2974 if (r < 0) 2975 return r; 2976 2977 if (r == 1) { 2978 result->begin = to_cblock(b); 2979 result->end = to_cblock(from_cblock(result->begin) + 1u); 2980 return 0; 2981 } 2982 2983 DMERR("invalid cblock range '%s'", str); 2984 return -EINVAL; 2985 } 2986 2987 static int validate_cblock_range(struct cache *cache, struct cblock_range *range) 2988 { 2989 uint64_t b = from_cblock(range->begin); 2990 uint64_t e = from_cblock(range->end); 2991 uint64_t n = from_cblock(cache->cache_size); 2992 2993 if (b >= n) { 2994 DMERR("begin cblock out of range: %llu >= %llu", b, n); 2995 return -EINVAL; 2996 } 2997 2998 if (e > n) { 2999 DMERR("end cblock out of range: %llu > %llu", e, n); 3000 return -EINVAL; 3001 } 3002 3003 if (b >= e) { 3004 DMERR("invalid cblock range: %llu >= %llu", b, e); 3005 return -EINVAL; 3006 } 3007 3008 return 0; 3009 } 3010 3011 static int request_invalidation(struct cache *cache, struct cblock_range *range) 3012 { 3013 struct invalidation_request req; 3014 3015 INIT_LIST_HEAD(&req.list); 3016 req.cblocks = range; 3017 atomic_set(&req.complete, 0); 3018 req.err = 0; 3019 init_waitqueue_head(&req.result_wait); 3020 3021 spin_lock(&cache->invalidation_lock); 3022 list_add(&req.list, &cache->invalidation_requests); 3023 spin_unlock(&cache->invalidation_lock); 3024 wake_worker(cache); 3025 3026 wait_event(req.result_wait, atomic_read(&req.complete)); 3027 return req.err; 3028 } 3029 3030 static int process_invalidate_cblocks_message(struct cache *cache, unsigned count, 3031 const char **cblock_ranges) 3032 { 3033 int r = 0; 3034 unsigned i; 3035 struct cblock_range range; 3036 3037 if (!passthrough_mode(&cache->features)) { 3038 DMERR("cache has to be in passthrough mode for invalidation"); 3039 return -EPERM; 3040 } 3041 3042 for (i = 0; i < count; i++) { 3043 r = parse_cblock_range(cache, 
cblock_ranges[i], &range); 3044 if (r) 3045 break; 3046 3047 r = validate_cblock_range(cache, &range); 3048 if (r) 3049 break; 3050 3051 /* 3052 * Pass begin and end origin blocks to the worker and wake it. 3053 */ 3054 r = request_invalidation(cache, &range); 3055 if (r) 3056 break; 3057 } 3058 3059 return r; 3060 } 3061 3062 /* 3063 * Supports 3064 * "<key> <value>" 3065 * and 3066 * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]* 3067 * 3068 * The key migration_threshold is supported by the cache target core. 3069 */ 3070 static int cache_message(struct dm_target *ti, unsigned argc, char **argv) 3071 { 3072 struct cache *cache = ti->private; 3073 3074 if (!argc) 3075 return -EINVAL; 3076 3077 if (!strcasecmp(argv[0], "invalidate_cblocks")) 3078 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1); 3079 3080 if (argc != 2) 3081 return -EINVAL; 3082 3083 return set_config_value(cache, argv[0], argv[1]); 3084 } 3085 3086 static int cache_iterate_devices(struct dm_target *ti, 3087 iterate_devices_callout_fn fn, void *data) 3088 { 3089 int r = 0; 3090 struct cache *cache = ti->private; 3091 3092 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data); 3093 if (!r) 3094 r = fn(ti, cache->origin_dev, 0, ti->len, data); 3095 3096 return r; 3097 } 3098 3099 /* 3100 * We assume I/O is going to the origin (which is the volume 3101 * more likely to have restrictions e.g. by being striped). 3102 * (Looking up the exact location of the data would be expensive 3103 * and could always be out of date by the time the bio is submitted.) 3104 */ 3105 static int cache_bvec_merge(struct dm_target *ti, 3106 struct bvec_merge_data *bvm, 3107 struct bio_vec *biovec, int max_size) 3108 { 3109 struct cache *cache = ti->private; 3110 struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev); 3111 3112 if (!q->merge_bvec_fn) 3113 return max_size; 3114 3115 bvm->bi_bdev = cache->origin_dev->bdev; 3116 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 3117 } 3118 3119 static void set_discard_limits(struct cache *cache, struct queue_limits *limits) 3120 { 3121 /* 3122 * FIXME: these limits may be incompatible with the cache device 3123 */ 3124 limits->max_discard_sectors = cache->discard_block_size * 1024; 3125 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; 3126 } 3127 3128 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) 3129 { 3130 struct cache *cache = ti->private; 3131 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 3132 3133 /* 3134 * If the system-determined stacked limits are compatible with the 3135 * cache's blocksize (io_opt is a factor) do not override them. 
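	 *
	 * For example (figures illustrative only): with 512 sector cache
	 * blocks, a stacked io_opt of 2048 sectors is a whole number of
	 * cache blocks and is left alone, whereas an io_opt of 768 sectors
	 * would be overridden with the cache block size.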
3136 */ 3137 if (io_opt_sectors < cache->sectors_per_block || 3138 do_div(io_opt_sectors, cache->sectors_per_block)) { 3139 blk_limits_io_min(limits, 0); 3140 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); 3141 } 3142 set_discard_limits(cache, limits); 3143 } 3144 3145 /*----------------------------------------------------------------*/ 3146 3147 static struct target_type cache_target = { 3148 .name = "cache", 3149 .version = {1, 3, 0}, 3150 .module = THIS_MODULE, 3151 .ctr = cache_ctr, 3152 .dtr = cache_dtr, 3153 .map = cache_map, 3154 .end_io = cache_end_io, 3155 .postsuspend = cache_postsuspend, 3156 .preresume = cache_preresume, 3157 .resume = cache_resume, 3158 .status = cache_status, 3159 .message = cache_message, 3160 .iterate_devices = cache_iterate_devices, 3161 .merge = cache_bvec_merge, 3162 .io_hints = cache_io_hints, 3163 }; 3164 3165 static int __init dm_cache_init(void) 3166 { 3167 int r; 3168 3169 r = dm_register_target(&cache_target); 3170 if (r) { 3171 DMERR("cache target registration failed: %d", r); 3172 return r; 3173 } 3174 3175 migration_cache = KMEM_CACHE(dm_cache_migration, 0); 3176 if (!migration_cache) { 3177 dm_unregister_target(&cache_target); 3178 return -ENOMEM; 3179 } 3180 3181 return 0; 3182 } 3183 3184 static void __exit dm_cache_exit(void) 3185 { 3186 dm_unregister_target(&cache_target); 3187 kmem_cache_destroy(migration_cache); 3188 } 3189 3190 module_init(dm_cache_init); 3191 module_exit(dm_cache_exit); 3192 3193 MODULE_DESCRIPTION(DM_NAME " cache target"); 3194 MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 3195 MODULE_LICENSE("GPL"); 3196