/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
}

static unsigned long *alloc_bitset(unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	return vzalloc(s);
}

static void clear_bitset(void *bitset, unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	memset(bitset, 0, s);
}

static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
	void *bi_private;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;
	h->bi_private = bio->bi_private;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
	bio->bi_private = h->bi_private;

	/*
	 * Must bump bi_remaining to allow bio to complete with
	 * restored bi_end_io.
	 */
	atomic_inc(&bio->bi_remaining);
}

/*----------------------------------------------------------------*/

#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

/*
 * FIXME: the cache is read/write for the time being.
 */
enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
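	 *
	 * In this target a write to a clean, cached block is sent to the
	 * origin first; writethrough_endio() then remaps the bio to the
	 * cache and hands it to the worker via deferred_writethrough_bios
	 * (see remap_to_origin_then_cache() below).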
129 */ 130 CM_IO_WRITETHROUGH, 131 132 /* 133 * A degraded mode useful for various cache coherency situations 134 * (eg, rolling back snapshots). Reads and writes always go to the 135 * origin. If a write goes to a cached oblock, then the cache 136 * block is invalidated. 137 */ 138 CM_IO_PASSTHROUGH 139 }; 140 141 struct cache_features { 142 enum cache_metadata_mode mode; 143 enum cache_io_mode io_mode; 144 }; 145 146 struct cache_stats { 147 atomic_t read_hit; 148 atomic_t read_miss; 149 atomic_t write_hit; 150 atomic_t write_miss; 151 atomic_t demotion; 152 atomic_t promotion; 153 atomic_t copies_avoided; 154 atomic_t cache_cell_clash; 155 atomic_t commit_count; 156 atomic_t discard_count; 157 }; 158 159 /* 160 * Defines a range of cblocks, begin to (end - 1) are in the range. end is 161 * the one-past-the-end value. 162 */ 163 struct cblock_range { 164 dm_cblock_t begin; 165 dm_cblock_t end; 166 }; 167 168 struct invalidation_request { 169 struct list_head list; 170 struct cblock_range *cblocks; 171 172 atomic_t complete; 173 int err; 174 175 wait_queue_head_t result_wait; 176 }; 177 178 struct cache { 179 struct dm_target *ti; 180 struct dm_target_callbacks callbacks; 181 182 struct dm_cache_metadata *cmd; 183 184 /* 185 * Metadata is written to this device. 186 */ 187 struct dm_dev *metadata_dev; 188 189 /* 190 * The slower of the two data devices. Typically a spindle. 191 */ 192 struct dm_dev *origin_dev; 193 194 /* 195 * The faster of the two data devices. Typically an SSD. 196 */ 197 struct dm_dev *cache_dev; 198 199 /* 200 * Size of the origin device in _complete_ blocks and native sectors. 201 */ 202 dm_oblock_t origin_blocks; 203 sector_t origin_sectors; 204 205 /* 206 * Size of the cache device in blocks. 207 */ 208 dm_cblock_t cache_size; 209 210 /* 211 * Fields for converting from sectors to blocks. 212 */ 213 uint32_t sectors_per_block; 214 int sectors_per_block_shift; 215 216 spinlock_t lock; 217 struct bio_list deferred_bios; 218 struct bio_list deferred_flush_bios; 219 struct bio_list deferred_writethrough_bios; 220 struct list_head quiesced_migrations; 221 struct list_head completed_migrations; 222 struct list_head need_commit_migrations; 223 sector_t migration_threshold; 224 wait_queue_head_t migration_wait; 225 atomic_t nr_migrations; 226 227 wait_queue_head_t quiescing_wait; 228 atomic_t quiescing; 229 atomic_t quiescing_ack; 230 231 /* 232 * cache_size entries, dirty if set 233 */ 234 dm_cblock_t nr_dirty; 235 unsigned long *dirty_bitset; 236 237 /* 238 * origin_blocks entries, discarded if set. 239 */ 240 dm_oblock_t discard_nr_blocks; 241 unsigned long *discard_bitset; 242 243 /* 244 * Rather than reconstructing the table line for the status we just 245 * save it and regurgitate. 246 */ 247 unsigned nr_ctr_args; 248 const char **ctr_args; 249 250 struct dm_kcopyd_client *copier; 251 struct workqueue_struct *wq; 252 struct work_struct worker; 253 254 struct delayed_work waker; 255 unsigned long last_commit_jiffies; 256 257 struct dm_bio_prison *prison; 258 struct dm_deferred_set *all_io_ds; 259 260 mempool_t *migration_pool; 261 struct dm_cache_migration *next_migration; 262 263 struct dm_cache_policy *policy; 264 unsigned policy_nr_args; 265 266 bool need_tick_bio:1; 267 bool sized:1; 268 bool invalidate:1; 269 bool commit_requested:1; 270 bool loaded_mappings:1; 271 bool loaded_discards:1; 272 273 /* 274 * Cache features such as write-through. 275 */ 276 struct cache_features features; 277 278 struct cache_stats stats; 279 280 /* 281 * Invalidation fields. 
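	 *
	 * Ranges of cblocks to drop are queued on invalidation_requests
	 * (protected by invalidation_lock); the worker drains the list in
	 * process_invalidation_requests() and signals completion of each
	 * request through its result_wait waitqueue.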
282 */ 283 spinlock_t invalidation_lock; 284 struct list_head invalidation_requests; 285 }; 286 287 struct per_bio_data { 288 bool tick:1; 289 unsigned req_nr:2; 290 struct dm_deferred_entry *all_io_entry; 291 struct dm_hook_info hook_info; 292 293 /* 294 * writethrough fields. These MUST remain at the end of this 295 * structure and the 'cache' member must be the first as it 296 * is used to determine the offset of the writethrough fields. 297 */ 298 struct cache *cache; 299 dm_cblock_t cblock; 300 struct dm_bio_details bio_details; 301 }; 302 303 struct dm_cache_migration { 304 struct list_head list; 305 struct cache *cache; 306 307 unsigned long start_jiffies; 308 dm_oblock_t old_oblock; 309 dm_oblock_t new_oblock; 310 dm_cblock_t cblock; 311 312 bool err:1; 313 bool writeback:1; 314 bool demote:1; 315 bool promote:1; 316 bool requeue_holder:1; 317 bool invalidate:1; 318 319 struct dm_bio_prison_cell *old_ocell; 320 struct dm_bio_prison_cell *new_ocell; 321 }; 322 323 /* 324 * Processing a bio in the worker thread may require these memory 325 * allocations. We prealloc to avoid deadlocks (the same worker thread 326 * frees them back to the mempool). 327 */ 328 struct prealloc { 329 struct dm_cache_migration *mg; 330 struct dm_bio_prison_cell *cell1; 331 struct dm_bio_prison_cell *cell2; 332 }; 333 334 static void wake_worker(struct cache *cache) 335 { 336 queue_work(cache->wq, &cache->worker); 337 } 338 339 /*----------------------------------------------------------------*/ 340 341 static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache) 342 { 343 /* FIXME: change to use a local slab. */ 344 return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT); 345 } 346 347 static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell) 348 { 349 dm_bio_prison_free_cell(cache->prison, cell); 350 } 351 352 static int prealloc_data_structs(struct cache *cache, struct prealloc *p) 353 { 354 if (!p->mg) { 355 p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); 356 if (!p->mg) 357 return -ENOMEM; 358 } 359 360 if (!p->cell1) { 361 p->cell1 = alloc_prison_cell(cache); 362 if (!p->cell1) 363 return -ENOMEM; 364 } 365 366 if (!p->cell2) { 367 p->cell2 = alloc_prison_cell(cache); 368 if (!p->cell2) 369 return -ENOMEM; 370 } 371 372 return 0; 373 } 374 375 static void prealloc_free_structs(struct cache *cache, struct prealloc *p) 376 { 377 if (p->cell2) 378 free_prison_cell(cache, p->cell2); 379 380 if (p->cell1) 381 free_prison_cell(cache, p->cell1); 382 383 if (p->mg) 384 mempool_free(p->mg, cache->migration_pool); 385 } 386 387 static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) 388 { 389 struct dm_cache_migration *mg = p->mg; 390 391 BUG_ON(!mg); 392 p->mg = NULL; 393 394 return mg; 395 } 396 397 /* 398 * You must have a cell within the prealloc struct to return. If not this 399 * function will BUG() rather than returning NULL. 400 */ 401 static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p) 402 { 403 struct dm_bio_prison_cell *r = NULL; 404 405 if (p->cell1) { 406 r = p->cell1; 407 p->cell1 = NULL; 408 409 } else if (p->cell2) { 410 r = p->cell2; 411 p->cell2 = NULL; 412 } else 413 BUG(); 414 415 return r; 416 } 417 418 /* 419 * You can't have more than two cells in a prealloc struct. BUG() will be 420 * called if you try and overfill. 
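 *
 * A rough sketch of how the worker uses the pair (see process_bio()):
 *
 *	prealloc_data_structs(cache, structs);
 *	cell = prealloc_get_cell(structs);
 *	r = bio_detain(cache, block, bio, cell,
 *		       (cell_free_fn) prealloc_put_cell,
 *		       structs, &cell_result);
 *
 * bio_detain() hands any unused cell back through prealloc_put_cell(),
 * so the two-cell limit is never exceeded.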
421 */ 422 static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell) 423 { 424 if (!p->cell2) 425 p->cell2 = cell; 426 427 else if (!p->cell1) 428 p->cell1 = cell; 429 430 else 431 BUG(); 432 } 433 434 /*----------------------------------------------------------------*/ 435 436 static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) 437 { 438 key->virtual = 0; 439 key->dev = 0; 440 key->block = from_oblock(oblock); 441 } 442 443 /* 444 * The caller hands in a preallocated cell, and a free function for it. 445 * The cell will be freed if there's an error, or if it wasn't used because 446 * a cell with that key already exists. 447 */ 448 typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); 449 450 static int bio_detain(struct cache *cache, dm_oblock_t oblock, 451 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, 452 cell_free_fn free_fn, void *free_context, 453 struct dm_bio_prison_cell **cell_result) 454 { 455 int r; 456 struct dm_cell_key key; 457 458 build_key(oblock, &key); 459 r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); 460 if (r) 461 free_fn(free_context, cell_prealloc); 462 463 return r; 464 } 465 466 static int get_cell(struct cache *cache, 467 dm_oblock_t oblock, 468 struct prealloc *structs, 469 struct dm_bio_prison_cell **cell_result) 470 { 471 int r; 472 struct dm_cell_key key; 473 struct dm_bio_prison_cell *cell_prealloc; 474 475 cell_prealloc = prealloc_get_cell(structs); 476 477 build_key(oblock, &key); 478 r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); 479 if (r) 480 prealloc_put_cell(structs, cell_prealloc); 481 482 return r; 483 } 484 485 /*----------------------------------------------------------------*/ 486 487 static bool is_dirty(struct cache *cache, dm_cblock_t b) 488 { 489 return test_bit(from_cblock(b), cache->dirty_bitset); 490 } 491 492 static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) 493 { 494 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { 495 cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1); 496 policy_set_dirty(cache->policy, oblock); 497 } 498 } 499 500 static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) 501 { 502 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { 503 policy_clear_dirty(cache->policy, oblock); 504 cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1); 505 if (!from_cblock(cache->nr_dirty)) 506 dm_table_event(cache->ti->table); 507 } 508 } 509 510 /*----------------------------------------------------------------*/ 511 512 static bool block_size_is_power_of_two(struct cache *cache) 513 { 514 return cache->sectors_per_block_shift >= 0; 515 } 516 517 /* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */ 518 #if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6 519 __always_inline 520 #endif 521 static dm_block_t block_div(dm_block_t b, uint32_t n) 522 { 523 do_div(b, n); 524 525 return b; 526 } 527 528 static void set_discard(struct cache *cache, dm_oblock_t b) 529 { 530 unsigned long flags; 531 532 atomic_inc(&cache->stats.discard_count); 533 534 spin_lock_irqsave(&cache->lock, flags); 535 set_bit(from_oblock(b), cache->discard_bitset); 536 spin_unlock_irqrestore(&cache->lock, flags); 537 } 538 539 static void clear_discard(struct cache *cache, dm_oblock_t b) 540 { 541 unsigned long flags; 542 543 spin_lock_irqsave(&cache->lock, flags); 544 clear_bit(from_oblock(b), 
cache->discard_bitset); 545 spin_unlock_irqrestore(&cache->lock, flags); 546 } 547 548 static bool is_discarded(struct cache *cache, dm_oblock_t b) 549 { 550 int r; 551 unsigned long flags; 552 553 spin_lock_irqsave(&cache->lock, flags); 554 r = test_bit(from_oblock(b), cache->discard_bitset); 555 spin_unlock_irqrestore(&cache->lock, flags); 556 557 return r; 558 } 559 560 static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) 561 { 562 int r; 563 unsigned long flags; 564 565 spin_lock_irqsave(&cache->lock, flags); 566 r = test_bit(from_oblock(b), cache->discard_bitset); 567 spin_unlock_irqrestore(&cache->lock, flags); 568 569 return r; 570 } 571 572 /*----------------------------------------------------------------*/ 573 574 static void load_stats(struct cache *cache) 575 { 576 struct dm_cache_statistics stats; 577 578 dm_cache_metadata_get_stats(cache->cmd, &stats); 579 atomic_set(&cache->stats.read_hit, stats.read_hits); 580 atomic_set(&cache->stats.read_miss, stats.read_misses); 581 atomic_set(&cache->stats.write_hit, stats.write_hits); 582 atomic_set(&cache->stats.write_miss, stats.write_misses); 583 } 584 585 static void save_stats(struct cache *cache) 586 { 587 struct dm_cache_statistics stats; 588 589 stats.read_hits = atomic_read(&cache->stats.read_hit); 590 stats.read_misses = atomic_read(&cache->stats.read_miss); 591 stats.write_hits = atomic_read(&cache->stats.write_hit); 592 stats.write_misses = atomic_read(&cache->stats.write_miss); 593 594 dm_cache_metadata_set_stats(cache->cmd, &stats); 595 } 596 597 /*---------------------------------------------------------------- 598 * Per bio data 599 *--------------------------------------------------------------*/ 600 601 /* 602 * If using writeback, leave out struct per_bio_data's writethrough fields. 603 */ 604 #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) 605 #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) 606 607 static bool writethrough_mode(struct cache_features *f) 608 { 609 return f->io_mode == CM_IO_WRITETHROUGH; 610 } 611 612 static bool writeback_mode(struct cache_features *f) 613 { 614 return f->io_mode == CM_IO_WRITEBACK; 615 } 616 617 static bool passthrough_mode(struct cache_features *f) 618 { 619 return f->io_mode == CM_IO_PASSTHROUGH; 620 } 621 622 static size_t get_per_bio_data_size(struct cache *cache) 623 { 624 return writethrough_mode(&cache->features) ? 
PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; 625 } 626 627 static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) 628 { 629 struct per_bio_data *pb = dm_per_bio_data(bio, data_size); 630 BUG_ON(!pb); 631 return pb; 632 } 633 634 static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) 635 { 636 struct per_bio_data *pb = get_per_bio_data(bio, data_size); 637 638 pb->tick = false; 639 pb->req_nr = dm_bio_get_target_bio_nr(bio); 640 pb->all_io_entry = NULL; 641 642 return pb; 643 } 644 645 /*---------------------------------------------------------------- 646 * Remapping 647 *--------------------------------------------------------------*/ 648 static void remap_to_origin(struct cache *cache, struct bio *bio) 649 { 650 bio->bi_bdev = cache->origin_dev->bdev; 651 } 652 653 static void remap_to_cache(struct cache *cache, struct bio *bio, 654 dm_cblock_t cblock) 655 { 656 sector_t bi_sector = bio->bi_iter.bi_sector; 657 sector_t block = from_cblock(cblock); 658 659 bio->bi_bdev = cache->cache_dev->bdev; 660 if (!block_size_is_power_of_two(cache)) 661 bio->bi_iter.bi_sector = 662 (block * cache->sectors_per_block) + 663 sector_div(bi_sector, cache->sectors_per_block); 664 else 665 bio->bi_iter.bi_sector = 666 (block << cache->sectors_per_block_shift) | 667 (bi_sector & (cache->sectors_per_block - 1)); 668 } 669 670 static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) 671 { 672 unsigned long flags; 673 size_t pb_data_size = get_per_bio_data_size(cache); 674 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 675 676 spin_lock_irqsave(&cache->lock, flags); 677 if (cache->need_tick_bio && 678 !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) { 679 pb->tick = true; 680 cache->need_tick_bio = false; 681 } 682 spin_unlock_irqrestore(&cache->lock, flags); 683 } 684 685 static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, 686 dm_oblock_t oblock) 687 { 688 check_if_tick_bio_needed(cache, bio); 689 remap_to_origin(cache, bio); 690 if (bio_data_dir(bio) == WRITE) 691 clear_discard(cache, oblock); 692 } 693 694 static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, 695 dm_oblock_t oblock, dm_cblock_t cblock) 696 { 697 check_if_tick_bio_needed(cache, bio); 698 remap_to_cache(cache, bio, cblock); 699 if (bio_data_dir(bio) == WRITE) { 700 set_dirty(cache, oblock, cblock); 701 clear_discard(cache, oblock); 702 } 703 } 704 705 static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) 706 { 707 sector_t block_nr = bio->bi_iter.bi_sector; 708 709 if (!block_size_is_power_of_two(cache)) 710 (void) sector_div(block_nr, cache->sectors_per_block); 711 else 712 block_nr >>= cache->sectors_per_block_shift; 713 714 return to_oblock(block_nr); 715 } 716 717 static int bio_triggers_commit(struct cache *cache, struct bio *bio) 718 { 719 return bio->bi_rw & (REQ_FLUSH | REQ_FUA); 720 } 721 722 static void issue(struct cache *cache, struct bio *bio) 723 { 724 unsigned long flags; 725 726 if (!bio_triggers_commit(cache, bio)) { 727 generic_make_request(bio); 728 return; 729 } 730 731 /* 732 * Batch together any bios that trigger commits and then issue a 733 * single commit for them in do_worker(). 
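	 *
	 * do_worker() later calls commit_if_needed(); if the commit
	 * succeeds the batched bios are submitted, otherwise they are
	 * errored (see process_deferred_flush_bios()).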
734 */ 735 spin_lock_irqsave(&cache->lock, flags); 736 cache->commit_requested = true; 737 bio_list_add(&cache->deferred_flush_bios, bio); 738 spin_unlock_irqrestore(&cache->lock, flags); 739 } 740 741 static void defer_writethrough_bio(struct cache *cache, struct bio *bio) 742 { 743 unsigned long flags; 744 745 spin_lock_irqsave(&cache->lock, flags); 746 bio_list_add(&cache->deferred_writethrough_bios, bio); 747 spin_unlock_irqrestore(&cache->lock, flags); 748 749 wake_worker(cache); 750 } 751 752 static void writethrough_endio(struct bio *bio, int err) 753 { 754 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); 755 756 dm_unhook_bio(&pb->hook_info, bio); 757 758 if (err) { 759 bio_endio(bio, err); 760 return; 761 } 762 763 dm_bio_restore(&pb->bio_details, bio); 764 remap_to_cache(pb->cache, bio, pb->cblock); 765 766 /* 767 * We can't issue this bio directly, since we're in interrupt 768 * context. So it gets put on a bio list for processing by the 769 * worker thread. 770 */ 771 defer_writethrough_bio(pb->cache, bio); 772 } 773 774 /* 775 * When running in writethrough mode we need to send writes to clean blocks 776 * to both the cache and origin devices. In future we'd like to clone the 777 * bio and send them in parallel, but for now we're doing them in 778 * series as this is easier. 779 */ 780 static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, 781 dm_oblock_t oblock, dm_cblock_t cblock) 782 { 783 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); 784 785 pb->cache = cache; 786 pb->cblock = cblock; 787 dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL); 788 dm_bio_record(&pb->bio_details, bio); 789 790 remap_to_origin_clear_discard(pb->cache, bio, oblock); 791 } 792 793 /*---------------------------------------------------------------- 794 * Migration processing 795 * 796 * Migration covers moving data from the origin device to the cache, or 797 * vice versa. 798 *--------------------------------------------------------------*/ 799 static void free_migration(struct dm_cache_migration *mg) 800 { 801 mempool_free(mg, mg->cache->migration_pool); 802 } 803 804 static void inc_nr_migrations(struct cache *cache) 805 { 806 atomic_inc(&cache->nr_migrations); 807 } 808 809 static void dec_nr_migrations(struct cache *cache) 810 { 811 atomic_dec(&cache->nr_migrations); 812 813 /* 814 * Wake the worker in case we're suspending the target. 815 */ 816 wake_up(&cache->migration_wait); 817 } 818 819 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, 820 bool holder) 821 { 822 (holder ? 
dm_cell_release : dm_cell_release_no_holder) 823 (cache->prison, cell, &cache->deferred_bios); 824 free_prison_cell(cache, cell); 825 } 826 827 static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, 828 bool holder) 829 { 830 unsigned long flags; 831 832 spin_lock_irqsave(&cache->lock, flags); 833 __cell_defer(cache, cell, holder); 834 spin_unlock_irqrestore(&cache->lock, flags); 835 836 wake_worker(cache); 837 } 838 839 static void cleanup_migration(struct dm_cache_migration *mg) 840 { 841 struct cache *cache = mg->cache; 842 free_migration(mg); 843 dec_nr_migrations(cache); 844 } 845 846 static void migration_failure(struct dm_cache_migration *mg) 847 { 848 struct cache *cache = mg->cache; 849 850 if (mg->writeback) { 851 DMWARN_LIMIT("writeback failed; couldn't copy block"); 852 set_dirty(cache, mg->old_oblock, mg->cblock); 853 cell_defer(cache, mg->old_ocell, false); 854 855 } else if (mg->demote) { 856 DMWARN_LIMIT("demotion failed; couldn't copy block"); 857 policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); 858 859 cell_defer(cache, mg->old_ocell, mg->promote ? false : true); 860 if (mg->promote) 861 cell_defer(cache, mg->new_ocell, true); 862 } else { 863 DMWARN_LIMIT("promotion failed; couldn't copy block"); 864 policy_remove_mapping(cache->policy, mg->new_oblock); 865 cell_defer(cache, mg->new_ocell, true); 866 } 867 868 cleanup_migration(mg); 869 } 870 871 static void migration_success_pre_commit(struct dm_cache_migration *mg) 872 { 873 unsigned long flags; 874 struct cache *cache = mg->cache; 875 876 if (mg->writeback) { 877 cell_defer(cache, mg->old_ocell, false); 878 clear_dirty(cache, mg->old_oblock, mg->cblock); 879 cleanup_migration(mg); 880 return; 881 882 } else if (mg->demote) { 883 if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) { 884 DMWARN_LIMIT("demotion failed; couldn't update on disk metadata"); 885 policy_force_mapping(cache->policy, mg->new_oblock, 886 mg->old_oblock); 887 if (mg->promote) 888 cell_defer(cache, mg->new_ocell, true); 889 cleanup_migration(mg); 890 return; 891 } 892 } else { 893 if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { 894 DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); 895 policy_remove_mapping(cache->policy, mg->new_oblock); 896 cleanup_migration(mg); 897 return; 898 } 899 } 900 901 spin_lock_irqsave(&cache->lock, flags); 902 list_add_tail(&mg->list, &cache->need_commit_migrations); 903 cache->commit_requested = true; 904 spin_unlock_irqrestore(&cache->lock, flags); 905 } 906 907 static void migration_success_post_commit(struct dm_cache_migration *mg) 908 { 909 unsigned long flags; 910 struct cache *cache = mg->cache; 911 912 if (mg->writeback) { 913 DMWARN("writeback unexpectedly triggered commit"); 914 return; 915 916 } else if (mg->demote) { 917 cell_defer(cache, mg->old_ocell, mg->promote ? 
false : true); 918 919 if (mg->promote) { 920 mg->demote = false; 921 922 spin_lock_irqsave(&cache->lock, flags); 923 list_add_tail(&mg->list, &cache->quiesced_migrations); 924 spin_unlock_irqrestore(&cache->lock, flags); 925 926 } else { 927 if (mg->invalidate) 928 policy_remove_mapping(cache->policy, mg->old_oblock); 929 cleanup_migration(mg); 930 } 931 932 } else { 933 if (mg->requeue_holder) 934 cell_defer(cache, mg->new_ocell, true); 935 else { 936 bio_endio(mg->new_ocell->holder, 0); 937 cell_defer(cache, mg->new_ocell, false); 938 } 939 clear_dirty(cache, mg->new_oblock, mg->cblock); 940 cleanup_migration(mg); 941 } 942 } 943 944 static void copy_complete(int read_err, unsigned long write_err, void *context) 945 { 946 unsigned long flags; 947 struct dm_cache_migration *mg = (struct dm_cache_migration *) context; 948 struct cache *cache = mg->cache; 949 950 if (read_err || write_err) 951 mg->err = true; 952 953 spin_lock_irqsave(&cache->lock, flags); 954 list_add_tail(&mg->list, &cache->completed_migrations); 955 spin_unlock_irqrestore(&cache->lock, flags); 956 957 wake_worker(cache); 958 } 959 960 static void issue_copy_real(struct dm_cache_migration *mg) 961 { 962 int r; 963 struct dm_io_region o_region, c_region; 964 struct cache *cache = mg->cache; 965 sector_t cblock = from_cblock(mg->cblock); 966 967 o_region.bdev = cache->origin_dev->bdev; 968 o_region.count = cache->sectors_per_block; 969 970 c_region.bdev = cache->cache_dev->bdev; 971 c_region.sector = cblock * cache->sectors_per_block; 972 c_region.count = cache->sectors_per_block; 973 974 if (mg->writeback || mg->demote) { 975 /* demote */ 976 o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block; 977 r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg); 978 } else { 979 /* promote */ 980 o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block; 981 r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg); 982 } 983 984 if (r < 0) { 985 DMERR_LIMIT("issuing migration failed"); 986 migration_failure(mg); 987 } 988 } 989 990 static void overwrite_endio(struct bio *bio, int err) 991 { 992 struct dm_cache_migration *mg = bio->bi_private; 993 struct cache *cache = mg->cache; 994 size_t pb_data_size = get_per_bio_data_size(cache); 995 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 996 unsigned long flags; 997 998 dm_unhook_bio(&pb->hook_info, bio); 999 1000 if (err) 1001 mg->err = true; 1002 1003 mg->requeue_holder = false; 1004 1005 spin_lock_irqsave(&cache->lock, flags); 1006 list_add_tail(&mg->list, &cache->completed_migrations); 1007 spin_unlock_irqrestore(&cache->lock, flags); 1008 1009 wake_worker(cache); 1010 } 1011 1012 static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio) 1013 { 1014 size_t pb_data_size = get_per_bio_data_size(mg->cache); 1015 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 1016 1017 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); 1018 remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock); 1019 generic_make_request(bio); 1020 } 1021 1022 static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) 1023 { 1024 return (bio_data_dir(bio) == WRITE) && 1025 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); 1026 } 1027 1028 static void avoid_copy(struct dm_cache_migration *mg) 1029 { 1030 atomic_inc(&mg->cache->stats.copies_avoided); 1031 migration_success_pre_commit(mg); 1032 } 1033 1034 static void issue_copy(struct 
dm_cache_migration *mg) 1035 { 1036 bool avoid; 1037 struct cache *cache = mg->cache; 1038 1039 if (mg->writeback || mg->demote) 1040 avoid = !is_dirty(cache, mg->cblock) || 1041 is_discarded_oblock(cache, mg->old_oblock); 1042 else { 1043 struct bio *bio = mg->new_ocell->holder; 1044 1045 avoid = is_discarded_oblock(cache, mg->new_oblock); 1046 1047 if (!avoid && bio_writes_complete_block(cache, bio)) { 1048 issue_overwrite(mg, bio); 1049 return; 1050 } 1051 } 1052 1053 avoid ? avoid_copy(mg) : issue_copy_real(mg); 1054 } 1055 1056 static void complete_migration(struct dm_cache_migration *mg) 1057 { 1058 if (mg->err) 1059 migration_failure(mg); 1060 else 1061 migration_success_pre_commit(mg); 1062 } 1063 1064 static void process_migrations(struct cache *cache, struct list_head *head, 1065 void (*fn)(struct dm_cache_migration *)) 1066 { 1067 unsigned long flags; 1068 struct list_head list; 1069 struct dm_cache_migration *mg, *tmp; 1070 1071 INIT_LIST_HEAD(&list); 1072 spin_lock_irqsave(&cache->lock, flags); 1073 list_splice_init(head, &list); 1074 spin_unlock_irqrestore(&cache->lock, flags); 1075 1076 list_for_each_entry_safe(mg, tmp, &list, list) 1077 fn(mg); 1078 } 1079 1080 static void __queue_quiesced_migration(struct dm_cache_migration *mg) 1081 { 1082 list_add_tail(&mg->list, &mg->cache->quiesced_migrations); 1083 } 1084 1085 static void queue_quiesced_migration(struct dm_cache_migration *mg) 1086 { 1087 unsigned long flags; 1088 struct cache *cache = mg->cache; 1089 1090 spin_lock_irqsave(&cache->lock, flags); 1091 __queue_quiesced_migration(mg); 1092 spin_unlock_irqrestore(&cache->lock, flags); 1093 1094 wake_worker(cache); 1095 } 1096 1097 static void queue_quiesced_migrations(struct cache *cache, struct list_head *work) 1098 { 1099 unsigned long flags; 1100 struct dm_cache_migration *mg, *tmp; 1101 1102 spin_lock_irqsave(&cache->lock, flags); 1103 list_for_each_entry_safe(mg, tmp, work, list) 1104 __queue_quiesced_migration(mg); 1105 spin_unlock_irqrestore(&cache->lock, flags); 1106 1107 wake_worker(cache); 1108 } 1109 1110 static void check_for_quiesced_migrations(struct cache *cache, 1111 struct per_bio_data *pb) 1112 { 1113 struct list_head work; 1114 1115 if (!pb->all_io_entry) 1116 return; 1117 1118 INIT_LIST_HEAD(&work); 1119 if (pb->all_io_entry) 1120 dm_deferred_entry_dec(pb->all_io_entry, &work); 1121 1122 if (!list_empty(&work)) 1123 queue_quiesced_migrations(cache, &work); 1124 } 1125 1126 static void quiesce_migration(struct dm_cache_migration *mg) 1127 { 1128 if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list)) 1129 queue_quiesced_migration(mg); 1130 } 1131 1132 static void promote(struct cache *cache, struct prealloc *structs, 1133 dm_oblock_t oblock, dm_cblock_t cblock, 1134 struct dm_bio_prison_cell *cell) 1135 { 1136 struct dm_cache_migration *mg = prealloc_get_migration(structs); 1137 1138 mg->err = false; 1139 mg->writeback = false; 1140 mg->demote = false; 1141 mg->promote = true; 1142 mg->requeue_holder = true; 1143 mg->invalidate = false; 1144 mg->cache = cache; 1145 mg->new_oblock = oblock; 1146 mg->cblock = cblock; 1147 mg->old_ocell = NULL; 1148 mg->new_ocell = cell; 1149 mg->start_jiffies = jiffies; 1150 1151 inc_nr_migrations(cache); 1152 quiesce_migration(mg); 1153 } 1154 1155 static void writeback(struct cache *cache, struct prealloc *structs, 1156 dm_oblock_t oblock, dm_cblock_t cblock, 1157 struct dm_bio_prison_cell *cell) 1158 { 1159 struct dm_cache_migration *mg = prealloc_get_migration(structs); 1160 1161 mg->err = false; 1162 
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any changes in the cache
 * block are thrown away.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(bio->bi_iter.bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue(cache, bio);
}

/*
 * People generally discard large parts of a device, eg, the whole device
 * when formatting.  Splitting these large discards up into cache block
 * sized ios and then quiescing (always necessary for discard) takes too
 * long.
 *
 * We keep it simple, and allow any size of discard to come in, and just
 * mark off blocks on the discard bitset.  No passdown occurs!
 *
 * To implement passdown we need to change the bio_prison such that a cell
 * can have a key that spans many blocks.
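 *
 * A worked example (hypothetical numbers): with sectors_per_block = 128,
 * a discard of sectors 1000..100999 gives
 *
 *	start_block = dm_sector_div_up(1000, 128)  = 8
 *	end_block   = block_div(101000, 128)       = 789
 *
 * so only the fully covered oblocks 8..788 are marked in the discard
 * bitset; the partially covered blocks at either end are left alone.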
1270 */ 1271 static void process_discard_bio(struct cache *cache, struct bio *bio) 1272 { 1273 dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, 1274 cache->sectors_per_block); 1275 dm_block_t end_block = bio_end_sector(bio); 1276 dm_block_t b; 1277 1278 end_block = block_div(end_block, cache->sectors_per_block); 1279 1280 for (b = start_block; b < end_block; b++) 1281 set_discard(cache, to_oblock(b)); 1282 1283 bio_endio(bio, 0); 1284 } 1285 1286 static bool spare_migration_bandwidth(struct cache *cache) 1287 { 1288 sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) * 1289 cache->sectors_per_block; 1290 return current_volume < cache->migration_threshold; 1291 } 1292 1293 static void inc_hit_counter(struct cache *cache, struct bio *bio) 1294 { 1295 atomic_inc(bio_data_dir(bio) == READ ? 1296 &cache->stats.read_hit : &cache->stats.write_hit); 1297 } 1298 1299 static void inc_miss_counter(struct cache *cache, struct bio *bio) 1300 { 1301 atomic_inc(bio_data_dir(bio) == READ ? 1302 &cache->stats.read_miss : &cache->stats.write_miss); 1303 } 1304 1305 static void issue_cache_bio(struct cache *cache, struct bio *bio, 1306 struct per_bio_data *pb, 1307 dm_oblock_t oblock, dm_cblock_t cblock) 1308 { 1309 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1310 remap_to_cache_dirty(cache, bio, oblock, cblock); 1311 issue(cache, bio); 1312 } 1313 1314 static void process_bio(struct cache *cache, struct prealloc *structs, 1315 struct bio *bio) 1316 { 1317 int r; 1318 bool release_cell = true; 1319 dm_oblock_t block = get_bio_block(cache, bio); 1320 struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; 1321 struct policy_result lookup_result; 1322 size_t pb_data_size = get_per_bio_data_size(cache); 1323 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 1324 bool discarded_block = is_discarded_oblock(cache, block); 1325 bool passthrough = passthrough_mode(&cache->features); 1326 bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); 1327 1328 /* 1329 * Check to see if that block is currently migrating. 1330 */ 1331 cell_prealloc = prealloc_get_cell(structs); 1332 r = bio_detain(cache, block, bio, cell_prealloc, 1333 (cell_free_fn) prealloc_put_cell, 1334 structs, &new_ocell); 1335 if (r > 0) 1336 return; 1337 1338 r = policy_map(cache->policy, block, true, can_migrate, discarded_block, 1339 bio, &lookup_result); 1340 1341 if (r == -EWOULDBLOCK) 1342 /* migration has been denied */ 1343 lookup_result.op = POLICY_MISS; 1344 1345 switch (lookup_result.op) { 1346 case POLICY_HIT: 1347 if (passthrough) { 1348 inc_miss_counter(cache, bio); 1349 1350 /* 1351 * Passthrough always maps to the origin, 1352 * invalidating any cache blocks that are written 1353 * to. 
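			 *
			 * The invalidate() call below reuses the demotion
			 * machinery, but the cached contents are thrown
			 * away rather than written back (see the comment
			 * above invalidate()).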
1354 */ 1355 1356 if (bio_data_dir(bio) == WRITE) { 1357 atomic_inc(&cache->stats.demotion); 1358 invalidate(cache, structs, block, lookup_result.cblock, new_ocell); 1359 release_cell = false; 1360 1361 } else { 1362 /* FIXME: factor out issue_origin() */ 1363 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1364 remap_to_origin_clear_discard(cache, bio, block); 1365 issue(cache, bio); 1366 } 1367 } else { 1368 inc_hit_counter(cache, bio); 1369 1370 if (bio_data_dir(bio) == WRITE && 1371 writethrough_mode(&cache->features) && 1372 !is_dirty(cache, lookup_result.cblock)) { 1373 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1374 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 1375 issue(cache, bio); 1376 } else 1377 issue_cache_bio(cache, bio, pb, block, lookup_result.cblock); 1378 } 1379 1380 break; 1381 1382 case POLICY_MISS: 1383 inc_miss_counter(cache, bio); 1384 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1385 remap_to_origin_clear_discard(cache, bio, block); 1386 issue(cache, bio); 1387 break; 1388 1389 case POLICY_NEW: 1390 atomic_inc(&cache->stats.promotion); 1391 promote(cache, structs, block, lookup_result.cblock, new_ocell); 1392 release_cell = false; 1393 break; 1394 1395 case POLICY_REPLACE: 1396 cell_prealloc = prealloc_get_cell(structs); 1397 r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc, 1398 (cell_free_fn) prealloc_put_cell, 1399 structs, &old_ocell); 1400 if (r > 0) { 1401 /* 1402 * We have to be careful to avoid lock inversion of 1403 * the cells. So we back off, and wait for the 1404 * old_ocell to become free. 1405 */ 1406 policy_force_mapping(cache->policy, block, 1407 lookup_result.old_oblock); 1408 atomic_inc(&cache->stats.cache_cell_clash); 1409 break; 1410 } 1411 atomic_inc(&cache->stats.demotion); 1412 atomic_inc(&cache->stats.promotion); 1413 1414 demote_then_promote(cache, structs, lookup_result.old_oblock, 1415 block, lookup_result.cblock, 1416 old_ocell, new_ocell); 1417 release_cell = false; 1418 break; 1419 1420 default: 1421 DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__, 1422 (unsigned) lookup_result.op); 1423 bio_io_error(bio); 1424 } 1425 1426 if (release_cell) 1427 cell_defer(cache, new_ocell, false); 1428 } 1429 1430 static int need_commit_due_to_time(struct cache *cache) 1431 { 1432 return jiffies < cache->last_commit_jiffies || 1433 jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; 1434 } 1435 1436 static int commit_if_needed(struct cache *cache) 1437 { 1438 int r = 0; 1439 1440 if ((cache->commit_requested || need_commit_due_to_time(cache)) && 1441 dm_cache_changed_this_transaction(cache->cmd)) { 1442 atomic_inc(&cache->stats.commit_count); 1443 cache->commit_requested = false; 1444 r = dm_cache_commit(cache->cmd, false); 1445 cache->last_commit_jiffies = jiffies; 1446 } 1447 1448 return r; 1449 } 1450 1451 static void process_deferred_bios(struct cache *cache) 1452 { 1453 unsigned long flags; 1454 struct bio_list bios; 1455 struct bio *bio; 1456 struct prealloc structs; 1457 1458 memset(&structs, 0, sizeof(structs)); 1459 bio_list_init(&bios); 1460 1461 spin_lock_irqsave(&cache->lock, flags); 1462 bio_list_merge(&bios, &cache->deferred_bios); 1463 bio_list_init(&cache->deferred_bios); 1464 spin_unlock_irqrestore(&cache->lock, flags); 1465 1466 while (!bio_list_empty(&bios)) { 1467 /* 1468 * If we've got no free migration structs, and processing 1469 * this bio might require one, we pause until there are some 1470 * prepared mappings to process. 
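		 *
		 * prealloc_data_structs() allocates with GFP_NOWAIT, so a
		 * failure just means the pools are empty; the unprocessed
		 * bios go back onto deferred_bios and are retried the next
		 * time the worker runs.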
1471 */ 1472 if (prealloc_data_structs(cache, &structs)) { 1473 spin_lock_irqsave(&cache->lock, flags); 1474 bio_list_merge(&cache->deferred_bios, &bios); 1475 spin_unlock_irqrestore(&cache->lock, flags); 1476 break; 1477 } 1478 1479 bio = bio_list_pop(&bios); 1480 1481 if (bio->bi_rw & REQ_FLUSH) 1482 process_flush_bio(cache, bio); 1483 else if (bio->bi_rw & REQ_DISCARD) 1484 process_discard_bio(cache, bio); 1485 else 1486 process_bio(cache, &structs, bio); 1487 } 1488 1489 prealloc_free_structs(cache, &structs); 1490 } 1491 1492 static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) 1493 { 1494 unsigned long flags; 1495 struct bio_list bios; 1496 struct bio *bio; 1497 1498 bio_list_init(&bios); 1499 1500 spin_lock_irqsave(&cache->lock, flags); 1501 bio_list_merge(&bios, &cache->deferred_flush_bios); 1502 bio_list_init(&cache->deferred_flush_bios); 1503 spin_unlock_irqrestore(&cache->lock, flags); 1504 1505 while ((bio = bio_list_pop(&bios))) 1506 submit_bios ? generic_make_request(bio) : bio_io_error(bio); 1507 } 1508 1509 static void process_deferred_writethrough_bios(struct cache *cache) 1510 { 1511 unsigned long flags; 1512 struct bio_list bios; 1513 struct bio *bio; 1514 1515 bio_list_init(&bios); 1516 1517 spin_lock_irqsave(&cache->lock, flags); 1518 bio_list_merge(&bios, &cache->deferred_writethrough_bios); 1519 bio_list_init(&cache->deferred_writethrough_bios); 1520 spin_unlock_irqrestore(&cache->lock, flags); 1521 1522 while ((bio = bio_list_pop(&bios))) 1523 generic_make_request(bio); 1524 } 1525 1526 static void writeback_some_dirty_blocks(struct cache *cache) 1527 { 1528 int r = 0; 1529 dm_oblock_t oblock; 1530 dm_cblock_t cblock; 1531 struct prealloc structs; 1532 struct dm_bio_prison_cell *old_ocell; 1533 1534 memset(&structs, 0, sizeof(structs)); 1535 1536 while (spare_migration_bandwidth(cache)) { 1537 if (prealloc_data_structs(cache, &structs)) 1538 break; 1539 1540 r = policy_writeback_work(cache->policy, &oblock, &cblock); 1541 if (r) 1542 break; 1543 1544 r = get_cell(cache, oblock, &structs, &old_ocell); 1545 if (r) { 1546 policy_set_dirty(cache->policy, oblock); 1547 break; 1548 } 1549 1550 writeback(cache, &structs, oblock, cblock, old_ocell); 1551 } 1552 1553 prealloc_free_structs(cache, &structs); 1554 } 1555 1556 /*---------------------------------------------------------------- 1557 * Invalidations. 1558 * Dropping something from the cache *without* writing back. 
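 *
 * Each invalidation_request covers a half-open range [begin, end) of
 * cblocks.  For every cblock in the range the policy mapping is removed
 * with policy_remove_cblock() and the on-disk mapping with
 * dm_cache_remove_mapping(); -ENODATA from the policy just means the
 * block wasn't mapped and is skipped.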
1559 *--------------------------------------------------------------*/ 1560 1561 static void process_invalidation_request(struct cache *cache, struct invalidation_request *req) 1562 { 1563 int r = 0; 1564 uint64_t begin = from_cblock(req->cblocks->begin); 1565 uint64_t end = from_cblock(req->cblocks->end); 1566 1567 while (begin != end) { 1568 r = policy_remove_cblock(cache->policy, to_cblock(begin)); 1569 if (!r) { 1570 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin)); 1571 if (r) 1572 break; 1573 1574 } else if (r == -ENODATA) { 1575 /* harmless, already unmapped */ 1576 r = 0; 1577 1578 } else { 1579 DMERR("policy_remove_cblock failed"); 1580 break; 1581 } 1582 1583 begin++; 1584 } 1585 1586 cache->commit_requested = true; 1587 1588 req->err = r; 1589 atomic_set(&req->complete, 1); 1590 1591 wake_up(&req->result_wait); 1592 } 1593 1594 static void process_invalidation_requests(struct cache *cache) 1595 { 1596 struct list_head list; 1597 struct invalidation_request *req, *tmp; 1598 1599 INIT_LIST_HEAD(&list); 1600 spin_lock(&cache->invalidation_lock); 1601 list_splice_init(&cache->invalidation_requests, &list); 1602 spin_unlock(&cache->invalidation_lock); 1603 1604 list_for_each_entry_safe (req, tmp, &list, list) 1605 process_invalidation_request(cache, req); 1606 } 1607 1608 /*---------------------------------------------------------------- 1609 * Main worker loop 1610 *--------------------------------------------------------------*/ 1611 static bool is_quiescing(struct cache *cache) 1612 { 1613 return atomic_read(&cache->quiescing); 1614 } 1615 1616 static void ack_quiescing(struct cache *cache) 1617 { 1618 if (is_quiescing(cache)) { 1619 atomic_inc(&cache->quiescing_ack); 1620 wake_up(&cache->quiescing_wait); 1621 } 1622 } 1623 1624 static void wait_for_quiescing_ack(struct cache *cache) 1625 { 1626 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack)); 1627 } 1628 1629 static void start_quiescing(struct cache *cache) 1630 { 1631 atomic_inc(&cache->quiescing); 1632 wait_for_quiescing_ack(cache); 1633 } 1634 1635 static void stop_quiescing(struct cache *cache) 1636 { 1637 atomic_set(&cache->quiescing, 0); 1638 atomic_set(&cache->quiescing_ack, 0); 1639 } 1640 1641 static void wait_for_migrations(struct cache *cache) 1642 { 1643 wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); 1644 } 1645 1646 static void stop_worker(struct cache *cache) 1647 { 1648 cancel_delayed_work(&cache->waker); 1649 flush_workqueue(cache->wq); 1650 } 1651 1652 static void requeue_deferred_io(struct cache *cache) 1653 { 1654 struct bio *bio; 1655 struct bio_list bios; 1656 1657 bio_list_init(&bios); 1658 bio_list_merge(&bios, &cache->deferred_bios); 1659 bio_list_init(&cache->deferred_bios); 1660 1661 while ((bio = bio_list_pop(&bios))) 1662 bio_endio(bio, DM_ENDIO_REQUEUE); 1663 } 1664 1665 static int more_work(struct cache *cache) 1666 { 1667 if (is_quiescing(cache)) 1668 return !list_empty(&cache->quiesced_migrations) || 1669 !list_empty(&cache->completed_migrations) || 1670 !list_empty(&cache->need_commit_migrations); 1671 else 1672 return !bio_list_empty(&cache->deferred_bios) || 1673 !bio_list_empty(&cache->deferred_flush_bios) || 1674 !bio_list_empty(&cache->deferred_writethrough_bios) || 1675 !list_empty(&cache->quiesced_migrations) || 1676 !list_empty(&cache->completed_migrations) || 1677 !list_empty(&cache->need_commit_migrations) || 1678 cache->invalidate; 1679 } 1680 1681 static void do_worker(struct work_struct *ws) 1682 { 1683 struct cache *cache = 
container_of(ws, struct cache, worker); 1684 1685 do { 1686 if (!is_quiescing(cache)) { 1687 writeback_some_dirty_blocks(cache); 1688 process_deferred_writethrough_bios(cache); 1689 process_deferred_bios(cache); 1690 process_invalidation_requests(cache); 1691 } 1692 1693 process_migrations(cache, &cache->quiesced_migrations, issue_copy); 1694 process_migrations(cache, &cache->completed_migrations, complete_migration); 1695 1696 if (commit_if_needed(cache)) { 1697 process_deferred_flush_bios(cache, false); 1698 1699 /* 1700 * FIXME: rollback metadata or just go into a 1701 * failure mode and error everything 1702 */ 1703 } else { 1704 process_deferred_flush_bios(cache, true); 1705 process_migrations(cache, &cache->need_commit_migrations, 1706 migration_success_post_commit); 1707 } 1708 1709 ack_quiescing(cache); 1710 1711 } while (more_work(cache)); 1712 } 1713 1714 /* 1715 * We want to commit periodically so that not too much 1716 * unwritten metadata builds up. 1717 */ 1718 static void do_waker(struct work_struct *ws) 1719 { 1720 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); 1721 policy_tick(cache->policy); 1722 wake_worker(cache); 1723 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); 1724 } 1725 1726 /*----------------------------------------------------------------*/ 1727 1728 static int is_congested(struct dm_dev *dev, int bdi_bits) 1729 { 1730 struct request_queue *q = bdev_get_queue(dev->bdev); 1731 return bdi_congested(&q->backing_dev_info, bdi_bits); 1732 } 1733 1734 static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 1735 { 1736 struct cache *cache = container_of(cb, struct cache, callbacks); 1737 1738 return is_congested(cache->origin_dev, bdi_bits) || 1739 is_congested(cache->cache_dev, bdi_bits); 1740 } 1741 1742 /*---------------------------------------------------------------- 1743 * Target methods 1744 *--------------------------------------------------------------*/ 1745 1746 /* 1747 * This function gets called on the error paths of the constructor, so we 1748 * have to cope with a partially initialised struct. 
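 *
 * Each member is therefore checked before being torn down, so destroy()
 * is safe to call from any of the 'bad' error labels in cache_create().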
1749 */ 1750 static void destroy(struct cache *cache) 1751 { 1752 unsigned i; 1753 1754 if (cache->next_migration) 1755 mempool_free(cache->next_migration, cache->migration_pool); 1756 1757 if (cache->migration_pool) 1758 mempool_destroy(cache->migration_pool); 1759 1760 if (cache->all_io_ds) 1761 dm_deferred_set_destroy(cache->all_io_ds); 1762 1763 if (cache->prison) 1764 dm_bio_prison_destroy(cache->prison); 1765 1766 if (cache->wq) 1767 destroy_workqueue(cache->wq); 1768 1769 if (cache->dirty_bitset) 1770 free_bitset(cache->dirty_bitset); 1771 1772 if (cache->discard_bitset) 1773 free_bitset(cache->discard_bitset); 1774 1775 if (cache->copier) 1776 dm_kcopyd_client_destroy(cache->copier); 1777 1778 if (cache->cmd) 1779 dm_cache_metadata_close(cache->cmd); 1780 1781 if (cache->metadata_dev) 1782 dm_put_device(cache->ti, cache->metadata_dev); 1783 1784 if (cache->origin_dev) 1785 dm_put_device(cache->ti, cache->origin_dev); 1786 1787 if (cache->cache_dev) 1788 dm_put_device(cache->ti, cache->cache_dev); 1789 1790 if (cache->policy) 1791 dm_cache_policy_destroy(cache->policy); 1792 1793 for (i = 0; i < cache->nr_ctr_args ; i++) 1794 kfree(cache->ctr_args[i]); 1795 kfree(cache->ctr_args); 1796 1797 kfree(cache); 1798 } 1799 1800 static void cache_dtr(struct dm_target *ti) 1801 { 1802 struct cache *cache = ti->private; 1803 1804 destroy(cache); 1805 } 1806 1807 static sector_t get_dev_size(struct dm_dev *dev) 1808 { 1809 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; 1810 } 1811 1812 /*----------------------------------------------------------------*/ 1813 1814 /* 1815 * Construct a cache device mapping. 1816 * 1817 * cache <metadata dev> <cache dev> <origin dev> <block size> 1818 * <#feature args> [<feature arg>]* 1819 * <policy> <#policy args> [<policy arg>]* 1820 * 1821 * metadata dev : fast device holding the persistent metadata 1822 * cache dev : fast device holding cached data blocks 1823 * origin dev : slow device holding original data blocks 1824 * block size : cache unit size in sectors 1825 * 1826 * #feature args : number of feature arguments passed 1827 * feature args : writethrough. (The default is writeback.) 1828 * 1829 * policy : the replacement policy to use 1830 * #policy args : an even number of policy arguments corresponding 1831 * to key/value pairs passed to the policy 1832 * policy args : key/value pairs passed to the policy 1833 * E.g. 'sequential_threshold 1024' 1834 * See cache-policies.txt for details. 1835 * 1836 * Optional feature arguments are: 1837 * writethrough : write through caching that prohibits cache block 1838 * content from being different from origin block content. 1839 * Without this argument, the default behaviour is to write 1840 * back cache block contents later for performance reasons, 1841 * so they may differ from the corresponding origin blocks. 
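 *
 * A hypothetical dmsetup table line (device names and sizes made up)
 * might look like:
 *
 *   0 41943040 cache /dev/mapper/fast-meta /dev/mapper/fast /dev/mapper/slow
 *	512 1 writeback default 4 random_threshold 4 sequential_threshold 1024
 *
 * i.e. 512-sector (256KB) cache blocks, one feature argument selecting
 * writeback, the 'default' policy and two key/value policy pairs.  Note
 * that parse_features() below also accepts 'writeback' and 'passthrough'
 * in addition to 'writethrough'.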
 */
struct cache_args {
	struct dm_target *ti;

	struct dm_dev *metadata_dev;

	struct dm_dev *cache_dev;
	sector_t cache_sectors;

	struct dm_dev *origin_dev;
	sector_t origin_sectors;

	uint32_t block_size;

	const char *policy_name;
	int policy_argc;
	const char **policy_argv;

	struct cache_features features;
};

static void destroy_cache_args(struct cache_args *ca)
{
	if (ca->metadata_dev)
		dm_put_device(ca->ti, ca->metadata_dev);

	if (ca->cache_dev)
		dm_put_device(ca->ti, ca->cache_dev);

	if (ca->origin_dev)
		dm_put_device(ca->ti, ca->origin_dev);

	kfree(ca);
}

static bool at_least_one_arg(struct dm_arg_set *as, char **error)
{
	if (!as->argc) {
		*error = "Insufficient args";
		return false;
	}

	return true;
}

static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
			      char **error)
{
	int r;
	sector_t metadata_dev_size;
	char b[BDEVNAME_SIZE];

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->metadata_dev);
	if (r) {
		*error = "Error opening metadata device";
		return r;
	}

	metadata_dev_size = get_dev_size(ca->metadata_dev);
	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b), DM_CACHE_METADATA_MAX_SECTORS);

	return 0;
}

static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
			   char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->cache_dev);
	if (r) {
		*error = "Error opening cache device";
		return r;
	}
	ca->cache_sectors = get_dev_size(ca->cache_dev);

	return 0;
}

static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->origin_dev);
	if (r) {
		*error = "Error opening origin device";
		return r;
	}

	ca->origin_sectors = get_dev_size(ca->origin_dev);
	if (ca->ti->len > ca->origin_sectors) {
		*error = "Device size larger than cached device";
		return -EINVAL;
	}

	return 0;
}

static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	unsigned long block_size;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
		*error = "Invalid data block size";
		return -EINVAL;
	}

	if (block_size > ca->cache_sectors) {
		*error = "Data block size is larger than the cache device";
		return -EINVAL;
	}

	ca->block_size = block_size;

	return 0;
}

static void init_features(struct cache_features *cf)
{
	cf->mode = CM_WRITE;
	cf->io_mode =
CM_IO_WRITEBACK; 1985 } 1986 1987 static int parse_features(struct cache_args *ca, struct dm_arg_set *as, 1988 char **error) 1989 { 1990 static struct dm_arg _args[] = { 1991 {0, 1, "Invalid number of cache feature arguments"}, 1992 }; 1993 1994 int r; 1995 unsigned argc; 1996 const char *arg; 1997 struct cache_features *cf = &ca->features; 1998 1999 init_features(cf); 2000 2001 r = dm_read_arg_group(_args, as, &argc, error); 2002 if (r) 2003 return -EINVAL; 2004 2005 while (argc--) { 2006 arg = dm_shift_arg(as); 2007 2008 if (!strcasecmp(arg, "writeback")) 2009 cf->io_mode = CM_IO_WRITEBACK; 2010 2011 else if (!strcasecmp(arg, "writethrough")) 2012 cf->io_mode = CM_IO_WRITETHROUGH; 2013 2014 else if (!strcasecmp(arg, "passthrough")) 2015 cf->io_mode = CM_IO_PASSTHROUGH; 2016 2017 else { 2018 *error = "Unrecognised cache feature requested"; 2019 return -EINVAL; 2020 } 2021 } 2022 2023 return 0; 2024 } 2025 2026 static int parse_policy(struct cache_args *ca, struct dm_arg_set *as, 2027 char **error) 2028 { 2029 static struct dm_arg _args[] = { 2030 {0, 1024, "Invalid number of policy arguments"}, 2031 }; 2032 2033 int r; 2034 2035 if (!at_least_one_arg(as, error)) 2036 return -EINVAL; 2037 2038 ca->policy_name = dm_shift_arg(as); 2039 2040 r = dm_read_arg_group(_args, as, &ca->policy_argc, error); 2041 if (r) 2042 return -EINVAL; 2043 2044 ca->policy_argv = (const char **)as->argv; 2045 dm_consume_args(as, ca->policy_argc); 2046 2047 return 0; 2048 } 2049 2050 static int parse_cache_args(struct cache_args *ca, int argc, char **argv, 2051 char **error) 2052 { 2053 int r; 2054 struct dm_arg_set as; 2055 2056 as.argc = argc; 2057 as.argv = argv; 2058 2059 r = parse_metadata_dev(ca, &as, error); 2060 if (r) 2061 return r; 2062 2063 r = parse_cache_dev(ca, &as, error); 2064 if (r) 2065 return r; 2066 2067 r = parse_origin_dev(ca, &as, error); 2068 if (r) 2069 return r; 2070 2071 r = parse_block_size(ca, &as, error); 2072 if (r) 2073 return r; 2074 2075 r = parse_features(ca, &as, error); 2076 if (r) 2077 return r; 2078 2079 r = parse_policy(ca, &as, error); 2080 if (r) 2081 return r; 2082 2083 return 0; 2084 } 2085 2086 /*----------------------------------------------------------------*/ 2087 2088 static struct kmem_cache *migration_cache; 2089 2090 #define NOT_CORE_OPTION 1 2091 2092 static int process_config_option(struct cache *cache, const char *key, const char *value) 2093 { 2094 unsigned long tmp; 2095 2096 if (!strcasecmp(key, "migration_threshold")) { 2097 if (kstrtoul(value, 10, &tmp)) 2098 return -EINVAL; 2099 2100 cache->migration_threshold = tmp; 2101 return 0; 2102 } 2103 2104 return NOT_CORE_OPTION; 2105 } 2106 2107 static int set_config_value(struct cache *cache, const char *key, const char *value) 2108 { 2109 int r = process_config_option(cache, key, value); 2110 2111 if (r == NOT_CORE_OPTION) 2112 r = policy_set_config_value(cache->policy, key, value); 2113 2114 if (r) 2115 DMWARN("bad config value for %s: %s", key, value); 2116 2117 return r; 2118 } 2119 2120 static int set_config_values(struct cache *cache, int argc, const char **argv) 2121 { 2122 int r = 0; 2123 2124 if (argc & 1) { 2125 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs."); 2126 return -EINVAL; 2127 } 2128 2129 while (argc) { 2130 r = set_config_value(cache, argv[0], argv[1]); 2131 if (r) 2132 break; 2133 2134 argc -= 2; 2135 argv += 2; 2136 } 2137 2138 return r; 2139 } 2140 2141 static int create_cache_policy(struct cache *cache, struct cache_args *ca, 2142 char **error) 
2143 { 2144 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name, 2145 cache->cache_size, 2146 cache->origin_sectors, 2147 cache->sectors_per_block); 2148 if (IS_ERR(p)) { 2149 *error = "Error creating cache's policy"; 2150 return PTR_ERR(p); 2151 } 2152 cache->policy = p; 2153 2154 return 0; 2155 } 2156 2157 #define DEFAULT_MIGRATION_THRESHOLD 2048 2158 2159 static int cache_create(struct cache_args *ca, struct cache **result) 2160 { 2161 int r = 0; 2162 char **error = &ca->ti->error; 2163 struct cache *cache; 2164 struct dm_target *ti = ca->ti; 2165 dm_block_t origin_blocks; 2166 struct dm_cache_metadata *cmd; 2167 bool may_format = ca->features.mode == CM_WRITE; 2168 2169 cache = kzalloc(sizeof(*cache), GFP_KERNEL); 2170 if (!cache) 2171 return -ENOMEM; 2172 2173 cache->ti = ca->ti; 2174 ti->private = cache; 2175 ti->num_flush_bios = 2; 2176 ti->flush_supported = true; 2177 2178 ti->num_discard_bios = 1; 2179 ti->discards_supported = true; 2180 ti->discard_zeroes_data_unsupported = true; 2181 2182 cache->features = ca->features; 2183 ti->per_bio_data_size = get_per_bio_data_size(cache); 2184 2185 cache->callbacks.congested_fn = cache_is_congested; 2186 dm_table_add_target_callbacks(ti->table, &cache->callbacks); 2187 2188 cache->metadata_dev = ca->metadata_dev; 2189 cache->origin_dev = ca->origin_dev; 2190 cache->cache_dev = ca->cache_dev; 2191 2192 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; 2193 2194 /* FIXME: factor out this whole section */ 2195 origin_blocks = cache->origin_sectors = ca->origin_sectors; 2196 origin_blocks = block_div(origin_blocks, ca->block_size); 2197 cache->origin_blocks = to_oblock(origin_blocks); 2198 2199 cache->sectors_per_block = ca->block_size; 2200 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) { 2201 r = -EINVAL; 2202 goto bad; 2203 } 2204 2205 if (ca->block_size & (ca->block_size - 1)) { 2206 dm_block_t cache_size = ca->cache_sectors; 2207 2208 cache->sectors_per_block_shift = -1; 2209 cache_size = block_div(cache_size, ca->block_size); 2210 cache->cache_size = to_cblock(cache_size); 2211 } else { 2212 cache->sectors_per_block_shift = __ffs(ca->block_size); 2213 cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift); 2214 } 2215 2216 r = create_cache_policy(cache, ca, error); 2217 if (r) 2218 goto bad; 2219 2220 cache->policy_nr_args = ca->policy_argc; 2221 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; 2222 2223 r = set_config_values(cache, ca->policy_argc, ca->policy_argv); 2224 if (r) { 2225 *error = "Error setting cache policy's config values"; 2226 goto bad; 2227 } 2228 2229 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, 2230 ca->block_size, may_format, 2231 dm_cache_policy_get_hint_size(cache->policy)); 2232 if (IS_ERR(cmd)) { 2233 *error = "Error creating metadata object"; 2234 r = PTR_ERR(cmd); 2235 goto bad; 2236 } 2237 cache->cmd = cmd; 2238 2239 if (passthrough_mode(&cache->features)) { 2240 bool all_clean; 2241 2242 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); 2243 if (r) { 2244 *error = "dm_cache_metadata_all_clean() failed"; 2245 goto bad; 2246 } 2247 2248 if (!all_clean) { 2249 *error = "Cannot enter passthrough mode unless all blocks are clean"; 2250 r = -EINVAL; 2251 goto bad; 2252 } 2253 } 2254 2255 spin_lock_init(&cache->lock); 2256 bio_list_init(&cache->deferred_bios); 2257 bio_list_init(&cache->deferred_flush_bios); 2258 bio_list_init(&cache->deferred_writethrough_bios); 2259 INIT_LIST_HEAD(&cache->quiesced_migrations); 2260 
INIT_LIST_HEAD(&cache->completed_migrations); 2261 INIT_LIST_HEAD(&cache->need_commit_migrations); 2262 atomic_set(&cache->nr_migrations, 0); 2263 init_waitqueue_head(&cache->migration_wait); 2264 2265 init_waitqueue_head(&cache->quiescing_wait); 2266 atomic_set(&cache->quiescing, 0); 2267 atomic_set(&cache->quiescing_ack, 0); 2268 2269 r = -ENOMEM; 2270 cache->nr_dirty = 0; 2271 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); 2272 if (!cache->dirty_bitset) { 2273 *error = "could not allocate dirty bitset"; 2274 goto bad; 2275 } 2276 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); 2277 2278 cache->discard_nr_blocks = cache->origin_blocks; 2279 cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks)); 2280 if (!cache->discard_bitset) { 2281 *error = "could not allocate discard bitset"; 2282 goto bad; 2283 } 2284 clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks)); 2285 2286 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); 2287 if (IS_ERR(cache->copier)) { 2288 *error = "could not create kcopyd client"; 2289 r = PTR_ERR(cache->copier); 2290 goto bad; 2291 } 2292 2293 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); 2294 if (!cache->wq) { 2295 *error = "could not create workqueue for metadata object"; 2296 goto bad; 2297 } 2298 INIT_WORK(&cache->worker, do_worker); 2299 INIT_DELAYED_WORK(&cache->waker, do_waker); 2300 cache->last_commit_jiffies = jiffies; 2301 2302 cache->prison = dm_bio_prison_create(PRISON_CELLS); 2303 if (!cache->prison) { 2304 *error = "could not create bio prison"; 2305 goto bad; 2306 } 2307 2308 cache->all_io_ds = dm_deferred_set_create(); 2309 if (!cache->all_io_ds) { 2310 *error = "could not create all_io deferred set"; 2311 goto bad; 2312 } 2313 2314 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, 2315 migration_cache); 2316 if (!cache->migration_pool) { 2317 *error = "Error creating cache's migration mempool"; 2318 goto bad; 2319 } 2320 2321 cache->next_migration = NULL; 2322 2323 cache->need_tick_bio = true; 2324 cache->sized = false; 2325 cache->invalidate = false; 2326 cache->commit_requested = false; 2327 cache->loaded_mappings = false; 2328 cache->loaded_discards = false; 2329 2330 load_stats(cache); 2331 2332 atomic_set(&cache->stats.demotion, 0); 2333 atomic_set(&cache->stats.promotion, 0); 2334 atomic_set(&cache->stats.copies_avoided, 0); 2335 atomic_set(&cache->stats.cache_cell_clash, 0); 2336 atomic_set(&cache->stats.commit_count, 0); 2337 atomic_set(&cache->stats.discard_count, 0); 2338 2339 spin_lock_init(&cache->invalidation_lock); 2340 INIT_LIST_HEAD(&cache->invalidation_requests); 2341 2342 *result = cache; 2343 return 0; 2344 2345 bad: 2346 destroy(cache); 2347 return r; 2348 } 2349 2350 static int copy_ctr_args(struct cache *cache, int argc, const char **argv) 2351 { 2352 unsigned i; 2353 const char **copy; 2354 2355 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL); 2356 if (!copy) 2357 return -ENOMEM; 2358 for (i = 0; i < argc; i++) { 2359 copy[i] = kstrdup(argv[i], GFP_KERNEL); 2360 if (!copy[i]) { 2361 while (i--) 2362 kfree(copy[i]); 2363 kfree(copy); 2364 return -ENOMEM; 2365 } 2366 } 2367 2368 cache->nr_ctr_args = argc; 2369 cache->ctr_args = copy; 2370 2371 return 0; 2372 } 2373 2374 static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) 2375 { 2376 int r = -EINVAL; 2377 struct cache_args *ca; 2378 struct cache *cache = NULL; 2379 2380 ca = kzalloc(sizeof(*ca), GFP_KERNEL); 2381 if (!ca) { 
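		/*
		 * As with the other messages assigned to ti->error along this
		 * constructor path, this string is logged by the device-mapper
		 * core if the table load fails.
		 */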
2382 ti->error = "Error allocating memory for cache"; 2383 return -ENOMEM; 2384 } 2385 ca->ti = ti; 2386 2387 r = parse_cache_args(ca, argc, argv, &ti->error); 2388 if (r) 2389 goto out; 2390 2391 r = cache_create(ca, &cache); 2392 if (r) 2393 goto out; 2394 2395 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); 2396 if (r) { 2397 destroy(cache); 2398 goto out; 2399 } 2400 2401 ti->private = cache; 2402 2403 out: 2404 destroy_cache_args(ca); 2405 return r; 2406 } 2407 2408 static int cache_map(struct dm_target *ti, struct bio *bio) 2409 { 2410 struct cache *cache = ti->private; 2411 2412 int r; 2413 dm_oblock_t block = get_bio_block(cache, bio); 2414 size_t pb_data_size = get_per_bio_data_size(cache); 2415 bool can_migrate = false; 2416 bool discarded_block; 2417 struct dm_bio_prison_cell *cell; 2418 struct policy_result lookup_result; 2419 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); 2420 2421 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { 2422 /* 2423 * This can only occur if the io goes to a partial block at 2424 * the end of the origin device. We don't cache these. 2425 * Just remap to the origin and carry on. 2426 */ 2427 remap_to_origin(cache, bio); 2428 return DM_MAPIO_REMAPPED; 2429 } 2430 2431 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { 2432 defer_bio(cache, bio); 2433 return DM_MAPIO_SUBMITTED; 2434 } 2435 2436 /* 2437 * Check to see if that block is currently migrating. 2438 */ 2439 cell = alloc_prison_cell(cache); 2440 if (!cell) { 2441 defer_bio(cache, bio); 2442 return DM_MAPIO_SUBMITTED; 2443 } 2444 2445 r = bio_detain(cache, block, bio, cell, 2446 (cell_free_fn) free_prison_cell, 2447 cache, &cell); 2448 if (r) { 2449 if (r < 0) 2450 defer_bio(cache, bio); 2451 2452 return DM_MAPIO_SUBMITTED; 2453 } 2454 2455 discarded_block = is_discarded_oblock(cache, block); 2456 2457 r = policy_map(cache->policy, block, false, can_migrate, discarded_block, 2458 bio, &lookup_result); 2459 if (r == -EWOULDBLOCK) { 2460 cell_defer(cache, cell, true); 2461 return DM_MAPIO_SUBMITTED; 2462 2463 } else if (r) { 2464 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); 2465 bio_io_error(bio); 2466 return DM_MAPIO_SUBMITTED; 2467 } 2468 2469 r = DM_MAPIO_REMAPPED; 2470 switch (lookup_result.op) { 2471 case POLICY_HIT: 2472 if (passthrough_mode(&cache->features)) { 2473 if (bio_data_dir(bio) == WRITE) { 2474 /* 2475 * We need to invalidate this block, so 2476 * defer for the worker thread. 2477 */ 2478 cell_defer(cache, cell, true); 2479 r = DM_MAPIO_SUBMITTED; 2480 2481 } else { 2482 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 2483 inc_miss_counter(cache, bio); 2484 remap_to_origin_clear_discard(cache, bio, block); 2485 2486 cell_defer(cache, cell, false); 2487 } 2488 2489 } else { 2490 inc_hit_counter(cache, bio); 2491 2492 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && 2493 !is_dirty(cache, lookup_result.cblock)) 2494 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 2495 else 2496 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); 2497 2498 cell_defer(cache, cell, false); 2499 } 2500 break; 2501 2502 case POLICY_MISS: 2503 inc_miss_counter(cache, bio); 2504 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 2505 2506 if (pb->req_nr != 0) { 2507 /* 2508 * This is a duplicate writethrough io that is no 2509 * longer needed because the block has been demoted. 
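			 * Such a bio is simply completed with success and its
			 * cell released below; only the original (req_nr 0)
			 * bio is remapped to the origin.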
2510 */ 2511 bio_endio(bio, 0); 2512 cell_defer(cache, cell, false); 2513 return DM_MAPIO_SUBMITTED; 2514 } else { 2515 remap_to_origin_clear_discard(cache, bio, block); 2516 cell_defer(cache, cell, false); 2517 } 2518 break; 2519 2520 default: 2521 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, 2522 (unsigned) lookup_result.op); 2523 bio_io_error(bio); 2524 r = DM_MAPIO_SUBMITTED; 2525 } 2526 2527 return r; 2528 } 2529 2530 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) 2531 { 2532 struct cache *cache = ti->private; 2533 unsigned long flags; 2534 size_t pb_data_size = get_per_bio_data_size(cache); 2535 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 2536 2537 if (pb->tick) { 2538 policy_tick(cache->policy); 2539 2540 spin_lock_irqsave(&cache->lock, flags); 2541 cache->need_tick_bio = true; 2542 spin_unlock_irqrestore(&cache->lock, flags); 2543 } 2544 2545 check_for_quiesced_migrations(cache, pb); 2546 2547 return 0; 2548 } 2549 2550 static int write_dirty_bitset(struct cache *cache) 2551 { 2552 unsigned i, r; 2553 2554 for (i = 0; i < from_cblock(cache->cache_size); i++) { 2555 r = dm_cache_set_dirty(cache->cmd, to_cblock(i), 2556 is_dirty(cache, to_cblock(i))); 2557 if (r) 2558 return r; 2559 } 2560 2561 return 0; 2562 } 2563 2564 static int write_discard_bitset(struct cache *cache) 2565 { 2566 unsigned i, r; 2567 2568 r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block, 2569 cache->origin_blocks); 2570 if (r) { 2571 DMERR("could not resize on-disk discard bitset"); 2572 return r; 2573 } 2574 2575 for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) { 2576 r = dm_cache_set_discard(cache->cmd, to_oblock(i), 2577 is_discarded(cache, to_oblock(i))); 2578 if (r) 2579 return r; 2580 } 2581 2582 return 0; 2583 } 2584 2585 /* 2586 * returns true on success 2587 */ 2588 static bool sync_metadata(struct cache *cache) 2589 { 2590 int r1, r2, r3, r4; 2591 2592 r1 = write_dirty_bitset(cache); 2593 if (r1) 2594 DMERR("could not write dirty bitset"); 2595 2596 r2 = write_discard_bitset(cache); 2597 if (r2) 2598 DMERR("could not write discard bitset"); 2599 2600 save_stats(cache); 2601 2602 r3 = dm_cache_write_hints(cache->cmd, cache->policy); 2603 if (r3) 2604 DMERR("could not write hints"); 2605 2606 /* 2607 * If writing the above metadata failed, we still commit, but don't 2608 * set the clean shutdown flag. This will effectively force every 2609 * dirty bit to be set on reload. 2610 */ 2611 r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3); 2612 if (r4) 2613 DMERR("could not write cache metadata. 
Data loss may occur."); 2614 2615 return !r1 && !r2 && !r3 && !r4; 2616 } 2617 2618 static void cache_postsuspend(struct dm_target *ti) 2619 { 2620 struct cache *cache = ti->private; 2621 2622 start_quiescing(cache); 2623 wait_for_migrations(cache); 2624 stop_worker(cache); 2625 requeue_deferred_io(cache); 2626 stop_quiescing(cache); 2627 2628 (void) sync_metadata(cache); 2629 } 2630 2631 static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, 2632 bool dirty, uint32_t hint, bool hint_valid) 2633 { 2634 int r; 2635 struct cache *cache = context; 2636 2637 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid); 2638 if (r) 2639 return r; 2640 2641 if (dirty) 2642 set_dirty(cache, oblock, cblock); 2643 else 2644 clear_dirty(cache, oblock, cblock); 2645 2646 return 0; 2647 } 2648 2649 static int load_discard(void *context, sector_t discard_block_size, 2650 dm_oblock_t oblock, bool discard) 2651 { 2652 struct cache *cache = context; 2653 2654 if (discard) 2655 set_discard(cache, oblock); 2656 else 2657 clear_discard(cache, oblock); 2658 2659 return 0; 2660 } 2661 2662 static dm_cblock_t get_cache_dev_size(struct cache *cache) 2663 { 2664 sector_t size = get_dev_size(cache->cache_dev); 2665 (void) sector_div(size, cache->sectors_per_block); 2666 return to_cblock(size); 2667 } 2668 2669 static bool can_resize(struct cache *cache, dm_cblock_t new_size) 2670 { 2671 if (from_cblock(new_size) > from_cblock(cache->cache_size)) 2672 return true; 2673 2674 /* 2675 * We can't drop a dirty block when shrinking the cache. 2676 */ 2677 while (from_cblock(new_size) < from_cblock(cache->cache_size)) { 2678 new_size = to_cblock(from_cblock(new_size) + 1); 2679 if (is_dirty(cache, new_size)) { 2680 DMERR("unable to shrink cache; cache block %llu is dirty", 2681 (unsigned long long) from_cblock(new_size)); 2682 return false; 2683 } 2684 } 2685 2686 return true; 2687 } 2688 2689 static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) 2690 { 2691 int r; 2692 2693 r = dm_cache_resize(cache->cmd, new_size); 2694 if (r) { 2695 DMERR("could not resize cache metadata"); 2696 return r; 2697 } 2698 2699 cache->cache_size = new_size; 2700 2701 return 0; 2702 } 2703 2704 static int cache_preresume(struct dm_target *ti) 2705 { 2706 int r = 0; 2707 struct cache *cache = ti->private; 2708 dm_cblock_t csize = get_cache_dev_size(cache); 2709 2710 /* 2711 * Check to see if the cache has resized. 
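	 * Growing the cache device is always allowed and simply records the
	 * new size in the metadata.  Shrinking is refused if any of the
	 * cache blocks being dropped is still dirty, since their contents
	 * would be lost.  Any resize happens before the mappings and
	 * discards are loaded below.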
2712 */ 2713 if (!cache->sized) { 2714 r = resize_cache_dev(cache, csize); 2715 if (r) 2716 return r; 2717 2718 cache->sized = true; 2719 2720 } else if (csize != cache->cache_size) { 2721 if (!can_resize(cache, csize)) 2722 return -EINVAL; 2723 2724 r = resize_cache_dev(cache, csize); 2725 if (r) 2726 return r; 2727 } 2728 2729 if (!cache->loaded_mappings) { 2730 r = dm_cache_load_mappings(cache->cmd, cache->policy, 2731 load_mapping, cache); 2732 if (r) { 2733 DMERR("could not load cache mappings"); 2734 return r; 2735 } 2736 2737 cache->loaded_mappings = true; 2738 } 2739 2740 if (!cache->loaded_discards) { 2741 r = dm_cache_load_discards(cache->cmd, load_discard, cache); 2742 if (r) { 2743 DMERR("could not load origin discards"); 2744 return r; 2745 } 2746 2747 cache->loaded_discards = true; 2748 } 2749 2750 return r; 2751 } 2752 2753 static void cache_resume(struct dm_target *ti) 2754 { 2755 struct cache *cache = ti->private; 2756 2757 cache->need_tick_bio = true; 2758 do_waker(&cache->waker.work); 2759 } 2760 2761 /* 2762 * Status format: 2763 * 2764 * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 2765 * <cache block size> <#used cache blocks>/<#total cache blocks> 2766 * <#read hits> <#read misses> <#write hits> <#write misses> 2767 * <#demotions> <#promotions> <#dirty> 2768 * <#features> <features>* 2769 * <#core args> <core args> 2770 * <policy name> <#policy args> <policy args>* 2771 */ 2772 static void cache_status(struct dm_target *ti, status_type_t type, 2773 unsigned status_flags, char *result, unsigned maxlen) 2774 { 2775 int r = 0; 2776 unsigned i; 2777 ssize_t sz = 0; 2778 dm_block_t nr_free_blocks_metadata = 0; 2779 dm_block_t nr_blocks_metadata = 0; 2780 char buf[BDEVNAME_SIZE]; 2781 struct cache *cache = ti->private; 2782 dm_cblock_t residency; 2783 2784 switch (type) { 2785 case STATUSTYPE_INFO: 2786 /* Commit to ensure statistics aren't out-of-date */ 2787 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) { 2788 r = dm_cache_commit(cache->cmd, false); 2789 if (r) 2790 DMERR("could not commit metadata for accurate status"); 2791 } 2792 2793 r = dm_cache_get_free_metadata_block_count(cache->cmd, 2794 &nr_free_blocks_metadata); 2795 if (r) { 2796 DMERR("could not get metadata free block count"); 2797 goto err; 2798 } 2799 2800 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata); 2801 if (r) { 2802 DMERR("could not get metadata device size"); 2803 goto err; 2804 } 2805 2806 residency = policy_residency(cache->policy); 2807 2808 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %llu ", 2809 (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), 2810 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), 2811 (unsigned long long)nr_blocks_metadata, 2812 cache->sectors_per_block, 2813 (unsigned long long) from_cblock(residency), 2814 (unsigned long long) from_cblock(cache->cache_size), 2815 (unsigned) atomic_read(&cache->stats.read_hit), 2816 (unsigned) atomic_read(&cache->stats.read_miss), 2817 (unsigned) atomic_read(&cache->stats.write_hit), 2818 (unsigned) atomic_read(&cache->stats.write_miss), 2819 (unsigned) atomic_read(&cache->stats.demotion), 2820 (unsigned) atomic_read(&cache->stats.promotion), 2821 (unsigned long long) from_cblock(cache->nr_dirty)); 2822 2823 if (writethrough_mode(&cache->features)) 2824 DMEMIT("1 writethrough "); 2825 2826 else if (passthrough_mode(&cache->features)) 2827 DMEMIT("1 passthrough "); 2828 2829 else if (writeback_mode(&cache->features)) 2830 DMEMIT("1 writeback 
"); 2831 2832 else { 2833 DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode); 2834 goto err; 2835 } 2836 2837 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); 2838 2839 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy)); 2840 if (sz < maxlen) { 2841 r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz); 2842 if (r) 2843 DMERR("policy_emit_config_values returned %d", r); 2844 } 2845 2846 break; 2847 2848 case STATUSTYPE_TABLE: 2849 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); 2850 DMEMIT("%s ", buf); 2851 format_dev_t(buf, cache->cache_dev->bdev->bd_dev); 2852 DMEMIT("%s ", buf); 2853 format_dev_t(buf, cache->origin_dev->bdev->bd_dev); 2854 DMEMIT("%s", buf); 2855 2856 for (i = 0; i < cache->nr_ctr_args - 1; i++) 2857 DMEMIT(" %s", cache->ctr_args[i]); 2858 if (cache->nr_ctr_args) 2859 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); 2860 } 2861 2862 return; 2863 2864 err: 2865 DMEMIT("Error"); 2866 } 2867 2868 /* 2869 * A cache block range can take two forms: 2870 * 2871 * i) A single cblock, eg. '3456' 2872 * ii) A begin and end cblock with dots between, eg. 123-234 2873 */ 2874 static int parse_cblock_range(struct cache *cache, const char *str, 2875 struct cblock_range *result) 2876 { 2877 char dummy; 2878 uint64_t b, e; 2879 int r; 2880 2881 /* 2882 * Try and parse form (ii) first. 2883 */ 2884 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy); 2885 if (r < 0) 2886 return r; 2887 2888 if (r == 2) { 2889 result->begin = to_cblock(b); 2890 result->end = to_cblock(e); 2891 return 0; 2892 } 2893 2894 /* 2895 * That didn't work, try form (i). 2896 */ 2897 r = sscanf(str, "%llu%c", &b, &dummy); 2898 if (r < 0) 2899 return r; 2900 2901 if (r == 1) { 2902 result->begin = to_cblock(b); 2903 result->end = to_cblock(from_cblock(result->begin) + 1u); 2904 return 0; 2905 } 2906 2907 DMERR("invalid cblock range '%s'", str); 2908 return -EINVAL; 2909 } 2910 2911 static int validate_cblock_range(struct cache *cache, struct cblock_range *range) 2912 { 2913 uint64_t b = from_cblock(range->begin); 2914 uint64_t e = from_cblock(range->end); 2915 uint64_t n = from_cblock(cache->cache_size); 2916 2917 if (b >= n) { 2918 DMERR("begin cblock out of range: %llu >= %llu", b, n); 2919 return -EINVAL; 2920 } 2921 2922 if (e > n) { 2923 DMERR("end cblock out of range: %llu > %llu", e, n); 2924 return -EINVAL; 2925 } 2926 2927 if (b >= e) { 2928 DMERR("invalid cblock range: %llu >= %llu", b, e); 2929 return -EINVAL; 2930 } 2931 2932 return 0; 2933 } 2934 2935 static int request_invalidation(struct cache *cache, struct cblock_range *range) 2936 { 2937 struct invalidation_request req; 2938 2939 INIT_LIST_HEAD(&req.list); 2940 req.cblocks = range; 2941 atomic_set(&req.complete, 0); 2942 req.err = 0; 2943 init_waitqueue_head(&req.result_wait); 2944 2945 spin_lock(&cache->invalidation_lock); 2946 list_add(&req.list, &cache->invalidation_requests); 2947 spin_unlock(&cache->invalidation_lock); 2948 wake_worker(cache); 2949 2950 wait_event(req.result_wait, atomic_read(&req.complete)); 2951 return req.err; 2952 } 2953 2954 static int process_invalidate_cblocks_message(struct cache *cache, unsigned count, 2955 const char **cblock_ranges) 2956 { 2957 int r = 0; 2958 unsigned i; 2959 struct cblock_range range; 2960 2961 if (!passthrough_mode(&cache->features)) { 2962 DMERR("cache has to be in passthrough mode for invalidation"); 2963 return -EPERM; 2964 } 2965 2966 for (i = 0; i < count; i++) { 2967 r = parse_cblock_range(cache, 
cblock_ranges[i], &range); 2968 if (r) 2969 break; 2970 2971 r = validate_cblock_range(cache, &range); 2972 if (r) 2973 break; 2974 2975 /* 2976 * Pass begin and end origin blocks to the worker and wake it. 2977 */ 2978 r = request_invalidation(cache, &range); 2979 if (r) 2980 break; 2981 } 2982 2983 return r; 2984 } 2985 2986 /* 2987 * Supports 2988 * "<key> <value>" 2989 * and 2990 * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]* 2991 * 2992 * The key migration_threshold is supported by the cache target core. 2993 */ 2994 static int cache_message(struct dm_target *ti, unsigned argc, char **argv) 2995 { 2996 struct cache *cache = ti->private; 2997 2998 if (!argc) 2999 return -EINVAL; 3000 3001 if (!strcasecmp(argv[0], "invalidate_cblocks")) 3002 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1); 3003 3004 if (argc != 2) 3005 return -EINVAL; 3006 3007 return set_config_value(cache, argv[0], argv[1]); 3008 } 3009 3010 static int cache_iterate_devices(struct dm_target *ti, 3011 iterate_devices_callout_fn fn, void *data) 3012 { 3013 int r = 0; 3014 struct cache *cache = ti->private; 3015 3016 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data); 3017 if (!r) 3018 r = fn(ti, cache->origin_dev, 0, ti->len, data); 3019 3020 return r; 3021 } 3022 3023 /* 3024 * We assume I/O is going to the origin (which is the volume 3025 * more likely to have restrictions e.g. by being striped). 3026 * (Looking up the exact location of the data would be expensive 3027 * and could always be out of date by the time the bio is submitted.) 3028 */ 3029 static int cache_bvec_merge(struct dm_target *ti, 3030 struct bvec_merge_data *bvm, 3031 struct bio_vec *biovec, int max_size) 3032 { 3033 struct cache *cache = ti->private; 3034 struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev); 3035 3036 if (!q->merge_bvec_fn) 3037 return max_size; 3038 3039 bvm->bi_bdev = cache->origin_dev->bdev; 3040 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 3041 } 3042 3043 static void set_discard_limits(struct cache *cache, struct queue_limits *limits) 3044 { 3045 /* 3046 * FIXME: these limits may be incompatible with the cache device 3047 */ 3048 limits->max_discard_sectors = cache->sectors_per_block; 3049 limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT; 3050 } 3051 3052 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) 3053 { 3054 struct cache *cache = ti->private; 3055 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 3056 3057 /* 3058 * If the system-determined stacked limits are compatible with the 3059 * cache's blocksize (io_opt is a factor) do not override them. 
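	 * For example, with 512 sector (256KB) cache blocks an io_opt of
	 * 1024 sectors is left alone, whereas an io_opt of 768 sectors is
	 * not a whole number of cache blocks, so io_min and io_opt are
	 * overridden below.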
3060 */ 3061 if (io_opt_sectors < cache->sectors_per_block || 3062 do_div(io_opt_sectors, cache->sectors_per_block)) { 3063 blk_limits_io_min(limits, 0); 3064 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); 3065 } 3066 set_discard_limits(cache, limits); 3067 } 3068 3069 /*----------------------------------------------------------------*/ 3070 3071 static struct target_type cache_target = { 3072 .name = "cache", 3073 .version = {1, 4, 0}, 3074 .module = THIS_MODULE, 3075 .ctr = cache_ctr, 3076 .dtr = cache_dtr, 3077 .map = cache_map, 3078 .end_io = cache_end_io, 3079 .postsuspend = cache_postsuspend, 3080 .preresume = cache_preresume, 3081 .resume = cache_resume, 3082 .status = cache_status, 3083 .message = cache_message, 3084 .iterate_devices = cache_iterate_devices, 3085 .merge = cache_bvec_merge, 3086 .io_hints = cache_io_hints, 3087 }; 3088 3089 static int __init dm_cache_init(void) 3090 { 3091 int r; 3092 3093 r = dm_register_target(&cache_target); 3094 if (r) { 3095 DMERR("cache target registration failed: %d", r); 3096 return r; 3097 } 3098 3099 migration_cache = KMEM_CACHE(dm_cache_migration, 0); 3100 if (!migration_cache) { 3101 dm_unregister_target(&cache_target); 3102 return -ENOMEM; 3103 } 3104 3105 return 0; 3106 } 3107 3108 static void __exit dm_cache_exit(void) 3109 { 3110 dm_unregister_target(&cache_target); 3111 kmem_cache_destroy(migration_cache); 3112 } 3113 3114 module_init(dm_cache_init); 3115 module_exit(dm_cache_exit); 3116 3117 MODULE_DESCRIPTION(DM_NAME " cache target"); 3118 MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 3119 MODULE_LICENSE("GPL"); 3120
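/*----------------------------------------------------------------*/

/*
 * Example usage (illustrative only; the device names and sizes below are
 * placeholders, not part of this driver):
 *
 * The constructor arguments, in the order parsed by parse_cache_args(),
 * are:
 *
 *	cache <metadata dev> <cache dev> <origin dev> <block size>
 *	      <#feature args> [<feature arg>]*
 *	      <policy> <#policy args> [<key> <value>]*
 *
 * <block size> is given in 512 byte sectors and must be between 64
 * (32KB) and 2097152 (1GB), and a multiple of 64.  A 10GB origin cached
 * with 256KB blocks, writeback mode and the default policy might be
 * loaded with:
 *
 *	dmsetup create cached --table "0 20971520 cache /dev/mapper/meta \
 *		/dev/mapper/fast /dev/mapper/slow 512 1 writeback \
 *		default 2 migration_threshold 2048"
 *
 * Messages accepted by cache_message():
 *
 *	dmsetup message cached 0 migration_threshold 4096
 *	dmsetup message cached 0 invalidate_cblocks 500 1000-2000
 *
 * invalidate_cblocks is only honoured in passthrough mode, and ranges
 * are half-open: 1000-2000 invalidates cblocks 1000 to 1999 inclusive.
 */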