13bd94003SHeinz Mauelshagen // SPDX-License-Identifier: GPL-2.0-only 22d1e580aSAlasdair G Kergon /* 32d1e580aSAlasdair G Kergon * Copyright (C) 2002 Sistina Software (UK) Limited. 42d1e580aSAlasdair G Kergon * Copyright (C) 2006 Red Hat GmbH 52d1e580aSAlasdair G Kergon * 62d1e580aSAlasdair G Kergon * This file is released under the GPL. 72d1e580aSAlasdair G Kergon * 82d1e580aSAlasdair G Kergon * Kcopyd provides a simple interface for copying an area of one 92d1e580aSAlasdair G Kergon * block-device to one or more other block-devices, with an asynchronous 102d1e580aSAlasdair G Kergon * completion notification. 112d1e580aSAlasdair G Kergon */ 122d1e580aSAlasdair G Kergon 132d1e580aSAlasdair G Kergon #include <linux/types.h> 1460063497SArun Sharma #include <linux/atomic.h> 152d1e580aSAlasdair G Kergon #include <linux/blkdev.h> 162d1e580aSAlasdair G Kergon #include <linux/fs.h> 172d1e580aSAlasdair G Kergon #include <linux/init.h> 182d1e580aSAlasdair G Kergon #include <linux/list.h> 192d1e580aSAlasdair G Kergon #include <linux/mempool.h> 202d1e580aSAlasdair G Kergon #include <linux/module.h> 212d1e580aSAlasdair G Kergon #include <linux/pagemap.h> 222d1e580aSAlasdair G Kergon #include <linux/slab.h> 232d1e580aSAlasdair G Kergon #include <linux/vmalloc.h> 242d1e580aSAlasdair G Kergon #include <linux/workqueue.h> 252d1e580aSAlasdair G Kergon #include <linux/mutex.h> 26df5d2e90SMikulas Patocka #include <linux/delay.h> 27586e80e6SMikulas Patocka #include <linux/device-mapper.h> 28a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h> 292d1e580aSAlasdair G Kergon 304cc96131SMike Snitzer #include "dm-core.h" 312d1e580aSAlasdair G Kergon 32c6ea41fbSMikulas Patocka #define SPLIT_COUNT 8 33c6ea41fbSMikulas Patocka #define MIN_JOBS 8 34c663e040SNikos Tsironis 35c663e040SNikos Tsironis #define DEFAULT_SUB_JOB_SIZE_KB 512 36c663e040SNikos Tsironis #define MAX_SUB_JOB_SIZE_KB 1024 37c663e040SNikos Tsironis 3886a3238cSHeinz Mauelshagen static unsigned int kcopyd_subjob_size_kb = DEFAULT_SUB_JOB_SIZE_KB; 39c663e040SNikos Tsironis 40c663e040SNikos Tsironis module_param(kcopyd_subjob_size_kb, uint, S_IRUGO | S_IWUSR); 41c663e040SNikos Tsironis MODULE_PARM_DESC(kcopyd_subjob_size_kb, "Sub-job size for dm-kcopyd clients"); 42c663e040SNikos Tsironis 4386a3238cSHeinz Mauelshagen static unsigned int dm_get_kcopyd_subjob_size(void) 44c663e040SNikos Tsironis { 4586a3238cSHeinz Mauelshagen unsigned int sub_job_size_kb; 46c663e040SNikos Tsironis 47c663e040SNikos Tsironis sub_job_size_kb = __dm_get_module_param(&kcopyd_subjob_size_kb, 48c663e040SNikos Tsironis DEFAULT_SUB_JOB_SIZE_KB, 49c663e040SNikos Tsironis MAX_SUB_JOB_SIZE_KB); 50c663e040SNikos Tsironis 51c663e040SNikos Tsironis return sub_job_size_kb << 1; 52c663e040SNikos Tsironis } 53c6ea41fbSMikulas Patocka 54*a4a82ce3SHeinz Mauelshagen /* 55*a4a82ce3SHeinz Mauelshagen *---------------------------------------------------------------- 562d1e580aSAlasdair G Kergon * Each kcopyd client has its own little pool of preallocated 572d1e580aSAlasdair G Kergon * pages for kcopyd io. 58*a4a82ce3SHeinz Mauelshagen *--------------------------------------------------------------- 59*a4a82ce3SHeinz Mauelshagen */ 602d1e580aSAlasdair G Kergon struct dm_kcopyd_client { 612d1e580aSAlasdair G Kergon struct page_list *pages; 6286a3238cSHeinz Mauelshagen unsigned int nr_reserved_pages; 6386a3238cSHeinz Mauelshagen unsigned int nr_free_pages; 6486a3238cSHeinz Mauelshagen unsigned int sub_job_size; 652d1e580aSAlasdair G Kergon 662d1e580aSAlasdair G Kergon struct dm_io_client *io_client; 672d1e580aSAlasdair G Kergon 682d1e580aSAlasdair G Kergon wait_queue_head_t destroyq; 692d1e580aSAlasdair G Kergon 706f1c819cSKent Overstreet mempool_t job_pool; 712d1e580aSAlasdair G Kergon 722d1e580aSAlasdair G Kergon struct workqueue_struct *kcopyd_wq; 732d1e580aSAlasdair G Kergon struct work_struct kcopyd_work; 742d1e580aSAlasdair G Kergon 75df5d2e90SMikulas Patocka struct dm_kcopyd_throttle *throttle; 76df5d2e90SMikulas Patocka 7772d711c8SMike Snitzer atomic_t nr_jobs; 7872d711c8SMike Snitzer 792d1e580aSAlasdair G Kergon /* 80d7e6b8dfSNikos Tsironis * We maintain four lists of jobs: 812d1e580aSAlasdair G Kergon * 822d1e580aSAlasdair G Kergon * i) jobs waiting for pages 832d1e580aSAlasdair G Kergon * ii) jobs that have pages, and are waiting for the io to be issued. 84d7e6b8dfSNikos Tsironis * iii) jobs that don't need to do any IO and just run a callback 85d7e6b8dfSNikos Tsironis * iv) jobs that have completed. 862d1e580aSAlasdair G Kergon * 87d7e6b8dfSNikos Tsironis * All four of these are protected by job_lock. 882d1e580aSAlasdair G Kergon */ 892d1e580aSAlasdair G Kergon spinlock_t job_lock; 90d7e6b8dfSNikos Tsironis struct list_head callback_jobs; 912d1e580aSAlasdair G Kergon struct list_head complete_jobs; 922d1e580aSAlasdair G Kergon struct list_head io_jobs; 932d1e580aSAlasdair G Kergon struct list_head pages_jobs; 942d1e580aSAlasdair G Kergon }; 952d1e580aSAlasdair G Kergon 967f069653SMikulas Patocka static struct page_list zero_page_list; 977f069653SMikulas Patocka 98df5d2e90SMikulas Patocka static DEFINE_SPINLOCK(throttle_spinlock); 99df5d2e90SMikulas Patocka 100df5d2e90SMikulas Patocka /* 101df5d2e90SMikulas Patocka * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period. 102df5d2e90SMikulas Patocka * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided 103df5d2e90SMikulas Patocka * by 2. 104df5d2e90SMikulas Patocka */ 105df5d2e90SMikulas Patocka #define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ 106df5d2e90SMikulas Patocka 107df5d2e90SMikulas Patocka /* 108df5d2e90SMikulas Patocka * Sleep this number of milliseconds. 109df5d2e90SMikulas Patocka * 110df5d2e90SMikulas Patocka * The value was decided experimentally. 111df5d2e90SMikulas Patocka * Smaller values seem to cause an increased copy rate above the limit. 112df5d2e90SMikulas Patocka * The reason for this is unknown but possibly due to jiffies rounding errors 113df5d2e90SMikulas Patocka * or read/write cache inside the disk. 114df5d2e90SMikulas Patocka */ 115238d991fSHeinz Mauelshagen #define SLEEP_USEC 100000 116df5d2e90SMikulas Patocka 117df5d2e90SMikulas Patocka /* 118df5d2e90SMikulas Patocka * Maximum number of sleep events. There is a theoretical livelock if more 119df5d2e90SMikulas Patocka * kcopyd clients do work simultaneously which this limit avoids. 120df5d2e90SMikulas Patocka */ 121df5d2e90SMikulas Patocka #define MAX_SLEEPS 10 122df5d2e90SMikulas Patocka 123df5d2e90SMikulas Patocka static void io_job_start(struct dm_kcopyd_throttle *t) 124df5d2e90SMikulas Patocka { 12586a3238cSHeinz Mauelshagen unsigned int throttle, now, difference; 126df5d2e90SMikulas Patocka int slept = 0, skew; 127df5d2e90SMikulas Patocka 128df5d2e90SMikulas Patocka if (unlikely(!t)) 129df5d2e90SMikulas Patocka return; 130df5d2e90SMikulas Patocka 131df5d2e90SMikulas Patocka try_again: 132df5d2e90SMikulas Patocka spin_lock_irq(&throttle_spinlock); 133df5d2e90SMikulas Patocka 1346aa7de05SMark Rutland throttle = READ_ONCE(t->throttle); 135df5d2e90SMikulas Patocka 136df5d2e90SMikulas Patocka if (likely(throttle >= 100)) 137df5d2e90SMikulas Patocka goto skip_limit; 138df5d2e90SMikulas Patocka 139df5d2e90SMikulas Patocka now = jiffies; 140df5d2e90SMikulas Patocka difference = now - t->last_jiffies; 141df5d2e90SMikulas Patocka t->last_jiffies = now; 142df5d2e90SMikulas Patocka if (t->num_io_jobs) 143df5d2e90SMikulas Patocka t->io_period += difference; 144df5d2e90SMikulas Patocka t->total_period += difference; 145df5d2e90SMikulas Patocka 146df5d2e90SMikulas Patocka /* 147df5d2e90SMikulas Patocka * Maintain sane values if we got a temporary overflow. 148df5d2e90SMikulas Patocka */ 149df5d2e90SMikulas Patocka if (unlikely(t->io_period > t->total_period)) 150df5d2e90SMikulas Patocka t->io_period = t->total_period; 151df5d2e90SMikulas Patocka 152df5d2e90SMikulas Patocka if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) { 153df5d2e90SMikulas Patocka int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT); 154df5d2e90SMikulas Patocka t->total_period >>= shift; 155df5d2e90SMikulas Patocka t->io_period >>= shift; 156df5d2e90SMikulas Patocka } 157df5d2e90SMikulas Patocka 158df5d2e90SMikulas Patocka skew = t->io_period - throttle * t->total_period / 100; 159df5d2e90SMikulas Patocka 160df5d2e90SMikulas Patocka if (unlikely(skew > 0) && slept < MAX_SLEEPS) { 161df5d2e90SMikulas Patocka slept++; 162df5d2e90SMikulas Patocka spin_unlock_irq(&throttle_spinlock); 163238d991fSHeinz Mauelshagen fsleep(SLEEP_USEC); 164df5d2e90SMikulas Patocka goto try_again; 165df5d2e90SMikulas Patocka } 166df5d2e90SMikulas Patocka 167df5d2e90SMikulas Patocka skip_limit: 168df5d2e90SMikulas Patocka t->num_io_jobs++; 169df5d2e90SMikulas Patocka 170df5d2e90SMikulas Patocka spin_unlock_irq(&throttle_spinlock); 171df5d2e90SMikulas Patocka } 172df5d2e90SMikulas Patocka 173df5d2e90SMikulas Patocka static void io_job_finish(struct dm_kcopyd_throttle *t) 174df5d2e90SMikulas Patocka { 175df5d2e90SMikulas Patocka unsigned long flags; 176df5d2e90SMikulas Patocka 177df5d2e90SMikulas Patocka if (unlikely(!t)) 178df5d2e90SMikulas Patocka return; 179df5d2e90SMikulas Patocka 180df5d2e90SMikulas Patocka spin_lock_irqsave(&throttle_spinlock, flags); 181df5d2e90SMikulas Patocka 182df5d2e90SMikulas Patocka t->num_io_jobs--; 183df5d2e90SMikulas Patocka 1846aa7de05SMark Rutland if (likely(READ_ONCE(t->throttle) >= 100)) 185df5d2e90SMikulas Patocka goto skip_limit; 186df5d2e90SMikulas Patocka 187df5d2e90SMikulas Patocka if (!t->num_io_jobs) { 18886a3238cSHeinz Mauelshagen unsigned int now, difference; 189df5d2e90SMikulas Patocka 190df5d2e90SMikulas Patocka now = jiffies; 191df5d2e90SMikulas Patocka difference = now - t->last_jiffies; 192df5d2e90SMikulas Patocka t->last_jiffies = now; 193df5d2e90SMikulas Patocka 194df5d2e90SMikulas Patocka t->io_period += difference; 195df5d2e90SMikulas Patocka t->total_period += difference; 196df5d2e90SMikulas Patocka 197df5d2e90SMikulas Patocka /* 198df5d2e90SMikulas Patocka * Maintain sane values if we got a temporary overflow. 199df5d2e90SMikulas Patocka */ 200df5d2e90SMikulas Patocka if (unlikely(t->io_period > t->total_period)) 201df5d2e90SMikulas Patocka t->io_period = t->total_period; 202df5d2e90SMikulas Patocka } 203df5d2e90SMikulas Patocka 204df5d2e90SMikulas Patocka skip_limit: 205df5d2e90SMikulas Patocka spin_unlock_irqrestore(&throttle_spinlock, flags); 206df5d2e90SMikulas Patocka } 207df5d2e90SMikulas Patocka 208df5d2e90SMikulas Patocka 2092d1e580aSAlasdair G Kergon static void wake(struct dm_kcopyd_client *kc) 2102d1e580aSAlasdair G Kergon { 2112d1e580aSAlasdair G Kergon queue_work(kc->kcopyd_wq, &kc->kcopyd_work); 2122d1e580aSAlasdair G Kergon } 2132d1e580aSAlasdair G Kergon 214d0471458SMikulas Patocka /* 215d0471458SMikulas Patocka * Obtain one page for the use of kcopyd. 216d0471458SMikulas Patocka */ 217f99b55eeSMikulas Patocka static struct page_list *alloc_pl(gfp_t gfp) 2182d1e580aSAlasdair G Kergon { 2192d1e580aSAlasdair G Kergon struct page_list *pl; 2202d1e580aSAlasdair G Kergon 221f99b55eeSMikulas Patocka pl = kmalloc(sizeof(*pl), gfp); 2222d1e580aSAlasdair G Kergon if (!pl) 2232d1e580aSAlasdair G Kergon return NULL; 2242d1e580aSAlasdair G Kergon 225949d49ecSMikulas Patocka pl->page = alloc_page(gfp | __GFP_HIGHMEM); 2262d1e580aSAlasdair G Kergon if (!pl->page) { 2272d1e580aSAlasdair G Kergon kfree(pl); 2282d1e580aSAlasdair G Kergon return NULL; 2292d1e580aSAlasdair G Kergon } 2302d1e580aSAlasdair G Kergon 2312d1e580aSAlasdair G Kergon return pl; 2322d1e580aSAlasdair G Kergon } 2332d1e580aSAlasdair G Kergon 2342d1e580aSAlasdair G Kergon static void free_pl(struct page_list *pl) 2352d1e580aSAlasdair G Kergon { 2362d1e580aSAlasdair G Kergon __free_page(pl->page); 2372d1e580aSAlasdair G Kergon kfree(pl); 2382d1e580aSAlasdair G Kergon } 2392d1e580aSAlasdair G Kergon 240d0471458SMikulas Patocka /* 241d0471458SMikulas Patocka * Add the provided pages to a client's free page list, releasing 242d0471458SMikulas Patocka * back to the system any beyond the reserved_pages limit. 243d0471458SMikulas Patocka */ 244d0471458SMikulas Patocka static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) 245d0471458SMikulas Patocka { 246d0471458SMikulas Patocka struct page_list *next; 247d0471458SMikulas Patocka 248d0471458SMikulas Patocka do { 249d0471458SMikulas Patocka next = pl->next; 250d0471458SMikulas Patocka 251d0471458SMikulas Patocka if (kc->nr_free_pages >= kc->nr_reserved_pages) 252d0471458SMikulas Patocka free_pl(pl); 253d0471458SMikulas Patocka else { 254d0471458SMikulas Patocka pl->next = kc->pages; 255d0471458SMikulas Patocka kc->pages = pl; 256d0471458SMikulas Patocka kc->nr_free_pages++; 257d0471458SMikulas Patocka } 258d0471458SMikulas Patocka 259d0471458SMikulas Patocka pl = next; 260d0471458SMikulas Patocka } while (pl); 261d0471458SMikulas Patocka } 262d0471458SMikulas Patocka 2632d1e580aSAlasdair G Kergon static int kcopyd_get_pages(struct dm_kcopyd_client *kc, 2642d1e580aSAlasdair G Kergon unsigned int nr, struct page_list **pages) 2652d1e580aSAlasdair G Kergon { 2662d1e580aSAlasdair G Kergon struct page_list *pl; 2672d1e580aSAlasdair G Kergon 268d0471458SMikulas Patocka *pages = NULL; 2692d1e580aSAlasdair G Kergon 270d0471458SMikulas Patocka do { 271d0164adcSMel Gorman pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM); 272d0471458SMikulas Patocka if (unlikely(!pl)) { 273d0471458SMikulas Patocka /* Use reserved pages */ 274d0471458SMikulas Patocka pl = kc->pages; 275d0471458SMikulas Patocka if (unlikely(!pl)) 276d0471458SMikulas Patocka goto out_of_memory; 2772d1e580aSAlasdair G Kergon kc->pages = pl->next; 278d0471458SMikulas Patocka kc->nr_free_pages--; 279d0471458SMikulas Patocka } 280d0471458SMikulas Patocka pl->next = *pages; 281d0471458SMikulas Patocka *pages = pl; 282d0471458SMikulas Patocka } while (--nr); 2832d1e580aSAlasdair G Kergon 2842d1e580aSAlasdair G Kergon return 0; 2852d1e580aSAlasdair G Kergon 286d0471458SMikulas Patocka out_of_memory: 287d0471458SMikulas Patocka if (*pages) 288d0471458SMikulas Patocka kcopyd_put_pages(kc, *pages); 289d0471458SMikulas Patocka return -ENOMEM; 2902d1e580aSAlasdair G Kergon } 2912d1e580aSAlasdair G Kergon 2922d1e580aSAlasdair G Kergon /* 2932d1e580aSAlasdair G Kergon * These three functions resize the page pool. 2942d1e580aSAlasdair G Kergon */ 2952d1e580aSAlasdair G Kergon static void drop_pages(struct page_list *pl) 2962d1e580aSAlasdair G Kergon { 2972d1e580aSAlasdair G Kergon struct page_list *next; 2982d1e580aSAlasdair G Kergon 2992d1e580aSAlasdair G Kergon while (pl) { 3002d1e580aSAlasdair G Kergon next = pl->next; 3012d1e580aSAlasdair G Kergon free_pl(pl); 3022d1e580aSAlasdair G Kergon pl = next; 3032d1e580aSAlasdair G Kergon } 3042d1e580aSAlasdair G Kergon } 3052d1e580aSAlasdair G Kergon 306d0471458SMikulas Patocka /* 307d0471458SMikulas Patocka * Allocate and reserve nr_pages for the use of a specific client. 308d0471458SMikulas Patocka */ 30986a3238cSHeinz Mauelshagen static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned int nr_pages) 3102d1e580aSAlasdair G Kergon { 31186a3238cSHeinz Mauelshagen unsigned int i; 3122d1e580aSAlasdair G Kergon struct page_list *pl = NULL, *next; 3132d1e580aSAlasdair G Kergon 314d0471458SMikulas Patocka for (i = 0; i < nr_pages; i++) { 315f99b55eeSMikulas Patocka next = alloc_pl(GFP_KERNEL); 3162d1e580aSAlasdair G Kergon if (!next) { 3172d1e580aSAlasdair G Kergon if (pl) 3182d1e580aSAlasdair G Kergon drop_pages(pl); 3192d1e580aSAlasdair G Kergon return -ENOMEM; 3202d1e580aSAlasdair G Kergon } 3212d1e580aSAlasdair G Kergon next->next = pl; 3222d1e580aSAlasdair G Kergon pl = next; 3232d1e580aSAlasdair G Kergon } 3242d1e580aSAlasdair G Kergon 325d0471458SMikulas Patocka kc->nr_reserved_pages += nr_pages; 3262d1e580aSAlasdair G Kergon kcopyd_put_pages(kc, pl); 327d0471458SMikulas Patocka 3282d1e580aSAlasdair G Kergon return 0; 3292d1e580aSAlasdair G Kergon } 3302d1e580aSAlasdair G Kergon 3312d1e580aSAlasdair G Kergon static void client_free_pages(struct dm_kcopyd_client *kc) 3322d1e580aSAlasdair G Kergon { 333d0471458SMikulas Patocka BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages); 3342d1e580aSAlasdair G Kergon drop_pages(kc->pages); 3352d1e580aSAlasdair G Kergon kc->pages = NULL; 336d0471458SMikulas Patocka kc->nr_free_pages = kc->nr_reserved_pages = 0; 3372d1e580aSAlasdair G Kergon } 3382d1e580aSAlasdair G Kergon 339*a4a82ce3SHeinz Mauelshagen /* 340*a4a82ce3SHeinz Mauelshagen *--------------------------------------------------------------- 3412d1e580aSAlasdair G Kergon * kcopyd_jobs need to be allocated by the *clients* of kcopyd, 3422d1e580aSAlasdair G Kergon * for this reason we use a mempool to prevent the client from 3432d1e580aSAlasdair G Kergon * ever having to do io (which could cause a deadlock). 344*a4a82ce3SHeinz Mauelshagen *--------------------------------------------------------------- 345*a4a82ce3SHeinz Mauelshagen */ 3462d1e580aSAlasdair G Kergon struct kcopyd_job { 3472d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc; 3482d1e580aSAlasdair G Kergon struct list_head list; 34986a3238cSHeinz Mauelshagen unsigned int flags; 3502d1e580aSAlasdair G Kergon 3512d1e580aSAlasdair G Kergon /* 3522d1e580aSAlasdair G Kergon * Error state of the job. 3532d1e580aSAlasdair G Kergon */ 3542d1e580aSAlasdair G Kergon int read_err; 3552d1e580aSAlasdair G Kergon unsigned long write_err; 3562d1e580aSAlasdair G Kergon 3572d1e580aSAlasdair G Kergon /* 35871f7113dSBart Van Assche * REQ_OP_READ, REQ_OP_WRITE or REQ_OP_WRITE_ZEROES. 3592d1e580aSAlasdair G Kergon */ 36071f7113dSBart Van Assche enum req_op op; 3612d1e580aSAlasdair G Kergon struct dm_io_region source; 3622d1e580aSAlasdair G Kergon 3632d1e580aSAlasdair G Kergon /* 3642d1e580aSAlasdair G Kergon * The destinations for the transfer. 3652d1e580aSAlasdair G Kergon */ 3662d1e580aSAlasdair G Kergon unsigned int num_dests; 3672d1e580aSAlasdair G Kergon struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; 3682d1e580aSAlasdair G Kergon 3692d1e580aSAlasdair G Kergon struct page_list *pages; 3702d1e580aSAlasdair G Kergon 3712d1e580aSAlasdair G Kergon /* 3722d1e580aSAlasdair G Kergon * Set this to ensure you are notified when the job has 3732d1e580aSAlasdair G Kergon * completed. 'context' is for callback to use. 3742d1e580aSAlasdair G Kergon */ 3752d1e580aSAlasdair G Kergon dm_kcopyd_notify_fn fn; 3762d1e580aSAlasdair G Kergon void *context; 3772d1e580aSAlasdair G Kergon 3782d1e580aSAlasdair G Kergon /* 3792d1e580aSAlasdair G Kergon * These fields are only used if the job has been split 3802d1e580aSAlasdair G Kergon * into more manageable parts. 3812d1e580aSAlasdair G Kergon */ 3822d1e580aSAlasdair G Kergon struct mutex lock; 3832d1e580aSAlasdair G Kergon atomic_t sub_jobs; 3842d1e580aSAlasdair G Kergon sector_t progress; 385b73c67c2SDamien Le Moal sector_t write_offset; 3862d1e580aSAlasdair G Kergon 387c6ea41fbSMikulas Patocka struct kcopyd_job *master_job; 388c6ea41fbSMikulas Patocka }; 3892d1e580aSAlasdair G Kergon 3902d1e580aSAlasdair G Kergon static struct kmem_cache *_job_cache; 3912d1e580aSAlasdair G Kergon 3922d1e580aSAlasdair G Kergon int __init dm_kcopyd_init(void) 3932d1e580aSAlasdair G Kergon { 394c6ea41fbSMikulas Patocka _job_cache = kmem_cache_create("kcopyd_job", 395c6ea41fbSMikulas Patocka sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1), 396c6ea41fbSMikulas Patocka __alignof__(struct kcopyd_job), 0, NULL); 3972d1e580aSAlasdair G Kergon if (!_job_cache) 3982d1e580aSAlasdair G Kergon return -ENOMEM; 3992d1e580aSAlasdair G Kergon 4007f069653SMikulas Patocka zero_page_list.next = &zero_page_list; 4017f069653SMikulas Patocka zero_page_list.page = ZERO_PAGE(0); 4027f069653SMikulas Patocka 4032d1e580aSAlasdair G Kergon return 0; 4042d1e580aSAlasdair G Kergon } 4052d1e580aSAlasdair G Kergon 4062d1e580aSAlasdair G Kergon void dm_kcopyd_exit(void) 4072d1e580aSAlasdair G Kergon { 4082d1e580aSAlasdair G Kergon kmem_cache_destroy(_job_cache); 4092d1e580aSAlasdair G Kergon _job_cache = NULL; 4102d1e580aSAlasdair G Kergon } 4112d1e580aSAlasdair G Kergon 4122d1e580aSAlasdair G Kergon /* 4132d1e580aSAlasdair G Kergon * Functions to push and pop a job onto the head of a given job 4142d1e580aSAlasdair G Kergon * list. 4152d1e580aSAlasdair G Kergon */ 416b73c67c2SDamien Le Moal static struct kcopyd_job *pop_io_job(struct list_head *jobs, 417b73c67c2SDamien Le Moal struct dm_kcopyd_client *kc) 418b73c67c2SDamien Le Moal { 419b73c67c2SDamien Le Moal struct kcopyd_job *job; 420b73c67c2SDamien Le Moal 421b73c67c2SDamien Le Moal /* 422b73c67c2SDamien Le Moal * For I/O jobs, pop any read, any write without sequential write 423b73c67c2SDamien Le Moal * constraint and sequential writes that are at the right position. 424b73c67c2SDamien Le Moal */ 425b73c67c2SDamien Le Moal list_for_each_entry(job, jobs, list) { 42671f7113dSBart Van Assche if (job->op == REQ_OP_READ || 42771f7113dSBart Van Assche !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { 428b73c67c2SDamien Le Moal list_del(&job->list); 429b73c67c2SDamien Le Moal return job; 430b73c67c2SDamien Le Moal } 431b73c67c2SDamien Le Moal 432b73c67c2SDamien Le Moal if (job->write_offset == job->master_job->write_offset) { 433b73c67c2SDamien Le Moal job->master_job->write_offset += job->source.count; 434b73c67c2SDamien Le Moal list_del(&job->list); 435b73c67c2SDamien Le Moal return job; 436b73c67c2SDamien Le Moal } 437b73c67c2SDamien Le Moal } 438b73c67c2SDamien Le Moal 439b73c67c2SDamien Le Moal return NULL; 440b73c67c2SDamien Le Moal } 441b73c67c2SDamien Le Moal 4422d1e580aSAlasdair G Kergon static struct kcopyd_job *pop(struct list_head *jobs, 4432d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc) 4442d1e580aSAlasdair G Kergon { 4452d1e580aSAlasdair G Kergon struct kcopyd_job *job = NULL; 4462d1e580aSAlasdair G Kergon 4476bcd658fSMikulas Patocka spin_lock_irq(&kc->job_lock); 4482d1e580aSAlasdair G Kergon 4492d1e580aSAlasdair G Kergon if (!list_empty(jobs)) { 450b73c67c2SDamien Le Moal if (jobs == &kc->io_jobs) 451b73c67c2SDamien Le Moal job = pop_io_job(jobs, kc); 452b73c67c2SDamien Le Moal else { 4532d1e580aSAlasdair G Kergon job = list_entry(jobs->next, struct kcopyd_job, list); 4542d1e580aSAlasdair G Kergon list_del(&job->list); 4552d1e580aSAlasdair G Kergon } 456b73c67c2SDamien Le Moal } 4576bcd658fSMikulas Patocka spin_unlock_irq(&kc->job_lock); 4582d1e580aSAlasdair G Kergon 4592d1e580aSAlasdair G Kergon return job; 4602d1e580aSAlasdair G Kergon } 4612d1e580aSAlasdair G Kergon 4622d1e580aSAlasdair G Kergon static void push(struct list_head *jobs, struct kcopyd_job *job) 4632d1e580aSAlasdair G Kergon { 4642d1e580aSAlasdair G Kergon unsigned long flags; 4652d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = job->kc; 4662d1e580aSAlasdair G Kergon 4672d1e580aSAlasdair G Kergon spin_lock_irqsave(&kc->job_lock, flags); 4682d1e580aSAlasdair G Kergon list_add_tail(&job->list, jobs); 4692d1e580aSAlasdair G Kergon spin_unlock_irqrestore(&kc->job_lock, flags); 4702d1e580aSAlasdair G Kergon } 4712d1e580aSAlasdair G Kergon 472b673c3a8SKazuo Ito 473b673c3a8SKazuo Ito static void push_head(struct list_head *jobs, struct kcopyd_job *job) 474b673c3a8SKazuo Ito { 475b673c3a8SKazuo Ito struct dm_kcopyd_client *kc = job->kc; 476b673c3a8SKazuo Ito 4776bcd658fSMikulas Patocka spin_lock_irq(&kc->job_lock); 478b673c3a8SKazuo Ito list_add(&job->list, jobs); 4796bcd658fSMikulas Patocka spin_unlock_irq(&kc->job_lock); 480b673c3a8SKazuo Ito } 481b673c3a8SKazuo Ito 4822d1e580aSAlasdair G Kergon /* 4832d1e580aSAlasdair G Kergon * These three functions process 1 item from the corresponding 4842d1e580aSAlasdair G Kergon * job list. 4852d1e580aSAlasdair G Kergon * 4862d1e580aSAlasdair G Kergon * They return: 4872d1e580aSAlasdair G Kergon * < 0: error 4882d1e580aSAlasdair G Kergon * 0: success 4892d1e580aSAlasdair G Kergon * > 0: can't process yet. 4902d1e580aSAlasdair G Kergon */ 4912d1e580aSAlasdair G Kergon static int run_complete_job(struct kcopyd_job *job) 4922d1e580aSAlasdair G Kergon { 4932d1e580aSAlasdair G Kergon void *context = job->context; 4942d1e580aSAlasdair G Kergon int read_err = job->read_err; 4952d1e580aSAlasdair G Kergon unsigned long write_err = job->write_err; 4962d1e580aSAlasdair G Kergon dm_kcopyd_notify_fn fn = job->fn; 4972d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = job->kc; 4982d1e580aSAlasdair G Kergon 4997f069653SMikulas Patocka if (job->pages && job->pages != &zero_page_list) 5002d1e580aSAlasdair G Kergon kcopyd_put_pages(kc, job->pages); 501c6ea41fbSMikulas Patocka /* 502c6ea41fbSMikulas Patocka * If this is the master job, the sub jobs have already 503c6ea41fbSMikulas Patocka * completed so we can free everything. 504c6ea41fbSMikulas Patocka */ 505d5ffebddSMike Snitzer if (job->master_job == job) { 506d5ffebddSMike Snitzer mutex_destroy(&job->lock); 5076f1c819cSKent Overstreet mempool_free(job, &kc->job_pool); 508d5ffebddSMike Snitzer } 5092d1e580aSAlasdair G Kergon fn(read_err, write_err, context); 5102d1e580aSAlasdair G Kergon 5112d1e580aSAlasdair G Kergon if (atomic_dec_and_test(&kc->nr_jobs)) 5122d1e580aSAlasdair G Kergon wake_up(&kc->destroyq); 5132d1e580aSAlasdair G Kergon 514784c9a29SJohn Pittman cond_resched(); 515784c9a29SJohn Pittman 5162d1e580aSAlasdair G Kergon return 0; 5172d1e580aSAlasdair G Kergon } 5182d1e580aSAlasdair G Kergon 5192d1e580aSAlasdair G Kergon static void complete_io(unsigned long error, void *context) 5202d1e580aSAlasdair G Kergon { 5212d1e580aSAlasdair G Kergon struct kcopyd_job *job = (struct kcopyd_job *) context; 5222d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = job->kc; 5232d1e580aSAlasdair G Kergon 524df5d2e90SMikulas Patocka io_job_finish(kc->throttle); 525df5d2e90SMikulas Patocka 5262d1e580aSAlasdair G Kergon if (error) { 52771f7113dSBart Van Assche if (op_is_write(job->op)) 5282d1e580aSAlasdair G Kergon job->write_err |= error; 5292d1e580aSAlasdair G Kergon else 5302d1e580aSAlasdair G Kergon job->read_err = 1; 5312d1e580aSAlasdair G Kergon 532db2351ebSMikulas Patocka if (!(job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))) { 5332d1e580aSAlasdair G Kergon push(&kc->complete_jobs, job); 5342d1e580aSAlasdair G Kergon wake(kc); 5352d1e580aSAlasdair G Kergon return; 5362d1e580aSAlasdair G Kergon } 5372d1e580aSAlasdair G Kergon } 5382d1e580aSAlasdair G Kergon 53971f7113dSBart Van Assche if (op_is_write(job->op)) 5402d1e580aSAlasdair G Kergon push(&kc->complete_jobs, job); 5412d1e580aSAlasdair G Kergon 5422d1e580aSAlasdair G Kergon else { 54371f7113dSBart Van Assche job->op = REQ_OP_WRITE; 5442d1e580aSAlasdair G Kergon push(&kc->io_jobs, job); 5452d1e580aSAlasdair G Kergon } 5462d1e580aSAlasdair G Kergon 5472d1e580aSAlasdair G Kergon wake(kc); 5482d1e580aSAlasdair G Kergon } 5492d1e580aSAlasdair G Kergon 5502d1e580aSAlasdair G Kergon /* 5512d1e580aSAlasdair G Kergon * Request io on as many buffer heads as we can currently get for 5522d1e580aSAlasdair G Kergon * a particular job. 5532d1e580aSAlasdair G Kergon */ 5542d1e580aSAlasdair G Kergon static int run_io_job(struct kcopyd_job *job) 5552d1e580aSAlasdair G Kergon { 5562d1e580aSAlasdair G Kergon int r; 5572d1e580aSAlasdair G Kergon struct dm_io_request io_req = { 55871f7113dSBart Van Assche .bi_opf = job->op, 5592d1e580aSAlasdair G Kergon .mem.type = DM_IO_PAGE_LIST, 5602d1e580aSAlasdair G Kergon .mem.ptr.pl = job->pages, 5614622afb3SMikulas Patocka .mem.offset = 0, 5622d1e580aSAlasdair G Kergon .notify.fn = complete_io, 5632d1e580aSAlasdair G Kergon .notify.context = job, 5642d1e580aSAlasdair G Kergon .client = job->kc->io_client, 5652d1e580aSAlasdair G Kergon }; 5662d1e580aSAlasdair G Kergon 567b73c67c2SDamien Le Moal /* 568b73c67c2SDamien Le Moal * If we need to write sequentially and some reads or writes failed, 569b73c67c2SDamien Le Moal * no point in continuing. 570b73c67c2SDamien Le Moal */ 571db2351ebSMikulas Patocka if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) && 572d1fef414SDmitry Fomichev job->master_job->write_err) { 573d1fef414SDmitry Fomichev job->write_err = job->master_job->write_err; 574b73c67c2SDamien Le Moal return -EIO; 575d1fef414SDmitry Fomichev } 576b73c67c2SDamien Le Moal 577df5d2e90SMikulas Patocka io_job_start(job->kc->throttle); 578df5d2e90SMikulas Patocka 57971f7113dSBart Van Assche if (job->op == REQ_OP_READ) 5802d1e580aSAlasdair G Kergon r = dm_io(&io_req, 1, &job->source, NULL); 581721a9602SJens Axboe else 5822d1e580aSAlasdair G Kergon r = dm_io(&io_req, job->num_dests, job->dests, NULL); 5832d1e580aSAlasdair G Kergon 5842d1e580aSAlasdair G Kergon return r; 5852d1e580aSAlasdair G Kergon } 5862d1e580aSAlasdair G Kergon 5872d1e580aSAlasdair G Kergon static int run_pages_job(struct kcopyd_job *job) 5882d1e580aSAlasdair G Kergon { 5892d1e580aSAlasdair G Kergon int r; 59086a3238cSHeinz Mauelshagen unsigned int nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); 5912d1e580aSAlasdair G Kergon 5925bf45a3dSMikulas Patocka r = kcopyd_get_pages(job->kc, nr_pages, &job->pages); 5932d1e580aSAlasdair G Kergon if (!r) { 5942d1e580aSAlasdair G Kergon /* this job is ready for io */ 5952d1e580aSAlasdair G Kergon push(&job->kc->io_jobs, job); 5962d1e580aSAlasdair G Kergon return 0; 5972d1e580aSAlasdair G Kergon } 5982d1e580aSAlasdair G Kergon 5992d1e580aSAlasdair G Kergon if (r == -ENOMEM) 6002d1e580aSAlasdair G Kergon /* can't complete now */ 6012d1e580aSAlasdair G Kergon return 1; 6022d1e580aSAlasdair G Kergon 6032d1e580aSAlasdair G Kergon return r; 6042d1e580aSAlasdair G Kergon } 6052d1e580aSAlasdair G Kergon 6062d1e580aSAlasdair G Kergon /* 6072d1e580aSAlasdair G Kergon * Run through a list for as long as possible. Returns the count 6082d1e580aSAlasdair G Kergon * of successful jobs. 6092d1e580aSAlasdair G Kergon */ 6102d1e580aSAlasdair G Kergon static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, 6112d1e580aSAlasdair G Kergon int (*fn) (struct kcopyd_job *)) 6122d1e580aSAlasdair G Kergon { 6132d1e580aSAlasdair G Kergon struct kcopyd_job *job; 6142d1e580aSAlasdair G Kergon int r, count = 0; 6152d1e580aSAlasdair G Kergon 6162d1e580aSAlasdair G Kergon while ((job = pop(jobs, kc))) { 6172d1e580aSAlasdair G Kergon 6182d1e580aSAlasdair G Kergon r = fn(job); 6192d1e580aSAlasdair G Kergon 6202d1e580aSAlasdair G Kergon if (r < 0) { 6212d1e580aSAlasdair G Kergon /* error this rogue job */ 62271f7113dSBart Van Assche if (op_is_write(job->op)) 6232d1e580aSAlasdair G Kergon job->write_err = (unsigned long) -1L; 6242d1e580aSAlasdair G Kergon else 6252d1e580aSAlasdair G Kergon job->read_err = 1; 6262d1e580aSAlasdair G Kergon push(&kc->complete_jobs, job); 627d1fef414SDmitry Fomichev wake(kc); 6282d1e580aSAlasdair G Kergon break; 6292d1e580aSAlasdair G Kergon } 6302d1e580aSAlasdair G Kergon 6312d1e580aSAlasdair G Kergon if (r > 0) { 6322d1e580aSAlasdair G Kergon /* 6332d1e580aSAlasdair G Kergon * We couldn't service this job ATM, so 6342d1e580aSAlasdair G Kergon * push this job back onto the list. 6352d1e580aSAlasdair G Kergon */ 636b673c3a8SKazuo Ito push_head(jobs, job); 6372d1e580aSAlasdair G Kergon break; 6382d1e580aSAlasdair G Kergon } 6392d1e580aSAlasdair G Kergon 6402d1e580aSAlasdair G Kergon count++; 6412d1e580aSAlasdair G Kergon } 6422d1e580aSAlasdair G Kergon 6432d1e580aSAlasdair G Kergon return count; 6442d1e580aSAlasdair G Kergon } 6452d1e580aSAlasdair G Kergon 6462d1e580aSAlasdair G Kergon /* 6472d1e580aSAlasdair G Kergon * kcopyd does this every time it's woken up. 6482d1e580aSAlasdair G Kergon */ 6492d1e580aSAlasdair G Kergon static void do_work(struct work_struct *work) 6502d1e580aSAlasdair G Kergon { 6512d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = container_of(work, 6522d1e580aSAlasdair G Kergon struct dm_kcopyd_client, kcopyd_work); 6537eaceaccSJens Axboe struct blk_plug plug; 6542d1e580aSAlasdair G Kergon 6552d1e580aSAlasdair G Kergon /* 6562d1e580aSAlasdair G Kergon * The order that these are called is *very* important. 6572d1e580aSAlasdair G Kergon * complete jobs can free some pages for pages jobs. 6582d1e580aSAlasdair G Kergon * Pages jobs when successful will jump onto the io jobs 6592d1e580aSAlasdair G Kergon * list. io jobs call wake when they complete and it all 6602d1e580aSAlasdair G Kergon * starts again. 6612d1e580aSAlasdair G Kergon */ 6626bcd658fSMikulas Patocka spin_lock_irq(&kc->job_lock); 663d7e6b8dfSNikos Tsironis list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs); 6646bcd658fSMikulas Patocka spin_unlock_irq(&kc->job_lock); 665d7e6b8dfSNikos Tsironis 6667eaceaccSJens Axboe blk_start_plug(&plug); 6672d1e580aSAlasdair G Kergon process_jobs(&kc->complete_jobs, kc, run_complete_job); 6682d1e580aSAlasdair G Kergon process_jobs(&kc->pages_jobs, kc, run_pages_job); 6692d1e580aSAlasdair G Kergon process_jobs(&kc->io_jobs, kc, run_io_job); 6707eaceaccSJens Axboe blk_finish_plug(&plug); 6712d1e580aSAlasdair G Kergon } 6722d1e580aSAlasdair G Kergon 6732d1e580aSAlasdair G Kergon /* 6742d1e580aSAlasdair G Kergon * If we are copying a small region we just dispatch a single job 6752d1e580aSAlasdair G Kergon * to do the copy, otherwise the io has to be split up into many 6762d1e580aSAlasdair G Kergon * jobs. 6772d1e580aSAlasdair G Kergon */ 6782d1e580aSAlasdair G Kergon static void dispatch_job(struct kcopyd_job *job) 6792d1e580aSAlasdair G Kergon { 6802d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = job->kc; 6812d1e580aSAlasdair G Kergon atomic_inc(&kc->nr_jobs); 6829ca170a3SMikulas Patocka if (unlikely(!job->source.count)) 683d7e6b8dfSNikos Tsironis push(&kc->callback_jobs, job); 6847f069653SMikulas Patocka else if (job->pages == &zero_page_list) 6857f069653SMikulas Patocka push(&kc->io_jobs, job); 6869ca170a3SMikulas Patocka else 6872d1e580aSAlasdair G Kergon push(&kc->pages_jobs, job); 6882d1e580aSAlasdair G Kergon wake(kc); 6892d1e580aSAlasdair G Kergon } 6902d1e580aSAlasdair G Kergon 6912d1e580aSAlasdair G Kergon static void segment_complete(int read_err, unsigned long write_err, 6922d1e580aSAlasdair G Kergon void *context) 6932d1e580aSAlasdair G Kergon { 6942d1e580aSAlasdair G Kergon /* FIXME: tidy this function */ 6952d1e580aSAlasdair G Kergon sector_t progress = 0; 6962d1e580aSAlasdair G Kergon sector_t count = 0; 697c6ea41fbSMikulas Patocka struct kcopyd_job *sub_job = (struct kcopyd_job *) context; 698c6ea41fbSMikulas Patocka struct kcopyd_job *job = sub_job->master_job; 69973830857SMikulas Patocka struct dm_kcopyd_client *kc = job->kc; 7002d1e580aSAlasdair G Kergon 7012d1e580aSAlasdair G Kergon mutex_lock(&job->lock); 7022d1e580aSAlasdair G Kergon 7032d1e580aSAlasdair G Kergon /* update the error */ 7042d1e580aSAlasdair G Kergon if (read_err) 7052d1e580aSAlasdair G Kergon job->read_err = 1; 7062d1e580aSAlasdair G Kergon 7072d1e580aSAlasdair G Kergon if (write_err) 7082d1e580aSAlasdair G Kergon job->write_err |= write_err; 7092d1e580aSAlasdair G Kergon 7102d1e580aSAlasdair G Kergon /* 7112d1e580aSAlasdair G Kergon * Only dispatch more work if there hasn't been an error. 7122d1e580aSAlasdair G Kergon */ 7132d1e580aSAlasdair G Kergon if ((!job->read_err && !job->write_err) || 714db2351ebSMikulas Patocka job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) { 7152d1e580aSAlasdair G Kergon /* get the next chunk of work */ 7162d1e580aSAlasdair G Kergon progress = job->progress; 7172d1e580aSAlasdair G Kergon count = job->source.count - progress; 7182d1e580aSAlasdair G Kergon if (count) { 719c663e040SNikos Tsironis if (count > kc->sub_job_size) 720c663e040SNikos Tsironis count = kc->sub_job_size; 7212d1e580aSAlasdair G Kergon 7222d1e580aSAlasdair G Kergon job->progress += count; 7232d1e580aSAlasdair G Kergon } 7242d1e580aSAlasdair G Kergon } 7252d1e580aSAlasdair G Kergon mutex_unlock(&job->lock); 7262d1e580aSAlasdair G Kergon 7272d1e580aSAlasdair G Kergon if (count) { 7282d1e580aSAlasdair G Kergon int i; 7292d1e580aSAlasdair G Kergon 7302d1e580aSAlasdair G Kergon *sub_job = *job; 731b73c67c2SDamien Le Moal sub_job->write_offset = progress; 7322d1e580aSAlasdair G Kergon sub_job->source.sector += progress; 7332d1e580aSAlasdair G Kergon sub_job->source.count = count; 7342d1e580aSAlasdair G Kergon 7352d1e580aSAlasdair G Kergon for (i = 0; i < job->num_dests; i++) { 7362d1e580aSAlasdair G Kergon sub_job->dests[i].sector += progress; 7372d1e580aSAlasdair G Kergon sub_job->dests[i].count = count; 7382d1e580aSAlasdair G Kergon } 7392d1e580aSAlasdair G Kergon 7402d1e580aSAlasdair G Kergon sub_job->fn = segment_complete; 741c6ea41fbSMikulas Patocka sub_job->context = sub_job; 7422d1e580aSAlasdair G Kergon dispatch_job(sub_job); 7432d1e580aSAlasdair G Kergon 7442d1e580aSAlasdair G Kergon } else if (atomic_dec_and_test(&job->sub_jobs)) { 7452d1e580aSAlasdair G Kergon 7462d1e580aSAlasdair G Kergon /* 747340cd444SMikulas Patocka * Queue the completion callback to the kcopyd thread. 748340cd444SMikulas Patocka * 749340cd444SMikulas Patocka * Some callers assume that all the completions are called 750340cd444SMikulas Patocka * from a single thread and don't race with each other. 751340cd444SMikulas Patocka * 752340cd444SMikulas Patocka * We must not call the callback directly here because this 753340cd444SMikulas Patocka * code may not be executing in the thread. 7542d1e580aSAlasdair G Kergon */ 755340cd444SMikulas Patocka push(&kc->complete_jobs, job); 756340cd444SMikulas Patocka wake(kc); 7572d1e580aSAlasdair G Kergon } 7582d1e580aSAlasdair G Kergon } 7592d1e580aSAlasdair G Kergon 7602d1e580aSAlasdair G Kergon /* 761c6ea41fbSMikulas Patocka * Create some sub jobs to share the work between them. 7622d1e580aSAlasdair G Kergon */ 763c6ea41fbSMikulas Patocka static void split_job(struct kcopyd_job *master_job) 7642d1e580aSAlasdair G Kergon { 7652d1e580aSAlasdair G Kergon int i; 7662d1e580aSAlasdair G Kergon 767c6ea41fbSMikulas Patocka atomic_inc(&master_job->kc->nr_jobs); 768340cd444SMikulas Patocka 769c6ea41fbSMikulas Patocka atomic_set(&master_job->sub_jobs, SPLIT_COUNT); 770c6ea41fbSMikulas Patocka for (i = 0; i < SPLIT_COUNT; i++) { 771c6ea41fbSMikulas Patocka master_job[i + 1].master_job = master_job; 772c6ea41fbSMikulas Patocka segment_complete(0, 0u, &master_job[i + 1]); 773c6ea41fbSMikulas Patocka } 7742d1e580aSAlasdair G Kergon } 7752d1e580aSAlasdair G Kergon 7767209049dSMike Snitzer void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, 7772d1e580aSAlasdair G Kergon unsigned int num_dests, struct dm_io_region *dests, 7782d1e580aSAlasdair G Kergon unsigned int flags, dm_kcopyd_notify_fn fn, void *context) 7792d1e580aSAlasdair G Kergon { 7802d1e580aSAlasdair G Kergon struct kcopyd_job *job; 78170d6c400SMike Snitzer int i; 7822d1e580aSAlasdair G Kergon 7832d1e580aSAlasdair G Kergon /* 784c6ea41fbSMikulas Patocka * Allocate an array of jobs consisting of one master job 785c6ea41fbSMikulas Patocka * followed by SPLIT_COUNT sub jobs. 7862d1e580aSAlasdair G Kergon */ 7876f1c819cSKent Overstreet job = mempool_alloc(&kc->job_pool, GFP_NOIO); 788d5ffebddSMike Snitzer mutex_init(&job->lock); 7892d1e580aSAlasdair G Kergon 7902d1e580aSAlasdair G Kergon /* 7912d1e580aSAlasdair G Kergon * set up for the read. 7922d1e580aSAlasdair G Kergon */ 7932d1e580aSAlasdair G Kergon job->kc = kc; 7942d1e580aSAlasdair G Kergon job->flags = flags; 7952d1e580aSAlasdair G Kergon job->read_err = 0; 7962d1e580aSAlasdair G Kergon job->write_err = 0; 7972d1e580aSAlasdair G Kergon 7982d1e580aSAlasdair G Kergon job->num_dests = num_dests; 7992d1e580aSAlasdair G Kergon memcpy(&job->dests, dests, sizeof(*dests) * num_dests); 8002d1e580aSAlasdair G Kergon 801b73c67c2SDamien Le Moal /* 802b73c67c2SDamien Le Moal * If one of the destination is a host-managed zoned block device, 803b73c67c2SDamien Le Moal * we need to write sequentially. If one of the destination is a 804b73c67c2SDamien Le Moal * host-aware device, then leave it to the caller to choose what to do. 805b73c67c2SDamien Le Moal */ 806db2351ebSMikulas Patocka if (!(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { 807b73c67c2SDamien Le Moal for (i = 0; i < job->num_dests; i++) { 808b73c67c2SDamien Le Moal if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) { 809db2351ebSMikulas Patocka job->flags |= BIT(DM_KCOPYD_WRITE_SEQ); 810b73c67c2SDamien Le Moal break; 811b73c67c2SDamien Le Moal } 812b73c67c2SDamien Le Moal } 813b73c67c2SDamien Le Moal } 814b73c67c2SDamien Le Moal 815b73c67c2SDamien Le Moal /* 816b73c67c2SDamien Le Moal * If we need to write sequentially, errors cannot be ignored. 817b73c67c2SDamien Le Moal */ 818db2351ebSMikulas Patocka if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) && 819db2351ebSMikulas Patocka job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) 820db2351ebSMikulas Patocka job->flags &= ~BIT(DM_KCOPYD_IGNORE_ERROR); 821b73c67c2SDamien Le Moal 8227f069653SMikulas Patocka if (from) { 8237f069653SMikulas Patocka job->source = *from; 8242d1e580aSAlasdair G Kergon job->pages = NULL; 82571f7113dSBart Van Assche job->op = REQ_OP_READ; 8267f069653SMikulas Patocka } else { 8277f069653SMikulas Patocka memset(&job->source, 0, sizeof job->source); 8287f069653SMikulas Patocka job->source.count = job->dests[0].count; 8297f069653SMikulas Patocka job->pages = &zero_page_list; 83070d6c400SMike Snitzer 83170d6c400SMike Snitzer /* 832615ec946SChristoph Hellwig * Use WRITE ZEROES to optimize zeroing if all dests support it. 83370d6c400SMike Snitzer */ 83471f7113dSBart Van Assche job->op = REQ_OP_WRITE_ZEROES; 83570d6c400SMike Snitzer for (i = 0; i < job->num_dests; i++) 836615ec946SChristoph Hellwig if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) { 83771f7113dSBart Van Assche job->op = REQ_OP_WRITE; 83870d6c400SMike Snitzer break; 83970d6c400SMike Snitzer } 8407f069653SMikulas Patocka } 8412d1e580aSAlasdair G Kergon 8422d1e580aSAlasdair G Kergon job->fn = fn; 8432d1e580aSAlasdair G Kergon job->context = context; 844c6ea41fbSMikulas Patocka job->master_job = job; 845b73c67c2SDamien Le Moal job->write_offset = 0; 8462d1e580aSAlasdair G Kergon 847c663e040SNikos Tsironis if (job->source.count <= kc->sub_job_size) 8482d1e580aSAlasdair G Kergon dispatch_job(job); 8492d1e580aSAlasdair G Kergon else { 8502d1e580aSAlasdair G Kergon job->progress = 0; 8512d1e580aSAlasdair G Kergon split_job(job); 8522d1e580aSAlasdair G Kergon } 8532d1e580aSAlasdair G Kergon } 8542d1e580aSAlasdair G Kergon EXPORT_SYMBOL(dm_kcopyd_copy); 8552d1e580aSAlasdair G Kergon 8567209049dSMike Snitzer void dm_kcopyd_zero(struct dm_kcopyd_client *kc, 85786a3238cSHeinz Mauelshagen unsigned int num_dests, struct dm_io_region *dests, 85886a3238cSHeinz Mauelshagen unsigned int flags, dm_kcopyd_notify_fn fn, void *context) 8597f069653SMikulas Patocka { 8607209049dSMike Snitzer dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context); 8617f069653SMikulas Patocka } 8627f069653SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_zero); 8637f069653SMikulas Patocka 864a6e50b40SMikulas Patocka void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, 865a6e50b40SMikulas Patocka dm_kcopyd_notify_fn fn, void *context) 866a6e50b40SMikulas Patocka { 867a6e50b40SMikulas Patocka struct kcopyd_job *job; 868a6e50b40SMikulas Patocka 8696f1c819cSKent Overstreet job = mempool_alloc(&kc->job_pool, GFP_NOIO); 870a6e50b40SMikulas Patocka 871a6e50b40SMikulas Patocka memset(job, 0, sizeof(struct kcopyd_job)); 872a6e50b40SMikulas Patocka job->kc = kc; 873a6e50b40SMikulas Patocka job->fn = fn; 874a6e50b40SMikulas Patocka job->context = context; 875d136f2efSAlasdair G Kergon job->master_job = job; 876a6e50b40SMikulas Patocka 877a6e50b40SMikulas Patocka atomic_inc(&kc->nr_jobs); 878a6e50b40SMikulas Patocka 879a6e50b40SMikulas Patocka return job; 880a6e50b40SMikulas Patocka } 881a6e50b40SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_prepare_callback); 882a6e50b40SMikulas Patocka 883a6e50b40SMikulas Patocka void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) 884a6e50b40SMikulas Patocka { 885a6e50b40SMikulas Patocka struct kcopyd_job *job = j; 886a6e50b40SMikulas Patocka struct dm_kcopyd_client *kc = job->kc; 887a6e50b40SMikulas Patocka 888a6e50b40SMikulas Patocka job->read_err = read_err; 889a6e50b40SMikulas Patocka job->write_err = write_err; 890a6e50b40SMikulas Patocka 891d7e6b8dfSNikos Tsironis push(&kc->callback_jobs, job); 892a6e50b40SMikulas Patocka wake(kc); 893a6e50b40SMikulas Patocka } 894a6e50b40SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_do_callback); 895a6e50b40SMikulas Patocka 8962d1e580aSAlasdair G Kergon /* 8972d1e580aSAlasdair G Kergon * Cancels a kcopyd job, eg. someone might be deactivating a 8982d1e580aSAlasdair G Kergon * mirror. 8992d1e580aSAlasdair G Kergon */ 9002d1e580aSAlasdair G Kergon #if 0 9012d1e580aSAlasdair G Kergon int kcopyd_cancel(struct kcopyd_job *job, int block) 9022d1e580aSAlasdair G Kergon { 9032d1e580aSAlasdair G Kergon /* FIXME: finish */ 9042d1e580aSAlasdair G Kergon return -1; 9052d1e580aSAlasdair G Kergon } 9062d1e580aSAlasdair G Kergon #endif /* 0 */ 9072d1e580aSAlasdair G Kergon 908*a4a82ce3SHeinz Mauelshagen /* 909*a4a82ce3SHeinz Mauelshagen *--------------------------------------------------------------- 9102d1e580aSAlasdair G Kergon * Client setup 911*a4a82ce3SHeinz Mauelshagen *--------------------------------------------------------------- 912*a4a82ce3SHeinz Mauelshagen */ 913df5d2e90SMikulas Patocka struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle) 9142d1e580aSAlasdair G Kergon { 9156f1c819cSKent Overstreet int r; 91686a3238cSHeinz Mauelshagen unsigned int reserve_pages; 9172d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc; 9182d1e580aSAlasdair G Kergon 919d3775354SKent Overstreet kc = kzalloc(sizeof(*kc), GFP_KERNEL); 9202d1e580aSAlasdair G Kergon if (!kc) 921fa34ce73SMikulas Patocka return ERR_PTR(-ENOMEM); 9222d1e580aSAlasdair G Kergon 9232d1e580aSAlasdair G Kergon spin_lock_init(&kc->job_lock); 924d7e6b8dfSNikos Tsironis INIT_LIST_HEAD(&kc->callback_jobs); 9252d1e580aSAlasdair G Kergon INIT_LIST_HEAD(&kc->complete_jobs); 9262d1e580aSAlasdair G Kergon INIT_LIST_HEAD(&kc->io_jobs); 9272d1e580aSAlasdair G Kergon INIT_LIST_HEAD(&kc->pages_jobs); 928df5d2e90SMikulas Patocka kc->throttle = throttle; 9292d1e580aSAlasdair G Kergon 9306f1c819cSKent Overstreet r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache); 9316f1c819cSKent Overstreet if (r) 9322d1e580aSAlasdair G Kergon goto bad_slab; 9332d1e580aSAlasdair G Kergon 9342d1e580aSAlasdair G Kergon INIT_WORK(&kc->kcopyd_work, do_work); 935670368a8STejun Heo kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0); 9366f1c819cSKent Overstreet if (!kc->kcopyd_wq) { 9376f1c819cSKent Overstreet r = -ENOMEM; 9382d1e580aSAlasdair G Kergon goto bad_workqueue; 9396f1c819cSKent Overstreet } 9402d1e580aSAlasdair G Kergon 941c663e040SNikos Tsironis kc->sub_job_size = dm_get_kcopyd_subjob_size(); 942c663e040SNikos Tsironis reserve_pages = DIV_ROUND_UP(kc->sub_job_size << SECTOR_SHIFT, PAGE_SIZE); 943c663e040SNikos Tsironis 9442d1e580aSAlasdair G Kergon kc->pages = NULL; 945d0471458SMikulas Patocka kc->nr_reserved_pages = kc->nr_free_pages = 0; 946c663e040SNikos Tsironis r = client_reserve_pages(kc, reserve_pages); 9472d1e580aSAlasdair G Kergon if (r) 9482d1e580aSAlasdair G Kergon goto bad_client_pages; 9492d1e580aSAlasdair G Kergon 950bda8efecSMikulas Patocka kc->io_client = dm_io_client_create(); 9512d1e580aSAlasdair G Kergon if (IS_ERR(kc->io_client)) { 9522d1e580aSAlasdair G Kergon r = PTR_ERR(kc->io_client); 9532d1e580aSAlasdair G Kergon goto bad_io_client; 9542d1e580aSAlasdair G Kergon } 9552d1e580aSAlasdair G Kergon 9562d1e580aSAlasdair G Kergon init_waitqueue_head(&kc->destroyq); 9572d1e580aSAlasdair G Kergon atomic_set(&kc->nr_jobs, 0); 9582d1e580aSAlasdair G Kergon 959fa34ce73SMikulas Patocka return kc; 9602d1e580aSAlasdair G Kergon 9612d1e580aSAlasdair G Kergon bad_io_client: 9622d1e580aSAlasdair G Kergon client_free_pages(kc); 9632d1e580aSAlasdair G Kergon bad_client_pages: 9642d1e580aSAlasdair G Kergon destroy_workqueue(kc->kcopyd_wq); 9652d1e580aSAlasdair G Kergon bad_workqueue: 9666f1c819cSKent Overstreet mempool_exit(&kc->job_pool); 9672d1e580aSAlasdair G Kergon bad_slab: 9682d1e580aSAlasdair G Kergon kfree(kc); 9692d1e580aSAlasdair G Kergon 970fa34ce73SMikulas Patocka return ERR_PTR(r); 9712d1e580aSAlasdair G Kergon } 9722d1e580aSAlasdair G Kergon EXPORT_SYMBOL(dm_kcopyd_client_create); 9732d1e580aSAlasdair G Kergon 9742d1e580aSAlasdair G Kergon void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) 9752d1e580aSAlasdair G Kergon { 9762d1e580aSAlasdair G Kergon /* Wait for completion of all jobs submitted by this client. */ 9772d1e580aSAlasdair G Kergon wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); 9782d1e580aSAlasdair G Kergon 979d7e6b8dfSNikos Tsironis BUG_ON(!list_empty(&kc->callback_jobs)); 9802d1e580aSAlasdair G Kergon BUG_ON(!list_empty(&kc->complete_jobs)); 9812d1e580aSAlasdair G Kergon BUG_ON(!list_empty(&kc->io_jobs)); 9822d1e580aSAlasdair G Kergon BUG_ON(!list_empty(&kc->pages_jobs)); 9832d1e580aSAlasdair G Kergon destroy_workqueue(kc->kcopyd_wq); 9842d1e580aSAlasdair G Kergon dm_io_client_destroy(kc->io_client); 9852d1e580aSAlasdair G Kergon client_free_pages(kc); 9866f1c819cSKent Overstreet mempool_exit(&kc->job_pool); 9872d1e580aSAlasdair G Kergon kfree(kc); 9882d1e580aSAlasdair G Kergon } 9892d1e580aSAlasdair G Kergon EXPORT_SYMBOL(dm_kcopyd_client_destroy); 990293128b1SMikulas Patocka 991293128b1SMikulas Patocka void dm_kcopyd_client_flush(struct dm_kcopyd_client *kc) 992293128b1SMikulas Patocka { 993293128b1SMikulas Patocka flush_workqueue(kc->kcopyd_wq); 994293128b1SMikulas Patocka } 995293128b1SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_client_flush); 996