12d1e580aSAlasdair G Kergon /* 22d1e580aSAlasdair G Kergon * Copyright (C) 2002 Sistina Software (UK) Limited. 32d1e580aSAlasdair G Kergon * Copyright (C) 2006 Red Hat GmbH 42d1e580aSAlasdair G Kergon * 52d1e580aSAlasdair G Kergon * This file is released under the GPL. 62d1e580aSAlasdair G Kergon * 72d1e580aSAlasdair G Kergon * Kcopyd provides a simple interface for copying an area of one 82d1e580aSAlasdair G Kergon * block-device to one or more other block-devices, with an asynchronous 92d1e580aSAlasdair G Kergon * completion notification. 102d1e580aSAlasdair G Kergon */ 112d1e580aSAlasdair G Kergon 122d1e580aSAlasdair G Kergon #include <linux/types.h> 1360063497SArun Sharma #include <linux/atomic.h> 142d1e580aSAlasdair G Kergon #include <linux/blkdev.h> 152d1e580aSAlasdair G Kergon #include <linux/fs.h> 162d1e580aSAlasdair G Kergon #include <linux/init.h> 172d1e580aSAlasdair G Kergon #include <linux/list.h> 182d1e580aSAlasdair G Kergon #include <linux/mempool.h> 192d1e580aSAlasdair G Kergon #include <linux/module.h> 202d1e580aSAlasdair G Kergon #include <linux/pagemap.h> 212d1e580aSAlasdair G Kergon #include <linux/slab.h> 222d1e580aSAlasdair G Kergon #include <linux/vmalloc.h> 232d1e580aSAlasdair G Kergon #include <linux/workqueue.h> 242d1e580aSAlasdair G Kergon #include <linux/mutex.h> 25df5d2e90SMikulas Patocka #include <linux/delay.h> 26586e80e6SMikulas Patocka #include <linux/device-mapper.h> 27a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h> 282d1e580aSAlasdair G Kergon 294cc96131SMike Snitzer #include "dm-core.h" 302d1e580aSAlasdair G Kergon 31c6ea41fbSMikulas Patocka #define SPLIT_COUNT 8 32c6ea41fbSMikulas Patocka #define MIN_JOBS 8 33c663e040SNikos Tsironis 34c663e040SNikos Tsironis #define DEFAULT_SUB_JOB_SIZE_KB 512 35c663e040SNikos Tsironis #define MAX_SUB_JOB_SIZE_KB 1024 36c663e040SNikos Tsironis 37c663e040SNikos Tsironis static unsigned kcopyd_subjob_size_kb = DEFAULT_SUB_JOB_SIZE_KB; 38c663e040SNikos Tsironis 39c663e040SNikos Tsironis module_param(kcopyd_subjob_size_kb, uint, S_IRUGO | S_IWUSR); 40c663e040SNikos Tsironis MODULE_PARM_DESC(kcopyd_subjob_size_kb, "Sub-job size for dm-kcopyd clients"); 41c663e040SNikos Tsironis 42c663e040SNikos Tsironis static unsigned dm_get_kcopyd_subjob_size(void) 43c663e040SNikos Tsironis { 44c663e040SNikos Tsironis unsigned sub_job_size_kb; 45c663e040SNikos Tsironis 46c663e040SNikos Tsironis sub_job_size_kb = __dm_get_module_param(&kcopyd_subjob_size_kb, 47c663e040SNikos Tsironis DEFAULT_SUB_JOB_SIZE_KB, 48c663e040SNikos Tsironis MAX_SUB_JOB_SIZE_KB); 49c663e040SNikos Tsironis 50c663e040SNikos Tsironis return sub_job_size_kb << 1; 51c663e040SNikos Tsironis } 52c6ea41fbSMikulas Patocka 532d1e580aSAlasdair G Kergon /*----------------------------------------------------------------- 542d1e580aSAlasdair G Kergon * Each kcopyd client has its own little pool of preallocated 552d1e580aSAlasdair G Kergon * pages for kcopyd io. 562d1e580aSAlasdair G Kergon *---------------------------------------------------------------*/ 572d1e580aSAlasdair G Kergon struct dm_kcopyd_client { 582d1e580aSAlasdair G Kergon struct page_list *pages; 59d0471458SMikulas Patocka unsigned nr_reserved_pages; 60d0471458SMikulas Patocka unsigned nr_free_pages; 61c663e040SNikos Tsironis unsigned sub_job_size; 622d1e580aSAlasdair G Kergon 632d1e580aSAlasdair G Kergon struct dm_io_client *io_client; 642d1e580aSAlasdair G Kergon 652d1e580aSAlasdair G Kergon wait_queue_head_t destroyq; 662d1e580aSAlasdair G Kergon 676f1c819cSKent Overstreet mempool_t job_pool; 682d1e580aSAlasdair G Kergon 692d1e580aSAlasdair G Kergon struct workqueue_struct *kcopyd_wq; 702d1e580aSAlasdair G Kergon struct work_struct kcopyd_work; 712d1e580aSAlasdair G Kergon 72df5d2e90SMikulas Patocka struct dm_kcopyd_throttle *throttle; 73df5d2e90SMikulas Patocka 7472d711c8SMike Snitzer atomic_t nr_jobs; 7572d711c8SMike Snitzer 762d1e580aSAlasdair G Kergon /* 77d7e6b8dfSNikos Tsironis * We maintain four lists of jobs: 782d1e580aSAlasdair G Kergon * 792d1e580aSAlasdair G Kergon * i) jobs waiting for pages 802d1e580aSAlasdair G Kergon * ii) jobs that have pages, and are waiting for the io to be issued. 81d7e6b8dfSNikos Tsironis * iii) jobs that don't need to do any IO and just run a callback 82d7e6b8dfSNikos Tsironis * iv) jobs that have completed. 832d1e580aSAlasdair G Kergon * 84d7e6b8dfSNikos Tsironis * All four of these are protected by job_lock. 852d1e580aSAlasdair G Kergon */ 862d1e580aSAlasdair G Kergon spinlock_t job_lock; 87d7e6b8dfSNikos Tsironis struct list_head callback_jobs; 882d1e580aSAlasdair G Kergon struct list_head complete_jobs; 892d1e580aSAlasdair G Kergon struct list_head io_jobs; 902d1e580aSAlasdair G Kergon struct list_head pages_jobs; 912d1e580aSAlasdair G Kergon }; 922d1e580aSAlasdair G Kergon 937f069653SMikulas Patocka static struct page_list zero_page_list; 947f069653SMikulas Patocka 95df5d2e90SMikulas Patocka static DEFINE_SPINLOCK(throttle_spinlock); 96df5d2e90SMikulas Patocka 97df5d2e90SMikulas Patocka /* 98df5d2e90SMikulas Patocka * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period. 99df5d2e90SMikulas Patocka * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided 100df5d2e90SMikulas Patocka * by 2. 101df5d2e90SMikulas Patocka */ 102df5d2e90SMikulas Patocka #define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ 103df5d2e90SMikulas Patocka 104df5d2e90SMikulas Patocka /* 105df5d2e90SMikulas Patocka * Sleep this number of milliseconds. 106df5d2e90SMikulas Patocka * 107df5d2e90SMikulas Patocka * The value was decided experimentally. 108df5d2e90SMikulas Patocka * Smaller values seem to cause an increased copy rate above the limit. 109df5d2e90SMikulas Patocka * The reason for this is unknown but possibly due to jiffies rounding errors 110df5d2e90SMikulas Patocka * or read/write cache inside the disk. 111df5d2e90SMikulas Patocka */ 112df5d2e90SMikulas Patocka #define SLEEP_MSEC 100 113df5d2e90SMikulas Patocka 114df5d2e90SMikulas Patocka /* 115df5d2e90SMikulas Patocka * Maximum number of sleep events. There is a theoretical livelock if more 116df5d2e90SMikulas Patocka * kcopyd clients do work simultaneously which this limit avoids. 117df5d2e90SMikulas Patocka */ 118df5d2e90SMikulas Patocka #define MAX_SLEEPS 10 119df5d2e90SMikulas Patocka 120df5d2e90SMikulas Patocka static void io_job_start(struct dm_kcopyd_throttle *t) 121df5d2e90SMikulas Patocka { 122df5d2e90SMikulas Patocka unsigned throttle, now, difference; 123df5d2e90SMikulas Patocka int slept = 0, skew; 124df5d2e90SMikulas Patocka 125df5d2e90SMikulas Patocka if (unlikely(!t)) 126df5d2e90SMikulas Patocka return; 127df5d2e90SMikulas Patocka 128df5d2e90SMikulas Patocka try_again: 129df5d2e90SMikulas Patocka spin_lock_irq(&throttle_spinlock); 130df5d2e90SMikulas Patocka 1316aa7de05SMark Rutland throttle = READ_ONCE(t->throttle); 132df5d2e90SMikulas Patocka 133df5d2e90SMikulas Patocka if (likely(throttle >= 100)) 134df5d2e90SMikulas Patocka goto skip_limit; 135df5d2e90SMikulas Patocka 136df5d2e90SMikulas Patocka now = jiffies; 137df5d2e90SMikulas Patocka difference = now - t->last_jiffies; 138df5d2e90SMikulas Patocka t->last_jiffies = now; 139df5d2e90SMikulas Patocka if (t->num_io_jobs) 140df5d2e90SMikulas Patocka t->io_period += difference; 141df5d2e90SMikulas Patocka t->total_period += difference; 142df5d2e90SMikulas Patocka 143df5d2e90SMikulas Patocka /* 144df5d2e90SMikulas Patocka * Maintain sane values if we got a temporary overflow. 145df5d2e90SMikulas Patocka */ 146df5d2e90SMikulas Patocka if (unlikely(t->io_period > t->total_period)) 147df5d2e90SMikulas Patocka t->io_period = t->total_period; 148df5d2e90SMikulas Patocka 149df5d2e90SMikulas Patocka if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) { 150df5d2e90SMikulas Patocka int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT); 151df5d2e90SMikulas Patocka t->total_period >>= shift; 152df5d2e90SMikulas Patocka t->io_period >>= shift; 153df5d2e90SMikulas Patocka } 154df5d2e90SMikulas Patocka 155df5d2e90SMikulas Patocka skew = t->io_period - throttle * t->total_period / 100; 156df5d2e90SMikulas Patocka 157df5d2e90SMikulas Patocka if (unlikely(skew > 0) && slept < MAX_SLEEPS) { 158df5d2e90SMikulas Patocka slept++; 159df5d2e90SMikulas Patocka spin_unlock_irq(&throttle_spinlock); 160df5d2e90SMikulas Patocka msleep(SLEEP_MSEC); 161df5d2e90SMikulas Patocka goto try_again; 162df5d2e90SMikulas Patocka } 163df5d2e90SMikulas Patocka 164df5d2e90SMikulas Patocka skip_limit: 165df5d2e90SMikulas Patocka t->num_io_jobs++; 166df5d2e90SMikulas Patocka 167df5d2e90SMikulas Patocka spin_unlock_irq(&throttle_spinlock); 168df5d2e90SMikulas Patocka } 169df5d2e90SMikulas Patocka 170df5d2e90SMikulas Patocka static void io_job_finish(struct dm_kcopyd_throttle *t) 171df5d2e90SMikulas Patocka { 172df5d2e90SMikulas Patocka unsigned long flags; 173df5d2e90SMikulas Patocka 174df5d2e90SMikulas Patocka if (unlikely(!t)) 175df5d2e90SMikulas Patocka return; 176df5d2e90SMikulas Patocka 177df5d2e90SMikulas Patocka spin_lock_irqsave(&throttle_spinlock, flags); 178df5d2e90SMikulas Patocka 179df5d2e90SMikulas Patocka t->num_io_jobs--; 180df5d2e90SMikulas Patocka 1816aa7de05SMark Rutland if (likely(READ_ONCE(t->throttle) >= 100)) 182df5d2e90SMikulas Patocka goto skip_limit; 183df5d2e90SMikulas Patocka 184df5d2e90SMikulas Patocka if (!t->num_io_jobs) { 185df5d2e90SMikulas Patocka unsigned now, difference; 186df5d2e90SMikulas Patocka 187df5d2e90SMikulas Patocka now = jiffies; 188df5d2e90SMikulas Patocka difference = now - t->last_jiffies; 189df5d2e90SMikulas Patocka t->last_jiffies = now; 190df5d2e90SMikulas Patocka 191df5d2e90SMikulas Patocka t->io_period += difference; 192df5d2e90SMikulas Patocka t->total_period += difference; 193df5d2e90SMikulas Patocka 194df5d2e90SMikulas Patocka /* 195df5d2e90SMikulas Patocka * Maintain sane values if we got a temporary overflow. 196df5d2e90SMikulas Patocka */ 197df5d2e90SMikulas Patocka if (unlikely(t->io_period > t->total_period)) 198df5d2e90SMikulas Patocka t->io_period = t->total_period; 199df5d2e90SMikulas Patocka } 200df5d2e90SMikulas Patocka 201df5d2e90SMikulas Patocka skip_limit: 202df5d2e90SMikulas Patocka spin_unlock_irqrestore(&throttle_spinlock, flags); 203df5d2e90SMikulas Patocka } 204df5d2e90SMikulas Patocka 205df5d2e90SMikulas Patocka 2062d1e580aSAlasdair G Kergon static void wake(struct dm_kcopyd_client *kc) 2072d1e580aSAlasdair G Kergon { 2082d1e580aSAlasdair G Kergon queue_work(kc->kcopyd_wq, &kc->kcopyd_work); 2092d1e580aSAlasdair G Kergon } 2102d1e580aSAlasdair G Kergon 211d0471458SMikulas Patocka /* 212d0471458SMikulas Patocka * Obtain one page for the use of kcopyd. 213d0471458SMikulas Patocka */ 214f99b55eeSMikulas Patocka static struct page_list *alloc_pl(gfp_t gfp) 2152d1e580aSAlasdair G Kergon { 2162d1e580aSAlasdair G Kergon struct page_list *pl; 2172d1e580aSAlasdair G Kergon 218f99b55eeSMikulas Patocka pl = kmalloc(sizeof(*pl), gfp); 2192d1e580aSAlasdair G Kergon if (!pl) 2202d1e580aSAlasdair G Kergon return NULL; 2212d1e580aSAlasdair G Kergon 222f99b55eeSMikulas Patocka pl->page = alloc_page(gfp); 2232d1e580aSAlasdair G Kergon if (!pl->page) { 2242d1e580aSAlasdair G Kergon kfree(pl); 2252d1e580aSAlasdair G Kergon return NULL; 2262d1e580aSAlasdair G Kergon } 2272d1e580aSAlasdair G Kergon 2282d1e580aSAlasdair G Kergon return pl; 2292d1e580aSAlasdair G Kergon } 2302d1e580aSAlasdair G Kergon 2312d1e580aSAlasdair G Kergon static void free_pl(struct page_list *pl) 2322d1e580aSAlasdair G Kergon { 2332d1e580aSAlasdair G Kergon __free_page(pl->page); 2342d1e580aSAlasdair G Kergon kfree(pl); 2352d1e580aSAlasdair G Kergon } 2362d1e580aSAlasdair G Kergon 237d0471458SMikulas Patocka /* 238d0471458SMikulas Patocka * Add the provided pages to a client's free page list, releasing 239d0471458SMikulas Patocka * back to the system any beyond the reserved_pages limit. 240d0471458SMikulas Patocka */ 241d0471458SMikulas Patocka static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) 242d0471458SMikulas Patocka { 243d0471458SMikulas Patocka struct page_list *next; 244d0471458SMikulas Patocka 245d0471458SMikulas Patocka do { 246d0471458SMikulas Patocka next = pl->next; 247d0471458SMikulas Patocka 248d0471458SMikulas Patocka if (kc->nr_free_pages >= kc->nr_reserved_pages) 249d0471458SMikulas Patocka free_pl(pl); 250d0471458SMikulas Patocka else { 251d0471458SMikulas Patocka pl->next = kc->pages; 252d0471458SMikulas Patocka kc->pages = pl; 253d0471458SMikulas Patocka kc->nr_free_pages++; 254d0471458SMikulas Patocka } 255d0471458SMikulas Patocka 256d0471458SMikulas Patocka pl = next; 257d0471458SMikulas Patocka } while (pl); 258d0471458SMikulas Patocka } 259d0471458SMikulas Patocka 2602d1e580aSAlasdair G Kergon static int kcopyd_get_pages(struct dm_kcopyd_client *kc, 2612d1e580aSAlasdair G Kergon unsigned int nr, struct page_list **pages) 2622d1e580aSAlasdair G Kergon { 2632d1e580aSAlasdair G Kergon struct page_list *pl; 2642d1e580aSAlasdair G Kergon 265d0471458SMikulas Patocka *pages = NULL; 2662d1e580aSAlasdair G Kergon 267d0471458SMikulas Patocka do { 268d0164adcSMel Gorman pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM); 269d0471458SMikulas Patocka if (unlikely(!pl)) { 270d0471458SMikulas Patocka /* Use reserved pages */ 271d0471458SMikulas Patocka pl = kc->pages; 272d0471458SMikulas Patocka if (unlikely(!pl)) 273d0471458SMikulas Patocka goto out_of_memory; 2742d1e580aSAlasdair G Kergon kc->pages = pl->next; 275d0471458SMikulas Patocka kc->nr_free_pages--; 276d0471458SMikulas Patocka } 277d0471458SMikulas Patocka pl->next = *pages; 278d0471458SMikulas Patocka *pages = pl; 279d0471458SMikulas Patocka } while (--nr); 2802d1e580aSAlasdair G Kergon 2812d1e580aSAlasdair G Kergon return 0; 2822d1e580aSAlasdair G Kergon 283d0471458SMikulas Patocka out_of_memory: 284d0471458SMikulas Patocka if (*pages) 285d0471458SMikulas Patocka kcopyd_put_pages(kc, *pages); 286d0471458SMikulas Patocka return -ENOMEM; 2872d1e580aSAlasdair G Kergon } 2882d1e580aSAlasdair G Kergon 2892d1e580aSAlasdair G Kergon /* 2902d1e580aSAlasdair G Kergon * These three functions resize the page pool. 2912d1e580aSAlasdair G Kergon */ 2922d1e580aSAlasdair G Kergon static void drop_pages(struct page_list *pl) 2932d1e580aSAlasdair G Kergon { 2942d1e580aSAlasdair G Kergon struct page_list *next; 2952d1e580aSAlasdair G Kergon 2962d1e580aSAlasdair G Kergon while (pl) { 2972d1e580aSAlasdair G Kergon next = pl->next; 2982d1e580aSAlasdair G Kergon free_pl(pl); 2992d1e580aSAlasdair G Kergon pl = next; 3002d1e580aSAlasdair G Kergon } 3012d1e580aSAlasdair G Kergon } 3022d1e580aSAlasdair G Kergon 303d0471458SMikulas Patocka /* 304d0471458SMikulas Patocka * Allocate and reserve nr_pages for the use of a specific client. 305d0471458SMikulas Patocka */ 306d0471458SMikulas Patocka static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned nr_pages) 3072d1e580aSAlasdair G Kergon { 308d0471458SMikulas Patocka unsigned i; 3092d1e580aSAlasdair G Kergon struct page_list *pl = NULL, *next; 3102d1e580aSAlasdair G Kergon 311d0471458SMikulas Patocka for (i = 0; i < nr_pages; i++) { 312f99b55eeSMikulas Patocka next = alloc_pl(GFP_KERNEL); 3132d1e580aSAlasdair G Kergon if (!next) { 3142d1e580aSAlasdair G Kergon if (pl) 3152d1e580aSAlasdair G Kergon drop_pages(pl); 3162d1e580aSAlasdair G Kergon return -ENOMEM; 3172d1e580aSAlasdair G Kergon } 3182d1e580aSAlasdair G Kergon next->next = pl; 3192d1e580aSAlasdair G Kergon pl = next; 3202d1e580aSAlasdair G Kergon } 3212d1e580aSAlasdair G Kergon 322d0471458SMikulas Patocka kc->nr_reserved_pages += nr_pages; 3232d1e580aSAlasdair G Kergon kcopyd_put_pages(kc, pl); 324d0471458SMikulas Patocka 3252d1e580aSAlasdair G Kergon return 0; 3262d1e580aSAlasdair G Kergon } 3272d1e580aSAlasdair G Kergon 3282d1e580aSAlasdair G Kergon static void client_free_pages(struct dm_kcopyd_client *kc) 3292d1e580aSAlasdair G Kergon { 330d0471458SMikulas Patocka BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages); 3312d1e580aSAlasdair G Kergon drop_pages(kc->pages); 3322d1e580aSAlasdair G Kergon kc->pages = NULL; 333d0471458SMikulas Patocka kc->nr_free_pages = kc->nr_reserved_pages = 0; 3342d1e580aSAlasdair G Kergon } 3352d1e580aSAlasdair G Kergon 3362d1e580aSAlasdair G Kergon /*----------------------------------------------------------------- 3372d1e580aSAlasdair G Kergon * kcopyd_jobs need to be allocated by the *clients* of kcopyd, 3382d1e580aSAlasdair G Kergon * for this reason we use a mempool to prevent the client from 3392d1e580aSAlasdair G Kergon * ever having to do io (which could cause a deadlock). 3402d1e580aSAlasdair G Kergon *---------------------------------------------------------------*/ 3412d1e580aSAlasdair G Kergon struct kcopyd_job { 3422d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc; 3432d1e580aSAlasdair G Kergon struct list_head list; 344db2351ebSMikulas Patocka unsigned flags; 3452d1e580aSAlasdair G Kergon 3462d1e580aSAlasdair G Kergon /* 3472d1e580aSAlasdair G Kergon * Error state of the job. 3482d1e580aSAlasdair G Kergon */ 3492d1e580aSAlasdair G Kergon int read_err; 3502d1e580aSAlasdair G Kergon unsigned long write_err; 3512d1e580aSAlasdair G Kergon 3522d1e580aSAlasdair G Kergon /* 3532d1e580aSAlasdair G Kergon * Either READ or WRITE 3542d1e580aSAlasdair G Kergon */ 3552d1e580aSAlasdair G Kergon int rw; 3562d1e580aSAlasdair G Kergon struct dm_io_region source; 3572d1e580aSAlasdair G Kergon 3582d1e580aSAlasdair G Kergon /* 3592d1e580aSAlasdair G Kergon * The destinations for the transfer. 3602d1e580aSAlasdair G Kergon */ 3612d1e580aSAlasdair G Kergon unsigned int num_dests; 3622d1e580aSAlasdair G Kergon struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; 3632d1e580aSAlasdair G Kergon 3642d1e580aSAlasdair G Kergon struct page_list *pages; 3652d1e580aSAlasdair G Kergon 3662d1e580aSAlasdair G Kergon /* 3672d1e580aSAlasdair G Kergon * Set this to ensure you are notified when the job has 3682d1e580aSAlasdair G Kergon * completed. 'context' is for callback to use. 3692d1e580aSAlasdair G Kergon */ 3702d1e580aSAlasdair G Kergon dm_kcopyd_notify_fn fn; 3712d1e580aSAlasdair G Kergon void *context; 3722d1e580aSAlasdair G Kergon 3732d1e580aSAlasdair G Kergon /* 3742d1e580aSAlasdair G Kergon * These fields are only used if the job has been split 3752d1e580aSAlasdair G Kergon * into more manageable parts. 3762d1e580aSAlasdair G Kergon */ 3772d1e580aSAlasdair G Kergon struct mutex lock; 3782d1e580aSAlasdair G Kergon atomic_t sub_jobs; 3792d1e580aSAlasdair G Kergon sector_t progress; 380b73c67c2SDamien Le Moal sector_t write_offset; 3812d1e580aSAlasdair G Kergon 382c6ea41fbSMikulas Patocka struct kcopyd_job *master_job; 383c6ea41fbSMikulas Patocka }; 3842d1e580aSAlasdair G Kergon 3852d1e580aSAlasdair G Kergon static struct kmem_cache *_job_cache; 3862d1e580aSAlasdair G Kergon 3872d1e580aSAlasdair G Kergon int __init dm_kcopyd_init(void) 3882d1e580aSAlasdair G Kergon { 389c6ea41fbSMikulas Patocka _job_cache = kmem_cache_create("kcopyd_job", 390c6ea41fbSMikulas Patocka sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1), 391c6ea41fbSMikulas Patocka __alignof__(struct kcopyd_job), 0, NULL); 3922d1e580aSAlasdair G Kergon if (!_job_cache) 3932d1e580aSAlasdair G Kergon return -ENOMEM; 3942d1e580aSAlasdair G Kergon 3957f069653SMikulas Patocka zero_page_list.next = &zero_page_list; 3967f069653SMikulas Patocka zero_page_list.page = ZERO_PAGE(0); 3977f069653SMikulas Patocka 3982d1e580aSAlasdair G Kergon return 0; 3992d1e580aSAlasdair G Kergon } 4002d1e580aSAlasdair G Kergon 4012d1e580aSAlasdair G Kergon void dm_kcopyd_exit(void) 4022d1e580aSAlasdair G Kergon { 4032d1e580aSAlasdair G Kergon kmem_cache_destroy(_job_cache); 4042d1e580aSAlasdair G Kergon _job_cache = NULL; 4052d1e580aSAlasdair G Kergon } 4062d1e580aSAlasdair G Kergon 4072d1e580aSAlasdair G Kergon /* 4082d1e580aSAlasdair G Kergon * Functions to push and pop a job onto the head of a given job 4092d1e580aSAlasdair G Kergon * list. 4102d1e580aSAlasdair G Kergon */ 411b73c67c2SDamien Le Moal static struct kcopyd_job *pop_io_job(struct list_head *jobs, 412b73c67c2SDamien Le Moal struct dm_kcopyd_client *kc) 413b73c67c2SDamien Le Moal { 414b73c67c2SDamien Le Moal struct kcopyd_job *job; 415b73c67c2SDamien Le Moal 416b73c67c2SDamien Le Moal /* 417b73c67c2SDamien Le Moal * For I/O jobs, pop any read, any write without sequential write 418b73c67c2SDamien Le Moal * constraint and sequential writes that are at the right position. 419b73c67c2SDamien Le Moal */ 420b73c67c2SDamien Le Moal list_for_each_entry(job, jobs, list) { 421db2351ebSMikulas Patocka if (job->rw == READ || !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { 422b73c67c2SDamien Le Moal list_del(&job->list); 423b73c67c2SDamien Le Moal return job; 424b73c67c2SDamien Le Moal } 425b73c67c2SDamien Le Moal 426b73c67c2SDamien Le Moal if (job->write_offset == job->master_job->write_offset) { 427b73c67c2SDamien Le Moal job->master_job->write_offset += job->source.count; 428b73c67c2SDamien Le Moal list_del(&job->list); 429b73c67c2SDamien Le Moal return job; 430b73c67c2SDamien Le Moal } 431b73c67c2SDamien Le Moal } 432b73c67c2SDamien Le Moal 433b73c67c2SDamien Le Moal return NULL; 434b73c67c2SDamien Le Moal } 435b73c67c2SDamien Le Moal 4362d1e580aSAlasdair G Kergon static struct kcopyd_job *pop(struct list_head *jobs, 4372d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc) 4382d1e580aSAlasdair G Kergon { 4392d1e580aSAlasdair G Kergon struct kcopyd_job *job = NULL; 4402d1e580aSAlasdair G Kergon 441*6bcd658fSMikulas Patocka spin_lock_irq(&kc->job_lock); 4422d1e580aSAlasdair G Kergon 4432d1e580aSAlasdair G Kergon if (!list_empty(jobs)) { 444b73c67c2SDamien Le Moal if (jobs == &kc->io_jobs) 445b73c67c2SDamien Le Moal job = pop_io_job(jobs, kc); 446b73c67c2SDamien Le Moal else { 4472d1e580aSAlasdair G Kergon job = list_entry(jobs->next, struct kcopyd_job, list); 4482d1e580aSAlasdair G Kergon list_del(&job->list); 4492d1e580aSAlasdair G Kergon } 450b73c67c2SDamien Le Moal } 451*6bcd658fSMikulas Patocka spin_unlock_irq(&kc->job_lock); 4522d1e580aSAlasdair G Kergon 4532d1e580aSAlasdair G Kergon return job; 4542d1e580aSAlasdair G Kergon } 4552d1e580aSAlasdair G Kergon 4562d1e580aSAlasdair G Kergon static void push(struct list_head *jobs, struct kcopyd_job *job) 4572d1e580aSAlasdair G Kergon { 4582d1e580aSAlasdair G Kergon unsigned long flags; 4592d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = job->kc; 4602d1e580aSAlasdair G Kergon 4612d1e580aSAlasdair G Kergon spin_lock_irqsave(&kc->job_lock, flags); 4622d1e580aSAlasdair G Kergon list_add_tail(&job->list, jobs); 4632d1e580aSAlasdair G Kergon spin_unlock_irqrestore(&kc->job_lock, flags); 4642d1e580aSAlasdair G Kergon } 4652d1e580aSAlasdair G Kergon 466b673c3a8SKazuo Ito 467b673c3a8SKazuo Ito static void push_head(struct list_head *jobs, struct kcopyd_job *job) 468b673c3a8SKazuo Ito { 469b673c3a8SKazuo Ito struct dm_kcopyd_client *kc = job->kc; 470b673c3a8SKazuo Ito 471*6bcd658fSMikulas Patocka spin_lock_irq(&kc->job_lock); 472b673c3a8SKazuo Ito list_add(&job->list, jobs); 473*6bcd658fSMikulas Patocka spin_unlock_irq(&kc->job_lock); 474b673c3a8SKazuo Ito } 475b673c3a8SKazuo Ito 4762d1e580aSAlasdair G Kergon /* 4772d1e580aSAlasdair G Kergon * These three functions process 1 item from the corresponding 4782d1e580aSAlasdair G Kergon * job list. 4792d1e580aSAlasdair G Kergon * 4802d1e580aSAlasdair G Kergon * They return: 4812d1e580aSAlasdair G Kergon * < 0: error 4822d1e580aSAlasdair G Kergon * 0: success 4832d1e580aSAlasdair G Kergon * > 0: can't process yet. 4842d1e580aSAlasdair G Kergon */ 4852d1e580aSAlasdair G Kergon static int run_complete_job(struct kcopyd_job *job) 4862d1e580aSAlasdair G Kergon { 4872d1e580aSAlasdair G Kergon void *context = job->context; 4882d1e580aSAlasdair G Kergon int read_err = job->read_err; 4892d1e580aSAlasdair G Kergon unsigned long write_err = job->write_err; 4902d1e580aSAlasdair G Kergon dm_kcopyd_notify_fn fn = job->fn; 4912d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = job->kc; 4922d1e580aSAlasdair G Kergon 4937f069653SMikulas Patocka if (job->pages && job->pages != &zero_page_list) 4942d1e580aSAlasdair G Kergon kcopyd_put_pages(kc, job->pages); 495c6ea41fbSMikulas Patocka /* 496c6ea41fbSMikulas Patocka * If this is the master job, the sub jobs have already 497c6ea41fbSMikulas Patocka * completed so we can free everything. 498c6ea41fbSMikulas Patocka */ 499d5ffebddSMike Snitzer if (job->master_job == job) { 500d5ffebddSMike Snitzer mutex_destroy(&job->lock); 5016f1c819cSKent Overstreet mempool_free(job, &kc->job_pool); 502d5ffebddSMike Snitzer } 5032d1e580aSAlasdair G Kergon fn(read_err, write_err, context); 5042d1e580aSAlasdair G Kergon 5052d1e580aSAlasdair G Kergon if (atomic_dec_and_test(&kc->nr_jobs)) 5062d1e580aSAlasdair G Kergon wake_up(&kc->destroyq); 5072d1e580aSAlasdair G Kergon 508784c9a29SJohn Pittman cond_resched(); 509784c9a29SJohn Pittman 5102d1e580aSAlasdair G Kergon return 0; 5112d1e580aSAlasdair G Kergon } 5122d1e580aSAlasdair G Kergon 5132d1e580aSAlasdair G Kergon static void complete_io(unsigned long error, void *context) 5142d1e580aSAlasdair G Kergon { 5152d1e580aSAlasdair G Kergon struct kcopyd_job *job = (struct kcopyd_job *) context; 5162d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = job->kc; 5172d1e580aSAlasdair G Kergon 518df5d2e90SMikulas Patocka io_job_finish(kc->throttle); 519df5d2e90SMikulas Patocka 5202d1e580aSAlasdair G Kergon if (error) { 52151111666SMike Christie if (op_is_write(job->rw)) 5222d1e580aSAlasdair G Kergon job->write_err |= error; 5232d1e580aSAlasdair G Kergon else 5242d1e580aSAlasdair G Kergon job->read_err = 1; 5252d1e580aSAlasdair G Kergon 526db2351ebSMikulas Patocka if (!(job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))) { 5272d1e580aSAlasdair G Kergon push(&kc->complete_jobs, job); 5282d1e580aSAlasdair G Kergon wake(kc); 5292d1e580aSAlasdair G Kergon return; 5302d1e580aSAlasdair G Kergon } 5312d1e580aSAlasdair G Kergon } 5322d1e580aSAlasdair G Kergon 53351111666SMike Christie if (op_is_write(job->rw)) 5342d1e580aSAlasdair G Kergon push(&kc->complete_jobs, job); 5352d1e580aSAlasdair G Kergon 5362d1e580aSAlasdair G Kergon else { 5372d1e580aSAlasdair G Kergon job->rw = WRITE; 5382d1e580aSAlasdair G Kergon push(&kc->io_jobs, job); 5392d1e580aSAlasdair G Kergon } 5402d1e580aSAlasdair G Kergon 5412d1e580aSAlasdair G Kergon wake(kc); 5422d1e580aSAlasdair G Kergon } 5432d1e580aSAlasdair G Kergon 5442d1e580aSAlasdair G Kergon /* 5452d1e580aSAlasdair G Kergon * Request io on as many buffer heads as we can currently get for 5462d1e580aSAlasdair G Kergon * a particular job. 5472d1e580aSAlasdair G Kergon */ 5482d1e580aSAlasdair G Kergon static int run_io_job(struct kcopyd_job *job) 5492d1e580aSAlasdair G Kergon { 5502d1e580aSAlasdair G Kergon int r; 5512d1e580aSAlasdair G Kergon struct dm_io_request io_req = { 552e6047149SMike Christie .bi_op = job->rw, 553e6047149SMike Christie .bi_op_flags = 0, 5542d1e580aSAlasdair G Kergon .mem.type = DM_IO_PAGE_LIST, 5552d1e580aSAlasdair G Kergon .mem.ptr.pl = job->pages, 5564622afb3SMikulas Patocka .mem.offset = 0, 5572d1e580aSAlasdair G Kergon .notify.fn = complete_io, 5582d1e580aSAlasdair G Kergon .notify.context = job, 5592d1e580aSAlasdair G Kergon .client = job->kc->io_client, 5602d1e580aSAlasdair G Kergon }; 5612d1e580aSAlasdair G Kergon 562b73c67c2SDamien Le Moal /* 563b73c67c2SDamien Le Moal * If we need to write sequentially and some reads or writes failed, 564b73c67c2SDamien Le Moal * no point in continuing. 565b73c67c2SDamien Le Moal */ 566db2351ebSMikulas Patocka if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) && 567d1fef414SDmitry Fomichev job->master_job->write_err) { 568d1fef414SDmitry Fomichev job->write_err = job->master_job->write_err; 569b73c67c2SDamien Le Moal return -EIO; 570d1fef414SDmitry Fomichev } 571b73c67c2SDamien Le Moal 572df5d2e90SMikulas Patocka io_job_start(job->kc->throttle); 573df5d2e90SMikulas Patocka 5747eaceaccSJens Axboe if (job->rw == READ) 5752d1e580aSAlasdair G Kergon r = dm_io(&io_req, 1, &job->source, NULL); 576721a9602SJens Axboe else 5772d1e580aSAlasdair G Kergon r = dm_io(&io_req, job->num_dests, job->dests, NULL); 5782d1e580aSAlasdair G Kergon 5792d1e580aSAlasdair G Kergon return r; 5802d1e580aSAlasdair G Kergon } 5812d1e580aSAlasdair G Kergon 5822d1e580aSAlasdair G Kergon static int run_pages_job(struct kcopyd_job *job) 5832d1e580aSAlasdair G Kergon { 5842d1e580aSAlasdair G Kergon int r; 5855bf45a3dSMikulas Patocka unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); 5862d1e580aSAlasdair G Kergon 5875bf45a3dSMikulas Patocka r = kcopyd_get_pages(job->kc, nr_pages, &job->pages); 5882d1e580aSAlasdair G Kergon if (!r) { 5892d1e580aSAlasdair G Kergon /* this job is ready for io */ 5902d1e580aSAlasdair G Kergon push(&job->kc->io_jobs, job); 5912d1e580aSAlasdair G Kergon return 0; 5922d1e580aSAlasdair G Kergon } 5932d1e580aSAlasdair G Kergon 5942d1e580aSAlasdair G Kergon if (r == -ENOMEM) 5952d1e580aSAlasdair G Kergon /* can't complete now */ 5962d1e580aSAlasdair G Kergon return 1; 5972d1e580aSAlasdair G Kergon 5982d1e580aSAlasdair G Kergon return r; 5992d1e580aSAlasdair G Kergon } 6002d1e580aSAlasdair G Kergon 6012d1e580aSAlasdair G Kergon /* 6022d1e580aSAlasdair G Kergon * Run through a list for as long as possible. Returns the count 6032d1e580aSAlasdair G Kergon * of successful jobs. 6042d1e580aSAlasdair G Kergon */ 6052d1e580aSAlasdair G Kergon static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, 6062d1e580aSAlasdair G Kergon int (*fn) (struct kcopyd_job *)) 6072d1e580aSAlasdair G Kergon { 6082d1e580aSAlasdair G Kergon struct kcopyd_job *job; 6092d1e580aSAlasdair G Kergon int r, count = 0; 6102d1e580aSAlasdair G Kergon 6112d1e580aSAlasdair G Kergon while ((job = pop(jobs, kc))) { 6122d1e580aSAlasdair G Kergon 6132d1e580aSAlasdair G Kergon r = fn(job); 6142d1e580aSAlasdair G Kergon 6152d1e580aSAlasdair G Kergon if (r < 0) { 6162d1e580aSAlasdair G Kergon /* error this rogue job */ 61751111666SMike Christie if (op_is_write(job->rw)) 6182d1e580aSAlasdair G Kergon job->write_err = (unsigned long) -1L; 6192d1e580aSAlasdair G Kergon else 6202d1e580aSAlasdair G Kergon job->read_err = 1; 6212d1e580aSAlasdair G Kergon push(&kc->complete_jobs, job); 622d1fef414SDmitry Fomichev wake(kc); 6232d1e580aSAlasdair G Kergon break; 6242d1e580aSAlasdair G Kergon } 6252d1e580aSAlasdair G Kergon 6262d1e580aSAlasdair G Kergon if (r > 0) { 6272d1e580aSAlasdair G Kergon /* 6282d1e580aSAlasdair G Kergon * We couldn't service this job ATM, so 6292d1e580aSAlasdair G Kergon * push this job back onto the list. 6302d1e580aSAlasdair G Kergon */ 631b673c3a8SKazuo Ito push_head(jobs, job); 6322d1e580aSAlasdair G Kergon break; 6332d1e580aSAlasdair G Kergon } 6342d1e580aSAlasdair G Kergon 6352d1e580aSAlasdair G Kergon count++; 6362d1e580aSAlasdair G Kergon } 6372d1e580aSAlasdair G Kergon 6382d1e580aSAlasdair G Kergon return count; 6392d1e580aSAlasdair G Kergon } 6402d1e580aSAlasdair G Kergon 6412d1e580aSAlasdair G Kergon /* 6422d1e580aSAlasdair G Kergon * kcopyd does this every time it's woken up. 6432d1e580aSAlasdair G Kergon */ 6442d1e580aSAlasdair G Kergon static void do_work(struct work_struct *work) 6452d1e580aSAlasdair G Kergon { 6462d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = container_of(work, 6472d1e580aSAlasdair G Kergon struct dm_kcopyd_client, kcopyd_work); 6487eaceaccSJens Axboe struct blk_plug plug; 6492d1e580aSAlasdair G Kergon 6502d1e580aSAlasdair G Kergon /* 6512d1e580aSAlasdair G Kergon * The order that these are called is *very* important. 6522d1e580aSAlasdair G Kergon * complete jobs can free some pages for pages jobs. 6532d1e580aSAlasdair G Kergon * Pages jobs when successful will jump onto the io jobs 6542d1e580aSAlasdair G Kergon * list. io jobs call wake when they complete and it all 6552d1e580aSAlasdair G Kergon * starts again. 6562d1e580aSAlasdair G Kergon */ 657*6bcd658fSMikulas Patocka spin_lock_irq(&kc->job_lock); 658d7e6b8dfSNikos Tsironis list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs); 659*6bcd658fSMikulas Patocka spin_unlock_irq(&kc->job_lock); 660d7e6b8dfSNikos Tsironis 6617eaceaccSJens Axboe blk_start_plug(&plug); 6622d1e580aSAlasdair G Kergon process_jobs(&kc->complete_jobs, kc, run_complete_job); 6632d1e580aSAlasdair G Kergon process_jobs(&kc->pages_jobs, kc, run_pages_job); 6642d1e580aSAlasdair G Kergon process_jobs(&kc->io_jobs, kc, run_io_job); 6657eaceaccSJens Axboe blk_finish_plug(&plug); 6662d1e580aSAlasdair G Kergon } 6672d1e580aSAlasdair G Kergon 6682d1e580aSAlasdair G Kergon /* 6692d1e580aSAlasdair G Kergon * If we are copying a small region we just dispatch a single job 6702d1e580aSAlasdair G Kergon * to do the copy, otherwise the io has to be split up into many 6712d1e580aSAlasdair G Kergon * jobs. 6722d1e580aSAlasdair G Kergon */ 6732d1e580aSAlasdair G Kergon static void dispatch_job(struct kcopyd_job *job) 6742d1e580aSAlasdair G Kergon { 6752d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc = job->kc; 6762d1e580aSAlasdair G Kergon atomic_inc(&kc->nr_jobs); 6779ca170a3SMikulas Patocka if (unlikely(!job->source.count)) 678d7e6b8dfSNikos Tsironis push(&kc->callback_jobs, job); 6797f069653SMikulas Patocka else if (job->pages == &zero_page_list) 6807f069653SMikulas Patocka push(&kc->io_jobs, job); 6819ca170a3SMikulas Patocka else 6822d1e580aSAlasdair G Kergon push(&kc->pages_jobs, job); 6832d1e580aSAlasdair G Kergon wake(kc); 6842d1e580aSAlasdair G Kergon } 6852d1e580aSAlasdair G Kergon 6862d1e580aSAlasdair G Kergon static void segment_complete(int read_err, unsigned long write_err, 6872d1e580aSAlasdair G Kergon void *context) 6882d1e580aSAlasdair G Kergon { 6892d1e580aSAlasdair G Kergon /* FIXME: tidy this function */ 6902d1e580aSAlasdair G Kergon sector_t progress = 0; 6912d1e580aSAlasdair G Kergon sector_t count = 0; 692c6ea41fbSMikulas Patocka struct kcopyd_job *sub_job = (struct kcopyd_job *) context; 693c6ea41fbSMikulas Patocka struct kcopyd_job *job = sub_job->master_job; 69473830857SMikulas Patocka struct dm_kcopyd_client *kc = job->kc; 6952d1e580aSAlasdair G Kergon 6962d1e580aSAlasdair G Kergon mutex_lock(&job->lock); 6972d1e580aSAlasdair G Kergon 6982d1e580aSAlasdair G Kergon /* update the error */ 6992d1e580aSAlasdair G Kergon if (read_err) 7002d1e580aSAlasdair G Kergon job->read_err = 1; 7012d1e580aSAlasdair G Kergon 7022d1e580aSAlasdair G Kergon if (write_err) 7032d1e580aSAlasdair G Kergon job->write_err |= write_err; 7042d1e580aSAlasdair G Kergon 7052d1e580aSAlasdair G Kergon /* 7062d1e580aSAlasdair G Kergon * Only dispatch more work if there hasn't been an error. 7072d1e580aSAlasdair G Kergon */ 7082d1e580aSAlasdair G Kergon if ((!job->read_err && !job->write_err) || 709db2351ebSMikulas Patocka job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) { 7102d1e580aSAlasdair G Kergon /* get the next chunk of work */ 7112d1e580aSAlasdair G Kergon progress = job->progress; 7122d1e580aSAlasdair G Kergon count = job->source.count - progress; 7132d1e580aSAlasdair G Kergon if (count) { 714c663e040SNikos Tsironis if (count > kc->sub_job_size) 715c663e040SNikos Tsironis count = kc->sub_job_size; 7162d1e580aSAlasdair G Kergon 7172d1e580aSAlasdair G Kergon job->progress += count; 7182d1e580aSAlasdair G Kergon } 7192d1e580aSAlasdair G Kergon } 7202d1e580aSAlasdair G Kergon mutex_unlock(&job->lock); 7212d1e580aSAlasdair G Kergon 7222d1e580aSAlasdair G Kergon if (count) { 7232d1e580aSAlasdair G Kergon int i; 7242d1e580aSAlasdair G Kergon 7252d1e580aSAlasdair G Kergon *sub_job = *job; 726b73c67c2SDamien Le Moal sub_job->write_offset = progress; 7272d1e580aSAlasdair G Kergon sub_job->source.sector += progress; 7282d1e580aSAlasdair G Kergon sub_job->source.count = count; 7292d1e580aSAlasdair G Kergon 7302d1e580aSAlasdair G Kergon for (i = 0; i < job->num_dests; i++) { 7312d1e580aSAlasdair G Kergon sub_job->dests[i].sector += progress; 7322d1e580aSAlasdair G Kergon sub_job->dests[i].count = count; 7332d1e580aSAlasdair G Kergon } 7342d1e580aSAlasdair G Kergon 7352d1e580aSAlasdair G Kergon sub_job->fn = segment_complete; 736c6ea41fbSMikulas Patocka sub_job->context = sub_job; 7372d1e580aSAlasdair G Kergon dispatch_job(sub_job); 7382d1e580aSAlasdair G Kergon 7392d1e580aSAlasdair G Kergon } else if (atomic_dec_and_test(&job->sub_jobs)) { 7402d1e580aSAlasdair G Kergon 7412d1e580aSAlasdair G Kergon /* 742340cd444SMikulas Patocka * Queue the completion callback to the kcopyd thread. 743340cd444SMikulas Patocka * 744340cd444SMikulas Patocka * Some callers assume that all the completions are called 745340cd444SMikulas Patocka * from a single thread and don't race with each other. 746340cd444SMikulas Patocka * 747340cd444SMikulas Patocka * We must not call the callback directly here because this 748340cd444SMikulas Patocka * code may not be executing in the thread. 7492d1e580aSAlasdair G Kergon */ 750340cd444SMikulas Patocka push(&kc->complete_jobs, job); 751340cd444SMikulas Patocka wake(kc); 7522d1e580aSAlasdair G Kergon } 7532d1e580aSAlasdair G Kergon } 7542d1e580aSAlasdair G Kergon 7552d1e580aSAlasdair G Kergon /* 756c6ea41fbSMikulas Patocka * Create some sub jobs to share the work between them. 7572d1e580aSAlasdair G Kergon */ 758c6ea41fbSMikulas Patocka static void split_job(struct kcopyd_job *master_job) 7592d1e580aSAlasdair G Kergon { 7602d1e580aSAlasdair G Kergon int i; 7612d1e580aSAlasdair G Kergon 762c6ea41fbSMikulas Patocka atomic_inc(&master_job->kc->nr_jobs); 763340cd444SMikulas Patocka 764c6ea41fbSMikulas Patocka atomic_set(&master_job->sub_jobs, SPLIT_COUNT); 765c6ea41fbSMikulas Patocka for (i = 0; i < SPLIT_COUNT; i++) { 766c6ea41fbSMikulas Patocka master_job[i + 1].master_job = master_job; 767c6ea41fbSMikulas Patocka segment_complete(0, 0u, &master_job[i + 1]); 768c6ea41fbSMikulas Patocka } 7692d1e580aSAlasdair G Kergon } 7702d1e580aSAlasdair G Kergon 7717209049dSMike Snitzer void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, 7722d1e580aSAlasdair G Kergon unsigned int num_dests, struct dm_io_region *dests, 7732d1e580aSAlasdair G Kergon unsigned int flags, dm_kcopyd_notify_fn fn, void *context) 7742d1e580aSAlasdair G Kergon { 7752d1e580aSAlasdair G Kergon struct kcopyd_job *job; 77670d6c400SMike Snitzer int i; 7772d1e580aSAlasdair G Kergon 7782d1e580aSAlasdair G Kergon /* 779c6ea41fbSMikulas Patocka * Allocate an array of jobs consisting of one master job 780c6ea41fbSMikulas Patocka * followed by SPLIT_COUNT sub jobs. 7812d1e580aSAlasdair G Kergon */ 7826f1c819cSKent Overstreet job = mempool_alloc(&kc->job_pool, GFP_NOIO); 783d5ffebddSMike Snitzer mutex_init(&job->lock); 7842d1e580aSAlasdair G Kergon 7852d1e580aSAlasdair G Kergon /* 7862d1e580aSAlasdair G Kergon * set up for the read. 7872d1e580aSAlasdair G Kergon */ 7882d1e580aSAlasdair G Kergon job->kc = kc; 7892d1e580aSAlasdair G Kergon job->flags = flags; 7902d1e580aSAlasdair G Kergon job->read_err = 0; 7912d1e580aSAlasdair G Kergon job->write_err = 0; 7922d1e580aSAlasdair G Kergon 7932d1e580aSAlasdair G Kergon job->num_dests = num_dests; 7942d1e580aSAlasdair G Kergon memcpy(&job->dests, dests, sizeof(*dests) * num_dests); 7952d1e580aSAlasdair G Kergon 796b73c67c2SDamien Le Moal /* 797b73c67c2SDamien Le Moal * If one of the destination is a host-managed zoned block device, 798b73c67c2SDamien Le Moal * we need to write sequentially. If one of the destination is a 799b73c67c2SDamien Le Moal * host-aware device, then leave it to the caller to choose what to do. 800b73c67c2SDamien Le Moal */ 801db2351ebSMikulas Patocka if (!(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { 802b73c67c2SDamien Le Moal for (i = 0; i < job->num_dests; i++) { 803b73c67c2SDamien Le Moal if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) { 804db2351ebSMikulas Patocka job->flags |= BIT(DM_KCOPYD_WRITE_SEQ); 805b73c67c2SDamien Le Moal break; 806b73c67c2SDamien Le Moal } 807b73c67c2SDamien Le Moal } 808b73c67c2SDamien Le Moal } 809b73c67c2SDamien Le Moal 810b73c67c2SDamien Le Moal /* 811b73c67c2SDamien Le Moal * If we need to write sequentially, errors cannot be ignored. 812b73c67c2SDamien Le Moal */ 813db2351ebSMikulas Patocka if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) && 814db2351ebSMikulas Patocka job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) 815db2351ebSMikulas Patocka job->flags &= ~BIT(DM_KCOPYD_IGNORE_ERROR); 816b73c67c2SDamien Le Moal 8177f069653SMikulas Patocka if (from) { 8187f069653SMikulas Patocka job->source = *from; 8192d1e580aSAlasdair G Kergon job->pages = NULL; 8207f069653SMikulas Patocka job->rw = READ; 8217f069653SMikulas Patocka } else { 8227f069653SMikulas Patocka memset(&job->source, 0, sizeof job->source); 8237f069653SMikulas Patocka job->source.count = job->dests[0].count; 8247f069653SMikulas Patocka job->pages = &zero_page_list; 82570d6c400SMike Snitzer 82670d6c400SMike Snitzer /* 827615ec946SChristoph Hellwig * Use WRITE ZEROES to optimize zeroing if all dests support it. 82870d6c400SMike Snitzer */ 829615ec946SChristoph Hellwig job->rw = REQ_OP_WRITE_ZEROES; 83070d6c400SMike Snitzer for (i = 0; i < job->num_dests; i++) 831615ec946SChristoph Hellwig if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) { 8327f069653SMikulas Patocka job->rw = WRITE; 83370d6c400SMike Snitzer break; 83470d6c400SMike Snitzer } 8357f069653SMikulas Patocka } 8362d1e580aSAlasdair G Kergon 8372d1e580aSAlasdair G Kergon job->fn = fn; 8382d1e580aSAlasdair G Kergon job->context = context; 839c6ea41fbSMikulas Patocka job->master_job = job; 840b73c67c2SDamien Le Moal job->write_offset = 0; 8412d1e580aSAlasdair G Kergon 842c663e040SNikos Tsironis if (job->source.count <= kc->sub_job_size) 8432d1e580aSAlasdair G Kergon dispatch_job(job); 8442d1e580aSAlasdair G Kergon else { 8452d1e580aSAlasdair G Kergon job->progress = 0; 8462d1e580aSAlasdair G Kergon split_job(job); 8472d1e580aSAlasdair G Kergon } 8482d1e580aSAlasdair G Kergon } 8492d1e580aSAlasdair G Kergon EXPORT_SYMBOL(dm_kcopyd_copy); 8502d1e580aSAlasdair G Kergon 8517209049dSMike Snitzer void dm_kcopyd_zero(struct dm_kcopyd_client *kc, 8527f069653SMikulas Patocka unsigned num_dests, struct dm_io_region *dests, 8537f069653SMikulas Patocka unsigned flags, dm_kcopyd_notify_fn fn, void *context) 8547f069653SMikulas Patocka { 8557209049dSMike Snitzer dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context); 8567f069653SMikulas Patocka } 8577f069653SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_zero); 8587f069653SMikulas Patocka 859a6e50b40SMikulas Patocka void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, 860a6e50b40SMikulas Patocka dm_kcopyd_notify_fn fn, void *context) 861a6e50b40SMikulas Patocka { 862a6e50b40SMikulas Patocka struct kcopyd_job *job; 863a6e50b40SMikulas Patocka 8646f1c819cSKent Overstreet job = mempool_alloc(&kc->job_pool, GFP_NOIO); 865a6e50b40SMikulas Patocka 866a6e50b40SMikulas Patocka memset(job, 0, sizeof(struct kcopyd_job)); 867a6e50b40SMikulas Patocka job->kc = kc; 868a6e50b40SMikulas Patocka job->fn = fn; 869a6e50b40SMikulas Patocka job->context = context; 870d136f2efSAlasdair G Kergon job->master_job = job; 871a6e50b40SMikulas Patocka 872a6e50b40SMikulas Patocka atomic_inc(&kc->nr_jobs); 873a6e50b40SMikulas Patocka 874a6e50b40SMikulas Patocka return job; 875a6e50b40SMikulas Patocka } 876a6e50b40SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_prepare_callback); 877a6e50b40SMikulas Patocka 878a6e50b40SMikulas Patocka void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) 879a6e50b40SMikulas Patocka { 880a6e50b40SMikulas Patocka struct kcopyd_job *job = j; 881a6e50b40SMikulas Patocka struct dm_kcopyd_client *kc = job->kc; 882a6e50b40SMikulas Patocka 883a6e50b40SMikulas Patocka job->read_err = read_err; 884a6e50b40SMikulas Patocka job->write_err = write_err; 885a6e50b40SMikulas Patocka 886d7e6b8dfSNikos Tsironis push(&kc->callback_jobs, job); 887a6e50b40SMikulas Patocka wake(kc); 888a6e50b40SMikulas Patocka } 889a6e50b40SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_do_callback); 890a6e50b40SMikulas Patocka 8912d1e580aSAlasdair G Kergon /* 8922d1e580aSAlasdair G Kergon * Cancels a kcopyd job, eg. someone might be deactivating a 8932d1e580aSAlasdair G Kergon * mirror. 8942d1e580aSAlasdair G Kergon */ 8952d1e580aSAlasdair G Kergon #if 0 8962d1e580aSAlasdair G Kergon int kcopyd_cancel(struct kcopyd_job *job, int block) 8972d1e580aSAlasdair G Kergon { 8982d1e580aSAlasdair G Kergon /* FIXME: finish */ 8992d1e580aSAlasdair G Kergon return -1; 9002d1e580aSAlasdair G Kergon } 9012d1e580aSAlasdair G Kergon #endif /* 0 */ 9022d1e580aSAlasdair G Kergon 9032d1e580aSAlasdair G Kergon /*----------------------------------------------------------------- 9042d1e580aSAlasdair G Kergon * Client setup 9052d1e580aSAlasdair G Kergon *---------------------------------------------------------------*/ 906df5d2e90SMikulas Patocka struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle) 9072d1e580aSAlasdair G Kergon { 9086f1c819cSKent Overstreet int r; 909c663e040SNikos Tsironis unsigned reserve_pages; 9102d1e580aSAlasdair G Kergon struct dm_kcopyd_client *kc; 9112d1e580aSAlasdair G Kergon 912d3775354SKent Overstreet kc = kzalloc(sizeof(*kc), GFP_KERNEL); 9132d1e580aSAlasdair G Kergon if (!kc) 914fa34ce73SMikulas Patocka return ERR_PTR(-ENOMEM); 9152d1e580aSAlasdair G Kergon 9162d1e580aSAlasdair G Kergon spin_lock_init(&kc->job_lock); 917d7e6b8dfSNikos Tsironis INIT_LIST_HEAD(&kc->callback_jobs); 9182d1e580aSAlasdair G Kergon INIT_LIST_HEAD(&kc->complete_jobs); 9192d1e580aSAlasdair G Kergon INIT_LIST_HEAD(&kc->io_jobs); 9202d1e580aSAlasdair G Kergon INIT_LIST_HEAD(&kc->pages_jobs); 921df5d2e90SMikulas Patocka kc->throttle = throttle; 9222d1e580aSAlasdair G Kergon 9236f1c819cSKent Overstreet r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache); 9246f1c819cSKent Overstreet if (r) 9252d1e580aSAlasdair G Kergon goto bad_slab; 9262d1e580aSAlasdair G Kergon 9272d1e580aSAlasdair G Kergon INIT_WORK(&kc->kcopyd_work, do_work); 928670368a8STejun Heo kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0); 9296f1c819cSKent Overstreet if (!kc->kcopyd_wq) { 9306f1c819cSKent Overstreet r = -ENOMEM; 9312d1e580aSAlasdair G Kergon goto bad_workqueue; 9326f1c819cSKent Overstreet } 9332d1e580aSAlasdair G Kergon 934c663e040SNikos Tsironis kc->sub_job_size = dm_get_kcopyd_subjob_size(); 935c663e040SNikos Tsironis reserve_pages = DIV_ROUND_UP(kc->sub_job_size << SECTOR_SHIFT, PAGE_SIZE); 936c663e040SNikos Tsironis 9372d1e580aSAlasdair G Kergon kc->pages = NULL; 938d0471458SMikulas Patocka kc->nr_reserved_pages = kc->nr_free_pages = 0; 939c663e040SNikos Tsironis r = client_reserve_pages(kc, reserve_pages); 9402d1e580aSAlasdair G Kergon if (r) 9412d1e580aSAlasdair G Kergon goto bad_client_pages; 9422d1e580aSAlasdair G Kergon 943bda8efecSMikulas Patocka kc->io_client = dm_io_client_create(); 9442d1e580aSAlasdair G Kergon if (IS_ERR(kc->io_client)) { 9452d1e580aSAlasdair G Kergon r = PTR_ERR(kc->io_client); 9462d1e580aSAlasdair G Kergon goto bad_io_client; 9472d1e580aSAlasdair G Kergon } 9482d1e580aSAlasdair G Kergon 9492d1e580aSAlasdair G Kergon init_waitqueue_head(&kc->destroyq); 9502d1e580aSAlasdair G Kergon atomic_set(&kc->nr_jobs, 0); 9512d1e580aSAlasdair G Kergon 952fa34ce73SMikulas Patocka return kc; 9532d1e580aSAlasdair G Kergon 9542d1e580aSAlasdair G Kergon bad_io_client: 9552d1e580aSAlasdair G Kergon client_free_pages(kc); 9562d1e580aSAlasdair G Kergon bad_client_pages: 9572d1e580aSAlasdair G Kergon destroy_workqueue(kc->kcopyd_wq); 9582d1e580aSAlasdair G Kergon bad_workqueue: 9596f1c819cSKent Overstreet mempool_exit(&kc->job_pool); 9602d1e580aSAlasdair G Kergon bad_slab: 9612d1e580aSAlasdair G Kergon kfree(kc); 9622d1e580aSAlasdair G Kergon 963fa34ce73SMikulas Patocka return ERR_PTR(r); 9642d1e580aSAlasdair G Kergon } 9652d1e580aSAlasdair G Kergon EXPORT_SYMBOL(dm_kcopyd_client_create); 9662d1e580aSAlasdair G Kergon 9672d1e580aSAlasdair G Kergon void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) 9682d1e580aSAlasdair G Kergon { 9692d1e580aSAlasdair G Kergon /* Wait for completion of all jobs submitted by this client. */ 9702d1e580aSAlasdair G Kergon wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); 9712d1e580aSAlasdair G Kergon 972d7e6b8dfSNikos Tsironis BUG_ON(!list_empty(&kc->callback_jobs)); 9732d1e580aSAlasdair G Kergon BUG_ON(!list_empty(&kc->complete_jobs)); 9742d1e580aSAlasdair G Kergon BUG_ON(!list_empty(&kc->io_jobs)); 9752d1e580aSAlasdair G Kergon BUG_ON(!list_empty(&kc->pages_jobs)); 9762d1e580aSAlasdair G Kergon destroy_workqueue(kc->kcopyd_wq); 9772d1e580aSAlasdair G Kergon dm_io_client_destroy(kc->io_client); 9782d1e580aSAlasdair G Kergon client_free_pages(kc); 9796f1c819cSKent Overstreet mempool_exit(&kc->job_pool); 9802d1e580aSAlasdair G Kergon kfree(kc); 9812d1e580aSAlasdair G Kergon } 9822d1e580aSAlasdair G Kergon EXPORT_SYMBOL(dm_kcopyd_client_destroy); 983