xref: /linux/drivers/md/dm-kcopyd.c (revision 6bcd658f2a2a13fb63c38fc018e1ab210396aefc)
12d1e580aSAlasdair G Kergon /*
22d1e580aSAlasdair G Kergon  * Copyright (C) 2002 Sistina Software (UK) Limited.
32d1e580aSAlasdair G Kergon  * Copyright (C) 2006 Red Hat GmbH
42d1e580aSAlasdair G Kergon  *
52d1e580aSAlasdair G Kergon  * This file is released under the GPL.
62d1e580aSAlasdair G Kergon  *
72d1e580aSAlasdair G Kergon  * Kcopyd provides a simple interface for copying an area of one
82d1e580aSAlasdair G Kergon  * block-device to one or more other block-devices, with an asynchronous
92d1e580aSAlasdair G Kergon  * completion notification.
102d1e580aSAlasdair G Kergon  */
112d1e580aSAlasdair G Kergon 
122d1e580aSAlasdair G Kergon #include <linux/types.h>
1360063497SArun Sharma #include <linux/atomic.h>
142d1e580aSAlasdair G Kergon #include <linux/blkdev.h>
152d1e580aSAlasdair G Kergon #include <linux/fs.h>
162d1e580aSAlasdair G Kergon #include <linux/init.h>
172d1e580aSAlasdair G Kergon #include <linux/list.h>
182d1e580aSAlasdair G Kergon #include <linux/mempool.h>
192d1e580aSAlasdair G Kergon #include <linux/module.h>
202d1e580aSAlasdair G Kergon #include <linux/pagemap.h>
212d1e580aSAlasdair G Kergon #include <linux/slab.h>
222d1e580aSAlasdair G Kergon #include <linux/vmalloc.h>
232d1e580aSAlasdair G Kergon #include <linux/workqueue.h>
242d1e580aSAlasdair G Kergon #include <linux/mutex.h>
25df5d2e90SMikulas Patocka #include <linux/delay.h>
26586e80e6SMikulas Patocka #include <linux/device-mapper.h>
27a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h>
282d1e580aSAlasdair G Kergon 
294cc96131SMike Snitzer #include "dm-core.h"
302d1e580aSAlasdair G Kergon 
31c6ea41fbSMikulas Patocka #define SPLIT_COUNT	8
32c6ea41fbSMikulas Patocka #define MIN_JOBS	8
33c663e040SNikos Tsironis 
34c663e040SNikos Tsironis #define DEFAULT_SUB_JOB_SIZE_KB 512
35c663e040SNikos Tsironis #define MAX_SUB_JOB_SIZE_KB     1024
36c663e040SNikos Tsironis 
37c663e040SNikos Tsironis static unsigned kcopyd_subjob_size_kb = DEFAULT_SUB_JOB_SIZE_KB;
38c663e040SNikos Tsironis 
39c663e040SNikos Tsironis module_param(kcopyd_subjob_size_kb, uint, S_IRUGO | S_IWUSR);
40c663e040SNikos Tsironis MODULE_PARM_DESC(kcopyd_subjob_size_kb, "Sub-job size for dm-kcopyd clients");
41c663e040SNikos Tsironis 
42c663e040SNikos Tsironis static unsigned dm_get_kcopyd_subjob_size(void)
43c663e040SNikos Tsironis {
44c663e040SNikos Tsironis 	unsigned sub_job_size_kb;
45c663e040SNikos Tsironis 
46c663e040SNikos Tsironis 	sub_job_size_kb = __dm_get_module_param(&kcopyd_subjob_size_kb,
47c663e040SNikos Tsironis 						DEFAULT_SUB_JOB_SIZE_KB,
48c663e040SNikos Tsironis 						MAX_SUB_JOB_SIZE_KB);
49c663e040SNikos Tsironis 
50c663e040SNikos Tsironis 	return sub_job_size_kb << 1;
51c663e040SNikos Tsironis }
52c6ea41fbSMikulas Patocka 
532d1e580aSAlasdair G Kergon /*-----------------------------------------------------------------
542d1e580aSAlasdair G Kergon  * Each kcopyd client has its own little pool of preallocated
552d1e580aSAlasdair G Kergon  * pages for kcopyd io.
562d1e580aSAlasdair G Kergon  *---------------------------------------------------------------*/
572d1e580aSAlasdair G Kergon struct dm_kcopyd_client {
582d1e580aSAlasdair G Kergon 	struct page_list *pages;
59d0471458SMikulas Patocka 	unsigned nr_reserved_pages;
60d0471458SMikulas Patocka 	unsigned nr_free_pages;
61c663e040SNikos Tsironis 	unsigned sub_job_size;
622d1e580aSAlasdair G Kergon 
632d1e580aSAlasdair G Kergon 	struct dm_io_client *io_client;
642d1e580aSAlasdair G Kergon 
652d1e580aSAlasdair G Kergon 	wait_queue_head_t destroyq;
662d1e580aSAlasdair G Kergon 
676f1c819cSKent Overstreet 	mempool_t job_pool;
682d1e580aSAlasdair G Kergon 
692d1e580aSAlasdair G Kergon 	struct workqueue_struct *kcopyd_wq;
702d1e580aSAlasdair G Kergon 	struct work_struct kcopyd_work;
712d1e580aSAlasdair G Kergon 
72df5d2e90SMikulas Patocka 	struct dm_kcopyd_throttle *throttle;
73df5d2e90SMikulas Patocka 
7472d711c8SMike Snitzer 	atomic_t nr_jobs;
7572d711c8SMike Snitzer 
762d1e580aSAlasdair G Kergon /*
77d7e6b8dfSNikos Tsironis  * We maintain four lists of jobs:
782d1e580aSAlasdair G Kergon  *
792d1e580aSAlasdair G Kergon  * i)   jobs waiting for pages
802d1e580aSAlasdair G Kergon  * ii)  jobs that have pages, and are waiting for the io to be issued.
81d7e6b8dfSNikos Tsironis  * iii) jobs that don't need to do any IO and just run a callback
82d7e6b8dfSNikos Tsironis  * iv) jobs that have completed.
832d1e580aSAlasdair G Kergon  *
84d7e6b8dfSNikos Tsironis  * All four of these are protected by job_lock.
852d1e580aSAlasdair G Kergon  */
862d1e580aSAlasdair G Kergon 	spinlock_t job_lock;
87d7e6b8dfSNikos Tsironis 	struct list_head callback_jobs;
882d1e580aSAlasdair G Kergon 	struct list_head complete_jobs;
892d1e580aSAlasdair G Kergon 	struct list_head io_jobs;
902d1e580aSAlasdair G Kergon 	struct list_head pages_jobs;
912d1e580aSAlasdair G Kergon };
922d1e580aSAlasdair G Kergon 
937f069653SMikulas Patocka static struct page_list zero_page_list;
947f069653SMikulas Patocka 
95df5d2e90SMikulas Patocka static DEFINE_SPINLOCK(throttle_spinlock);
96df5d2e90SMikulas Patocka 
97df5d2e90SMikulas Patocka /*
98df5d2e90SMikulas Patocka  * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
99df5d2e90SMikulas Patocka  * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided
100df5d2e90SMikulas Patocka  * by 2.
101df5d2e90SMikulas Patocka  */
102df5d2e90SMikulas Patocka #define ACCOUNT_INTERVAL_SHIFT		SHIFT_HZ
103df5d2e90SMikulas Patocka 
104df5d2e90SMikulas Patocka /*
105df5d2e90SMikulas Patocka  * Sleep this number of milliseconds.
106df5d2e90SMikulas Patocka  *
107df5d2e90SMikulas Patocka  * The value was decided experimentally.
108df5d2e90SMikulas Patocka  * Smaller values seem to cause an increased copy rate above the limit.
109df5d2e90SMikulas Patocka  * The reason for this is unknown but possibly due to jiffies rounding errors
110df5d2e90SMikulas Patocka  * or read/write cache inside the disk.
111df5d2e90SMikulas Patocka  */
112df5d2e90SMikulas Patocka #define SLEEP_MSEC			100
113df5d2e90SMikulas Patocka 
114df5d2e90SMikulas Patocka /*
115df5d2e90SMikulas Patocka  * Maximum number of sleep events. There is a theoretical livelock if more
116df5d2e90SMikulas Patocka  * kcopyd clients do work simultaneously which this limit avoids.
117df5d2e90SMikulas Patocka  */
118df5d2e90SMikulas Patocka #define MAX_SLEEPS			10
119df5d2e90SMikulas Patocka 
120df5d2e90SMikulas Patocka static void io_job_start(struct dm_kcopyd_throttle *t)
121df5d2e90SMikulas Patocka {
122df5d2e90SMikulas Patocka 	unsigned throttle, now, difference;
123df5d2e90SMikulas Patocka 	int slept = 0, skew;
124df5d2e90SMikulas Patocka 
125df5d2e90SMikulas Patocka 	if (unlikely(!t))
126df5d2e90SMikulas Patocka 		return;
127df5d2e90SMikulas Patocka 
128df5d2e90SMikulas Patocka try_again:
129df5d2e90SMikulas Patocka 	spin_lock_irq(&throttle_spinlock);
130df5d2e90SMikulas Patocka 
1316aa7de05SMark Rutland 	throttle = READ_ONCE(t->throttle);
132df5d2e90SMikulas Patocka 
133df5d2e90SMikulas Patocka 	if (likely(throttle >= 100))
134df5d2e90SMikulas Patocka 		goto skip_limit;
135df5d2e90SMikulas Patocka 
136df5d2e90SMikulas Patocka 	now = jiffies;
137df5d2e90SMikulas Patocka 	difference = now - t->last_jiffies;
138df5d2e90SMikulas Patocka 	t->last_jiffies = now;
139df5d2e90SMikulas Patocka 	if (t->num_io_jobs)
140df5d2e90SMikulas Patocka 		t->io_period += difference;
141df5d2e90SMikulas Patocka 	t->total_period += difference;
142df5d2e90SMikulas Patocka 
143df5d2e90SMikulas Patocka 	/*
144df5d2e90SMikulas Patocka 	 * Maintain sane values if we got a temporary overflow.
145df5d2e90SMikulas Patocka 	 */
146df5d2e90SMikulas Patocka 	if (unlikely(t->io_period > t->total_period))
147df5d2e90SMikulas Patocka 		t->io_period = t->total_period;
148df5d2e90SMikulas Patocka 
149df5d2e90SMikulas Patocka 	if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
150df5d2e90SMikulas Patocka 		int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
151df5d2e90SMikulas Patocka 		t->total_period >>= shift;
152df5d2e90SMikulas Patocka 		t->io_period >>= shift;
153df5d2e90SMikulas Patocka 	}
154df5d2e90SMikulas Patocka 
155df5d2e90SMikulas Patocka 	skew = t->io_period - throttle * t->total_period / 100;
156df5d2e90SMikulas Patocka 
157df5d2e90SMikulas Patocka 	if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
158df5d2e90SMikulas Patocka 		slept++;
159df5d2e90SMikulas Patocka 		spin_unlock_irq(&throttle_spinlock);
160df5d2e90SMikulas Patocka 		msleep(SLEEP_MSEC);
161df5d2e90SMikulas Patocka 		goto try_again;
162df5d2e90SMikulas Patocka 	}
163df5d2e90SMikulas Patocka 
164df5d2e90SMikulas Patocka skip_limit:
165df5d2e90SMikulas Patocka 	t->num_io_jobs++;
166df5d2e90SMikulas Patocka 
167df5d2e90SMikulas Patocka 	spin_unlock_irq(&throttle_spinlock);
168df5d2e90SMikulas Patocka }
169df5d2e90SMikulas Patocka 
170df5d2e90SMikulas Patocka static void io_job_finish(struct dm_kcopyd_throttle *t)
171df5d2e90SMikulas Patocka {
172df5d2e90SMikulas Patocka 	unsigned long flags;
173df5d2e90SMikulas Patocka 
174df5d2e90SMikulas Patocka 	if (unlikely(!t))
175df5d2e90SMikulas Patocka 		return;
176df5d2e90SMikulas Patocka 
177df5d2e90SMikulas Patocka 	spin_lock_irqsave(&throttle_spinlock, flags);
178df5d2e90SMikulas Patocka 
179df5d2e90SMikulas Patocka 	t->num_io_jobs--;
180df5d2e90SMikulas Patocka 
1816aa7de05SMark Rutland 	if (likely(READ_ONCE(t->throttle) >= 100))
182df5d2e90SMikulas Patocka 		goto skip_limit;
183df5d2e90SMikulas Patocka 
184df5d2e90SMikulas Patocka 	if (!t->num_io_jobs) {
185df5d2e90SMikulas Patocka 		unsigned now, difference;
186df5d2e90SMikulas Patocka 
187df5d2e90SMikulas Patocka 		now = jiffies;
188df5d2e90SMikulas Patocka 		difference = now - t->last_jiffies;
189df5d2e90SMikulas Patocka 		t->last_jiffies = now;
190df5d2e90SMikulas Patocka 
191df5d2e90SMikulas Patocka 		t->io_period += difference;
192df5d2e90SMikulas Patocka 		t->total_period += difference;
193df5d2e90SMikulas Patocka 
194df5d2e90SMikulas Patocka 		/*
195df5d2e90SMikulas Patocka 		 * Maintain sane values if we got a temporary overflow.
196df5d2e90SMikulas Patocka 		 */
197df5d2e90SMikulas Patocka 		if (unlikely(t->io_period > t->total_period))
198df5d2e90SMikulas Patocka 			t->io_period = t->total_period;
199df5d2e90SMikulas Patocka 	}
200df5d2e90SMikulas Patocka 
201df5d2e90SMikulas Patocka skip_limit:
202df5d2e90SMikulas Patocka 	spin_unlock_irqrestore(&throttle_spinlock, flags);
203df5d2e90SMikulas Patocka }
204df5d2e90SMikulas Patocka 
205df5d2e90SMikulas Patocka 
2062d1e580aSAlasdair G Kergon static void wake(struct dm_kcopyd_client *kc)
2072d1e580aSAlasdair G Kergon {
2082d1e580aSAlasdair G Kergon 	queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
2092d1e580aSAlasdair G Kergon }
2102d1e580aSAlasdair G Kergon 
211d0471458SMikulas Patocka /*
212d0471458SMikulas Patocka  * Obtain one page for the use of kcopyd.
213d0471458SMikulas Patocka  */
214f99b55eeSMikulas Patocka static struct page_list *alloc_pl(gfp_t gfp)
2152d1e580aSAlasdair G Kergon {
2162d1e580aSAlasdair G Kergon 	struct page_list *pl;
2172d1e580aSAlasdair G Kergon 
218f99b55eeSMikulas Patocka 	pl = kmalloc(sizeof(*pl), gfp);
2192d1e580aSAlasdair G Kergon 	if (!pl)
2202d1e580aSAlasdair G Kergon 		return NULL;
2212d1e580aSAlasdair G Kergon 
222f99b55eeSMikulas Patocka 	pl->page = alloc_page(gfp);
2232d1e580aSAlasdair G Kergon 	if (!pl->page) {
2242d1e580aSAlasdair G Kergon 		kfree(pl);
2252d1e580aSAlasdair G Kergon 		return NULL;
2262d1e580aSAlasdair G Kergon 	}
2272d1e580aSAlasdair G Kergon 
2282d1e580aSAlasdair G Kergon 	return pl;
2292d1e580aSAlasdair G Kergon }
2302d1e580aSAlasdair G Kergon 
2312d1e580aSAlasdair G Kergon static void free_pl(struct page_list *pl)
2322d1e580aSAlasdair G Kergon {
2332d1e580aSAlasdair G Kergon 	__free_page(pl->page);
2342d1e580aSAlasdair G Kergon 	kfree(pl);
2352d1e580aSAlasdair G Kergon }
2362d1e580aSAlasdair G Kergon 
237d0471458SMikulas Patocka /*
238d0471458SMikulas Patocka  * Add the provided pages to a client's free page list, releasing
239d0471458SMikulas Patocka  * back to the system any beyond the reserved_pages limit.
240d0471458SMikulas Patocka  */
241d0471458SMikulas Patocka static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl)
242d0471458SMikulas Patocka {
243d0471458SMikulas Patocka 	struct page_list *next;
244d0471458SMikulas Patocka 
245d0471458SMikulas Patocka 	do {
246d0471458SMikulas Patocka 		next = pl->next;
247d0471458SMikulas Patocka 
248d0471458SMikulas Patocka 		if (kc->nr_free_pages >= kc->nr_reserved_pages)
249d0471458SMikulas Patocka 			free_pl(pl);
250d0471458SMikulas Patocka 		else {
251d0471458SMikulas Patocka 			pl->next = kc->pages;
252d0471458SMikulas Patocka 			kc->pages = pl;
253d0471458SMikulas Patocka 			kc->nr_free_pages++;
254d0471458SMikulas Patocka 		}
255d0471458SMikulas Patocka 
256d0471458SMikulas Patocka 		pl = next;
257d0471458SMikulas Patocka 	} while (pl);
258d0471458SMikulas Patocka }
259d0471458SMikulas Patocka 
2602d1e580aSAlasdair G Kergon static int kcopyd_get_pages(struct dm_kcopyd_client *kc,
2612d1e580aSAlasdair G Kergon 			    unsigned int nr, struct page_list **pages)
2622d1e580aSAlasdair G Kergon {
2632d1e580aSAlasdair G Kergon 	struct page_list *pl;
2642d1e580aSAlasdair G Kergon 
265d0471458SMikulas Patocka 	*pages = NULL;
2662d1e580aSAlasdair G Kergon 
267d0471458SMikulas Patocka 	do {
268d0164adcSMel Gorman 		pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
269d0471458SMikulas Patocka 		if (unlikely(!pl)) {
270d0471458SMikulas Patocka 			/* Use reserved pages */
271d0471458SMikulas Patocka 			pl = kc->pages;
272d0471458SMikulas Patocka 			if (unlikely(!pl))
273d0471458SMikulas Patocka 				goto out_of_memory;
2742d1e580aSAlasdair G Kergon 			kc->pages = pl->next;
275d0471458SMikulas Patocka 			kc->nr_free_pages--;
276d0471458SMikulas Patocka 		}
277d0471458SMikulas Patocka 		pl->next = *pages;
278d0471458SMikulas Patocka 		*pages = pl;
279d0471458SMikulas Patocka 	} while (--nr);
2802d1e580aSAlasdair G Kergon 
2812d1e580aSAlasdair G Kergon 	return 0;
2822d1e580aSAlasdair G Kergon 
283d0471458SMikulas Patocka out_of_memory:
284d0471458SMikulas Patocka 	if (*pages)
285d0471458SMikulas Patocka 		kcopyd_put_pages(kc, *pages);
286d0471458SMikulas Patocka 	return -ENOMEM;
2872d1e580aSAlasdair G Kergon }
2882d1e580aSAlasdair G Kergon 
2892d1e580aSAlasdair G Kergon /*
2902d1e580aSAlasdair G Kergon  * These three functions resize the page pool.
2912d1e580aSAlasdair G Kergon  */
2922d1e580aSAlasdair G Kergon static void drop_pages(struct page_list *pl)
2932d1e580aSAlasdair G Kergon {
2942d1e580aSAlasdair G Kergon 	struct page_list *next;
2952d1e580aSAlasdair G Kergon 
2962d1e580aSAlasdair G Kergon 	while (pl) {
2972d1e580aSAlasdair G Kergon 		next = pl->next;
2982d1e580aSAlasdair G Kergon 		free_pl(pl);
2992d1e580aSAlasdair G Kergon 		pl = next;
3002d1e580aSAlasdair G Kergon 	}
3012d1e580aSAlasdair G Kergon }
3022d1e580aSAlasdair G Kergon 
303d0471458SMikulas Patocka /*
304d0471458SMikulas Patocka  * Allocate and reserve nr_pages for the use of a specific client.
305d0471458SMikulas Patocka  */
306d0471458SMikulas Patocka static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned nr_pages)
3072d1e580aSAlasdair G Kergon {
308d0471458SMikulas Patocka 	unsigned i;
3092d1e580aSAlasdair G Kergon 	struct page_list *pl = NULL, *next;
3102d1e580aSAlasdair G Kergon 
311d0471458SMikulas Patocka 	for (i = 0; i < nr_pages; i++) {
312f99b55eeSMikulas Patocka 		next = alloc_pl(GFP_KERNEL);
3132d1e580aSAlasdair G Kergon 		if (!next) {
3142d1e580aSAlasdair G Kergon 			if (pl)
3152d1e580aSAlasdair G Kergon 				drop_pages(pl);
3162d1e580aSAlasdair G Kergon 			return -ENOMEM;
3172d1e580aSAlasdair G Kergon 		}
3182d1e580aSAlasdair G Kergon 		next->next = pl;
3192d1e580aSAlasdair G Kergon 		pl = next;
3202d1e580aSAlasdair G Kergon 	}
3212d1e580aSAlasdair G Kergon 
322d0471458SMikulas Patocka 	kc->nr_reserved_pages += nr_pages;
3232d1e580aSAlasdair G Kergon 	kcopyd_put_pages(kc, pl);
324d0471458SMikulas Patocka 
3252d1e580aSAlasdair G Kergon 	return 0;
3262d1e580aSAlasdair G Kergon }
3272d1e580aSAlasdair G Kergon 
3282d1e580aSAlasdair G Kergon static void client_free_pages(struct dm_kcopyd_client *kc)
3292d1e580aSAlasdair G Kergon {
330d0471458SMikulas Patocka 	BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages);
3312d1e580aSAlasdair G Kergon 	drop_pages(kc->pages);
3322d1e580aSAlasdair G Kergon 	kc->pages = NULL;
333d0471458SMikulas Patocka 	kc->nr_free_pages = kc->nr_reserved_pages = 0;
3342d1e580aSAlasdair G Kergon }
3352d1e580aSAlasdair G Kergon 
3362d1e580aSAlasdair G Kergon /*-----------------------------------------------------------------
3372d1e580aSAlasdair G Kergon  * kcopyd_jobs need to be allocated by the *clients* of kcopyd,
3382d1e580aSAlasdair G Kergon  * for this reason we use a mempool to prevent the client from
3392d1e580aSAlasdair G Kergon  * ever having to do io (which could cause a deadlock).
3402d1e580aSAlasdair G Kergon  *---------------------------------------------------------------*/
3412d1e580aSAlasdair G Kergon struct kcopyd_job {
3422d1e580aSAlasdair G Kergon 	struct dm_kcopyd_client *kc;
3432d1e580aSAlasdair G Kergon 	struct list_head list;
344db2351ebSMikulas Patocka 	unsigned flags;
3452d1e580aSAlasdair G Kergon 
3462d1e580aSAlasdair G Kergon 	/*
3472d1e580aSAlasdair G Kergon 	 * Error state of the job.
3482d1e580aSAlasdair G Kergon 	 */
3492d1e580aSAlasdair G Kergon 	int read_err;
3502d1e580aSAlasdair G Kergon 	unsigned long write_err;
3512d1e580aSAlasdair G Kergon 
3522d1e580aSAlasdair G Kergon 	/*
3532d1e580aSAlasdair G Kergon 	 * Either READ or WRITE
3542d1e580aSAlasdair G Kergon 	 */
3552d1e580aSAlasdair G Kergon 	int rw;
3562d1e580aSAlasdair G Kergon 	struct dm_io_region source;
3572d1e580aSAlasdair G Kergon 
3582d1e580aSAlasdair G Kergon 	/*
3592d1e580aSAlasdair G Kergon 	 * The destinations for the transfer.
3602d1e580aSAlasdair G Kergon 	 */
3612d1e580aSAlasdair G Kergon 	unsigned int num_dests;
3622d1e580aSAlasdair G Kergon 	struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];
3632d1e580aSAlasdair G Kergon 
3642d1e580aSAlasdair G Kergon 	struct page_list *pages;
3652d1e580aSAlasdair G Kergon 
3662d1e580aSAlasdair G Kergon 	/*
3672d1e580aSAlasdair G Kergon 	 * Set this to ensure you are notified when the job has
3682d1e580aSAlasdair G Kergon 	 * completed.  'context' is for callback to use.
3692d1e580aSAlasdair G Kergon 	 */
3702d1e580aSAlasdair G Kergon 	dm_kcopyd_notify_fn fn;
3712d1e580aSAlasdair G Kergon 	void *context;
3722d1e580aSAlasdair G Kergon 
3732d1e580aSAlasdair G Kergon 	/*
3742d1e580aSAlasdair G Kergon 	 * These fields are only used if the job has been split
3752d1e580aSAlasdair G Kergon 	 * into more manageable parts.
3762d1e580aSAlasdair G Kergon 	 */
3772d1e580aSAlasdair G Kergon 	struct mutex lock;
3782d1e580aSAlasdair G Kergon 	atomic_t sub_jobs;
3792d1e580aSAlasdair G Kergon 	sector_t progress;
380b73c67c2SDamien Le Moal 	sector_t write_offset;
3812d1e580aSAlasdair G Kergon 
382c6ea41fbSMikulas Patocka 	struct kcopyd_job *master_job;
383c6ea41fbSMikulas Patocka };
3842d1e580aSAlasdair G Kergon 
3852d1e580aSAlasdair G Kergon static struct kmem_cache *_job_cache;
3862d1e580aSAlasdair G Kergon 
3872d1e580aSAlasdair G Kergon int __init dm_kcopyd_init(void)
3882d1e580aSAlasdair G Kergon {
389c6ea41fbSMikulas Patocka 	_job_cache = kmem_cache_create("kcopyd_job",
390c6ea41fbSMikulas Patocka 				sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1),
391c6ea41fbSMikulas Patocka 				__alignof__(struct kcopyd_job), 0, NULL);
3922d1e580aSAlasdair G Kergon 	if (!_job_cache)
3932d1e580aSAlasdair G Kergon 		return -ENOMEM;
3942d1e580aSAlasdair G Kergon 
3957f069653SMikulas Patocka 	zero_page_list.next = &zero_page_list;
3967f069653SMikulas Patocka 	zero_page_list.page = ZERO_PAGE(0);
3977f069653SMikulas Patocka 
3982d1e580aSAlasdair G Kergon 	return 0;
3992d1e580aSAlasdair G Kergon }
4002d1e580aSAlasdair G Kergon 
4012d1e580aSAlasdair G Kergon void dm_kcopyd_exit(void)
4022d1e580aSAlasdair G Kergon {
4032d1e580aSAlasdair G Kergon 	kmem_cache_destroy(_job_cache);
4042d1e580aSAlasdair G Kergon 	_job_cache = NULL;
4052d1e580aSAlasdair G Kergon }
4062d1e580aSAlasdair G Kergon 
4072d1e580aSAlasdair G Kergon /*
4082d1e580aSAlasdair G Kergon  * Functions to push and pop a job onto the head of a given job
4092d1e580aSAlasdair G Kergon  * list.
4102d1e580aSAlasdair G Kergon  */
411b73c67c2SDamien Le Moal static struct kcopyd_job *pop_io_job(struct list_head *jobs,
412b73c67c2SDamien Le Moal 				     struct dm_kcopyd_client *kc)
413b73c67c2SDamien Le Moal {
414b73c67c2SDamien Le Moal 	struct kcopyd_job *job;
415b73c67c2SDamien Le Moal 
416b73c67c2SDamien Le Moal 	/*
417b73c67c2SDamien Le Moal 	 * For I/O jobs, pop any read, any write without sequential write
418b73c67c2SDamien Le Moal 	 * constraint and sequential writes that are at the right position.
419b73c67c2SDamien Le Moal 	 */
420b73c67c2SDamien Le Moal 	list_for_each_entry(job, jobs, list) {
421db2351ebSMikulas Patocka 		if (job->rw == READ || !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
422b73c67c2SDamien Le Moal 			list_del(&job->list);
423b73c67c2SDamien Le Moal 			return job;
424b73c67c2SDamien Le Moal 		}
425b73c67c2SDamien Le Moal 
426b73c67c2SDamien Le Moal 		if (job->write_offset == job->master_job->write_offset) {
427b73c67c2SDamien Le Moal 			job->master_job->write_offset += job->source.count;
428b73c67c2SDamien Le Moal 			list_del(&job->list);
429b73c67c2SDamien Le Moal 			return job;
430b73c67c2SDamien Le Moal 		}
431b73c67c2SDamien Le Moal 	}
432b73c67c2SDamien Le Moal 
433b73c67c2SDamien Le Moal 	return NULL;
434b73c67c2SDamien Le Moal }
435b73c67c2SDamien Le Moal 
4362d1e580aSAlasdair G Kergon static struct kcopyd_job *pop(struct list_head *jobs,
4372d1e580aSAlasdair G Kergon 			      struct dm_kcopyd_client *kc)
4382d1e580aSAlasdair G Kergon {
4392d1e580aSAlasdair G Kergon 	struct kcopyd_job *job = NULL;
4402d1e580aSAlasdair G Kergon 
441*6bcd658fSMikulas Patocka 	spin_lock_irq(&kc->job_lock);
4422d1e580aSAlasdair G Kergon 
4432d1e580aSAlasdair G Kergon 	if (!list_empty(jobs)) {
444b73c67c2SDamien Le Moal 		if (jobs == &kc->io_jobs)
445b73c67c2SDamien Le Moal 			job = pop_io_job(jobs, kc);
446b73c67c2SDamien Le Moal 		else {
4472d1e580aSAlasdair G Kergon 			job = list_entry(jobs->next, struct kcopyd_job, list);
4482d1e580aSAlasdair G Kergon 			list_del(&job->list);
4492d1e580aSAlasdair G Kergon 		}
450b73c67c2SDamien Le Moal 	}
451*6bcd658fSMikulas Patocka 	spin_unlock_irq(&kc->job_lock);
4522d1e580aSAlasdair G Kergon 
4532d1e580aSAlasdair G Kergon 	return job;
4542d1e580aSAlasdair G Kergon }
4552d1e580aSAlasdair G Kergon 
4562d1e580aSAlasdair G Kergon static void push(struct list_head *jobs, struct kcopyd_job *job)
4572d1e580aSAlasdair G Kergon {
4582d1e580aSAlasdair G Kergon 	unsigned long flags;
4592d1e580aSAlasdair G Kergon 	struct dm_kcopyd_client *kc = job->kc;
4602d1e580aSAlasdair G Kergon 
4612d1e580aSAlasdair G Kergon 	spin_lock_irqsave(&kc->job_lock, flags);
4622d1e580aSAlasdair G Kergon 	list_add_tail(&job->list, jobs);
4632d1e580aSAlasdair G Kergon 	spin_unlock_irqrestore(&kc->job_lock, flags);
4642d1e580aSAlasdair G Kergon }
4652d1e580aSAlasdair G Kergon 
466b673c3a8SKazuo Ito 
467b673c3a8SKazuo Ito static void push_head(struct list_head *jobs, struct kcopyd_job *job)
468b673c3a8SKazuo Ito {
469b673c3a8SKazuo Ito 	struct dm_kcopyd_client *kc = job->kc;
470b673c3a8SKazuo Ito 
471*6bcd658fSMikulas Patocka 	spin_lock_irq(&kc->job_lock);
472b673c3a8SKazuo Ito 	list_add(&job->list, jobs);
473*6bcd658fSMikulas Patocka 	spin_unlock_irq(&kc->job_lock);
474b673c3a8SKazuo Ito }
475b673c3a8SKazuo Ito 
4762d1e580aSAlasdair G Kergon /*
4772d1e580aSAlasdair G Kergon  * These three functions process 1 item from the corresponding
4782d1e580aSAlasdair G Kergon  * job list.
4792d1e580aSAlasdair G Kergon  *
4802d1e580aSAlasdair G Kergon  * They return:
4812d1e580aSAlasdair G Kergon  * < 0: error
4822d1e580aSAlasdair G Kergon  *   0: success
4832d1e580aSAlasdair G Kergon  * > 0: can't process yet.
4842d1e580aSAlasdair G Kergon  */
4852d1e580aSAlasdair G Kergon static int run_complete_job(struct kcopyd_job *job)
4862d1e580aSAlasdair G Kergon {
4872d1e580aSAlasdair G Kergon 	void *context = job->context;
4882d1e580aSAlasdair G Kergon 	int read_err = job->read_err;
4892d1e580aSAlasdair G Kergon 	unsigned long write_err = job->write_err;
4902d1e580aSAlasdair G Kergon 	dm_kcopyd_notify_fn fn = job->fn;
4912d1e580aSAlasdair G Kergon 	struct dm_kcopyd_client *kc = job->kc;
4922d1e580aSAlasdair G Kergon 
4937f069653SMikulas Patocka 	if (job->pages && job->pages != &zero_page_list)
4942d1e580aSAlasdair G Kergon 		kcopyd_put_pages(kc, job->pages);
495c6ea41fbSMikulas Patocka 	/*
496c6ea41fbSMikulas Patocka 	 * If this is the master job, the sub jobs have already
497c6ea41fbSMikulas Patocka 	 * completed so we can free everything.
498c6ea41fbSMikulas Patocka 	 */
499d5ffebddSMike Snitzer 	if (job->master_job == job) {
500d5ffebddSMike Snitzer 		mutex_destroy(&job->lock);
5016f1c819cSKent Overstreet 		mempool_free(job, &kc->job_pool);
502d5ffebddSMike Snitzer 	}
5032d1e580aSAlasdair G Kergon 	fn(read_err, write_err, context);
5042d1e580aSAlasdair G Kergon 
5052d1e580aSAlasdair G Kergon 	if (atomic_dec_and_test(&kc->nr_jobs))
5062d1e580aSAlasdair G Kergon 		wake_up(&kc->destroyq);
5072d1e580aSAlasdair G Kergon 
508784c9a29SJohn Pittman 	cond_resched();
509784c9a29SJohn Pittman 
5102d1e580aSAlasdair G Kergon 	return 0;
5112d1e580aSAlasdair G Kergon }
5122d1e580aSAlasdair G Kergon 
5132d1e580aSAlasdair G Kergon static void complete_io(unsigned long error, void *context)
5142d1e580aSAlasdair G Kergon {
5152d1e580aSAlasdair G Kergon 	struct kcopyd_job *job = (struct kcopyd_job *) context;
5162d1e580aSAlasdair G Kergon 	struct dm_kcopyd_client *kc = job->kc;
5172d1e580aSAlasdair G Kergon 
518df5d2e90SMikulas Patocka 	io_job_finish(kc->throttle);
519df5d2e90SMikulas Patocka 
5202d1e580aSAlasdair G Kergon 	if (error) {
52151111666SMike Christie 		if (op_is_write(job->rw))
5222d1e580aSAlasdair G Kergon 			job->write_err |= error;
5232d1e580aSAlasdair G Kergon 		else
5242d1e580aSAlasdair G Kergon 			job->read_err = 1;
5252d1e580aSAlasdair G Kergon 
526db2351ebSMikulas Patocka 		if (!(job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))) {
5272d1e580aSAlasdair G Kergon 			push(&kc->complete_jobs, job);
5282d1e580aSAlasdair G Kergon 			wake(kc);
5292d1e580aSAlasdair G Kergon 			return;
5302d1e580aSAlasdair G Kergon 		}
5312d1e580aSAlasdair G Kergon 	}
5322d1e580aSAlasdair G Kergon 
53351111666SMike Christie 	if (op_is_write(job->rw))
5342d1e580aSAlasdair G Kergon 		push(&kc->complete_jobs, job);
5352d1e580aSAlasdair G Kergon 
5362d1e580aSAlasdair G Kergon 	else {
5372d1e580aSAlasdair G Kergon 		job->rw = WRITE;
5382d1e580aSAlasdair G Kergon 		push(&kc->io_jobs, job);
5392d1e580aSAlasdair G Kergon 	}
5402d1e580aSAlasdair G Kergon 
5412d1e580aSAlasdair G Kergon 	wake(kc);
5422d1e580aSAlasdair G Kergon }
5432d1e580aSAlasdair G Kergon 
5442d1e580aSAlasdair G Kergon /*
5452d1e580aSAlasdair G Kergon  * Request io on as many buffer heads as we can currently get for
5462d1e580aSAlasdair G Kergon  * a particular job.
5472d1e580aSAlasdair G Kergon  */
5482d1e580aSAlasdair G Kergon static int run_io_job(struct kcopyd_job *job)
5492d1e580aSAlasdair G Kergon {
5502d1e580aSAlasdair G Kergon 	int r;
5512d1e580aSAlasdair G Kergon 	struct dm_io_request io_req = {
552e6047149SMike Christie 		.bi_op = job->rw,
553e6047149SMike Christie 		.bi_op_flags = 0,
5542d1e580aSAlasdair G Kergon 		.mem.type = DM_IO_PAGE_LIST,
5552d1e580aSAlasdair G Kergon 		.mem.ptr.pl = job->pages,
5564622afb3SMikulas Patocka 		.mem.offset = 0,
5572d1e580aSAlasdair G Kergon 		.notify.fn = complete_io,
5582d1e580aSAlasdair G Kergon 		.notify.context = job,
5592d1e580aSAlasdair G Kergon 		.client = job->kc->io_client,
5602d1e580aSAlasdair G Kergon 	};
5612d1e580aSAlasdair G Kergon 
562b73c67c2SDamien Le Moal 	/*
563b73c67c2SDamien Le Moal 	 * If we need to write sequentially and some reads or writes failed,
564b73c67c2SDamien Le Moal 	 * no point in continuing.
565b73c67c2SDamien Le Moal 	 */
566db2351ebSMikulas Patocka 	if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) &&
567d1fef414SDmitry Fomichev 	    job->master_job->write_err) {
568d1fef414SDmitry Fomichev 		job->write_err = job->master_job->write_err;
569b73c67c2SDamien Le Moal 		return -EIO;
570d1fef414SDmitry Fomichev 	}
571b73c67c2SDamien Le Moal 
572df5d2e90SMikulas Patocka 	io_job_start(job->kc->throttle);
573df5d2e90SMikulas Patocka 
5747eaceaccSJens Axboe 	if (job->rw == READ)
5752d1e580aSAlasdair G Kergon 		r = dm_io(&io_req, 1, &job->source, NULL);
576721a9602SJens Axboe 	else
5772d1e580aSAlasdair G Kergon 		r = dm_io(&io_req, job->num_dests, job->dests, NULL);
5782d1e580aSAlasdair G Kergon 
5792d1e580aSAlasdair G Kergon 	return r;
5802d1e580aSAlasdair G Kergon }
5812d1e580aSAlasdair G Kergon 
5822d1e580aSAlasdair G Kergon static int run_pages_job(struct kcopyd_job *job)
5832d1e580aSAlasdair G Kergon {
5842d1e580aSAlasdair G Kergon 	int r;
5855bf45a3dSMikulas Patocka 	unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9);
5862d1e580aSAlasdair G Kergon 
5875bf45a3dSMikulas Patocka 	r = kcopyd_get_pages(job->kc, nr_pages, &job->pages);
5882d1e580aSAlasdair G Kergon 	if (!r) {
5892d1e580aSAlasdair G Kergon 		/* this job is ready for io */
5902d1e580aSAlasdair G Kergon 		push(&job->kc->io_jobs, job);
5912d1e580aSAlasdair G Kergon 		return 0;
5922d1e580aSAlasdair G Kergon 	}
5932d1e580aSAlasdair G Kergon 
5942d1e580aSAlasdair G Kergon 	if (r == -ENOMEM)
5952d1e580aSAlasdair G Kergon 		/* can't complete now */
5962d1e580aSAlasdair G Kergon 		return 1;
5972d1e580aSAlasdair G Kergon 
5982d1e580aSAlasdair G Kergon 	return r;
5992d1e580aSAlasdair G Kergon }
6002d1e580aSAlasdair G Kergon 
6012d1e580aSAlasdair G Kergon /*
6022d1e580aSAlasdair G Kergon  * Run through a list for as long as possible.  Returns the count
6032d1e580aSAlasdair G Kergon  * of successful jobs.
6042d1e580aSAlasdair G Kergon  */
6052d1e580aSAlasdair G Kergon static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
6062d1e580aSAlasdair G Kergon 			int (*fn) (struct kcopyd_job *))
6072d1e580aSAlasdair G Kergon {
6082d1e580aSAlasdair G Kergon 	struct kcopyd_job *job;
6092d1e580aSAlasdair G Kergon 	int r, count = 0;
6102d1e580aSAlasdair G Kergon 
6112d1e580aSAlasdair G Kergon 	while ((job = pop(jobs, kc))) {
6122d1e580aSAlasdair G Kergon 
6132d1e580aSAlasdair G Kergon 		r = fn(job);
6142d1e580aSAlasdair G Kergon 
6152d1e580aSAlasdair G Kergon 		if (r < 0) {
6162d1e580aSAlasdair G Kergon 			/* error this rogue job */
61751111666SMike Christie 			if (op_is_write(job->rw))
6182d1e580aSAlasdair G Kergon 				job->write_err = (unsigned long) -1L;
6192d1e580aSAlasdair G Kergon 			else
6202d1e580aSAlasdair G Kergon 				job->read_err = 1;
6212d1e580aSAlasdair G Kergon 			push(&kc->complete_jobs, job);
622d1fef414SDmitry Fomichev 			wake(kc);
6232d1e580aSAlasdair G Kergon 			break;
6242d1e580aSAlasdair G Kergon 		}
6252d1e580aSAlasdair G Kergon 
6262d1e580aSAlasdair G Kergon 		if (r > 0) {
6272d1e580aSAlasdair G Kergon 			/*
6282d1e580aSAlasdair G Kergon 			 * We couldn't service this job ATM, so
6292d1e580aSAlasdair G Kergon 			 * push this job back onto the list.
6302d1e580aSAlasdair G Kergon 			 */
631b673c3a8SKazuo Ito 			push_head(jobs, job);
6322d1e580aSAlasdair G Kergon 			break;
6332d1e580aSAlasdair G Kergon 		}
6342d1e580aSAlasdair G Kergon 
6352d1e580aSAlasdair G Kergon 		count++;
6362d1e580aSAlasdair G Kergon 	}
6372d1e580aSAlasdair G Kergon 
6382d1e580aSAlasdair G Kergon 	return count;
6392d1e580aSAlasdair G Kergon }
6402d1e580aSAlasdair G Kergon 
6412d1e580aSAlasdair G Kergon /*
6422d1e580aSAlasdair G Kergon  * kcopyd does this every time it's woken up.
6432d1e580aSAlasdair G Kergon  */
6442d1e580aSAlasdair G Kergon static void do_work(struct work_struct *work)
6452d1e580aSAlasdair G Kergon {
6462d1e580aSAlasdair G Kergon 	struct dm_kcopyd_client *kc = container_of(work,
6472d1e580aSAlasdair G Kergon 					struct dm_kcopyd_client, kcopyd_work);
6487eaceaccSJens Axboe 	struct blk_plug plug;
6492d1e580aSAlasdair G Kergon 
6502d1e580aSAlasdair G Kergon 	/*
6512d1e580aSAlasdair G Kergon 	 * The order that these are called is *very* important.
6522d1e580aSAlasdair G Kergon 	 * complete jobs can free some pages for pages jobs.
6532d1e580aSAlasdair G Kergon 	 * Pages jobs when successful will jump onto the io jobs
6542d1e580aSAlasdair G Kergon 	 * list.  io jobs call wake when they complete and it all
6552d1e580aSAlasdair G Kergon 	 * starts again.
6562d1e580aSAlasdair G Kergon 	 */
657*6bcd658fSMikulas Patocka 	spin_lock_irq(&kc->job_lock);
658d7e6b8dfSNikos Tsironis 	list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs);
659*6bcd658fSMikulas Patocka 	spin_unlock_irq(&kc->job_lock);
660d7e6b8dfSNikos Tsironis 
6617eaceaccSJens Axboe 	blk_start_plug(&plug);
6622d1e580aSAlasdair G Kergon 	process_jobs(&kc->complete_jobs, kc, run_complete_job);
6632d1e580aSAlasdair G Kergon 	process_jobs(&kc->pages_jobs, kc, run_pages_job);
6642d1e580aSAlasdair G Kergon 	process_jobs(&kc->io_jobs, kc, run_io_job);
6657eaceaccSJens Axboe 	blk_finish_plug(&plug);
6662d1e580aSAlasdair G Kergon }
6672d1e580aSAlasdair G Kergon 
6682d1e580aSAlasdair G Kergon /*
6692d1e580aSAlasdair G Kergon  * If we are copying a small region we just dispatch a single job
6702d1e580aSAlasdair G Kergon  * to do the copy, otherwise the io has to be split up into many
6712d1e580aSAlasdair G Kergon  * jobs.
6722d1e580aSAlasdair G Kergon  */
6732d1e580aSAlasdair G Kergon static void dispatch_job(struct kcopyd_job *job)
6742d1e580aSAlasdair G Kergon {
6752d1e580aSAlasdair G Kergon 	struct dm_kcopyd_client *kc = job->kc;
6762d1e580aSAlasdair G Kergon 	atomic_inc(&kc->nr_jobs);
6779ca170a3SMikulas Patocka 	if (unlikely(!job->source.count))
678d7e6b8dfSNikos Tsironis 		push(&kc->callback_jobs, job);
6797f069653SMikulas Patocka 	else if (job->pages == &zero_page_list)
6807f069653SMikulas Patocka 		push(&kc->io_jobs, job);
6819ca170a3SMikulas Patocka 	else
6822d1e580aSAlasdair G Kergon 		push(&kc->pages_jobs, job);
6832d1e580aSAlasdair G Kergon 	wake(kc);
6842d1e580aSAlasdair G Kergon }
6852d1e580aSAlasdair G Kergon 
6862d1e580aSAlasdair G Kergon static void segment_complete(int read_err, unsigned long write_err,
6872d1e580aSAlasdair G Kergon 			     void *context)
6882d1e580aSAlasdair G Kergon {
6892d1e580aSAlasdair G Kergon 	/* FIXME: tidy this function */
6902d1e580aSAlasdair G Kergon 	sector_t progress = 0;
6912d1e580aSAlasdair G Kergon 	sector_t count = 0;
692c6ea41fbSMikulas Patocka 	struct kcopyd_job *sub_job = (struct kcopyd_job *) context;
693c6ea41fbSMikulas Patocka 	struct kcopyd_job *job = sub_job->master_job;
69473830857SMikulas Patocka 	struct dm_kcopyd_client *kc = job->kc;
6952d1e580aSAlasdair G Kergon 
6962d1e580aSAlasdair G Kergon 	mutex_lock(&job->lock);
6972d1e580aSAlasdair G Kergon 
6982d1e580aSAlasdair G Kergon 	/* update the error */
6992d1e580aSAlasdair G Kergon 	if (read_err)
7002d1e580aSAlasdair G Kergon 		job->read_err = 1;
7012d1e580aSAlasdair G Kergon 
7022d1e580aSAlasdair G Kergon 	if (write_err)
7032d1e580aSAlasdair G Kergon 		job->write_err |= write_err;
7042d1e580aSAlasdair G Kergon 
7052d1e580aSAlasdair G Kergon 	/*
7062d1e580aSAlasdair G Kergon 	 * Only dispatch more work if there hasn't been an error.
7072d1e580aSAlasdair G Kergon 	 */
7082d1e580aSAlasdair G Kergon 	if ((!job->read_err && !job->write_err) ||
709db2351ebSMikulas Patocka 	    job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) {
7102d1e580aSAlasdair G Kergon 		/* get the next chunk of work */
7112d1e580aSAlasdair G Kergon 		progress = job->progress;
7122d1e580aSAlasdair G Kergon 		count = job->source.count - progress;
7132d1e580aSAlasdair G Kergon 		if (count) {
714c663e040SNikos Tsironis 			if (count > kc->sub_job_size)
715c663e040SNikos Tsironis 				count = kc->sub_job_size;
7162d1e580aSAlasdair G Kergon 
7172d1e580aSAlasdair G Kergon 			job->progress += count;
7182d1e580aSAlasdair G Kergon 		}
7192d1e580aSAlasdair G Kergon 	}
7202d1e580aSAlasdair G Kergon 	mutex_unlock(&job->lock);
7212d1e580aSAlasdair G Kergon 
7222d1e580aSAlasdair G Kergon 	if (count) {
7232d1e580aSAlasdair G Kergon 		int i;
7242d1e580aSAlasdair G Kergon 
7252d1e580aSAlasdair G Kergon 		*sub_job = *job;
726b73c67c2SDamien Le Moal 		sub_job->write_offset = progress;
7272d1e580aSAlasdair G Kergon 		sub_job->source.sector += progress;
7282d1e580aSAlasdair G Kergon 		sub_job->source.count = count;
7292d1e580aSAlasdair G Kergon 
7302d1e580aSAlasdair G Kergon 		for (i = 0; i < job->num_dests; i++) {
7312d1e580aSAlasdair G Kergon 			sub_job->dests[i].sector += progress;
7322d1e580aSAlasdair G Kergon 			sub_job->dests[i].count = count;
7332d1e580aSAlasdair G Kergon 		}
7342d1e580aSAlasdair G Kergon 
7352d1e580aSAlasdair G Kergon 		sub_job->fn = segment_complete;
736c6ea41fbSMikulas Patocka 		sub_job->context = sub_job;
7372d1e580aSAlasdair G Kergon 		dispatch_job(sub_job);
7382d1e580aSAlasdair G Kergon 
7392d1e580aSAlasdair G Kergon 	} else if (atomic_dec_and_test(&job->sub_jobs)) {
7402d1e580aSAlasdair G Kergon 
7412d1e580aSAlasdair G Kergon 		/*
742340cd444SMikulas Patocka 		 * Queue the completion callback to the kcopyd thread.
743340cd444SMikulas Patocka 		 *
744340cd444SMikulas Patocka 		 * Some callers assume that all the completions are called
745340cd444SMikulas Patocka 		 * from a single thread and don't race with each other.
746340cd444SMikulas Patocka 		 *
747340cd444SMikulas Patocka 		 * We must not call the callback directly here because this
748340cd444SMikulas Patocka 		 * code may not be executing in the thread.
7492d1e580aSAlasdair G Kergon 		 */
750340cd444SMikulas Patocka 		push(&kc->complete_jobs, job);
751340cd444SMikulas Patocka 		wake(kc);
7522d1e580aSAlasdair G Kergon 	}
7532d1e580aSAlasdair G Kergon }
7542d1e580aSAlasdair G Kergon 
7552d1e580aSAlasdair G Kergon /*
756c6ea41fbSMikulas Patocka  * Create some sub jobs to share the work between them.
7572d1e580aSAlasdair G Kergon  */
758c6ea41fbSMikulas Patocka static void split_job(struct kcopyd_job *master_job)
7592d1e580aSAlasdair G Kergon {
7602d1e580aSAlasdair G Kergon 	int i;
7612d1e580aSAlasdair G Kergon 
762c6ea41fbSMikulas Patocka 	atomic_inc(&master_job->kc->nr_jobs);
763340cd444SMikulas Patocka 
764c6ea41fbSMikulas Patocka 	atomic_set(&master_job->sub_jobs, SPLIT_COUNT);
765c6ea41fbSMikulas Patocka 	for (i = 0; i < SPLIT_COUNT; i++) {
766c6ea41fbSMikulas Patocka 		master_job[i + 1].master_job = master_job;
767c6ea41fbSMikulas Patocka 		segment_complete(0, 0u, &master_job[i + 1]);
768c6ea41fbSMikulas Patocka 	}
7692d1e580aSAlasdair G Kergon }
7702d1e580aSAlasdair G Kergon 
7717209049dSMike Snitzer void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
7722d1e580aSAlasdair G Kergon 		    unsigned int num_dests, struct dm_io_region *dests,
7732d1e580aSAlasdair G Kergon 		    unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
7742d1e580aSAlasdair G Kergon {
7752d1e580aSAlasdair G Kergon 	struct kcopyd_job *job;
77670d6c400SMike Snitzer 	int i;
7772d1e580aSAlasdair G Kergon 
7782d1e580aSAlasdair G Kergon 	/*
779c6ea41fbSMikulas Patocka 	 * Allocate an array of jobs consisting of one master job
780c6ea41fbSMikulas Patocka 	 * followed by SPLIT_COUNT sub jobs.
7812d1e580aSAlasdair G Kergon 	 */
7826f1c819cSKent Overstreet 	job = mempool_alloc(&kc->job_pool, GFP_NOIO);
783d5ffebddSMike Snitzer 	mutex_init(&job->lock);
7842d1e580aSAlasdair G Kergon 
7852d1e580aSAlasdair G Kergon 	/*
7862d1e580aSAlasdair G Kergon 	 * set up for the read.
7872d1e580aSAlasdair G Kergon 	 */
7882d1e580aSAlasdair G Kergon 	job->kc = kc;
7892d1e580aSAlasdair G Kergon 	job->flags = flags;
7902d1e580aSAlasdair G Kergon 	job->read_err = 0;
7912d1e580aSAlasdair G Kergon 	job->write_err = 0;
7922d1e580aSAlasdair G Kergon 
7932d1e580aSAlasdair G Kergon 	job->num_dests = num_dests;
7942d1e580aSAlasdair G Kergon 	memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
7952d1e580aSAlasdair G Kergon 
796b73c67c2SDamien Le Moal 	/*
797b73c67c2SDamien Le Moal 	 * If one of the destination is a host-managed zoned block device,
798b73c67c2SDamien Le Moal 	 * we need to write sequentially. If one of the destination is a
799b73c67c2SDamien Le Moal 	 * host-aware device, then leave it to the caller to choose what to do.
800b73c67c2SDamien Le Moal 	 */
801db2351ebSMikulas Patocka 	if (!(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) {
802b73c67c2SDamien Le Moal 		for (i = 0; i < job->num_dests; i++) {
803b73c67c2SDamien Le Moal 			if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) {
804db2351ebSMikulas Patocka 				job->flags |= BIT(DM_KCOPYD_WRITE_SEQ);
805b73c67c2SDamien Le Moal 				break;
806b73c67c2SDamien Le Moal 			}
807b73c67c2SDamien Le Moal 		}
808b73c67c2SDamien Le Moal 	}
809b73c67c2SDamien Le Moal 
810b73c67c2SDamien Le Moal 	/*
811b73c67c2SDamien Le Moal 	 * If we need to write sequentially, errors cannot be ignored.
812b73c67c2SDamien Le Moal 	 */
813db2351ebSMikulas Patocka 	if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) &&
814db2351ebSMikulas Patocka 	    job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))
815db2351ebSMikulas Patocka 		job->flags &= ~BIT(DM_KCOPYD_IGNORE_ERROR);
816b73c67c2SDamien Le Moal 
8177f069653SMikulas Patocka 	if (from) {
8187f069653SMikulas Patocka 		job->source = *from;
8192d1e580aSAlasdair G Kergon 		job->pages = NULL;
8207f069653SMikulas Patocka 		job->rw = READ;
8217f069653SMikulas Patocka 	} else {
8227f069653SMikulas Patocka 		memset(&job->source, 0, sizeof job->source);
8237f069653SMikulas Patocka 		job->source.count = job->dests[0].count;
8247f069653SMikulas Patocka 		job->pages = &zero_page_list;
82570d6c400SMike Snitzer 
82670d6c400SMike Snitzer 		/*
827615ec946SChristoph Hellwig 		 * Use WRITE ZEROES to optimize zeroing if all dests support it.
82870d6c400SMike Snitzer 		 */
829615ec946SChristoph Hellwig 		job->rw = REQ_OP_WRITE_ZEROES;
83070d6c400SMike Snitzer 		for (i = 0; i < job->num_dests; i++)
831615ec946SChristoph Hellwig 			if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) {
8327f069653SMikulas Patocka 				job->rw = WRITE;
83370d6c400SMike Snitzer 				break;
83470d6c400SMike Snitzer 			}
8357f069653SMikulas Patocka 	}
8362d1e580aSAlasdair G Kergon 
8372d1e580aSAlasdair G Kergon 	job->fn = fn;
8382d1e580aSAlasdair G Kergon 	job->context = context;
839c6ea41fbSMikulas Patocka 	job->master_job = job;
840b73c67c2SDamien Le Moal 	job->write_offset = 0;
8412d1e580aSAlasdair G Kergon 
842c663e040SNikos Tsironis 	if (job->source.count <= kc->sub_job_size)
8432d1e580aSAlasdair G Kergon 		dispatch_job(job);
8442d1e580aSAlasdair G Kergon 	else {
8452d1e580aSAlasdair G Kergon 		job->progress = 0;
8462d1e580aSAlasdair G Kergon 		split_job(job);
8472d1e580aSAlasdair G Kergon 	}
8482d1e580aSAlasdair G Kergon }
8492d1e580aSAlasdair G Kergon EXPORT_SYMBOL(dm_kcopyd_copy);
8502d1e580aSAlasdair G Kergon 
8517209049dSMike Snitzer void dm_kcopyd_zero(struct dm_kcopyd_client *kc,
8527f069653SMikulas Patocka 		    unsigned num_dests, struct dm_io_region *dests,
8537f069653SMikulas Patocka 		    unsigned flags, dm_kcopyd_notify_fn fn, void *context)
8547f069653SMikulas Patocka {
8557209049dSMike Snitzer 	dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
8567f069653SMikulas Patocka }
8577f069653SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_zero);
8587f069653SMikulas Patocka 
859a6e50b40SMikulas Patocka void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
860a6e50b40SMikulas Patocka 				 dm_kcopyd_notify_fn fn, void *context)
861a6e50b40SMikulas Patocka {
862a6e50b40SMikulas Patocka 	struct kcopyd_job *job;
863a6e50b40SMikulas Patocka 
8646f1c819cSKent Overstreet 	job = mempool_alloc(&kc->job_pool, GFP_NOIO);
865a6e50b40SMikulas Patocka 
866a6e50b40SMikulas Patocka 	memset(job, 0, sizeof(struct kcopyd_job));
867a6e50b40SMikulas Patocka 	job->kc = kc;
868a6e50b40SMikulas Patocka 	job->fn = fn;
869a6e50b40SMikulas Patocka 	job->context = context;
870d136f2efSAlasdair G Kergon 	job->master_job = job;
871a6e50b40SMikulas Patocka 
872a6e50b40SMikulas Patocka 	atomic_inc(&kc->nr_jobs);
873a6e50b40SMikulas Patocka 
874a6e50b40SMikulas Patocka 	return job;
875a6e50b40SMikulas Patocka }
876a6e50b40SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_prepare_callback);
877a6e50b40SMikulas Patocka 
878a6e50b40SMikulas Patocka void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
879a6e50b40SMikulas Patocka {
880a6e50b40SMikulas Patocka 	struct kcopyd_job *job = j;
881a6e50b40SMikulas Patocka 	struct dm_kcopyd_client *kc = job->kc;
882a6e50b40SMikulas Patocka 
883a6e50b40SMikulas Patocka 	job->read_err = read_err;
884a6e50b40SMikulas Patocka 	job->write_err = write_err;
885a6e50b40SMikulas Patocka 
886d7e6b8dfSNikos Tsironis 	push(&kc->callback_jobs, job);
887a6e50b40SMikulas Patocka 	wake(kc);
888a6e50b40SMikulas Patocka }
889a6e50b40SMikulas Patocka EXPORT_SYMBOL(dm_kcopyd_do_callback);
890a6e50b40SMikulas Patocka 
/*
 * Cancels a kcopyd job, e.g. when someone is deactivating a mirror.
 * Compiled out: the implementation is an unfinished stub.
 */
#if 0
int kcopyd_cancel(struct kcopyd_job *job, int block)
{
	/* FIXME: finish */
	return -1;
}
#endif  /*  0  */
9022d1e580aSAlasdair G Kergon 
9032d1e580aSAlasdair G Kergon /*-----------------------------------------------------------------
9042d1e580aSAlasdair G Kergon  * Client setup
9052d1e580aSAlasdair G Kergon  *---------------------------------------------------------------*/
906df5d2e90SMikulas Patocka struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
9072d1e580aSAlasdair G Kergon {
9086f1c819cSKent Overstreet 	int r;
909c663e040SNikos Tsironis 	unsigned reserve_pages;
9102d1e580aSAlasdair G Kergon 	struct dm_kcopyd_client *kc;
9112d1e580aSAlasdair G Kergon 
912d3775354SKent Overstreet 	kc = kzalloc(sizeof(*kc), GFP_KERNEL);
9132d1e580aSAlasdair G Kergon 	if (!kc)
914fa34ce73SMikulas Patocka 		return ERR_PTR(-ENOMEM);
9152d1e580aSAlasdair G Kergon 
9162d1e580aSAlasdair G Kergon 	spin_lock_init(&kc->job_lock);
917d7e6b8dfSNikos Tsironis 	INIT_LIST_HEAD(&kc->callback_jobs);
9182d1e580aSAlasdair G Kergon 	INIT_LIST_HEAD(&kc->complete_jobs);
9192d1e580aSAlasdair G Kergon 	INIT_LIST_HEAD(&kc->io_jobs);
9202d1e580aSAlasdair G Kergon 	INIT_LIST_HEAD(&kc->pages_jobs);
921df5d2e90SMikulas Patocka 	kc->throttle = throttle;
9222d1e580aSAlasdair G Kergon 
9236f1c819cSKent Overstreet 	r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache);
9246f1c819cSKent Overstreet 	if (r)
9252d1e580aSAlasdair G Kergon 		goto bad_slab;
9262d1e580aSAlasdair G Kergon 
9272d1e580aSAlasdair G Kergon 	INIT_WORK(&kc->kcopyd_work, do_work);
928670368a8STejun Heo 	kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0);
9296f1c819cSKent Overstreet 	if (!kc->kcopyd_wq) {
9306f1c819cSKent Overstreet 		r = -ENOMEM;
9312d1e580aSAlasdair G Kergon 		goto bad_workqueue;
9326f1c819cSKent Overstreet 	}
9332d1e580aSAlasdair G Kergon 
934c663e040SNikos Tsironis 	kc->sub_job_size = dm_get_kcopyd_subjob_size();
935c663e040SNikos Tsironis 	reserve_pages = DIV_ROUND_UP(kc->sub_job_size << SECTOR_SHIFT, PAGE_SIZE);
936c663e040SNikos Tsironis 
9372d1e580aSAlasdair G Kergon 	kc->pages = NULL;
938d0471458SMikulas Patocka 	kc->nr_reserved_pages = kc->nr_free_pages = 0;
939c663e040SNikos Tsironis 	r = client_reserve_pages(kc, reserve_pages);
9402d1e580aSAlasdair G Kergon 	if (r)
9412d1e580aSAlasdair G Kergon 		goto bad_client_pages;
9422d1e580aSAlasdair G Kergon 
943bda8efecSMikulas Patocka 	kc->io_client = dm_io_client_create();
9442d1e580aSAlasdair G Kergon 	if (IS_ERR(kc->io_client)) {
9452d1e580aSAlasdair G Kergon 		r = PTR_ERR(kc->io_client);
9462d1e580aSAlasdair G Kergon 		goto bad_io_client;
9472d1e580aSAlasdair G Kergon 	}
9482d1e580aSAlasdair G Kergon 
9492d1e580aSAlasdair G Kergon 	init_waitqueue_head(&kc->destroyq);
9502d1e580aSAlasdair G Kergon 	atomic_set(&kc->nr_jobs, 0);
9512d1e580aSAlasdair G Kergon 
952fa34ce73SMikulas Patocka 	return kc;
9532d1e580aSAlasdair G Kergon 
9542d1e580aSAlasdair G Kergon bad_io_client:
9552d1e580aSAlasdair G Kergon 	client_free_pages(kc);
9562d1e580aSAlasdair G Kergon bad_client_pages:
9572d1e580aSAlasdair G Kergon 	destroy_workqueue(kc->kcopyd_wq);
9582d1e580aSAlasdair G Kergon bad_workqueue:
9596f1c819cSKent Overstreet 	mempool_exit(&kc->job_pool);
9602d1e580aSAlasdair G Kergon bad_slab:
9612d1e580aSAlasdair G Kergon 	kfree(kc);
9622d1e580aSAlasdair G Kergon 
963fa34ce73SMikulas Patocka 	return ERR_PTR(r);
9642d1e580aSAlasdair G Kergon }
9652d1e580aSAlasdair G Kergon EXPORT_SYMBOL(dm_kcopyd_client_create);
9662d1e580aSAlasdair G Kergon 
9672d1e580aSAlasdair G Kergon void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
9682d1e580aSAlasdair G Kergon {
9692d1e580aSAlasdair G Kergon 	/* Wait for completion of all jobs submitted by this client. */
9702d1e580aSAlasdair G Kergon 	wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
9712d1e580aSAlasdair G Kergon 
972d7e6b8dfSNikos Tsironis 	BUG_ON(!list_empty(&kc->callback_jobs));
9732d1e580aSAlasdair G Kergon 	BUG_ON(!list_empty(&kc->complete_jobs));
9742d1e580aSAlasdair G Kergon 	BUG_ON(!list_empty(&kc->io_jobs));
9752d1e580aSAlasdair G Kergon 	BUG_ON(!list_empty(&kc->pages_jobs));
9762d1e580aSAlasdair G Kergon 	destroy_workqueue(kc->kcopyd_wq);
9772d1e580aSAlasdair G Kergon 	dm_io_client_destroy(kc->io_client);
9782d1e580aSAlasdair G Kergon 	client_free_pages(kc);
9796f1c819cSKent Overstreet 	mempool_exit(&kc->job_pool);
9802d1e580aSAlasdair G Kergon 	kfree(kc);
9812d1e580aSAlasdair G Kergon }
9822d1e580aSAlasdair G Kergon EXPORT_SYMBOL(dm_kcopyd_client_destroy);
983