xref: /linux/drivers/md/dm-delay.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005-2007 Red Hat GmbH
4  *
5  * A target that delays reads and/or writes and can send
6  * them to different devices.
7  *
8  * This file is released under the GPL.
9  */
10 
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/blkdev.h>
14 #include <linux/bio.h>
15 #include <linux/slab.h>
16 #include <linux/kthread.h>
17 
18 #include <linux/device-mapper.h>
19 
/*
 * Message prefix for this target's log output; presumably picked up by the
 * device-mapper logging macros (DMERR() is used in delay_ctr()).
 */
#define DM_MSG_PREFIX "delay"
21 
/*
 * Delay configuration for one kind of I/O. struct delay_c holds one instance
 * each for reads, writes and flushes.
 */
struct delay_class {
	struct dm_dev *dev;	/* underlying device, taken with dm_get_device() */
	sector_t start;		/* sector offset added to the bio when remapping */
	unsigned int delay;	/* delay in milliseconds; 0 means do not delay */
	unsigned int ops;	/* bios of this class currently held on the delay list */
};
28 
/*
 * Per-target state, allocated (zeroed) in delay_ctr() and stored in
 * ti->private.
 *
 * Exactly one of two flushing mechanisms is set up by the constructor:
 * a kthread (worker != NULL) or a timer plus workqueue (kdelayd_wq != NULL).
 */
struct delay_c {
	struct timer_list delay_timer;		/* timer mode: fires handle_delayed_timer() */
	struct mutex timer_lock;		/* serializes the check-and-arm in queue_timeout() */
	struct workqueue_struct *kdelayd_wq;	/* timer mode: workqueue running flush_expired_bios */
	struct work_struct flush_expired_bios;	/* timer mode: work item queued by the timer */
	struct list_head delayed_bios;		/* dm_delay_info entries waiting to be released */
	struct task_struct *worker;		/* kthread mode: thread running flush_worker_fn() */
	bool may_delay;				/* cleared in presuspend, set again in resume */

	struct delay_class read;		/* settings applied to reads */
	struct delay_class write;		/* settings applied to writes without REQ_PREFLUSH */
	struct delay_class flush;		/* settings applied to writes with REQ_PREFLUSH */

	int argc;				/* constructor argument count, used by STATUSTYPE_TABLE */
};
44 
/*
 * Per-bio bookkeeping, stored in the bio's per-bio data area (the target sets
 * ti->per_io_data_size to sizeof(struct dm_delay_info)).
 */
struct dm_delay_info {
	struct delay_c *context;	/* owning target state, set in delay_bio() */
	struct delay_class *class;	/* class chosen in delay_map(); its ops counter tracks this bio */
	struct list_head list;		/* link in delay_c.delayed_bios */
	unsigned long expires;		/* jiffies value at which the bio may be released */
};
51 
/*
 * Protects the delayed_bios lists, the per-class ops counters and the
 * may_delay check in delay_bio(). This is a single file-scope lock, so it is
 * shared by every delay target instance.
 */
static DEFINE_MUTEX(delayed_bios_lock);
53 
54 static void handle_delayed_timer(struct timer_list *t)
55 {
56 	struct delay_c *dc = from_timer(dc, t, delay_timer);
57 
58 	queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
59 }
60 
61 static void queue_timeout(struct delay_c *dc, unsigned long expires)
62 {
63 	mutex_lock(&dc->timer_lock);
64 
65 	if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
66 		mod_timer(&dc->delay_timer, expires);
67 
68 	mutex_unlock(&dc->timer_lock);
69 }
70 
71 static inline bool delay_is_fast(struct delay_c *dc)
72 {
73 	return !!dc->worker;
74 }
75 
76 static void flush_bios(struct bio *bio)
77 {
78 	struct bio *n;
79 
80 	while (bio) {
81 		n = bio->bi_next;
82 		bio->bi_next = NULL;
83 		dm_submit_bio_remap(bio, NULL);
84 		bio = n;
85 	}
86 }
87 
88 static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
89 {
90 	struct dm_delay_info *delayed, *next;
91 	struct bio_list flush_bio_list;
92 	unsigned long next_expires = 0;
93 	bool start_timer = false;
94 	bio_list_init(&flush_bio_list);
95 
96 	mutex_lock(&delayed_bios_lock);
97 	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
98 		cond_resched();
99 		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
100 			struct bio *bio = dm_bio_from_per_bio_data(delayed,
101 						sizeof(struct dm_delay_info));
102 			list_del(&delayed->list);
103 			bio_list_add(&flush_bio_list, bio);
104 			delayed->class->ops--;
105 			continue;
106 		}
107 
108 		if (!delay_is_fast(dc)) {
109 			if (!start_timer) {
110 				start_timer = true;
111 				next_expires = delayed->expires;
112 			} else {
113 				next_expires = min(next_expires, delayed->expires);
114 			}
115 		}
116 	}
117 	mutex_unlock(&delayed_bios_lock);
118 
119 	if (start_timer)
120 		queue_timeout(dc, next_expires);
121 
122 	flush_bios(bio_list_get(&flush_bio_list));
123 }
124 
/*
 * Thread function of the flush worker used for small delays.
 *
 * @data: the struct delay_c of the target
 *
 * Loops until kthread_should_stop(): releases expired bios, then either
 * sleeps (list empty) or yields and polls again (bios still pending).
 * Always returns 0.
 */
static int flush_worker_fn(void *data)
{
	struct delay_c *dc = data;

	while (!kthread_should_stop()) {
		flush_delayed_bios(dc, false);
		mutex_lock(&delayed_bios_lock);
		if (unlikely(list_empty(&dc->delayed_bios))) {
			/*
			 * Mark the task as sleeping before the lock is dropped:
			 * delay_bio() queues under the same lock and calls
			 * wake_up_process() afterwards, so a bio added right
			 * after the unlock still wakes this thread.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			mutex_unlock(&delayed_bios_lock);
			schedule();
		} else {
			/* Bios are still waiting for their deadline: keep polling. */
			mutex_unlock(&delayed_bios_lock);
			cond_resched();
		}
	}

	return 0;
}
144 
145 static void flush_expired_bios(struct work_struct *work)
146 {
147 	struct delay_c *dc;
148 
149 	dc = container_of(work, struct delay_c, flush_expired_bios);
150 	flush_delayed_bios(dc, false);
151 }
152 
153 static void delay_dtr(struct dm_target *ti)
154 {
155 	struct delay_c *dc = ti->private;
156 
157 	if (dc->kdelayd_wq)
158 		destroy_workqueue(dc->kdelayd_wq);
159 
160 	if (dc->read.dev)
161 		dm_put_device(ti, dc->read.dev);
162 	if (dc->write.dev)
163 		dm_put_device(ti, dc->write.dev);
164 	if (dc->flush.dev)
165 		dm_put_device(ti, dc->flush.dev);
166 	if (dc->worker)
167 		kthread_stop(dc->worker);
168 
169 	mutex_destroy(&dc->timer_lock);
170 
171 	kfree(dc);
172 }
173 
174 static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv)
175 {
176 	int ret;
177 	unsigned long long tmpll;
178 	char dummy;
179 
180 	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
181 		ti->error = "Invalid device sector";
182 		return -EINVAL;
183 	}
184 	c->start = tmpll;
185 
186 	if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) {
187 		ti->error = "Invalid delay";
188 		return -EINVAL;
189 	}
190 
191 	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev);
192 	if (ret) {
193 		ti->error = "Device lookup failed";
194 		return ret;
195 	}
196 
197 	return 0;
198 }
199 
200 /*
201  * Mapping parameters:
202  *    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
203  *
204  * With separate write parameters, the first set is only used for reads.
205  * Offsets are specified in sectors.
206  * Delays are specified in milliseconds.
207  */
208 static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
209 {
210 	struct delay_c *dc;
211 	int ret;
212 	unsigned int max_delay;
213 
214 	if (argc != 3 && argc != 6 && argc != 9) {
215 		ti->error = "Requires exactly 3, 6 or 9 arguments";
216 		return -EINVAL;
217 	}
218 
219 	dc = kzalloc(sizeof(*dc), GFP_KERNEL);
220 	if (!dc) {
221 		ti->error = "Cannot allocate context";
222 		return -ENOMEM;
223 	}
224 
225 	ti->private = dc;
226 	INIT_LIST_HEAD(&dc->delayed_bios);
227 	mutex_init(&dc->timer_lock);
228 	dc->may_delay = true;
229 	dc->argc = argc;
230 
231 	ret = delay_class_ctr(ti, &dc->read, argv);
232 	if (ret)
233 		goto bad;
234 	max_delay = dc->read.delay;
235 
236 	if (argc == 3) {
237 		ret = delay_class_ctr(ti, &dc->write, argv);
238 		if (ret)
239 			goto bad;
240 		ret = delay_class_ctr(ti, &dc->flush, argv);
241 		if (ret)
242 			goto bad;
243 		max_delay = max(max_delay, dc->write.delay);
244 		max_delay = max(max_delay, dc->flush.delay);
245 		goto out;
246 	}
247 
248 	ret = delay_class_ctr(ti, &dc->write, argv + 3);
249 	if (ret)
250 		goto bad;
251 	if (argc == 6) {
252 		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
253 		if (ret)
254 			goto bad;
255 		max_delay = max(max_delay, dc->flush.delay);
256 		goto out;
257 	}
258 
259 	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
260 	if (ret)
261 		goto bad;
262 	max_delay = max(max_delay, dc->flush.delay);
263 
264 out:
265 	if (max_delay < 50) {
266 		/*
267 		 * In case of small requested delays, use kthread instead of
268 		 * timers and workqueue to achieve better latency.
269 		 */
270 		dc->worker = kthread_create(&flush_worker_fn, dc,
271 					    "dm-delay-flush-worker");
272 		if (IS_ERR(dc->worker)) {
273 			ret = PTR_ERR(dc->worker);
274 			dc->worker = NULL;
275 			goto bad;
276 		}
277 	} else {
278 		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
279 		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
280 		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
281 		if (!dc->kdelayd_wq) {
282 			ret = -EINVAL;
283 			DMERR("Couldn't start kdelayd");
284 			goto bad;
285 		}
286 	}
287 
288 	ti->num_flush_bios = 1;
289 	ti->num_discard_bios = 1;
290 	ti->accounts_remapped_io = true;
291 	ti->per_io_data_size = sizeof(struct dm_delay_info);
292 	return 0;
293 
294 bad:
295 	delay_dtr(ti);
296 	return ret;
297 }
298 
299 static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
300 {
301 	struct dm_delay_info *delayed;
302 	unsigned long expires = 0;
303 
304 	if (!c->delay)
305 		return DM_MAPIO_REMAPPED;
306 
307 	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
308 
309 	delayed->context = dc;
310 	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
311 
312 	mutex_lock(&delayed_bios_lock);
313 	if (unlikely(!dc->may_delay)) {
314 		mutex_unlock(&delayed_bios_lock);
315 		return DM_MAPIO_REMAPPED;
316 	}
317 	c->ops++;
318 	list_add_tail(&delayed->list, &dc->delayed_bios);
319 	mutex_unlock(&delayed_bios_lock);
320 
321 	if (delay_is_fast(dc))
322 		wake_up_process(dc->worker);
323 	else
324 		queue_timeout(dc, expires);
325 
326 	return DM_MAPIO_SUBMITTED;
327 }
328 
/*
 * Presuspend hook: stop delaying new bios and release everything that is
 * currently queued.
 */
static void delay_presuspend(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	/*
	 * Clear may_delay under the lock delay_bio() holds while queuing, so
	 * no bio is added to the list once the lock has been released here.
	 */
	mutex_lock(&delayed_bios_lock);
	dc->may_delay = false;
	mutex_unlock(&delayed_bios_lock);

	/* The timer is only set up when the worker kthread is not used. */
	if (!delay_is_fast(dc))
		del_timer_sync(&dc->delay_timer);
	/* flush_all: release every bio regardless of its deadline. */
	flush_delayed_bios(dc, true);
}
341 
/*
 * Resume hook: allow delay_bio() to queue bios again.
 *
 * NOTE(review): may_delay is written here without taking delayed_bios_lock,
 * unlike in delay_presuspend(); presumably safe because no I/O is mapped
 * while the target is suspended -- confirm.
 */
static void delay_resume(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	dc->may_delay = true;
}
348 
349 static int delay_map(struct dm_target *ti, struct bio *bio)
350 {
351 	struct delay_c *dc = ti->private;
352 	struct delay_class *c;
353 	struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
354 
355 	if (bio_data_dir(bio) == WRITE) {
356 		if (unlikely(bio->bi_opf & REQ_PREFLUSH))
357 			c = &dc->flush;
358 		else
359 			c = &dc->write;
360 	} else {
361 		c = &dc->read;
362 	}
363 	delayed->class = c;
364 	bio_set_dev(bio, c->dev->bdev);
365 	bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
366 
367 	return delay_bio(dc, c, bio);
368 }
369 
/*
 * Emit one delay class as "<device name> <start sector> <delay>" via
 * DMEMIT(); used by delay_status() for STATUSTYPE_TABLE.
 */
#define DMEMIT_DELAY_CLASS(c) \
	DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)
372 
373 static void delay_status(struct dm_target *ti, status_type_t type,
374 			 unsigned int status_flags, char *result, unsigned int maxlen)
375 {
376 	struct delay_c *dc = ti->private;
377 	int sz = 0;
378 
379 	switch (type) {
380 	case STATUSTYPE_INFO:
381 		DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops);
382 		break;
383 
384 	case STATUSTYPE_TABLE:
385 		DMEMIT_DELAY_CLASS(&dc->read);
386 		if (dc->argc >= 6) {
387 			DMEMIT(" ");
388 			DMEMIT_DELAY_CLASS(&dc->write);
389 		}
390 		if (dc->argc >= 9) {
391 			DMEMIT(" ");
392 			DMEMIT_DELAY_CLASS(&dc->flush);
393 		}
394 		break;
395 
396 	case STATUSTYPE_IMA:
397 		*result = '\0';
398 		break;
399 	}
400 }
401 
402 static int delay_iterate_devices(struct dm_target *ti,
403 				 iterate_devices_callout_fn fn, void *data)
404 {
405 	struct delay_c *dc = ti->private;
406 	int ret = 0;
407 
408 	ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data);
409 	if (ret)
410 		goto out;
411 	ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data);
412 	if (ret)
413 		goto out;
414 	ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data);
415 	if (ret)
416 		goto out;
417 
418 out:
419 	return ret;
420 }
421 
/* Target type descriptor wiring the functions above to the "delay" target. */
static struct target_type delay_target = {
	.name	     = "delay",
	.version     = {1, 4, 0},
	.features    = DM_TARGET_PASSES_INTEGRITY,
	.module      = THIS_MODULE,
	.ctr	     = delay_ctr,
	.dtr	     = delay_dtr,
	.map	     = delay_map,
	.presuspend  = delay_presuspend,
	.resume	     = delay_resume,
	.status	     = delay_status,
	.iterate_devices = delay_iterate_devices,
};
/* Presumably expands to the module init/exit code registering delay_target. */
module_dm(delay);

MODULE_DESCRIPTION(DM_NAME " delay target");
MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
MODULE_LICENSE("GPL");
440