xref: /linux/drivers/md/dm-delay.c (revision c48a7c44a1d02516309015b6134c9bb982e17008)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2005-2007 Red Hat GmbH
 *
 * A target that delays reads and/or writes and can send
 * them to different devices.
 *
 * This file is released under the GPL.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/kthread.h>

#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "delay"

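/*
 * One delay "class": the device to remap to, the start offset on that
 * device (in sectors), the delay to apply (in milliseconds) and the
 * number of delayed bios currently queued for this class.
 */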
struct delay_class {
	struct dm_dev *dev;
	sector_t start;
	unsigned int delay;
	unsigned int ops;
};

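/*
 * Per-target context.  Delays of 50 ms and above use a timer plus the
 * "kdelayd" workqueue; shorter delays use a dedicated kthread (worker)
 * to keep latency low.  Delayed bios are queued on delayed_bios.
 */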
struct delay_c {
	struct timer_list delay_timer;
	struct mutex timer_lock;
	struct workqueue_struct *kdelayd_wq;
	struct work_struct flush_expired_bios;
	struct list_head delayed_bios;
	struct task_struct *worker;
	atomic_t may_delay;

	struct delay_class read;
	struct delay_class write;
	struct delay_class flush;

	int argc;
};

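/* Per-bio bookkeeping, stored in the target's per-bio data area. */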
struct dm_delay_info {
	struct delay_c *context;
	struct delay_class *class;
	struct list_head list;
	unsigned long expires;
};

static DEFINE_MUTEX(delayed_bios_lock);

static void handle_delayed_timer(struct timer_list *t)
{
	struct delay_c *dc = from_timer(dc, t, delay_timer);

	queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
}

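/*
 * (Re)arm the delay timer; only move an already pending timer if the new
 * expiry is earlier than the current one.
 */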
static void queue_timeout(struct delay_c *dc, unsigned long expires)
{
	mutex_lock(&dc->timer_lock);

	if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
		mod_timer(&dc->delay_timer, expires);

	mutex_unlock(&dc->timer_lock);
}

static inline bool delay_is_fast(struct delay_c *dc)
{
	return !!dc->worker;
}

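/*
 * Kthread path: submit all delayed bios whose expiry has passed (or all of
 * them when flush_all is set) directly from the worker.
 */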
static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
{
	struct dm_delay_info *delayed, *next;

	mutex_lock(&delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
			struct bio *bio = dm_bio_from_per_bio_data(delayed,
						sizeof(struct dm_delay_info));
			list_del(&delayed->list);
			dm_submit_bio_remap(bio, NULL);
			delayed->class->ops--;
		}
	}
	mutex_unlock(&delayed_bios_lock);
}

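/*
 * Worker thread for the low-delay path: repeatedly flush expired bios,
 * sleeping whenever the queue is empty until delay_bio() wakes it up.
 */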
static int flush_worker_fn(void *data)
{
	struct delay_c *dc = data;

	while (!kthread_should_stop()) {
		flush_delayed_bios_fast(dc, false);
		if (unlikely(list_empty(&dc->delayed_bios))) {
			set_current_state(TASK_INTERRUPTIBLE);
			/*
			 * Re-check the stop flag after setting the task state
			 * so a concurrent kthread_stop() cannot be missed and
			 * leave this thread sleeping forever.
			 */
			if (!kthread_should_stop())
				schedule();
			__set_current_state(TASK_RUNNING);
		} else
			cond_resched();
	}

	return 0;
}

static void flush_bios(struct bio *bio)
{
	struct bio *n;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		dm_submit_bio_remap(bio, NULL);
		bio = n;
	}
}

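/*
 * Timer/workqueue path: move expired bios (or all of them when flush_all is
 * set) to a private list and re-arm the timer for the earliest remaining
 * expiry.  Returns the list of bios to be submitted.
 */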
static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
	struct dm_delay_info *delayed, *next;
	unsigned long next_expires = 0;
	unsigned long start_timer = 0;
	struct bio_list flush_bios = { };

	mutex_lock(&delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
			struct bio *bio = dm_bio_from_per_bio_data(delayed,
						sizeof(struct dm_delay_info));
			list_del(&delayed->list);
			bio_list_add(&flush_bios, bio);
			delayed->class->ops--;
			continue;
		}

		if (!start_timer) {
			start_timer = 1;
			next_expires = delayed->expires;
		} else
			next_expires = min(next_expires, delayed->expires);
	}
	mutex_unlock(&delayed_bios_lock);

	if (start_timer)
		queue_timeout(dc, next_expires);

	return bio_list_get(&flush_bios);
}

static void flush_expired_bios(struct work_struct *work)
{
	struct delay_c *dc;

	dc = container_of(work, struct delay_c, flush_expired_bios);
	if (delay_is_fast(dc))
		flush_delayed_bios_fast(dc, false);
	else
		flush_bios(flush_delayed_bios(dc, false));
}

static void delay_dtr(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	if (dc->kdelayd_wq)
		destroy_workqueue(dc->kdelayd_wq);

	if (dc->read.dev)
		dm_put_device(ti, dc->read.dev);
	if (dc->write.dev)
		dm_put_device(ti, dc->write.dev);
	if (dc->flush.dev)
		dm_put_device(ti, dc->flush.dev);
	if (dc->worker)
		kthread_stop(dc->worker);

	if (!delay_is_fast(dc))
		mutex_destroy(&dc->timer_lock);

	kfree(dc);
}

static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv)
{
	int ret;
	unsigned long long tmpll;
	char dummy;

	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
		ti->error = "Invalid device sector";
		return -EINVAL;
	}
	c->start = tmpll;

	if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) {
		ti->error = "Invalid delay";
		return -EINVAL;
	}

	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev);
	if (ret) {
		ti->error = "Device lookup failed";
		return ret;
	}

	return 0;
}

/*
 * Mapping parameters:
 *    <device> <offset> <delay>
 *    [<write_device> <write_offset> <write_delay>
 *     [<flush_device> <flush_offset> <flush_delay>]]
 *
 * With separate write parameters, the first set is only used for reads.
 * With a separate flush set, the second set is only used for writes;
 * otherwise flushes follow the write parameters.
 * Offsets are specified in sectors.
 * Delays are specified in milliseconds.
 */
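/*
 * Example table line (illustrative only; the device path and length below
 * are placeholders): delay reads on /dev/sdX by 50 ms and writes/flushes
 * by 200 ms:
 *
 *   dmsetup create delayed --table \
 *     "0 `blockdev --getsz /dev/sdX` delay /dev/sdX 0 50 /dev/sdX 0 200"
 */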
static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct delay_c *dc;
	int ret;
	unsigned int max_delay;

	if (argc != 3 && argc != 6 && argc != 9) {
		ti->error = "Requires exactly 3, 6 or 9 arguments";
		return -EINVAL;
	}

	dc = kzalloc(sizeof(*dc), GFP_KERNEL);
	if (!dc) {
		ti->error = "Cannot allocate context";
		return -ENOMEM;
	}

	ti->private = dc;
	INIT_LIST_HEAD(&dc->delayed_bios);
	atomic_set(&dc->may_delay, 1);
	dc->argc = argc;

	ret = delay_class_ctr(ti, &dc->read, argv);
	if (ret)
		goto bad;
	max_delay = dc->read.delay;

	if (argc == 3) {
		ret = delay_class_ctr(ti, &dc->write, argv);
		if (ret)
			goto bad;
		ret = delay_class_ctr(ti, &dc->flush, argv);
		if (ret)
			goto bad;
		max_delay = max(max_delay, dc->write.delay);
		max_delay = max(max_delay, dc->flush.delay);
		goto out;
	}

	ret = delay_class_ctr(ti, &dc->write, argv + 3);
	if (ret)
		goto bad;
	if (argc == 6) {
		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
		if (ret)
			goto bad;
		max_delay = max(max_delay, dc->flush.delay);
		goto out;
	}

	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
	if (ret)
		goto bad;
	max_delay = max(max_delay, dc->flush.delay);

out:
	if (max_delay < 50) {
		/*
		 * In case of small requested delays, use kthread instead of
		 * timers and workqueue to achieve better latency.
		 */
		dc->worker = kthread_create(&flush_worker_fn, dc,
					    "dm-delay-flush-worker");
		if (IS_ERR(dc->worker)) {
			ret = PTR_ERR(dc->worker);
			/* Avoid kthread_stop() on an ERR_PTR in delay_dtr(). */
			dc->worker = NULL;
			goto bad;
		}
	} else {
		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
		mutex_init(&dc->timer_lock);
		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
		if (!dc->kdelayd_wq) {
			ret = -EINVAL;
			DMERR("Couldn't start kdelayd");
			goto bad;
		}
	}

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->accounts_remapped_io = true;
	ti->per_io_data_size = sizeof(struct dm_delay_info);
	return 0;

bad:
	delay_dtr(ti);
	return ret;
}

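/*
 * Queue a bio on the delayed list with its expiry time, then either wake
 * the worker thread or arm the timer.  Bios with no configured delay (or
 * while delaying is disabled during suspend) are remapped immediately.
 */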
static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
{
	struct dm_delay_info *delayed;
	unsigned long expires = 0;

	if (!c->delay || !atomic_read(&dc->may_delay))
		return DM_MAPIO_REMAPPED;

	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));

	delayed->context = dc;
	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);

	mutex_lock(&delayed_bios_lock);
	c->ops++;
	list_add_tail(&delayed->list, &dc->delayed_bios);
	mutex_unlock(&delayed_bios_lock);

	if (delay_is_fast(dc))
		wake_up_process(dc->worker);
	else
		queue_timeout(dc, expires);

	return DM_MAPIO_SUBMITTED;
}

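/*
 * Stop delaying new bios and push out everything that is still queued
 * before the device is suspended.
 */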
static void delay_presuspend(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	atomic_set(&dc->may_delay, 0);

	if (delay_is_fast(dc))
		flush_delayed_bios_fast(dc, true);
	else {
		del_timer_sync(&dc->delay_timer);
		flush_bios(flush_delayed_bios(dc, true));
	}
}

static void delay_resume(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	atomic_set(&dc->may_delay, 1);
}

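/*
 * Select the delay class from the bio: preflush bios use the flush class,
 * other writes the write class, and reads the read class.  Remap the bio
 * to the class's device/offset and hand it to delay_bio().
 */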
static int delay_map(struct dm_target *ti, struct bio *bio)
{
	struct delay_c *dc = ti->private;
	struct delay_class *c;
	struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));

	if (bio_data_dir(bio) == WRITE) {
		if (unlikely(bio->bi_opf & REQ_PREFLUSH))
			c = &dc->flush;
		else
			c = &dc->write;
	} else {
		c = &dc->read;
	}
	delayed->class = c;
	bio_set_dev(bio, c->dev->bdev);
	bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

	return delay_bio(dc, c, bio);
}

#define DMEMIT_DELAY_CLASS(c) \
	DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)

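/*
 * STATUSTYPE_INFO reports the number of currently delayed bios per class
 * (read, write, flush); STATUSTYPE_TABLE reproduces the constructor
 * arguments that were given.
 */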
static void delay_status(struct dm_target *ti, status_type_t type,
			 unsigned int status_flags, char *result, unsigned int maxlen)
{
	struct delay_c *dc = ti->private;
	int sz = 0;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops);
		break;

	case STATUSTYPE_TABLE:
		DMEMIT_DELAY_CLASS(&dc->read);
		if (dc->argc >= 6) {
			DMEMIT(" ");
			DMEMIT_DELAY_CLASS(&dc->write);
		}
		if (dc->argc >= 9) {
			DMEMIT(" ");
			DMEMIT_DELAY_CLASS(&dc->flush);
		}
		break;

	case STATUSTYPE_IMA:
		*result = '\0';
		break;
	}
}

static int delay_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	struct delay_c *dc = ti->private;
	int ret = 0;

	ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data);
	if (ret)
		goto out;
	ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data);
	if (ret)
		goto out;
	ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data);
	if (ret)
		goto out;

out:
	return ret;
}

static struct target_type delay_target = {
	.name	     = "delay",
	.version     = {1, 4, 0},
	.features    = DM_TARGET_PASSES_INTEGRITY,
	.module      = THIS_MODULE,
	.ctr	     = delay_ctr,
	.dtr	     = delay_dtr,
	.map	     = delay_map,
	.presuspend  = delay_presuspend,
	.resume	     = delay_resume,
	.status	     = delay_status,
	.iterate_devices = delay_iterate_devices,
};
module_dm(delay);

MODULE_DESCRIPTION(DM_NAME " delay target");
MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
MODULE_LICENSE("GPL");