1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2005-2007 Red Hat GmbH 4 * 5 * A target that delays reads and/or writes and can send 6 * them to different devices. 7 * 8 * This file is released under the GPL. 9 */ 10 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/blkdev.h> 14 #include <linux/bio.h> 15 #include <linux/slab.h> 16 #include <linux/kthread.h> 17 18 #include <linux/device-mapper.h> 19 20 #define DM_MSG_PREFIX "delay" 21 22 struct delay_class { 23 struct dm_dev *dev; 24 sector_t start; 25 unsigned int delay; 26 unsigned int ops; 27 }; 28 29 struct delay_c { 30 struct timer_list delay_timer; 31 struct mutex timer_lock; 32 struct workqueue_struct *kdelayd_wq; 33 struct work_struct flush_expired_bios; 34 struct list_head delayed_bios; 35 struct task_struct *worker; 36 atomic_t may_delay; 37 38 struct delay_class read; 39 struct delay_class write; 40 struct delay_class flush; 41 42 int argc; 43 }; 44 45 struct dm_delay_info { 46 struct delay_c *context; 47 struct delay_class *class; 48 struct list_head list; 49 unsigned long expires; 50 }; 51 52 static DEFINE_MUTEX(delayed_bios_lock); 53 54 static void handle_delayed_timer(struct timer_list *t) 55 { 56 struct delay_c *dc = from_timer(dc, t, delay_timer); 57 58 queue_work(dc->kdelayd_wq, &dc->flush_expired_bios); 59 } 60 61 static void queue_timeout(struct delay_c *dc, unsigned long expires) 62 { 63 mutex_lock(&dc->timer_lock); 64 65 if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires) 66 mod_timer(&dc->delay_timer, expires); 67 68 mutex_unlock(&dc->timer_lock); 69 } 70 71 static inline bool delay_is_fast(struct delay_c *dc) 72 { 73 return !!dc->worker; 74 } 75 76 static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all) 77 { 78 struct dm_delay_info *delayed, *next; 79 80 mutex_lock(&delayed_bios_lock); 81 list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { 82 if (flush_all || time_after_eq(jiffies, delayed->expires)) { 83 struct bio *bio = dm_bio_from_per_bio_data(delayed, 84 sizeof(struct dm_delay_info)); 85 list_del(&delayed->list); 86 dm_submit_bio_remap(bio, NULL); 87 delayed->class->ops--; 88 } 89 } 90 mutex_unlock(&delayed_bios_lock); 91 } 92 93 static int flush_worker_fn(void *data) 94 { 95 struct delay_c *dc = data; 96 97 while (1) { 98 flush_delayed_bios_fast(dc, false); 99 if (unlikely(list_empty(&dc->delayed_bios))) { 100 set_current_state(TASK_INTERRUPTIBLE); 101 schedule(); 102 } else 103 cond_resched(); 104 } 105 106 return 0; 107 } 108 109 static void flush_bios(struct bio *bio) 110 { 111 struct bio *n; 112 113 while (bio) { 114 n = bio->bi_next; 115 bio->bi_next = NULL; 116 dm_submit_bio_remap(bio, NULL); 117 bio = n; 118 } 119 } 120 121 static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all) 122 { 123 struct dm_delay_info *delayed, *next; 124 unsigned long next_expires = 0; 125 unsigned long start_timer = 0; 126 struct bio_list flush_bios = { }; 127 128 mutex_lock(&delayed_bios_lock); 129 list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { 130 if (flush_all || time_after_eq(jiffies, delayed->expires)) { 131 struct bio *bio = dm_bio_from_per_bio_data(delayed, 132 sizeof(struct dm_delay_info)); 133 list_del(&delayed->list); 134 bio_list_add(&flush_bios, bio); 135 delayed->class->ops--; 136 continue; 137 } 138 139 if (!start_timer) { 140 start_timer = 1; 141 next_expires = delayed->expires; 142 } else 143 next_expires = min(next_expires, delayed->expires); 144 } 145 mutex_unlock(&delayed_bios_lock); 146 147 if (start_timer) 148 queue_timeout(dc, next_expires); 149 150 return bio_list_get(&flush_bios); 151 } 152 153 static void flush_expired_bios(struct work_struct *work) 154 { 155 struct delay_c *dc; 156 157 dc = container_of(work, struct delay_c, flush_expired_bios); 158 if (delay_is_fast(dc)) 159 flush_delayed_bios_fast(dc, false); 160 else 161 flush_bios(flush_delayed_bios(dc, false)); 162 } 163 164 static void delay_dtr(struct dm_target *ti) 165 { 166 struct delay_c *dc = ti->private; 167 168 if (dc->kdelayd_wq) 169 destroy_workqueue(dc->kdelayd_wq); 170 171 if (dc->read.dev) 172 dm_put_device(ti, dc->read.dev); 173 if (dc->write.dev) 174 dm_put_device(ti, dc->write.dev); 175 if (dc->flush.dev) 176 dm_put_device(ti, dc->flush.dev); 177 if (dc->worker) 178 kthread_stop(dc->worker); 179 180 if (!delay_is_fast(dc)) 181 mutex_destroy(&dc->timer_lock); 182 183 kfree(dc); 184 } 185 186 static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv) 187 { 188 int ret; 189 unsigned long long tmpll; 190 char dummy; 191 192 if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) { 193 ti->error = "Invalid device sector"; 194 return -EINVAL; 195 } 196 c->start = tmpll; 197 198 if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) { 199 ti->error = "Invalid delay"; 200 return -EINVAL; 201 } 202 203 ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev); 204 if (ret) { 205 ti->error = "Device lookup failed"; 206 return ret; 207 } 208 209 return 0; 210 } 211 212 /* 213 * Mapping parameters: 214 * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] 215 * 216 * With separate write parameters, the first set is only used for reads. 217 * Offsets are specified in sectors. 218 * Delays are specified in milliseconds. 219 */ 220 static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) 221 { 222 struct delay_c *dc; 223 int ret; 224 unsigned int max_delay; 225 226 if (argc != 3 && argc != 6 && argc != 9) { 227 ti->error = "Requires exactly 3, 6 or 9 arguments"; 228 return -EINVAL; 229 } 230 231 dc = kzalloc(sizeof(*dc), GFP_KERNEL); 232 if (!dc) { 233 ti->error = "Cannot allocate context"; 234 return -ENOMEM; 235 } 236 237 ti->private = dc; 238 INIT_LIST_HEAD(&dc->delayed_bios); 239 atomic_set(&dc->may_delay, 1); 240 dc->argc = argc; 241 242 ret = delay_class_ctr(ti, &dc->read, argv); 243 if (ret) 244 goto bad; 245 max_delay = dc->read.delay; 246 247 if (argc == 3) { 248 ret = delay_class_ctr(ti, &dc->write, argv); 249 if (ret) 250 goto bad; 251 ret = delay_class_ctr(ti, &dc->flush, argv); 252 if (ret) 253 goto bad; 254 max_delay = max(max_delay, dc->write.delay); 255 max_delay = max(max_delay, dc->flush.delay); 256 goto out; 257 } 258 259 ret = delay_class_ctr(ti, &dc->write, argv + 3); 260 if (ret) 261 goto bad; 262 if (argc == 6) { 263 ret = delay_class_ctr(ti, &dc->flush, argv + 3); 264 if (ret) 265 goto bad; 266 max_delay = max(max_delay, dc->flush.delay); 267 goto out; 268 } 269 270 ret = delay_class_ctr(ti, &dc->flush, argv + 6); 271 if (ret) 272 goto bad; 273 max_delay = max(max_delay, dc->flush.delay); 274 275 out: 276 if (max_delay < 50) { 277 /* 278 * In case of small requested delays, use kthread instead of 279 * timers and workqueue to achieve better latency. 280 */ 281 dc->worker = kthread_create(&flush_worker_fn, dc, 282 "dm-delay-flush-worker"); 283 if (IS_ERR(dc->worker)) { 284 ret = PTR_ERR(dc->worker); 285 goto bad; 286 } 287 } else { 288 timer_setup(&dc->delay_timer, handle_delayed_timer, 0); 289 INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); 290 mutex_init(&dc->timer_lock); 291 dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); 292 if (!dc->kdelayd_wq) { 293 ret = -EINVAL; 294 DMERR("Couldn't start kdelayd"); 295 goto bad; 296 } 297 } 298 299 ti->num_flush_bios = 1; 300 ti->num_discard_bios = 1; 301 ti->accounts_remapped_io = true; 302 ti->per_io_data_size = sizeof(struct dm_delay_info); 303 return 0; 304 305 bad: 306 delay_dtr(ti); 307 return ret; 308 } 309 310 static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) 311 { 312 struct dm_delay_info *delayed; 313 unsigned long expires = 0; 314 315 if (!c->delay || !atomic_read(&dc->may_delay)) 316 return DM_MAPIO_REMAPPED; 317 318 delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); 319 320 delayed->context = dc; 321 delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); 322 323 mutex_lock(&delayed_bios_lock); 324 c->ops++; 325 list_add_tail(&delayed->list, &dc->delayed_bios); 326 mutex_unlock(&delayed_bios_lock); 327 328 if (delay_is_fast(dc)) 329 wake_up_process(dc->worker); 330 else 331 queue_timeout(dc, expires); 332 333 return DM_MAPIO_SUBMITTED; 334 } 335 336 static void delay_presuspend(struct dm_target *ti) 337 { 338 struct delay_c *dc = ti->private; 339 340 atomic_set(&dc->may_delay, 0); 341 342 if (delay_is_fast(dc)) 343 flush_delayed_bios_fast(dc, true); 344 else { 345 del_timer_sync(&dc->delay_timer); 346 flush_bios(flush_delayed_bios(dc, true)); 347 } 348 } 349 350 static void delay_resume(struct dm_target *ti) 351 { 352 struct delay_c *dc = ti->private; 353 354 atomic_set(&dc->may_delay, 1); 355 } 356 357 static int delay_map(struct dm_target *ti, struct bio *bio) 358 { 359 struct delay_c *dc = ti->private; 360 struct delay_class *c; 361 struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); 362 363 if (bio_data_dir(bio) == WRITE) { 364 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) 365 c = &dc->flush; 366 else 367 c = &dc->write; 368 } else { 369 c = &dc->read; 370 } 371 delayed->class = c; 372 bio_set_dev(bio, c->dev->bdev); 373 bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector); 374 375 return delay_bio(dc, c, bio); 376 } 377 378 #define DMEMIT_DELAY_CLASS(c) \ 379 DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay) 380 381 static void delay_status(struct dm_target *ti, status_type_t type, 382 unsigned int status_flags, char *result, unsigned int maxlen) 383 { 384 struct delay_c *dc = ti->private; 385 int sz = 0; 386 387 switch (type) { 388 case STATUSTYPE_INFO: 389 DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops); 390 break; 391 392 case STATUSTYPE_TABLE: 393 DMEMIT_DELAY_CLASS(&dc->read); 394 if (dc->argc >= 6) { 395 DMEMIT(" "); 396 DMEMIT_DELAY_CLASS(&dc->write); 397 } 398 if (dc->argc >= 9) { 399 DMEMIT(" "); 400 DMEMIT_DELAY_CLASS(&dc->flush); 401 } 402 break; 403 404 case STATUSTYPE_IMA: 405 *result = '\0'; 406 break; 407 } 408 } 409 410 static int delay_iterate_devices(struct dm_target *ti, 411 iterate_devices_callout_fn fn, void *data) 412 { 413 struct delay_c *dc = ti->private; 414 int ret = 0; 415 416 ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data); 417 if (ret) 418 goto out; 419 ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data); 420 if (ret) 421 goto out; 422 ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data); 423 if (ret) 424 goto out; 425 426 out: 427 return ret; 428 } 429 430 static struct target_type delay_target = { 431 .name = "delay", 432 .version = {1, 4, 0}, 433 .features = DM_TARGET_PASSES_INTEGRITY, 434 .module = THIS_MODULE, 435 .ctr = delay_ctr, 436 .dtr = delay_dtr, 437 .map = delay_map, 438 .presuspend = delay_presuspend, 439 .resume = delay_resume, 440 .status = delay_status, 441 .iterate_devices = delay_iterate_devices, 442 }; 443 module_dm(delay); 444 445 MODULE_DESCRIPTION(DM_NAME " delay target"); 446 MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>"); 447 MODULE_LICENSE("GPL"); 448