// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2005-2007 Red Hat GmbH
 *
 * A target that delays reads and/or writes and can send
 * them to different devices.
 *
 * This file is released under the GPL.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/kthread.h>

#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "delay"

struct delay_class {
	struct dm_dev *dev;
	sector_t start;
	unsigned int delay;
	unsigned int ops;
};

struct delay_c {
	struct timer_list delay_timer;
	struct mutex process_bios_lock; /* hold while removing bios to be processed from list */
	spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */
	struct workqueue_struct *kdelayd_wq;
	struct work_struct flush_expired_bios;
	struct list_head delayed_bios;
	struct task_struct *worker;
	bool may_delay;

	struct delay_class read;
	struct delay_class write;
	struct delay_class flush;

	int argc;
};

struct dm_delay_info {
	struct delay_c *context;
	struct delay_class *class;
	struct list_head list;
	unsigned long expires;
};

static void handle_delayed_timer(struct timer_list *t)
{
	struct delay_c *dc = from_timer(dc, t, delay_timer);

	queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
}

static void queue_timeout(struct delay_c *dc, unsigned long expires)
{
	timer_reduce(&dc->delay_timer, expires);
}

static inline bool delay_is_fast(struct delay_c *dc)
{
	return !!dc->worker;
}

static void flush_bios(struct bio *bio)
{
	struct bio *n;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		dm_submit_bio_remap(bio, NULL);
		bio = n;
	}
}

static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
	struct dm_delay_info *delayed, *next;
	struct bio_list flush_bio_list;
	LIST_HEAD(local_list);
	unsigned long next_expires = 0;
	bool start_timer = false;
	bio_list_init(&flush_bio_list);

	mutex_lock(&dc->process_bios_lock);
	spin_lock(&dc->delayed_bios_lock);
	list_replace_init(&dc->delayed_bios, &local_list);
	spin_unlock(&dc->delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &local_list, list) {
		cond_resched();
		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
			struct bio *bio = dm_bio_from_per_bio_data(delayed,
						sizeof(struct dm_delay_info));
			list_del(&delayed->list);
			bio_list_add(&flush_bio_list, bio);
			delayed->class->ops--;
			continue;
		}

		if (!delay_is_fast(dc)) {
			if (!start_timer) {
				start_timer = true;
				next_expires = delayed->expires;
			} else {
				next_expires = min(next_expires, delayed->expires);
			}
		}
	}
	spin_lock(&dc->delayed_bios_lock);
	list_splice(&local_list, &dc->delayed_bios);
	spin_unlock(&dc->delayed_bios_lock);
	mutex_unlock(&dc->process_bios_lock);

	if (start_timer)
		queue_timeout(dc, next_expires);

	flush_bios(bio_list_get(&flush_bio_list));
}

static int flush_worker_fn(void *data)
{
	struct delay_c *dc = data;

	while (!kthread_should_stop()) {
		flush_delayed_bios(dc, false);
		spin_lock(&dc->delayed_bios_lock);
		if (unlikely(list_empty(&dc->delayed_bios))) {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock(&dc->delayed_bios_lock);
			schedule();
		} else {
			spin_unlock(&dc->delayed_bios_lock);
			cond_resched();
		}
	}

	return 0;
}

static void flush_expired_bios(struct work_struct *work)
{
	struct delay_c *dc;

	dc = container_of(work, struct delay_c, flush_expired_bios);
	flush_delayed_bios(dc, false);
}

static void delay_dtr(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	if (dc->kdelayd_wq) {
		timer_shutdown_sync(&dc->delay_timer);
		destroy_workqueue(dc->kdelayd_wq);
	}

	if (dc->read.dev)
		dm_put_device(ti, dc->read.dev);
	if (dc->write.dev)
		dm_put_device(ti, dc->write.dev);
	if (dc->flush.dev)
		dm_put_device(ti, dc->flush.dev);
	if (dc->worker)
		kthread_stop(dc->worker);

	mutex_destroy(&dc->process_bios_lock);

	kfree(dc);
}

static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv)
{
	int ret;
	unsigned long long tmpll;
	char dummy;

	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
		ti->error = "Invalid device sector";
		return -EINVAL;
	}
	c->start = tmpll;

	if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) {
		ti->error = "Invalid delay";
		return -EINVAL;
	}

	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev);
	if (ret) {
		ti->error = "Device lookup failed";
		return ret;
	}

	return 0;
}

/*
 * Mapping parameters:
 *    <device> <offset> <delay>
 *    [<write_device> <write_offset> <write_delay>
 *     [<flush_device> <flush_offset> <flush_delay>]]
 *
 * With separate write parameters, the first set is only used for reads.
 * With separate flush parameters, the second set is only used for writes;
 * otherwise flushes use the write parameters.
 * Offsets are specified in sectors.
 * Delays are specified in milliseconds.
 */
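/*
 * Usage sketch (illustrative only, not part of the driver): device names,
 * sizes and delay values below are placeholders. Assuming a backing device
 * /dev/sdX, a table such as the following delays all I/O by 500 ms:
 *
 *	echo "0 $(blockdev --getsz /dev/sdX) delay /dev/sdX 0 500" | \
 *		dmsetup create delayed
 *
 * The six-argument form passes reads through with no delay while delaying
 * writes (and, with no third parameter set, flushes) by 400 ms:
 *
 *	echo "0 $(blockdev --getsz /dev/sdX) delay /dev/sdX 0 0 /dev/sdX 0 400" | \
 *		dmsetup create delayed
 */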
static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct delay_c *dc;
	int ret;
	unsigned int max_delay;

	if (argc != 3 && argc != 6 && argc != 9) {
		ti->error = "Requires exactly 3, 6 or 9 arguments";
		return -EINVAL;
	}

	dc = kzalloc(sizeof(*dc), GFP_KERNEL);
	if (!dc) {
		ti->error = "Cannot allocate context";
		return -ENOMEM;
	}

	ti->private = dc;
	INIT_LIST_HEAD(&dc->delayed_bios);
	mutex_init(&dc->process_bios_lock);
	spin_lock_init(&dc->delayed_bios_lock);
	dc->may_delay = true;
	dc->argc = argc;

	ret = delay_class_ctr(ti, &dc->read, argv);
	if (ret)
		goto bad;
	max_delay = dc->read.delay;

	if (argc == 3) {
		ret = delay_class_ctr(ti, &dc->write, argv);
		if (ret)
			goto bad;
		ret = delay_class_ctr(ti, &dc->flush, argv);
		if (ret)
			goto bad;
		goto out;
	}

	ret = delay_class_ctr(ti, &dc->write, argv + 3);
	if (ret)
		goto bad;
	max_delay = max(max_delay, dc->write.delay);

	if (argc == 6) {
		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
		if (ret)
			goto bad;
		goto out;
	}

	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
	if (ret)
		goto bad;
	max_delay = max(max_delay, dc->flush.delay);

out:
	if (max_delay < 50) {
		/*
		 * In case of small requested delays, use kthread instead of
		 * timers and workqueue to achieve better latency.
		 */
		dc->worker = kthread_run(&flush_worker_fn, dc, "dm-delay-flush-worker");
		if (IS_ERR(dc->worker)) {
			ret = PTR_ERR(dc->worker);
			dc->worker = NULL;
			goto bad;
		}
	} else {
		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
		if (!dc->kdelayd_wq) {
			ret = -EINVAL;
			DMERR("Couldn't start kdelayd");
			goto bad;
		}
	}

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->accounts_remapped_io = true;
	ti->per_io_data_size = sizeof(struct dm_delay_info);
	return 0;

bad:
	delay_dtr(ti);
	return ret;
}

static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
{
	struct dm_delay_info *delayed;
	unsigned long expires = 0;

	if (!c->delay)
		return DM_MAPIO_REMAPPED;

	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));

	delayed->context = dc;
	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);

	spin_lock(&dc->delayed_bios_lock);
	if (unlikely(!dc->may_delay)) {
		spin_unlock(&dc->delayed_bios_lock);
		return DM_MAPIO_REMAPPED;
	}
	c->ops++;
	list_add_tail(&delayed->list, &dc->delayed_bios);
	spin_unlock(&dc->delayed_bios_lock);

	if (delay_is_fast(dc))
		wake_up_process(dc->worker);
	else
		queue_timeout(dc, expires);

	return DM_MAPIO_SUBMITTED;
}

static void delay_presuspend(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	spin_lock(&dc->delayed_bios_lock);
	dc->may_delay = false;
	spin_unlock(&dc->delayed_bios_lock);

	if (!delay_is_fast(dc))
		timer_delete(&dc->delay_timer);
	flush_delayed_bios(dc, true);
}

static void delay_resume(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	dc->may_delay = true;
}

static int delay_map(struct dm_target *ti, struct bio *bio)
{
	struct delay_c *dc = ti->private;
	struct delay_class *c;
	struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));

	if (bio_data_dir(bio) == WRITE) {
		if (unlikely(bio->bi_opf & REQ_PREFLUSH))
			c = &dc->flush;
		else
			c = &dc->write;
	} else {
		c = &dc->read;
	}
	delayed->class = c;
	bio_set_dev(bio, c->dev->bdev);
	bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

	return delay_bio(dc, c, bio);
}

#define DMEMIT_DELAY_CLASS(c) \
	DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)

static void delay_status(struct dm_target *ti, status_type_t type,
			 unsigned int status_flags, char *result, unsigned int maxlen)
{
	struct delay_c *dc = ti->private;
	int sz = 0;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops);
		break;

	case STATUSTYPE_TABLE:
		DMEMIT_DELAY_CLASS(&dc->read);
		if (dc->argc >= 6) {
			DMEMIT(" ");
			DMEMIT_DELAY_CLASS(&dc->write);
		}
		if (dc->argc >= 9) {
			DMEMIT(" ");
			DMEMIT_DELAY_CLASS(&dc->flush);
		}
		break;

	case STATUSTYPE_IMA:
		*result = '\0';
		break;
	}
}

static int delay_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	struct delay_c *dc = ti->private;
	int ret = 0;

	ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data);
	if (ret)
		goto out;
	ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data);
	if (ret)
		goto out;
	ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data);
	if (ret)
		goto out;

out:
	return ret;
}

static struct target_type delay_target = {
	.name = "delay",
	.version = {1, 4, 0},
	.features = DM_TARGET_PASSES_INTEGRITY,
	.module = THIS_MODULE,
	.ctr = delay_ctr,
	.dtr = delay_dtr,
	.map = delay_map,
	.presuspend = delay_presuspend,
	.resume = delay_resume,
	.status = delay_status,
	.iterate_devices = delay_iterate_devices,
};
module_dm(delay);

MODULE_DESCRIPTION(DM_NAME " delay target");
MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
MODULE_LICENSE("GPL");