1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2005-2007 Red Hat GmbH 4 * 5 * A target that delays reads and/or writes and can send 6 * them to different devices. 7 * 8 * This file is released under the GPL. 9 */ 10 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/blkdev.h> 14 #include <linux/bio.h> 15 #include <linux/slab.h> 16 #include <linux/kthread.h> 17 18 #include <linux/device-mapper.h> 19 20 #define DM_MSG_PREFIX "delay" 21 22 struct delay_class { 23 struct dm_dev *dev; 24 sector_t start; 25 unsigned int delay; 26 unsigned int ops; 27 }; 28 29 struct delay_c { 30 struct timer_list delay_timer; 31 struct mutex timer_lock; 32 struct workqueue_struct *kdelayd_wq; 33 struct work_struct flush_expired_bios; 34 struct list_head delayed_bios; 35 struct task_struct *worker; 36 bool may_delay; 37 38 struct delay_class read; 39 struct delay_class write; 40 struct delay_class flush; 41 42 int argc; 43 }; 44 45 struct dm_delay_info { 46 struct delay_c *context; 47 struct delay_class *class; 48 struct list_head list; 49 unsigned long expires; 50 }; 51 52 static DEFINE_MUTEX(delayed_bios_lock); 53 54 static void handle_delayed_timer(struct timer_list *t) 55 { 56 struct delay_c *dc = from_timer(dc, t, delay_timer); 57 58 queue_work(dc->kdelayd_wq, &dc->flush_expired_bios); 59 } 60 61 static void queue_timeout(struct delay_c *dc, unsigned long expires) 62 { 63 mutex_lock(&dc->timer_lock); 64 65 if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires) 66 mod_timer(&dc->delay_timer, expires); 67 68 mutex_unlock(&dc->timer_lock); 69 } 70 71 static inline bool delay_is_fast(struct delay_c *dc) 72 { 73 return !!dc->worker; 74 } 75 76 static void flush_bios(struct bio *bio) 77 { 78 struct bio *n; 79 80 while (bio) { 81 n = bio->bi_next; 82 bio->bi_next = NULL; 83 dm_submit_bio_remap(bio, NULL); 84 bio = n; 85 } 86 } 87 88 static void flush_delayed_bios(struct delay_c *dc, bool flush_all) 89 { 90 struct dm_delay_info *delayed, *next; 91 struct bio_list flush_bio_list; 92 unsigned long next_expires = 0; 93 bool start_timer = false; 94 bio_list_init(&flush_bio_list); 95 96 mutex_lock(&delayed_bios_lock); 97 list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { 98 cond_resched(); 99 if (flush_all || time_after_eq(jiffies, delayed->expires)) { 100 struct bio *bio = dm_bio_from_per_bio_data(delayed, 101 sizeof(struct dm_delay_info)); 102 list_del(&delayed->list); 103 bio_list_add(&flush_bio_list, bio); 104 delayed->class->ops--; 105 continue; 106 } 107 108 if (!delay_is_fast(dc)) { 109 if (!start_timer) { 110 start_timer = true; 111 next_expires = delayed->expires; 112 } else { 113 next_expires = min(next_expires, delayed->expires); 114 } 115 } 116 } 117 mutex_unlock(&delayed_bios_lock); 118 119 if (start_timer) 120 queue_timeout(dc, next_expires); 121 122 flush_bios(bio_list_get(&flush_bio_list)); 123 } 124 125 static int flush_worker_fn(void *data) 126 { 127 struct delay_c *dc = data; 128 129 while (!kthread_should_stop()) { 130 flush_delayed_bios(dc, false); 131 mutex_lock(&delayed_bios_lock); 132 if (unlikely(list_empty(&dc->delayed_bios))) { 133 set_current_state(TASK_INTERRUPTIBLE); 134 mutex_unlock(&delayed_bios_lock); 135 schedule(); 136 } else { 137 mutex_unlock(&delayed_bios_lock); 138 cond_resched(); 139 } 140 } 141 142 return 0; 143 } 144 145 static void flush_expired_bios(struct work_struct *work) 146 { 147 struct delay_c *dc; 148 149 dc = container_of(work, struct delay_c, flush_expired_bios); 150 flush_delayed_bios(dc, false); 151 } 152 153 static void delay_dtr(struct dm_target *ti) 154 { 155 struct delay_c *dc = ti->private; 156 157 if (dc->kdelayd_wq) 158 destroy_workqueue(dc->kdelayd_wq); 159 160 if (dc->read.dev) 161 dm_put_device(ti, dc->read.dev); 162 if (dc->write.dev) 163 dm_put_device(ti, dc->write.dev); 164 if (dc->flush.dev) 165 dm_put_device(ti, dc->flush.dev); 166 if (dc->worker) 167 kthread_stop(dc->worker); 168 169 mutex_destroy(&dc->timer_lock); 170 171 kfree(dc); 172 } 173 174 static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv) 175 { 176 int ret; 177 unsigned long long tmpll; 178 char dummy; 179 180 if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) { 181 ti->error = "Invalid device sector"; 182 return -EINVAL; 183 } 184 c->start = tmpll; 185 186 if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) { 187 ti->error = "Invalid delay"; 188 return -EINVAL; 189 } 190 191 ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev); 192 if (ret) { 193 ti->error = "Device lookup failed"; 194 return ret; 195 } 196 197 return 0; 198 } 199 200 /* 201 * Mapping parameters: 202 * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] 203 * 204 * With separate write parameters, the first set is only used for reads. 205 * Offsets are specified in sectors. 206 * Delays are specified in milliseconds. 207 */ 208 static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) 209 { 210 struct delay_c *dc; 211 int ret; 212 unsigned int max_delay; 213 214 if (argc != 3 && argc != 6 && argc != 9) { 215 ti->error = "Requires exactly 3, 6 or 9 arguments"; 216 return -EINVAL; 217 } 218 219 dc = kzalloc(sizeof(*dc), GFP_KERNEL); 220 if (!dc) { 221 ti->error = "Cannot allocate context"; 222 return -ENOMEM; 223 } 224 225 ti->private = dc; 226 INIT_LIST_HEAD(&dc->delayed_bios); 227 mutex_init(&dc->timer_lock); 228 dc->may_delay = true; 229 dc->argc = argc; 230 231 ret = delay_class_ctr(ti, &dc->read, argv); 232 if (ret) 233 goto bad; 234 max_delay = dc->read.delay; 235 236 if (argc == 3) { 237 ret = delay_class_ctr(ti, &dc->write, argv); 238 if (ret) 239 goto bad; 240 ret = delay_class_ctr(ti, &dc->flush, argv); 241 if (ret) 242 goto bad; 243 max_delay = max(max_delay, dc->write.delay); 244 max_delay = max(max_delay, dc->flush.delay); 245 goto out; 246 } 247 248 ret = delay_class_ctr(ti, &dc->write, argv + 3); 249 if (ret) 250 goto bad; 251 if (argc == 6) { 252 ret = delay_class_ctr(ti, &dc->flush, argv + 3); 253 if (ret) 254 goto bad; 255 max_delay = max(max_delay, dc->flush.delay); 256 goto out; 257 } 258 259 ret = delay_class_ctr(ti, &dc->flush, argv + 6); 260 if (ret) 261 goto bad; 262 max_delay = max(max_delay, dc->flush.delay); 263 264 out: 265 if (max_delay < 50) { 266 /* 267 * In case of small requested delays, use kthread instead of 268 * timers and workqueue to achieve better latency. 269 */ 270 dc->worker = kthread_create(&flush_worker_fn, dc, 271 "dm-delay-flush-worker"); 272 if (IS_ERR(dc->worker)) { 273 ret = PTR_ERR(dc->worker); 274 dc->worker = NULL; 275 goto bad; 276 } 277 } else { 278 timer_setup(&dc->delay_timer, handle_delayed_timer, 0); 279 INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); 280 dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); 281 if (!dc->kdelayd_wq) { 282 ret = -EINVAL; 283 DMERR("Couldn't start kdelayd"); 284 goto bad; 285 } 286 } 287 288 ti->num_flush_bios = 1; 289 ti->num_discard_bios = 1; 290 ti->accounts_remapped_io = true; 291 ti->per_io_data_size = sizeof(struct dm_delay_info); 292 return 0; 293 294 bad: 295 delay_dtr(ti); 296 return ret; 297 } 298 299 static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) 300 { 301 struct dm_delay_info *delayed; 302 unsigned long expires = 0; 303 304 if (!c->delay) 305 return DM_MAPIO_REMAPPED; 306 307 delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); 308 309 delayed->context = dc; 310 delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); 311 312 mutex_lock(&delayed_bios_lock); 313 if (unlikely(!dc->may_delay)) { 314 mutex_unlock(&delayed_bios_lock); 315 return DM_MAPIO_REMAPPED; 316 } 317 c->ops++; 318 list_add_tail(&delayed->list, &dc->delayed_bios); 319 mutex_unlock(&delayed_bios_lock); 320 321 if (delay_is_fast(dc)) 322 wake_up_process(dc->worker); 323 else 324 queue_timeout(dc, expires); 325 326 return DM_MAPIO_SUBMITTED; 327 } 328 329 static void delay_presuspend(struct dm_target *ti) 330 { 331 struct delay_c *dc = ti->private; 332 333 mutex_lock(&delayed_bios_lock); 334 dc->may_delay = false; 335 mutex_unlock(&delayed_bios_lock); 336 337 if (!delay_is_fast(dc)) 338 del_timer_sync(&dc->delay_timer); 339 flush_delayed_bios(dc, true); 340 } 341 342 static void delay_resume(struct dm_target *ti) 343 { 344 struct delay_c *dc = ti->private; 345 346 dc->may_delay = true; 347 } 348 349 static int delay_map(struct dm_target *ti, struct bio *bio) 350 { 351 struct delay_c *dc = ti->private; 352 struct delay_class *c; 353 struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); 354 355 if (bio_data_dir(bio) == WRITE) { 356 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) 357 c = &dc->flush; 358 else 359 c = &dc->write; 360 } else { 361 c = &dc->read; 362 } 363 delayed->class = c; 364 bio_set_dev(bio, c->dev->bdev); 365 bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector); 366 367 return delay_bio(dc, c, bio); 368 } 369 370 #define DMEMIT_DELAY_CLASS(c) \ 371 DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay) 372 373 static void delay_status(struct dm_target *ti, status_type_t type, 374 unsigned int status_flags, char *result, unsigned int maxlen) 375 { 376 struct delay_c *dc = ti->private; 377 int sz = 0; 378 379 switch (type) { 380 case STATUSTYPE_INFO: 381 DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops); 382 break; 383 384 case STATUSTYPE_TABLE: 385 DMEMIT_DELAY_CLASS(&dc->read); 386 if (dc->argc >= 6) { 387 DMEMIT(" "); 388 DMEMIT_DELAY_CLASS(&dc->write); 389 } 390 if (dc->argc >= 9) { 391 DMEMIT(" "); 392 DMEMIT_DELAY_CLASS(&dc->flush); 393 } 394 break; 395 396 case STATUSTYPE_IMA: 397 *result = '\0'; 398 break; 399 } 400 } 401 402 static int delay_iterate_devices(struct dm_target *ti, 403 iterate_devices_callout_fn fn, void *data) 404 { 405 struct delay_c *dc = ti->private; 406 int ret = 0; 407 408 ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data); 409 if (ret) 410 goto out; 411 ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data); 412 if (ret) 413 goto out; 414 ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data); 415 if (ret) 416 goto out; 417 418 out: 419 return ret; 420 } 421 422 static struct target_type delay_target = { 423 .name = "delay", 424 .version = {1, 4, 0}, 425 .features = DM_TARGET_PASSES_INTEGRITY, 426 .module = THIS_MODULE, 427 .ctr = delay_ctr, 428 .dtr = delay_dtr, 429 .map = delay_map, 430 .presuspend = delay_presuspend, 431 .resume = delay_resume, 432 .status = delay_status, 433 .iterate_devices = delay_iterate_devices, 434 }; 435 module_dm(delay); 436 437 MODULE_DESCRIPTION(DM_NAME " delay target"); 438 MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>"); 439 MODULE_LICENSE("GPL"); 440