// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"

#include <linux/blk-mq.h>

#define DM_MSG_PREFIX "core-rq"

/*
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
	struct mapped_device *md;
	struct dm_target *ti;
	struct request *orig, *clone;
	struct kthread_work work;
	blk_status_t error;
	union map_info info;
	struct dm_stats_aux stats_aux;
	unsigned long duration_jiffies;
	unsigned int n_sectors;
	unsigned int completed;
};

#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048
static unsigned int dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
static unsigned int dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;

/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */
#define RESERVED_REQUEST_BASED_IOS	256
static unsigned int reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

unsigned int dm_get_reserved_rq_based_ios(void)
{
	return __dm_get_module_param(&reserved_rq_based_ios,
				     RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}

static unsigned int dm_get_blk_mq_nr_hw_queues(void)
{
	return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
}

static unsigned int dm_get_blk_mq_queue_depth(void)
{
	return __dm_get_module_param(&dm_mq_queue_depth,
				     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
}

int dm_request_based(struct mapped_device *md)
{
	return queue_is_mq(md->queue);
}

void dm_start_queue(struct request_queue *q)
{
	blk_mq_unquiesce_queue(q);
	blk_mq_kick_requeue_list(q);
}

void dm_stop_queue(struct request_queue *q)
{
	blk_mq_quiesce_queue(q);
}

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone)
{
	struct dm_rq_clone_bio_info *info =
		container_of(clone, struct dm_rq_clone_bio_info, clone);
	struct dm_rq_target_io *tio = info->tio;
	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
	blk_status_t error = clone->bi_status;
	bool is_last = !clone->bi_next;

	bio_put(clone);

	if (tio->error)
		/*
		 * An error has already been detected on the request.
		 * Once an error has occurred, just let clone->end_io()
		 * handle the remainder.
		 */
		return;
	else if (error) {
		/*
		 * Don't report the error to the upper layer yet.
		 * The error handling decision is made by the target driver
		 * when the request is completed.
		 */
		tio->error = error;
		goto exit;
	}

	/*
	 * I/O for the bio successfully completed.
	 * Report the data completion to the upper layer.
	 */
	tio->completed += nr_bytes;

	if (!is_last)
		return;
	/*
	 * At this moment we know this is the last bio of the cloned request,
	 * and all cloned bios have been released, so reset the clone request's
	 * bio pointer to avoid double free.
	 */
	tio->clone->bio = NULL;
exit:
	/*
	 * Update the original request.
	 * Do not use blk_mq_end_request() here, because it may complete
	 * the original request before the clone, and break the ordering.
	 */
	blk_update_request(tio->orig, BLK_STS_OK, tio->completed);
}
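
/*
 * Note on the partial-completion flow above: each clone bio carries a
 * dm_rq_clone_bio_info in its front pad (allocated from the md's bio_set
 * when the clone is prepared in setup_clone() below), so end_clone_bio()
 * can credit completed bytes back to the original request with
 * blk_update_request() while the final blk_mq_end_request() is left to the
 * clone's ->end_io path.
 */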

static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
	return blk_mq_rq_to_pdu(rq);
}

static void rq_end_stats(struct mapped_device *md, struct request *orig)
{
	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);

		tio->duration_jiffies = jiffies - tio->duration_jiffies;
		dm_stats_account_io(&md->stats, rq_data_dir(orig),
				    blk_rq_pos(orig), tio->n_sectors, true,
				    tio->duration_jiffies, &tio->stats_aux);
	}
}

/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Or do dm_get() before calling this function and dm_put() later.
 */
static void rq_completed(struct mapped_device *md)
{
	/*
	 * dm_put() must be at the end of this function. See the comment above
	 */
	dm_put(md);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, blk_status_t error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;

	blk_rq_unprep_clone(clone);
	tio->ti->type->release_clone_rq(clone, NULL);

	rq_end_stats(md, rq);
	blk_mq_end_request(rq, error);
	rq_completed(md);
}

static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
	blk_mq_delay_kick_requeue_list(q, msecs);
}

void dm_mq_kick_requeue_list(struct mapped_device *md)
{
	__dm_mq_kick_requeue_list(md->queue, 0);
}
EXPORT_SYMBOL(dm_mq_kick_requeue_list);

static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
{
	blk_mq_requeue_request(rq, false);
	__dm_mq_kick_requeue_list(rq->q, msecs);
}

static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
{
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;
	unsigned long delay_ms = delay_requeue ? 100 : 0;

	rq_end_stats(md, rq);
	if (tio->clone) {
		blk_rq_unprep_clone(tio->clone);
		tio->ti->type->release_clone_rq(tio->clone, NULL);
	}

	dm_mq_delay_requeue_request(rq, delay_ms);
	rq_completed(md);
}
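
/*
 * dm_done() below consults the target's ->rq_end_io hook, if any, to decide
 * how to finish the clone. A minimal sketch of such a hook (hypothetical
 * target code, not part of this file; 'example_rq_end_io' is an illustrative
 * name) could look like:
 *
 *	static int example_rq_end_io(struct dm_target *ti, struct request *clone,
 *				     blk_status_t error, union map_info *info)
 *	{
 *		if (error == BLK_STS_RESOURCE)
 *			return DM_ENDIO_DELAY_REQUEUE;	// retry after a delay
 *		return DM_ENDIO_DONE;			// complete with 'error'
 *	}
 */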

static void dm_done(struct request *clone, blk_status_t error, bool mapped)
{
	int r = DM_ENDIO_DONE;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = NULL;

	if (tio->ti) {
		rq_end_io = tio->ti->type->rq_end_io;

		if (mapped && rq_end_io)
			r = rq_end_io(tio->ti, clone, error, &tio->info);
	}

	if (unlikely(error == BLK_STS_TARGET)) {
		if (req_op(clone) == REQ_OP_DISCARD &&
		    !clone->q->limits.max_discard_sectors)
			blk_queue_disable_discard(tio->md->queue);
		else if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
			 !clone->q->limits.max_write_zeroes_sectors)
			blk_queue_disable_write_zeroes(tio->md->queue);
	}

	switch (r) {
	case DM_ENDIO_DONE:
		/* The target wants to complete the I/O */
		dm_end_request(clone, error);
		break;
	case DM_ENDIO_INCOMPLETE:
		/* The target will handle the I/O */
		return;
	case DM_ENDIO_REQUEUE:
		/* The target wants to requeue the I/O */
		dm_requeue_original_request(tio, false);
		break;
	case DM_ENDIO_DELAY_REQUEUE:
		/* The target wants to requeue the I/O after a delay */
		dm_requeue_original_request(tio, true);
		break;
	default:
		DMCRIT("unimplemented target endio return value: %d", r);
		BUG();
	}
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
	bool mapped = true;
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;

	if (!clone) {
		struct mapped_device *md = tio->md;

		rq_end_stats(md, rq);
		blk_mq_end_request(rq, tio->error);
		rq_completed(md);
		return;
	}

	if (rq->rq_flags & RQF_FAILED)
		mapped = false;

	dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, blk_status_t error)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);

	tio->error = error;
	blk_mq_complete_request(rq);
}

/*
 * Complete the not-mapped clone and the original request with the error status
 * through softirq context.
 * Target's rq_end_io() function isn't called.
 * This may be used when the target's clone_and_map_rq() function fails.
 */
static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
	rq->rq_flags |= RQF_FAILED;
	dm_complete_request(rq, error);
}
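
/*
 * Completion funnels through dm_complete_request(): end_clone_request()
 * (below) fires when the whole clone finishes, dm_kill_unmapped_request()
 * covers map failures, and end_clone_bio() handles partial progress. The
 * first two stash the status in tio->error and let blk_mq_complete_request()
 * invoke dm_softirq_done() through the .complete hook of dm_mq_ops.
 */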

static enum rq_end_io_ret end_clone_request(struct request *clone,
					    blk_status_t error,
					    const struct io_comp_batch *iob)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	dm_complete_request(tio->orig, error);
	return RQ_END_IO_NONE;
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
				 void *data)
{
	struct dm_rq_target_io *tio = data;
	struct dm_rq_clone_bio_info *info =
		container_of(bio, struct dm_rq_clone_bio_info, clone);

	info->orig = bio_orig;
	info->tio = tio;
	bio->bi_end_io = end_clone_bio;

	return 0;
}

static int setup_clone(struct request *clone, struct request *rq,
		       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	int r;

	r = blk_rq_prep_clone(clone, rq, &tio->md->mempools->bs, gfp_mask,
			      dm_rq_bio_constructor, tio);
	if (r)
		return r;

	clone->end_io = end_clone_request;
	clone->end_io_data = tio;

	tio->clone = clone;

	return 0;
}

static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
		     struct mapped_device *md)
{
	tio->md = md;
	tio->ti = NULL;
	tio->clone = NULL;
	tio->orig = rq;
	tio->error = 0;
	tio->completed = 0;
	/*
	 * Avoid initializing info for blk-mq; it passes
	 * target-specific data through info.ptr
	 * (see: dm_mq_init_request)
	 */
	if (!md->init_tio_pdu)
		memset(&tio->info, 0, sizeof(tio->info));
}

/*
 * Returns:
 * DM_MAPIO_*       : the request has been processed as indicated
 * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio)
{
	int r;
	struct dm_target *ti = tio->ti;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;
	struct request *clone = NULL;
	blk_status_t ret;

	r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/* The target has taken the I/O to submit by itself later */
		break;
	case DM_MAPIO_REMAPPED:
		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
			/* -ENOMEM */
			ti->type->release_clone_rq(clone, &tio->info);
			return DM_MAPIO_REQUEUE;
		}

		/* The target has remapped the I/O so dispatch it */
		trace_block_rq_remap(clone, disk_devt(dm_disk(md)),
				     blk_rq_pos(rq));
		ret = blk_insert_cloned_request(clone);
		switch (ret) {
		case BLK_STS_OK:
			break;
		case BLK_STS_RESOURCE:
		case BLK_STS_DEV_RESOURCE:
			blk_rq_unprep_clone(clone);
			blk_mq_cleanup_rq(clone);
			tio->ti->type->release_clone_rq(clone, &tio->info);
			tio->clone = NULL;
			return DM_MAPIO_REQUEUE;
		default:
			/* must complete clone in terms of original request */
			dm_complete_request(rq, ret);
		}
		break;
	case DM_MAPIO_REQUEUE:
		/* The target wants to requeue the I/O */
		break;
	case DM_MAPIO_DELAY_REQUEUE:
		/* The target wants to requeue the I/O after a delay */
		dm_requeue_original_request(tio, true);
		break;
	case DM_MAPIO_KILL:
		/* The target wants to complete the I/O */
		dm_kill_unmapped_request(rq, BLK_STS_IOERR);
		break;
	default:
		DMCRIT("unimplemented target map return value: %d", r);
		BUG();
	}

	return r;
}
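
/*
 * For reference, a target's ->clone_and_map_rq is expected to hand back a
 * request allocated on an underlying queue (or a DM_MAPIO_* code telling
 * map_request() what to do instead). A minimal sketch, assuming a
 * hypothetical single-device target whose per-target context and dm_dev
 * ('example_ctx', 'ctx->dev') are illustrative names only:
 *
 *	static int example_clone_and_map(struct dm_target *ti, struct request *rq,
 *					 union map_info *info, struct request **clone)
 *	{
 *		struct example_ctx *ctx = ti->private;
 *		struct request_queue *q = bdev_get_queue(ctx->dev->bdev);
 *
 *		*clone = blk_mq_alloc_request(q, rq->cmd_flags | REQ_NOMERGE,
 *					      BLK_MQ_REQ_NOWAIT);
 *		if (IS_ERR(*clone))
 *			return DM_MAPIO_DELAY_REQUEUE;	// busy; retry later
 *		return DM_MAPIO_REMAPPED;		// dispatch the clone
 *	}
 */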

/* DEPRECATED: previously used for request-based merge heuristic in dm_request_fn() */
ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
	return sprintf(buf, "%u\n", 0);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
						     const char *buf, size_t count)
{
	return count;
}

static void dm_start_request(struct mapped_device *md, struct request *orig)
{
	blk_mq_start_request(orig);

	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);

		tio->duration_jiffies = jiffies;
		tio->n_sectors = blk_rq_sectors(orig);
		dm_stats_account_io(&md->stats, rq_data_dir(orig),
				    blk_rq_pos(orig), tio->n_sectors, false, 0,
				    &tio->stats_aux);
	}

	/*
	 * Hold the md reference here for the in-flight I/O.
	 * We can't rely on the reference count held by the device opener,
	 * because the device may be closed during the request completion
	 * when all bios are completed.
	 * See the comment in rq_completed() too.
	 */
	dm_get(md);
}

static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
			      unsigned int hctx_idx, unsigned int numa_node)
{
	struct mapped_device *md = set->driver_data;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

	/*
	 * Must initialize md member of tio, otherwise it won't
	 * be available in dm_mq_queue_rq.
	 */
	tio->md = md;

	if (md->init_tio_pdu) {
		/* target-specific per-io data is immediately after the tio */
		tio->info.ptr = tio + 1;
	}

	return 0;
}

static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
	struct mapped_device *md = tio->md;
	struct dm_target *ti = md->immutable_target;

	/*
	 * blk-mq's unquiesce may come from outside events, such as an
	 * elevator switch, an nr_requests update or other changes, so a
	 * request may arrive during suspend; simply ask blk-mq to requeue it.
	 */
	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)))
		return BLK_STS_RESOURCE;

	if (unlikely(!ti)) {
		int srcu_idx;
		struct dm_table *map;

		map = dm_get_live_table(md, &srcu_idx);
		if (unlikely(!map)) {
			DMERR_LIMIT("%s: mapping table unavailable, erroring io",
				    dm_device_name(md));
			dm_put_live_table(md, srcu_idx);
			return BLK_STS_IOERR;
		}
		ti = dm_table_find_target(map, 0);
		dm_put_live_table(md, srcu_idx);
	}

	if (ti->type->busy && ti->type->busy(ti))
		return BLK_STS_RESOURCE;

	dm_start_request(md, rq);

	/* Init tio using md established in .init_request */
	init_tio(tio, rq, md);

	/*
	 * Establish tio->ti before calling map_request().
	 */
	tio->ti = ti;

	/* Direct call is fine since .queue_rq allows allocations */
	if (map_request(tio) == DM_MAPIO_REQUEUE) {
		/* Undo dm_start_request() before requeuing */
		rq_end_stats(md, rq);
		rq_completed(md);
		return BLK_STS_RESOURCE;
	}

	return BLK_STS_OK;
}
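
/*
 * The ops below tie the pieces together: .queue_rq dispatches the original
 * request through the target's clone_and_map_rq via map_request(),
 * .complete runs dm_softirq_done() once blk_mq_complete_request() fires,
 * and .init_request seeds each preallocated PDU (sized by cmd_size in the
 * tag_set) with the md and the per-io data pointer.
 */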

static const struct blk_mq_ops dm_mq_ops = {
	.queue_rq = dm_mq_queue_rq,
	.complete = dm_softirq_done,
	.init_request = dm_mq_init_request,
};

int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
	struct dm_target *immutable_tgt;
	int err;

	md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
	if (!md->tag_set)
		return -ENOMEM;

	md->tag_set->ops = &dm_mq_ops;
	md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
	md->tag_set->numa_node = md->numa_node_id;
	md->tag_set->flags = BLK_MQ_F_STACKING;
	md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
	md->tag_set->driver_data = md;

	md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
	immutable_tgt = dm_table_get_immutable_target(t);
	if (immutable_tgt && immutable_tgt->per_io_data_size) {
		/* any target-specific per-io data is immediately after the tio */
		md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
		md->init_tio_pdu = true;
	}

	err = blk_mq_alloc_tag_set(md->tag_set);
	if (err)
		goto out_kfree_tag_set;

	err = blk_mq_init_allocated_queue(md->tag_set, md->queue);
	if (err)
		goto out_tag_set;
	return 0;

out_tag_set:
	blk_mq_free_tag_set(md->tag_set);
out_kfree_tag_set:
	kfree(md->tag_set);
	md->tag_set = NULL;

	return err;
}

void dm_mq_cleanup_mapped_device(struct mapped_device *md)
{
	if (md->tag_set) {
		blk_mq_free_tag_set(md->tag_set);
		kfree(md->tag_set);
		md->tag_set = NULL;
	}
}

module_param(reserved_rq_based_ios, uint, 0644);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

/* Unused, but preserved for userspace compatibility */
static bool use_blk_mq = true;
module_param(use_blk_mq, bool, 0644);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, 0644);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, 0644);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
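
/*
 * The parameters above (perm 0644) can be set at module load time or changed
 * later via sysfs. Illustrative example, assuming this file is built into
 * the dm_mod module and the values shown are arbitrary:
 *
 *	modprobe dm_mod dm_mq_nr_hw_queues=4 dm_mq_queue_depth=1024
 *	echo 512 > /sys/module/dm_mod/parameters/dm_mq_queue_depth
 */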